From 8846ac052c71558f0c4d25a4819cb69c43a77293 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 10 Oct 2023 08:25:10 +0200 Subject: [PATCH 01/10] Factor out helper to read container configuration This is a plain code move from `containers_list` into a standalone function. Added value is - reusability - documentation, incl. type annotation Also adds rendering of the utility module's documentation. This is a first step towards resolving #238 --- datalad_container/containers_list.py | 19 ++---------- datalad_container/utils.py | 45 ++++++++++++++++++++++++++++ docs/source/index.rst | 2 ++ 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/datalad_container/containers_list.py b/datalad_container/containers_list.py index 38ee9b04..df71468a 100644 --- a/datalad_container/containers_list.py +++ b/datalad_container/containers_list.py @@ -19,6 +19,8 @@ from datalad.coreapi import subdatasets from datalad.ui import ui +from datalad_container.utils import get_container_configuration + lgr = logging.getLogger("datalad.containers.containers_list") @@ -77,22 +79,7 @@ def __call__(dataset=None, recursive=False, contains=None): yield c # all info is in the dataset config! - var_prefix = 'datalad.containers.' - containers = {} - for var, value in ds.config.items(): - if not var.startswith(var_prefix): - # not an interesting variable - continue - var_comps = var[len(var_prefix):].split('.') - cname = var_comps[0] - ccfgname = '.'.join(var_comps[1:]) - if not ccfgname: - continue - - cinfo = containers.get(cname, {}) - cinfo[ccfgname] = value - - containers[cname] = cinfo + containers = get_container_configuration(ds) for k, v in containers.items(): if 'image' not in v: diff --git a/datalad_container/utils.py b/datalad_container/utils.py index 0e272f2a..e4d80d9d 100644 --- a/datalad_container/utils.py +++ b/datalad_container/utils.py @@ -1,5 +1,9 @@ +"""Collection of common utilities""" + +from datalad.distribution.dataset import Dataset from datalad.support.external_versions import external_versions + def get_container_command(): for command in ["apptainer", "singularity"]: container_system_version = external_versions[f"cmd:{command}"] @@ -9,3 +13,44 @@ def get_container_command(): raise RuntimeError("Did not find apptainer or singularity") +def get_container_configuration( + ds: Dataset, +) -> dict: + """Report all container-related configuration in a dataset + + Such configuration is identified by the item name pattern:: + + datalad.containers.. + + Parameters + ---------- + ds: Dataset + Dataset instance to report configuration on. + + Returns + ------- + dict + Keys are the names of configured containers and values are dictionaries + with their respective configuration items (with the + ``datalad.containers..`` prefix removed from their + keys). + """ + # all info is in the dataset config! + var_prefix = 'datalad.containers.' + containers = {} + for var, value in ds.config.items(): + if not var.startswith(var_prefix): + # not an interesting variable + continue + var_comps = var[len(var_prefix):].split('.') + cname = var_comps[0] + ccfgname = '.'.join(var_comps[1:]) + if not ccfgname: + continue + + cinfo = containers.get(cname, {}) + cinfo[ccfgname] = value + + containers[cname] = cinfo + + return containers diff --git a/docs/source/index.rst b/docs/source/index.rst index a21ac4c8..e15bdd28 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -51,4 +51,6 @@ Python API containers_list containers_run + utils + .. |---| unicode:: U+02014 .. em dash From dbb2f914e43cf467858a2b7941ded686ae88908f Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 10 Oct 2023 08:51:11 +0200 Subject: [PATCH 02/10] Enhance `get_container_configuration()` to report on a single container This is a common use case in commands other than `containers_list`. Albeit not strictly necessary (all configuration could be read and processed first, and sub-selection could happen in user code), it is a low-complexity change. Moreover, we envision more configuration post-processing to happen in the future (see https://github.com/datalad/datalad-container/issues/238). This would change the cost assessment of loading everything upfront. --- datalad_container/utils.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/datalad_container/utils.py b/datalad_container/utils.py index e4d80d9d..b39dfbef 100644 --- a/datalad_container/utils.py +++ b/datalad_container/utils.py @@ -1,5 +1,7 @@ """Collection of common utilities""" +from __future__ import annotations + from datalad.distribution.dataset import Dataset from datalad.support.external_versions import external_versions @@ -15,6 +17,7 @@ def get_container_command(): def get_container_configuration( ds: Dataset, + name: str | None = None, ) -> dict: """Report all container-related configuration in a dataset @@ -26,6 +29,10 @@ def get_container_configuration( ---------- ds: Dataset Dataset instance to report configuration on. + name: str, optional + If given, the reported configuration will be limited to the container + with this exact name. In this case, only a single ``dict`` is returned, + not nested dictionaries. Returns ------- @@ -34,6 +41,11 @@ def get_container_configuration( with their respective configuration items (with the ``datalad.containers..`` prefix removed from their keys). + If `name` is given, only a single ``dict`` with the configuration + items of the matching container is returned (i.e., there will be no + outer ``dict`` with container names as keys). + If not (matching) container configuration exists, and empty dictionary + is returned. """ # all info is in the dataset config! var_prefix = 'datalad.containers.' @@ -44,6 +56,10 @@ def get_container_configuration( continue var_comps = var[len(var_prefix):].split('.') cname = var_comps[0] + if name and name != cname: + # we are looking for a specific container's configuration + # and this is not it + continue ccfgname = '.'.join(var_comps[1:]) if not ccfgname: continue @@ -53,4 +69,4 @@ def get_container_configuration( containers[cname] = cinfo - return containers + return containers if name is None else containers.get(name, {}) From b1b37626b19703dadb33bd49b0660237ccc9e52a Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 10 Oct 2023 08:59:02 +0200 Subject: [PATCH 03/10] Update implementation and documentation of `containers_remove` This changeset maintains the exact behavior of the command, and merely adds documentation and replaces custom configuration access with the new `get_container_configuration()` helper. The documentation may communicate suboptimal command semantics, but these were given before, and are only written down now. Also see https://github.com/datalad/datalad-container/issues/240 for additional aspects re a future refactoring. Configuration writes now use the ``scope=branch`` approach. It was introduced with datalad v0.16 and requires no dependency adjustments. --- datalad_container/containers_remove.py | 34 +++++++++++++++++--------- 1 file changed, 23 insertions(+), 11 deletions(-) diff --git a/datalad_container/containers_remove.py b/datalad_container/containers_remove.py index 7710a2d3..7e0e95ef 100644 --- a/datalad_container/containers_remove.py +++ b/datalad_container/containers_remove.py @@ -15,6 +15,8 @@ from datalad.support.constraints import EnsureStr from datalad.interface.results import get_status_dict +from datalad_container.utils import get_container_configuration + lgr = logging.getLogger("datalad.containers.containers_remove") @@ -24,15 +26,24 @@ class ContainersRemove(Interface): # first docstring line is used a short description in the cmdline help # the rest is put in the verbose help and manpage """Remove a known container from a dataset + + This command is only removing a container from the committed + Dataset configuration (configuration scope ``branch``). It will not + modify any other configuration scopes. + + This command is *not* dropping the container image associated with the + removed record, because it may still be needed for other dataset versions. + In order to drop the container image, use the 'drop' command prior + to removing the container configuration. """ # parameters of the command, must be exhaustive _params_ = dict( dataset=Parameter( args=("-d", "--dataset"), - doc="""specify the dataset to query. If no dataset is given, an - attempt is made to identify the dataset based on the current - working directory""", + doc="""specify the dataset from removing a container. If no dataset + is given, an attempt is made to identify the dataset based on the + current working directory""", constraints=EnsureDataset() | EnsureNone()), name=Parameter( args=("name",), @@ -42,7 +53,9 @@ class ContainersRemove(Interface): ), remove_image=Parameter( args=("-i", "--remove-image",), - doc="""if set, remove container image as well""", + doc="""if set, remove container image as well. Even with this flag, + the container image content will not be dropped. Use the 'drop' + command explicitly before removing the container configuration.""", action="store_true", ), ) @@ -59,12 +72,11 @@ def __call__(name, dataset=None, remove_image=False): action='containers_remove', logger=lgr) - section = 'datalad.containers.{}'.format(name) - imagecfg = '{}.image'.format(section) + container_cfg = get_container_configuration(ds, name) to_save = [] - if remove_image and imagecfg in ds.config: - imagepath = ds.config.get(imagecfg) + if remove_image and 'image' in container_cfg: + imagepath = container_cfg['image'] if op.lexists(op.join(ds.path, imagepath)): for r in ds.remove( path=imagepath, @@ -78,10 +90,10 @@ def __call__(name, dataset=None, remove_image=False): yield r to_save.append(imagepath) - if section in ds.config.sections(): + if container_cfg: ds.config.remove_section( - section, - where='dataset', + f'datalad.containers.{name}', + scope='branch', reload=True) res['status'] = 'ok' to_save.append(op.join('.datalad', 'config')) From 11c5bb2e4f0af539752b1cdb1b21047f34c79055 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 10 Oct 2023 09:12:59 +0200 Subject: [PATCH 04/10] Add a note why `containers_run()` can stay unmodified... ...for the purpose of a standardization of container configuration reads on `get_container_configuration()`. --- datalad_container/containers_run.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index d7b4e796..e2c2c84a 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -81,6 +81,14 @@ def __call__(cmd, container_name=None, dataset=None, ds = require_dataset(dataset, check_installed=True, purpose='run a containerized command execution') + # this following block locates the target container. this involves a + # configuration look-up. This is not using + # get_container_configuration(), because it needs to account for a + # wide range of scenarios, including the installation of the dataset(s) + # that will eventually provide (the configuration) for the container. + # However, internally this is calling `containers_list()`, which is + # using get_container_configuration(), so any normalization of + # configuration on-read, get still be implemented in this helper. container = None for res in find_container_(ds, container_name): if res.get("action") == "containers": From 9b95b7862c4419fb9e3b69621a8d447a5d09ced0 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 10 Oct 2023 09:47:02 +0200 Subject: [PATCH 05/10] Reduce some of the noise in the test battery This is done by turning of the result rendering. This revealed that the command implementations cause uncondtional result rendering https://github.com/datalad/datalad-container/issues/241 --- datalad_container/tests/test_containers.py | 89 ++++++++++------- datalad_container/tests/test_run.py | 110 +++++++++++---------- 2 files changed, 110 insertions(+), 89 deletions(-) diff --git a/datalad_container/tests/test_containers.py b/datalad_container/tests/test_containers.py index 16cb58dc..b08601ce 100644 --- a/datalad_container/tests/test_containers.py +++ b/datalad_container/tests/test_containers.py @@ -29,27 +29,32 @@ from datalad_container.tests.utils import add_pyscript_image +common_kwargs = {'result_renderer': 'disabled'} + @with_tempfile def test_add_noop(path=None): - ds = Dataset(path).create() + ds = Dataset(path).create(**common_kwargs) ok_clean_git(ds.path) assert_raises(TypeError, ds.containers_add) # fails when there is no image - assert_status('error', ds.containers_add('name', on_failure='ignore')) + assert_status( + 'error', + ds.containers_add('name', on_failure='ignore', **common_kwargs)) # no config change ok_clean_git(ds.path) # place a dummy "image" file with open(op.join(ds.path, 'dummy'), 'w') as f: f.write('some') - ds.save('dummy') + ds.save('dummy', **common_kwargs) ok_clean_git(ds.path) # config will be added, as long as there is a file, even when URL access # fails res = ds.containers_add( 'broken', url='bogus-protocol://bogus-server', image='dummy', - on_failure='ignore' + on_failure='ignore', + **common_kwargs ) assert_status('ok', res) assert_result_count(res, 1, action='save', status='ok') @@ -59,7 +64,7 @@ def test_add_noop(path=None): @with_tree(tree={"foo.img": "doesn't matter 0", "bar.img": "doesn't matter 1"}) def test_add_local_path(path=None, local_file=None): - ds = Dataset(path).create() + ds = Dataset(path).create(**common_kwargs) res = ds.containers_add(name="foobert", url=op.join(local_file, "foo.img")) foo_target = op.join(path, ".datalad", "environments", "foobert", "image") @@ -94,12 +99,12 @@ def test_container_files(ds_path=None, local_file=None, url=None): local_file = get_local_file_url(op.join(local_file, 'some_container.img')) # prepare dataset: - ds = Dataset(ds_path).create() + ds = Dataset(ds_path).create(**common_kwargs) # non-default location: ds.config.add("datalad.containers.location", value=op.join(".datalad", "test-environments"), where='dataset') - ds.save(message="Configure container mountpoint") + ds.save(message="Configure container mountpoint", **common_kwargs) # no containers yet: res = ds.containers_list(**RAW_KWDS) @@ -108,7 +113,7 @@ def test_container_files(ds_path=None, local_file=None, url=None): # add first "image": must end up at the configured default location target_path = op.join( ds.path, ".datalad", "test-environments", "first", "image") - res = ds.containers_add(name="first", url=local_file) + res = ds.containers_add(name="first", url=local_file, **common_kwargs) ok_clean_git(ds.repo) assert_result_count(res, 1, status="ok", type="file", path=target_path, @@ -125,7 +130,7 @@ def test_container_files(ds_path=None, local_file=None, url=None): # and kill it again # but needs name assert_raises(TypeError, ds.containers_remove) - res = ds.containers_remove('first', remove_image=True) + res = ds.containers_remove('first', remove_image=True, **common_kwargs) assert_status('ok', res) assert_result_count(ds.containers_list(**RAW_KWDS), 0) # image removed @@ -143,25 +148,28 @@ def test_extra_inputs(ds_path=None): overlay2_file = 'overlay2.img' # prepare dataset: - ds = Dataset(ds_path).create(force=True) - ds.save() + ds = Dataset(ds_path).create(force=True, **common_kwargs) + ds.save(**common_kwargs) ds.containers_add( name="container", image=container_file, call_fmt="apptainer exec {img} {cmd}", + **common_kwargs ) ds.containers_add( name="container-with-overlay", image=container_file, call_fmt="apptainer exec --overlay {img_dirpath}/overlay1.img {img} {cmd}", - extra_input=[overlay1_file] + extra_input=[overlay1_file], + **common_kwargs ) ds.containers_add( name="container-with-two-overlays", image=container_file, call_fmt="apptainer exec --overlay {img_dirpath}/overlay1.img --overlay {img_dirpath}/overlay2.img:ro {img} {cmd}", - extra_input=[overlay1_file, overlay2_file] + extra_input=[overlay1_file, overlay2_file], + **common_kwargs ) res = ds.containers_list(**RAW_KWDS) @@ -181,28 +189,33 @@ def test_container_update(ds_path=None, local_file=None, url=None): url_bar = get_local_file_url(op.join(local_file, 'bar.img')) img = op.join(".datalad", "environments", "foo", "image") - ds = Dataset(ds_path).create() + ds = Dataset(ds_path).create(**common_kwargs) - ds.containers_add(name="foo", call_fmt="call-fmt1", url=url_foo) + ds.containers_add(name="foo", call_fmt="call-fmt1", url=url_foo, + **common_kwargs) # Abort without --update flag. - res = ds.containers_add(name="foo", on_failure="ignore") + res = ds.containers_add(name="foo", on_failure="ignore", + **common_kwargs) assert_result_count(res, 1, action="containers_add", status="impossible") # Abort if nothing to update is specified. - res = ds.containers_add(name="foo", update=True, on_failure="ignore") + res = ds.containers_add(name="foo", update=True, on_failure="ignore", + **common_kwargs) assert_result_count(res, 1, action="containers_add", status="impossible", message="No values to update specified") # Update call format. - ds.containers_add(name="foo", update=True, call_fmt="call-fmt2") + ds.containers_add(name="foo", update=True, call_fmt="call-fmt2", + **common_kwargs) assert_equal(ds.config.get("datalad.containers.foo.cmdexec"), "call-fmt2") ok_file_has_content(op.join(ds.path, img), "foo") # Update URL/image. - ds.drop(img) # Make sure it works even with absent content. - res = ds.containers_add(name="foo", update=True, url=url_bar) + ds.drop(img, **common_kwargs) # Make sure it works even with absent content. + res = ds.containers_add(name="foo", update=True, url=url_bar, + **common_kwargs) assert_in_results(res, action="remove", status="ok") assert_in_results(res, action="save", status="ok") ok_file_has_content(op.join(ds.path, img), "bar") @@ -213,7 +226,8 @@ def test_container_update(ds_path=None, local_file=None, url=None): assert_in("Update ", get_commit_msg()) # If we add a new image with update=True should say Configure - res = ds.containers_add(name="foo2", update=True, url=url_bar) + res = ds.containers_add(name="foo2", update=True, url=url_bar, + **common_kwargs) assert_in("Configure ", get_commit_msg()) @@ -223,16 +237,17 @@ def test_container_update(ds_path=None, local_file=None, url=None): def test_container_from_subdataset(ds_path=None, src_subds_path=None, local_file=None): # prepare a to-be subdataset with a registered container - src_subds = Dataset(src_subds_path).create() - src_subds.containers_add(name="first", - url=get_local_file_url(op.join(local_file, - 'some_container.img')) - ) + src_subds = Dataset(src_subds_path).create(**common_kwargs) + src_subds.containers_add( + name="first", + url=get_local_file_url(op.join(local_file, 'some_container.img')), + **common_kwargs + ) # add it as subdataset to a super ds: - ds = Dataset(ds_path).create() - subds = ds.install("sub", source=src_subds_path) + ds = Dataset(ds_path).create(**common_kwargs) + subds = ds.install("sub", source=src_subds_path, **common_kwargs) # add it again one level down to see actual recursion: - subds.install("subsub", source=src_subds_path) + subds.install("subsub", source=src_subds_path, **common_kwargs) # We come up empty without recursive: res = ds.containers_list(recursive=False, **RAW_KWDS) @@ -254,9 +269,9 @@ def test_container_from_subdataset(ds_path=None, src_subds_path=None, local_file ) # not installed subdataset doesn't pose an issue: - sub2 = ds.create("sub2") - assert_result_count(ds.subdatasets(), 2, type="dataset") - ds.uninstall("sub2", check=False) + sub2 = ds.create("sub2", **common_kwargs) + assert_result_count(ds.subdatasets(**common_kwargs), 2, type="dataset") + ds.uninstall("sub2", check=False, **common_kwargs) from datalad.tests.utils_pytest import assert_false assert_false(sub2.is_installed()) @@ -285,17 +300,17 @@ def test_container_from_subdataset(ds_path=None, src_subds_path=None, local_file @with_tempfile def test_list_contains(path=None): - ds = Dataset(path).create() - subds_a = ds.create("a") - subds_b = ds.create("b") - subds_a_c = subds_a.create("c") + ds = Dataset(path).create(**common_kwargs) + subds_a = ds.create("a", **common_kwargs) + subds_b = ds.create("b", **common_kwargs) + subds_a_c = subds_a.create("c", **common_kwargs) add_pyscript_image(subds_a_c, "in-c", "img") add_pyscript_image(subds_a, "in-a", "img") add_pyscript_image(subds_b, "in-b", "img") add_pyscript_image(ds, "in-top", "img") - ds.save(recursive=True) + ds.save(recursive=True, **common_kwargs) assert_result_count(ds.containers_list(recursive=True, **RAW_KWDS), 4) diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py index 2c0bca9a..d9d4c5c7 100644 --- a/datalad_container/tests/test_run.py +++ b/datalad_container/tests/test_run.py @@ -41,40 +41,42 @@ testimg_url = 'shub://datalad/datalad-container:testhelper' +common_kwargs = {'result_renderer': 'disabled'} + @with_tree(tree={"dummy0.img": "doesn't matter 0", "dummy1.img": "doesn't matter 1"}) def test_run_mispecified(path=None): - ds = Dataset(path).create(force=True) - ds.save(path=["dummy0.img", "dummy1.img"]) + ds = Dataset(path).create(force=True, **common_kwargs) + ds.save(path=["dummy0.img", "dummy1.img"], **common_kwargs) ok_clean_git(path) # Abort if no containers exist. with assert_raises(ValueError) as cm: - ds.containers_run("doesn't matter") + ds.containers_run("doesn't matter", **common_kwargs) assert_in("No known containers", str(cm.value)) # Abort if more than one container exists but no container name is # specified. - ds.containers_add("d0", image="dummy0.img") - ds.containers_add("d1", image="dummy0.img") + ds.containers_add("d0", image="dummy0.img", **common_kwargs) + ds.containers_add("d1", image="dummy0.img", **common_kwargs) with assert_raises(ValueError) as cm: - ds.containers_run("doesn't matter") + ds.containers_run("doesn't matter", **common_kwargs) assert_in("explicitly specify container", str(cm.value)) # Abort if unknown container is specified. with assert_raises(ValueError) as cm: - ds.containers_run("doesn't matter", container_name="ghost") + ds.containers_run("doesn't matter", container_name="ghost", **common_kwargs) assert_in("Container selection impossible", str(cm.value)) @with_tree(tree={"i.img": "doesn't matter"}) def test_run_unknown_cmdexec_placeholder(path=None): ds = Dataset(path).create(force=True) - ds.containers_add("i", image="i.img", call_fmt="{youdontknowme}") + ds.containers_add("i", image="i.img", call_fmt="{youdontknowme}", **common_kwargs) assert_result_count( - ds.containers_run("doesn't matter", on_failure="ignore"), + ds.containers_run("doesn't matter", on_failure="ignore", **common_kwargs), 1, path=ds.path, action="run", @@ -95,9 +97,10 @@ def test_container_files(path=None, super_path=None): image='righthere', # the next one is auto-guessed #call_fmt='singularity exec {img} {cmd}' + **common_kwargs ) assert_result_count( - ds.containers_list(), 1, + ds.containers_list(**common_kwargs), 1, path=op.join(ds.path, 'righthere'), name='mycontainer') ok_clean_git(path) @@ -113,7 +116,7 @@ def assert_no_change(res, path): # now we can run stuff in the container # and because there is just one, we don't even have to name the container - res = ds.containers_run(cmd) + res = ds.containers_run(cmd, **common_kwargs) # container becomes an 'input' for `run` -> get request, but "notneeded" assert_result_count( res, 1, action='get', status='notneeded', @@ -122,7 +125,7 @@ def assert_no_change(res, path): # same thing as we specify the container by its name: res = ds.containers_run(cmd, - container_name='mycontainer') + container_name='mycontainer', **common_kwargs) # container becomes an 'input' for `run` -> get request, but "notneeded" assert_result_count( res, 1, action='get', status='notneeded', @@ -131,7 +134,7 @@ def assert_no_change(res, path): # we can also specify the container by its path: res = ds.containers_run(cmd, - container_name=op.join(ds.path, 'righthere')) + container_name=op.join(ds.path, 'righthere'), **common_kwargs) # container becomes an 'input' for `run` -> get request, but "notneeded" assert_result_count( res, 1, action='get', status='notneeded', @@ -141,15 +144,15 @@ def assert_no_change(res, path): # Now, test the same thing, but with this dataset being a subdataset of # another one: - super_ds = Dataset(super_path).create() - super_ds.install("sub", source=path) + super_ds = Dataset(super_path).create(**common_kwargs) + super_ds.install("sub", source=path, **common_kwargs) # When running, we don't discover containers in subdatasets with assert_raises(ValueError) as cm: - super_ds.containers_run(cmd) + super_ds.containers_run(cmd, **common_kwargs) assert_in("No known containers", str(cm.value)) # ... unless we need to specify the name - res = super_ds.containers_run(cmd, container_name="sub/mycontainer") + res = super_ds.containers_run(cmd, container_name="sub/mycontainer", **common_kwargs) # container becomes an 'input' for `run` -> get request (needed this time) assert_result_count( res, 1, action='get', status='ok', @@ -160,33 +163,34 @@ def assert_no_change(res, path): @with_tempfile @with_tree(tree={'some_container.img': "doesn't matter"}) def test_non0_exit(path=None, local_file=None): - ds = Dataset(path).create() + ds = Dataset(path).create(**common_kwargs) # plug in a proper singularity image ds.containers_add( 'mycontainer', url=get_local_file_url(op.join(local_file, 'some_container.img')), image='righthere', - call_fmt="sh -c '{cmd}'" + call_fmt="sh -c '{cmd}'", + **common_kwargs ) - ds.save() # record the effect in super-dataset + ds.save(**common_kwargs) # record the effect in super-dataset ok_clean_git(path) # now we can run stuff in the container # and because there is just one, we don't even have to name the container - ds.containers_run(['touch okfile']) + ds.containers_run(['touch okfile'], **common_kwargs) assert_repo_status(path) # Test that regular 'run' behaves as expected -- it does not proceed to save upon error with pytest.raises(IncompleteResultsError): - ds.run(['sh', '-c', 'touch nokfile && exit 1']) + ds.run(['sh', '-c', 'touch nokfile && exit 1'], **common_kwargs) assert_repo_status(path, untracked=['nokfile']) (ds.pathobj / "nokfile").unlink() # remove for the next test assert_repo_status(path) # Now test with containers-run which should behave similarly -- not save in case of error with pytest.raises(IncompleteResultsError): - ds.containers_run(['touch nokfile && exit 1']) + ds.containers_run(['touch nokfile && exit 1'], **common_kwargs) # check - must have created the file but not saved anything since failed to run! assert_repo_status(path, untracked=['nokfile']) @@ -194,8 +198,8 @@ def test_non0_exit(path=None, local_file=None): @with_tempfile @with_tree(tree={'some_container.img': "doesn't matter"}) def test_custom_call_fmt(path=None, local_file=None): - ds = Dataset(path).create() - subds = ds.create('sub') + ds = Dataset(path).create(**common_kwargs) + subds = ds.create('sub', **common_kwargs) # plug in a proper singularity image subds.containers_add( @@ -204,9 +208,10 @@ def test_custom_call_fmt(path=None, local_file=None): image='righthere', call_fmt='echo image={img} cmd={cmd} img_dspath={img_dspath} ' # and environment variable being set/propagated by default - 'name=$DATALAD_CONTAINER_NAME' + 'name=$DATALAD_CONTAINER_NAME', + **common_kwargs, ) - ds.save() # record the effect in super-dataset + ds.save(**common_kwargs) # record the effect in super-dataset # Running should work fine either within sub or within super out = WitlessRunner(cwd=subds.path).run( @@ -239,8 +244,8 @@ def test_custom_call_fmt(path=None, local_file=None): } ) def test_extra_inputs(path=None): - ds = Dataset(path).create(force=True) - subds = ds.create("sub", force=True) + ds = Dataset(path).create(force=True, **common_kwargs) + subds = ds.create("sub", force=True, **common_kwargs) subds.containers_add( "mycontainer", image="containers/container.img", @@ -250,9 +255,10 @@ def test_extra_inputs(path=None): "{img_dirpath}/../overlays/overlay2.img", "{img_dspath}/overlays/overlay3.img", ], + **common_kwargs ) - ds.save(recursive=True) # record the entire tree of files etc - ds.containers_run("XXX", container_name="sub/mycontainer") + ds.save(recursive=True, **common_kwargs) # record the entire tree of files etc + ds.containers_run("XXX", container_name="sub/mycontainer", **common_kwargs) ok_file_has_content( os.path.join(ds.repo.path, "out.log"), "image=sub/containers/container.img", @@ -273,10 +279,10 @@ def test_extra_inputs(path=None): @skip_if_no_network @with_tree(tree={"subdir": {"in": "innards"}}) def test_run_no_explicit_dataset(path=None): - ds = Dataset(path).create(force=True) - ds.save() + ds = Dataset(path).create(force=True, **common_kwargs) + ds.save(**common_kwargs) ds.containers_add("deb", url=testimg_url, - call_fmt="singularity exec {img} {cmd}") + call_fmt="singularity exec {img} {cmd}", **common_kwargs) # When no explicit dataset is given, paths are interpreted as relative to # the current working directory. @@ -285,14 +291,14 @@ def test_run_no_explicit_dataset(path=None): with chpwd(path): containers_run("cat {inputs[0]} {inputs[0]} >doubled", inputs=[op.join("subdir", "in")], - outputs=["doubled"]) + outputs=["doubled"], **common_kwargs) ok_file_has_content(op.join(path, "doubled"), "innardsinnards") # From under a subdirectory. subdir = op.join(ds.path, "subdir") with chpwd(subdir): containers_run("cat {inputs[0]} {inputs[0]} >doubled", - inputs=["in"], outputs=["doubled"]) + inputs=["in"], outputs=["doubled"], **common_kwargs) ok_file_has_content(op.join(subdir, "doubled"), "innardsinnards") @@ -316,16 +322,16 @@ def test_run_subdataset_install(path=None): # `-- d/ # `-- d2/ # `-- img - ds_src_a = ds_src.create("a") - ds_src_a2 = ds_src_a.create("a2") - ds_src_b = Dataset(ds_src.pathobj / "b").create() - ds_src_b2 = ds_src_b.create("b2") - ds_src_c = ds_src.create("c") - ds_src_c2 = ds_src_c.create("c2") - ds_src_d = Dataset(ds_src.pathobj / "d").create() - ds_src_d2 = ds_src_d.create("d2") + ds_src_a = ds_src.create("a", **common_kwargs) + ds_src_a2 = ds_src_a.create("a2", **common_kwargs) + ds_src_b = Dataset(ds_src.pathobj / "b").create(**common_kwargs) + ds_src_b2 = ds_src_b.create("b2", **common_kwargs) + ds_src_c = ds_src.create("c", **common_kwargs) + ds_src_c2 = ds_src_c.create("c2", **common_kwargs) + ds_src_d = Dataset(ds_src.pathobj / "d").create(**common_kwargs) + ds_src_d2 = ds_src_d.create("d2", **common_kwargs) - ds_src.save() + ds_src.save(**common_kwargs) add_pyscript_image(ds_src_a, "in-a", "img") add_pyscript_image(ds_src_a2, "in-a2", "img") @@ -333,9 +339,9 @@ def test_run_subdataset_install(path=None): add_pyscript_image(ds_src_c2, "in-c2", "img") add_pyscript_image(ds_src_d2, "in-d2", "img") - ds_src.save(recursive=True) + ds_src.save(recursive=True, **common_kwargs) - ds_dest = clone(ds_src.path, str(path / "dest")) + ds_dest = clone(ds_src.path, str(path / "dest"), **common_kwargs) ds_dest_a2 = Dataset(ds_dest.pathobj / "a" / "a2") ds_dest_b2 = Dataset(ds_dest.pathobj / "b" / "b2") ds_dest_c2 = Dataset(ds_dest.pathobj / "c" / "c2") @@ -346,7 +352,7 @@ def test_run_subdataset_install(path=None): assert_false(ds_dest_d2.is_installed()) # Needed subdatasets are installed if container name is given... - res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2") + res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2", **common_kwargs) assert_result_count( res, 1, action="install", status="ok", path=ds_dest_a2.path) assert_result_count( @@ -354,7 +360,7 @@ def test_run_subdataset_install(path=None): path=str(ds_dest_a2.pathobj / "img")) ok_(ds_dest_a2.is_installed()) # ... even if the name and path do not match. - res = ds_dest.containers_run(["arg"], container_name="b/b2/in-b2") + res = ds_dest.containers_run(["arg"], container_name="b/b2/in-b2", **common_kwargs) assert_result_count( res, 1, action="install", status="ok", path=ds_dest_b2.path) assert_result_count( @@ -362,15 +368,15 @@ def test_run_subdataset_install(path=None): path=str(ds_dest_b2.pathobj / "img")) ok_(ds_dest_b2.is_installed()) # Subdatasets will also be installed if given an image path... - res = ds_dest.containers_run(["arg"], container_name=str(Path("c/c2/img"))) + res = ds_dest.containers_run(["arg"], container_name=str(Path("c/c2/img")), **common_kwargs) assert_result_count( res, 1, action="install", status="ok", path=ds_dest_c2.path) assert_result_count( res, 1, action="get", status="ok", path=str(ds_dest_c2.pathobj / "img")) ok_(ds_dest_c2.is_installed()) - ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img"))) + ds_dest.containers_run(["arg"], container_name=str(Path("d/d2/img")), **common_kwargs) # There's no install record if subdataset is already present. - res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2") + res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2", **common_kwargs) assert_not_in_results(res, action="install") From 849975632aa60176cf451dd3862960d1a7238810 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 10 Oct 2023 10:00:57 +0200 Subject: [PATCH 06/10] RF `containers_add` to use `get_container_configuration()` This is the last change for establishing `get_container_configuration()` as the single code piece for reading container documentation. This opens the door for implementing on-read normalization of configuration items, such as the image path. Closes #238 --- datalad_container/containers_add.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/datalad_container/containers_add.py b/datalad_container/containers_add.py index 3840e05a..608407d7 100644 --- a/datalad_container/containers_add.py +++ b/datalad_container/containers_add.py @@ -22,6 +22,7 @@ from datalad.support.exceptions import InsufficientArgumentsError from datalad.interface.results import get_status_dict +from .utils import get_container_configuration from .definitions import definitions lgr = logging.getLogger("datalad.containers.containers_add") @@ -205,8 +206,8 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, "Container names can only contain alphanumeric characters " "and '-', got: '{}'".format(name)) - cfgbasevar = "datalad.containers.{}".format(name) - if cfgbasevar + ".image" in ds.config: + container_cfg = get_container_configuration(ds, name) + if 'image' in container_cfg: if not update: yield get_status_dict( action="containers_add", ds=ds, logger=lgr, @@ -219,7 +220,7 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, if not (url or image or call_fmt): # No updated values were provided. See if an update url is # configured (currently relevant only for Singularity Hub). - url = ds.config.get(cfgbasevar + ".updateurl") + url = container_cfg.get("updateurl") if not url: yield get_status_dict( action="containers_add", ds=ds, logger=lgr, @@ -227,8 +228,8 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, message="No values to update specified") return - call_fmt = call_fmt or ds.config.get(cfgbasevar + ".cmdexec") - image = image or ds.config.get(cfgbasevar + ".image") + call_fmt = call_fmt or container_cfg.get("cmdexec") + image = image or container_cfg.get("image") if not image: loc_cfg_var = "datalad.containers.location" @@ -329,6 +330,7 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, return # store configs + cfgbasevar = "datalad.containers.{}".format(name) if imgurl != url: # store originally given URL, as it resolves to something # different and maybe can be used to update the container From 8c6dba590d30d50372e081c7e52b95f639dea8e5 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 10 Oct 2023 10:22:02 +0200 Subject: [PATCH 07/10] Minor RF of `get_container_configuration()` Possible a little less complex to understand. --- datalad_container/utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/datalad_container/utils.py b/datalad_container/utils.py index b39dfbef..0a524c3a 100644 --- a/datalad_container/utils.py +++ b/datalad_container/utils.py @@ -47,20 +47,24 @@ def get_container_configuration( If not (matching) container configuration exists, and empty dictionary is returned. """ - # all info is in the dataset config! var_prefix = 'datalad.containers.' + containers = {} + # all info is in the dataset config! for var, value in ds.config.items(): if not var.startswith(var_prefix): # not an interesting variable continue - var_comps = var[len(var_prefix):].split('.') - cname = var_comps[0] + var_comps = var.split('.') + # container name is the 3rd after 'datalad'.'container'. + cname = var_comps[2] if name and name != cname: # we are looking for a specific container's configuration # and this is not it continue - ccfgname = '.'.join(var_comps[1:]) + # reconstruct config item name, anything after + # datalad.containers.. + ccfgname = '.'.join(var_comps[3:]) if not ccfgname: continue From 85de78805bae239f4d96ae2b9b981e0038f2ecfc Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 10 Oct 2023 10:56:53 +0200 Subject: [PATCH 08/10] Unconditionally store image path in POSIX notation in configuration This establishes a fixed format for the configuration. Previously, the path was stored in platform conventions without any indication of the nature of that platform. This turned platform detection into guesswork. Some of that guesswork is now implemented in a path normalization helper (`_normalize_image_path()`). it is executed on-read, in order to keep older dataset configuration functional for the cases - we can tell the platform conventions, because the image is placed under `.datalad\\environments\\` (the default) - we can locate a matching element on the file system Notably, this guesswork does not cover the case of an image that was configured on windows, and is hosted in a currently uninstalled subdataset. Closes #224 --- datalad_container/containers_add.py | 8 ++- datalad_container/tests/test_containers.py | 7 ++ datalad_container/tests/test_run.py | 45 +++++++++++++ datalad_container/utils.py | 75 ++++++++++++++++++++++ 4 files changed, 133 insertions(+), 2 deletions(-) diff --git a/datalad_container/containers_add.py b/datalad_container/containers_add.py index 608407d7..024a6d40 100644 --- a/datalad_container/containers_add.py +++ b/datalad_container/containers_add.py @@ -6,8 +6,11 @@ import logging import os import os.path as op +from pathlib import ( + Path, + PurePosixPath, +) import re -import sys from shutil import copyfile from datalad.cmd import WitlessRunner @@ -339,7 +342,8 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, # force store the image, and prevent multiple entries ds.config.set( "{}.image".format(cfgbasevar), - op.relpath(image, start=ds.path), + # always store a POSIX path, relative to dataset root + str(PurePosixPath(Path(image).relative_to(ds.pathobj))), force=True) if call_fmt: ds.config.set( diff --git a/datalad_container/tests/test_containers.py b/datalad_container/tests/test_containers.py index b08601ce..f59cec68 100644 --- a/datalad_container/tests/test_containers.py +++ b/datalad_container/tests/test_containers.py @@ -70,6 +70,10 @@ def test_add_local_path(path=None, local_file=None): foo_target = op.join(path, ".datalad", "environments", "foobert", "image") assert_result_count(res, 1, status="ok", type="file", path=foo_target, action="containers_add") + # the image path configuration is always in POSIX format + assert ds.config.get('datalad.containers.foobert.image') \ + == '.datalad/environments/foobert/image' + # We've just copied and added the file. assert_not_in(ds.repo.WEB_UUID, ds.repo.whereis(foo_target)) @@ -219,6 +223,9 @@ def test_container_update(ds_path=None, local_file=None, url=None): assert_in_results(res, action="remove", status="ok") assert_in_results(res, action="save", status="ok") ok_file_has_content(op.join(ds.path, img), "bar") + # the image path configuration is (still) always in POSIX format + assert ds.config.get('datalad.containers.foo.image') \ + == '.datalad/environments/foo/image' # Test commit message # In the above case it was updating existing image so should have "Update " diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py index d9d4c5c7..83403c82 100644 --- a/datalad_container/tests/test_run.py +++ b/datalad_container/tests/test_run.py @@ -380,3 +380,48 @@ def test_run_subdataset_install(path=None): # There's no install record if subdataset is already present. res = ds_dest.containers_run(["arg"], container_name="a/a2/in-a2", **common_kwargs) assert_not_in_results(res, action="install") + + +@skip_if_no_network +def test_nonPOSIX_imagepath(tmp_path): + ds = Dataset(tmp_path).create(**common_kwargs) + + # plug in a proper singularity image + ds.containers_add( + 'mycontainer', + url=testimg_url, + **common_kwargs + ) + assert_result_count( + ds.containers_list(**common_kwargs), 1, + # we get a report in platform conventions + path=str(ds.pathobj / '.datalad' / 'environments' / + 'mycontainer' / 'image'), + name='mycontainer') + assert_repo_status(tmp_path) + + # now reconfigure the image path to look as if a version <= 1.2.4 + # configured it on windows + # this must still run across all platforms + ds.config.set( + 'datalad.containers.mycontainer.image', + '.datalad\\environments\\mycontainer\\image', + scope='branch', + reload=True, + ) + ds.save(**common_kwargs) + assert_repo_status(tmp_path) + + assert_result_count( + ds.containers_list(**common_kwargs), 1, + # we still get a report in platform conventions + path=str(ds.pathobj / '.datalad' / 'environments' / + 'mycontainer' / 'image'), + name='mycontainer') + + res = ds.containers_run(['ls'], **common_kwargs) + assert_result_count( + res, 1, + action='run', + status='ok', + ) diff --git a/datalad_container/utils.py b/datalad_container/utils.py index 0a524c3a..a13266c7 100644 --- a/datalad_container/utils.py +++ b/datalad_container/utils.py @@ -2,6 +2,15 @@ from __future__ import annotations +# the pathlib equivalent is only available in PY3.12 +from os.path import lexists + +from pathlib import ( + PurePath, + PurePosixPath, + PureWindowsPath, +) + from datalad.distribution.dataset import Dataset from datalad.support.external_versions import external_versions @@ -68,9 +77,75 @@ def get_container_configuration( if not ccfgname: continue + if ccfgname == 'image': + # run image path normalization to get a relative path + # in platform conventions, regardless of the input. + # for now we report a str, because the rest of the code + # is not using pathlib + value = str(_normalize_image_path(value, ds)) + cinfo = containers.get(cname, {}) cinfo[ccfgname] = value containers[cname] = cinfo return containers if name is None else containers.get(name, {}) + + +def _normalize_image_path(path: str, ds: Dataset) -> str: + """Helper to standardize container image path handling + + Previously, container configuration would contain platform-paths + for container image location (e.g., windows paths when added on + windows, POSIX paths elsewhere). This made cross-platform reuse + impossible out-of-the box, but it also means that such dataset + are out there in unknown numbers. + + This helper inspects an image path READ FROM CONFIG(!) and ensures + that is matches platform conventions (because all other arguments) + also come in platform conventions. This enables standardizing + the storage conventions to be POSIX-only (for the future). + + Parameters + ---------- + path: str + A str-path, as read from the configuration, matching its conventions + (relative path, pointing to a container image relative to the + dataset's root. + ds: Dataset + This dataset's base path is used as a reference for resolving + the relative image path to an absolute location on the file system. + + Returns + ------- + PurePath + Relative path in platform conventions + """ + # we only need to act differently, when an incoming path is + # windows. This is not possible to say with 100% confidence, + # because a POSIX path can also contain a backslash. We support + # a few standard cases where we CAN tell + pathobj = None + if '\\' not in path: + # no windows pathsep, no problem + pathobj = PurePosixPath(path) + elif path.startswith('.datalad\\environments\\'): + # this is the default location setup in windows conventions + pathobj = PureWindowsPath(path) + else: + # let's assume it is windows for a moment + if lexists(str(ds.pathobj / PureWindowsPath(path))): + # if there is something on the filesystem for this path, + # we can be reasonably sure that this is indeed a windows + # path. This won't catch images in uninstalled subdataset, + # but better than nothing + pathobj = PureWindowsPath(path) + else: + # if we get here, we have no idea, and no means to verify + # further hypotheses -- go with the POSIX assumption + # and hope for the best + pathobj = PurePosixPath(path) + + assert pathobj is not None + # we report in platform-conventions + return PurePath(pathobj) From 6435a34d8a2b596005e822827a172ecbd850b7f1 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 11 Oct 2023 21:59:24 -0400 Subject: [PATCH 09/10] Minor obvious typo and type annotation fixes --- datalad_container/utils.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/datalad_container/utils.py b/datalad_container/utils.py index a13266c7..b874ae4a 100644 --- a/datalad_container/utils.py +++ b/datalad_container/utils.py @@ -92,7 +92,7 @@ def get_container_configuration( return containers if name is None else containers.get(name, {}) -def _normalize_image_path(path: str, ds: Dataset) -> str: +def _normalize_image_path(path: str, ds: Dataset) -> PurePath: """Helper to standardize container image path handling Previously, container configuration would contain platform-paths @@ -102,7 +102,7 @@ def _normalize_image_path(path: str, ds: Dataset) -> str: are out there in unknown numbers. This helper inspects an image path READ FROM CONFIG(!) and ensures - that is matches platform conventions (because all other arguments) + that it matches platform conventions (because all other arguments) also come in platform conventions. This enables standardizing the storage conventions to be POSIX-only (for the future). @@ -111,7 +111,7 @@ def _normalize_image_path(path: str, ds: Dataset) -> str: path: str A str-path, as read from the configuration, matching its conventions (relative path, pointing to a container image relative to the - dataset's root. + dataset's root). ds: Dataset This dataset's base path is used as a reference for resolving the relative image path to an absolute location on the file system. From cd49d66a03b2e4b6d25431afb3cfe2ce6dec0372 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 12 Oct 2023 07:46:25 +0200 Subject: [PATCH 10/10] Fix conditional to actually detect the target scenario The faulty path delimiter is a `\\`, not just a single backlash. Co-authored-by: Yaroslav Halchenko --- datalad_container/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datalad_container/utils.py b/datalad_container/utils.py index b874ae4a..260db2ce 100644 --- a/datalad_container/utils.py +++ b/datalad_container/utils.py @@ -129,7 +129,7 @@ def _normalize_image_path(path: str, ds: Dataset) -> PurePath: if '\\' not in path: # no windows pathsep, no problem pathobj = PurePosixPath(path) - elif path.startswith('.datalad\\environments\\'): + elif path.startswith(r'.datalad\\environments\\'): # this is the default location setup in windows conventions pathobj = PureWindowsPath(path) else: