From 30abc086a3121a36df7dacb541147114fc76c1ce Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 10 Oct 2023 10:56:53 +0200 Subject: [PATCH] Unconditionally store image path in POSIX notation in configuration This establishes a fixed format for the configuration. Previously, the path was stored in platform conventions without any indication of the nature of that platform. This turned platform detection into guesswork. Some of that guesswork is now implemented in a path normalization helper (`_normalize_image_path()`). it is executed on-read, in order to keep older dataset configuration functional for the cases - we can tell the platform conventions, because the image is placed under `.datalad\\environments\\` (the default) - we can locate a matching element on the file system Notably, this guesswork does not cover the case of an image that was configured on windows, and is hosted in a currently uninstalled subdataset. Closes #224 --- datalad_container/containers_add.py | 8 ++- datalad_container/tests/test_containers.py | 7 ++ datalad_container/utils.py | 75 ++++++++++++++++++++++ 3 files changed, 88 insertions(+), 2 deletions(-) diff --git a/datalad_container/containers_add.py b/datalad_container/containers_add.py index 608407d7..024a6d40 100644 --- a/datalad_container/containers_add.py +++ b/datalad_container/containers_add.py @@ -6,8 +6,11 @@ import logging import os import os.path as op +from pathlib import ( + Path, + PurePosixPath, +) import re -import sys from shutil import copyfile from datalad.cmd import WitlessRunner @@ -339,7 +342,8 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None, # force store the image, and prevent multiple entries ds.config.set( "{}.image".format(cfgbasevar), - op.relpath(image, start=ds.path), + # always store a POSIX path, relative to dataset root + str(PurePosixPath(Path(image).relative_to(ds.pathobj))), force=True) if call_fmt: ds.config.set( diff --git a/datalad_container/tests/test_containers.py b/datalad_container/tests/test_containers.py index b08601ce..f59cec68 100644 --- a/datalad_container/tests/test_containers.py +++ b/datalad_container/tests/test_containers.py @@ -70,6 +70,10 @@ def test_add_local_path(path=None, local_file=None): foo_target = op.join(path, ".datalad", "environments", "foobert", "image") assert_result_count(res, 1, status="ok", type="file", path=foo_target, action="containers_add") + # the image path configuration is always in POSIX format + assert ds.config.get('datalad.containers.foobert.image') \ + == '.datalad/environments/foobert/image' + # We've just copied and added the file. assert_not_in(ds.repo.WEB_UUID, ds.repo.whereis(foo_target)) @@ -219,6 +223,9 @@ def test_container_update(ds_path=None, local_file=None, url=None): assert_in_results(res, action="remove", status="ok") assert_in_results(res, action="save", status="ok") ok_file_has_content(op.join(ds.path, img), "bar") + # the image path configuration is (still) always in POSIX format + assert ds.config.get('datalad.containers.foo.image') \ + == '.datalad/environments/foo/image' # Test commit message # In the above case it was updating existing image so should have "Update " diff --git a/datalad_container/utils.py b/datalad_container/utils.py index 0a524c3a..a13266c7 100644 --- a/datalad_container/utils.py +++ b/datalad_container/utils.py @@ -2,6 +2,15 @@ from __future__ import annotations +# the pathlib equivalent is only available in PY3.12 +from os.path import lexists + +from pathlib import ( + PurePath, + PurePosixPath, + PureWindowsPath, +) + from datalad.distribution.dataset import Dataset from datalad.support.external_versions import external_versions @@ -68,9 +77,75 @@ def get_container_configuration( if not ccfgname: continue + if ccfgname == 'image': + # run image path normalization to get a relative path + # in platform conventions, regardless of the input. + # for now we report a str, because the rest of the code + # is not using pathlib + value = str(_normalize_image_path(value, ds)) + cinfo = containers.get(cname, {}) cinfo[ccfgname] = value containers[cname] = cinfo return containers if name is None else containers.get(name, {}) + + +def _normalize_image_path(path: str, ds: Dataset) -> str: + """Helper to standardize container image path handling + + Previously, container configuration would contain platform-paths + for container image location (e.g., windows paths when added on + windows, POSIX paths elsewhere). This made cross-platform reuse + impossible out-of-the box, but it also means that such dataset + are out there in unknown numbers. + + This helper inspects an image path READ FROM CONFIG(!) and ensures + that is matches platform conventions (because all other arguments) + also come in platform conventions. This enables standardizing + the storage conventions to be POSIX-only (for the future). + + Parameters + ---------- + path: str + A str-path, as read from the configuration, matching its conventions + (relative path, pointing to a container image relative to the + dataset's root. + ds: Dataset + This dataset's base path is used as a reference for resolving + the relative image path to an absolute location on the file system. + + Returns + ------- + PurePath + Relative path in platform conventions + """ + # we only need to act differently, when an incoming path is + # windows. This is not possible to say with 100% confidence, + # because a POSIX path can also contain a backslash. We support + # a few standard cases where we CAN tell + pathobj = None + if '\\' not in path: + # no windows pathsep, no problem + pathobj = PurePosixPath(path) + elif path.startswith('.datalad\\environments\\'): + # this is the default location setup in windows conventions + pathobj = PureWindowsPath(path) + else: + # let's assume it is windows for a moment + if lexists(str(ds.pathobj / PureWindowsPath(path))): + # if there is something on the filesystem for this path, + # we can be reasonably sure that this is indeed a windows + # path. This won't catch images in uninstalled subdataset, + # but better than nothing + pathobj = PureWindowsPath(path) + else: + # if we get here, we have no idea, and no means to verify + # further hypotheses -- go with the POSIX assumption + # and hope for the best + pathobj = PurePosixPath(path) + + assert pathobj is not None + # we report in platform-conventions + return PurePath(pathobj)