Skip to content

Commit

Permalink
Unconditionally store image path in POSIX notation in configuration
Browse files Browse the repository at this point in the history
This establishes a fixed format for the configuration. Previously,
the path was stored in platform conventions without any indication
of the nature of that platform. This turned platform detection into
guesswork.

Some of that guesswork is now implemented in a path normalization
helper (`_normalize_image_path()`). it is executed on-read, in order
to keep older dataset configuration functional for the cases

- we can tell the platform conventions, because the image is placed
  under `.datalad\\environments\\` (the default)
- we can locate a matching element on the file system

Notably, this guesswork does not cover the case of an image that
was configured on windows, and is hosted in a currently uninstalled
subdataset.

Closes datalad#224
  • Loading branch information
mih committed Oct 10, 2023
1 parent 8c6dba5 commit 30abc08
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 2 deletions.
8 changes: 6 additions & 2 deletions datalad_container/containers_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,11 @@
import logging
import os
import os.path as op
from pathlib import (
Path,
PurePosixPath,
)
import re
import sys
from shutil import copyfile

from datalad.cmd import WitlessRunner
Expand Down Expand Up @@ -339,7 +342,8 @@ def __call__(name, url=None, dataset=None, call_fmt=None, image=None,
# force store the image, and prevent multiple entries
ds.config.set(
"{}.image".format(cfgbasevar),
op.relpath(image, start=ds.path),
# always store a POSIX path, relative to dataset root
str(PurePosixPath(Path(image).relative_to(ds.pathobj))),
force=True)
if call_fmt:
ds.config.set(
Expand Down
7 changes: 7 additions & 0 deletions datalad_container/tests/test_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ def test_add_local_path(path=None, local_file=None):
foo_target = op.join(path, ".datalad", "environments", "foobert", "image")
assert_result_count(res, 1, status="ok", type="file", path=foo_target,
action="containers_add")
# the image path configuration is always in POSIX format
assert ds.config.get('datalad.containers.foobert.image') \
== '.datalad/environments/foobert/image'

# We've just copied and added the file.
assert_not_in(ds.repo.WEB_UUID, ds.repo.whereis(foo_target))

Expand Down Expand Up @@ -219,6 +223,9 @@ def test_container_update(ds_path=None, local_file=None, url=None):
assert_in_results(res, action="remove", status="ok")
assert_in_results(res, action="save", status="ok")
ok_file_has_content(op.join(ds.path, img), "bar")
# the image path configuration is (still) always in POSIX format
assert ds.config.get('datalad.containers.foo.image') \
== '.datalad/environments/foo/image'

# Test commit message
# In the above case it was updating existing image so should have "Update "
Expand Down
75 changes: 75 additions & 0 deletions datalad_container/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,15 @@

from __future__ import annotations

# the pathlib equivalent is only available in PY3.12
from os.path import lexists

from pathlib import (
PurePath,
PurePosixPath,
PureWindowsPath,
)

from datalad.distribution.dataset import Dataset
from datalad.support.external_versions import external_versions

Expand Down Expand Up @@ -68,9 +77,75 @@ def get_container_configuration(
if not ccfgname:
continue

if ccfgname == 'image':
# run image path normalization to get a relative path
# in platform conventions, regardless of the input.
# for now we report a str, because the rest of the code
# is not using pathlib
value = str(_normalize_image_path(value, ds))

cinfo = containers.get(cname, {})
cinfo[ccfgname] = value

containers[cname] = cinfo

return containers if name is None else containers.get(name, {})


def _normalize_image_path(path: str, ds: Dataset) -> str:
"""Helper to standardize container image path handling
Previously, container configuration would contain platform-paths
for container image location (e.g., windows paths when added on
windows, POSIX paths elsewhere). This made cross-platform reuse
impossible out-of-the box, but it also means that such dataset
are out there in unknown numbers.
This helper inspects an image path READ FROM CONFIG(!) and ensures
that is matches platform conventions (because all other arguments)
also come in platform conventions. This enables standardizing
the storage conventions to be POSIX-only (for the future).
Parameters
----------
path: str
A str-path, as read from the configuration, matching its conventions
(relative path, pointing to a container image relative to the
dataset's root.
ds: Dataset
This dataset's base path is used as a reference for resolving
the relative image path to an absolute location on the file system.
Returns
-------
PurePath
Relative path in platform conventions
"""
# we only need to act differently, when an incoming path is
# windows. This is not possible to say with 100% confidence,
# because a POSIX path can also contain a backslash. We support
# a few standard cases where we CAN tell
pathobj = None
if '\\' not in path:
# no windows pathsep, no problem
pathobj = PurePosixPath(path)
elif path.startswith('.datalad\\environments\\'):
# this is the default location setup in windows conventions
pathobj = PureWindowsPath(path)
else:
# let's assume it is windows for a moment
if lexists(str(ds.pathobj / PureWindowsPath(path))):
# if there is something on the filesystem for this path,
# we can be reasonably sure that this is indeed a windows
# path. This won't catch images in uninstalled subdataset,
# but better than nothing
pathobj = PureWindowsPath(path)
else:
# if we get here, we have no idea, and no means to verify
# further hypotheses -- go with the POSIX assumption
# and hope for the best
pathobj = PurePosixPath(path)

assert pathobj is not None
# we report in platform-conventions
return PurePath(pathobj)

0 comments on commit 30abc08

Please sign in to comment.