From 00994cd293fc24a12bb03f09e6b99405ab029fc0 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Wed, 29 May 2024 05:18:37 +0200 Subject: [PATCH 1/4] test(ria/ora): blacklist datalad-core tests This commit blacklists the tests in datalad core that would fail with the patched ria/ora code. It also fixes a comment in the appveyor definition (type and misleading info) --- .appveyor.yml | 26 +++++++++++++++++-- .../test_patched_ria_ora.py} | 0 2 files changed, 24 insertions(+), 2 deletions(-) rename datalad_next/patches/{fix_ria_ora_tests.py => tests/test_patched_ria_ora.py} (100%) diff --git a/.appveyor.yml b/.appveyor.yml index 42e97b15..bacbf455 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -162,7 +162,29 @@ environment: # the one in test_create_sibling_gitlab.py. However, there is one with # identical name in test_create_sibling_ghlike.py, now also disabled # because MIH does not know better - KEYWORDS: not test_gh1811 and not test_fake_gitlab and not test_dryrun + KEYWORDS: > + not test_gh1811 + and not test_fake_gitlab + and not test_dryrun + and not test_initremote + and not test_push_url + and not test_read_access + and not test_ria_postclone_noannex + and not test_version_check_ssh + and not test_version_check + and not test_obtain_permission_ssh + and not test_obtain_permission_root + and not test_remote_layout + and not test_remote_layout_ssh + and not test_setup_store + and not test_setup_ds_in_store + and not test_initremote_basic_fileurl + and not test_initremote_basic_httpurl + and not test_initremote_basic_httpsurl + and not test_initremote_basic_sshurl + and not test_gitannex_local + and not test_gitannex_ssh + and not test_ria_postclonecfg APPVEYOR_BUILD_WORKER_IMAGE: Ubuntu2004 PY: 3.8 INSTALL_SYSPKGS: @@ -369,7 +391,7 @@ for: # run tests on installed module, not source tree files - cmd: md __testhome__ - cmd: cd __testhome__ - # run test selecion (--traverse-namespace needed from Python 3.8 onwards) + # run test selection - cmd: python -m 
pytest -s -v --durations 20 -m "not (turtle)" -k "%KEYWORDS%" --cov=datalad_next --cov-config=..\.coveragerc --pyargs %DTS% after_test: diff --git a/datalad_next/patches/fix_ria_ora_tests.py b/datalad_next/patches/tests/test_patched_ria_ora.py similarity index 100% rename from datalad_next/patches/fix_ria_ora_tests.py rename to datalad_next/patches/tests/test_patched_ria_ora.py From 1a32fc58379d537a863678311c3ed9d2c5e2e999 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Wed, 29 May 2024 05:20:08 +0200 Subject: [PATCH 2/4] test(ria/ora): remove patching of tests This commit removes the patching of datalad-core tests for ria/ora code --- datalad_next/patches/patch_ria_ora.py | 10 ---------- docs/source/patches.rst | 1 - 2 files changed, 11 deletions(-) diff --git a/datalad_next/patches/patch_ria_ora.py b/datalad_next/patches/patch_ria_ora.py index 78428bf0..f33b11c8 100644 --- a/datalad_next/patches/patch_ria_ora.py +++ b/datalad_next/patches/patch_ria_ora.py @@ -7,7 +7,6 @@ class :class:`SSHRemoteIO`. 2. Improve ORA/RIA-related code so that it also works on Windows. """ -from os import environ from . import ( add_method_url2transport_path, @@ -16,15 +15,6 @@ class :class:`SSHRemoteIO`. # The following patches add Windows-support to ORA/RIA code ria_utils, replace_ora_remote, -) - -# we only want to import the patches for the tests when actually running -# under pytest. this prevents inflating the runtime dependency with -# test-only dependencies -- which would be needed for the necessary imports -if environ.get("PYTEST_VERSION"): - from . import fix_ria_ora_tests - -from . import ( # `replace_create_sibling_ria` be imported after `replace_sshremoteio` # and `ria_utils`. 
replace_create_sibling_ria, diff --git a/docs/source/patches.rst b/docs/source/patches.rst index b12d2fc8..58a77918 100644 --- a/docs/source/patches.rst +++ b/docs/source/patches.rst @@ -18,7 +18,6 @@ Patches that are automatically applied to DataLad when loading the create_sibling_gitlab customremotes_main distribution_dataset - fix_ria_ora_tests interface_utils patch_ria_ora push_optimize From 78e29b5df843225f4906eaaeb0e3f9d1ba5c1726 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Mon, 3 Jun 2024 18:25:33 +0200 Subject: [PATCH 3/4] fix(patch): fix create_sibling_ria patching This commit ensures that `datalad.distribution.datalad.Datalad.create_sibling_ria` is pointing to the patched `CreateSiblingRia` class, i.e. to `datalad_next.patches.replace_create_sibling_ria.CreateSiblingRia.__call__`. --- datalad_next/patches/enabled.py | 2 +- .../patches/replace_create_sibling_ria.py | 19 +++++++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/datalad_next/patches/enabled.py b/datalad_next/patches/enabled.py index de7157cf..fbcf3e56 100644 --- a/datalad_next/patches/enabled.py +++ b/datalad_next/patches/enabled.py @@ -12,10 +12,10 @@ test_keyring, customremotes_main, create_sibling_gitlab, + patch_ria_ora, run, update, # the following two patches have been taken verbatim from datalad-ria ssh_exec, sshconnector, - patch_ria_ora, ) diff --git a/datalad_next/patches/replace_create_sibling_ria.py b/datalad_next/patches/replace_create_sibling_ria.py index a1ca48b4..16f822bd 100644 --- a/datalad_next/patches/replace_create_sibling_ria.py +++ b/datalad_next/patches/replace_create_sibling_ria.py @@ -817,9 +817,28 @@ def _create_sibling_ria( ) +# Replace `create_sibling_ria`-method in `Dataset`. This is necessary on +# Windows. I don't know why. 
+def _adaptor_create_sibling_ria(self, *args, **kwargs): + return CreateSiblingRia.__call__( + *args, + **{ + **kwargs, + 'dataset': self, + } + ) +apply_patch( + 'datalad.distribution.dataset', + 'Dataset', + 'create_sibling_ria', + _adaptor_create_sibling_ria, +) + + class UnknownLayoutVersion(Exception): pass + known_versions_objt = ['1', '2'] # Dataset tree versions we introduced so far. This is about the layout of # datasets in a RIA store From 571c7efc0654b7a142de78e64dedd4eb84b88489 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Wed, 29 May 2024 05:20:59 +0200 Subject: [PATCH 4/4] test(ria/ora): add datalad-next ria/ora tests This commit adds versions of the blacklisted texts that are compatible with the patched ria/ora code. --- .appveyor.yml | 1 + .../patches/tests/test_patched_ria_ora.py | 870 ++++++++++++------ datalad_next/patches/tests/test_push.py | 146 ++- 3 files changed, 705 insertions(+), 312 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index bacbf455..e7e3e77b 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -164,6 +164,7 @@ environment: # because MIH does not know better KEYWORDS: > not test_gh1811 + and not test_nested_pushclone_cycle_allplatforms and not test_fake_gitlab and not test_dryrun and not test_initremote diff --git a/datalad_next/patches/tests/test_patched_ria_ora.py b/datalad_next/patches/tests/test_patched_ria_ora.py index 7d7bf818..a62cad0b 100644 --- a/datalad_next/patches/tests/test_patched_ria_ora.py +++ b/datalad_next/patches/tests/test_patched_ria_ora.py @@ -1,4 +1,6 @@ -"""Patch ria-, ora-, ria_utils-, and clone-tests to work with modified ria_utils +"""ria-, ora-, ria_utils-, and clone-tests for patched ria/ora code + +This are patched tests that work with the patched ria/ora code. The ria-utils-patches use an abstract path representation for RIA-store elements. 
This patch adapts the tests that use `ria_utils.create_store` and @@ -7,12 +9,16 @@ from __future__ import annotations import logging +import os.path as op +import random import shutil import stat +import string from pathlib import ( Path, PurePosixPath, ) +from urllib.parse import urlparse from urllib.request import pathname2url from datalad.api import ( @@ -45,37 +51,51 @@ ) from datalad.support.network import get_local_file_url from datalad.tests.utils_pytest import ( + DEFAULT_REMOTE, SkipTest, assert_equal, assert_false, - assert_in, assert_not_in, - assert_raises, + assert_not_is_instance, assert_repo_status, assert_result_count, assert_status, assert_true, create_tree, has_symlink_capability, - known_failure_githubci_win, known_failure_windows, + ok_, rmtree, serve_path_via_http, skip_if_adjusted_branch, - swallow_logs, + skip_if_root, + skip_ssh, + slow, + turtle, with_tempfile, ) -from . import apply_patch +from datalad_next.tests import ( + assert_in, + assert_raises, + eq_, + swallow_logs, +) -def local_path2pure_posix_path(path: Path | str): +def _local_path2pure_posix_path(path: Path | str): return PurePosixPath(pathname2url(str(path))) +def _random_name(prefix: str = '') -> str: + return prefix + ''.join( + random.choices(string.ascii_letters + string.digits, k=8) + ) + + # taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 @with_tempfile -def patched__postclonetest_prepare(lcl, storepath, storepath2, link): +def _postclonetest_prepare(lcl, storepath, storepath2, link): from datalad.customremotes.ria_utils import ( create_ds_in_store, @@ -100,8 +120,8 @@ def patched__postclonetest_prepare(lcl, storepath, storepath2, link): storepath2 = Path(storepath2) # PATCH: introduce `ppp_storepath` and `ppp_storepath2` and use them instead # of `storepath` and `storepath2`. 
- ppp_storepath = local_path2pure_posix_path(storepath) - ppp_storepath2 = local_path2pure_posix_path(storepath2) + ppp_storepath = _local_path2pure_posix_path(storepath) + ppp_storepath2 = _local_path2pure_posix_path(storepath2) link = Path(link) link.symlink_to(storepath) @@ -129,7 +149,7 @@ def patched__postclonetest_prepare(lcl, storepath, storepath2, link): create_ds_in_store(io, ppp_storepath2, d.id, '2', '1') d.create_sibling_ria(url2, "anotherstore", new_store_ok=True) d.push('.', to='anotherstore', data='nothing') - store2_loc, _, _ = get_layout_locations(1, ppp_storepath2, d.id) + store2_loc, _, _ = get_layout_locations(1, storepath2, d.id) Runner(cwd=str(store2_loc)).run(['git', 'update-server-info']) # Now the store to clone from: @@ -158,7 +178,7 @@ def patched__postclonetest_prepare(lcl, storepath, storepath2, link): 'store-storage', 'autoenable=false']) d.push('.', to='store') - store_loc, _, _ = get_layout_locations(1, ppp_storepath, d.id) + store_loc, _, _ = get_layout_locations(1, storepath, d.id) Runner(cwd=str(store_loc)).run(['git', 'update-server-info']) link.unlink() @@ -170,22 +190,11 @@ def patched__postclonetest_prepare(lcl, storepath, storepath2, link): return ds.id -# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 -@known_failure_githubci_win # in datalad/git-annex as e.g. of 20201218 -@with_tempfile(mkdir=True) -@with_tempfile -@with_tempfile -def patched_test_ria_postclone_noannex(dspath=None, storepath=None, clonepath=None): - +# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 and patched +# and refactored +def test_ria_postclone_noannex(existing_dataset, tmp_path): # Test for gh-5186: Cloning from local FS, shouldn't lead to annex # initializing origin. - - dspath = Path(dspath) - storepath = Path(storepath) - clonepath = Path(clonepath) - # PATCH: introduce `ppp_storepath` and use it instead of `storepath`. 
- ppp_storepath = local_path2pure_posix_path(storepath) - from datalad.customremotes.ria_utils import ( create_ds_in_store, create_store, @@ -193,133 +202,136 @@ def patched_test_ria_postclone_noannex(dspath=None, storepath=None, clonepath=No ) from datalad.distributed.ora_remote import LocalIO - # First create a dataset in a RIA store the standard way - somefile = dspath / 'a_file.txt' - somefile.write_text('irrelevant') - ds = Dataset(dspath).create(force=True) + ds = existing_dataset + some_file = ds.pathobj / 'a_file.txt' + some_file.write_text('irrelevant') + + ria_store_path = tmp_path / 'ria_store' + url_ria_store_path = _local_path2pure_posix_path(ria_store_path) io = LocalIO() - create_store(io, ppp_storepath, '1') - lcl_url = "ria+{}".format(get_local_file_url(str(storepath))) - create_ds_in_store(io, ppp_storepath, ds.id, '2', '1') + create_store(io, url_ria_store_path, '1') + lcl_url = "ria+{}".format(get_local_file_url(str(ria_store_path))) + create_ds_in_store(io, url_ria_store_path, ds.id, '2', '1') ds.create_sibling_ria(lcl_url, "store", new_store_ok=True) ds.push('.', to='store') - - # now, remove annex/ tree from store in order to see, that clone - # doesn't cause annex to recreate it. - store_loc, _, _ = get_layout_locations(1, storepath, ds.id) + # Remove annex-tree from store to check that clone doesn't cause annex to + # recreate it. 
+ store_loc = get_layout_locations(1, ria_store_path, ds.id)[0] annex = store_loc / 'annex' rmtree(str(annex)) assert_false(annex.exists()) - clone_url = get_local_file_url(str(storepath), compatibility='git') + \ - '#{}'.format(ds.id) - clone("ria+{}".format(clone_url), clonepath) - - # no need to test the cloning itself - we do that over and over in here - - # bare repo in store still has no local annex: + clone( + f"ria+{get_local_file_url(str(ria_store_path), compatibility='git')}#{ds.id}", + tmp_path / 'cloned_ds' + ) assert_false(annex.exists()) # taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 -@with_tempfile -def patched_test_setup_store(io_cls, io_args, store=None): - io = io_cls(*io_args) - store = Path(store) - # PATCH: introduce `ppp_store` and use it instead of `store` - ppp_store = local_path2pure_posix_path(store) - version_file = store / 'ria-layout-version' - error_logs = store / 'error_logs' +def _test_setup_store(io, store_url, local_store_path): + + store_url_path = PurePosixPath(urlparse(store_url).path) # invalid version raises: - assert_raises(UnknownLayoutVersion, create_store, io, ppp_store, '2') + assert_raises( + UnknownLayoutVersion, + create_store, io, store_url_path, '2') # non-existing path should work: - create_store(io, ppp_store, '1') + create_store(io, store_url_path, '1') + + version_file = local_store_path / 'ria-layout-version' + error_logs = local_store_path / 'error_logs' assert_true(version_file.exists()) assert_true(error_logs.exists()) assert_true(error_logs.is_dir()) assert_equal([f for f in error_logs.iterdir()], []) # empty target directory should work as well: - rmtree(str(store)) - store.mkdir(exist_ok=False) - create_store(io, ppp_store, '1') + rmtree(str(local_store_path)) + local_store_path.mkdir(exist_ok=False) + create_store(io, store_url_path, '1') assert_true(version_file.exists()) assert_true(error_logs.exists()) assert_true(error_logs.is_dir()) assert_equal([f for f in error_logs.iterdir()], 
[]) # re-execution also fine: - create_store(io, ppp_store, '1') + create_store(io, store_url_path, '1') # but version conflict with existing target isn't: version_file.write_text("2|unknownflags\n") - assert_raises(ValueError, create_store, io, ppp_store, '1') + assert_raises(ValueError, create_store, io, store_url_path, '1') # TODO: check output reporting conflicting version "2" # taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 -@with_tempfile -def patched_test_setup_ds_in_store(io_cls, io_args, store=None): - io = io_cls(*io_args) - store = Path(store) - # PATCH: introduce `ppp_store` and use it instead of `store` - ppp_store = local_path2pure_posix_path(store) +def _test_setup_ds_in_store(io, store_url, local_store_path): + + store_url_path = PurePosixPath(urlparse(store_url).path) + # ATM create_ds_in_store doesn't care what kind of ID is provided dsid = "abc123456" - ds_path = store / dsid[:3] / dsid[3:] # store layout version 1 + ds_path = local_store_path / dsid[:3] / dsid[3:] # store layout version 1 version_file = ds_path / 'ria-layout-version' archives = ds_path / 'archives' objects = ds_path / 'annex' / 'objects' git_config = ds_path / 'config' # invalid store version: - assert_raises(UnknownLayoutVersion, - create_ds_in_store, io, ppp_store, dsid, '1', 'abc') + assert_raises( + UnknownLayoutVersion, + create_ds_in_store, io, store_url_path, dsid, '1', 'abc' + ) # invalid obj version: - assert_raises(UnknownLayoutVersion, - create_ds_in_store, io, ppp_store, dsid, 'abc', '1') + assert_raises( + UnknownLayoutVersion, + create_ds_in_store, io, store_url_path, dsid, 'abc', '1' + ) # version 1 - create_store(io, ppp_store, '1') - create_ds_in_store(io, ppp_store, dsid, '1', '1') + create_store(io, store_url_path, '1') + create_ds_in_store(io, store_url_path, dsid, '1', '1') for p in [ds_path, archives, objects]: assert_true(p.is_dir(), msg="Not a directory: %s" % str(p)) for p in [version_file]: assert_true(p.is_file(), msg="Not a file: %s" 
% str(p)) - assert_equal(version_file.read_text(), "1\n") + assert_equal(version_file.read_text().strip(), "1") # conflicting version exists at target: - assert_raises(ValueError, create_ds_in_store, io, ppp_store, dsid, '2', '1') + assert_raises( + ValueError, + create_ds_in_store, io, store_url_path, dsid, '2', '1' + ) # version 2 # Note: The only difference between version 1 and 2 are supposed to be the # key paths (dirhashlower vs mixed), which has nothing to do with # setup routine. - rmtree(str(store)) - create_store(io, ppp_store, '1') - create_ds_in_store(io, ppp_store, dsid, '2', '1') + rmtree(local_store_path) + create_store(io, store_url_path, '1') + create_ds_in_store(io, store_url_path, dsid, '2', '1') for p in [ds_path, archives, objects]: assert_true(p.is_dir(), msg="Not a directory: %s" % str(p)) for p in [version_file]: assert_true(p.is_file(), msg="Not a file: %s" % str(p)) - assert_equal(version_file.read_text(), "2\n") + assert_equal(version_file.read_text().strip(), "2") # taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 @with_tempfile(mkdir=True) @serve_path_via_http @with_tempfile -def patched_test_initremote(store_path=None, store_url=None, ds_path=None): +def test_initremote(store_path=None, store_url=None, ds_path=None): ds = Dataset(ds_path).create() store_path = Path(store_path) # PATCH: introduce `ppp_store_path` and use it instead of `store_path` - ppp_store_path = local_path2pure_posix_path(store_path) + ppp_store_path = _local_path2pure_posix_path(store_path) url = "ria+" + store_url init_opts = common_init_opts + ['url={}'.format(url)] @@ -375,7 +387,7 @@ def patched_test_initremote(store_path=None, store_url=None, ds_path=None): @with_tempfile(mkdir=True) @serve_path_via_http @with_tempfile -def patched_test_read_access(store_path=None, store_url=None, ds_path=None): +def test_read_access(store_path=None, store_url=None, ds_path=None): ds = Dataset(ds_path).create() populate_dataset(ds) @@ -383,7 +395,7 @@ def 
patched_test_read_access(store_path=None, store_url=None, ds_path=None): files = [Path('one.txt'), Path('subdir') / 'two'] store_path = Path(store_path) # PATCH: introduce `ppp_store_path` and use it instead of `store_path` - ppp_store_path = local_path2pure_posix_path(store_path) + ppp_store_path = _local_path2pure_posix_path(store_path) url = "ria+" + store_url init_opts = common_init_opts + ['url={}'.format(url)] @@ -441,65 +453,80 @@ def patched_test_read_access(store_path=None, store_url=None, ds_path=None): # taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 -@with_tempfile -@with_tempfile -def patched_test_initremote_basic(url, io, store, ds_path, link): +def _test_initremote_basic(io, store_url, local_store_path, ds, link, overwrite = None): - ds_path = Path(ds_path) - store = Path(store) - # PATCH: introduce `ppp_store` and use it instead of `store` - ppp_store = local_path2pure_posix_path(store) - link = Path(link) - ds = Dataset(ds_path).create() populate_dataset(ds) - init_opts = common_init_opts + ['url={}'.format(url)] + store_url_path = overwrite or PurePosixPath(urlparse(store_url).path) + link = Path(link) + + init_opts = common_init_opts + [f'url=ria+{store_url}'] # fails on non-existing storage location - assert_raises(CommandError, - ds.repo.init_remote, 'ria-remote', options=init_opts) + assert_raises( + CommandError, + ds.repo.init_remote, 'ria-remote', options=init_opts + ) # Doesn't actually create a remote if it fails - assert_not_in('ria-remote', - [cfg['name'] - for uuid, cfg in ds.repo.get_special_remotes().items()] - ) + assert_not_in( + 'ria-remote', + [ + cfg['name'] + for uuid, cfg in ds.repo.get_special_remotes().items() + ] + ) # fails on non-RIA URL - assert_raises(CommandError, ds.repo.init_remote, 'ria-remote', - options=common_init_opts + ['url={}'.format(store.as_uri())] - ) + assert_raises( + CommandError, + ds.repo.init_remote, 'ria-remote', + options=common_init_opts + [f'url={store_url}'] + ) + # Doesn't 
actually create a remote if it fails - assert_not_in('ria-remote', - [cfg['name'] - for uuid, cfg in ds.repo.get_special_remotes().items()] - ) + assert_not_in( + 'ria-remote', + [ + cfg['name'] + for uuid, cfg in ds.repo.get_special_remotes().items() + ] + ) # set up store: - create_store(io, ppp_store, '1') + create_store(io, store_url_path, '1') # still fails, since ds isn't setup in the store - assert_raises(CommandError, - ds.repo.init_remote, 'ria-remote', options=init_opts) + assert_raises( + CommandError, + ds.repo.init_remote, 'ria-remote', options=init_opts + ) # Doesn't actually create a remote if it fails - assert_not_in('ria-remote', - [cfg['name'] - for uuid, cfg in ds.repo.get_special_remotes().items()] - ) + assert_not_in( + 'ria-remote', + [ + cfg['name'] + for uuid, cfg in ds.repo.get_special_remotes().items() + ] + ) # set up the dataset as well - create_ds_in_store(io, ppp_store, ds.id, '2', '1') + create_ds_in_store(io, store_url_path, ds.id, '2', '1') # now should work ds.repo.init_remote('ria-remote', options=init_opts) - assert_in('ria-remote', - [cfg['name'] - for uuid, cfg in ds.repo.get_special_remotes().items()] - ) + assert_in( + 'ria-remote', + [ + cfg['name'] + for uuid, cfg in ds.repo.get_special_remotes().items() + ] + ) assert_repo_status(ds.path) # git-annex:remote.log should have: # - url # - common_init_opts # - archive_id (which equals ds id) - remote_log = ds.repo.call_git(['cat-file', 'blob', 'git-annex:remote.log'], - read_only=True) - assert_in("url={}".format(url), remote_log) + remote_log = ds.repo.call_git( + ['cat-file', 'blob', 'git-annex:remote.log'], read_only=True + ) + assert_in(f'url=ria+{store_url}', remote_log) [assert_in(c, remote_log) for c in common_init_opts] assert_in("archive-id={}".format(ds.id), remote_log) @@ -508,21 +535,25 @@ def patched_test_initremote_basic(url, io, store, ds_path, link): CommandError, ds.repo.call_annex, ['enableremote', 'ria-remote'] + common_init_opts + [ - 
'url=ria+file:///non-existing']) + 'url=ria+file:///non-existing' + ] + ) # but re-configure with valid URL should work if has_symlink_capability(): - link.symlink_to(store) + link.symlink_to(local_store_path) new_url = 'ria+{}'.format(link.as_uri()) ds.repo.call_annex( ['enableremote', 'ria-remote'] + common_init_opts + [ - 'url={}'.format(new_url)]) + 'url={}'.format(new_url) + ] + ) # git-annex:remote.log should have: # - url # - common_init_opts # - archive_id (which equals ds id) - remote_log = ds.repo.call_git(['cat-file', 'blob', - 'git-annex:remote.log'], - read_only=True) + remote_log = ds.repo.call_git( + ['cat-file', 'blob', 'git-annex:remote.log'], read_only=True + ) assert_in("url={}".format(new_url), remote_log) [assert_in(c, remote_log) for c in common_init_opts] assert_in("archive-id={}".format(ds.id), remote_log) @@ -530,8 +561,10 @@ def patched_test_initremote_basic(url, io, store, ds_path, link): # we can deal with --sameas, which leads to a special remote not having a # 'name' property, but only a 'sameas-name'. 
See gh-4259 try: - ds.repo.init_remote('ora2', - options=init_opts + ['--sameas', 'ria-remote']) + ds.repo.init_remote( + 'ora2', + options=init_opts + ['--sameas', 'ria-remote'] + ) except CommandError as e: if 'Invalid option `--sameas' in e.stderr: # annex too old - doesn't know --sameas @@ -544,48 +577,38 @@ def patched_test_initremote_basic(url, io, store, ds_path, link): # taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 @known_failure_windows # see gh-4469 -@with_tempfile -@with_tempfile -@with_tempfile -def patched_test_remote_layout(host, dspath, store, archiv_store): +def _test_remote_layout(io, + store_url, + archive_store_url, + local_store_path, + local_archive_store_path, + ds + ): - dspath = Path(dspath) - store = Path(store) - archiv_store = Path(archiv_store) - # PATCH: introduce `ppp_store` and use it instead of `store` - ppp_store = local_path2pure_posix_path(store) - ppp_archiv_store = local_path2pure_posix_path(archiv_store) - ds = Dataset(dspath).create() populate_dataset(ds) assert_repo_status(ds.path) - # set up store: - io = SSHRemoteIO(host) if host else LocalIO() - if host: - store_url = "ria+ssh://{host}{path}".format(host=host, - path=store) - arch_url = "ria+ssh://{host}{path}".format(host=host, - path=archiv_store) - else: - store_url = "ria+{}".format(store.as_uri()) - arch_url = "ria+{}".format(archiv_store.as_uri()) + store_url_path = PurePosixPath(urlparse(store_url).path) + archive_store_url_path = PurePosixPath(urlparse(archive_store_url).path) - create_store(io, ppp_store, '1') + # set up store: + create_store(io, store_url_path, '1') # TODO: Re-establish test for version 1 # version 2: dirhash - create_ds_in_store(io, ppp_store, ds.id, '2', '1') + create_ds_in_store(io, store_url_path, ds.id, '2', '1') # add special remote - init_opts = common_init_opts + ['url={}'.format(store_url)] + init_opts = common_init_opts + [f'url=ria+{store_url}'] ds.repo.init_remote('store', options=init_opts) # copy files into the 
RIA store ds.push('.', to='store') # we should see the exact same annex object tree - dsgit_dir, archive_dir, dsobj_dir = \ - get_layout_locations(1, store, ds.id) + dsgit_dir, archive_dir, dsobj_dir = get_layout_locations( + 1, local_store_path, ds.id + ) store_objects = get_all_files(dsobj_dir) local_objects = get_all_files(ds.pathobj / '.git' / 'annex' / 'objects') assert_equal(len(store_objects), 4) @@ -607,14 +630,14 @@ def patched_test_remote_layout(host, dspath, store, archiv_store): # 7z archive and place it in the right location to get a functional # archive remote - create_store(io, ppp_archiv_store, '1') - create_ds_in_store(io, ppp_archiv_store, ds.id, '2', '1') + create_store(io, archive_store_url_path, '1') + create_ds_in_store(io, archive_store_url_path, ds.id, '2', '1') whereis = ds.repo.whereis('one.txt') - dsgit_dir, archive_dir, dsobj_dir = \ - get_layout_locations(1, archiv_store, ds.id) + dsgit_dir, archive_dir, dsobj_dir = get_layout_locations( + 1, local_archive_store_path, ds.id) ds.export_archive_ora(archive_dir / 'archive.7z') - init_opts = common_init_opts + ['url={}'.format(arch_url)] + init_opts = common_init_opts + [f'url=ria+{archive_store_url}'] ds.repo.init_remote('archive', options=init_opts) # now fsck the new remote to get the new special remote indexed ds.repo.fsck(remote='archive', fast=True) @@ -632,43 +655,31 @@ def patched_test_remote_layout(host, dspath, store, archiv_store): # taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 -@known_failure_windows # see gh-4469 -@with_tempfile -@with_tempfile -def patched_test_version_check(host, dspath, store): +def _test_version_check(io, store_url, local_store_path, ds): - dspath = Path(dspath) - store = Path(store) - # PATCH: introduce `ppp_store` and use it instead of `store` - ppp_store = local_path2pure_posix_path(store) + store_url_path = PurePosixPath(urlparse(store_url).path) - ds = Dataset(dspath).create() populate_dataset(ds) assert_repo_status(ds.path) - # set 
up store: - io = SSHRemoteIO(host) if host else LocalIO() - if host: - store_url = "ria+ssh://{host}{path}".format(host=host, - path=store) - else: - store_url = "ria+{}".format(store.as_uri()) - - create_store(io, ppp_store, '1') + create_store(io, store_url_path, '1') # TODO: Re-establish test for version 1 # version 2: dirhash - create_ds_in_store(io, ppp_store, ds.id, '2', '1') + create_ds_in_store(io, store_url_path, ds.id, '2', '1') # add special remote - init_opts = common_init_opts + ['url={}'.format(store_url)] + init_opts = common_init_opts + [f'url={"ria+" + store_url}'] ds.repo.init_remote('store', options=init_opts) ds.push('.', to='store') # check version files - remote_ds_tree_version_file = store / 'ria-layout-version' - dsgit_dir, archive_dir, dsobj_dir = \ - get_layout_locations(1, store, ds.id) + remote_ds_tree_version_file = local_store_path / 'ria-layout-version' + dsgit_dir, archive_dir, dsobj_dir = get_layout_locations( + 1, + local_store_path, + ds.id + ) remote_obj_tree_version_file = dsgit_dir / 'ria-layout-version' assert_true(remote_ds_tree_version_file.exists()) @@ -682,7 +693,7 @@ def patched_test_version_check(host, dspath, store): # Accessing the remote should not yield any output regarding versioning, # since it's the "correct" version. Note that "fsck" is an arbitrary choice. # We need just something to talk to the special remote. 
- with swallow_logs(new_level=logging.INFO) as cml: + with swallow_logs(new_level=logging.INFO, name='datalad.annex') as cml: ds.repo.fsck(remote='store', fast=True) # TODO: For some reason didn't get cml.assert_logged to assert # "nothing was logged" @@ -693,7 +704,7 @@ def patched_test_version_check(host, dspath, store): f.write('X\n') # Now we should see a message about it - with swallow_logs(new_level=logging.INFO) as cml: + with swallow_logs(new_level=logging.INFO, name='datalad.annex') as cml: ds.repo.fsck(remote='store', fast=True) cml.assert_logged(level="INFO", msg="Remote object tree reports version X", @@ -724,11 +735,11 @@ def patched_test_version_check(host, dspath, store): @known_failure_windows # see gh-4469 @with_tempfile @with_tempfile -def patched_test_gitannex(host, store, dspath): +def _test_gitannex(host, store, dspath): dspath = Path(dspath) store = Path(store) # PATCH: introduce `ppp_store` and use it instead of `store` - ppp_store = local_path2pure_posix_path(store) + ppp_store = _local_path2pure_posix_path(store) ds = Dataset(dspath).create() @@ -738,8 +749,10 @@ def patched_test_gitannex(host, store, dspath): # set up store: io = SSHRemoteIO(host) if host else LocalIO() if host: - store_url = "ria+ssh://{host}{path}".format(host=host, - path=store) + store_url = "ria+{host}{path}".format( + host=host[:-1], + path=store + ) else: store_url = "ria+{}".format(store.as_uri()) @@ -771,12 +784,12 @@ def patched_test_gitannex(host, store, dspath): @with_tempfile @with_tempfile @with_tempfile -def patched_test_push_url(storepath=None, dspath=None, blockfile=None): +def test_push_url(storepath=None, dspath=None, blockfile=None): dspath = Path(dspath) store = Path(storepath) # PATCH: introduce `ppp_store` and use it instead of `store` - ppp_store = local_path2pure_posix_path(store) + ppp_store = _local_path2pure_posix_path(store) blockfile = Path(blockfile) blockfile.touch() @@ -787,7 +800,7 @@ def patched_test_push_url(storepath=None, dspath=None, 
blockfile=None): # set up store: io = LocalIO() - store_url = "ria+{}".format(store.as_uri()) + store_url = "ria+{}".format(ppp_store.as_uri()) create_store(io, ppp_store, '1') create_ds_in_store(io, ppp_store, ds.id, '2', '1') @@ -838,17 +851,15 @@ def patched_test_push_url(storepath=None, dspath=None, blockfile=None): assert_in(store_uuid, known_sources) -# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 +# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732, refactored, +# and patched. # Skipping on adjusted branch as a proxy for crippledFS. Write permissions of # the owner on a directory can't be revoked on VFAT. "adjusted branch" is a # bit broad but covers the CI cases. And everything RIA/ORA doesn't currently # properly run on crippled/windows anyway. Needs to be more precise when # RF'ing will hopefully lead to support on windows in principle. @skip_if_adjusted_branch -@known_failure_windows -@with_tempfile -@with_tempfile -def patched_test_permission(host, storepath, dspath): +def _test_permission(io, store_url, local_ria_store_path, ds): # Test whether ORA correctly revokes and obtains write permissions within # the annex object tree. That is: Revoke after ORA pushed a key to store @@ -856,37 +867,31 @@ def patched_test_permission(host, storepath, dspath): # clone. And on removal obtain write permissions, like annex would # internally on a drop (but be sure to restore if something went wrong). 
- dspath = Path(dspath) - storepath = Path(storepath) - # PATCH: introduce `ppp_storepath` and use it instead of `storepath` - ppp_storepath = local_path2pure_posix_path(storepath) - ds = Dataset(dspath).create() populate_dataset(ds) ds.save() assert_repo_status(ds.path) testfile = 'one.txt' - # set up store: - io = SSHRemoteIO(host) if host else LocalIO() - if host: - store_url = "ria+ssh://{host}{path}".format(host=host, - path=storepath) - else: - store_url = "ria+{}".format(storepath.as_uri()) + store_url_path = PurePosixPath(urlparse(store_url).path) - create_store(io, ppp_storepath, '1') - create_ds_in_store(io, ppp_storepath, ds.id, '2', '1') - _, _, obj_tree = get_layout_locations(1, storepath, ds.id) + create_store(io, store_url_path, '1') + create_ds_in_store(io, store_url_path, ds.id, '2', '1') + + _, _, obj_tree = get_layout_locations(1, local_ria_store_path, ds.id) assert_true(obj_tree.is_dir()) file_key_in_store = obj_tree / 'X9' / '6J' / 'MD5E-s8--7e55db001d319a94b0b713529a756623.txt' / 'MD5E-s8--7e55db001d319a94b0b713529a756623.txt' - init_opts = common_init_opts + ['url={}'.format(store_url)] + init_opts = common_init_opts + [f'url={"ria+" + store_url}'] ds.repo.init_remote('store', options=init_opts) - store_uuid = ds.siblings(name='store', - return_type='item-or-list')['annex-uuid'] - here_uuid = ds.siblings(name='here', - return_type='item-or-list')['annex-uuid'] + store_uuid = ds.siblings( + name='store', + return_type='item-or-list' + )['annex-uuid'] + here_uuid = ds.siblings( + name='here', + return_type='item-or-list' + )['annex-uuid'] known_sources = ds.repo.whereis(testfile) assert_in(here_uuid, known_sources) @@ -902,8 +907,9 @@ def patched_test_permission(host, storepath, dspath): # Revoke write permissions from parent dir in-store to test whether we # still can drop (if we can obtain the permissions). Note, that this has # no effect on VFAT. 
- file_key_in_store.parent.chmod(file_key_in_store.parent.stat().st_mode & - ~stat.S_IWUSR) + file_key_in_store.parent.chmod( + file_key_in_store.parent.stat().st_mode & ~stat.S_IWUSR + ) # we can't directly delete; key in store should be protected assert_raises(PermissionError, file_key_in_store.unlink) @@ -915,114 +921,364 @@ def patched_test_permission(host, storepath, dspath): assert_false(file_key_in_store.exists()) -# Overwrite `_postclonetest_prepare` to handle paths properly -apply_patch( - 'datalad.core.distributed.tests.test_clone', - None, - '_postclonetest_prepare', - patched__postclonetest_prepare, - 'modify _postclonetest_prepare to use PurePosixPath-arguments ' - 'in RIA-methodes' -) +def _get_host_from_ssh_server(ssh_server): + url_parts = urlparse(ssh_server[0]) + return ( + 'ssh://' + + ((url_parts.username + '@') if url_parts.username else '') + + url_parts.hostname + + ((':' + str(url_parts.port)) if url_parts.port else '') + + '/' + ) -apply_patch( - 'datalad.core.distributed.tests.test_clone', - None, - 'test_ria_postclone_noannex', - patched_test_ria_postclone_noannex, - 'modify test_ria_postclone_noannex to use PurePosixPath-arguments ' - 'in RIA-methods' -) +# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 +@slow # 17sec + ? 
on travis +@skip_ssh +def test_version_check_ssh(sshserver, existing_dataset): + store_dir = _random_name('ria-store-') + store_url = sshserver[0] + '/' + store_dir + _test_version_check( + SSHRemoteIO(store_url), + store_url, + sshserver[1] / store_dir, + existing_dataset + ) -apply_patch( - 'datalad.customremotes.tests.test_ria_utils', - None, - '_test_setup_store', - patched_test_setup_store, - 'modify _test_setup_store to use PurePosixPath-arguments in RIA-methods' -) +# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 +def test_version_check(tmp_path, existing_dataset): + _test_version_check( + LocalIO(), + 'file://' + (tmp_path / 'ria-store').as_uri(), + tmp_path / 'ria-store', + existing_dataset + ) -apply_patch( - 'datalad.customremotes.tests.test_ria_utils', - None, - '_test_setup_ds_in_store', - patched_test_setup_ds_in_store, - 'modify _test_setup_ds_in_store to use PurePosixPath-arguments ' - 'in RIA-methods' -) +# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 +@skip_ssh +def test_obtain_permission_ssh(sshserver, existing_dataset): + store_dir = _random_name('ria-store-') + store_url = sshserver[0] + '/' + store_dir + _test_permission( + SSHRemoteIO(store_url), + store_url, + sshserver[1] / store_dir, + existing_dataset + ) -apply_patch( - 'datalad.distributed.tests.test_ora_http', - None, - 'test_initremote', - patched_test_initremote, - 'modify test_initremote to use PurePosixPath-arguments in RIA-methods' -) +# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 +@skip_if_root +def test_obtain_permission_root(tmp_path, existing_dataset): + _test_permission( + LocalIO(), + 'file://' + (tmp_path / 'ria-store').as_uri(), + tmp_path / 'ria-store', + existing_dataset + ) -apply_patch( - 'datalad.distributed.tests.test_ora_http', - None, - 'test_read_access', - patched_test_read_access, - 'modify test_read_access to use PurePosixPath-arguments in RIA-methods' -) +# taken from 
datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 +def test_setup_store_local(tmp_path): + _test_setup_store( + LocalIO(), + 'file://' + (tmp_path / 'ria-store').as_uri(), + tmp_path / 'ria-store', + ) -apply_patch( - 'datalad.distributed.tests.test_ria_basics', - None, - '_test_initremote_basic', - patched_test_initremote_basic, - 'modify _test_initremote_basic to use PurePosixPath-arguments ' - 'in RIA-methods' -) +@skip_ssh +def test_setup_store_ssh(sshserver): + store_dir = _random_name('ria-store-') + store_url = sshserver[0] + '/' + store_dir + _test_setup_store( + SSHRemoteIO(store_url), + store_url, + sshserver[1] / store_dir, + ) -apply_patch( - 'datalad.distributed.tests.test_ria_basics', - None, - '_test_remote_layout', - patched_test_remote_layout, - 'modify _test_remote_layout to use PurePosixPath-arguments in RIA-methods' -) +# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 +def test_setup_ds_in_store_local(tmp_path): + _test_setup_ds_in_store( + LocalIO(), + 'file://' + (tmp_path / 'ria-store').as_uri(), + tmp_path / 'ria-store', + ) -apply_patch( - 'datalad.distributed.tests.test_ria_basics', - None, - '_test_version_check', - patched_test_version_check, - 'modify _test_version_check to use PurePosixPath-arguments in RIA-methods' -) +@skip_ssh +def test_setup_ds_in_store_ssh(sshserver): + store_dir = _random_name('ria-store-') + store_url = sshserver[0] + '/' + store_dir + _test_setup_ds_in_store( + SSHRemoteIO(store_url), + store_url, + sshserver[1] / store_dir, + ) -apply_patch( - 'datalad.distributed.tests.test_ria_basics', - None, - '_test_gitannex', - patched_test_gitannex, - 'modify _test_gitannex to use PurePosixPath-arguments in RIA-methods' -) +# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 +@slow # 12sec + ? 
on travis
+@skip_ssh
+def test_remote_layout_ssh(sshserver, existing_dataset):
+    store_dir = _random_name('ria-store-')
+    store_url = sshserver[0] + '/' + store_dir
+    archive_store_dir = _random_name('ria-archive-store-')
+    archive_store_url = sshserver[0] + '/' + archive_store_dir
+    _test_remote_layout(
+        SSHRemoteIO(store_url),
+        store_url,
+        archive_store_url,
+        sshserver[1] / store_dir,
+        sshserver[1] / archive_store_dir,
+        existing_dataset,
+    )
 
 
-apply_patch(
-    'datalad.distributed.tests.test_ria_basics',
-    None,
-    '_test_version_check',
-    patched_test_version_check,
-    'modify _test_version_check to use PurePosixPath-arguments in RIA-methods'
-)
+# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732
+def test_remote_layout(tmp_path, existing_dataset):
+    _test_remote_layout(
+        LocalIO(),
+        'file://' + (tmp_path / 'ria-store').as_uri(),
+        'file://' + (tmp_path / 'ria-archive-store').as_uri(),
+        tmp_path / 'ria-store',
+        tmp_path / 'ria-archive-store',
+        existing_dataset,
+    )
 
 
-apply_patch(
-    'datalad.distributed.tests.test_ria_basics',
-    None,
-    '_test_gitannex',
-    patched_test_gitannex,
-    'modify _test_gitannex to use PurePosixPath-arguments in RIA-methods'
-)
+# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732
+# PATCH remove @known_failure_windows
+@skip_ssh
+def test_initremote_basic_sshurl(sshserver, tmp_path, existing_dataset):
+    store_dir = _random_name('ria-store-')
+    store_url = sshserver[0] + '/' + store_dir
+    _test_initremote_basic(
+        SSHRemoteIO(store_url),
+        store_url,
+        sshserver[1] / store_dir,
+        existing_dataset,
+        tmp_path / _random_name('link-'),
+    )
+
+
+# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732
+# PATCH remove @known_failure_windows
+def test_initremote_basic_fileurl(tmp_path, existing_dataset):
+    store_dir = _random_name('ria-store-')
+    _test_initremote_basic(
+        LocalIO(),
+        'file://' + (tmp_path / store_dir).as_uri(),
+        tmp_path / store_dir,
+        existing_dataset,
+        tmp_path / 'link',
+    )
+
+
+# taken 
from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732
+# https://github.com/datalad/datalad/issues/6160
+@known_failure_windows
+def test_initremote_basic_httpurl(http_server, tmp_path, existing_dataset):
+    # TODO: add a test for https
+    _test_initremote_basic(
+        LocalIO(),
+        http_server.url,
+        http_server.path,
+        existing_dataset,
+        tmp_path / _random_name('link-'),
+        _local_path2pure_posix_path(http_server.path),
+    )
+
+
+# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732
+@turtle
+# PATCH remove @known_failure_windows
+@skip_ssh
+def test_gitannex_ssh(sshserver):
+    _test_gitannex(_get_host_from_ssh_server(sshserver))
+
+
+# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732
+@slow  # 41sec on travis
+def test_gitannex_local():
+    _test_gitannex(None)
+
+
+# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732
+# TODO?: make parametric again on _test_ria_postclonecfg
+# PATCH remove @known_failure_windows
+@slow  # 14 sec on travis
+@known_failure_windows  # https://github.com/datalad/datalad/issues/5134
+def test_ria_postclonecfg(sshserver):
+
+    if not has_symlink_capability():
+        # This is needed to create an ORA remote using an URL for upload,
+        # that is then invalidated later on (delete the symlink it's based on).
+        raise SkipTest("Can't create symlinks")
+
+    lcl_dir = _random_name('lcl-')
+    store_dir = _random_name('ria-store-')
+    store2_dir = _random_name('ria-store-2-')
+
+    lcl = sshserver[1] / lcl_dir
+    store = sshserver[1] / store_dir
+    store2 = sshserver[1] / store2_dir
+
+    lcl.mkdir(parents=True)
+    store.mkdir(parents=True)
+    store2.mkdir(parents=True)
+
+    #lcl_url = sshserver[0] + '/' + lcl_dir
+    store_url = sshserver[0] + '/' + store_dir
+    #store2_url = sshserver[0] + '/' + store2_dir
+
+    id = _postclonetest_prepare(lcl, store, store2)
+
+    # test cloning via ria+file://
+    _test_ria_postclonecfg(
+        get_local_file_url(store, compatibility='git'), id
+    )
+
+    # Note: HTTP disabled for now. 
Requires proper implementation in ORA + # remote. See + # https://github.com/datalad/datalad/pull/4203#discussion_r410284649 + + # # test cloning via ria+http:// + # with HTTPPath(store) as url: + # yield _test_ria_postclonecfg, url, id + + # test cloning via ria+ssh:// + skip_ssh(_test_ria_postclonecfg)(store_url, id) + + +# taken from datalad-core@864dc4ae24c8aac0ec4003604543b86de4735732 +@with_tempfile +@with_tempfile +def _test_ria_postclonecfg(url, dsid, clone_path, superds): + # Test cloning from RIA store while ORA special remote autoenabling failed + # due to an invalid URL from the POV of the cloner. + # Origin's git-config-file should contain the UUID to enable. This needs to + # work via HTTP, SSH and local cloning. + + # Autoenabling should fail initially by git-annex-init and we would report + # on INFO level. Only postclone routine would deal with it. + with swallow_logs( + new_level=logging.INFO, + name='datalad.core.distributed.clone', + ) as cml: + # First, the super ds: + riaclone = clone('ria+{}#{}'.format(url, dsid), clone_path) + cml.assert_logged(msg="access to 1 dataset sibling store-storage not " + "auto-enabled", + level="INFO", + regex=False) + + # However, we now can retrieve content since clone should have enabled the + # special remote with new URL (or origin in case of HTTP). 
+ res = riaclone.get('test.txt') + assert_result_count(res, 1, + status='ok', + path=str(riaclone.pathobj / 'test.txt'), + message="from {}...".format(DEFAULT_REMOTE + if url.startswith('http') + else "store-storage")) + + # Second ORA remote is enabled and not reconfigured: + untouched_remote = riaclone.siblings(name='anotherstore-storage', + return_type='item-or-list') + assert_not_is_instance(untouched_remote, list) + untouched_url = riaclone.repo.get_special_remotes()[ + untouched_remote['annex-uuid']]['url'] + ok_(untouched_url.startswith("ria+file://")) + ok_(not untouched_url.startswith("ria+{}".format(url))) + + # publication dependency was set for store-storage but not for + # anotherstore-storage: + eq_(riaclone.config.get(f"remote.{DEFAULT_REMOTE}.datalad-publish-depends", + get_all=True), + "store-storage") + + # same thing for the sub ds (we don't need a store-url and id - get should + # figure those itself): + with swallow_logs( + new_level=logging.INFO, + name='datalad.core.distributed.clone', + ) as cml: + riaclonesub = riaclone.get( + op.join('subdir', 'subds'), get_data=False, + result_xfm='datasets', return_type='item-or-list') + cml.assert_logged(msg="access to 1 dataset sibling store-storage not " + "auto-enabled", + level="INFO", + regex=False) + res = riaclonesub.get('testsub.txt') + assert_result_count(res, 1, + status='ok', + path=str(riaclonesub.pathobj / 'testsub.txt'), + message="from {}...".format(DEFAULT_REMOTE + if url.startswith('http') + else "store-storage")) + + # publication dependency was set for store-storage but not for + # anotherstore-storage: + eq_(riaclonesub.config.get(f"remote.{DEFAULT_REMOTE}.datalad-publish-depends", + get_all=True), + "store-storage") + + # finally get the plain git subdataset. + # Clone should figure to also clone it from a ria+ URL + # (subdataset-source-candidate), notice that there wasn't an autoenabled ORA + # remote, but shouldn't stumble upon it, since it's a plain git. 
+ res = riaclone.get(op.join('subdir', 'subgit', 'testgit.txt')) + assert_result_count(res, 1, status='ok', type='dataset', action='install') + assert_result_count(res, 1, status='notneeded', type='file') + assert_result_count(res, 2) + # no ORA remote, no publication dependency: + riaclonesubgit = Dataset(riaclone.pathobj / 'subdir' / 'subgit') + eq_(riaclonesubgit.config.get(f"remote.{DEFAULT_REMOTE}.datalad-publish-depends", + get_all=True), + None) + + # Now, test that if cloning into a dataset, ria-URL is preserved and + # post-clone configuration is triggered again, when we remove the subds and + # retrieve it again via `get`: + ds = Dataset(superds).create() + ria_url = 'ria+{}#{}'.format(url, dsid) + ds.clone(ria_url, 'sub') + sds = ds.subdatasets('sub') + eq_(len(sds), 1) + eq_(sds[0]['gitmodule_datalad-url'], ria_url) + assert_repo_status(ds.path) + ds.drop('sub', what='all', reckless='kill', recursive=True) + assert_repo_status(ds.path) + + # .gitmodules still there: + sds = ds.subdatasets('sub') + eq_(len(sds), 1) + eq_(sds[0]['gitmodule_datalad-url'], ria_url) + # get it again: + + # Autoenabling should fail initially by git-annex-init and we would report + # on INFO level. Only postclone routine would deal with it. 
+ with swallow_logs( + new_level=logging.INFO, + name='datalad.core.distributed.clone', + ) as cml: + ds.get('sub', get_data=False) + cml.assert_logged(msg="access to 1 dataset sibling store-storage not " + "auto-enabled", + level="INFO", + regex=False) + + subds = Dataset(ds.pathobj / 'sub') + # special remote is fine: + res = subds.get('test.txt') + assert_result_count(res, 1, + status='ok', + path=str(subds.pathobj / 'test.txt'), + message="from {}...".format(DEFAULT_REMOTE + if url.startswith('http') + else "store-storage")) diff --git a/datalad_next/patches/tests/test_push.py b/datalad_next/patches/tests/test_push.py index aa6a5b02..00ca557c 100644 --- a/datalad_next/patches/tests/test_push.py +++ b/datalad_next/patches/tests/test_push.py @@ -1,12 +1,22 @@ -from datalad_next.tests import ( - DEFAULT_REMOTE, - assert_result_count, -) -from datalad.core.distributed.clone import Clone +import datalad_next.patches.enabled # run all -core tests, because with _push() we patched a central piece from datalad.core.distributed.tests.test_push import * +# import those directly to pass mypy tests, although they are already imported +# by the above import +from datalad.tests.utils_pytest import ( + DEFAULT_REMOTE, + SkipTest, + assert_in_results, + assert_not_in_results, + assert_result_count, + eq_, + known_failure_githubci_win, + slow, + with_tempfile, +) + from datalad_next.datasets import Dataset @@ -35,3 +45,129 @@ def test_gh1811(tmp_path, no_result_rendering): message='There is no active branch, cannot determine remote ' 'branch', ) + + +# taken from datalad-core@250386f1fd83af7a3df72347c9b26a4afd66baa7 +@slow # can run over 30 sec when running in parallel with n=2. 
Cannot force serial yet, see https://github.com/pytest-dev/pytest-xdist/issues/385 +# Removed @known_failure_githubci_win-decorator +@with_tempfile(mkdir=True) +@with_tempfile(mkdir=True) +@with_tempfile(mkdir=True) +def test_nested_pushclone_cycle_allplatforms(origpath=None, storepath=None, clonepath=None): + if 'DATALAD_SEED' in os.environ: + # we are using create-sibling-ria via the cmdline in here + # this will create random UUIDs for datasets + # however, given a fixed seed each call to this command will start + # with the same RNG seed, hence yield the same UUID on the same + # machine -- leading to a collision + raise SkipTest( + 'Test incompatible with fixed random number generator seed' + ) + # the aim here is this high-level test a std create-push-clone cycle for a + # dataset with a subdataset, with the goal to ensure that correct branches + # and commits are tracked, regardless of platform behavior and condition + # of individual clones. Nothing fancy, just that the defaults behave in + # sensible ways + from datalad.cmd import WitlessRunner as Runner + run = Runner().run + + os.environ['DATALAD_EXTENSIONS_LOAD'] = 'next' + + # create original nested dataset + with chpwd(origpath): + run(['datalad', 'create', 'super'], env=os.environ) + run( + [ + 'datalad', 'create', '-d', 'super', + str(Path('super', 'sub')) + ], + env=os.environ + ) + + # verify essential linkage properties + orig_super = Dataset(Path(origpath, 'super')) + orig_sub = Dataset(orig_super.pathobj / 'sub') + + (orig_super.pathobj / 'file1.txt').write_text('some1') + (orig_sub.pathobj / 'file2.txt').write_text('some1') + with chpwd(orig_super.path): + run(['datalad', 'save', '--recursive'], env=os.environ) + + # TODO not yet reported clean with adjusted branches + #assert_repo_status(orig_super.path) + + # the "true" branch that sub is on, and the gitsha of the HEAD commit of it + orig_sub_corr_branch = \ + orig_sub.repo.get_corresponding_branch() or orig_sub.repo.get_active_branch() + 
orig_sub_corr_commit = orig_sub.repo.get_hexsha(orig_sub_corr_branch) + + # make sure the super trackes this commit + assert_in_results( + orig_super.subdatasets(), + path=orig_sub.path, + gitshasum=orig_sub_corr_commit, + # TODO it should also track the branch name + # Attempted: https://github.com/datalad/datalad/pull/3817 + # But reverted: https://github.com/datalad/datalad/pull/4375 + ) + + # publish to a store, to get into a platform-agnostic state + # (i.e. no impact of an annex-init of any kind) + store_url = 'ria+' + get_local_file_url(storepath) + with chpwd(orig_super.path): + run( + [ + 'datalad', 'create-sibling-ria', '--recursive', + '-s', 'store', store_url, '--new-store-ok' + ], + env=os.environ + ) + run( + ['datalad', 'push', '--recursive', '--to', 'store'], + env=os.environ + ) + + # we are using the 'store' sibling's URL, which should be a plain path + store_super = AnnexRepo(orig_super.siblings(name='store')[0]['url'], init=False) + store_sub = AnnexRepo(orig_sub.siblings(name='store')[0]['url'], init=False) + + # both datasets in the store only carry the real branches, and nothing + # adjusted + for r in (store_super, store_sub): + eq_(set(r.get_branches()), set([orig_sub_corr_branch, 'git-annex'])) + + # and reobtain from a store + cloneurl = 'ria+' + get_local_file_url(str(storepath), compatibility='git') + with chpwd(clonepath): + run( + ['datalad', 'clone', cloneurl + '#' + orig_super.id, 'super'], + env=os.environ + ) + run( + ['datalad', '-C', 'super', 'get', '--recursive', '.'], + env=os.environ + ) + + # verify that nothing has changed as a result of a push/clone cycle + clone_super = Dataset(Path(clonepath, 'super')) + clone_sub = Dataset(clone_super.pathobj / 'sub') + assert_in_results( + clone_super.subdatasets(), + path=clone_sub.path, + gitshasum=orig_sub_corr_commit, + ) + + for ds1, ds2, f in ((orig_super, clone_super, 'file1.txt'), + (orig_sub, clone_sub, 'file2.txt')): + eq_((ds1.pathobj / f).read_text(), (ds2.pathobj / 
f).read_text()) + + # get status info that does not recursive into subdatasets, i.e. not + # looking for uncommitted changes + # we should see no modification reported + assert_not_in_results( + clone_super.status(eval_subdataset_state='commit'), + state='modified') + # and now the same for a more expensive full status + assert_not_in_results( + clone_super.status(recursive=True), + state='modified')