From c4c8e9f4bb579b27d3111bfdc945c396692c092b Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Tue, 1 Oct 2024 08:13:10 +0200 Subject: [PATCH] TMP --- datalad_next/annexremotes/__init__.py | 14 +++- datalad_next/annexremotes/archivist.py | 2 +- datalad_next/config/default.py | 3 +- datalad_next/config/git.py | 82 ++++++++++--------- datalad_next/config/gitenv.py | 2 +- datalad_next/config/item.py | 3 +- datalad_next/config/legacy.py | 64 +++++++++++---- datalad_next/config/manager.py | 37 ++++++++- datalad_next/config/tests/test_core.py | 9 +- datalad_next/config/utils.py | 5 +- .../tests/test_iterannexworktree.py | 9 +- datalad_next/patches/replace_ora_remote.py | 6 +- 12 files changed, 159 insertions(+), 77 deletions(-) diff --git a/datalad_next/annexremotes/__init__.py b/datalad_next/annexremotes/__init__.py index 06eeca02..d260838f 100644 --- a/datalad_next/annexremotes/__init__.py +++ b/datalad_next/annexremotes/__init__.py @@ -4,6 +4,7 @@ # in a single place from annexremote import UnsupportedRequest from typing import Any +from os import environ from datalad.customremotes import ( # this is an enhanced RemoteError that self-documents its cause @@ -32,9 +33,20 @@ def repo(self) -> LeanAnnexRepo: to limit further proliferation of the ``AnnexRepo`` API. """ if self._repo is None: - self._repo = LeanAnnexRepo(self.annex.getgitdir()) + self._repo = LeanAnnexRepo(self.repodir) return self._repo + @property + def repodir(self) -> str: + import sys + repodir = self.annex.getgitdir() + # git-annex also sets GIT_DIR, and we want to account for that + # to be able to run regular Git command in this environment + gitdir_env = environ.get("GIT_DIR") + if gitdir_env and repodir.endswith(gitdir_env): + repodir = repodir[:-len(gitdir_env)] + return repodir + @property def remotename(self) -> str: """Name of the (git) remote the special remote is operating under""" diff --git a/datalad_next/annexremotes/archivist.py b/datalad_next/annexremotes/archivist.py index 7696f121..a10a0c19 100644 --- a/datalad_next/annexremotes/archivist.py +++ b/datalad_next/annexremotes/archivist.py @@ -162,7 +162,7 @@ def prepare(self): # us a `LeanAnnexRepo`. # TODO it is unclear to MIH what is actually needed API-wise of the legacy # interface. Needs research. - self._repo = LegacyAnnexRepo(self.annex.getgitdir()) + self._repo = LegacyAnnexRepo(self.repodir) # are we in legacy mode? # let remote-specific setting take priority (there could be # multiple archivist-type remotes configured), and use unspecific switch diff --git a/datalad_next/config/default.py b/datalad_next/config/default.py index b77eb2c2..33d95f66 100644 --- a/datalad_next/config/default.py +++ b/datalad_next/config/default.py @@ -82,7 +82,7 @@ def legacy_register_config( title: str, *, default: Any = UnsetValue, - default_fn: Callable | type[UnsetValue] = UnsetValue, + default_fn: Callable | None = None, description: str | None = None, type: Constraint | None = None, # noqa: A002 dialog: str | None = None, @@ -99,7 +99,6 @@ def legacy_register_config( ), store_target=get_store_target_from_destination_label(scope), ) - # lastly trigger legacy registration _legacy_register_config( name=name, diff --git a/datalad_next/config/git.py b/datalad_next/config/git.py index ec67688c..fd97b614 100644 --- a/datalad_next/config/git.py +++ b/datalad_next/config/git.py @@ -8,6 +8,7 @@ from os import name as os_name from typing import ( TYPE_CHECKING, + Hashable, ) if TYPE_CHECKING: @@ -22,7 +23,7 @@ from datasalad.runners import CommandError as SaladCommandError from datasalad.settings import CachingSource -from datalad.consts import DATASET_CONFIG_FILE +from datalad.consts import DATASET_CONFIG_FILE # type: ignore from datalad_next.config.item import ConfigurationItem from datalad_next.runners import ( @@ -93,33 +94,30 @@ def load(self) -> None: # the "blobs" is known self._sources = origin_paths.union(origin_blobs) + setter = '__setitem__' for k, v in dct.items(): - if isinstance(v, tuple): - vals = tuple( - ConfigurationItem( - value=val, - store_target=self.__class__, - ) - for val in v - ) - else: - vals = ConfigurationItem( - value=v, + if not isinstance(v, tuple): + v = (v,) + for val in v: + item = ConfigurationItem( + value=val, store_target=self.__class__, ) - super().__setitem__(k, vals) + getattr(super(), setter)(k, item) + # for every subsequent value we must call add() + setter = 'add' - def __setitem__(self, key: str, value: Setting) -> None: + def __setitem__(self, key: Hashable, value: Setting) -> None: call_git( - [*self._get_git_config_cmd(), '--replace-all', key, str(value.value)], + [*self._get_git_config_cmd(), '--replace-all', str(key), str(value.value)], capture_output=True, ) super().__setitem__(key, value) - def add(self, key: str, value: Setting) -> None: + def add(self, key: Hashable, value: Setting) -> None: call_git( - [*self._get_git_config_cmd(), '--add', key, str(value.value)], + [*self._get_git_config_cmd(), '--add', str(key), str(value.value)], capture_output=True, ) @@ -145,35 +143,48 @@ def _get_git_config_cwd(self) -> Path: class LocalGitConfig(GitConfig): def __init__(self, path: PathLike): super().__init__() - self._path = path + pathobj = Path(path) + try: - self._is_bare_repo = call_git_oneline( - ['rev-parse', '--is-bare-repository'], - cwd=path, + #TODO CHECK FOR GIT_DIR and adjust + self._in_worktree = call_git_oneline( + ['rev-parse', '--is-inside-work-tree'], + cwd=pathobj, force_c_locale=True, ) == 'true' - except CommandError: - # TODO: this is too simplistic. It could also be - # that there is no repo (yet) - self._is_bare_repo = False + except CommandError as e: + from os import environ + msg = f"no Git repository at {path}: {e!r} {environ.get('GIT_DIR')}" + raise ValueError(msg) from e + + self._gitdir = Path( + path if not self._in_worktree + else call_git_oneline( + ['rev-parse', '--path-format=absolute', '--git-dir'], + cwd=pathobj, + force_c_locale=True, + ) + ) def _get_git_config_cmd(self) -> list[str]: - return ['-C', str(self._path), 'config', '--local'] + return ['--git-dir', str(self._gitdir), 'config', '--local'] def _get_git_config_cwd(self) -> Path: - return self._path + # we set --git-dir, CWD does not matter + return None class DataladBranchConfig(LocalGitConfig): def __init__(self, path: PathLike): super().__init__(path) + self._path = path def _get_git_config_cmd(self) -> list[str]: return [ - '-C', str(self._path), - 'config', - *(('--blob', 'HEAD:.datalad/config') if self._is_bare_repo else - ('--file', str(self._path / DATASET_CONFIG_FILE))), + '--git-dir', str(self._gitdir), 'config', + *(('--file', str(self._path / DATASET_CONFIG_FILE)) + if self._in_worktree + else ('--blob', f'HEAD:{DATASET_CONFIG_FILE}')) ] def _ensure_target_dir(self): @@ -182,11 +193,11 @@ def _ensure_target_dir(self): custom_file = Path(cmd[cmd.index('--file') + 1]) custom_file.parent.mkdir(exist_ok=True) - def __setitem__(self, key: str, value: Setting) -> None: + def __setitem__(self, key: Hashable, value: Setting) -> None: self._ensure_target_dir() super().__setitem__(key, value) - def add(self, key: str, value: Setting) -> None: + def add(self, key: Hashable, value: Setting) -> None: self._ensure_target_dir() super().add(key, value) @@ -224,10 +235,7 @@ def _proc_dump_line( # man git-config: # just name, which is a short-hand to say that the variable is # the boolean - #v = "true" - # BUUUUUT datalad of old want it to stay `None` - # BUUUUUUUUT it also want it to be reported as True later on - v = None + v = "true" # multi-value reporting present_v = dct.get(k) if present_v is None: diff --git a/datalad_next/config/gitenv.py b/datalad_next/config/gitenv.py index 1d091ae4..fb434474 100644 --- a/datalad_next/config/gitenv.py +++ b/datalad_next/config/gitenv.py @@ -60,7 +60,7 @@ def getall( default: Any = None, ) -> tuple[Setting, ...]: try: - val = get_gitconfig_items_from_env()[key] + val = get_gitconfig_items_from_env()[str(key)] except KeyError: return (self._get_default_setting(default),) vals = val if isinstance(val, tuple) else (val,) diff --git a/datalad_next/config/item.py b/datalad_next/config/item.py index f2ae5510..9d883cb8 100644 --- a/datalad_next/config/item.py +++ b/datalad_next/config/item.py @@ -14,7 +14,6 @@ from datasalad.settings import Source from datalad_next.config import ( - Dialog, dialog as dialog_collection, ) from datalad_next.constraints import Constraint @@ -60,7 +59,7 @@ def __init__( self._store_target = store_target @property - def dialog(self) -> Dialog | None: + def dialog(self) -> dialog_collection.Dialog | None: return self._dialog @property diff --git a/datalad_next/config/legacy.py b/datalad_next/config/legacy.py index 80693d67..fb66cdf5 100644 --- a/datalad_next/config/legacy.py +++ b/datalad_next/config/legacy.py @@ -59,6 +59,37 @@ def wrapper(*args, **kwargs): return wrapper +class LegacyOverridesProxy: + """Proxy class to wrap the legacy ConfigManager overrides + + There were handed out for direct manipulation of their holding + dict. This allowed for arbitrary modification. This class is + supposed to give us a fighting change to keep supporting this + interface, while being able to issue deprecation warnings + and continue to integrate with the new setup. + + For now this wraps the legacy-override source, but it could + eventually migrate to read from and write to the git-command + source. + """ + def __init__(self, overrides: InMemory): + self._ov = overrides + + def items(self): + for k, v in self._ov._items.items(): + yield k, v.value if not isinstance(v, tuple) \ + else (i.value for i in v) + + def update(self, other): + for k, v in other.items(): + self._ov._items[k] = self._ov.item_type(v) \ + if not isinstance(v, tuple) \ + else tuple(self._ov.item_type(i) for i in v) + + def copy(self): + return dict(self.items()) + + class ConfigManager: def __init__( self, @@ -76,8 +107,7 @@ def __init__( source=source, )) self._defaults = manager.sources['defaults'] - for src in self._mngr.sources.values(): - src.load() + self.reload() # TODO: make obsolete self._repo_dot_git = None @@ -93,10 +123,10 @@ def __init__( @property def overrides(self): - # this is a big hassle. the original class hands out the real dict to do any - # manipulation with it. for a transition we want to keep some control, and - # hand out a proxy only - return MappingProxyType(self._mngr.sources['legacy-overrides']._items) + # this is a big hassle. the original class hands out the real dict to + # do any manipulation with it. for a transition we want to keep some + # control, and hand out a proxy only + return LegacyOverridesProxy(self._mngr.sources['legacy-overrides']) @property def _stores(self): @@ -114,7 +144,10 @@ def _stores(self): return {'git': {'files': files}} def reload(self, force: bool = False) -> None: - for s in self._mngr.sources.values(): + for n, s in self._mngr.sources.items(): + if n in ('legacy-overrides', 'defaults'): + continue + s.reinit() s.load() def obtain(self, var, default=None, dialog_type=None, valtype=None, @@ -187,7 +220,7 @@ def _obtain_from_user( **kwargs, ): # now we need to try to obtain something from the user - from datalad.ui import ui + from datalad.ui import ui # type: ignore if (not ui.is_interactive or default_item.dialog is None) and default is None: raise RuntimeError( @@ -387,6 +420,8 @@ def unset(self, var, scope='branch', reload=True): src.load() def get_src(self, scope): + if scope is None: + scope = 'local' name = scope_label_to_source_label_map.get(scope) if name is None: raise ValueError(f'unknown scope {scope!r}') @@ -427,7 +462,7 @@ def get_sources( and 'branch-local'. """ nodataset_errmsg = ( - 'ConfigManager configured to read from a branch of a dataset only, ' + 'ConfigManager configured to read from (a branch of) a dataset, ' 'but no dataset given' ) # if applicable, we want to reuse the exact same source instances as the @@ -450,7 +485,7 @@ def get_sources( 'datalad-branch': DataladBranchConfig(dataset.pathobj), } if source == 'local': - if not dataset: + if dataset is None: return { 'legacy-environment': global_sources['legacy-environment'], 'legacy-overrides': ovsrc, @@ -465,6 +500,8 @@ def get_sources( 'git-system': global_sources['git-system'], } if source == 'branch-local': + if dataset is None: + raise ValueError(nodataset_errmsg) return { 'legacy-overrides': ovsrc, 'git-local': LocalGitConfig(dataset.pathobj), @@ -492,11 +529,6 @@ def get_sources( def anything2bool(val): - if val is None: - # TODO: just changes this behavior - # forced by a test in old core that forces _proc_dump_line - # to work this way - return True if val == '': return False if hasattr(val, 'lower'): @@ -568,4 +600,4 @@ def rewrite_url(cfg, url): # for convenience, bind to class too -ConfigManager.rewrite_url = rewrite_url +ConfigManager.rewrite_url = rewrite_url # type: ignore diff --git a/datalad_next/config/manager.py b/datalad_next/config/manager.py index ca94c14a..d84b17ed 100644 --- a/datalad_next/config/manager.py +++ b/datalad_next/config/manager.py @@ -1,4 +1,11 @@ -from datasalad.settings import Settings +from __future__ import annotations + +from contextlib import contextmanager +from typing import Generator +from datasalad.settings import ( + Settings, + UnsetValue, +) from datalad_next.config.default import ImplementationDefault from datalad_next.config.env import LegacyEnvironment @@ -20,3 +27,31 @@ def __init__(self, defaults: ImplementationDefault): 'git-system': SystemGitConfig(), 'defaults': defaults, }) + + @contextmanager + def overrides(self, overrides: dict) -> Generator[ConfigManager]: + """Context manager to temporarily set configuration overrides + + Internally, these overrides are posted to the 'git-command' scope, + hence affect the process environment and newly spawn subprocesses. + + .. todo:: + + ATM this implementation cannot handle multi-value settings. + Neither as incoming overrides, nor as to-be-replaced existing + items. + """ + gitcmdsrc = self.sources['git-command'] + restore = {} + # TODO: handle multivalue settings + for k, v in overrides.items(): + restore[k] = gitcmdsrc.get(k, gitcmdsrc.item_type(UnsetValue)) + gitcmdsrc[k] = gitcmdsrc.item_type(v) + try: + yield self + finally: + for k, v in restore.items(): + if v.pristine_value is UnsetValue: + del gitcmdsrc[k] + else: + gitcmdsrc[k] = v diff --git a/datalad_next/config/tests/test_core.py b/datalad_next/config/tests/test_core.py index 666d324e..a76bae76 100644 --- a/datalad_next/config/tests/test_core.py +++ b/datalad_next/config/tests/test_core.py @@ -145,13 +145,15 @@ def test_something(path=None, new_home=None): [(u'onemore.complicated の beast with.dot.findme', '5.0'), ('something.empty', ''), ('something.myint', '3'), - ('something.novalue', None), + # no-value is a shorthand for 'true' + ('something.novalue', 'true'), ('something.user', ('name=Jane Doe', 'email=jd@example.com'))]) assert_equal( sorted(cfg.items('something')), [('something.empty', ''), ('something.myint', '3'), - ('something.novalue', None), + # no-value is a shorthand for 'true' + ('something.novalue', 'true'), ('something.user', ('name=Jane Doe', 'email=jd@example.com'))]) # by default get last value only @@ -167,7 +169,8 @@ def test_something(path=None, new_home=None): assert_equal(cfg.getbool('something', 'myint'), True) # git demands a key without value at all to be used as a flag, thus True assert_equal(cfg.getbool('something', 'novalue'), True) - assert_equal(cfg.get('something.novalue'), None) + # no-value is a shorthand for 'true' + assert_equal(cfg.get('something.novalue'), 'true') # empty value is False assert_equal(cfg.getbool('something', 'empty'), False) assert_equal(cfg.get('something.empty'), '') diff --git a/datalad_next/config/utils.py b/datalad_next/config/utils.py index c605ded1..03b692fc 100644 --- a/datalad_next/config/utils.py +++ b/datalad_next/config/utils.py @@ -2,13 +2,12 @@ from os import environ from typing import ( - Dict, Mapping, Tuple, ) -def get_gitconfig_items_from_env() -> Mapping[str, str | Tuple[str, ...]]: +def get_gitconfig_items_from_env() -> dict[str, str | Tuple[str, ...]]: """Parse git-config ENV (``GIT_CONFIG_COUNT|KEY|VALUE``) and return as dict This implementation does not use ``git-config`` directly, but aims to @@ -29,7 +28,7 @@ def get_gitconfig_items_from_env() -> Mapping[str, str | Tuple[str, ...]]: times, the respective values are aggregated in reported as a tuple for that specific key. """ - items: Dict[str, str | Tuple[str, ...]] = {} + items: dict[str, str | Tuple[str, ...]] = {} for k, v in ((_get_gitconfig_var_from_env(i, 'key'), _get_gitconfig_var_from_env(i, 'value')) for i in range(_get_gitconfig_itemcount())): diff --git a/datalad_next/iter_collections/tests/test_iterannexworktree.py b/datalad_next/iter_collections/tests/test_iterannexworktree.py index 2a767a43..bdb99369 100644 --- a/datalad_next/iter_collections/tests/test_iterannexworktree.py +++ b/datalad_next/iter_collections/tests/test_iterannexworktree.py @@ -2,10 +2,9 @@ PurePath, ) -from datalad import cfg as dlcfg +from datalad_next.config import manager from datalad_next.datasets import Dataset -from datalad_next.utils import check_symlink_capability from ..gitworktree import ( GitTreeItemType, @@ -17,13 +16,9 @@ def _mkds(tmp_path_factory, monkeypatch, cfg_overrides): - with monkeypatch.context() as m: - for k, v in cfg_overrides.items(): - m.setitem(dlcfg.overrides, k, v) - dlcfg.reload() + with manager.overrides(cfg_overrides): ds = Dataset(tmp_path_factory.mktemp('ds')).create( result_renderer='disabled') - dlcfg.reload() return ds diff --git a/datalad_next/patches/replace_ora_remote.py b/datalad_next/patches/replace_ora_remote.py index 952d0845..9a2dc0f8 100644 --- a/datalad_next/patches/replace_ora_remote.py +++ b/datalad_next/patches/replace_ora_remote.py @@ -41,7 +41,6 @@ from datalad.config import anything2bool from datalad.customremotes import ( ProtocolError, - SpecialRemote, ) from datalad.distributed.ora_remote import ( HTTPRemoteIO, @@ -61,6 +60,7 @@ verify_ria_url, ) from datalad.utils import on_windows +from datalad_next.annexremotes import SpecialRemote from . import apply_patch @@ -229,7 +229,7 @@ def _load_local_cfg(self): # this will work, even when this is not a bare repo # but it is not capable of reading out dataset/branch config - self._repo = AnnexRepo(self.gitdir) + self._repo = AnnexRepo(self.repodir) cfg_map = {"ora-force-write": "force_write", "ora-ignore-ria-config": "ignore_remote_config", @@ -723,7 +723,7 @@ def push_io(self): def prepare(self): gitdir = self.annex.getgitdir() - self._repo = AnnexRepo(gitdir) + self._repo = AnnexRepo(self.repodir) self._verify_config() self.get_store()