From 2719e2ede9696554e9a72d779c7a287b31eb8e23 Mon Sep 17 00:00:00 2001 From: Christian Monch Date: Sat, 28 Oct 2023 21:13:23 +0200 Subject: [PATCH] use iter_gitworktree to determine git-files This commit uses the "real" iter_gitworktree iterator instead of a run-context that executes `git ls-files` --- .../iter_collections/annexworktree.py | 53 +------------------ 1 file changed, 2 insertions(+), 51 deletions(-) diff --git a/datalad_next/iter_collections/annexworktree.py b/datalad_next/iter_collections/annexworktree.py index e5cf284c8..05eb5cb92 100644 --- a/datalad_next/iter_collections/annexworktree.py +++ b/datalad_next/iter_collections/annexworktree.py @@ -17,64 +17,18 @@ from typing import Generator from datalad.support.annexrepo import GeneratorAnnexJsonNoStderrProtocol -from datalad_next.runners import StdOutCaptureGeneratorProtocol from datalad_next.runners.batch import annexjson_batchcommand from datalad_next.runners.run import run from .gitworktree import ( GitWorktreeItem, GitWorktreeFileSystemItem, - _lsfiles_line2props, - _mode_type_map, - lsfiles_untracked_args, + iter_gitworktree, ) lgr = logging.getLogger('datalad.ext.next.iter_collections.annexworktree') -# The following protocol is part of the POC implementation. It allows a simple -# `with run()`-commandline that returns `GitWorkTreeItem`-objects or -# `GitWorkTreeFileSystemItem`-objects. In the real implementation we would -# probably use `iter_gitworktree()`. 
-class GitLsFilesProtocol(StdOutCaptureGeneratorProtocol): - def __init__(self, done_future=None, encoding=None) -> None: - from datalad_next.runners import LineSplitter - - StdOutCaptureGeneratorProtocol.__init__(self, done_future, encoding) - self.encoding = encoding or 'utf-8' - self.line_splitter = LineSplitter(separator='\x00', keep_ends=False) - - def send_line_result(self, line: str) -> None: - ipath, lsfiles_props = _lsfiles_line2props(line) - if not lsfiles_props: - self.send_result( - GitWorktreeItem( - name=ipath, - gittype=None, - gitsha=None, - ) - ) - else: - self.send_result( - GitWorktreeItem( - name=ipath, - gittype=_mode_type_map[lsfiles_props['mode']], - gitsha=lsfiles_props['gitsha'], - ) - ) - - def pipe_data_received(self, fd: int, data: bytes) -> None: - assert fd == 1 - for line in self.line_splitter.process(data.decode()): - self.send_line_result(line) - - def pipe_connection_lost(self, fd: int, exc: BaseException | None) -> None: - if fd == 1: - remaining_string = self.line_splitter.finish_processing() - if remaining_string: - self.send_line_result(remaining_string) - - # TODO Could be `StrEnum`, came with PY3.11 class AnnexTreeItemType(Enum): """Enumeration of item types of Git trees @@ -149,9 +103,6 @@ def iter_annexworktree( # we cannot use git-annex-contentlocation, because it only reports on # present annex objects, and here we also need to report on would-be # locations - git_ls_files_cmd = ['git', 'ls-files', '-z', '--stage', '--cached'] - if untracked: - git_ls_files_cmd.extend(lsfiles_untracked_args[untracked]) git_annex_find_cmd = [ 'git', 'annex', 'find', '--include=*', '--json', '--json-error-messages', '.' 
@@ -161,9 +112,9 @@ def iter_annexworktree( glf_store = dict() with \ run(git_annex_find_cmd, protocol_class=GeneratorAnnexJsonNoStderrProtocol, **common_args) as git_annex_find, \ - run(git_ls_files_cmd, protocol_class=GitLsFilesProtocol, **common_args) as git_ls_files, \ annexjson_batchcommand(['git', 'annex', 'examinekey', '--json', '--batch'], **common_args) as examine_key: + git_ls_files = iter_gitworktree(path=path, untracked=untracked) for gaf_item, glf_item in zip_longest(git_annex_find, git_ls_files): if gaf_item: gaf_store[PurePath(PurePosixPath(gaf_item['file']))] = gaf_item