diff --git a/datalad_next/iter_collections/annexworktree.py b/datalad_next/iter_collections/annexworktree.py index e5cf284c8..07f4919a0 100644 --- a/datalad_next/iter_collections/annexworktree.py +++ b/datalad_next/iter_collections/annexworktree.py @@ -17,64 +17,18 @@ from typing import Generator from datalad.support.annexrepo import GeneratorAnnexJsonNoStderrProtocol -from datalad_next.runners import StdOutCaptureGeneratorProtocol from datalad_next.runners.batch import annexjson_batchcommand from datalad_next.runners.run import run from .gitworktree import ( GitWorktreeItem, GitWorktreeFileSystemItem, - _lsfiles_line2props, - _mode_type_map, - lsfiles_untracked_args, + iter_gitworktree, ) lgr = logging.getLogger('datalad.ext.next.iter_collections.annexworktree') -# The following protocol is part of the POC implementation. It allows a simple -# `with run()`-commandline that returns `GitWorkTreeItem`-objects or -# `GitWorkTreeFileSystemItem`-objects. In the real implementation we would -# probably use `iter_gitworktree()`. -class GitLsFilesProtocol(StdOutCaptureGeneratorProtocol): - def __init__(self, done_future=None, encoding=None) -> None: - from datalad_next.runners import LineSplitter - - StdOutCaptureGeneratorProtocol.__init__(self, done_future, encoding) - self.encoding = encoding or 'utf-8' - self.line_splitter = LineSplitter(separator='\x00', keep_ends=False) - - def send_line_result(self, line: str) -> None: - ipath, lsfiles_props = _lsfiles_line2props(line) - if not lsfiles_props: - self.send_result( - GitWorktreeItem( - name=ipath, - gittype=None, - gitsha=None, - ) - ) - else: - self.send_result( - GitWorktreeItem( - name=ipath, - gittype=_mode_type_map[lsfiles_props['mode']], - gitsha=lsfiles_props['gitsha'], - ) - ) - - def pipe_data_received(self, fd: int, data: bytes) -> None: - assert fd == 1 - for line in self.line_splitter.process(data.decode()): - self.send_line_result(line) - - def pipe_connection_lost(self, fd: int, exc: BaseException | None) -> None: - if fd == 1: - remaining_string = self.line_splitter.finish_processing() - if remaining_string: - self.send_line_result(remaining_string) - - # TODO Could be `StrEnum`, came with PY3.11 class AnnexTreeItemType(Enum): """Enumeration of item types of Git trees @@ -149,9 +103,6 @@ def iter_annexworktree( # we cannot use git-annex-contentlocation, because it only reports on # present annex objects, and here we also need to report on would-be # locations - git_ls_files_cmd = ['git', 'ls-files', '-z', '--stage', '--cached'] - if untracked: - git_ls_files_cmd.extend(lsfiles_untracked_args[untracked]) git_annex_find_cmd = [ 'git', 'annex', 'find', '--include=*', '--json', '--json-error-messages', '.' @@ -161,9 +112,9 @@ def iter_annexworktree( glf_store = dict() with \ run(git_annex_find_cmd, protocol_class=GeneratorAnnexJsonNoStderrProtocol, **common_args) as git_annex_find, \ - run(git_ls_files_cmd, protocol_class=GitLsFilesProtocol, **common_args) as git_ls_files, \ annexjson_batchcommand(['git', 'annex', 'examinekey', '--json', '--batch'], **common_args) as examine_key: + git_ls_files = iter_gitworktree(path=path, untracked=untracked) for gaf_item, glf_item in zip_longest(git_annex_find, git_ls_files): if gaf_item: gaf_store[PurePath(PurePosixPath(gaf_item['file']))] = gaf_item @@ -187,7 +138,7 @@ def iter_annexworktree( del glf_store[path] del gaf_store[path] - # Remaining git ls-files results are al unannexed, yield them + # Remaining git ls-files results are all unannexed, yield them assert len(gaf_store) == 0 for path, glf_item in glf_store.items(): yield AnnexWorktreeItem(