Skip to content

Commit

Permalink
use iter_gitworktree to determine git-files
Browse files Browse the repository at this point in the history
This commit uses the "real" iter_gitworktree
iterator instead of a run-context that executes
`git ls-files`
  • Loading branch information
christian-monch committed Oct 28, 2023
1 parent 892243c commit 2719e2e
Showing 1 changed file with 2 additions and 51 deletions.
53 changes: 2 additions & 51 deletions datalad_next/iter_collections/annexworktree.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,64 +17,18 @@
from typing import Generator

from datalad.support.annexrepo import GeneratorAnnexJsonNoStderrProtocol
from datalad_next.runners import StdOutCaptureGeneratorProtocol
from datalad_next.runners.batch import annexjson_batchcommand
from datalad_next.runners.run import run

from .gitworktree import (
GitWorktreeItem,
GitWorktreeFileSystemItem,
_lsfiles_line2props,
_mode_type_map,
lsfiles_untracked_args,
iter_gitworktree,
)

lgr = logging.getLogger('datalad.ext.next.iter_collections.annexworktree')


# The following protocol is part of the POC implementation. It allows a simple
# `with run()`-commandline that returns `GitWorktreeItem`-objects or
# `GitWorktreeFileSystemItem`-objects. In the real implementation we would
# probably use `iter_gitworktree()`.
class GitLsFilesProtocol(StdOutCaptureGeneratorProtocol):
    """Generator protocol that turns NUL-separated stdout into worktree items.

    Bytes received on stdout are decoded with ``encoding`` (UTF-8 if none is
    given), split on ``'\\x00'``, and every resulting line is passed through
    ``_lsfiles_line2props()`` and emitted as a ``GitWorktreeItem`` via
    ``send_result()``.

    Parameters
    ----------
    done_future
      Passed through unchanged to ``StdOutCaptureGeneratorProtocol``.
    encoding
      Name of the codec used to decode stdout. Defaults to ``'utf-8'``.
    """

    def __init__(self, done_future=None, encoding=None) -> None:
        import codecs

        from datalad_next.runners import LineSplitter

        StdOutCaptureGeneratorProtocol.__init__(self, done_future, encoding)
        self.encoding = encoding or 'utf-8'
        self.line_splitter = LineSplitter(separator='\x00', keep_ends=False)
        # Pipe chunks can end in the middle of a multi-byte character. An
        # incremental decoder buffers such partial sequences until the next
        # chunk arrives instead of raising `UnicodeDecodeError`.
        self._decoder = codecs.getincrementaldecoder(self.encoding)()

    def send_line_result(self, line: str) -> None:
        """Convert a single output line into a ``GitWorktreeItem`` result.

        When ``_lsfiles_line2props()`` reports no properties for the line,
        the item is emitted with ``gittype`` and ``gitsha`` set to ``None``
        (presumably an untracked item -- verify against
        ``_lsfiles_line2props``).
        """
        ipath, lsfiles_props = _lsfiles_line2props(line)
        if lsfiles_props:
            gittype = _mode_type_map[lsfiles_props['mode']]
            gitsha = lsfiles_props['gitsha']
        else:
            gittype = None
            gitsha = None
        self.send_result(
            GitWorktreeItem(
                name=ipath,
                gittype=gittype,
                gitsha=gitsha,
            )
        )

    def _process_text(self, text: str) -> None:
        """Feed decoded text to the line splitter and emit complete lines."""
        # The decoder returns an empty string while it is still waiting for
        # the remainder of a multi-byte sequence; nothing to split then.
        if not text:
            return
        for line in self.line_splitter.process(text):
            self.send_line_result(line)

    def pipe_data_received(self, fd: int, data: bytes) -> None:
        """Decode a chunk of stdout data and emit all completed lines."""
        # Only stdout (fd 1) is expected to deliver data to this protocol.
        assert fd == 1
        self._process_text(self._decoder.decode(data))

    def pipe_connection_lost(self, fd: int, exc: BaseException | None) -> None:
        """Flush buffered data once stdout is closed.

        Emits whatever the decoder and the line splitter still hold, i.e. a
        final record that was not terminated by the separator.
        """
        if fd != 1:
            return
        # `final=True` makes the decoder report a truncated byte sequence
        # at end of stream instead of silently dropping it.
        self._process_text(self._decoder.decode(b'', final=True))
        remaining_string = self.line_splitter.finish_processing()
        if remaining_string:
            self.send_line_result(remaining_string)


# TODO Could be `StrEnum`, came with PY3.11
class AnnexTreeItemType(Enum):
"""Enumeration of item types of Git trees
Expand Down Expand Up @@ -149,9 +103,6 @@ def iter_annexworktree(
# we cannot use git-annex-contentlocation, because it only reports on
# present annex objects, and here we also need to report on would-be
# locations
git_ls_files_cmd = ['git', 'ls-files', '-z', '--stage', '--cached']
if untracked:
git_ls_files_cmd.extend(lsfiles_untracked_args[untracked])
git_annex_find_cmd = [
'git', 'annex', 'find', '--include=*',
'--json', '--json-error-messages', '.'
Expand All @@ -161,9 +112,9 @@ def iter_annexworktree(
glf_store = dict()
with \
run(git_annex_find_cmd, protocol_class=GeneratorAnnexJsonNoStderrProtocol, **common_args) as git_annex_find, \
run(git_ls_files_cmd, protocol_class=GitLsFilesProtocol, **common_args) as git_ls_files, \
annexjson_batchcommand(['git', 'annex', 'examinekey', '--json', '--batch'], **common_args) as examine_key:

git_ls_files = iter_gitworktree(path=path, untracked=untracked)
for gaf_item, glf_item in zip_longest(git_annex_find, git_ls_files):
if gaf_item:
gaf_store[PurePath(PurePosixPath(gaf_item['file']))] = gaf_item
Expand Down

0 comments on commit 2719e2e

Please sign in to comment.