diff --git a/datalad_next/iter_collections/annexworktree.py b/datalad_next/iter_collections/annexworktree.py index 58402407a..4763cc6ea 100644 --- a/datalad_next/iter_collections/annexworktree.py +++ b/datalad_next/iter_collections/annexworktree.py @@ -117,8 +117,7 @@ def iter_annexworktree( # "path"-property (the "path"-property is `item.name` if `item` is a result # of `git_ls_files`, and `item['file']`, if item is a result of # `git_annex_find`). - gaf_store = dict() - glf_store = dict() + glf_store = [] with \ run(git_annex_find_cmd, protocol_class=GeneratorAnnexJsonNoStderrProtocol, **common_args) as git_annex_find, \ @@ -131,39 +130,35 @@ def iter_annexworktree( # files is a subset of the files in git, i.e. the `git_annex_find` # generator yields less or equal results then the `git_ls_files` # generator. - for gaf_item, glf_item in zip_longest(git_annex_find, git_ls_files): - # Store both results (if they exist) + gaf_store = { + PurePath(PurePosixPath(gaf_item['file'])): gaf_item + for gaf_item in git_annex_find + } + lookup_list = [] + for glf_item in git_ls_files: + gaf_item = gaf_store.get(glf_item.name) if gaf_item: - gaf_store[PurePath(PurePosixPath(gaf_item['file']))] = gaf_item - glf_store[glf_item.name] = glf_item - - # Check the "path"-properties of all `git_ls_files`-items and - # check for a matching path in `git_annex_find`-items. If a - # matching pair exists, yield a result for an annexed file and - # mark the pair for deletion. - remove = [] - for path, glf_item in glf_store.items(): - gaf_item = gaf_store.get(path) - if gaf_item: - remove.append(path) - key_properties = examine_key(gaf_item['key'].encode() + b'\n') - yield AnnexWorktreeItem( - name=glf_item.name, - gitsha=glf_item.gitsha, - gittype=glf_item.gittype, - annexkey=gaf_item['key'], - annexsize=int(gaf_item['bytesize']), - annexobjpath=PurePath(key_properties['objectpath']), - ) - - # Delete marked pairs from both item-stores. - for path in remove: - del glf_store[path] - del gaf_store[path] + lookup_list.append((glf_item.name, glf_item, gaf_item)) + examine_key._stdin_queue.put(gaf_item['key'].encode() + b'\n') + else: + glf_store.append(glf_item) + + for (path, glf_item, gaf_item) in lookup_list: + key_properties = next(examine_key._rgen) + yield AnnexWorktreeItem( + name=glf_item.name, + gitsha=glf_item.gitsha, + gittype=glf_item.gittype, + annexkey=gaf_item['key'], + annexsize=int(gaf_item['bytesize']), + annexobjpath=PurePath(key_properties['objectpath']), + ) + + del gaf_store[path] # Remaining git ls-files results are all unannexed, yield them. assert len(gaf_store) == 0 - for path, glf_item in glf_store.items(): + for glf_item in glf_store: yield AnnexWorktreeItem( name=glf_item.name, gitsha=glf_item.gitsha,