Skip to content

Commit

Permalink
Merge pull request #561 from mih/enh-zipfile
Browse files Browse the repository at this point in the history
Add `zipfile` as a supported collection type to `ls-file-collection`
  • Loading branch information
mih authored Dec 7, 2023
2 parents bd59fce + e8c9f0e commit bfe8e24
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 25 deletions.
13 changes: 10 additions & 3 deletions datalad_next/commands/ls_file_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@

from datalad_next.iter_collections.directory import iter_dir
from datalad_next.iter_collections.tarfile import iter_tar
from datalad_next.iter_collections.zipfile import iter_zip
from datalad_next.iter_collections.utils import (
FileSystemItemType,
compute_multihash_from_fp,
Expand All @@ -74,6 +75,7 @@
_supported_collection_types = (
'directory',
'tarfile',
'zipfile',
'gitworktree',
'annexworktree',
)
Expand Down Expand Up @@ -114,7 +116,7 @@ def get_collection_iter(self, **kwargs):
hash = kwargs['hash']
iter_fx = None
iter_kwargs = None
if type in ('directory', 'tarfile', 'gitworktree', 'annexworktree'):
if type in ('directory', 'tarfile', 'zipfile', 'gitworktree', 'annexworktree'):
if not isinstance(collection, Path):
self.raise_for(
kwargs,
Expand All @@ -132,6 +134,9 @@ def get_collection_iter(self, **kwargs):
elif type == 'tarfile':
iter_fx = iter_tar
item2res = fsitem_to_dict
elif type == 'zipfile':
iter_fx = iter_zip
item2res = fsitem_to_dict
elif type == 'gitworktree':
iter_fx = iter_gitworktree
item2res = gitworktreeitem_to_dict
Expand Down Expand Up @@ -364,7 +369,9 @@ def custom_result_renderer(res, **kwargs):
type = res.get('type', None)

# if there is no mode, produces '?---------'
mode = filemode(res.get('mode', 0))
# .. or 0 is needed, because some iterators report an explicit
# `None` mode
mode = filemode(res.get('mode', 0) or 0)

size = None
if type in ('file', 'hardlink'):
Expand All @@ -384,7 +391,7 @@ def custom_result_renderer(res, **kwargs):
# stick with numerical IDs (although less accessible), we cannot
# know in general whether this particular system can map numerical
# IDs to valid target names (think stored name in tarballs)
owner_info = f'{res["uid"]}:{res["gid"]}' if 'uid' in res else ''
owner_info = f'{res["uid"]}:{res["gid"]}' if res.get('uid') else ''

ui.message('{mode} {size: >6} {owner: >9} {hts: >11} {item} ({type})'.format(
mode=mode,
Expand Down
57 changes: 36 additions & 21 deletions datalad_next/commands/tests/test_ls_file_collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from datalad.api import ls_file_collection

from datalad_next.constraints.exceptions import CommandParametrizationError
# we need this fixture
from datalad_next.iter_collections.tests.test_iterzip import sample_zip
from datalad_next.tests.marker import skipif_no_network

from ..ls_file_collection import LsFileCollectionParamValidator
Expand All @@ -31,29 +33,42 @@ def test_ls_file_collection_insufficient_args():
ls_file_collection('bogus', 'http://example.com')


def _check_archive_member_result(r, collection):
    """Assert that ``r`` has the basic shape of an archive-member result.

    Parameters
    ----------
    r
        A single result record, accessed by string keys.
    collection
        The value the record's ``'collection'`` entry must equal.

    Raises
    ------
    AssertionError
        If any of the checked properties does not hold.
    """
    # basics of a result: which command produced it, and that it succeeded
    for key, expected in (
        ('action', 'ls_file_collection'),
        ('status', 'ok'),
    ):
        assert r[key] == expected

    # a collection identifier, here the location of the archive
    assert 'collection' in r
    assert r['collection'] == collection

    # an item identifier, here the path of a member inside the archive
    assert 'item' in r
    assert isinstance(r['item'], PurePath)

    # item type info, here one of the filesystem-related categories
    known_types = ('file', 'directory', 'symlink', 'hardlink')
    assert 'type' in r
    assert r['type'] in known_types


def test_ls_file_collection_zipfile(sample_zip, no_result_rendering):
    """List the ``sample_zip`` fixture as a 'zipfile' collection.

    The listing is requested twice, once without hashing and once with
    ``hash='md5'``. Each run must yield four results, and every result
    must pass the generic archive-member checks.
    """
    # run both listings up front, before any assertion is evaluated
    plain_listing = ls_file_collection('zipfile', sample_zip)
    hashed_listing = ls_file_collection('zipfile', sample_zip, hash='md5')

    for listing in (plain_listing, hashed_listing):
        assert len(listing) == 4
        # test a few basic properties that should be true for any result
        for record in listing:
            _check_archive_member_result(record, sample_zip)


@skipif_no_network
def test_ls_file_collection_tarfile(sample_tar_xz, no_result_rendering):
# smoke test first
res = ls_file_collection(
'tarfile',
sample_tar_xz,
hash='md5',
)
assert len(res) == 6
# test a few basic properties that should be true for any result
for r in res:
# basics of a result
assert r['action'] == 'ls_file_collection'
assert r['status'] == 'ok'
# a collection identifier, here the tar location
assert 'collection' in r
assert r['collection'] == sample_tar_xz
# an item identifier, here a path of an archive member
assert 'item' in r
assert isinstance(r['item'], PurePath)
# item type info, here some filesystem-related category
assert 'type' in r
assert r['type'] in ('file', 'directory', 'symlink', 'hardlink')
for res in (
ls_file_collection('tarfile', sample_tar_xz),
ls_file_collection('tarfile', sample_tar_xz, hash='md5'),
):
assert len(res) == 6
# test a few basic properties that should be true for any result
for r in res:
_check_archive_member_result(r, sample_tar_xz)


def test_ls_file_collection_directory(tmp_path, no_result_rendering):
Expand Down
3 changes: 2 additions & 1 deletion datalad_next/iter_collections/zipfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ class _ZipFileDirPath(PurePosixPath):
directory members to be identified with a trailing slash.
"""
def __str__(self) -> str:
return f'{super().__str__()}/'
super_str = super().__str__()
return super_str if super_str.endswith('/') else f'{super_str}/'

def __eq__(self, other):
if not isinstance(other, _ZipFileDirPath):
Expand Down

0 comments on commit bfe8e24

Please sign in to comment.