class IOBase(Base):
    """Base class of the storage-related commands (``ls``, ``cp``)."""

    def get_group(self):
        """Return the name of the help group these commands are listed under."""
        return "Data management"

    def parse_location(self, location: str):
        """Split a command-line location into storage endpoint and path.

        A location starting with
        ``https://<...>/rest/core/storages/<storage_id>`` is treated as a
        remote storage URL; anything else is treated as a plain path.

        Args:
            location: storage URL or plain path.

        Returns:
            A tuple ``(endpoint, path)``. For a storage URL, ``endpoint`` is
            the URL up to and including the storage id and ``path`` is
            whatever follows the first ``/files`` marker (``"/"`` if there
            is no marker or nothing follows it). For any other string,
            ``endpoint`` is ``None`` and ``path`` is ``location`` unchanged.

        Raises:
            ValueError: if the URL ends right after ``/rest/core/storages/``,
                i.e. it names no storage.
        """
        m = re.match(r"(https://\S+/rest/core/storages/)(\S*)", location)
        if m is None:
            return None, location
        base, remainder = m.groups()
        if not remainder:
            raise ValueError(f"Missing storage ID in storage URL: {location}")
        # Split on the FIRST "/files" only, so that a file path which itself
        # contains "/files" is kept intact.
        storage_id, _, path = remainder.partition("/files")
        return base + storage_id, path or "/"
tok[0] - path = tok[1] if len(tok) > 1 else "/" - return base + storage_id, path - - def _detailed(self, name, p): + def _detailed(self, name: str, p: dict): d = "d" if p["isDirectory"] is True else "-" print(f"{d}{p['permissions']} {p['size']} {p['lastAccessed']} {name}") @@ -50,7 +64,7 @@ def print_single(self, p: PathFile): def run(self, args): super().setup(args) for endpoint in self.args.remote_dirs: - storage_url, file_path = self.split_storage_url(endpoint) + storage_url, file_path = self.parse_location(endpoint) self.verbose(f"Listing: {file_path} on {storage_url}") storage = Storage(self.credential, storage_url=storage_url) p = storage.stat(file_path) @@ -63,3 +77,103 @@ def run(self, args): print(p) else: self.print_single(p) + + +class CP(IOBase): + def add_command_args(self): + self.parser.prog = "unicore cp" + self.parser.description = self.get_synopsis() + self.parser.add_argument("source", nargs="+", help="Source(s)") + self.parser.add_argument("target", help="Target") + + def get_synopsis(self): + return """Copy files from/to local or UNICORE storages""" + + def get_description(self): + return "copy files" + + def _download(self, source_endpoint, source_path, target_path): + storage = Storage(self.credential, storage_url=source_endpoint) + base_dir, file_pattern = split_path(source_path) + for fname in crawl_remote(storage, base_dir, file_pattern): + p = storage.stat(fname) + have_stdout = False + if target_path == "-": + have_stdout = True + target = os.fdopen(sys.stdout.fileno(), "wb", closefd=False) + elif os.path.isdir(target_path): + target = normalized(target_path + "/" + os.path.basename(fname)) + else: + target = target_path + self.verbose(f"... 
def normalized(path: str) -> str:
    """Return *path* in POSIX form with redundant separators collapsed."""
    return pathlib.Path(path).as_posix()


def split_path(path: str):
    """Split *path* into a directory part and a trailing name or pattern.

    Args:
        path: path whose last component may be a wildcard pattern.

    Returns:
        A tuple ``(base, pattern)``; ``base`` is ``"/"`` when *path* has no
        directory part.
    """
    base = os.path.dirname(path)
    pattern = os.path.basename(path)
    return (base or "/"), pattern


def crawl_remote(
    storage: "Storage",
    base_dir,
    file_pattern="*",
    recurse=False,
    all=False,
    files_only=True,
    _level=0,
):
    """Yield the entries below *base_dir* on *storage* matching *file_pattern*.

    Args:
        storage: storage to crawl; only its ``stat()`` and ``contents()``
            methods are used.
        base_dir: directory on the storage where the crawl starts.
        file_pattern: shell-style pattern, matched case-sensitively against
            the base name of each listing entry.
        recurse: descend into sub-directories whose listing key matches
            *file_pattern*.
        all: descend into every sub-directory regardless of *file_pattern*.
            (The name shadows the builtin but is part of the signature.)
        files_only: yield only non-directory entries. When false, matching
            directories are yielded too, and so is *base_dir* itself at the
            top level.
        _level: recursion depth; internal, callers should not set it.

    Yields:
        The normalized *base_dir* (top level only, when *files_only* is false
        and it is a directory), followed by the matching keys of the
        ``"content"`` listings returned by ``storage.contents()``.
    """
    if not files_only and _level == 0:
        # return top-level dir because Unix 'find' does it
        if storage.stat(base_dir).isdir():
            yield normalized(base_dir)
    listing = storage.contents(base_dir)["content"]
    for fname, props in listing.items():
        if props["isDirectory"] is False or not files_only:
            if not fnmatch.fnmatchcase(os.path.basename(fname), file_pattern):
                # NOTE(review): with files_only=False this also skips the
                # descent into a non-matching directory, even when `all` is
                # set - confirm that this is intended.
                continue
            yield fname
        if props["isDirectory"] and (all or (recurse and fnmatch.fnmatch(fname, file_pattern))):
            # NOTE(review): assumes listing keys are names relative to
            # base_dir - verify against the keys storage.contents() returns.
            yield from crawl_remote(
                storage,
                base_dir + "/" + fname,
                file_pattern,
                recurse,
                all,
                # Passed by keyword: positionally, the depth would land in
                # `files_only` and `_level` would stay 0 on every level.
                files_only=files_only,
                _level=_level + 1,
            )
class TestIO(unittest.TestCase):
    """Unit tests for the path helpers in ``pyunicore.cli.io``."""

    def test_split_path(self):
        """``split_path`` returns the directory part and the trailing name/pattern."""
        tests = {
            "/foo/*": ("/foo", "*"),
            "/foo/test.txt": ("/foo", "test.txt"),
            "test.txt": ("/", "test.txt"),
            "/test.txt": ("/", "test.txt"),
            "/foo/bar/test.txt": ("/foo/bar", "test.txt"),
        }
        for path, expected in tests.items():
            # subTest reports every failing input instead of stopping at the first
            with self.subTest(path=path):
                base, pattern = io.split_path(path)
                self.assertEqual((base, pattern), expected)

    def test_normalize(self):
        """``normalized`` collapses duplicate path separators."""
        tests = {
            "/foo//bar": "/foo/bar",
        }
        for path, expected in tests.items():
            with self.subTest(path=path):
                self.assertEqual(io.normalized(path), expected)


if __name__ == "__main__":
    unittest.main()