From f9a828e0860a63f2913efc1d159fa25e915abc4a Mon Sep 17 00:00:00 2001 From: pavlemarinkovic Date: Wed, 11 Dec 2024 13:23:05 +0100 Subject: [PATCH] Support for file ids in sample sheets Default execution mode is multi-instance Update wrabbit version to support images in markdown --- .gitignore | 2 + requirements.txt | 5 +- sbpack/noncwl/Readme.md | 8 +- sbpack/noncwl/manifest.py | 185 ++++++++++++++++++++++++-------------- sbpack/noncwl/nextflow.py | 80 ++++++++++------- 5 files changed, 172 insertions(+), 108 deletions(-) diff --git a/.gitignore b/.gitignore index 220202f..2b0fb4e 100755 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ /sbpack.egg-info/ /.idea/ /venv/ +/.nextflow/ +/.pytest_cache/ \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 5f98f31..230fddf 100755 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ ruamel.yaml >= 0.16 sevenbridges-python >= 2.0 nf-core==2.1 -wrabbit==0.2.4 -cwlformat -packaging \ No newline at end of file +wrabbit==0.3.0 +pillow >= 11.0.0 \ No newline at end of file diff --git a/sbpack/noncwl/Readme.md b/sbpack/noncwl/Readme.md index 78bd668..2999112 100755 --- a/sbpack/noncwl/Readme.md +++ b/sbpack/noncwl/Readme.md @@ -220,7 +220,7 @@ Given the contents of this sample sheet is: Remapped file will be: -| sample | fastq_1 | fastq_2 | strandedness | -|:--------|:------------------------------------------------------------------|:------------------------------------------------------------------|:-------------| -| SAMPLE1 | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_1.fastq.gz | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_2.fastq.gz | reverse | -| SAMPLE2 | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_1.fastq.gz | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_2.fastq.gz | reverse | +| sample | fastq_1 | fastq_2 | strandedness | +|:--------|:-----------------------------------------------------------------|:------------------------------------------------------------------|:-------------| +| SAMPLE1 | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_1.fastq.gz | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_2.fastq.gz | reverse | +| SAMPLE2 | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_1.fastq.gz | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_2.fastq.gz | reverse | diff --git a/sbpack/noncwl/manifest.py b/sbpack/noncwl/manifest.py index 354d32a..f2b28ba 100755 --- a/sbpack/noncwl/manifest.py +++ b/sbpack/noncwl/manifest.py @@ -1,10 +1,12 @@ from sevenbridges.models.project import Project from sevenbridges import Api +from sevenbridges.errors import NotFound, Forbidden import logging import sbpack.lib as lib import argparse import os +import re logger = logging.getLogger(__name__) @@ -14,10 +16,11 @@ def paths_to_check(file_name: str) -> list: """ :param file_name: Contents of a single manifest file cell that contains - path(s) to files. + path(s) to files. Can be multiple files if separated with ";". + :return: Files that need to be checked """ chk = [] - rtrn = [] + to_check = [] if ";" in file_name: # This should handle the case when there are multiple files in the @@ -29,18 +32,18 @@ def paths_to_check(file_name: str) -> list: chk.append(file_name) for file_name in chk: - if ":" in file_name: + if "://" in file_name: # If a file is in cloud storage, skip it continue file_name = file_name.strip('/') - rtrn.append(file_name) + to_check.append(file_name) cur_path = file_name while os.path.dirname(cur_path): cur_path = os.path.dirname(cur_path) - rtrn.append(cur_path) + to_check.append(cur_path) - return rtrn + return to_check def get_path_from_id(api: Api, file: str) -> str: @@ -48,21 +51,22 @@ def get_path_from_id(api: Api, file: str) -> str: Extracts the full path of a file from ID :param api: Initialized SevenBridges API :param file: id of a file - :return: Path to the File + :return: Path to the File on vs:// """ file = api.files.get(file) temp = file full_path = [file.name] - project_root = api.projects.get(file.project) + project = api.projects.get(file.project) + project_root = api.files.get(project.root_folder) project_root_name = api.files.get(project_root).name - while temp.parent != project_root: + while temp.parent != project_root.id: temp = api.files.get(temp.parent) full_path.append(temp.name) full_path.append(project_root_name) - return "vs:///Projects/" + "/".join(full_path[::-1]) + return "vs://Projects/" + "/".join(full_path[::-1]) def get_path_from_name(api: Api, file_name: str, project: Project) -> str: @@ -71,7 +75,7 @@ def get_path_from_name(api: Api, file_name: str, project: Project) -> str: :param api: Initialized SevenBridges API :param file_name: Name of the file :param project: SevenBridges Project - :return: + :return: Path to the File on vs:// """ file = api.files.query(project=project, names=[file_name]) @@ -83,25 +87,52 @@ def get_path_from_name(api: Api, file_name: str, project: Project) -> str: ) -def remap_cell(project_root: str, path: str) -> str: +def try_to_get_file(api, id_): + """ + Tries to get a file through the SevenBridges API + :param api: SevenBridges API + :param id_: File ID on the SevenBridges Platform + :return: File object if found, else None + """ + try: + return api.files.get(id_) + except NotFound: + return None + except Forbidden: + return None + except Exception as e: + return None + + +def remap_cell(api, project_root: str, path: str) -> str: """ Remaps a file path to the 'vs:' file system. Supports multiple files separated with ';'. + :param api: SebenBridges API :param project_root: Name of the project root directory. :param path: File path. - :return: File path(s) prefixed with 'vs:///Projects/' and project_root. + :return: File path(s) prefixed with 'vs://Projects/' and project_root. """ # prefix it with the project root if ";" in path: - return ";".join([remap_cell(project_root, f) for f in path.split(";")]) + return ";".join([ + remap_cell(api, project_root, f) for f in path.split(";")]) if path and ":" not in path: while path.startswith('/'): path = path[1:] if path: - return f"vs:///Projects/{project_root}/{path}" + remapped_path = None + if re.match(r'[a-f0-9]{24}', path): + # file ids are MongoDB Object IDs + remapped_path = try_to_get_file(api, path) + + if remapped_path: + return remapped_path + else: + return f"vs://Projects/{project_root}/{path}" else: return path @@ -181,12 +212,22 @@ def validate_sheet( if os.path.dirname(path): parent = checked[os.path.dirname(path)] - file = api.files.query( - names=[basename], - project=project if not parent else None, - parent=parent) + file = None + if re.match(r'[a-f0-9]{24}', path): + # file ids are MongoDB Object IDs + file = try_to_get_file(api, path) + + if file is None: + file = api.files.query( + names=[basename], + project=project if not parent else None, + parent=parent + ) + if file: + file = file[0] + if file: - checked[path] = file[0] + checked[path] = file else: raise FileExistsError( f"File <{path}> does not exist within " @@ -194,6 +235,7 @@ def validate_sheet( def remap( + api, project_root: str, path_to_file: str, remap_columns: list, @@ -209,6 +251,7 @@ def remap( The function assumes that the first row is always the header. + :param api: SevenBridges API :param project_root: Name of the project root directory. :param path_to_file: Path to the manifest file. :param remap_columns: Names of manifest file columns that contain paths to @@ -245,67 +288,21 @@ def remap( if line: line = line.strip('\n').split(split_char) for i in indices: - line[i] = remap_cell(project_root, line[i]) + line[i] = remap_cell(api, project_root, line[i]) line = split_char.join(line) sheet.append(line) return "\n".join(sheet) -def main(): - # CLI parameters - parser = argparse.ArgumentParser() - parser.add_argument( - "--profile", required=False, - default="default", type=str, - help="SB platform profile as set in the SB API credentials file.", - ) - parser.add_argument( - "--projectid", required=True, - type=str, - help="Takes the form {user or division}/{project}.", - ) - parser.add_argument( - "--sample-sheet", required=True, - type=str, - help="Path to the sample sheet." - ) - parser.add_argument( - "--columns", required=True, - metavar='string', nargs='+', type=str, - help="Specify columns that contain paths to files on the platform" - "as a list of strings separated by spaces.", - ) - parser.add_argument( - "--output", '-o', required=False, - type=str, - help="Name of the output file.", - ) - parser.add_argument( - "--upload", action='store_true', required=False, - help="Upload the file to the project after making it.", - ) - parser.add_argument( - "--tags", required=False, - metavar='string', nargs='+', type=str, - help="Specify tags that you want the sample sheet to have on the " - "platform, after it is uploaded.", - ) - parser.add_argument( - "--validate", action='store_true', required=False, - help="Validate if each file exists on target project location.", - ) - - args = parser.parse_args() - +def make_manifest(api, args): project = args.projectid - api = lib.get_profile(args.profile) - project = api.projects.get(project) project_root = api.files.get(project.root_folder).name logger.info('Remapping manifest files.') sheet = remap( + api, project_root, args.sample_sheet, args.columns @@ -366,5 +363,55 @@ def main(): file.save() +def main(): + # CLI parameters + parser = argparse.ArgumentParser() + parser.add_argument( + "--profile", required=False, + default="default", type=str, + help="SB platform profile as set in the SB API credentials file.", + ) + parser.add_argument( + "--projectid", required=True, + type=str, + help="Takes the form {user or division}/{project}.", + ) + parser.add_argument( + "--sample-sheet", required=True, + type=str, + help="Path to the sample sheet." + ) + parser.add_argument( + "--columns", required=True, + metavar='string', nargs='+', type=str, + help="Specify columns that contain paths to files on the platform" + "as a list of strings separated by spaces.", + ) + parser.add_argument( + "--output", '-o', required=False, + type=str, + help="Name of the output file.", + ) + parser.add_argument( + "--upload", action='store_true', required=False, + help="Upload the file to the project after making it.", + ) + parser.add_argument( + "--tags", required=False, + metavar='string', nargs='+', type=str, + help="Specify tags that you want the sample sheet to have on the " + "platform, after it is uploaded.", + ) + parser.add_argument( + "--validate", action='store_true', required=False, + help="Validate if each file exists on target project location.", + ) + + args = parser.parse_args() + + api = lib.get_profile(args.profile) + make_manifest(api, args) + + if __name__ == "__main__": main() diff --git a/sbpack/noncwl/nextflow.py b/sbpack/noncwl/nextflow.py index 046c984..8075592 100755 --- a/sbpack/noncwl/nextflow.py +++ b/sbpack/noncwl/nextflow.py @@ -5,8 +5,6 @@ import sbpack.lib as lib -from wrabbit.parser.nextflow import NextflowParser - from nf_core.schema import PipelineSchema from sbpack.version import __version__ @@ -22,7 +20,6 @@ ) from wrabbit.parser.utils import ( - get_readme, get_latest_sb_schema, get_sample_sheet_schema, ) @@ -34,6 +31,10 @@ SB_SCHEMA_DEFAULT_NAME, ) +from wrabbit.parser.nextflow import ( + NextflowParser +) + logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) @@ -51,8 +52,11 @@ def nf_schema_build(self): if self.nf_schema_path: return + base_dir = os.path.join( + self.workflow_path, os.path.dirname(self.entrypoint) + ) nf_schema_path = os.path.join( - self.workflow_path, + base_dir, NF_SCHEMA_DEFAULT_NAME, ) @@ -62,7 +66,7 @@ def nf_schema_build(self): self.nf_ps.schema_filename = nf_schema_path # if not os.path.exists(nf_schema_path): self.nf_ps.build_schema( - pipeline_dir=self.workflow_path, + pipeline_dir=base_dir, no_prompts=True, web_only=False, url='', @@ -126,7 +130,7 @@ def main(): ) parser.add_argument( "--execution-mode", type=ExecMode, choices=list(ExecMode), - required=False, default=None, + required=False, default=ExecMode.multi, help="Execution mode for your application. Can be multi-instance or " "single-instance", ) @@ -160,7 +164,9 @@ def main(): parser.add_argument( "--sample-sheet-schema", required=False, default=None, type=str, - help="Path to the sample sheet schema yaml. The sample sheet schema " + help="This options is deprecated. Please use sbmanifest to generate " + "valid sample sheets for the SevenBridges powered platforms.\n" + "Path to the sample sheet schema yaml. The sample sheet schema " "should contain the following keys: 'sample_sheet_input', " "'sample_sheet_name', 'header', 'rows', 'defaults', 'group_by', " "'format_'" @@ -183,6 +189,7 @@ def main(): f"Uploaded using sbpack v{__version__}" sample_sheet_schema = args.sample_sheet_schema or None label = args.app_name or None + readme_path = args.sb_doc or None dump_sb_app = args.dump_sb_app or False sb_package_id = args.sb_package_id or None workflow_path = args.workflow_path or None @@ -203,38 +210,46 @@ def main(): "--dump-sb-app and/or --auto are not used" ) - if sb_schema and execution_mode: - logger.warning( - "Using --sb-schema option overwrites --execution-mode" - ) + if git_url and not label: + label = os.path.basename(git_url) + if branch: + label += f" {branch}" - if sb_schema and label: - logger.warning( - "Using --sb-schema option overwrites --app-name" - ) + if sb_schema: + if execution_mode: + logger.warning( + "Using --sb-schema option overwrites --execution-mode." + ) - if sb_schema and executor_version: - logger.warning( - "Using --sb-schema option overwrites --executor-version" - ) + if label: + logger.warning( + "Using --sb-schema option overwrites --app-name." + ) - if sb_schema and entrypoint: - logger.warning( - "Using --sb-schema option overwrites --entrypoint" - ) + if executor_version: + logger.warning( + "Using --sb-schema option overwrites --executor-version." + ) + + if entrypoint: + logger.warning( + "Using --sb-schema option overwrites --entrypoint." + ) + + if readme_path: + logger.warning( + "Using --sb-schema option overwrites --sb-doc." + ) + + if revision_note: + logger.warning( + "Using --sb-schema option overwrites --revision-note." + ) if git_url: cleanup_workflow_path = True workflow_path = get_git_repo(git_url, branch) - sb_doc = None - if args.sb_doc: - with open(args.sb_doc, 'r') as f: - sb_doc = f.read() - elif get_readme(workflow_path): - with open(get_readme(workflow_path), 'r') as f: - sb_doc = f.read() - if args.auto: # This is where the magic happens if not sb_schema: @@ -264,11 +279,12 @@ def main(): nf_wrapper = SBNextflowWrapper( workflow_path=workflow_path, - sb_doc=sb_doc, + readme_path=readme_path, label=label, entrypoint=entrypoint, executor_version=executor_version, sb_package_id=sb_package_id, + search_subfolders=True, ) if sb_schema: