From f9a828e0860a63f2913efc1d159fa25e915abc4a Mon Sep 17 00:00:00 2001
From: pavlemarinkovic <pavle.marinkovic@sbgenomics.com>
Date: Wed, 11 Dec 2024 13:23:05 +0100
Subject: [PATCH 1/2] Support for file ids in sample sheets; default execution
 mode is multi-instance; update wrabbit version to support images in markdown

---
 .gitignore                |   2 +
 requirements.txt          |   5 +-
 sbpack/noncwl/Readme.md   |   8 +-
 sbpack/noncwl/manifest.py | 185 ++++++++++++++++++++++++--------------
 sbpack/noncwl/nextflow.py |  80 ++++++++++-------
 5 files changed, 172 insertions(+), 108 deletions(-)

diff --git a/.gitignore b/.gitignore
index 220202f..2b0fb4e 100755
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@
 /sbpack.egg-info/
 /.idea/
 /venv/
+/.nextflow/
+/.pytest_cache/
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 5f98f31..230fddf 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
 ruamel.yaml >= 0.16
 sevenbridges-python >= 2.0
 nf-core==2.1
-wrabbit==0.2.4
-cwlformat
-packaging
\ No newline at end of file
+wrabbit==0.3.0
+pillow >= 11.0.0
\ No newline at end of file
diff --git a/sbpack/noncwl/Readme.md b/sbpack/noncwl/Readme.md
index 78bd668..2999112 100755
--- a/sbpack/noncwl/Readme.md
+++ b/sbpack/noncwl/Readme.md
@@ -220,7 +220,7 @@ Given the contents of this sample sheet is:
 
 Remapped file will be:
 
-| sample  | fastq_1                                                           | fastq_2                                                           | strandedness |
-|:--------|:------------------------------------------------------------------|:------------------------------------------------------------------|:-------------|
-| SAMPLE1 | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_1.fastq.gz | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_2.fastq.gz | reverse      |
-| SAMPLE2 | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_1.fastq.gz | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_2.fastq.gz | reverse      |
+| sample  | fastq_1                                                          | fastq_2                                                          | strandedness |
+|:--------|:-----------------------------------------------------------------|:-----------------------------------------------------------------|:-------------|
+| SAMPLE1 | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_1.fastq.gz | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_2.fastq.gz | reverse      |
+| SAMPLE2 | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_1.fastq.gz | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_2.fastq.gz | reverse      |
diff --git a/sbpack/noncwl/manifest.py b/sbpack/noncwl/manifest.py
index 354d32a..f2b28ba 100755
--- a/sbpack/noncwl/manifest.py
+++ b/sbpack/noncwl/manifest.py
@@ -1,10 +1,12 @@
 from sevenbridges.models.project import Project
 from sevenbridges import Api
+from sevenbridges.errors import NotFound, Forbidden
 
 import logging
 import sbpack.lib as lib
 import argparse
 import os
+import re
 
 
 logger = logging.getLogger(__name__)
@@ -14,10 +16,11 @@
 def paths_to_check(file_name: str) -> list:
     """
     :param file_name: Contents of a single manifest file cell that contains
-    path(s) to files.
+    path(s) to files. Can be multiple files if separated with ";".
+    :return: Files that need to be checked
     """
     chk = []
-    rtrn = []
+    to_check = []
 
     if ";" in file_name:
         # This should handle the case when there are multiple files in the
@@ -29,18 +32,18 @@ def paths_to_check(file_name: str) -> list:
         chk.append(file_name)
 
     for file_name in chk:
-        if ":" in file_name:
+        if "://" in file_name:
             # If a file is in cloud storage, skip it
             continue
 
         file_name = file_name.strip('/')
-        rtrn.append(file_name)
+        to_check.append(file_name)
         cur_path = file_name
         while os.path.dirname(cur_path):
             cur_path = os.path.dirname(cur_path)
-            rtrn.append(cur_path)
+            to_check.append(cur_path)
 
-    return rtrn
+    return to_check
 
 
 def get_path_from_id(api: Api, file: str) -> str:
@@ -48,21 +51,22 @@ def get_path_from_id(api: Api, file: str) -> str:
     Extracts the full path of a file from ID
     :param api: Initialized SevenBridges API
     :param file: id of a file
-    :return: Path to the File
+    :return: Path to the File on vs://
     """
     file = api.files.get(file)
     temp = file
     full_path = [file.name]
 
-    project_root = api.projects.get(file.project)
+    project = api.projects.get(file.project)
+    project_root = api.files.get(project.root_folder)
     project_root_name = api.files.get(project_root).name
 
-    while temp.parent != project_root:
+    while temp.parent != project_root.id:
         temp = api.files.get(temp.parent)
         full_path.append(temp.name)
 
     full_path.append(project_root_name)
-    return "vs:///Projects/" + "/".join(full_path[::-1])
+    return "vs://Projects/" + "/".join(full_path[::-1])
 
 
 def get_path_from_name(api: Api, file_name: str, project: Project) -> str:
@@ -71,7 +75,7 @@ def get_path_from_name(api: Api, file_name: str, project: Project) -> str:
     :param api: Initialized SevenBridges API
     :param file_name: Name of the file
     :param project: SevenBridges Project
-    :return:
+    :return: Path to the File on vs://
     """
 
     file = api.files.query(project=project, names=[file_name])
@@ -83,25 +87,52 @@ def get_path_from_name(api: Api, file_name: str, project: Project) -> str:
         )
 
 
-def remap_cell(project_root: str, path: str) -> str:
+def try_to_get_file(api, id_):
+    """
+    Tries to get a file through the SevenBridges API
+    :param api: SevenBridges API
+    :param id_: File ID on the SevenBridges Platform
+    :return: File object if found, else None
+    """
+    try:
+        return api.files.get(id_)
+    except NotFound:
+        return None
+    except Forbidden:
+        return None
+    except Exception as e:
+        return None
+
+
+def remap_cell(api, project_root: str, path: str) -> str:
     """
     Remaps a file path to the 'vs:' file system.
 
     Supports multiple files separated with ';'.
 
+    :param api: SevenBridges API
     :param project_root: Name of the project root directory.
     :param path: File path.
-    :return: File path(s) prefixed with 'vs:///Projects/' and project_root.
+    :return: File path(s) prefixed with 'vs://Projects/' and project_root.
     """
     # prefix it with the project root
     if ";" in path:
-        return ";".join([remap_cell(project_root, f) for f in path.split(";")])
+        return ";".join([
+            remap_cell(api, project_root, f) for f in path.split(";")])
 
     if path and ":" not in path:
         while path.startswith('/'):
             path = path[1:]
         if path:
-            return f"vs:///Projects/{project_root}/{path}"
+            remapped_path = None
+            if re.match(r'[a-f0-9]{24}', path):
+                # file ids are MongoDB Object IDs
+                remapped_path = try_to_get_file(api, path)
+
+            if remapped_path:
+                return remapped_path
+            else:
+                return f"vs://Projects/{project_root}/{path}"
     else:
         return path
 
@@ -181,12 +212,22 @@ def validate_sheet(
             if os.path.dirname(path):
                 parent = checked[os.path.dirname(path)]
 
-            file = api.files.query(
-                names=[basename],
-                project=project if not parent else None,
-                parent=parent)
+            file = None
+            if re.match(r'[a-f0-9]{24}', path):
+                # file ids are MongoDB Object IDs
+                file = try_to_get_file(api, path)
+
+            if file is None:
+                file = api.files.query(
+                    names=[basename],
+                    project=project if not parent else None,
+                    parent=parent
+                )
+                if file:
+                    file = file[0]
+
             if file:
-                checked[path] = file[0]
+                checked[path] = file
             else:
                 raise FileExistsError(
                     f"File <{path}> does not exist within "
@@ -194,6 +235,7 @@ def validate_sheet(
 
 
 def remap(
+        api,
         project_root: str,
         path_to_file: str,
         remap_columns: list,
@@ -209,6 +251,7 @@ def remap(
 
     The function assumes that the first row is always the header.
 
+    :param api: SevenBridges API
     :param project_root: Name of the project root directory.
     :param path_to_file: Path to the manifest file.
     :param remap_columns: Names of manifest file columns that contain paths to
@@ -245,67 +288,21 @@ def remap(
             if line:
                 line = line.strip('\n').split(split_char)
                 for i in indices:
-                    line[i] = remap_cell(project_root, line[i])
+                    line[i] = remap_cell(api, project_root, line[i])
                 line = split_char.join(line)
                 sheet.append(line)
 
     return "\n".join(sheet)
 
 
-def main():
-    # CLI parameters
-    parser = argparse.ArgumentParser()
-    parser.add_argument(
-        "--profile", required=False,
-        default="default", type=str,
-        help="SB platform profile as set in the SB API credentials file.",
-    )
-    parser.add_argument(
-        "--projectid", required=True,
-        type=str,
-        help="Takes the form {user or division}/{project}.",
-    )
-    parser.add_argument(
-        "--sample-sheet", required=True,
-        type=str,
-        help="Path to the sample sheet."
-    )
-    parser.add_argument(
-        "--columns", required=True,
-        metavar='string', nargs='+', type=str,
-        help="Specify columns that contain paths to files on the platform"
-             "as a list of strings separated by spaces.",
-    )
-    parser.add_argument(
-        "--output", '-o', required=False,
-        type=str,
-        help="Name of the output file.",
-    )
-    parser.add_argument(
-        "--upload", action='store_true', required=False,
-        help="Upload the file to the project after making it.",
-    )
-    parser.add_argument(
-        "--tags", required=False,
-        metavar='string', nargs='+', type=str,
-        help="Specify tags that you want the sample sheet to have on the "
-             "platform, after it is uploaded.",
-    )
-    parser.add_argument(
-        "--validate", action='store_true', required=False,
-        help="Validate if each file exists on target project location.",
-    )
-
-    args = parser.parse_args()
-
+def make_manifest(api, args):
     project = args.projectid
-    api = lib.get_profile(args.profile)
-
     project = api.projects.get(project)
     project_root = api.files.get(project.root_folder).name
 
     logger.info('Remapping manifest files.')
     sheet = remap(
+        api,
         project_root,
         args.sample_sheet,
         args.columns
@@ -366,5 +363,55 @@ def main():
             file.save()
 
 
+def main():
+    # CLI parameters
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "--profile", required=False,
+        default="default", type=str,
+        help="SB platform profile as set in the SB API credentials file.",
+    )
+    parser.add_argument(
+        "--projectid", required=True,
+        type=str,
+        help="Takes the form {user or division}/{project}.",
+    )
+    parser.add_argument(
+        "--sample-sheet", required=True,
+        type=str,
+        help="Path to the sample sheet."
+    )
+    parser.add_argument(
+        "--columns", required=True,
+        metavar='string', nargs='+', type=str,
+        help="Specify columns that contain paths to files on the platform"
+             "as a list of strings separated by spaces.",
+    )
+    parser.add_argument(
+        "--output", '-o', required=False,
+        type=str,
+        help="Name of the output file.",
+    )
+    parser.add_argument(
+        "--upload", action='store_true', required=False,
+        help="Upload the file to the project after making it.",
+    )
+    parser.add_argument(
+        "--tags", required=False,
+        metavar='string', nargs='+', type=str,
+        help="Specify tags that you want the sample sheet to have on the "
+             "platform, after it is uploaded.",
+    )
+    parser.add_argument(
+        "--validate", action='store_true', required=False,
+        help="Validate if each file exists on target project location.",
+    )
+
+    args = parser.parse_args()
+
+    api = lib.get_profile(args.profile)
+    make_manifest(api, args)
+
+
 if __name__ == "__main__":
     main()
diff --git a/sbpack/noncwl/nextflow.py b/sbpack/noncwl/nextflow.py
index 046c984..8075592 100755
--- a/sbpack/noncwl/nextflow.py
+++ b/sbpack/noncwl/nextflow.py
@@ -5,8 +5,6 @@
 
 import sbpack.lib as lib
 
-from wrabbit.parser.nextflow import NextflowParser
-
 from nf_core.schema import PipelineSchema
 from sbpack.version import __version__
 
@@ -22,7 +20,6 @@
 )
 
 from wrabbit.parser.utils import (
-    get_readme,
     get_latest_sb_schema,
     get_sample_sheet_schema,
 )
@@ -34,6 +31,10 @@
     SB_SCHEMA_DEFAULT_NAME,
 )
 
+from wrabbit.parser.nextflow import (
+    NextflowParser
+)
+
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
 
@@ -51,8 +52,11 @@ def nf_schema_build(self):
         if self.nf_schema_path:
             return
 
+        base_dir = os.path.join(
+            self.workflow_path, os.path.dirname(self.entrypoint)
+        )
         nf_schema_path = os.path.join(
-            self.workflow_path,
+            base_dir,
             NF_SCHEMA_DEFAULT_NAME,
         )
 
@@ -62,7 +66,7 @@ def nf_schema_build(self):
         self.nf_ps.schema_filename = nf_schema_path
         # if not os.path.exists(nf_schema_path):
         self.nf_ps.build_schema(
-            pipeline_dir=self.workflow_path,
+            pipeline_dir=base_dir,
             no_prompts=True,
             web_only=False,
             url='',
@@ -126,7 +130,7 @@ def main():
     )
     parser.add_argument(
         "--execution-mode", type=ExecMode, choices=list(ExecMode),
-        required=False, default=None,
+        required=False, default=ExecMode.multi,
         help="Execution mode for your application. Can be multi-instance or "
              "single-instance",
     )
@@ -160,7 +164,9 @@ def main():
     parser.add_argument(
         "--sample-sheet-schema", required=False,
         default=None, type=str,
-        help="Path to the sample sheet schema yaml. The sample sheet schema "
+        help="This options is deprecated. Please use sbmanifest to generate "
+             "valid sample sheets for the SevenBridges powered platforms.\n"
+             "Path to the sample sheet schema yaml. The sample sheet schema "
              "should contain the following keys: 'sample_sheet_input', "
              "'sample_sheet_name', 'header', 'rows', 'defaults', 'group_by', "
              "'format_'"
@@ -183,6 +189,7 @@ def main():
         f"Uploaded using sbpack v{__version__}"
     sample_sheet_schema = args.sample_sheet_schema or None
     label = args.app_name or None
+    readme_path = args.sb_doc or None
     dump_sb_app = args.dump_sb_app or False
     sb_package_id = args.sb_package_id or None
     workflow_path = args.workflow_path or None
@@ -203,38 +210,46 @@ def main():
             "--dump-sb-app and/or --auto are not used"
         )
 
-    if sb_schema and execution_mode:
-        logger.warning(
-            "Using --sb-schema option overwrites --execution-mode"
-        )
+    if git_url and not label:
+        label = os.path.basename(git_url)
+        if branch:
+            label += f" {branch}"
 
-    if sb_schema and label:
-        logger.warning(
-            "Using --sb-schema option overwrites --app-name"
-        )
+    if sb_schema:
+        if execution_mode:
+            logger.warning(
+                "Using --sb-schema option overwrites --execution-mode."
+            )
 
-    if sb_schema and executor_version:
-        logger.warning(
-            "Using --sb-schema option overwrites --executor-version"
-        )
+        if label:
+            logger.warning(
+                "Using --sb-schema option overwrites --app-name."
+            )
 
-    if sb_schema and entrypoint:
-        logger.warning(
-            "Using --sb-schema option overwrites --entrypoint"
-        )
+        if executor_version:
+            logger.warning(
+                "Using --sb-schema option overwrites --executor-version."
+            )
+
+        if entrypoint:
+            logger.warning(
+                "Using --sb-schema option overwrites --entrypoint."
+            )
+
+        if readme_path:
+            logger.warning(
+                "Using --sb-schema option overwrites --sb-doc."
+            )
+
+        if revision_note:
+            logger.warning(
+                "Using --sb-schema option overwrites --revision-note."
+            )
 
     if git_url:
         cleanup_workflow_path = True
         workflow_path = get_git_repo(git_url, branch)
 
-    sb_doc = None
-    if args.sb_doc:
-        with open(args.sb_doc, 'r') as f:
-            sb_doc = f.read()
-    elif get_readme(workflow_path):
-        with open(get_readme(workflow_path), 'r') as f:
-            sb_doc = f.read()
-
     if args.auto:
         # This is where the magic happens
         if not sb_schema:
@@ -264,11 +279,12 @@ def main():
 
     nf_wrapper = SBNextflowWrapper(
         workflow_path=workflow_path,
-        sb_doc=sb_doc,
+        readme_path=readme_path,
         label=label,
         entrypoint=entrypoint,
         executor_version=executor_version,
         sb_package_id=sb_package_id,
+        search_subfolders=True,
     )
 
     if sb_schema:

From 73c0b4a31064bd318b03fe6692854b8ec46b8083 Mon Sep 17 00:00:00 2001
From: pavlemarinkovic <pavle.marinkovic@sbgenomics.com>
Date: Tue, 17 Dec 2024 16:37:15 +0100
Subject: [PATCH 2/2] Update versions

---
 requirements.txt  | 4 ++--
 sbpack/version.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 230fddf..cee043a 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
 ruamel.yaml >= 0.16
 sevenbridges-python >= 2.0
-nf-core==2.1
-wrabbit==0.3.0
+nf-core == 2.1
+wrabbit == 0.3.0
 pillow >= 11.0.0
\ No newline at end of file
diff --git a/sbpack/version.py b/sbpack/version.py
index 740a06e..346a568 100755
--- a/sbpack/version.py
+++ b/sbpack/version.py
@@ -1 +1 @@
-__version__ = "2024.10.30"
+__version__ = "2024.12.17"