Skip to content

Commit

Permalink
Support for file ids in sample sheets
Browse files Browse the repository at this point in the history
Default execution mode is multi-instance
Update wrabbit version to support images in markdown
  • Loading branch information
pavlemarinkovic committed Dec 11, 2024
1 parent 457f5cf commit f9a828e
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 108 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,5 @@
/sbpack.egg-info/
/.idea/
/venv/
/.nextflow/
/.pytest_cache/
5 changes: 2 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
ruamel.yaml >= 0.16
sevenbridges-python >= 2.0
nf-core==2.1
wrabbit==0.2.4
cwlformat
packaging
wrabbit==0.3.0
pillow >= 11.0.0
8 changes: 4 additions & 4 deletions sbpack/noncwl/Readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ Given the contents of this sample sheet is:

Remapped file will be:

| sample | fastq_1 | fastq_2 | strandedness |
|:--------|:------------------------------------------------------------------|:------------------------------------------------------------------|:-------------|
| SAMPLE1 | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_1.fastq.gz | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_2.fastq.gz | reverse |
| SAMPLE2 | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_1.fastq.gz | vs:///Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_2.fastq.gz | reverse |
| sample | fastq_1 | fastq_2 | strandedness |
|:--------|:-----------------------------------------------------------------|:------------------------------------------------------------------|:-------------|
| SAMPLE1 | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_1.fastq.gz | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE1_2.fastq.gz | reverse |
| SAMPLE2 | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_1.fastq.gz | vs://Projects/project-root-uuid/RNAseq_inputs/SAMPLE2_2.fastq.gz | reverse |
185 changes: 116 additions & 69 deletions sbpack/noncwl/manifest.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from sevenbridges.models.project import Project
from sevenbridges import Api
from sevenbridges.errors import NotFound, Forbidden

import logging
import sbpack.lib as lib
import argparse
import os
import re


logger = logging.getLogger(__name__)
Expand All @@ -14,10 +16,11 @@
def paths_to_check(file_name: str) -> list:
"""
:param file_name: Contents of a single manifest file cell that contains
path(s) to files.
path(s) to files. Can be multiple files if separated with ";".
:return: Files that need to be checked
"""
chk = []
rtrn = []
to_check = []

if ";" in file_name:
# This should handle the case when there are multiple files in the
Expand All @@ -29,40 +32,41 @@ def paths_to_check(file_name: str) -> list:
chk.append(file_name)

for file_name in chk:
if ":" in file_name:
if "://" in file_name:
# If a file is in cloud storage, skip it
continue

file_name = file_name.strip('/')
rtrn.append(file_name)
to_check.append(file_name)
cur_path = file_name
while os.path.dirname(cur_path):
cur_path = os.path.dirname(cur_path)
rtrn.append(cur_path)
to_check.append(cur_path)

return rtrn
return to_check


def get_path_from_id(api: Api, file: str) -> str:
    """
    Extracts the full path of a file from ID.

    The path is built by walking up the ``parent`` chain of the file until
    the root folder of the file's project is reached.

    :param api: Initialized SevenBridges API
    :param file: id of a file
    :return: Path to the File on vs://
    """
    current = api.files.get(file)
    names = [current.name]

    project = api.projects.get(current.project)
    # The root folder object provides both the id that terminates the walk
    # and the name that starts the path, so it is fetched only once.
    project_root = api.files.get(project.root_folder)

    while current.parent != project_root.id:
        current = api.files.get(current.parent)
        names.append(current.name)

    names.append(project_root.name)
    # Names were collected leaf-first; the path is written root-first.
    return "vs://Projects/" + "/".join(reversed(names))


def get_path_from_name(api: Api, file_name: str, project: Project) -> str:
Expand All @@ -71,7 +75,7 @@ def get_path_from_name(api: Api, file_name: str, project: Project) -> str:
:param api: Initialized SevenBridges API
:param file_name: Name of the file
:param project: SevenBridges Project
:return:
:return: Path to the File on vs://
"""

file = api.files.query(project=project, names=[file_name])
Expand All @@ -83,25 +87,52 @@ def get_path_from_name(api: Api, file_name: str, project: Project) -> str:
)


def remap_cell(project_root: str, path: str) -> str:
def try_to_get_file(api, id_):
    """
    Tries to get a file through the SevenBridges API.

    This is a best-effort lookup: it never raises. Any failure results in
    None being returned.

    :param api: SevenBridges API
    :param id_: File ID on the SevenBridges Platform
    :return: File object if found, else None
    """
    try:
        return api.files.get(id_)
    except (NotFound, Forbidden):
        # The id does not exist or is not accessible with this profile.
        return None
    except Exception as exc:
        # Still best-effort, but leave a trace so that unexpected failures
        # are not silently mistaken for "file does not exist".
        logger.warning(
            "Unexpected error while fetching file <%s>: %s", id_, exc)
        return None


def remap_cell(api, project_root: str, path: str) -> str:
    """
    Remaps a file path to the 'vs:' file system.
    Supports multiple files separated with ';'.

    :param api: SevenBridges API
    :param project_root: Name of the project root directory.
    :param path: File path, or a file ID.
    :return: File path(s) prefixed with 'vs://Projects/' and project_root.
        A cell holding the ID of an accessible file is replaced with the
        path obtained from get_path_from_id. Empty cells and cells that
        contain ':' are returned unchanged.
    """
    if ";" in path:
        return ";".join(
            remap_cell(api, project_root, part) for part in path.split(";")
        )

    if not path or ":" in path:
        # Nothing to remap: the cell is empty or contains ':'
        return path

    relative = path.lstrip('/')
    if not relative:
        # Only slashes: there is no file name to prefix. Return the cell
        # as is so that the result is always a string.
        return path

    # file ids are MongoDB Object IDs; the whole cell must be an id, not
    # just its first 24 characters
    if re.fullmatch(r'[a-f0-9]{24}', relative):
        if try_to_get_file(api, relative) is not None:
            # Return a path string, not the File object itself
            return get_path_from_id(api, relative)

    # prefix it with the project root
    return f"vs://Projects/{project_root}/{relative}"

Expand Down Expand Up @@ -181,19 +212,30 @@ def validate_sheet(
if os.path.dirname(path):
parent = checked[os.path.dirname(path)]

file = api.files.query(
names=[basename],
project=project if not parent else None,
parent=parent)
file = None
if re.match(r'[a-f0-9]{24}', path):
# file ids are MongoDB Object IDs
file = try_to_get_file(api, path)

if file is None:
file = api.files.query(
names=[basename],
project=project if not parent else None,
parent=parent
)
if file:
file = file[0]

if file:
checked[path] = file[0]
checked[path] = file
else:
raise FileExistsError(
f"File <{path}> does not exist within "
f"project <{project}>")


def remap(
api,
project_root: str,
path_to_file: str,
remap_columns: list,
Expand All @@ -209,6 +251,7 @@ def remap(
The function assumes that the first row is always the header.
:param api: SevenBridges API
:param project_root: Name of the project root directory.
:param path_to_file: Path to the manifest file.
:param remap_columns: Names of manifest file columns that contain paths to
Expand Down Expand Up @@ -245,67 +288,21 @@ def remap(
if line:
line = line.strip('\n').split(split_char)
for i in indices:
line[i] = remap_cell(project_root, line[i])
line[i] = remap_cell(api, project_root, line[i])
line = split_char.join(line)
sheet.append(line)

return "\n".join(sheet)


def main():
# CLI parameters
parser = argparse.ArgumentParser()
parser.add_argument(
"--profile", required=False,
default="default", type=str,
help="SB platform profile as set in the SB API credentials file.",
)
parser.add_argument(
"--projectid", required=True,
type=str,
help="Takes the form {user or division}/{project}.",
)
parser.add_argument(
"--sample-sheet", required=True,
type=str,
help="Path to the sample sheet."
)
parser.add_argument(
"--columns", required=True,
metavar='string', nargs='+', type=str,
help="Specify columns that contain paths to files on the platform"
"as a list of strings separated by spaces.",
)
parser.add_argument(
"--output", '-o', required=False,
type=str,
help="Name of the output file.",
)
parser.add_argument(
"--upload", action='store_true', required=False,
help="Upload the file to the project after making it.",
)
parser.add_argument(
"--tags", required=False,
metavar='string', nargs='+', type=str,
help="Specify tags that you want the sample sheet to have on the "
"platform, after it is uploaded.",
)
parser.add_argument(
"--validate", action='store_true', required=False,
help="Validate if each file exists on target project location.",
)

args = parser.parse_args()

def make_manifest(api, args):
project = args.projectid
api = lib.get_profile(args.profile)

project = api.projects.get(project)
project_root = api.files.get(project.root_folder).name

logger.info('Remapping manifest files.')
sheet = remap(
api,
project_root,
args.sample_sheet,
args.columns
Expand Down Expand Up @@ -366,5 +363,55 @@ def main():
file.save()


def main(argv=None):
    """
    Command line entry point.

    Parses the command line, initializes the SevenBridges API for the
    selected profile and hands both over to make_manifest.

    :param argv: List of command line arguments. When None (default),
        argparse reads them from sys.argv.
    """
    # CLI parameters
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--profile", required=False,
        default="default", type=str,
        help="SB platform profile as set in the SB API credentials file.",
    )
    parser.add_argument(
        "--projectid", required=True,
        type=str,
        help="Takes the form {user or division}/{project}.",
    )
    parser.add_argument(
        "--sample-sheet", required=True,
        type=str,
        help="Path to the sample sheet."
    )
    parser.add_argument(
        "--columns", required=True,
        metavar='string', nargs='+', type=str,
        # Adjacent literals are concatenated, so the separating space has to
        # be part of one of them.
        help="Specify columns that contain paths to files on the platform "
             "as a list of strings separated by spaces.",
    )
    parser.add_argument(
        "--output", '-o', required=False,
        type=str,
        help="Name of the output file.",
    )
    parser.add_argument(
        "--upload", action='store_true', required=False,
        help="Upload the file to the project after making it.",
    )
    parser.add_argument(
        "--tags", required=False,
        metavar='string', nargs='+', type=str,
        help="Specify tags that you want the sample sheet to have on the "
             "platform, after it is uploaded.",
    )
    parser.add_argument(
        "--validate", action='store_true', required=False,
        help="Validate if each file exists on target project location.",
    )

    args = parser.parse_args(argv)

    api = lib.get_profile(args.profile)
    make_manifest(api, args)


if __name__ == "__main__":
main()
Loading

0 comments on commit f9a828e

Please sign in to comment.