-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathNC_runner.py
88 lines (77 loc) · 4.13 KB
/
NC_runner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import os
import subprocess
import argparse
from rich import print as rprint
# Run the NanoCLUST Nextflow pipeline on a single fastq file
def run_nanoclust(input_file, output_directory, nanoclust_path, database, tax_database):
    """Run the NanoCLUST Nextflow pipeline on a single reads file.

    Args:
        input_file: Path to the fastq file, passed to ``--reads``.
        output_directory: Directory for the results, passed to ``--outdir``.
        nanoclust_path: Path to the NanoCLUST ``main.nf`` file.
        database: Path to the database files, passed to ``--db``.
        tax_database: Path to the tax database files, passed to ``--taxdb``.

    Returns:
        The exit status of the ``nextflow`` process (0 on success), or 127
        when the ``nextflow`` executable could not be started.
    """
    # Build an argument list instead of a single shell string: paths that
    # contain spaces or shell metacharacters then reach nextflow unchanged
    # and can never be interpreted as additional shell commands.
    # expanduser() keeps "~" working now that no shell expands it for us.
    command = [
        "nextflow", "run", os.path.expanduser(str(nanoclust_path)),
        "-profile", "docker",
        "--reads", os.path.expanduser(str(input_file)),
        "--db", os.path.expanduser(str(database)),
        "--taxdb", os.path.expanduser(str(tax_database)),
        "--outdir", os.path.expanduser(str(output_directory)),
    ]
    rprint(f"[green]Starting NanoCLUST for {input_file}...[/green]")
    try:
        completed = subprocess.run(command, check=False)
    except FileNotFoundError:
        # Without a shell a missing binary raises instead of returning 127;
        # report it and keep the shell's "command not found" status so that
        # a batch of files does not abort with a traceback.
        rprint("[red]Error: the 'nextflow' executable was not found on PATH.[/red]")
        return 127
    if completed.returncode != 0:
        rprint(f"[red]NanoCLUST exited with status {completed.returncode} for {input_file}[/red]")
    return completed.returncode
def main():
    """Parse the command line and run NanoCLUST on every matching file.

    Prints a banner, reads the input directory, output directory, file
    suffix, NanoCLUST path and database paths from the command line, and then
    calls ``run_nanoclust`` once for each file in the input directory whose
    name ends with the suffix. Every file gets its own output directory,
    named after the file, inside the chosen output directory.

    Raises:
        SystemExit: With status 1 when the input directory is not an existing
            directory (argparse itself also exits on invalid arguments).
    """
    # Raw strings: the ASCII art is full of backslashes. In a normal string
    # they are invalid escape sequences (a warning on recent Python versions)
    # and a backslash at the very end of a line acts as a line continuation,
    # silently gluing two rows of the logo together.
    title = (r"""
_ _ ___ _ _ _ ___ _____
| \| | __ _ _ _ ___ / __|| | | | | |/ __||_ _|
| .` |/ _` || ' \ / _ \| (__ | |__| |_| |\__ \ | |
|_|\_|\__,_||_||_|\___/ \___||____|\___/ |___/ |_|
""")
    title_runner = (r"""
___ _ _ _ _ _ _ ___ ___
| _ \| | | || \| || \| || __|| _ \
| /| |_| || .` || .` || _| | /
|_|_\ \___/ |_|\_||_|\_||___||_|_\
""")

    # Print pretty logo and explanation to script
    rprint("\n[yellow]Welcome to[/yellow]")
    rprint(f'[green]{title}[magenta]{title_runner}[/green]')
    rprint('[blue]This script runs NanoCLUST on files in a directory you specify.[/blue]\n')

    # Create argumentparser
    parser = argparse.ArgumentParser(
        prog='NanoCLUST runner on directory',
        description='This program starts a NanoCLUST run (on default settings) for each passed fastq file in a directory',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        epilog='Thanks for using this script!'
    )

    # Add arguments to parser
    parser.add_argument('input_directory', type=str,
                        help='Path to directory containing fastq files')
    parser.add_argument('-o', '--outdir', dest='output_directory', type=str,
                        default='.',
                        help='Path to output directory')
    parser.add_argument('-s', '--suffix', dest='file_suffix', type=str,
                        default='pass.fastq',
                        help='the suffix of all files in the directory that need to be processed')
    parser.add_argument('-n', '--nanoclust', dest='nanoclust_path', type=str,
                        default='programs/NanoCLUST/main.nf',
                        help='Path to the NanoCLUST main.nf file')
    parser.add_argument('-d', '--db', dest='database', type=str,
                        default='/user/programs/NanoCLUST/db/16S_ribosomal_RNA',
                        help='Absolute path to the database files')
    parser.add_argument('-t', '--taxdb', dest='tax_database', type=str,
                        default='/user/programs/NanoCLUST/db/taxdb/taxdb',
                        help='Absolute path to the tax database files')

    # Parse!
    args = parser.parse_args()

    # The input must be an existing *directory*: a regular file would pass a
    # plain existence check and then make os.listdir() fail further down.
    if not os.path.isdir(args.input_directory):
        rprint(f'[red]Error: The input directory {args.input_directory} does not exist or is not a directory.[/red]')
        # SystemExit instead of the interactive-only exit() helper, which is
        # injected by the site module and is not guaranteed to be available.
        raise SystemExit(1)

    # Create the output directory if needed; makedirs also creates missing
    # parent directories, so nested paths such as "results/run1" work.
    if not os.path.exists(args.output_directory):
        os.makedirs(args.output_directory, exist_ok=True)
        rprint(f'[dark_orange]The output directory {args.output_directory} did not exist, but now it does [/dark_orange][green] :)[/green]\n')

    # Collect all files from input_directory with the requested suffix.
    # Sorted, because os.listdir() returns entries in arbitrary order.
    fastq_files = sorted(
        filename for filename in os.listdir(args.input_directory)
        if filename.endswith(args.file_suffix)
    )
    if not fastq_files:
        # Tell the user why nothing happens instead of exiting silently.
        rprint(f'[dark_orange]No files ending with "{args.file_suffix}" were found in {args.input_directory}.[/dark_orange]')

    # Run NanoCLUST on each file, writing to <output_directory>/<filename>
    for filename in fastq_files:
        run_nanoclust(
            os.path.join(args.input_directory, filename),
            os.path.join(args.output_directory, filename),
            args.nanoclust_path,
            args.database,
            args.tax_database
        )
# Script entry point: start the runner only when this file is executed
# directly, so that importing it as a module has no side effects.
if __name__ == "__main__":
    main()