diff --git a/bin/multiqc_mappings_config.py b/bin/multiqc_mappings_config.py deleted file mode 100755 index 3ffe35ec..00000000 --- a/bin/multiqc_mappings_config.py +++ /dev/null @@ -1,13 +0,0 @@ -#!/usr/bin/env python - -import sys - -with open(sys.argv[1], "r") as fin, open(sys.argv[2], "w") as fout: - header = fin.readline().split(",") - config = "sample_names_rename_buttons:\n" - config += "\n".join([" - " + x.strip('"') for x in header]) - config += "sample_names_rename:\n" - rename = [] - for line in fin: - rename.append(f" - [{', '.join(line.strip().split(','))}]") - fout.write(config + "\n".join(sorted(rename)) + "\n") diff --git a/conf/base.config b/conf/base.config index 6af79a7b..5d7c5389 100644 --- a/conf/base.config +++ b/conf/base.config @@ -10,16 +10,16 @@ process { - cpus = { check_max( 1 * task.attempt, 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } - - publishDir = [ - path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + resourceLimits = [ + cpus: params.max_cpus, + memory: params.max_memory, + time: params.max_time ] + cpus = { 1 * task.attempt } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } + errorStrategy = { task.exitStatus in ((130..145) + 104) ? 'retry' : 'finish' } maxRetries = 1 maxErrors = '-1' @@ -31,30 +31,30 @@ process { // adding in your local modules too. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel:process_single { - cpus = { check_max( 1 , 'cpus' ) } - memory = { check_max( 6.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 1 } + memory = { 6.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_low { - cpus = { check_max( 2 * task.attempt, 'cpus' ) } - memory = { check_max( 12.GB * task.attempt, 'memory' ) } - time = { check_max( 4.h * task.attempt, 'time' ) } + cpus = { 2 * task.attempt } + memory = { 12.GB * task.attempt } + time = { 4.h * task.attempt } } withLabel:process_medium { - cpus = { check_max( 6 * task.attempt, 'cpus' ) } - memory = { check_max( 36.GB * task.attempt, 'memory' ) } - time = { check_max( 8.h * task.attempt, 'time' ) } + cpus = { 6 * task.attempt } + memory = { 36.GB * task.attempt } + time = { 8.h * task.attempt } } withLabel:process_high { - cpus = { check_max( 12 * task.attempt, 'cpus' ) } - memory = { check_max( 72.GB * task.attempt, 'memory' ) } - time = { check_max( 16.h * task.attempt, 'time' ) } + cpus = { 12 * task.attempt } + memory = { 72.GB * task.attempt } + time = { 16.h * task.attempt } } withLabel:process_long { - time = { check_max( 20.h * task.attempt, 'time' ) } + time = { 20.h * task.attempt } } withLabel:process_high_memory { - memory = { check_max( 200.GB * task.attempt, 'memory' ) } + memory = { 200.GB * task.attempt } } withLabel:error_ignore { errorStrategy = 'ignore' diff --git a/main.nf b/main.nf index 52539e40..1b85ba11 100644 --- a/main.nf +++ b/main.nf @@ -9,38 +9,62 @@ ---------------------------------------------------------------------------------------- */ -nextflow.enable.dsl = 2 +nextflow.preview.types = true /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS / WORKFLOWS + IMPORT FUNCTIONS / MODULES / WORKFLOWS / TYPES ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ include { SRA } from './workflows/sra' include { PIPELINE_INITIALISATION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline' include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_fetchngs_pipeline' +include { SOFTWARE_VERSIONS } from './subworkflows/nf-core/utils_nfcore_pipeline' +include { DownloadMethod } from './workflows/sra' +include { SraParams } from './workflows/sra' +include { Sample } from './workflows/sra' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - NAMED WORKFLOWS FOR PIPELINE + WORKFLOW INPUTS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -// -// WORKFLOW: Run main nf-core/fetchngs analysis pipeline depending on type of identifier provided -// -workflow NFCORE_FETCHNGS { +params { - take: - ids // channel: database ids read in from --input + // TODO: declare as Set and construct SraId with isSraId() + input: Set { + description 'Set of SRA/ENA/GEO/DDBJ identifiers to download their associated metadata and FastQ files' + } - main: + // TODO: declare as EnaMetadataFields and construct with sraCheckENAMetadataFields() + ena_metadata_fields: String { + description "Comma-separated list of ENA metadata fields to fetch before downloading data." + help "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)." + icon 'fas fa-columns' + defaultValue '' + } - // - // WORKFLOW: Download FastQ files for SRA / ENA / GEO / DDBJ ids - // - SRA ( ids ) + download_method: DownloadMethod { + description "Method to download FastQ files. Available options are 'aspera', 'ftp' or 'sratools'. Default is 'ftp'." + help 'FTP and Aspera CLI download FastQ files directly from the ENA FTP whereas sratools uses sra-tools to download *.sra files and convert to FastQ.' + icon 'fas fa-download' + defaultValue 'ftp' + } + + skip_fastq_download: boolean { + description "Only download metadata for public data database ids and don't download the FastQ files." + icon 'fas fa-fast-forward' + } + + dbgap_key: Path? { + description 'dbGaP repository key.' + help 'Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit. Users with granted access to controlled data can download the JWT cart file for the study from the SRA Run Selector upon logging in. The JWT file can only be used on cloud platforms and is valid for 1 hour upon creation.' + icon 'fas fa-address-card' + } + + // TODO: ... } @@ -52,6 +76,7 @@ workflow NFCORE_FETCHNGS { workflow { + main: // // SUBWORKFLOW: Run initialisation tasks // @@ -61,18 +86,27 @@ workflow { params.validate_params, params.monochrome_logs, args, - params.outdir, - params.input, - params.ena_metadata_fields + workflow.outputDir ) // // WORKFLOW: Run primary workflows for the pipeline // - NFCORE_FETCHNGS ( - PIPELINE_INITIALISATION.out.ids + samples = SRA ( + Channel.fromList(params.input), + SraParams( + params.ena_metadata_fields, + params.download_method, + params.skip_fastq_download, + params.dbgap_key + ) ) + // + // SUBWORKFLOW: Collect software versions + // + versions = SOFTWARE_VERSIONS() + // // SUBWORKFLOW: Run completion tasks // @@ -80,10 +114,43 @@ workflow { params.email, params.email_on_fail, params.plaintext_email, - params.outdir, + workflow.outputDir, params.monochrome_logs, params.hook_url ) + + publish: + samples >> 'samples' + versions >> 'versions' +} + +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + WORKFLOW OUTPUTS +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +*/ + +output { + samples: Sample { + path { _sample -> + def dirs = [ + 'fastq': 'fastq', + 'md5': 'fastq/md5' + ] + return { file -> "${dirs[file.ext]}/${file.baseName}" } + } + index { + path 'samplesheet/samplesheet.json' + sort { sample -> sample.id } + } + } + + versions: Map> { + path '.' + index { + path 'nf_core_fetchngs_software_mqc_versions.yml' + } + } } /* diff --git a/modules/local/aspera_cli/main.nf b/modules/local/aspera_cli/main.nf index b38d17c0..956f1cec 100644 --- a/modules/local/aspera_cli/main.nf +++ b/modules/local/aspera_cli/main.nf @@ -8,18 +8,23 @@ process ASPERA_CLI { 'biocontainers/aspera-cli:4.14.0--hdfd78af_1' }" input: - tuple val(meta), val(fastq) - val user + meta : Map + user : String output: - tuple val(meta), path("*fastq.gz"), emit: fastq - tuple val(meta), path("*md5") , emit: md5 - path "versions.yml" , emit: versions + fastq_1 : Path = file('*_1.fastq.gz') + fastq_2 : Path? = file('*_2.fastq.gz') + md5_1 : Path = file('*_1.fastq.gz.md5') + md5_2 : Path? = file('*_2.fastq.gz.md5') + + topic: + ( task.process, 'aspera_cli', eval('ascli --version') ) >> 'versions' script: def args = task.ext.args ?: '' def conda_prefix = ['singularity', 'apptainer'].contains(workflow.containerEngine) ? "export CONDA_PREFIX=/usr/local" : "" - if (meta.single_end) { + def fastq = meta.fastq_aspera.tokenize(';') + if (meta.single_end.toBoolean()) { """ $conda_prefix @@ -31,11 +36,6 @@ process ASPERA_CLI { echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5 md5sum -c ${meta.id}.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - aspera_cli: \$(ascli --version) - END_VERSIONS """ } else { """ @@ -58,11 +58,6 @@ process ASPERA_CLI { echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5 md5sum -c ${meta.id}_2.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - aspera_cli: \$(ascli --version) - END_VERSIONS """ } } diff --git a/modules/local/aspera_cli/nextflow.config b/modules/local/aspera_cli/nextflow.config index fa2dbd90..9a808242 100644 --- a/modules/local/aspera_cli/nextflow.config +++ b/modules/local/aspera_cli/nextflow.config @@ -1,17 +1,5 @@ process { withName: 'ASPERA_CLI' { ext.args = '-QT -l 300m -P33001' - publishDir = [ - [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ], - [ - path: { "${params.outdir}/fastq/md5" }, - mode: params.publish_dir_mode, - pattern: "*.md5" - ] - ] } } diff --git a/modules/local/multiqc_mappings_config/main.nf b/modules/local/multiqc_mappings_config/main.nf deleted file mode 100644 index 8efe1caa..00000000 --- a/modules/local/multiqc_mappings_config/main.nf +++ /dev/null @@ -1,27 +0,0 @@ - -process MULTIQC_MAPPINGS_CONFIG { - - conda "conda-forge::python=3.9.5" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.9--1' : - 'biocontainers/python:3.9--1' }" - - input: - path csv - - output: - path "*yml" , emit: yml - path "versions.yml", emit: versions - - script: - """ - multiqc_mappings_config.py \\ - $csv \\ - multiqc_config.yml - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS - """ -} diff --git a/modules/local/multiqc_mappings_config/nextflow.config b/modules/local/multiqc_mappings_config/nextflow.config deleted file mode 100644 index 11c58341..00000000 --- a/modules/local/multiqc_mappings_config/nextflow.config +++ /dev/null @@ -1,9 +0,0 @@ -process { - withName: 'MULTIQC_MAPPINGS_CONFIG' { - publishDir = [ - path: { "${params.outdir}/samplesheet" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} diff --git a/modules/local/multiqc_mappings_config/tests/main.nf.test b/modules/local/multiqc_mappings_config/tests/main.nf.test deleted file mode 100644 index dbb4d74f..00000000 --- a/modules/local/multiqc_mappings_config/tests/main.nf.test +++ /dev/null @@ -1,24 +0,0 @@ -nextflow_process { - - name "Test process: MULTIQC_MAPPINGS_CONFIG" - script "../main.nf" - process "MULTIQC_MAPPINGS_CONFIG" - - test("Should run without failures") { - - when { - process { - """ - input[0] = file(params.pipelines_testdata_base_path + 'csv/SRX9626017_SRR13191702.mappings.csv', checkIfExists: true) - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/local/multiqc_mappings_config/tests/main.nf.test.snap b/modules/local/multiqc_mappings_config/tests/main.nf.test.snap deleted file mode 100644 index 43e46f61..00000000 --- a/modules/local/multiqc_mappings_config/tests/main.nf.test.snap +++ /dev/null @@ -1,31 +0,0 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - [ - "multiqc_config.yml:md5,7f3cb10fff83ba9eb3e8fa6862d1290a", - "versions.yml:md5,dd4c66f0551d15510b36bb2e2b2fdd73" - ] - ], - "1": [ - "versions.yml:md5,dd4c66f0551d15510b36bb2e2b2fdd73" - ], - "versions": [ - "versions.yml:md5,dd4c66f0551d15510b36bb2e2b2fdd73" - ], - "yml": [ - [ - "multiqc_config.yml:md5,7f3cb10fff83ba9eb3e8fa6862d1290a", - "versions.yml:md5,dd4c66f0551d15510b36bb2e2b2fdd73" - ] - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T11:52:12.65888" - } -} \ No newline at end of file diff --git a/modules/local/sra_fastq_ftp/main.nf b/modules/local/sra_fastq_ftp/main.nf index e2274d46..a5817833 100644 --- a/modules/local/sra_fastq_ftp/main.nf +++ b/modules/local/sra_fastq_ftp/main.nf @@ -10,36 +10,35 @@ process SRA_FASTQ_FTP { 'biocontainers/wget:1.20.1' }" input: - tuple val(meta), val(fastq) + meta : Map output: - tuple val(meta), path("*fastq.gz"), emit: fastq - tuple val(meta), path("*md5") , emit: md5 - path "versions.yml" , emit: versions + fastq_1 : Path = file('*_1.fastq.gz') + fastq_2 : Path? = file('*_2.fastq.gz') + md5_1 : Path = file('*_1.fastq.gz.md5') + md5_2 : Path? = file('*_2.fastq.gz.md5') + + topic: + ( task.process, 'wget', eval("echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')") ) >> 'versions' script: def args = task.ext.args ?: '' - if (meta.single_end) { + if (meta.single_end.toBoolean()) { """ wget \\ $args \\ -O ${meta.id}.fastq.gz \\ - ${fastq[0]} + ${meta.fastq_1} echo "${meta.md5_1} ${meta.id}.fastq.gz" > ${meta.id}.fastq.gz.md5 md5sum -c ${meta.id}.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wget: \$(echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')) - END_VERSIONS """ } else { """ wget \\ $args \\ -O ${meta.id}_1.fastq.gz \\ - ${fastq[0]} + ${meta.fastq_1} echo "${meta.md5_1} ${meta.id}_1.fastq.gz" > ${meta.id}_1.fastq.gz.md5 md5sum -c ${meta.id}_1.fastq.gz.md5 @@ -47,15 +46,10 @@ process SRA_FASTQ_FTP { wget \\ $args \\ -O ${meta.id}_2.fastq.gz \\ - ${fastq[1]} + ${meta.fastq_2} echo "${meta.md5_2} ${meta.id}_2.fastq.gz" > ${meta.id}_2.fastq.gz.md5 md5sum -c ${meta.id}_2.fastq.gz.md5 - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - wget: \$(echo \$(wget --version | head -n 1 | sed 's/^GNU Wget //; s/ .*\$//')) - END_VERSIONS """ } } diff --git a/modules/local/sra_fastq_ftp/nextflow.config b/modules/local/sra_fastq_ftp/nextflow.config index 56e43959..26261f26 100644 --- a/modules/local/sra_fastq_ftp/nextflow.config +++ b/modules/local/sra_fastq_ftp/nextflow.config @@ -1,17 +1,5 @@ process { withName: 'SRA_FASTQ_FTP' { ext.args = '-t 5 -nv -c -T 60' - publishDir = [ - [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ], - [ - path: { "${params.outdir}/fastq/md5" }, - mode: params.publish_dir_mode, - pattern: "*.md5" - ] - ] } } diff --git a/modules/local/sra_ids_to_runinfo/main.nf b/modules/local/sra_ids_to_runinfo/main.nf index 7d47f5e3..7d644b18 100644 --- a/modules/local/sra_ids_to_runinfo/main.nf +++ b/modules/local/sra_ids_to_runinfo/main.nf @@ -9,12 +9,14 @@ process SRA_IDS_TO_RUNINFO { 'biocontainers/python:3.9--1' }" input: - val id - val fields + id : String + fields : String output: - path "*.tsv" , emit: tsv - path "versions.yml", emit: versions + file('*.runinfo.tsv') + + topic: + ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' script: def metadata_fields = fields ? "--ena_metadata_fields ${fields}" : '' @@ -24,10 +26,5 @@ process SRA_IDS_TO_RUNINFO { id.txt \\ ${id}.runinfo.tsv \\ $metadata_fields - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS """ } diff --git a/modules/local/sra_ids_to_runinfo/nextflow.config b/modules/local/sra_ids_to_runinfo/nextflow.config deleted file mode 100644 index 9b9d0b16..00000000 --- a/modules/local/sra_ids_to_runinfo/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: 'SRA_IDS_TO_RUNINFO' { - publishDir = [ - path: { "${params.outdir}/metadata" }, - enabled: false - ] - } -} diff --git a/modules/local/sra_runinfo_to_ftp/main.nf b/modules/local/sra_runinfo_to_ftp/main.nf index 9c83cf53..1c56c336 100644 --- a/modules/local/sra_runinfo_to_ftp/main.nf +++ b/modules/local/sra_runinfo_to_ftp/main.nf @@ -7,21 +7,18 @@ process SRA_RUNINFO_TO_FTP { 'biocontainers/python:3.9--1' }" input: - path runinfo + runinfo : Path output: - path "*.tsv" , emit: tsv - path "versions.yml", emit: versions + file('*.runinfo_ftp.tsv') + + topic: + ( task.process, 'python', eval("python --version | sed 's/Python //g'") ) >> 'versions' script: """ sra_runinfo_to_ftp.py \\ - ${runinfo.join(',')} \\ - ${runinfo.toString().tokenize(".")[0]}.runinfo_ftp.tsv - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - python: \$(python --version | sed 's/Python //g') - END_VERSIONS + ${runinfo} \\ + ${runinfo.baseName.tokenize(".")[0]}.runinfo_ftp.tsv """ } diff --git a/modules/local/sra_runinfo_to_ftp/nextflow.config b/modules/local/sra_runinfo_to_ftp/nextflow.config deleted file mode 100644 index 43263648..00000000 --- a/modules/local/sra_runinfo_to_ftp/nextflow.config +++ /dev/null @@ -1,9 +0,0 @@ -process { - withName: 'SRA_RUNINFO_TO_FTP' { - publishDir = [ - path: { "${params.outdir}/metadata" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } -} diff --git a/modules/local/sra_to_samplesheet/main.nf b/modules/local/sra_to_samplesheet/main.nf deleted file mode 100644 index 92edf5df..00000000 --- a/modules/local/sra_to_samplesheet/main.nf +++ /dev/null @@ -1,75 +0,0 @@ - -process SRA_TO_SAMPLESHEET { - tag "$meta.id" - - executor 'local' - memory 100.MB - - input: - val meta - val pipeline - val strandedness - val mapping_fields - - output: - tuple val(meta), path("*samplesheet.csv"), emit: samplesheet - tuple val(meta), path("*mappings.csv") , emit: mappings - - exec: - // - // Create samplesheet containing metadata - // - - // Remove custom keys needed to download the data - def meta_clone = meta.clone() - meta_clone.remove("id") - meta_clone.remove("fastq_1") - meta_clone.remove("fastq_2") - meta_clone.remove("md5_1") - meta_clone.remove("md5_2") - meta_clone.remove("single_end") - - // Add relevant fields to the beginning of the map - pipeline_map = [ - sample : "${meta.id.split('_')[0..-2].join('_')}", - fastq_1 : meta.fastq_1, - fastq_2 : meta.fastq_2 - ] - - // Add nf-core pipeline specific entries - if (pipeline) { - if (pipeline == 'rnaseq') { - pipeline_map << [ strandedness: strandedness ] - } else if (pipeline == 'atacseq') { - pipeline_map << [ replicate: 1 ] - } else if (pipeline == 'taxprofiler') { - pipeline_map << [ fasta: '' ] - } - } - pipeline_map << meta_clone - - // Create a samplesheet - samplesheet = pipeline_map.keySet().collect{ '"' + it + '"'}.join(",") + '\n' - samplesheet += pipeline_map.values().collect{ '"' + it + '"'}.join(",") - - // Write samplesheet to file - def samplesheet_file = task.workDir.resolve("${meta.id}.samplesheet.csv") - samplesheet_file.text = samplesheet - - // - // Create sample id mappings file - // - mappings_map = pipeline_map.clone() - def fields = mapping_fields ? ['sample'] + mapping_fields.split(',').collect{ it.trim().toLowerCase() } : [] - if ((mappings_map.keySet() + fields).unique().size() != mappings_map.keySet().size()) { - error("Invalid option for '--sample_mapping_fields': ${mapping_fields}.\nValid options: ${mappings_map.keySet().join(', ')}") - } - - // Create mappings - mappings = fields.collect{ '"' + it + '"'}.join(",") + '\n' - mappings += mappings_map.subMap(fields).values().collect{ '"' + it + '"'}.join(",") - - // Write mappings to file - def mappings_file = task.workDir.resolve("${meta.id}.mappings.csv") - mappings_file.text = mappings -} diff --git a/modules/local/sra_to_samplesheet/nextflow.config b/modules/local/sra_to_samplesheet/nextflow.config deleted file mode 100644 index da241c1a..00000000 --- a/modules/local/sra_to_samplesheet/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: SRA_TO_SAMPLESHEET { - publishDir = [ - path: { "${params.outdir}/samplesheet" }, - enabled: false - ] - } -} diff --git a/modules/local/sra_to_samplesheet/tests/main.nf.test b/modules/local/sra_to_samplesheet/tests/main.nf.test deleted file mode 100644 index ed765158..00000000 --- a/modules/local/sra_to_samplesheet/tests/main.nf.test +++ /dev/null @@ -1,27 +0,0 @@ -nextflow_process { - - name "Test process: SRA_TO_SAMPLESHEET" - script "../main.nf" - process "SRA_TO_SAMPLESHEET" - - test("Should run without failures") { - - when { - process { - """ - input[0] = [id:'ERX1188904_ERR1109373', run_accession:'ERR1109373', experiment_accession:'ERX1188904', sample_accession:'SAMEA3643867', experiment_alias:'ena-EXPERIMENT-CAM-03-11-2015-17:01:52:847-7', run_alias:'ena-RUN-CAM-03-11-2015-17:01:52:847-7', sample_alias:'sample_56', study_alias:'ena-STUDY-CAM-02-11-2015-17:42:24:189-13', library_layout:'PAIRED', experiment_title:'Illumina HiSeq 2500 paired end sequencing', sample_title:'RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome', sample_description:'RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome', fastq_md5:'8d7d7b854d0207d1226477a30103fade;9fd57225d6c07a31843276d6df9b15c0;5a62e8f785687dce890cfb4fe3e607f9', fastq_ftp:'ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_2.fastq.gz', fastq_1:'./results/fastq/ERX1188904_ERR1109373_1.fastq.gz', fastq_2:'./results/fastq/ERX1188904_ERR1109373_2.fastq.gz', md5_1:'9fd57225d6c07a31843276d6df9b15c0', md5_2:'5a62e8f785687dce890cfb4fe3e607f9', single_end:false] - input[1] = 'rnaseq' - input[2] = 'auto' - input[3] = 'experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description' - """ - } - } - - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out).match() } - ) - } - } -} diff --git a/modules/local/sra_to_samplesheet/tests/main.nf.test.snap b/modules/local/sra_to_samplesheet/tests/main.nf.test.snap deleted file mode 100644 index 568f3ea7..00000000 --- a/modules/local/sra_to_samplesheet/tests/main.nf.test.snap +++ /dev/null @@ -1,117 +0,0 @@ -{ - "Should run without failures": { - "content": [ - { - "0": [ - [ - { - "id": "ERX1188904_ERR1109373", - "run_accession": "ERR1109373", - "experiment_accession": "ERX1188904", - "sample_accession": "SAMEA3643867", - "experiment_alias": "ena-EXPERIMENT-CAM-03-11-2015-17:01:52:847-7", - "run_alias": "ena-RUN-CAM-03-11-2015-17:01:52:847-7", - "sample_alias": "sample_56", - "study_alias": "ena-STUDY-CAM-02-11-2015-17:42:24:189-13", - "library_layout": "PAIRED", - "experiment_title": "Illumina HiSeq 2500 paired end sequencing", - "sample_title": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "sample_description": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "fastq_md5": "8d7d7b854d0207d1226477a30103fade;9fd57225d6c07a31843276d6df9b15c0;5a62e8f785687dce890cfb4fe3e607f9", - "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_2.fastq.gz", - "fastq_1": "./results/fastq/ERX1188904_ERR1109373_1.fastq.gz", - "fastq_2": "./results/fastq/ERX1188904_ERR1109373_2.fastq.gz", - "md5_1": "9fd57225d6c07a31843276d6df9b15c0", - "md5_2": "5a62e8f785687dce890cfb4fe3e607f9", - "single_end": false - }, - "ERX1188904_ERR1109373.samplesheet.csv:md5,e7898191d57258e049ee7129d36f5c08" - ] - ], - "1": [ - [ - { - "id": "ERX1188904_ERR1109373", - "run_accession": "ERR1109373", - "experiment_accession": "ERX1188904", - "sample_accession": "SAMEA3643867", - "experiment_alias": "ena-EXPERIMENT-CAM-03-11-2015-17:01:52:847-7", - "run_alias": "ena-RUN-CAM-03-11-2015-17:01:52:847-7", - "sample_alias": "sample_56", - "study_alias": "ena-STUDY-CAM-02-11-2015-17:42:24:189-13", - "library_layout": "PAIRED", - "experiment_title": "Illumina HiSeq 2500 paired end sequencing", - "sample_title": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "sample_description": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "fastq_md5": "8d7d7b854d0207d1226477a30103fade;9fd57225d6c07a31843276d6df9b15c0;5a62e8f785687dce890cfb4fe3e607f9", - "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_2.fastq.gz", - "fastq_1": "./results/fastq/ERX1188904_ERR1109373_1.fastq.gz", - "fastq_2": "./results/fastq/ERX1188904_ERR1109373_2.fastq.gz", - "md5_1": "9fd57225d6c07a31843276d6df9b15c0", - "md5_2": "5a62e8f785687dce890cfb4fe3e607f9", - "single_end": false - }, - "ERX1188904_ERR1109373.mappings.csv:md5,d09ddb4f0709675e5dfe1eadf12c608f" - ] - ], - "mappings": [ - [ - { - "id": "ERX1188904_ERR1109373", - "run_accession": "ERR1109373", - "experiment_accession": "ERX1188904", - "sample_accession": "SAMEA3643867", - "experiment_alias": "ena-EXPERIMENT-CAM-03-11-2015-17:01:52:847-7", - "run_alias": "ena-RUN-CAM-03-11-2015-17:01:52:847-7", - "sample_alias": "sample_56", - "study_alias": "ena-STUDY-CAM-02-11-2015-17:42:24:189-13", - "library_layout": "PAIRED", - "experiment_title": "Illumina HiSeq 2500 paired end sequencing", - "sample_title": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "sample_description": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "fastq_md5": "8d7d7b854d0207d1226477a30103fade;9fd57225d6c07a31843276d6df9b15c0;5a62e8f785687dce890cfb4fe3e607f9", - "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_2.fastq.gz", - "fastq_1": "./results/fastq/ERX1188904_ERR1109373_1.fastq.gz", - "fastq_2": "./results/fastq/ERX1188904_ERR1109373_2.fastq.gz", - "md5_1": "9fd57225d6c07a31843276d6df9b15c0", - "md5_2": "5a62e8f785687dce890cfb4fe3e607f9", - "single_end": false - }, - "ERX1188904_ERR1109373.mappings.csv:md5,d09ddb4f0709675e5dfe1eadf12c608f" - ] - ], - "samplesheet": [ - [ - { - "id": "ERX1188904_ERR1109373", - "run_accession": "ERR1109373", - "experiment_accession": "ERX1188904", - "sample_accession": "SAMEA3643867", - "experiment_alias": "ena-EXPERIMENT-CAM-03-11-2015-17:01:52:847-7", - "run_alias": "ena-RUN-CAM-03-11-2015-17:01:52:847-7", - "sample_alias": "sample_56", - "study_alias": "ena-STUDY-CAM-02-11-2015-17:42:24:189-13", - "library_layout": "PAIRED", - "experiment_title": "Illumina HiSeq 2500 paired end sequencing", - "sample_title": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "sample_description": "RNA-Seq reads mapped onto L. Boulardi Toti-like virus genome", - "fastq_md5": "8d7d7b854d0207d1226477a30103fade;9fd57225d6c07a31843276d6df9b15c0;5a62e8f785687dce890cfb4fe3e607f9", - "fastq_ftp": "ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_1.fastq.gz;ftp.sra.ebi.ac.uk/vol1/fastq/ERR110/003/ERR1109373/ERR1109373_2.fastq.gz", - "fastq_1": "./results/fastq/ERX1188904_ERR1109373_1.fastq.gz", - "fastq_2": "./results/fastq/ERX1188904_ERR1109373_2.fastq.gz", - "md5_1": "9fd57225d6c07a31843276d6df9b15c0", - "md5_2": "5a62e8f785687dce890cfb4fe3e607f9", - "single_end": false - }, - "ERX1188904_ERR1109373.samplesheet.csv:md5,e7898191d57258e049ee7129d36f5c08" - ] - ] - } - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T11:51:38.244046" - } -} \ No newline at end of file diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf index 577117ed..4180beee 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/main.nf +++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf @@ -8,14 +8,13 @@ process CUSTOM_SRATOOLSNCBISETTINGS { 'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }" input: - val ids + ids : Bag> output: - path('*.mkfg') , emit: ncbi_settings - path 'versions.yml', emit: versions + file('*.mkfg') - when: - task.ext.when == null || task.ext.when + topic: + ( task.process, 'sratools', eval("vdb-config --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' shell: config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n" diff --git a/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh b/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh index cfe3a324..b553659b 100644 --- a/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh +++ b/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh @@ -38,8 +38,3 @@ else fi cp "${NCBI_SETTINGS}" ./ fi - -cat <<-END_VERSIONS > versions.yml -"!{task.process}": - sratools: $(vdb-config --version 2>&1 | grep -Eo '[0-9.]+') -END_VERSIONS diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index e7cf157a..330a3b85 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -8,48 +8,43 @@ process SRATOOLS_FASTERQDUMP { 'quay.io/biocontainers/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:6a9ff0e76ec016c3d0d27e0c0d362339f2d787e6-0' }" input: - tuple val(meta), path(sra) - path ncbi_settings - path certificate + meta : Map + sra : Path + ncbi_settings : Path + certificate : Path? output: - tuple val(meta), path('*.fastq.gz'), emit: reads - path "versions.yml" , emit: versions + files('*.fastq.gz').sort() - when: - task.ext.when == null || task.ext.when + topic: + ( task.process, 'sratools', eval("fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' + ( task.process, 'pigz', eval("pigz --version 2>&1 | sed 's/pigz //g'") ) >> 'versions' script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' + def args_fasterqdump = task.ext.args_fasterqdump ?: '' + def args_pigz = task.ext.args_pigz ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def outfile = meta.single_end ? "${prefix}.fastq" : prefix def key_file = '' - if (certificate.toString().endsWith('.jwt')) { + if (certificate.baseName.endsWith('.jwt')) { key_file += " --perm ${certificate}" - } else if (certificate.toString().endsWith('.ngc')) { + } else if (certificate.baseName.endsWith('.ngc')) { key_file += " --ngc ${certificate}" } """ export NCBI_SETTINGS="\$PWD/${ncbi_settings}" fasterq-dump \\ - $args \\ + $args_fasterqdump \\ --threads $task.cpus \\ --outfile $outfile \\ ${key_file} \\ ${sra} pigz \\ - $args2 \\ + $args_pigz \\ --no-name \\ --processes $task.cpus \\ *.fastq - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - sratools: \$(fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+') - pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' ) - END_VERSIONS """ } diff --git a/modules/nf-core/sratools/fasterqdump/nextflow.config b/modules/nf-core/sratools/fasterqdump/nextflow.config index f98b140d..6b14b7ba 100644 --- a/modules/nf-core/sratools/fasterqdump/nextflow.config +++ b/modules/nf-core/sratools/fasterqdump/nextflow.config @@ -1,10 +1,5 @@ process { withName: SRATOOLS_FASTERQDUMP { - ext.args = '--split-files --include-technical' - publishDir = [ - path: { "${params.outdir}/fastq" }, - mode: params.publish_dir_mode, - pattern: "*.fastq.gz" - ] + ext.args_fasterqdump = '--split-files --include-technical' } } \ No newline at end of file diff --git a/modules/nf-core/sratools/fasterqdump/tests/nextflow.config b/modules/nf-core/sratools/fasterqdump/tests/nextflow.config index 23e4100b..e62eb6ec 100644 --- a/modules/nf-core/sratools/fasterqdump/tests/nextflow.config +++ b/modules/nf-core/sratools/fasterqdump/tests/nextflow.config @@ -1,5 +1,5 @@ process { withName: SRATOOLS_FASTERQDUMP { - ext.args = '' + ext.args_fasterqdump = '' } } \ No newline at end of file diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf index 3c30739a..38ab2728 100644 --- a/modules/nf-core/sratools/prefetch/main.nf +++ b/modules/nf-core/sratools/prefetch/main.nf @@ -8,26 +8,26 @@ process SRATOOLS_PREFETCH { 'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }" input: - tuple val(meta), val(id) - path ncbi_settings - path certificate + meta : Map + ncbi_settings : Path + certificate : Path? output: - tuple val(meta), path(id), emit: sra - path 'versions.yml' , emit: versions + file(id) - when: - task.ext.when == null || task.ext.when + topic: + ( task.process, 'sratools', eval("prefetch --version 2>&1 | grep -Eo '[0-9.]+'") ) >> 'versions' shell: - args = task.ext.args ?: '' - args2 = task.ext.args2 ?: '5 1 100' // + args_prefetch = task.ext.args_prefetch ?: '' + args_retry = task.ext.args_retry ?: '5 1 100' // + id = meta.run_accession if (certificate) { - if (certificate.toString().endsWith('.jwt')) { - args += " --perm ${certificate}" + if (certificate.baseName.endsWith('.jwt')) { + args_prefetch += " --perm ${certificate}" } - else if (certificate.toString().endsWith('.ngc')) { - args += " --ngc ${certificate}" + else if (certificate.baseName.endsWith('.ngc')) { + args_prefetch += " --ngc ${certificate}" } } diff --git a/modules/nf-core/sratools/prefetch/nextflow.config b/modules/nf-core/sratools/prefetch/nextflow.config deleted file mode 100644 index a2ca8848..00000000 --- a/modules/nf-core/sratools/prefetch/nextflow.config +++ /dev/null @@ -1,8 +0,0 @@ -process { - withName: SRATOOLS_PREFETCH { - publishDir = [ - path: { "${params.outdir}/sra" }, - enabled: false - ] - } -} \ No newline at end of file diff --git a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh index a72a4bfb..c097198c 100755 --- a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh +++ b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh @@ -42,14 +42,9 @@ retry_with_backoff() { export NCBI_SETTINGS="$PWD/!{ncbi_settings}" -retry_with_backoff !{args2} \ +retry_with_backoff !{args_retry} \ prefetch \ - !{args} \ + !{args_prefetch} \ !{id} [ -f !{id}.sralite ] && vdb-validate !{id}.sralite || vdb-validate !{id} - -cat <<-END_VERSIONS > versions.yml -"!{task.process}": - sratools: $(prefetch --version 2>&1 | grep -Eo '[0-9.]+') -END_VERSIONS diff --git a/modules/nf-core/untar/environment.yml b/modules/nf-core/untar/environment.yml deleted file mode 100644 index 0c9cbb10..00000000 --- a/modules/nf-core/untar/environment.yml +++ /dev/null @@ -1,11 +0,0 @@ -name: untar - -channels: - - conda-forge - - bioconda - - defaults - -dependencies: - - conda-forge::grep=3.11 - - conda-forge::sed=4.7 - - conda-forge::tar=1.34 diff --git a/modules/nf-core/untar/main.nf b/modules/nf-core/untar/main.nf deleted file mode 100644 index 8a75bb95..00000000 --- a/modules/nf-core/untar/main.nf +++ /dev/null @@ -1,63 +0,0 @@ -process UNTAR { - tag "$archive" - label 'process_single' - - conda "${moduleDir}/environment.yml" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/ubuntu:20.04' : - 'nf-core/ubuntu:20.04' }" - - input: - tuple val(meta), path(archive) - - output: - tuple val(meta), path("$prefix"), emit: untar - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - def args = task.ext.args ?: '' - def args2 = task.ext.args2 ?: '' - prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.baseName.toString().replaceFirst(/\.tar$/, "")) - - """ - mkdir $prefix - - ## Ensures --strip-components only applied when top level of tar contents is a directory - ## If just files or multiple directories, place all in prefix - if [[ \$(tar -taf ${archive} | grep -o -P "^.*?\\/" | uniq | wc -l) -eq 1 ]]; then - tar \\ - -C $prefix --strip-components 1 \\ - -xavf \\ - $args \\ - $archive \\ - $args2 - else - tar \\ - -C $prefix \\ - -xavf \\ - $args \\ - $archive \\ - $args2 - fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS - """ - - stub: - prefix = task.ext.prefix ?: ( meta.id ? "${meta.id}" : archive.toString().replaceFirst(/\.[^\.]+(.gz)?$/, "")) - """ - mkdir $prefix - touch ${prefix}/file.txt - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - untar: \$(echo \$(tar --version 2>&1) | sed 's/^.*(GNU tar) //; s/ Copyright.*\$//') - END_VERSIONS - """ -} diff --git a/modules/nf-core/untar/meta.yml b/modules/nf-core/untar/meta.yml deleted file mode 100644 index a9a2110f..00000000 --- a/modules/nf-core/untar/meta.yml +++ /dev/null @@ -1,46 +0,0 @@ -name: untar -description: Extract files. -keywords: - - untar - - uncompress - - extract -tools: - - untar: - description: | - Extract tar.gz files. - documentation: https://www.gnu.org/software/tar/manual/ - licence: ["GPL-3.0-or-later"] -input: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - archive: - type: file - description: File to be untar - pattern: "*.{tar}.{gz}" -output: - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - untar: - type: directory - description: Directory containing contents of archive - pattern: "*/" - - versions: - type: file - description: File containing software versions - pattern: "versions.yml" -authors: - - "@joseespinosa" - - "@drpatelh" - - "@matthdsm" - - "@jfy133" -maintainers: - - "@joseespinosa" - - "@drpatelh" - - "@matthdsm" - - "@jfy133" diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test deleted file mode 100644 index 98b769ad..00000000 --- a/modules/nf-core/untar/tests/main.nf.test +++ /dev/null @@ -1,45 +0,0 @@ -nextflow_process { - - name "Test Process UNTAR" - script "../main.nf" - process "UNTAR" - - test("test_untar") { - - when { - process { - """ - input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out.untar).match("test_untar") }, - ) - } - - } - - test("test_untar_onlyfiles") { - - when { - process { - """ - input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ] - """ - } - } - - then { - assertAll ( - { assert process.success }, - { assert snapshot(process.out.untar).match("test_untar_onlyfiles") }, - ) - } - - } - -} diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap deleted file mode 100644 index 64550292..00000000 --- a/modules/nf-core/untar/tests/main.nf.test.snap +++ /dev/null @@ -1,42 +0,0 @@ -{ - "test_untar_onlyfiles": { - "content": [ - [ - [ - [ - - ], - [ - "hello.txt:md5,e59ff97941044f85df5297e1c302d260" - ] - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T11:49:41.320643" - }, - "test_untar": { - "content": [ - [ - [ - [ - - ], - [ - "hash.k2d:md5,8b8598468f54a7087c203ad0190555d9", - "opts.k2d:md5,a033d00cf6759407010b21700938f543", - "taxo.k2d:md5,094d5891cdccf2f1468088855c214b2c" - ] - ] - ] - ], - "meta": { - "nf-test": "0.8.4", - "nextflow": "23.10.1" - }, - "timestamp": "2024-02-28T11:49:33.795172" - } -} \ No newline at end of file diff --git a/nextflow.config b/nextflow.config index 7f4f8ebf..7efb1ceb 100644 --- a/nextflow.config +++ b/nextflow.config @@ -6,35 +6,14 @@ ---------------------------------------------------------------------------------------- */ -// Global default params, used in configs +// Config params params { - // Input options - input = null - nf_core_pipeline = null - nf_core_rnaseq_strandedness = 'auto' - ena_metadata_fields = null - sample_mapping_fields = 'experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description' - download_method = 'ftp' - skip_fastq_download = false - dbgap_key = null - - // Boilerplate options - outdir = null - publish_dir_mode = 'copy' - email = null - email_on_fail = null - plaintext_email = false - monochrome_logs = false - hook_url = null - help = false - version = false - - // Config options + // Institutional config options config_profile_name = null config_profile_description = null custom_config_version = 'master' - custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" + custom_config_base = "https://raw.githubusercontent.com/nf-core/configs" config_profile_contact = null config_profile_url = null @@ -44,16 +23,8 @@ params { max_cpus = 16 max_time = '240.h' - // Schema validation default options - validationFailUnrecognisedParams = false - validationLenientMode = false - validationShowHiddenParams = false - validationSchemaIgnoreParams = '' - validate_params = true - - // Deprecated options - // See: https://github.com/nf-core/fetchngs/pull/279/files#r1494459480 - force_sratools_download = false + // Report options + trace_suffix = "_${new java.util.Date().format('yyyy-MM-dd_HH-mm-ss')}" } @@ -61,11 +32,7 @@ params { includeConfig 'conf/base.config' // Load nf-core custom profiles from different Institutions -try { - includeConfig "${params.custom_config_base}/nfcore_custom.config" -} catch (Exception e) { - System.err.println("WARNING: Could not load nf-core/config profiles: ${params.custom_config_base}/nfcore_custom.config") -} +includeConfig "${params.custom_config_base}/${params.custom_config_version}/nfcore_custom.config" // Workflow specific configs includeConfig './workflows/sra/nextflow.config' @@ -75,7 +42,6 @@ profiles { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' cleanup = false - nextflow.enable.configProcessNamesValidation = true } conda { conda.enabled = true @@ -193,25 +159,24 @@ env { // Capture exit codes from upstream processes when piping process.shell = ['/bin/bash', '-euo', 'pipefail'] -// Disable process selector warnings by default. Use debug profile to enable warnings. -nextflow.enable.configProcessNamesValidation = false +// workflow outputs +workflow.output.mode = 'copy' -def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') timeline { enabled = true - file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html" + file = "${outputDir}/pipeline_info/execution_timeline${params.trace_suffix}.html" } report { enabled = true - file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html" + file = "${outputDir}/pipeline_info/execution_report${params.trace_suffix}.html" } trace { enabled = true - file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt" + file = "${outputDir}/pipeline_info/execution_trace${params.trace_suffix}.txt" } dag { enabled = true - file = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html" + file = "${outputDir}/pipeline_info/pipeline_dag${params.trace_suffix}.html" } manifest { @@ -224,36 +189,3 @@ manifest { version = '1.13.0dev' doi = 'https://doi.org/10.5281/zenodo.5070524' } - -// Function to ensure that resource requirements don't go beyond -// a maximum limit -def check_max(obj, type) { - if (type == 'memory') { - try { - if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1) - return params.max_memory as nextflow.util.MemoryUnit - else - return obj - } catch (all) { - println " ### ERROR ### Max memory '${params.max_memory}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'time') { - try { - if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1) - return params.max_time as nextflow.util.Duration - else - return obj - } catch (all) { - println " ### ERROR ### Max time '${params.max_time}' is not valid! Using default value: $obj" - return obj - } - } else if (type == 'cpus') { - try { - return Math.min( obj, params.max_cpus as int ) - } catch (all) { - println " ### ERROR ### Max cpus '${params.max_cpus}' is not valid! Using default value: $obj" - return obj - } - } -} diff --git a/nextflow_schema.json b/nextflow_schema.json index 29f7b710..dbac4c00 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -5,12 +5,12 @@ "description": "Pipeline to fetch metadata and raw FastQ files from public databases", "type": "object", "definitions": { - "input_output_options": { - "title": "Input/output options", + "input_options": { + "title": "Input options", "type": "object", "fa_icon": "fas fa-terminal", "description": "Define where the pipeline should find input data and save output data.", - "required": ["input", "outdir"], + "required": ["input"], "properties": { "input": { "type": "string", @@ -28,25 +28,6 @@ "description": "Comma-separated list of ENA metadata fields to fetch before downloading data.", "help_text": "The default list of fields used by the pipeline can be found at the top of the [`bin/sra_ids_to_runinfo.py`](https://github.com/nf-core/fetchngs/blob/master/bin/sra_ids_to_runinfo.py) script within the pipeline repo. This pipeline requires a minimal set of fields to download FastQ files i.e. `'run_accession,experiment_accession,library_layout,fastq_ftp,fastq_md5'`. Full list of accepted metadata fields can be obtained from the [ENA API](https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run)." }, - "sample_mapping_fields": { - "type": "string", - "fa_icon": "fas fa-columns", - "description": "Comma-separated list of ENA metadata fields used to create a separate 'id_mappings.csv' and 'multiqc_config.yml' with selected fields that can be used to rename samples in general and in MultiQC.", - "default": "experiment_accession,run_accession,sample_accession,experiment_alias,run_alias,sample_alias,experiment_title,sample_title,sample_description" - }, - "nf_core_pipeline": { - "type": "string", - "fa_icon": "fab fa-apple", - "description": "Name of supported nf-core pipeline e.g. 'rnaseq'. A samplesheet for direct use with the pipeline will be created with the appropriate columns.", - "enum": ["rnaseq", "atacseq", "viralrecon", "taxprofiler"] - }, - "nf_core_rnaseq_strandedness": { - "type": "string", - "fa_icon": "fas fa-dna", - "description": "Value for 'strandedness' entry added to samplesheet created when using '--nf_core_pipeline rnaseq'.", - "help_text": "The default is 'auto' which can be used with nf-core/rnaseq v3.10 onwards to auto-detect strandedness during the pipeline execution.", - "default": "auto" - }, "download_method": { "type": "string", "default": "ftp", @@ -67,12 +48,6 @@ "format": "file-path", "description": "dbGaP repository key." }, - "outdir": { - "type": "string", - "format": "directory-path", - "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", - "fa_icon": "fas fa-folder-open" - }, "email": { "type": "string", "description": "Email address for completion summary.", @@ -99,7 +74,7 @@ "custom_config_base": { "type": "string", "description": "Base directory for Institutional configs.", - "default": "https://raw.githubusercontent.com/nf-core/configs/master", + "default": "https://raw.githubusercontent.com/nf-core/configs", "hidden": true, "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.", "fa_icon": "fas fa-users-cog" @@ -184,15 +159,6 @@ "fa_icon": "fas fa-question-circle", "hidden": true }, - "publish_dir_mode": { - "type": "string", - "default": "copy", - "description": "Method used to save pipeline results to output directory.", - "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.", - "fa_icon": "fas fa-copy", - "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"], - "hidden": true - }, "email_on_fail": { "type": "string", "description": "Email address for completion summary, only when pipeline fails.", @@ -219,69 +185,16 @@ "fa_icon": "fas fa-people-group", "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.", "hidden": true - }, - "validate_params": { - "type": "boolean", - "description": "Boolean whether to validate parameters against the schema at runtime", - "default": true, - "fa_icon": "fas fa-check-square", - "hidden": true - }, - "validationShowHiddenParams": { - "type": "boolean", - "fa_icon": "far fa-eye-slash", - "description": "Show all params when using `--help`", - "hidden": true, - "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters." - }, - "validationFailUnrecognisedParams": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters fails when an unrecognised parameter is found.", - "hidden": true, - "help_text": "By default, when an unrecognised parameter is found, it returns a warinig." - }, - "validationLenientMode": { - "type": "boolean", - "fa_icon": "far fa-check-circle", - "description": "Validation of parameters in lenient more.", - "hidden": true, - "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)." - } - } - }, - "deprecated_options": { - "title": "Deprecated options", - "type": "object", - "description": "List of parameters that have been deprecated.", - "default": "", - "fa_icon": "fas fa-calendar-times", - "properties": { - "force_sratools_download": { - "type": "boolean", - "fa_icon": "fas fa-times-circle", - "description": "This parameter has been deprecated. Please use '--download_method sratools' instead.", - "enum": [false], - "hidden": true } } } }, "allOf": [ { - "$ref": "#/definitions/input_output_options" - }, - { - "$ref": "#/definitions/institutional_config_options" - }, - { - "$ref": "#/definitions/max_job_request_options" + "$ref": "#/definitions/input_options" }, { "$ref": "#/definitions/generic_options" - }, - { - "$ref": "#/definitions/deprecated_options" } ] } diff --git a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf index 0c4307b5..3f4dbe51 100644 --- a/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_fetchngs_pipeline/main.nf @@ -29,14 +29,12 @@ include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline' workflow PIPELINE_INITIALISATION { take: - version // boolean: Display version and exit - help // boolean: Display help text - validate_params // boolean: Boolean whether to validate parameters against the schema at runtime - monochrome_logs // boolean: Do not use coloured log outputs - nextflow_cli_args // array: List of positional nextflow CLI args - outdir // string: The output directory where the results will be saved - input // string: File containing SRA/ENA/GEO/DDBJ identifiers one per line to download their associated metadata and FastQ files - ena_metadata_fields // string: Comma-separated list of ENA metadata fields to fetch before downloading data + version : boolean // Display version and exit + help : boolean // Display help text + validate_params : boolean // Validate parameters against the schema at runtime + monochrome_logs : boolean // Do not use coloured log outputs + nextflow_cli_args : List // List of positional nextflow CLI args + outdir : String // The output directory where the results will be saved main: @@ -55,7 +53,7 @@ workflow PIPELINE_INITIALISATION { // pre_help_text = nfCoreLogo(monochrome_logs) post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs) - def String workflow_command = "nextflow run ${workflow.manifest.name} -profile --input ids.csv --outdir " + workflow_command = "nextflow run ${workflow.manifest.name} -profile --input ids.csv -output-dir " UTILS_NFVALIDATION_PLUGIN ( help, workflow_command, @@ -71,27 +69,6 @@ workflow PIPELINE_INITIALISATION { UTILS_NFCORE_PIPELINE ( nextflow_cli_args ) - - // - // Auto-detect input id type - // - ch_input = file(input) - if (isSraId(ch_input)) { - sraCheckENAMetadataFields(ena_metadata_fields) - } else { - error('Ids provided via --input not recognised please make sure they are either SRA / ENA / GEO / DDBJ ids!') - } - - // Read in ids from --input file - Channel - .from(ch_input) - .splitCsv(header:false, sep:'', strip:true) - .map { it[0] } - .unique() - .set { ch_ids } - - emit: - ids = ch_ids } /* @@ -103,12 +80,12 @@ workflow PIPELINE_INITIALISATION { workflow PIPELINE_COMPLETION { take: - email // string: email address - email_on_fail // string: email address sent on pipeline failure - plaintext_email // boolean: Send plain-text email instead of HTML - outdir // path: Path to output directory where results will be published - monochrome_logs // boolean: Disable ANSI colour codes in log output - hook_url // string: hook URL for notifications + email : String // email address + email_on_fail : String // email address sent on pipeline failure + plaintext_email : boolean // Send plain-text email instead of HTML + outdir : Path // Path to output directory where results will be published + monochrome_logs : boolean // Disable ANSI colour codes in log output + hook_url : String // hook URL for notifications main: @@ -141,7 +118,7 @@ workflow PIPELINE_COMPLETION { // // Check if input ids are from the SRA // -def isSraId(input) { +def isSraId(input: Path) -> boolean { def is_sra = false def total_ids = 0 def no_match_ids = [] diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf index fbeacf4a..ea3292ba 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf @@ -7,33 +7,29 @@ include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/sratools/ // workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS { take: - ch_sra_ids // channel: [ val(meta), val(id) ] - ch_dbgap_key // channel: [ path(dbgap_key) ] + sra_metadata : Channel> + dbgap_key : Path? main: - - ch_versions = Channel.empty() - // // Detect existing NCBI user settings or create new ones. // - CUSTOM_SRATOOLSNCBISETTINGS ( ch_sra_ids.collect() ) - ch_ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings - ch_versions = ch_versions.mix(CUSTOM_SRATOOLSNCBISETTINGS.out.versions) + ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS( sra_metadata.collect() ) - // - // Prefetch sequencing reads in SRA format. - // - SRATOOLS_PREFETCH ( ch_sra_ids, ch_ncbi_settings, ch_dbgap_key ) - ch_versions = ch_versions.mix(SRATOOLS_PREFETCH.out.versions.first()) + reads = sra_metadata.map { meta -> + // + // Prefetch sequencing reads in SRA format. + // + def sra = SRATOOLS_PREFETCH ( meta, ncbi_settings, dbgap_key ) - // - // Convert the SRA format into one or more compressed FASTQ files. - // - SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra, ch_ncbi_settings, ch_dbgap_key ) - ch_versions = ch_versions.mix(SRATOOLS_FASTERQDUMP.out.versions.first()) + // + // Convert the SRA format into one or more compressed FASTQ files. + // + def fastq = SRATOOLS_FASTERQDUMP ( meta, sra, ncbi_settings, dbgap_key ) + + ( meta, fastq ) + } emit: - reads = SRATOOLS_FASTERQDUMP.out.reads // channel: [ val(meta), [ reads ] ] - versions = ch_versions // channel: [ versions.yml ] + reads : Channel<(Map, List)> } diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config index de803a38..187faf6d 100644 --- a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config +++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config @@ -1,2 +1 @@ -includeConfig '../../../modules/nf-core/sratools/prefetch/nextflow.config' includeConfig '../../../modules/nf-core/sratools/fasterqdump/nextflow.config' diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf index ac31f28f..6e4db179 100644 --- a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf @@ -2,9 +2,9 @@ // Subworkflow with functionality that may be useful for any Nextflow pipeline // -import org.yaml.snakeyaml.Yaml -import groovy.json.JsonOutput -import nextflow.extension.FilesEx +// import org.yaml.snakeyaml.Yaml +// import groovy.json.JsonOutput +// import nextflow.extension.FilesEx /* ======================================================================================== @@ -15,10 +15,10 @@ import nextflow.extension.FilesEx workflow UTILS_NEXTFLOW_PIPELINE { take: - print_version // boolean: print version - dump_parameters // boolean: dump parameters - outdir // path: base directory used to publish pipeline results - check_conda_channels // boolean: check conda channels + print_version : boolean // print version + dump_parameters : boolean // dump parameters + outdir : String // base directory used to publish pipeline results + check_conda_channels: boolean // check conda channels main: @@ -45,7 +45,7 @@ workflow UTILS_NEXTFLOW_PIPELINE { } emit: - dummy_emit = true + true } /* @@ -57,8 +57,8 @@ workflow UTILS_NEXTFLOW_PIPELINE { // // Generate version string // -def getWorkflowVersion() { - String version_string = "" +def getWorkflowVersion() -> String { + def version_string = "" if (workflow.manifest.version) { def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' version_string += "${prefix_v}${workflow.manifest.version}" @@ -75,7 +75,7 @@ def getWorkflowVersion() { // // Dump pipeline parameters to a JSON file // -def dumpParametersToJSON(outdir) { +def dumpParametersToJSON(outdir: String) { def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss') def filename = "params_${timestamp}.json" def temp_pf = new File(workflow.launchDir.toString(), ".${filename}") @@ -90,8 +90,8 @@ def dumpParametersToJSON(outdir) { // When running with -profile conda, warn if channels have not been set-up appropriately // def checkCondaChannels() { - Yaml parser = new Yaml() - def channels = [] + def parser = new Yaml() + def channels: Set = [] try { def config = parser.load("conda config --show channels".execute().text) channels = config.channels @@ -102,15 +102,15 @@ def checkCondaChannels() { // Check that all channels are present // This channel list is ordered by required channel priority. - def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults'] - def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean + def required_channels_in_order: Set = ['conda-forge', 'bioconda', 'defaults'] + def channels_missing = !(required_channels_in_order - channels).isEmpty() // Check that they are in the right order def channel_priority_violation = false def n = required_channels_in_order.size() - for (int i = 0; i < n - 1; i++) { - channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) - } + // for (int i = 0; i < n - 1; i++) { + // channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1])) + // } if (channels_missing | channel_priority_violation) { log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" + diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf index a8b55d6f..06969ea4 100644 --- a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf +++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf @@ -2,9 +2,6 @@ // Subworkflow with utility functions specific to the nf-core pipeline template // -import org.yaml.snakeyaml.Yaml -import nextflow.extension.FilesEx - /* ======================================================================================== SUBWORKFLOW DEFINITION @@ -14,7 +11,7 @@ import nextflow.extension.FilesEx workflow UTILS_NFCORE_PIPELINE { take: - nextflow_cli_args + nextflow_cli_args : List main: valid_config = checkConfigProvided() @@ -24,6 +21,32 @@ workflow UTILS_NFCORE_PIPELINE { valid_config } +// +// Get channel of software versions used in pipeline +// +workflow SOFTWARE_VERSIONS { + main: + processVersions = Channel.topic('versions', (String,String,String)) + workflowVersions = Channel.of( + ( 'Workflow', workflow.manifest.name, getWorkflowVersion() ), + ( 'Workflow', 'Nextflow', workflow.nextflow.version ) + ) + + emit: + processVersions + .mix(workflowVersions) // Channel<(String,String,String)> + .gather { (process, name, version) -> + (process, -1, (name, version)) + } // Channel<(String,Bag<(String,String)>)> + .map { (process, tools) -> + def simpleName = process.tokenize(':').last() + def toolsMap = tools.unique().inject([:]) { acc, (name, version) -> + acc + [ (name): version ] + } + return [ simpleName: toolsMap ] + } // Channel>> +} + /* ======================================================================================== FUNCTIONS @@ -33,8 +56,7 @@ workflow UTILS_NFCORE_PIPELINE { // // Warn if a -profile or Nextflow config has not been provided to run the pipeline // -def checkConfigProvided() { - valid_config = true +def checkConfigProvided() -> boolean { if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) { log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" + "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" + @@ -42,15 +64,15 @@ def checkConfigProvided() { " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" + " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" + "Please refer to the quick start section and usage docs for the pipeline.\n " - valid_config = false + return false } - return valid_config + return true } // // Exit pipeline if --profile contains spaces // -def checkProfileProvided(nextflow_cli_args) { +def checkProfileProvided(nextflow_cli_args: List) { if (workflow.profile.endsWith(',')) { error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" + "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n" @@ -64,7 +86,7 @@ def checkProfileProvided(nextflow_cli_args) { // // Citation string for pipeline // -def workflowCitation() { +def workflowCitation() -> String { return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" + "* The pipeline\n" + " ${workflow.manifest.doi}\n\n" + @@ -77,8 +99,8 @@ def workflowCitation() { // // Generate workflow version string // -def getWorkflowVersion() { - String version_string = "" +def getWorkflowVersion() -> String { + def version_string = "" if (workflow.manifest.version) { def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : '' version_string += "${prefix_v}${workflow.manifest.version}" @@ -92,41 +114,10 @@ def getWorkflowVersion() { return version_string } -// -// Get software versions for pipeline -// -def processVersionsFromYAML(yaml_file) { - Yaml yaml = new Yaml() - versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } - return yaml.dumpAsMap(versions).trim() -} - -// -// Get workflow version for pipeline -// -def workflowVersionToYAML() { - return """ - Workflow: - $workflow.manifest.name: ${getWorkflowVersion()} - Nextflow: $workflow.nextflow.version - """.stripIndent().trim() -} - -// -// Get channel of software versions used in pipeline in YAML format -// -def softwareVersionsToYAML(ch_versions) { - return ch_versions - .unique() - .map { processVersionsFromYAML(it) } - .unique() - .mix(Channel.of(workflowVersionToYAML())) -} - // // Get workflow summary for MultiQC // -def paramsSummaryMultiqc(summary_params) { +def paramsSummaryMultiqc(summary_params: Map) -> String { def summary_section = '' for (group in summary_params.keySet()) { def group_params = summary_params.get(group) // This gets the parameters of that particular group @@ -140,7 +131,7 @@ def paramsSummaryMultiqc(summary_params) { } } - String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" + def yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n" yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n" yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n" yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n" @@ -154,8 +145,8 @@ def paramsSummaryMultiqc(summary_params) { // // nf-core logo // -def nfCoreLogo(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) +def nfCoreLogo(monochrome_logs: boolean = true) -> String { + def colors = logColours(monochrome_logs) String.format( """\n ${dashedLine(monochrome_logs)} @@ -173,16 +164,16 @@ def nfCoreLogo(monochrome_logs=true) { // // Return dashed line // -def dashedLine(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) +def dashedLine(monochrome_logs: boolean = true) -> String { + def colors = logColours(monochrome_logs) return "-${colors.dim}----------------------------------------------------${colors.reset}-" } // // ANSII colours used for terminal logging // -def logColours(monochrome_logs=true) { - Map colorcodes = [:] +def logColours(monochrome_logs: boolean = true) -> Map { + def colorcodes = [:] // Reset / Meta colorcodes['reset'] = monochrome_logs ? '' : "\033[0m" @@ -246,39 +237,22 @@ def logColours(monochrome_logs=true) { return colorcodes } -// -// Attach the multiqc report to email -// -def attachMultiqcReport(multiqc_report) { - def mqc_report = null - try { - if (workflow.success) { - mqc_report = multiqc_report.getVal() - if (mqc_report.getClass() == ArrayList && mqc_report.size() >= 1) { - if (mqc_report.size() > 1) { - log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one" - } - mqc_report = mqc_report[0] - } - } - } catch (all) { - if (multiqc_report) { - log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email" - } - } - return mqc_report -} - // // Construct and send completion email // -def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) { +def completionEmail( + summary_params: Map, + email: String, + email_on_fail: String, + plaintext_email: boolean, + outdir: String, + monochrome_logs: boolean = true, + multiqc_report: Path = null) { // Set up the e-mail variables - def subject = "[$workflow.manifest.name] Successful: $workflow.runName" - if (!workflow.success) { - subject = "[$workflow.manifest.name] FAILED: $workflow.runName" - } + def subject = workflow.success + ? "[$workflow.manifest.name] Successful: $workflow.runName" + : "[$workflow.manifest.name] FAILED: $workflow.runName" def summary = [:] for (group in summary_params.keySet()) { @@ -311,13 +285,14 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi email_fields['summary'] = summary << misc_fields // On success try attach the multiqc report - def mqc_report = attachMultiqcReport(multiqc_report) + def mqc_report = workflow.success + ? multiqc_report + : null // Check if we are only sending emails on failure - def email_address = email - if (!email && email_on_fail && !workflow.success) { - email_address = email_on_fail - } + def email_address = !email && email_on_fail && !workflow.success + ? email_on_fail + : email // Render the TXT template def engine = new groovy.text.GStringTemplateEngine() @@ -338,7 +313,7 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi def sendmail_html = sendmail_template.toString() // Send the HTML e-mail - Map colors = logColours(monochrome_logs) + def colors = logColours(monochrome_logs) if (email_address) { try { if (plaintext_email) { throw GroovyException('Send plaintext e-mail, not HTML') } @@ -358,21 +333,21 @@ def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdi // Write summary e-mail HTML to a file def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html") output_hf.withWriter { w -> w << email_html } - FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html"); + output_hf.toPath().copyTo("${outdir}/pipeline_info/pipeline_report.html"); output_hf.delete() // Write summary e-mail TXT to a file def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt") output_tf.withWriter { w -> w << email_txt } - FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt"); + output_tf.toPath().copyTo("${outdir}/pipeline_info/pipeline_report.txt"); output_tf.delete() } // // Print pipeline summary on completion // -def completionSummary(monochrome_logs=true) { - Map colors = logColours(monochrome_logs) +def completionSummary(monochrome_logs: boolean = true) { + def colors = logColours(monochrome_logs) if (workflow.success) { if (workflow.stats.ignoredCount == 0) { log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-" @@ -387,7 +362,7 @@ def completionSummary(monochrome_logs=true) { // // Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack // -def imNotification(summary_params, hook_url) { +def imNotification(summary_params: Map, hook_url: String) { def summary = [:] for (group in summary_params.keySet()) { summary << summary_params[group] diff --git a/workflows/sra/main.nf b/workflows/sra/main.nf index 0c8cac0c..7b1d1b7f 100644 --- a/workflows/sra/main.nf +++ b/workflows/sra/main.nf @@ -9,8 +9,6 @@ include { SRA_FASTQ_FTP } from '../../modules/local/sra_fastq_ftp' include { SRA_IDS_TO_RUNINFO } from '../../modules/local/sra_ids_to_runinfo' include { SRA_RUNINFO_TO_FTP } from '../../modules/local/sra_runinfo_to_ftp' include { ASPERA_CLI } from '../../modules/local/aspera_cli' -include { SRA_TO_SAMPLESHEET } from '../../modules/local/sra_to_samplesheet' -include { softwareVersionsToYAML } from '../../subworkflows/nf-core/utils_nfcore_pipeline' /* ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -29,164 +27,119 @@ include { FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS } from '../../subworkflow workflow SRA { take: - ids // channel: [ ids ] + ids : Channel + params : SraParams main: - ch_versions = Channel.empty() - - // - // MODULE: Get SRA run information for public database ids - // - SRA_IDS_TO_RUNINFO ( - ids, - params.ena_metadata_fields ?: '' - ) - ch_versions = ch_versions.mix(SRA_IDS_TO_RUNINFO.out.versions.first()) - - // - // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] - // - SRA_RUNINFO_TO_FTP ( - SRA_IDS_TO_RUNINFO.out.tsv - ) - ch_versions = ch_versions.mix(SRA_RUNINFO_TO_FTP.out.versions.first()) - - SRA_RUNINFO_TO_FTP - .out - .tsv - .splitCsv(header:true, sep:'\t') - .map { - meta -> - def meta_clone = meta.clone() - meta_clone.single_end = meta_clone.single_end.toBoolean() - return meta_clone - } - .unique() - .set { ch_sra_metadata } - - if (!params.skip_fastq_download) { - - ch_sra_metadata - .branch { - meta -> - def download_method = 'ftp' - // meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera - // For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz' - // For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz' - if (meta.fastq_aspera && params.download_method == 'aspera') { - download_method = 'aspera' - } - if ((!meta.fastq_aspera && !meta.fastq_1) || params.download_method == 'sratools') { - download_method = 'sratools' - } - - aspera: download_method == 'aspera' - return [ meta, meta.fastq_aspera.tokenize(';').take(2) ] - ftp: download_method == 'ftp' - return [ meta, [ meta.fastq_1, meta.fastq_2 ] ] - sratools: download_method == 'sratools' - return [ meta, meta.run_accession ] - } - .set { ch_sra_reads } - + runinfo_ftp = ids // - // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums + // MODULE: Get SRA run information for public database ids // - SRA_FASTQ_FTP ( - ch_sra_reads.ftp - ) - ch_versions = ch_versions.mix(SRA_FASTQ_FTP.out.versions.first()) - + .map { id -> + SRA_IDS_TO_RUNINFO ( id, params.ena_metadata_fields ) + } // Channel // - // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. + // MODULE: Parse SRA run information, create file containing FTP links and read into workflow as [ meta, [reads] ] // - FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( - ch_sra_reads.sratools, - params.dbgap_key ? file(params.dbgap_key, checkIfExists: true) : [] - ) - ch_versions = ch_versions.mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.versions.first()) + .map(SRA_RUNINFO_TO_FTP) // Channel - // - // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums - // - ASPERA_CLI ( - ch_sra_reads.aspera, - 'era-fasp' - ) - ch_versions = ch_versions.mix(ASPERA_CLI.out.versions.first()) - - // Isolate FASTQ channel which will be added to emit block - SRA_FASTQ_FTP - .out - .fastq - .mix(FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS.out.reads) - .mix(ASPERA_CLI.out.fastq) - .map { - meta, fastq -> - def reads = fastq instanceof List ? fastq.flatten() : [ fastq ] - def meta_clone = meta.clone() - - meta_clone.fastq_1 = reads[0] ? "${params.outdir}/fastq/${reads[0].getName()}" : '' - meta_clone.fastq_2 = reads[1] && !meta.single_end ? "${params.outdir}/fastq/${reads[1].getName()}" : '' - - return meta_clone - } - .set { ch_sra_metadata } - } + sra_metadata = runinfo_ftp.scatter { tsv -> + tsv.splitCsv(header:true, sep:'\t').unique() + } // Channel> // - // MODULE: Stage FastQ files downloaded by SRA together and auto-create a samplesheet + // MODULE: If FTP link is provided in run information then download FastQ directly via FTP and validate with md5sums // - SRA_TO_SAMPLESHEET ( - ch_sra_metadata, - params.nf_core_pipeline ?: '', - params.nf_core_rnaseq_strandedness ?: 'auto', - params.sample_mapping_fields - ) - - // Merge samplesheets and mapping files across all samples - SRA_TO_SAMPLESHEET - .out - .samplesheet - .map { it[1] } - .collectFile(name:'tmp_samplesheet.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'samplesheet.csv', storeDir: "${params.outdir}/samplesheet") - .set { ch_samplesheet } - - SRA_TO_SAMPLESHEET - .out - .mappings - .map { it[1] } - .collectFile(name:'tmp_id_mappings.csv', newLine: true, keepHeader: true, sort: { it.baseName }) - .map { it.text.tokenize('\n').join('\n') } - .collectFile(name:'id_mappings.csv', storeDir: "${params.outdir}/samplesheet") - .set { ch_mappings } + ftp_samples = sra_metadata + .filter { meta -> + !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.FTP + } // Channel> + .map { meta -> + def out = SRA_FASTQ_FTP ( meta ) + new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2) + } // Channel // - // MODULE: Create a MutiQC config file with sample name mappings + // SUBWORKFLOW: Download sequencing reads without FTP links using sra-tools. // - ch_sample_mappings_yml = Channel.empty() - if (params.sample_mapping_fields) { - MULTIQC_MAPPINGS_CONFIG ( - ch_mappings - ) - ch_versions = ch_versions.mix(MULTIQC_MAPPINGS_CONFIG.out.versions) - ch_sample_mappings_yml = MULTIQC_MAPPINGS_CONFIG.out.yml - } + sratools_metadata = sra_metadata.filter { meta -> + !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.SRATOOLS + } // Channel> + + sratools_reads = FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS ( + sratools_metadata, + params.dbgap_key + ) // Channel<(Map, List)> + + sratools_samples = sra_metadata.map { (meta, fastq) -> + def fastq_1 = fastq[0] + def fastq_2 = !meta.single_end ? fastq[1] : null + new Sample(meta.id, fastq_1, fastq_2, null, null) + } // Channel // - // Collate and save software versions + // MODULE: If Aspera link is provided in run information then download FastQ directly via Aspera CLI and validate with md5sums // - softwareVersionsToYAML(ch_versions) - .collectFile(storeDir: "${params.outdir}/pipeline_info", name: 'nf_core_fetchngs_software_mqc_versions.yml', sort: true, newLine: true) + aspera_samples = sra_metadata + .filter { meta -> + !skip_fastq_download && getDownloadMethod(meta, params.download_method) == DownloadMethod.ASPERA + } // Channel> + .map { meta -> + def out = ASPERA_CLI ( meta, 'era-fasp' ) + new Sample(meta.id, out.fastq_1, out.fastq_2, out.md5_1, out.md5_2) + } // Channel emit: - samplesheet = ch_samplesheet - mappings = ch_mappings - sample_mappings = ch_sample_mappings_yml - sra_metadata = ch_sra_metadata - versions = ch_versions.unique() + ftp_samples + .mix(sratools_samples) + .mix(aspera_samples) + + publish: + runinfo_ftp >> 'metadata' +} + +/* +======================================================================================== + FUNCTIONS +======================================================================================== +*/ + +def getDownloadMethod(meta: Map, userMethod: DownloadMethod) -> DownloadMethod { + // meta.fastq_aspera is a metadata string with ENA fasp links supported by Aspera + // For single-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/ERR116/006/ERR1160846/ERR1160846.fastq.gz' + // For paired-end: 'fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_1.fastq.gz;fasp.sra.ebi.ac.uk:/vol1/fastq/SRR130/020/SRR13055520/SRR13055520_2.fastq.gz' + if (meta.fastq_aspera && userMethod == DownloadMethod.ASPERA) + return DownloadMethod.ASPERA + if ((!meta.fastq_aspera && !meta.fastq_1) || userMethod == DownloadMethod.SRATOOLS) + return DownloadMethod.SRATOOLS + return DownloadMethod.FTP +} + +/* +======================================================================================== + TYPES +======================================================================================== +*/ + +record SraParams { + ena_metadata_fields : String + download_method : DownloadMethod + skip_fastq_download : boolean + dbgap_key : Path? +} + +enum DownloadMethod { + ASPERA, + FTP, + SRATOOLS +} + +record Sample { + id : String + fastq_1 : Path + fastq_2 : Path? + md5_1 : Path? + md5_2 : Path? } /* diff --git a/workflows/sra/nextflow.config b/workflows/sra/nextflow.config index d242c238..522b05b8 100644 --- a/workflows/sra/nextflow.config +++ b/workflows/sra/nextflow.config @@ -1,8 +1,3 @@ -includeConfig "../../modules/local/multiqc_mappings_config/nextflow.config" includeConfig "../../modules/local/aspera_cli/nextflow.config" includeConfig "../../modules/local/sra_fastq_ftp/nextflow.config" -includeConfig "../../modules/local/sra_ids_to_runinfo/nextflow.config" -includeConfig "../../modules/local/sra_runinfo_to_ftp/nextflow.config" -includeConfig "../../modules/local/sra_to_samplesheet/nextflow.config" -includeConfig "../../modules/nf-core/sratools/prefetch/nextflow.config" includeConfig "../../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/nextflow.config"