diff --git a/.github/workflows/awsfulltest.yml b/.github/workflows/awsfulltest.yml index a4c9a575..ff6537e3 100644 --- a/.github/workflows/awsfulltest.yml +++ b/.github/workflows/awsfulltest.yml @@ -17,19 +17,23 @@ jobs: aligner: ["alevin", "kallisto", "star", "cellranger", "universc"] steps: - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/scrnaseq/work-${{ github.sha }} parameters: | { - "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/scrnaseq/results-${{ github.sha }}/aligner_${{ matrix.aligner }}", - "aligner": "${{ matrix.aligner }}" + "hook_url": "${{ secrets.MEGATESTS_ALERTS_SLACK_HOOK_URL }}", + "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/scrnaseq/results-${{ github.sha }}" } - profiles: test_full,public_aws_ecr + profiles: test_full + - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/awstest.yml b/.github/workflows/awstest.yml index e51330ee..3807805b 100644 --- a/.github/workflows/awstest.yml +++ b/.github/workflows/awstest.yml @@ -15,19 +15,22 @@ jobs: steps: # Launch workflow using Tower CLI tool action - name: Launch workflow via tower - uses: seqeralabs/action-tower-launch@v1 + uses: seqeralabs/action-tower-launch@v2 with: workspace_id: ${{ secrets.TOWER_WORKSPACE_ID }} access_token: ${{ secrets.TOWER_ACCESS_TOKEN }} compute_env: ${{ secrets.TOWER_COMPUTE_ENV }} + revision: ${{ github.sha }} workdir: s3://${{ secrets.AWS_S3_BUCKET }}/work/scrnaseq/work-${{ github.sha }} parameters: | { "outdir": "s3://${{ secrets.AWS_S3_BUCKET }}/scrnaseq/results-test-${{ github.sha }}/aligner_${{ matrix.aligner }}", "aligner": "${{ matrix.aligner }}" } - profiles: test,public_aws_ecr + profiles: test - uses: actions/upload-artifact@v3 with: name: Tower debug log file - path: tower_action_*.log + path: | + tower_action_*.log + tower_action_*.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ce37d142..875d1b83 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -24,7 +24,7 @@ jobs: strategy: matrix: NXF_VER: - - "22.10.1" + - "23.04.0" - "latest-everything" profile: [ diff --git a/.gitpod.yml b/.gitpod.yml index 85d95ecc..25488dcc 100644 --- a/.gitpod.yml +++ b/.gitpod.yml @@ -1,4 +1,9 @@ image: nfcore/gitpod:latest +tasks: + - name: Update Nextflow and setup pre-commit + command: | + pre-commit install --install-hooks + nextflow self-update vscode: extensions: # based on nf-core.nf-core-extensionpack diff --git a/.nf-core.yml b/.nf-core.yml index a76e0ec8..738ad918 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,3 +1,5 @@ repository_type: pipeline lint: template_strings: False + files_unchanged: + - .github/ISSUE_TEMPLATE/bug_report.yml diff --git a/CHANGELOG.md b/CHANGELOG.md index 95cafcd5..a6383a5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 
+## [Unpublished Version / DEV] + +- [#237](https://github.com/nf-core/scrnaseq/pull/237) Add `singularity.registry = 'quay.io'` and bump NF version to 23.04.0 +- Fixed issue with file collisions while using cellranger ([#232](https://github.com/nf-core/scrnaseq/pull/232)) +- Fix issue where multiqc inputs tried to access objects that did not exist ([#239](https://github.com/nf-core/scrnaseq/pull/239)) +- Removed `public_aws_ecr` profile +- Include cellranger in MultiQC report ([#244](https://github.com/nf-core/scrnaseq/pull/244)) +- Nf-core template update to v2.9 ([#245](https://github.com/nf-core/scrnaseq/pull/245)) +- Update cellranger and fastqc module ([#246](https://github.com/nf-core/scrnaseq/pull/246)). + The [updated cellranger module](https://github.com/nf-core/modules/pull/3537) now automatically renames input FASTQ + files to match the expected naming conventions. + ## v2.3.2 - 2023-06-07 Patched Yellow Strontium Pinscher - Move containers for pipeline to quay.io ([#233](https://github.com/nf-core/scrnaseq/pull/233)) diff --git a/CITATIONS.md b/CITATIONS.md index e1ff5d2e..1cdb5897 100644 --- a/CITATIONS.md +++ b/CITATIONS.md @@ -12,7 +12,10 @@ - [FastQC](https://www.bioinformatics.babraham.ac.uk/projects/fastqc/) + > Andrews, S. (2010). FastQC: A Quality Control Tool for High Throughput Sequence Data [Online]. Available online https://www.bioinformatics.babraham.ac.uk/projects/fastqc/. + - [MultiQC](https://pubmed.ncbi.nlm.nih.gov/27312411/) + > Ewels P, Magnusson M, Lundin S, Käller M. MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics. 2016 Oct 1;32(19):3047-8. doi: 10.1093/bioinformatics/btw354. Epub 2016 Jun 16. PubMed PMID: 27312411; PubMed Central PMCID: PMC5039924. * [Alevin](https://doi.org/10.1186/s13059-019-1670-y) @@ -46,5 +49,8 @@ - [Docker](https://dl.acm.org/doi/10.5555/2600239.2600241) + > Merkel, D. (2014). Docker: lightweight linux containers for consistent development and deployment. Linux Journal, 2014(239), 2. doi: 10.5555/2600239.2600241. + - [Singularity](https://pubmed.ncbi.nlm.nih.gov/28494014/) + > Kurtzer GM, Sochat V, Bauer MW. Singularity: Scientific containers for mobility of compute. PLoS One. 2017 May 11;12(5):e0177459. doi: 10.1371/journal.pone.0177459. eCollection 2017. PubMed PMID: 28494014; PubMed Central PMCID: PMC5426675. 
diff --git a/README.md b/README.md index f9f3eb20..2a71739b 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![GitHub Actions Linting Status](https://github.com/nf-core/scrnaseq/workflows/nf-core%20linting/badge.svg)](https://github.com/nf-core/scrnaseq/actions?query=workflow%3A%22nf-core+linting%22) [![AWS CI](https://img.shields.io/badge/CI%20tests-full%20size-FF9900?logo=Amazon%20AWS)](https://nf-co.re/scrnaseq/results) [![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.3568187-1073c8)](https://doi.org/10.5281/zenodo.3568187) -[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A522.10.1-23aa62.svg)](https://www.nextflow.io/) +[![Nextflow](https://img.shields.io/badge/nextflow%20DSL2-%E2%89%A523.04.0-23aa62.svg)](https://www.nextflow.io/) [![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/) [![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/) [![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/) @@ -67,7 +67,7 @@ nextflow run nf-core/scrnaseq \ > provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; > see [docs](https://nf-co.re/usage/configuration#custom-configuration-files). -For more details, please refer to the [usage documentation](https://nf-co.re/scrnaseq/usage) and the [parameter documentation](https://nf-co.re/scrnaseq/parameters). +For more details and further functionality, please refer to the [usage documentation](https://nf-co.re/scrnaseq/usage) and the [parameter documentation](https://nf-co.re/scrnaseq/parameters). ## Decision Tree for users @@ -86,7 +86,7 @@ Options for the respective alignment method can be found [here](https://github.c ## Pipeline output -To see the the results of a test run with a full size dataset refer to the [results](https://nf-co.re/scrnaseq/results) tab on the nf-core website pipeline page. +To see the results of an example test run with a full size dataset refer to the [results](https://nf-co.re/scrnaseq/results) tab on the nf-core website pipeline page. For more details about the output files and reports, please refer to the [output documentation](https://nf-co.re/scrnaseq/output). diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml index 1be6ed21..107f3120 100644 --- a/assets/methods_description_template.yml +++ b/assets/methods_description_template.yml @@ -6,13 +6,17 @@ plot_type: "html" ## You inject any metadata in the Nextflow '${workflow}' object data: |

   <h4>Methods</h4>
-  <p>Data was processed using nf-core/scrnaseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020).</p>
+  <p>Data was processed using nf-core/scrnaseq v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (Ewels et al., 2020), utilising reproducible software environments from the Bioconda (Grüning et al., 2018) and Biocontainers (da Veiga Leprevost et al., 2017) projects.</p>
   <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (Di Tommaso et al., 2017) with the following command:</p>
   <pre><code>${workflow.commandLine}</code></pre>
+  <p>${tool_citations}</p>
   <h4>References</h4>
   <h5>Notes:</h5>
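The `${...}` placeholders in the template hunk above are filled in at runtime by Groovy's `SimpleTemplateEngine`, the same engine that `WorkflowScrnaseq.methodsDescriptionText` uses further down in this diff. A minimal sketch of that rendering step; the template string and binding values below are illustrative only, not taken from the pipeline:

```groovy
import groovy.text.SimpleTemplateEngine

// Single quotes keep ${...} literal, so the template engine rather than
// Groovy string interpolation substitutes the placeholders.
def template_text = 'Data was processed using nf-core/scrnaseq v${workflow.manifest.version} ${doi_text}. ${tool_citations}'

// Binding keys mirror the placeholders above; the values are made up for illustration.
def binding = [
    workflow      : [manifest: [version: '2.4.0dev']],
    doi_text      : '(doi: 10.5281/zenodo.3568187)',
    tool_citations: 'Tools used in the workflow included: FastQC (Andrews 2010), MultiQC (Ewels et al. 2016).'
]

def rendered = new SimpleTemplateEngine().createTemplate(template_text).make(binding).toString()
println rendered
// Data was processed using nf-core/scrnaseq v2.4.0dev (doi: 10.5281/zenodo.3568187). Tools used in the workflow included: FastQC (Andrews 2010), MultiQC (Ewels et al. 2016).
```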
diff --git a/assets/slackreport.json b/assets/slackreport.json index 043d02f2..f6ba6baf 100644 --- a/assets/slackreport.json +++ b/assets/slackreport.json @@ -3,7 +3,7 @@ { "fallback": "Plain-text summary of the attachment.", "color": "<% if (success) { %>good<% } else { %>danger<%} %>", - "author_name": "sanger-tol/readmapping v${version} - ${runName}", + "author_name": "nf-core/scrnaseq v${version} - ${runName}", "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico", "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>", "fields": [ diff --git a/conf/multiqc_config.yaml b/conf/multiqc_config.yaml deleted file mode 100644 index 7bcca017..00000000 --- a/conf/multiqc_config.yaml +++ /dev/null @@ -1,7 +0,0 @@ -report_comment: > - This report has been generated by the nf-core/scrnaseq - analysis pipeline. For information about how to interpret these results, please see the - documentation. -report_section_order: - nf-core/scrnaseq-software-versions: - order: -1000 diff --git a/conf/public_aws_ecr.config b/conf/public_aws_ecr.config deleted file mode 100644 index 0f4a7163..00000000 --- a/conf/public_aws_ecr.config +++ /dev/null @@ -1,51 +0,0 @@ -/* -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - AWS ECR Config -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Config to set public AWS ECR images wherever possible - This improves speed when running on AWS infrastructure. - Use this as an example template when using your own private registry. ----------------------------------------------------------------------------------------- -*/ - -docker.registry = 'public.ecr.aws' -podman.registry = 'public.ecr.aws' - -process { - withName: 'CELLRANGER_COUNT' { - container = 'quay.io/nf-core/cellranger:7.1.0' - } - withName: 'CELLRANGER_MKGTF' { - container = 'quay.io/nf-core/cellranger:7.1.0' - } - withName: 'CELLRANGER_MKREF' { - container = 'quay.io/nf-core/cellranger:7.1.0' - } - withName: 'CONCAT_H5AD' { - container = 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' - } - withName: 'GENE_MAP' { - container = 'quay.io/biocontainers/python:3.8.3' - } - withName: 'GTF_GENE_FILTER' { - container = 'quay.io/biocontainers/python:3.9--1' - } - withName: 'GUNZIP' { - container = 'quay.io/nf-core/ubuntu:20.04' - } - withName: 'MTX_TO_H5AD' { - container = 'quay.io/biocontainers/scanpy:1.7.2--pyhdfd78af_0' - } - withName: 'MTX_TO_SEURAT' { - container = 'quay.io/nf-core/seurat:4.3.0' - } - withName: 'SAMPLESHEET_CHECK' { - container = 'quay.io/biocontainers/python:3.8.3' - } - withName: 'STAR_GENOMEGENERATE' { - container = 'quay.io/biocontainers/mulled-v2-1fa26d1ce03c295fe2fdcf85831a92fbcbd7e8c2:1df389393721fc66f3fd8778ad938ac711951107-0' - } - withName: 'UNIVERSC' { - container = 'quay.io/nf-core/universc:1.2.5.1' - } -} diff --git a/conf/test.config b/conf/test.config index 34111c6c..322dd7ae 100644 --- a/conf/test.config +++ b/conf/test.config @@ -31,3 +31,9 @@ params { // Ignore `--input` as otherwise the parameter validation will throw an error schema_ignore_params = 'genomes,input_paths,input' } + +process { + withName: '.*:CELLRANGER_COUNT' { + maxForks = 1 + } +} diff --git a/docs/usage.md b/docs/usage.md index bfa53ac2..089cd517 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -75,16 +75,21 @@ Other aligner options for running the pipeline are: ### If using cellranger or universc -In order to use cellranger aligner, reads must be named 
as [required by the tool](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/fastq-input): +This pipeline automatically renames input FASTQ files to follow the +[naming convention by 10x](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/fastq-input): -`[Sample Name]_S1_L00[Lane Number]_[Read Type]_001.fastq.gz` +``` +[Sample Name]_S1_L00[Lane Number]_[Read Type]_001.fastq.gz +``` -Besides that, the sample name given in the samplesheet must be the same that is present in the reads name. E.g. +For more details, see -```console -sample,fastq_1,fastq_2, -TEST1,TEST1_S1_L001_R1_001.fastq.gz,TEST1_S1_L001_R2_001.fastq.gz -``` +- [this issue](https://github.com/nf-core/scrnaseq/issues/241), discussing various mechanisms to deal with non-conformant filenames +- [the README of the cellranger/count module](https://github.com/nf-core/modules/blob/master/modules/nf-core/cellranger/count/README.md) which demonstrates that renaming files does not affect the results. +- [the code for renaming files in the cellranger/count module](https://github.com/nf-core/modules/blob/master/modules/nf-core/cellranger/count/templates/cellranger_count.py) +- [the code for renaming files in UniverSC](https://github.com/minoda-lab/universc/blob/99a20652430c1dc9f962536a2793536f643810b7/launch_universc.sh#L1411-L1609) + +As a sanity check, we verify that filenames of a pair of FASTQ files only differ by `R1`/`R2`. #### UniverSC technology configuration @@ -92,14 +97,12 @@ UniverSC automatically updates the barcode whitelist and chemistry parameters. U Currently only 3\' scRNA-Seq parameters are supported in Nextflow, although chemistry parameters for 5\' scRNA-Seq and full-length scRNA-Seq libraries are supported by the container. -Filenames are recommended to be the same format as for Cell Ranger but automated correction is attempted before calling Cell Ranger. - ## Running the pipeline The minimum typical command for running the pipeline is as follows: ```bash -nextflow run nf-core/scrnaseq --input 'samplesheet.csv' --genome GRCh38 -profile docker +nextflow run nf-core/scrnaseq --input ./samplesheet.csv --outdir ./results --genome GRCh38 -profile docker ``` This will launch the pipeline with the `docker` configuration profile and default `--type` and `--barcode_whitelist`. See below for more information about profiles and these options. @@ -118,7 +121,8 @@ If you wish to repeatedly use the same parameters for multiple runs, rather than Pipeline settings can be provided in a `yaml` or `json` file via `-params-file <file>`. > ⚠️ Do not use `-c <file>` to specify parameters as this will result in errors. Custom config files specified with `-c` must only be used for [tuning process resource specifications](https://nf-co.re/docs/usage/configuration#tuning-workflow-resources), other infrastructural tweaks (such as output directories), or module arguments (args).
-> The above pipeline run specified with a params file in yaml format: + +The above pipeline run specified with a params file in yaml format: ```bash nextflow run nf-core/scrnaseq -profile docker -params-file params.yaml @@ -130,7 +134,6 @@ with `params.yaml` containing: input: './samplesheet.csv' outdir: './results/' genome: 'GRCh37' -input: 'data' <...> ``` diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy deleted file mode 100755 index 9b34804d..00000000 --- a/lib/NfcoreSchema.groovy +++ /dev/null @@ -1,530 +0,0 @@ -// -// This file holds several functions used to perform JSON parameter validation, help and summary rendering for the nf-core pipeline template. -// - -import nextflow.Nextflow -import org.everit.json.schema.Schema -import org.everit.json.schema.loader.SchemaLoader -import org.everit.json.schema.ValidationException -import org.json.JSONObject -import org.json.JSONTokener -import org.json.JSONArray -import groovy.json.JsonSlurper -import groovy.json.JsonBuilder - -class NfcoreSchema { - - // - // Resolve Schema path relative to main workflow directory - // - public static String getSchemaPath(workflow, schema_filename='nextflow_schema.json') { - return "${workflow.projectDir}/${schema_filename}" - } - - // - // Function to loop over all parameters defined in schema and check - // whether the given parameters adhere to the specifications - // - /* groovylint-disable-next-line UnusedPrivateMethodParameter */ - public static void validateParameters(workflow, params, log, schema_filename='nextflow_schema.json') { - def has_error = false - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Check for nextflow core params and unexpected params - def json = new File(getSchemaPath(workflow, schema_filename=schema_filename)).text - def Map schemaParams = (Map) new JsonSlurper().parseText(json).get('definitions') - def nf_params = [ - // Options for base `nextflow` command - 'bg', - 'c', - 'C', - 'config', - 'd', - 'D', - 'dockerize', - 'h', - 'log', - 'q', - 'quiet', - 'syslog', - 'v', - - // Options for `nextflow run` command - 'ansi', - 'ansi-log', - 'bg', - 'bucket-dir', - 'c', - 'cache', - 'config', - 'dsl2', - 'dump-channels', - 'dump-hashes', - 'E', - 'entry', - 'latest', - 'lib', - 'main-script', - 'N', - 'name', - 'offline', - 'params-file', - 'pi', - 'plugins', - 'poll-interval', - 'pool-size', - 'profile', - 'ps', - 'qs', - 'queue-size', - 'r', - 'resume', - 'revision', - 'stdin', - 'stub', - 'stub-run', - 'test', - 'w', - 'with-apptainer', - 'with-charliecloud', - 'with-conda', - 'with-dag', - 'with-docker', - 'with-mpi', - 'with-notification', - 'with-podman', - 'with-report', - 'with-singularity', - 'with-timeline', - 'with-tower', - 'with-trace', - 'with-weblog', - 'without-docker', - 'without-podman', - 'work-dir' - ] - def unexpectedParams = [] - - // Collect expected parameters from the schema - def expectedParams = [] - def enums = [:] - for (group in schemaParams) { - for (p in group.value['properties']) { - expectedParams.push(p.key) - if (group.value['properties'][p.key].containsKey('enum')) { - enums[p.key] = group.value['properties'][p.key]['enum'] - } - } - } - - for (specifiedParam in params.keySet()) { - // nextflow params - if (nf_params.contains(specifiedParam)) { - log.error "ERROR: You used a core Nextflow option with two hyphens: '--${specifiedParam}'. 
Please resubmit with '-${specifiedParam}'" - has_error = true - } - // unexpected params - def params_ignore = params.schema_ignore_params.split(',') + 'schema_ignore_params' - def expectedParamsLowerCase = expectedParams.collect{ it.replace("-", "").toLowerCase() } - def specifiedParamLowerCase = specifiedParam.replace("-", "").toLowerCase() - def isCamelCaseBug = (specifiedParam.contains("-") && !expectedParams.contains(specifiedParam) && expectedParamsLowerCase.contains(specifiedParamLowerCase)) - if (!expectedParams.contains(specifiedParam) && !params_ignore.contains(specifiedParam) && !isCamelCaseBug) { - // Temporarily remove camelCase/camel-case params #1035 - def unexpectedParamsLowerCase = unexpectedParams.collect{ it.replace("-", "").toLowerCase()} - if (!unexpectedParamsLowerCase.contains(specifiedParamLowerCase)){ - unexpectedParams.push(specifiedParam) - } - } - } - - //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~// - // Validate parameters against the schema - InputStream input_stream = new File(getSchemaPath(workflow, schema_filename=schema_filename)).newInputStream() - JSONObject raw_schema = new JSONObject(new JSONTokener(input_stream)) - - // Remove anything that's in params.schema_ignore_params - raw_schema = removeIgnoredParams(raw_schema, params) - - Schema schema = SchemaLoader.load(raw_schema) - - // Clean the parameters - def cleanedParams = cleanParameters(params) - - // Convert to JSONObject - def jsonParams = new JsonBuilder(cleanedParams) - JSONObject params_json = new JSONObject(jsonParams.toString()) - - // Validate - try { - schema.validate(params_json) - } catch (ValidationException e) { - println '' - log.error 'ERROR: Validation of pipeline parameters failed!' - JSONObject exceptionJSON = e.toJSON() - printExceptions(exceptionJSON, params_json, log, enums) - println '' - has_error = true - } - - // Check for unexpected parameters - if (unexpectedParams.size() > 0) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - println '' - def warn_msg = 'Found unexpected parameters:' - for (unexpectedParam in unexpectedParams) { - warn_msg = warn_msg + "\n* --${unexpectedParam}: ${params[unexpectedParam].toString()}" - } - log.warn warn_msg - log.info "- ${colors.dim}Ignore this warning: params.schema_ignore_params = \"${unexpectedParams.join(',')}\" ${colors.reset}" - println '' - } - - if (has_error) { - Nextflow.error('Exiting!') - } - } - - // - // Beautify parameters for --help - // - public static String paramsHelp(workflow, params, command, schema_filename='nextflow_schema.json') { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - Integer num_hidden = 0 - String output = '' - output += 'Typical pipeline command:\n\n' - output += " ${colors.cyan}${command}${colors.reset}\n\n" - Map params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - Integer max_chars = paramsMaxChars(params_map) + 1 - Integer desc_indent = max_chars + 14 - Integer dec_linewidth = 160 - desc_indent - for (group in params_map.keySet()) { - Integer num_params = 0 - String group_output = colors.underlined + colors.bold + group + colors.reset + '\n' - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (group_params.get(param).hidden && !params.show_hidden_params) { - num_hidden += 1 - continue; - } - def type = '[' + group_params.get(param).type + ']' - def description = group_params.get(param).description - def 
defaultValue = group_params.get(param).default != null ? " [default: " + group_params.get(param).default.toString() + "]" : '' - def description_default = description + colors.dim + defaultValue + colors.reset - // Wrap long description texts - // Loosely based on https://dzone.com/articles/groovy-plain-text-word-wrap - if (description_default.length() > dec_linewidth){ - List olines = [] - String oline = "" // " " * indent - description_default.split(" ").each() { wrd -> - if ((oline.size() + wrd.size()) <= dec_linewidth) { - oline += wrd + " " - } else { - olines += oline - oline = wrd + " " - } - } - olines += oline - description_default = olines.join("\n" + " " * desc_indent) - } - group_output += " --" + param.padRight(max_chars) + colors.dim + type.padRight(10) + colors.reset + description_default + '\n' - num_params += 1 - } - group_output += '\n' - if (num_params > 0){ - output += group_output - } - } - if (num_hidden > 0){ - output += colors.dim + "!! Hiding $num_hidden params, use --show_hidden_params to show them !!\n" + colors.reset - } - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Groovy Map summarising parameters/workflow options used by the pipeline - // - public static LinkedHashMap paramsSummaryMap(workflow, params, schema_filename='nextflow_schema.json') { - // Get a selection of core Nextflow workflow options - def Map workflow_summary = [:] - if (workflow.revision) { - workflow_summary['revision'] = workflow.revision - } - workflow_summary['runName'] = workflow.runName - if (workflow.containerEngine) { - workflow_summary['containerEngine'] = workflow.containerEngine - } - if (workflow.container) { - workflow_summary['container'] = workflow.container - } - workflow_summary['launchDir'] = workflow.launchDir - workflow_summary['workDir'] = workflow.workDir - workflow_summary['projectDir'] = workflow.projectDir - workflow_summary['userName'] = workflow.userName - workflow_summary['profile'] = workflow.profile - workflow_summary['configFiles'] = workflow.configFiles.join(', ') - - // Get pipeline parameters defined in JSON Schema - def Map params_summary = [:] - def params_map = paramsLoad(getSchemaPath(workflow, schema_filename=schema_filename)) - for (group in params_map.keySet()) { - def sub_params = new LinkedHashMap() - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (params.containsKey(param)) { - def params_value = params.get(param) - def schema_value = group_params.get(param).default - def param_type = group_params.get(param).type - if (schema_value != null) { - if (param_type == 'string') { - if (schema_value.contains('$projectDir') || schema_value.contains('${projectDir}')) { - def sub_string = schema_value.replace('\$projectDir', '') - sub_string = sub_string.replace('\${projectDir}', '') - if (params_value.contains(sub_string)) { - schema_value = params_value - } - } - if (schema_value.contains('$params.outdir') || schema_value.contains('${params.outdir}')) { - def sub_string = schema_value.replace('\$params.outdir', '') - sub_string = sub_string.replace('\${params.outdir}', '') - if ("${params.outdir}${sub_string}" == params_value) { - schema_value = params_value - } - } - } - } - - // We have a default in the schema, and this isn't it - if (schema_value != null && params_value != schema_value) { - sub_params.put(param, params_value) - } - // No default in the schema, and this isn't empty - else if (schema_value == null && 
params_value != "" && params_value != null && params_value != false) { - sub_params.put(param, params_value) - } - } - } - params_summary.put(group, sub_params) - } - return [ 'Core Nextflow options' : workflow_summary ] << params_summary - } - - // - // Beautify parameters for summary and return as string - // - public static String paramsSummaryLog(workflow, params) { - Map colors = NfcoreTemplate.logColours(params.monochrome_logs) - String output = '' - def params_map = paramsSummaryMap(workflow, params) - def max_chars = paramsMaxChars(params_map) - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - if (group_params) { - output += colors.bold + group + colors.reset + '\n' - for (param in group_params.keySet()) { - output += " " + colors.blue + param.padRight(max_chars) + ": " + colors.green + group_params.get(param) + colors.reset + '\n' - } - output += '\n' - } - } - output += "!! Only displaying parameters that differ from the pipeline defaults !!\n" - output += NfcoreTemplate.dashedLine(params.monochrome_logs) - return output - } - - // - // Loop over nested exceptions and print the causingException - // - private static void printExceptions(ex_json, params_json, log, enums, limit=5) { - def causingExceptions = ex_json['causingExceptions'] - if (causingExceptions.length() == 0) { - def m = ex_json['message'] =~ /required key \[([^\]]+)\] not found/ - // Missing required param - if (m.matches()) { - log.error "* Missing required parameter: --${m[0][1]}" - } - // Other base-level error - else if (ex_json['pointerToViolation'] == '#') { - log.error "* ${ex_json['message']}" - } - // Error with specific param - else { - def param = ex_json['pointerToViolation'] - ~/^#\// - def param_val = params_json[param].toString() - if (enums.containsKey(param)) { - def error_msg = "* --${param}: '${param_val}' is not a valid choice (Available choices" - if (enums[param].size() > limit) { - log.error "${error_msg} (${limit} of ${enums[param].size()}): ${enums[param][0..limit-1].join(', ')}, ... 
)" - } else { - log.error "${error_msg}: ${enums[param].join(', ')})" - } - } else { - log.error "* --${param}: ${ex_json['message']} (${param_val})" - } - } - } - for (ex in causingExceptions) { - printExceptions(ex, params_json, log, enums) - } - } - - // - // Remove an element from a JSONArray - // - private static JSONArray removeElement(json_array, element) { - def list = [] - int len = json_array.length() - for (int i=0;i - if(raw_schema.keySet().contains('definitions')){ - raw_schema.definitions.each { definition -> - for (key in definition.keySet()){ - if (definition[key].get("properties").keySet().contains(ignore_param)){ - // Remove the param to ignore - definition[key].get("properties").remove(ignore_param) - // If the param was required, change this - if (definition[key].has("required")) { - def cleaned_required = removeElement(definition[key].required, ignore_param) - definition[key].put("required", cleaned_required) - } - } - } - } - } - if(raw_schema.keySet().contains('properties') && raw_schema.get('properties').keySet().contains(ignore_param)) { - raw_schema.get("properties").remove(ignore_param) - } - if(raw_schema.keySet().contains('required') && raw_schema.required.contains(ignore_param)) { - def cleaned_required = removeElement(raw_schema.required, ignore_param) - raw_schema.put("required", cleaned_required) - } - } - return raw_schema - } - - // - // Clean and check parameters relative to Nextflow native classes - // - private static Map cleanParameters(params) { - def new_params = params.getClass().newInstance(params) - for (p in params) { - // remove anything evaluating to false - if (!p['value']) { - new_params.remove(p.key) - } - // Cast MemoryUnit to String - if (p['value'].getClass() == nextflow.util.MemoryUnit) { - new_params.replace(p.key, p['value'].toString()) - } - // Cast Duration to String - if (p['value'].getClass() == nextflow.util.Duration) { - new_params.replace(p.key, p['value'].toString().replaceFirst(/d(?!\S)/, "day")) - } - // Cast LinkedHashMap to String - if (p['value'].getClass() == LinkedHashMap) { - new_params.replace(p.key, p['value'].toString()) - } - } - return new_params - } - - // - // This function tries to read a JSON params file - // - private static LinkedHashMap paramsLoad(String json_schema) { - def params_map = new LinkedHashMap() - try { - params_map = paramsRead(json_schema) - } catch (Exception e) { - println "Could not read parameters settings from JSON. $e" - params_map = new LinkedHashMap() - } - return params_map - } - - // - // Method to actually read in JSON file using Groovy. - // Group (as Key), values are all parameters - // - Parameter1 as Key, Description as Value - // - Parameter2 as Key, Description as Value - // .... 
- // Group - // - - private static LinkedHashMap paramsRead(String json_schema) throws Exception { - def json = new File(json_schema).text - def Map schema_definitions = (Map) new JsonSlurper().parseText(json).get('definitions') - def Map schema_properties = (Map) new JsonSlurper().parseText(json).get('properties') - /* Tree looks like this in nf-core schema - * definitions <- this is what the first get('definitions') gets us - group 1 - title - description - properties - parameter 1 - type - description - parameter 2 - type - description - group 2 - title - description - properties - parameter 1 - type - description - * properties <- parameters can also be ungrouped, outside of definitions - parameter 1 - type - description - */ - - // Grouped params - def params_map = new LinkedHashMap() - schema_definitions.each { key, val -> - def Map group = schema_definitions."$key".properties // Gets the property object of the group - def title = schema_definitions."$key".title - def sub_params = new LinkedHashMap() - group.each { innerkey, value -> - sub_params.put(innerkey, value) - } - params_map.put(title, sub_params) - } - - // Ungrouped params - def ungrouped_params = new LinkedHashMap() - schema_properties.each { innerkey, value -> - ungrouped_params.put(innerkey, value) - } - params_map.put("Other parameters", ungrouped_params) - - return params_map - } - - // - // Get maximum number of characters across all parameter names - // - private static Integer paramsMaxChars(params_map) { - Integer max_chars = 0 - for (group in params_map.keySet()) { - def group_params = params_map.get(group) // This gets the parameters of that particular group - for (param in group_params.keySet()) { - if (param.size() > max_chars) { - max_chars = param.size() - } - } - } - return max_chars - } -} diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy index 25a0a74a..408951ae 100755 --- a/lib/NfcoreTemplate.groovy +++ b/lib/NfcoreTemplate.groovy @@ -128,7 +128,7 @@ class NfcoreTemplate { def email_html = html_template.toString() // Render the sendmail template - def max_multiqc_email_size = params.max_multiqc_email_size as nextflow.util.MemoryUnit + def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? 
params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "$projectDir", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ] def sf = new File("$projectDir/assets/sendmail_template.txt") def sendmail_template = engine.createTemplate(sf).make(smail_fields) diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy index e6f4f1a2..6ec444fb 100755 --- a/lib/WorkflowMain.groovy +++ b/lib/WorkflowMain.groovy @@ -19,40 +19,11 @@ class WorkflowMain { " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md" } - // - // Generate help string - // - public static String help(workflow, params) { - def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --fasta human.fasta --gtf human.gtf -profile docker" - def help_string = '' - help_string += NfcoreTemplate.logo(workflow, params.monochrome_logs) - help_string += NfcoreSchema.paramsHelp(workflow, params, command) - help_string += '\n' + citation(workflow) + '\n' - help_string += NfcoreTemplate.dashedLine(params.monochrome_logs) - return help_string - } - - // - // Generate parameter summary log string - // - public static String paramsSummaryLog(workflow, params) { - def summary_log = '' - summary_log += NfcoreTemplate.logo(workflow, params.monochrome_logs) - summary_log += NfcoreSchema.paramsSummaryLog(workflow, params) - summary_log += '\n' + citation(workflow) + '\n' - summary_log += NfcoreTemplate.dashedLine(params.monochrome_logs) - return summary_log - } // // Validate parameters and print summary to screen // public static void initialise(workflow, params, log) { - // Print help to screen if required - if (params.help) { - log.info help(workflow, params) - System.exit(0) - } // Print workflow version and exit on --version if (params.version) { @@ -61,14 +32,6 @@ class WorkflowMain { System.exit(0) } - // Print parameter summary log to screen - log.info paramsSummaryLog(workflow, params) - - // Validate workflow parameters via the JSON schema - if (params.validate_params) { - NfcoreSchema.validateParameters(workflow, params, log) - } - // Check that a -profile or Nextflow config has been provided to run the pipeline NfcoreTemplate.checkConfigProvided(workflow, log) diff --git a/lib/WorkflowScrnaseq.groovy b/lib/WorkflowScrnaseq.groovy index 15584b23..5767add6 100755 --- a/lib/WorkflowScrnaseq.groovy +++ b/lib/WorkflowScrnaseq.groovy @@ -49,15 +49,57 @@ class WorkflowScrnaseq { return yaml_file_text } - public static String methodsDescriptionText(run_workflow, mqc_methods_yaml) { + // + // Generate methods description for MultiQC + // + + public static String toolCitationText(params) { + + // TODO Optionally add in-text citation tools to this list. + // Can use ternary operators to dynamically construct based on conditions, e.g. params["run_xyz"] ? "Tool (Foo et al. 2023)" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def citation_text = [ + "Tools used in the workflow included:", + "FastQC (Andrews 2010),", + "MultiQC (Ewels et al. 2016)", + "." + ].join(' ').trim() + + return citation_text + } + + public static String toolBibliographyText(params) { + + // TODO Optionally add bibliographic entries to this list. + // Can use ternary operators to dynamically construct based on conditions, e.g. params["run_xyz"] ? "
<li>Author (2023) Pub name, Journal, DOI</li>" : "", + // Uncomment function in methodsDescriptionText to render in MultiQC report + def reference_text = [ + "
<li>Andrews S, (2010) FastQC, URL: https://www.bioinformatics.babraham.ac.uk/projects/fastqc/).</li>", + "<li>Ewels, P., Magnusson, M., Lundin, S., & Käller, M. (2016). MultiQC: summarize analysis results for multiple tools and samples in a single report. Bioinformatics, 32(19), 3047–3048. doi: 10.1093/bioinformatics/btw354</li>" ].join(' ').trim() + + return reference_text + } + + public static String methodsDescriptionText(run_workflow, mqc_methods_yaml, params) { // Convert to a named map so it can be used with the familiar NXF ${workflow} variable syntax in the MultiQC YML file def meta = [:] meta.workflow = run_workflow.toMap() meta["manifest_map"] = run_workflow.manifest.toMap() + // Pipeline DOI meta["doi_text"] = meta.manifest_map.doi ? "(doi: ${meta.manifest_map.doi})" : "" meta["nodoi_text"] = meta.manifest_map.doi ? "": "
<li>If available, make sure to update the text to include the Zenodo DOI of the version of the pipeline used. </li>" + // Tool references + meta["tool_citations"] = "" + meta["tool_bibliography"] = "" + + // TODO Only uncomment below if logic in toolCitationText/toolBibliographyText has been filled! + //meta["tool_citations"] = toolCitationText(params).replaceAll(", \\.", ".").replaceAll("\\. \\.", ".").replaceAll(", \\.", ".") + //meta["tool_bibliography"] = toolBibliographyText(params) + + def methods_text = mqc_methods_yaml.text def engine = new SimpleTemplateEngine() diff --git a/main.nf b/main.nf index fe560c6d..670b64ce 100644 --- a/main.nf +++ b/main.nf @@ -20,6 +20,22 @@ nextflow.enable.dsl = 2 params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta') params.gtf = WorkflowMain.getGenomeAttribute(params, 'gtf') +include { validateParameters; paramsHelp } from 'plugin/nf-validation' + +// Print help message if needed +if (params.help) { + def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs) + def citation = '\n' + WorkflowMain.citation(workflow) + '\n' + def String command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --genome GRCh37 -profile docker" + log.info logo + paramsHelp(command) + citation + NfcoreTemplate.dashedLine(params.monochrome_logs) + System.exit(0) +} + +// Validate input parameters +if (params.validate_params) { + validateParameters() +} + WorkflowMain.initialise(workflow, params, log) /* diff --git a/modules.json b/modules.json index 76e69565..95175143 100644 --- a/modules.json +++ b/modules.json @@ -7,17 +7,17 @@ "nf-core": { "cellranger/count": { "branch": "master", - "git_sha": "4b7d4863a5883b76e6bff13b6e52468fab090c5b", + "git_sha": "716ef3019b66772a817b417078edce2f7b337858", "installed_by": ["modules"] }, "cellranger/mkgtf": { "branch": "master", - "git_sha": "4b7d4863a5883b76e6bff13b6e52468fab090c5b", + "git_sha": "716ef3019b66772a817b417078edce2f7b337858", "installed_by": ["modules"] }, "cellranger/mkref": { "branch": "master", - "git_sha": "4b7d4863a5883b76e6bff13b6e52468fab090c5b", + "git_sha": "716ef3019b66772a817b417078edce2f7b337858", "installed_by": ["modules"] }, "custom/dumpsoftwareversions": { @@ -27,7 +27,7 @@ }, "fastqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "bd8092b67b5103bdd52e300f75889442275c3117", "installed_by": ["modules"] }, "gffread": { @@ -37,7 +37,7 @@ }, "gunzip": { "branch": "master", - "git_sha": "5c460c5a4736974abde2843294f35307ee2b0e5e", + "git_sha": "e06548bfa36ee31869b81041879dd6b3a83b1d57", "installed_by": ["modules"] }, "kallistobustools/count": { @@ -52,7 +52,7 @@ }, "multiqc": { "branch": "master", - "git_sha": "911696ea0b62df80e900ef244d7867d177971f73", + "git_sha": "a6e11ac655e744f7ebc724be669dd568ffdc0e80", "installed_by": ["modules"] }, "star/genomegenerate": { diff --git a/modules/local/mtx_to_seurat.nf b/modules/local/mtx_to_seurat.nf index cc74e906..4351f4b3 100644 --- a/modules/local/mtx_to_seurat.nf +++ b/modules/local/mtx_to_seurat.nf @@ -20,9 +20,9 @@ process MTX_TO_SEURAT { script: def aligner = params.aligner if (params.aligner == "cellranger") { - matrix = "filtered_feature_bc_matrix/matrix.mtx.gz" - barcodes = "filtered_feature_bc_matrix/barcodes.tsv.gz" - features = "filtered_feature_bc_matrix/features.tsv.gz" + matrix = "matrix.mtx.gz" + barcodes = "barcodes.tsv.gz" + features = "features.tsv.gz" } else if (params.aligner == "kallisto") { matrix = "*count/counts_unfiltered/*.mtx" barcodes = "*count/counts_unfiltered/*.barcodes.txt" diff --git a/modules/nf-core/cellranger/count/README.md
b/modules/nf-core/cellranger/count/README.md new file mode 100644 index 00000000..feafbfa9 --- /dev/null +++ b/modules/nf-core/cellranger/count/README.md @@ -0,0 +1,99 @@ +# Automatically renaming files for Cellranger + +See also https://github.com/nf-core/scrnaseq/issues/241 and the [Cellranger documentation](https://support.10xgenomics.com/single-cell-gene-expression/software/pipelines/latest/using/fastq-input). + +## Motivation + +Cellranger (and also Spaceranger, and probably other 10x pipelines) relies on the following inputs + +- `--fastqs`, to point to a directory with fastq files that are named according to `{sample_name}_S{i}_L00{j}_{R1,R2}_001.fastq.gz`. +- `--sample`, with a sample name (that is the prefix of all associated fastq files) + +In the easiest case, `--fastqs` points to a directory that contains all fastqs of a single sample that already follow the naming conventions. However, it's not always that easy: + +1. the directory may contain fastqs for multiple samples + - this is not a problem for cellranger; it will automatically choose the correct fastq files via the `--sample` flag as long as they follow the naming convention, **but** + - if we stage the whole folder (or all files in that folder) using nextflow, it breaks caching in that if an additional sample (or any other file) gets added to the folder, the cache gets invalidated for _all_ samples. +2. the sample has been sequenced across multiple flow cells + - In this case, we need to specify multiple input folders. Cellranger allows passing a _list of directories_, e.g. `--fastqs=/path/dir1,path/dir2` + - Staging all _files_ in these folders into a single directory using nextflow doesn't do the job, as there may be duplicate file names across the different folders. +3. the raw sequencing data may have been downloaded from a sequence database and doesn't follow the naming convention anymore. + - In that case we need to rename the files to follow the bcl2fastq naming convention. + +## Solution + +Rename files automatically to match the `{sample_name}_S{i}_L00{j}_{R1,R2}_001.fastq.gz` pattern. We assume that +files in the input channel are ordered according to `file1 R1, file1 R2, file2 R1, file2 R2, ...`. +If the files are explicitly listed in a samplesheet, we have this order anyway. If the files follow any sensible naming +convention, this order can be achieved by sorting by filename. + +## Does renaming the files change the result? + +No. Here's how the same dataset was tested in four different scenarios. The result is the same down +to identical md5sums of the output files. + +### Prepare datasets + +1. Download dataset: + +``` +curl https://s3-us-west-2.amazonaws.com/10x.files/samples/cell-exp/6.1.2/10k_PBMC_3p_nextgem_Chromium_X_intron/10k_PBMC_3p_nextgem_Chromium_X_intron_fastqs.tar | tar -xv +INPUT_DIR=10k_PBMC_3p_nextgem_Chromium_X_fastqs +# remove index files +rm $INPUT_DIR/*_I{1,2}_*.fastq.gz +``` + +2. Simulate scenario where files were generated using multiple flow cells: + +``` +MFC=fastq_multiflowcell +mkdir -p $MFC/fc{1,2,3,4} +for f in $INPUT_DIR/*.fastq.gz; do ; ln $f $MFC; done +for i in $(seq 4) ; do ; mv $MFC/*L00$i* $MFC/fc$i ; done +for i in $(seq 4) ; do ; rename L00$i L001 $MFC/fc$i/*; done +``` + +3. Simulate scenario where sample was multiplexed: + +``` +MPX=fastq_multiplexed +mkdir $MPX +for f in $INPUT_DIR/*.fastq.gz; do ; ln $f $MPX; done +for i in $(seq 4) ; do ; rename S1_L00$i S${i}_L001 $MPX/* ; done +``` + +4.
Concatenate files + +``` +mkdir fastq_cat +cat $INPUT_DIR/*_R1_*.fastq.gz > fastq_cat/10k_PBMC_3p_nextgem_Chromium_X_S1_L001_R1_001.fastq.gz +cat $INPUT_DIR/*_R2_*.fastq.gz > fastq_cat/10k_PBMC_3p_nextgem_Chromium_X_S1_L001_R2_001.fastq.gz +``` + +### Run cellranger + +``` +singularity pull docker://docker.io/nfcore/cellranger:7.1.0 +cd $INPUT_DIR && singularity exec -B $(pwd):$(pwd) -B /cfs:/cfs docker://docker.io/nfcore/cellranger:7.1.0 cellranger count --localcores=8 --id=10k_PBMC_3p_nextgem_Chromium_X --fastqs=. --transcriptome=/cfs/sturmgre/tmp/cellranger-ref/refdata-gex-GRCh38-2020-A --localmem=64 +cd $MFC && singularity exec -B $(pwd):$(pwd) -B /cfs:/cfs docker://docker.io/nfcore/cellranger:7.1.0 cellranger count --localcores=8 --id=10k_PBMC_3p_nextgem_Chromium_X --fastqs=fc1,fc2,fc3,fc4 --transcriptome=/cfs/sturmgre/tmp/cellranger-ref/refdata-gex-GRCh38-2020-A --localmem=64 +cd $MPX && singularity exec -B $(pwd):$(pwd) -B /cfs:/cfs docker://docker.io/nfcore/cellranger:7.1.0 cellranger count --localcores=8 --id=10k_PBMC_3p_nextgem_Chromium_X --fastqs=. --transcriptome=/cfs/sturmgre/tmp/cellranger-ref/refdata-gex-GRCh38-2020-A --localmem=64 +cd fastq_cat && singularity exec -B $(pwd):$(pwd) -B /cfs:/cfs docker://docker.io/nfcore/cellranger:7.1.0 cellranger count --localcores=8 --id=10k_PBMC_3p_nextgem_Chromium_X --fastqs=. --transcriptome=/cfs/sturmgre/tmp/cellranger-ref/refdata-gex-GRCh38-2020-A --localmem=64 +``` + +### Check results + +``` +> md5sum **/outs/*.h5 | sort +2a7a5a022f01cb8d95965980f0d95ca5 10k_PBMC_3p_nextgem_Chromium_X_fastqs/10k_PBMC_3p_nextgem_Chromium_X/outs/raw_feature_bc_matrix.h5 +2a7a5a022f01cb8d95965980f0d95ca5 fastq_cat/10k_PBMC_3p_nextgem_Chromium_X/outs/raw_feature_bc_matrix.h5 +2a7a5a022f01cb8d95965980f0d95ca5 fastq_multiflowcell/10k_PBMC_3p_nextgem_Chromium_X/outs/raw_feature_bc_matrix.h5 +2a7a5a022f01cb8d95965980f0d95ca5 fastq_multiplexed/10k_PBMC_3p_nextgem_Chromium_X/outs/raw_feature_bc_matrix.h5 +533f4156f2d90152594ebc587b7fde0a 10k_PBMC_3p_nextgem_Chromium_X_fastqs/10k_PBMC_3p_nextgem_Chromium_X/outs/filtered_feature_bc_matrix.h5 +533f4156f2d90152594ebc587b7fde0a fastq_cat/10k_PBMC_3p_nextgem_Chromium_X/outs/filtered_feature_bc_matrix.h5 +533f4156f2d90152594ebc587b7fde0a fastq_multiflowcell/10k_PBMC_3p_nextgem_Chromium_X/outs/filtered_feature_bc_matrix.h5 +533f4156f2d90152594ebc587b7fde0a fastq_multiplexed/10k_PBMC_3p_nextgem_Chromium_X/outs/filtered_feature_bc_matrix.h5 +cd33d3aa95c0d5cda7cf50908c5399c1 10k_PBMC_3p_nextgem_Chromium_X_fastqs/10k_PBMC_3p_nextgem_Chromium_X/outs/molecule_info.h5 +cd33d3aa95c0d5cda7cf50908c5399c1 fastq_cat/10k_PBMC_3p_nextgem_Chromium_X/outs/molecule_info.h5 +cd33d3aa95c0d5cda7cf50908c5399c1 fastq_multiflowcell/10k_PBMC_3p_nextgem_Chromium_X/outs/molecule_info.h5 +cd33d3aa95c0d5cda7cf50908c5399c1 fastq_multiplexed/10k_PBMC_3p_nextgem_Chromium_X/outs/molecule_info.h5 +``` diff --git a/modules/nf-core/cellranger/count/main.nf b/modules/nf-core/cellranger/count/main.nf index 6aacdbc3..d7a191fc 100644 --- a/modules/nf-core/cellranger/count/main.nf +++ b/modules/nf-core/cellranger/count/main.nf @@ -4,13 +4,8 @@ process CELLRANGER_COUNT { container "nf-core/cellranger:7.1.0" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "CELLRANGER_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." 
- } - input: - tuple val(meta), path(reads) + tuple val(meta), path(reads, stageAs: "fastq_???/*") path reference output: @@ -21,26 +16,19 @@ task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}" - def reference_name = reference.name - """ - cellranger \\ - count \\ - --id='$prefix' \\ - --fastqs=. \\ - --transcriptome=$reference_name \\ - --localcores=$task.cpus \\ - --localmem=${task.memory.toGiga()} \\ - $args - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - cellranger: \$(echo \$( cellranger --version 2>&1) | sed 's/^.*[^0-9]\\([0-9]*\\.[0-9]*\\.[0-9]*\\).*\$/\\1/' ) - END_VERSIONS - """ + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CELLRANGER_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + template "cellranger_count.py" stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CELLRANGER_COUNT module does not support Conda. Please use Docker / Singularity / Podman instead." + } def prefix = task.ext.prefix ?: "${meta.id}" """ mkdir -p "${prefix}/outs/" diff --git a/modules/nf-core/cellranger/count/meta.yml b/modules/nf-core/cellranger/count/meta.yml index 51d82dd5..c7d82bbc 100644 --- a/modules/nf-core/cellranger/count/meta.yml +++ b/modules/nf-core/cellranger/count/meta.yml @@ -21,9 +21,17 @@ input: - reads: type: file description: | - List of input FastQ files of size 1 and 2 for single-end and paired-end data, - respectively. - pattern: "${Sample_Name}_S1_L00${Lane_Number}_${I1,I2,R1,R2}_001.fastq.gz" + List of input FastQ files. The order of the input files MUST be ["sample1 R1", "sample1 R2", "sample2 R1", + "sample2 R2", ...]. This can usually be achieved by sorting the input files by file name. + + Background: 10x data is always paired-end with R1 containing cell barcode and UMI + and R2 containing the actual read sequence. Cell Ranger requires files to adhere to the following file-name + convention: `${Sample_Name}_S1_L00${Lane_Number}_${R1,R2}_001.fastq.gz`. This module automatically + renames files to match this convention based on the order of input files to avoid various + issues (see https://github.com/nf-core/scrnaseq/issues/241). To avoid mistakes, the module + throws an error if a pair of R1 and R2 fastq files does not have the same filename except for the "_R1"/"_R2" part. + Renaming the files does not affect the results (see README.md for detailed tests). + pattern: "*{R1,R2}*.fastq.gz" - reference: type: directory description: Folder containing all the reference indices needed by Cell Ranger diff --git a/modules/nf-core/cellranger/count/templates/cellranger_count.py b/modules/nf-core/cellranger/count/templates/cellranger_count.py new file mode 100644 index 00000000..b121a538 --- /dev/null +++ b/modules/nf-core/cellranger/count/templates/cellranger_count.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 +from subprocess import run +from pathlib import Path +from textwrap import dedent +import shlex + + +def chunk_iter(seq, size): + """iterate over `seq` in chunks of `size`""" + return (seq[pos : pos + size] for pos in range(0, len(seq), size)) + + +sample_id = "${meta.id}" + +# get fastqs, ordered by path.
Files are staged into +# - "fastq_001/{original_name.fastq.gz}" +# - "fastq_002/{original_name.fastq.gz}" +# - ... +# Since we require fastq files in the input channel to be ordered such that an R1/R2 pair +# of files follows each other, ordering will get us a sequence of [R1, R2, R1, R2, ...] +fastqs = sorted(Path(".").glob("fastq_*/*")) +assert len(fastqs) % 2 == 0 + +# target directory in which the renamed fastqs will be placed +fastq_all = Path("./fastq_all") +fastq_all.mkdir(exist_ok=True) + + +for i, (r1, r2) in enumerate(chunk_iter(fastqs, 2)): + if r1.name.replace("R1", "R2") != r2.name: + raise AssertionError( + dedent( + f"""\ + We expect R1 and R2 of the same sample to have the same filename except for R1/R2. + Files involved: + - {r1} + - {r2} + """ + ) + ) + r1.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R1_001.fastq.gz") + r2.rename(fastq_all / f"{sample_id}_S1_L{i:03d}_R2_001.fastq.gz") + +run( + # fmt: off + [ + "cellranger", "count", + "--id", "${prefix}", + "--fastqs", str(fastq_all), + "--transcriptome", "${reference.name}", + "--localcores", "${task.cpus}", + "--localmem", "${task.memory.toGiga()}", + *shlex.split("""${args}""") + ], + # fmt: on + check=True, +) + +# Output version information +version = run( + ["cellranger", "-V"], + text=True, + check=True, + capture_output=True, +).stdout.replace("cellranger cellranger-", "") + +# alas, no `pyyaml` pre-installed in the cellranger container +with open("versions.yml", "w") as f: + f.write('"${task.process}":\\n') + f.write(f' cellranger: "{version}"\\n') diff --git a/modules/nf-core/cellranger/mkgtf/main.nf b/modules/nf-core/cellranger/mkgtf/main.nf index 5411b0fd..e0b0dd67 100644 --- a/modules/nf-core/cellranger/mkgtf/main.nf +++ b/modules/nf-core/cellranger/mkgtf/main.nf @@ -4,11 +4,6 @@ process CELLRANGER_MKGTF { container "nf-core/cellranger:7.1.0" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "CELLRANGER_MKGTF module does not support Conda. Please use Docker / Singularity / Podman instead." - } - input: path gtf @@ -20,6 +15,10 @@ process CELLRANGER_MKGTF { task.ext.when == null || task.ext.when script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CELLRANGER_MKGTF module does not support Conda. Please use Docker / Singularity / Podman instead." + } def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${gtf.baseName}.filtered" """ diff --git a/modules/nf-core/cellranger/mkref/main.nf b/modules/nf-core/cellranger/mkref/main.nf index b4b9c547..986891b8 100644 --- a/modules/nf-core/cellranger/mkref/main.nf +++ b/modules/nf-core/cellranger/mkref/main.nf @@ -4,11 +4,6 @@ process CELLRANGER_MKREF { container "nf-core/cellranger:7.1.0" - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - exit 1, "CELLRANGER_MKREF module does not support Conda. Please use Docker / Singularity / Podman instead." - } - input: path fasta path gtf @@ -22,6 +17,10 @@ process CELLRANGER_MKREF { task.ext.when == null || task.ext.when script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "CELLRANGER_MKREF module does not support Conda. Please use Docker / Singularity / Podman instead."
+ } def args = task.ext.args ?: '' """ cellranger \\ diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 07d5e433..249f9064 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -29,7 +29,11 @@ process FASTQC { printf "%s %s\\n" $rename_to | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done - fastqc $args --threads $task.cpus $renamed_files + + fastqc \\ + $args \\ + --threads $task.cpus \\ + $renamed_files cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/gunzip/main.nf b/modules/nf-core/gunzip/main.nf index e7189d2f..73bf08cd 100644 --- a/modules/nf-core/gunzip/main.nf +++ b/modules/nf-core/gunzip/main.nf @@ -21,10 +21,14 @@ process GUNZIP { def args = task.ext.args ?: '' gunzip = archive.toString() - '.gz' """ - gunzip \\ - -f \\ + # Not calling gunzip itself because it creates files + # with the original group ownership rather than the + # default one for that user / the work directory + gzip \\ + -cd \\ $args \\ - $archive + $archive \\ + > $gunzip cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/multiqc/main.nf b/modules/nf-core/multiqc/main.nf index 1fc387be..65d7dd0d 100644 --- a/modules/nf-core/multiqc/main.nf +++ b/modules/nf-core/multiqc/main.nf @@ -1,10 +1,10 @@ process MULTIQC { label 'process_single' - conda "bioconda::multiqc=1.14" + conda "bioconda::multiqc=1.15" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' : - 'biocontainers/multiqc:1.14--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/multiqc:1.15--pyhdfd78af_0' : + 'biocontainers/multiqc:1.15--pyhdfd78af_0' }" input: path multiqc_files, stageAs: "?/*" diff --git a/nextflow.config b/nextflow.config index 6c0cc5f4..a86e4add 100644 --- a/nextflow.config +++ b/nextflow.config @@ -61,7 +61,6 @@ params { multiqc_methods_description = null // Boilerplate options - tracedir = "${params.outdir}/pipeline_info" publish_dir_mode = 'copy' email = null email_on_fail = null @@ -70,17 +69,15 @@ params { hook_url = null help = false version = false - validate_params = true - show_hidden_params = false - schema_ignore_params = 'genomes' // Config options + config_profile_name = null + config_profile_description = null custom_config_version = 'master' custom_config_base = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}" - config_profile_description = null config_profile_contact = null config_profile_url = null - config_profile_name = null + // Max resource options // Defaults only, expecting to be overwritten @@ -88,6 +85,13 @@ params { max_cpus = 16 max_time = '240.h' + // Schema validation default options + validationFailUnrecognisedParams = false + validationLenientMode = false + validationSchemaIgnoreParams = 'genomes' + validationShowHiddenParams = false + validate_params = true + } // Load base.config by default for all pipelines @@ -107,13 +111,11 @@ try { // } catch (Exception e) { // System.err.println("WARNING: Could not load nf-core/config/scrnaseq profiles: ${params.custom_config_base}/pipeline/scrnaseq.config") // } - - profiles { debug { dumpHashes = true process.beforeScript = 'echo $HOSTNAME' - cleanup = false + cleanup = false } conda { conda.enabled = true @@ -198,20 +200,29 @@ profiles { executor.cpus = 16 executor.memory = 60.GB } - public_aws_ecr { - includeConfig 
-        includeConfig 'conf/public_aws_ecr.config'
-    }
     test      { includeConfig 'conf/test.config'      }
     test_full { includeConfig 'conf/test_full.config' }
 }
 
+// Set default registry for Apptainer, Docker, Podman and Singularity independent of -profile
+// Will not be used unless Apptainer / Docker / Podman / Singularity are enabled
+// Set to your registry if you have a mirror of containers
+apptainer.registry   = 'quay.io'
+docker.registry      = 'quay.io'
+podman.registry      = 'quay.io'
+singularity.registry = 'quay.io'
+
+// Nextflow plugins
+plugins {
+    id 'nf-validation' // Validation of pipeline parameters and creation of an input channel from a sample sheet
+}
+
 // Load igenomes.config if required
 if (!params.igenomes_ignore) {
     includeConfig 'conf/igenomes.config'
 } else {
     params.genomes = [:]
 }
-
 // Export these variables to prevent local Python/R libraries from conflicting with those in the container
 // The JULIA depot path has been adjusted to a fixed path `/usr/local/share/julia` that needs to be used for packages in the container.
 // See https://apeltzer.github.io/post/03-julia-lang-nextflow/ for details on that. Once we have a common agreement on where to keep Julia packages, this is adjustable.
@@ -226,28 +237,22 @@ env {
 
 // Capture exit codes from upstream processes when piping
 process.shell = ['/bin/bash', '-euo', 'pipefail']
 
-// Set default registry for Docker and Podman independent of -profile
-// Will not be used unless Docker / Podman are enabled
-// Set to your registry if you have a mirror of containers
-docker.registry = 'quay.io'
-podman.registry = 'quay.io'
-
 def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
 timeline {
     enabled = true
-    file    = "${params.tracedir}/execution_timeline_${trace_timestamp}.html"
+    file    = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html"
 }
 report {
     enabled = true
-    file    = "${params.tracedir}/execution_report_${trace_timestamp}.html"
+    file    = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"
 }
 trace {
     enabled = true
-    file    = "${params.tracedir}/execution_trace_${trace_timestamp}.txt"
+    file    = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
 }
 dag {
     enabled = true
-    file    = "${params.tracedir}/pipeline_dag_${trace_timestamp}.html"
+    file    = "${params.outdir}/pipeline_info/pipeline_dag_${trace_timestamp}.html"
 }
 
 manifest {
@@ -256,7 +261,7 @@ manifest {
     homePage        = 'https://github.com/nf-core/scrnaseq'
     description     = """Pipeline for processing 10x Genomics single cell rnaseq data"""
     mainScript      = 'main.nf'
-    nextflowVersion = '!>=22.10.1'
+    nextflowVersion = '!>=23.04.0'
     version         = '2.4.0dev'
     doi             = '10.5281/zenodo.3568187'
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index 2061e7c0..e31e273c 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -14,9 +14,10 @@
         "properties": {
             "input": {
                 "type": "string",
+                "format": "file-path",
+                "exists": true,
                 "mimetype": "text/csv",
                 "pattern": "^\\S+\\.csv$",
-                "fa_icon": "fas fa-dna",
                 "description": "Path to comma-separated file containing information about the samples in the experiment.",
                 "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 4 columns, and a header row. See [usage docs](https://nf-co.re/rnaseq/usage#samplesheet-input)."
             },
@@ -101,6 +102,7 @@
             "fasta": {
                 "type": "string",
                 "format": "file-path",
+                "exists": true,
                 "mimetype": "text/plain",
                 "pattern": "^\\S+\\.fn?a(sta)?(\\.gz)?$",
                 "description": "Path to FASTA genome file.",
@@ -326,7 +328,7 @@
             "description": "Maximum amount of time that can be requested for any single job.",
             "default": "240.h",
             "fa_icon": "far fa-clock",
-            "pattern": "^(\\d+\\.?\\s*(s|m|h|day)\\s*)+$",
+            "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$",
             "hidden": true,
             "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
         }
@@ -343,12 +345,14 @@
             "type": "boolean",
             "description": "Display help text.",
             "fa_icon": "fas fa-question-circle",
+            "default": false,
             "hidden": true
         },
         "version": {
             "type": "boolean",
             "description": "Display version and exit.",
             "fa_icon": "fas fa-question-circle",
+            "default": false,
             "hidden": true
         },
         "publish_dir_mode": {
@@ -372,6 +376,7 @@
             "type": "boolean",
             "description": "Send plain-text email instead of HTML.",
             "fa_icon": "fas fa-remove-format",
+            "default": false,
             "hidden": true
         },
         "max_multiqc_email_size": {
@@ -386,6 +391,7 @@
             "type": "boolean",
             "description": "Do not use coloured log outputs.",
             "fa_icon": "fas fa-palette",
+            "default": false,
             "hidden": true
         },
         "hook_url": {
@@ -397,6 +403,7 @@
         },
         "multiqc_config": {
             "type": "string",
+            "format": "file-path",
             "description": "Custom config file to supply to MultiQC.",
             "fa_icon": "fas fa-cog",
             "hidden": true
@@ -412,13 +419,6 @@
             "description": "Custom MultiQC yaml file containing HTML including a methods description.",
             "fa_icon": "fas fa-cog"
         },
-        "tracedir": {
-            "type": "string",
-            "description": "Directory to keep pipeline Nextflow logs and reports.",
-            "default": "${params.outdir}/pipeline_info",
-            "fa_icon": "fas fa-cogs",
-            "hidden": true
-        },
         "validate_params": {
             "type": "boolean",
             "description": "Boolean whether to validate parameters against the schema at runtime",
@@ -426,12 +426,29 @@
             "fa_icon": "fas fa-check-square",
             "hidden": true
         },
-        "show_hidden_params": {
+        "validationShowHiddenParams": {
             "type": "boolean",
             "fa_icon": "far fa-eye-slash",
             "description": "Show all params when using `--help`",
+            "default": false,
             "hidden": true,
             "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
+        },
+        "validationFailUnrecognisedParams": {
+            "type": "boolean",
+            "fa_icon": "far fa-check-circle",
+            "description": "Validation of parameters fails when an unrecognised parameter is found.",
+            "default": false,
+            "hidden": true,
+            "help_text": "By default, when an unrecognised parameter is found, it returns a warning."
+        },
+        "validationLenientMode": {
+            "type": "boolean",
+            "fa_icon": "far fa-check-circle",
+            "description": "Validation of parameters in lenient mode.",
+            "default": false,
+            "hidden": true,
+            "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
         }
     }
 }
diff --git a/subworkflows/local/alevin.nf b/subworkflows/local/alevin.nf
index a13e596d..8fc0a983 100644
--- a/subworkflows/local/alevin.nf
+++ b/subworkflows/local/alevin.nf
@@ -40,8 +40,8 @@ workflow SCRNASEQ_ALEVIN {
         transcript_tsv = SIMPLEAF_INDEX.out.transcript_tsv.collect()
         ch_versions = ch_versions.mix(SIMPLEAF_INDEX.out.versions)
 
-        if (!txp2gene) {
-            txp2gene = SIMPLEAF_INDEX.out.transcript_tsv
+        if (!txp2gene) {
+            txp2gene = SIMPLEAF_INDEX.out.transcript_tsv
         }
     }
 
@@ -69,5 +69,4 @@ workflow SCRNASEQ_ALEVIN {
     ch_versions
     alevin_results = SIMPLEAF_QUANT.out.alevin_results
     alevinqc = ALEVINQC.out.report
-    for_multiqc = SIMPLEAF_QUANT.out.alevin_results.collect{it[1]}.ifEmpty([])
 }
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 9bc9cff2..1a8381ce 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -14,6 +14,14 @@ workflow MTX_CONVERSION {
     main:
         ch_versions = Channel.empty()
 
+        // The cellranger module output contains too many files, which cause path collisions; we filter to the ones we need.
+        if ( params.aligner == "cellranger" ) {
+            mtx_matrices = mtx_matrices.map { meta, mtx_files ->
+                [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ]
+            }
+            .filter { meta, mtx_files -> mtx_files } // Remove any that are missing the relevant files
+        }
+
         //
         // Convert matrix to h5ad
         //
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 30ca6359..76531200 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -1,13 +1,12 @@
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-    VALIDATE INPUTS
+    PRINT PARAMS SUMMARY
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 */
 
-def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
+include { paramsSummaryLog; paramsSummaryMap } from 'plugin/nf-validation'
 
-// Validate input parameters
-WorkflowScrnaseq.initialise(params, log)
+def summary_params = paramsSummaryMap(workflow)
 
 def checkPathParamList = [
     params.input, params.multiqc_config, params.fasta, params.gtf,
@@ -79,6 +78,7 @@
 ch_transcript_fasta = params.transcript_fasta ? file(params.transcript_fasta): []
 ch_txp2gene = params.txp2gene ? file(params.txp2gene) : []
 ch_multiqc_alevin = Channel.empty()
 ch_multiqc_star = Channel.empty()
+ch_multiqc_cellranger = Channel.empty()
 if (params.barcode_whitelist) {
     ch_barcode_whitelist = file(params.barcode_whitelist)
 } else if (params.protocol.contains("10X")) {
@@ -114,6 +114,9 @@ workflow SCRNASEQ {
     ch_fastq = INPUT_CHECK( ch_input ).reads
     ch_versions = ch_versions.mix(INPUT_CHECK.out.versions)
 
+    // TODO: OPTIONAL, you can use nf-validation plugin to create an input channel from the samplesheet with Channel.fromSamplesheet("input")
+    // See the documentation https://nextflow-io.github.io/nf-validation/samplesheets/fromSamplesheet/
+    // ! There is currently no tooling to help you write a sample sheet schema
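+    // A minimal sketch of that approach (hypothetical -- assumes a sample sheet
+    // schema such as assets/schema_input.json has been written and referenced
+    // from nextflow_schema.json; the column names below are illustrative only):
+    //
+    //     ch_fastq = Channel.fromSamplesheet("input")
+    //         .map { meta, fastq_1, fastq_2 -> [ meta, [ fastq_1, fastq_2 ] ] }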
 
     // Run FastQC
     ch_multiqc_fastqc = Channel.empty()
@@ -158,7 +161,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
-        ch_multiqc_alevin = SCRNASEQ_ALEVIN.out.for_multiqc
+        ch_multiqc_alevin = SCRNASEQ_ALEVIN.out.alevin_results
         ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
     }
 
@@ -191,6 +194,9 @@ workflow SCRNASEQ {
         ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
         ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_out)
         ch_star_index = CELLRANGER_ALIGN.out.star_index
+        ch_multiqc_cellranger = CELLRANGER_ALIGN.out.cellranger_out.map{
+            meta, outs -> outs.findAll{ it -> it.name == "web_summary.html"}
+        }
     }
 
     // Run universc pipeline
@@ -228,16 +234,21 @@ workflow SCRNASEQ {
     workflow_summary = WorkflowScrnaseq.paramsSummaryMultiqc(workflow, summary_params)
     ch_workflow_summary = Channel.value(workflow_summary)
 
-    methods_description = WorkflowScrnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description)
+    methods_description = WorkflowScrnaseq.methodsDescriptionText(workflow, ch_multiqc_custom_methods_description, params)
     ch_methods_description = Channel.value(methods_description)
 
+    ch_multiqc_fastqc.dump(tag: 'fastqc', pretty: true)
+    ch_multiqc_alevin.dump(tag: 'alevin', pretty: true)
+    ch_multiqc_star.dump(tag: 'star', pretty: true)
+
     ch_multiqc_files = Channel.empty()
     ch_multiqc_files = ch_multiqc_files.mix(ch_workflow_summary.collectFile(name: 'workflow_summary_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(ch_methods_description.collectFile(name: 'methods_description_mqc.yaml'))
     ch_multiqc_files = ch_multiqc_files.mix(CUSTOM_DUMPSOFTWAREVERSIONS.out.mqc_yml.collect())
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_fastqc.collect{it[1]}.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin.collect{it[1]}.ifEmpty([]))
-    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star.collect{it[1]}.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_fastqc.collect{ meta, qcfile -> qcfile }.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_alevin.collect{ meta, qcfile -> qcfile }.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_star.collect{ meta, qcfile -> qcfile }.ifEmpty([]))
+    ch_multiqc_files = ch_multiqc_files.mix(ch_multiqc_cellranger.collect().ifEmpty([]))
 
     MULTIQC (
         ch_multiqc_files.collect(),