From 4b9085244350c9990f9f29b5c4c4de1f16487f00 Mon Sep 17 00:00:00 2001 From: Adam Talbot <12817534+adamrtalbot@users.noreply.github.com> Date: Fri, 8 Nov 2024 19:21:18 +0000 Subject: [PATCH 01/30] Improve docs for using the GPU accelerator directive (#5488) [ci skip] Signed-off-by: adamrtalbot <12817534+adamrtalbot@users.noreply.github.com> --- docs/reference/process.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/reference/process.md b/docs/reference/process.md index 646bebbdf4..81d36f13c1 100644 --- a/docs/reference/process.md +++ b/docs/reference/process.md @@ -232,6 +232,10 @@ The above examples will request 4 GPUs of type `nvidia-tesla-k80`. This directive is only used by certain executors. Refer to the {ref}`executor-page` page to see which executors support this directive. ::: +:::{note} +Additional options may be required to fully enable the use of accelerators. When using containers with GPUs, you must pass the GPU drivers through to the container. For Docker, this requires the option `--gpus all` in the docker run command. For Apptainer/Singularity, this requires the option `--nv`. The specific implementation details depend on the accelerator and container type being used. +::: + :::{note} The accelerator `type` option depends on the target execution platform. 
Refer to the platform-specific documentation for details on the available accelerators: From ad56c89b78a36f0d61fc0b33451148c3c755b5f6 Mon Sep 17 00:00:00 2001 From: Tom Sellman Date: Sun, 10 Nov 2024 17:42:31 +0000 Subject: [PATCH 02/30] Fix wave GCP test (#5490) Signed-off-by: Tom Sellman Signed-off-by: Paolo Di Tommaso Co-authored-by: Paolo Di Tommaso --- .github/workflows/build.yml | 6 ++++++ validation/google.sh | 3 --- validation/wave-tests/example6/nextflow.config | 11 ----------- validation/wave-tests/example6/run-aws.sh | 2 +- validation/wave-tests/example6/run-gcp.sh | 2 +- validation/wave-tests/example6/run.sh | 5 ++++- validation/wave.sh | 2 +- 7 files changed, 13 insertions(+), 18 deletions(-) diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 3cf322c98d..9a8675a013 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -90,6 +90,7 @@ jobs: - name: Test if: steps.changed-files.outputs.any_changed == 'true' run: | + env | sort # configure test env if [[ "$GOOGLE_SECRET" ]]; then echo $GOOGLE_SECRET | base64 -d > $PWD/google_credentials.json @@ -158,6 +159,11 @@ jobs: - name: Run tests run: | env | sort + # configure test env + if [[ "$GOOGLE_SECRET" ]]; then + echo $GOOGLE_SECRET | base64 -d > $PWD/google_credentials.json + export GOOGLE_APPLICATION_CREDENTIALS=$PWD/google_credentials.json + fi cat $HOME/.nextflow/scm make clean assemble install bash test-ci.sh diff --git a/validation/google.sh b/validation/google.sh index 8dcda8ab99..0dcb9606f8 100644 --- a/validation/google.sh +++ b/validation/google.sh @@ -6,9 +6,6 @@ get_abs_filename() { export NXF_CMD=${NXF_CMD:-$(get_abs_filename ../launch.sh)} -echo $GOOGLE_SECRET | base64 -d > $PWD/google_credentials.json -export GOOGLE_APPLICATION_CREDENTIALS=$PWD/google_credentials.json - [[ $TOWER_ACCESS_TOKEN ]] && OPTS='-with-tower' || OPTS='' set -x diff --git a/validation/wave-tests/example6/nextflow.config b/validation/wave-tests/example6/nextflow.config 
index f58834306f..c757b3bb7b 100644 --- a/validation/wave-tests/example6/nextflow.config +++ b/validation/wave-tests/example6/nextflow.config @@ -1,12 +1,3 @@ -process { - container = 'quay.io/nextflow/rnaseq-nf:v1.1' -} - -params { - reads = 's3://rnaseq-nf/data/ggal/lung_{1,2}.fq' - transcriptome = 's3://rnaseq-nf/data/ggal/transcript.fa' -} - docker { enabled = true envWhitelist = 'AWS_ACCESS_KEY_ID,AWS_SECRET_ACCESS_KEY' @@ -16,5 +7,3 @@ fusion { enabled = true } -workDir = 's3://nextflow-ci/wave' - diff --git a/validation/wave-tests/example6/run-aws.sh b/validation/wave-tests/example6/run-aws.sh index b63724d33c..0f2d2247fa 100644 --- a/validation/wave-tests/example6/run-aws.sh +++ b/validation/wave-tests/example6/run-aws.sh @@ -1,6 +1,6 @@ $NXF_CMD run \ rnaseq-nf \ - -profile batch \ + -profile batch,s3-data \ -with-wave \ -with-fusion \ -process.scratch false diff --git a/validation/wave-tests/example6/run-gcp.sh b/validation/wave-tests/example6/run-gcp.sh index b522ac13c5..c55dcdb3ec 100644 --- a/validation/wave-tests/example6/run-gcp.sh +++ b/validation/wave-tests/example6/run-gcp.sh @@ -1,6 +1,6 @@ $NXF_CMD run \ rnaseq-nf \ - -profile gcb \ + -profile google-batch,gs-data \ -with-wave \ -with-fusion \ -process.scratch false diff --git a/validation/wave-tests/example6/run.sh b/validation/wave-tests/example6/run.sh index 0bb6d254c5..971a65f351 100644 --- a/validation/wave-tests/example6/run.sh +++ b/validation/wave-tests/example6/run.sh @@ -1,2 +1,5 @@ -$NXF_CMD run rnaseq-nf -with-wave +$NXF_CMD run \ + rnaseq-nf \ + -with-wave \ + -w s3://nextflow-ci/wave diff --git a/validation/wave.sh b/validation/wave.sh index 6b8953a7f3..b24b718be6 100644 --- a/validation/wave.sh +++ b/validation/wave.sh @@ -25,4 +25,4 @@ echo "Test Wave running rnaseq-nf with Fusion on AWS Batch" (cd wave-tests/example6; bash run-aws.sh) echo "Test Wave running rnaseq-nf with Fusion on Google Batch" -(cd wave-tests/example6; bash run-aws.sh) +(cd wave-tests/example6; bash 
run-gcp.sh) From eaaeb3ded4cb7bf6293e06a5ddd00fb38e68b5aa Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Mon, 11 Nov 2024 12:19:54 +0100 Subject: [PATCH 03/30] Fix overlapping file lock exception (#5489) [ci fast] Signed-off-by: jorgee Signed-off-by: Paolo Di Tommaso Co-authored-by: Paolo Di Tommaso --- .../groovy/nextflow/conda/CondaCache.groovy | 23 +++++++++++++------ .../nextflow/conda/CondaCacheTest.groovy | 10 ++++---- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/conda/CondaCache.groovy b/modules/nextflow/src/main/groovy/nextflow/conda/CondaCache.groovy index 7d1ff901f5..17c605f19b 100644 --- a/modules/nextflow/src/main/groovy/nextflow/conda/CondaCache.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/conda/CondaCache.groovy @@ -22,6 +22,7 @@ import java.nio.file.Path import java.nio.file.Paths import java.util.concurrent.ConcurrentHashMap +import com.google.common.hash.Hashing import groovy.transform.CompileStatic import groovy.transform.PackageScope import groovy.util.logging.Slf4j @@ -32,7 +33,6 @@ import nextflow.file.FileMutex import nextflow.util.CacheHelper import nextflow.util.Duration import nextflow.util.Escape -import org.yaml.snakeyaml.Yaml /** * Handle Conda environment creation and caching * @@ -166,6 +166,18 @@ class CondaCache { str.endsWith('.txt') && !str.contains('\n') } + static protected String sipHash(CharSequence data) { + Hashing + .sipHash24() + .newHasher() + .putUnencodedChars(data) + .hash() + .toString() + } + + static protected String sipHash(Path path) { + sipHash(path.toAbsolutePath().normalize().toString()) + } /** * Get the path on the file system where store a Conda environment @@ -188,11 +200,8 @@ class CondaCache { try { final path = condaEnv as Path content = path.text - final yaml = (Map)new Yaml().load(content) - if( yaml.name ) - name = yaml.name - else - name = path.baseName + name = 'env-' + sipHash(path) + } catch( NoSuchFileException e ) { throw 
new IllegalArgumentException("Conda environment file does not exist: $condaEnv") @@ -205,7 +214,7 @@ class CondaCache { try { final path = condaEnv as Path content = path.text - name = path.baseName + name = 'env-' + sipHash(path) } catch( NoSuchFileException e ) { throw new IllegalArgumentException("Conda environment file does not exist: $condaEnv") diff --git a/modules/nextflow/src/test/groovy/nextflow/conda/CondaCacheTest.groovy b/modules/nextflow/src/test/groovy/nextflow/conda/CondaCacheTest.groovy index 637ae5623a..9a8baf952c 100644 --- a/modules/nextflow/src/test/groovy/nextflow/conda/CondaCacheTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/conda/CondaCacheTest.groovy @@ -89,6 +89,7 @@ class CondaCacheTest extends Specification { def cache = Spy(CondaCache) def BASE = Paths.get('/conda/envs') def ENV = folder.resolve('foo.yml') + def hash = CondaCache.sipHash(ENV) ENV.text = ''' channels: - bioconda @@ -99,13 +100,12 @@ class CondaCacheTest extends Specification { - bwa=0.7.15 ''' .stripIndent(true) // https://issues.apache.org/jira/browse/GROOVY-9423 - when: def prefix = cache.condaPrefixPath(ENV.toString()) then: 1 * cache.isYamlFilePath(ENV.toString()) 1 * cache.getCacheDir() >> BASE - prefix.toString() == '/conda/envs/foo-9416240708c49c4e627414b46a743664' + prefix.toString() == "/conda/envs/env-${hash}-9416240708c49c4e627414b46a743664" cleanup: folder?.deleteDir() @@ -118,6 +118,7 @@ class CondaCacheTest extends Specification { def cache = Spy(CondaCache) def BASE = Paths.get('/conda/envs') def ENV = Files.createTempFile('test','.yml') + def hash = CondaCache.sipHash(ENV) ENV.text = ''' name: my-env-1.1 channels: @@ -135,7 +136,7 @@ class CondaCacheTest extends Specification { then: 1 * cache.isYamlFilePath(ENV.toString()) 1 * cache.getCacheDir() >> BASE - prefix.toString() == '/conda/envs/my-env-1.1-e7fafe40ca966397a2c0d9bed7181aa7' + prefix.toString() == "/conda/envs/env-${hash}-e7fafe40ca966397a2c0d9bed7181aa7" } @@ -146,6 +147,7 @@ class 
CondaCacheTest extends Specification { def cache = Spy(CondaCache) def BASE = Paths.get('/conda/envs') def ENV = folder.resolve('bar.txt') + def hash = CondaCache.sipHash(ENV) ENV.text = ''' star=2.5.4a bwa=0.7.15 @@ -159,7 +161,7 @@ class CondaCacheTest extends Specification { 1 * cache.isYamlFilePath(ENV.toString()) 1 * cache.isTextFilePath(ENV.toString()) 1 * cache.getCacheDir() >> BASE - prefix.toString() == '/conda/envs/bar-8a4aa7db8ddb8ce4eb4d450d4814a437' + prefix.toString() == "/conda/envs/env-${hash}-8a4aa7db8ddb8ce4eb4d450d4814a437" cleanup: folder?.deleteDir() From f7fd56db91f1fe1af470e1fd6d564dec2c44d154 Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Mon, 11 Nov 2024 17:31:26 +0100 Subject: [PATCH 04/30] Fixing bug when execution with stub and no stub defined (#5473) Signed-off-by: jorgee Signed-off-by: Paolo Di Tommaso Co-authored-by: Paolo Di Tommaso --- .../groovy/nextflow/processor/TaskRun.groovy | 2 +- .../nextflow/processor/TaskRunTest.groovy | 17 +++++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy index f3926c0b60..38b4ba4782 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskRun.groovy @@ -790,7 +790,7 @@ class TaskRun implements Cloneable { * @param body A {@code BodyDef} object instance */ void resolve(BodyDef body) { - processor.session.stubRun + processor.session.stubRun && config.getStubBlock() ? 
resolveStub(config.getStubBlock()) : resolveBody(body) } diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/TaskRunTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/TaskRunTest.groovy index 9ea25d7db8..6f1e988899 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/TaskRunTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/TaskRunTest.groovy @@ -907,6 +907,23 @@ class TaskRunTest extends Specification { 0 * task.resolveStub(_) >> null } + def 'should resolve task body when no stub' () { + given: + def task = Spy(TaskRun) + task.processor = Mock(TaskProcessor) { + getSession()>>Mock(Session) { getStubRun() >> true} + } + task.config = Mock(TaskConfig) { getStubBlock()>> null } + and: + def body = Mock(BodyDef) + + when: + task.resolve(body) + then: + 1 * task.resolveBody(body) >> null + 0 * task.resolveStub(_) >> null + } + def 'should resolve task stub' () { given: def body = Mock(BodyDef) From 68fd85d220237c1ae729479489cd9c1c060c5091 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Tue, 12 Nov 2024 18:03:03 +0100 Subject: [PATCH 05/30] Fix typo [ci skip] Signed-off-by: Paolo Di Tommaso --- docs/container.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/container.md b/docs/container.md index 1d0ad99e51..c82583c7d4 100644 --- a/docs/container.md +++ b/docs/container.md @@ -123,7 +123,7 @@ This feature requires the `apptainer` tool to be installed where the workflow ex Nextflow caches those images in the `apptainer` directory in the pipeline work directory by default. However it is suggested to provide a centralised cache directory by using either the `NXF_APPTAINER_CACHEDIR` environment variable or the `apptainer.cacheDir` setting in the Nextflow config file. :::{versionadded} 21.09.0-edge -When looking for a Apptainer image file, Nextflow first checks the *library* directory, and if the image file is not found, the *cache* directory is used s usual. 
The library directory can be defined either using the `NXF_APPTAINER_LIBRARYDIR` environment variable or the `apptainer.libraryDir` configuration setting (the latter overrides the former). +When looking for a Apptainer image file, Nextflow first checks the *library* directory, and if the image file is not found, the *cache* directory is used as usual. The library directory can be defined either using the `NXF_APPTAINER_LIBRARYDIR` environment variable or the `apptainer.libraryDir` configuration setting (the latter overrides the former). ::: :::{warning} From c9115659c90aa9f9e28af15e415308b42cf8b358 Mon Sep 17 00:00:00 2001 From: Kevin Date: Wed, 13 Nov 2024 03:26:10 -0500 Subject: [PATCH 06/30] Update install docs to reflect change from 'all' to 'dist' (#5496) [ci skip] Signed-off-by: Kevin Galens Signed-off-by: Paolo Di Tommaso Co-authored-by: Paolo Di Tommaso --- docs/install.md | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/docs/install.md b/docs/install.md index efee6f883d..cb1f8bcad8 100644 --- a/docs/install.md +++ b/docs/install.md @@ -119,15 +119,16 @@ NXF_VER=24.06.0-edge nextflow info ## Standalone distribution -Nextflow has a set of {ref}`core plugins ` which are downloaded at runtime by default. There is also a standalone distribution (i.e. the `all` distribution) which comes pre-packaged with all core plugins. This distribution is mainly useful for offline environments. +The Nextflow standalone distribution (i.e. the `dist` distribution) consists of self-contained `nextflow` executable file +that includes all the application dependencies for core functionalities, and it can run without downloading third parties +libraries. This distribution is mainly useful for offline environments. -The installer for the `all` distribution can be found on the [GitHub releases page](https://github.com/nextflow-io/nextflow/releases), under the "Assets" section for a specific release. 
The installation procedure is the same as for the standard distribution, only using this URL instead of `https://get.nextflow.io`: +Note however the support for cloud services e.g. AWS, Seqera Platform, Wave, etc. still require the download +of the corresponding Nextflow plugins. + +The installer for the `dist` distribution can be found on the [GitHub releases page](https://github.com/nextflow-io/nextflow/releases), under the "Assets" section for a specific release. The installation procedure is the same as for the standard distribution, only using this URL instead of `https://get.nextflow.io`: ```bash -export NXF_VER=23.10.0 -curl -s https://github.com/nextflow-io/nextflow/releases/download/v$NXF_VER/nextflow-$NXF_VER-all +export NXF_VER=24.10.0 +curl -s https://github.com/nextflow-io/nextflow/releases/download/v$NXF_VER/nextflow-$NXF_VER-dist ``` - -:::{warning} -The `all` distribution does not support third-party plugins. Only the {ref}`core plugins ` are supported. -::: From 8041a5799bd2187e4758d453a924ffc75be4e656 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 13 Nov 2024 10:04:59 +0100 Subject: [PATCH 07/30] Document Fusion compatibility for Oracle storage (#5495) [ci skip] Signed-off-by: Paolo Di Tommaso Co-authored-by: Jordi Deu-Pons Co-authored-by: Christopher Hakkaart Co-authored-by: Ben Sherman --- docs/fusion.md | 50 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 46 insertions(+), 4 deletions(-) diff --git a/docs/fusion.md b/docs/fusion.md index 925393bc23..bf15304522 100644 --- a/docs/fusion.md +++ b/docs/fusion.md @@ -214,7 +214,7 @@ Then you can run your pipeline using the following command: nextflow run -work-dir s3:///scratch ``` -Replace `` and `` with a pipeline script and bucket or your choice, for example: +Replace `` and `` with a pipeline script and bucket of your choice, for example: ```bash nextflow run https://github.com/nextflow-io/rnaseq-nf -work-dir s3://nextflow-ci/scratch @@ -222,7 +222,7 @@ nextflow 
run https://github.com/nextflow-io/rnaseq-nf -work-dir s3://nextflow-ci :::{warning} The option `fusion.exportStorageCredentials` leaks the AWS credentials on the task launcher script created by Nextflow. -This option should only be used for development purposes. +This option should only be used for testing and development purposes. ::: ### Local execution with Minio @@ -267,11 +267,53 @@ Then you can run your pipeline using the following command: nextflow run -work-dir s3://foobar/scratch ``` -Replace `` with a pipeline script and bucket or your choice: +Replace `` with a pipeline script and bucket of your choice: :::{warning} The option `fusion.exportStorageCredentials` leaks the AWS credentials on the task launcher script created by Nextflow. -This option should only be used for development purposes. +This option should only be used for testing and development purposes. +::: + +### Local execution with Oracle Object Storage + +Fusion file system and Nextflow are compatible with [Oracle Object Storage](https://www.oracle.com/cloud/storage/object-storage/). + +:::{note} +This capability relies on the S3-like API compatibility provided by Oracle storage and not by a native support in +Nextflow and Fusion. As such it may not fully work and support all Nextflow and Fusion features. +::: + +This configuration requires the execution of your pipeline tasks using Docker or a similar container engine. 
+ +The following should be included in your Nextflow configuration file: + +```groovy +aws.region = '' +aws.accessKey = '' +aws.secretKey = '' +aws.client.endpoint = 'https://.compat.objectstorage..oraclecloud.com' +aws.client.s3PathStyleAccess = true +aws.client.protocol = 'https' +aws.client.signerOverride = 'AWSS3V4SignerType' +docker.enabled = true +docker.containerOptions = '-e FUSION_AWS_REGION=' +fusion.enabled = true +fusion.exportStorageCredentials = true +wave.enabled = true +tower.accessToken = '' // optional +``` + +Then you can run your pipeline using the following command: + +```bash +nextflow run -work-dir s3:///scratch +``` + +In the above snippet replace the placeholders `` and `` with your [Oracle Customer Secret Key](https://docs.oracle.com/en-us/iaas/Content/Identity/Tasks/managingcredentials.htm#Working2), +and the placeholders `` and `` with the namespace and region of your Oracle bucket. + +:::{warning} +The `fusion.exportStorageCredentials` option leaks the Oracle credentials to the Nextflow task launcher script and should only be used for testing and development purposes. ::: ## Advanced settings From 763a5e274c584f168e09306d0b227dbabc18a8c6 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 14 Nov 2024 11:43:33 -0600 Subject: [PATCH 08/30] Clarify behavior of template scripts (#5472) [ci skip] Signed-off-by: Ben Sherman Co-authored-by: Christopher Hakkaart --- docs/process.md | 28 +++++++++++++++++++--------- 1 file changed, 19 insertions(+), 9 deletions(-) diff --git a/docs/process.md b/docs/process.md index d84112d99a..6dba761401 100644 --- a/docs/process.md +++ b/docs/process.md @@ -161,9 +161,9 @@ In the above example, the process will execute one of several scripts depending ### Template -Process scripts can be externalised to **template** files, which can be reused across different processes and tested independently from the overall pipeline execution. 
+Process scripts can be externalized to **template** files, which allows them to be reused across different processes and tested independently from the pipeline execution. -A template is simply a shell script file that Nextflow is able to execute by using the `template` function as shown below: +A template can be used in place of an embedded script using the `template` function in the script section: ```nextflow process templateExample { @@ -179,9 +179,9 @@ workflow { } ``` -By default, Nextflow looks for the `my_script.sh` template file in the `templates` directory located alongside the Nextflow script and/or the module script in which the process is defined. Any other location can be specified by using an absolute template path. +By default, Nextflow looks for the template script in the `templates` directory located alongside the Nextflow script in which the process is defined. An absolute path can be used to specify a different location. However, this practice is discouraged because it hinders pipeline portability. -The template script may contain any code that can be executed by the underlying environment. For example: +An example template script is provided below: ```bash #!/bin/bash @@ -190,12 +190,22 @@ echo $STR echo "process completed" ``` -:::{tip} -The dollar character (`$`) is interpreted as a Nextflow variable when the script is run as a Nextflow template, whereas it is evaluated as a Bash variable when run as a Bash script. This can be very useful for testing your script independently from Nextflow execution. You only need to provide a Bash environment variable for each of the Nextflow variables that are referenced in your script. 
For example, it would be possible to execute the above script with the following command in the terminal: `STR='foo' bash templates/my_script.sh` -::: +Variables prefixed with the dollar character (`$`) are interpreted as Nextflow variables when the template script is executed by Nextflow and Bash variables when executed directly. For example, the above script can be executed from the command line by providing each input as an environment variable: + +```bash +STR='foo' bash templates/my_script.sh +``` + +The following caveats should be considered: + +- Template scripts are recommended only for Bash scripts. Languages that do not prefix variables with `$` (e.g. Python and R) can't be executed directly as a template script. + +- Variables escaped with `\$` will be interpreted as Bash variables when executed by Nextflow, but will not be interpreted as variables when executed from the command line. This practice should be avoided to ensure that the template script behaves consistently. + +- Template variables are evaluated even if they are commented out in the template script. If a template variable is missing, it will cause the pipeline to fail regardless of where it occurs in the template. :::{tip} -As a best practice, the template script should not contain any `\$` escaped variables, because these variables will not be evaluated properly when the script is executed directly. +Template scripts are generally discouraged due to the caveats described above. The best practice for using a custom script is to embed it in the process definition at first and move it to a separate file with its own command line interface once the code matures. ::: (process-shell)= @@ -227,7 +237,7 @@ In the above example, `$USER` is treated as a Bash variable, while `!{str}` is t :::{note} - Shell script definitions require the use of single-quote `'` delimited strings. When using double-quote `"` delimited strings, dollar variables are interpreted as Nextflow variables as usual. 
See {ref}`string-interpolation`. - Variables prefixed with `!` must always be enclosed in curly brackets, i.e. `!{str}` is a valid variable whereas `!str` is ignored. -- Shell scripts support the use of the {ref}`process-template` mechanism. The same rules are applied to the variables defined in the script template. +- Shell scripts support the use of the {ref}`process-template` mechanism. The same rules are applied to the variables defined in the template script. ::: (process-native)= From 7551933a2b5c0c61f7f5be51d4f760bc62bcb653 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 14 Nov 2024 12:19:44 -0600 Subject: [PATCH 09/30] Clarify meaning of `task.id` versus `task.index` (#5505) [ci skip] Signed-off-by: Ben Sherman Co-authored-by: Christopher Hakkaart --- docs/reference/process.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docs/reference/process.md b/docs/reference/process.md index 81d36f13c1..2aa13cf321 100644 --- a/docs/reference/process.md +++ b/docs/reference/process.md @@ -19,8 +19,11 @@ The following task properties are defined in the process body: : *Available only in `exec:` blocks* : The task unique hash ID. +`task.id` +: The pipeline-level task index. Corresponds to `task_id` in the {ref}`execution trace `. + `task.index` -: The task index (corresponds to `task_id` in the {ref}`execution trace `). +: The process-level task index. 
`task.name` : *Available only in `exec:` blocks* From f0a4c526f2f6e9df08bad83b3c33f1b3ae9a8fd1 Mon Sep 17 00:00:00 2001 From: Nathan Johnson Date: Fri, 15 Nov 2024 09:29:32 +0000 Subject: [PATCH 10/30] feat: RepositoryProvider.revision now public property (#5500) Signed-off-by: Nathan Johnson --- .../groovy/nextflow/scm/RepositoryProvider.groovy | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryProvider.groovy b/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryProvider.groovy index 75a6d2798e..dc87b2f827 100644 --- a/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryProvider.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/scm/RepositoryProvider.groovy @@ -73,11 +73,23 @@ abstract class RepositoryProvider { return this } + String getRevision() { + return this.revision + } + RepositoryProvider setRevision(String revision) { this.revision = revision return this } + String getProject() { + return this.project + } + + ProviderConfig getConfig() { + return this.config + } + boolean hasCredentials() { getUser() && getPassword() } From 3215afa81492c356121d7c04d4d1921f90af9f74 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Mon, 18 Nov 2024 09:14:57 +0100 Subject: [PATCH 11/30] Fix isContainerReady when wave is disabled (#5509) [ci fast] Signed-off-by: Paolo Di Tommaso --- .../seqera/wave/plugin/resolver/WaveContainerResolver.groovy | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/plugins/nf-wave/src/main/io/seqera/wave/plugin/resolver/WaveContainerResolver.groovy b/plugins/nf-wave/src/main/io/seqera/wave/plugin/resolver/WaveContainerResolver.groovy index 6d57c7ccd1..69521b9e78 100644 --- a/plugins/nf-wave/src/main/io/seqera/wave/plugin/resolver/WaveContainerResolver.groovy +++ b/plugins/nf-wave/src/main/io/seqera/wave/plugin/resolver/WaveContainerResolver.groovy @@ -129,6 +129,9 @@ class WaveContainerResolver implements ContainerResolver { @Override boolean 
isContainerReady(String key) { - return client().isContainerReady(key) + final c=client() + return c.enabled() + ? c.isContainerReady(key) + : defaultResolver.isContainerReady(key) } } From d1bbd3d010e4684230eb114a0838b5c2dd111bd7 Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Mon, 18 Nov 2024 09:24:07 +0100 Subject: [PATCH 12/30] Bump netty-common to version 4.1.115.Final Signed-off-by: Paolo Di Tommaso --- plugins/nf-amazon/build.gradle | 3 +++ plugins/nf-azure/build.gradle | 3 +++ 2 files changed, 6 insertions(+) diff --git a/plugins/nf-amazon/build.gradle b/plugins/nf-amazon/build.gradle index 05ccf6cf45..82a3709305 100644 --- a/plugins/nf-amazon/build.gradle +++ b/plugins/nf-amazon/build.gradle @@ -53,6 +53,9 @@ dependencies { constraints { api 'com.fasterxml.jackson.core:jackson-databind:2.12.7.1' } + + // address security vulnerabilities + runtimeOnly 'io.netty:netty-common:4.1.115.Final' testImplementation(testFixtures(project(":nextflow"))) testImplementation project(':nextflow') diff --git a/plugins/nf-azure/build.gradle b/plugins/nf-azure/build.gradle index 4be09b723d..79a4487ae2 100644 --- a/plugins/nf-azure/build.gradle +++ b/plugins/nf-azure/build.gradle @@ -47,6 +47,9 @@ dependencies { exclude group: 'org.slf4j', module: 'slf4j-api' } + // address security vulnerabilities + runtimeOnly 'io.netty:netty-common:4.1.115.Final' + testImplementation(testFixtures(project(":nextflow"))) testImplementation project(':nextflow') testImplementation "org.apache.groovy:groovy:4.0.24" From fa0e8e0f3d01e27dac60230d304dcec95dd1c2f5 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Mon, 18 Nov 2024 10:39:23 -0600 Subject: [PATCH 13/30] Add `env()` function (#5506) Signed-off-by: Ben Sherman Signed-off-by: Paolo Di Tommaso Co-authored-by: Paolo Di Tommaso --- docs/config.md | 9 +++++++ docs/reference/stdlib.md | 5 ++++ docs/vscode.md | 8 +++++++ .../src/main/groovy/nextflow/Nextflow.groovy | 11 +++++++++ .../groovy/nextflow/config/ConfigBase.groovy | 18 
+++++++++++--- .../test/groovy/nextflow/NextflowTest.groovy | 11 +++++++++ .../nextflow/config/ConfigParserTest.groovy | 24 +++++++++++++++---- 7 files changed, 79 insertions(+), 7 deletions(-) diff --git a/docs/config.md b/docs/config.md index ef1ac5a97c..ccdc71933a 100644 --- a/docs/config.md +++ b/docs/config.md @@ -113,6 +113,15 @@ The following constants are globally available in a Nextflow configuration file: `projectDir` : The directory where the main script is located. +## Functions + +The following functions are globally available in a Nextflow configuration file: + +`env( name )` +: :::{versionadded} 24.11.0-edge + ::: +: Get the value of the environment variable with the specified name in the Nextflow launch environment. + (config-params)= ## Parameters diff --git a/docs/reference/stdlib.md b/docs/reference/stdlib.md index 8c09f1ac96..633e51b0eb 100644 --- a/docs/reference/stdlib.md +++ b/docs/reference/stdlib.md @@ -197,6 +197,11 @@ The following functions are available in Nextflow scripts: `branchCriteria( closure )` : Create a branch criteria to use with the {ref}`operator-branch` operator. +`env( name )` +: :::{versionadded} 24.11.0-edge + ::: +: Get the value of the environment variable with the specified name in the Nextflow launch environment. + `error( message = null )` : Throw a script runtime error with an optional error message. diff --git a/docs/vscode.md b/docs/vscode.md index f132368289..c889ddb27c 100644 --- a/docs/vscode.md +++ b/docs/vscode.md @@ -275,6 +275,14 @@ The Nextflow language specification does not support implicit environment variab println "PWD = ${System.getenv('PWD')}" ``` +:::{versionadded} 24.11.0-edge +The `env()` function can be used instead of `System.getenv()`: + +```nextflow +println "PWD = ${env('PWD')}" +``` +::: + ### Restricted syntax The following patterns are still supported but have been restricted, i.e. some syntax variants have been removed. 
diff --git a/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy b/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy index 3f1257a01c..e7dc15884c 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Nextflow.groovy @@ -57,6 +57,17 @@ class Nextflow { private static final Random random = new Random() + /** + * Get the value of an environment variable from the launch environment. + * + * @param name + * The environment variable name to be referenced + * @return + * The value associate with the specified variable name or {@code null} if the variable does not exist. + */ + static String env(String name) { + return SysEnv.get(name) + } static private fileNamePattern( FilePatternSplitter splitter, Map opts ) { diff --git a/modules/nextflow/src/main/groovy/nextflow/config/ConfigBase.groovy b/modules/nextflow/src/main/groovy/nextflow/config/ConfigBase.groovy index 46a6f182da..a97e23f19a 100644 --- a/modules/nextflow/src/main/groovy/nextflow/config/ConfigBase.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/config/ConfigBase.groovy @@ -16,12 +16,12 @@ package nextflow.config -import ch.artecat.grengine.Grengine -import groovy.transform.Memoized - import java.nio.file.NoSuchFileException import java.nio.file.Path +import ch.artecat.grengine.Grengine +import groovy.transform.Memoized +import nextflow.SysEnv import nextflow.exception.IllegalConfigException import nextflow.file.FileHelper import org.codehaus.groovy.control.CompilerConfiguration @@ -74,6 +74,18 @@ abstract class ConfigBase extends Script { this.configStack = stack } + /** + * Get the value of an environment variable from the launch environment. + * + * @param name + * The environment variable name to be referenced + * @return + * The value associate with the specified variable name or {@code null} if the variable does not exist. 
+ */ + String env(String name) { + return SysEnv.get(name) + } + /** * Implements the config file include */ diff --git a/modules/nextflow/src/test/groovy/nextflow/NextflowTest.groovy b/modules/nextflow/src/test/groovy/nextflow/NextflowTest.groovy index c32a461ecb..f49db65779 100644 --- a/modules/nextflow/src/test/groovy/nextflow/NextflowTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/NextflowTest.groovy @@ -35,6 +35,17 @@ class NextflowTest extends Specification { System.getenv('CI_GROOVY_VERSION') == GroovySystem.getVersion() } + def 'should get an environment variable' () { + given: + SysEnv.push(FOO: 'FOO_VALUE') + + expect: + Nextflow.env('FOO') == 'FOO_VALUE' + + cleanup: + SysEnv.pop() + } + def testFile() { expect: Nextflow.file('file.log').toFile() == new File('file.log').canonicalFile diff --git a/modules/nextflow/src/test/groovy/nextflow/config/ConfigParserTest.groovy b/modules/nextflow/src/test/groovy/nextflow/config/ConfigParserTest.groovy index ebe6f02e47..a128f0fd36 100644 --- a/modules/nextflow/src/test/groovy/nextflow/config/ConfigParserTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/config/ConfigParserTest.groovy @@ -16,8 +16,6 @@ package nextflow.config -import spock.lang.Ignore - import java.nio.file.Files import java.nio.file.NoSuchFileException import java.nio.file.Path @@ -26,11 +24,12 @@ import com.sun.net.httpserver.Headers import com.sun.net.httpserver.HttpExchange import com.sun.net.httpserver.HttpHandler import com.sun.net.httpserver.HttpServer +import nextflow.SysEnv import nextflow.exception.ConfigParseException -import spock.lang.Specification - import nextflow.util.Duration import nextflow.util.MemoryUnit +import spock.lang.Ignore +import spock.lang.Specification /** * @@ -38,6 +37,23 @@ import nextflow.util.MemoryUnit */ class ConfigParserTest extends Specification { + def 'should get an environment variable' () { + given: + SysEnv.push(MAX_CPUS: '1') + + when: + def CONFIG = ''' + process.cpus = 
env('MAX_CPUS') + ''' + def config = new ConfigParser().parse(CONFIG) + + then: + config.process.cpus == '1' + + cleanup: + SysEnv.pop() + } + def 'should parse plugins id' () { given: def CONFIG = ''' From 99a7f3788bf500d3733d138e8e0030c36f3f022c Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 20 Nov 2024 16:56:48 +0100 Subject: [PATCH 14/30] Fix secondary exceptions from console (#5518) Signed-off-by: Paolo Di Tommaso --- .../src/main/groovy/nextflow/Session.groovy | 4 +++- .../groovy/nextflow/util/LoggerHelper.groovy | 16 ++++++++++++---- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/Session.groovy b/modules/nextflow/src/main/groovy/nextflow/Session.groovy index 9b7ff555e7..f394245259 100644 --- a/modules/nextflow/src/main/groovy/nextflow/Session.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/Session.groovy @@ -71,6 +71,7 @@ import nextflow.util.Barrier import nextflow.util.ConfigHelper import nextflow.util.Duration import nextflow.util.HistoryFile +import nextflow.util.LoggerHelper import nextflow.util.NameGenerator import nextflow.util.SysHelper import nextflow.util.ThreadPoolManager @@ -787,10 +788,11 @@ class Session implements ISession { */ void abort(Throwable cause = null) { if( aborted ) return - if( !(cause instanceof ScriptCompilationException) ) + if( cause !instanceof ScriptCompilationException ) log.debug "Session aborted -- Cause: ${cause?.message ?: cause ?: '-'}" aborted = true error = cause + LoggerHelper.aborted = true try { // log the dataflow network status def status = dumpNetworkStatus() diff --git a/modules/nextflow/src/main/groovy/nextflow/util/LoggerHelper.groovy b/modules/nextflow/src/main/groovy/nextflow/util/LoggerHelper.groovy index 75795e5667..9a23cd77fd 100644 --- a/modules/nextflow/src/main/groovy/nextflow/util/LoggerHelper.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/util/LoggerHelper.groovy @@ -16,10 +16,6 @@ package nextflow.util -import 
ch.qos.logback.core.encoder.Encoder -import ch.qos.logback.core.spi.FilterAttachable -import ch.qos.logback.core.spi.LifeCycle - import static nextflow.Const.* import java.lang.reflect.Field @@ -28,6 +24,7 @@ import java.nio.file.FileAlreadyExistsException import java.nio.file.NoSuchFileException import java.nio.file.Path import java.util.concurrent.atomic.AtomicBoolean +import java.util.concurrent.atomic.AtomicInteger import java.util.regex.Pattern import ch.qos.logback.classic.Level @@ -42,13 +39,16 @@ import ch.qos.logback.core.ConsoleAppender import ch.qos.logback.core.CoreConstants import ch.qos.logback.core.FileAppender import ch.qos.logback.core.LayoutBase +import ch.qos.logback.core.encoder.Encoder import ch.qos.logback.core.encoder.LayoutWrappingEncoder import ch.qos.logback.core.filter.Filter import ch.qos.logback.core.joran.spi.NoAutoStart import ch.qos.logback.core.rolling.FixedWindowRollingPolicy import ch.qos.logback.core.rolling.RollingFileAppender import ch.qos.logback.core.rolling.TriggeringPolicyBase +import ch.qos.logback.core.spi.FilterAttachable import ch.qos.logback.core.spi.FilterReply +import ch.qos.logback.core.spi.LifeCycle import ch.qos.logback.core.util.FileSize import groovy.transform.CompileStatic import groovy.transform.PackageScope @@ -85,6 +85,10 @@ import org.slf4j.MarkerFactory @CompileStatic class LoggerHelper { + static volatile boolean aborted + + static final AtomicInteger errCount = new AtomicInteger() + static private Logger log = LoggerFactory.getLogger(LoggerHelper) static public Marker STICKY = MarkerFactory.getMarker('sticky') @@ -419,6 +423,10 @@ class LoggerHelper { return FilterReply.NEUTRAL; } + // print to console only the very first error log and ignore the others + if( aborted && event.level==Level.ERROR && errCount.getAndIncrement()>0 ) + return FilterReply.DENY; + def logger = event.getLoggerName() def level = event.getLevel() for( int i=0; i Date: Thu, 21 Nov 2024 00:44:13 -0600 Subject: [PATCH 15/30] Update 
process snippets to comply with strict syntax (#5526) [ci skip] Signed-off-by: Ben Sherman --- docs/cache-and-resume.md | 4 ++ docs/channel.md | 1 + docs/conda.md | 15 ++-- docs/container.md | 20 +++--- docs/developer/nextflow.ast.md | 4 ++ docs/developer/plugins.md | 2 + docs/dsl1.md | 2 + docs/google.md | 10 ++- docs/metrics.md | 5 ++ docs/overview.md | 2 + docs/process.md | 70 +++++++++++++----- docs/reference/channel.md | 4 ++ docs/reference/process.md | 128 ++++++++++++++++++++------------- docs/secrets.md | 1 + docs/spack.md | 3 + docs/vscode.md | 4 ++ docs/your-first-script.md | 1 + 17 files changed, 193 insertions(+), 83 deletions(-) diff --git a/docs/cache-and-resume.md b/docs/cache-and-resume.md index e0bf78cca1..7184909aa2 100644 --- a/docs/cache-and-resume.md +++ b/docs/cache-and-resume.md @@ -148,6 +148,8 @@ process gather { input: tuple val(id), file(foo) tuple val(id), file(bar) + + script: """ merge_command $foo $bar """ @@ -168,6 +170,8 @@ workflow { process gather { input: tuple val(id), file(foo), file(bar) + + script: """ merge_command $foo $bar """ diff --git a/docs/channel.md b/docs/channel.md index 7c5c6404ac..8eb4c1b576 100644 --- a/docs/channel.md +++ b/docs/channel.md @@ -45,6 +45,7 @@ process foo { output: path 'x.txt' + script: """ echo $x > x.txt """ diff --git a/docs/conda.md b/docs/conda.md index bb7189aa05..6debcec745 100644 --- a/docs/conda.md +++ b/docs/conda.md @@ -49,9 +49,10 @@ Conda package names can specified using the `conda` directive. 
Multiple package process foo { conda 'bwa samtools multiqc' - ''' + script: + """ your_command --here - ''' + """ } ``` @@ -98,9 +99,10 @@ The path of an environment file can be specified using the `conda` directive: process foo { conda '/some/path/my-env.yaml' - ''' + script: + """ your_command --here - ''' + """ } ``` @@ -128,9 +130,10 @@ If you already have a local Conda environment, you can use it in your workflow s process foo { conda '/path/to/an/existing/env/directory' - ''' + script: + """ your_command --here - ''' + """ } ``` diff --git a/docs/container.md b/docs/container.md index c82583c7d4..8eb8df5c51 100644 --- a/docs/container.md +++ b/docs/container.md @@ -293,17 +293,19 @@ It is possible to specify a different Docker image for each process definition i process foo { container 'image_name_1' - ''' + script: + """ do this - ''' + """ } process bar { container 'image_name_2' - ''' + script: + """ do that - ''' + """ } ``` @@ -380,17 +382,19 @@ It is possible to specify a different container image for each process definitio process foo { container 'image_name_1' - ''' + script: + """ do this - ''' + """ } process bar { container 'image_name_2' - ''' + script: + """ do that - ''' + """ } ``` diff --git a/docs/developer/nextflow.ast.md b/docs/developer/nextflow.ast.md index 0fe680e344..2b3bad9150 100644 --- a/docs/developer/nextflow.ast.md +++ b/docs/developer/nextflow.ast.md @@ -32,6 +32,7 @@ process splitLetters { output: path 'chunk_*' + script: """ printf '${params.str}' | split -b 6 - chunk_ """ @@ -43,6 +44,7 @@ process convertToUpper { output: stdout + script: """ cat $x | tr '[a-z]' '[A-Z]' """ @@ -62,6 +64,7 @@ process( splitLetters( { output: path('chunk_*') + script: """ printf '${params.str}' | split -b 6 - chunk_ """ @@ -73,6 +76,7 @@ process( convertToUpper( { output: stdout + script: """ cat $x | tr '[a-z]' '[A-Z]' """ diff --git a/docs/developer/plugins.md b/docs/developer/plugins.md index e058eec355..5a95ec8f0f 100644 --- 
a/docs/developer/plugins.md +++ b/docs/developer/plugins.md @@ -141,6 +141,8 @@ You can then use this executor in your pipeline: ```nextflow process foo { executor 'my-executor' + + // ... } ``` diff --git a/docs/dsl1.md b/docs/dsl1.md index e7503fda91..04b82fec76 100644 --- a/docs/dsl1.md +++ b/docs/dsl1.md @@ -29,6 +29,7 @@ process splitLetters { output: file 'chunk_*' into letters + script: """ printf '${params.str}' | split -b 6 - chunk_ """ @@ -41,6 +42,7 @@ process convertToUpper { output: stdout result + script: """ cat $x | tr '[a-z]' '[A-Z]' """ diff --git a/docs/google.md b/docs/google.md index b187b35685..703948c103 100644 --- a/docs/google.md +++ b/docs/google.md @@ -104,6 +104,7 @@ process myTask { cpus 8 memory '40 GB' + script: """ your_command --here """ @@ -112,6 +113,7 @@ process myTask { process anotherTask { machineType 'n1-highmem-8' + script: """ your_command --here """ @@ -130,6 +132,7 @@ process myTask { memory '20 GB' machineType 'n2-*,c2-*,m3-*' + script: """ your_command --here """ @@ -148,6 +151,7 @@ process myTask { memory '20 GB' machineType 'template://my-template' + script: """ your_command --here """ @@ -341,16 +345,18 @@ process custom_resources_task { memory '40 GB' disk '200 GB' + script: """ - + your_command --here """ } process predefined_resources_task { machineType 'n1-highmem-8' + script: """ - + your_command --here """ } ``` diff --git a/docs/metrics.md b/docs/metrics.md index 8ccb43bbaa..eac8ae0e3d 100644 --- a/docs/metrics.md +++ b/docs/metrics.md @@ -21,6 +21,7 @@ In the first example, let's consider the simple use case in which a process perf process CpuUsageEx1 { cpus 2 + script: """ stress -c 1 -t 10 # compute square-root of random numbers during 10s using 1 CPU """ @@ -35,6 +36,7 @@ In the second example, some time will be spent performing pure computation and s process CpuUsageEx2 { cpus 1 + script: """ stress -c 1 -t 10 # compute square-root of random numbers during 10s using 1 CPU stress -c 1 -t 5 # compute 
square-root of random numbers during 5s using 1 CPU @@ -57,6 +59,7 @@ The third example is similar to the second one except that the pure computation process CpuUsageEx3 { cpus 2 + script: """ stress -c 2 -t 10 # compute square-root of random numbers during 10s using 2 CPUs sleep 10 # use no CPU during 10s @@ -232,6 +235,7 @@ The first and second programs are executed in `foo` and `bar` processes respecti process foo { memory '1.5 GB' + script: """ memory_vmem_1GiB_ram_0Gib """ @@ -240,6 +244,7 @@ process foo { process bar { memory '1.5 GB' + script: """ memory_vmem_1GiB_ram_1Gib """ diff --git a/docs/overview.md b/docs/overview.md index a393c823de..44b1a3cb8a 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -33,6 +33,7 @@ process blastSearch { output: path "top_hits.txt" + script: """ blastp -db $db -query $query -outfmt 6 > blast_result cat blast_result | head -n 10 | cut -f 2 > top_hits.txt @@ -47,6 +48,7 @@ process extractTopHits { output: path "sequences.txt" + script: """ blastdbcmd -db $db -entry_batch $top_hits > sequences.txt """ diff --git a/docs/process.md b/docs/process.md index 6dba761401..1803e4712c 100644 --- a/docs/process.md +++ b/docs/process.md @@ -345,7 +345,10 @@ process basicExample { input: val x - "echo process job $x" + script: + """ + echo process job $x + """ } workflow { @@ -374,7 +377,10 @@ process basicExample { input: val x - "echo process job $x" + script: + """ + echo process job $x + """ } workflow { @@ -394,7 +400,10 @@ process blastThemAll { input: path query_file - "blastp -query ${query_file} -db nr" + script: + """ + blastp -query ${query_file} -db nr + """ } workflow { @@ -428,7 +437,10 @@ process blastThemAll { input: path 'query.fa' - "blastp -query query.fa -db nr" + script: + """ + blastp -query query.fa -db nr + """ } workflow { @@ -450,6 +462,7 @@ process foo { input: path x + script: """ your_command --in $x """ @@ -497,7 +510,10 @@ process blastThemAll { input: path 'seq' - "echo seq*" + script: + """ + echo 
seq* + """ } workflow { @@ -536,7 +552,10 @@ process blastThemAll { input: path 'seq?.fa' - "cat seq1.fa seq2.fa seq3.fa" + script: + """ + cat seq1.fa seq2.fa seq3.fa + """ } workflow { @@ -559,6 +578,7 @@ process simpleCount { val x path "${x}.fa" + script: """ cat ${x}.fa | grep '>' """ @@ -582,6 +602,7 @@ process printEnv { input: env 'HELLO' + script: ''' echo $HELLO world! ''' @@ -608,6 +629,7 @@ process printAll { input: stdin + script: """ cat - """ @@ -640,6 +662,7 @@ process tupleExample { input: tuple val(x), path('input.txt') + script: """ echo "Processing $x" cat input.txt > copy @@ -665,6 +688,7 @@ process alignSequences { path seq each mode + script: """ t_coffee -in $seq -mode $mode > result """ @@ -689,6 +713,7 @@ process alignSequences { each mode each path(lib) + script: """ t_coffee -in $seq -mode $mode -lib $lib > result """ @@ -828,6 +853,7 @@ process foo { output: val x + script: """ echo $x > file """ @@ -879,6 +905,7 @@ process randomNum { output: path 'result.txt' + script: ''' echo $RANDOM > result.txt ''' @@ -919,9 +946,10 @@ process splitLetters { output: path 'chunk_*' - ''' + script: + """ printf 'Hola' | split -b 1 - chunk_ - ''' + """ } workflow { @@ -966,6 +994,7 @@ process align { output: path "${species}.aln" + script: """ t_coffee -in $seq > ${species}.aln """ @@ -1066,9 +1095,10 @@ process foo { output: path 'result.txt', hidden: true - ''' + script: + """ echo 'another new line' >> result.txt - ''' + """ } ``` @@ -1079,10 +1109,11 @@ process foo { output: tuple path('last_result.txt'), path('result.txt', hidden: true) - ''' + script: + """ echo 'another new line' >> result.txt echo 'another new line' > last_result.txt - ''' + """ } ``` ::: @@ -1099,6 +1130,7 @@ process FOO { path 'hello.txt', emit: hello path 'bye.txt', emit: bye + script: """ echo "hello" > hello.txt echo "bye" > bye.txt @@ -1215,7 +1247,7 @@ process foo { script: """ - < your job here > + your_command --here """ } ``` @@ -1255,7 +1287,9 @@ process foo { 
maxRetries 3 script: - + """ + your_command --here + """ } ``` @@ -1278,7 +1312,9 @@ process foo { maxRetries 3 script: - + """ + your_command --here + """ } ``` In the above example, the {ref}`process-memory` is set according to previous trace record metrics. In the first attempt, when no trace metrics are available, it is set to one GB. In the subsequent attempts, it doubles the previously allocated memory. See {ref}`trace-report` for more information about trace records. @@ -1294,9 +1330,9 @@ process foo { maxRetries 5 script: - ''' + """ your_command --here - ''' + """ } ``` diff --git a/docs/reference/channel.md b/docs/reference/channel.md index fa1cb16825..13e2bfd414 100644 --- a/docs/reference/channel.md +++ b/docs/reference/channel.md @@ -428,11 +428,15 @@ A process output can be assigned to a topic using the `topic` option on an outpu process foo { output: val('foo'), topic: my_topic + + // ... } process bar { output: val('bar'), topic: my_topic + + // ... } ``` diff --git a/docs/reference/process.md b/docs/reference/process.md index 2aa13cf321..7faa8097e7 100644 --- a/docs/reference/process.md +++ b/docs/reference/process.md @@ -269,6 +269,7 @@ process cpu_task { spack 'blast-plus@2.13.0' arch 'linux/x86_64', target: 'cascadelake' + script: """ blastp -query input_sequence -num_threads ${task.cpus} """ @@ -311,9 +312,10 @@ process cpu_task { executor 'slurm' array 100 - ''' + script: + """ your_command --here - ''' + """ } ``` @@ -361,6 +363,7 @@ For example: process foo { beforeScript 'source /cluster/bin/setup' + script: """ echo bar """ @@ -380,9 +383,7 @@ The cache is enabled by default, but you can disable it for a specific process b ```nextflow process noCacheThis { cache false - - script: - + // ... 
} ``` @@ -451,9 +452,10 @@ Nextflow automatically sets up an environment for the given package names listed process foo { conda 'bwa=0.7.15' - ''' + script: + """ your_command --here - ''' + """ } ``` @@ -475,8 +477,9 @@ For example: process runThisInDocker { container 'dockerbox:tag' + script: """ - + your_command --here """ } ``` @@ -505,9 +508,10 @@ process runThisWithDocker { output: path 'output.txt' - ''' + script: + """ your_command --data /db > output.txt - ''' + """ } ``` @@ -526,6 +530,7 @@ process big_job { cpus 8 executor 'sge' + script: """ blastp -query input_sequence -num_threads ${task.cpus} """ @@ -549,7 +554,9 @@ process sayHello { debug true script: - "echo Hello" + """ + echo Hello + """ } ``` @@ -570,8 +577,9 @@ process big_job { disk '2 GB' executor 'cirrus' + script: """ - your task script here + your_command --here """ } ``` @@ -631,8 +639,7 @@ For example: process ignoreAnyError { errorStrategy 'ignore' - script: - + // ... } ``` @@ -644,8 +651,7 @@ The `retry` error strategy allows you to re-submit for execution a process retur process retryIfFail { errorStrategy 'retry' - script: - + // ... } ``` @@ -691,8 +697,7 @@ The following example shows how to set the process's executor: process doSomething { executor 'sge' - script: - + // ... } ``` @@ -714,6 +719,7 @@ process mapping { path genome tuple val(sampleId), path(reads) + script: """ STAR --genomeDir $genome --readFilesIn $reads ${task.ext.args ?: ''} """ @@ -727,6 +733,8 @@ The `ext` directive can be set in the process definition: ```nextflow process mapping { ext version: '2.5.3', args: '--foo --bar' + + // ... 
} ``` @@ -785,9 +793,10 @@ The `label` directive allows the annotation of processes with mnemonic identifie process bigTask { label 'big_mem' - ''' - - ''' + script: + """ + your_command --here + """ } ``` @@ -816,8 +825,9 @@ This directive is optional and if specified overrides the cpus and memory direct process foo { machineType 'n1-highmem-8' + script: """ - + your_command --here """ } ``` @@ -840,10 +850,11 @@ process foo { maxSubmitAwait '10 mins' maxRetries 3 queue "${task.submitAttempt==1 : 'spot-compute' : 'on-demand-compute'}" + script: - ''' - your_job --here - ''' + """ + your_command --here + """ } ``` @@ -862,6 +873,7 @@ process retryIfFail { errorStrategy 'retry' maxErrors 5 + script: """ echo 'do this as that .. ' """ @@ -886,9 +898,10 @@ If you want to execute a process in a sequential manner, set this directive to o process doNotParallelizeIt { maxForks 1 - ''' - - ''' + script: + """ + your_command --here + """ } ``` @@ -903,6 +916,7 @@ process retryIfFail { errorStrategy 'retry' maxRetries 3 + script: """ echo 'do this as that .. ' """ @@ -926,8 +940,9 @@ process big_job { memory '2 GB' executor 'sge' + script: """ - your task script here + your_command --here """ } ``` @@ -960,6 +975,7 @@ In a process definition you can use the `module` directive to load a specific mo process basicExample { module 'ncbi-blast/2.2.27' + script: """ blastp -query """ @@ -972,6 +988,7 @@ You can repeat the `module` directive for each module you need to load. 
Alternat process manyModules { module 'ncbi-blast/2.2.27:t_coffee/10.0:clustalw/2.1' + script: """ blastp -query """ @@ -990,6 +1007,7 @@ process big_job { penv 'smp' executor 'sge' + script: """ blastp -query input_sequence -num_threads ${task.cpus} """ @@ -1012,9 +1030,10 @@ For example: process your_task { pod env: 'FOO', value: 'bar' - ''' + script: + """ echo $FOO - ''' + """ } ``` @@ -1209,9 +1228,10 @@ process foo { output: path 'chunk_*' - ''' + script: + """ printf 'Hola' | split -b 1 - chunk_ - ''' + """ } ``` @@ -1234,9 +1254,10 @@ process foo { output: path 'chunk_*' - ''' + script: + """ printf 'Hola' | split -b 1 - chunk_ - ''' + """ } ``` @@ -1306,8 +1327,9 @@ process grid_job { queue 'long' executor 'sge' + script: """ - your task script here + your_command --here """ } ``` @@ -1339,9 +1361,10 @@ The `resourceLabels` directive allows you to specify custom name-value pairs tha process my_task { resourceLabels region: 'some-region', user: 'some-username' - ''' - - ''' + script: + """ + your_command --here + """ } ``` @@ -1377,9 +1400,9 @@ process my_task { resourceLimits cpus: 24, memory: 768.GB, time: 72.h script: - ''' + """ your_command --here - ''' + """ } ``` @@ -1417,9 +1440,10 @@ process simpleTask { output: path 'data_out' - ''' - - ''' + script: + """ + your_command --here + """ } ``` @@ -1456,9 +1480,10 @@ The `shell` directive allows you to define a custom shell command for process sc process doMoreThings { shell '/bin/bash', '-euo', 'pipefail' - ''' - your_command_here - ''' + script: + """ + your_command --here + """ } ``` @@ -1480,9 +1505,10 @@ Nextflow automatically sets up an environment for the given package names listed process foo { spack 'bwa@0.7.15' - ''' + script: + """ your_command --here - ''' + """ } ``` @@ -1587,6 +1613,7 @@ process foo { input: val code + script: """ echo $code """ @@ -1617,8 +1644,9 @@ The `time` directive allows you to define how long a process is allowed to run. 
process big_job { time '1h' + script: """ - your task script here + your_command --here """ } ``` diff --git a/docs/secrets.md b/docs/secrets.md index 393992b18d..b20ce13134 100644 --- a/docs/secrets.md +++ b/docs/secrets.md @@ -56,6 +56,7 @@ process someJob { secret 'MY_ACCESS_KEY' secret 'MY_SECRET_KEY' + script: """ your_command --access \$MY_ACCESS_KEY --secret \$MY_SECRET_KEY """ diff --git a/docs/spack.md b/docs/spack.md index f721fb6971..922947beb0 100644 --- a/docs/spack.md +++ b/docs/spack.md @@ -49,6 +49,7 @@ Spack package names can specified using the `spack` directive. Multiple package process foo { spack 'bwa samtools py-multiqc' + script: ''' your_command --here ''' @@ -93,6 +94,7 @@ The path of an environment file can be specified using the `spack` directive: process foo { spack '/some/path/my-env.yaml' + script: ''' your_command --here ''' @@ -111,6 +113,7 @@ If you already have a local Spack environment, you can use it in your workflow s process foo { spack '/path/to/an/existing/env/directory' + script: ''' your_command --here ''' diff --git a/docs/vscode.md b/docs/vscode.md index c889ddb27c..53f4de50a6 100644 --- a/docs/vscode.md +++ b/docs/vscode.md @@ -342,6 +342,8 @@ process PROC { input: env FOO env 'BAR' + + // ... } ``` @@ -352,6 +354,8 @@ process PROC { input: env 'FOO' env 'BAR' + + // ... 
} ``` diff --git a/docs/your-first-script.md b/docs/your-first-script.md index e0ded98a7c..b5101d4b9c 100644 --- a/docs/your-first-script.md +++ b/docs/your-first-script.md @@ -55,6 +55,7 @@ process convertToUpper { output: stdout + script: """ rev $x """ From 99d404252a4f8e096a4c88bb130ed25ffd5455d6 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Thu, 21 Nov 2024 07:47:10 +0100 Subject: [PATCH 16/30] Add `batch:TagResource` to AWS docs (#5521) [ci skip] Signed-off-by: Phil Ewels --- docs/aws.md | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/docs/aws.md b/docs/aws.md index 5c703ada9c..ba518ad7ae 100644 --- a/docs/aws.md +++ b/docs/aws.md @@ -46,53 +46,54 @@ Minimal permissions policies to be attached to the AWS account used by Nextflow - To use AWS Batch: ```json - "batch:DescribeJobQueues" "batch:CancelJob" - "batch:SubmitJob" - "batch:ListJobs" "batch:DescribeComputeEnvironments" - "batch:TerminateJob" + "batch:DescribeJobDefinitions" + "batch:DescribeJobQueues" "batch:DescribeJobs" + "batch:ListJobs" "batch:RegisterJobDefinition" - "batch:DescribeJobDefinitions" + "batch:SubmitJob" + "batch:TagResource" + "batch:TerminateJob" ``` - To view [EC2](https://aws.amazon.com/ec2/) instances: ```json - "ecs:DescribeTasks" + "ec2:DescribeInstanceAttribute" "ec2:DescribeInstances" + "ec2:DescribeInstanceStatus" "ec2:DescribeInstanceTypes" - "ec2:DescribeInstanceAttribute" "ecs:DescribeContainerInstances" - "ec2:DescribeInstanceStatus" + "ecs:DescribeTasks" ``` - To pull container images from [ECR](https://aws.amazon.com/ecr/) repositories: ```json - "ecr:GetAuthorizationToken" "ecr:BatchCheckLayerAvailability" - "ecr:GetDownloadUrlForLayer" - "ecr:GetRepositoryPolicy" - "ecr:DescribeRepositories" - "ecr:ListImages" - "ecr:DescribeImages" "ecr:BatchGetImage" + "ecr:DescribeImages" + "ecr:DescribeImageScanFindings" + "ecr:DescribeRepositories" + "ecr:GetAuthorizationToken" + "ecr:GetDownloadUrlForLayer" 
"ecr:GetLifecyclePolicy" "ecr:GetLifecyclePolicyPreview" + "ecr:GetRepositoryPolicy" + "ecr:ListImages" "ecr:ListTagsForResource" - "ecr:DescribeImageScanFindings" ``` :::{note} If you are running Fargate or Fargate Spot, you may need the following policies in addition to the listed above: ```json + "ec2:DescribeSubnets" "ecs:CreateCluster" "ecs:DeleteCluster" "ecs:DescribeClusters" "ecs:ListClusters" - "ec2:DescribeSubnets" ``` ::: From 8fbf8ffced42fb66e0f9b4c2905c00fdc6548503 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 21 Nov 2024 00:53:04 -0600 Subject: [PATCH 17/30] Document spread dot and safe dot (#5519) [ci skip] Signed-off-by: Ben Sherman --- docs/reference/syntax.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index bb7cd098dd..20360f9af4 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -767,10 +767,12 @@ myList[0] ### Property expression -A property expression consists of an *object expression* and a *property*, separated by a dot: +A property expression consists of an *object expression* and a *property*, separated by a *dot*, *spread dot*, or *safe dot*: ```nextflow -file.text +myFile.text // dot +myFiles*.text // spread dot: myFiles.collect { myFile -> myFile.text } +myFile?.text // safe dot: myFile != null ? myFile.text : null ``` The property must be an identifier or string literal. @@ -783,10 +785,12 @@ A function call consists of a name and argument list: printf('Hello %s!\n', 'World') ``` -A *method call* consists of an *object expression* and a function call separated by a dot: +A *method call* consists of an *object expression* and a function call separated by a *dot*, *spread dot*, or *safe dot*: ```nextflow -myList.size() +myFile.getText() // dot +myFiles*.getText() // spread dot: myFiles.collect { myFile -> myFile.getText() } +myFile?.getText() // safe dot: myFile != null ? 
myFile.getText() : null ``` The argument list may contain any number of *positional arguments* and *named arguments*: From 6f527551f9d1f8026b3f72d8d89948eb6fb80f80 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Thu, 21 Nov 2024 00:55:09 -0600 Subject: [PATCH 18/30] Deprecate process `shell` block (#5508) Signed-off-by: Ben Sherman Co-authored-by: Paolo Di Tommaso --- docs/process.md | 4 ++++ docs/reference/syntax.md | 23 ++++++------------- docs/vscode.md | 5 ++++ .../nextflow/processor/TaskProcessor.groovy | 2 ++ 4 files changed, 18 insertions(+), 16 deletions(-) diff --git a/docs/process.md b/docs/process.md index 1803e4712c..871ecea093 100644 --- a/docs/process.md +++ b/docs/process.md @@ -212,6 +212,10 @@ Template scripts are generally discouraged due to the caveats described above. T ### Shell +:::{deprecated} 24.11.0-edge +Use the `script` block instead. Consider using the {ref}`VS Code extension `, which provides syntax highlighting and error checking to distinguish Nextflow variables from Bash variables in the process script. +::: + The `shell` block is a string expression that defines the script that is executed by the process. It is an alternative to the {ref}`process-script` definition with one important difference: it uses the exclamation mark `!` character, instead of the usual dollar `$` character, to denote Nextflow variables. This way, it is possible to use both Nextflow and Bash variables in the same script without having to escape the latter, which makes process scripts easier to read and maintain. 
For example: diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index 20360f9af4..e454756646 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -187,7 +187,7 @@ process sayHello { } ``` -A process may define additional sections for *directives*, *inputs*, *outputs*, *script*, *shell*, *exec*, and *stub*: +A process may define additional sections for *directives*, *inputs*, *outputs*, *script*, *exec*, and *stub*: ```nextflow process greet { @@ -202,7 +202,7 @@ process greet { output: stdout - script: // or shell: or exec: + script: // or exec: """ echo '${greeting}, ${name}!' """ @@ -214,7 +214,7 @@ process greet { } ``` -- A process must define a script, shell, or exec section (see below). All other sections are optional. Directives do not have an explicit section label, but must be defined first. +- A process must define a script or exec section (see below). All other sections are optional. Directives do not have an explicit section label, but must be defined first. - The `script:` section label can be omitted only when there are no other sections in the body. @@ -222,19 +222,9 @@ process greet { Each section may contain one or more statements. For directives, inputs, and outputs, these statements must be [function calls](#function-call). See {ref}`process-reference` for the set of available input qualifiers, output qualifiers, and directives. -The script section can be substituted with a shell or exec section: +The script section can be substituted with an exec section: ```nextflow -process greetShell { - input: - val greeting - - shell: - ''' - echo '!{greeting}, ${USER}!' - ''' -} - process greetExec { input: val greeting @@ -248,7 +238,7 @@ process greetExec { } ``` -The script, shell, and stub sections must return a string in the same manner as a [function](#function). +The script and stub sections must return a string in the same manner as a [function](#function). 
See {ref}`process-page` for more information on the semantics of each process section. @@ -356,7 +346,7 @@ Variables declared in a function, as well as the parameters of that function, ex Workflow inputs exist for the entire workflow body. Variables declared in the main section exist for the main, emit, and publish sections. Named outputs are not considered variable declarations and therefore do not have any scope. -Process input variables exist for the entire process body. Variables declared in the process script, shell, exec, and stub sections exist only in their respective section, with one exception -- variables declared without the `def` keyword also exist in the output section. +Process input variables exist for the entire process body. Variables declared in the process script, exec, and stub sections exist only in their respective section, with one exception -- variables declared without the `def` keyword also exist in the output section. Variables declared in an if or else branch exist only within that branch: @@ -958,4 +948,5 @@ The following legacy features were excluded from this page because they are depr - The `addParams` and `params` clauses of include declarations. See {ref}`module-params` for more information. - The `when:` section of a process definition. See {ref}`process-when` for more information. +- The `shell:` section of a process definition. See {ref}`process-shell` for more information. - The implicit `it` closure parameter. See {ref}`script-closure` for more information. diff --git a/docs/vscode.md b/docs/vscode.md index 53f4de50a6..eb0d6d40eb 100644 --- a/docs/vscode.md +++ b/docs/vscode.md @@ -1,3 +1,4 @@ +(vscode-page)= # VS Code integration @@ -443,6 +444,10 @@ The `each` process input is deprecated. Use the `combine` or `cross` operator to The process `when` section is deprecated. Use conditional logic, such as an `if` statement or the `filter` operator, to control the process invocation in the calling workflow. 
+**Process shell section** + +The process `shell` section is deprecated. Use the `script` block instead. The VS Code extension provides syntax highlighting and error checking to help distinguish between Nextflow variables and Bash variables. + ### Configuration syntax See {ref}`config-syntax` for a comprehensive description of the configuration language. diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy index a1d48e5316..4b5fbf2791 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskProcessor.groovy @@ -309,6 +309,8 @@ class TaskProcessor { this.ownerScript = script this.config = config this.taskBody = taskBody + if( taskBody.isShell ) + log.warn "Process ${name} > the `shell` block is deprecated, use `script` instead" this.name = name this.maxForks = config.maxForks && config.maxForks>0 ? config.maxForks as int : 0 this.forksCount = maxForks ? 
new LongAdder() : null From 25bbb621fc09b60ec9171050d1d96ff4a072f2ac Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Thu, 21 Nov 2024 08:12:45 +0100 Subject: [PATCH 19/30] Fix possible deadlock in dynamic maxRetry resolution [ci fast] (#5474) Signed-off-by: jorgee Signed-off-by: Paolo Di Tommaso Co-authored-by: Paolo Di Tommaso --- .../src/main/groovy/nextflow/processor/TaskConfig.groovy | 3 +-- .../src/main/groovy/nextflow/script/ProcessConfig.groovy | 2 +- .../test/groovy/nextflow/processor/TaskConfigTest.groovy | 8 ++++---- .../test/groovy/nextflow/script/ProcessConfigTest.groovy | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/processor/TaskConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/processor/TaskConfig.groovy index c8ecc872a4..cb78900b33 100644 --- a/modules/nextflow/src/main/groovy/nextflow/processor/TaskConfig.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/processor/TaskConfig.groovy @@ -345,8 +345,7 @@ class TaskConfig extends LazyMap implements Cloneable { int getMaxRetries() { def result = get('maxRetries') - def defResult = getErrorStrategy() == ErrorStrategy.RETRY ? 1 : 0 - result ? result as int : defResult + result ? 
result as int : 1 } int getMaxErrors() { diff --git a/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy b/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy index df7ce27ec6..089eb781da 100644 --- a/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/script/ProcessConfig.groovy @@ -126,7 +126,7 @@ class ProcessConfig implements Map, Cloneable { debug: false, cacheable: true, shell: BashWrapperBuilder.BASH, - maxRetries: 0, + maxRetries: 1, maxErrors: -1, errorStrategy: ErrorStrategy.TERMINATE ] diff --git a/modules/nextflow/src/test/groovy/nextflow/processor/TaskConfigTest.groovy b/modules/nextflow/src/test/groovy/nextflow/processor/TaskConfigTest.groovy index a51d0fc15c..e2a09b1fdf 100644 --- a/modules/nextflow/src/test/groovy/nextflow/processor/TaskConfigTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/processor/TaskConfigTest.groovy @@ -157,8 +157,8 @@ class TaskConfigTest extends Specification { where: value | expected - null | 0 - 0 | 0 + null | 1 + 0 | 1 1 | 1 '3' | 3 10 | 10 @@ -171,8 +171,8 @@ class TaskConfigTest extends Specification { when: config = new TaskConfig() then: - config.maxRetries == 0 - config.getMaxRetries() == 0 + config.maxRetries == 1 + config.getMaxRetries() == 1 config.getErrorStrategy() == ErrorStrategy.TERMINATE when: diff --git a/modules/nextflow/src/test/groovy/nextflow/script/ProcessConfigTest.groovy b/modules/nextflow/src/test/groovy/nextflow/script/ProcessConfigTest.groovy index bad0784768..59dd323c7f 100644 --- a/modules/nextflow/src/test/groovy/nextflow/script/ProcessConfigTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/script/ProcessConfigTest.groovy @@ -47,7 +47,7 @@ class ProcessConfigTest extends Specification { expect: config.shell == ['/bin/bash','-ue'] config.cacheable - config.maxRetries == 0 + config.maxRetries == 1 config.maxErrors == -1 config.errorStrategy == ErrorStrategy.TERMINATE } From 
Parameters specified on the command line can also be specified in a params file using the `-params-file` option.
+ +```bash +nextflow run main.nf -params-file pipeline_params.yml +``` + +The `-params-file` option loads parameters for your Nextflow pipeline from a JSON or YAML file. Parameters defined in the file are equivalent to specifying them directly on the command line. For example, instead of specifying parameters on the command line: + +```bash +nextflow run main.nf --alpha 1 --beta foo +``` + +Parameters can be represented in YAML format: + +```yaml +alpha: 1 +beta: 'foo' +``` + +Or in JSON format: + +```json +{ + "alpha": 1, + "beta": "foo" +} +``` + +The parameters specified in a params file are merged with the resolved configuration. The values provided via a params file overwrite those of the same name in the Nextflow configuration file, but not those specified on the command line. + ## Managing projects Nextflow seamlessly integrates with popular Git providers, including [BitBucket](http://bitbucket.org/), [GitHub](http://github.com), and [GitLab](http://gitlab.com) for managing Nextflow pipelines as version-controlled Git repositories. diff --git a/docs/config.md b/docs/config.md index ccdc71933a..05fbc1130b 100644 --- a/docs/config.md +++ b/docs/config.md @@ -138,6 +138,8 @@ params { } ``` +See {ref}`cli-params` for information about how to modify these on the command line. 
+ (config-process)= ## Process configuration diff --git a/docs/reference/cli.md b/docs/reference/cli.md index 43ab14c4e2..591d1fa151 100644 --- a/docs/reference/cli.md +++ b/docs/reference/cli.md @@ -1172,29 +1172,7 @@ The `run` command is used to execute a local pipeline script or remote pipeline $ nextflow run main.nf -params-file pipeline_params.yml ``` - For example, the following params file in YAML format: - - ```yaml - alpha: 1 - beta: 'foo' - ``` - - Or in JSON format: - - ```json - { - "alpha": 1, - "beta": "foo" - } - ``` - - Is equivalent to the following command line: - - ```console - $ nextflow run main.nf --alpha 1 --beta foo - ``` - - The parameters specified with this mechanism are merged with the resolved configuration (base configuration and profiles). The values provided via a params file overwrite those of the same name in the Nextflow configuration file. + See {ref}`cli-params` for more information about writing custom parameters files. ### `self-update` From b5c63a9f93b9d6b62e4494f45aedc360439ff668 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Fri, 22 Nov 2024 10:28:42 -0600 Subject: [PATCH 22/30] Improve groupTuple docs with scatter/gather example (#5520) [ci skip] Signed-off-by: Ben Sherman --- docs/snippets/grouptuple-groupkey.nf | 19 ++++++++++--------- docs/snippets/grouptuple-groupkey.out | 9 +++++++-- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/docs/snippets/grouptuple-groupkey.nf b/docs/snippets/grouptuple-groupkey.nf index fa9f08c6fd..5c8fce8e7a 100644 --- a/docs/snippets/grouptuple-groupkey.nf +++ b/docs/snippets/grouptuple-groupkey.nf @@ -1,12 +1,13 @@ -chr_frequency = ["chr1": 2, "chr2": 3] - Channel.of( - ['region1', 'chr1', '/path/to/region1_chr1.vcf'], - ['region2', 'chr1', '/path/to/region2_chr1.vcf'], - ['region1', 'chr2', '/path/to/region1_chr2.vcf'], - ['region2', 'chr2', '/path/to/region2_chr2.vcf'], - ['region3', 'chr2', '/path/to/region3_chr2.vcf'] + ['chr1', ['/path/to/region1_chr1.vcf', 
'/path/to/region2_chr1.vcf']], + ['chr2', ['/path/to/region1_chr2.vcf', '/path/to/region2_chr2.vcf', '/path/to/region3_chr2.vcf']], ) - .map { region, chr, vcf -> tuple( groupKey(chr, chr_frequency[chr]), vcf ) } + .flatMap { chr, vcfs -> + vcfs.collect { vcf -> + tuple(groupKey(chr, vcfs.size()), vcf) // preserve group size with key + } + } + .view { v -> "scattered: ${v}" } .groupTuple() - .view() \ No newline at end of file + .map { key, vcfs -> tuple(key.getGroupTarget(), vcfs) } // unwrap group key + .view { v -> "gathered: ${v}" } \ No newline at end of file diff --git a/docs/snippets/grouptuple-groupkey.out b/docs/snippets/grouptuple-groupkey.out index e97159c872..be3f00185b 100644 --- a/docs/snippets/grouptuple-groupkey.out +++ b/docs/snippets/grouptuple-groupkey.out @@ -1,2 +1,7 @@ -[chr1, [/path/to/region1_chr1.vcf, /path/to/region2_chr1.vcf]] -[chr2, [/path/to/region1_chr2.vcf, /path/to/region2_chr2.vcf, /path/to/region3_chr2.vcf]] \ No newline at end of file +scattered: [chr1, /path/to/region1_chr1.vcf] +scattered: [chr1, /path/to/region2_chr1.vcf] +scattered: [chr2, /path/to/region1_chr2.vcf] +scattered: [chr2, /path/to/region2_chr2.vcf] +scattered: [chr2, /path/to/region3_chr2.vcf] +gathered: [chr1, [/path/to/region1_chr1.vcf, /path/to/region2_chr1.vcf]] +gathered: [chr2, [/path/to/region1_chr2.vcf, /path/to/region2_chr2.vcf, /path/to/region3_chr2.vcf]] \ No newline at end of file From b65fc66ef02a37ea778cd1307debd39490480f79 Mon Sep 17 00:00:00 2001 From: Phil Ewels Date: Fri, 22 Nov 2024 17:32:52 +0100 Subject: [PATCH 23/30] Docs: Improve Conda docs - PyPI + lock files (#5531) [ci skip] Signed-off-by: Phil Ewels Co-authored-by: Christopher Hakkaart Co-authored-by: Paolo Di Tommaso --- docs/conda.md | 77 +++++++++++++++++++++++++++++++++++++++------------ docs/wave.md | 2 ++ 2 files changed, 62 insertions(+), 17 deletions(-) diff --git a/docs/conda.md b/docs/conda.md index 6debcec745..8c7405c1a4 100644 --- a/docs/conda.md +++ b/docs/conda.md @@ -6,7 
[Python Package Index](https://pypi.org/)
+(conda-env-files)= ### Use Conda environment files Conda environments can also be defined using one or more Conda environment files. This is a file that lists the required packages and channels structured using the YAML format. For example: @@ -77,20 +78,6 @@ dependencies: - bwa=0.7.15 ``` -This other example shows how to leverage a Conda environment file to install Python packages from the [PyPI repository](https://pypi.org/)), through the `pip` package manager (which must also be explicitly listed as a required package): - -```yaml -name: my-env-2 -channels: - - defaults -dependencies: - - pip - - pip: - - numpy - - pandas - - matplotlib -``` - Read the Conda documentation for more details about how to create [environment files](https://conda.io/docs/user-guide/tasks/manage-environments.html#creating-an-environment-file-manually). The path of an environment file can be specified using the `conda` directive: @@ -110,7 +97,26 @@ process foo { The environment file name **must** have a `.yml` or `.yaml` extension or else it won't be properly recognised. ::: -Alternatively, it is possible to provide the dependencies using a plain text file, just listing each package name as a separate line. For example: +(conda-pypi)= +### Python Packages from PyPI + +Conda environment files can also be used to install Python packages from the [PyPI repository](https://pypi.org/), through the `pip` package manager (which must also be explicitly listed as a required package): + +```yaml +name: my-env-2 +channels: + - defaults +dependencies: + - pip + - pip: + - numpy + - pandas + - matplotlib +``` + +### Conda text files + +It is possible to provide dependencies by listing each package name as a separate line in a plain text file. For example: ``` bioconda::star=2.5.4a @@ -122,6 +128,43 @@ bioconda::multiqc=1.4 Like before, the extension matters. Make sure the dependencies file has a `.txt` extension. 
The files contain package URLs and an optional MD5 hash for each download to confirm identity:
under the "Assets" section for a specific release
chmod +x nextflow-24.10.1-dist
::: -To pull images from Apptainer Hub or a third party Docker registry, simply prefix the image name with the `shub://`, `docker://` or `docker-daemon://` pseudo-protocol as required by Apptainer. For example: +To pull images from Apptainer Hub or a third party Docker registry, prefix the image name with the `shub://`, `docker://` or `docker-daemon://` pseudo-protocol as required by Apptainer. For example: ```groovy process.container = 'docker://quay.io/biocontainers/multiqc:1.3--py35_2' @@ -120,11 +120,11 @@ You do not need to specify `docker://` to pull from a Docker repository. Nextflo This feature requires the `apptainer` tool to be installed where the workflow execution is launched (as opposed to the compute nodes). ::: -Nextflow caches those images in the `apptainer` directory in the pipeline work directory by default. However it is suggested to provide a centralised cache directory by using either the `NXF_APPTAINER_CACHEDIR` environment variable or the `apptainer.cacheDir` setting in the Nextflow config file. +Nextflow caches Apptainer images in the `apptainer` directory, in the pipeline work directory, by default. However, it is recommended to provide a centralized cache directory using the `NXF_APPTAINER_CACHEDIR` environment variable or the `apptainer.cacheDir` setting in the Nextflow config file. -:::{versionadded} 21.09.0-edge -When looking for a Apptainer image file, Nextflow first checks the *library* directory, and if the image file is not found, the *cache* directory is used as usual. The library directory can be defined either using the `NXF_APPTAINER_LIBRARYDIR` environment variable or the `apptainer.libraryDir` configuration setting (the latter overrides the former). -::: +Nextflow uses the library directory to determine the location of Apptainer containers. The library directory can be defined using the `apptainer.libraryDir` configuration setting or the `NXF_APPTAINER_LIBRARYDIR` environment variable. 
If the image is not found, it then checks the cache directory. The main difference between the library directory and the cache directory is that the first is assumed to be a read-only container repository, while the latter is expected to be a writable path where container images can be added for caching purposes.
If the image is not found, it then checks the cache directory. The main difference between the library directory and the cache directory is that the first is assumed to be a read-only container repository, while the latter is expected to be a writable path where container images can be added for caching purposes.
From 9248c04d86b30d05f0d34ecc79fda080ecd39fd3 Mon Sep 17 00:00:00 2001 From: Jorge Ejarque Date: Tue, 26 Nov 2024 16:45:23 +0100 Subject: [PATCH 26/30] Fix overlapping conda lock file (#5540) Signed-off-by: jorgee --- .../groovy/nextflow/conda/CondaCache.groovy | 17 +++++++++-------- .../groovy/nextflow/conda/CondaCacheTest.groovy | 15 +++++---------- 2 files changed, 14 insertions(+), 18 deletions(-) diff --git a/modules/nextflow/src/main/groovy/nextflow/conda/CondaCache.groovy b/modules/nextflow/src/main/groovy/nextflow/conda/CondaCache.groovy index 17c605f19b..15234c6039 100644 --- a/modules/nextflow/src/main/groovy/nextflow/conda/CondaCache.groovy +++ b/modules/nextflow/src/main/groovy/nextflow/conda/CondaCache.groovy @@ -251,8 +251,8 @@ class CondaCache { * @return the conda environment prefix {@link Path} */ @PackageScope - Path createLocalCondaEnv(String condaEnv) { - final prefixPath = condaPrefixPath(condaEnv) + Path createLocalCondaEnv(String condaEnv, Path prefixPath) { + if( prefixPath.isDirectory() ) { log.debug "${binaryName} found local env for environment=$condaEnv; path=$prefixPath" return prefixPath @@ -360,17 +360,18 @@ class CondaCache { */ @PackageScope DataflowVariable getLazyImagePath(String condaEnv) { - - if( condaEnv in condaPrefixPaths ) { + final prefixPath = condaPrefixPath(condaEnv) + final condaEnvPath = prefixPath.toString() + if( condaEnvPath in condaPrefixPaths ) { log.trace "${binaryName} found local environment `$condaEnv`" - return condaPrefixPaths[condaEnv] + return condaPrefixPaths[condaEnvPath] } synchronized (condaPrefixPaths) { - def result = condaPrefixPaths[condaEnv] + def result = condaPrefixPaths[condaEnvPath] if( result == null ) { - result = new LazyDataflowVariable({ createLocalCondaEnv(condaEnv) }) - condaPrefixPaths[condaEnv] = result + result = new LazyDataflowVariable({ createLocalCondaEnv(condaEnv, prefixPath) }) + condaPrefixPaths[condaEnvPath] = result } else { log.trace "${binaryName} found local cache for 
environment `$condaEnv` (2)" diff --git a/modules/nextflow/src/test/groovy/nextflow/conda/CondaCacheTest.groovy b/modules/nextflow/src/test/groovy/nextflow/conda/CondaCacheTest.groovy index 9a8baf952c..f6b7fac271 100644 --- a/modules/nextflow/src/test/groovy/nextflow/conda/CondaCacheTest.groovy +++ b/modules/nextflow/src/test/groovy/nextflow/conda/CondaCacheTest.groovy @@ -197,9 +197,8 @@ class CondaCacheTest extends Specification { when: // the prefix directory exists ==> no conda command is executed - def result = cache.createLocalCondaEnv(ENV) + def result = cache.createLocalCondaEnv(ENV, PREFIX) then: - 1 * cache.condaPrefixPath(ENV) >> PREFIX 0 * cache.isYamlFilePath(ENV) 0 * cache.runCommand(_) result == PREFIX @@ -224,9 +223,8 @@ class CondaCacheTest extends Specification { when: // the prefix directory exists ==> no mamba command is executed - def result = cache.createLocalCondaEnv(ENV) + def result = cache.createLocalCondaEnv(ENV, PREFIX) then: - 1 * cache.condaPrefixPath(ENV) >> PREFIX 0 * cache.isYamlFilePath(ENV) 0 * cache.runCommand(_) result == PREFIX @@ -251,9 +249,8 @@ class CondaCacheTest extends Specification { when: // the prefix directory exists ==> no mamba command is executed - def result = cache.createLocalCondaEnv(ENV) + def result = cache.createLocalCondaEnv(ENV, PREFIX) then: - 1 * cache.condaPrefixPath(ENV) >> PREFIX 0 * cache.isYamlFilePath(ENV) 0 * cache.runCommand(_) result == PREFIX @@ -278,9 +275,8 @@ class CondaCacheTest extends Specification { when: // the prefix directory exists ==> no mamba command is executed - def result = cache.createLocalCondaEnv(ENV) + def result = cache.createLocalCondaEnv(ENV, PREFIX) then: - 1 * cache.condaPrefixPath(ENV) >> PREFIX 0 * cache.isYamlFilePath(ENV) 0 * cache.runCommand(_) result == PREFIX @@ -304,9 +300,8 @@ class CondaCacheTest extends Specification { when: // the prefix directory exists ==> no mamba command is executed - def result = cache.createLocalCondaEnv(ENV) + def result = 
cache.createLocalCondaEnv(ENV, PREFIX) then: - 1 * cache.condaPrefixPath(ENV) >> PREFIX 0 * cache.isYamlFilePath(ENV) 0 * cache.runCommand(_) result == PREFIX From 308d5a5e893a7b3691d58025cbf673d62e1d7481 Mon Sep 17 00:00:00 2001 From: Ben Sherman Date: Tue, 26 Nov 2024 15:25:07 -0600 Subject: [PATCH 27/30] Update syntax docs (#5542) Signed-off-by: Ben Sherman --- docs/reference/syntax.md | 10 ----- docs/vscode.md | 90 +++++++++++++++++++++++++--------------- 2 files changed, 57 insertions(+), 43 deletions(-) diff --git a/docs/reference/syntax.md b/docs/reference/syntax.md index e454756646..06d7824527 100644 --- a/docs/reference/syntax.md +++ b/docs/reference/syntax.md @@ -622,16 +622,6 @@ A *slashy string* is enclosed by slashes instead of quotes: /no escape!/ ``` -Slashy strings can also span multiple lines: - -```nextflow -/ -Patterns in the code, -Symbols dance to match and find, -Logic unconfined. -/ -``` - :::{note} A slashy string cannot be empty because it would become a line comment. ::: diff --git a/docs/vscode.md b/docs/vscode.md index eb0d6d40eb..11c847ae29 100644 --- a/docs/vscode.md +++ b/docs/vscode.md @@ -230,38 +230,6 @@ if (aligner == 'bowtie2') { } ``` -**Slashy dollar strings** - -Groovy supports a wide variety of strings, including multi-line strings, dynamic strings, slashy strings, multi-line dynamic slashy strings, and more. - -The Nextflow language specification supports single- and double-quoted strings, multi-line strings, and slashy strings. Dynamic slashy strings are not supported: - -```groovy -def logo = /--cl-config 'custom_logo: "${multiqc_logo}"'/ -``` - -Use a double-quoted string instead: - -```nextflow -def logo = "--cl-config 'custom_logo: \"${multiqc_logo}\"'" -``` - -Slashy dollar strings are not supported: - -```groovy -$/ -echo "Hello world!" -/$ -``` - -Use a multi-line string instead: - -```nextflow -""" -echo "Hello world!" 
-""" -``` - **Implicit environment variables** In Nextflow DSL1 and DSL2, you can reference environment variables directly in strings: @@ -334,6 +302,62 @@ To ease the migration of existing scripts, the language server only reports warn Type annotations and static type checking will be addressed in a future version of the Nextflow language specification. ::: +**Strings** + +Groovy supports a wide variety of strings, including multi-line strings, dynamic strings, slashy strings, multi-line dynamic slashy strings, and more. + +The Nextflow language specification supports single- and double-quoted strings, multi-line strings, and slashy strings. + +Slashy strings cannot be interpolated: + +```nextflow +def id = 'SRA001' +assert 'SRA001.fastq' ~= /${id}\.f(?:ast)?q/ +``` + +Use a double-quoted string instead: + +```nextflow +def id = 'SRA001' +assert 'SRA001.fastq' ~= "${id}\\.f(?:ast)?q" +``` + +Slashy strings cannot span multiple lines: + +```groovy +/ +Patterns in the code, +Symbols dance to match and find, +Logic unconfined. +/ +``` + +Use a multi-line string instead: + +```nextflow +""" +Patterns in the code, +Symbols dance to match and find, +Logic unconfined. +""" +``` + +Dollar slashy strings are not supported: + +```groovy +$/ +echo "Hello world!" +/$ +``` + +Use a multi-line string instead: + +```nextflow +""" +echo "Hello world!" +""" +``` + **Process env inputs/outputs** In Nextflow DSL1 and DSL2, the name of a process `env` input/output can be specified with or without quotes: @@ -481,7 +505,7 @@ includeConfig ({ return 'large.config' else return '/dev/null' -})() +}()) ``` The include source is a closure that is immediately invoked. It includes a different config file based on the return value of the closure. Including `/dev/null` is equivalent to including nothing. 
From 12fc1d60027d9d2db95170b3780b25e2bfc12a7e Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 27 Nov 2024 15:55:57 +0100 Subject: [PATCH 28/30] Prevent NPE with null AWS Batch response Signed-off-by: Paolo Di Tommaso --- .../main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy index 4245821a7a..29a2261e25 100644 --- a/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy +++ b/plugins/nf-amazon/src/main/nextflow/cloud/aws/batch/AwsBatchTaskHandler.groovy @@ -198,7 +198,7 @@ class AwsBatchTaskHandler extends TaskHandler implements BatchHandler Date: Wed, 27 Nov 2024 22:12:10 +0100 Subject: [PATCH 29/30] Update wave deps Signed-off-by: Paolo Di Tommaso --- plugins/nf-wave/build.gradle | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/nf-wave/build.gradle b/plugins/nf-wave/build.gradle index bd5da121f0..b6a8367f0d 100644 --- a/plugins/nf-wave/build.gradle +++ b/plugins/nf-wave/build.gradle @@ -36,8 +36,8 @@ dependencies { api 'org.apache.commons:commons-lang3:3.12.0' api 'com.google.code.gson:gson:2.10.1' api 'org.yaml:snakeyaml:2.2' - api 'io.seqera:wave-api:0.13.3' - api 'io.seqera:wave-utils:0.14.1' + api 'io.seqera:wave-api:0.14.0' + api 'io.seqera:wave-utils:0.15.0' testImplementation(testFixtures(project(":nextflow"))) testImplementation "org.apache.groovy:groovy:4.0.24" From ee252173bf823d4bbef5522bd726ccd4df595a7d Mon Sep 17 00:00:00 2001 From: Paolo Di Tommaso Date: Wed, 27 Nov 2024 22:13:09 +0100 Subject: [PATCH 30/30] Fix missing wave response (#5547) [ci fast] Signed-off-by: Paolo Di Tommaso --- .../io/seqera/wave/plugin/WaveClient.groovy | 17 ++++++++++++----- .../io/seqera/wave/plugin/WaveClientTest.groovy | 17 ++++++++--------- 2 files changed, 20 insertions(+), 14 
deletions(-) diff --git a/plugins/nf-wave/src/main/io/seqera/wave/plugin/WaveClient.groovy b/plugins/nf-wave/src/main/io/seqera/wave/plugin/WaveClient.groovy index 6c8bb44e8e..c3640ce42d 100644 --- a/plugins/nf-wave/src/main/io/seqera/wave/plugin/WaveClient.groovy +++ b/plugins/nf-wave/src/main/io/seqera/wave/plugin/WaveClient.groovy @@ -27,6 +27,7 @@ import java.time.Duration import java.time.Instant import java.time.OffsetDateTime import java.time.temporal.ChronoUnit +import java.util.concurrent.ConcurrentHashMap import java.util.concurrent.Executors import java.util.concurrent.TimeUnit import java.util.function.Predicate @@ -104,7 +105,9 @@ class WaveClient { final private String endpoint - private Cache cache + private Cache cache + + private Map responses = new ConcurrentHashMap<>() private Session session @@ -135,7 +138,7 @@ class WaveClient { this.packer = new Packer().withPreserveTimestamp(config.preserveFileTimestamp()) this.waveRegistry = new URI(endpoint).getAuthority() // create cache - cache = CacheBuilder + this.cache = CacheBuilder .newBuilder() .expireAfterWrite(config.tokensCacheMaxDuration().toSeconds(), TimeUnit.SECONDS) .build() @@ -572,8 +575,12 @@ class WaveClient { final key = assets.fingerprint() log.trace "Wave fingerprint: $key; assets: $assets" // get from cache or submit a new request - final handle = cache.get(key, () -> new Handle(sendRequest(assets),Instant.now()) ) - return new ContainerInfo(assets.containerImage, handle.response.targetImage, key) + final resp = cache.get(key, () -> { + final ret = sendRequest(assets); + responses.put(key,new Handle(ret,Instant.now())); + return ret + }) + return new ContainerInfo(assets.containerImage, resp.targetImage, key) } catch ( UncheckedExecutionException e ) { throw e.cause @@ -633,7 +640,7 @@ class WaveClient { } boolean isContainerReady(String key) { - final handle = cache.getIfPresent(key) + final handle = responses.get(key) if( !handle ) throw new IllegalStateException("Unable to find 
any container with key: $key") final resp = handle.response diff --git a/plugins/nf-wave/src/test/io/seqera/wave/plugin/WaveClientTest.groovy b/plugins/nf-wave/src/test/io/seqera/wave/plugin/WaveClientTest.groovy index bbd0a397b6..1f54b0a3d7 100644 --- a/plugins/nf-wave/src/test/io/seqera/wave/plugin/WaveClientTest.groovy +++ b/plugins/nf-wave/src/test/io/seqera/wave/plugin/WaveClientTest.groovy @@ -27,7 +27,6 @@ import java.nio.file.attribute.FileTime import java.time.Duration import java.time.Instant -import com.google.common.cache.Cache import com.sun.net.httpserver.HttpExchange import com.sun.net.httpserver.HttpHandler import com.sun.net.httpserver.HttpServer @@ -1303,18 +1302,18 @@ class WaveClientTest extends Specification { def 'should validate isContainerReady' () { given: def sess = Mock(Session) {getConfig() >> [wave: [build:[maxDuration: '500ms']]] } - def cache = Mock(Cache) + def cache = Mock(Map) and: def resp = Mock(SubmitContainerTokenResponse) def handle = new WaveClient.Handle(resp,Instant.now()) - def wave = Spy(new WaveClient(session:sess, cache: cache)) + def wave = Spy(new WaveClient(session:sess, responses: cache)) boolean ready // container succeeded when: ready = wave.isContainerReady('xyz') then: - cache.getIfPresent('xyz') >> handle + cache.get('xyz') >> handle and: resp.requestId >> '12345' resp.succeeded >> true @@ -1328,7 +1327,7 @@ class WaveClientTest extends Specification { when: ready = wave.isContainerReady('xyz') then: - cache.getIfPresent('xyz') >> handle + cache.get('xyz') >> handle and: resp.requestId >> '12345' resp.succeeded >> null @@ -1342,7 +1341,7 @@ class WaveClientTest extends Specification { when: ready = wave.isContainerReady('xyz') then: - cache.getIfPresent('xyz') >> handle + cache.get('xyz') >> handle and: resp.requestId >> '12345' resp.succeeded >> false @@ -1357,7 +1356,7 @@ class WaveClientTest extends Specification { when: ready = wave.isContainerReady('xyz') then: - cache.getIfPresent('xyz') >> handle + 
cache.get('xyz') >> handle and: resp.buildId >> 'bd-5678' resp.cached >> false @@ -1371,7 +1370,7 @@ class WaveClientTest extends Specification { when: ready = wave.isContainerReady('xyz') then: - cache.getIfPresent('xyz') >> handle + cache.get('xyz') >> handle and: resp.requestId >> null resp.buildId >> 'bd-5678' @@ -1386,7 +1385,7 @@ class WaveClientTest extends Specification { when: ready = wave.isContainerReady('xyz') then: - cache.getIfPresent('xyz') >> handle + cache.get('xyz') >> handle and: resp.requestId >> null resp.buildId >> 'bd-5678'