Merge branch 'master' into azure_batch_no_container_spec
adamrtalbot authored Nov 28, 2024
2 parents 12965bf + ee25217 commit 7838c1e
Showing 59 changed files with 727 additions and 333 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/build.yml
@@ -90,6 +90,7 @@ jobs:
      - name: Test
        if: steps.changed-files.outputs.any_changed == 'true'
        run: |
          env | sort
          # configure test env
          if [[ "$GOOGLE_SECRET" ]]; then
            echo $GOOGLE_SECRET | base64 -d > $PWD/google_credentials.json
@@ -158,6 +159,11 @@ jobs:
      - name: Run tests
        run: |
          env | sort
          # configure test env
          if [[ "$GOOGLE_SECRET" ]]; then
            echo $GOOGLE_SECRET | base64 -d > $PWD/google_credentials.json
            export GOOGLE_APPLICATION_CREDENTIALS=$PWD/google_credentials.json
          fi
          cat $HOME/.nextflow/scm
          make clean assemble install
          bash test-ci.sh
33 changes: 17 additions & 16 deletions docs/aws.md
@@ -46,53 +46,54 @@ Minimal permissions policies to be attached to the AWS account used by Nextflow
- To use AWS Batch:

```json
"batch:CancelJob"
"batch:DescribeComputeEnvironments"
"batch:DescribeJobDefinitions"
"batch:DescribeJobQueues"
"batch:DescribeJobs"
"batch:ListJobs"
"batch:RegisterJobDefinition"
"batch:SubmitJob"
"batch:TagResource"
"batch:TerminateJob"
```

- To view [EC2](https://aws.amazon.com/ec2/) instances:

```json
"ec2:DescribeInstanceAttribute"
"ec2:DescribeInstances"
"ec2:DescribeInstanceStatus"
"ec2:DescribeInstanceTypes"
"ecs:DescribeContainerInstances"
"ecs:DescribeTasks"
```

- To pull container images from [ECR](https://aws.amazon.com/ecr/) repositories:

```json
"ecr:BatchCheckLayerAvailability"
"ecr:BatchGetImage"
"ecr:DescribeImages"
"ecr:DescribeImageScanFindings"
"ecr:DescribeRepositories"
"ecr:GetAuthorizationToken"
"ecr:GetDownloadUrlForLayer"
"ecr:GetLifecyclePolicy"
"ecr:GetLifecyclePolicyPreview"
"ecr:GetRepositoryPolicy"
"ecr:ListImages"
"ecr:ListTagsForResource"
```

:::{note}
If you are running Fargate or Fargate Spot, you may need the following policies in addition to those listed above:
```json
"ec2:DescribeSubnets"
"ecs:CreateCluster"
"ecs:DeleteCluster"
"ecs:DescribeClusters"
"ecs:ListClusters"
```
:::
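Taken together, action lists like the above are attached to the AWS account as standard IAM policy documents. The following is a minimal sketch of how the Batch actions might be assembled; the `Sid` and the wildcard `Resource` are illustrative assumptions, and resources should be scoped according to your own security requirements:

```json
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "NextflowBatchAccess",
            "Effect": "Allow",
            "Action": [
                "batch:CancelJob",
                "batch:DescribeComputeEnvironments",
                "batch:DescribeJobDefinitions",
                "batch:DescribeJobQueues",
                "batch:DescribeJobs",
                "batch:ListJobs",
                "batch:RegisterJobDefinition",
                "batch:SubmitJob",
                "batch:TagResource",
                "batch:TerminateJob"
            ],
            "Resource": "*"
        }
    ]
}
```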

4 changes: 4 additions & 0 deletions docs/cache-and-resume.md
@@ -148,6 +148,8 @@ process gather {
    input:
    tuple val(id), file(foo)
    tuple val(id), file(bar)

    script:
    """
    merge_command $foo $bar
    """
@@ -168,6 +170,8 @@ workflow {
process gather {
    input:
    tuple val(id), file(foo), file(bar)

    script:
    """
    merge_command $foo $bar
    """
1 change: 1 addition & 0 deletions docs/channel.md
@@ -45,6 +45,7 @@ process foo {
    output:
    path 'x.txt'

    script:
    """
    echo $x > x.txt
    """
30 changes: 30 additions & 0 deletions docs/cli.md
@@ -243,6 +243,36 @@ $ nextflow run <pipeline> --files "*.fasta"
```
:::

Parameters specified on the command line can also be specified in a params file using the `-params-file` option.

```bash
nextflow run main.nf -params-file pipeline_params.yml
```

The `-params-file` option loads parameters for your Nextflow pipeline from a JSON or YAML file. Parameters defined in the file are equivalent to specifying them directly on the command line. For example, instead of specifying parameters on the command line:

```bash
nextflow run main.nf --alpha 1 --beta foo
```

Parameters can be represented in YAML format:

```yaml
alpha: 1
beta: 'foo'
```

Or in JSON format:

```json
{
"alpha": 1,
"beta": "foo"
}
```

The parameters specified in a params file are merged with the resolved configuration. The values provided via a params file overwrite those of the same name in the Nextflow configuration file, but not those specified on the command line.
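As an illustrative sketch of that precedence, assume a hypothetical `alpha` parameter is defined in all three places:

```bash
# nextflow.config:       params.alpha = 1
# pipeline_params.yml:   alpha: 2
nextflow run main.nf -params-file pipeline_params.yml --alpha 3
# the workflow resolves params.alpha to 3: command line > params file > config file
```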

## Managing projects

Nextflow seamlessly integrates with popular Git providers, including [BitBucket](http://bitbucket.org/), [GitHub](http://github.com), and [GitLab](http://gitlab.com) for managing Nextflow pipelines as version-controlled Git repositories.
92 changes: 69 additions & 23 deletions docs/conda.md
@@ -6,7 +6,7 @@

Nextflow has built-in support for Conda that allows the configuration of workflow dependencies using Conda recipes and environment files.

This allows Nextflow applications to use popular tool collections such as [Bioconda](https://bioconda.github.io) and the [Python Package Index](https://pypi.org/), whilst taking advantage of the configuration flexibility provided by Nextflow.

## Prerequisites

@@ -22,7 +22,7 @@ Dependencies are specified by using the {ref}`process-conda` directive, providin
Conda environments are stored on the file system. By default, Nextflow instructs Conda to save the required environments in the pipeline work directory. The same environment may be created/saved multiple times across multiple executions when using different work directories.
:::

You can specify the directory where the Conda environments are stored using the `conda.cacheDir` configuration property. When using a computing cluster, make sure to use a shared file system path accessible from all compute nodes. See the {ref}`configuration page <config-conda>` for details about Conda configuration.
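For example, a shared cache path can be set in `nextflow.config`; the path below is an illustrative assumption:

```groovy
conda {
    // required to enable the Conda feature in recent Nextflow versions
    enabled  = true
    // shared file system path visible from all compute nodes
    cacheDir = '/shared/scratch/conda-envs'
}
```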

:::{warning}
The Conda environment feature is not supported by executors that use remote object storage as a work directory. For example, AWS Batch.
@@ -49,9 +49,10 @@ Conda package names can be specified using the `conda` directive. Multiple package
```nextflow
process foo {
    conda 'bwa samtools multiqc'

    script:
    """
    your_command --here
    """
}
```

@@ -61,6 +62,7 @@ The usual Conda package syntax and naming conventions can be used. The version o

The name of the channel where a package is located can be specified by prefixing the package name with the channel name, as shown here: `bioconda::bwa=0.7.15`.
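For example, a sketch pinning both the channel and the version of each tool (the versions shown are illustrative):

```nextflow
process foo {
    // channel::package=version syntax for each dependency
    conda 'bioconda::bwa=0.7.15 bioconda::samtools=1.15'

    script:
    """
    your_command --here
    """
}
```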

(conda-env-files)=
### Use Conda environment files

Conda environments can also be defined using one or more Conda environment files. This is a file that lists the required packages and channels structured using the YAML format. For example:
@@ -76,20 +78,6 @@ dependencies:
- bwa=0.7.15
```

Read the Conda documentation for more details about how to create [environment files](https://conda.io/docs/user-guide/tasks/manage-environments.html#creating-an-environment-file-manually).
The path of an environment file can be specified using the `conda` directive:
@@ -98,17 +86,37 @@ The path of an environment file can be specified using the `conda` directive:
```nextflow
process foo {
    conda '/some/path/my-env.yaml'

    script:
    """
    your_command --here
    """
}
```

:::{warning}
The environment file name **must** have a `.yml` or `.yaml` extension or else it won't be properly recognised.
:::

(conda-pypi)=
### Python packages from PyPI

Conda environment files can also be used to install Python packages from the [PyPI repository](https://pypi.org/), through the `pip` package manager (which must also be explicitly listed as a required package):

```yaml
name: my-env-2
channels:
- defaults
dependencies:
- pip
- pip:
- numpy
- pandas
- matplotlib
```

### Conda text files

It is possible to provide dependencies by listing each package name as a separate line in a plain text file. For example:

```
bioconda::star=2.5.4a
@@ -120,6 +128,43 @@ bioconda::multiqc=1.4
Like before, the extension matters. Make sure the dependencies file has a `.txt` extension.
:::

### Conda lock files

The final way to provide packages to Conda is with [Conda lock files](https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#identical-conda-envs).

These are generated from existing Conda environments using the following command:

```bash
conda list --explicit > spec-file.txt
```

or if using Mamba / Micromamba:

```bash
micromamba env export --explicit > spec-file.txt
```

Conda lock files can also be downloaded from [Wave](https://seqera.io/wave/) build pages.

These files list every package and its dependencies, so no Conda environment resolution step is needed. This makes them faster and more reproducible.

The files contain package URLs and an optional MD5 hash for each download, used to verify its integrity:

```
# micromamba env export --explicit
# This file may be used to create an environment using:
# $ conda create --name <env> --file <this file>
# platform: linux-64
@EXPLICIT
https://conda.anaconda.org/conda-forge/linux-64/_libgcc_mutex-0.1-conda_forge.tar.bz2#d7c89558ba9fa0495403155b64376d81
https://conda.anaconda.org/conda-forge/linux-64/libgomp-13.2.0-h77fa898_7.conda#abf3fec87c2563697defa759dec3d639
https://conda.anaconda.org/conda-forge/linux-64/_openmp_mutex-4.5-2_gnu.tar.bz2#73aaf86a425cc6e73fcf236a5a46396d
https://conda.anaconda.org/conda-forge/linux-64/libgcc-ng-13.2.0-h77fa898_7.conda#72ec1b1b04c4d15d4204ece1ecea5978
# .. and so on
```

To use a lock file with Nextflow, set the `conda` directive to the lock file path.
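For example, following the same pattern as the environment file directive above (the path to `spec-file.txt` is an illustrative assumption):

```nextflow
process foo {
    // lock file generated with `conda list --explicit`
    conda '/some/path/spec-file.txt'

    script:
    """
    your_command --here
    """
}
```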

### Use existing Conda environments

If you already have a local Conda environment, you can use it in your workflow specifying the installation directory of such environment by using the `conda` directive:
@@ -128,9 +173,10 @@ If you already have a local Conda environment, you can use it in your workflow s
```nextflow
process foo {
    conda '/path/to/an/existing/env/directory'

    script:
    """
    your_command --here
    """
}
```

11 changes: 11 additions & 0 deletions docs/config.md
@@ -113,6 +113,15 @@ The following constants are globally available in a Nextflow configuration file:
`projectDir`
: The directory where the main script is located.

## Functions

The following functions are globally available in a Nextflow configuration file:

`env( name )`
: :::{versionadded} 24.11.0-edge
:::
: Get the value of the environment variable with the specified name in the Nextflow launch environment.
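
For example, a sketch that reads a value from the launch environment into the configuration; the variable and parameter names are illustrative assumptions:

```groovy
// nextflow.config
// resolve an output directory from the launch environment
params.outdir = env('MY_OUTPUT_DIR')
```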

(config-params)=

## Parameters
@@ -129,6 +138,8 @@ params {
}
```

See {ref}`cli-params` for information about how to modify these on the command line.

(config-process)=

## Process configuration
