-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add testing pipeline * Add env files * prepare mob-suite db prior to use * Use full path to mob-suite db * Use different sample for test data * update * Fix provenance * Update README * fix provenance * fix provenance * fix provenance in README
- Loading branch information
Showing
20 changed files
with
385 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
GCF024700185.1,.github/data/assemblies/GCF024700185.1.fa |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
name: art | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
- art=2016.06.05 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
name: check-outputs | ||
channels: | ||
- conda-forge | ||
- bioconda | ||
- defaults | ||
dependencies: | ||
- python=3 | ||
- jsonschema=4.20.0 | ||
- pyyaml=6.0.1 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
#!/usr/bin/env python3 | ||
|
||
import argparse | ||
import csv | ||
import glob | ||
import json | ||
import os | ||
import urllib.request | ||
|
||
from jsonschema import validate | ||
import yaml | ||
|
||
|
||
def check_provenance_format_valid(provenance_files, schema):
    """
    Check that the provenance files are valid according to the schema.

    :param provenance_files: Paths of the provenance YAML files to validate
    :param schema: Parsed JSON schema each provenance document is validated against
    :return: True if every file loads and validates (also True for an empty list), False otherwise
    :rtype: bool
    """
    for provenance_file in provenance_files:
        try:
            # Opening inside the try block so that an unreadable file is
            # reported as a failed check instead of an uncaught exception.
            with open(provenance_file) as f:
                provenance = yaml.load(f, Loader=yaml.BaseLoader)
            validate(provenance, schema)
        except Exception as e:
            # Return instead of exiting: the caller records the FAIL result
            # in its report and decides the process exit status.
            print(f"Error validating {provenance_file}: {e}")
            return False

    return True
|
||
def check_expected_files_exist(output_dir, sample_ids):
    """
    Check that the expected files exist in the output directory.

    Every missing file is reported (one line per file) rather than only the
    first one, so a single run shows the complete list of missing outputs.

    :param output_dir: Path to the output directory
    :param sample_ids: List of sample IDs
    :return: True if all expected files exist, False otherwise
    :rtype: bool
    """
    # File name suffixes expected under <output_dir>/<sample_id>/<sample_id>_<suffix>
    expected_suffixes = (
        "fastp.csv",
        "fastp.json",
        "quast.csv",
        "abricate_ncbi.tsv",
        "abricate_plasmidfinder.tsv",
        "resistance_gene_report.tsv",
    )

    all_found = True
    for sample_id in sample_ids:
        for suffix in expected_suffixes:
            expected_file_path = os.path.join(output_dir, sample_id, f"{sample_id}_{suffix}")
            if not os.path.exists(expected_file_path):
                print(f"Expected file {expected_file_path} not found")
                all_found = False

    return all_found
|
||
|
||
def main(args):
    """
    Run the output checks and write one PASS/FAIL row per check to ``args.output``.

    Downloads the provenance JSON schema, collects the ``*_provenance.yml``
    files below ``args.pipeline_outdir``, runs the checks, writes the results
    as CSV and exits with status 1 if any check failed.

    :param args: Parsed command-line arguments; ``pipeline_outdir`` and ``output`` are read
    :raises SystemExit: With code 1 when at least one check did not pass
    """
    # dirname is '' when the output path has no directory part; makedirs('') would raise
    output_dir = os.path.dirname(args.output)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    provenance_schema_url = "https://raw.githubusercontent.com/BCCDC-PHL/pipeline-provenance-schema/main/schema/pipeline-provenance.json"
    provenance_schema_path = ".github/data/pipeline-provenance.json"
    # urlretrieve does not create missing parent directories
    os.makedirs(os.path.dirname(provenance_schema_path), exist_ok=True)
    urllib.request.urlretrieve(provenance_schema_url, provenance_schema_path)

    with open(provenance_schema_path) as f:
        provenance_schema = json.load(f)

    provenance_files_glob = f"{args.pipeline_outdir}/**/*_provenance.yml"
    provenance_files = glob.glob(provenance_files_glob, recursive=True)

    # The sample ID is taken as the text before the first underscore of the file name
    sample_ids = [os.path.basename(provenance_file).split("_")[0] for provenance_file in provenance_files]

    # TODO: Add more tests
    tests = [
        {
            "test_name": "provenance_format_valid",
            "test_passed": check_provenance_format_valid(provenance_files, provenance_schema),
        },
        {
            "test_name": "all_expected_files_exist",
            "test_passed": check_expected_files_exist(args.pipeline_outdir, sample_ids),
        },
    ]

    output_fields = [
        "test_name",
        "test_result"
    ]

    # newline='' is what the csv module expects for files it writes to
    with open(args.output, 'w', newline='') as f:
        # extrasaction='ignore' drops the internal "test_passed" key from the rows
        writer = csv.DictWriter(f, fieldnames=output_fields, extrasaction='ignore')
        writer.writeheader()
        for test in tests:
            test["test_result"] = "PASS" if test["test_passed"] else "FAIL"
            writer.writerow(test)

    # Results are written first so the report exists even when the run fails
    if not all(test["test_passed"] for test in tests):
        raise SystemExit(1)
|
||
|
||
if __name__ == '__main__':
    # Both options are used unconditionally by main(), so argparse is told to
    # reject a command line that omits either of them.
    parser = argparse.ArgumentParser(description='Check outputs')
    parser.add_argument('--pipeline-outdir', type=str, required=True, help='Path to the pipeline output directory')
    parser.add_argument('-o', '--output', type=str, required=True, help='Path to the output file')
    args = parser.parse_args()
    main(args)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
#!/usr/bin/env bash

# Run the output checks inside the `check-outputs` conda environment.
# All paths are relative to the current working directory.

set -e -o pipefail

# Quoted so that a home directory containing whitespace is not word-split.
source "${HOME}/.bashrc"

# Make `conda activate` usable in this non-interactive shell.
eval "$(conda shell.bash hook)"

conda activate check-outputs

.github/scripts/check_outputs.py --pipeline-outdir .github/data/test_output -o artifacts/check_outputs_results.csv
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Create the `art` conda environment from its specification file.
conda env create --file .github/environments/art.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Create the `check-outputs` conda environment from its specification file.
conda env create --file .github/environments/check-outputs.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash

# Write .github/data/samplesheet.csv with one row per *_R1.fastq.gz file found
# in .github/data/fastq. R2 and assembly paths are derived from the sample ID
# and written as absolute paths based on the current working directory.

data_dir="${PWD}/.github/data"
samplesheet=".github/data/samplesheet.csv"

echo 'ID,R1,R2,ASSEMBLY' > "${samplesheet}"

# Iterate over the glob directly instead of parsing `ls` output; nullglob
# makes the loop body run zero times when no FASTQ file is present.
shopt -s nullglob
for r1 in "${data_dir}"/fastq/*_R1.fastq.gz; do
    id=$(basename "${r1}" _R1.fastq.gz)
    r2="${data_dir}/fastq/${id}_R2.fastq.gz"
    assembly="${data_dir}/assemblies/${id}.fa"
    echo "${id},${r1},${r2},${assembly}" >> "${samplesheet}"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
#!/bin/bash

# Download the GCF_024700185.1 genome package from the NCBI Datasets API and
# store its genomic FASTA as .github/data/assemblies/GCF024700185.1.fa.

# Stop at the first failing step instead of continuing with missing files.
set -eo pipefail

accession="GCF_024700185.1"
assemblies_dir=".github/data/assemblies"
archive="${assemblies_dir}/${accession}.zip"

mkdir -p "${assemblies_dir}"

# Remove leftovers from a previous run
rm -f "${archive}"
rm -f "${assemblies_dir}/GCF024700185.1.fa"
rm -f "${assemblies_dir}/README.md"

# --fail: an HTTP error must not be saved as if it were the archive
# --location: follow redirects
curl --fail --location -o "${archive}" "https://api.ncbi.nlm.nih.gov/datasets/v2alpha/genome/accession/${accession}/download?include_annotation_type=GENOME_FASTA&include_annotation_type=SEQUENCE_REPORT&hydrated=FULLY_HYDRATED"

# -o: overwrite files left behind by an interrupted run without prompting
unzip -o "${archive}" -d "${assemblies_dir}"

mv "${assemblies_dir}/ncbi_dataset/data/${accession}/${accession}_ASM2470018v1_genomic.fna" "${assemblies_dir}/GCF024700185.1.fa"

rm -r "${assemblies_dir}/ncbi_dataset"
rm -f "${assemblies_dir}/README.md"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
#!/bin/bash

# Download the mob-suite database archive into .github/data/mob-suite-db and
# build the mash sketch and BLAST database for the plasmid sequences.

source "${HOME}/.bashrc"

eval "$(conda shell.bash hook)"

mkdir -p .github/data

rm -rf .github/data/mob-suite-db

pushd .github/data || exit 1

# The URL is quoted because `?` is a glob character for the shell.
# Without the archive nothing below can work, so stop on download/extract errors.
wget -O data.tar.gz "https://zenodo.org/records/10304948/files/data.tar.gz?download=1" || exit 1

tar -xzf data.tar.gz || exit 1

rm data.tar.gz

mv data mob-suite-db

# NOTE(review): the steps below are left best-effort (no exit on failure).
# The environment name carries a hash suffix and may not exist on every
# machine or before the pipeline has run - confirm before making this strict.
conda activate plasmid-screen-35d122a137231eda3b8a0039d42f24f6

mash sketch -i mob-suite-db/ncbi_plasmid_full_seqs.fas

makeblastdb -in mob-suite-db/ncbi_plasmid_full_seqs.fas -dbtype nucl

popd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
#!/bin/bash
set -eo pipefail

# Install Miniconda into /opt/miniconda3, update conda, install mamba and
# initialise conda for bash.

artifacts_dir="artifacts"

# The log lives in the artifacts directory; make sure it exists first.
mkdir -p "${artifacts_dir}"

echo "Install Miniconda .." >> "${artifacts_dir}/test.log"

export PATH=/opt/miniconda3/bin:$PATH

wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh

/bin/bash ~/miniconda.sh -b -p /opt/miniconda3

rm ~/miniconda.sh

# Path must match the install prefix above (was misspelled "minconda3").
echo ". /opt/miniconda3/etc/profile.d/conda.sh" >> ~/.bashrc

# -y: never wait for confirmation in a non-interactive run
conda update -y -n base -c defaults conda

conda install -y -c conda-forge mamba

conda init bash
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/bin/bash

# Fetch the Nextflow launcher with the official installer and put it on PATH.

set -eo pipefail

artifacts_dir="artifacts"
log_file="${artifacts_dir}/test.log"

echo "Install Nextflow .." >> "${log_file}"

# The installer leaves a `nextflow` executable in the current directory
wget -qO- https://get.nextflow.io | bash

sudo mv nextflow /usr/local/bin/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
#!/bin/bash

# Collect the simulated reads and the pipeline outputs under artifacts/.
# Either set of files may be missing (for example when an earlier step
# failed), so each move is skipped when there is nothing to move.

artifacts_dir="artifacts"

# Create the target directories before anything is written into them.
mkdir -p "${artifacts_dir}/fastq" "${artifacts_dir}/pipeline_outputs"

echo "Prepare artifacts .." >> "${artifacts_dir}/test.log"

# With nullglob an unmatched pattern expands to an empty list instead of
# being passed to mv literally.
shopt -s nullglob

fastq_files=(.github/data/fastq/*.fastq.gz)
if (( ${#fastq_files[@]} > 0 )); then
    mv "${fastq_files[@]}" "${artifacts_dir}/fastq"
fi

pipeline_outputs=(.github/data/test_output/*)
if (( ${#pipeline_outputs[@]} > 0 )); then
    mv "${pipeline_outputs[@]}" "${artifacts_dir}/pipeline_outputs"
fi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#!/bin/bash

# Run the pipeline on the test samplesheet and write outputs, report and trace
# to .github/data/test_output.

set -eo pipefail

if [ -n "${GITHUB_ACTIONS}" ]; then
    echo "Running in GitHub Actions Environment"
    echo "Adjusting nextflow.config"
    # Lower the configured CPU count from 16 to 4 for this environment
    sed -i 's/cpus = 16/cpus = 4/g' nextflow.config
else
    echo "Not running in GitHub Actions Environment"
fi

test_output_dir=".github/data/test_output"

# Path arguments are quoted so directories containing whitespace stay intact.
nextflow run main.nf \
    -profile conda \
    --cache "${HOME}/.conda/envs" \
    --samplesheet_input .github/data/samplesheet.csv \
    --pre_assembled \
    --mob_db "${PWD}/.github/data/mob-suite-db" \
    --collect_outputs \
    --collected_outputs_prefix test \
    --outdir "${test_output_dir}" \
    -with-report "${test_output_dir}/nextflow_report.html" \
    -with-trace "${test_output_dir}/nextflow_trace.tsv"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/bash

# Reset the test data directory and run the test preparation scripts followed
# by the pipeline, in order.

# Each step depends on the previous one, so stop at the first failure.
set -eo pipefail

# Remove data left over from a previous run
rm -rf .github/data/assemblies/*
rm -rf .github/data/fastq/*
rm -rf .github/data/mob-suite-db
rm -rf .github/data/samplesheet.csv
rm -rf .github/data/test_output

.github/scripts/download_assemblies.sh

.github/scripts/simulate_reads.sh

.github/scripts/download_mob-suite_db.sh

.github/scripts/create_samplesheet.sh

.github/scripts/run_pipeline.sh
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#!/bin/bash

# Simulate paired-end reads with art_illumina for every "sample_id,assembly"
# row of .github/data/reads_to_simulate.csv and leave gzipped FASTQ files in
# .github/data/fastq.

source "${HOME}/.bashrc"

eval "$(conda shell.bash hook)"

conda activate art

fastq_dir=".github/data/fastq"

mkdir -p "${fastq_dir}"

# `|| [ -n ... ]` keeps the final row when the CSV has no trailing newline;
# plain `read` returns non-zero for such a line and the row would be skipped.
while IFS=',' read -r sample_id assembly || [ -n "${sample_id}" ]; do
    out_prefix="${fastq_dir}/${sample_id}_R"

    art_illumina \
        --paired \
        --in "${assembly}" \
        --fcov 12 \
        --len 150 \
        --mflen 400 \
        --sdev 100 \
        --rndSeed 42 \
        --qShift 0 \
        --qShift2 0 \
        --out "${out_prefix}"

    for read_number in 1 2; do
        # Drop the alignment file, then rename .fq to .fastq and compress it
        rm -f "${out_prefix}${read_number}.aln"
        mv "${out_prefix}${read_number}.fq" "${out_prefix}${read_number}.fastq"
        gzip -f "${out_prefix}${read_number}.fastq"
    done

done < .github/data/reads_to_simulate.csv
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
# Runs the pipeline on simulated test data for each Nextflow version in the
# matrix, checks the outputs and uploads the artifacts directory.
on:
  pull_request:
    branches:
      - main
  push:
    branches:
      - main
  workflow_dispatch:
name: Tests
jobs:
  test:
    strategy:
      fail-fast: false
      matrix:
        nextflow_version: ["21.04.3", "23.10.1"]
    name: Run tests
    runs-on: ubuntu-latest
    env:
      # Set for the whole job so the same version is used when the pipeline is
      # run, not only when the launcher is installed.
      NXF_VER: ${{ matrix.nextflow_version }}
    steps:
      # Pinned to a release tag instead of the moving `master` ref
      - uses: actions/checkout@v4
      - name: Create Artifacts Directory
        run: mkdir artifacts
      - name: Install Miniconda
        run: bash .github/scripts/install_conda.sh
      - name: Install Nextflow
        run: bash .github/scripts/install_nextflow.sh
      - name: Create ART Read-Simulation Environment
        run: bash .github/scripts/create_art_environment.sh
      - name: Download Assemblies
        run: bash .github/scripts/download_assemblies.sh
      - name: Simulate Reads
        run: bash .github/scripts/simulate_reads.sh
      - name: Download mob-suite db
        run: bash .github/scripts/download_mob-suite_db.sh
      - name: Create SampleSheet
        run: bash .github/scripts/create_samplesheet.sh
      - name: Run Pipeline
        run: bash .github/scripts/run_pipeline.sh
      - name: Create Output Checking Environment
        run: bash .github/scripts/create_output_checking_environment.sh
      - name: Check Outputs
        run: bash .github/scripts/check_outputs.sh
      # The two steps below run even after a failure so logs and partial outputs are kept
      - name: Prepare Artifacts
        if: always()
        run: bash .github/scripts/prepare_artifacts.sh
      - name: Upload Artifacts
        uses: actions/upload-artifact@v4
        if: always()
        with:
          name: artifacts-BCCDC-PHL-routine-assembly-nextflow-v${{ matrix.nextflow_version }}-${{ github.run_id }}.${{ github.run_attempt }}
          path: artifacts
Oops, something went wrong.