Skip to content

Commit

Permalink
Delete serotype nextclade dataset
Browse files Browse the repository at this point in the history
  • Loading branch information
j23414 committed Feb 6, 2024
1 parent de11274 commit 35ee5d3
Show file tree
Hide file tree
Showing 9 changed files with 0 additions and 200,313 deletions.
72 changes: 0 additions & 72 deletions ingest/workflow/snakemake_rules/nextclade.smk
Original file line number Diff line number Diff line change
@@ -1,75 +1,3 @@
rule nextclade_all:
    """
    Run Nextclade over data/sequences.fasta using the "all" dataset so that
    each sequence is classified as DENV1, DENV2, DENV3, or DENV4. The
    per-sequence results are written as a single TSV.
    """
    input:
        sequences="data/sequences.fasta",
        dataset="../nextclade_data/all",
    output:
        "data/nextclade_results/nextclade_all.tsv",
    params:
        # Forwarded to --min-seed-cover.
        min_seed_cover=0.01,
        # Forwarded to --min-length; E gene length is approximately 1400.
        min_length=1000,
    # Also handed to nextclade as its job count (-j).
    threads: 4
    shell:
        """
        nextclade run \
            --input-dataset {input.dataset} \
            -j {threads} \
            --output-tsv {output} \
            --min-seed-cover {params.min_seed_cover} \
            --min-length {params.min_length} \
            --silent \
            {input.sequences}
        """

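# TODO: the four per-serotype `augur filter` calls below are independent of each other and could be run in parallel (e.g. one rule per serotype).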
rule split_dengue_sequences:
    """
    Produce one FASTA per serotype (DENV1-DENV4) plus an unfiltered copy.

    The Nextclade TSV is passed to `augur filter` as the metadata table,
    keyed on its `seqName` column, and sequences are selected by the value
    of its `clade` column.
    """
    input:
        sequences="data/sequences.fasta",
        # Not referenced in the shell command below; listing it still makes
        # this rule depend on the metadata file.
        metadata="data/metadata.tsv",
        nextclade_all_results="data/nextclade_results/nextclade_all.tsv",
    output:
        sequences_all="results/sequences_all.fasta",
        sequences_denv1="results/sequences_denv1.fasta",
        sequences_denv2="results/sequences_denv2.fasta",
        sequences_denv3="results/sequences_denv3.fasta",
        sequences_denv4="results/sequences_denv4.fasta",
    shell:
        """
        cp {input.sequences} {output.sequences_all}
        augur filter \
            --sequences {input.sequences} \
            --metadata {input.nextclade_all_results} \
            --metadata-id-columns seqName \
            --query "clade=='DENV1'" \
            --output-sequences {output.sequences_denv1}
        augur filter \
            --sequences {input.sequences} \
            --metadata {input.nextclade_all_results} \
            --metadata-id-columns seqName \
            --query "clade=='DENV2'" \
            --output-sequences {output.sequences_denv2}
        augur filter \
            --sequences {input.sequences} \
            --metadata {input.nextclade_all_results} \
            --metadata-id-columns seqName \
            --query "clade=='DENV3'" \
            --output-sequences {output.sequences_denv3}
        augur filter \
            --sequences {input.sequences} \
            --metadata {input.nextclade_all_results} \
            --metadata-id-columns seqName \
            --query "clade=='DENV4'" \
            --output-sequences {output.sequences_denv4}
        """

rule nextclade_denvX:
"""
Expand Down
1 change: 0 additions & 1 deletion nextclade_data/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

| Serotype | Reference | Nextclade link |
|:--|:--|:--|
| all | [NC_002640.1](https://www.ncbi.nlm.nih.gov/nuccore/NC_002640.1) | [nextclade all](https://clades.nextstrain.org/?dataset-url=https://github.com/nextstrain/dengue/tree/main/nextclade_data/all) |
| denv1 | [NC_001477.1](https://www.ncbi.nlm.nih.gov/nuccore/NC_001477.1) | [nextclade denv1](https://clades.nextstrain.org/?dataset-url=https://github.com/nextstrain/dengue/tree/main/nextclade_data/denv1) |
| denv2 | [NC_001474.2](https://www.ncbi.nlm.nih.gov/nuccore/NC_001474.2) | [nextclade denv2](https://clades.nextstrain.org/?dataset-url=https://github.com/nextstrain/dengue/tree/main/nextclade_data/denv2) |
| denv3 | [NC_001475.2](https://www.ncbi.nlm.nih.gov/nuccore/NC_001475.2) | [nextclade denv3](https://clades.nextstrain.org/?dataset-url=https://github.com/nextstrain/dengue/tree/main/nextclade_data/denv3) |
Expand Down
5 changes: 0 additions & 5 deletions nextclade_data/all/CHANGELOG.md

This file was deleted.

7 changes: 0 additions & 7 deletions nextclade_data/all/README.md

This file was deleted.

14 changes: 0 additions & 14 deletions nextclade_data/all/genome_annotation.gff3

This file was deleted.

66 changes: 0 additions & 66 deletions nextclade_data/all/pathogen.json

This file was deleted.

179 changes: 0 additions & 179 deletions nextclade_data/all/reference.fasta

This file was deleted.

Loading

0 comments on commit 35ee5d3

Please sign in to comment.