-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.nf
executable file
·222 lines (168 loc) · 5.87 KB
/
main.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
#!/usr/bin/env nextflow
import Helper

// Print usage and exit cleanly when --help is requested.
if (params.help) {
    // BUGFIX: the imported class is `Helper`; the original called the
    // undefined name `Help`, which would throw on --help instead of
    // printing the usage message.
    Helper.print_help(params)
    exit(0)
}

// Validate db_name before wiring it into a value channel.
if (params.db_name instanceof String) {
    IN_db_name = Channel.value(params.db_name)
} else {
    println("Variable db_name isn't a string and should be a string.")
    // BUGFIX: abort immediately — the original fell through with
    // IN_db_name undefined, producing an obscure failure later when
    // runMASHix tried to read the channel.
    exit(1)
}

// Translate the boolean sequencesRemove param into the CLI flag that
// MASHix.py expects ("" disables the behaviour).
if (params.sequencesRemove == true) {
    IN_sequences_removal = Channel.value("--search-sequences-to-remove")
} else {
    IN_sequences_removal = Channel.value("")
}
// Fetch the RefSeq plasmid multi-fasta archives from the NCBI FTP site
// (URL taken from params.ncbi_ftp) and decompress them for MASHix.
process downloadFastas {

    tag {"downloading plasmids from ncbi refseq ftp"}

    output:
    file "plasmid.*.1.genomic.fna" into downloadedFastas

    script:
    """
    wget $params.ncbi_ftp
    gunzip plasmid.*.1.genomic.fna.gz
    """
}
// Core database-building step: configures a local postgres instance,
// downloads the NCBI taxonomy dump, runs MASHix.py over the plasmid
// fastas and dumps the populated database to a .sql file.
process runMASHix {
// fastas: plasmid multi-fastas from downloadFastas
// db_name_var: postgres database name (params.db_name)
// sequencesToRemove: "--search-sequences-to-remove" or "" (params.sequencesRemove)
tag {"Running MASHix"}
publishDir "results/MASHix/"
input:
file fastas from downloadedFastas
val db_name_var from IN_db_name
val sequencesToRemove from IN_sequences_removal
// The master fasta fans out to five downstream consumers (abricate,
// abricate/plasmidfinder, samtools, bowtie2, diamond).
output:
file "${db_name_var}/*.fas" into (masterFasta_abricate, masterFasta_abricatepf, masterFasta_samtools, masterFasta_bowtie2, masterFasta_diamond)
file "${db_name_var}/results/*.json" into patlasJson
file "*.json" into taxaTree
file "*sql" into sqlFileMashix
// NOTE(review): "lenghtJson" looks like a typo for "lengthJson"; kept
// as-is since code outside this view may reference the channel name.
file "${db_name_var}/*json" into lenghtJson
file "${db_name_var}/reference_sketch/${db_name_var}_reference.msh" into mashIndex
file "${db_name_var}/*.txt" into actualRemovedSequences
// Script flow: start postgres and create the db, fetch nodes.dmp/names.dmp
// from the NCBI taxonomy dump, run MASHix.py, pg_dump the result, then
// remove the intermediate taxonomy files. Requires root inside the
// container (service/sudo calls).
"""
echo "Configuring psql and creating $db_name_var"
service postgresql start
service postgresql status
sudo -u postgres createuser -w -s root
createdb $db_name_var
db_create.py $db_name_var
echo "Downloading ncbi taxonomy"
wget ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz
tar -xvzf taxdump.tar.gz
echo "Running MASHix.py"
MASHix.py -i ${fastas} -o ${db_name_var} -t ${task.cpus} -non nodes.dmp \
-nan names.dmp -rm ${sequencesToRemove} -db ${db_name_var}
echo "Dumping to database file"
pg_dump ${db_name_var} > ${db_name_var}.sql
rm *.dmp *.prt *.txt *.tar.gz
"""
}
// Build a samtools faidx index (.fai) for the master fasta from MASHix.
process samtoolsIndex {

    tag {"Creating samtools index"}
    publishDir "results/samtools_indexes/"

    input:
    file fasta from masterFasta_samtools

    output:
    file "*.fai" into samtoolsIndexChannel

    script:
    """
    echo "Creating samtools index"
    samtools faidx ${fasta}
    """
}
// Screen the master fasta with abricate, once per database listed in
// params.abricateDatabases; each run yields abr_<db>.tsv.
process abricate {

    tag {"running abricate"}

    input:
    file fasta from masterFasta_abricate
    each db from params.abricateDatabases

    output:
    file "*.tsv" into abricateOutputs

    script:
    """
    abricate --db ${db} ${fasta} > abr_${db}.tsv
    """
}
// Clone the latest plasmidfinder database, build a blast nucleotide db
// from its .fsa files, then screen the master fasta against it with
// abricate (custom --datadir pointing at the fresh clone).
process abricate_plasmidfinder_db {

    tag {"updating plasmidfinder database and running abricate"}

    input:
    file fasta from masterFasta_abricatepf

    output:
    file "*.tsv" into abricateOutputsPlasmidFinder

    script:
    """
    git clone https://bitbucket.org/genomicepidemiology/plasmidfinder_db/
    cd plasmidfinder_db/ && cat *.fsa >> sequences
    makeblastdb -in sequences -title tinyamr -dbtype nucl -parse_seqids -hash_index
    cd ..
    abricate --db plasmidfinder_db --datadir ./ ${fasta} > abr_plasmidfinder_db.tsv
    """
}
// Translated search (blastx) of the master fasta against the bacmet
// protein database with diamond; tabular (-f 6) output with a custom
// column list and an e-value cutoff of 1E-20.
process diamond {
tag {"running diamond"}
// NOTE(review): `db` from params.diamondDatabases is only used to name
// the output file — the diamond database path is hard-coded to bacmet.
// If more than one diamond database is ever configured, every iteration
// would still query bacmet. Confirm this is intentional.
input:
file masterFastaFile from masterFasta_diamond
each db from params.diamondDatabases
output:
file "*.txt" into diamondOutputs
"""
diamond blastx -d /ngstools/bin/bacmet/bacmet -q ${masterFastaFile} \
-o ${db}.txt -e 1E-20 -p ${task.cpus} \
-f 6 qseqid sseqid pident length mismatch gapopen qstart qend slen sstart send evalue bitscore
"""
}
// Load all annotation results into the MASHix database: restores the
// .sql dump into a fresh postgres instance, imports the abricate
// (resistance/plasmidfinder/virulence) and diamond (bacmet) hits, then
// dumps the final database to <db_name>_final.sql.
process abricate2db {
// abricate/diamond: collected outputs from the abricate and diamond
// processes; abricatePlasmidFinder: single tsv from the updated
// plasmidfinder db; sqlFile: pg_dump produced by runMASHix.
tag {"sending abricate to database"}
publishDir "results/sql_file/"
input:
file abricate from abricateOutputs.collect()
file diamond from diamondOutputs.collect()
file abricatePlasmidFinder from abricateOutputsPlasmidFinder
file sqlFile from sqlFileMashix
val db_name_var from IN_db_name
output:
file "*final.sql" into FinalDbSql
file "*.json" into dropdownJsons
// NOTE(review): the abr_card.tsv / abr_resfinder.tsv / abr_vfdb.tsv names
// referenced below assume that params.abricateDatabases contains card,
// resfinder and vfdb — confirm against the pipeline's default params.
"""
echo ${abricate}
echo "Configuring psql and creating $db_name_var"
service postgresql start
service postgresql status
sudo -u postgres createuser -w -s root
createdb $db_name_var
psql -d ${db_name_var} -f ${db_name_var}.sql
echo "Dumping into database - resistance"
abricate2db.py -i abr_card.tsv abr_resfinder.tsv -db resistance \
-id ${params.abricateId} -cov ${params.abricateCov} -csv ${params.cardCsv} \
-db_psql ${db_name_var}
echo "Dumping into database - plasmidfinder_db latest"
abricate2db.py -i abr_plasmidfinder_db.tsv -db plasmidfinder \
-id ${params.abricateId} -cov ${params.abricateCov} -csv ${params.cardCsv} \
-db_psql ${db_name_var}
echo "Dumping into database - virulence"
abricate2db.py -i abr_vfdb.tsv -db virulence \
-id ${params.abricateId} -cov ${params.abricateCov} -csv ${params.cardCsv} \
-db_psql ${db_name_var}
echo "Dumping into database - bacmet"
diamond2db.py -db metal -i bacmet.txt -db_psql ${db_name_var}
echo "Writing to sql file"
pg_dump ${db_name_var} > ${db_name_var}_final.sql
"""
}
// Build the bowtie2 index for the master fasta produced by MASHix.py.
process bowtieIndex {

    tag {"creating bowtie2 index"}
    publishDir "results/bowtie_indexes/"

    input:
    file fasta from masterFasta_bowtie2

    output:
    file "*bowtie2_index.*" into bowtieIndexChannel

    script:
    """
    echo "Creating bowtie2 index"
    bowtie2-build -q ${fasta} --threads ${task.cpus} patlas_bowtie2_index
    """
}