forked from pedroscampoy/bacterial_wgs_training
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconfig2.file
197 lines (142 loc) · 6.08 KB
/
config2.file
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
## This config file contains the information about the analysis for Outbreak detection and characterization of foodborne bacteria
## All paths must be absolute and must NOT end with "/"
############################################## RunInfo ################################################################
# Descriptive information about the sequencing run being analysed.
AUTHOR=BU-ISCIII
# NOTE(review): both dates look like YYYYMMDD — confirm the format the pipeline expects
DATE=20181005
DATE_RUN=20171122
LIBRARY=NEXTERA-XT
SEQUENCING_CENTER=ISCIII
PLATFORM=ILLUMINA
MODEL=NEXTSEQ
RUN_PLATFORM=NS0001
# Read layout; "paired" matches the R1/R2 file pairs listed for every sample further down.
# NOTE(review): other accepted values are not shown in this file — confirm against the pipeline docs
SEQ_TYPE=paired
############################# Pipeline steps: Fill in with YES or NO (capital letter) ###################################
# One switch per pipeline step. Values must be written in capitals, exactly YES or NO.
TRIMMING=NO
CHECK_REFERENCES=YES
MAPPING=YES
DUPLICATE_FILTER=YES
VARIANT_CALLING=YES
KMERFINDER=NO
SRST2=NO
CFSAN=NO
VCF_TO_MSA=YES
RAXML=YES
STATS=YES
############################################# System variables #######################################################
# Set memory for the Java Virtual Machine
# Example: -Xmx15g
JAVA_RAM=-Xmx2g
# Set to 1 if the pipeline is executed on an HPC cluster, 0 if it is executed locally
USE_SGE=0
# Set memory per slot (left empty in this configuration)
# Example: 20g
H_VMEM=
# Set number of threads
THREADS=8
# Queue(s) to submit to (left empty in this configuration)
# NOTE(review): presumably only needed for HPC execution — confirm
QUEUE=
############################################# Directories #########################################################
# All values in this section are absolute paths with no trailing "/".
# NOTE(review): these point to one user's home directory — adjust for the machine the pipeline runs on
# Path to the directory which contains all the scripts
SCRIPTS_DIR=/home/smonzon/tmp_singularity/WGS-Outbreaker/lib
# Path to temporary directory
TEMP_DIR=/home/smonzon/tmp
# Directory with input files
INPUT_DIR=/home/smonzon/Documents/bacterial_wgs_training/results/trimming/trimmed
# Directory for output files
OUTPUT_DIR=/home/smonzon/Documents/bacterial_wgs_training/results/wgs_outbreaker
########################################## INPUT VARIABLES########################################################
# Samples info:
# All sample IDs must be listed on a single line, separated by ":". Then, for each sample, there must be
# one line giving the file names for R1 and R2, separated by a tab.
# NOTE(review): confirm that the R1/R2 separator in the entries below is a real tab character; editors and
# copy/paste can silently convert tabs to spaces
# Example (the key name is omitted from the first example line):
#=AAAA_01:BBBB_02
# AAAA_01=AAAA_01_R1.fastq.gz AAAA_01_R2.fastq.gz
# BBBB_02=BBBB_02_R1.fastq.gz BBBB_02_R2.fastq.gz
SAMPLES=RA-L2073:RA-L2281:RA-L2327:RA-L2391:RA-L2450:RA-L2677:RA-L2701:RA-L2782:RA-L2805:RA-L2978
RA-L2073=RA-L2073_paired_R1.fastq.gz RA-L2073_paired_R2.fastq.gz
RA-L2281=RA-L2281_paired_R1.fastq.gz RA-L2281_paired_R2.fastq.gz
RA-L2327=RA-L2327_paired_R1.fastq.gz RA-L2327_paired_R2.fastq.gz
RA-L2391=RA-L2391_paired_R1.fastq.gz RA-L2391_paired_R2.fastq.gz
RA-L2450=RA-L2450_paired_R1.fastq.gz RA-L2450_paired_R2.fastq.gz
RA-L2677=RA-L2677_paired_R1.fastq.gz RA-L2677_paired_R2.fastq.gz
RA-L2701=RA-L2701_paired_R1.fastq.gz RA-L2701_paired_R2.fastq.gz
RA-L2782=RA-L2782_paired_R1.fastq.gz RA-L2782_paired_R2.fastq.gz
RA-L2805=RA-L2805_paired_R1.fastq.gz RA-L2805_paired_R2.fastq.gz
RA-L2978=RA-L2978_paired_R1.fastq.gz RA-L2978_paired_R2.fastq.gz
######################################################### Reference Variables ###########################################
# Path to reference genome
# NOTE(review): the value below is a bare file name, while the file header asks for absolute paths —
# confirm how the pipeline resolves it
GENOME_REF=listeria_NC_021827.1_NoPhagues.fna
# Reference genome name without the file extension (the original comment said ".fasta"; the file here is ".fna")
GENOME_NAME=listeria_NC_021827.1_NoPhagues
# BED file for exome enrichment. For the outbreakWGS pipeline set to NO
EXOME_ENRICHMENT=NO
# Database of known SNPs for GATK. For the outbreakWGS pipeline set to NO
KNOWN_SNPS=NO
# Database of known indels for GATK. For the outbreakWGS pipeline set to NO
KNOWN_INDELS=NO
# Path to kmerfinder database (left empty in this configuration)
BACT_DB_PATH=
# Path to resistance srst2 database (left empty in this configuration)
SRST2_DB_PATH_ARGannot=
# Path to plasmid srst2 database (left empty in this configuration)
SRST2_DB_PATH_PlasmidFinder=
# Path to alleles srst2 file (left empty in this configuration)
SRST2_DB_PATH_mlst_db=
# Path to profiles srst2 file (left empty in this configuration)
SRST2_DB_PATH_mlst_definitions=
####################################################### Software variables #######################################################
################# Trimmomatic ##################
# Trimmomatic version
# NOTE(review): the version declared here (0.33) differs from the "Trimmomatic-0.38" directory used in the
# adapter path further down — confirm which one is actually installed
trimmomatic_version=0.33
# Trimmomatic path
TRIMMOMATIC_PATH=/scif/apps/trimmomatic/bin
# Trimmomatic arguments must be separated by "_"
# Example: /opt/Trimmomatic-0.33/adapters/TruSeq3-PE.fa:2:30:10_TRAILING:10_SLIDINGWINDOW:4:15_MINLEN:70
TRIM_ARGS=/scif/apps/trimmomatic/Trimmomatic-0.38/adapters/TruSeq3-PE.fa:2:30:10_TRAILING:10_SLIDINGWINDOW:4:15_MINLEN:70
################ Picardtools #####################
# Picardtools path
PICARD_PATH=/scif/apps/picard/bin
# Picardtools arguments, separated by spaces
# NOTE(review): the value itself contains "=" characters — confirm the config reader splits each line
# only at the first "="
PICARD_ARGS=ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT REMOVE_DUPLICATES=false
################# Kmerfinder ###################
## Kmerfinder path
## NOTE(review): the Kmerfinder step is switched off in the pipeline-steps section, so this path is
## presumably unused in this run — confirm before relying on it
KMERFINDER_PATH=/processing_Data/bioinformatics/references/kmerFinder/genomicepidemiology-kmerdb-077ba38a546a
################### srst2 ######################
## Character(s) separating gene name from allele number in MLST database
## (left empty in this configuration; the srst2 step is switched off in the pipeline-steps section)
SRST2_DELIMITER=
################## RAxML ###########################
## Bootstrap
## NOTE(review): presumably the number of bootstrap replicates — confirm
BOOTSTRAP=100
## Model of substitution; for SNP data GTRCAT is recommended
MODEL_RAXML=GTRCAT
################ R_coverage #####################
## Maximum coverage for the R coverage graph
## NOTE(review): the original comment read "R_grap coverage"; "graph" is assumed — confirm
DEPTH_COVERAGE=100
################## GATK #########################
## GATK path (the directory name indicates GATK 3.8)
GATK_PATH=/scif/apps/gatk/gatk-3.8
################## CFSAN_ARGUMENTS ####################
# VarScan minimum base quality at a position to count a read
VarScan_qual=15
# VarScan minimum variant allele frequency threshold
VarScan_frec=0.90
# Samtools minimum base quality for a base to be considered
samtoolsQ=13
# The length of the edge regions in a contig, in which all SNPs will be removed.
edge_length=500
# Minimum base quality score to count a read. By default "0"
minBaseQual=0
# Minimum fraction of high-quality reads supporting the consensus to make a call.
minConsFrec=0.6
# Minimum number of high-quality reads supporting the consensus which must be present on both the forward and
# reverse strands to make a call. By default "0"
minConsStrdDpth=0
# Minimum fraction of the high-quality consensus-supporting reads which must be present on both the forward and
# reverse strands to make a call. By default "0"
minConsStrdBias=0
############## SNP FILTERS #########################
# The maximum number of SNPs allowed in a window.
MAX_SNP=3
# The length of the window in which the number of SNPs must not exceed the maximum set above
WINDOW_SIZE=1000