config2.file

## This config file contains the information about the analysis for Outbreak detection and characterization of foodborne bacteria
## All paths must be absolute and must NOT end with "/"


############################################## RunInfo ################################################################
AUTHOR=BU-ISCIII
DATE=20181005
MAIL=bioinformatica@isciii.es

DATE_RUN=20171122
LIBRARY=NEXTERA-XT
SEQUENCING_CENTER=ISCIII
PLATFORM=ILLUMINA
MODEL=NEXTSEQ
RUN_PLATFORM=NS0001
SEQ_TYPE=paired

############################# Pipeline steps: Fill in with YES or NO (capital letters) ##################################

TRIMMING=NO
CHECK_REFERENCES=YES
MAPPING=YES
DUPLICATE_FILTER=YES
VARIANT_CALLING=YES
KMERFINDER=NO
SRST2=NO
CFSAN=NO
VCF_TO_MSA=YES
RAXML=YES
STATS=YES

############################################# System variables #######################################################

# Set memory for Java Virtual Machine
# Example: -Xmx15g
JAVA_RAM=-Xmx2g

# Set to 1 if the pipeline is executed on an HPC cluster, or to 0 if it is executed locally
USE_SGE=0

# Set memory for slot
#Example: 20g
H_VMEM=

# Set number of threads
THREADS=8

#Queues
QUEUE=


############################################# Directories #########################################################

# Path to the directory which contains all scripts
SCRIPTS_DIR=/home/smonzon/tmp_singularity/WGS-Outbreaker/lib

# Path to temporary directory
TEMP_DIR=/home/smonzon/tmp

# Directory with input files
INPUT_DIR=/home/smonzon/Documents/bacterial_wgs_training/results/trimming/trimmed

# Directory for output files
OUTPUT_DIR=/home/smonzon/Documents/bacterial_wgs_training/results/wgs_outbreaker

########################################## INPUT VARIABLES########################################################

# Samples info:
# All sample IDs must be separated by ":"; then, for each sample, there must be a line with the file names of
# R1 and R2 separated by a tab
# Example:
	#=AAAA_01:BBBB_02
	# AAAA_01=AAAA_01_R1.fastq.gz    AAAA_01_R2.fastq.gz
	# BBBB_02=BBBB_02_R1.fastq.gz    BBBB_02_R2.fastq.gz

SAMPLES=RA-L2073:RA-L2281:RA-L2327:RA-L2391:RA-L2450:RA-L2677:RA-L2701:RA-L2782:RA-L2805:RA-L2978

RA-L2073=RA-L2073_paired_R1.fastq.gz	RA-L2073_paired_R2.fastq.gz
RA-L2281=RA-L2281_paired_R1.fastq.gz	RA-L2281_paired_R2.fastq.gz
RA-L2327=RA-L2327_paired_R1.fastq.gz	RA-L2327_paired_R2.fastq.gz
RA-L2391=RA-L2391_paired_R1.fastq.gz	RA-L2391_paired_R2.fastq.gz
RA-L2450=RA-L2450_paired_R1.fastq.gz	RA-L2450_paired_R2.fastq.gz
RA-L2677=RA-L2677_paired_R1.fastq.gz	RA-L2677_paired_R2.fastq.gz
RA-L2701=RA-L2701_paired_R1.fastq.gz	RA-L2701_paired_R2.fastq.gz
RA-L2782=RA-L2782_paired_R1.fastq.gz	RA-L2782_paired_R2.fastq.gz
RA-L2805=RA-L2805_paired_R1.fastq.gz	RA-L2805_paired_R2.fastq.gz
RA-L2978=RA-L2978_paired_R1.fastq.gz	RA-L2978_paired_R2.fastq.gz


######################################################### Reference Variables ###########################################

# Path to reference genome
GENOME_REF=listeria_NC_021827.1_NoPhagues.fna

# Reference genome name without the file extension (e.g. ".fasta")
GENOME_NAME=listeria_NC_021827.1_NoPhagues

# BED file for exome enrichment. For the outbreakWGS pipeline, set to NO
EXOME_ENRICHMENT=NO

# Database of known SNPs for GATK. For the outbreakWGS pipeline, set to NO
KNOWN_SNPS=NO

# Database of known indels for GATK. For the outbreakWGS pipeline, set to NO
KNOWN_INDELS=NO

# Path to kmerfinder database
BACT_DB_PATH=

# Path to resistance srst2 database
SRST2_DB_PATH_ARGannot=

# Path to plasmid srst2 database
SRST2_DB_PATH_PlasmidFinder=

# Path to alleles srst2 file
SRST2_DB_PATH_mlst_db=

# Path to profiles srst2 file
SRST2_DB_PATH_mlst_definitions=

####################################################### Software variables #######################################################

################# Trimmomatic ##################
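# Trimmomatic version
# NOTE(review): the adapter path configured further down references a Trimmomatic-0.38 directory — confirm this version value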
trimmomatic_version=0.33

# Trimmomatic path
TRIMMOMATIC_PATH=/scif/apps/trimmomatic/bin

# Trimmomatic arguments must be separated by "_"
# Example: /opt/Trimmomatic-0.33/adapters/TruSeq3-PE.fa:2:30:10_TRAILING:10_SLIDINGWINDOW:4:15_MINLEN:70
TRIM_ARGS=/scif/apps/trimmomatic/Trimmomatic-0.38/adapters/TruSeq3-PE.fa:2:30:10_TRAILING:10_SLIDINGWINDOW:4:15_MINLEN:70

################ Picardtools #####################
# Picardtools path
PICARD_PATH=/scif/apps/picard/bin

#Picardtools arguments
PICARD_ARGS=ASSUME_SORTED=true VALIDATION_STRINGENCY=LENIENT REMOVE_DUPLICATES=false

################# Kmerfinder ###################
## Kmerfinder path
KMERFINDER_PATH=/processing_Data/bioinformatics/references/kmerFinder/genomicepidemiology-kmerdb-077ba38a546a

################### srst2 ######################
## Character(s) separating gene name from allele number in MLST database
SRST2_DELIMITER=

################## RAxML ###########################
##Bootstrap
BOOTSTRAP=100

## Model of substitution, for SNP data GTRCAT is recommended
MODEL_RAXML=GTRCAT

################ R_coverage #####################
## Maximum coverage for the R coverage graph
DEPTH_COVERAGE=100

################## GATK #########################
## GATK path
GATK_PATH=/scif/apps/gatk/gatk-3.8

################## CFSAN_ARGUMENTS ####################
# VarScan minimum base quality at a position to count a read
VarScan_qual=15

#VarScan minimum variant allele frequency threshold
VarScan_frec=0.90

# Samtools minimum base quality for a base to be considered
samtoolsQ=13

# The length of the edge regions in a contig, in which all SNPs will be removed.
edge_length=500

# Minimum base quality score to count a read. By default "0"
minBaseQual=0

# Minimum fraction of high-quality reads supporting the consensus to make a call.
minConsFrec=0.6

# Minimum number of high-quality reads supporting the consensus which must be present on both the forward and
# reverse strands to make a call. By default "0"
minConsStrdDpth=0

# Minimum fraction of the high-quality consensus-supporting reads which must be present on both the forward and
# reverse strands to make a call. By default "0"
minConsStrdBias=0

##############  SNP FILTERS #########################
# The maximum number of SNPs allowed in a window.
MAX_SNP=3

# The length of the window in which the number of SNPs should be no more than max_num_snp
WINDOW_SIZE=1000