-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgatk_case.sh
81 lines (75 loc) · 3.9 KB
/
gatk_case.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/bash
bundle=${HOME}/bundle #path to ref.hg38 (Homo_sapiens_assembly38.fasta),ref.hg38.interval_list (generated by gatk_cohort.sh)
scatter=${bundle}/scatter
case_bams=${HOME}/bams # path to the CASE input BAMs
case_tsv_files=${case_bams}/tsv_files
output_dir=${HOME}/output_dir # path to results from CASE pipeline: ploidy and VCF calls
cohort=${output_dir}/cohort
cohort_ploidy=${cohort}/cohort_ploidy
case=${output_dir}/case
case_ploidy=${case}/case_ploidy
case_cnv_caller=${case}/case_cnv_caller
case_results=${case}/case_results
case-results=${case}/case-results
gatk=${HOME}/gatk #path to GATK packages
# gatk CollectReadCounts collects the reads counts at each interval defined in the previous step.
if [ ! $(ls -A ${case_tsv_files}) ];then
for bam in ${case_bams}/*.bam;do
${gatk} CollectReadCounts \
-L ${bundle}/ref.hg38.interval_list \
-R ${bundle}/Homo_sapiens_assembly38.fasta \
-imr OVERLAPPING_ONLY \
-I $bam \
--format TSV \
-O ${case_tsv_files}/$(basename $bam .bam).tsv
done
else
echo "CaseCollectReadCounts ready"
fi
# gatk DetermineGermlineContigPloidy calls chromosomal ploidy states for the CASE using the COHORT model, generated by gatk_cohort.sh
if [ ! -d ${case_ploidy}/ploidy-case-calls ];then
${gatk} DetermineGermlineContigPloidy \
--model ${cohort_ploidy}/ploidy-model \
-I ${case_tsv_files}/sample1.tsv \
-I ${case_tsv_files}/sample2.tsv \
-I .tsv \
-I .tsv \
-I ${case_tsv_files}/samplen.tsv \
-O ${case_ploidy}/ \
--output-prefix ploidy-case
else
echo "Case DetermineGermlineContigPloidy ready"
fi
# gatk GermlineCNVCaller applies COHORT model to the CASE.
# gatk PostprocessGermlineCNVCalls calls the VCF files for each CASE sample.
if [ ! -d ${case_cnv_caller}/temp_0001_of_3-calls/ ] && [ ! -d ${case_cnv_caller}/temp_0001_of_3-tracking/ ] && [ ! -d ${case_cnv_caller}/temp_0002_of_3-calls/ ] && [ ! -d ${case_cnv_caller}/temp_0002_of_3-tracking/ ] && [ ! -d ${case_cnv_caller}/temp_0003_of_3-calls/ ] && [ ! -d ${case_cnv_caller}/temp_0003_of_3-tracking/ ];then
for sample in ${case_tsv_files}/*.tsv;do
file=$(basename $sample .tsv)
for index in $(cat tempfile.txt);do #tempfile is a file numbering the shards( 1..3)
${gatk} GermlineCNVCaller \
--run-mode CASE \
-I $sample \
--contig-ploidy-calls ${case_ploidy}/ploidy-case-calls \
--model ${cohort_cnv_caller}/temp_000${index}_of_3-model \
-O ${case_cnv_caller} \
--output-prefix temp_000${index}_of_3
done
${gatk} PostprocessGermlineCNVCalls \
--model-shard-path ${cohort_cnv_caller}/temp_0001_of_3-model \
--model-shard-path ${cohort_cnv_caller}/temp_0002_of_3-model \
--model-shard-path ${cohort_cnv_caller}/temp_0003_of_3-model \
--calls-shard-path ${case_cnv_caller}/temp_0001_of_3-calls \
--calls-shard-path ${case_cnv_caller}/temp_0002_of_3-calls \
--calls-shard-path ${case_cnv_caller}/temp_0003_of_3-calls \
--autosomal-ref-copy-number 2 \
--allosomal-contig chrX --allosomal-contig chrY \
--contig-ploidy-calls ${case_ploidy}/ploidy-case-calls \
--sample-index 0 \
--output-genotyped-intervals ${case_results}/genotyped-intervals-case-sample_0.vcf.gz \
--output-genotyped-segments ${case_results}/genotyped-segments-case-sample_0.vcf.gz \
--output-denoised-copy-ratios ${case_results}/sample_0.denoised_copy_ratios.tsv
cd ${case-results}/;mkdir $file
mv ${case_cnv_caller}/* ${case-results}/$file
mv ${case_results}/* ${case-results}/$file
cd ${gatk}
done