gatk_case.sh

#!/bin/bash

bundle=${HOME}/bundle #path to ref.hg38 (Homo_sapiens_assembly38.fasta),ref.hg38.interval_list (generated by gatk_cohort.sh) 
scatter=${bundle}/scatter
case_bams=${HOME}/bams # path to the CASE input BAMs
case_tsv_files=${case_bams}/tsv_files
output_dir=${HOME}/output_dir # path to results from CASE pipeline: ploidy and VCF calls
cohort=${output_dir}/cohort
cohort_ploidy=${cohort}/cohort_ploidy
case=${output_dir}/case
case_ploidy=${case}/case_ploidy
case_cnv_caller=${case}/case_cnv_caller
case_results=${case}/case_results
case-results=${case}/case-results
gatk=${HOME}/gatk #path to GATK packages 

# gatk CollectReadCounts collects the reads counts at each interval defined in the previous step.
if [ ! $(ls -A ${case_tsv_files}) ];then
for bam in ${case_bams}/*.bam;do
	${gatk} CollectReadCounts \
                -L ${bundle}/ref.hg38.interval_list \
                -R ${bundle}/Homo_sapiens_assembly38.fasta \
                -imr OVERLAPPING_ONLY \
                -I $bam \
                --format TSV \
                -O ${case_tsv_files}/$(basename $bam .bam).tsv
done
else
	echo "CaseCollectReadCounts ready"
fi

# gatk DetermineGermlineContigPloidy calls chromosomal ploidy states for the CASE using the COHORT model, generated by gatk_cohort.sh
if [ ! -d  ${case_ploidy}/ploidy-case-calls ];then
		${gatk} DetermineGermlineContigPloidy \
			--model ${cohort_ploidy}/ploidy-model \
			-I ${case_tsv_files}/sample1.tsv \
			-I ${case_tsv_files}/sample2.tsv \
			-I .tsv \
			-I .tsv \

			-I ${case_tsv_files}/samplen.tsv \
			-O ${case_ploidy}/ \
			--output-prefix ploidy-case
else
       echo "Case DetermineGermlineContigPloidy ready"
fi

# gatk GermlineCNVCaller applies COHORT model to the CASE.
# gatk PostprocessGermlineCNVCalls calls the VCF files for each CASE sample.  
if [ ! -d ${case_cnv_caller}/temp_0001_of_3-calls/ ] && [ ! -d ${case_cnv_caller}/temp_0001_of_3-tracking/ ] && [ ! -d ${case_cnv_caller}/temp_0002_of_3-calls/ ] && [ ! -d ${case_cnv_caller}/temp_0002_of_3-tracking/ ] && [ ! -d ${case_cnv_caller}/temp_0003_of_3-calls/ ] && [ ! -d ${case_cnv_caller}/temp_0003_of_3-tracking/ ];then
        for sample in ${case_tsv_files}/*.tsv;do
        	file=$(basename $sample .tsv)
                for index in $(cat tempfile.txt);do #tempfile is a file numbering the shards( 1..3)
                        ${gatk} GermlineCNVCaller \
                                --run-mode CASE \
                                -I $sample \
                                --contig-ploidy-calls ${case_ploidy}/ploidy-case-calls \
                                --model  ${cohort_cnv_caller}/temp_000${index}_of_3-model \
                                -O ${case_cnv_caller} \
                                --output-prefix temp_000${index}_of_3
                done
	        ${gatk} PostprocessGermlineCNVCalls \
        	        --model-shard-path ${cohort_cnv_caller}/temp_0001_of_3-model \
                	--model-shard-path ${cohort_cnv_caller}/temp_0002_of_3-model \
                	--model-shard-path ${cohort_cnv_caller}/temp_0003_of_3-model \
                	--calls-shard-path ${case_cnv_caller}/temp_0001_of_3-calls \
                	--calls-shard-path ${case_cnv_caller}/temp_0002_of_3-calls \
                	--calls-shard-path ${case_cnv_caller}/temp_0003_of_3-calls \
                	--autosomal-ref-copy-number 2 \
               		--allosomal-contig chrX --allosomal-contig chrY \
			--contig-ploidy-calls ${case_ploidy}/ploidy-case-calls \
                	--sample-index 0 \
                	--output-genotyped-intervals ${case_results}/genotyped-intervals-case-sample_0.vcf.gz \
                	--output-genotyped-segments ${case_results}/genotyped-segments-case-sample_0.vcf.gz \
                	--output-denoised-copy-ratios ${case_results}/sample_0.denoised_copy_ratios.tsv
		cd ${case-results}/;mkdir $file
		mv ${case_cnv_caller}/* ${case-results}/$file
		mv ${case_results}/* ${case-results}/$file
		cd ${gatk}
	done