diff --git a/examples/test_vcf.vcf b/examples/test_vcf.vcf index 296604e..9ff8f40 100644 --- a/examples/test_vcf.vcf +++ b/examples/test_vcf.vcf @@ -1,7 +1,12 @@ -##fileformat=VCFv4.1 +##fileformat=VCFv4.2 ##INFO= -##INFO=##contig= +##INFO= +##contig= ##INFO= +##FORMAT= +##FORMAT= +##FORMAT= +##FORMAT= ##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT father mother proband 1 11900 . A T 100 PASS MQ=1 GT:GQ 0/1:60 0/1:60 1/1:60 @@ -12,6 +17,6 @@ 3 879585 . A T 100 PASS MQ=1 GT:GQ 0/1:60 0/0:60 0/1:60 3 879586 . A T 100 PASS MQ=1 GT:GQ 0/0:60 0/1:60 0/1:60 3 947378 . A T 100 PASS MQ=1 GT:GQ:AD:DP 0/0:60:5,7:12 0/0:60:4,6:14 0/1:60:7,8:16 -3 947379 . A T,C 100 PASS MQ=1;CNT=5,8;DP_HIST=2.5 GT:GQ:AD:DP 1/1:60:0,7,0:12 0/2:60:7,0,10:17 1/2:60:0,7,8:16 -3 973348 . G A 100 PASS MQ=1;CNT=12;DP_HIST=17.5 GT:GQ:TP 0/0:60:23 0/0:60 0/1:60 -3 973349 . G A,T 100 PASS MQ=1;CNT=7,3;DP_HIST=32.5 GT:GQ:TP 0/1:60:23 1/2:60 0/2:60 +3 947379 . A T,C 100 PASS MQ=1;CNT=5,8;DP_HIST=12,43,22 GT:GQ:AD:DP 1/1:60:0,7,0:12 0/2:60:7,0,10:17 1/2:60:0,7,8:16 +3 973348 . G A 100 PASS MQ=1;CNT=12;DP_HIST=17,19 GT:GQ 0/0:60 0/0:60 0/1:60 +3 973349 . G A,T 100 PASS MQ=1;CNT=7,3;DP_HIST=54,33,22 GT:GQ 0/1:60 1/2:60 0/2:60 diff --git a/vcf_parser/parser.py b/vcf_parser/parser.py index 69e08e9..34c01d0 100755 --- a/vcf_parser/parser.py +++ b/vcf_parser/parser.py @@ -569,10 +569,16 @@ def make_splitted_variants(self, variant_dict): except KeyError: print(""""\nOne of the FILTER lines is missing in vcf header: %s \n""" % info, file=sys.stderr) - raise + raise + if info == 'CSQ': + try: + vep_dict[alternative] = variant_dict['vep_info'][alternative] + info_dict['CSQ'] = [self.build_new_vep_string(variant_dict['vep_info'][alternative])] + except KeyError: + pass # If there if one value per allele we need to split it in # the proper way - if number_of_values == 'A': + elif number_of_values == 'A': try: # When we split the alleles we only want to annotate with the correct number info_dict[info] = [variant_dict['info_dict'][info][alternative_number]] @@ -580,12 +586,17 @@ def make_splitted_variants(self, variant_dict): # If there is only one annotation we choose that one info_dict[info] = [variant_dict['info_dict'][info][0]] # Choose the right vep info from the old variant - elif info == 'CSQ': + elif number_of_values == 'R': + reference_value = variant_dict['info_dict'][info][0] + new_info = [reference_value] try: - vep_dict[alternative] = variant_dict['vep_info'][alternative] - info_dict['CSQ'] = [self.build_new_vep_string(variant_dict['vep_info'][alternative])] - except KeyError: + # When we split the alleles we only want to annotate with the correct number + new_info.append(variant_dict['info_dict'][info][alternative_number + 1]) + info_dict[info] = new_info + except IndexError: + # If annotation is missing we keep the original annotation pass + else: info_dict[info] = variant_dict['info_dict'][info]