Merge branch 'feature/dakl_develop' into develop

moonso · Feb 12, 2015 · f7796cd · f7796cd
2 parents 458718c + 49d814a
commit f7796cd
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 11 deletions.
diff --git a/examples/test_vcf.vcf b/examples/test_vcf.vcf
@@ -1,7 +1,12 @@
-##fileformat=VCFv4.1
+##fileformat=VCFv4.2
 ##INFO=<ID=MQ,Number=1,Type=Float,Description="RMS Mapping Quality">
-##INFO=<ID=CNT,Number=A,Type=Integer,Description="Number of times this allele was found in external db">##contig=<ID=1,length=249250621,assembly=b37>
+##INFO=<ID=CNT,Number=A,Type=Integer,Description="Number of times this allele was found in external db">
+##contig=<ID=1,length=249250621,assembly=b37>
 ##INFO=<ID=DP_HIST,Number=R,Type=String,Description="Histogram for DP; Mids: 2.5|7.5|12.5|17.5|22.5|27.5|32.5|37.5|42.5|47.5|52.5|57.5|62.5|67.5|72.5|77.5|82.5|87.5|92.5|97.5">
+##FORMAT=<ID=AD,Number=.,Type=Integer,Description="Allelic depths for the ref and alt alleles in the order listed">
+##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
+##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
+##FORMAT=<ID=GQ,Number=1,Type=String,Description="GenotypeQuality">
 ##reference=file:///humgen/gsa-hpprojects/GATK/bundle/current/b37/human_g1k_v37.fasta
 #CHROM	POS	ID	REF	ALT	QUAL	FILTER	INFO	FORMAT	father	mother	proband
 1	11900	.	A	T	100	PASS	MQ=1	GT:GQ	0/1:60	0/1:60	1/1:60
@@ -12,6 +17,6 @@
 3	879585	.	A	T	100	PASS	MQ=1	GT:GQ	0/1:60	0/0:60	0/1:60
 3	879586	.	A	T	100	PASS	MQ=1	GT:GQ	0/0:60	0/1:60	0/1:60
 3	947378	.	A	T	100	PASS	MQ=1	GT:GQ:AD:DP	0/0:60:5,7:12	0/0:60:4,6:14	0/1:60:7,8:16
-3	947379	.	A	T,C	100	PASS	MQ=1;CNT=5,8;DP_HIST=2.5	GT:GQ:AD:DP	1/1:60:0,7,0:12	0/2:60:7,0,10:17	1/2:60:0,7,8:16
-3	973348	.	G	A	100	PASS	MQ=1;CNT=12;DP_HIST=17.5	GT:GQ:TP	0/0:60:23	0/0:60	0/1:60
-3	973349	.	G	A,T	100	PASS	MQ=1;CNT=7,3;DP_HIST=32.5	GT:GQ:TP	0/1:60:23	1/2:60	0/2:60
+3	947379	.	A	T,C	100	PASS	MQ=1;CNT=5,8;DP_HIST=12,43,22	GT:GQ:AD:DP	1/1:60:0,7,0:12	0/2:60:7,0,10:17	1/2:60:0,7,8:16
+3	973348	.	G	A	100	PASS	MQ=1;CNT=12;DP_HIST=17,19	GT:GQ	0/0:60	0/0:60	 0/1:60
+3	973349	.	G	A,T	100	PASS	MQ=1;CNT=7,3;DP_HIST=54,33,22	GT:GQ	0/1:60	1/2:60	 0/2:60
diff --git a/vcf_parser/parser.py b/vcf_parser/parser.py
@@ -569,23 +569,34 @@ def make_splitted_variants(self, variant_dict):
                     except KeyError:
                         print(""""\nOne of the FILTER lines is missing in vcf 
                                 header: %s \n""" % info, file=sys.stderr)
-                        raise 
+                        raise
+                    if info == 'CSQ':
+                        try:
+                            vep_dict[alternative] = variant_dict['vep_info'][alternative]
+                            info_dict['CSQ'] = [self.build_new_vep_string(variant_dict['vep_info'][alternative])]
+                        except KeyError:
+                            pass
                     # If there is one value per allele we need to split it in
                     # the proper way
-                    if number_of_values == 'A':
+                    elif number_of_values == 'A':
                         try:
                             # When we split the alleles we only want to annotate with the correct number
                             info_dict[info] = [variant_dict['info_dict'][info][alternative_number]]
                         except IndexError:
                             # If there is only one annotation we choose that one
                             info_dict[info] = [variant_dict['info_dict'][info][0]]
                     # Choose the right vep info from the old variant
-                    elif info == 'CSQ':
+                    elif number_of_values == 'R':
+                        reference_value = variant_dict['info_dict'][info][0]
+                        new_info = [reference_value]
                         try:
-                            vep_dict[alternative] = variant_dict['vep_info'][alternative]
-                            info_dict['CSQ'] = [self.build_new_vep_string(variant_dict['vep_info'][alternative])]
-                        except KeyError:
+                            # When we split the alleles we only want to annotate with the correct number
+                            new_info.append(variant_dict['info_dict'][info][alternative_number + 1])
+                            info_dict[info] = new_info
+                        except IndexError:
+                            # If annotation is missing we keep the original annotation
                             pass
+
                     else:
                         info_dict[info] = variant_dict['info_dict'][info]