Skip to content

Commit

Permalink
Add methods to fix mate info on non-primaries and templates
Browse files Browse the repository at this point in the history
  • Loading branch information
clintval committed Dec 27, 2024
1 parent 24ec17c commit 32024b9
Showing 1 changed file with 101 additions and 0 deletions.
101 changes: 101 additions & 0 deletions fgpyo/sam/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,78 @@ def set_mate_info(
r2.is_proper_pair = proper_pair


def set_mate_info_on_secondary(
    primary: AlignedSegment,
    secondary: AlignedSegment,
    is_proper_pair: Callable[[AlignedSegment, AlignedSegment], bool] = is_proper_pair,
) -> None:
    """Copy mate information from a mate's primary alignment onto a secondary alignment.

    The secondary alignment is modified in place. Its mate reference, mate start, mate strand,
    and mate mapped status are taken from `primary`, and its `MC`, `MQ`, and `ms` tags are set
    from the primary's CIGAR string, mapping quality, and sum of base qualities respectively.
    Nothing is modified if validation fails.

    Args:
        primary: The primary alignment of the secondary's mate.
        secondary: The secondary alignment to set mate information upon.
        is_proper_pair: A function that takes the two alignments and determines proper pair
            status.

    Raises:
        ValueError: If primary and secondary are of the same read ordinal.
        ValueError: If the primary is marked as either secondary or supplementary.
        ValueError: If the secondary is not marked as secondary.
        ValueError: If the two alignments have different query names.
    """
    # Both "wrong ordinal" and "not actually a primary" are reported with the same message.
    wrong_primary = "Secondary mate info must be set from a primary of the next ordinal!"
    if primary.is_read1 == secondary.is_read1:
        raise ValueError(wrong_primary)
    if primary.is_secondary or primary.is_supplementary:
        raise ValueError(wrong_primary)
    if not secondary.is_secondary:
        raise ValueError("Cannot set mate info on an alignment not marked as secondary!")
    if primary.query_name != secondary.query_name:
        raise ValueError("Cannot set mate info on alignments with different query names!")

    # Mate coordinates and flags mirror the mate's primary alignment.
    secondary.next_reference_id = primary.reference_id
    secondary.next_reference_name = primary.reference_name
    secondary.next_reference_start = primary.reference_start
    secondary.mate_is_forward = primary.is_forward
    secondary.mate_is_mapped = primary.is_mapped

    # Mate tags: CIGAR, mapping quality, and summed base qualities of the mate's primary.
    secondary.set_tag("MC", primary.cigarstring)
    secondary.set_tag("MQ", primary.mapping_quality)
    secondary.set_tag("ms", sum_of_base_qualities(primary))

    # NB: proper pair status and template length are calculated as if this secondary alignment
    # was the primary alignment of its read.
    # NOTE(review): both helpers receive (primary, secondary) in that order. If `isize` is signed
    # relative to its first argument, confirm the resulting sign is the one intended for the
    # secondary's template length rather than the primary's.
    secondary.is_proper_pair = is_proper_pair(primary, secondary)
    secondary.template_length = isize(primary, secondary)

Check warning on line 914 in fgpyo/sam/__init__.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/sam/__init__.py#L913-L914

Added lines #L913 - L914 were not covered by tests


def set_mate_info_on_supplementary(primary: AlignedSegment, supp: AlignedSegment) -> None:
    """Copy mate information from a mate's primary alignment onto a supplementary alignment.

    The supplementary alignment is modified in place. Its mate reference, mate start, mate
    strand, and mate mapped status are taken from `primary`, and its `MC`, `MQ`, and `ms` tags
    are set from the primary's CIGAR string, mapping quality, and sum of base qualities
    respectively. Nothing is modified if validation fails.

    Args:
        primary: The primary alignment of the supplementary's mate.
        supp: The supplementary alignment to set mate information upon.

    Raises:
        ValueError: If primary and supplementary are of the same read ordinal.
        ValueError: If the primary is marked as either secondary or supplementary.
        ValueError: If the supplementary is not marked as supplementary.
        ValueError: If the two alignments have different query names.
    """
    # Both "wrong ordinal" and "not actually a primary" are reported with the same message.
    wrong_primary = "Supplementary mate info must be set from a primary of the next ordinal!"
    if primary.is_read1 == supp.is_read1:
        raise ValueError(wrong_primary)
    if primary.is_secondary or primary.is_supplementary:
        raise ValueError(wrong_primary)
    if not supp.is_supplementary:
        raise ValueError("Cannot set mate info on an alignment not marked as supplementary!")
    if primary.query_name != supp.query_name:
        raise ValueError("Cannot set mate info on alignments with different query names!")

    # Mate coordinates and flags mirror the mate's primary alignment.
    supp.next_reference_id = primary.reference_id
    supp.next_reference_name = primary.reference_name
    supp.next_reference_start = primary.reference_start
    supp.mate_is_forward = primary.is_forward
    supp.mate_is_mapped = primary.is_mapped

    # Mate tags: CIGAR, mapping quality, and summed base qualities of the mate's primary.
    supp.set_tag("MC", primary.cigarstring)
    supp.set_tag("MQ", primary.mapping_quality)
    supp.set_tag("ms", sum_of_base_qualities(primary))

    # NB: a supplementary that is also flagged secondary keeps its existing proper pair flag and
    # template length. Otherwise the proper pair flag is copied from the mate's primary and the
    # template length is the negation of the mate primary's template length.
    if supp.is_secondary:
        return
    supp.is_proper_pair = primary.is_proper_pair
    supp.template_length = -primary.template_length

Check warning on line 948 in fgpyo/sam/__init__.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/sam/__init__.py#L947-L948

Added lines #L947 - L948 were not covered by tests


def set_as_pairs(
r1: AlignedSegment,
r2: AlignedSegment,
Expand Down Expand Up @@ -1157,6 +1229,10 @@ def all_recs(self) -> Iterator[AlignedSegment]:
for rec in recs:
yield rec

def set_mate_info(self) -> "Template":
    """Reset all mate information on every record in this template.

    Delegates to `set_mate_info_for_template()`, passing this template, and returns its result.
    """
    updated = set_mate_info_for_template(self)
    return updated

Check warning on line 1234 in fgpyo/sam/__init__.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/sam/__init__.py#L1234

Added line #L1234 was not covered by tests

def write_to(
self,
writer: SamFile,
Expand Down Expand Up @@ -1215,6 +1291,31 @@ def __next__(self) -> Template:
return Template.build(recs, validate=False)


def set_mate_info_for_template(
    template: Template,
    is_proper_pair: Callable[[AlignedSegment, AlignedSegment], bool] = is_proper_pair,
) -> Template:
    """Reset all mate information on every record in a template.

    When both primary alignments are present, mate information is first set between them. Then,
    for each primary that is present, the secondary and supplementary alignments of the *other*
    read ordinal have their mate information set from that primary. Records are modified in
    place.

    Args:
        template: The template of alignments to reset all mate information on.
        is_proper_pair: A function that takes two alignments and determines proper pair status.

    Returns:
        The same template that was passed in.
    """
    r1, r2 = template.r1, template.r2

    # The primaries go first: supplementary records copy values from the mate's primary.
    if r1 is not None and r2 is not None:
        set_mate_info(r1, r2, is_proper_pair=is_proper_pair)

    # Each primary is paired with the non-primary records of the opposite read ordinal.
    mate_groups = (
        (r1, template.r2_secondaries, template.r2_supplementals),
        (r2, template.r1_secondaries, template.r1_supplementals),
    )
    for mate_primary, secondaries, supplementals in mate_groups:
        if mate_primary is None:
            continue
        for rec in secondaries:
            set_mate_info_on_secondary(mate_primary, rec, is_proper_pair=is_proper_pair)
        for rec in supplementals:
            set_mate_info_on_supplementary(mate_primary, rec)

    return template

Check warning on line 1316 in fgpyo/sam/__init__.py

View check run for this annotation

Codecov / codecov/patch

fgpyo/sam/__init__.py#L1315-L1316

Added lines #L1315 - L1316 were not covered by tests


class SamOrder(enum.Enum):
"""
Enumerations of possible sort orders for a SAM file.
Expand Down

0 comments on commit 32024b9

Please sign in to comment.