Skip to content

Commit

Permalink
Refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
jpjarnoux committed Apr 10, 2024
1 parent ee283cb commit 922073d
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 12 deletions.
4 changes: 2 additions & 2 deletions ppanggolin/annotate/synta.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,9 +301,9 @@ def get_dna_sequence(contig_seq: str, gene: Union[Gene, RNA]) -> str:
:return: str
"""
if gene.strand == "+":
return contig_seq[gene.start - 1:gene.stop]
return contig_seq[gene.start - 1: gene.stop]
elif gene.strand == "-":
return reverse_complement(contig_seq[gene.start - 1:gene.stop])
return reverse_complement(contig_seq[gene.start - 1: gene.stop])


def annotate_organism(org_name: str, file_name: Path, circular_contigs: List[str], tmpdir: str,
Expand Down
21 changes: 11 additions & 10 deletions ppanggolin/formats/writeMSA.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from tqdm import tqdm

# local libraries
from ppanggolin.genome import Gene
from ppanggolin.geneFamily import GeneFamily
from ppanggolin.pangenome import Pangenome
from ppanggolin.utils import mk_outdir, restricted_float
Expand Down Expand Up @@ -95,28 +96,28 @@ def get_families_to_write(pangenome: Pangenome, partition_filter: str = "core",
return families


def translate(seq: str, code: Dict[str, Dict[str, str]]) -> str:
def translate(gene: Gene, code: Dict[str, Dict[str, str]]) -> str:
"""translates the given dna sequence with the given translation table
:param seq: given dna sequence
:param gene: given gene
:param code: translation table corresponding to genetic code to use
:return: protein sequence
"""
# code: https://www.bioinformatics.org/sms/iupac.html
start_table = code["start_table"]
table = code["trans_table"]

if len(seq) % 3 == 0:
protein = start_table[seq[0: 3]]
for i in range(3, len(seq), 3):
codon = seq[i: i + 3]
if len(gene.dna) % 3 == 0:
protein = start_table[gene.dna[0: 3]]
for i in range(3, len(gene.dna), 3):
codon = gene.dna[i: i + 3]
try:
protein += table[codon]
except KeyError: # codon was not planned for. Probably can't determine it.
protein += 'X' # X is for unknown
else:
raise IndexError("Given sequence length modulo 3 was different than 0, which is unexpected.")
logging.getLogger("PPANGGOLIN").error(f"Found gene length is {len(gene.dna)}")
raise IndexError(f"Gene {gene.ID} sequence length modulo 3 was different than 0, which is unexpected.")
return protein


Expand Down Expand Up @@ -150,7 +151,7 @@ def write_fasta_families(family: GeneFamily, tmpdir: tempfile.TemporaryDirectory
if source == "dna":
f_obj.write(gene.dna + '\n')
elif source == "protein":
f_obj.write(translate(gene.dna, code_table) + "\n")
f_obj.write(translate(gene, code_table) + "\n")
else:
raise AssertionError("Unknown sequence source given (expected 'dna' or 'protein')")
f_obj.flush()
Expand Down Expand Up @@ -229,7 +230,7 @@ def write_whole_genome_msa(pangenome: Pangenome, families: set, phylo_name: Path
"""

# sort familes by ID, so the gene order is consistent
families = sorted(families, key = lambda fam: fam.ID)
families = sorted(families, key=lambda f: f.ID)

phylo_dict = {}
for org in pangenome.organisms:
Expand Down

0 comments on commit 922073d

Please sign in to comment.