Skip to content

Commit

Permalink
Improve checkpoint for divergence colors provided in configuration fi…
Browse files Browse the repository at this point in the history
…le (#36)

- Checks whether the "divergence_colors" list in the
  configuration file is empty and
  whether it contains at least as many colors
  as the number of internal nodes in the lineage
  that leads to the focal species (the backbone of
  the tree plotted in the PDF or as ASCII).

- Warns and exits if colors in the configuration
  file field "divergence_colors" are found
  to be incompatible with matplotlib, the
  package that is going to use them later
  on to generate the Ks plot
  (e.g. misspelled color names).
  • Loading branch information
Cecilia-Sensalari authored Mar 23, 2022
1 parent bc9ea47 commit 621191f
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 15 deletions.
42 changes: 30 additions & 12 deletions ksrates/fc_configfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os
from ete3 import Tree
import ksrates.fc_check_input as fcCheck
from matplotlib.colors import is_color_like
import logging
import sys
from ast import literal_eval
Expand Down Expand Up @@ -106,10 +107,10 @@ def get_species(self):
"""
species = self.config.get("SPECIES", "focal_species")
if species == "":
logging.error("Parameter focal_species in configuration file is empty, please fill in")
logging.error('Field "focal_species" in configuration file is empty, please fill in')
sys.exit(1)
elif len(species.split()) != 1:
logging.error(f"Parameter focal_species [{species}] should be a short name and must not contain any spaces, please change accordingly")
elif len(species.split()) != 1 or "_" in species:
logging.error(f'Field "focal_species" [{species}] should be a short name and must not contain any spaces or underscores, please change accordingly')
sys.exit(1)
return species

Expand All @@ -123,13 +124,13 @@ def get_newick_tree(self):
tree_string = self.config.get("SPECIES", "newick_tree")
if not (tree_string.endswith(';')):
tree_string += ";"
if tree_string == "();":
logging.error("Parameter newick_tree in configuration file is empty, please fill in")
if tree_string == "();" or tree_string == ";":
logging.error('Field "newick_tree" in configuration file is empty, please fill in')
sys.exit(1)
try:
tree = Tree(tree_string)
except Exception:
logging.error("Unrecognized format for parameter newick_tree in configuration file (for example, parentheses do not match)")
logging.error('Unrecognized format for field "newick_tree" in configuration file (for example, parentheses do not match)')
sys.exit(1)

# Check if species' informal names contain illegal characters (underscore or spaces)
Expand Down Expand Up @@ -157,9 +158,9 @@ def check_complete_latin_names_dict(self, dictionary):
missing_species = list(set.difference(set(all_leaves), set(dictionary.keys())))
if len(missing_species) != 0:
if len(missing_species) == 1:
logging.error(f"The following species is missing from the [latin_names] configuration file field:")
logging.error(f'The following species is missing from the "latin_names" configuration file field:')
else:
logging.error(f"The following species are missing from the [latin_names] configuration file field:")
logging.error(f'The following species are missing from the "latin_names" configuration file field:')
for missing_name in missing_species:
logging.error(f" - {missing_name}")

Expand All @@ -177,7 +178,7 @@ def get_latin_names(self):
if latin_names != "":
latin_names_dict = self._get_clean_dict_stringent(latin_names, "latin_names")
else:
logging.error("Configuration file field [latin_names] is empty, please fill in and restart the analysis.")
logging.error('Configuration file field "latin_names" is empty, please fill in and restart the analysis.')
logging.error("Exiting.")
sys.exit(1)
# Check if latin_names contains all the species present in the Newick tree; if not, exits
Expand Down Expand Up @@ -458,10 +459,27 @@ def get_color_list(self):
is assigned to the second internal node encountered along this path, and so on.
There must be at least as many colors as the number of divergence nodes.
Checks if there are colors whose name is not recognized by matplotlib, e.g. misspelled.
:return colors: list of colors
"""
color_list_string = self.config.get("PARAMETERS", "divergence_colors")
colors = [c.strip() for c in color_list_string.split(',')]
if len(colors) == 1 and colors[0] == "":
logging.error('Field "divergence_colors" in configuration file is empty, please fill in')
logging.error("Exiting.")
sys.exit(1)

# Check if color names are recognized by matplotlib
faulty_color_names = []
for color in colors:
if not is_color_like(color):
faulty_color_names.append(color)
if len(faulty_color_names) != 0:
logging.error('Field "divergence_colors" in configuration file contains color names not recognized by Matplotlib, please adjust the following:')
for color in faulty_color_names:
logging.error(f"- {color}")
sys.exit(1)
return colors


Expand Down Expand Up @@ -689,12 +707,12 @@ def get_max_mixture_model_components(self):
logging.warning(f'Unrecognized field in expert configuration file [max_mixture_model_components = {max_comp}]. Please choose a positive integer >= 2. Default choice will be applied [5]')
max_comp = 5
elif max_comp == 1:
logging.warning(f"Parameter [max_mixture_model_components] has been changed from {max_comp} to the minimum required, 2.")
logging.warning(f'Field "max_mixture_model_components" has been changed from {max_comp} to the minimum required, 2')
max_comp = 2 # exponential + buffer
elif max_comp <= 3:
logging.warning(f"A low number of mixture model components [max_mixture_model_components = {max_comp}] can produce poor fitting.")
logging.warning(f"A low number of mixture model components [max_mixture_model_components = {max_comp}] can produce poor fitting")
elif max_comp >= 7:
logging.warning(f"A high number of mixture model components [max_mixture_model_components = {max_comp}] increases overfitting risk.")
logging.warning(f"A high number of mixture model components [max_mixture_model_components = {max_comp}] increases overfitting risk")
except Exception:
logging.warning(f'Missing field in expert configuration file [max_mixture_model_components]. Please choose a positive integer. Default choice will be applied [5]')
max_comp = 5
Expand Down
17 changes: 14 additions & 3 deletions ksrates/setup_correction.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def setup_correction(config_file, nextflow_flag):
fcTree.check_integrity_newick_tree(original_tree)
tree = fcTree.reorder_tree_leaves(original_tree, species_of_interest) # focal species is the top leaf
latin_names = config.get_latin_names()
divergence_colors = config.get_color_list()
paranome = config.get_paranome()
colinearity = config.get_colinearity()

Expand Down Expand Up @@ -104,14 +105,24 @@ def setup_correction(config_file, nextflow_flag):
logging.error(f"Please add at least one outgroup species or change the focal species.")
sys.exit(1)

# Obtaining the numeric labels for internal nodes relevant in the species analysis
fcTree.labeling_internal_nodes(species_of_interest_node)
# If the amount of colors provided for the divergence lines in the config file
# is insufficient for the number of divergence nodes in the tree, exit
num_required_colors = sp_history[-2].name
if len(divergence_colors) < num_required_colors:
logging.error("")
logging.error(f'Configuration file field "divergence_colors" is missing {num_required_colors - len(divergence_colors)} color(s) ' +
f"out of {num_required_colors} required for the analysis on focal species [{species_of_interest}]")
logging.error("Please add the missing color(s) and rerun the analysis")
logging.error("Exiting.")
sys.exit(1)

trios_array = [] # list of trios
outfile_drawing_path = os.path.join("rate_adjustment", f"{species_of_interest}",
f"tree_{species_of_interest}.txt")
with open(outfile_drawing_path, "w+") as outfile_drawing:
outfile_drawing.write(f"Focal species: {species_of_interest}\n\n")

# Obtaining the numeric labels for internal nodes relevant in the species analysis
fcTree.labeling_internal_nodes(species_of_interest_node)

node = 0
while node < len(sp_history)-2:
Expand Down

0 comments on commit 621191f

Please sign in to comment.