diff --git a/ksrates/fc_configfile.py b/ksrates/fc_configfile.py index 053c302..80ff9ec 100644 --- a/ksrates/fc_configfile.py +++ b/ksrates/fc_configfile.py @@ -2,6 +2,7 @@ import os from ete3 import Tree import ksrates.fc_check_input as fcCheck +from matplotlib.colors import is_color_like import logging import sys from ast import literal_eval @@ -106,10 +107,10 @@ def get_species(self): """ species = self.config.get("SPECIES", "focal_species") if species == "": - logging.error("Parameter focal_species in configuration file is empty, please fill in") + logging.error('Field "focal_species" in configuration file is empty, please fill in') sys.exit(1) - elif len(species.split()) != 1: - logging.error(f"Parameter focal_species [{species}] should be a short name and must not contain any spaces, please change accordingly") + elif len(species.split()) != 1 or "_" in species: + logging.error(f'Field "focal_species" [{species}] should be a short name and must not contain any spaces or underscores, please change accordingly') sys.exit(1) return species @@ -123,13 +124,13 @@ def get_newick_tree(self): tree_string = self.config.get("SPECIES", "newick_tree") if not (tree_string.endswith(';')): tree_string += ";" - if tree_string == "();": - logging.error("Parameter newick_tree in configuration file is empty, please fill in") + if tree_string == "();" or tree_string == ";": + logging.error('Field "newick_tree" in configuration file is empty, please fill in') sys.exit(1) try: tree = Tree(tree_string) except Exception: - logging.error("Unrecognized format for parameter newick_tree in configuration file (for example, parentheses do not match)") + logging.error('Unrecognized format for field "newick_tree" in configuration file (for example, parentheses do not match)') sys.exit(1) # Check if species' informal names contain illegal characters (underscore or spaces) @@ -157,9 +158,9 @@ def check_complete_latin_names_dict(self, dictionary): missing_species = list(set.difference(set(all_leaves), set(dictionary.keys()))) if len(missing_species) != 0: if len(missing_species) == 1: - logging.error(f"The following species is missing from the [latin_names] configuration file field:") + logging.error(f'The following species is missing from the "latin_names" configuration file field:') else: - logging.error(f"The following species are missing from the [latin_names] configuration file field:") + logging.error(f'The following species are missing from the "latin_names" configuration file field:') for missing_name in missing_species: logging.error(f" - {missing_name}") @@ -177,7 +178,7 @@ def get_latin_names(self): if latin_names != "": latin_names_dict = self._get_clean_dict_stringent(latin_names, "latin_names") else: - logging.error("Configuration file field [latin_names] is empty, please fill in and restart the analysis.") + logging.error('Configuration file field "latin_names" is empty, please fill in and restart the analysis.') logging.error("Exiting.") sys.exit(1) # Check if latin_names contains all the species present in the Newick tree; if not, exits @@ -458,10 +459,27 @@ def get_color_list(self): is assigned to the second internal node encountered along this path, and so on. There must be at least as many colors as the number of divergence nodes. + Checks if there are colors whose name is not recognized by matplotlib, e.g. misspelled. + :return colors: list of colors """ color_list_string = self.config.get("PARAMETERS", "divergence_colors") colors = [c.strip() for c in color_list_string.split(',')] + if len(colors) == 1 and colors[0] == "": + logging.error('Field "divergence_colors" in configuration file is empty, please fill in') + logging.error("Exiting.") + sys.exit(1) + + # Check if color names are recognized by matplotlib + faulty_color_names = [] + for color in colors: + if not is_color_like(color): + faulty_color_names.append(color) + if len(faulty_color_names) != 0: + logging.error('Field "divergence_colors" in configuration file contains color names not recognized by Matplotlib, please adjust the following:') + for color in faulty_color_names: + logging.error(f"- {color}") + sys.exit(1) return colors @@ -689,12 +707,12 @@ def get_max_mixture_model_components(self): logging.warning(f'Unrecognized field in expert configuration file [max_mixture_model_components = {max_comp}]. Please choose a positive integer >= 2. Default choice will be applied [5]') max_comp = 5 elif max_comp == 1: - logging.warning(f"Parameter [max_mixture_model_components] has been changed from {max_comp} to the minimum required, 2.") + logging.warning(f'Field "max_mixture_model_components" has been changed from {max_comp} to the minimum required, 2') max_comp = 2 # exponential + buffer elif max_comp <= 3: - logging.warning(f"A low number of mixture model components [max_mixture_model_components = {max_comp}] can produce poor fitting.") + logging.warning(f"A low number of mixture model components [max_mixture_model_components = {max_comp}] can produce poor fitting") elif max_comp >= 7: - logging.warning(f"A high number of mixture model components [max_mixture_model_components = {max_comp}] increases overfitting risk.") + logging.warning(f"A high number of mixture model components [max_mixture_model_components = {max_comp}] increases overfitting risk") except Exception: logging.warning(f'Missing field in expert configuration file [max_mixture_model_components]. Please choose a positive integer. Default choice will be applied [5]') max_comp = 5 diff --git a/ksrates/setup_correction.py b/ksrates/setup_correction.py index 0455638..3651ae5 100644 --- a/ksrates/setup_correction.py +++ b/ksrates/setup_correction.py @@ -22,6 +22,7 @@ def setup_correction(config_file, nextflow_flag): fcTree.check_integrity_newick_tree(original_tree) tree = fcTree.reorder_tree_leaves(original_tree, species_of_interest) # focal species is the top leaf latin_names = config.get_latin_names() + divergence_colors = config.get_color_list() paranome = config.get_paranome() colinearity = config.get_colinearity() @@ -104,14 +105,24 @@ def setup_correction(config_file, nextflow_flag): logging.error(f"Please add at least one outgroup species or change the focal species.") sys.exit(1) + # Obtaining the numeric labels for internal nodes relevant in the species analysis + fcTree.labeling_internal_nodes(species_of_interest_node) + # If the amount of colors provided for the divergence lines in the config file + # is insufficient for the number of divergence nodes in the tree, exit + num_required_colors = sp_history[-2].name + if len(divergence_colors) < num_required_colors: + logging.error("") + logging.error(f'Configuration file field "divergence_colors" is missing {num_required_colors - len(divergence_colors)} color(s) ' + + f"out of {num_required_colors} required for the analysis on focal species [{species_of_interest}]") + logging.error("Please add the missing color(s) and rerun the analysis") + logging.error("Exiting.") + sys.exit(1) + trios_array = [] # list of trios outfile_drawing_path = os.path.join("rate_adjustment", f"{species_of_interest}", f"tree_{species_of_interest}.txt") with open(outfile_drawing_path, "w+") as outfile_drawing: outfile_drawing.write(f"Focal species: {species_of_interest}\n\n") - - # Obtaining the numeric labels for internal nodes relevant in the species analysis - fcTree.labeling_internal_nodes(species_of_interest_node) node = 0 while node < len(sp_history)-2: