diff --git a/doc/source/configuration.rst b/doc/source/configuration.rst index 0d89402..5806131 100644 --- a/doc/source/configuration.rst +++ b/doc/source/configuration.rst @@ -201,7 +201,17 @@ The Nextflow configuration file is used to configure various settings for the *k Expert configuration file ========================= -This is an optional configuration file that contains several \"expert\" parameters for fine-tuning the analysis or for development/debug purposes. The file has to be named `config_expert.txt` and is then automatically detected when launching *ksrates*. The following can be used as a template:: +This is an optional configuration file that contains several \"expert\" parameters for fine-tuning the analysis or for development/debug purposes. The file can be provided on the command line through the ``--expert`` option. Alternatively, if the file has the default name ``config_expert.txt`` and is placed in the launching directory, it is detected automatically and the ``--expert`` option can be omitted. 
+ +Syntax for the Nextflow pipeline:: + + nextflow run VIB-PSB/ksrates --config config_elaeis.txt --expert path/to/my_expert_config.txt + +Syntax for single `ksrates` commands:: + + ksrates init config_elaeis.txt --expert path/to/my_expert_config.txt + +The following can be used as a template:: [EXPERT PARAMETERS] diff --git a/ksrates/cluster_anchor_ks.py b/ksrates/cluster_anchor_ks.py index 9a6eff3..645a6d8 100644 --- a/ksrates/cluster_anchor_ks.py +++ b/ksrates/cluster_anchor_ks.py @@ -16,8 +16,8 @@ from ksrates.fc_cluster_anchors import subfolder from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE -def cluster_anchor_ks(config_file, correction_table_file, path_anchorpoints_txt, path_multiplicons_txt, path_segments_txt, path_list_elements_txt, path_ks_anchor_file, path_multiplicon_pair_txt): - config = fcConf.Configuration(config_file) +def cluster_anchor_ks(config_file, expert_config_file, correction_table_file, path_anchorpoints_txt, path_multiplicons_txt, path_segments_txt, path_list_elements_txt, path_ks_anchor_file, path_multiplicon_pair_txt): + config = fcConf.Configuration(config_file, expert_config_file) init_logging(f"Clustering anchorpoints Ks values to reconstruct recent WGD events", config.get_logging_level()) logging.info("Loading parameters and input files") diff --git a/ksrates/compute_peaks.py b/ksrates/compute_peaks.py index 92cc53d..85987b9 100644 --- a/ksrates/compute_peaks.py +++ b/ksrates/compute_peaks.py @@ -10,9 +10,9 @@ from ksrates.utils import init_logging -def compute_peaks(config_file, ortholog_pairs_file): +def compute_peaks(config_file, expert_config_file, ortholog_pairs_file): # INPUT - config = fcConf.Configuration(config_file) + config = fcConf.Configuration(config_file, expert_config_file) init_logging("Computing ortholog distribution peaks with related error", config.get_logging_level()) logging.info("Loading parameters and input files") diff --git a/ksrates/correct.py b/ksrates/correct.py index 992cd91..b2b7af0 100644 --- 
a/ksrates/correct.py +++ b/ksrates/correct.py @@ -10,9 +10,9 @@ from ksrates.utils import init_logging -def correct(config_file, trios_file): +def correct(config_file, expert_config_file, trios_file): # INPUT - config = fcConf.Configuration(config_file) + config = fcConf.Configuration(config_file, expert_config_file) init_logging("Rate-adjustment of ortholog Ks distributions", config.get_logging_level()) logging.info("Loading parameters and input files") diff --git a/ksrates/exp_log_mixture.py b/ksrates/exp_log_mixture.py index 8a48c58..83c33ee 100644 --- a/ksrates/exp_log_mixture.py +++ b/ksrates/exp_log_mixture.py @@ -15,9 +15,9 @@ from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE -def exp_log_mixture(config_file, paralog_tsv_file, correction_table_file): +def exp_log_mixture(config_file, expert_config_file, paralog_tsv_file, correction_table_file): # INPUT - config = fcConf.Configuration(config_file) + config = fcConf.Configuration(config_file, expert_config_file) init_logging("Exponential-Lognormal mixture model on Ks paranome", config.get_logging_level()) logging.info("Loading parameters and input files") diff --git a/ksrates/fc_configfile.py b/ksrates/fc_configfile.py index 80ff9ec..c4d6031 100644 --- a/ksrates/fc_configfile.py +++ b/ksrates/fc_configfile.py @@ -9,7 +9,7 @@ class Configuration: - def __init__(self, config_path): + def __init__(self, config_path, expert_config_file): """ Initializes the configuration file and the expert configuration file. 
This latter is always named "config_expert.txt", the code looks for it @@ -19,13 +19,31 @@ def __init__(self, config_path): # Configuration file self.config = configparser.ConfigParser() self.config.read(config_path) + # Expert configuration file - if os.path.exists("config_expert.txt"): - self.expert_config = configparser.ConfigParser() - self.expert_config.read("config_expert.txt") + self.expert_config = configparser.ConfigParser() + # If no user-defined expert config file was given through "--expert" in the command line, + # variable "expert_config_file" was set to an empty string in the CLI code block + if expert_config_file == "": + # If the launching folder contains an expert config file with the default name "config_expert.txt", fall back to it + if os.path.exists("config_expert.txt"): + self.expert_config.read("config_expert.txt") + else: + # Else set the variable to None (scripts will use default expert parameters) + self.expert_config = None + # Else if a user-defined expert config file was given through "--expert", + # variable expert_config_file has already been set to that filename (so it's not an empty string) else: - self.expert_config = None - + # If the user-defined file exists, read it + if os.path.exists(expert_config_file): + self.expert_config.read(expert_config_file) + else: + # Else if the user-defined file doesn't exist (e.g. 
misspelled), print an error and exit + # (This case is actually already caught by the CLI option definition of "--expert") + logging.error(f"User-defined expert configuration file {expert_config_file} not found:") + logging.error("please check the input path after the '--expert' parameter in the command line and rerun the analysis") + sys.exit(1) + def _get_clean_dict(self, dict_like_string, parameter): """This method reads a dictionary-like field from the configuration file \\ diff --git a/ksrates/lognormal_mixture.py b/ksrates/lognormal_mixture.py index 53405f0..9123220 100644 --- a/ksrates/lognormal_mixture.py +++ b/ksrates/lognormal_mixture.py @@ -14,9 +14,9 @@ from ksrates.fc_cluster_anchors import subfolder from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE -def lognormal_mixture(config_file, paralog_tsv_file, anchors_ks_tsv_file, correction_table_file): +def lognormal_mixture(config_file, expert_config_file, paralog_tsv_file, anchors_ks_tsv_file, correction_table_file): # INPUT - config = fcConf.Configuration(config_file) + config = fcConf.Configuration(config_file, expert_config_file) init_logging(f"Lognormal mixture model on Ks distribution", config.get_logging_level()) logging.info("Loading parameters and input files") diff --git a/ksrates/paralogs_analyses.py b/ksrates/paralogs_analyses.py index b8900be..13acfcd 100644 --- a/ksrates/paralogs_analyses.py +++ b/ksrates/paralogs_analyses.py @@ -7,9 +7,9 @@ from ksrates.lognormal_mixture import lognormal_mixture import ksrates.fc_configfile as fcConf -def paralogs_analyses_methods(config_file, paranome_table, anchors_table, correction_table, anchorpoints, multiplicons, segments, list_elements, multiplicon_pairs): +def paralogs_analyses_methods(config_file, expert_config_file, paranome_table, anchors_table, correction_table, anchorpoints, multiplicons, segments, list_elements, multiplicon_pairs): # INPUT - config = fcConf.Configuration(config_file) + config = fcConf.Configuration(config_file, 
expert_config_file) logging.basicConfig(format='%(levelname)s\t%(message)s', level=config.get_logging_level(), stream=sys.stdout) paranome = config.get_paranome() @@ -18,27 +18,27 @@ def paralogs_analyses_methods(config_file, paranome_table, anchors_table, correc if paranome and not colinearity: # Only exp-log mixture model by default - exp_log_mixture(config_file, paranome_table, correction_table) + exp_log_mixture(config_file, expert_config_file, paranome_table, correction_table) if extra_paralogs_analyses_methods: logging.info(f"\n") # Lognormal mixture model on paranome - lognormal_mixture(config_file, paranome_table, anchors_table, correction_table) + lognormal_mixture(config_file, expert_config_file, paranome_table, anchors_table, correction_table) if colinearity and not paranome: # Only anchor clustering by default - cluster_anchor_ks(config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs) + cluster_anchor_ks(config_file, expert_config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs) if extra_paralogs_analyses_methods: logging.info(f"\n") # Lognormal mixture model on anchors - lognormal_mixture(config_file, paranome_table, anchors_table, correction_table) + lognormal_mixture(config_file, expert_config_file, paranome_table, anchors_table, correction_table) if colinearity and paranome: # Only anchor clustering by default - cluster_anchor_ks(config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs) + cluster_anchor_ks(config_file, expert_config_file, correction_table, anchorpoints, multiplicons, segments, list_elements, anchors_table, multiplicon_pairs) if extra_paralogs_analyses_methods: logging.info(f"\n") # Exp-log mixture model on paranome - exp_log_mixture(config_file, paranome_table, correction_table) + exp_log_mixture(config_file, expert_config_file, paranome_table, 
correction_table) logging.info(f"\n") # Lognormal mixture model on both - lognormal_mixture(config_file, paranome_table, anchors_table, correction_table) + lognormal_mixture(config_file, expert_config_file, paranome_table, anchors_table, correction_table) diff --git a/ksrates/plot_orthologs.py b/ksrates/plot_orthologs.py index a3059b9..dbd9f22 100644 --- a/ksrates/plot_orthologs.py +++ b/ksrates/plot_orthologs.py @@ -15,9 +15,9 @@ matplotlib.use('Agg') -def plot_orthologs_distr(config_file, trios_file): +def plot_orthologs_distr(config_file, expert_config_file, trios_file): # INPUT - config = fcConf.Configuration(config_file) + config = fcConf.Configuration(config_file, expert_config_file) init_logging("Plotting ortholog distributions for all ortholog trios", config.get_logging_level()) logging.info("Loading parameters and input files") diff --git a/ksrates/plot_paralogs.py b/ksrates/plot_paralogs.py index d6c8a82..3b325f6 100755 --- a/ksrates/plot_paralogs.py +++ b/ksrates/plot_paralogs.py @@ -9,9 +9,9 @@ import ksrates.fc_configfile as fcConf from ksrates.fc_rrt_correction import _ADJUSTMENT_TABLE -def plot_paralogs_distr(config_file, correction_table_file, paralog_tsv_file, anchors_ks_tsv_file): +def plot_paralogs_distr(config_file, expert_config_file, correction_table_file, paralog_tsv_file, anchors_ks_tsv_file): # INPUT - config = fcConf.Configuration(config_file) + config = fcConf.Configuration(config_file, expert_config_file) init_logging("Generating mixed paralog and ortholog distributions", config.get_logging_level()) logging.info("Loading parameters and input files") diff --git a/ksrates/plot_tree.py b/ksrates/plot_tree.py index e5d5989..3820a4d 100644 --- a/ksrates/plot_tree.py +++ b/ksrates/plot_tree.py @@ -9,9 +9,9 @@ import pandas -def plot_tree_rates(config_file, correction_table_file, nextflow_flag): +def plot_tree_rates(config_file, expert_config_file, correction_table_file, nextflow_flag): # INPUT - config = fcConf.Configuration(config_file) + 
config = fcConf.Configuration(config_file, expert_config_file) init_logging("Generating PDF of input tree with branch length equal to Ks distances", config.get_logging_level()) logging.info("Loading parameters and input files") diff --git a/ksrates/setup_correction.py b/ksrates/setup_correction.py index 3651ae5..451c9ea 100644 --- a/ksrates/setup_correction.py +++ b/ksrates/setup_correction.py @@ -10,8 +10,8 @@ from ksrates.utils import init_logging -def setup_correction(config_file, nextflow_flag): - config = fcConf.Configuration(config_file) +def setup_correction(config_file, expert_config_file, nextflow_flag): + config = fcConf.Configuration(config_file, expert_config_file) init_logging("Setting up the analysis from configuration file", config.get_logging_level()) logging.info("Loading parameters and input files") diff --git a/ksrates/wgd_orthologs.py b/ksrates/wgd_orthologs.py index 1667404..b857b81 100644 --- a/ksrates/wgd_orthologs.py +++ b/ksrates/wgd_orthologs.py @@ -7,12 +7,12 @@ from ksrates.utils import init_logging -def wgd_orthologs(config_file, species_one, species_two, n_threads): +def wgd_orthologs(config_file, expert_config_file, species_one, species_two, n_threads): # INPUT species_pair = sorted([species_one, species_two], key=str.casefold) species1, species2 = species_pair[0], species_pair[1] # sorted! 
- config = fcConf.Configuration(config_file) + config = fcConf.Configuration(config_file, expert_config_file) init_logging(f"Ortholog wgd analysis for species pair [{species1} - {species2}]", config.get_logging_level()) # Get parameters and FASTA files from configuration file diff --git a/ksrates/wgd_paralogs.py b/ksrates/wgd_paralogs.py index e7fd889..9c9b82c 100644 --- a/ksrates/wgd_paralogs.py +++ b/ksrates/wgd_paralogs.py @@ -8,10 +8,10 @@ from ksrates.utils import init_logging -def wgd_paralogs(config_file, n_threads): +def wgd_paralogs(config_file, expert_config_file, n_threads): # INPUT # Get parameters and FASTA files from configuration file - config = fcConf.Configuration(config_file) + config = fcConf.Configuration(config_file, expert_config_file) species = config.get_species() init_logging(f"Paralog wgd analysis for species [{species}]", config.get_logging_level()) diff --git a/ksrates_cli.py b/ksrates_cli.py index f4def99..c7f0825 100644 --- a/ksrates_cli.py +++ b/ksrates_cli.py @@ -26,8 +26,9 @@ def generate_config(filename): @cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Initializes rate-adjustment.") @click.argument('config_file', type=click.Path(exists=True)) +@click.option('-e', '--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") @click.option("-n", "--nextflow", is_flag=True, help="Flag for Nextflow pipeline (Default: False)") -def init(config_file, nextflow): +def init(config_file, expert, nextflow): """ Initializes rate-adjustment from CONFIG_FILE. 
@@ -35,13 +36,18 @@ def init(config_file, nextflow): """ from ksrates.setup_correction import setup_correction click.format_filename(config_file) - setup_correction(config_file, nextflow) + if expert: + click.format_filename(expert) + else: + expert = "" + setup_correction(config_file, expert, nextflow) @cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Performs paralog Ks estimation.") @click.argument('config_file', type=click.Path(exists=True)) @click.option("--n-threads", type=int, default=4, help="Number of threads (default: 4)") -def paralogs_ks(config_file, n_threads): +@click.option('-e', '--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") +def paralogs_ks(config_file, expert, n_threads): """ Performs paralog Ks estimation for the focal species through wgd. @@ -51,15 +57,19 @@ def paralogs_ks(config_file, n_threads): """ from ksrates.wgd_paralogs import wgd_paralogs click.format_filename(config_file) - wgd_paralogs(config_file, n_threads) - + if expert: + click.format_filename(expert) + else: + expert = "" + wgd_paralogs(config_file, expert, n_threads) @cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Performs ortholog Ks estimation.") @click.argument('config_file', type=click.Path(exists=True)) +@click.option('-e', '--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") @click.argument("species1") @click.argument("species2") @click.option("--n-threads", type=int, default=4, help="Number of threads (default: 4)") -def orthologs_ks(config_file, species1, species2, n_threads): +def orthologs_ks(config_file, expert, species1, species2, n_threads): """ Performs ortholog Ks estimation for SPECIES1 and SPECIES2 through wgd. 
@@ -72,14 +82,18 @@ def orthologs_ks(config_file, species1, species2, n_threads): """ from ksrates.wgd_orthologs import wgd_orthologs click.format_filename(config_file) - wgd_orthologs(config_file, species1, species2, n_threads) - + if expert: + click.format_filename(expert) + else: + expert = "" + wgd_orthologs(config_file, expert, species1, species2, n_threads) @cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Computes ortholog divergence times Ks estimates.") @click.argument('config_file', type=click.Path(exists=True)) +@click.option('-e', '--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") @click.option('--ortholog-pairs', type=click.Path(exists=True), help="User-defined path to file containing the ortholog pairs with missing ortholog Ks peak in database (default: rate_adjustment/species/ortholog_pairs_species.tsv)") -def orthologs_analysis(config_file, ortholog_pairs): +def orthologs_analysis(config_file, expert, ortholog_pairs): """ Computes ortholog Ks distribution mode (or median) and updates the ortholog databases. 
@@ -89,16 +103,21 @@ def orthologs_analysis(config_file, ortholog_pairs): """ from ksrates.compute_peaks import compute_peaks click.format_filename(config_file) + if expert: + click.format_filename(expert) + else: + expert = "" if ortholog_pairs: click.format_filename(ortholog_pairs) - compute_peaks(config_file, ortholog_pairs) + compute_peaks(config_file, expert, ortholog_pairs) @cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Performs ortholog substitution rate-adjustment.") @click.argument('config_file', type=click.Path(exists=True)) +@click.option('-e', '--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") @click.option("--trios", type=click.Path(exists=True), help="User-defined path to file containing the ortholog trios (default: rate_adjustment/species/orthologs_trios_species.tsv)") -def orthologs_adjustment(config_file, trios): +def orthologs_adjustment(config_file, expert, trios): """ Performs substitution rate-adjustment relative to the focal species. 
@@ -108,17 +127,22 @@ def orthologs_adjustment(config_file, trios): """ from ksrates.correct import correct click.format_filename(config_file) + if expert: + click.format_filename(expert) + else: + expert = "" if trios: click.format_filename(trios) - correct(config_file, trios) + correct(config_file, expert, trios) @cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Generates rate-adjusted mixed Ks plot.") @click.argument('config_file', type=click.Path(exists=True)) +@click.option('-e', '--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") @click.option("--adjustment-table", type=click.Path(exists=True), help="User-defined path to file containing adjustment results (default: rate_adjustment/species/adjustment_table_species.tsv)") @click.option("--paranome-table", type=click.Path(exists=True), help="User-defined path to file containing paranome Ks (default: paralog_distributions/wgd_species/species.ks.tsv)") @click.option("--anchors-table", type=click.Path(exists=True), help="User-defined path to file containing anchor pair Ks (default: paralog_distribution/wgd_species/species.ks_anchors.tsv)") -def plot_paralogs(config_file, adjustment_table, paranome_table, anchors_table): +def plot_paralogs(config_file, expert, adjustment_table, paranome_table, anchors_table): """ Plots rate-adjusted mixed paralog-ortholog Ks distribution. 
@@ -128,20 +152,25 @@ def plot_paralogs(config_file, adjustment_table, paranome_table, anchors_table): """ from ksrates.plot_paralogs import plot_paralogs_distr click.format_filename(config_file) + if expert: + click.format_filename(expert) + else: + expert = "" if adjustment_table: click.format_filename(adjustment_table) if paranome_table: click.format_filename(paranome_table) if anchors_table: click.format_filename(anchors_table) - plot_paralogs_distr(config_file, adjustment_table, paranome_table, anchors_table) + plot_paralogs_distr(config_file, expert, adjustment_table, paranome_table, anchors_table) @cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Generates phylogram with Ks-unit branch lengths.") @click.argument('config_file', type=click.Path(exists=True)) +@click.option('-e', '--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") @click.option("--adjustment-table", type=click.Path(exists=True), help="User-defined path to file containing adjustment results (default: rate_adjustment/species/adjustment_table_species.tsv)") @click.option("-n", "--nextflow", is_flag=True, help="Flag for Nextflow pipeline (Default: False)") -def plot_tree(config_file, adjustment_table, nextflow): +def plot_tree(config_file, expert, adjustment_table, nextflow): """ Generates a phylogram of the input dataset with branch lengths set to\ Ks distances estimated from ortholog KS distributions. 
@@ -152,15 +181,20 @@ def plot_tree(config_file, adjustment_table, nextflow): """ from ksrates.plot_tree import plot_tree_rates click.format_filename(config_file) + if expert: + click.format_filename(expert) + else: + expert = "" if adjustment_table: click.format_filename(adjustment_table) - plot_tree_rates(config_file, adjustment_table, nextflow) + plot_tree_rates(config_file, expert, adjustment_table, nextflow) @cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Generates ortholog Ks distributions plot.") @click.argument('config_file', type=click.Path(exists=True)) +@click.option('-e', '--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") @click.option("--trios", type=click.Path(exists=True), help="User-defined path to file containing the ortholog trios (default: rate_adjustment/species/orthologs_trios_species.tsv)") -def plot_orthologs(config_file, trios): +def plot_orthologs(config_file, expert, trios): """ Plots ortholog Ks distributions used for rate-adjustment. 
@@ -170,13 +204,18 @@ def plot_orthologs(config_file, trios): """ from ksrates.plot_orthologs import plot_orthologs_distr click.format_filename(config_file) + if expert: + click.format_filename(expert) + else: + expert = "" if trios: click.format_filename(trios) - plot_orthologs_distr(config_file, trios) + plot_orthologs_distr(config_file, expert, trios) @cli.command(context_settings={'help_option_names': ['-h', '--help']}, short_help="Detects WGD signatures in paralog Ks distribution.") @click.argument('config_file', type=click.Path(exists=True)) +@click.option('-e', '--expert', type=click.Path(exists=True), help="User-defined path to the expert configuration file") @click.option("--paranome-table", type=click.Path(exists=True), help="User-defined path to file containing paranome Ks (default: paralog_distributions/wgd_species/species.ks.tsv)") @click.option("--anchors-table", type=click.Path(exists=True), help="User-defined path to file containing anchor pair Ks (default: paralog_distribution/wgd_species/species.ks_anchors.tsv)") @click.option("--adjustment-table", type=click.Path(exists=True), help="User-defined path to file containing adjustment results (default: rate_adjustment/species/adjustment_table_species.tsv)") @@ -185,7 +224,7 @@ def plot_orthologs(config_file, trios): @click.option("--segments", type=click.Path(exists=True), help="User-defined path to i-ADHoRe file segments.txt (default: paralog_distributions/wgd_species/species_i-adhore/segments.txt)") @click.option("--list-elements", type=click.Path(exists=True), help="User-defined path to i-ADHoRe file list_elements.txt (default: paralog_distributions/wgd_species/species_i-adhore/list_elements.txt)") @click.option("--multiplicon-pairs", type=click.Path(exists=True), help="User-defined path to i-ADHoRe file multiplicons_pairs.txt (default: paralog_distributions/wgd_species/species_i-adhore/multiplicons_pairs.txt)") -def paralogs_analyses(config_file, paranome_table, anchors_table, adjustment_table, 
anchorpoints, multiplicons, segments, list_elements, multiplicon_pairs): +def paralogs_analyses(config_file, expert, paranome_table, anchors_table, adjustment_table, anchorpoints, multiplicons, segments, list_elements, multiplicon_pairs): """ Reconstructs potential WGD peaks in the paralog Ks distributions. @@ -199,6 +238,10 @@ def paralogs_analyses(config_file, paranome_table, anchors_table, adjustment_tab """ from ksrates.paralogs_analyses import paralogs_analyses_methods click.format_filename(config_file) + if expert: + click.format_filename(expert) + else: + expert = "" if paranome_table: click.format_filename(paranome_table) if anchors_table: @@ -215,7 +258,7 @@ def paralogs_analyses(config_file, paranome_table, anchors_table, adjustment_tab click.format_filename(list_elements) if multiplicon_pairs: click.format_filename(multiplicon_pairs) - paralogs_analyses_methods(config_file, paranome_table, anchors_table, adjustment_table, + paralogs_analyses_methods(config_file, expert, paranome_table, anchors_table, adjustment_table, anchorpoints, multiplicons, segments, list_elements, multiplicon_pairs) diff --git a/main.nf b/main.nf index 8e65f82..d5bea6c 100755 --- a/main.nf +++ b/main.nf @@ -14,7 +14,11 @@ version = version_file[0][1] // Parameter to automatically delete or not leftover folders at the end of the pipeline params.preserve = false -// giving the configuration file through the "input" process section +// Giving the configuration file through the "input" process section +// NOTE: +// $params.config is what the user has entered after --config in the command line, for example ./config_files/config_elaeis.txt +// $configfile is the config file absolute path, e.g. /home/config_files/config_elaeis.txt +// $config is the basename of the config file, e.g. 
config_elaeis.txt configfile = file(params.config) if (configfile.isEmpty()) { newConfigFile = true @@ -22,6 +26,48 @@ if (configfile.isEmpty()) { newConfigFile = false } +// Giving the expert configuration file as input +// NOTE: +// $params.expert is what the user has entered after --expert in the command line, for example ./config_files/config_expert.txt +// $expert_configfile is the expert config file absolute path, e.g. /home/config_files/config_expert.txt +// $expert_config is the basename of the expert config file, e.g. config_expert.txt + +// Set parameter expert to false by default (can be overwritten by using --expert in the command line) +params.expert = false +user_defined_expert_config_missing = false +default_expert_config_file_available = true +// If the user specified the expert config file through the command line (e.g. to specify a certain path), overwrite "false" and read the file +if (params.expert) { + expert_configfile = file(params.expert) + // If the user has defined a file/path, but it doesn't exist, flag it and interrupt the pipeline at a later checkpoint + if (expert_configfile.exists() == false) { + user_defined_expert_config_missing = true + } +} +else { + // If parameter --expert is not used in the command line but file config_expert.txt is present in the launching folder, use it + if (file("config_expert.txt").exists()) { + expert_configfile = file("config_expert.txt") + // Note that default_expert_config_file_available is already set to true + } + // If parameter --expert is not used in the command line and the default file config_expert.txt doesn't exist, + // set the variable to an empty string so that default parameters will be used + else { + expert_configfile = "" + default_expert_config_file_available = false + } +} + +// Collect input configuration files in a string that will be given to each ksrates command. 
+// The string always contains the standard config file, plus additionally the expert config file +// if this latter was provided by the user through the --expert option or if it was found +// with default name "config_expert.txt". +config_args = "${configfile}" +// If expert_configfile is NOT a string, it means that it has been provided and will be used +if (expert_configfile !instanceof String) { + config_args = config_args + " --expert ${expert_configfile}" +} + log.info "" log.info "" log.info """\ @@ -149,7 +195,8 @@ process checkConfig { input: file config from configfile - + file expert_config from expert_configfile + output: stdout outCheckConfig env trigger_pipeline into trigger_setupAdjustment_channel @@ -175,8 +222,28 @@ process checkConfig { trigger_pipeline=false else + echo "" + echo -n "Configuration file [${config}] found" trigger_pipeline=true fi + + # Expert config file + # If the file at the user-defined path is not found, warn and exit + if [ ${user_defined_expert_config_missing} = "true" ]; then + echo "" + echo "User-defined expert configuration file [${params.expert}] not found:" + echo "please check the input path after the '--expert' parameter in the command line and rerun the analysis" + exit 1 + # If the file with default name can't be found, will use default parameters + elif [ ${default_expert_config_file_available} = "false" ]; then + echo "" + echo -n "Expert configuration file [config_expert.txt] not available: will use default parameters" + # If the file with default name is found, will use parameter therein listed + else + echo "" + echo -n "Expert configuration file [${expert_config}] found" + fi + cd \$processDir """ } @@ -238,7 +305,8 @@ process setupAdjustment { echo -n "Extracting ortholog species pairs and trios from Newick tree... 
" echo "NF internal work directory for [setupAdjustment] process:\n\$processDir\n" > \${logs_folder}/${logs_names["setupAdjustment"]} - ksrates init ${config} --nextflow >> \${logs_folder}/${logs_names["setupAdjustment"]} 2>&1 + ksrates init ${config_args} --nextflow >> \${logs_folder}/${logs_names["setupAdjustment"]} 2>&1 + RET_CODE=\$? echo "done [\${RET_CODE}]" @@ -281,6 +349,7 @@ process setParalogAnalysis { val species from species_channel val logs_folder from logs_folder_channel file config from configfile + file expert_config from expert_configfile output: stdout outSetParalogAnalysis @@ -472,7 +541,8 @@ process estimatePeaks { echo "Updating ortholog peak database" >> $logs_folder/${logs_names["estimatePeaks"]} - ksrates orthologs-analysis ${config} --ortholog-pairs=\$processDir/$species_pairs_for_peak >> $logs_folder/${logs_names["estimatePeaks"]} 2>&1 + ksrates orthologs-analysis ${config_args} --ortholog-pairs=\$processDir/$species_pairs_for_peak >> $logs_folder/${logs_names["estimatePeaks"]} 2>&1 + RET_CODE=\$? echo "done [\${RET_CODE}] `date "+%T"`" @@ -499,7 +569,7 @@ process wgdParalogs { val trigger_wgdPara from trigger_wgdPara_channel val logs_folder from logs_folder_channel file config from configfile - + output: stdout outParalogs val true into trigger_doRateAdjustment_from_wgdParalog_channel @@ -518,9 +588,9 @@ process wgdParalogs { echo "NF internal work directory for [wgdParalogs (${task.index})] process:\n\$processDir\n" >> $logs_folder/${logs_names["wgdParalogs"]} echo "Using ${task.cpus} thread(s)\n">> $logs_folder/${logs_names["wgdParalogs"]} - echo "[$species] Using ${task.cpus} thread(s)" - ksrates paralogs-ks ${config} --n-threads=${task.cpus} >> $logs_folder/${logs_names["wgdParalogs"]} 2>&1 + ksrates paralogs-ks ${config_args} --n-threads=${task.cpus} >> $logs_folder/${logs_names["wgdParalogs"]} 2>&1 + RET_CODE=\$? 
echo "done [\${RET_CODE}] `date "+%T"`" @@ -547,7 +617,7 @@ process wgdOrthologs { tuple species1, species2 from species_pairs_for_wgd_Orthologs_channel.splitCsv(sep:'\t') val logs_folder from logs_folder_channel file config from configfile - + output: stdout outOrthologs val true into trigger_doRateAdjustment_from_wgdOrtholog_channel @@ -565,16 +635,18 @@ process wgdOrthologs { echo "NF internal work directory for [wgdOrthologs (${task.index})] process:\n\$processDir\n" > $logs_folder/${logs_names["wgdOrthologs"]}${species1}_${species2}.log echo "Using ${task.cpus} thread(s)\n">> $logs_folder/${logs_names["wgdOrthologs"]}${species1}_${species2}.log -# echo "[$species1 – $species2] Using ${task.cpus} thread(s)" - ksrates orthologs-ks ${config} $species1 $species2 --n-threads=${task.cpus} >> $logs_folder/${logs_names["wgdOrthologs"]}${species1}_${species2}.log 2>&1 + ksrates orthologs-ks ${config_args} $species1 $species2 --n-threads=${task.cpus} >> $logs_folder/${logs_names["wgdOrthologs"]}${species1}_${species2}.log 2>&1 + RET_CODE=\$? echo "done [\${RET_CODE}] `date "+%T"`" echo -n "[$species1 – $species2] `date "+%T"` Starting ortholog peak analysis... " echo "\n" >> $logs_folder/${logs_names["wgdOrthologs"]}${species1}_${species2}.log echo "Species1\tSpecies2\n$species1\t$species2" > \${processDir}/tmp_${species1}_${species2}.txt - ksrates orthologs-analysis ${config} --ortholog-pairs=\${processDir}/tmp_${species1}_${species2}.txt >> $logs_folder/${logs_names["wgdOrthologs"]}${species1}_${species2}.log 2>&1 + + ksrates orthologs-analysis ${config_args} --ortholog-pairs=\${processDir}/tmp_${species1}_${species2}.txt >> $logs_folder/${logs_names["wgdOrthologs"]}${species1}_${species2}.log 2>&1 + RET_CODE=\$? 
echo "done [\${RET_CODE}] `date "+%T"`" @@ -603,7 +675,7 @@ process plotOrthologDistrib { val trigger from trigger_plotOrtholog_from_setupAdjustment_channel.mix(trigger_plotOrtholog_from_estimatePeak_together_with_wgdOrthologs_channel.merge(trigger_plotOrtholog_from_wgdOrtholog_together_with_estimatePeak_channel.collect()), trigger_plotOrthologs_together_with_wgdOrtholog_channel.merge(trigger_plotOrtholog_from_wgdOrtholog_channel.collect()), trigger_plotOrthologs_together_with_estimatePeak_channel.merge(trigger_plotOrtholog_from_estimatePeak_channel)) val logs_folder from logs_folder_channel file config from configfile - + output: stdout outPlotOrthologDistrib @@ -633,7 +705,8 @@ process plotOrthologDistrib { cd $PWD echo "NF internal work directory for [plotOrthologDistrib] process:\n\$processDir\n" > $logs_folder/${logs_names["plotOrthologDistrib"]} - ksrates plot-orthologs ${config} >> $logs_folder/${logs_names["plotOrthologDistrib"]} 2>&1 + ksrates plot-orthologs ${config_args} >> $logs_folder/${logs_names["plotOrthologDistrib"]} 2>&1 + RET_CODE=\$? echo "done [\${RET_CODE}] `date "+%T"`" @@ -663,7 +736,8 @@ process doRateAdjustment { val trigger from trigger_doRateAdjustment_from_setParalog_channel.mix(trigger_doRateAdjustment_from_estimatePeak_channel, trigger_doRateAdjustment_from_wgdOrtholog_channel, trigger_doRateAdjustment_from_wgdParalog_channel) val logs_folder from logs_folder_channel file config from configfile - + file expert_config from expert_configfile + output: stdout outDoRateAdjustment val true into trigger_paralogsAnalyses_from_doRateAdjustment_channel @@ -710,14 +784,18 @@ process doRateAdjustment { echo "NF internal work directory for [doRateAdjustment (${task.index})] process:\n\$processDir\n" >> $logs_folder/${logs_names["doRateAdjustment"]} echo -n "`date "+%T"` Starting rate-adjustment analysis... 
" - ksrates orthologs-adjustment ${config} >> $logs_folder/${logs_names["doRateAdjustment"]} 2>&1 + + ksrates orthologs-adjustment ${config_args} >> $logs_folder/${logs_names["doRateAdjustment"]} 2>&1 + RET_CODE=\$? echo "done [\${RET_CODE}] `date "+%T"`" echo "\n" >> $logs_folder/${logs_names["doRateAdjustment"]} echo -n "`date "+%T"` Plotting mixed distributions... " - ksrates plot-paralogs ${config} >> $logs_folder/${logs_names["doRateAdjustment"]} 2>&1 + + ksrates plot-paralogs ${config_args} >> $logs_folder/${logs_names["doRateAdjustment"]} 2>&1 + RET_CODE=\$? echo "done [\${RET_CODE}] `date "+%T"`" @@ -748,7 +826,7 @@ process paralogsAnalyses { val logs_folder from logs_folder_channel file config from configfile val trigger from trigger_paralogsAnalyses_from_doRateAdjustment_channel.collect() - + output: stdout outParalogsAnalyses @@ -762,7 +840,8 @@ process paralogsAnalyses { cd $PWD echo "NF internal work directory for [paralogsAnalyses (${task.index})] process:\n\$processDir\n" >> $logs_folder/${logs_names["paralogsAnalyses"]} - ksrates paralogs-analyses ${config} >> $logs_folder/${logs_names["paralogsAnalyses"]} 2>&1 + ksrates paralogs-analyses ${config_args} >> $logs_folder/${logs_names["paralogsAnalyses"]} 2>&1 + RET_CODE=\$? echo "done [\${RET_CODE}] `date "+%T"`" @@ -796,7 +875,7 @@ process drawTree { val logs_folder from logs_folder_channel file config from configfile val trigger from trigger_drawTree_from_doRateAdjustment_channel.collect() - + output: stdout outDrawTree @@ -810,7 +889,8 @@ process drawTree { cd $PWD echo "NF internal work directory for [drawTree] process:\n\$processDir\n" >> $logs_folder/${logs_names["drawTree"]} - ksrates plot-tree ${config} --nextflow >> $logs_folder/${logs_names["drawTree"]} 2>&1 + ksrates plot-tree ${config_args} --nextflow >> $logs_folder/${logs_names["drawTree"]} 2>&1 + RET_CODE=\$? echo "done [\${RET_CODE}] `date "+%T"`"