From 91ef184f391314eda3d0e860dc2223d408d49be1 Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Tue, 12 Dec 2023 14:56:38 -0500 Subject: [PATCH 1/2] Support configuring job metrics inline, update documentation --- config/job_metrics_conf.xml.sample | 1 - doc/source/admin/galaxy_options.rst | 18 +- doc/source/admin/index.rst | 1 + doc/source/admin/job_metrics.rst | 171 ++++++++++++++++++ lib/galaxy/app.py | 2 +- lib/galaxy/config/__init__.py | 1 - lib/galaxy/config/sample/galaxy.yml.sample | 10 +- .../config/sample/job_metrics_conf.xml.sample | 138 -------------- lib/galaxy/config/schemas/config_schema.yml | 16 +- lib/galaxy/job_metrics/__init__.py | 17 +- .../job_metrics/instrumenters/cgroup.py | 4 +- lib/galaxy/job_metrics/instrumenters/env.py | 4 +- 12 files changed, 235 insertions(+), 148 deletions(-) delete mode 120000 config/job_metrics_conf.xml.sample create mode 100644 doc/source/admin/job_metrics.rst delete mode 100644 lib/galaxy/config/sample/job_metrics_conf.xml.sample diff --git a/config/job_metrics_conf.xml.sample b/config/job_metrics_conf.xml.sample deleted file mode 120000 index a5e81247f4c9..000000000000 --- a/config/job_metrics_conf.xml.sample +++ /dev/null @@ -1 +0,0 @@ -../lib/galaxy/config/sample/job_metrics_conf.xml.sample \ No newline at end of file diff --git a/doc/source/admin/galaxy_options.rst b/doc/source/admin/galaxy_options.rst index 38fe6334a007..ca6470d7092f 100644 --- a/doc/source/admin/galaxy_options.rst +++ b/doc/source/admin/galaxy_options.rst @@ -4289,7 +4289,7 @@ ~~~~~~~~~~~~~~~~~~~~~~~~~~~ :Description: - XML config file that contains the job metric collection + YAML or XML config file that contains the job metric collection configuration. The value of this option will be resolved with respect to . 
@@ -4297,6 +4297,22 @@ :Type: str +~~~~~~~~~~~~~~~ +``job_metrics`` +~~~~~~~~~~~~~~~ + +:Description: + Rather than specifying a job_metrics_config_file, the definition + of the metrics to enable can be embedded into Galaxy's config with + this option. This has no effect if a job_metrics_config_file is + used. + The syntax, available instrumenters, and documentation of their + options is explained in detail in the documentation: + https://docs.galaxyproject.org/en/master/admin/job_metrics.html +:Default: ``None`` +:Type: seq + + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``expose_potentially_sensitive_job_metrics`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/doc/source/admin/index.rst b/doc/source/admin/index.rst index c213a16e53d6..2ae8d1af72a0 100644 --- a/doc/source/admin/index.rst +++ b/doc/source/admin/index.rst @@ -17,6 +17,7 @@ This documentation is in the midst of being ported and unified based on resource scaling cluster jobs + job_metrics authentication tool_panel mq diff --git a/doc/source/admin/job_metrics.rst b/doc/source/admin/job_metrics.rst new file mode 100644 index 000000000000..aacbec1d304a --- /dev/null +++ b/doc/source/admin/job_metrics.rst @@ -0,0 +1,171 @@ +.. _job_metrics: + + +Collecting Job Metrics +====================== + +Galaxy can collect various metrics about jobs that it runs. The metrics that can be collected depend on which plugins +(described in this section) are enabled. Two ``galaxy.yml`` configuration options control the job metrics plugin +configuration: + +1. ``job_metrics``: Inline global configuration of job metrics plugins +2. ``job_metrics_config_file``: Path to a standalone metrics configuration file. Prior to Galaxy 23.2, this was the only + way to configure job metrics plugins. It defaults to ``<config_dir>/job_metrics_conf.xml`` for legacy reasons, but + using the XML syntax is discouraged; YAML (the syntax is the same as ``job_metrics``) is preferred.
+ +If the ``job_metrics_config_file`` exists, it overrides anything configured in ``job_metrics``. + +Default Job Metrics Configuration +--------------------------------- + +If no configuration is specified, the default is to load only the ``core`` plugin: + +.. code-block:: yaml + + - type: core + +Available Job Metrics Plugins +----------------------------- + +The list of metrics plugins implemented in the code can be found at ``lib/galaxy/job_metrics/instrumenters``. + + +core +~~~~ + +The core plugin captures the number of cores allocated to the job (``$GALAXY_SLOTS``), the start and end times of the job (in +seconds since epoch) and computes the runtime in seconds. + +It has no options. + +.. code-block:: yaml + + - type: core + +cpuinfo +~~~~~~~ + +The cpuinfo plugin captures the processor count on the system that the job ran on (note that this may differ from the +number of CPUs actually allocated to the job). + +The optional ``verbose`` option (default: ``false``) captures details (likely far too much) about each CPU, as found in +``/proc/cpuinfo``. + +The cpuinfo plugin works on Linux only. + +.. code-block:: yaml + + - type: cpuinfo + verbose: false + +meminfo +~~~~~~~ + +The meminfo plugin captures the memory information on the system that the job ran on (note that this may differ from the +amount of memory actually allocated to the job). + +It has no options. + +.. code-block:: yaml + + - type: meminfo + +hostname +~~~~~~~~ + +The hostname plugin captures the output of ``hostname`` on the system that the job ran on. + +It has no options. + +.. code-block:: yaml + + - type: hostname + +uname +~~~~~ + +The uname plugin captures the output of ``uname -a`` on the system that the job ran on. + +It has no options. + +.. code-block:: yaml + + - type: uname + +env +~~~ + +The env plugin captures environment variables set in the job's executing environment.
+ +By default, it captures **all** environment variables, which is likely excessive but may be useful for debugging. The +optional ``variables`` option can be set to a list of variables to capture (if set). For legacy purposes, this can also +be a comma-separated string of variable names. + +.. code-block:: yaml + + - type: env + variables: + - HOSTNAME + - SLURM_CPUS_ON_NODE + - SLURM_JOBID + +cgroup +~~~~~~ + +The cgroup plugin captures values set by `Linux Control Groups (cgroups) +<https://man7.org/linux/man-pages/man7/cgroups.7.html>`_. This is most useful if your jobs run in unique per-job Cgroups +(as Slurm does `if so configured <https://slurm.schedmd.com/cgroups.html>`_). + +Both cgroups version 1 (cgroupsv1) and cgroups version 2 (cgroupsv2) are supported; by default, metrics will be collected +for whichever version is mounted on the system where the job ran. The optional ``version`` option (default: ``auto``) +can be used to only generate metrics capture commands in the job script for the specified cgroups version (``1`` or +``2``). + +By default, only a small set of cgroup parameters will be recorded, the list of which can be found in +``lib/galaxy/job_metrics/instrumenters/cgroup.py`` in the Galaxy code. The optional ``verbose`` option (default: +``false``) can be set to capture all parameters in the ``cpu``, ``cpuacct``, and ``memory`` controllers (cgroups version +1) or ``cpu`` and ``memory`` controllers (cgroups version 2). + +It is also possible to specify exactly which cgroup parameters to capture by setting the optional ``params`` option to a +list of parameter names (files in the controller directory) to capture. For legacy purposes, this can also be a +comma-separated string of cgroup parameter names. + +The cgroup plugin works on Linux only. + +..
code-block:: yaml + + - type: cgroup + verbose: false + version: 2 + params: + - cpu.stat + - memory.peak + +Overriding the Global Job Metrics Configuration +----------------------------------------------- + +Individual Galaxy job config environments (destinations) can override the global job metrics configuration, or disable metric collection entirely, by setting the ``metrics`` parameter on that environment: + + +.. code-block:: yaml + + execution: + environments: + example: + metrics: + - type: core + - type: cpuinfo + - type: meminfo + +Alternatively, a file can be specified: + +.. code-block:: yaml + + execution: + environments: + example: + metrics: + src: path + path: /srv/galaxy/config/metrics_override.yml + +Additional accepted values for ``src`` include ``default`` and ``disabled``. diff --git a/lib/galaxy/app.py b/lib/galaxy/app.py index 1c8d04847c09..76eecfc872f3 100644 --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -521,7 +521,7 @@ def __init__(self, configure_logging=True, use_converters=True, use_display_appl # Initialize job metrics manager, needs to be in place before # config so per-destination modifications can be made.
self.job_metrics = self._register_singleton( - JobMetrics, JobMetrics(self.config.job_metrics_config_file, app=self) + JobMetrics, JobMetrics(self.config.job_metrics_config_file, self.config.job_metrics, app=self) ) # Initialize the job management configuration self.job_config = self._register_singleton(jobs.JobConfiguration) diff --git a/lib/galaxy/config/__init__.py b/lib/galaxy/config/__init__.py index a3dfa347e627..ca78874969eb 100644 --- a/lib/galaxy/config/__init__.py +++ b/lib/galaxy/config/__init__.py @@ -685,7 +685,6 @@ class GalaxyAppConfiguration(BaseAppConfiguration, CommonConfigurationMixin): add_sample_file_to_defaults = { "build_sites_config_file", "datatypes_config_file", - "job_metrics_config_file", "tool_data_table_config_path", "tool_config_file", } diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample index 2bdbe0956431..7854ce726f1c 100644 --- a/lib/galaxy/config/sample/galaxy.yml.sample +++ b/lib/galaxy/config/sample/galaxy.yml.sample @@ -2325,12 +2325,20 @@ galaxy: # with Galaxy there you can enable this option. #enable_tool_source_display: false - # XML config file that contains the job metric collection + # YAML or XML config file that contains the job metric collection # configuration. # The value of this option will be resolved with respect to # . #job_metrics_config_file: job_metrics_conf.xml + # Rather than specifying a job_metrics_config_file, the definition of + # the metrics to enable can be embedded into Galaxy's config with this + # option. This has no effect if a job_metrics_config_file is used. + # The syntax, available instrumenters, and documentation of their + # options is explained in detail in the documentation: + # https://docs.galaxyproject.org/en/master/admin/job_metrics.html + #job_metrics: null + # This option allows users to see the job metrics (except for # environment variables). 
#expose_potentially_sensitive_job_metrics: false diff --git a/lib/galaxy/config/sample/job_metrics_conf.xml.sample b/lib/galaxy/config/sample/job_metrics_conf.xml.sample deleted file mode 100644 index 1c9c70274e53..000000000000 --- a/lib/galaxy/config/sample/job_metrics_conf.xml.sample +++ /dev/null @@ -1,138 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index f63112d8a2f1..f4a4fcd1cbe5 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -3129,7 +3129,21 @@ mapping: path_resolves_to: config_dir required: false desc: | - XML config file that contains the job metric collection configuration. + YAML or XML config file that contains the job metric collection configuration. + + job_metrics: + type: seq + sequence: + - type: any + desc: | + Rather than specifying a job_metrics_config_file, the definition of the + metrics to enable can be embedded into Galaxy's config with this option. + This has no effect if a job_metrics_config_file is used. 
+ + The syntax, available instrumenters, and documentation of their options is explained in detail in the + documentation: + + https://docs.galaxyproject.org/en/master/admin/job_metrics.html expose_potentially_sensitive_job_metrics: type: bool diff --git a/lib/galaxy/job_metrics/__init__.py b/lib/galaxy/job_metrics/__init__.py index 8fdb85f75a2e..e65b6354863c 100644 --- a/lib/galaxy/job_metrics/__init__.py +++ b/lib/galaxy/job_metrics/__init__.py @@ -37,6 +37,7 @@ DEFAULT_FORMATTER = formatting.JobMetricFormatter() +DEFAULT_CONFIG = [{"type": "core"}] class DictifiableMetric(NamedTuple): @@ -68,10 +69,17 @@ class RawMetric(NamedTuple): class JobMetrics: """Load and store a collection of :class:`JobInstrumenter` objects.""" - def __init__(self, conf_file=None, **kwargs): + def __init__(self, conf_file=None, conf_dict=None, **kwargs): """Load :class:`JobInstrumenter` objects from specified configuration file.""" self.plugin_classes = self.__plugins_dict() - self.default_job_instrumenter = JobInstrumenter.from_file(self.plugin_classes, conf_file, **kwargs) + if conf_file and os.path.exists(conf_file): + self.default_job_instrumenter = JobInstrumenter.from_file(self.plugin_classes, conf_file, **kwargs) + elif conf_dict or conf_dict is None: + if conf_dict is None: + conf_dict = DEFAULT_CONFIG + self.default_job_instrumenter = JobInstrumenter.from_dict(self.plugin_classes, conf_dict, **kwargs) + else: + self.default_job_instrumenter = NULL_JOB_INSTRUMENTER self.job_instrumenters = collections.defaultdict(lambda: self.default_job_instrumenter) def format(self, plugin: str, key: str, value: Any) -> formatting.FormattedMetric: @@ -227,6 +235,11 @@ def from_file(plugin_classes, conf_file, **kwargs) -> "JobInstrumenterI": plugins_source = plugin_config.plugin_source_from_path(conf_file) return JobInstrumenter(plugin_classes, plugins_source, **kwargs) + @staticmethod + def from_dict(plugin_classes, conf_dict, **kwargs) -> "JobInstrumenterI": + plugin_source = 
plugin_config.plugin_source_from_dict(conf_dict) + return JobInstrumenter(plugin_classes, plugin_source, **kwargs) + __all__ = ( "JobInstrumenter", diff --git a/lib/galaxy/job_metrics/instrumenters/cgroup.py b/lib/galaxy/job_metrics/instrumenters/cgroup.py index d47390190320..8ad6b11870c3 100644 --- a/lib/galaxy/job_metrics/instrumenters/cgroup.py +++ b/lib/galaxy/job_metrics/instrumenters/cgroup.py @@ -91,7 +91,9 @@ def __init__(self, **kwargs): self.verbose = asbool(kwargs.get("verbose", False)) self.cgroup_mount = kwargs.get("cgroup_mount", "/sys/fs/cgroup") params_str = kwargs.get("params", None) - if params_str: + if isinstance(params_str, list): + params = params_str + elif params_str: params = [v.strip() for v in params_str.split(",")] else: params = list(TITLES.keys()) diff --git a/lib/galaxy/job_metrics/instrumenters/env.py b/lib/galaxy/job_metrics/instrumenters/env.py index fd28e89b9e5b..d08b5c90a784 100644 --- a/lib/galaxy/job_metrics/instrumenters/env.py +++ b/lib/galaxy/job_metrics/instrumenters/env.py @@ -29,7 +29,9 @@ class EnvPlugin(InstrumentPlugin): def __init__(self, **kwargs): variables_str = kwargs.get("variables", None) - if variables_str: + if isinstance(variables_str, list): + self.variables = variables_str + elif variables_str: self.variables = [v.strip() for v in variables_str.split(",")] else: self.variables = None From 0598a4be81b5533749eae119b39756e56dcbb152 Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Tue, 12 Dec 2023 16:53:54 -0500 Subject: [PATCH 2/2] Document falsey values of job_metrics --- doc/source/admin/galaxy_options.rst | 2 ++ lib/galaxy/config/sample/galaxy.yml.sample | 2 ++ lib/galaxy/config/schemas/config_schema.yml | 3 +++ lib/galaxy/job_metrics/__init__.py | 1 + 4 files changed, 8 insertions(+) diff --git a/doc/source/admin/galaxy_options.rst b/doc/source/admin/galaxy_options.rst index ca6470d7092f..7ee4d6243cd8 100644 --- a/doc/source/admin/galaxy_options.rst +++ b/doc/source/admin/galaxy_options.rst @@ -4309,6 
+4309,8 @@ The syntax, available instrumenters, and documentation of their options is explained in detail in the documentation: https://docs.galaxyproject.org/en/master/admin/job_metrics.html + By default, the core plugin is enabled. Setting this option to + false or an empty list disables metrics entirely. :Default: ``None`` :Type: seq diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample index 7854ce726f1c..8d1149c1e445 100644 --- a/lib/galaxy/config/sample/galaxy.yml.sample +++ b/lib/galaxy/config/sample/galaxy.yml.sample @@ -2337,6 +2337,8 @@ galaxy: # The syntax, available instrumenters, and documentation of their # options is explained in detail in the documentation: # https://docs.galaxyproject.org/en/master/admin/job_metrics.html + # By default, the core plugin is enabled. Setting this option to false + # or an empty list disables metrics entirely. #job_metrics: null # This option allows users to see the job metrics (except for diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index f4a4fcd1cbe5..52456679891c 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -3145,6 +3145,9 @@ mapping: https://docs.galaxyproject.org/en/master/admin/job_metrics.html + By default, the core plugin is enabled. Setting this option to false or an empty list disables metrics + entirely. 
+ expose_potentially_sensitive_job_metrics: type: bool default: false diff --git a/lib/galaxy/job_metrics/__init__.py b/lib/galaxy/job_metrics/__init__.py index e65b6354863c..2b1c5399bd20 100644 --- a/lib/galaxy/job_metrics/__init__.py +++ b/lib/galaxy/job_metrics/__init__.py @@ -79,6 +79,7 @@ def __init__(self, conf_file=None, conf_dict=None, **kwargs): conf_dict = DEFAULT_CONFIG self.default_job_instrumenter = JobInstrumenter.from_dict(self.plugin_classes, conf_dict, **kwargs) else: + # allows for setting non-None falsey values to get no metrics config whatsoever self.default_job_instrumenter = NULL_JOB_INSTRUMENTER self.job_instrumenters = collections.defaultdict(lambda: self.default_job_instrumenter)