From 79da25435af111ea47f8c12805372cc116b3e98a Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Mon, 11 Dec 2023 13:24:43 -0500 Subject: [PATCH 1/3] Support Cgroupsv2 --- .../job_metrics/instrumenters/cgroup.py | 63 ++++++++++++++----- 1 file changed, 47 insertions(+), 16 deletions(-) diff --git a/lib/galaxy/job_metrics/instrumenters/cgroup.py b/lib/galaxy/job_metrics/instrumenters/cgroup.py index 8ad6b11870c3..64b04acab04c 100644 --- a/lib/galaxy/job_metrics/instrumenters/cgroup.py +++ b/lib/galaxy/job_metrics/instrumenters/cgroup.py @@ -1,4 +1,5 @@ """The module describes the ``cgroup`` job metrics plugin.""" +import decimal import logging import numbers from collections import namedtuple @@ -17,7 +18,9 @@ log = logging.getLogger(__name__) +VALID_VERSIONS = ("auto", "1", "2") TITLES = { + # cgroupsv1 "memory.memsw.max_usage_in_bytes": "Max memory usage (MEM+SWP)", "memory.max_usage_in_bytes": "Max memory usage (MEM)", "memory.limit_in_bytes": "Memory limit on cgroup (MEM)", @@ -27,14 +30,35 @@ "memory.oom_control.oom_kill_disable": "OOM Control enabled", "memory.oom_control.under_oom": "Was OOM Killer active?", "cpuacct.usage": "CPU Time", + # cgroupsv2 + "memory.events.low": "Number of times the cgroup was reclaimed due to high memory pressure even though its usage is under the low " + "boundary", + "memory.events.high": "Number of times processes of the cgroup were throttled and routed to perform direct memory reclaim because " + "the high memory boundary was exceeded", + "memory.events.max": "Number of times the cgroup's memory usage was about to go over the max boundary", + "memory.events.oom": "Number of time the cgroup's memory usage reached the limit and allocation was about to fail", + "memory.events.oom_kill": "Number of processes belonging to this cgroup killed by any kind of OOM killer", + "memory.events.oom_group_kill": "Number of times a group OOM has occurred", + "memory.high": "Memory usage throttle limit", + "memory.low": "Best-effort memory protection", + "memory.max": "Memory usage hard limit", + "memory.min": "Hard memory protection", + "memory.peak": "Max memory usage recorded", + "cpu.stat.system_usec": "CPU system time (seconds)", + "cpu.stat.usage_usec": "CPU usage time (seconds)", + "cpu.stat.user_usec": "CPU user time (seconds)", } CONVERSION = { "memory.oom_control.oom_kill_disable": lambda x: "No" if x == 1 else "Yes", "memory.oom_control.under_oom": lambda x: "Yes" if x == 1 else "No", + "memory.peak": lambda x: nice_size(x), "cpuacct.usage": lambda x: formatting.seconds_to_str(x / 10**9), # convert nanoseconds + "cpu.stat.system_usec": lambda x: formatting.seconds_to_str(x / 10**6), # convert microseconds + "cpu.stat.usage_usec": lambda x: formatting.seconds_to_str(x / 10**6), # convert microseconds + "cpu.stat.user_usec": lambda x: formatting.seconds_to_str(x / 10**6), # convert microseconds } -CPU_USAGE_TEMPLATE = r""" -if [ -e "/proc/$$/cgroup" -a -d "{cgroup_mount}" ]; then +CGROUPSV1_TEMPLATE = r""" +if [ -e "/proc/$$/cgroup" -a -d "{cgroup_mount}" -a ! -f "{cgroup_mount}/cgroup.controllers" ]; then cgroup_path=$(cat "/proc/$$/cgroup" | awk -F':' '($2=="cpuacct,cpu") || ($2=="cpu,cpuacct") {{print $3}}'); if [ ! -e "{cgroup_mount}/cpu$cgroup_path/cpuacct.usage" ]; then cgroup_path=""; @@ -44,12 +68,6 @@ echo "__$(basename $f)__" >> {metrics}; cat "$f" >> {metrics} 2>/dev/null; fi; done; -fi -""".replace( - "\n", " " -).strip() -MEMORY_USAGE_TEMPLATE = """ -if [ -e "/proc/$$/cgroup" -a -d "{cgroup_mount}" ]; then cgroup_path=$(cat "/proc/$$/cgroup" | awk -F':' '$2=="memory"{{print $3}}'); if [ ! -e "{cgroup_mount}/memory$cgroup_path/memory.max_usage_in_bytes" ]; then cgroup_path=""; @@ -61,6 +79,16 @@ """.replace( "\n", " " ).strip() +CGROUPSV2_TEMPLATE = r""" +if [ -e "/proc/$$/cgroup" -a -f "{cgroup_mount}/cgroup.controllers" ]; then + cgroup_path=$(cat "/proc/$$/cgroup" | awk -F':' '($1=="0") {{print $3}}'); + for f in {cgroup_mount}/${{cgroup_path}}/{{cpu,memory}}.*; do + echo "__$(basename $f)__" >> {metrics}; cat "$f" >> {metrics} 2>/dev/null; + done; +fi +""".replace( + "\n", " " +).strip() Metric = namedtuple("Metric", ("key", "subkey", "value")) @@ -76,7 +104,7 @@ def format(self, key, value): return title, nice_size(value) except ValueError: pass - elif isinstance(value, (numbers.Integral, numbers.Real)) and value == int(value): + elif isinstance(value, (decimal.Decimal, numbers.Integral, numbers.Real)) and value == int(value): value = int(value) return title, value @@ -90,6 +118,8 @@ class CgroupPlugin(InstrumentPlugin): def __init__(self, **kwargs): self.verbose = asbool(kwargs.get("verbose", False)) self.cgroup_mount = kwargs.get("cgroup_mount", "/sys/fs/cgroup") + self.version = str(kwargs.get("version", "auto")) + assert self.version in VALID_VERSIONS, f"cgroup metric version option must be one of {VALID_VERSIONS}" params_str = kwargs.get("params", None) if isinstance(params_str, list): params = params_str @@ -101,22 +131,23 @@ def __init__(self, **kwargs): def post_execute_instrument(self, job_directory: str) -> List[str]: commands: List[str] = [] - commands.append(self.__record_cgroup_cpu_usage(job_directory)) - commands.append(self.__record_cgroup_memory_usage(job_directory)) + if self.version in ("auto", "1"): + commands.append(self.__record_cgroup_v1_usage(job_directory)) + if self.version in ("auto", "2"): + commands.append(self.__record_cgroup_v2_usage(job_directory)) return commands def job_properties(self, job_id, job_directory: str) -> Dict[str, Any]: metrics = self.__read_metrics(self.__cgroup_metrics_file(job_directory)) return metrics - def __record_cgroup_cpu_usage(self, job_directory: str) -> str: - # comounted cgroups (which cpu and cpuacct are on the supported Linux distros) can appear in any order (cpu,cpuacct or cpuacct,cpu) - return CPU_USAGE_TEMPLATE.format( + def __record_cgroup_v1_usage(self, job_directory: str) -> str: + return CGROUPSV1_TEMPLATE.format( metrics=self.__cgroup_metrics_file(job_directory), cgroup_mount=self.cgroup_mount ) - def __record_cgroup_memory_usage(self, job_directory: str) -> str: - return MEMORY_USAGE_TEMPLATE.format( + def __record_cgroup_v2_usage(self, job_directory: str) -> str: + return CGROUPSV2_TEMPLATE.format( metrics=self.__cgroup_metrics_file(job_directory), cgroup_mount=self.cgroup_mount ) From ebe12aa10acc10ad6fb40b0e8f8797fc74313101 Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Tue, 12 Dec 2023 16:45:40 -0500 Subject: [PATCH 2/3] Separate cgroup metrics default params from titles --- .../job_metrics/instrumenters/cgroup.py | 26 ++++++++++++++++--- 1 file changed, 22 insertions(+), 4 deletions(-) diff --git a/lib/galaxy/job_metrics/instrumenters/cgroup.py b/lib/galaxy/job_metrics/instrumenters/cgroup.py index 64b04acab04c..f55f66a8a01a 100644 --- a/lib/galaxy/job_metrics/instrumenters/cgroup.py +++ b/lib/galaxy/job_metrics/instrumenters/cgroup.py @@ -19,6 +19,24 @@ log = logging.getLogger(__name__) VALID_VERSIONS = ("auto", "1", "2") +DEFAULT_PARAMS = ( + # cgroupsv1 - this is probably more params than are useful to collect, but don't remove any for legacy reasons + "memory.memsw.max_usage_in_bytes", + "memory.max_usage_in_bytes", + "memory.limit_in_bytes", + "memory.memsw.limit_in_bytes", + "memory.soft_limit_in_bytes", + "memory.failcnt", + "memory.oom_control.oom_kill_disable", + "memory.oom_control.under_oom", + "cpuacct.usage", + # cgroupsv2 + "memory.events.oom_kill", + "memory.peak", + "cpu.stat.system_usec", + "cpu.stat.usage_usec", + "cpu.stat.user_usec", +) TITLES = { # cgroupsv1 "memory.memsw.max_usage_in_bytes": "Max memory usage (MEM+SWP)", @@ -44,9 +62,9 @@ "memory.max": "Memory usage hard limit", "memory.min": "Hard memory protection", "memory.peak": "Max memory usage recorded", - "cpu.stat.system_usec": "CPU system time (seconds)", - "cpu.stat.usage_usec": "CPU usage time (seconds)", - "cpu.stat.user_usec": "CPU user time (seconds)", + "cpu.stat.system_usec": "CPU system time", + "cpu.stat.usage_usec": "CPU usage time", + "cpu.stat.user_usec": "CPU user time", } CONVERSION = { "memory.oom_control.oom_kill_disable": lambda x: "No" if x == 1 else "Yes", @@ -126,7 +144,7 @@ def __init__(self, **kwargs): elif params_str: params = [v.strip() for v in params_str.split(",")] else: - params = list(TITLES.keys()) + params = list(DEFAULT_PARAMS) self.params = params def post_execute_instrument(self, job_directory: str) -> List[str]: From c650007360da8ccb8fb2bdd55664be842baa2c2d Mon Sep 17 00:00:00 2001 From: Nate Coraor Date: Tue, 12 Dec 2023 16:46:05 -0500 Subject: [PATCH 3/3] Cgroupsv2 metrics instrumenter tests --- test/unit/job_metrics/test_cgroups.py | 162 +++++++++++++++++++++- test/unit/job_metrics/test_job_metrics.py | 14 ++ 2 files changed, 173 insertions(+), 3 deletions(-) diff --git a/test/unit/job_metrics/test_cgroups.py b/test/unit/job_metrics/test_cgroups.py index 863ea89c558b..7c6b41ae8ed9 100644 --- a/test/unit/job_metrics/test_cgroups.py +++ b/test/unit/job_metrics/test_cgroups.py @@ -1,6 +1,6 @@ from galaxy.job_metrics.instrumenters.cgroup import CgroupPlugin -CGROUP_PRODUCTION_EXAMPLE_2201 = """__cpu.cfs_period_us__ +CGROUPV1_PRODUCTION_EXAMPLE_2201 = """__cpu.cfs_period_us__ 100000 __cpu.cfs_quota_us__ -1 @@ -105,11 +105,156 @@ 1 """ +CGROUPV2_PRODUCTION_EXAMPLE_232 = """__cpu.idle__ +0 +__cpu.max__ +max 100000 +__cpu.max.burst__ +0 +__cpu.stat__ +usage_usec 8992210 +user_usec 6139150 +system_usec 2853059 +core_sched.force_idle_usec 0 +nr_periods 0 +nr_throttled 0 +throttled_usec 0 +nr_bursts 0 +burst_usec 0 +__cpu.weight__ +100 +__cpu.weight.nice__ +0 +__memory.current__ +139350016 +__memory.events__ +low 0 +high 0 +max 0 +oom 0 +oom_kill 0 +oom_group_kill 0 +__memory.events.local__ +low 0 +high 0 +max 0 +oom 0 +oom_kill 0 +oom_group_kill 0 +__memory.high__ +max +__memory.low__ +0 +__memory.max__ +max +__memory.min__ +0 +__memory.numa_stat__ +anon N0=864256 +file N0=129146880 +kernel_stack N0=32768 +pagetables N0=131072 +sec_pagetables N0=0 +shmem N0=0 +file_mapped N0=0 +file_dirty N0=0 +file_writeback N0=0 +swapcached N0=0 +anon_thp N0=0 +file_thp N0=0 +shmem_thp N0=0 +inactive_anon N0=819200 +active_anon N0=20480 +inactive_file N0=51507200 +active_file N0=77639680 +unevictable N0=0 +slab_reclaimable N0=8638552 +slab_unreclaimable N0=340136 +workingset_refault_anon N0=0 +workingset_refault_file N0=77 +workingset_activate_anon N0=0 +workingset_activate_file N0=0 +workingset_restore_anon N0=0 +workingset_restore_file N0=0 +workingset_nodereclaim N0=0 +__memory.oom.group__ +0 +__memory.peak__ +339906560 +__memory.reclaim__ +__memory.stat__ +anon 860160 +file 129146880 +kernel 9211904 +kernel_stack 32768 +pagetables 126976 +sec_pagetables 0 +percpu 0 +sock 0 +vmalloc 0 +shmem 0 +zswap 0 +zswapped 0 +file_mapped 0 +file_dirty 0 +file_writeback 0 +swapcached 0 +anon_thp 0 +file_thp 0 +shmem_thp 0 +inactive_anon 815104 +active_anon 20480 +inactive_file 51507200 +active_file 77639680 +unevictable 0 +slab_reclaimable 8642480 +slab_unreclaimable 340904 +slab 8983384 +workingset_refault_anon 0 +workingset_refault_file 77 +workingset_activate_anon 0 +workingset_activate_file 0 +workingset_restore_anon 0 +workingset_restore_file 0 +workingset_nodereclaim 0 +pgscan 0 +pgsteal 0 +pgscan_kswapd 0 +pgscan_direct 0 +pgsteal_kswapd 0 +pgsteal_direct 0 +pgfault 132306 +pgmajfault 524 +pgrefill 0 +pgactivate 18958 +pgdeactivate 0 +pglazyfree 0 +pglazyfreed 0 +zswpin 0 +zswpout 0 +thp_fault_alloc 19 +thp_collapse_alloc 0 +__memory.swap.current__ +0 +__memory.swap.events__ +high 0 +max 0 +fail 0 +__memory.swap.high__ +max +__memory.swap.max__ +max +__memory.zswap.current__ +0 +__memory.zswap.max__ +max +""" + -def test_cgroup_collection(tmpdir): +def test_cgroupv1_collection(tmpdir): plugin = CgroupPlugin() job_dir = tmpdir.mkdir("job") - job_dir.join("__instrument_cgroup__metrics").write(CGROUP_PRODUCTION_EXAMPLE_2201) + job_dir.join("__instrument_cgroup__metrics").write(CGROUPV1_PRODUCTION_EXAMPLE_2201) properties = plugin.job_properties(1, job_dir) assert "cpuacct.usage" in properties assert properties["cpuacct.usage"] == 7265342042 @@ -117,6 +262,17 @@ def test_cgroup_collection(tmpdir): assert properties["memory.limit_in_bytes"] == 9223372036854771712 +def test_cgroupv2_collection(tmpdir): + plugin = CgroupPlugin() + job_dir = tmpdir.mkdir("job") + job_dir.join("__instrument_cgroup__metrics").write(CGROUPV2_PRODUCTION_EXAMPLE_232) + properties = plugin.job_properties(1, job_dir) + assert "cpu.stat.usage_usec" in properties + assert properties["cpu.stat.usage_usec"] == 8992210 + assert "memory.peak" in properties + assert properties["memory.peak"] == 339906560 + + def test_instrumentation(tmpdir): # don't actually run the instrumentation but at least exercise the code the and make # sure templating includes cgroup_mount override. diff --git a/test/unit/job_metrics/test_job_metrics.py b/test/unit/job_metrics/test_job_metrics.py index 07930c0684ec..a80df5e9b413 100644 --- a/test/unit/job_metrics/test_job_metrics.py +++ b/test/unit/job_metrics/test_job_metrics.py @@ -49,6 +49,20 @@ def test_job_metrics_format_cgroup(): assert_title="Memory limit on cgroup (MEM)", assert_value="8.0 EB", ) + _assert_format( + "cgroup", + "cpu.stat.usage_usec", + 7982357892.000000, + assert_title="CPU usage time", + assert_value="2.0 hours and 13.0 minutes", + ) + _assert_format( + "cgroup", + "memory.peak", + 45097156608, + assert_title="Max memory usage recorded", + assert_value="42.0 GB", + ) def test_job_metrics_uname():