Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[23.2] Add support for Cgroupsv2 #17169

Merged
merged 3 commits into from
Dec 14, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 66 additions & 17 deletions lib/galaxy/job_metrics/instrumenters/cgroup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
"""The module describes the ``cgroup`` job metrics plugin."""
import decimal
import logging
import numbers
from collections import namedtuple
Expand All @@ -17,7 +18,27 @@

log = logging.getLogger(__name__)

# Accepted values for the plugin's ``version`` option. CgroupPlugin.__init__
# asserts that the configured value (converted with str()) is one of these.
VALID_VERSIONS = ("auto", "1", "2")
# Metric names used for the plugin's ``params`` when the ``params`` option is
# not supplied; CgroupPlugin.__init__ copies this tuple into a list.
# The tuple deliberately mixes cgroups v1 and cgroups v2 names.
DEFAULT_PARAMS = (
# cgroupsv1 - this is probably more params than are useful to collect, but don't remove any for legacy reasons
"memory.memsw.max_usage_in_bytes",
"memory.max_usage_in_bytes",
"memory.limit_in_bytes",
"memory.memsw.limit_in_bytes",
"memory.soft_limit_in_bytes",
"memory.failcnt",
"memory.oom_control.oom_kill_disable",
"memory.oom_control.under_oom",
"cpuacct.usage",
# cgroupsv2
"memory.events.oom_kill",
"memory.peak",
"cpu.stat.system_usec",
"cpu.stat.usage_usec",
"cpu.stat.user_usec",
)
TITLES = {
# cgroupsv1
"memory.memsw.max_usage_in_bytes": "Max memory usage (MEM+SWP)",
"memory.max_usage_in_bytes": "Max memory usage (MEM)",
"memory.limit_in_bytes": "Memory limit on cgroup (MEM)",
Expand All @@ -27,14 +48,35 @@
"memory.oom_control.oom_kill_disable": "OOM Control enabled",
"memory.oom_control.under_oom": "Was OOM Killer active?",
"cpuacct.usage": "CPU Time",
# cgroupsv2
"memory.events.low": "Number of times the cgroup was reclaimed due to high memory pressure even though its usage is under the low "
mvdbeek marked this conversation as resolved.
Show resolved Hide resolved
"boundary",
"memory.events.high": "Number of times processes of the cgroup were throttled and routed to perform direct memory reclaim because "
"the high memory boundary was exceeded",
"memory.events.max": "Number of times the cgroup's memory usage was about to go over the max boundary",
"memory.events.oom": "Number of time the cgroup's memory usage reached the limit and allocation was about to fail",
"memory.events.oom_kill": "Number of processes belonging to this cgroup killed by any kind of OOM killer",
"memory.events.oom_group_kill": "Number of times a group OOM has occurred",
"memory.high": "Memory usage throttle limit",
"memory.low": "Best-effort memory protection",
"memory.max": "Memory usage hard limit",
"memory.min": "Hard memory protection",
"memory.peak": "Max memory usage recorded",
"cpu.stat.system_usec": "CPU system time",
"cpu.stat.usage_usec": "CPU usage time",
"cpu.stat.user_usec": "CPU user time",
}
# Per-metric converters: maps a metric name to a callable that takes the raw
# numeric value and returns a display value.
# NOTE(review): presumably looked up when a metric is formatted for display;
# the consuming code is not visible in this hunk - confirm.
CONVERSION = {
# Note the inversion relative to under_oom below: a value of 1 maps to "No".
"memory.oom_control.oom_kill_disable": lambda x: "No" if x == 1 else "Yes",
"memory.oom_control.under_oom": lambda x: "Yes" if x == 1 else "No",
"memory.peak": lambda x: nice_size(x),
"cpuacct.usage": lambda x: formatting.seconds_to_str(x / 10**9),  # convert nanoseconds
"cpu.stat.system_usec": lambda x: formatting.seconds_to_str(x / 10**6),  # convert microseconds
"cpu.stat.usage_usec": lambda x: formatting.seconds_to_str(x / 10**6),  # convert microseconds
"cpu.stat.user_usec": lambda x: formatting.seconds_to_str(x / 10**6),  # convert microseconds
}
CPU_USAGE_TEMPLATE = r"""
if [ -e "/proc/$$/cgroup" -a -d "{cgroup_mount}" ]; then
CGROUPSV1_TEMPLATE = r"""
if [ -e "/proc/$$/cgroup" -a -d "{cgroup_mount}" -a ! -f "{cgroup_mount}/cgroup.controllers" ]; then
cgroup_path=$(cat "/proc/$$/cgroup" | awk -F':' '($2=="cpuacct,cpu") || ($2=="cpu,cpuacct") {{print $3}}');
if [ ! -e "{cgroup_mount}/cpu$cgroup_path/cpuacct.usage" ]; then
cgroup_path="";
Expand All @@ -44,12 +86,6 @@
echo "__$(basename $f)__" >> {metrics}; cat "$f" >> {metrics} 2>/dev/null;
fi;
done;
fi
""".replace(
"\n", " "
).strip()
MEMORY_USAGE_TEMPLATE = """
if [ -e "/proc/$$/cgroup" -a -d "{cgroup_mount}" ]; then
cgroup_path=$(cat "/proc/$$/cgroup" | awk -F':' '$2=="memory"{{print $3}}');
if [ ! -e "{cgroup_mount}/memory$cgroup_path/memory.max_usage_in_bytes" ]; then
cgroup_path="";
Expand All @@ -61,6 +97,16 @@
""".replace(
"\n", " "
).strip()
# Shell snippet, used as a str.format template, that records cgroups v2 metrics.
# Placeholders are {cgroup_mount} and {metrics}; braces meant for the shell are
# doubled ({{ }}) so that str.format emits them literally.
# The snippet only does work when /proc/$$/cgroup exists and
# {cgroup_mount}/cgroup.controllers is a regular file. It reads the cgroup path
# from the /proc/$$/cgroup line whose first ':'-separated field is "0", then for
# each cpu.* and memory.* file under that path appends a "__<file name>__"
# header line followed by the file's contents to {metrics}.
# Newlines are replaced with spaces so the result is a single line: every shell
# statement must therefore end with ";", and a shell "#" comment inside the
# template would comment out everything after it.
# NOTE(review): {cpu,memory} relies on shell brace expansion (e.g. bash) -
# confirm the shell that runs the job script supports it.
CGROUPSV2_TEMPLATE = r"""
if [ -e "/proc/$$/cgroup" -a -f "{cgroup_mount}/cgroup.controllers" ]; then
cgroup_path=$(cat "/proc/$$/cgroup" | awk -F':' '($1=="0") {{print $3}}');
for f in {cgroup_mount}/${{cgroup_path}}/{{cpu,memory}}.*; do
echo "__$(basename $f)__" >> {metrics}; cat "$f" >> {metrics} 2>/dev/null;
done;
fi
""".replace(
"\n", " "
).strip()


# Lightweight record type with the fields ``key``, ``subkey`` and ``value``.
# NOTE(review): presumably one parsed entry of the metrics file - confirm
# against the code that reads the file (not visible in this hunk).
Metric = namedtuple("Metric", ("key", "subkey", "value"))
Expand All @@ -76,7 +122,7 @@ def format(self, key, value):
return title, nice_size(value)
except ValueError:
pass
elif isinstance(value, (numbers.Integral, numbers.Real)) and value == int(value):
elif isinstance(value, (decimal.Decimal, numbers.Integral, numbers.Real)) and value == int(value):
value = int(value)
return title, value

Expand All @@ -90,33 +136,36 @@ class CgroupPlugin(InstrumentPlugin):
def __init__(self, **kwargs):
self.verbose = asbool(kwargs.get("verbose", False))
self.cgroup_mount = kwargs.get("cgroup_mount", "/sys/fs/cgroup")
self.version = str(kwargs.get("version", "auto"))
assert self.version in VALID_VERSIONS, f"cgroup metric version option must be one of {VALID_VERSIONS}"
params_str = kwargs.get("params", None)
if isinstance(params_str, list):
params = params_str
elif params_str:
params = [v.strip() for v in params_str.split(",")]
else:
params = list(TITLES.keys())
params = list(DEFAULT_PARAMS)
self.params = params

def post_execute_instrument(self, job_directory: str) -> List[str]:
commands: List[str] = []
commands.append(self.__record_cgroup_cpu_usage(job_directory))
commands.append(self.__record_cgroup_memory_usage(job_directory))
if self.version in ("auto", "1"):
commands.append(self.__record_cgroup_v1_usage(job_directory))
if self.version in ("auto", "2"):
commands.append(self.__record_cgroup_v2_usage(job_directory))
return commands

def job_properties(self, job_id, job_directory: str) -> Dict[str, Any]:
    """Return the cgroup metrics recorded for a job.

    Locates the metrics file for ``job_directory`` and returns whatever the
    metrics reader produces for it. ``job_id`` is accepted but not used here.
    """
    metrics_path = self.__cgroup_metrics_file(job_directory)
    return self.__read_metrics(metrics_path)

def __record_cgroup_cpu_usage(self, job_directory: str) -> str:
# comounted cgroups (which cpu and cpuacct are on the supported Linux distros) can appear in any order (cpu,cpuacct or cpuacct,cpu)
return CPU_USAGE_TEMPLATE.format(
def __record_cgroup_v1_usage(self, job_directory: str) -> str:
return CGROUPSV1_TEMPLATE.format(
metrics=self.__cgroup_metrics_file(job_directory), cgroup_mount=self.cgroup_mount
)

def __record_cgroup_memory_usage(self, job_directory: str) -> str:
return MEMORY_USAGE_TEMPLATE.format(
def __record_cgroup_v2_usage(self, job_directory: str) -> str:
return CGROUPSV2_TEMPLATE.format(
metrics=self.__cgroup_metrics_file(job_directory), cgroup_mount=self.cgroup_mount
)

Expand Down
162 changes: 159 additions & 3 deletions test/unit/job_metrics/test_cgroups.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from galaxy.job_metrics.instrumenters.cgroup import CgroupPlugin

CGROUP_PRODUCTION_EXAMPLE_2201 = """__cpu.cfs_period_us__
CGROUPV1_PRODUCTION_EXAMPLE_2201 = """__cpu.cfs_period_us__
100000
__cpu.cfs_quota_us__
-1
Expand Down Expand Up @@ -105,18 +105,174 @@
1
"""

CGROUPV2_PRODUCTION_EXAMPLE_232 = """__cpu.idle__
0
__cpu.max__
max 100000
__cpu.max.burst__
0
__cpu.stat__
usage_usec 8992210
user_usec 6139150
system_usec 2853059
core_sched.force_idle_usec 0
nr_periods 0
nr_throttled 0
throttled_usec 0
nr_bursts 0
burst_usec 0
__cpu.weight__
100
__cpu.weight.nice__
0
__memory.current__
139350016
__memory.events__
low 0
high 0
max 0
oom 0
oom_kill 0
oom_group_kill 0
__memory.events.local__
low 0
high 0
max 0
oom 0
oom_kill 0
oom_group_kill 0
__memory.high__
max
__memory.low__
0
__memory.max__
max
__memory.min__
0
__memory.numa_stat__
anon N0=864256
file N0=129146880
kernel_stack N0=32768
pagetables N0=131072
sec_pagetables N0=0
shmem N0=0
file_mapped N0=0
file_dirty N0=0
file_writeback N0=0
swapcached N0=0
anon_thp N0=0
file_thp N0=0
shmem_thp N0=0
inactive_anon N0=819200
active_anon N0=20480
inactive_file N0=51507200
active_file N0=77639680
unevictable N0=0
slab_reclaimable N0=8638552
slab_unreclaimable N0=340136
workingset_refault_anon N0=0
workingset_refault_file N0=77
workingset_activate_anon N0=0
workingset_activate_file N0=0
workingset_restore_anon N0=0
workingset_restore_file N0=0
workingset_nodereclaim N0=0
__memory.oom.group__
0
__memory.peak__
339906560
__memory.reclaim__
__memory.stat__
anon 860160
file 129146880
kernel 9211904
kernel_stack 32768
pagetables 126976
sec_pagetables 0
percpu 0
sock 0
vmalloc 0
shmem 0
zswap 0
zswapped 0
file_mapped 0
file_dirty 0
file_writeback 0
swapcached 0
anon_thp 0
file_thp 0
shmem_thp 0
inactive_anon 815104
active_anon 20480
inactive_file 51507200
active_file 77639680
unevictable 0
slab_reclaimable 8642480
slab_unreclaimable 340904
slab 8983384
workingset_refault_anon 0
workingset_refault_file 77
workingset_activate_anon 0
workingset_activate_file 0
workingset_restore_anon 0
workingset_restore_file 0
workingset_nodereclaim 0
pgscan 0
pgsteal 0
pgscan_kswapd 0
pgscan_direct 0
pgsteal_kswapd 0
pgsteal_direct 0
pgfault 132306
pgmajfault 524
pgrefill 0
pgactivate 18958
pgdeactivate 0
pglazyfree 0
pglazyfreed 0
zswpin 0
zswpout 0
thp_fault_alloc 19
thp_collapse_alloc 0
__memory.swap.current__
0
__memory.swap.events__
high 0
max 0
fail 0
__memory.swap.high__
max
__memory.swap.max__
max
__memory.zswap.current__
0
__memory.zswap.max__
max
"""


def test_cgroup_collection(tmpdir):
def test_cgroupv1_collection(tmpdir):
plugin = CgroupPlugin()
job_dir = tmpdir.mkdir("job")
job_dir.join("__instrument_cgroup__metrics").write(CGROUP_PRODUCTION_EXAMPLE_2201)
job_dir.join("__instrument_cgroup__metrics").write(CGROUPV1_PRODUCTION_EXAMPLE_2201)
properties = plugin.job_properties(1, job_dir)
assert "cpuacct.usage" in properties
assert properties["cpuacct.usage"] == 7265342042
assert "memory.limit_in_bytes" in properties
assert properties["memory.limit_in_bytes"] == 9223372036854771712


def test_cgroupv2_collection(tmpdir):
    """Parse a captured cgroups v2 metrics file and check selected values."""
    plugin = CgroupPlugin()
    job_dir = tmpdir.mkdir("job")
    metrics_file = job_dir.join("__instrument_cgroup__metrics")
    metrics_file.write(CGROUPV2_PRODUCTION_EXAMPLE_232)
    properties = plugin.job_properties(1, job_dir)
    # Values taken from the cpu.stat and memory.peak sections of the fixture.
    expected = {
        "cpu.stat.usage_usec": 8992210,
        "memory.peak": 339906560,
    }
    for name, value in expected.items():
        assert name in properties
        assert properties[name] == value


def test_instrumentation(tmpdir):
# don't actually run the instrumentation but at least exercise the code and make
# sure templating includes cgroup_mount override.
Expand Down
14 changes: 14 additions & 0 deletions test/unit/job_metrics/test_job_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,20 @@ def test_job_metrics_format_cgroup():
assert_title="Memory limit on cgroup (MEM)",
assert_value="8.0 EB",
)
_assert_format(
"cgroup",
"cpu.stat.usage_usec",
7982357892.000000,
assert_title="CPU usage time",
assert_value="2.0 hours and 13.0 minutes",
)
_assert_format(
"cgroup",
"memory.peak",
45097156608,
assert_title="Max memory usage recorded",
assert_value="42.0 GB",
)


def test_job_metrics_uname():
Expand Down
Loading