From c02f2d5da46b7df392e3f06ca26ac30f18cdf003 Mon Sep 17 00:00:00 2001 From: Daniel Swarbrick Date: Fri, 18 Oct 2024 13:49:37 +0200 Subject: [PATCH] infiniband: do not make assumptions about counters based on HCA name Some users have reported cases of systemd "predictable network interface naming" apparently also renaming the HCA device. This means we can no longer make assumptions about which counter(s) should be present based on the HCA name (i.e., irdma*, mlx5_*). The previous approach was quite brittle anyway, since there will undoubtedly be other IB / RoCE drivers in future which implement the hw_counters directory (but not the older counters directory). Signed-off-by: Daniel Swarbrick --- sysfs/class_infiniband.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/sysfs/class_infiniband.go b/sysfs/class_infiniband.go index 7892c69a..deaa0a4a 100644 --- a/sysfs/class_infiniband.go +++ b/sysfs/class_infiniband.go @@ -279,8 +279,12 @@ func (fs FS) parseInfiniBandPort(name string, port string) (*InfiniBandPort, err return nil, fmt.Errorf("could not parse rate file in %q: %w", portPath, err) } - // Intel irdma module does not expose /sys/class/infiniband//ports//counters - if !strings.HasPrefix(ibp.Name, "irdma") { + // Since the HCA may have been renamed by systemd, we cannot infer the kernel driver used by the + // device, and thus do not know what type(s) of counters should be present. Attempt to parse + // either / both "counters" (and potentially also "counters_ext"), and "hw_counters", subject + // to their availability on the system - irrespective of HCA naming convention. + + if _, err := os.Stat(filepath.Join(portPath, "counters")); err == nil { counters, err := parseInfiniBandCounters(portPath) if err != nil { return nil, err @@ -288,7 +292,7 @@ func (fs FS) parseInfiniBandPort(name string, port string) (*InfiniBandPort, err ibp.Counters = *counters } - if strings.HasPrefix(ibp.Name, "irdma") || strings.HasPrefix(ibp.Name, "mlx5_") { + if _, err := os.Stat(filepath.Join(portPath, "hw_counters")); err == nil { hwCounters, err := parseInfiniBandHwCounters(portPath) if err != nil { return nil, err