forked from prometheus/procfs
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Read PCIE AER counters class/net (prometheus#686)
* feat: Read PCIE AER counters class/net. Linux provides AER counters in the path /sys/class/net/<iface>/device/. This is split among 3 different files: aer_dev_correctable, aer_dev_fatal, aer_dev_nonfatal --------- Signed-off-by: Diego Asturias <[email protected]> Signed-off-by: Gregory Eremin <[email protected]>
- Loading branch information
1 parent
e4e2d8a
commit ecf7cf9
Showing
3 changed files
with
425 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,255 @@ | ||
// Copyright 2024 The Prometheus Authors | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
//go:build linux | ||
// +build linux | ||
|
||
package sysfs | ||
|
||
import ( | ||
"fmt" | ||
"path/filepath" | ||
"strconv" | ||
"strings" | ||
|
||
"github.com/prometheus/procfs/internal/util" | ||
) | ||
|
||
// CorrectableAerCounters holds the values read from
// /sys/class/net/<iface>/device/aer_dev_correctable for a single interface
// (iface). Each field stores the value of the counter line carrying the same
// name in that file.
type CorrectableAerCounters struct {
	RxErr, BadTLP, BadDLLP, Rollover           uint64
	Timeout, NonFatalErr, CorrIntErr, HeaderOF uint64
}
|
||
// UncorrectableAerCounters holds the values read from
// /sys/class/net/<iface>/device/aer_dev_fatal or aer_dev_nonfatal for a
// single interface (iface). Each field stores the value of the counter line
// carrying the same name in that file.
type UncorrectableAerCounters struct {
	Undefined, DLP, SDES, TLP, FCP            uint64
	CmpltTO, CmpltAbrt, UnxCmplt              uint64
	RxOF, MalfTLP, ECRC, UnsupReq, ACSViol    uint64
	UncorrIntErr, BlockedTLP, AtomicOpBlocked uint64
	TLPBlockedErr, PoisonTLPBlocked           uint64
}
|
||
// AerCounters contains AER counters from files in /sys/class/net/<iface>/device | ||
// for single interface (iface). | ||
type AerCounters struct { | ||
Name string // Interface name | ||
Correctable CorrectableAerCounters | ||
Fatal UncorrectableAerCounters | ||
NonFatal UncorrectableAerCounters | ||
} | ||
|
||
// AllAerCounters is a collection of AER counters for every interface (iface) in /sys/class/net.
// The map keys are interface (iface) names.
type AllAerCounters map[string]AerCounters
|
||
// AerCounters returns info for a single net interfaces (iface). | ||
func (fs FS) AerCountersByIface(devicePath string) (*AerCounters, error) { | ||
_, err := fs.NetClassByIface(devicePath) | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
path := fs.sys.Path(netclassPath) | ||
counters, err := parseAerCounters(filepath.Join(path, devicePath)) | ||
if err != nil { | ||
return nil, err | ||
} | ||
counters.Name = devicePath | ||
|
||
return counters, nil | ||
} | ||
|
||
// AerCounters returns AER counters for all net interfaces (iface) read from /sys/class/net/<iface>/device. | ||
func (fs FS) AerCounters() (AllAerCounters, error) { | ||
devices, err := fs.NetClassDevices() | ||
if err != nil { | ||
return nil, err | ||
} | ||
|
||
path := fs.sys.Path(netclassPath) | ||
allAerCounters := AllAerCounters{} | ||
for _, devicePath := range devices { | ||
counters, err := parseAerCounters(filepath.Join(path, devicePath)) | ||
if err != nil { | ||
return nil, err | ||
} | ||
counters.Name = devicePath | ||
allAerCounters[devicePath] = *counters | ||
} | ||
|
||
return allAerCounters, nil | ||
} | ||
|
||
// parseAerCounters scans predefined files in /sys/class/net/<iface>/device | ||
// directory and gets their contents. | ||
func parseAerCounters(devicePath string) (*AerCounters, error) { | ||
counters := AerCounters{} | ||
err := parseCorrectableAerCounters(devicePath, &counters.Correctable) | ||
if err != nil { | ||
return nil, err | ||
} | ||
err = parseUncorrectableAerCounters(devicePath, "fatal", &counters.Fatal) | ||
if err != nil { | ||
return nil, err | ||
} | ||
err = parseUncorrectableAerCounters(devicePath, "nonfatal", &counters.NonFatal) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return &counters, nil | ||
} | ||
|
||
// parseCorrectableAerCounters parses correctable error counters in | ||
// /sys/class/net/<iface>/device/aer_dev_correctable. | ||
func parseCorrectableAerCounters(devicePath string, counters *CorrectableAerCounters) error { | ||
path := filepath.Join(devicePath, "device", "aer_dev_correctable") | ||
value, err := util.SysReadFile(path) | ||
if err != nil { | ||
if canIgnoreError(err) { | ||
return nil | ||
} | ||
return fmt.Errorf("failed to read file %q: %w", path, err) | ||
} | ||
|
||
for _, line := range strings.Split(string(value), "\n") { | ||
if line == "" { | ||
continue | ||
} | ||
fields := strings.Fields(line) | ||
if len(fields) != 2 { | ||
return fmt.Errorf("unexpected number of fields: %v", fields) | ||
} | ||
counterName := fields[0] | ||
value, err := strconv.ParseUint(fields[1], 10, 64) | ||
if err != nil { | ||
return fmt.Errorf("error parsing value for %s: %v", counterName, err) | ||
} | ||
|
||
switch counterName { | ||
case "RxErr": | ||
counters.RxErr = value | ||
case "BadTLP": | ||
counters.BadTLP = value | ||
case "BadDLLP": | ||
counters.BadDLLP = value | ||
case "Rollover": | ||
counters.Rollover = value | ||
case "Timeout": | ||
counters.Timeout = value | ||
case "NonFatalErr": | ||
counters.NonFatalErr = value | ||
case "CorrIntErr": | ||
counters.CorrIntErr = value | ||
case "HeaderOF": | ||
counters.HeaderOF = value | ||
default: | ||
continue | ||
} | ||
} | ||
|
||
return nil | ||
} | ||
|
||
// parseUncorrectableAerCounters parses uncorrectable error counters in | ||
// /sys/class/net/<iface>/device/aer_dev_[non]fatal. | ||
func parseUncorrectableAerCounters(devicePath string, counterType string, | ||
counters *UncorrectableAerCounters) error { | ||
path := filepath.Join(devicePath, "device", "aer_dev_"+counterType) | ||
value, err := util.ReadFileNoStat(path) | ||
if err != nil { | ||
if canIgnoreError(err) { | ||
return nil | ||
} | ||
return fmt.Errorf("failed to read file %q: %w", path, err) | ||
} | ||
|
||
for _, line := range strings.Split(string(value), "\n") { | ||
if line == "" { | ||
continue | ||
} | ||
fields := strings.Fields(line) | ||
if len(fields) != 2 { | ||
return fmt.Errorf("unexpected number of fields: %v", fields) | ||
} | ||
counterName := fields[0] | ||
value, err := strconv.ParseUint(fields[1], 10, 64) | ||
if err != nil { | ||
return fmt.Errorf("error parsing value for %s: %v", counterName, err) | ||
} | ||
|
||
switch counterName { | ||
case "Undefined": | ||
counters.Undefined = value | ||
case "DLP": | ||
counters.DLP = value | ||
case "SDES": | ||
counters.SDES = value | ||
case "TLP": | ||
counters.TLP = value | ||
case "FCP": | ||
counters.FCP = value | ||
case "CmpltTO": | ||
counters.CmpltTO = value | ||
case "CmpltAbrt": | ||
counters.CmpltAbrt = value | ||
case "UnxCmplt": | ||
counters.UnxCmplt = value | ||
case "RxOF": | ||
counters.RxOF = value | ||
case "MalfTLP": | ||
counters.MalfTLP = value | ||
case "ECRC": | ||
counters.ECRC = value | ||
case "UnsupReq": | ||
counters.UnsupReq = value | ||
case "ACSViol": | ||
counters.ACSViol = value | ||
case "UncorrIntErr": | ||
counters.UncorrIntErr = value | ||
case "BlockedTLP": | ||
counters.BlockedTLP = value | ||
case "AtomicOpBlocked": | ||
counters.AtomicOpBlocked = value | ||
case "TLPBlockedErr": | ||
counters.TLPBlockedErr = value | ||
case "PoisonTLPBlocked": | ||
counters.PoisonTLPBlocked = value | ||
default: | ||
continue | ||
} | ||
} | ||
|
||
return nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
// Copyright 2024 The Prometheus Authors | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
//go:build linux | ||
// +build linux | ||
|
||
package sysfs | ||
|
||
import ( | ||
"reflect" | ||
"testing" | ||
) | ||
|
||
func TestAerCountersByIface(t *testing.T) { | ||
fs, err := NewFS(sysTestFixtures) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
_, err = fs.AerCountersByIface("non-existent") | ||
if err == nil { | ||
t.Fatal("expected error, have none") | ||
} | ||
|
||
device, err := fs.AerCountersByIface("eth0") | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
if device.Name != "eth0" { | ||
t.Errorf("Found unexpected device, want %s, have %s", "eth0", device.Name) | ||
} | ||
} | ||
|
||
func TestAerCounters(t *testing.T) { | ||
fs, err := NewFS(sysTestFixtures) | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
|
||
ac, _ := fs.AerCounters() | ||
aerCounters := AllAerCounters{ | ||
"eth0": AerCounters{ | ||
Name: "eth0", | ||
Correctable: CorrectableAerCounters{ | ||
RxErr: 1, | ||
BadTLP: 2, | ||
BadDLLP: 3, | ||
Rollover: 4, | ||
Timeout: 5, | ||
NonFatalErr: 6, | ||
CorrIntErr: 7, | ||
HeaderOF: 8, | ||
}, | ||
Fatal: UncorrectableAerCounters{ | ||
Undefined: 10, | ||
DLP: 11, | ||
SDES: 12, | ||
TLP: 13, | ||
FCP: 14, | ||
CmpltTO: 15, | ||
CmpltAbrt: 16, | ||
UnxCmplt: 17, | ||
RxOF: 18, | ||
MalfTLP: 19, | ||
ECRC: 20, | ||
UnsupReq: 21, | ||
ACSViol: 22, | ||
UncorrIntErr: 23, | ||
BlockedTLP: 24, | ||
AtomicOpBlocked: 25, | ||
TLPBlockedErr: 26, | ||
PoisonTLPBlocked: 27, | ||
}, | ||
NonFatal: UncorrectableAerCounters{ | ||
Undefined: 30, | ||
DLP: 31, | ||
SDES: 32, | ||
TLP: 33, | ||
FCP: 34, | ||
CmpltTO: 35, | ||
CmpltAbrt: 36, | ||
UnxCmplt: 37, | ||
RxOF: 38, | ||
MalfTLP: 39, | ||
ECRC: 40, | ||
UnsupReq: 41, | ||
ACSViol: 42, | ||
UncorrIntErr: 43, | ||
BlockedTLP: 44, | ||
AtomicOpBlocked: 45, | ||
TLPBlockedErr: 46, | ||
PoisonTLPBlocked: 47, | ||
}, | ||
}, | ||
} | ||
|
||
if !reflect.DeepEqual(aerCounters, ac) { | ||
t.Errorf("Result not correct: want %v, have %v", aerCounters, ac) | ||
} | ||
} |
Oops, something went wrong.