From adef23beeac1031069575b357553c7c1c16f5f4f Mon Sep 17 00:00:00 2001 From: trindenau Date: Mon, 23 Dec 2024 10:00:49 +0000 Subject: [PATCH] Add `power_envelope` & `soc_power` sensors Add `power_envelope` and `soc_power` sensors. Both will derive their values from `debugfs`. Tested by: ipmitool -I ipmb sensors: soc_power | 22.000 | Watts | ok | na | 5.000 | na | na | na | na power_envelope | 65.000 | Watts | ok | na | na | 10.000 | 150.000 | na | na Or by running the TestRedFishSensorSchema test. Fixes jira https://redmine.mellanox.com/issues/4016386. --- lanserv/mellanox-bf/mlx-bf-base.emu | 10 ++ lanserv/mellanox-bf/mlx-bf.sdrs | 136 +++++++++++++++++++++++++++ lanserv/mellanox-bf/sdr.30.main | 2 + lanserv/mellanox-bf/set_emu_param.sh | 44 +++++++++ 4 files changed, 192 insertions(+) diff --git a/lanserv/mellanox-bf/mlx-bf-base.emu b/lanserv/mellanox-bf/mlx-bf-base.emu index 4a6e144..d2d6aac 100644 --- a/lanserv/mellanox-bf/mlx-bf-base.emu +++ b/lanserv/mellanox-bf/mlx-bf-base.emu @@ -75,6 +75,16 @@ sensor_add 0x30 0 9 0x01 0x01 \ poll 5000 \ file "/run/emu_param/ddr_temp" +#Add the soc_power sensor +sensor_add 0x30 0 0x0a 0x02 0x01 \ + poll 5000 \ + file "/run/emu_param/soc_power" + +#Add the power_envelope sensor +sensor_add 0x30 0 0xb 0x02 0x01 \ + poll 5000 \ + file "/run/emu_param/power_envelope" + ########## FRUs ########## # mc_add_fru_data mc-addr DeviceID FRUSize (data byte1 ... 
byteN | diff --git a/lanserv/mellanox-bf/mlx-bf.sdrs b/lanserv/mellanox-bf/mlx-bf.sdrs index 906e381..6484085 100644 --- a/lanserv/mellanox-bf/mlx-bf.sdrs +++ b/lanserv/mellanox-bf/mlx-bf.sdrs @@ -615,6 +615,142 @@ sdr type 1 id_string "ddr_temp" endsdr +# soc_power: sensor 0x0a, owner 0x30; unsigned linear reading in Watts (m=1, b=0, range 0-255); only the lower critical threshold (5) is readable/settable. NOTE(review): sensor_type is Voltage for a Watts reading; confirm this is intended. +sdr type 1 + sensor_owner_id 0x30 + sensor_owner_lun 0 + channel_number 0 + sensor_number 0x0a + entity_id system_board + entity_instance 2 + sensor_type Voltage + event_reading_type_code 1 + init_scanning true + init_events true + init_thresholds true + init_hysteresis false + init_sensor_type true + default_event_gen_on true + default_sensor_scan_on true + sensor_auto_rearm true + sensor_event_msg_ctrl per_state + sensor_threshold_access settable + return_lnc false + return_lc true + return_unc false + return_uc false + deassert_lncgl false + deassert_uncgh false + deassert_lcgl true + deassert_ucgh false + assert_lncgl false + assert_uncgh false + assert_lcgl true + assert_ucgh false + lnc_thrsh_settable false + unc_thrsh_settable false + lc_thrsh_settable true + uc_thrsh_settable false + lnc_thrsh_readable false + unc_thrsh_readable false + lc_thrsh_readable true + uc_thrsh_readable false + analog_data_format unsigned + rate_unit none + modifier_unit none + percentage false + base_unit Watts + modifier_unit_code unspecified + linearization linear + nominal_specified false + nominal_reading 0 + m 1 + tolerance 0 + b 0 + accuracy 1 + accuracy_exp 0 + sensor_direction input + r_exp 0 + b_exp 0 + sensor_maximum 255 + sensor_minimum 0 + lc_fthresh 5 + lnc_fthresh 0 + unc_fthresh 0 + uc_fthresh 0 + positive_hysteresis 0 + negative_hysteresis 0 + id_string "soc_power" +endsdr + +# power_envelope: sensor 0xb, owner 0x30; unsigned linear reading in Watts (m=1, b=0, range 0-255); the lower non-critical (10) and upper non-critical (150) thresholds are readable/settable. NOTE(review): sensor_type is Voltage for a Watts reading; confirm this is intended. +sdr type 1 + sensor_owner_id 0x30 + sensor_owner_lun 0 + channel_number 0 + sensor_number 0xb + entity_id system_board + entity_instance 2 + sensor_type Voltage + event_reading_type_code 1 + init_scanning true + init_events true + init_thresholds true + init_hysteresis false + init_sensor_type true + 
default_event_gen_on true + default_sensor_scan_on true + sensor_auto_rearm true + sensor_event_msg_ctrl per_state + sensor_threshold_access settable + return_lnc true + return_lc false + return_unc true + return_uc false + deassert_lncgl true + deassert_uncgh true + deassert_lcgl false + deassert_ucgh false + assert_lncgl true + assert_uncgh true + assert_lcgl false + assert_ucgh false + lnc_thrsh_settable true + unc_thrsh_settable true + lc_thrsh_settable false + uc_thrsh_settable false + lnc_thrsh_readable true + unc_thrsh_readable true + lc_thrsh_readable false + uc_thrsh_readable false + analog_data_format unsigned + rate_unit none + modifier_unit none + percentage false + base_unit Watts + modifier_unit_code unspecified + linearization linear + nominal_specified false + nominal_reading 0 + m 1 + tolerance 0 + b 0 + accuracy 1 + accuracy_exp 0 + sensor_direction input + r_exp 0 + b_exp 0 + sensor_maximum 255 + sensor_minimum 0 + lc_fthresh 0 + lnc_fthresh 10 + unc_fthresh 150 + uc_fthresh 0 + positive_hysteresis 0 + negative_hysteresis 0 + id_string "power_envelope" +endsdr + # Timer for next FRU update sdr type 0x11 device_access_address 0x30 diff --git a/lanserv/mellanox-bf/sdr.30.main b/lanserv/mellanox-bf/sdr.30.main index 846130b..306a84b 100644 --- a/lanserv/mellanox-bf/sdr.30.main +++ b/lanserv/mellanox-bf/sdr.30.main @@ -1,5 +1,7 @@ last_add_time:i:1605050738 +34:d:\22\00\51\01\39\30\00\0b\07\02\77\48\02\01\c3\10\c3\10\09\09\00\06\00\00\01\00\00\05\01\00\00\00\00\00\ff\00\00\00\96\00\00\0a\00\00\00\00\00\cepower_envelope +33:d:\21\00\51\01\34\30\00\0a\07\02\77\48\02\01\0c\20\0c\00\02\02\00\06\00\00\01\00\00\05\01\00\00\00\00\00\ff\00\00\00\00\00\05\00\00\00\00\00\00\c9soc_power 32:d:\20\00\51\01\39\30\00\09\07\02\7f\48\01\01\85\32\85\32\1b\1b\00\01\00\00\01\00\00\05\01\00\01\50\00\00\ff\00\00\69\5f\00\00\05\00\00\00\00\00\c8ddr_temp 31:d:\1f\00Q\11\190\14\80\00\00\10\00\01\03\00\cedmidecode_info 
30:d:\1e\00Q\11\170\13\80\00\00\10\00\01\03\00\ccproduct_name diff --git a/lanserv/mellanox-bf/set_emu_param.sh b/lanserv/mellanox-bf/set_emu_param.sh index df81ea4..7afb4db 100755 --- a/lanserv/mellanox-bf/set_emu_param.sh +++ b/lanserv/mellanox-bf/set_emu_param.sh @@ -661,6 +661,50 @@ else remove_sensor "ddr_temp" fi +################################### +# Get SOC power info # +################################### +# If the debugfs file does not exist yet, try loading the mlxbf-ptm module first +SOC_POWER_PATH="/sys/kernel/debug/mlxbf-ptm/monitors/status/total_power" +if [ ! -f "$SOC_POWER_PATH" ]; then + modprobe mlxbf-ptm +fi + +if [ ! -f "$SOC_POWER_PATH" ]; then + echo "Error: soc_power file still not found after loading module" + remove_sensor "soc_power" +else + soc_power=$(cat "$SOC_POWER_PATH") + # Accept only a plain decimal number (optional sign and fraction); otherwise call remove_sensor + if ! [[ "$soc_power" =~ ^-?[0-9]+(\.[0-9]+)?$ ]]; then + echo "Error: soc_power is not a valid number" + remove_sensor "soc_power" + else + # Write only the integer part to ${EMU_PARAM_DIR}/soc_power: digits after the decimal point can cause issues in the ipmb + echo "${soc_power%%.*}" > "${EMU_PARAM_DIR}/soc_power" + fi +fi + +################################### +# Get power envelope info # +################################### +POWER_ENVELOPE_PATH="/sys/kernel/debug/mlxbf-ptm/monitors/status/power_envelope" +if [ ! -f "$POWER_ENVELOPE_PATH" ]; then + # No modprobe here: the SOC power section above already tried to load mlxbf-ptm + echo "Error: power_envelope file still not found after loading module" + remove_sensor "power_envelope" +else + power_envelope=$(cat "$POWER_ENVELOPE_PATH") + # Accept only a plain decimal number (optional sign and fraction); otherwise call remove_sensor + if ! 
[[ "$power_envelope" =~ ^-?[0-9]+(\.[0-9]+)?$ ]]; then + echo "Error: power_envelope is not a valid number" + remove_sensor "power_envelope" + else + # Write only the integer part to ${EMU_PARAM_DIR}/power_envelope: digits after the decimal point can cause issues in the ipmb + echo "${power_envelope%%.*}" > "${EMU_PARAM_DIR}/power_envelope" + fi +fi + ################################### # Get FW info # ###################################