Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add power_envelope & soc_power sensors #112

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions lanserv/mellanox-bf/mlx-bf-base.emu
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,16 @@ sensor_add 0x30 0 9 0x01 0x01 \
poll 5000 \
file "/run/emu_param/ddr_temp"

# Add the soc_power sensor.
# Arguments match the "soc_power" SDR record: owner/MC 0x30, LUN 0,
# sensor number 0x0a, sensor type 0x02, event/reading type code 0x01.
# The reading is taken from the file below, polled with interval 5000
# (presumably milliseconds - confirm against the ipmi_sim documentation).
sensor_add 0x30 0 0x0a 0x02 0x01 \
poll 5000 \
file "/run/emu_param/soc_power"

# Add the power_envelope sensor.
# Arguments match the "power_envelope" SDR record: owner/MC 0x30, LUN 0,
# sensor number 0xb, sensor type 0x02, event/reading type code 0x01.
# The reading is taken from the file below, polled with interval 5000
# (presumably milliseconds - confirm against the ipmi_sim documentation).
sensor_add 0x30 0 0xb 0x02 0x01 \
poll 5000 \
file "/run/emu_param/power_envelope"

# Add the RTC battery voltage sensor
sensor_add 0x30 0 0xc 0x02 0x01 \
poll 5000 \
Expand Down
136 changes: 136 additions & 0 deletions lanserv/mellanox-bf/mlx-bf.sdrs
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,142 @@ sdr type 1
id_string "ddr_temp"
endsdr

# soc_power: sensor record (sdr type 1) for sensor number 0x0a, owner 0x30.
# Threshold-style sensor (event_reading_type_code 1) reported in Watts.
# Only the lower-critical (lc) threshold is enabled in this record: it is the
# only one marked returned, readable, settable and event-generating.
sdr type 1
sensor_owner_id 0x30
sensor_owner_lun 0
channel_number 0
sensor_number 0x0a
entity_id system_board
entity_instance 2
# NOTE(review): sensor_type is Voltage although base_unit below is Watts -
# confirm this type is intended for a power sensor.
sensor_type Voltage
event_reading_type_code 1
init_scanning true
init_events true
init_thresholds true
init_hysteresis false
init_sensor_type true
default_event_gen_on true
default_sensor_scan_on true
sensor_auto_rearm true
sensor_event_msg_ctrl per_state
sensor_threshold_access settable
# Thresholds returned with the reading: lower-critical only.
return_lnc false
return_lc true
return_unc false
return_uc false
# Deassertion / assertion events: lower-critical only.
deassert_lncgl false
deassert_uncgh false
deassert_lcgl true
deassert_ucgh false
assert_lncgl false
assert_uncgh false
assert_lcgl true
assert_ucgh false
# Settable / readable thresholds: lower-critical only.
lnc_thrsh_settable false
unc_thrsh_settable false
lc_thrsh_settable true
uc_thrsh_settable false
lnc_thrsh_readable false
unc_thrsh_readable false
lc_thrsh_readable true
uc_thrsh_readable false
analog_data_format unsigned
rate_unit none
modifier_unit none
percentage false
base_unit Watts
modifier_unit_code unspecified
# Linear conversion with m = 1, b = 0 and zero exponents, so the raw
# reading is presumably reported unchanged as Watts.
linearization linear
nominal_specified false
nominal_reading 0
m 1
tolerance 0
b 0
accuracy 1
accuracy_exp 0
sensor_direction input
r_exp 0
b_exp 0
# Unsigned reading range declared for this sensor.
sensor_maximum 255
sensor_minimum 0
# Threshold values; only lc (5) is enabled above, the others are left at 0.
lc_fthresh 5
lnc_fthresh 0
unc_fthresh 0
uc_fthresh 0
positive_hysteresis 0
negative_hysteresis 0
id_string "soc_power"
endsdr

# power_envelope: sensor record (sdr type 1) for sensor number 0xb, owner 0x30.
# Threshold-style sensor (event_reading_type_code 1) reported in Watts.
# The lower non-critical (lnc) and upper non-critical (unc) thresholds are the
# ones enabled in this record: returned, readable, settable and
# event-generating.
sdr type 1
sensor_owner_id 0x30
sensor_owner_lun 0
channel_number 0
sensor_number 0xb
entity_id system_board
entity_instance 2
# NOTE(review): sensor_type is Voltage although base_unit below is Watts -
# confirm this type is intended for a power sensor.
sensor_type Voltage
event_reading_type_code 1
init_scanning true
init_events true
init_thresholds true
init_hysteresis false
init_sensor_type true
default_event_gen_on true
default_sensor_scan_on true
sensor_auto_rearm true
sensor_event_msg_ctrl per_state
sensor_threshold_access settable
# Thresholds returned with the reading: lnc and unc.
return_lnc true
return_lc false
return_unc true
return_uc false
# Deassertion / assertion events: lnc and unc.
deassert_lncgl true
deassert_uncgh true
deassert_lcgl false
deassert_ucgh false
assert_lncgl true
assert_uncgh true
assert_lcgl false
assert_ucgh false
# Settable / readable thresholds: lnc and unc.
lnc_thrsh_settable true
unc_thrsh_settable true
lc_thrsh_settable false
uc_thrsh_settable false
lnc_thrsh_readable true
unc_thrsh_readable true
lc_thrsh_readable false
uc_thrsh_readable false
analog_data_format unsigned
rate_unit none
modifier_unit none
percentage false
base_unit Watts
modifier_unit_code unspecified
# Linear conversion with m = 1, b = 0 and zero exponents, so the raw
# reading is presumably reported unchanged as Watts.
linearization linear
nominal_specified false
nominal_reading 0
m 1
tolerance 0
b 0
accuracy 1
accuracy_exp 0
sensor_direction input
r_exp 0
b_exp 0
# Unsigned reading range declared for this sensor.
sensor_maximum 255
sensor_minimum 0
# Threshold values; lnc (10) and unc (150) are enabled above, lc/uc stay 0.
lc_fthresh 0
lnc_fthresh 10
unc_fthresh 150
uc_fthresh 0
positive_hysteresis 0
negative_hysteresis 0
id_string "power_envelope"
endsdr

sdr type 1
sensor_owner_id 0x30
sensor_owner_lun 0
Expand Down
2 changes: 2 additions & 0 deletions lanserv/mellanox-bf/sdr.30.main
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
last_add_time:i:1605050738

35:d:\23\00\51\01\36\30\00\0c\07\02\77\48\02\01\04\20\04\00\02\02\00\04\00\00\01\00\00\01\02\f0\00\00\00\00\ff\00\00\00\00\00\17\00\00\00\00\00\00\cbrtc_voltage
34:d:\22\00\51\01\39\30\00\0b\07\02\77\48\02\01\c3\10\c3\10\09\09\00\06\00\00\01\00\00\05\01\00\00\00\00\00\ff\00\00\00\96\00\00\0a\00\00\00\00\00\cepower_envelope
33:d:\21\00\51\01\34\30\00\0a\07\02\77\48\02\01\0c\20\0c\00\02\02\00\06\00\00\01\00\00\05\01\00\00\00\00\00\ff\00\00\00\00\00\05\00\00\00\00\00\00\c9soc_power
32:d:\20\00\51\01\39\30\00\09\07\02\7f\48\01\01\85\32\85\32\1b\1b\00\01\00\00\01\00\00\05\01\00\01\50\00\00\ff\00\00\69\5f\00\00\05\00\00\00\00\00\c8ddr_temp
31:d:\1f\00Q\11\190\14\80\00\00\10\00\01\03\00\cedmidecode_info
30:d:\1e\00Q\11\170\13\80\00\00\10\00\01\03\00\ccproduct_name
Expand Down
42 changes: 42 additions & 0 deletions lanserv/mellanox-bf/set_emu_param.sh
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,48 @@ case "$temp" in
;;
esac

###################################
# Get SOC power info #
###################################
SOC_POWER_PATH="/sys/kernel/debug/mlxbf-ptm/monitors/status/total_power"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@trindenau - is this path defined in Yochai's arch doc? If not, we need to make sure he defines the path.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the right path according to the arch doc

# Publish the SoC power reading to ${EMU_PARAM_DIR}/soc_power, or call
# remove_sensor for "soc_power" when the source file is missing or its
# content is not a plain decimal number.
if [ ! -f "$SOC_POWER_PATH" ]; then
echo "Error: soc_power file not found try to load the driver with: modprobe mlxbf-ptm"
remove_sensor "soc_power"
else
soc_power=$(cat "$SOC_POWER_PATH")
trindenau marked this conversation as resolved.
Show resolved Hide resolved
# Check that soc_power is a non-negative decimal number (optional fraction).
if ! [[ "$soc_power" =~ ^([0-9]+(\.[0-9]+)?|0)$ ]]; then
echo "Error: soc_power is not a valid number"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@trindenau - indentation

remove_sensor "soc_power"
else
# Drop everything after the decimal point - a fractional value can cause
# issues in the ipmb.
# NOTE(review): a value with leading zeros (e.g. 08.5) passes the check
# above but is then read as octal by the arithmetic expansion - confirm the
# driver never emits such values.
soc_power=$((${soc_power%.*}))
# Write the integer soc_power value to /run/emu_param/soc_power.
echo "$soc_power" > "${EMU_PARAM_DIR}/soc_power"
fi
fi

###################################
# Get power envelope info #
###################################
POWER_ENVELOPE_PATH="/sys/kernel/debug/mlxbf-ptm/monitors/status/power_envelope"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@trindenau - same as above, this needs to be defined in the arch doc.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the right path according to the arch doc

#######################################
# Convert a decimal power reading to the integer stored for the sensor.
# The fractional part is dropped (a fractional value can cause issues in
# the ipmb). The integer part is always interpreted as base 10, so readings
# with leading zeros such as "08.5" are not mistaken for octal by the
# arithmetic expansion.
# Arguments: $1 - raw reading, e.g. "42", "42.7" or "-3.2"
# Outputs:   the truncated integer on stdout
# Returns:   0 on success, 1 if $1 is not a decimal number
#######################################
power_envelope_to_int() {
  local raw=$1
  local sign=""
  local whole
  local -r number_re='^-?[0-9]+(\.[0-9]+)?$'

  [[ "$raw" =~ $number_re ]] || return 1

  # Strip the fractional part, then split off the sign: "10#" must be
  # followed directly by digits.
  whole=${raw%.*}
  if [[ "$whole" == -* ]]; then
    sign="-"
    whole=${whole#-}
  fi

  printf '%s\n' "$(( ${sign}10#${whole} ))"
}

# Publish the power envelope reading to ${EMU_PARAM_DIR}/power_envelope, or
# call remove_sensor for "power_envelope" when the source file is missing or
# its content is not a decimal number.
if [ ! -f "$POWER_ENVELOPE_PATH" ]; then
  echo "Error: power_envelope file not found try to load the driver with: modprobe mlxbf-ptm"
  remove_sensor "power_envelope"
else
  power_envelope=$(cat "$POWER_ENVELOPE_PATH")
  # Check that power_envelope is a decimal number and truncate it.
  if ! power_envelope_int=$(power_envelope_to_int "$power_envelope"); then
    echo "Error: power_envelope is not a valid number"
    remove_sensor "power_envelope"
  else
    power_envelope=$power_envelope_int
    # Write the integer power_envelope value to /run/emu_param/power_envelope.
    echo "$power_envelope" > "${EMU_PARAM_DIR}/power_envelope"
  fi
fi

###################################
# Get FW info #
###################################
Expand Down