Skip to content

Commit

Permalink
Reworks SPR topdown implementation to use rdpmc-style values instead …
Browse files Browse the repository at this point in the history
…of raw counter values
  • Loading branch information
ilumsden committed Oct 8, 2024
1 parent 2f02a26 commit 18c14d4
Showing 1 changed file with 60 additions and 118 deletions.
178 changes: 60 additions & 118 deletions src/services/topdown/SapphireRapidsTopdown.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,21 @@

#include <algorithm>

// Byte indices into the packed 64-bit value handed to
// get_tma_percent_from_rdpmc_value(), which shifts the value right by
// (offset * 8) bits and keeps the low 8 bits.
//
// Indices 0-3 are used for the top-level metrics (retiring, bad speculation,
// frontend bound, backend bound).
// NOTE(review): the byte order is assumed to match the layout of the value
// PAPI returns for "perf::topdown-retiring" -- confirm against PAPI/perf.
#define RETIRING_OFFSET 0
#define BAD_SPEC_OFFSET 1
#define FE_BOUND_OFFSET 2
#define BE_BOUND_OFFSET 3

// Indices 4-7 are used for the second-level metrics (heavy ops, branch
// mispredict, fetch latency, memory bound), one per top-level category.
#define HEAVY_OPS_OFFSET 4
#define BR_MISPRED_OFFSET 5
#define FETCH_LAT_OFFSET 6
#define MEM_BOUND_OFFSET 7

/// Extracts one 8-bit field from a packed 64-bit rdpmc-style value and scales
/// it to a fraction in the range [0.0, 1.0].
///
/// @param rdpmc_value Packed 64-bit value holding up to eight 8-bit fields.
/// @param offset      Zero-based byte index of the field to extract
///                    (0 selects the least significant byte).
/// @return The selected byte divided by 0xff, or 0.0 when `offset` does not
///         address a byte inside the 64-bit value.
static double get_tma_percent_from_rdpmc_value(uint64_t rdpmc_value,
                                               uint64_t offset) {
  // A 64-bit value contains exactly eight bytes. Shifting a uint64_t by 64 or
  // more bits is undefined behavior, so out-of-range indices are rejected
  // instead of being shifted.
  if (offset >= sizeof(rdpmc_value))
    return 0.0;

  const uint64_t field = (rdpmc_value >> (offset * 8)) & 0xff;
  return static_cast<double>(field) / 0xff;
}

namespace cali {
namespace topdown {

Expand All @@ -10,22 +25,10 @@ SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level)
level,
// top_counters
"perf::slots"
",perf::topdown-retiring"
",perf::topdown-bad-spec"
",perf::topdown-fe-bound"
",perf::topdown-be-bound"
",INT_MISC:UOP_DROPPING",
",perf::topdown-retiring",
// all_counters
"perf::slots"
",perf::topdown-retiring"
",perf::topdown-bad-spec"
",perf::topdown-fe-bound"
",perf::topdown-be-bound"
",INT_MISC:UOP_DROPPING"
",perf_raw::r8400" // topdown-heavy-ops
",perf_raw::r8500" // topdown-br-mispredict
",perf_raw::r8600" // topdown-fetch-lat
",perf_raw::r8700", // topdown-mem-bound
",perf::topdown-retiring",
// res_top
{"retiring", "backend_bound", "frontend_bound", "bad_speculation"},
// res_all
Expand All @@ -44,43 +47,29 @@ SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry> &rec) {

// Get PAPI metrics for toplevel calculations
Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
Variant v_int_misc_uop_dropping =
get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");

// Check if any Variant is empty (use .empty())
bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
v_bad_spec.empty() || v_retiring.empty() ||
v_int_misc_uop_dropping.empty() ||
v_slots_or_info_thread_slots.empty();
bool is_incomplete =
v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
// Check if all Variants are greater than 0 when casted to doubles (use
// .to_double())
bool is_nonzero =
v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 &&
v_bad_spec.to_double() > 0.0 && v_retiring.to_double() > 0.0 &&
v_int_misc_uop_dropping.to_double() > 0.0 &&
v_slots_or_info_thread_slots.to_double() > 0.0;
bool is_nonzero = v_tma_metrics.to_uint() > 0;

// Check if bad values were obtained
if (is_incomplete || !is_nonzero)
return ret;

// Perform toplevel calcs
double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
v_fe_bound.to_double() + v_be_bound.to_double());

double retiring = (v_retiring.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
(v_int_misc_uop_dropping.to_double() /
v_slots_or_info_thread_slots.to_double());
double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();

double retiring =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
double frontend_bound =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
double backend_bound =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
double bad_speculation =
std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);

// Add toplevel metrics to vector of Entry
ret.reserve(4);
Expand All @@ -106,30 +95,22 @@ SapphireRapidsTopdown::compute_retiring(const std::vector<Entry> &rec) {

// Get PAPI metrics for toplevel calculations
Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
Variant v_heavy_ops = get_val_from_rec(rec, "perf_raw::r8400");
Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");

// Check if any Variant is empty (use .empty())
bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
v_bad_spec.empty() || v_retiring.empty() ||
v_slots_or_info_thread_slots.empty() ||
v_heavy_ops.empty();
bool is_incomplete =
v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();

// Check if bad values were obtained
if (is_incomplete)
return ret;

double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
v_fe_bound.to_double() + v_be_bound.to_double());
// Copied from compute_toplevel
double retiring = (v_retiring.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();

double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double retiring =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
double heavy_ops =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, HEAVY_OPS_OFFSET);
double light_ops = std::max(0.0, retiring - heavy_ops);

// Add toplevel metrics to vector of Entry
Expand All @@ -152,30 +133,22 @@ SapphireRapidsTopdown::compute_backend_bound(const std::vector<Entry> &rec) {

// Get PAPI metrics for toplevel calculations
Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
Variant v_memory_bound = get_val_from_rec(rec, "perf_raw::r8700");
Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");

// Check if any Variant is empty (use .empty())
bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
v_bad_spec.empty() || v_retiring.empty() ||
v_slots_or_info_thread_slots.empty() ||
v_memory_bound.empty();
bool is_incomplete =
v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();

// Check if bad values were obtained
if (is_incomplete)
return ret;

double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
v_fe_bound.to_double() + v_be_bound.to_double());
// Copied from compute_toplevel
double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();

double memory_bound = (v_memory_bound.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double backend_bound =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
double memory_bound =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, MEM_BOUND_OFFSET);
double core_bound = std::max(0.0, backend_bound - memory_bound);

// Add toplevel metrics to vector of Entry
Expand All @@ -198,35 +171,22 @@ SapphireRapidsTopdown::compute_frontend_bound(const std::vector<Entry> &rec) {

// Get PAPI metrics for toplevel calculations
Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
Variant v_int_misc_uop_dropping =
get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
Variant v_fetch_latency = get_val_from_rec(rec, "perf_raw::r8600");
Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");

// Check if any Variant is empty (use .empty())
bool is_incomplete =
v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
v_slots_or_info_thread_slots.empty() || v_fetch_latency.empty();
v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();

// Check if bad values were obtained
if (is_incomplete)
return ret;

double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
v_fe_bound.to_double() + v_be_bound.to_double());
// Copied from compute_toplevel
double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
(v_int_misc_uop_dropping.to_double() /
v_slots_or_info_thread_slots.to_double());

double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum) -
(v_int_misc_uop_dropping.to_double() /
v_slots_or_info_thread_slots.to_double());
uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();

double frontend_bound =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
double fetch_latency =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FETCH_LAT_OFFSET);
double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);

// Add toplevel metrics to vector of Entry
Expand All @@ -249,40 +209,22 @@ SapphireRapidsTopdown::compute_bad_speculation(const std::vector<Entry> &rec) {

// Get PAPI metrics for toplevel calculations
Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
Variant v_int_misc_uop_dropping =
get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
Variant v_branch_mispredict = get_val_from_rec(rec, "perf_raw::r8500");
Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");

// Check if any Variant is empty (use .empty())
bool is_incomplete =
v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
v_slots_or_info_thread_slots.empty() || v_branch_mispredict.empty();
v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();

// Check if bad values were obtained
if (is_incomplete)
return ret;

// Perform toplevel calcs
double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
v_fe_bound.to_double() + v_be_bound.to_double());

double retiring = (v_retiring.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
(v_int_misc_uop_dropping.to_double() /
v_slots_or_info_thread_slots.to_double());
double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double bad_speculation =
std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();

double branch_mispredict = (v_branch_mispredict.to_double() / toplevel_sum) +
(0 * v_slots_or_info_thread_slots.to_double());
double bad_speculation =
get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);
double branch_mispredict = get_tma_percent_from_rdpmc_value(
tma_metric_papi_rdpmc, BR_MISPRED_OFFSET);
double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);

// Add toplevel metrics to vector of Entry
Expand Down

0 comments on commit 18c14d4

Please sign in to comment.