Reworks SPR topdown implementation to use rdpmc-style values instead of raw counter values
LLNL · Oct 8, 2024 · 18c14d4 · 18c14d4
1 parent 2f02a26
commit 18c14d4
Showing 1 changed file with 60 additions and 118 deletions.
diff --git a/src/services/topdown/SapphireRapidsTopdown.cpp b/src/services/topdown/SapphireRapidsTopdown.cpp
@@ -2,6 +2,21 @@
 
 #include <algorithm>
 
+#define RETIRING_OFFSET 0
+#define BAD_SPEC_OFFSET 1
+#define FE_BOUND_OFFSET 2
+#define BE_BOUND_OFFSET 3
+
+#define HEAVY_OPS_OFFSET 4
+#define BR_MISPRED_OFFSET 5
+#define FETCH_LAT_OFFSET 6
+#define MEM_BOUND_OFFSET 7
+
+static double get_tma_percent_from_rdpmc_value(uint64_t rdpmc_value,
+                                               uint64_t offset) {
+  return (double)((rdpmc_value >> (offset * 8)) & 0xff) / 0xff;
+}
+
 namespace cali {
 namespace topdown {
 
@@ -10,22 +25,10 @@ SapphireRapidsTopdown::SapphireRapidsTopdown(IntelTopdownLevel level)
           level,
           // top_counters
           "perf::slots"
-          ",perf::topdown-retiring"
-          ",perf::topdown-bad-spec"
-          ",perf::topdown-fe-bound"
-          ",perf::topdown-be-bound"
-          ",INT_MISC:UOP_DROPPING",
+          ",perf::topdown-retiring",
           // all_counters
           "perf::slots"
-          ",perf::topdown-retiring"
-          ",perf::topdown-bad-spec"
-          ",perf::topdown-fe-bound"
-          ",perf::topdown-be-bound"
-          ",INT_MISC:UOP_DROPPING"
-          ",perf_raw::r8400"  // topdown-heavy-ops
-          ",perf_raw::r8500"  // topdown-br-mispredict
-          ",perf_raw::r8600"  // topdown-fetch-lat
-          ",perf_raw::r8700", // topdown-mem-bound
+          ",perf::topdown-retiring",
           // res_top
           {"retiring", "backend_bound", "frontend_bound", "bad_speculation"},
           // res_all
@@ -44,43 +47,29 @@ SapphireRapidsTopdown::compute_toplevel(const std::vector<Entry> &rec) {
 
   // Get PAPI metrics for toplevel calculations
   Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_int_misc_uop_dropping =
-      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
+  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
 
   // Check if any Variant is empty (use .empty())
-  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
-                       v_bad_spec.empty() || v_retiring.empty() ||
-                       v_int_misc_uop_dropping.empty() ||
-                       v_slots_or_info_thread_slots.empty();
+  bool is_incomplete =
+      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
   // Check if all Variants are greater than 0 when casted to doubles (use
   // .to_double())
-  bool is_nonzero =
-      v_fe_bound.to_double() > 0.0 && v_be_bound.to_double() > 0.0 &&
-      v_bad_spec.to_double() > 0.0 && v_retiring.to_double() > 0.0 &&
-      v_int_misc_uop_dropping.to_double() > 0.0 &&
-      v_slots_or_info_thread_slots.to_double() > 0.0;
+  bool is_nonzero = v_tma_metrics.to_uint() > 0;
 
   // Check if bad values were obtained
   if (is_incomplete || !is_nonzero)
     return ret;
 
-  // Perform toplevel calcs
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-
-  double retiring = (v_retiring.to_double() / toplevel_sum) +
-                    (0 * v_slots_or_info_thread_slots.to_double());
-  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
-                          (v_int_misc_uop_dropping.to_double() /
-                           v_slots_or_info_thread_slots.to_double());
-  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
-                         (0 * v_slots_or_info_thread_slots.to_double());
+  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
+
+  double retiring =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
+  double frontend_bound =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
+  double backend_bound =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
   double bad_speculation =
-      std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);
 
   // Add toplevel metrics to vector of Entry
   ret.reserve(4);
@@ -106,30 +95,22 @@ SapphireRapidsTopdown::compute_retiring(const std::vector<Entry> &rec) {
 
   // Get PAPI metrics for toplevel calculations
   Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_heavy_ops = get_val_from_rec(rec, "perf_raw::r8400");
+  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
 
   // Check if any Variant is empty (use .empty())
-  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
-                       v_bad_spec.empty() || v_retiring.empty() ||
-                       v_slots_or_info_thread_slots.empty() ||
-                       v_heavy_ops.empty();
+  bool is_incomplete =
+      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
   // Check if bad values were obtained
   if (is_incomplete)
     return ret;
 
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-  // Copied from compute_toplevel
-  double retiring = (v_retiring.to_double() / toplevel_sum) +
-                    (0 * v_slots_or_info_thread_slots.to_double());
+  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
-  double heavy_ops = (v_heavy_ops.to_double() / toplevel_sum) +
-                     (0 * v_slots_or_info_thread_slots.to_double());
+  double retiring =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, RETIRING_OFFSET);
+  double heavy_ops =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, HEAVY_OPS_OFFSET);
   double light_ops = std::max(0.0, retiring - heavy_ops);
 
   // Add toplevel metrics to vector of Entry
@@ -152,30 +133,22 @@ SapphireRapidsTopdown::compute_backend_bound(const std::vector<Entry> &rec) {
 
   // Get PAPI metrics for toplevel calculations
   Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_memory_bound = get_val_from_rec(rec, "perf_raw::r8700");
+  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
 
   // Check if any Variant is empty (use .empty())
-  bool is_incomplete = v_fe_bound.empty() || v_be_bound.empty() ||
-                       v_bad_spec.empty() || v_retiring.empty() ||
-                       v_slots_or_info_thread_slots.empty() ||
-                       v_memory_bound.empty();
+  bool is_incomplete =
+      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
   // Check if bad values were obtained
   if (is_incomplete)
     return ret;
 
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-  // Copied from compute_toplevel
-  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
-                         (0 * v_slots_or_info_thread_slots.to_double());
+  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
-  double memory_bound = (v_memory_bound.to_double() / toplevel_sum) +
-                        (0 * v_slots_or_info_thread_slots.to_double());
+  double backend_bound =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BE_BOUND_OFFSET);
+  double memory_bound =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, MEM_BOUND_OFFSET);
   double core_bound = std::max(0.0, backend_bound - memory_bound);
 
   // Add toplevel metrics to vector of Entry
@@ -198,35 +171,22 @@ SapphireRapidsTopdown::compute_frontend_bound(const std::vector<Entry> &rec) {
 
   // Get PAPI metrics for toplevel calculations
   Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_int_misc_uop_dropping =
-      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
-  Variant v_fetch_latency = get_val_from_rec(rec, "perf_raw::r8600");
+  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
 
   // Check if any Variant is empty (use .empty())
   bool is_incomplete =
-      v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
-      v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
-      v_slots_or_info_thread_slots.empty() || v_fetch_latency.empty();
+      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
   // Check if bad values were obtained
   if (is_incomplete)
     return ret;
 
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-  // Copied from compute_toplevel
-  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
-                          (v_int_misc_uop_dropping.to_double() /
-                           v_slots_or_info_thread_slots.to_double());
-
-  double fetch_latency = (v_fetch_latency.to_double() / toplevel_sum) -
-                         (v_int_misc_uop_dropping.to_double() /
-                          v_slots_or_info_thread_slots.to_double());
+  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
+  double frontend_bound =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FE_BOUND_OFFSET);
+  double fetch_latency =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, FETCH_LAT_OFFSET);
   double fetch_bandwidth = std::max(0.0, frontend_bound - fetch_latency);
 
   // Add toplevel metrics to vector of Entry
@@ -249,40 +209,22 @@ SapphireRapidsTopdown::compute_bad_speculation(const std::vector<Entry> &rec) {
 
   // Get PAPI metrics for toplevel calculations
   Variant v_slots_or_info_thread_slots = get_val_from_rec(rec, "perf::slots");
-  Variant v_retiring = get_val_from_rec(rec, "perf::topdown-retiring");
-  Variant v_bad_spec = get_val_from_rec(rec, "perf::topdown-bad-spec");
-  Variant v_fe_bound = get_val_from_rec(rec, "perf::topdown-fe-bound");
-  Variant v_be_bound = get_val_from_rec(rec, "perf::topdown-be-bound");
-  Variant v_int_misc_uop_dropping =
-      get_val_from_rec(rec, "INT_MISC:UOP_DROPPING");
-  Variant v_branch_mispredict = get_val_from_rec(rec, "perf_raw::r8500");
+  Variant v_tma_metrics = get_val_from_rec(rec, "perf::topdown-retiring");
 
   // Check if any Variant is empty (use .empty())
   bool is_incomplete =
-      v_fe_bound.empty() || v_be_bound.empty() || v_bad_spec.empty() ||
-      v_retiring.empty() || v_int_misc_uop_dropping.empty() ||
-      v_slots_or_info_thread_slots.empty() || v_branch_mispredict.empty();
+      v_tma_metrics.empty() || v_slots_or_info_thread_slots.empty();
 
   // Check if bad values were obtained
   if (is_incomplete)
     return ret;
 
-  // Perform toplevel calcs
-  double toplevel_sum = (v_retiring.to_double() + v_bad_spec.to_double() +
-                         v_fe_bound.to_double() + v_be_bound.to_double());
-
-  double retiring = (v_retiring.to_double() / toplevel_sum) +
-                    (0 * v_slots_or_info_thread_slots.to_double());
-  double frontend_bound = (v_fe_bound.to_double() / toplevel_sum) -
-                          (v_int_misc_uop_dropping.to_double() /
-                           v_slots_or_info_thread_slots.to_double());
-  double backend_bound = (v_be_bound.to_double() / toplevel_sum) +
-                         (0 * v_slots_or_info_thread_slots.to_double());
-  double bad_speculation =
-      std::max(1.0 - (frontend_bound + backend_bound + retiring), 0.0);
+  uint64_t tma_metric_papi_rdpmc = v_tma_metrics.to_uint();
 
-  double branch_mispredict = (v_branch_mispredict.to_double() / toplevel_sum) +
-                             (0 * v_slots_or_info_thread_slots.to_double());
+  double bad_speculation =
+      get_tma_percent_from_rdpmc_value(tma_metric_papi_rdpmc, BAD_SPEC_OFFSET);
+  double branch_mispredict = get_tma_percent_from_rdpmc_value(
+      tma_metric_papi_rdpmc, BR_MISPRED_OFFSET);
   double machine_clears = std::max(0.0, bad_speculation - branch_mispredict);
 
   // Add toplevel metrics to vector of Entry