From df519ff16e2651bdebcbc9c0ad0237f0e840ad53 Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sat, 16 Mar 2024 12:17:24 +0000 Subject: [PATCH] Fix --prof-exec predicted time values (#4988) Wrapping the functions in #4933 broke the --prof-exec report, because the predicted MTask times are computed during thread packing but are emitted in the wrapping functions, which were being created before packing had run. Thread packing now runs before the MTask bodies are wrapped, and the resulting schedule is passed on to implementExecGraph. --- src/V3ExecGraph.cpp | 24 ++++++++++++------------ test_regress/t/t_gantt.pl | 2 ++ 2 files changed, 14 insertions(+), 12 deletions(-) diff --git a/src/V3ExecGraph.cpp b/src/V3ExecGraph.cpp index 74fa397690..8df206bb85 100644 --- a/src/V3ExecGraph.cpp +++ b/src/V3ExecGraph.cpp @@ -203,7 +203,6 @@ class PackThreads final { const uint32_t m_sandbagNumerator; // Numerator padding for est runtime const uint32_t m_sandbagDenom; // Denominator padding for est runtime -public: // CONSTRUCTORS explicit PackThreads(uint32_t nThreads = v3Global.opt.threads(), unsigned sandbagNumerator = 30, unsigned sandbagDenom = 100) @@ -211,8 +210,8 @@ class PackThreads final { , m_sandbagNumerator{sandbagNumerator} , m_sandbagDenom{sandbagDenom} {} ~PackThreads() = default; + VL_UNCOPYABLE(PackThreads); -private: // METHODS uint32_t completionTime(const ThreadSchedule& schedule, const ExecMTask* mtaskp, uint32_t threadId) { @@ -256,9 +255,8 @@ class PackThreads final { return true; } -public: // Pack an MTasks from given graph into m_nThreads threads, return the schedule. 
- const ThreadSchedule pack(const V3Graph& mtaskGraph) { + ThreadSchedule pack(const V3Graph& mtaskGraph) { // The result ThreadSchedule schedule{m_nThreads}; @@ -350,6 +348,7 @@ class PackThreads final { return schedule; } +public: // SELF TEST static void selfTest() { V3Graph graph; @@ -402,8 +401,9 @@ class PackThreads final { UASSERT_SELFTEST(uint32_t, packer.completionTime(schedule, t2, 1), 1199); } -private: - VL_UNCOPYABLE(PackThreads); + static const ThreadSchedule apply(const V3Graph& mtaskGraph) { + return PackThreads{}.pack(mtaskGraph); + } }; using EstimateAndProfiled = std::pair; // cost est, cost profiled @@ -780,14 +780,10 @@ void wrapMTaskBodies(AstExecGraph* const execGraphp) { } } -void implementExecGraph(AstExecGraph* const execGraphp) { +void implementExecGraph(AstExecGraph* const execGraphp, const ThreadSchedule& schedule) { // Nothing to be done if there are no MTasks in the graph at all. if (execGraphp->depGraphp()->empty()) return; - // Schedule the mtasks: statically associate each mtask with a thread, - // and determine the order in which each thread will runs its mtasks. - const ThreadSchedule& schedule = PackThreads{}.pack(*execGraphp->depGraphp()); - // Create a function to be run by each thread. Note this moves all AstMTaskBody nodes form the // AstExecGrap into the AstCFunc created const std::vector& funcps = createThreadFunctions(schedule, execGraphp->name()); @@ -808,11 +804,15 @@ void implement(AstNetlist* netlistp) { fillinCosts(execGraphp->depGraphp()); finalizeCosts(execGraphp->depGraphp()); + // Schedule the mtasks: statically associate each mtask with a thread, + // and determine the order in which each thread will run its mtasks. + const ThreadSchedule& schedule = PackThreads::apply(*execGraphp->depGraphp()); + // Wrap each MTask body into a CFunc for better profiling/debugging wrapMTaskBodies(execGraphp); // Replace the graph body with its multi-threaded implementation. 
- implementExecGraph(execGraphp); + implementExecGraph(execGraphp, schedule); }); } diff --git a/test_regress/t/t_gantt.pl b/test_regress/t/t_gantt.pl index 30a0c599c3..917ede8fec 100755 --- a/test_regress/t/t_gantt.pl +++ b/test_regress/t/t_gantt.pl @@ -45,6 +45,8 @@ if ($Self->{vltmt}) { file_grep("$Self->{obj_dir}/gantt.log", qr/Total threads += 2/i); file_grep("$Self->{obj_dir}/gantt.log", qr/Total mtasks += 7/i); + # Predicted thread utilization should be less than 100% + file_grep_not("$Self->{obj_dir}/gantt.log", qr/Thread utilization =\s*\d\d\d+\.\d+%/i); } else { file_grep("$Self->{obj_dir}/gantt.log", qr/Total threads += 1/i); file_grep("$Self->{obj_dir}/gantt.log", qr/Total mtasks += 0/i);