diff --git a/bin/verilator_gantt b/bin/verilator_gantt index 40450d2992f..5ad1d0411f7 100755 --- a/bin/verilator_gantt +++ b/bin/verilator_gantt @@ -36,7 +36,7 @@ def read_data(filename): re_proc_cpu = re.compile(r'VLPROFPROC processor\s*:\s*(\d+)\s*$') re_proc_dat = re.compile(r'VLPROFPROC ([a-z_ ]+)\s*:\s*(.*)$') cpu = None - thread = None + thread = 0 execGraphStart = None global LongestVcdStrValueLength @@ -54,11 +54,11 @@ def read_data(filename): if kind == "SECTION_PUSH": LongestVcdStrValueLength = max(LongestVcdStrValueLength, len(payload)) SectionStack.append(payload) - Sections.append((tick, tuple(SectionStack))) + Sections[thread].append((tick, tuple(SectionStack))) elif kind == "SECTION_POP": assert SectionStack, "SECTION_POP without SECTION_PUSH" SectionStack.pop() - Sections.append((tick, tuple(SectionStack))) + Sections[thread].append((tick, tuple(SectionStack))) elif kind == "MTASK_BEGIN": mtask, predict_start, ecpu = re_payload_mtaskBegin.match(payload).groups() mtask = int(mtask) @@ -97,6 +97,7 @@ def read_data(filename): print("-Unknown execution trace record: %s" % line) elif re_thread.match(line): thread = int(re_thread.match(line).group(1)) + Sections.append([]) elif re.match(r'^VLPROF(THREAD|VERSION)', line): pass elif re_arg1.match(line): @@ -307,23 +308,27 @@ def report_cpus(): def report_sections(): - if not Sections: - return - print("\nSection profile:") + for thread, section in enumerate(Sections): + if section: + print(f"\nSection profile for thread {thread}:") + report_section(section) + +def report_section(section): totalTime = collections.defaultdict(lambda: 0) selfTime = collections.defaultdict(lambda: 0) sectionTree = [0, {}, 1] # [selfTime, childTrees, numberOfTimesEntered] prevTime = 0 prevStack = () - for time, stack in Sections: + for time, stack in section: if len(stack) > len(prevStack): scope = sectionTree for item in stack: scope = scope[1].setdefault(item, [0, {}, 0]) scope[2] += 1 dt = time - prevTime + assert dt >= 0 scope = sectionTree for item in prevStack: scope = scope[1].setdefault(item, [0, {}, 0]) @@ -457,12 +462,13 @@ def write_vcd(filename): addValue(pcode, time, value) # Section graph - if Sections: - scode = getCode(LongestVcdStrValueLength * 8, "section", "trace") - dcode = getCode(32, "section", "depth") - for time, stack in Sections: - addValue(scode, time, stack[-1] if stack else None) - addValue(dcode, time, len(stack)) + for thread, section in enumerate(Sections): + if section: + scode = getCode(LongestVcdStrValueLength * 8, "section", f"t{thread}_trace") + dcode = getCode(32, "section", f"t{thread}_depth") + for time, stack in section: + addValue(scode, time, stack[-1] if stack else None) + addValue(dcode, time, len(stack)) # Create output file fh.write("$version Generated by verilator_gantt $end\n") diff --git a/src/V3EmitCModel.cpp b/src/V3EmitCModel.cpp index d31dc8ca60c..bba4545579c 100644 --- a/src/V3EmitCModel.cpp +++ b/src/V3EmitCModel.cpp @@ -432,7 +432,8 @@ class EmitCModel final : public EmitCFunc { puts(topModNameProtected + "__" + protect("_eval_settle") + "(&(vlSymsp->TOP));\n"); puts("}\n"); - if (v3Global.opt.profExec()) puts("vlSymsp->__Vm_executionProfilerp->configure();\n"); + if (v3Global.opt.profExec() && !v3Global.opt.hierChild()) + puts("vlSymsp->__Vm_executionProfilerp->configure();\n"); puts("VL_DEBUG_IF(VL_DBG_MSGF(\"+ Eval\\n\"););\n"); puts(topModNameProtected + "__" + protect("_eval") + "(&(vlSymsp->TOP));\n"); diff --git a/src/V3Order.cpp b/src/V3Order.cpp index 431a5f95679..a8217afb033 100644 --- a/src/V3Order.cpp +++ b/src/V3Order.cpp @@ -123,8 +123,10 @@ AstCFunc* V3Order::order(AstNetlist* netlistp, // }(); if (v3Global.opt.profExec()) { - funcp->addStmtsp(new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"func " - + tag + "\");\n"}); + const string name + = (v3Global.opt.hierChild() ? (v3Global.opt.topModule() + " ") : "") + "func " + tag; + funcp->addStmtsp(new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"" + + name + "\");\n"}); } // Build the OrderGraph diff --git a/src/V3Sched.cpp b/src/V3Sched.cpp index d85cf93b866..c5af5acaa13 100644 --- a/src/V3Sched.cpp +++ b/src/V3Sched.cpp @@ -156,7 +156,9 @@ AstNodeStmt* checkIterationLimit(AstNetlist* netlistp, const string& name, AstVa return ifp; } -AstNodeStmt* profExecSectionPush(FileLine* flp, const string& name) { +AstNodeStmt* profExecSectionPush(FileLine* flp, const string& section) { + const string name + = (v3Global.opt.hierChild() ? (v3Global.opt.topModule() + " ") : "") + section; return new AstCStmt{flp, "VL_EXEC_TRACE_ADD_RECORD(vlSymsp).sectionPush(\"" + name + "\");\n"}; } diff --git a/test_regress/t/t_gantt_hier.py b/test_regress/t/t_gantt_hier.py new file mode 100755 index 00000000000..19d2e439cdf --- /dev/null +++ b/test_regress/t/t_gantt_hier.py @@ -0,0 +1,52 @@ +#!/usr/bin/env python3 +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2024 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +# Test for bin/verilator_gantt, + +import vltest_bootstrap + +test.scenarios('vlt_all') +test.top_filename = "t/t_gen_alw.v" # Any, as long as runs a few cycles + +test.compile( + v_flags2=["--prof-exec", "--hierarchical"], + # Checks below care about thread count, so use 2 (minimum reasonable) + threads=(2 if test.vltmt else 1)) + +test.execute(all_run_flags=[ + "+verilator+prof+exec+start+2", + " +verilator+prof+exec+window+2", + " +verilator+prof+exec+file+" + test.obj_dir + "/profile_exec.dat", + " +verilator+prof+vlt+file+" + test.obj_dir + "/profile.vlt"]) # yapf:disable + +# For now, verilator_gantt still reads from STDIN +# (probably it should take a file, gantt.dat like verilator_profcfunc) +# The profiling data still goes direct to the runtime's STDOUT +# (maybe that should go to a separate file - gantt.dat?) +test.run(cmd=[ + os.environ["VERILATOR_ROOT"] + "/bin/verilator_gantt", test.obj_dir + + "/profile_exec.dat", "--vcd " + test.obj_dir + "/profile_exec.vcd", "| tee " + test.obj_dir + + "/gantt.log" +]) + +if test.vltmt: + test.file_grep(test.obj_dir + "/gantt.log", r'Total threads += 2') + test.file_grep(test.obj_dir + "/gantt.log", r'Total mtasks += 8') + # Predicted thread utilization should be less than 100% + test.file_grep_not(test.obj_dir + "/gantt.log", r'Thread utilization =\s*\d\d\d+\.\d+%') +else: + test.file_grep(test.obj_dir + "/gantt.log", r'Total threads += 1') + test.file_grep(test.obj_dir + "/gantt.log", r'Total mtasks += 0') + +test.file_grep(test.obj_dir + "/gantt.log", r'\|\s+2\s+\|\s+2\.0+\s+\|\s+eval') + +# Diff to itself, just to check parsing +test.vcd_identical(test.obj_dir + "/profile_exec.vcd", test.obj_dir + "/profile_exec.vcd") + +test.passes() diff --git a/test_regress/t/t_gen_alw.v b/test_regress/t/t_gen_alw.v index 21b9538391c..27b86e96bec 100644 --- a/test_regress/t/t_gen_alw.v +++ b/test_regress/t/t_gen_alw.v @@ -59,7 +59,7 @@ endmodule module Test (/*AUTOARG*/ // Inputs clk, in - ); + ); /*verilator hier_block*/ input clk; input [9:0] in;