From 00e4096742e935e51fa0c65a1ff1513114825cbd Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sun, 10 Nov 2024 12:13:44 +0000 Subject: [PATCH] Move Concat balancing from DFG to FuncOpt This means it applies more widely, e.g. inside sequential logic. --- docs/guide/exe_verilator.rst | 2 + src/CMakeLists.txt | 1 - src/Makefile_obj.in | 1 - src/V3Dfg.h | 3 - src/V3DfgBalanceTrees.cpp | 197 ------------------ src/V3DfgOptimizer.cpp | 4 +- src/V3DfgOptimizer.h | 2 +- src/V3DfgPasses.cpp | 11 +- src/V3DfgPasses.h | 16 +- src/V3FuncOpt.cpp | 157 +++++++++++++- src/V3Options.cpp | 2 + src/V3Options.h | 4 +- src/Verilator.cpp | 4 +- ..._dfg_balance_cats.py => t_balance_cats.py} | 7 +- ...{t_dfg_balance_cats.v => t_balance_cats.v} | 0 ...ats_nofunc.py => t_balance_cats_nofunc.py} | 9 +- test_regress/t/t_opt_const_dfg.py | 2 +- 17 files changed, 174 insertions(+), 248 deletions(-) delete mode 100644 src/V3DfgBalanceTrees.cpp rename test_regress/t/{t_dfg_balance_cats.py => t_balance_cats.py} (55%) rename test_regress/t/{t_dfg_balance_cats.v => t_balance_cats.v} (100%) rename test_regress/t/{t_dfg_balance_cats_nofunc.py => t_balance_cats_nofunc.py} (53%) diff --git a/docs/guide/exe_verilator.rst b/docs/guide/exe_verilator.rst index e70f70b718..1ec0efd4ee 100644 --- a/docs/guide/exe_verilator.rst +++ b/docs/guide/exe_verilator.rst @@ -591,6 +591,8 @@ Summary: .. option:: -fno-func-opt +.. option:: -fno-func-opt-balance-cat + .. option:: -fno-func-opt-split-cat .. 
option:: -fno-gate diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 9b1aac1d01..d9b43d17af 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -225,7 +225,6 @@ set(COMMON_SOURCES V3Descope.cpp V3Dfg.cpp V3DfgAstToDfg.cpp - V3DfgBalanceTrees.cpp V3DfgCache.cpp V3DfgDecomposition.cpp V3DfgDfgToAst.cpp diff --git a/src/Makefile_obj.in b/src/Makefile_obj.in index 0945e4690b..adfcb22155 100644 --- a/src/Makefile_obj.in +++ b/src/Makefile_obj.in @@ -238,7 +238,6 @@ RAW_OBJS_PCH_ASTNOMT = \ V3Descope.o \ V3Dfg.o \ V3DfgAstToDfg.o \ - V3DfgBalanceTrees.o \ V3DfgCache.o \ V3DfgDecomposition.o \ V3DfgDfgToAst.o \ diff --git a/src/V3Dfg.h b/src/V3Dfg.h index 8b0978b973..5fab278ee4 100644 --- a/src/V3Dfg.h +++ b/src/V3Dfg.h @@ -274,9 +274,6 @@ class DfgVertex VL_NOT_FINAL { // Predicate: has 1 or more sinks bool hasSinks() const { return m_sinksp != nullptr; } - // Predicate: has precisely 1 sink - bool hasSingleSink() const { return m_sinksp && !m_sinksp->m_nextp; } - // Predicate: has 2 or more sinks bool hasMultipleSinks() const { return m_sinksp && m_sinksp->m_nextp; } diff --git a/src/V3DfgBalanceTrees.cpp b/src/V3DfgBalanceTrees.cpp deleted file mode 100644 index 6b5eca2d82..0000000000 --- a/src/V3DfgBalanceTrees.cpp +++ /dev/null @@ -1,197 +0,0 @@ -// -*- mode: C++; c-file-style: "cc-mode" -*- -//************************************************************************* -// DESCRIPTION: Verilator: Balance associative op trees in DfgGraphs -// -// Code available from: https://verilator.org -// -//************************************************************************* -// -// Copyright 2003-2024 by Wilson Snyder. This program is free software; you -// can redistribute it and/or modify it under the terms of either the GNU -// Lesser General Public License Version 3 or the Perl Artistic License -// Version 2.0. 
-// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 -// -//************************************************************************* -// -// - Convert concatenation trees into balanced form -// -//************************************************************************* - -#include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT - -#include "V3Dfg.h" -#include "V3DfgPasses.h" - -VL_DEFINE_DEBUG_FUNCTIONS; - -class DfgBalanceTrees final { - // We keep the expressions, together with their offsets within a concatenation tree - struct ConcatTerm final { - DfgVertex* vtxp = nullptr; - size_t offset = 0; - - ConcatTerm() = default; - ConcatTerm(DfgVertex* vtxp, size_t offset) - : vtxp{vtxp} - , offset{offset} {} - }; - - DfgGraph& m_dfg; // The graph being processed - V3DfgBalanceTreesContext& m_ctx; // The optimization context for stats - - // Is the given vertex the root of a tree (of potentially size 1), of the given type? - template - static bool isRoot(const DfgVertex& vtx) { - static_assert(std::is_base_of::value, - "'Vertex' must be a 'DfgVertexBinary'"); - if (!vtx.is()) return false; - // Has a single sink, and that sink is not another vertex of the same type - return vtx.hasSingleSink() && !vtx.findSink(); - } - - // Recursive implementation of 'gatherTerms' below. - template - static void gatherTermsImpl(DfgVertex* vtxp, std::vector& terms) { - // Base case: different type, or multiple sinks -> it's a term - if (!vtxp->is() || vtxp->hasMultipleSinks()) { - terms.emplace_back(vtxp); - return; - } - // Recursive case: gather sub terms, right to right - DfgVertexBinary* const binp = vtxp->as(); - gatherTermsImpl(binp->rhsp(), terms); - gatherTermsImpl(binp->lhsp(), terms); - } - - // Gather terms in the tree of given type, rooted at the given vertex. - // Results are right to left, that is, index 0 in the returned vector - // is the rightmost term, index size()-1 is the leftmost term. 
- template - static std::vector gatherTerms(Vertex& root) { - static_assert(std::is_base_of::value, - "'Vertex' must be a 'DfgVertexBinary'"); - std::vector terms; - gatherTermsImpl(root.rhsp(), terms); - gatherTermsImpl(root.lhsp(), terms); - return terms; - } - - // Construct a balanced concatenation from the given terms, - // between indices begin (inclusive), and end (exclusive). - // Note term[end].offset must be valid. term[end].vtxp is - // never referenced. - DfgVertex* constructConcat(const std::vector& terms, const size_t begin, - const size_t end) { - UASSERT(end < terms.size(), "Invalid end"); - UASSERT(begin < end, "Invalid range"); - // Base case: just return the term - if (end == begin + 1) return terms[begin].vtxp; - - // Recursive case: - // Compute the mid-point, trying to create roughly equal width intermediates - const size_t width = terms[end].offset - terms[begin].offset; - const size_t midOffset = width / 2 + terms[begin].offset; - const auto beginIt = terms.begin() + begin; - const auto endIt = terms.begin() + end; - const auto midIt = std::lower_bound(beginIt + 1, endIt - 1, midOffset, // - [&](const ConcatTerm& term, size_t value) { // - return term.offset < value; - }); - const size_t mid = begin + std::distance(beginIt, midIt); - UASSERT(begin < mid && mid < end, "Must make some progress"); - // Construct the subtrees - DfgVertex* const rhsp = constructConcat(terms, begin, mid); - DfgVertex* const lhsp = constructConcat(terms, mid, end); - // Construct new node - AstNodeDType* const dtypep = DfgVertex::dtypeForWidth(lhsp->width() + rhsp->width()); - DfgConcat* const newp = new DfgConcat{m_dfg, lhsp->fileline(), dtypep}; - newp->rhsp(rhsp); - newp->lhsp(lhsp); - return newp; - } - - // Delete unused tree rooted at the given vertex - void deleteTree(DfgVertexBinary* const vtxp) { - UASSERT_OBJ(!vtxp->hasSinks(), vtxp, "Trying to remove used vertex"); - DfgVertexBinary* const lhsp = vtxp->lhsp()->cast(); - DfgVertexBinary* const rhsp = 
vtxp->rhsp()->cast(); - VL_DO_DANGLING(vtxp->unlinkDelete(m_dfg), vtxp); - if (lhsp && !lhsp->hasSinks()) deleteTree(lhsp); - if (rhsp && !rhsp->hasSinks()) deleteTree(rhsp); - } - - void balanceConcat(DfgConcat* const rootp) { - // Gather all input vertices of the tree - const std::vector vtxps = gatherTerms(*rootp); - // Don't bother with trivial trees - if (vtxps.size() <= 3) return; - - // Construct the terms Vector that we are going to do processing on - std::vector terms(vtxps.size() + 1); - // These are redundant (constructor does the same), but here they are for clarity - terms[0].offset = 0; - terms[vtxps.size()].vtxp = nullptr; - for (size_t i = 0; i < vtxps.size(); ++i) { - terms[i].vtxp = vtxps[i]; - terms[i + 1].offset = terms[i].offset + vtxps[i]->width(); - } - - // Round 1: try to create terms ending on VL_EDATASIZE boundaries. - // This ensures we pack bits within a VL_EDATASIZE first is possible, - // and then hopefully we can just assemble VL_EDATASIZE words afterward. - std::vector terms2; - { - terms2.reserve(terms.size()); - - size_t begin = 0; // Start of current range considered - size_t end = 0; // End of current range considered - size_t offset = 0; // Offset of current range considered - - // Create a term from the current range - const auto makeTerm = [&]() { - DfgVertex* const vtxp = constructConcat(terms, begin, end); - terms2.emplace_back(vtxp, offset); - offset += vtxp->width(); - begin = end; - }; - - // Create all terms ending on a boundary. - while (++end < terms.size() - 1) { - if (terms[end].offset % VL_EDATASIZE == 0) makeTerm(); - } - // Final term. Loop condition above ensures this always exists, - // and might or might not be on a boundary. - makeTerm(); - // Sentinel term - terms2.emplace_back(nullptr, offset); - // should have ended up with the same number of bits at least... 
- UASSERT(terms2.back().offset == terms.back().offset, "Inconsitent terms"); - } - - // Round 2: Combine the partial terms - rootp->replaceWith(constructConcat(terms2, 0, terms2.size() - 1)); - VL_DO_DANGLING(deleteTree(rootp), rootp); - - ++m_ctx.m_balancedConcats; - } - - DfgBalanceTrees(DfgGraph& dfg, V3DfgBalanceTreesContext& ctx) - : m_dfg{dfg} - , m_ctx{ctx} { - // Find all roots - std::vector rootps; - for (DfgVertex& vtx : dfg.opVertices()) { - if (isRoot(vtx)) rootps.emplace_back(vtx.as()); - } - // Balance them - for (DfgConcat* const rootp : rootps) balanceConcat(rootp); - } - -public: - static void apply(DfgGraph& dfg, V3DfgBalanceTreesContext& ctx) { DfgBalanceTrees{dfg, ctx}; } -}; - -void V3DfgPasses::balanceTrees(DfgGraph& dfg, V3DfgBalanceTreesContext& ctx) { - DfgBalanceTrees::apply(dfg, ctx); -} diff --git a/src/V3DfgOptimizer.cpp b/src/V3DfgOptimizer.cpp index 7297cdd85e..d6c6f1f302 100644 --- a/src/V3DfgOptimizer.cpp +++ b/src/V3DfgOptimizer.cpp @@ -236,7 +236,7 @@ void V3DfgOptimizer::extract(AstNetlist* netlistp) { V3Global::dumpCheckGlobalTree("dfg-extract", 0, dumpTreeEitherLevel() >= 3); } -void V3DfgOptimizer::optimize(AstNetlist* netlistp, const string& label, bool lastInvocation) { +void V3DfgOptimizer::optimize(AstNetlist* netlistp, const string& label) { UINFO(2, __FUNCTION__ << ": " << endl); // NODE STATE @@ -282,7 +282,7 @@ void V3DfgOptimizer::optimize(AstNetlist* netlistp, const string& label, bool la for (auto& component : acyclicComponents) { if (dumpDfgLevel() >= 7) component->dumpDotFilePrefixed(ctx.prefix() + "source"); // Optimize the component - V3DfgPasses::optimize(*component, ctx, lastInvocation); + V3DfgPasses::optimize(*component, ctx); // Add back under the main DFG (we will convert everything back in one go) dfg->addGraph(*component); } diff --git a/src/V3DfgOptimizer.h b/src/V3DfgOptimizer.h index df67c3e53d..067b5e8017 100644 --- a/src/V3DfgOptimizer.h +++ b/src/V3DfgOptimizer.h @@ -29,7 +29,7 @@ namespace 
V3DfgOptimizer { void extract(AstNetlist*) VL_MT_DISABLED; // Optimize the design -void optimize(AstNetlist*, const string& label, bool lastInvocation) VL_MT_DISABLED; +void optimize(AstNetlist*, const string& label) VL_MT_DISABLED; } // namespace V3DfgOptimizer #endif // Guard diff --git a/src/V3DfgPasses.cpp b/src/V3DfgPasses.cpp index 5b3f04041e..d67642e8cb 100644 --- a/src/V3DfgPasses.cpp +++ b/src/V3DfgPasses.cpp @@ -42,11 +42,6 @@ V3DfgEliminateVarsContext::~V3DfgEliminateVarsContext() { m_varsRemoved); } -V3DfgBalanceTreesContext::~V3DfgBalanceTreesContext() { - V3Stats::addStat("Optimizations, DFG " + m_label + " BalanceTrees, concat trees balanced", - m_balancedConcats); -} - static std::string getPrefix(const std::string& label) { if (label.empty()) return ""; std::string str = VString::removeWhitespace(label); @@ -337,7 +332,7 @@ void V3DfgPasses::eliminateVars(DfgGraph& dfg, V3DfgEliminateVarsContext& ctx) { for (AstVar* const varp : replacedVariables) varp->unlinkFrBack()->deleteTree(); } -void V3DfgPasses::optimize(DfgGraph& dfg, V3DfgOptimizationContext& ctx, bool lastInvocation) { +void V3DfgPasses::optimize(DfgGraph& dfg, V3DfgOptimizationContext& ctx) { // There is absolutely nothing useful we can do with a graph of size 2 or less if (dfg.size() <= 2) return; @@ -365,10 +360,6 @@ void V3DfgPasses::optimize(DfgGraph& dfg, V3DfgOptimizationContext& ctx, bool la } // Accumulate patterns for reporting if (v3Global.opt.stats()) ctx.m_patternStats.accumulate(dfg); - // The peephole pass covnerts all trees to right leaning, so only do this on the last DFG run. 
- if (lastInvocation) { - apply(4, "balanceTrees", [&]() { balanceTrees(dfg, ctx.m_balanceTreesContext); }); - } apply(4, "regularize", [&]() { regularize(dfg, ctx.m_regularizeContext); }); if (dumpDfgLevel() >= 8) dfg.dumpDotAllVarConesPrefixed(ctx.prefix() + "optimized"); } diff --git a/src/V3DfgPasses.h b/src/V3DfgPasses.h index d893c84ce5..2b1e08aa64 100644 --- a/src/V3DfgPasses.h +++ b/src/V3DfgPasses.h @@ -68,17 +68,6 @@ class V3DfgEliminateVarsContext final { ~V3DfgEliminateVarsContext() VL_MT_DISABLED; }; -class V3DfgBalanceTreesContext final { - const std::string m_label; // Label to apply to stats - -public: - VDouble0 m_balancedConcats; // Number of temporaries introduced - - explicit V3DfgBalanceTreesContext(const std::string& label) - : m_label{label} {} - ~V3DfgBalanceTreesContext() VL_MT_DISABLED; -}; - class V3DfgOptimizationContext final { const std::string m_label; // Label to add to stats, etc. const std::string m_prefix; // Prefix to add to file dumps (derived from label) @@ -103,7 +92,6 @@ class V3DfgOptimizationContext final { V3DfgPeepholeContext m_peepholeContext{m_label}; V3DfgRegularizeContext m_regularizeContext{m_label}; V3DfgEliminateVarsContext m_eliminateVarsContext{m_label}; - V3DfgBalanceTreesContext m_balanceTreesContext{m_label}; V3DfgPatternStats m_patternStats; @@ -124,7 +112,7 @@ namespace V3DfgPasses { DfgGraph* astToDfg(AstModule&, V3DfgOptimizationContext&) VL_MT_DISABLED; // Optimize the given DfgGraph -void optimize(DfgGraph&, V3DfgOptimizationContext&, bool lastInvocation) VL_MT_DISABLED; +void optimize(DfgGraph&, V3DfgOptimizationContext&) VL_MT_DISABLED; // Convert DfgGraph back into Ast, and insert converted graph back into its parent module. // Returns the parent module. @@ -146,8 +134,6 @@ void regularize(DfgGraph&, V3DfgRegularizeContext&) VL_MT_DISABLED; void removeUnused(DfgGraph&) VL_MT_DISABLED; // Eliminate (remove or replace) redundant variables. Also removes resulting unused logic. 
void eliminateVars(DfgGraph&, V3DfgEliminateVarsContext&) VL_MT_DISABLED; -// Make computation trees balanced -void balanceTrees(DfgGraph&, V3DfgBalanceTreesContext&) VL_MT_DISABLED; } // namespace V3DfgPasses diff --git a/src/V3FuncOpt.cpp b/src/V3FuncOpt.cpp index 748ac91008..881aa17a03 100644 --- a/src/V3FuncOpt.cpp +++ b/src/V3FuncOpt.cpp @@ -21,6 +21,12 @@ // foo[_:_] = r; // foo[_:_] = l; // +// - Balance concatenation trees, e.g.: +// {a, {b, {c, d}}} +// becomes: +// {{a, b}, {c, d}} +// Reality is more complex here, see the code. +// //************************************************************************* #include "V3PchAstMT.h" @@ -33,14 +39,147 @@ VL_DEFINE_DEBUG_FUNCTIONS; +class BalanceConcatTree final { + // STATELESS + + // We keep the expressions, together with their offsets within a concatenation tree + struct Term final { + AstNodeExpr* exprp = nullptr; + size_t offset = 0; + + Term() = default; + Term(AstNodeExpr* exprp, size_t offset) + : exprp{exprp} + , offset{offset} {} + }; + + // Recursive implementation of 'gatherTerms' below. + static void gatherTermsRecursive(AstNodeExpr* exprp, std::vector& terms) { + if (AstConcat* const catp = VN_CAST(exprp, Concat)) { + // Recursive case: gather sub terms, right to left + gatherTermsRecursive(catp->rhsp(), terms); + gatherTermsRecursive(catp->lhsp(), terms); + return; + } + + // Base case: different operation + terms.emplace_back(exprp); + } + + // Gather the terms of the concatenation tree rooted at the given node. + // Results are right to left, that is, index 0 in the returned vector + // is the rightmost term, index size()-1 is the leftmost term. + static std::vector gatherTerms(AstConcat* rootp) { + std::vector terms; + gatherTermsRecursive(rootp->rhsp(), terms); + gatherTermsRecursive(rootp->lhsp(), terms); + return terms; + } + + // Construct a balanced concatenation from the given terms, + // between indices begin (inclusive), and end (exclusive). + // Note term[end].offset must be valid.
term[end].exprp is + // never referenced. + static AstNodeExpr* construct(const std::vector& terms, const size_t begin, + const size_t end) { + UASSERT(end < terms.size(), "Invalid end"); + UASSERT(begin < end, "Invalid range"); + // Base case: just return the term + if (end == begin + 1) return terms[begin].exprp; + + // Recursive case: + // Compute the mid-point, trying to create roughly equal width intermediates + const size_t width = terms[end].offset - terms[begin].offset; + const size_t midOffset = width / 2 + terms[begin].offset; + const auto beginIt = terms.begin() + begin; + const auto endIt = terms.begin() + end; + const auto midIt = std::lower_bound(beginIt + 1, endIt - 1, midOffset, // + [&](const Term& term, size_t value) { // + return term.offset < value; + }); + const size_t mid = begin + std::distance(beginIt, midIt); + UASSERT(begin < mid && mid < end, "Must make some progress"); + // Construct the subtrees + AstNodeExpr* const rhsp = construct(terms, begin, mid); + AstNodeExpr* const lhsp = construct(terms, mid, end); + // Construct new node + AstNodeExpr* newp = new AstConcat{lhsp->fileline(), lhsp, rhsp}; + newp->user1(true); // Must not attempt to balance again.
+ return newp; + } + + // Returns replacement node, or nullptr if no change + static AstConcat* balance(AstConcat* const rootp) { + UINFO(9, "balanceConcat " << rootp << "\n"); + // Gather all leaf terms of the tree + const std::vector exprps = gatherTerms(rootp); + // Don't bother with trivial trees + if (exprps.size() <= 3) return nullptr; + // Don't do it if any of the terms are impure + for (AstNodeExpr* const exprp : exprps) { + if (!exprp->isPure()) return nullptr; + } + + // Construct the terms vector that we are going to do processing on + std::vector terms(exprps.size() + 1); + // These are redundant (constructor does the same), but here they are for clarity + terms[0].offset = 0; + terms[exprps.size()].exprp = nullptr; + for (size_t i = 0; i < exprps.size(); ++i) { + terms[i].exprp = exprps[i]->unlinkFrBack(); + terms[i + 1].offset = terms[i].offset + exprps[i]->width(); + } + + // Round 1: try to create terms ending on VL_EDATASIZE boundaries. + // This ensures we pack bits within a VL_EDATASIZE first if possible, + // and then hopefully we can just assemble VL_EDATASIZE words afterward. + std::vector terms2; + { + terms2.reserve(terms.size()); + + size_t begin = 0; // Start of current range considered + size_t end = 0; // End of current range considered + size_t offset = 0; // Offset of current range considered + + // Create a term from the current range + const auto makeTerm = [&]() { + AstNodeExpr* const exprp = construct(terms, begin, end); + terms2.emplace_back(exprp, offset); + offset += exprp->width(); + begin = end; + }; + + // Create all terms ending on a boundary. + while (++end < terms.size() - 1) { + if (terms[end].offset % VL_EDATASIZE == 0) makeTerm(); + } + // Final term. Loop condition above ensures this always exists, + // and might or might not be on a boundary. + makeTerm(); + // Sentinel term + terms2.emplace_back(nullptr, offset); + // should have ended up with the same number of bits at least...
+ UASSERT(terms2.back().offset == terms.back().offset, "Inconsitent terms"); + } + + // Round 2: Combine the partial terms + return VN_AS(construct(terms2, 0, terms2.size() - 1), Concat); + } + +public: + static AstConcat* apply(AstConcat* rootp) { return balance(rootp); } +}; + class FuncOptVisitor final : public VNVisitor { // NODE STATE // AstNodeAssign::user() -> bool. Already checked, safe to split. Omit expensive check. + // AstConcat::user() -> bool. Already balanced. // STATE - across all visitors AstCFunc* const m_funcp; // The function being processes // STATE - Statistic tracking + VDouble0 m_balancedConcats; // Number of concatenations balanced VDouble0 m_concatSplits; // Number of splits in assignments with Concat on RHS // True for e.g.: foo = foo >> 1; or foo[foo[0]] = ...; @@ -143,12 +282,27 @@ class FuncOptVisitor final : public VNVisitor { void visit(AstNodeAssign* nodep) override { // TODO: Only thing remaining inside functions should be AstAssign (that is, an actual // assignment statemant), but we stil use AstAssignW, AstAssignDly, and all, fix. + iterateChildren(nodep); + if (v3Global.opt.fFuncSplitCat()) { if (splitConcat(nodep)) return; // Must return here, in case more code is added below } } - void visit(AstNodeExpr*) override {} // No need to descend further (Ignore AstExprStmt...) + void visit(AstConcat* nodep) override { + if (v3Global.opt.fFuncBalanceCat() && !nodep->user1() && !VN_IS(nodep->backp(), Concat)) { + if (AstConcat* const newp = BalanceConcatTree::apply(nodep)) { + UINFO(5, "balanceConcat optimizing " << nodep << "\n"); + ++m_balancedConcats; + nodep->replaceWith(newp); + VL_DO_DANGLING(pushDeletep(nodep), nodep); + newp->user1(true); // Must not attempt again. + // Return here. The new node will be iterated next. 
+ return; + } + } + iterateChildren(nodep); + } void visit(AstNode* nodep) override { iterateChildren(nodep); } @@ -158,6 +312,7 @@ class FuncOptVisitor final : public VNVisitor { iterateChildren(funcp); } ~FuncOptVisitor() override { + V3Stats::addStatSum("Optimizations, FuncOpt concat trees balanced", m_balancedConcats); V3Stats::addStatSum("Optimizations, FuncOpt concat splits", m_concatSplits); } diff --git a/src/V3Options.cpp b/src/V3Options.cpp index 11a154a435..c55af2780a 100644 --- a/src/V3Options.cpp +++ b/src/V3Options.cpp @@ -1305,7 +1305,9 @@ void V3Options::parseOptsList(FileLine* fl, const string& optdir, int argc, DECL_OPTION("-fexpand", FOnOff, &m_fExpand); DECL_OPTION("-ffunc-opt", CbFOnOff, [this](bool flag) { // m_fFuncSplitCat = flag; + m_fFuncBalanceCat = flag; }); + DECL_OPTION("-ffunc-opt-balance-cat", FOnOff, &m_fFuncBalanceCat); DECL_OPTION("-ffunc-opt-split-cat", FOnOff, &m_fFuncSplitCat); DECL_OPTION("-fgate", FOnOff, &m_fGate); DECL_OPTION("-finline", FOnOff, &m_fInline); diff --git a/src/V3Options.h b/src/V3Options.h index 5eaa0aebd1..67f66cd18f 100644 --- a/src/V3Options.h +++ b/src/V3Options.h @@ -384,6 +384,7 @@ class V3Options final { bool m_fDeadAssigns; // main switch: -fno-dead-assigns: remove dead assigns bool m_fDeadCells; // main switch: -fno-dead-cells: remove dead cells bool m_fExpand; // main switch: -fno-expand: expansion of C macros + bool m_fFuncBalanceCat = true; // main switch: -fno-func-opt-balance-cat: balance concatenation trees bool m_fFuncSplitCat = true; // main switch: -fno-func-split-cat: expansion of C macros bool m_fGate; // main switch: -fno-gate: gate wire elimination bool m_fInline; // main switch: -fno-inline: module inlining @@ -675,8 +676,9 @@ class V3Options final { bool fDeadAssigns() const { return m_fDeadAssigns; } bool fDeadCells() const { return m_fDeadCells; } bool fExpand() const { return m_fExpand; } + bool fFuncBalanceCat() const { return m_fFuncBalanceCat; } bool fFuncSplitCat() const { return
m_fFuncSplitCat; } - bool fFunc() const { return fFuncSplitCat(); } + bool fFunc() const { return fFuncSplitCat() || fFuncBalanceCat(); } bool fGate() const { return m_fGate; } bool fInline() const { return m_fInline; } bool fLife() const { return m_fLife; } diff --git a/src/Verilator.cpp b/src/Verilator.cpp index 1c4d58cfad..1cfce8c302 100644 --- a/src/Verilator.cpp +++ b/src/Verilator.cpp @@ -287,7 +287,7 @@ static void process() { if (v3Global.opt.fDfgPreInline()) { // Pre inline DFG optimization - V3DfgOptimizer::optimize(v3Global.rootp(), "pre inline", /* lastInvocation: */ false); + V3DfgOptimizer::optimize(v3Global.rootp(), "pre inline"); } if (!(v3Global.opt.serializeOnly() && !v3Global.opt.flatten())) { @@ -304,7 +304,7 @@ static void process() { if (v3Global.opt.fDfgPostInline()) { // Post inline DFG optimization - V3DfgOptimizer::optimize(v3Global.rootp(), "post inline", /* lastInvocation: */ true); + V3DfgOptimizer::optimize(v3Global.rootp(), "post inline"); } // --PRE-FLAT OPTIMIZATIONS------------------ diff --git a/test_regress/t/t_dfg_balance_cats.py b/test_regress/t/t_balance_cats.py similarity index 55% rename from test_regress/t/t_dfg_balance_cats.py rename to test_regress/t/t_balance_cats.py index 93de94adf1..b3cdbade48 100755 --- a/test_regress/t/t_dfg_balance_cats.py +++ b/test_regress/t/t_balance_cats.py @@ -13,12 +13,7 @@ test.compile(verilator_flags2=["--stats"]) -test.file_grep(test.stats, - r' Optimizations, DFG pre inline BalanceTrees, concat trees balanced\s+(\d+)', 0) -test.file_grep(test.stats, - r' Optimizations, DFG post inline BalanceTrees, concat trees balanced\s+(\d+)', 1) -test.file_grep(test.stats, r'Optimizations, DFG pre inline Dfg2Ast, result equations\s+(\d+)', 1) -test.file_grep(test.stats, r'Optimizations, DFG post inline Dfg2Ast, result equations\s+(\d+)', 1) +test.file_grep(test.stats, r'Optimizations, FuncOpt concat trees balanced\s+(\d+)', 1) test.file_grep(test.stats, r'Optimizations, FuncOpt concat splits\s+(\d+)', 
62) test.passes() diff --git a/test_regress/t/t_dfg_balance_cats.v b/test_regress/t/t_balance_cats.v similarity index 100% rename from test_regress/t/t_dfg_balance_cats.v rename to test_regress/t/t_balance_cats.v diff --git a/test_regress/t/t_dfg_balance_cats_nofunc.py b/test_regress/t/t_balance_cats_nofunc.py similarity index 53% rename from test_regress/t/t_dfg_balance_cats_nofunc.py rename to test_regress/t/t_balance_cats_nofunc.py index d57622f3a9..6ce07d2c66 100755 --- a/test_regress/t/t_dfg_balance_cats_nofunc.py +++ b/test_regress/t/t_balance_cats_nofunc.py @@ -11,16 +11,11 @@ test.scenarios('vlt') -test.top_filename = "t/t_dfg_balance_cats.v" +test.top_filename = "t/t_balance_cats.v" test.compile(verilator_flags2=["--stats", "-fno-func-opt"]) -test.file_grep(test.stats, - r' Optimizations, DFG pre inline BalanceTrees, concat trees balanced\s+(\d+)', 0) -test.file_grep(test.stats, - r' Optimizations, DFG post inline BalanceTrees, concat trees balanced\s+(\d+)', 1) -test.file_grep(test.stats, r'Optimizations, DFG pre inline Dfg2Ast, result equations\s+(\d+)', 1) -test.file_grep(test.stats, r'Optimizations, DFG post inline Dfg2Ast, result equations\s+(\d+)', 1) +test.file_grep_not(test.stats, r'Optimizations, FuncOpt concat trees balanced') test.file_grep_not(test.stats, r'Optimizations, FuncOpt concat splits') test.passes() diff --git a/test_regress/t/t_opt_const_dfg.py b/test_regress/t/t_opt_const_dfg.py index e46719f231..eed838d280 100755 --- a/test_regress/t/t_opt_const_dfg.py +++ b/test_regress/t/t_opt_const_dfg.py @@ -17,6 +17,6 @@ test.execute() if test.vlt: - test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 39) + test.file_grep(test.stats, r'Optimizations, Const bit op reduction\s+(\d+)', 40) test.passes()