forked from verilator/verilator
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Split up assignments to wides with Concat on the RHS
Add a new pass that (recursively) splits an assignment of the form `foo = {l, r};` into the two assignments `foo[_:_] = r;` and `foo[_:_] = l;`, with the appropriate indices, iff the concatenation straddles a wide word boundary. This eliminates more wide temporaries and gives another 23% speedup on VeeR EH2 high_perf. It also brings the predicted stack size down from 8M to 40k.
- Loading branch information
Showing
10 changed files
with
261 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,182 @@ | ||
// -*- mode: C++; c-file-style: "cc-mode" -*- | ||
//************************************************************************* | ||
// DESCRIPTION: Verilator: Generic optimizations on a per function basis | ||
// | ||
// Code available from: https://verilator.org | ||
// | ||
//************************************************************************* | ||
// | ||
// Copyright 2003-2024 by Wilson Snyder. This program is free software; you | ||
// can redistribute it and/or modify it under the terms of either the GNU | ||
// Lesser General Public License Version 3 or the Perl Artistic License | ||
// Version 2.0. | ||
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 | ||
// | ||
//************************************************************************* | ||
// | ||
// - Split assignments to wide locations with Concat on the RHS | ||
// at word boundaries: | ||
// foo = {l, r}; | ||
// becomes (recursively): | ||
// foo[_:_] = r; | ||
// foo[_:_] = l; | ||
// | ||
//************************************************************************* | ||
|
||
#include "V3PchAstMT.h" | ||
|
||
#include "V3FuncOpt.h" | ||
|
||
#include "V3Global.h" | ||
#include "V3Stats.h" | ||
#include "V3ThreadPool.h" | ||
|
||
VL_DEFINE_DEBUG_FUNCTIONS; | ||
|
||
class FuncOptVisitor final : public VNVisitor { | ||
// NODE STATE | ||
// AstNodeAssign::user() -> bool. Already checked, safe to split. Omit expensive check. | ||
|
||
// STATE - Statistic tracking | ||
VDouble0 m_concatSplits; // Number of splits in assignments with Concat on RHS | ||
|
||
// True for e.g.: foo = foo >> 1; or foo[foo[0]] = ...; | ||
static bool readsLhs(AstNodeAssign* nodep) { | ||
// It is expected that the number of vars written on the LHS is very small (should be 1). | ||
std::unordered_set<const AstVar*> lhsWrVarps; | ||
std::unordered_set<const AstVar*> lhsRdVarps; | ||
nodep->lhsp()->foreach([&](const AstVarRef* refp) { | ||
if (refp->access().isWriteOrRW()) lhsWrVarps.emplace(refp->varp()); | ||
if (refp->access().isReadOrRW()) lhsRdVarps.emplace(refp->varp()); | ||
}); | ||
|
||
// Common case of 1 variable on the LHS - special handling for speed | ||
if (lhsWrVarps.size() == 1) { | ||
const AstVar* const lhsWrVarp = *lhsWrVarps.begin(); | ||
// Check Rhs doesn't read the written var | ||
const bool rhsReadsWritten = nodep->rhsp()->exists([=](const AstVarRef* refp) { // | ||
return refp->varp() == lhsWrVarp; | ||
}); | ||
if (rhsReadsWritten) return true; | ||
// Check Lhs doesn't read the written var | ||
return lhsRdVarps.count(lhsWrVarp); | ||
} | ||
|
||
// Generic case of multiple vars written on LHS | ||
// TODO: this might be impossible due to earlier transforms, not sure | ||
// Check Rhs doesn't read the written vars | ||
const bool rhsReadsWritten = nodep->rhsp()->exists([&](const AstVarRef* refp) { // | ||
return lhsWrVarps.count(refp->varp()); | ||
}); | ||
if (rhsReadsWritten) return true; | ||
// Check Lhs doesn't read the written vars | ||
for (const AstVar* const lhsWrVarp : lhsWrVarps) { | ||
if (lhsRdVarps.count(lhsWrVarp)) return true; | ||
} | ||
return false; | ||
} | ||
|
||
// METHODS | ||
// Split wide assignments with a wide concatenation on the RHS. | ||
// Returns true if 'nodep' was deleted | ||
bool splitConcat(AstNodeAssign* nodep) { | ||
UINFO(9, "splitConcat " << nodep << "\n"); | ||
// Only care about concatenations on the right | ||
AstConcat* const rhsp = VN_CAST(nodep->rhsp(), Concat); | ||
if (!rhsp) return false; | ||
// Will need the LHS | ||
AstNodeExpr* lhsp = nodep->lhsp(); | ||
UASSERT_OBJ(lhsp->width() == rhsp->width(), nodep, "Inconsistent assignment"); | ||
// Only consider pure assignments. Nodes inserted below are safe. | ||
if (!nodep->user1() && (!lhsp->isPure() || !rhsp->isPure())) return false; | ||
// Check for a Sel on the LHS if present, and skip over it | ||
uint32_t lsb = 0; | ||
if (AstSel* const selp = VN_CAST(lhsp, Sel)) { | ||
if (AstConst* const lsbp = VN_CAST(selp->lsbp(), Const)) { | ||
lhsp = selp->fromp(); | ||
lsb = lsbp->toUInt(); | ||
} else { | ||
// Don't optimize if it's a variable select | ||
return false; | ||
} | ||
} | ||
// No need to split assignments targeting storage smaller than a machine register | ||
if (lhsp->width() <= VL_QUADSIZE) return false; | ||
|
||
// If it's a concat straddling a word boundary, try to split it. | ||
// The next visit on the new nodes will split it recursively. | ||
// Otherwise, keep the original assignment. | ||
const int lsbWord = lsb / VL_EDATASIZE; | ||
const int msbWord = (lsb + rhsp->width() - 1) / VL_EDATASIZE; | ||
if (lsbWord == msbWord) return false; | ||
|
||
// If the RHS reads the LHS, we can't actually do this. Nodes inserted below are safe. | ||
if (!nodep->user1() && readsLhs(nodep)) return false; | ||
|
||
// Ok, actually split it now | ||
UINFO(5, "splitConcat optimizing " << nodep << "\n"); | ||
++m_concatSplits; | ||
// The 2 parts and their offsets | ||
AstNodeExpr* const rrp = rhsp->rhsp()->unlinkFrBack(); | ||
AstNodeExpr* const rlp = rhsp->lhsp()->unlinkFrBack(); | ||
const int rLsb = lsb; | ||
const int lLsb = lsb + rrp->width(); | ||
// Insert the 2 assignment right after the original. They will be visited next. | ||
AstAssign* const arp = new AstAssign{ | ||
nodep->fileline(), | ||
new AstSel{lhsp->fileline(), lhsp->cloneTreePure(false), rLsb, rrp->width()}, rrp}; | ||
AstAssign* const alp = new AstAssign{ | ||
nodep->fileline(), | ||
new AstSel{lhsp->fileline(), lhsp->unlinkFrBack(), lLsb, rlp->width()}, rlp}; | ||
nodep->addNextHere(arp); | ||
arp->addNextHere(alp); | ||
// Safe to split these. | ||
arp->user1(true); | ||
alp->user1(true); | ||
// Nuke what is left | ||
VL_DO_DANGLING(pushDeletep(nodep->unlinkFrBack()), nodep); | ||
return true; | ||
} | ||
|
||
// VISIT | ||
void visit(AstNodeAssign* nodep) override { | ||
// TODO: Only thing remaining inside functions should be AstAssign (that is, an actual | ||
// assignment statemant), but we stil use AstAssignW, AstAssignDly, and all, fix. | ||
if (v3Global.opt.fFuncSplitCat()) { | ||
if (splitConcat(nodep)) return; // Must return here, in case more code is added below | ||
} | ||
} | ||
|
||
void visit(AstNodeExpr*) override {} // No need to descend further (Ignore AstExprStmt...) | ||
|
||
void visit(AstNode* nodep) override { iterateChildren(nodep); } | ||
|
||
// CONSTRUCTORS | ||
explicit FuncOptVisitor(AstCFunc* funcp) { iterateChildren(funcp); } | ||
~FuncOptVisitor() override { | ||
V3Stats::addStatSum("Optimizations, FuncOpt concat splits", m_concatSplits); | ||
} | ||
|
||
public: | ||
static void apply(AstCFunc* funcp) { FuncOptVisitor{funcp}; } | ||
}; | ||
|
||
//###################################################################### | ||
|
||
// Run FuncOptVisitor on every AstCFunc found directly among the statements of each
// module in the netlist, then dump/check the tree.
void V3FuncOpt::funcOptAll(AstNetlist* nodep) {
    UINFO(2, __FUNCTION__ << ": " << endl);
    {
        // Both objects are scoped so that they are gone before the tree dump below.
        // NOTE(review): presumably V3ThreadScope waits for the enqueued jobs when it
        // goes out of scope - confirm against V3ThreadPool.h.
        const VNUser1InUse user1InUse;
        V3ThreadScope threadScope;
        AstNodeModule* modp = nodep->modulesp();
        while (modp) {
            // Fetch the successor up front, before handling the current module
            AstNodeModule* const nextModp = VN_AS(modp->nextp(), NodeModule);
            AstNode* stmtp = modp->stmtsp();
            while (stmtp) {
                // Same for statements: remember the successor first
                AstNode* const nextStmtp = stmtp->nextp();
                if (AstCFunc* const cfuncp = VN_CAST(stmtp, CFunc)) {
                    // One job per function
                    threadScope.enqueue([cfuncp]() { FuncOptVisitor::apply(cfuncp); });
                }
                stmtp = nextStmtp;
            }
            modp = nextModp;
        }
    }
    V3Global::dumpCheckGlobalTree("funcopt", 0, dumpTreeEitherLevel() >= 3);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
// -*- mode: C++; c-file-style: "cc-mode" -*- | ||
//************************************************************************* | ||
// DESCRIPTION: Verilator: Generic optimizations on a per function basis | ||
// | ||
// Code available from: https://verilator.org | ||
// | ||
//************************************************************************* | ||
// | ||
// Copyright 2003-2024 by Wilson Snyder. This program is free software; you | ||
// can redistribute it and/or modify it under the terms of either the GNU | ||
// Lesser General Public License Version 3 or the Perl Artistic License | ||
// Version 2.0. | ||
// SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 | ||
// | ||
//************************************************************************* | ||
|
||
#ifndef VERILATOR_V3FUNCOPT_H_ | ||
#define VERILATOR_V3FUNCOPT_H_ | ||
|
||
#include "config_build.h" | ||
#include "verilatedos.h" | ||
|
||
class AstNetlist; | ||
|
||
//============================================================================ | ||
|
||
// Holder for the entry point of the per-function optimization pass. | ||
// Only a static member is declared here; there is no instance state. | ||
class V3FuncOpt final { | ||
public: | ||
// Apply the per-function optimizations to the functions under the given netlist. | ||
static void funcOptAll(AstNetlist* nodep); | ||
}; | ||
|
||
#endif // Guard |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
#!/usr/bin/env python3 | ||
# DESCRIPTION: Verilator: Verilog Test driver/expect definition | ||
# | ||
# Copyright 2024 by Wilson Snyder. This program is free software; you | ||
# can redistribute it and/or modify it under the terms of either the GNU | ||
# Lesser General Public License Version 3 or the Perl Artistic License | ||
# Version 2.0. | ||
# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 | ||
|
||
import vltest_bootstrap

# NOTE(review): 'test' is not defined in this file; presumably the test harness
# provides it via vltest_bootstrap - confirm.
test.scenarios('vlt')

# Reuse the design of the DFG concat balancing test
test.top_filename = "t/t_dfg_balance_cats.v"

# Build with statistics enabled and the function optimization pass turned off
test.compile(verilator_flags2=["--stats", "-fno-func-opt"])

# Statistics lines to check, as (regex, expected value) pairs, checked in order
expected_stats = (
    (r' Optimizations, DFG pre inline BalanceTrees, concat trees balanced\s+(\d+)', 0),
    (r' Optimizations, DFG post inline BalanceTrees, concat trees balanced\s+(\d+)', 1),
    (r'Optimizations, DFG pre inline Dfg2Ast, result equations\s+(\d+)', 1),
    (r'Optimizations, DFG post inline Dfg2Ast, result equations\s+(\d+)', 1),
)
for stat_regex, stat_value in expected_stats:
    test.file_grep(test.stats, stat_regex, stat_value)

# With the pass disabled, its statistic must not show up at all
test.file_grep_not(test.stats, r'Optimizations, FuncOpt concat splits')

test.passes()