From 579e4fe2c84d55240565c1ae585efb8b0bd0a09d Mon Sep 17 00:00:00 2001 From: Geza Lore Date: Sun, 28 Apr 2024 17:55:05 +0100 Subject: [PATCH] Support NBAs to arrays inside loops For NBAs that might execute a dynamic number of times in a single evaluation (specifically: those that assign to array elements inside loops), we introduce a new run-time VlNBACommitQueue data-structure (currently a vector), which stores all pending updates and the necessary info to reconstruct the LHS reference of the AstAssignDly at run-time. All variables needing a commit queue has their corresponding unique commit queue. All NBAs to a variable that requires a commit queue go through the commit queue. This is necessary to preserve update order in sequential code, e.g.: a[7] <= 10 for (int i = 1 ; i < 10; ++i) a[i] <= i; a[2] <= 10 needs to end with array elements 1..9 being 1, 10, 3, 4, 5, 6, 7, 8, 9. This enables supporting common forms of NBAs to arrays on the left hand side of <= in non-suspendable/non-fork code. (Suspendable/fork implementation is unclear to me so I left it unchanged, see #5084). Any NBA that does not need a commit queue (i.e.: those that were supported before), use the same scheme as before, and this patch should have no effect on the generated code for those NBAs. --- include/verilated_types.h | 189 ++++++++++++- src/V3AstNodeDType.h | 24 ++ src/V3AstNodes.cpp | 7 + src/V3Delayed.cpp | 271 ++++++++++++++++--- src/V3EmitCImp.cpp | 1 + src/V3Localize.cpp | 2 + test_regress/t/t_nba_commit_queue.pl | 27 ++ test_regress/t/t_nba_commit_queue.v | 296 +++++++++++++++++++++ test_regress/t/t_order_blkloopinit_bad.out | 19 +- test_regress/t/t_order_blkloopinit_bad.pl | 1 + test_regress/t/t_order_blkloopinit_bad.v | 33 ++- 11 files changed, 823 insertions(+), 47 deletions(-) create mode 100755 test_regress/t/t_nba_commit_queue.pl create mode 100644 test_regress/t/t_nba_commit_queue.v diff --git a/include/verilated_types.h b/include/verilated_types.h index 21371bd47ff..77ec44b1d1b 100644 --- a/include/verilated_types.h +++ b/include/verilated_types.h @@ -415,8 +415,20 @@ class VlWriteMem final { static int _vl_cmp_w(int words, WDataInP const lwp, WDataInP const rwp) VL_PURE; +template +struct VlWide; + +// Type trait to check if a type is VlWide +template +struct IsVlWide : public std::false_type {}; + +template +struct IsVlWide> : public std::true_type {}; + template struct VlWide final { + static constexpr size_t Words = T_Words; + // MEMBERS // This should be the only data member, otherwise generated static initializers need updating EData m_storage[T_Words]; // Contents of the packed array @@ -1274,14 +1286,41 @@ void VL_WRITEMEM_N(bool hex, int bits, const std::string& filename, /// This class may get exposed to a Verilated Model's top I/O, if the top /// IO has an unpacked array. -template +template +struct VlUnpacked; + +// Type trait to check if a type is VlUnpacked +template +struct IsVlUnpacked : public std::false_type {}; + +template +struct IsVlUnpacked> : public std::true_type {}; + +template struct VlUnpacked final { private: // TYPES using T_Key = IData; // Index type, for uniformity with other containers using Unpacked = T_Value[T_Depth]; + struct RankBaseCase final { + static constexpr size_t Rank = 0; + }; + + template + struct BaseElementBaseCase final { + using BaseElement = T; + }; + public: + // Rank of this array. E.g.: VlUnpacked, _> have rank '2' + static constexpr size_t Rank + = std::conditional::value, T_Value, RankBaseCase>::type::Rank + 1; + + // Final non-array element type. + using BaseElement = typename std::conditional::value, T_Value, + BaseElementBaseCase>::type::BaseElement; + // MEMBERS // This should be the only data member, otherwise generated static initializers need updating Unpacked m_storage; // Contents of the unpacked array @@ -1511,6 +1550,154 @@ std::string VL_TO_STRING(const VlUnpacked& obj) { return obj.to_string(); } +//=================================================================== +// Helper to apply the given indices to a target expression + +template +struct VlApplyIndices final { + VL_ATTR_ALWINLINE + static auto& apply(T_Target& target, const size_t* indicesp) { + return VlApplyIndices::apply( + target[indicesp[Curr]], indicesp); + } +}; + +template +struct VlApplyIndices final { + VL_ATTR_ALWINLINE + static T_Target& apply(T_Target& target, const size_t*) { return target; } +}; + +//=================================================================== +// Commit queue for NBAs - currently only for unpacked arrays + +template +class VlNBACommitQueue; + +// Whole element updates only +template +class VlNBACommitQueue final { + static_assert(IsVlUnpacked::value, "'VlNBACommitQueue' only supports 'VlUnpacked'"); + + static constexpr size_t Rank = T_Target::Rank; + using Element = typename T_Target::BaseElement; + + struct Entry final { + Element value; + size_t indices[Rank]; + }; + + std::vector m_pending; // Pending updates, in program order + +public: + VlNBACommitQueue() = default; + ~VlNBACommitQueue() = default; + + template + void enqueue(const Element& value, Args... indices) { + m_pending.emplace_back(Entry{value, {indices...}}); + } + + // Note: T_Commit might be different from T_Target. Specifically, when the signal is a + // top-level IO port, T_Commit will be a native C array, while T_Target, willbe a VlUnpacked + template + void commit(T_Commit& target) { + if (m_pending.empty()) return; + for (const Entry& entry : m_pending) { + VlApplyIndices<0, Rank, T_Commit>::apply(target, entry.indices) = entry.value; + } + m_pending.clear(); + } +}; + +// With partial element updates +template +class VlNBACommitQueue final { + static_assert(IsVlUnpacked::value, "'VlNBACommitQueue' only supports 'VlUnpacked'"); + + static constexpr size_t Rank = T_Target::Rank; + using Element = typename T_Target::BaseElement; + + // Queue entry when partial elements might need updating + struct Entry final { + Element value; + Element mask; + size_t indices[Rank]; + }; + + std::vector m_pending; // Pending updates, in program order + + // Binary & | ~ for elements to use for masking in partial updates. Sorry for the templates. + template + VL_ATTR_ALWINLINE static typename std::enable_if::value, T_Elem>::type + bAnd(const T_Elem& a, const T_Elem& b) { + return a & b; + } + + template + VL_ATTR_ALWINLINE static typename std::enable_if::value, T_Elem>::type + bAnd(const T_Elem& a, const T_Elem& b) { + T_Elem result; + for (size_t i = 0; i < T_Elem::Words; ++i) { + result.m_storage[i] = a.m_storage[i] & b.m_storage[i]; + } + return result; + } + + template + VL_ATTR_ALWINLINE static typename std::enable_if::value, T_Elem>::type + bOr(const T_Elem& a, const T_Elem& b) { + return a | b; + } + + template + VL_ATTR_ALWINLINE static typename std::enable_if::value, T_Elem>::type + bOr(const T_Elem& a, const T_Elem& b) { + T_Elem result; + for (size_t i = 0; i < T_Elem::Words; ++i) { + result.m_storage[i] = a.m_storage[i] | b.m_storage[i]; + } + return result; + } + + template + VL_ATTR_ALWINLINE static typename std::enable_if::value, T_Elem>::type + bNot(const T_Elem& a) { + return ~a; + } + + template + VL_ATTR_ALWINLINE static typename std::enable_if::value, T_Elem>::type + bNot(const T_Elem& a) { + T_Elem result; + for (size_t i = 0; i < T_Elem::Words; ++i) result.m_storage[i] = ~a.m_storage[i]; + return result; + } + +public: + VlNBACommitQueue() = default; + ~VlNBACommitQueue() = default; + + template + void enqueue(const Element& value, const Element& mask, Args... indices) { + m_pending.emplace_back(Entry{value, mask, {indices...}}); + } + + // Note: T_Commit might be different from T_Target. Specifically, when the signal is a + // top-level IO port, T_Commit will be a native C array, while T_Target, willbe a VlUnpacked + template + void commit(T_Commit& target) { + if (m_pending.empty()) return; + for (const Entry& entry : m_pending) { // + auto& ref = VlApplyIndices<0, Rank, T_Commit>::apply(target, entry.indices); + // Maybe inefficient, but it works for now ... + const auto oldValue = ref; + ref = bOr(bAnd(entry.value, entry.mask), bAnd(oldValue, bNot(entry.mask))); + } + m_pending.clear(); + } +}; + //=================================================================== // Object that VlDeleter is capable of deleting diff --git a/src/V3AstNodeDType.h b/src/V3AstNodeDType.h index 4e781995e07..521c14c3d1d 100644 --- a/src/V3AstNodeDType.h +++ b/src/V3AstNodeDType.h @@ -959,6 +959,30 @@ class AstMemberDType final : public AstNodeDType { return false; } }; +class AstNBACommitQueueDType final : public AstNodeDType { + // @astgen ptr := m_targetDTypep : AstNodeDType // Type of the corresponding variable + const bool m_partial; // Partial element update required + +public: + AstNBACommitQueueDType(FileLine* fl, AstNodeDType* targetDTypep, bool partial) + : ASTGEN_SUPER_NBACommitQueueDType(fl) + , m_partial{partial} + , m_targetDTypep{targetDTypep} { + dtypep(this); + } + ASTGEN_MEMBERS_AstNBACommitQueueDType; + + AstNodeDType* targetDTypep() const { return m_targetDTypep; } + bool partial() const { return m_partial; } + bool similarDType(const AstNodeDType* samep) const override { return this == samep; } + AstBasicDType* basicp() const override { return nullptr; } + AstNodeDType* skipRefp() const override { return (AstNodeDType*)this; } + AstNodeDType* skipRefToConstp() const override { return (AstNodeDType*)this; } + AstNodeDType* skipRefToEnump() const override { return (AstNodeDType*)this; } + int widthAlignBytes() const override { return 1; } + int widthTotalBytes() const override { return 24; } + bool isCompound() const override { return true; } +}; class AstParamTypeDType final : public AstNodeDType { // Parents: MODULE // A parameter type statement; much like a var or typedef diff --git a/src/V3AstNodes.cpp b/src/V3AstNodes.cpp index 66369177752..31b7366d17f 100644 --- a/src/V3AstNodes.cpp +++ b/src/V3AstNodes.cpp @@ -788,6 +788,12 @@ AstNodeDType::CTypeRecursed AstNodeDType::cTypeRecurse(bool compound, bool packe info.m_type = "VlUnpacked<" + sub.m_type; info.m_type += ", " + cvtToStr(adtypep->declRange().elements()); info.m_type += ">"; + } else if (const auto* const adtypep = VN_CAST(dtypep, NBACommitQueueDType)) { + UASSERT_OBJ(!packed, this, "Unsupported type for packed struct or union"); + compound = true; + const CTypeRecursed sub = adtypep->targetDTypep()->cTypeRecurse(compound, false); + const char* const partialp = adtypep->partial() ? "true" : "false"; + info.m_type = "VlNBACommitQueue<" + sub.m_type + ", " + partialp + ">"; } else if (packed && (VN_IS(dtypep, PackArrayDType))) { const AstPackArrayDType* const adtypep = VN_CAST(dtypep, PackArrayDType); const CTypeRecursed sub = adtypep->subDTypep()->cTypeRecurse(false, true); @@ -2683,6 +2689,7 @@ void AstCMethodHard::setPurity() { {"commit", false}, {"delay", false}, {"done", false}, + {"enqueue", false}, {"erase", false}, {"evaluate", false}, {"evaluation", false}, diff --git a/src/V3Delayed.cpp b/src/V3Delayed.cpp index bdbb043871d..731c9b03360 100644 --- a/src/V3Delayed.cpp +++ b/src/V3Delayed.cpp @@ -28,7 +28,7 @@ // - Add new "Post-scheduled" logic: // a = __Vdly__a; // -// An array LHS: +// An array LHS, that is not inside a loop: // a[idxa][idxb] <= RHS // is converted: // - Add new "Pre_scheduled" logic: @@ -44,6 +44,23 @@ // Any AstAssignDly in a suspendable process or fork also uses the // '__VdlySet' flag based scheme, like arrays, with some modifications. // +// An array LHS, which is updated inside a loop (that is, we don't know +// statically how many updates there might be): +// a[idxa][idxb] <= RHS +// is converted to: +// - In the original logic, replace the AstAssignDelay with: +// __VdlyDim0__a = idxa; +// __VdlyDim1__a = idxb; +// __VdlyVal__a = RHS; +// __VdlyCommitQueue.enqueue(RHS, idxa, idxb); +// - Add new "Post-scheduled" logic: +// __VdlyCommitQueue.commit(array-on-LHS); +// +// Note that it is necessary to use the same commit queue for all NBAs +// targeting the same array, if any NBAs require a commit queue. Hence +// we can only decide whether to use a commit queue or not after having +// examined all NBAs. +// //************************************************************************* #include "V3PchAstNoMT.h" // VL_MT_DISABLED_CODE_UNIT @@ -90,6 +107,7 @@ class DelayedVisitor final : public VNVisitor { // AstAlwaysPost::user1() -> AstIf*. Last IF (__VdlySet__) created under this AlwaysPost // // Cleared each scope/active: + // AstVarScope::user2() -> AstVarScope*. Commit queue for this variable // AstVarRef::user2() -> bool. Set true if already processed // AstAssignDly::user2() -> AstVarScope*. __VdlySet__ created for this assign // AstAlwaysPost::user2() -> AstVarScope*. __VdlySet__ last referenced in IF @@ -108,6 +126,12 @@ class DelayedVisitor final : public VNVisitor { AstAlwaysPost* postp = nullptr; // First reference encountered to the VarScope const AstNodeVarRef* firstRefp = nullptr; + // If the implementation cannot be deferred, this is the location why + FileLine* nonDeferredFlp = nullptr; + // Variable requires a commit queue due to dynamic NBA + bool needsCommitQueue = false; + // Array element is partially updated by at least some NBA + bool hasPartialUpdate = false; }; AstUser1Allocator m_vscpAux; @@ -116,7 +140,6 @@ class DelayedVisitor final : public VNVisitor { std::set m_timingDomains; // Timing resume domains // Table of new var names created under module std::map, AstVar*> m_modVarMap; - VDouble0 m_statSharedSet; // Statistic tracking // STATE - for current visit position (use VL_RESTORER) AstActive* m_activep = nullptr; // Current activate @@ -132,6 +155,11 @@ class DelayedVisitor final : public VNVisitor { std::deque m_deferred; // Deferred AstAssignDly instances to lower at the end AstVarScope* m_setVscp = nullptr; // The last used set flag, for reuse + // STATE - Statistic tracking + VDouble0 m_statSharedSet; // "VdlySet" variables eliminated by reuse + VDouble0 m_commitQueuesWhole; // Variables needing a commit queue without partial updates + VDouble0 m_commitQueuesPartial; // Variables needing a commit queue with partial updates + // METHODS const AstNode* containingAssignment(const AstNode* nodep) { @@ -302,6 +330,9 @@ class DelayedVisitor final : public VNVisitor { // The referenced variable AstVarScope* const vscp = lhsComponents.refp->varScopep(); + // Location of the AstAssignDly + FileLine* const flp = nodep->fileline(); + // Name suffix for signals constructed below const std::string baseName = "__" + vscp->varp()->shortName() + "__v" + std::to_string(m_scopeVecMap[vscp]++); @@ -317,7 +348,6 @@ class DelayedVisitor final : public VNVisitor { if (VN_IS(exprp, Const)) return exprp; const std::string realName = "__" + name + baseName; AstVarScope* const tmpVscp = createNewVarScope(vscp, realName, exprp->dtypep()); - FileLine* const flp = exprp->fileline(); insert(new AstAssign{flp, new AstVarRef{flp, tmpVscp, VAccess::WRITE}, exprp}); return new AstVarRef{flp, tmpVscp, VAccess::READ}; }; @@ -337,9 +367,17 @@ class DelayedVisitor final : public VNVisitor { } if (m_inSuspendableOrFork) { - // Currently we convert all NBAs in suspendable blocks immediately - // TODO: error check that deferrence was not required - FileLine* const flp = nodep->fileline(); + if (m_inLoop && !lhsComponents.arrIdxps.empty()) { + nodep->v3warn(BLKLOOPINIT, + "Unsupported: Non-blocking assignment to array inside loop " + "in suspendable process or fork"); + return true; + } + + // Currently we convert all NBAs in suspendable blocks immediately. + if (!m_vscpAux(vscp).nonDeferredFlp) { + m_vscpAux(vscp).nonDeferredFlp = nodep->fileline(); + } // Get/Create 'Post' ordered block to commit the delayed value AstAlwaysPost* postp = m_vscpAux(vscp).suspPostp; @@ -371,6 +409,20 @@ class DelayedVisitor final : public VNVisitor { // The original AstAssignDly ('nodep') can be deleted return true; } else { + if (m_inLoop) { + UASSERT_OBJ(!lhsComponents.arrIdxps.empty(), nodep, "Should be an array"); + AstBasicDType* const basicp = vscp->dtypep()->basicp(); + if (!basicp + || !(basicp->isIntegralOrPacked() || basicp->isDouble() + || basicp->isString())) { + nodep->v3warn(BLKLOOPINIT, + "Unsupported: Non-blocking assignment to array with " + "compound element type inside loop"); + return true; + } + m_vscpAux(vscp).needsCommitQueue = true; + if (lhsComponents.selLsbp) m_vscpAux(vscp).hasPartialUpdate = true; + } // Record this AstAssignDly for deferred processing m_deferred.emplace_back(); Deferred& record = m_deferred.back(); @@ -421,39 +473,178 @@ class DelayedVisitor final : public VNVisitor { // Ensure it contains the current sensitivities checkActiveSense(lhsComponents.refp, activep, oActivep); - // Create the flag denoting an update is pending - if (record.consecutive) { - // Simplistic optimization. If the previous assignment was immediately before this - // assignment, we can reuse the existing flag. This is good for code like: - // arrayA[0] <= something; - // arrayB[1] <= something; - ++m_statSharedSet; - } else { - // Create new flag - m_setVscp = createNewVarScope(vscp, "__VdlySet" + record.suffix, 1); - // Set it here - insert(new AstAssign{flp, new AstVarRef{flp, m_setVscp, VAccess::WRITE}, - new AstConst{flp, AstConst::BitTrue{}}}); - // Add the 'Pre' ordered reset for the flag - activep->addStmtsp(new AstAssignPre{flp, new AstVarRef{flp, m_setVscp, VAccess::WRITE}, - new AstConst{flp, 0}}); - }; + if (m_vscpAux(vscp).needsCommitQueue) { + if (FileLine* const badFlp = m_vscpAux(vscp).nonDeferredFlp) { + insertionPointp->v3warn( + BLKLOOPINIT, + "Unsupported: Non-blocking assignment to array in both " + "loop and suspendable process/fork\n" + << badFlp->warnOther() + << "... Location of non-blocking assignment in suspendable process/fork\n" + << badFlp->warnContextSecondary() << insertionPointp->warnOther() + << "... Location of non-blocking assignment inside loop\n"); + return; + } - // Create/Get 'if (__VdlySet) { ... commit .. }' - AstIf* ifp = nullptr; - if (postp->user2p() == m_setVscp) { - // Optimize as above. If sharing VdlySet *ON SAME VARIABLE*, we can share the 'if' - ifp = VN_AS(postp->user1p(), If); + // Need special handling for variables that require partial updates + const bool partial = m_vscpAux(vscp).hasPartialUpdate; + + // If partial updates are required, construct the mask and the widened value + AstNodeExpr* valuep = record.rhsp; + AstNodeExpr* maskp = nullptr; + if (partial) { + // Type of array element + AstNodeDType* const eDType = [&]() -> AstNodeDType* { + AstNodeDType* dtypep = vscp->dtypep(); + while (AstUnpackArrayDType* const ap = VN_CAST(dtypep, UnpackArrayDType)) { + dtypep = ap->subDTypep(); + } + return dtypep; + }(); + + if (AstNodeExpr* const sLsbp = lhsComponents.selLsbp) { + // This is a partial assignment. Need to create a mask and widen the value to + // element size. + AstConst* const sWidthp = lhsComponents.selWidthp; + + // Create a temporary variable with the given name, and with 'eDType' type, + // with the bits selected by 'sLsbp'/'sWidthp' set to 'insertp'. Returns a + // read reference to the temporary variable. + auto createWidened = [&](const std::string& name, AstNodeExpr* insertp) { + // Create temporary variable. + AstVarScope* const tp + = createNewVarScope(vscp, name + record.suffix, eDType); + // Zero it + AstConst* const zerop = new AstConst{flp, AstConst::DTyped{}, eDType}; + zerop->num().setAllBits0(); + insert(new AstAssign{flp, new AstVarRef{flp, tp, VAccess::WRITE}, zerop}); + // Set the selected bits to 'insertp' + AstSel* const selp + = new AstSel{flp, new AstVarRef{flp, tp, VAccess::WRITE}, + sLsbp->cloneTreePure(true), sWidthp->cloneTreePure(true)}; + insert(new AstAssign{flp, selp, insertp}); + // This is the expression to get the value of the temporary + return new AstVarRef{flp, tp, VAccess::READ}; + }; + + // Create mask value + maskp = [&]() -> AstNodeExpr* { + // Constant mask we can compute here + if (AstConst* const cLsbp = VN_CAST(sLsbp, Const)) { + AstConst* const cp = new AstConst{flp, AstConst::DTyped{}, eDType}; + cp->num().setAllBits0(); + const int lsb = cLsbp->toSInt(); + const int msb = lsb + sWidthp->toSInt() - 1; + for (int bit = lsb; bit <= msb; ++bit) cp->num().setBit(bit, '1'); + return cp; + } + + // A non-constant mask we must compute at run-time. + AstConst* const onesp + = new AstConst{flp, AstConst::WidthedValue{}, sWidthp->toSInt(), 0}; + onesp->num().setAllBits1(); + return createWidened("__VdlyMask", onesp); + }(); + + // Adjust value to element size + valuep = [&]() -> AstNodeExpr* { + // Constant value with constant select we can compute here + if (AstConst* const cValuep = VN_CAST(valuep, Const)) { + if (AstConst* const cLsbp = VN_CAST(sLsbp, Const)) { // + AstConst* const cp = new AstConst{flp, AstConst::DTyped{}, eDType}; + cp->num().setAllBits0(); + cp->num().opSelInto(cValuep->num(), cLsbp->toSInt(), + sWidthp->toSInt()); + return cp; + } + } + + // A non-constant value we must adjust. + return createWidened("__VdlyElem", valuep); + }(); + + // Finished with the sel operands here + VL_DO_DANGLING(lhsComponents.selLsbp->deleteTree(), lhsComponents.selLsbp); + VL_DO_DANGLING(lhsComponents.selWidthp->deleteTree(), lhsComponents.selWidthp); + } else { + // If this assignment is not partial, set mask to ones and we are done + AstConst* const ones = new AstConst{flp, AstConst::DTyped{}, eDType}; + ones->num().setAllBits1(); + maskp = ones; + } + } + + // Create/get the commit queue + if (!vscp->user2p()) { + FileLine* const vflp = vscp->fileline(); + + // Statistics + if (partial) { + ++m_commitQueuesPartial; + } else { + ++m_commitQueuesWhole; + } + + // Create the commit queue variable + auto* const cqDTypep = new AstNBACommitQueueDType{vflp, vscp->dtypep(), partial}; + v3Global.rootp()->typeTablep()->addTypesp(cqDTypep); + const std::string name = "__VdlyCommitQueue" + record.suffix; + AstVarScope* const newCqp = createNewVarScope(vscp, name, cqDTypep); + newCqp->varp()->noReset(true); + vscp->user2p(newCqp); + + // Commit it in the 'Post' block + AstCMethodHard* const callp = new AstCMethodHard{ + vflp, new AstVarRef{vflp, newCqp, VAccess::READWRITE}, "commit"}; + callp->dtypeSetVoid(); + callp->addPinsp(lhsComponents.refp->cloneTreePure(false)); + postp->addStmtsp(callp->makeStmt()); + } + AstVarScope* const cqp = VN_AS(vscp->user2p(), VarScope); + + // Enqueue the update at the original location + AstCMethodHard* const callp + = new AstCMethodHard{flp, new AstVarRef{flp, cqp, VAccess::READWRITE}, "enqueue"}; + callp->dtypeSetVoid(); + callp->addPinsp(valuep); + if (maskp) callp->addPinsp(maskp); + for (AstNodeExpr* const indexp : lhsComponents.arrIdxps) callp->addPinsp(indexp); + insert(callp->makeStmt()); } else { - ifp = new AstIf{flp, new AstVarRef{flp, m_setVscp, VAccess::READ}}; - postp->addStmtsp(ifp); - postp->user1p(ifp); // Remember the associated 'AstIf' - postp->user2p(m_setVscp); // Remember the VdlySet variable used as the condition. - } + // Create the flag denoting an update is pending + if (record.consecutive) { + // Simplistic optimization. If the previous assignment was immediately before this + // assignment, we can reuse the existing flag. This is good for code like: + // arrayA[0] <= something; + // arrayB[1] <= something; + ++m_statSharedSet; + } else { + // Create new flag + m_setVscp = createNewVarScope(vscp, "__VdlySet" + record.suffix, 1); + // Set it here + insert(new AstAssign{flp, new AstVarRef{flp, m_setVscp, VAccess::WRITE}, + new AstConst{flp, AstConst::BitTrue{}}}); + // Add the 'Pre' ordered reset for the flag + activep->addStmtsp(new AstAssignPre{ + flp, new AstVarRef{flp, m_setVscp, VAccess::WRITE}, new AstConst{flp, 0}}); + }; - // Finally assign the delayed value to the reconstructed LHS - AstNodeExpr* const newLhsp = reconstructLhs(lhsComponents, flp); - ifp->addThensp(new AstAssign{flp, newLhsp, record.rhsp}); + // Create/Get 'if (__VdlySet) { ... commit .. }' + AstIf* ifp = nullptr; + if (postp->user2p() == m_setVscp) { + // Optimize as above. If sharing VdlySet *ON SAME VARIABLE*, we can share the 'if' + ifp = VN_AS(postp->user1p(), If); + } else { + ifp = new AstIf{flp, new AstVarRef{flp, m_setVscp, VAccess::READ}}; + postp->addStmtsp(ifp); + postp->user1p(ifp); // Remember the associated 'AstIf' + postp->user2p(m_setVscp); // Remember the VdlySet variable used as the condition. + } + + // Finally assign the delayed value to the reconstructed LHS + AstNodeExpr* const newLhsp = reconstructLhs(lhsComponents, flp); + ifp->addThensp(new AstAssign{flp, newLhsp, record.rhsp}); + } // If the original AstAssignDelay was kept as a placeholder, we can nuke it now. if (dlyp) pushDeletep(dlyp->unlinkFrBack()); @@ -582,10 +773,6 @@ class DelayedVisitor final : public VNVisitor { || (VN_IS(nodep->lhsp(), Sel) && VN_IS(VN_AS(nodep->lhsp(), Sel)->fromp(), ArraySel)); if (isArray) { - if (m_inLoop) { - nodep->v3warn(BLKLOOPINIT, "Unsupported: Delayed assignment to array inside for " - "loops (non-delayed is ok - see docs)"); - } if (const AstBasicDType* const basicp = nodep->lhsp()->dtypep()->basicp()) { // TODO: this message is not covered by tests if (basicp->isEvent()) nodep->v3warn(E_UNSUPPORTED, "Unsupported: event arrays"); @@ -684,6 +871,10 @@ class DelayedVisitor final : public VNVisitor { explicit DelayedVisitor(AstNetlist* nodep) { iterate(nodep); } ~DelayedVisitor() override { V3Stats::addStat("Optimizations, Delayed shared-sets", m_statSharedSet); + V3Stats::addStat("Dynamic NBA, variables needing commit queue with partial updates", + m_commitQueuesPartial); + V3Stats::addStat("Dynamic NBA, variables needing commit queue without partial updates", + m_commitQueuesWhole); } }; diff --git a/src/V3EmitCImp.cpp b/src/V3EmitCImp.cpp index 9fef152b4b2..3bfebebb5f3 100644 --- a/src/V3EmitCImp.cpp +++ b/src/V3EmitCImp.cpp @@ -389,6 +389,7 @@ class EmitCImp final : EmitCFunc { } else if (varp->isParam()) { } else if (varp->isStatic() && varp->isConst()) { } else if (varp->basicp() && varp->basicp()->isTriggerVec()) { + } else if (VN_IS(varp->dtypep(), NBACommitQueueDType)) { } else { int vects = 0; AstNodeDType* elementp = varp->dtypeSkipRefp(); diff --git a/src/V3Localize.cpp b/src/V3Localize.cpp index 0b59c88884e..64692fbd478 100644 --- a/src/V3Localize.cpp +++ b/src/V3Localize.cpp @@ -63,6 +63,8 @@ class LocalizeVisitor final : public VNVisitor { // METHODS bool isOptimizable(AstVarScope* nodep) { + // Don't want to malloc/free the backing store all the time + if (VN_IS(nodep->dtypep(), NBACommitQueueDType)) return false; return ((!nodep->user1() // Not marked as not optimizable, or ... // .. a block temp used in a single CFunc || (nodep->varp()->varType() == VVarType::BLOCKTEMP diff --git a/test_regress/t/t_nba_commit_queue.pl b/test_regress/t/t_nba_commit_queue.pl new file mode 100755 index 00000000000..c7b151c9ce4 --- /dev/null +++ b/test_regress/t/t_nba_commit_queue.pl @@ -0,0 +1,27 @@ +#!/usr/bin/env perl +if (!$::Driver) { use FindBin; exec("$FindBin::Bin/bootstrap.pl", @ARGV, $0); die; } +# DESCRIPTION: Verilator: Verilog Test driver/expect definition +# +# Copyright 2024 by Wilson Snyder. This program is free software; you +# can redistribute it and/or modify it under the terms of either the GNU +# Lesser General Public License Version 3 or the Perl Artistic License +# Version 2.0. +# SPDX-License-Identifier: LGPL-3.0-only OR Artistic-2.0 + +scenarios(vlt_all => 1); + +compile( + verilator_flags2 => ["-unroll-count 1", "--stats"], + ); + +execute( + check_finished => 1, + ); + +file_grep($Self->{stats}, qr/Dynamic NBA, variables needing commit queue without partial updates\s+(\d+)/i, + 6); +file_grep($Self->{stats}, qr/Dynamic NBA, variables needing commit queue with partial updates\s+(\d+)/i, + 3); + +ok(1); +1; diff --git a/test_regress/t/t_nba_commit_queue.v b/test_regress/t/t_nba_commit_queue.v new file mode 100644 index 00000000000..51c8c699c4f --- /dev/null +++ b/test_regress/t/t_nba_commit_queue.v @@ -0,0 +1,296 @@ +// DESCRIPTION: Verilator: Verilog Test module +// +// This file ONLY is placed under the Creative Commons Public Domain, for +// any use, without warranty, 2024 by Wilson Snyder. +// SPDX-License-Identifier: CC0-1.0 + +`define check(got ,exp) do \ + if ((got) !== (exp)) begin \ + $write("%%Error: %s:%0d: cyc=%0d result='h%x expected='h%x\n", `__FILE__,`__LINE__, cyc, (got), (exp)); \ + $stop; \ + end \ + while(0) + +`define checkReal(got ,exp) do \ + if ((got) !== (exp)) begin \ + $write("%%Error: %s:%0d: cyc=%0d result=%f expected=%f\n", `__FILE__,`__LINE__, cyc, (got), (exp)); \ + $stop; \ + end \ + while(0) + +`define checkString(got ,exp) do \ + if ((got) !== (exp)) begin \ + $write("%%Error: %s:%0d: cyc=%0d result=\"%s\" expected=\"%s\"\n", `__FILE__,`__LINE__, cyc, (got), (exp)); \ + $stop; \ + end \ + while(0) + + + +module t(clk); + input clk; + + logic [31:0] cyc = 0; + always @(posedge clk) begin + cyc <= cyc + 1; + if (cyc == 99) begin + $write("*-* All Finished *-*\n"); + $finish; + end + end + + reg [63:0] crc = 64'h5aef0c8d_d70a4497; + always @ (posedge clk) crc <= {crc[62:0], crc[63] ^ crc[2] ^ crc[0]}; + +`define at_posedge_clk_on_cycle(n) always @(posedge clk) if (cyc == n) + + // Case 1: narrow packed variable, whole element updates only - 1D + logic [31:0] array1 [128]; + `at_posedge_clk_on_cycle(0) begin + for (int i = 0 ; i < 128; ++i) array1[i] = 0; + for (int i = 0 ; i < 128; ++i) `check(array1[i], 0); + end + `at_posedge_clk_on_cycle(1) begin + for (int i = 0 ; i < 128; ++i) `check(array1[i], 0); + for (int i = 0 ; i < 128; ++i) array1[i] <= i; + for (int i = 0 ; i < 128; ++i) `check(array1[i], 0); + end + `at_posedge_clk_on_cycle(2) begin + for (int i = 0 ; i < 128; ++i) `check(array1[i], i); + for (int i = 0 ; i < 128; ++i) array1[i] <= ~i; + for (int i = 0 ; i < 128; ++i) `check(array1[i], i); + end + `at_posedge_clk_on_cycle(3) begin + for (int i = 0 ; i < 128; ++i) `check(array1[i], ~i); + for (int i = 0 ; i < 128; ++i) array1[i] <= -1; + for (int i = 0 ; i < 128; ++i) `check(array1[i], ~i); + end + `at_posedge_clk_on_cycle(4) begin + for (int i = 0 ; i < 128; ++i) `check(array1[i], -1); + end + + // Case 2: wide packed variable, whole element updates only - 1D + logic [127:0] array2 [128]; + `at_posedge_clk_on_cycle(0) begin + for (int i = 0 ; i < 128; ++i) array2[i] = 0; + for (int i = 0 ; i < 128; ++i) `check(array2[i], 0); + end + `at_posedge_clk_on_cycle(1) begin + for (int i = 0 ; i < 128; ++i) `check(array2[i], 0); + for (int i = 0 ; i < 128; ++i) array2[i] <= {4{i}}; + for (int i = 0 ; i < 128; ++i) `check(array2[i], 0); + end + `at_posedge_clk_on_cycle(2) begin + for (int i = 0 ; i < 128; ++i) `check(array2[i], {4{i}}); + for (int i = 0 ; i < 128; ++i) array2[i] <= {4{~i}}; + for (int i = 0 ; i < 128; ++i) `check(array2[i], {4{i}}); + end + `at_posedge_clk_on_cycle(3) begin + for (int i = 0 ; i < 128; ++i) `check(array2[i], {4{~i}}); + for (int i = 0 ; i < 128; ++i) array2[i] <= '1; + for (int i = 0 ; i < 128; ++i) `check(array2[i], {4{~i}}); + end + `at_posedge_clk_on_cycle(4) begin + for (int i = 0 ; i < 128; ++i) `check(array2[i], ~128'b0); + end + + + // Case 3: wide packed variable, whole element updates only - 2D + logic [127:0] array3 [128][512]; + `at_posedge_clk_on_cycle(0) begin + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) array3[i][j] = 0; + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) `check(array3[i][j], 0); + end + `at_posedge_clk_on_cycle(1) begin + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) `check(array3[i][j], 0); + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) array3[i][j] <= {4{i}}; + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) `check(array3[i][j], 0); + end + `at_posedge_clk_on_cycle(2) begin + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) `check(array3[i][j], {4{i}}); + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) array3[i][j] <= {4{~i}}; + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) `check(array3[i][j], {4{i}}); + end + `at_posedge_clk_on_cycle(3) begin + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) `check(array3[i][j], {4{~i}}); + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) array3[i][j] <= '1; + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) `check(array3[i][j], {4{~i}}); + end + `at_posedge_clk_on_cycle(4) begin + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 512; ++j) `check(array3[i][j], ~128'b0); + end + + // Case 4: real + real array4 [128]; + `at_posedge_clk_on_cycle(0) begin + for (int i = 0 ; i < 128; ++i) array4[i] = 1e-5; + for (int i = 0 ; i < 128; ++i) `checkReal(array4[i], 1e-5); + end + `at_posedge_clk_on_cycle(1) begin + for (int i = 0 ; i < 128; ++i) `checkReal(array4[i], 1e-5); + for (int i = 0 ; i < 128; ++i) array4[i] <= 3.14*real'(i); + for (int i = 0 ; i < 128; ++i) `checkReal(array4[i], 1e-5); + end + `at_posedge_clk_on_cycle(2) begin + for (int i = 0 ; i < 128; ++i) `checkReal(array4[i], 3.14*real'(i)); + for (int i = 0 ; i < 128; ++i) array4[i] <= 2.78*real'(i); + for (int i = 0 ; i < 128; ++i) `checkReal(array4[i], 3.14*real'(i)); + end + `at_posedge_clk_on_cycle(3) begin + for (int i = 0 ; i < 128; ++i) `checkReal(array4[i], 2.78*real'(i)); + for (int i = 0 ; i < 128; ++i) array4[i] <= 1e50; + for (int i = 0 ; i < 128; ++i) `checkReal(array4[i], 2.78*real'(i)); + end + `at_posedge_clk_on_cycle(4) begin + for (int i = 0 ; i < 128; ++i) `checkReal(array4[i], 1e50); + end + + // Case 5: narrow packed variable, partial element updates - 1D + logic [31:0] array5 [128]; + `at_posedge_clk_on_cycle(0) begin + for (int i = 0 ; i < 128; ++i) array5[i] = -1; + for (int i = 0 ; i < 128; ++i) `check(array5[i], -1); + end + `at_posedge_clk_on_cycle(1) begin + for (int i = 0 ; i < 128; ++i) `check(array5[i], -1); + for (int i = 0 ; i < 128; ++i) array5[i][0] <= 1'b0; + for (int i = 0 ; i < 128; ++i) array5[i][1] <= 1'b0; + for (int i = 0 ; i < 128; ++i) array5[i][2] <= 1'b0; + for (int i = 0 ; i < 128; ++i) array5[i][1] <= 1'b1; + for (int i = 0 ; i < 128; ++i) `check(array5[i], -1); + end + `at_posedge_clk_on_cycle(2) begin + for (int i = 0 ; i < 128; ++i) `check(array5[i], 32'hffff_fffa); + for (int i = 0 ; i < 128; ++i) array5[i][18:16] <= i[3:1]; + for (int i = 0 ; i < 128; ++i) array5[i][19:17] <= ~i[2:0]; + for (int i = 0 ; i < 128; ++i) `check(array5[i], 32'hffff_fffa); + end + `at_posedge_clk_on_cycle(3) begin + for (int i = 0 ; i < 128; ++i) `check(array5[i], {12'hfff, ~i[2:0], i[1], 16'hfffa}); + for (int i = 0 ; i < 128; ++i) array5[i] <= -1; + for (int i = 0 ; i < 128; ++i) `check(array5[i], {12'hfff, ~i[2:0], i[1], 16'hfffa}); + end + `at_posedge_clk_on_cycle(4) begin + for (int i = 0 ; i < 128; ++i) `check(array5[i], -1); + end + + // Case 6: wide packed variable, partial element updates - 1D + logic [99:0] array6 [128]; + `at_posedge_clk_on_cycle(0) begin + for (int i = 0 ; i < 128; ++i) array6[i] = -1; + for (int i = 0 ; i < 128; ++i) `check(array6[i], -1); + end + `at_posedge_clk_on_cycle(1) begin + for (int i = 0 ; i < 128; ++i) `check(array6[i], -1); + for (int i = 0 ; i < 128; ++i) array6[i][80] <= 1'b0; + for (int i = 0 ; i < 128; ++i) array6[i][81] <= 1'b0; + for (int i = 0 ; i < 128; ++i) array6[i][82] <= 1'b0; + for (int i = 0 ; i < 128; ++i) array6[i][81] <= 1'b1; + for (int i = 0 ; i < 128; ++i) `check(array6[i], -1); + end + `at_posedge_clk_on_cycle(2) begin + for (int i = 0 ; i < 128; ++i) `check(array6[i], 100'hf_fffa_ffff_ffff_ffff_ffff_ffff); + for (int i = 0 ; i < 128; ++i) array6[i][86:84] <= ~i[3:1]; + for (int i = 0 ; i < 128; ++i) array6[i][87:85] <= i[2:0]; + for (int i = 0 ; i < 128; ++i) `check(array6[i], 100'hf_fffa_ffff_ffff_ffff_ffff_ffff); + end + `at_posedge_clk_on_cycle(3) begin + for (int i = 0 ; i < 128; ++i) `check(array6[i], {12'hfff, i[2:0], ~i[1], 84'ha_ffff_ffff_ffff_ffff_ffff}); + for (int i = 0 ; i < 128; ++i) array6[i] <= -1; + for (int i = 0 ; i < 128; ++i) `check(array6[i], {12'hfff, i[2:0], ~i[1], 84'ha_ffff_ffff_ffff_ffff_ffff}); + end + `at_posedge_clk_on_cycle(4) begin + for (int i = 0 ; i < 128; ++i) `check(array6[i], -1); + end + + // Case 7: variable partial updates + logic [99:0] array7_nba [128][256]; + logic [99:0] array7_ref [128][256]; + always @(posedge clk) begin + if (cyc == 0) begin + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 256; ++j) array7_nba[i][j] = 100'b0; + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 256; ++j) array7_ref[i][j] = ~100'b0; + end else begin + for (int i = 0 ; i < 128; ++i) for (int j = 0 ; j < 256; ++j) `check(array7_nba[i][j], ~array7_ref[i][j]); + for (int i = 0 ; i < 128; ++i) begin + for (int j = 0 ; j < 256; ++j) begin + array7_nba[i[6:0] ^ crc[30+:7]][j[7:0] ^ crc[10+:8]][7'((crc % 10) * 5) +: 5] <= ~crc[4+:5]; + array7_ref[i[6:0] ^ crc[30+:7]][j[7:0] ^ crc[10+:8]][7'((crc % 10) * 5) +: 5] = crc[4+:5]; + end + end + end + end + + // Case 8: Mixed dynamic/non-dynamic + longint array8 [4]; + `at_posedge_clk_on_cycle(0) begin + array8[0] <= 0; + array8[1] <= 0; + array8[2] <= 0; + array8[3] <= 0; + end + `at_posedge_clk_on_cycle(1) begin + `check(array8[0], 0); + `check(array8[1], 0); + `check(array8[2], 0); + `check(array8[3], 0); + array8[1] <= 42; + array8[3] <= 63; + for (int i = 1 ; i < 3 ; ++i) array8[i] <= 2*i + 7; + array8[1] <= 74; + end + `at_posedge_clk_on_cycle(3) begin + `check(array8[0], 0); + `check(array8[1], 74); + `check(array8[2], 11); + `check(array8[3], 63); + end + + // Case 9: string + string array9 [10]; + `at_posedge_clk_on_cycle(0) begin + for (int i = 0 ; i < 10; ++i) array9[i] = "squid"; + for (int i = 0 ; i < 10; ++i) `checkString(array9[i], "squid"); + end + `at_posedge_clk_on_cycle(1) begin + for (int i = 0 ; i < 10; ++i) `checkString(array9[i], "squid"); + for (int i = 0 ; i < 10; ++i) array9[i] <= "octopus"; + for (int i = 0 ; i < 10; ++i) `checkString(array9[i], "squid"); + end + `at_posedge_clk_on_cycle(2) begin + for (int i = 0 ; i < 10; ++i) `checkString(array9[i], "octopus"); + for (int i = 1 ; i < 9; ++i) begin + string tmp; + $sformat(tmp, "%0d-legged-cephalopod", i); + array9[i] <= tmp; + end + for (int i = 0 ; i < 10; ++i) `checkString(array9[i], "octopus"); + end + `at_posedge_clk_on_cycle(3) begin + `checkString(array9[0], "octopus"); + `checkString(array9[1], "1-legged-cephalopod"); + `checkString(array9[2], "2-legged-cephalopod"); + `checkString(array9[3], "3-legged-cephalopod"); + `checkString(array9[4], "4-legged-cephalopod"); + `checkString(array9[5], "5-legged-cephalopod"); + `checkString(array9[6], "6-legged-cephalopod"); + `checkString(array9[7], "7-legged-cephalopod"); + `checkString(array9[8], "8-legged-cephalopod"); + `checkString(array9[9], "octopus"); + for (int i = 0 ; i < 10; ++i) array9[i] <= "cuttlefish"; + `checkString(array9[0], "octopus"); + `checkString(array9[1], "1-legged-cephalopod"); + `checkString(array9[2], "2-legged-cephalopod"); + `checkString(array9[3], "3-legged-cephalopod"); + `checkString(array9[4], "4-legged-cephalopod"); + `checkString(array9[5], "5-legged-cephalopod"); + `checkString(array9[6], "6-legged-cephalopod"); + `checkString(array9[7], "7-legged-cephalopod"); + `checkString(array9[8], "8-legged-cephalopod"); + `checkString(array9[9], "octopus"); + end + `at_posedge_clk_on_cycle(4) begin + for (int i = 0 ; i < 10; ++i) `checkString(array9[i], "cuttlefish"); + end + +endmodule diff --git a/test_regress/t/t_order_blkloopinit_bad.out b/test_regress/t/t_order_blkloopinit_bad.out index 56fd5c3ecf3..d34918ab93e 100644 --- a/test_regress/t/t_order_blkloopinit_bad.out +++ b/test_regress/t/t_order_blkloopinit_bad.out @@ -1,5 +1,18 @@ -%Error-BLKLOOPINIT: t/t_order_blkloopinit_bad.v:21:19: Unsupported: Delayed assignment to array inside for loops (non-delayed is ok - see docs) - 21 | array[i] <= 0; - | ^~ +%Error-BLKLOOPINIT: t/t_order_blkloopinit_bad.v:26:20: Unsupported: Non-blocking assignment to array inside loop in suspendable process or fork + : ... note: In instance 't' + 26 | array1[i] <= 0; + | ^~ ... For error description see https://verilator.org/warn/BLKLOOPINIT?v=latest +%Error-BLKLOOPINIT: t/t_order_blkloopinit_bad.v:36:20: Unsupported: Non-blocking assignment to array with compound element type inside loop + : ... note: In instance 't' + 36 | array2[i] <= null; + | ^~ +%Error-BLKLOOPINIT: t/t_order_blkloopinit_bad.v:45:20: Unsupported: Non-blocking assignment to array in both loop and suspendable process/fork + : ... note: In instance 't' + t/t_order_blkloopinit_bad.v:50:20: ... Location of non-blocking assignment in suspendable process/fork + 50 | #1 array3[0] <= 0; + | ^~ + t/t_order_blkloopinit_bad.v:45:20: ... Location of non-blocking assignment inside loop + 45 | array3[i] <= 0; + | ^~ %Error: Exiting due to diff --git a/test_regress/t/t_order_blkloopinit_bad.pl b/test_regress/t/t_order_blkloopinit_bad.pl index a5846c69938..e3fd94eb828 100755 --- a/test_regress/t/t_order_blkloopinit_bad.pl +++ b/test_regress/t/t_order_blkloopinit_bad.pl @@ -11,6 +11,7 @@ scenarios(vlt => 1); lint( + verilator_flags2 => ["--timing"], fails => 1, expect_filename => $Self->{golden_filename}, ); diff --git a/test_regress/t/t_order_blkloopinit_bad.v b/test_regress/t/t_order_blkloopinit_bad.v index d2cef2b314d..db0581a5add 100644 --- a/test_regress/t/t_order_blkloopinit_bad.v +++ b/test_regress/t/t_order_blkloopinit_bad.v @@ -4,6 +4,8 @@ // any use, without warranty, 2020 by Wilson Snyder. // SPDX-License-Identifier: CC0-1.0 +// verilator lint_off MULTIDRIVEN + module t (/*AUTOARG*/ // Outputs o, @@ -14,13 +16,38 @@ module t (/*AUTOARG*/ output int o; localparam SIZE = 65536; - int array [SIZE]; + // Unsupported case 1: Array NBA inside suspendable + int array1 [SIZE]; + always @ (posedge clk) begin + #1; + o <= array1[1]; + for (int i=0; i