From 009d3e2abece7ce208e1080d9665bd0330443a1c Mon Sep 17 00:00:00 2001 From: Guillaume Ballet <3272758+gballet@users.noreply.github.com> Date: Thu, 22 Feb 2024 16:03:51 +0100 Subject: [PATCH] persist conversion state to db and use an LRU cache for active transition states (#375) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * persist conversion state to db * fix: don't recreate LRU when writing state * opt: only write state to db if not already present in LRU * fix: rlp can't encode TransitionState * fix: use gob because binary.Write does not support structs 🤦‍♂️ * fix: nil pointer panic * add logs to debug shadowfork * no such thing as not enough traces * ditto * fix stupid bug * add a comment for readability * add more traces * Lock the state transition during conversion (#384) * heavy handed approach: lock the state transition during conversion * also lock transition state loading/unloading * reduce logs verbosity * add conversion test to workflow (#386) * add conversion test to workflow * mandatory -f switch fix in rm * forgot & at the end of the geth command * remove incorrect kill * add debug traces * add an overlay stride * fix typo * Apply suggestions from code review --- .github/workflows/conversion.yml | 75 +++++++++++++++++++ consensus/beacon/consensus.go | 2 + core/overlay/conversion.go | 7 +- core/rawdb/accessors_overlay.go | 30 ++++++++ core/rawdb/schema.go | 7 ++ core/state/database.go | 119 ++++++++++++++++++++++--------- core/state_processor.go | 6 -- light/trie.go | 7 ++ 8 files changed, 212 insertions(+), 41 deletions(-) create mode 100644 .github/workflows/conversion.yml create mode 100644 core/rawdb/accessors_overlay.go diff --git a/.github/workflows/conversion.yml b/.github/workflows/conversion.yml new file mode 100644 index 000000000000..40d4fed6925d --- /dev/null +++ b/.github/workflows/conversion.yml @@ -0,0 +1,75 @@ +name: Overlay conversion + +on: + push: + branches: [ master, 
transition-post-genesis, store-transition-state-in-db ] + pull_request: + branches: [ master, kaustinen-with-shapella, transition-post-genesis, store-transition-state-in-db, lock-overlay-transition ] + workflow_dispatch: + +jobs: + build: + runs-on: self-hosted + steps: + - uses: actions/checkout@v2 + - name: Set up Go + uses: actions/setup-go@v2 + with: + go-version: 1.21.1 + + - name: Cleanup from previous runs + run: | + rm -f log.txt + rm -rf .shadowfork + rm -f genesis.json + + - name: Download genesis file + run: wget https://gist.githubusercontent.com/gballet/0b02a025428aa0e7b67941864d54716c/raw/bfb4e158bca5217b356a19b2ec55c4a45a7b2bad/genesis.json + + - name: Init data + run: go run ./cmd/geth --dev --cache.preimages init genesis.json + + - name: Run geth in devmode + run: go run ./cmd/geth --dev --dev.period=5 --cache.preimages --http --datadir=.shadowfork --override.overlay-stride=10 --override.prague=$(($(date +%s) + 45)) > log.txt & + + - name: Wait for the transition to start + run: | + start_time=$(date +%s) + while true; do + sleep 5 + current_time=$(date +%s) + elapsed_time=$((current_time - start_time)) + + # 2 minute timeout + if [ $elapsed_time -ge 120 ]; then + kill -9 $(pgrep -f geth) + exit 1 + fi + + # Check for signs that the conversion has started + if grep -q "Processing verkle conversion starting at" log.txt; then + break + fi + done + + - name: Wait for the transition to end + run: | + start_time=$(date +%s) + while true; do + sleep 5 + current_time=$(date +%s) + elapsed_time=$((current_time - start_time)) + + # 5 minute timeout + if [ $elapsed_time -ge 300 ]; then + cat log.txt + kill -9 $(pgrep -f geth) + exit 1 + fi + + # Check for signs that the conversion has ended + if egrep -q "at block.*performing transition\? 
false" log.txt; then + kill -9 $(pgrep -f geth) + break + fi + done diff --git a/consensus/beacon/consensus.go b/consensus/beacon/consensus.go index 7c93844f39c9..62d9f48feb2a 100644 --- a/consensus/beacon/consensus.go +++ b/consensus/beacon/consensus.go @@ -411,6 +411,8 @@ func (beacon *Beacon) FinalizeAndAssemble(chain consensus.ChainHeaderReader, hea return nil, fmt.Errorf("nil parent header for block %d", header.Number) } + // Load transition state at beginning of block, because + // OpenTrie needs to know what the conversion status is. state.Database().LoadTransitionState(parent.Root) if chain.Config().ProofInBlocks { diff --git a/core/overlay/conversion.go b/core/overlay/conversion.go index 89d8dc8079ca..bf77ff0033f1 100644 --- a/core/overlay/conversion.go +++ b/core/overlay/conversion.go @@ -221,6 +221,8 @@ func (kvm *keyValueMigrator) migrateCollectedKeyValues(tree *trie.VerkleTrie) er // OverlayVerkleTransition contains the overlay conversion logic func OverlayVerkleTransition(statedb *state.StateDB, root common.Hash, maxMovedCount uint64) error { migrdb := statedb.Database() + migrdb.LockCurrentTransitionState() + defer migrdb.UnLockCurrentTransitionState() // verkle transition: if the conversion process is in progress, move // N values from the MPT into the verkle tree. 
@@ -289,7 +291,7 @@ func OverlayVerkleTransition(statedb *state.StateDB, root common.Hash, maxMovedC for count < maxMovedCount { acc, err := types.FullAccount(accIt.Account()) if err != nil { - log.Error("Invalid account encountered during traversal", "error", err) + fmt.Println("Invalid account encountered during traversal", "error", err) return err } vkt.SetStorageRootConversion(*migrdb.GetCurrentAccountAddress(), acc.Root) @@ -399,7 +401,6 @@ func OverlayVerkleTransition(statedb *state.StateDB, root common.Hash, maxMovedC return fmt.Errorf("account address len is zero is not 20: %d", len(addr)) } } - // fmt.Printf("account switch: %s != %s\n", crypto.Keccak256Hash(addr[:]), accIt.Hash()) if crypto.Keccak256Hash(addr[:]) != accIt.Hash() { return fmt.Errorf("preimage file does not match account hash: %s != %s", crypto.Keccak256Hash(addr[:]), accIt.Hash()) } @@ -416,7 +417,7 @@ func OverlayVerkleTransition(statedb *state.StateDB, root common.Hash, maxMovedC } migrdb.SetCurrentPreimageOffset(preimageSeek) - log.Info("Collected key values from base tree", "count", count, "duration", time.Since(now), "last account", statedb.Database().GetCurrentAccountHash(), "storage processed", statedb.Database().GetStorageProcessed(), "last storage", statedb.Database().GetCurrentSlotHash()) + log.Info("Collected key values from base tree", "count", count, "duration", time.Since(now), "last account hash", statedb.Database().GetCurrentAccountHash(), "last account address", statedb.Database().GetCurrentAccountAddress(), "storage processed", statedb.Database().GetStorageProcessed(), "last storage", statedb.Database().GetCurrentSlotHash()) // Take all the collected key-values and prepare the new leaf values. 
// This fires a background routine that will start doing the work that diff --git a/core/rawdb/accessors_overlay.go b/core/rawdb/accessors_overlay.go new file mode 100644 index 000000000000..5a371b9d307f --- /dev/null +++ b/core/rawdb/accessors_overlay.go @@ -0,0 +1,30 @@ +// Copyright 2024 The go-ethereum Authors +// This file is part of the go-ethereum library. +// +// The go-ethereum library is free software: you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// The go-ethereum library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>. 
+ +package rawdb + +import ( + "github.com/ethereum/go-ethereum/common" + "github.com/ethereum/go-ethereum/ethdb" +) + +func ReadVerkleTransitionState(db ethdb.KeyValueReader, hash common.Hash) ([]byte, error) { + return db.Get(transitionStateKey(hash)) +} + +func WriteVerkleTransitionState(db ethdb.KeyValueWriter, hash common.Hash, state []byte) error { + return db.Put(transitionStateKey(hash), state) +} diff --git a/core/rawdb/schema.go b/core/rawdb/schema.go index 18722ed5d4cb..f7a3515715ae 100644 --- a/core/rawdb/schema.go +++ b/core/rawdb/schema.go @@ -122,6 +122,8 @@ var ( CliqueSnapshotPrefix = []byte("clique-") + VerkleTransitionStatePrefix = []byte("verkle-transition-state-") + preimageCounter = metrics.NewRegisteredCounter("db/preimage/total", nil) preimageHitCounter = metrics.NewRegisteredCounter("db/preimage/hits", nil) ) @@ -250,6 +252,11 @@ func storageTrieNodeKey(accountHash common.Hash, path []byte) []byte { return append(append(trieNodeStoragePrefix, accountHash.Bytes()...), path...) } +// transitionStateKey = VerkleTransitionStatePrefix + hash +func transitionStateKey(hash common.Hash) []byte { + return append(VerkleTransitionStatePrefix, hash.Bytes()...) +} + // IsLegacyTrieNode reports whether a provided database entry is a legacy trie // node. The characteristics of legacy trie node are: // - the key length is 32 bytes diff --git a/core/state/database.go b/core/state/database.go index 075b948cf32e..cca65bb6f993 100644 --- a/core/state/database.go +++ b/core/state/database.go @@ -17,8 +17,12 @@ package state import ( + "bytes" + "encoding/gob" "errors" "fmt" + "runtime/debug" + "sync" "github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common/lru" @@ -102,6 +106,10 @@ type Database interface { SaveTransitionState(common.Hash) LoadTransitionState(common.Hash) + + LockCurrentTransitionState() + + UnLockCurrentTransitionState() } // Trie is a Ethereum Merkle Patricia trie. 
@@ -189,22 +197,24 @@ func NewDatabase(db ethdb.Database) Database { // large memory cache. func NewDatabaseWithConfig(db ethdb.Database, config *trie.Config) Database { return &cachingDB{ - disk: db, - codeSizeCache: lru.NewCache[common.Hash, int](codeSizeCacheSize), - codeCache: lru.NewSizeConstrainedCache[common.Hash, []byte](codeCacheSize), - triedb: trie.NewDatabaseWithConfig(db, config), - addrToPoint: utils.NewPointCache(), + disk: db, + codeSizeCache: lru.NewCache[common.Hash, int](codeSizeCacheSize), + codeCache: lru.NewSizeConstrainedCache[common.Hash, []byte](codeCacheSize), + triedb: trie.NewDatabaseWithConfig(db, config), + addrToPoint: utils.NewPointCache(), + TransitionStatePerRoot: lru.NewBasicLRU[common.Hash, *TransitionState](100), } } // NewDatabaseWithNodeDB creates a state database with an already initialized node database. func NewDatabaseWithNodeDB(db ethdb.Database, triedb *trie.Database) Database { return &cachingDB{ - disk: db, - codeSizeCache: lru.NewCache[common.Hash, int](codeSizeCacheSize), - codeCache: lru.NewSizeConstrainedCache[common.Hash, []byte](codeCacheSize), - triedb: triedb, - addrToPoint: utils.NewPointCache(), + disk: db, + codeSizeCache: lru.NewCache[common.Hash, int](codeSizeCacheSize), + codeCache: lru.NewSizeConstrainedCache[common.Hash, []byte](codeCacheSize), + triedb: triedb, + addrToPoint: utils.NewPointCache(), + TransitionStatePerRoot: lru.NewBasicLRU[common.Hash, *TransitionState](100), } } @@ -305,12 +315,12 @@ type cachingDB struct { // TODO ensure that this info is in the DB LastMerkleRoot common.Hash // root hash of the read-only base tree CurrentTransitionState *TransitionState - TransitionStatePerRoot map[common.Hash]*TransitionState + TransitionStatePerRoot lru.BasicLRU[common.Hash, *TransitionState] + transitionStateLock sync.Mutex addrToPoint *utils.PointCache baseRoot common.Hash // hash of the read-only base tree - } func (db *cachingDB) openMPTTrie(root common.Hash) (Trie, error) { @@ -543,37 +553,82 
@@ func (db *cachingDB) SetLastMerkleRoot(merkleRoot common.Hash) { } func (db *cachingDB) SaveTransitionState(root common.Hash) { - if db.TransitionStatePerRoot == nil { - db.TransitionStatePerRoot = make(map[common.Hash]*TransitionState) - } - + db.transitionStateLock.Lock() + defer db.transitionStateLock.Unlock() if db.CurrentTransitionState != nil { - // Copy so that the address pointer isn't updated after - // it has been saved. - db.TransitionStatePerRoot[root] = db.CurrentTransitionState.Copy() + var buf bytes.Buffer + enc := gob.NewEncoder(&buf) + err := enc.Encode(db.CurrentTransitionState) + if err != nil { + log.Error("failed to encode transition state", "err", err) + return + } + + if !db.TransitionStatePerRoot.Contains(root) { + // Copy so that the address pointer isn't updated after + // it has been saved. + db.TransitionStatePerRoot.Add(root, db.CurrentTransitionState.Copy()) + + rawdb.WriteVerkleTransitionState(db.DiskDB(), root, buf.Bytes()) + } - fmt.Println("saving transition state", "storage processed", db.CurrentTransitionState.StorageProcessed, "addr", db.CurrentTransitionState.CurrentAccountAddress, "slot hash", db.CurrentTransitionState.CurrentSlotHash, "root", root, "ended", db.CurrentTransitionState.ended, "started", db.CurrentTransitionState.started) + log.Debug("saving transition state", "storage processed", db.CurrentTransitionState.StorageProcessed, "addr", db.CurrentTransitionState.CurrentAccountAddress, "slot hash", db.CurrentTransitionState.CurrentSlotHash, "root", root, "ended", db.CurrentTransitionState.ended, "started", db.CurrentTransitionState.started) } } func (db *cachingDB) LoadTransitionState(root common.Hash) { - if db.TransitionStatePerRoot == nil { - db.TransitionStatePerRoot = make(map[common.Hash]*TransitionState) - } + db.transitionStateLock.Lock() + defer db.transitionStateLock.Unlock() + // Try to get the transition state from the cache and + // the DB if it's not there. 
+ ts, ok := db.TransitionStatePerRoot.Get(root) + if !ok { + // Not in the cache, try getting it from the DB + data, err := rawdb.ReadVerkleTransitionState(db.DiskDB(), root) + if err != nil { + log.Error("failed to read transition state", "err", err) + return + } + + // if a state could be read from the db, attempt to decode it + if len(data) > 0 { + var ( + newts TransitionState + buf = bytes.NewBuffer(data[:]) + dec = gob.NewDecoder(buf) + ) + // Decode transition state + err = dec.Decode(&newts) + if err != nil { + log.Error("failed to decode transition state", "err", err) + return + } + ts = &newts + } - // Initialize the first transition state, with the "ended" - // field set to true if the database was created - // as a verkle database. - ts, ok := db.TransitionStatePerRoot[root] - if !ok || ts == nil { - fmt.Println("could not find any transition state, starting with a fresh state", "is verkle", db.triedb.IsVerkle()) - // Start with a fresh state - ts = &TransitionState{ended: false} + // Fallback that should only happen before the transition + if ts == nil { + // Initialize the first transition state, with the "ended" + // field set to true if the database was created + // as a verkle database. + log.Debug("no transition state found, starting fresh", "is verkle", db.triedb.IsVerkle()) + // Start with a fresh state + ts = &TransitionState{ended: db.triedb.IsVerkle()} + } } // Copy so that the CurrentAddress pointer in the map // doesn't get overwritten. 
db.CurrentTransitionState = ts.Copy() - fmt.Println("loaded transition state", "storage processed", db.CurrentTransitionState.StorageProcessed, "addr", db.CurrentTransitionState.CurrentAccountAddress, "slot hash", db.CurrentTransitionState.CurrentSlotHash, "root", root, "ended", db.CurrentTransitionState.ended, "started", db.CurrentTransitionState.started) + log.Debug("loaded transition state", "storage processed", db.CurrentTransitionState.StorageProcessed, "addr", db.CurrentTransitionState.CurrentAccountAddress, "slot hash", db.CurrentTransitionState.CurrentSlotHash, "root", root, "ended", db.CurrentTransitionState.ended, "started", db.CurrentTransitionState.started) + debug.PrintStack() +} + +func (db *cachingDB) LockCurrentTransitionState() { + db.transitionStateLock.Lock() +} + +func (db *cachingDB) UnLockCurrentTransitionState() { + db.transitionStateLock.Unlock() } diff --git a/core/state_processor.go b/core/state_processor.go index 818ca4e0b089..f26cbfdd3bea 100644 --- a/core/state_processor.go +++ b/core/state_processor.go @@ -107,12 +107,6 @@ func (p *StateProcessor) Process(block *types.Block, statedb *state.StateDB, cfg return nil, nil, 0, errors.New("withdrawals before shanghai") } - // Perform the overlay transition, if relevant - //parent := p.bc.GetHeaderByHash(header.ParentHash) - //if err := OverlayVerkleTransition(statedb, parent.Root); err != nil { - // return nil, nil, 0, fmt.Errorf("error performing verkle overlay transition: %w", err) - //} - // Finalize the block, applying any consensus engine specific extras (e.g. 
block rewards) p.engine.Finalize(p.bc, header, statedb, block.Transactions(), block.Uncles(), withdrawals) diff --git a/light/trie.go b/light/trie.go index df300c8c6ed2..7e7c03bc16c1 100644 --- a/light/trie.go +++ b/light/trie.go @@ -177,6 +177,13 @@ func (db *odrDatabase) LoadTransitionState(common.Hash) { panic("not implemented") // TODO: Implement } +func (db *odrDatabase) LockCurrentTransitionState() { + panic("not implemented") // TODO: Implement +} +func (db *odrDatabase) UnLockCurrentTransitionState() { + panic("not implemented") // TODO: Implement +} + type odrTrie struct { db *odrDatabase id *TrieID