Commit be29bfd9 authored by George Hotz's avatar George Hotz

trie builds with fake database

parent 0178bec6
// Copyright 2019 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"errors"
"fmt"
"sync"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"golang.org/x/crypto/sha3"
)
// leafChanSize is the size of the leafCh. It's a pretty arbitrary number, to allow
// some parallelism but not incur too much memory overhead.
const leafChanSize = 200
// leaf represents a trie leaf value
type leaf struct {
size int // size of the rlp data (estimate)
hash common.Hash // hash of rlp data
node node // the node to commit
}
// committer is a type used for the trie Commit operation. A committer has some
// internal preallocated temp space, and also a callback that is invoked when
// leaves are committed. The leafs are passed through the `leafCh`, to allow
// some level of parallelism.
// By 'some level' of parallelism, it's still the case that all leaves will be
// processed sequentially - onleaf will never be called in parallel or out of order.
type committer struct {
tmp sliceBuffer
sha crypto.KeccakState
onleaf LeafCallback
leafCh chan *leaf
}
// committers live in a global sync.Pool
var committerPool = sync.Pool{
New: func() interface{} {
return &committer{
tmp: make(sliceBuffer, 0, 550), // cap is as large as a full fullNode.
sha: sha3.NewLegacyKeccak256().(crypto.KeccakState),
}
},
}
// newCommitter creates a new committer or picks one from the pool.
func newCommitter() *committer {
return committerPool.Get().(*committer)
}
func returnCommitterToPool(h *committer) {
h.onleaf = nil
h.leafCh = nil
committerPool.Put(h)
}
// commit collapses a node down into a hash node and inserts it into the database
func (c *committer) Commit(n node, db *Database) (hashNode, error) {
if db == nil {
return nil, errors.New("no db provided")
}
h, err := c.commit(n, db)
if err != nil {
return nil, err
}
return h.(hashNode), nil
}
// commit collapses a node down into a hash node and inserts it into the database
func (c *committer) commit(n node, db *Database) (node, error) {
// if this path is clean, use available cached data
hash, dirty := n.cache()
if hash != nil && !dirty {
return hash, nil
}
// Commit children, then parent, and remove remove the dirty flag.
switch cn := n.(type) {
case *shortNode:
// Commit child
collapsed := cn.copy()
// If the child is fullnode, recursively commit.
// Otherwise it can only be hashNode or valueNode.
if _, ok := cn.Val.(*fullNode); ok {
childV, err := c.commit(cn.Val, db)
if err != nil {
return nil, err
}
collapsed.Val = childV
}
// The key needs to be copied, since we're delivering it to database
collapsed.Key = hexToCompact(cn.Key)
hashedNode := c.store(collapsed, db)
if hn, ok := hashedNode.(hashNode); ok {
return hn, nil
}
return collapsed, nil
case *fullNode:
hashedKids, err := c.commitChildren(cn, db)
if err != nil {
return nil, err
}
collapsed := cn.copy()
collapsed.Children = hashedKids
hashedNode := c.store(collapsed, db)
if hn, ok := hashedNode.(hashNode); ok {
return hn, nil
}
return collapsed, nil
case hashNode:
return cn, nil
default:
// nil, valuenode shouldn't be committed
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
}
}
// commitChildren commits the children of the given fullnode
func (c *committer) commitChildren(n *fullNode, db *Database) ([17]node, error) {
var children [17]node
for i := 0; i < 16; i++ {
child := n.Children[i]
if child == nil {
continue
}
// If it's the hashed child, save the hash value directly.
// Note: it's impossible that the child in range [0, 15]
// is a valuenode.
if hn, ok := child.(hashNode); ok {
children[i] = hn
continue
}
// Commit the child recursively and store the "hashed" value.
// Note the returned node can be some embedded nodes, so it's
// possible the type is not hashnode.
hashed, err := c.commit(child, db)
if err != nil {
return children, err
}
children[i] = hashed
}
// For the 17th child, it's possible the type is valuenode.
if n.Children[16] != nil {
children[16] = n.Children[16]
}
return children, nil
}
// store hashes the node n and if we have a storage layer specified, it writes
// the key/value pair to it and tracks any node->child references as well as any
// node->external trie references.
func (c *committer) store(n node, db *Database) node {
// Larger nodes are replaced by their hash and stored in the database.
var (
hash, _ = n.cache()
size int
)
if hash == nil {
// This was not generated - must be a small node stored in the parent.
// In theory we should apply the leafCall here if it's not nil(embedded
// node usually contains value). But small value(less than 32bytes) is
// not our target.
return n
} else {
// We have the hash already, estimate the RLP encoding-size of the node.
// The size is used for mem tracking, does not need to be exact
size = estimateSize(n)
}
// If we're using channel-based leaf-reporting, send to channel.
// The leaf channel will be active only when there an active leaf-callback
if c.leafCh != nil {
c.leafCh <- &leaf{
size: size,
hash: common.BytesToHash(hash),
node: n,
}
} else if db != nil {
// No leaf-callback used, but there's still a database. Do serial
// insertion
db.lock.Lock()
db.insert(common.BytesToHash(hash), size, n)
db.lock.Unlock()
}
return hash
}
// commitLoop does the actual insert + leaf callback for nodes.
func (c *committer) commitLoop(db *Database) {
for item := range c.leafCh {
var (
hash = item.hash
size = item.size
n = item.node
)
// We are pooling the trie nodes into an intermediate memory cache
db.lock.Lock()
db.insert(hash, size, n)
db.lock.Unlock()
if c.onleaf != nil {
switch n := n.(type) {
case *shortNode:
if child, ok := n.Val.(valueNode); ok {
c.onleaf(nil, nil, child, hash)
}
case *fullNode:
// For children in range [0, 15], it's impossible
// to contain valuenode. Only check the 17th child.
if n.Children[16] != nil {
c.onleaf(nil, nil, n.Children[16].(valueNode), hash)
}
}
}
}
}
func (c *committer) makeHashNode(data []byte) hashNode {
n := make(hashNode, c.sha.Size())
c.sha.Reset()
c.sha.Write(data)
c.sha.Read(n)
return n
}
// estimateSize estimates the size of an rlp-encoded node, without actually
// rlp-encoding it (zero allocs). This method has been experimentally tried, and with a trie
// with 1000 leafs, the only errors above 1% are on small shortnodes, where this
// method overestimates by 2 or 3 bytes (e.g. 37 instead of 35)
func estimateSize(n node) int {
switch n := n.(type) {
case *shortNode:
// A short node contains a compacted key, and a value.
return 3 + len(n.Key) + estimateSize(n.Val)
case *fullNode:
// A full node contains up to 16 hashes (some nils), and a key
s := 3
for i := 0; i < 16; i++ {
if child := n.Children[i]; child != nil {
s += estimateSize(child)
} else {
s++
}
}
return s
case valueNode:
return 1 + len(n)
case hashNode:
return 1 + len(n)
default:
panic(fmt.Sprintf("node type %T", n))
}
}
// Copyright 2015 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"fmt"
"github.com/ethereum/go-ethereum/common"
)
// MissingNodeError is returned by the trie functions (TryGet, TryUpdate, TryDelete)
// in the case where a trie node is not present in the local database. It contains
// information necessary for retrieving the missing node.
type MissingNodeError struct {
NodeHash common.Hash // hash of the missing node
Path []byte // hex-encoded path to the missing node
}
func (err *MissingNodeError) Error() string {
return fmt.Sprintf("missing trie node %x (path %x)", err.NodeHash, err.Path)
}
...@@ -2,9 +2,9 @@ package trie ...@@ -2,9 +2,9 @@ package trie
import ( import (
"io" "io"
"sync"
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
) )
// rawNode is a simple binary blob used to differentiate between collapsed trie // rawNode is a simple binary blob used to differentiate between collapsed trie
...@@ -20,10 +20,25 @@ func (n rawNode) EncodeRLP(w io.Writer) error { ...@@ -20,10 +20,25 @@ func (n rawNode) EncodeRLP(w io.Writer) error {
return err return err
} }
var ( type Database struct {
// emptyRoot is the known root hash of an empty trie. lock sync.RWMutex
emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421") }
// emptyState is the known hash of an empty state trie entry. // Node retrieves an encoded cached trie node from memory. If it cannot be found
emptyState = crypto.Keccak256Hash(nil) // cached, the method queries the persistent database for the content.
) func (db *Database) Node(hash common.Hash) ([]byte, error) {
return []byte{}, nil
}
// node retrieves a cached trie node from memory, or returns nil if none can be
// found in the memory cache.
func (db *Database) node(hash common.Hash) node {
return nilValueNode
}
// insert inserts a collapsed trie node into the memory database.
// The blob size must be specified to allow proper size tracking.
// All nodes inserted by this function will be reference tracked
// and in theory should only used for **trie nodes** insertion.
func (db *Database) insert(hash common.Hash, size int, node node) {
}
// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
"bytes"
"container/heap"
"errors"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/rlp"
)
// Iterator is a key-value trie iterator that traverses a Trie.
type Iterator struct {
nodeIt NodeIterator
Key []byte // Current data key on which the iterator is positioned on
Value []byte // Current data value on which the iterator is positioned on
Err error
}
// NewIterator creates a new key-value iterator from a node iterator.
// Note that the value returned by the iterator is raw. If the content is encoded
// (e.g. storage value is RLP-encoded), it's caller's duty to decode it.
func NewIterator(it NodeIterator) *Iterator {
return &Iterator{
nodeIt: it,
}
}
// Next moves the iterator forward one key-value entry.
func (it *Iterator) Next() bool {
for it.nodeIt.Next(true) {
if it.nodeIt.Leaf() {
it.Key = it.nodeIt.LeafKey()
it.Value = it.nodeIt.LeafBlob()
return true
}
}
it.Key = nil
it.Value = nil
it.Err = it.nodeIt.Error()
return false
}
// Prove generates the Merkle proof for the leaf node the iterator is currently
// positioned on.
func (it *Iterator) Prove() [][]byte {
return it.nodeIt.LeafProof()
}
// NodeIterator is an iterator to traverse the trie pre-order.
type NodeIterator interface {
// Next moves the iterator to the next node. If the parameter is false, any child
// nodes will be skipped.
Next(bool) bool
// Error returns the error status of the iterator.
Error() error
// Hash returns the hash of the current node.
Hash() common.Hash
// Parent returns the hash of the parent of the current node. The hash may be the one
// grandparent if the immediate parent is an internal node with no hash.
Parent() common.Hash
// Path returns the hex-encoded path to the current node.
// Callers must not retain references to the return value after calling Next.
// For leaf nodes, the last element of the path is the 'terminator symbol' 0x10.
Path() []byte
// Leaf returns true iff the current node is a leaf node.
Leaf() bool
// LeafKey returns the key of the leaf. The method panics if the iterator is not
// positioned at a leaf. Callers must not retain references to the value after
// calling Next.
LeafKey() []byte
// LeafBlob returns the content of the leaf. The method panics if the iterator
// is not positioned at a leaf. Callers must not retain references to the value
// after calling Next.
LeafBlob() []byte
// LeafProof returns the Merkle proof of the leaf. The method panics if the
// iterator is not positioned at a leaf. Callers must not retain references
// to the value after calling Next.
LeafProof() [][]byte
// AddResolver sets an intermediate database to use for looking up trie nodes
// before reaching into the real persistent layer.
//
// This is not required for normal operation, rather is an optimization for
// cases where trie nodes can be recovered from some external mechanism without
// reading from disk. In those cases, this resolver allows short circuiting
// accesses and returning them from memory.
//
// Before adding a similar mechanism to any other place in Geth, consider
// making trie.Database an interface and wrapping at that level. It's a huge
// refactor, but it could be worth it if another occurrence arises.
AddResolver(ethdb.KeyValueStore)
}
// nodeIteratorState represents the iteration state at one particular node of the
// trie, which can be resumed at a later invocation.
type nodeIteratorState struct {
hash common.Hash // Hash of the node being iterated (nil if not standalone)
node node // Trie node being iterated
parent common.Hash // Hash of the first full ancestor node (nil if current is the root)
index int // Child to be processed next
pathlen int // Length of the path to this node
}
type nodeIterator struct {
trie *Trie // Trie being iterated
stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state
path []byte // Path to the current node
err error // Failure set in case of an internal error in the iterator
resolver ethdb.KeyValueStore // Optional intermediate resolver above the disk layer
}
// errIteratorEnd is stored in nodeIterator.err when iteration is done.
var errIteratorEnd = errors.New("end of iteration")
// seekError is stored in nodeIterator.err if the initial seek has failed.
type seekError struct {
key []byte
err error
}
func (e seekError) Error() string {
return "seek error: " + e.err.Error()
}
func newNodeIterator(trie *Trie, start []byte) NodeIterator {
if trie.Hash() == emptyState {
return new(nodeIterator)
}
it := &nodeIterator{trie: trie}
it.err = it.seek(start)
return it
}
func (it *nodeIterator) AddResolver(resolver ethdb.KeyValueStore) {
it.resolver = resolver
}
func (it *nodeIterator) Hash() common.Hash {
if len(it.stack) == 0 {
return common.Hash{}
}
return it.stack[len(it.stack)-1].hash
}
func (it *nodeIterator) Parent() common.Hash {
if len(it.stack) == 0 {
return common.Hash{}
}
return it.stack[len(it.stack)-1].parent
}
func (it *nodeIterator) Leaf() bool {
return hasTerm(it.path)
}
func (it *nodeIterator) LeafKey() []byte {
if len(it.stack) > 0 {
if _, ok := it.stack[len(it.stack)-1].node.(valueNode); ok {
return hexToKeybytes(it.path)
}
}
panic("not at leaf")
}
func (it *nodeIterator) LeafBlob() []byte {
if len(it.stack) > 0 {
if node, ok := it.stack[len(it.stack)-1].node.(valueNode); ok {
return node
}
}
panic("not at leaf")
}
func (it *nodeIterator) LeafProof() [][]byte {
if len(it.stack) > 0 {
if _, ok := it.stack[len(it.stack)-1].node.(valueNode); ok {
hasher := newHasher(false)
defer returnHasherToPool(hasher)
proofs := make([][]byte, 0, len(it.stack))
for i, item := range it.stack[:len(it.stack)-1] {
// Gather nodes that end up as hash nodes (or the root)
node, hashed := hasher.proofHash(item.node)
if _, ok := hashed.(hashNode); ok || i == 0 {
enc, _ := rlp.EncodeToBytes(node)
proofs = append(proofs, enc)
}
}
return proofs
}
}
panic("not at leaf")
}
func (it *nodeIterator) Path() []byte {
return it.path
}
func (it *nodeIterator) Error() error {
if it.err == errIteratorEnd {
return nil
}
if seek, ok := it.err.(seekError); ok {
return seek.err
}
return it.err
}
// Next moves the iterator to the next node, returning whether there are any
// further nodes. In case of an internal error this method returns false and
// sets the Error field to the encountered failure. If `descend` is false,
// skips iterating over any subnodes of the current node.
func (it *nodeIterator) Next(descend bool) bool {
if it.err == errIteratorEnd {
return false
}
if seek, ok := it.err.(seekError); ok {
if it.err = it.seek(seek.key); it.err != nil {
return false
}
}
// Otherwise step forward with the iterator and report any errors.
state, parentIndex, path, err := it.peek(descend)
it.err = err
if it.err != nil {
return false
}
it.push(state, parentIndex, path)
return true
}
func (it *nodeIterator) seek(prefix []byte) error {
// The path we're looking for is the hex encoded key without terminator.
key := keybytesToHex(prefix)
key = key[:len(key)-1]
// Move forward until we're just before the closest match to key.
for {
state, parentIndex, path, err := it.peekSeek(key)
if err == errIteratorEnd {
return errIteratorEnd
} else if err != nil {
return seekError{prefix, err}
} else if bytes.Compare(path, key) >= 0 {
return nil
}
it.push(state, parentIndex, path)
}
}
// init initializes the the iterator.
func (it *nodeIterator) init() (*nodeIteratorState, error) {
root := it.trie.Hash()
state := &nodeIteratorState{node: it.trie.root, index: -1}
if root != emptyRoot {
state.hash = root
}
return state, state.resolve(it, nil)
}
// peek creates the next state of the iterator.
func (it *nodeIterator) peek(descend bool) (*nodeIteratorState, *int, []byte, error) {
// Initialize the iterator if we've just started.
if len(it.stack) == 0 {
state, err := it.init()
return state, nil, nil, err
}
if !descend {
// If we're skipping children, pop the current node first
it.pop()
}
// Continue iteration to the next child
for len(it.stack) > 0 {
parent := it.stack[len(it.stack)-1]
ancestor := parent.hash
if (ancestor == common.Hash{}) {
ancestor = parent.parent
}
state, path, ok := it.nextChild(parent, ancestor)
if ok {
if err := state.resolve(it, path); err != nil {
return parent, &parent.index, path, err
}
return state, &parent.index, path, nil
}
// No more child nodes, move back up.
it.pop()
}
return nil, nil, nil, errIteratorEnd
}
// peekSeek is like peek, but it also tries to skip resolving hashes by skipping
// over the siblings that do not lead towards the desired seek position.
func (it *nodeIterator) peekSeek(seekKey []byte) (*nodeIteratorState, *int, []byte, error) {
// Initialize the iterator if we've just started.
if len(it.stack) == 0 {
state, err := it.init()
return state, nil, nil, err
}
if !bytes.HasPrefix(seekKey, it.path) {
// If we're skipping children, pop the current node first
it.pop()
}
// Continue iteration to the next child
for len(it.stack) > 0 {
parent := it.stack[len(it.stack)-1]
ancestor := parent.hash
if (ancestor == common.Hash{}) {
ancestor = parent.parent
}
state, path, ok := it.nextChildAt(parent, ancestor, seekKey)
if ok {
if err := state.resolve(it, path); err != nil {
return parent, &parent.index, path, err
}
return state, &parent.index, path, nil
}
// No more child nodes, move back up.
it.pop()
}
return nil, nil, nil, errIteratorEnd
}
func (it *nodeIterator) resolveHash(hash hashNode, path []byte) (node, error) {
if it.resolver != nil {
if blob, err := it.resolver.Get(hash); err == nil && len(blob) > 0 {
if resolved, err := decodeNode(hash, blob); err == nil {
return resolved, nil
}
}
}
resolved, err := it.trie.resolveHash(hash, path)
return resolved, err
}
func (st *nodeIteratorState) resolve(it *nodeIterator, path []byte) error {
if hash, ok := st.node.(hashNode); ok {
resolved, err := it.resolveHash(hash, path)
if err != nil {
return err
}
st.node = resolved
st.hash = common.BytesToHash(hash)
}
return nil
}
func findChild(n *fullNode, index int, path []byte, ancestor common.Hash) (node, *nodeIteratorState, []byte, int) {
var (
child node
state *nodeIteratorState
childPath []byte
)
for ; index < len(n.Children); index++ {
if n.Children[index] != nil {
child = n.Children[index]
hash, _ := child.cache()
state = &nodeIteratorState{
hash: common.BytesToHash(hash),
node: child,
parent: ancestor,
index: -1,
pathlen: len(path),
}
childPath = append(childPath, path...)
childPath = append(childPath, byte(index))
return child, state, childPath, index
}
}
return nil, nil, nil, 0
}
func (it *nodeIterator) nextChild(parent *nodeIteratorState, ancestor common.Hash) (*nodeIteratorState, []byte, bool) {
switch node := parent.node.(type) {
case *fullNode:
//Full node, move to the first non-nil child.
if child, state, path, index := findChild(node, parent.index+1, it.path, ancestor); child != nil {
parent.index = index - 1
return state, path, true
}
case *shortNode:
// Short node, return the pointer singleton child
if parent.index < 0 {
hash, _ := node.Val.cache()
state := &nodeIteratorState{
hash: common.BytesToHash(hash),
node: node.Val,
parent: ancestor,
index: -1,
pathlen: len(it.path),
}
path := append(it.path, node.Key...)
return state, path, true
}
}
return parent, it.path, false
}
// nextChildAt is similar to nextChild, except that it targets a child as close to the
// target key as possible, thus skipping siblings.
func (it *nodeIterator) nextChildAt(parent *nodeIteratorState, ancestor common.Hash, key []byte) (*nodeIteratorState, []byte, bool) {
switch n := parent.node.(type) {
case *fullNode:
// Full node, move to the first non-nil child before the desired key position
child, state, path, index := findChild(n, parent.index+1, it.path, ancestor)
if child == nil {
// No more children in this fullnode
return parent, it.path, false
}
// If the child we found is already past the seek position, just return it.
if bytes.Compare(path, key) >= 0 {
parent.index = index - 1
return state, path, true
}
// The child is before the seek position. Try advancing
for {
nextChild, nextState, nextPath, nextIndex := findChild(n, index+1, it.path, ancestor)
// If we run out of children, or skipped past the target, return the
// previous one
if nextChild == nil || bytes.Compare(nextPath, key) >= 0 {
parent.index = index - 1
return state, path, true
}
// We found a better child closer to the target
state, path, index = nextState, nextPath, nextIndex
}
case *shortNode:
// Short node, return the pointer singleton child
if parent.index < 0 {
hash, _ := n.Val.cache()
state := &nodeIteratorState{
hash: common.BytesToHash(hash),
node: n.Val,
parent: ancestor,
index: -1,
pathlen: len(it.path),
}
path := append(it.path, n.Key...)
return state, path, true
}
}
return parent, it.path, false
}
func (it *nodeIterator) push(state *nodeIteratorState, parentIndex *int, path []byte) {
it.path = path
it.stack = append(it.stack, state)
if parentIndex != nil {
*parentIndex++
}
}
func (it *nodeIterator) pop() {
parent := it.stack[len(it.stack)-1]
it.path = it.path[:parent.pathlen]
it.stack = it.stack[:len(it.stack)-1]
}
func compareNodes(a, b NodeIterator) int {
if cmp := bytes.Compare(a.Path(), b.Path()); cmp != 0 {
return cmp
}
if a.Leaf() && !b.Leaf() {
return -1
} else if b.Leaf() && !a.Leaf() {
return 1
}
if cmp := bytes.Compare(a.Hash().Bytes(), b.Hash().Bytes()); cmp != 0 {
return cmp
}
if a.Leaf() && b.Leaf() {
return bytes.Compare(a.LeafBlob(), b.LeafBlob())
}
return 0
}
type differenceIterator struct {
a, b NodeIterator // Nodes returned are those in b - a.
eof bool // Indicates a has run out of elements
count int // Number of nodes scanned on either trie
}
// NewDifferenceIterator constructs a NodeIterator that iterates over elements in b that
// are not in a. Returns the iterator, and a pointer to an integer recording the number
// of nodes seen.
func NewDifferenceIterator(a, b NodeIterator) (NodeIterator, *int) {
a.Next(true)
it := &differenceIterator{
a: a,
b: b,
}
return it, &it.count
}
func (it *differenceIterator) Hash() common.Hash {
return it.b.Hash()
}
func (it *differenceIterator) Parent() common.Hash {
return it.b.Parent()
}
func (it *differenceIterator) Leaf() bool {
return it.b.Leaf()
}
func (it *differenceIterator) LeafKey() []byte {
return it.b.LeafKey()
}
func (it *differenceIterator) LeafBlob() []byte {
return it.b.LeafBlob()
}
func (it *differenceIterator) LeafProof() [][]byte {
return it.b.LeafProof()
}
func (it *differenceIterator) Path() []byte {
return it.b.Path()
}
func (it *differenceIterator) AddResolver(resolver ethdb.KeyValueStore) {
panic("not implemented")
}
func (it *differenceIterator) Next(bool) bool {
// Invariants:
// - We always advance at least one element in b.
// - At the start of this function, a's path is lexically greater than b's.
if !it.b.Next(true) {
return false
}
it.count++
if it.eof {
// a has reached eof, so we just return all elements from b
return true
}
for {
switch compareNodes(it.a, it.b) {
case -1:
// b jumped past a; advance a
if !it.a.Next(true) {
it.eof = true
return true
}
it.count++
case 1:
// b is before a
return true
case 0:
// a and b are identical; skip this whole subtree if the nodes have hashes
hasHash := it.a.Hash() == common.Hash{}
if !it.b.Next(hasHash) {
return false
}
it.count++
if !it.a.Next(hasHash) {
it.eof = true
return true
}
it.count++
}
}
}
func (it *differenceIterator) Error() error {
if err := it.a.Error(); err != nil {
return err
}
return it.b.Error()
}
type nodeIteratorHeap []NodeIterator
func (h nodeIteratorHeap) Len() int { return len(h) }
func (h nodeIteratorHeap) Less(i, j int) bool { return compareNodes(h[i], h[j]) < 0 }
func (h nodeIteratorHeap) Swap(i, j int) { h[i], h[j] = h[j], h[i] }
func (h *nodeIteratorHeap) Push(x interface{}) { *h = append(*h, x.(NodeIterator)) }
func (h *nodeIteratorHeap) Pop() interface{} {
n := len(*h)
x := (*h)[n-1]
*h = (*h)[0 : n-1]
return x
}
type unionIterator struct {
items *nodeIteratorHeap // Nodes returned are the union of the ones in these iterators
count int // Number of nodes scanned across all tries
}
// NewUnionIterator constructs a NodeIterator that iterates over elements in the union
// of the provided NodeIterators. Returns the iterator, and a pointer to an integer
// recording the number of nodes visited.
func NewUnionIterator(iters []NodeIterator) (NodeIterator, *int) {
h := make(nodeIteratorHeap, len(iters))
copy(h, iters)
heap.Init(&h)
ui := &unionIterator{items: &h}
return ui, &ui.count
}
func (it *unionIterator) Hash() common.Hash {
return (*it.items)[0].Hash()
}
func (it *unionIterator) Parent() common.Hash {
return (*it.items)[0].Parent()
}
func (it *unionIterator) Leaf() bool {
return (*it.items)[0].Leaf()
}
func (it *unionIterator) LeafKey() []byte {
return (*it.items)[0].LeafKey()
}
func (it *unionIterator) LeafBlob() []byte {
return (*it.items)[0].LeafBlob()
}
func (it *unionIterator) LeafProof() [][]byte {
return (*it.items)[0].LeafProof()
}
func (it *unionIterator) Path() []byte {
return (*it.items)[0].Path()
}
func (it *unionIterator) AddResolver(resolver ethdb.KeyValueStore) {
panic("not implemented")
}
// Next returns the next node in the union of tries being iterated over.
//
// It does this by maintaining a heap of iterators, sorted by the iteration
// order of their next elements, with one entry for each source trie. Each
// time Next() is called, it takes the least element from the heap to return,
// advancing any other iterators that also point to that same element. These
// iterators are called with descend=false, since we know that any nodes under
// these nodes will also be duplicates, found in the currently selected iterator.
// Whenever an iterator is advanced, it is pushed back into the heap if it still
// has elements remaining.
//
// In the case that descend=false - eg, we're asked to ignore all subnodes of the
// current node - we also advance any iterators in the heap that have the current
// path as a prefix.
func (it *unionIterator) Next(descend bool) bool {
if len(*it.items) == 0 {
return false
}
// Get the next key from the union
least := heap.Pop(it.items).(NodeIterator)
// Skip over other nodes as long as they're identical, or, if we're not descending, as
// long as they have the same prefix as the current node.
for len(*it.items) > 0 && ((!descend && bytes.HasPrefix((*it.items)[0].Path(), least.Path())) || compareNodes(least, (*it.items)[0]) == 0) {
skipped := heap.Pop(it.items).(NodeIterator)
// Skip the whole subtree if the nodes have hashes; otherwise just skip this node
if skipped.Next(skipped.Hash() == common.Hash{}) {
it.count++
// If there are more elements, push the iterator back on the heap
heap.Push(it.items, skipped)
}
}
if least.Next(descend) {
it.count++
heap.Push(it.items, least)
}
return len(*it.items) > 0
}
func (it *unionIterator) Error() error {
for i := 0; i < len(*it.items); i++ {
if err := (*it.items)[i].Error(); err != nil {
return err
}
}
return nil
}
// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Package trie implements Merkle Patricia Tries.
package trie
import (
"bytes"
"errors"
"fmt"
"sync"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/crypto"
"github.com/ethereum/go-ethereum/log"
)
var (
// emptyRoot is the known root hash of an empty trie.
emptyRoot = common.HexToHash("56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421")
// emptyState is the known hash of an empty state trie entry.
emptyState = crypto.Keccak256Hash(nil)
)
// LeafCallback is a callback type invoked when a trie operation reaches a leaf
// node.
//
// The paths is a path tuple identifying a particular trie node either in a single
// trie (account) or a layered trie (account -> storage). Each path in the tuple
// is in the raw format(32 bytes).
//
// The hexpath is a composite hexary path identifying the trie node. All the key
// bytes are converted to the hexary nibbles and composited with the parent path
// if the trie node is in a layered trie.
//
// It's used by state sync and commit to allow handling external references
// between account and storage tries. And also it's used in the state healing
// for extracting the raw states(leaf nodes) with corresponding paths.
type LeafCallback func(paths [][]byte, hexpath []byte, leaf []byte, parent common.Hash) error
// Trie is a Merkle Patricia Trie.
// The zero value is an empty trie with no database.
// Use New to create a trie that sits on top of a database.
//
// Trie is not safe for concurrent use.
type Trie struct {
db *Database
root node
// Keep track of the number leafs which have been inserted since the last
// hashing operation. This number will not directly map to the number of
// actually unhashed nodes
unhashed int
}
// newFlag returns the cache flag value for a newly created node.
func (t *Trie) newFlag() nodeFlag {
return nodeFlag{dirty: true}
}
// New creates a trie with an existing root node from db.
//
// If root is the zero hash or the sha3 hash of an empty string, the
// trie is initially empty and does not require a database. Otherwise,
// New will panic if db is nil and returns a MissingNodeError if root does
// not exist in the database. Accessing the trie loads nodes from db on demand.
func New(root common.Hash, db *Database) (*Trie, error) {
if db == nil {
panic("trie.New called without a database")
}
trie := &Trie{
db: db,
}
if root != (common.Hash{}) && root != emptyRoot {
rootnode, err := trie.resolveHash(root[:], nil)
if err != nil {
return nil, err
}
trie.root = rootnode
}
return trie, nil
}
// NodeIterator returns an iterator that returns nodes of the trie. Iteration starts at
// the key after the given start key.
func (t *Trie) NodeIterator(start []byte) NodeIterator {
return newNodeIterator(t, start)
}
// Get returns the value for key stored in the trie.
// The value bytes must not be modified by the caller.
func (t *Trie) Get(key []byte) []byte {
res, err := t.TryGet(key)
if err != nil {
log.Error(fmt.Sprintf("Unhandled trie error: %v", err))
}
return res
}
// TryGet returns the value for key stored in the trie.
// The value bytes must not be modified by the caller.
// If a node was not found in the database, a MissingNodeError is returned.
func (t *Trie) TryGet(key []byte) ([]byte, error) {
value, newroot, didResolve, err := t.tryGet(t.root, keybytesToHex(key), 0)
if err == nil && didResolve {
t.root = newroot
}
return value, err
}
func (t *Trie) tryGet(origNode node, key []byte, pos int) (value []byte, newnode node, didResolve bool, err error) {
switch n := (origNode).(type) {
case nil:
return nil, nil, false, nil
case valueNode:
return n, n, false, nil
case *shortNode:
if len(key)-pos < len(n.Key) || !bytes.Equal(n.Key, key[pos:pos+len(n.Key)]) {
// key not found in trie
return nil, n, false, nil
}
value, newnode, didResolve, err = t.tryGet(n.Val, key, pos+len(n.Key))
if err == nil && didResolve {
n = n.copy()
n.Val = newnode
}
return value, n, didResolve, err
case *fullNode:
value, newnode, didResolve, err = t.tryGet(n.Children[key[pos]], key, pos+1)
if err == nil && didResolve {
n = n.copy()
n.Children[key[pos]] = newnode
}
return value, n, didResolve, err
case hashNode:
child, err := t.resolveHash(n, key[:pos])
if err != nil {
return nil, n, true, err
}
value, newnode, _, err := t.tryGet(child, key, pos)
return value, newnode, true, err
default:
panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
}
}
// TryGetNode attempts to retrieve a trie node by compact-encoded path. It is not
// possible to use keybyte-encoding as the path might contain odd nibbles.
func (t *Trie) TryGetNode(path []byte) ([]byte, int, error) {
item, newroot, resolved, err := t.tryGetNode(t.root, compactToHex(path), 0)
if err != nil {
return nil, resolved, err
}
if resolved > 0 {
t.root = newroot
}
if item == nil {
return nil, resolved, nil
}
return item, resolved, err
}
func (t *Trie) tryGetNode(origNode node, path []byte, pos int) (item []byte, newnode node, resolved int, err error) {
// If we reached the requested path, return the current node
if pos >= len(path) {
// Although we most probably have the original node expanded, encoding
// that into consensus form can be nasty (needs to cascade down) and
// time consuming. Instead, just pull the hash up from disk directly.
var hash hashNode
if node, ok := origNode.(hashNode); ok {
hash = node
} else {
hash, _ = origNode.cache()
}
if hash == nil {
return nil, origNode, 0, errors.New("non-consensus node")
}
blob, err := t.db.Node(common.BytesToHash(hash))
return blob, origNode, 1, err
}
// Path still needs to be traversed, descend into children
switch n := (origNode).(type) {
case nil:
// Non-existent path requested, abort
return nil, nil, 0, nil
case valueNode:
// Path prematurely ended, abort
return nil, nil, 0, nil
case *shortNode:
if len(path)-pos < len(n.Key) || !bytes.Equal(n.Key, path[pos:pos+len(n.Key)]) {
// Path branches off from short node
return nil, n, 0, nil
}
item, newnode, resolved, err = t.tryGetNode(n.Val, path, pos+len(n.Key))
if err == nil && resolved > 0 {
n = n.copy()
n.Val = newnode
}
return item, n, resolved, err
case *fullNode:
item, newnode, resolved, err = t.tryGetNode(n.Children[path[pos]], path, pos+1)
if err == nil && resolved > 0 {
n = n.copy()
n.Children[path[pos]] = newnode
}
return item, n, resolved, err
case hashNode:
child, err := t.resolveHash(n, path[:pos])
if err != nil {
return nil, n, 1, err
}
item, newnode, resolved, err := t.tryGetNode(child, path, pos)
return item, newnode, resolved + 1, err
default:
panic(fmt.Sprintf("%T: invalid node: %v", origNode, origNode))
}
}
// Update associates key with value in the trie. Subsequent calls to
// Get will return value. If value has length zero, any existing value
// is deleted from the trie and calls to Get will return nil.
//
// The value bytes must not be modified by the caller while they are
// stored in the trie.
func (t *Trie) Update(key, value []byte) {
if err := t.TryUpdate(key, value); err != nil {
log.Error(fmt.Sprintf("Unhandled trie error: %v", err))
}
}
// TryUpdate associates key with value in the trie. Subsequent calls to
// Get will return value. If value has length zero, any existing value
// is deleted from the trie and calls to Get will return nil.
//
// The value bytes must not be modified by the caller while they are
// stored in the trie.
//
// If a node was not found in the database, a MissingNodeError is returned.
func (t *Trie) TryUpdate(key, value []byte) error {
t.unhashed++
k := keybytesToHex(key)
if len(value) != 0 {
_, n, err := t.insert(t.root, nil, k, valueNode(value))
if err != nil {
return err
}
t.root = n
} else {
_, n, err := t.delete(t.root, nil, k)
if err != nil {
return err
}
t.root = n
}
return nil
}
func (t *Trie) insert(n node, prefix, key []byte, value node) (bool, node, error) {
if len(key) == 0 {
if v, ok := n.(valueNode); ok {
return !bytes.Equal(v, value.(valueNode)), value, nil
}
return true, value, nil
}
switch n := n.(type) {
case *shortNode:
matchlen := prefixLen(key, n.Key)
// If the whole key matches, keep this short node as is
// and only update the value.
if matchlen == len(n.Key) {
dirty, nn, err := t.insert(n.Val, append(prefix, key[:matchlen]...), key[matchlen:], value)
if !dirty || err != nil {
return false, n, err
}
return true, &shortNode{n.Key, nn, t.newFlag()}, nil
}
// Otherwise branch out at the index where they differ.
branch := &fullNode{flags: t.newFlag()}
var err error
_, branch.Children[n.Key[matchlen]], err = t.insert(nil, append(prefix, n.Key[:matchlen+1]...), n.Key[matchlen+1:], n.Val)
if err != nil {
return false, nil, err
}
_, branch.Children[key[matchlen]], err = t.insert(nil, append(prefix, key[:matchlen+1]...), key[matchlen+1:], value)
if err != nil {
return false, nil, err
}
// Replace this shortNode with the branch if it occurs at index 0.
if matchlen == 0 {
return true, branch, nil
}
// Otherwise, replace it with a short node leading up to the branch.
return true, &shortNode{key[:matchlen], branch, t.newFlag()}, nil
case *fullNode:
dirty, nn, err := t.insert(n.Children[key[0]], append(prefix, key[0]), key[1:], value)
if !dirty || err != nil {
return false, n, err
}
n = n.copy()
n.flags = t.newFlag()
n.Children[key[0]] = nn
return true, n, nil
case nil:
return true, &shortNode{key, value, t.newFlag()}, nil
case hashNode:
// We've hit a part of the trie that isn't loaded yet. Load
// the node and insert into it. This leaves all child nodes on
// the path to the value in the trie.
rn, err := t.resolveHash(n, prefix)
if err != nil {
return false, nil, err
}
dirty, nn, err := t.insert(rn, prefix, key, value)
if !dirty || err != nil {
return false, rn, err
}
return true, nn, nil
default:
panic(fmt.Sprintf("%T: invalid node: %v", n, n))
}
}
// Delete removes any existing value for key from the trie.
func (t *Trie) Delete(key []byte) {
if err := t.TryDelete(key); err != nil {
log.Error(fmt.Sprintf("Unhandled trie error: %v", err))
}
}
// TryDelete removes any existing value for key from the trie.
// If a node was not found in the database, a MissingNodeError is returned.
func (t *Trie) TryDelete(key []byte) error {
t.unhashed++
k := keybytesToHex(key)
_, n, err := t.delete(t.root, nil, k)
if err != nil {
return err
}
t.root = n
return nil
}
// delete returns the new root of the trie with key deleted.
// It reduces the trie to minimal form by simplifying
// nodes on the way up after deleting recursively.
func (t *Trie) delete(n node, prefix, key []byte) (bool, node, error) {
switch n := n.(type) {
case *shortNode:
matchlen := prefixLen(key, n.Key)
if matchlen < len(n.Key) {
return false, n, nil // don't replace n on mismatch
}
if matchlen == len(key) {
return true, nil, nil // remove n entirely for whole matches
}
// The key is longer than n.Key. Remove the remaining suffix
// from the subtrie. Child can never be nil here since the
// subtrie must contain at least two other values with keys
// longer than n.Key.
dirty, child, err := t.delete(n.Val, append(prefix, key[:len(n.Key)]...), key[len(n.Key):])
if !dirty || err != nil {
return false, n, err
}
switch child := child.(type) {
case *shortNode:
// Deleting from the subtrie reduced it to another
// short node. Merge the nodes to avoid creating a
// shortNode{..., shortNode{...}}. Use concat (which
// always creates a new slice) instead of append to
// avoid modifying n.Key since it might be shared with
// other nodes.
return true, &shortNode{concat(n.Key, child.Key...), child.Val, t.newFlag()}, nil
default:
return true, &shortNode{n.Key, child, t.newFlag()}, nil
}
case *fullNode:
dirty, nn, err := t.delete(n.Children[key[0]], append(prefix, key[0]), key[1:])
if !dirty || err != nil {
return false, n, err
}
n = n.copy()
n.flags = t.newFlag()
n.Children[key[0]] = nn
// Because n is a full node, it must've contained at least two children
// before the delete operation. If the new child value is non-nil, n still
// has at least two children after the deletion, and cannot be reduced to
// a short node.
if nn != nil {
return true, n, nil
}
// Reduction:
// Check how many non-nil entries are left after deleting and
// reduce the full node to a short node if only one entry is
// left. Since n must've contained at least two children
// before deletion (otherwise it would not be a full node) n
// can never be reduced to nil.
//
// When the loop is done, pos contains the index of the single
// value that is left in n or -2 if n contains at least two
// values.
pos := -1
for i, cld := range &n.Children {
if cld != nil {
if pos == -1 {
pos = i
} else {
pos = -2
break
}
}
}
if pos >= 0 {
if pos != 16 {
// If the remaining entry is a short node, it replaces
// n and its key gets the missing nibble tacked to the
// front. This avoids creating an invalid
// shortNode{..., shortNode{...}}. Since the entry
// might not be loaded yet, resolve it just for this
// check.
cnode, err := t.resolve(n.Children[pos], prefix)
if err != nil {
return false, nil, err
}
if cnode, ok := cnode.(*shortNode); ok {
k := append([]byte{byte(pos)}, cnode.Key...)
return true, &shortNode{k, cnode.Val, t.newFlag()}, nil
}
}
// Otherwise, n is replaced by a one-nibble short node
// containing the child.
return true, &shortNode{[]byte{byte(pos)}, n.Children[pos], t.newFlag()}, nil
}
// n still contains at least two values and cannot be reduced.
return true, n, nil
case valueNode:
return true, nil, nil
case nil:
return false, nil, nil
case hashNode:
// We've hit a part of the trie that isn't loaded yet. Load
// the node and delete from it. This leaves all child nodes on
// the path to the value in the trie.
rn, err := t.resolveHash(n, prefix)
if err != nil {
return false, nil, err
}
dirty, nn, err := t.delete(rn, prefix, key)
if !dirty || err != nil {
return false, rn, err
}
return true, nn, nil
default:
panic(fmt.Sprintf("%T: invalid node: %v (%v)", n, n, key))
}
}
func concat(s1 []byte, s2 ...byte) []byte {
r := make([]byte, len(s1)+len(s2))
copy(r, s1)
copy(r[len(s1):], s2)
return r
}
func (t *Trie) resolve(n node, prefix []byte) (node, error) {
if n, ok := n.(hashNode); ok {
return t.resolveHash(n, prefix)
}
return n, nil
}
func (t *Trie) resolveHash(n hashNode, prefix []byte) (node, error) {
hash := common.BytesToHash(n)
if node := t.db.node(hash); node != nil {
return node, nil
}
return nil, &MissingNodeError{NodeHash: hash, Path: prefix}
}
// Hash returns the root hash of the trie. It does not write to the
// database and can be used even if the trie doesn't have one.
func (t *Trie) Hash() common.Hash {
hash, cached, _ := t.hashRoot()
t.root = cached
return common.BytesToHash(hash.(hashNode))
}
// Commit writes all nodes to the trie's memory database, tracking the internal
// and external (for account tries) references.
func (t *Trie) Commit(onleaf LeafCallback) (root common.Hash, err error) {
if t.db == nil {
panic("commit called on trie with nil database")
}
if t.root == nil {
return emptyRoot, nil
}
// Derive the hash for all dirty nodes first. We hold the assumption
// in the following procedure that all nodes are hashed.
rootHash := t.Hash()
h := newCommitter()
defer returnCommitterToPool(h)
// Do a quick check if we really need to commit, before we spin
// up goroutines. This can happen e.g. if we load a trie for reading storage
// values, but don't write to it.
if _, dirty := t.root.cache(); !dirty {
return rootHash, nil
}
var wg sync.WaitGroup
if onleaf != nil {
h.onleaf = onleaf
h.leafCh = make(chan *leaf, leafChanSize)
wg.Add(1)
go func() {
defer wg.Done()
h.commitLoop(t.db)
}()
}
var newRoot hashNode
newRoot, err = h.Commit(t.root, t.db)
if onleaf != nil {
// The leafch is created in newCommitter if there was an onleaf callback
// provided. The commitLoop only _reads_ from it, and the commit
// operation was the sole writer. Therefore, it's safe to close this
// channel here.
close(h.leafCh)
wg.Wait()
}
if err != nil {
return common.Hash{}, err
}
t.root = newRoot
return rootHash, nil
}
// hashRoot calculates the root hash of the given trie
func (t *Trie) hashRoot() (node, node, error) {
if t.root == nil {
return hashNode(emptyRoot.Bytes()), nil, nil
}
// If the number of changes is below 100, we let one thread handle it
h := newHasher(t.unhashed >= 100)
defer returnHasherToPool(h)
hashed, cached := h.hash(t.root, true)
t.unhashed = 0
return hashed, cached, nil
}
// Reset drops the referenced root node and cleans all internal state.
func (t *Trie) Reset() {
t.root = nil
t.unhashed = 0
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment