Commit 4254a3e3 authored by acud's avatar acud Committed by GitHub

bmt: import bmt repo (#1520)

parent 1b83c2e5
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bmt
import (
"hash"
)
const (
SpanSize = 8
)
// Hash provides the necessary extension of the hash interface to add the length-prefix of the BMT hash.
//
// Any implementation should make it possible to generate a BMT hash using the hash.Hash interface only.
// However, the limitation will be that the Span of the BMT hash always must be limited to the amount of bytes actually written.
type Hash interface {
hash.Hash
// SetSpan sets the length prefix of BMT hash.
SetSpan(int64) error
// SetSpanBytes sets the length prefix of BMT hash in byte form.
SetSpanBytes([]byte) error
// Capacity returns the maximum amount of bytes that will be processed by the implementation.
Capacity() int
// WriteSection writes to a specific section of the data to be hashed.
WriteSection(idx int, data []byte) error
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Command generate_legacy generates bmt hashes of sequential byte inputs
// for every possible length of legacy bmt hasher
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"github.com/ethersphere/bee/pkg/bmt/legacy"
"gitlab.com/nolash/go-mockbytes"
"golang.org/x/crypto/sha3"
)
func main() {
// create output directory, fail if it already exists or error creating
if len(os.Args) != 2 {
fmt.Fprintf(os.Stderr, "Usage: generate-hashes <output_directory>\n")
os.Exit(1)
}
outputDir, err := filepath.Abs(os.Args[1])
if err != nil {
fmt.Fprintf(os.Stderr, "Invalid input: %s", err)
os.Exit(1)
}
err = os.Mkdir(outputDir, 0750)
if err == os.ErrExist {
fmt.Fprintf(os.Stderr, "Directory %s already exists\n", outputDir)
os.Exit(1)
} else if err != nil {
fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
os.Exit(1)
}
// set up hasher
hashPool := legacy.NewTreePool(sha3.NewLegacyKeccak256, 128, legacy.PoolSize)
bmtHash := legacy.New(hashPool)
// create sequence generator and outputs
var i int
g := mockbytes.New(0, mockbytes.MockTypeStandard).WithModulus(255)
for i = 0; i < 4096; i++ {
s := fmt.Sprintf("processing %d...", i)
fmt.Fprintf(os.Stderr, "%-64s\r", s)
filename := fmt.Sprintf("%s/%d.bin", outputDir, i)
b, err := g.SequentialBytes(i)
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
bmtHash.Reset()
_, err = bmtHash.Write(b)
sum := bmtHash.Sum(nil)
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
err = ioutil.WriteFile(filename, sum, 0666)
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
err = ioutil.WriteFile(filename, b, 0666)
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
}
}
// Be kind and give feedback to user
dirString := fmt.Sprintf("Done. Data is in %s. Enjoy!", outputDir)
fmt.Printf("%-64s\n", dirString)
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Command main_legacy executes the BMT hash algorithm on the given data and writes the binary result to standard output
//
// Up to 4096 bytes will be read
//
// If a filename is given as argument, it reads data from the file. Otherwise it reads data from standard input.
package main
import (
"fmt"
"io"
"os"
"github.com/ethersphere/bee/pkg/bmt/legacy"
"golang.org/x/crypto/sha3"
)
func main() {
var data [4096]byte
var err error
var infile *os.File
if len(os.Args) > 1 {
infile, err = os.Open(os.Args[1])
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
} else {
infile = os.Stdin
}
var c int
c, err = infile.Read(data[:])
// EOF means zero-length input. This is still valid input for BMT
if err != nil && err != io.EOF {
fmt.Fprint(os.Stderr, err.Error())
infile.Close()
os.Exit(1)
}
infile.Close()
hashPool := legacy.NewTreePool(sha3.NewLegacyKeccak256, 128, legacy.PoolSize)
bmtHash := legacy.New(hashPool)
_, err = bmtHash.Write(data[:c])
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
binSum := bmtHash.Sum(nil)
_, err = os.Stdout.Write(binSum)
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bmt defines the interface for the Binary Merkle Tree hash.
package bmt
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bmt
import (
"errors"
)
var ErrOverflow = errors.New("BMT hash capacity exceeded")
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package legacy
import (
"encoding/binary"
"errors"
"fmt"
"hash"
"strings"
"sync"
"sync/atomic"
"github.com/ethersphere/bee/pkg/bmt"
)
var _ bmt.Hash = (*Hasher)(nil)
const (
// PoolSize is the maximum number of bmt trees used by the hashers, i.e,
// the maximum number of concurrent BMT hashing operations performed by the same hasher
PoolSize = 8
)
var (
ZeroSpan = make([]byte, 8)
)
// BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT.
// implemented by Keccak256 SHA3 sha3.NewLegacyKeccak256
type BaseHasherFunc func() hash.Hash
// Hasher a reusable hasher for fixed maximum size chunks representing a BMT
// It reuses a pool of trees for amortised memory allocation and resource control,
// and supports order-agnostic concurrent segment writes and section (double segment) writes
// as well as sequential read and write.
//
// The same hasher instance must not be called concurrently on more than one chunk.
//
// The same hasher instance is synchronously reuseable.
//
// Sum gives back the tree to the pool and guaranteed to leave
// the tree and itself in a state reusable for hashing a new chunk.
type Hasher struct {
mtx sync.Mutex // protects Hasher.size increments (temporary solution)
pool *TreePool // BMT resource pool
bmt *tree // prebuilt BMT resource for flowcontrol and proofs
size int // bytes written to Hasher since last Reset()
cursor int // cursor to write to on next Write() call
}
// New creates a reusable BMT Hasher that
// pulls a new tree from a resource pool for hashing each chunk.
func New(p *TreePool) *Hasher {
return &Hasher{
pool: p,
}
}
// TreePool provides a pool of trees used as resources by the BMT Hasher.
// A tree popped from the pool is guaranteed to have a clean state ready
// for hashing a new chunk.
type TreePool struct {
lock sync.Mutex
c chan *tree // the channel to obtain a resource from the pool
hasher BaseHasherFunc // base hasher to use for the BMT levels
SegmentSize int // size of leaf segments, stipulated to be = hash size
SegmentCount int // the number of segments on the base level of the BMT
Capacity int // pool capacity, controls concurrency
Depth int // depth of the bmt trees = int(log2(segmentCount))+1
Size int // the total length of the data (count * size)
count int // current count of (ever) allocated resources
zerohashes [][]byte // lookup table for predictable padding subtrees for all levels
}
// NewTreePool creates a tree pool with hasher, segment size, segment count and capacity
// on Hasher.getTree it reuses free trees or creates a new one if capacity is not reached.
func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
// initialises the zerohashes lookup table
depth := calculateDepthFor(segmentCount)
segmentSize := hasher().Size()
zerohashes := make([][]byte, depth+1)
zeros := make([]byte, segmentSize)
zerohashes[0] = zeros
h := hasher()
for i := 1; i < depth+1; i++ {
zeros = doSum(h, nil, zeros, zeros)
zerohashes[i] = zeros
}
return &TreePool{
c: make(chan *tree, capacity),
hasher: hasher,
SegmentSize: segmentSize,
SegmentCount: segmentCount,
Capacity: capacity,
Size: segmentCount * segmentSize,
Depth: depth,
zerohashes: zerohashes,
}
}
// Drain drains the pool until it has no more than n resources.
func (p *TreePool) Drain(n int) {
p.lock.Lock()
defer p.lock.Unlock()
for len(p.c) > n {
<-p.c
p.count--
}
}
// Reserve is blocking until it returns an available tree
// it reuses free trees or creates a new one if size is not reached.
func (p *TreePool) reserve() *tree {
p.lock.Lock()
defer p.lock.Unlock()
var t *tree
if p.count == p.Capacity {
return <-p.c
}
select {
case t = <-p.c:
default:
t = newTree(p.SegmentSize, p.Depth, p.hasher)
p.count++
}
return t
}
// release gives back a tree to the pool.
// this tree is guaranteed to be in reusable state.
func (p *TreePool) release(t *tree) {
p.c <- t // can never fail ...
}
// tree is a reusable control structure representing a BMT
// organised in a binary tree
//
// Hasher uses a TreePool to obtain a tree for each chunk hash
// the tree is 'locked' while not in the pool.
type tree struct {
leaves []*node // leaf nodes of the tree, other nodes accessible via parent links
cursor int // index of rightmost currently open segment
offset int // offset (cursor position) within currently open segment
section []byte // the rightmost open section (double segment)
result chan []byte // result channel
span []byte // The span of the data subsumed under the chunk
}
// node is a reuseable segment hasher representing a node in a BMT.
type node struct {
isLeft bool // whether it is left side of the parent double segment
parent *node // pointer to parent node in the BMT
state int32 // atomic increment impl concurrent boolean toggle
left, right []byte // this is where the two children sections are written
hasher hash.Hash // preconstructed hasher on nodes
}
// newNode constructs a segment hasher node in the BMT (used by newTree).
func newNode(index int, parent *node, hasher hash.Hash) *node {
return &node{
parent: parent,
isLeft: index%2 == 0,
hasher: hasher,
}
}
// Draw draws the BMT (badly).
func (t *tree) Draw(hash []byte) string {
var left, right []string
var anc []*node
for i, n := range t.leaves {
left = append(left, fmt.Sprintf("%v", hashstr(n.left)))
if i%2 == 0 {
anc = append(anc, n.parent)
}
right = append(right, fmt.Sprintf("%v", hashstr(n.right)))
}
//anc = t.leaves
var hashes [][]string
for l := 0; len(anc) > 0; l++ {
var nodes []*node
hash := []string{""}
for i, n := range anc {
hash = append(hash, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
if i%2 == 0 && n.parent != nil {
nodes = append(nodes, n.parent)
}
}
hash = append(hash, "")
hashes = append(hashes, hash)
anc = nodes
}
hashes = append(hashes, []string{"", fmt.Sprintf("%v", hashstr(hash)), ""})
total := 60
del := " "
var rows []string
for i := len(hashes) - 1; i >= 0; i-- {
var textlen int
hash := hashes[i]
for _, s := range hash {
textlen += len(s)
}
if total < textlen {
total = textlen + len(hash)
}
delsize := (total - textlen) / (len(hash) - 1)
if delsize > len(del) {
delsize = len(del)
}
row := fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(hash, del[:delsize]))
rows = append(rows, row)
}
rows = append(rows, strings.Join(left, " "), strings.Join(right, " "))
return strings.Join(rows, "\n") + "\n"
}
// newTree initialises a tree by building up the nodes of a BMT
//
// segmentSize is stipulated to be the size of the hash.
func newTree(segmentSize, depth int, hashfunc func() hash.Hash) *tree {
n := newNode(0, nil, hashfunc())
prevlevel := []*node{n}
// iterate over levels and creates 2^(depth-level) nodes
// the 0 level is on double segment sections so we start at depth - 2 since
count := 2
for level := depth - 2; level >= 0; level-- {
nodes := make([]*node, count)
for i := 0; i < count; i++ {
parent := prevlevel[i/2]
var hasher hash.Hash
if level == 0 {
hasher = hashfunc()
}
nodes[i] = newNode(i, parent, hasher)
}
prevlevel = nodes
count *= 2
}
// the datanode level is the nodes on the last level
return &tree{
leaves: prevlevel,
result: make(chan []byte),
section: make([]byte, 2*segmentSize),
}
}
// Count returns the maximum amount of bytes that will be processed by this hasher implementation.
func (h *Hasher) Capacity() int {
return h.pool.Size
}
// writeSection is not in use for this implementation.
func (h *Hasher) WriteSection(idx int, data []byte) error {
return errors.New("this hasher only implements sequential writes. please use Write() instead")
}
// SetSpan sets the span length value prefix in numeric form for the current hash operation.
func (h *Hasher) SetSpan(length int64) error {
span := LengthToSpan(length)
h.getTree().span = span
return nil
}
// SetSpanBytes sets the span length value prefix in bytes for the current hash operation.
func (h *Hasher) SetSpanBytes(b []byte) error {
if len(b) != bmt.SpanSize {
return errors.New("invalid span size")
}
span := b
h.getTree().span = span
return nil
}
// Size returns the digest size of the hash
func (h *Hasher) Size() int {
return h.pool.SegmentSize
}
// BlockSize returns the optimal write size to the Hasher
func (h *Hasher) BlockSize() int {
return 2 * h.pool.SegmentSize
}
// Sum returns the BMT root hash of the buffer
// using Sum presupposes sequential synchronous writes (io.Writer interface).
func (h *Hasher) Sum(b []byte) (s []byte) {
t := h.getTree()
h.mtx.Lock()
if h.size == 0 && t.offset == 0 {
h.mtx.Unlock()
h.releaseTree()
//return h.pool.zerohashes[h.pool.Depth]
return h.GetZeroHash()
}
h.mtx.Unlock()
// write the last section with final flag set to true
go h.writeSection(t.cursor, t.section, true, true)
// wait for the result
s = <-t.result
if t.span == nil {
t.span = LengthToSpan(int64(h.size))
}
span := t.span
// release the tree resource back to the pool
h.releaseTree()
return doSum(h.pool.hasher(), b, span, s)
}
// Write calls sequentially add to the buffer to be hashed,
// with every full segment calls writeSection in a go routine.
//
// NOTE: This legacy implementation has no error handling for the writer. Use with caution.
func (h *Hasher) Write(b []byte) (int, error) {
c := h.write(b)
return c, nil
}
// write exposes writing to the hasher to internal methods.
func (h *Hasher) write(b []byte) int {
l := len(b)
if l == 0 || l > h.pool.Size {
return 0
}
h.mtx.Lock()
h.size += len(b)
h.mtx.Unlock()
t := h.getTree()
secsize := 2 * h.pool.SegmentSize
// calculate length of missing bit to complete current open section
smax := secsize - t.offset
// if at the beginning of chunk or middle of the section
if t.offset < secsize {
// fill up current segment from buffer
copy(t.section[t.offset:], b)
// if input buffer consumed and open section not complete, then
// advance offset and return
if smax == 0 {
smax = secsize
}
if l <= smax {
t.offset += l
return l
}
} else if t.cursor == h.pool.SegmentCount*2 {
// if end of a section
return 0
}
// read full sections and the last possibly partial section from the input buffer
for smax < l {
// section complete; push to tree asynchronously
go h.writeSection(t.cursor, t.section, true, false)
// reset section
t.section = make([]byte, secsize)
// copy from input buffer at smax to right half of section
copy(t.section, b[smax:])
// advance cursor
t.cursor++
// smax here represents successive offsets in the input buffer
smax += secsize
}
t.offset = l - smax + secsize
return l
}
// Reset prepares the Hasher for reuse
func (h *Hasher) Reset() {
h.cursor = 0
h.size = 0
h.releaseTree()
}
// LengthToSpan creates a binary data span size representation.
// It is required for calculating the BMT hash.
func LengthToSpan(length int64) []byte {
spanBytes := make([]byte, 8)
binary.LittleEndian.PutUint64(spanBytes, uint64(length))
return spanBytes
}
// GetZeroHash returns the zero hash of the full depth of the Hasher instance.
func (h *Hasher) GetZeroHash() []byte {
return h.pool.zerohashes[h.pool.Depth]
}
// releaseTree gives back the Tree to the pool whereby it unlocks.
// It resets tree, segment and index.
func (h *Hasher) releaseTree() {
t := h.bmt
if t == nil {
return
}
h.bmt = nil
go func() {
t.cursor = 0
t.offset = 0
t.span = nil
t.section = make([]byte, h.pool.SegmentSize*2)
select {
case <-t.result:
default:
}
h.pool.release(t)
}()
}
// getTree obtains a BMT resource by reserving one from the pool and assigns it to the bmt field.
func (h *Hasher) getTree() *tree {
if h.bmt != nil {
return h.bmt
}
t := h.pool.reserve()
h.bmt = t
return t
}
// atomic bool toggle implementing a concurrent reusable 2-state object.
// Atomic addint with %2 implements atomic bool toggle.
// It returns true if the toggler just put it in the active/waiting state.
func (n *node) toggle() bool {
return atomic.AddInt32(&n.state, 1)%2 == 1
}
// calculates the hash of the data using hash.Hash.
//
// BUG: This legacy implementation has no error handling for the writer. Use with caution.
func doSum(h hash.Hash, b []byte, data ...[]byte) []byte {
h.Reset()
for _, v := range data {
_, _ = h.Write(v)
}
return h.Sum(b)
}
// hashstr is a pretty printer for bytes used in tree.draw.
func hashstr(b []byte) string {
end := len(b)
if end > 4 {
end = 4
}
return fmt.Sprintf("%x", b[:end])
}
// calculateDepthFor calculates the depth (number of levels) in the BMT tree.
func calculateDepthFor(n int) (d int) {
c := 2
for ; c < n; c *= 2 {
d++
}
return d + 1
}
// writeSection writes the hash of i-th section into level 1 node of the BMT tree.
func (h *Hasher) writeSection(i int, section []byte, double, final bool) {
// select the leaf node for the section
var n *node
var isLeft bool
var hasher hash.Hash
var level int
t := h.getTree()
if double {
level++
n = t.leaves[i]
hasher = n.hasher
isLeft = n.isLeft
n = n.parent
// hash the section
section = doSum(hasher, nil, section)
} else {
n = t.leaves[i/2]
hasher = n.hasher
isLeft = i%2 == 0
}
// write hash into parent node
if final {
// for the last segment use writeFinalNode
h.writeFinalNode(level, n, hasher, isLeft, section)
} else {
h.writeNode(n, hasher, isLeft, section)
}
}
// writeNode pushes the data to the node.
// if it is the first of 2 sisters written, the routine terminates.
// if it is the second, it calculates the hash and writes it
// to the parent node recursively.
// since hashing the parent is synchronous the same hasher can be used.
func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) {
level := 1
for {
// at the root of the bmt just write the result to the result channel
if n == nil {
h.getTree().result <- s
return
}
// otherwise assign child hash to left or right segment
if isLeft {
n.left = s
} else {
n.right = s
}
// the child-thread first arriving will terminate
if n.toggle() {
return
}
// the thread coming second now can be sure both left and right children are written
// so it calculates the hash of left|right and pushes it to the parent
s = doSum(bh, nil, n.left, n.right)
isLeft = n.isLeft
n = n.parent
level++
}
}
// writeFinalNode is following the path starting from the final datasegment to the
// BMT root via parents.
// For unbalanced trees it fills in the missing right sister nodes using
// the pool's lookup table for BMT subtree root hashes for all-zero sections.
// Otherwise behaves like `writeNode`.
func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s []byte) {
for {
// at the root of the bmt just write the result to the result channel
if n == nil {
if s != nil {
h.getTree().result <- s
}
return
}
var noHash bool
if isLeft {
// coming from left sister branch
// when the final section's path is going via left child node
// we include an all-zero subtree hash for the right level and toggle the node.
n.right = h.pool.zerohashes[level]
if s != nil {
n.left = s
// if a left final node carries a hash, it must be the first (and only thread)
// so the toggle is already in passive state no need no call
// yet thread needs to carry on pushing hash to parent
noHash = false
} else {
// if again first thread then propagate nil and calculate no hash
noHash = n.toggle()
}
} else {
// right sister branch
if s != nil {
// if hash was pushed from right child node, write right segment change state
n.right = s
// if toggle is true, we arrived first so no hashing just push nil to parent
noHash = n.toggle()
} else {
// if s is nil, then thread arrived first at previous node and here there will be two,
// so no need to do anything and keep s = nil for parent
noHash = true
}
}
// the child-thread first arriving will just continue resetting s to nil
// the second thread now can be sure both left and right children are written
// it calculates the hash of left|right and pushes it to the parent
if noHash {
s = nil
} else {
s = doSum(bh, nil, n.left, n.right)
}
// iterate to parent
isLeft = n.isLeft
n = n.parent
level++
}
}
// Copyright 2017 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package legacy
import (
"bytes"
"encoding/binary"
"fmt"
"math/rand"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/ethersphere/bee/pkg/bmt"
"github.com/ethersphere/bee/pkg/bmt/reference"
"gitlab.com/nolash/go-mockbytes"
"golang.org/x/crypto/sha3"
)
// the actual data length generated (could be longer than max datalength of the BMT)
const BufferSize = 4128
const (
// segmentCount is the maximum number of segments of the underlying chunk
// Should be equal to max-chunk-data-size / hash-size
// Currently set to 128 == 4096 (default chunk size) / 32 (sha3.keccak256 size)
SegmentCount = 128
)
var Counts = []int{1, 2, 3, 4, 5, 8, 9, 15, 16, 17, 32, 37, 42, 53, 63, 64, 65, 111, 127, 128}
var BenchmarkBMTResult []byte
// calculates the Keccak256 SHA3 hash of the data
func sha3hash(data ...[]byte) []byte {
h := sha3.NewLegacyKeccak256()
return doSum(h, nil, data...)
}
// tests if hasher responds with correct hash comparing the reference implementation return value
func TestHasherEmptyData(t *testing.T) {
hasher := sha3.NewLegacyKeccak256
var data []byte
for _, count := range Counts {
t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) {
pool := NewTreePool(hasher, count, PoolSize)
defer pool.Drain(0)
bmt := New(pool)
rbmt := reference.NewRefHasher(hasher(), count)
expHash, err := rbmt.Hash(data)
if err != nil {
t.Fatal(err)
}
resHash, err := syncHash(bmt, 0, data)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(expHash, resHash) {
t.Fatalf("hash mismatch with reference. expected %x, got %x", resHash, expHash)
}
})
}
}
// tests sequential write with entire max size written in one go
func TestSyncHasherCorrectness(t *testing.T) {
g := mockbytes.New(1, mockbytes.MockTypeStandard)
data, err := g.RandomBytes(BufferSize)
if err != nil {
t.Fatal(err)
}
hasher := sha3.NewLegacyKeccak256
size := hasher().Size()
for _, count := range Counts {
t.Run(fmt.Sprintf("segments_%v", count), func(t *testing.T) {
max := count * size
var incr int
capacity := 1
pool := NewTreePool(hasher, count, capacity)
defer pool.Drain(0)
for n := 0; n <= max; n += incr {
incr = 1 + rand.Intn(5)
bmt := New(pool)
err = testHasherCorrectness(bmt, hasher, data, n, count)
if err != nil {
t.Fatal(err)
}
}
})
}
}
// Tests that the BMT hasher can be synchronously reused with poolsizes 1 and PoolSize
func TestHasherReuse(t *testing.T) {
t.Run(fmt.Sprintf("poolsize_%d", 1), func(t *testing.T) {
testHasherReuse(1, t)
})
t.Run(fmt.Sprintf("poolsize_%d", PoolSize), func(t *testing.T) {
testHasherReuse(PoolSize, t)
})
}
// tests if bmt reuse is not corrupting result
func testHasherReuse(poolsize int, t *testing.T) {
hasher := sha3.NewLegacyKeccak256
pool := NewTreePool(hasher, SegmentCount, poolsize)
defer pool.Drain(0)
bmt := New(pool)
for i := 0; i < 100; i++ {
g := mockbytes.New(1, mockbytes.MockTypeStandard)
data, err := g.RandomBytes(BufferSize)
if err != nil {
t.Fatal(err)
}
n := rand.Intn(bmt.Size())
err = testHasherCorrectness(bmt, hasher, data, n, SegmentCount)
if err != nil {
t.Fatal(err)
}
}
}
// Tests if pool can be cleanly reused even in concurrent use by several hasher
func TestBMTConcurrentUse(t *testing.T) {
hasher := sha3.NewLegacyKeccak256
pool := NewTreePool(hasher, SegmentCount, PoolSize)
defer pool.Drain(0)
cycles := 100
errc := make(chan error)
for i := 0; i < cycles; i++ {
go func() {
bmt := New(pool)
g := mockbytes.New(1, mockbytes.MockTypeStandard)
data, _ := g.RandomBytes(BufferSize)
n := rand.Intn(bmt.Size())
errc <- testHasherCorrectness(bmt, hasher, data, n, 128)
}()
}
LOOP:
for {
select {
case <-time.NewTimer(5 * time.Second).C:
t.Fatal("timed out")
case err := <-errc:
if err != nil {
t.Fatal(err)
}
cycles--
if cycles == 0 {
break LOOP
}
}
}
}
// Tests BMT Hasher io.Writer interface is working correctly
// even multiple short random write buffers
func TestBMTWriterBuffers(t *testing.T) {
hasher := sha3.NewLegacyKeccak256
for _, count := range Counts {
t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) {
errc := make(chan error)
pool := NewTreePool(hasher, count, PoolSize)
defer pool.Drain(0)
n := count * 32
bmt := New(pool)
g := mockbytes.New(1, mockbytes.MockTypeStandard)
data, err := g.RandomBytes(n)
if err != nil {
t.Fatal(err)
}
rbmt := reference.NewRefHasher(hasher(), count)
refNoMetaHash, err := rbmt.Hash(data)
if err != nil {
t.Fatal(err)
}
h := hasher()
_, err = h.Write(ZeroSpan)
if err != nil {
t.Fatal(err)
}
_, err = h.Write(refNoMetaHash)
if err != nil {
t.Fatal(err)
}
refHash := h.Sum(nil)
expHash, err := syncHash(bmt, 0, data)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(expHash, refHash) {
t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash)
}
attempts := 10
f := func() error {
bmt := New(pool)
bmt.Reset()
var buflen int
for offset := 0; offset < n; offset += buflen {
buflen = rand.Intn(n-offset) + 1
read, err := bmt.Write(data[offset : offset+buflen])
if err != nil {
return err
}
if read != buflen {
return fmt.Errorf("incorrect read. expected %v bytes, got %v", buflen, read)
}
}
err := bmt.SetSpan(0)
if err != nil {
t.Fatal(err)
}
hash := bmt.Sum(nil)
if !bytes.Equal(hash, expHash) {
return fmt.Errorf("hash mismatch. expected %x, got %x", hash, expHash)
}
return nil
}
for j := 0; j < attempts; j++ {
go func() {
errc <- f()
}()
}
timeout := time.NewTimer(2 * time.Second)
for {
select {
case err := <-errc:
if err != nil {
t.Fatal(err)
}
attempts--
if attempts == 0 {
return
}
case <-timeout.C:
t.Fatalf("timeout")
}
}
})
}
}
// helper function that compares reference and optimised implementations on
// correctness
func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, count int) (err error) {
span := make([]byte, 8)
if len(d) < n {
n = len(d)
}
binary.LittleEndian.PutUint64(span, uint64(n))
data := d[:n]
rbmt := reference.NewRefHasher(hasher(), count)
var exp []byte
if n == 0 {
exp = bmt.pool.zerohashes[bmt.pool.Depth]
} else {
r, err := rbmt.Hash(data)
if err != nil {
return err
}
exp = sha3hash(span, r)
}
got, err := syncHash(bmt, n, data)
if err != nil {
return err
}
if !bytes.Equal(got, exp) {
return fmt.Errorf("wrong hash: expected %x, got %x", exp, got)
}
return err
}
//
func BenchmarkBMT(t *testing.B) {
for size := 4096; size >= 128; size /= 2 {
t.Run(fmt.Sprintf("%v_size_%v", "SHA3", size), func(t *testing.B) {
benchmarkSHA3(t, size)
})
t.Run(fmt.Sprintf("%v_size_%v", "Baseline", size), func(t *testing.B) {
benchmarkBMTBaseline(t, size)
})
t.Run(fmt.Sprintf("%v_size_%v", "REF", size), func(t *testing.B) {
benchmarkRefHasher(t, size)
})
t.Run(fmt.Sprintf("%v_size_%v", "BMT", size), func(t *testing.B) {
benchmarkBMT(t, size)
})
}
}
func BenchmarkPool(t *testing.B) {
caps := []int{1, PoolSize}
for size := 4096; size >= 128; size /= 2 {
for _, c := range caps {
t.Run(fmt.Sprintf("poolsize_%v_size_%v", c, size), func(t *testing.B) {
benchmarkPool(t, c, size)
})
}
}
}
// benchmarks simple sha3 hash on chunks
func benchmarkSHA3(t *testing.B, n int) {
g := mockbytes.New(1, mockbytes.MockTypeStandard)
data, err := g.RandomBytes(n)
if err != nil {
t.Fatal(err)
}
hasher := sha3.NewLegacyKeccak256
h := hasher()
t.ReportAllocs()
t.ResetTimer()
for i := 0; i < t.N; i++ {
doSum(h, nil, data)
}
}
// benchmarks the minimum hashing time for a balanced (for simplicity) BMT
// by doing count/segmentsize parallel hashings of 2*segmentsize bytes
// doing it on n PoolSize each reusing the base hasher
// the premise is that this is the minimum computation needed for a BMT
// therefore this serves as a theoretical optimum for concurrent implementations
func benchmarkBMTBaseline(t *testing.B, n int) {
hasher := sha3.NewLegacyKeccak256
hashSize := hasher().Size()
g := mockbytes.New(1, mockbytes.MockTypeStandard)
data, err := g.RandomBytes(hashSize)
if err != nil {
t.Fatal(err)
}
t.ReportAllocs()
t.ResetTimer()
for i := 0; i < t.N; i++ {
count := int32((n-1)/hashSize + 1)
wg := sync.WaitGroup{}
wg.Add(PoolSize)
var i int32
for j := 0; j < PoolSize; j++ {
go func() {
defer wg.Done()
h := hasher()
for atomic.AddInt32(&i, 1) < count {
doSum(h, nil, data)
}
}()
}
wg.Wait()
}
}
// benchmarks BMT Hasher
func benchmarkBMT(t *testing.B, n int) {
g := mockbytes.New(1, mockbytes.MockTypeStandard)
data, err := g.RandomBytes(n)
if err != nil {
t.Fatal(err)
}
hasher := sha3.NewLegacyKeccak256
pool := NewTreePool(hasher, SegmentCount, PoolSize)
bmt := New(pool)
var r []byte
t.ReportAllocs()
t.ResetTimer()
for i := 0; i < t.N; i++ {
r, err = syncHash(bmt, 0, data)
if err != nil {
t.Fatal(err)
}
}
BenchmarkBMTResult = r
}
// benchmarks 100 concurrent bmt hashes with pool capacity
func benchmarkPool(t *testing.B, poolsize, n int) {
g := mockbytes.New(1, mockbytes.MockTypeStandard)
data, err := g.RandomBytes(n)
if err != nil {
t.Fatal(err)
}
hasher := sha3.NewLegacyKeccak256
pool := NewTreePool(hasher, SegmentCount, poolsize)
cycles := 100
t.ReportAllocs()
t.ResetTimer()
wg := sync.WaitGroup{}
for i := 0; i < t.N; i++ {
wg.Add(cycles)
for j := 0; j < cycles; j++ {
go func() {
defer wg.Done()
bmt := New(pool)
_, _ = syncHash(bmt, 0, data)
}()
}
wg.Wait()
}
}
// benchmarks the reference hasher
func benchmarkRefHasher(t *testing.B, n int) {
g := mockbytes.New(1, mockbytes.MockTypeStandard)
data, err := g.RandomBytes(n)
if err != nil {
t.Fatal(err)
}
hasher := sha3.NewLegacyKeccak256
rbmt := reference.NewRefHasher(hasher(), 128)
t.ReportAllocs()
t.ResetTimer()
for i := 0; i < t.N; i++ {
_, err := rbmt.Hash(data)
if err != nil {
t.Fatal(err)
}
}
}
// Hash hashes the data and the span using the bmt hasher
func syncHash(h *Hasher, spanLength int, data []byte) ([]byte, error) {
h.Reset()
err := h.SetSpan(int64(spanLength))
if err != nil {
return nil, err
}
_, err = h.Write(data)
if err != nil {
return nil, err
}
return h.Sum(nil), nil
}
// TestUseSyncAsOrdinaryHasher verifies that the bmt.Hasher can be used with the hash.Hash interface
func TestUseSyncAsOrdinaryHasher(t *testing.T) {
hasher := sha3.NewLegacyKeccak256
pool := NewTreePool(hasher, SegmentCount, PoolSize)
bmt := New(pool)
err := bmt.SetSpan(3)
if err != nil {
t.Fatal(err)
}
_, err = bmt.Write([]byte("foo"))
if err != nil {
t.Fatal(err)
}
res := bmt.Sum(nil)
refh := reference.NewRefHasher(hasher(), 128)
resh, err := refh.Hash([]byte("foo"))
if err != nil {
t.Fatal(err)
}
hsub := hasher()
span := LengthToSpan(3)
_, err = hsub.Write(span)
if err != nil {
t.Fatal(err)
}
_, err = hsub.Write(resh)
if err != nil {
t.Fatal(err)
}
refRes := hsub.Sum(nil)
if !bytes.Equal(res, refRes) {
t.Fatalf("normalhash; expected %x, got %x", refRes, res)
}
}
func TestConformsToBMTInterface(t *testing.T) {
func() bmt.Hash {
return (New(nil))
}()
}
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Binary Merkle Tree Hash is a hash function over arbitrary datachunks of limited size.
// It is defined as the root hash of the binary merkle tree built over fixed size segments
// of the underlying chunk using any base hash function (e.g., keccak 256 SHA3).
// Chunks with data shorter than the fixed size are hashed as if they had zero padding.
//
// BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
// 128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash
//
// The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
// segment is a substring of a chunk starting at a particular offset.
// The size of the underlying segments is fixed to the size of the base hash (called the resolution
// of the BMT hash), Using Keccak256 SHA3 hash is 32 bytes, the EVM word size to optimize for on-chain BMT verification
// as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.
//
// Two implementations are provided:
//
// RefHasher is optimized for code simplicity and meant as a reference implementation
// that is simple to understand
//
// Hasher is optimized for speed taking advantage of concurrency with minimalistic
// control structure to coordinate the concurrent routines
//
// BMT Hasher implements the following interfaces:
//
// standard golang hash.Hash - synchronous, reusable
//
// io.Writer - synchronous left-to-right datawriter
package legacy
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pool
import (
"hash"
"sync"
bmtlegacy "github.com/ethersphere/bee/pkg/bmt/legacy"
"golang.org/x/crypto/sha3"
)
// Pooler pools bmt Hashers.
// It provides the ability for the number of hashers to grow
// according to demand, but will shrink once the minimum defined
// hashers are put back into the pool.
type Pooler interface {
// Get a bmt Hasher instance.
// Instances are reset before being returned to the caller.
Get() *bmtlegacy.Hasher
// Put a bmt Hasher back into the pool
Put(*bmtlegacy.Hasher)
// Size of the pool.
Size() int
}
type pool struct {
p sync.Pool
mtx sync.Mutex
minimum int // minimum number of instances the pool should have
size int // size of the pool (only accounted for when items are put back)
rented int // number of video tapes on rent
}
// New returns a new HasherPool.
func New(minPool, branches int) Pooler {
return &pool{
p: sync.Pool{
New: func() interface{} {
return bmtlegacy.New(bmtlegacy.NewTreePool(hashFunc, branches, 1)) // one tree per hasher
},
},
minimum: minPool,
}
}
// Get gets a bmt Hasher from the pool.
func (h *pool) Get() *bmtlegacy.Hasher {
h.mtx.Lock()
defer h.mtx.Unlock()
v := h.p.Get().(*bmtlegacy.Hasher)
h.rented++
if h.size > 0 {
h.size--
}
return v
}
// Put puts a Hasher back into the pool.
// It discards the instance if the minimum number of instances
// has been reached.
// The hasher is reset before being put back into the pool.
func (h *pool) Put(v *bmtlegacy.Hasher) {
h.mtx.Lock()
defer h.mtx.Unlock()
h.rented--
// only put back if we're not exceeding the minimum capacity
if h.size+1 > h.minimum {
return
}
v.Reset()
h.p.Put(v)
h.size++
}
// Size of the pool.
func (h *pool) Size() int {
h.mtx.Lock()
defer h.mtx.Unlock()
return h.size
}
func hashFunc() hash.Hash {
return sha3.NewLegacyKeccak256()
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pool_test
import (
"sync"
"testing"
"github.com/ethersphere/bee/pkg/bmt/pool"
)
const str = "hello world"
func TestHasherPool(t *testing.T) {
h := pool.New(3, 128)
v := h.Get()
_, err := v.Write([]byte(str))
if err != nil {
t.Fatal(err)
}
h.Put(v)
if s := h.Size(); s != 1 {
t.Fatalf("expected size 1 but got %d", s)
}
}
func TestHasherPool_concurrent(t *testing.T) {
h := pool.New(3, 128)
c := make(chan struct{})
var wg sync.WaitGroup
// request 10 copies
for i := 0; i < 10; i++ {
v := h.Get()
_, err := v.Write([]byte(str))
if err != nil {
t.Fatal(err)
}
wg.Add(1)
go func() {
defer wg.Done()
<-c
h.Put(v)
}()
}
// when we get instances from the pool, we dont know
// which ones are new and which aren't, so size is
// only incremented when items are put back
if s := h.Size(); s != 0 {
t.Fatalf("expected size 0 but got %d", s)
}
close(c)
wg.Wait()
if s := h.Size(); s != 3 {
t.Fatalf("expected size 3 but got %d", s)
}
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package reference is a simple nonconcurrent reference implementation for hashsize segment based
// Binary Merkle tree hash on arbitrary but fixed maximum chunksize n where 0 <= n <= 4096
//
// This implementation does not take advantage of any paralellisms and uses
// far more memory than necessary, but it is easy to see that it is correct.
// It can be used for generating test cases for optimized implementations.
// There is extra check on reference hasher correctness in bmt_test.go
// * TestRefHasher
// * testBMTHasherCorrectness function
package reference
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package reference
import (
"hash"
)
// RefHasher is the non-optimized easy-to-read reference implementation of BMT.
type RefHasher struct {
maxDataLength int // c * hashSize, where c = 2 ^ ceil(log2(count)), where count = ceil(length / hashSize)
sectionLength int // 2 * hashSize
hasher hash.Hash // base hash func (Keccak256 SHA3)
}
// NewRefHasher returns a new RefHasher.
func NewRefHasher(h hash.Hash, count int) *RefHasher {
hashsize := h.Size()
c := 2
for ; c < count; c *= 2 {
}
return &RefHasher{
sectionLength: 2 * hashsize,
maxDataLength: c * hashsize,
hasher: h,
}
}
// Hash returns the BMT hash of the byte slice.
func (rh *RefHasher) Hash(data []byte) ([]byte, error) {
// if data is shorter than the base length (maxDataLength), we provide padding with zeros
d := make([]byte, rh.maxDataLength)
length := len(data)
if length > rh.maxDataLength {
length = rh.maxDataLength
}
copy(d, data[:length])
return rh.hash(d, rh.maxDataLength)
}
// hash calls itself recursively on both halves of the given slice
// concatenates the results, and returns the hash of that
// if the length of d is 2 * segmentSize then just returns the hash of that section
// data has length maxDataLength = segmentSize * 2^k
func (rh *RefHasher) hash(data []byte, length int) ([]byte, error) {
var section []byte
if length == rh.sectionLength {
// section contains two data segments (d)
section = data
} else {
// section contains hashes of left and right BMT subtree
// to be calculated by calling hash recursively on left and right half of d
length /= 2
left, err := rh.hash(data[:length], length)
if err != nil {
return nil, err
}
right, err := rh.hash(data[length:], length)
if err != nil {
return nil, err
}
section = append(left, right...)
}
rh.hasher.Reset()
_, err := rh.hasher.Write(section)
if err != nil {
return nil, err
}
return rh.hasher.Sum(nil), nil
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package reference_test
import (
"bytes"
"fmt"
"hash"
"testing"
"github.com/ethersphere/bee/pkg/bmt/reference"
"gitlab.com/nolash/go-mockbytes"
"golang.org/x/crypto/sha3"
)
// calculates the hash of the data using hash.Hash
func doSum(h hash.Hash, b []byte, data ...[]byte) ([]byte, error) {
h.Reset()
for _, v := range data {
var err error
_, err = h.Write(v)
if err != nil {
return nil, err
}
}
return h.Sum(b), nil
}
// calculates the Keccak256 SHA3 hash of the data
func sha3hash(t *testing.T, data ...[]byte) []byte {
t.Helper()
h := sha3.NewLegacyKeccak256()
r, err := doSum(h, nil, data...)
if err != nil {
t.Fatal(err)
}
return r
}
// TestRefHasher tests that the RefHasher computes the expected BMT hash for some small data lengths.
func TestRefHasher(t *testing.T) {
// the test struct is used to specify the expected BMT hash for
// segment counts between from and to and lengths from 1 to datalength
for i, x := range []struct {
from int
to int
expected func([]byte) []byte
}{
{
// all lengths in [0,64] should be:
//
// sha3hash(data)
//
from: 1,
to: 2,
expected: func(d []byte) []byte {
data := make([]byte, 64)
copy(data, d)
return sha3hash(t, data)
},
}, {
// all lengths in [3,4] should be:
//
// sha3hash(
// sha3hash(data[:64])
// sha3hash(data[64:])
// )
//
from: 3,
to: 4,
expected: func(d []byte) []byte {
data := make([]byte, 128)
copy(data, d)
return sha3hash(t, sha3hash(t, data[:64]), sha3hash(t, data[64:]))
},
}, {
// all bmttestutil.SegmentCounts in [5,8] should be:
//
// sha3hash(
// sha3hash(
// sha3hash(data[:64])
// sha3hash(data[64:128])
// )
// sha3hash(
// sha3hash(data[128:192])
// sha3hash(data[192:])
// )
// )
//
from: 5,
to: 8,
expected: func(d []byte) []byte {
data := make([]byte, 256)
copy(data, d)
return sha3hash(t, sha3hash(t, sha3hash(t, data[:64]), sha3hash(t, data[64:128])), sha3hash(t, sha3hash(t, data[128:192]), sha3hash(t, data[192:])))
},
},
} {
for segCount := x.from; segCount <= x.to; segCount++ {
for length := 1; length <= segCount*32; length++ {
t.Run(fmt.Sprintf("%d_segments_%d_bytes", segCount, length), func(t *testing.T) {
g := mockbytes.New(i, mockbytes.MockTypeStandard)
data, err := g.RandomBytes(length)
if err != nil {
t.Fatal(err)
}
expected := x.expected(data)
actual, err := reference.NewRefHasher(sha3.NewLegacyKeccak256(), segCount).Hash(data)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(actual, expected) {
t.Fatalf("expected %x, got %x", expected, actual)
}
})
}
}
}
}
......@@ -7,9 +7,9 @@
package bmtpool
import (
bmtlegacy "github.com/ethersphere/bee/pkg/bmt/legacy"
"github.com/ethersphere/bee/pkg/bmt/pool"
"github.com/ethersphere/bee/pkg/swarm"
bmtlegacy "github.com/ethersphere/bmt/legacy"
"github.com/ethersphere/bmt/pool"
)
var instance pool.Pooler
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment