bmt: import bmt repo (#1520)

4254a3e3 · acud · GitHub · 1b83c2e5 · 4254a3e3 · 4254a3e3
Commit 4254a3e3 authored Apr 01, 2021 by acud Committed by GitHub Apr 01, 2021
14 changed files
--- a/pkg/bmt/bmt.go
+++ b/pkg/bmt/bmt.go
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bmt
+
+import (
+	"hash"
+)
+
+const (
+	SpanSize = 8
+)
+
+// Hash provides the necessary extension of the hash interface to add the length-prefix of the BMT hash.
+//
+// Any implementation should make it possible to generate a BMT hash using the hash.Hash interface only.
+// However, the limitation will be that the Span of the BMT hash always must be limited to the amount of bytes actually written.
+type Hash interface {
+	hash.Hash
+
+	// SetSpan sets the length prefix of BMT hash.
+	SetSpan(int64) error
+
+	// SetSpanBytes sets the length prefix of BMT hash in byte form.
+	SetSpanBytes([]byte) error
+
+	// Capacity returns the maximum amount of bytes that will be processed by the implementation.
+	Capacity() int
+
+	// WriteSection writes to a specific section of the data to be hashed.
+	WriteSection(idx int, data []byte) error
+}
--- a/pkg/bmt/cmd/generate-hashes/generate-hashes.go
+++ b/pkg/bmt/cmd/generate-hashes/generate-hashes.go
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Command generate_legacy generates bmt hashes of sequential byte inputs
+// for every possible length of legacy bmt hasher
+package main
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+	"path/filepath"
+
+	"github.com/ethersphere/bee/pkg/bmt/legacy"
+	"gitlab.com/nolash/go-mockbytes"
+	"golang.org/x/crypto/sha3"
+)
+
+func main() {
+
+	// create output directory, fail if it already exists or error creating
+	if len(os.Args) != 2 {
+		fmt.Fprintf(os.Stderr, "Usage: generate-hashes <output_directory>\n")
+		os.Exit(1)
+	}
+	outputDir, err := filepath.Abs(os.Args[1])
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Invalid input: %s", err)
+		os.Exit(1)
+	}
+	err = os.Mkdir(outputDir, 0750)
+	if err == os.ErrExist {
+		fmt.Fprintf(os.Stderr, "Directory %s already exists\n", outputDir)
+		os.Exit(1)
+	} else if err != nil {
+		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
+		os.Exit(1)
+	}
+
+	// set up hasher
+	hashPool := legacy.NewTreePool(sha3.NewLegacyKeccak256, 128, legacy.PoolSize)
+	bmtHash := legacy.New(hashPool)
+
+	// create sequence generator and outputs
+	var i int
+	g := mockbytes.New(0, mockbytes.MockTypeStandard).WithModulus(255)
+	for i = 0; i < 4096; i++ {
+		s := fmt.Sprintf("processing %d...", i)
+		fmt.Fprintf(os.Stderr, "%-64s\r", s)
+		filename := fmt.Sprintf("%s/%d.bin", outputDir, i)
+		b, err := g.SequentialBytes(i)
+		if err != nil {
+			fmt.Fprint(os.Stderr, err.Error())
+			os.Exit(1)
+		}
+		bmtHash.Reset()
+		_, err = bmtHash.Write(b)
+		sum := bmtHash.Sum(nil)
+		if err != nil {
+			fmt.Fprint(os.Stderr, err.Error())
+			os.Exit(1)
+		}
+		err = ioutil.WriteFile(filename, sum, 0666)
+		if err != nil {
+			fmt.Fprint(os.Stderr, err.Error())
+			os.Exit(1)
+		}
+		err = ioutil.WriteFile(filename, b, 0666)
+		if err != nil {
+			fmt.Fprint(os.Stderr, err.Error())
+		}
+	}
+
+	// Be kind and give feedback to user
+	dirString := fmt.Sprintf("Done. Data is in %s. Enjoy!", outputDir)
+	fmt.Printf("%-64s\n", dirString)
+}
--- a/pkg/bmt/cmd/main_legacy.go
+++ b/pkg/bmt/cmd/main_legacy.go
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Command main_legacy executes the BMT hash algorithm on the given data and writes the binary result to standard output
+//
+// Up to 4096 bytes will be read
+//
+// If a filename is given as argument, it reads data from the file. Otherwise it reads data from standard input.
+package main
+
+import (
+	"fmt"
+	"io"
+	"os"
+
+	"github.com/ethersphere/bee/pkg/bmt/legacy"
+	"golang.org/x/crypto/sha3"
+)
+
+func main() {
+	var data [4096]byte
+	var err error
+	var infile *os.File
+
+	if len(os.Args) > 1 {
+		infile, err = os.Open(os.Args[1])
+		if err != nil {
+			fmt.Fprint(os.Stderr, err.Error())
+			os.Exit(1)
+		}
+	} else {
+		infile = os.Stdin
+	}
+	var c int
+	c, err = infile.Read(data[:])
+
+	// EOF means zero-length input. This is still valid input for BMT
+	if err != nil && err != io.EOF {
+		fmt.Fprint(os.Stderr, err.Error())
+		infile.Close()
+		os.Exit(1)
+	}
+	infile.Close()
+
+	hashPool := legacy.NewTreePool(sha3.NewLegacyKeccak256, 128, legacy.PoolSize)
+	bmtHash := legacy.New(hashPool)
+	_, err = bmtHash.Write(data[:c])
+	if err != nil {
+		fmt.Fprint(os.Stderr, err.Error())
+		os.Exit(1)
+	}
+	binSum := bmtHash.Sum(nil)
+	_, err = os.Stdout.Write(binSum)
+	if err != nil {
+		fmt.Fprint(os.Stderr, err.Error())
+		os.Exit(1)
+	}
+}
--- a/pkg/bmt/doc.go
+++ b/pkg/bmt/doc.go
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package bmt defines the interface for the Binary Merkle Tree hash.
+package bmt
--- a/pkg/bmt/errors.go
+++ b/pkg/bmt/errors.go
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package bmt
+
+import (
+	"errors"
+)
+
+var ErrOverflow = errors.New("BMT hash capacity exceeded")
--- a/pkg/bmt/legacy/bmt.go
+++ b/pkg/bmt/legacy/bmt.go
+// Copyright 2018 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package legacy
+
+import (
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"hash"
+	"strings"
+	"sync"
+	"sync/atomic"
+
+	"github.com/ethersphere/bee/pkg/bmt"
+)
+
+var _ bmt.Hash = (*Hasher)(nil)
+
+const (
+	// PoolSize is the maximum number of bmt trees used by the hashers, i.e,
+	// the maximum number of concurrent BMT hashing operations performed by the same hasher
+	PoolSize = 8
+)
+
+var (
+	ZeroSpan = make([]byte, 8)
+)
+
+// BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT.
+// implemented by Keccak256 SHA3 sha3.NewLegacyKeccak256
+type BaseHasherFunc func() hash.Hash
+
+// Hasher a reusable hasher for fixed maximum size chunks representing a BMT
+// It reuses a pool of trees for amortised memory allocation and resource control,
+// and supports order-agnostic concurrent segment writes and section (double segment) writes
+// as well as sequential read and write.
+//
+// The same hasher instance must not be called concurrently on more than one chunk.
+//
+// The same hasher instance is synchronously reuseable.
+//
+// Sum gives back the tree to the pool and guaranteed to leave
+// the tree and itself in a state reusable for hashing a new chunk.
+type Hasher struct {
+	mtx    sync.Mutex // protects Hasher.size increments (temporary solution)
+	pool   *TreePool  // BMT resource pool
+	bmt    *tree      // prebuilt BMT resource for flowcontrol and proofs
+	size   int        // bytes written to Hasher since last Reset()
+	cursor int        // cursor to write to on next Write() call
+}
+
+// New creates a reusable BMT Hasher that
+// pulls a new tree from a resource pool for hashing each chunk.
+func New(p *TreePool) *Hasher {
+	return &Hasher{
+		pool: p,
+	}
+}
+
+// TreePool provides a pool of trees used as resources by the BMT Hasher.
+// A tree popped from the pool is guaranteed to have a clean state ready
+// for hashing a new chunk.
+type TreePool struct {
+	lock         sync.Mutex
+	c            chan *tree     // the channel to obtain a resource from the pool
+	hasher       BaseHasherFunc // base hasher to use for the BMT levels
+	SegmentSize  int            // size of leaf segments, stipulated to be = hash size
+	SegmentCount int            // the number of segments on the base level of the BMT
+	Capacity     int            // pool capacity, controls concurrency
+	Depth        int            // depth of the bmt trees = int(log2(segmentCount))+1
+	Size         int            // the total length of the data (count * size)
+	count        int            // current count of (ever) allocated resources
+	zerohashes   [][]byte       // lookup table for predictable padding subtrees for all levels
+}
+
+// NewTreePool creates a tree pool with hasher, segment size, segment count and capacity
+// on Hasher.getTree it reuses free trees or creates a new one if capacity is not reached.
+func NewTreePool(hasher BaseHasherFunc, segmentCount, capacity int) *TreePool {
+	// initialises the zerohashes lookup table
+	depth := calculateDepthFor(segmentCount)
+	segmentSize := hasher().Size()
+	zerohashes := make([][]byte, depth+1)
+	zeros := make([]byte, segmentSize)
+	zerohashes[0] = zeros
+	h := hasher()
+	for i := 1; i < depth+1; i++ {
+		zeros = doSum(h, nil, zeros, zeros)
+		zerohashes[i] = zeros
+	}
+	return &TreePool{
+		c:            make(chan *tree, capacity),
+		hasher:       hasher,
+		SegmentSize:  segmentSize,
+		SegmentCount: segmentCount,
+		Capacity:     capacity,
+		Size:         segmentCount * segmentSize,
+		Depth:        depth,
+		zerohashes:   zerohashes,
+	}
+}
+
+// Drain drains the pool until it has no more than n resources.
+func (p *TreePool) Drain(n int) {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	for len(p.c) > n {
+		<-p.c
+		p.count--
+	}
+}
+
+// Reserve is blocking until it returns an available tree
+// it reuses free trees or creates a new one if size is not reached.
+func (p *TreePool) reserve() *tree {
+	p.lock.Lock()
+	defer p.lock.Unlock()
+	var t *tree
+	if p.count == p.Capacity {
+		return <-p.c
+	}
+	select {
+	case t = <-p.c:
+	default:
+		t = newTree(p.SegmentSize, p.Depth, p.hasher)
+		p.count++
+	}
+	return t
+}
+
+// release gives back a tree to the pool.
+// this tree is guaranteed to be in reusable state.
+func (p *TreePool) release(t *tree) {
+	p.c <- t // can never fail ...
+}
+
+// tree is a reusable control structure representing a BMT
+// organised in a binary tree
+//
+// Hasher uses a TreePool to obtain a tree for each chunk hash
+// the tree is 'locked' while not in the pool.
+type tree struct {
+	leaves  []*node     // leaf nodes of the tree, other nodes accessible via parent links
+	cursor  int         // index of rightmost currently open segment
+	offset  int         // offset (cursor position) within currently open segment
+	section []byte      // the rightmost open section (double segment)
+	result  chan []byte // result channel
+	span    []byte      // The span of the data subsumed under the chunk
+}
+
+// node is a reuseable segment hasher representing a node in a BMT.
+type node struct {
+	isLeft      bool      // whether it is left side of the parent double segment
+	parent      *node     // pointer to parent node in the BMT
+	state       int32     // atomic increment impl concurrent boolean toggle
+	left, right []byte    // this is where the two children sections are written
+	hasher      hash.Hash // preconstructed hasher on nodes
+}
+
+// newNode constructs a segment hasher node in the BMT (used by newTree).
+func newNode(index int, parent *node, hasher hash.Hash) *node {
+	return &node{
+		parent: parent,
+		isLeft: index%2 == 0,
+		hasher: hasher,
+	}
+}
+
+// Draw draws the BMT (badly).
+func (t *tree) Draw(hash []byte) string {
+	var left, right []string
+	var anc []*node
+	for i, n := range t.leaves {
+		left = append(left, fmt.Sprintf("%v", hashstr(n.left)))
+		if i%2 == 0 {
+			anc = append(anc, n.parent)
+		}
+		right = append(right, fmt.Sprintf("%v", hashstr(n.right)))
+	}
+	//anc = t.leaves
+	var hashes [][]string
+	for l := 0; len(anc) > 0; l++ {
+		var nodes []*node
+		hash := []string{""}
+		for i, n := range anc {
+			hash = append(hash, fmt.Sprintf("%v|%v", hashstr(n.left), hashstr(n.right)))
+			if i%2 == 0 && n.parent != nil {
+				nodes = append(nodes, n.parent)
+			}
+		}
+		hash = append(hash, "")
+		hashes = append(hashes, hash)
+		anc = nodes
+	}
+	hashes = append(hashes, []string{"", fmt.Sprintf("%v", hashstr(hash)), ""})
+	total := 60
+	del := "                             "
+	var rows []string
+	for i := len(hashes) - 1; i >= 0; i-- {
+		var textlen int
+		hash := hashes[i]
+		for _, s := range hash {
+			textlen += len(s)
+		}
+		if total < textlen {
+			total = textlen + len(hash)
+		}
+		delsize := (total - textlen) / (len(hash) - 1)
+		if delsize > len(del) {
+			delsize = len(del)
+		}
+		row := fmt.Sprintf("%v: %v", len(hashes)-i-1, strings.Join(hash, del[:delsize]))
+		rows = append(rows, row)
+
+	}
+	rows = append(rows, strings.Join(left, "  "), strings.Join(right, "  "))
+	return strings.Join(rows, "\n") + "\n"
+}
+
+// newTree initialises a tree by building up the nodes of a BMT
+//
+// segmentSize is stipulated to be the size of the hash.
+func newTree(segmentSize, depth int, hashfunc func() hash.Hash) *tree {
+	n := newNode(0, nil, hashfunc())
+	prevlevel := []*node{n}
+	// iterate over levels and creates 2^(depth-level) nodes
+	// the 0 level is on double segment sections so we start at depth - 2 since
+	count := 2
+	for level := depth - 2; level >= 0; level-- {
+		nodes := make([]*node, count)
+		for i := 0; i < count; i++ {
+			parent := prevlevel[i/2]
+			var hasher hash.Hash
+			if level == 0 {
+				hasher = hashfunc()
+			}
+			nodes[i] = newNode(i, parent, hasher)
+		}
+		prevlevel = nodes
+		count *= 2
+	}
+	// the datanode level is the nodes on the last level
+	return &tree{
+		leaves:  prevlevel,
+		result:  make(chan []byte),
+		section: make([]byte, 2*segmentSize),
+	}
+}
+
+// Count returns the maximum amount of bytes that will be processed by this hasher implementation.
+func (h *Hasher) Capacity() int {
+	return h.pool.Size
+}
+
+// writeSection is not in use for this implementation.
+func (h *Hasher) WriteSection(idx int, data []byte) error {
+	return errors.New("this hasher only implements sequential writes. please use Write() instead")
+}
+
+// SetSpan sets the span length value prefix in numeric form for the current hash operation.
+func (h *Hasher) SetSpan(length int64) error {
+	span := LengthToSpan(length)
+	h.getTree().span = span
+	return nil
+}
+
+// SetSpanBytes sets the span length value prefix in bytes for the current hash operation.
+func (h *Hasher) SetSpanBytes(b []byte) error {
+	if len(b) != bmt.SpanSize {
+		return errors.New("invalid span size")
+	}
+	span := b
+	h.getTree().span = span
+	return nil
+}
+
+// Size returns the digest size of the hash
+func (h *Hasher) Size() int {
+	return h.pool.SegmentSize
+}
+
+// BlockSize returns the optimal write size to the Hasher
+func (h *Hasher) BlockSize() int {
+	return 2 * h.pool.SegmentSize
+}
+
+// Sum returns the BMT root hash of the buffer
+// using Sum presupposes sequential synchronous writes (io.Writer interface).
+func (h *Hasher) Sum(b []byte) (s []byte) {
+	t := h.getTree()
+	h.mtx.Lock()
+	if h.size == 0 && t.offset == 0 {
+		h.mtx.Unlock()
+		h.releaseTree()
+		//return h.pool.zerohashes[h.pool.Depth]
+		return h.GetZeroHash()
+	}
+	h.mtx.Unlock()
+	// write the last section with final flag set to true
+	go h.writeSection(t.cursor, t.section, true, true)
+	// wait for the result
+	s = <-t.result
+	if t.span == nil {
+		t.span = LengthToSpan(int64(h.size))
+	}
+	span := t.span
+	// release the tree resource back to the pool
+	h.releaseTree()
+	return doSum(h.pool.hasher(), b, span, s)
+}
+
+// Write calls sequentially add to the buffer to be hashed,
+// with every full segment calls writeSection in a go routine.
+//
+// NOTE: This legacy implementation has no error handling for the writer. Use with caution.
+func (h *Hasher) Write(b []byte) (int, error) {
+	c := h.write(b)
+	return c, nil
+}
+
+// write exposes writing to the hasher to internal methods.
+func (h *Hasher) write(b []byte) int {
+	l := len(b)
+	if l == 0 || l > h.pool.Size {
+		return 0
+	}
+	h.mtx.Lock()
+	h.size += len(b)
+	h.mtx.Unlock()
+	t := h.getTree()
+	secsize := 2 * h.pool.SegmentSize
+	// calculate length of missing bit to complete current open section
+	smax := secsize - t.offset
+	// if at the beginning of chunk or middle of the section
+	if t.offset < secsize {
+		// fill up current segment from buffer
+		copy(t.section[t.offset:], b)
+		// if input buffer consumed and open section not complete, then
+		// advance offset and return
+		if smax == 0 {
+			smax = secsize
+		}
+		if l <= smax {
+			t.offset += l
+			return l
+		}
+	} else if t.cursor == h.pool.SegmentCount*2 {
+		// if end of a section
+		return 0
+	}
+	// read full sections and the last possibly partial section from the input buffer
+	for smax < l {
+		// section complete; push to tree asynchronously
+		go h.writeSection(t.cursor, t.section, true, false)
+		// reset section
+		t.section = make([]byte, secsize)
+		// copy from input buffer at smax to right half of section
+		copy(t.section, b[smax:])
+		// advance cursor
+		t.cursor++
+		// smax here represents successive offsets in the input buffer
+		smax += secsize
+	}
+	t.offset = l - smax + secsize
+	return l
+}
+
+// Reset prepares the Hasher for reuse
+func (h *Hasher) Reset() {
+	h.cursor = 0
+	h.size = 0
+	h.releaseTree()
+}
+
+// LengthToSpan creates a binary data span size representation.
+// It is required for calculating the BMT hash.
+func LengthToSpan(length int64) []byte {
+	spanBytes := make([]byte, 8)
+	binary.LittleEndian.PutUint64(spanBytes, uint64(length))
+	return spanBytes
+}
+
+// GetZeroHash returns the zero hash of the full depth of the Hasher instance.
+func (h *Hasher) GetZeroHash() []byte {
+	return h.pool.zerohashes[h.pool.Depth]
+}
+
+// releaseTree gives back the Tree to the pool whereby it unlocks.
+// It resets tree, segment and index.
+func (h *Hasher) releaseTree() {
+	t := h.bmt
+	if t == nil {
+		return
+	}
+	h.bmt = nil
+	go func() {
+		t.cursor = 0
+		t.offset = 0
+		t.span = nil
+		t.section = make([]byte, h.pool.SegmentSize*2)
+		select {
+		case <-t.result:
+		default:
+		}
+		h.pool.release(t)
+	}()
+}
+
+// getTree obtains a BMT resource by reserving one from the pool and assigns it to the bmt field.
+func (h *Hasher) getTree() *tree {
+	if h.bmt != nil {
+		return h.bmt
+	}
+	t := h.pool.reserve()
+	h.bmt = t
+	return t
+}
+
+// atomic bool toggle implementing a concurrent reusable 2-state object.
+// Atomic addint with %2 implements atomic bool toggle.
+// It returns true if the toggler just put it in the active/waiting state.
+func (n *node) toggle() bool {
+	return atomic.AddInt32(&n.state, 1)%2 == 1
+}
+
+// calculates the hash of the data using hash.Hash.
+//
+// BUG: This legacy implementation has no error handling for the writer. Use with caution.
+func doSum(h hash.Hash, b []byte, data ...[]byte) []byte {
+	h.Reset()
+	for _, v := range data {
+		_, _ = h.Write(v)
+	}
+	return h.Sum(b)
+}
+
+// hashstr is a pretty printer for bytes used in tree.draw.
+func hashstr(b []byte) string {
+	end := len(b)
+	if end > 4 {
+		end = 4
+	}
+	return fmt.Sprintf("%x", b[:end])
+}
+
+// calculateDepthFor calculates the depth (number of levels) in the BMT tree.
+func calculateDepthFor(n int) (d int) {
+	c := 2
+	for ; c < n; c *= 2 {
+		d++
+	}
+	return d + 1
+}
+
+// writeSection writes the hash of i-th section into level 1 node of the BMT tree.
+func (h *Hasher) writeSection(i int, section []byte, double, final bool) {
+	// select the leaf node for the section
+	var n *node
+	var isLeft bool
+	var hasher hash.Hash
+	var level int
+	t := h.getTree()
+	if double {
+		level++
+		n = t.leaves[i]
+		hasher = n.hasher
+		isLeft = n.isLeft
+		n = n.parent
+		// hash the section
+		section = doSum(hasher, nil, section)
+	} else {
+		n = t.leaves[i/2]
+		hasher = n.hasher
+		isLeft = i%2 == 0
+	}
+	// write hash into parent node
+	if final {
+		// for the last segment use writeFinalNode
+		h.writeFinalNode(level, n, hasher, isLeft, section)
+	} else {
+		h.writeNode(n, hasher, isLeft, section)
+	}
+}
+
+// writeNode pushes the data to the node.
+// if it is the first of 2 sisters written, the routine terminates.
+// if it is the second, it calculates the hash and writes it
+// to the parent node recursively.
+// since hashing the parent is synchronous the same hasher can be used.
+func (h *Hasher) writeNode(n *node, bh hash.Hash, isLeft bool, s []byte) {
+	level := 1
+	for {
+		// at the root of the bmt just write the result to the result channel
+		if n == nil {
+			h.getTree().result <- s
+			return
+		}
+		// otherwise assign child hash to left or right segment
+		if isLeft {
+			n.left = s
+		} else {
+			n.right = s
+		}
+		// the child-thread first arriving will terminate
+		if n.toggle() {
+			return
+		}
+		// the thread coming second now can be sure both left and right children are written
+		// so it calculates the hash of left|right and pushes it to the parent
+		s = doSum(bh, nil, n.left, n.right)
+		isLeft = n.isLeft
+		n = n.parent
+		level++
+	}
+}
+
+// writeFinalNode is following the path starting from the final datasegment to the
+// BMT root via parents.
+// For unbalanced trees it fills in the missing right sister nodes using
+// the pool's lookup table for BMT subtree root hashes for all-zero sections.
+// Otherwise behaves like `writeNode`.
+func (h *Hasher) writeFinalNode(level int, n *node, bh hash.Hash, isLeft bool, s []byte) {
+
+	for {
+		// at the root of the bmt just write the result to the result channel
+		if n == nil {
+			if s != nil {
+				h.getTree().result <- s
+			}
+			return
+		}
+		var noHash bool
+		if isLeft {
+			// coming from left sister branch
+			// when the final section's path is going via left child node
+			// we include an all-zero subtree hash for the right level and toggle the node.
+			n.right = h.pool.zerohashes[level]
+			if s != nil {
+				n.left = s
+				// if a left final node carries a hash, it must be the first (and only thread)
+				// so the toggle is already in passive state no need no call
+				// yet thread needs to carry on pushing hash to parent
+				noHash = false
+			} else {
+				// if again first thread then propagate nil and calculate no hash
+				noHash = n.toggle()
+			}
+		} else {
+			// right sister branch
+			if s != nil {
+				// if hash was pushed from right child node, write right segment change state
+				n.right = s
+				// if toggle is true, we arrived first so no hashing just push nil to parent
+				noHash = n.toggle()
+
+			} else {
+				// if s is nil, then thread arrived first at previous node and here there will be two,
+				// so no need to do anything and keep s = nil for parent
+				noHash = true
+			}
+		}
+		// the child-thread first arriving will just continue resetting s to nil
+		// the second thread now can be sure both left and right children are written
+		// it calculates the hash of left|right and pushes it to the parent
+		if noHash {
+			s = nil
+		} else {
+			s = doSum(bh, nil, n.left, n.right)
+		}
+		// iterate to parent
+		isLeft = n.isLeft
+		n = n.parent
+		level++
+	}
+}
--- a/pkg/bmt/legacy/bmt_test.go
+++ b/pkg/bmt/legacy/bmt_test.go
+// Copyright 2017 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package legacy
+
+import (
+	"bytes"
+	"encoding/binary"
+	"fmt"
+	"math/rand"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/ethersphere/bee/pkg/bmt"
+	"github.com/ethersphere/bee/pkg/bmt/reference"
+	"gitlab.com/nolash/go-mockbytes"
+	"golang.org/x/crypto/sha3"
+)
+
+// the actual data length generated (could be longer than max datalength of the BMT)
+const BufferSize = 4128
+
+const (
+	// segmentCount is the maximum number of segments of the underlying chunk
+	// Should be equal to max-chunk-data-size / hash-size
+	// Currently set to 128 == 4096 (default chunk size) / 32 (sha3.keccak256 size)
+	SegmentCount = 128
+)
+
+var Counts = []int{1, 2, 3, 4, 5, 8, 9, 15, 16, 17, 32, 37, 42, 53, 63, 64, 65, 111, 127, 128}
+
+var BenchmarkBMTResult []byte
+
+// calculates the Keccak256 SHA3 hash of the data
+func sha3hash(data ...[]byte) []byte {
+	h := sha3.NewLegacyKeccak256()
+	return doSum(h, nil, data...)
+}
+
+// tests if hasher responds with correct hash comparing the reference implementation return value
+func TestHasherEmptyData(t *testing.T) {
+	hasher := sha3.NewLegacyKeccak256
+	var data []byte
+	for _, count := range Counts {
+		t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) {
+			pool := NewTreePool(hasher, count, PoolSize)
+			defer pool.Drain(0)
+			bmt := New(pool)
+			rbmt := reference.NewRefHasher(hasher(), count)
+			expHash, err := rbmt.Hash(data)
+			if err != nil {
+				t.Fatal(err)
+			}
+			resHash, err := syncHash(bmt, 0, data)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if !bytes.Equal(expHash, resHash) {
+				t.Fatalf("hash mismatch with reference. expected %x, got %x", resHash, expHash)
+			}
+		})
+	}
+}
+
+// tests sequential write with entire max size written in one go
+func TestSyncHasherCorrectness(t *testing.T) {
+	g := mockbytes.New(1, mockbytes.MockTypeStandard)
+	data, err := g.RandomBytes(BufferSize)
+	if err != nil {
+		t.Fatal(err)
+	}
+	hasher := sha3.NewLegacyKeccak256
+	size := hasher().Size()
+
+	for _, count := range Counts {
+		t.Run(fmt.Sprintf("segments_%v", count), func(t *testing.T) {
+			max := count * size
+			var incr int
+			capacity := 1
+			pool := NewTreePool(hasher, count, capacity)
+			defer pool.Drain(0)
+			for n := 0; n <= max; n += incr {
+				incr = 1 + rand.Intn(5)
+				bmt := New(pool)
+				err = testHasherCorrectness(bmt, hasher, data, n, count)
+				if err != nil {
+					t.Fatal(err)
+				}
+			}
+		})
+	}
+}
+
+// Tests that the BMT hasher can be synchronously reused with poolsizes 1 and PoolSize
+func TestHasherReuse(t *testing.T) {
+	t.Run(fmt.Sprintf("poolsize_%d", 1), func(t *testing.T) {
+		testHasherReuse(1, t)
+	})
+	t.Run(fmt.Sprintf("poolsize_%d", PoolSize), func(t *testing.T) {
+		testHasherReuse(PoolSize, t)
+	})
+}
+
+// tests if bmt reuse is not corrupting result
+func testHasherReuse(poolsize int, t *testing.T) {
+	hasher := sha3.NewLegacyKeccak256
+	pool := NewTreePool(hasher, SegmentCount, poolsize)
+	defer pool.Drain(0)
+	bmt := New(pool)
+
+	for i := 0; i < 100; i++ {
+
+		g := mockbytes.New(1, mockbytes.MockTypeStandard)
+		data, err := g.RandomBytes(BufferSize)
+		if err != nil {
+			t.Fatal(err)
+		}
+		n := rand.Intn(bmt.Size())
+		err = testHasherCorrectness(bmt, hasher, data, n, SegmentCount)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// Tests if pool can be cleanly reused even in concurrent use by several hasher
+func TestBMTConcurrentUse(t *testing.T) {
+	hasher := sha3.NewLegacyKeccak256
+	pool := NewTreePool(hasher, SegmentCount, PoolSize)
+	defer pool.Drain(0)
+	cycles := 100
+	errc := make(chan error)
+
+	for i := 0; i < cycles; i++ {
+		go func() {
+			bmt := New(pool)
+			g := mockbytes.New(1, mockbytes.MockTypeStandard)
+			data, _ := g.RandomBytes(BufferSize)
+			n := rand.Intn(bmt.Size())
+			errc <- testHasherCorrectness(bmt, hasher, data, n, 128)
+		}()
+	}
+LOOP:
+	for {
+		select {
+		case <-time.NewTimer(5 * time.Second).C:
+			t.Fatal("timed out")
+		case err := <-errc:
+			if err != nil {
+				t.Fatal(err)
+			}
+			cycles--
+			if cycles == 0 {
+				break LOOP
+			}
+		}
+	}
+}
+
+// Tests BMT Hasher io.Writer interface is working correctly
+// even multiple short random write buffers
+func TestBMTWriterBuffers(t *testing.T) {
+	hasher := sha3.NewLegacyKeccak256
+
+	for _, count := range Counts {
+		t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) {
+			errc := make(chan error)
+			pool := NewTreePool(hasher, count, PoolSize)
+			defer pool.Drain(0)
+			n := count * 32
+			bmt := New(pool)
+
+			g := mockbytes.New(1, mockbytes.MockTypeStandard)
+			data, err := g.RandomBytes(n)
+			if err != nil {
+				t.Fatal(err)
+			}
+			rbmt := reference.NewRefHasher(hasher(), count)
+			refNoMetaHash, err := rbmt.Hash(data)
+			if err != nil {
+				t.Fatal(err)
+			}
+			h := hasher()
+			_, err = h.Write(ZeroSpan)
+			if err != nil {
+				t.Fatal(err)
+			}
+			_, err = h.Write(refNoMetaHash)
+			if err != nil {
+				t.Fatal(err)
+			}
+			refHash := h.Sum(nil)
+			expHash, err := syncHash(bmt, 0, data)
+			if err != nil {
+				t.Fatal(err)
+			}
+			if !bytes.Equal(expHash, refHash) {
+				t.Fatalf("hash mismatch with reference. expected %x, got %x", refHash, expHash)
+			}
+			attempts := 10
+			f := func() error {
+				bmt := New(pool)
+				bmt.Reset()
+				var buflen int
+				for offset := 0; offset < n; offset += buflen {
+					buflen = rand.Intn(n-offset) + 1
+					read, err := bmt.Write(data[offset : offset+buflen])
+					if err != nil {
+						return err
+					}
+					if read != buflen {
+						return fmt.Errorf("incorrect read. expected %v bytes, got %v", buflen, read)
+					}
+				}
+				err := bmt.SetSpan(0)
+				if err != nil {
+					t.Fatal(err)
+				}
+				hash := bmt.Sum(nil)
+				if !bytes.Equal(hash, expHash) {
+					return fmt.Errorf("hash mismatch. expected %x, got %x", hash, expHash)
+				}
+				return nil
+			}
+
+			for j := 0; j < attempts; j++ {
+				go func() {
+					errc <- f()
+				}()
+			}
+			timeout := time.NewTimer(2 * time.Second)
+			for {
+				select {
+				case err := <-errc:
+					if err != nil {
+						t.Fatal(err)
+					}
+					attempts--
+					if attempts == 0 {
+						return
+					}
+				case <-timeout.C:
+					t.Fatalf("timeout")
+				}
+			}
+		})
+	}
+}
+
+// helper function that compares reference and optimised implementations on
+// correctness
+func testHasherCorrectness(bmt *Hasher, hasher BaseHasherFunc, d []byte, n, count int) (err error) {
+	span := make([]byte, 8)
+	if len(d) < n {
+		n = len(d)
+	}
+	binary.LittleEndian.PutUint64(span, uint64(n))
+	data := d[:n]
+	rbmt := reference.NewRefHasher(hasher(), count)
+	var exp []byte
+	if n == 0 {
+		exp = bmt.pool.zerohashes[bmt.pool.Depth]
+	} else {
+		r, err := rbmt.Hash(data)
+		if err != nil {
+			return err
+		}
+		exp = sha3hash(span, r)
+	}
+	got, err := syncHash(bmt, n, data)
+	if err != nil {
+		return err
+	}
+	if !bytes.Equal(got, exp) {
+		return fmt.Errorf("wrong hash: expected %x, got %x", exp, got)
+	}
+	return err
+}
+
+//
+func BenchmarkBMT(t *testing.B) {
+	for size := 4096; size >= 128; size /= 2 {
+		t.Run(fmt.Sprintf("%v_size_%v", "SHA3", size), func(t *testing.B) {
+			benchmarkSHA3(t, size)
+		})
+		t.Run(fmt.Sprintf("%v_size_%v", "Baseline", size), func(t *testing.B) {
+			benchmarkBMTBaseline(t, size)
+		})
+		t.Run(fmt.Sprintf("%v_size_%v", "REF", size), func(t *testing.B) {
+			benchmarkRefHasher(t, size)
+		})
+		t.Run(fmt.Sprintf("%v_size_%v", "BMT", size), func(t *testing.B) {
+			benchmarkBMT(t, size)
+		})
+	}
+}
+
+func BenchmarkPool(t *testing.B) {
+	caps := []int{1, PoolSize}
+	for size := 4096; size >= 128; size /= 2 {
+		for _, c := range caps {
+			t.Run(fmt.Sprintf("poolsize_%v_size_%v", c, size), func(t *testing.B) {
+				benchmarkPool(t, c, size)
+			})
+		}
+	}
+}
+
+// benchmarks simple sha3 hash on chunks
+func benchmarkSHA3(t *testing.B, n int) {
+
+	g := mockbytes.New(1, mockbytes.MockTypeStandard)
+	data, err := g.RandomBytes(n)
+	if err != nil {
+		t.Fatal(err)
+	}
+	hasher := sha3.NewLegacyKeccak256
+	h := hasher()
+
+	t.ReportAllocs()
+	t.ResetTimer()
+	for i := 0; i < t.N; i++ {
+		doSum(h, nil, data)
+	}
+}
+
+// benchmarks the minimum hashing time for a balanced (for simplicity) BMT
+// by doing count/segmentsize parallel hashings of 2*segmentsize bytes
+// doing it on n PoolSize each reusing the base hasher
+// the premise is that this is the minimum computation needed for a BMT
+// therefore this serves as a theoretical optimum for concurrent implementations
+func benchmarkBMTBaseline(t *testing.B, n int) {
+	hasher := sha3.NewLegacyKeccak256
+	hashSize := hasher().Size()
+
+	g := mockbytes.New(1, mockbytes.MockTypeStandard)
+	data, err := g.RandomBytes(hashSize)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	t.ReportAllocs()
+	t.ResetTimer()
+	for i := 0; i < t.N; i++ {
+		count := int32((n-1)/hashSize + 1)
+		wg := sync.WaitGroup{}
+		wg.Add(PoolSize)
+		var i int32
+		for j := 0; j < PoolSize; j++ {
+			go func() {
+				defer wg.Done()
+				h := hasher()
+				for atomic.AddInt32(&i, 1) < count {
+					doSum(h, nil, data)
+				}
+			}()
+		}
+		wg.Wait()
+	}
+}
+
+// benchmarks BMT Hasher
+func benchmarkBMT(t *testing.B, n int) {
+
+	g := mockbytes.New(1, mockbytes.MockTypeStandard)
+	data, err := g.RandomBytes(n)
+	if err != nil {
+		t.Fatal(err)
+	}
+	hasher := sha3.NewLegacyKeccak256
+	pool := NewTreePool(hasher, SegmentCount, PoolSize)
+	bmt := New(pool)
+	var r []byte
+
+	t.ReportAllocs()
+	t.ResetTimer()
+	for i := 0; i < t.N; i++ {
+		r, err = syncHash(bmt, 0, data)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+	BenchmarkBMTResult = r
+}
+
+// benchmarks 100 concurrent bmt hashes with pool capacity
+func benchmarkPool(t *testing.B, poolsize, n int) {
+
+	g := mockbytes.New(1, mockbytes.MockTypeStandard)
+	data, err := g.RandomBytes(n)
+	if err != nil {
+		t.Fatal(err)
+	}
+	hasher := sha3.NewLegacyKeccak256
+	pool := NewTreePool(hasher, SegmentCount, poolsize)
+	cycles := 100
+
+	t.ReportAllocs()
+	t.ResetTimer()
+	wg := sync.WaitGroup{}
+	for i := 0; i < t.N; i++ {
+		wg.Add(cycles)
+		for j := 0; j < cycles; j++ {
+			go func() {
+				defer wg.Done()
+				bmt := New(pool)
+				_, _ = syncHash(bmt, 0, data)
+			}()
+		}
+		wg.Wait()
+	}
+}
+
+// benchmarks the reference hasher
+func benchmarkRefHasher(t *testing.B, n int) {
+
+	g := mockbytes.New(1, mockbytes.MockTypeStandard)
+	data, err := g.RandomBytes(n)
+	if err != nil {
+		t.Fatal(err)
+	}
+	hasher := sha3.NewLegacyKeccak256
+	rbmt := reference.NewRefHasher(hasher(), 128)
+
+	t.ReportAllocs()
+	t.ResetTimer()
+	for i := 0; i < t.N; i++ {
+		_, err := rbmt.Hash(data)
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+}
+
+// Hash hashes the data and the span using the bmt hasher
+func syncHash(h *Hasher, spanLength int, data []byte) ([]byte, error) {
+	h.Reset()
+	err := h.SetSpan(int64(spanLength))
+	if err != nil {
+		return nil, err
+	}
+	_, err = h.Write(data)
+	if err != nil {
+		return nil, err
+	}
+	return h.Sum(nil), nil
+}
+
+// TestUseSyncAsOrdinaryHasher verifies that the bmt.Hasher can be used with the hash.Hash interface
+func TestUseSyncAsOrdinaryHasher(t *testing.T) {
+	hasher := sha3.NewLegacyKeccak256
+	pool := NewTreePool(hasher, SegmentCount, PoolSize)
+	bmt := New(pool)
+	err := bmt.SetSpan(3)
+	if err != nil {
+		t.Fatal(err)
+	}
+	_, err = bmt.Write([]byte("foo"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	res := bmt.Sum(nil)
+	refh := reference.NewRefHasher(hasher(), 128)
+	resh, err := refh.Hash([]byte("foo"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	hsub := hasher()
+	span := LengthToSpan(3)
+	_, err = hsub.Write(span)
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	_, err = hsub.Write(resh)
+	if err != nil {
+		t.Fatal(err)
+	}
+	refRes := hsub.Sum(nil)
+	if !bytes.Equal(res, refRes) {
+		t.Fatalf("normalhash; expected %x, got %x", refRes, res)
+	}
+}
+
+func TestConformsToBMTInterface(t *testing.T) {
+	func() bmt.Hash {
+		return (New(nil))
+	}()
+}
--- a/pkg/bmt/legacy/doc.go
+++ b/pkg/bmt/legacy/doc.go
+// Copyright 2018 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+// Binary Merkle Tree Hash is a hash function over arbitrary datachunks of limited size.
+// It is defined as the root hash of the binary merkle tree built over fixed size segments
+// of the underlying chunk using any base hash function (e.g., keccak 256 SHA3).
+// Chunks with data shorter than the fixed size are hashed as if they had zero padding.
+//
+// BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
+// 128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash
+//
+// The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
+// segment is a substring of a chunk starting at a particular offset.
+// The size of the underlying segments is fixed to the size of the base hash (called the resolution
+// of the BMT hash), Using Keccak256 SHA3 hash is 32 bytes, the EVM word size to optimize for on-chain BMT verification
+// as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.
+//
+// Two implementations are provided:
+//
+// RefHasher is optimized for code simplicity and meant as a reference implementation
+// that is simple to understand
+//
+// Hasher is optimized for speed taking advantage of concurrency with minimalistic
+// control structure to coordinate the concurrent routines
+//
+// BMT Hasher implements the following interfaces:
+//
+// standard golang hash.Hash - synchronous, reusable
+//
+// io.Writer - synchronous left-to-right datawriter
+package legacy
--- a/pkg/bmt/pool/pool.go
+++ b/pkg/bmt/pool/pool.go
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pool
+
+import (
+	"hash"
+	"sync"
+
+	bmtlegacy "github.com/ethersphere/bee/pkg/bmt/legacy"
+	"golang.org/x/crypto/sha3"
+)
+
+// Pooler pools bmt Hashers.
+// It provides the ability for the number of hashers to grow
+// according to demand, but will shrink once the minimum defined
+// hashers are put back into the pool.
+type Pooler interface {
+	// Get a bmt Hasher instance.
+	// Instances are reset before being returned to the caller.
+	Get() *bmtlegacy.Hasher
+	// Put a bmt Hasher back into the pool
+	Put(*bmtlegacy.Hasher)
+	// Size of the pool.
+	Size() int
+}
+
+type pool struct {
+	p       sync.Pool
+	mtx     sync.Mutex
+	minimum int // minimum number of instances the pool should have
+	size    int // size of the pool (only accounted for when items are put back)
+	rented  int // number of video tapes on rent
+}
+
+// New returns a new HasherPool.
+func New(minPool, branches int) Pooler {
+	return &pool{
+		p: sync.Pool{
+			New: func() interface{} {
+				return bmtlegacy.New(bmtlegacy.NewTreePool(hashFunc, branches, 1)) // one tree per hasher
+			},
+		},
+		minimum: minPool,
+	}
+}
+
+// Get gets a bmt Hasher from the pool.
+func (h *pool) Get() *bmtlegacy.Hasher {
+	h.mtx.Lock()
+	defer h.mtx.Unlock()
+
+	v := h.p.Get().(*bmtlegacy.Hasher)
+	h.rented++
+
+	if h.size > 0 {
+		h.size--
+	}
+
+	return v
+}
+
+// Put puts a Hasher back into the pool.
+// It discards the instance if the minimum number of instances
+// has been reached.
+// The hasher is reset before being put back into the pool.
+func (h *pool) Put(v *bmtlegacy.Hasher) {
+	h.mtx.Lock()
+	defer h.mtx.Unlock()
+
+	h.rented--
+
+	// only put back if we're not exceeding the minimum capacity
+	if h.size+1 > h.minimum {
+		return
+	}
+
+	v.Reset()
+	h.p.Put(v)
+	h.size++
+}
+
+// Size of the pool.
+func (h *pool) Size() int {
+	h.mtx.Lock()
+	defer h.mtx.Unlock()
+	return h.size
+}
+
+func hashFunc() hash.Hash {
+	return sha3.NewLegacyKeccak256()
+}
--- a/pkg/bmt/pool/pool_test.go
+++ b/pkg/bmt/pool/pool_test.go
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package pool_test
+
+import (
+	"sync"
+	"testing"
+
+	"github.com/ethersphere/bee/pkg/bmt/pool"
+)
+
+const str = "hello world"
+
+func TestHasherPool(t *testing.T) {
+	h := pool.New(3, 128)
+	v := h.Get()
+	_, err := v.Write([]byte(str))
+	if err != nil {
+		t.Fatal(err)
+	}
+	h.Put(v)
+	if s := h.Size(); s != 1 {
+		t.Fatalf("expected size 1 but got %d", s)
+	}
+}
+
+func TestHasherPool_concurrent(t *testing.T) {
+	h := pool.New(3, 128)
+
+	c := make(chan struct{})
+	var wg sync.WaitGroup
+
+	// request 10 copies
+	for i := 0; i < 10; i++ {
+		v := h.Get()
+		_, err := v.Write([]byte(str))
+		if err != nil {
+			t.Fatal(err)
+		}
+		wg.Add(1)
+		go func() {
+			defer wg.Done()
+			<-c
+			h.Put(v)
+		}()
+	}
+
+	// when we get instances from the pool, we dont know
+	// which ones are new and which aren't, so size is
+	// only incremented when items are put back
+	if s := h.Size(); s != 0 {
+		t.Fatalf("expected size 0 but got %d", s)
+	}
+
+	close(c)
+	wg.Wait()
+
+	if s := h.Size(); s != 3 {
+		t.Fatalf("expected size 3 but got %d", s)
+	}
+}
--- a/pkg/bmt/reference/doc.go
+++ b/pkg/bmt/reference/doc.go
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Package reference is a simple nonconcurrent reference implementation for hashsize segment based
+// Binary Merkle tree hash on arbitrary but fixed maximum chunksize n where 0 <= n <= 4096
+//
+// This implementation does not take advantage of any paralellisms and uses
+// far more memory than necessary, but it is easy to see that it is correct.
+// It can be used for generating test cases for optimized implementations.
+// There is extra check on reference hasher correctness in bmt_test.go
+// * TestRefHasher
+// * testBMTHasherCorrectness function
+package reference
--- a/pkg/bmt/reference/reference.go
+++ b/pkg/bmt/reference/reference.go
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reference
+
+import (
+	"hash"
+)
+
+// RefHasher is the non-optimized easy-to-read reference implementation of BMT.
+type RefHasher struct {
+	maxDataLength int       // c * hashSize, where c = 2 ^ ceil(log2(count)), where count = ceil(length / hashSize)
+	sectionLength int       // 2 * hashSize
+	hasher        hash.Hash // base hash func (Keccak256 SHA3)
+}
+
+// NewRefHasher returns a new RefHasher.
+func NewRefHasher(h hash.Hash, count int) *RefHasher {
+	hashsize := h.Size()
+	c := 2
+	for ; c < count; c *= 2 {
+	}
+	return &RefHasher{
+		sectionLength: 2 * hashsize,
+		maxDataLength: c * hashsize,
+		hasher:        h,
+	}
+}
+
+// Hash returns the BMT hash of the byte slice.
+func (rh *RefHasher) Hash(data []byte) ([]byte, error) {
+	// if data is shorter than the base length (maxDataLength), we provide padding with zeros
+	d := make([]byte, rh.maxDataLength)
+	length := len(data)
+	if length > rh.maxDataLength {
+		length = rh.maxDataLength
+	}
+	copy(d, data[:length])
+	return rh.hash(d, rh.maxDataLength)
+}
+
+// hash calls itself recursively on both halves of the given slice
+// concatenates the results, and returns the hash of that
+// if the length of d is 2 * segmentSize then just returns the hash of that section
+// data has length maxDataLength = segmentSize * 2^k
+func (rh *RefHasher) hash(data []byte, length int) ([]byte, error) {
+	var section []byte
+	if length == rh.sectionLength {
+		// section contains two data segments (d)
+		section = data
+	} else {
+		// section contains hashes of left and right BMT subtree
+		// to be calculated by calling hash recursively on left and right half of d
+		length /= 2
+		left, err := rh.hash(data[:length], length)
+		if err != nil {
+			return nil, err
+		}
+		right, err := rh.hash(data[length:], length)
+		if err != nil {
+			return nil, err
+		}
+		section = append(left, right...)
+	}
+	rh.hasher.Reset()
+	_, err := rh.hasher.Write(section)
+	if err != nil {
+		return nil, err
+	}
+	return rh.hasher.Sum(nil), nil
+}
--- a/pkg/bmt/reference/reference_test.go
+++ b/pkg/bmt/reference/reference_test.go
+// Copyright 2020 The Swarm Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package reference_test
+
+import (
+	"bytes"
+	"fmt"
+	"hash"
+	"testing"
+
+	"github.com/ethersphere/bee/pkg/bmt/reference"
+	"gitlab.com/nolash/go-mockbytes"
+
+	"golang.org/x/crypto/sha3"
+)
+
+// calculates the hash of the data using hash.Hash
+func doSum(h hash.Hash, b []byte, data ...[]byte) ([]byte, error) {
+	h.Reset()
+	for _, v := range data {
+		var err error
+		_, err = h.Write(v)
+		if err != nil {
+			return nil, err
+		}
+	}
+	return h.Sum(b), nil
+}
+
+// calculates the Keccak256 SHA3 hash of the data
+func sha3hash(t *testing.T, data ...[]byte) []byte {
+	t.Helper()
+	h := sha3.NewLegacyKeccak256()
+	r, err := doSum(h, nil, data...)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return r
+}
+
+// TestRefHasher tests that the RefHasher computes the expected BMT hash for some small data lengths.
+func TestRefHasher(t *testing.T) {
+	// the test struct is used to specify the expected BMT hash for
+	// segment counts between from and to and lengths from 1 to datalength
+	for i, x := range []struct {
+		from     int
+		to       int
+		expected func([]byte) []byte
+	}{
+		{
+			// all lengths in [0,64] should be:
+			//
+			//   sha3hash(data)
+			//
+			from: 1,
+			to:   2,
+			expected: func(d []byte) []byte {
+				data := make([]byte, 64)
+				copy(data, d)
+				return sha3hash(t, data)
+			},
+		}, {
+			// all lengths in [3,4] should be:
+			//
+			//   sha3hash(
+			//     sha3hash(data[:64])
+			//     sha3hash(data[64:])
+			//   )
+			//
+			from: 3,
+			to:   4,
+			expected: func(d []byte) []byte {
+				data := make([]byte, 128)
+				copy(data, d)
+				return sha3hash(t, sha3hash(t, data[:64]), sha3hash(t, data[64:]))
+			},
+		}, {
+			// all bmttestutil.SegmentCounts in [5,8] should be:
+			//
+			//   sha3hash(
+			//     sha3hash(
+			//       sha3hash(data[:64])
+			//       sha3hash(data[64:128])
+			//     )
+			//     sha3hash(
+			//       sha3hash(data[128:192])
+			//       sha3hash(data[192:])
+			//     )
+			//   )
+			//
+			from: 5,
+			to:   8,
+			expected: func(d []byte) []byte {
+				data := make([]byte, 256)
+				copy(data, d)
+				return sha3hash(t, sha3hash(t, sha3hash(t, data[:64]), sha3hash(t, data[64:128])), sha3hash(t, sha3hash(t, data[128:192]), sha3hash(t, data[192:])))
+			},
+		},
+	} {
+		for segCount := x.from; segCount <= x.to; segCount++ {
+			for length := 1; length <= segCount*32; length++ {
+				t.Run(fmt.Sprintf("%d_segments_%d_bytes", segCount, length), func(t *testing.T) {
+					g := mockbytes.New(i, mockbytes.MockTypeStandard)
+					data, err := g.RandomBytes(length)
+					if err != nil {
+						t.Fatal(err)
+					}
+					expected := x.expected(data)
+					actual, err := reference.NewRefHasher(sha3.NewLegacyKeccak256(), segCount).Hash(data)
+					if err != nil {
+						t.Fatal(err)
+					}
+					if !bytes.Equal(actual, expected) {
+						t.Fatalf("expected %x, got %x", expected, actual)
+					}
+				})
+			}
+		}
+	}
+}
--- a/pkg/bmtpool/bmtpool.go
+++ b/pkg/bmtpool/bmtpool.go
@@ -7,9 +7,9 @@
 package bmtpool

 import (
+	bmtlegacy "github.com/ethersphere/bee/pkg/bmt/legacy"
+	"github.com/ethersphere/bee/pkg/bmt/pool"
 	"github.com/ethersphere/bee/pkg/swarm"
-	bmtlegacy "github.com/ethersphere/bmt/legacy"
-	"github.com/ethersphere/bmt/pool"
 )

 var instance pool.Pooler