Commit 421f3935 authored by Viktor Trón, committed by GitHub

bmt: simplification and cleanup (#1490)

parent a4ca719e
// Copyright 2021 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bmt_test
import (
"fmt"
"testing"
"github.com/ethersphere/bee/pkg/bmt"
"github.com/ethersphere/bee/pkg/bmt/reference"
"github.com/ethersphere/bee/pkg/swarm"
"golang.org/x/sync/errgroup"
)
func BenchmarkBMT(t *testing.B) {
for size := 4096; size >= 128; size /= 2 {
t.Run(fmt.Sprintf("%v_size_%v", "SHA3", size), func(t *testing.B) {
benchmarkSHA3(t, size)
})
t.Run(fmt.Sprintf("%v_size_%v", "Baseline", size), func(t *testing.B) {
benchmarkBMTBaseline(t, size)
})
t.Run(fmt.Sprintf("%v_size_%v", "REF", size), func(t *testing.B) {
benchmarkRefHasher(t, size)
})
t.Run(fmt.Sprintf("%v_size_%v", "BMT", size), func(t *testing.B) {
benchmarkBMT(t, size)
})
}
}
func BenchmarkPool(t *testing.B) {
for _, c := range []int{1, 8, 16, 32, 64} {
t.Run(fmt.Sprintf("poolsize_%v", c), func(t *testing.B) {
benchmarkPool(t, c)
})
}
}
// benchmarks simple sha3 hash on chunks
func benchmarkSHA3(t *testing.B, n int) {
testData := randomBytes(t, seed)
t.ReportAllocs()
t.ResetTimer()
for i := 0; i < t.N; i++ {
if _, err := bmt.Sha3hash(testData[:n]); err != nil {
t.Fatalf("seed %d: %v", seed, err)
}
}
}
// benchmarks the minimum hashing time for a balanced (for simplicity) BMT
// by doing count/segmentsize parallel hashings of 2*segmentsize bytes
// repeating this for every benchmark iteration, reusing the base hashers
// the premise is that this is the minimum computation needed for a BMT
// therefore this serves as a theoretical optimum for concurrent implementations
func benchmarkBMTBaseline(t *testing.B, n int) {
testData := randomBytes(t, seed)
t.ReportAllocs()
t.ResetTimer()
for i := 0; i < t.N; i++ {
eg := new(errgroup.Group)
for j := 0; j < testSegmentCount; j++ {
eg.Go(func() error {
_, err := bmt.Sha3hash(testData[:hashSize])
return err
})
}
if err := eg.Wait(); err != nil {
t.Fatalf("seed %d: %v", seed, err)
}
}
}
// benchmarks BMT Hasher
func benchmarkBMT(t *testing.B, n int) {
testData := randomBytes(t, seed)
pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, testSegmentCount, testPoolSize))
h := pool.Get()
defer pool.Put(h)
t.ReportAllocs()
t.ResetTimer()
for i := 0; i < t.N; i++ {
if _, err := syncHash(h, testData[:n]); err != nil {
t.Fatalf("seed %d: %v", seed, err)
}
}
}
// benchmarks 100 concurrent bmt hashes with pool capacity
func benchmarkPool(t *testing.B, poolsize int) {
testData := randomBytes(t, seed)
pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, testSegmentCount, poolsize))
cycles := 100
t.ReportAllocs()
t.ResetTimer()
for i := 0; i < t.N; i++ {
eg := new(errgroup.Group)
for j := 0; j < cycles; j++ {
eg.Go(func() error {
h := pool.Get()
defer pool.Put(h)
_, err := syncHash(h, testData[:h.Capacity()])
return err
})
}
if err := eg.Wait(); err != nil {
t.Fatalf("seed %d: %v", seed, err)
}
}
}
// benchmarks the reference hasher
func benchmarkRefHasher(t *testing.B, n int) {
testData := randomBytes(t, seed)
rbmt := reference.NewRefHasher(swarm.NewHasher(), 128)
t.ReportAllocs()
t.ResetTimer()
for i := 0; i < t.N; i++ {
_, err := rbmt.Hash(testData[:n])
if err != nil {
t.Fatal(err)
}
}
}
// Copyright 2021 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bmt
import (
"encoding/binary"
"hash"
"github.com/ethersphere/bee/pkg/swarm"
)
const (
SpanSize = 8
)
var _ Hash = (*Hasher)(nil)
var (
zerospan = make([]byte, 8)
zerosection = make([]byte, 64)
)
// Hasher is a reusable hasher for fixed maximum size chunks representing a BMT
// It reuses a pool of trees for amortised memory allocation and resource control,
// and supports order-agnostic concurrent segment writes and section (double segment) writes
// as well as sequential read and write.
//
// The same hasher instance must not be called concurrently on more than one chunk.
//
// The same hasher instance is synchronously reusable.
//
// Sum gives back the tree to the pool and is guaranteed to leave
// the tree and itself in a state reusable for hashing a new chunk.
type Hasher struct {
*Conf // configuration
bmt *tree // prebuilt BMT resource for flowcontrol and proofs
size int // bytes written to Hasher since last Reset()
pos int // index of rightmost currently open segment
offset int // offset (cursor position) within currently open segment
result chan []byte // result channel
errc chan error // error channel
span []byte // The span of the data subsumed under the chunk
}
// Capacity returns the maximum amount of bytes that will be processed by this hasher implementation.
// Since BMT assumes a balanced binary tree, capacity is always a power of 2.
func (h *Hasher) Capacity() int {
return h.maxSize
}
// LengthToSpan creates a binary data span size representation.
// It is required for calculating the BMT hash.
func LengthToSpan(length int64) []byte {
span := make([]byte, SpanSize)
binary.LittleEndian.PutUint64(span, uint64(length))
return span
}
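// Illustrative example (not part of the original source): the span of a
// full 4096-byte chunk is the little-endian encoding of its length, i.e.
//
//	span := LengthToSpan(4096)
//	fmt.Printf("% x\n", span) // prints: 00 10 00 00 00 00 00 00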
// SetHeaderInt64 sets the metadata preamble to the little endian binary representation of int64 argument for the current hash operation.
func (h *Hasher) SetHeaderInt64(length int64) {
binary.LittleEndian.PutUint64(h.span, uint64(length))
}
// SetHeader sets the metadata preamble to the given span bytes for the current hash operation.
func (h *Hasher) SetHeader(span []byte) {
copy(h.span, span)
}
// Size returns the digest size of the hash
func (h *Hasher) Size() int {
return h.segmentSize
}
// BlockSize returns the optimal write size to the Hasher
func (h *Hasher) BlockSize() int {
return 2 * h.segmentSize
}
// Hash returns the BMT root hash of the buffer and an error.
// Using Hash presupposes sequential synchronous writes (io.Writer interface).
func (h *Hasher) Hash(b []byte) ([]byte, error) {
if h.size == 0 {
return sha3hash(h.span, h.zerohashes[h.depth])
}
copy(h.bmt.buffer[h.size:], zerosection)
// write the last section with final flag set to true
go h.processSection(h.pos, true)
select {
case result := <-h.result:
return sha3hash(h.span, result)
case err := <-h.errc:
return nil, err
}
}
// Sum returns the BMT root hash of the buffer, unsafe version of Hash
func (h *Hasher) Sum(b []byte) []byte {
s, _ := h.Hash(b)
return s
}
// Write sequentially adds to the buffer to be hashed;
// for every completed section it calls processSection in a goroutine.
func (h *Hasher) Write(b []byte) (int, error) {
l := len(b)
max := h.maxSize - h.size
if l > max {
l = max
}
copy(h.bmt.buffer[h.size:], b)
secsize := 2 * h.segmentSize
from := h.size / secsize
h.offset = h.size % secsize
h.size += l
to := h.size / secsize
if l == max {
to--
}
h.pos = to
for i := from; i < to; i++ {
go h.processSection(i, false)
}
return l, nil
}
// Reset prepares the Hasher for reuse
func (h *Hasher) Reset() {
h.pos = 0
h.size = 0
h.offset = 0
copy(h.span, zerospan)
}
// processSection writes the hash of the i-th section into the level 1 node of the BMT tree.
func (h *Hasher) processSection(i int, final bool) {
secsize := 2 * h.segmentSize
offset := i * secsize
level := 1
// select the leaf node for the section
n := h.bmt.leaves[i]
isLeft := n.isLeft
hasher := n.hasher
n = n.parent
// hash the section
section, err := doHash(hasher, h.bmt.buffer[offset:offset+secsize])
if err != nil {
select {
case h.errc <- err:
default:
}
return
}
// write hash into parent node
if final {
// for the last segment use writeFinalNode
h.writeFinalNode(level, n, isLeft, section)
} else {
h.writeNode(n, isLeft, section)
}
}
// writeNode pushes the data to the node.
// if it is the first of 2 sisters written, the routine terminates.
// if it is the second, it calculates the hash and writes it
// to the parent node recursively.
// since hashing the parent is synchronous the same hasher can be used.
func (h *Hasher) writeNode(n *node, isLeft bool, s []byte) {
var err error
level := 1
for {
// at the root of the bmt just write the result to the result channel
if n == nil {
h.result <- s
return
}
// otherwise assign child hash to left or right segment
if isLeft {
n.left = s
} else {
n.right = s
}
// the child-thread first arriving will terminate
if n.toggle() {
return
}
// the thread coming second now can be sure both left and right children are written
// so it calculates the hash of left|right and pushes it to the parent
s, err = doHash(n.hasher, n.left, n.right)
if err != nil {
select {
case h.errc <- err:
default:
}
return
}
isLeft = n.isLeft
n = n.parent
level++
}
}
// writeFinalNode follows the path from the final data segment to the
// BMT root via parents.
// For unbalanced trees it fills in the missing right sister nodes using
// the pool's lookup table for BMT subtree root hashes for all-zero sections.
// Otherwise behaves like `writeNode`.
func (h *Hasher) writeFinalNode(level int, n *node, isLeft bool, s []byte) {
var err error
for {
// at the root of the bmt just write the result to the result channel
if n == nil {
if s != nil {
h.result <- s
}
return
}
var noHash bool
if isLeft {
// coming from the left sister branch:
// when the final section's path goes via the left child node,
// we include an all-zero subtree hash for the right sister and toggle the node.
n.right = h.zerohashes[level]
if s != nil {
n.left = s
// if a left final node carries a hash, it must be the first (and only) thread,
// so the toggle is already in passive state: there is no need to call it,
// but the thread needs to carry on pushing the hash to the parent
noHash = false
} else {
// if this thread is again the first to arrive, propagate nil and calculate no hash
noHash = n.toggle()
}
} else {
// right sister branch
if s != nil {
// if a hash was pushed from the right child node, write the right segment and change state
n.right = s
// if toggle is true, we arrived first, so there is no hashing: just push nil to the parent
noHash = n.toggle()
} else {
// if s is nil, this thread also arrived first at the previous node, and a second
// thread will arrive here too, so there is nothing to do: keep s = nil for the parent
noHash = true
}
}
// the child-thread first arriving will just continue resetting s to nil
// the second thread now can be sure both left and right children are written
// it calculates the hash of left|right and pushes it to the parent
if noHash {
s = nil
} else {
s, err = doHash(n.hasher, n.left, n.right)
if err != nil {
select {
case h.errc <- err:
default:
}
return
}
}
// iterate to parent
isLeft = n.isLeft
n = n.parent
level++
}
}
// calculates the Keccak256 SHA3 hash of the data
func sha3hash(data ...[]byte) ([]byte, error) {
return doHash(swarm.NewHasher(), data...)
}
// doHash calculates the hash of the data slices written in sequence
func doHash(h hash.Hash, data ...[]byte) ([]byte, error) {
h.Reset()
for _, v := range data {
if _, err := h.Write(v); err != nil {
return nil, err
}
}
return h.Sum(nil), nil
}
// Copyright 2021 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bmt_test
import (
"bytes"
"context"
"fmt"
"io"
"math/rand"
"sort"
"testing"
"time"
"github.com/ethersphere/bee/pkg/bmt"
"github.com/ethersphere/bee/pkg/bmt/reference"
"github.com/ethersphere/bee/pkg/swarm"
"golang.org/x/sync/errgroup"
)
const (
// testPoolSize is the number of bmt trees the pool keeps available for concurrent use
testPoolSize = 16
// testSegmentCount is the maximum number of segments of the underlying chunk
// Should be equal to max-chunk-data-size / hash-size
// Currently set to 128 == 4096 (default chunk size) / 32 (sha3.keccak256 size)
testSegmentCount = 128
)
var (
testSegmentCounts = []int{1, 2, 3, 4, 5, 8, 9, 15, 16, 17, 32, 37, 42, 53, 63, 64, 65, 111, 127, 128}
hashSize = swarm.NewHasher().Size()
seed = time.Now().Unix()
)
func refHash(count int, data []byte) ([]byte, error) {
rbmt := reference.NewRefHasher(swarm.NewHasher(), count)
refNoMetaHash, err := rbmt.Hash(data)
if err != nil {
return nil, err
}
return bmt.Sha3hash(bmt.LengthToSpan(int64(len(data))), refNoMetaHash)
}
// syncHash hashes the data and the span using the bmt hasher
func syncHash(h *bmt.Hasher, data []byte) ([]byte, error) {
h.Reset()
h.SetHeaderInt64(int64(len(data)))
_, err := h.Write(data)
if err != nil {
return nil, err
}
return h.Hash(nil)
}
// tests that the hasher returns the correct hash, comparing against the reference implementation's return value
func TestHasherEmptyData(t *testing.T) {
for _, count := range testSegmentCounts {
t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) {
expHash, err := refHash(count, nil)
if err != nil {
t.Fatal(err)
}
pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, count, 1))
h := pool.Get()
resHash, err := syncHash(h, nil)
if err != nil {
t.Fatal(err)
}
pool.Put(h)
if !bytes.Equal(expHash, resHash) {
t.Fatalf("hash mismatch with reference. expected %x, got %x", expHash, resHash)
}
})
}
}
// tests sequential write with entire max size written in one go
func TestSyncHasherCorrectness(t *testing.T) {
testData := randomBytes(t, seed)
for _, count := range testSegmentCounts {
t.Run(fmt.Sprintf("segments_%v", count), func(t *testing.T) {
max := count * hashSize
var incr int
capacity := 1
pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, count, capacity))
for n := 0; n <= max; n += incr {
h := pool.Get()
incr = 1 + rand.Intn(5)
err := testHasherCorrectness(h, testData, n, count)
if err != nil {
t.Fatalf("seed %d: %v", seed, err)
}
pool.Put(h)
}
})
}
}
// tests that the BMT hasher can be synchronously reused with poolsizes 1 and testPoolSize
func TestHasherReuse(t *testing.T) {
t.Run(fmt.Sprintf("poolsize_%d", 1), func(t *testing.T) {
testHasherReuse(t, 1)
})
t.Run(fmt.Sprintf("poolsize_%d", testPoolSize), func(t *testing.T) {
testHasherReuse(t, testPoolSize)
})
}
// tests that bmt reuse does not corrupt the result
func testHasherReuse(t *testing.T, poolsize int) {
pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, testSegmentCount, poolsize))
h := pool.Get()
defer pool.Put(h)
for i := 0; i < 100; i++ {
seed := int64(i)
testData := randomBytes(t, seed)
n := rand.Intn(h.Capacity())
err := testHasherCorrectness(h, testData, n, testSegmentCount)
if err != nil {
t.Fatalf("seed %d: %v", seed, err)
}
}
}
// tests if pool can be cleanly reused even in concurrent use by several hashers
func TestBMTConcurrentUse(t *testing.T) {
testData := randomBytes(t, seed)
pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, testSegmentCount, testPoolSize))
cycles := 100
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
eg, ectx := errgroup.WithContext(ctx)
for i := 0; i < cycles; i++ {
eg.Go(func() error {
select {
case <-ectx.Done():
return ectx.Err()
default:
}
h := pool.Get()
defer pool.Put(h)
n := rand.Intn(h.Capacity())
return testHasherCorrectness(h, testData, n, testSegmentCount)
})
}
if err := eg.Wait(); err != nil {
t.Fatalf("seed %d: %v", seed, err)
}
}
// tests that the BMT Hasher's io.Writer interface works correctly even with random short writes
func TestBMTWriterBuffers(t *testing.T) {
for i, count := range testSegmentCounts {
t.Run(fmt.Sprintf("%d_segments", count), func(t *testing.T) {
pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, count, testPoolSize))
h := pool.Get()
defer pool.Put(h)
size := h.Capacity()
seed := int64(i)
testData := randomBytes(t, seed)
resHash, err := syncHash(h, testData[:size])
if err != nil {
t.Fatal(err)
}
expHash, err := refHash(count, testData[:size])
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(resHash, expHash) {
t.Fatalf("single write :hash mismatch with reference. expected %x, got %x", expHash, resHash)
}
attempts := 10
f := func() error {
h := pool.Get()
defer pool.Put(h)
reads := rand.Intn(count*2-1) + 1
offsets := make([]int, reads+1)
for i := 0; i < reads; i++ {
offsets[i] = rand.Intn(size) + 1
}
offsets[reads] = size
from := 0
sort.Ints(offsets)
for _, to := range offsets {
if from < to {
read, err := h.Write(testData[from:to])
if err != nil {
return err
}
if read != to-from {
return fmt.Errorf("incorrect read. expected %v bytes, got %v", to-from, read)
}
from = to
}
}
h.SetHeaderInt64(int64(size))
resHash, err := h.Hash(nil)
if err != nil {
return err
}
if !bytes.Equal(resHash, expHash) {
return fmt.Errorf("hash mismatch on %v. expected %x, got %x", offsets, expHash, resHash)
}
return nil
}
ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
defer cancel()
eg, ectx := errgroup.WithContext(ctx)
for i := 0; i < attempts; i++ {
eg.Go(func() error {
select {
case <-ectx.Done():
return ectx.Err()
default:
}
return f()
})
}
if err := eg.Wait(); err != nil {
t.Fatalf("seed %d: %v", seed, err)
}
})
}
}
// helper function that compares reference and optimised implementations for correctness
func testHasherCorrectness(h *bmt.Hasher, data []byte, n, count int) (err error) {
if len(data) < n {
n = len(data)
}
exp, err := refHash(count, data[:n])
if err != nil {
return err
}
got, err := syncHash(h, data[:n])
if err != nil {
return err
}
if !bytes.Equal(got, exp) {
return fmt.Errorf("wrong hash: expected %x, got %x", exp, got)
}
return nil
}
// verifies that the bmt.Hasher can be used with the hash.Hash interface
func TestUseSyncAsOrdinaryHasher(t *testing.T) {
pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, testSegmentCount, testPoolSize))
h := pool.Get()
defer pool.Put(h)
data := []byte("moodbytesmoodbytesmoodbytesmoodbytes")
expHash, err := refHash(128, data)
if err != nil {
t.Fatal(err)
}
h.SetHeaderInt64(int64(len(data)))
_, err = h.Write(data)
if err != nil {
t.Fatal(err)
}
resHash := h.Sum(nil)
if !bytes.Equal(expHash, resHash) {
t.Fatalf("normalhash; expected %x, got %x", expHash, resHash)
}
}
func randomBytes(t testing.TB, seed int64) []byte {
t.Helper()
data := make([]byte, 4096)
s := rand.NewSource(seed)
r := rand.New(s)
_, err := io.ReadFull(r, data)
if err != nil {
t.Fatal(err)
}
return data
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Command generate_legacy generates bmt hashes of sequential byte inputs
// for every possible input length of the legacy bmt hasher
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"github.com/ethersphere/bee/pkg/bmt/legacy"
"gitlab.com/nolash/go-mockbytes"
"golang.org/x/crypto/sha3"
)
func main() {
// create output directory, fail if it already exists or error creating
if len(os.Args) != 2 {
fmt.Fprintf(os.Stderr, "Usage: generate-hashes <output_directory>\n")
os.Exit(1)
}
outputDir, err := filepath.Abs(os.Args[1])
if err != nil {
fmt.Fprintf(os.Stderr, "Invalid input: %s", err)
os.Exit(1)
}
err = os.Mkdir(outputDir, 0750)
if os.IsExist(err) {
fmt.Fprintf(os.Stderr, "Directory %s already exists\n", outputDir)
os.Exit(1)
} else if err != nil {
fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
os.Exit(1)
}
// set up hasher
hashPool := legacy.NewTreePool(sha3.NewLegacyKeccak256, 128, legacy.PoolSize)
bmtHash := legacy.New(hashPool)
// create sequence generator and outputs
var i int
g := mockbytes.New(0, mockbytes.MockTypeStandard).WithModulus(255)
for i = 0; i < 4096; i++ {
s := fmt.Sprintf("processing %d...", i)
fmt.Fprintf(os.Stderr, "%-64s\r", s)
filename := fmt.Sprintf("%s/%d.bin", outputDir, i)
b, err := g.SequentialBytes(i)
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
bmtHash.Reset()
_, err = bmtHash.Write(b)
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
sum := bmtHash.Sum(nil)
err = ioutil.WriteFile(filename, sum, 0666)
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
}
// Be kind and give feedback to user
dirString := fmt.Sprintf("Done. Data is in %s. Enjoy!", outputDir)
fmt.Printf("%-64s\n", dirString)
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Command main_legacy executes the BMT hash algorithm on the given data and writes the binary result to standard output
//
// Up to 4096 bytes will be read
//
// If a filename is given as argument, it reads data from the file. Otherwise it reads data from standard input.
package main
import (
"fmt"
"io"
"os"
"github.com/ethersphere/bee/pkg/bmt/legacy"
"golang.org/x/crypto/sha3"
)
func main() {
var data [4096]byte
var err error
var infile *os.File
if len(os.Args) > 1 {
infile, err = os.Open(os.Args[1])
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
} else {
infile = os.Stdin
}
var c int
c, err = infile.Read(data[:])
// EOF means zero-length input. This is still valid input for BMT
if err != nil && err != io.EOF {
fmt.Fprint(os.Stderr, err.Error())
infile.Close()
os.Exit(1)
}
infile.Close()
hashPool := legacy.NewTreePool(sha3.NewLegacyKeccak256, 128, legacy.PoolSize)
bmtHash := legacy.New(hashPool)
_, err = bmtHash.Write(data[:c])
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
binSum := bmtHash.Sum(nil)
_, err = os.Stdout.Write(binSum)
if err != nil {
fmt.Fprint(os.Stderr, err.Error())
os.Exit(1)
}
}
// Copyright 2021 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bmt implements Binary Merkle Tree hash.
// Binary Merkle Tree Hash is a hash function over arbitrary byte slices of limited size.
// The BMT hash is defined as H(header|bmt-root) where header is an 8-byte metadata prefix and
// bmt-root is the root hash of the binary merkle tree built over fixed size segments
// of the underlying chunk using any base hash function H (e.g., keccak 256 SHA3).
// The segment size is the same as the hash size of H.
// The number of segments on the base must be a power of 2 so that the resulting tree is balanced.
// Chunks with data shorter than the fixed size are hashed as if they had zero padding.
//
// BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
// 128 branching swarm hash used to represent files.
//
// The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
// segment is a substring of a chunk starting at a particular offset.
// The size of the underlying segments is fixed to the size of the base hash (called the resolution
// of the BMT hash). Using Keccak256, the segment size is 32 bytes, the EVM word size, which is optimal
// for on-chain BMT verification as well as for inclusion proofs in the merkle tree of the swarm hash.
//
// Two implementations are provided:
//
// RefHasher is optimized for code simplicity and meant as a reference implementation
// that is simple to understand
//
// Hasher is optimized for speed taking advantage of concurrency with minimalistic concurrency control.
//
// BMT Hasher implements the following interfaces:
//
// - standard golang hash.Hash - synchronous, reusable
//
// - io.Writer - synchronous left-to-right datawriter.
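//
// A minimal sketch of typical synchronous use (an illustrative addition, not
// part of the original doc; the segment count and pool capacity are arbitrary):
//
//	pool := bmt.NewPool(bmt.NewConf(swarm.NewHasher, 128, 8))
//	h := pool.Get()
//	defer pool.Put(h)
//	h.SetHeaderInt64(int64(len(data)))
//	if _, err := h.Write(data); err != nil {
//		// handle error
//	}
//	sum, err := h.Hash(nil) // BMT hash = H(span|bmt-root)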
package bmt
// Copyright 2021 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bmt
import (
"errors"
)
var ErrOverflow = errors.New("BMT hash capacity exceeded")
var Sha3hash = sha3hash
// Copyright 2021 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bmt
import (
"hash"
)
const (
SpanSize = 8
)
// Hash provides the necessary extension of the hash interface to add the length-prefix of the BMT hash.
//
// Any implementation should make it possible to generate a BMT hash using the hash.Hash interface only.
// However, the limitation will be that the Span of the BMT hash always must be limited to the amount of bytes actually written.
type Hash interface {
hash.Hash
// SetHeaderInt64 sets the header bytes of BMT hash to the little endian binary representation of the int64 argument.
SetHeaderInt64(int64)
// SetHeader sets the header bytes of BMT hash by copying the first 8 bytes of the argument.
SetHeader([]byte)
// Hash calculates the BMT hash of the buffer written so far and appends it to the argument
Hash([]byte) ([]byte, error)
// Capacity returns the maximum amount of bytes that will be processed by the implementation.
Capacity() int
}
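// Illustrative sketch (an assumption, not part of the original source): driving
// a BMT hasher through the plain hash.Hash interface, where the span must equal
// the number of bytes actually written:
//
//	h.SetHeaderInt64(int64(len(data))) // span == bytes written
//	if _, err := h.Write(data); err != nil {
//		// handle error
//	}
//	sum := h.Sum(nil) // hash.Hash-style digest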
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Binary Merkle Tree Hash is a hash function over arbitrary datachunks of limited size.
// It is defined as the root hash of the binary merkle tree built over fixed size segments
// of the underlying chunk using any base hash function (e.g., keccak 256 SHA3).
// Chunks with data shorter than the fixed size are hashed as if they had zero padding.
//
// BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
// 128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash
//
// The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
// segment is a substring of a chunk starting at a particular offset.
// The size of the underlying segments is fixed to the size of the base hash (called the resolution
// of the BMT hash). Using Keccak256, the segment size is 32 bytes, the EVM word size, which is optimal
// for on-chain BMT verification as well as for inclusion proofs in the merkle tree of the swarm hash.
//
// Two implementations are provided:
//
// RefHasher is optimized for code simplicity and meant as a reference implementation
// that is simple to understand
//
// Hasher is optimized for speed taking advantage of concurrency with minimalistic
// control structure to coordinate the concurrent routines
//
// BMT Hasher implements the following interfaces:
//
// standard golang hash.Hash - synchronous, reusable
//
// io.Writer - synchronous left-to-right datawriter
package legacy
// Copyright 2021 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bmt
import (
"hash"
"sync/atomic"
)
// BaseHasherFunc is a hash.Hash constructor function used for the base hash of the BMT.
// It is implemented by Keccak256 SHA3 (sha3.NewLegacyKeccak256).
type BaseHasherFunc func() hash.Hash
// Conf is the configuration of the BMT hasher and its tree pool.
type Conf struct {
segmentSize int // size of leaf segments, stipulated to be = hash size
segmentCount int // the number of segments on the base level of the BMT
capacity int // pool capacity, controls concurrency
depth int // depth of the bmt trees = int(log2(segmentCount/2))+1
maxSize int // the total length of the data (count * size)
zerohashes [][]byte // lookup table for predictable padding subtrees for all levels
hasher BaseHasherFunc // base hasher to use for the BMT levels
}
// Pool provides a pool of trees used as resources by the BMT Hasher.
// A tree popped from the pool is guaranteed to have a clean state ready
// for hashing a new chunk.
type Pool struct {
c chan *tree // the channel to obtain a resource from the pool
*Conf // configuration
}
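// NewConf derives the BMT parameters from the base hasher, segment count and
// pool capacity: it computes the segment size, the tree depth and the
// zerohash lookup table used for padding unbalanced trees.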
func NewConf(hasher BaseHasherFunc, segmentCount, capacity int) *Conf {
count, depth := sizeToParams(segmentCount)
segmentSize := hasher().Size()
zerohashes := make([][]byte, depth+1)
zeros := make([]byte, segmentSize)
zerohashes[0] = zeros
var err error
// initialises the zerohashes lookup table
for i := 1; i < depth+1; i++ {
if zeros, err = doHash(hasher(), zeros, zeros); err != nil {
panic(err.Error())
}
zerohashes[i] = zeros
}
return &Conf{
hasher: hasher,
segmentSize: segmentSize,
segmentCount: segmentCount,
capacity: capacity,
maxSize: count * segmentSize,
depth: depth,
zerohashes: zerohashes,
}
}
// NewPool creates a pool of trees with the given configuration;
// the pool is prefilled to capacity, and Get blocks until a free tree is available.
func NewPool(c *Conf) *Pool {
p := &Pool{
Conf: c,
c: make(chan *tree, c.capacity),
}
for i := 0; i < c.capacity; i++ {
p.c <- newTree(p.segmentSize, p.maxSize, p.depth, p.hasher)
}
return p
}
// Get returns a BMT hasher possibly reusing a tree from the pool
func (p *Pool) Get() *Hasher {
t := <-p.c
return &Hasher{
Conf: p.Conf,
result: make(chan []byte),
errc: make(chan error, 1),
span: make([]byte, SpanSize),
bmt: t,
}
}
// Put is called after using a bmt hasher to return the tree to a pool for reuse
func (p *Pool) Put(h *Hasher) {
p.c <- h.bmt
}
// tree is a reusable control structure representing a BMT
// organised in a binary tree
//
// Hasher uses a Pool to obtain a tree for each chunk hash
// the tree is 'locked' while not in the pool.
type tree struct {
leaves []*node // leaf nodes of the tree, other nodes accessible via parent links
buffer []byte
}
// node is a reusable segment hasher representing a node in a BMT.
type node struct {
isLeft bool // whether it is left side of the parent double segment
parent *node // pointer to parent node in the BMT
state int32 // atomic increment implements a concurrent boolean toggle
left, right []byte // this is where the two children sections are written
hasher hash.Hash // preconstructed hasher on nodes
}
// newNode constructs a segment hasher node in the BMT (used by newTree).
func newNode(index int, parent *node, hasher hash.Hash) *node {
return &node{
parent: parent,
isLeft: index%2 == 0,
hasher: hasher,
}
}
// newTree initialises a tree by building up the nodes of a BMT
//
// segmentSize is stipulated to be the size of the hash.
func newTree(segmentSize, maxsize, depth int, hashfunc func() hash.Hash) *tree {
n := newNode(0, nil, hashfunc())
prevlevel := []*node{n}
// iterates over the levels, creating 2^(depth-level) nodes at each;
// level 0 operates on double segment sections, so we start at depth - 2
count := 2
for level := depth - 2; level >= 0; level-- {
nodes := make([]*node, count)
for i := 0; i < count; i++ {
parent := prevlevel[i/2]
nodes[i] = newNode(i, parent, hashfunc())
}
prevlevel = nodes
count *= 2
}
// the datanode level is the nodes on the last level
return &tree{
leaves: prevlevel,
buffer: make([]byte, maxsize),
}
}
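// Illustrative shape (derived from the construction above, not part of the
// original source): for 128 segments (depth 7), level 0 holds 64 leaf nodes
// over 64-byte double-segment sections, level 1 holds 32 nodes, and so on up
// to the single root node; the buffer spans the full 4096-byte chunk.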
// toggle implements a concurrent reusable 2-state object:
// an atomic increment taken modulo 2 acts as an atomic bool toggle.
// It returns true if the caller just put the node in the active/waiting state.
func (n *node) toggle() bool {
return atomic.AddInt32(&n.state, 1)%2 == 1
}
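// A minimal standalone sketch of the toggle-based rendezvous used by writeNode
// (illustrative only, not part of the original source): two goroutines report
// to the same node; the first to arrive terminates, the second proceeds.
//
//	var n node
//	done := make(chan struct{})
//	for i := 0; i < 2; i++ {
//		go func() {
//			if !n.toggle() { // false on second arrival: both children written
//				close(done)
//			}
//		}()
//	}
//	<-done // the second arrival would now hash left|right and push to the parent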
// sizeToParams returns the segment count rounded up to the next power of 2 and the corresponding depth (number of levels) of the BMT tree.
func sizeToParams(n int) (c, d int) {
c = 2
for ; c < n; c *= 2 {
d++
}
return c, d + 1
}
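// Worked examples (illustrative, values traced through the loop above):
//
//	sizeToParams(1)   // -> count 2,   depth 1
//	sizeToParams(5)   // -> count 8,   depth 3
//	sizeToParams(128) // -> count 128, depth 7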
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pool
import (
"hash"
"sync"
bmtlegacy "github.com/ethersphere/bee/pkg/bmt/legacy"
"golang.org/x/crypto/sha3"
)
// Pooler pools bmt Hashers.
// It provides the ability for the number of hashers to grow
// according to demand, but the pool retains no more than the
// defined minimum number of idle hashers once they are put back.
type Pooler interface {
// Get a bmt Hasher instance.
// Instances are reset before being returned to the caller.
Get() *bmtlegacy.Hasher
// Put a bmt Hasher back into the pool
Put(*bmtlegacy.Hasher)
// Size of the pool.
Size() int
}
type pool struct {
p sync.Pool
mtx sync.Mutex
minimum int // minimum number of instances the pool should have
size int // size of the pool (only accounted for when items are put back)
rented int // number of instances currently handed out
}
// New returns a new hasher pool.
func New(minPool, branches int) Pooler {
return &pool{
p: sync.Pool{
New: func() interface{} {
return bmtlegacy.New(bmtlegacy.NewTreePool(hashFunc, branches, 1)) // one tree per hasher
},
},
minimum: minPool,
}
}
// Get gets a bmt Hasher from the pool.
func (h *pool) Get() *bmtlegacy.Hasher {
h.mtx.Lock()
defer h.mtx.Unlock()
v := h.p.Get().(*bmtlegacy.Hasher)
h.rented++
if h.size > 0 {
h.size--
}
return v
}
// Put puts a Hasher back into the pool.
// It discards the instance if the minimum number of instances
// has been reached.
// The hasher is reset before being put back into the pool.
func (h *pool) Put(v *bmtlegacy.Hasher) {
h.mtx.Lock()
defer h.mtx.Unlock()
h.rented--
// only put back if we're not exceeding the minimum capacity
if h.size+1 > h.minimum {
return
}
v.Reset()
h.p.Put(v)
h.size++
}
// Size of the pool.
func (h *pool) Size() int {
h.mtx.Lock()
defer h.mtx.Unlock()
return h.size
}
func hashFunc() hash.Hash {
return sha3.NewLegacyKeccak256()
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pool_test
import (
"sync"
"testing"
"github.com/ethersphere/bee/pkg/bmt/pool"
)
const str = "hello world"
func TestHasherPool(t *testing.T) {
h := pool.New(3, 128)
v := h.Get()
_, err := v.Write([]byte(str))
if err != nil {
t.Fatal(err)
}
h.Put(v)
if s := h.Size(); s != 1 {
t.Fatalf("expected size 1 but got %d", s)
}
}
func TestHasherPool_concurrent(t *testing.T) {
h := pool.New(3, 128)
c := make(chan struct{})
var wg sync.WaitGroup
// request 10 copies
for i := 0; i < 10; i++ {
v := h.Get()
_, err := v.Write([]byte(str))
if err != nil {
t.Fatal(err)
}
wg.Add(1)
go func() {
defer wg.Done()
<-c
h.Put(v)
}()
}
// when we get instances from the pool, we don't know
// which ones are new and which aren't, so size is
// only incremented when items are put back
if s := h.Size(); s != 0 {
t.Fatalf("expected size 0 but got %d", s)
}
close(c)
wg.Wait()
if s := h.Size(); s != 3 {
t.Fatalf("expected size 3 but got %d", s)
}
}
// Copyright 2021 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package reference is a simple nonconcurrent reference implementation of the BMT hash
//
// This implementation does not take advantage of any parallelism and uses
// far more memory than necessary, but it is easy to see that it is correct.
// It can be used for generating test cases for optimized implementations.
// There is an extra check on the reference hasher's correctness in reference_test.go
package reference
import (
......
// Copyright 2021 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
@@ -6,12 +6,13 @@ package reference_test
import (
"bytes"
crand "crypto/rand"
"fmt"
"hash"
"io"
"testing"
"github.com/ethersphere/bee/pkg/bmt/reference"
"gitlab.com/nolash/go-mockbytes"
"golang.org/x/crypto/sha3"
)
@@ -44,16 +45,16 @@ func sha3hash(t *testing.T, data ...[]byte) []byte {
func TestRefHasher(t *testing.T) {
// the test struct is used to specify the expected BMT hash for
// segment counts between from and to and lengths from 1 to datalength
for i, x := range []struct {
for _, x := range []struct {
from int
to int
expected func([]byte) []byte
}{
// all lengths in [0,64] should be:
//
// sha3hash(data)
//
{
// all lengths in [0,64] should be:
//
// sha3hash(data)
//
from: 1,
to: 2,
expected: func(d []byte) []byte {
@@ -61,14 +62,15 @@ func TestRefHasher(t *testing.T) {
copy(data, d)
return sha3hash(t, data)
},
}, {
// all lengths in [3,4] should be:
//
// sha3hash(
// sha3hash(data[:64])
// sha3hash(data[64:])
// )
//
},
// all lengths in [3,4] should be:
//
// sha3hash(
// sha3hash(data[:64])
// sha3hash(data[64:])
// )
//
{
from: 3,
to: 4,
expected: func(d []byte) []byte {
@@ -76,20 +78,21 @@ func TestRefHasher(t *testing.T) {
copy(data, d)
return sha3hash(t, sha3hash(t, data[:64]), sha3hash(t, data[64:]))
},
}, {
// all bmttestutil.SegmentCounts in [5,8] should be:
//
// sha3hash(
// sha3hash(
// sha3hash(data[:64])
// sha3hash(data[64:128])
// )
// sha3hash(
// sha3hash(data[128:192])
// sha3hash(data[192:])
// )
// )
//
},
// all bmttestutil.SegmentCounts in [5,8] should be:
//
// sha3hash(
// sha3hash(
// sha3hash(data[:64])
// sha3hash(data[64:128])
// )
// sha3hash(
// sha3hash(data[128:192])
// sha3hash(data[192:])
// )
// )
//
{
from: 5,
to: 8,
expected: func(d []byte) []byte {
@@ -102,8 +105,8 @@ func TestRefHasher(t *testing.T) {
for segCount := x.from; segCount <= x.to; segCount++ {
for length := 1; length <= segCount*32; length++ {
t.Run(fmt.Sprintf("%d_segments_%d_bytes", segCount, length), func(t *testing.T) {
g := mockbytes.New(i, mockbytes.MockTypeStandard)
data, err := g.RandomBytes(length)
data := make([]byte, length)
_, err := io.ReadFull(crand.Reader, data)
if err != nil {
t.Fatal(err)
}
......
@@ -7,24 +7,25 @@
package bmtpool
import (
bmtlegacy "github.com/ethersphere/bee/pkg/bmt/legacy"
"github.com/ethersphere/bee/pkg/bmt/pool"
"github.com/ethersphere/bee/pkg/bmt"
"github.com/ethersphere/bee/pkg/swarm"
)
var instance pool.Pooler
const Capacity = 32
var instance *bmt.Pool
func init() {
instance = pool.New(8, swarm.BmtBranches)
instance = bmt.NewPool(bmt.NewConf(swarm.NewHasher, swarm.BmtBranches, Capacity))
}
// Get a bmt Hasher instance.
// Instances are reset before being returned to the caller.
func Get() *bmtlegacy.Hasher {
func Get() *bmt.Hasher {
return instance.Get()
}
// Put a bmt Hasher back into the pool
func Put(h *bmtlegacy.Hasher) {
func Put(h *bmt.Hasher) {
instance.Put(h)
}
@@ -63,13 +63,11 @@ func hasher(data []byte) func([]byte) ([]byte, error) {
hasher := bmtpool.Get()
defer bmtpool.Put(hasher)
if err := hasher.SetSpanBytes(span); err != nil {
return nil, err
}
hasher.SetHeader(span)
if _, err := hasher.Write(data); err != nil {
return nil, err
}
return hasher.Sum(nil), nil
return hasher.Hash(nil)
}
}
......
@@ -35,18 +35,17 @@ func (w *bmtWriter) ChainWrite(p *pipeline.PipeWriteArgs) error {
return errInvalidData
}
hasher := bmtpool.Get()
err := hasher.SetSpanBytes(p.Data[:swarm.SpanSize])
hasher.SetHeader(p.Data[:swarm.SpanSize])
_, err := hasher.Write(p.Data[swarm.SpanSize:])
if err != nil {
bmtpool.Put(hasher)
return err
}
_, err = hasher.Write(p.Data[swarm.SpanSize:])
p.Ref, err = hasher.Hash(nil)
bmtpool.Put(hasher)
if err != nil {
bmtpool.Put(hasher)
return err
}
p.Ref = hasher.Sum(nil)
bmtpool.Put(hasher)
return w.next.ChainWrite(p)
}
......
@@ -14,7 +14,6 @@ import (
"github.com/ethersphere/bee/pkg/file/pipeline"
"github.com/ethersphere/bee/pkg/file/pipeline/bmt"
mock "github.com/ethersphere/bee/pkg/file/pipeline/mock"
"github.com/ethersphere/bee/pkg/swarm"
)
// TestStoreWriter tests that store writer stores the provided data and calls the next chain writer.
@@ -29,9 +28,9 @@ func TestBmtWriter(t *testing.T) {
{
// this is a special case, since semantically it can be considered the hash
// of an empty file (since data is all zeros).
name: "all zeros",
data: make([]byte, swarm.ChunkSize),
expHash: mustDecodeString(t, "09ae927d0f3aaa37324df178928d3826820f3dd3388ce4aaebfc3af410bde23a"),
name: "empty file",
data: make([]byte, 0),
expHash: mustDecodeString(t, "b34ca8c22b9e982354f9c7f50b470d66db428d880c8a904d5fe4ec9713171526"),
},
{
name: "hello world",
@@ -68,7 +67,7 @@ func TestBmtWriter(t *testing.T) {
t.Fatal(err)
}
if !bytes.Equal(tc.expHash, args.Ref) {
t.Fatalf("ref mismatch. got %v want %v", args.Ref, tc.expHash)
t.Fatalf("ref mismatch. got %x want %x", args.Ref, tc.expHash)
}
if calls := mockChainWriter.ChainWriteCalls(); calls != 1 {
......
@@ -71,7 +71,7 @@ func TestEmpty(t *testing.T) {
if err != nil {
t.Fatal(err)
}
exp := swarm.MustParseHexAddress("ffd70157e48063fc33c97a050f7f640233bf646cc98d9524c6b92bcf3ab56f83")
exp := swarm.MustParseHexAddress("b34ca8c22b9e982354f9c7f50b470d66db428d880c8a904d5fe4ec9713171526")
if !bytes.Equal(exp.Bytes(), sum) {
t.Fatalf("expected %s got %s", exp.String(), hex.EncodeToString(sum))
}
......
@@ -180,13 +180,11 @@ func hasher(span, b []byte) func([]byte) ([]byte, error) {
s := append(nonce, b...)
hasher := bmtpool.Get()
defer bmtpool.Put(hasher)
if err := hasher.SetSpanBytes(span); err != nil {
return nil, err
}
hasher.SetHeader(span)
if _, err := hasher.Write(s); err != nil {
return nil, err
}
return hasher.Sum(nil), nil
return hasher.Hash(nil)
}
}
......