Commit 4254a3e3 authored by acud's avatar acud Committed by GitHub

bmt: import bmt repo (#1520)

parent 1b83c2e5
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bmt
import (
"hash"
)
const (
	// SpanSize is the length in bytes of the span (length) prefix of a BMT hash.
	SpanSize = 8
)

// Hash provides the necessary extension of the hash interface to add the length-prefix of the BMT hash.
//
// Any implementation should make it possible to generate a BMT hash using the hash.Hash interface only.
// However, the limitation will be that the Span of the BMT hash always must be limited to the amount of bytes actually written.
type Hash interface {
	hash.Hash
	// SetSpan sets the length prefix of BMT hash.
	SetSpan(int64) error
	// SetSpanBytes sets the length prefix of BMT hash in byte form.
	SetSpanBytes([]byte) error
	// Capacity returns the maximum amount of bytes that will be processed by the implementation.
	Capacity() int
	// WriteSection writes to a specific section of the data to be hashed.
	WriteSection(idx int, data []byte) error
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Command generate_legacy generates bmt hashes of sequential byte inputs
// for every possible length of legacy bmt hasher
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"github.com/ethersphere/bee/pkg/bmt/legacy"
"gitlab.com/nolash/go-mockbytes"
"golang.org/x/crypto/sha3"
)
// main generates, for every input length i in [0,4096), the BMT hash of a
// deterministic i-byte sequence and writes output files to the directory
// given as the single command-line argument.
func main() {
	if len(os.Args) != 2 {
		fmt.Fprintf(os.Stderr, "Usage: generate-hashes <output_directory>\n")
		os.Exit(1)
	}
	outputDir, err := filepath.Abs(os.Args[1])
	if err != nil {
		fmt.Fprintf(os.Stderr, "Invalid input: %s", err)
		os.Exit(1)
	}
	// create output directory, fail if it already exists or error creating.
	// os.Mkdir wraps its failure in *os.PathError, so a direct comparison
	// err == os.ErrExist never matches; os.IsExist unwraps it correctly.
	err = os.Mkdir(outputDir, 0750)
	if os.IsExist(err) {
		fmt.Fprintf(os.Stderr, "Directory %s already exists\n", outputDir)
		os.Exit(1)
	} else if err != nil {
		fmt.Fprintf(os.Stderr, "Error creating output directory: %v\n", err)
		os.Exit(1)
	}
	// set up hasher
	hashPool := legacy.NewTreePool(sha3.NewLegacyKeccak256, 128, legacy.PoolSize)
	bmtHash := legacy.New(hashPool)
	// create sequence generator and outputs
	g := mockbytes.New(0, mockbytes.MockTypeStandard).WithModulus(255)
	for i := 0; i < 4096; i++ {
		s := fmt.Sprintf("processing %d...", i)
		fmt.Fprintf(os.Stderr, "%-64s\r", s)
		filename := filepath.Join(outputDir, fmt.Sprintf("%d.bin", i))
		b, err := g.SequentialBytes(i)
		if err != nil {
			fmt.Fprint(os.Stderr, err.Error())
			os.Exit(1)
		}
		bmtHash.Reset()
		// check the write error before computing the digest
		if _, err = bmtHash.Write(b); err != nil {
			fmt.Fprint(os.Stderr, err.Error())
			os.Exit(1)
		}
		sum := bmtHash.Sum(nil)
		err = ioutil.WriteFile(filename, sum, 0666)
		if err != nil {
			fmt.Fprint(os.Stderr, err.Error())
			os.Exit(1)
		}
		// NOTE(review): this second write overwrites the hash written just
		// above with the raw input data, so the file ends up holding the data
		// only. The hash and the data likely belong in separate files — TODO
		// confirm the intended layout; behavior is preserved here.
		err = ioutil.WriteFile(filename, b, 0666)
		if err != nil {
			fmt.Fprint(os.Stderr, err.Error())
		}
	}
	// Be kind and give feedback to user
	dirString := fmt.Sprintf("Done. Data is in %s. Enjoy!", outputDir)
	fmt.Printf("%-64s\n", dirString)
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Command main_legacy executes the BMT hash algorithm on the given data and writes the binary result to standard output
//
// Up to 4096 bytes will be read
//
// If a filename is given as argument, it reads data from the file. Otherwise it reads data from standard input.
package main
import (
"fmt"
"io"
"os"
"github.com/ethersphere/bee/pkg/bmt/legacy"
"golang.org/x/crypto/sha3"
)
// main reads up to 4096 bytes from the file named in os.Args[1] (or from
// standard input when no argument is given), computes the legacy BMT hash of
// the data and writes the binary digest to standard output.
func main() {
	var data [4096]byte
	var err error
	var infile *os.File
	if len(os.Args) > 1 {
		infile, err = os.Open(os.Args[1])
		if err != nil {
			fmt.Fprint(os.Stderr, err.Error())
			os.Exit(1)
		}
	} else {
		infile = os.Stdin
	}
	// A single File.Read may return fewer bytes than are available (e.g. when
	// reading from a pipe), so read until the buffer is full or the stream ends.
	c, err := io.ReadFull(infile, data[:])
	// io.EOF means zero-length input and io.ErrUnexpectedEOF a short (but
	// complete) input; both are still valid input for BMT.
	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
		fmt.Fprint(os.Stderr, err.Error())
		infile.Close()
		os.Exit(1)
	}
	infile.Close()
	hashPool := legacy.NewTreePool(sha3.NewLegacyKeccak256, 128, legacy.PoolSize)
	bmtHash := legacy.New(hashPool)
	if _, err = bmtHash.Write(data[:c]); err != nil {
		fmt.Fprint(os.Stderr, err.Error())
		os.Exit(1)
	}
	binSum := bmtHash.Sum(nil)
	if _, err = os.Stdout.Write(binSum); err != nil {
		fmt.Fprint(os.Stderr, err.Error())
		os.Exit(1)
	}
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package bmt defines the interface for the Binary Merkle Tree hash.
package bmt
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package bmt
import (
"errors"
)
// ErrOverflow is returned when more data is written to a BMT hasher than its
// capacity allows.
var ErrOverflow = errors.New("BMT hash capacity exceeded")
This diff is collapsed.
This diff is collapsed.
// Copyright 2018 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// Binary Merkle Tree Hash is a hash function over arbitrary datachunks of limited size.
// It is defined as the root hash of the binary merkle tree built over fixed size segments
// of the underlying chunk using any base hash function (e.g., keccak 256 SHA3).
// Chunks with data shorter than the fixed size are hashed as if they had zero padding.
//
// BMT hash is used as the chunk hash function in swarm which in turn is the basis for the
// 128 branching swarm hash http://swarm-guide.readthedocs.io/en/latest/architecture.html#swarm-hash
//
// The BMT is optimal for providing compact inclusion proofs, i.e. prove that a
// segment is a substring of a chunk starting at a particular offset.
// The size of the underlying segments is fixed to the size of the base hash (called the resolution
// of the BMT hash), Using Keccak256 SHA3 hash is 32 bytes, the EVM word size to optimize for on-chain BMT verification
// as well as the hash size optimal for inclusion proofs in the merkle tree of the swarm hash.
//
// Two implementations are provided:
//
// RefHasher is optimized for code simplicity and meant as a reference implementation
// that is simple to understand
//
// Hasher is optimized for speed taking advantage of concurrency with minimalistic
// control structure to coordinate the concurrent routines
//
// BMT Hasher implements the following interfaces:
//
// standard golang hash.Hash - synchronous, reusable
//
// io.Writer - synchronous left-to-right datawriter
package legacy
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pool
import (
"hash"
"sync"
bmtlegacy "github.com/ethersphere/bee/pkg/bmt/legacy"
"golang.org/x/crypto/sha3"
)
// Pooler pools bmt Hashers.
// It provides the ability for the number of hashers to grow
// according to demand, but will shrink once the minimum defined
// hashers are put back into the pool.
// Pooler pools bmt Hashers.
// It provides the ability for the number of hashers to grow
// according to demand, but will shrink once the minimum defined
// hashers are put back into the pool.
type Pooler interface {
	// Get a bmt Hasher instance.
	// Instances are reset before being returned to the caller.
	Get() *bmtlegacy.Hasher
	// Put a bmt Hasher back into the pool.
	Put(*bmtlegacy.Hasher)
	// Size of the pool, i.e. the number of instances currently
	// held and accounted for (only incremented when items are put back).
	Size() int
}
// pool is the default Pooler implementation, backed by a sync.Pool.
type pool struct {
	p       sync.Pool
	mtx     sync.Mutex // guards size and rented
	minimum int        // minimum number of instances the pool should have
	size    int        // size of the pool (only accounted for when items are put back)
	rented  int        // number of instances currently handed out via Get
}
// New returns a new HasherPool.
func New(minPool, branches int) Pooler {
return &pool{
p: sync.Pool{
New: func() interface{} {
return bmtlegacy.New(bmtlegacy.NewTreePool(hashFunc, branches, 1)) // one tree per hasher
},
},
minimum: minPool,
}
}
// Get gets a bmt Hasher from the pool.
func (h *pool) Get() *bmtlegacy.Hasher {
h.mtx.Lock()
defer h.mtx.Unlock()
v := h.p.Get().(*bmtlegacy.Hasher)
h.rented++
if h.size > 0 {
h.size--
}
return v
}
// Put puts a Hasher back into the pool.
// It discards the instance if the minimum number of instances
// has been reached.
// The hasher is reset before being put back into the pool.
func (h *pool) Put(v *bmtlegacy.Hasher) {
h.mtx.Lock()
defer h.mtx.Unlock()
h.rented--
// only put back if we're not exceeding the minimum capacity
if h.size+1 > h.minimum {
return
}
v.Reset()
h.p.Put(v)
h.size++
}
// Size of the pool.
func (h *pool) Size() int {
h.mtx.Lock()
defer h.mtx.Unlock()
return h.size
}
// hashFunc is the base hash constructor (Keccak256) used by pooled hashers.
func hashFunc() hash.Hash { return sha3.NewLegacyKeccak256() }
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package pool_test
import (
"sync"
"testing"
"github.com/ethersphere/bee/pkg/bmt/pool"
)
const str = "hello world"
// TestHasherPool checks that a single Get/Put round trip leaves exactly one
// accounted instance in the pool.
func TestHasherPool(t *testing.T) {
	p := pool.New(3, 128)
	hasher := p.Get()
	if _, err := hasher.Write([]byte(str)); err != nil {
		t.Fatal(err)
	}
	p.Put(hasher)
	if got := p.Size(); got != 1 {
		t.Fatalf("expected size 1 but got %d", got)
	}
}
// TestHasherPool_concurrent rents 10 instances, returns them concurrently and
// checks that the pool retains only its minimum of 3.
func TestHasherPool_concurrent(t *testing.T) {
	p := pool.New(3, 128)
	release := make(chan struct{})
	var wg sync.WaitGroup

	// request 10 copies; each goroutine holds its instance until released
	for i := 0; i < 10; i++ {
		hasher := p.Get()
		if _, err := hasher.Write([]byte(str)); err != nil {
			t.Fatal(err)
		}
		wg.Add(1)
		go func() {
			defer wg.Done()
			<-release
			p.Put(hasher)
		}()
	}

	// when we get instances from the pool, we dont know
	// which ones are new and which aren't, so size is
	// only incremented when items are put back
	if got := p.Size(); got != 0 {
		t.Fatalf("expected size 0 but got %d", got)
	}

	close(release)
	wg.Wait()

	if got := p.Size(); got != 3 {
		t.Fatalf("expected size 3 but got %d", got)
	}
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package reference is a simple nonconcurrent reference implementation for hashsize segment based
// Binary Merkle tree hash on arbitrary but fixed maximum chunksize n where 0 <= n <= 4096
//
// This implementation does not take advantage of any paralellisms and uses
// far more memory than necessary, but it is easy to see that it is correct.
// It can be used for generating test cases for optimized implementations.
// There is extra check on reference hasher correctness in bmt_test.go
// * TestRefHasher
// * testBMTHasherCorrectness function
package reference
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package reference
import (
"hash"
)
// RefHasher is the non-optimized easy-to-read reference implementation of BMT.
type RefHasher struct {
maxDataLength int // c * hashSize, where c = 2 ^ ceil(log2(count)), where count = ceil(length / hashSize)
sectionLength int // 2 * hashSize
hasher hash.Hash // base hash func (Keccak256 SHA3)
}
// NewRefHasher returns a new RefHasher.
func NewRefHasher(h hash.Hash, count int) *RefHasher {
hashsize := h.Size()
c := 2
for ; c < count; c *= 2 {
}
return &RefHasher{
sectionLength: 2 * hashsize,
maxDataLength: c * hashsize,
hasher: h,
}
}
// Hash returns the BMT hash of the byte slice.
func (rh *RefHasher) Hash(data []byte) ([]byte, error) {
// if data is shorter than the base length (maxDataLength), we provide padding with zeros
d := make([]byte, rh.maxDataLength)
length := len(data)
if length > rh.maxDataLength {
length = rh.maxDataLength
}
copy(d, data[:length])
return rh.hash(d, rh.maxDataLength)
}
// hash calls itself recursively on both halves of the given slice
// concatenates the results, and returns the hash of that
// if the length of d is 2 * segmentSize then just returns the hash of that section
// data has length maxDataLength = segmentSize * 2^k
func (rh *RefHasher) hash(data []byte, length int) ([]byte, error) {
var section []byte
if length == rh.sectionLength {
// section contains two data segments (d)
section = data
} else {
// section contains hashes of left and right BMT subtree
// to be calculated by calling hash recursively on left and right half of d
length /= 2
left, err := rh.hash(data[:length], length)
if err != nil {
return nil, err
}
right, err := rh.hash(data[length:], length)
if err != nil {
return nil, err
}
section = append(left, right...)
}
rh.hasher.Reset()
_, err := rh.hasher.Write(section)
if err != nil {
return nil, err
}
return rh.hasher.Sum(nil), nil
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package reference_test
import (
"bytes"
"fmt"
"hash"
"testing"
"github.com/ethersphere/bee/pkg/bmt/reference"
"gitlab.com/nolash/go-mockbytes"
"golang.org/x/crypto/sha3"
)
// calculates the hash of the data using hash.Hash
func doSum(h hash.Hash, b []byte, data ...[]byte) ([]byte, error) {
h.Reset()
for _, v := range data {
var err error
_, err = h.Write(v)
if err != nil {
return nil, err
}
}
return h.Sum(b), nil
}
// calculates the Keccak256 SHA3 hash of the data
func sha3hash(t *testing.T, data ...[]byte) []byte {
t.Helper()
h := sha3.NewLegacyKeccak256()
r, err := doSum(h, nil, data...)
if err != nil {
t.Fatal(err)
}
return r
}
// TestRefHasher tests that the RefHasher computes the expected BMT hash for some small data lengths.
func TestRefHasher(t *testing.T) {
	// the test struct is used to specify the expected BMT hash for
	// segment counts between from and to and lengths from 1 to datalength
	for i, x := range []struct {
		from     int
		to       int
		expected func([]byte) []byte
	}{
		{
			// all segment counts in [1,2] (data lengths in [0,64]) should be:
			//
			// sha3hash(data)
			//
			from: 1,
			to:   2,
			expected: func(d []byte) []byte {
				data := make([]byte, 64)
				copy(data, d)
				return sha3hash(t, data)
			},
		}, {
			// all segment counts in [3,4] should be:
			//
			// sha3hash(
			// 	sha3hash(data[:64])
			// 	sha3hash(data[64:])
			// )
			//
			from: 3,
			to:   4,
			expected: func(d []byte) []byte {
				data := make([]byte, 128)
				copy(data, d)
				return sha3hash(t, sha3hash(t, data[:64]), sha3hash(t, data[64:]))
			},
		}, {
			// all segment counts in [5,8] should be:
			//
			// sha3hash(
			// 	sha3hash(
			// 		sha3hash(data[:64])
			// 		sha3hash(data[64:128])
			// 	)
			// 	sha3hash(
			// 		sha3hash(data[128:192])
			// 		sha3hash(data[192:])
			// 	)
			// )
			//
			from: 5,
			to:   8,
			expected: func(d []byte) []byte {
				data := make([]byte, 256)
				copy(data, d)
				return sha3hash(t, sha3hash(t, sha3hash(t, data[:64]), sha3hash(t, data[64:128])), sha3hash(t, sha3hash(t, data[128:192]), sha3hash(t, data[192:])))
			},
		},
	} {
		// exercise every input length up to the segment capacity of each count
		for segCount := x.from; segCount <= x.to; segCount++ {
			for length := 1; length <= segCount*32; length++ {
				t.Run(fmt.Sprintf("%d_segments_%d_bytes", segCount, length), func(t *testing.T) {
					// deterministic pseudo-random input, seeded by the case index
					g := mockbytes.New(i, mockbytes.MockTypeStandard)
					data, err := g.RandomBytes(length)
					if err != nil {
						t.Fatal(err)
					}
					expected := x.expected(data)
					actual, err := reference.NewRefHasher(sha3.NewLegacyKeccak256(), segCount).Hash(data)
					if err != nil {
						t.Fatal(err)
					}
					if !bytes.Equal(actual, expected) {
						t.Fatalf("expected %x, got %x", expected, actual)
					}
				})
			}
		}
	}
}
......@@ -7,9 +7,9 @@
package bmtpool
import (
bmtlegacy "github.com/ethersphere/bee/pkg/bmt/legacy"
"github.com/ethersphere/bee/pkg/bmt/pool"
"github.com/ethersphere/bee/pkg/swarm"
bmtlegacy "github.com/ethersphere/bmt/legacy"
"github.com/ethersphere/bmt/pool"
)
var instance pool.Pooler
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment