Commit 77dd5628 authored by Nemanja Zbiljić's avatar Nemanja Zbiljić Committed by GitHub

Chunk traversal service (#924)

parent 09aa147e
...@@ -10,7 +10,7 @@ require ( ...@@ -10,7 +10,7 @@ require (
github.com/ethereum/go-ethereum v1.9.20 github.com/ethereum/go-ethereum v1.9.20
github.com/ethersphere/bmt v0.1.4 github.com/ethersphere/bmt v0.1.4
github.com/ethersphere/langos v1.0.0 github.com/ethersphere/langos v1.0.0
github.com/ethersphere/manifest v0.3.2 github.com/ethersphere/manifest v0.3.3
github.com/ethersphere/sw3-bindings/v2 v2.1.0 github.com/ethersphere/sw3-bindings/v2 v2.1.0
github.com/gogo/protobuf v1.3.1 github.com/gogo/protobuf v1.3.1
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e // indirect
......
...@@ -167,8 +167,8 @@ github.com/ethersphere/bmt v0.1.4 h1:+rkWYNtMgDx6bkNqGdWu+U9DgGI1rRZplpSW3YhBr1Q ...@@ -167,8 +167,8 @@ github.com/ethersphere/bmt v0.1.4 h1:+rkWYNtMgDx6bkNqGdWu+U9DgGI1rRZplpSW3YhBr1Q
github.com/ethersphere/bmt v0.1.4/go.mod h1:Yd8ft1U69WDuHevZc/rwPxUv1rzPSMpMnS6xbU53aY8= github.com/ethersphere/bmt v0.1.4/go.mod h1:Yd8ft1U69WDuHevZc/rwPxUv1rzPSMpMnS6xbU53aY8=
github.com/ethersphere/langos v1.0.0 h1:NBtNKzXTTRSue95uOlzPN4py7Aofs0xWPzyj4AI1Vcc= github.com/ethersphere/langos v1.0.0 h1:NBtNKzXTTRSue95uOlzPN4py7Aofs0xWPzyj4AI1Vcc=
github.com/ethersphere/langos v1.0.0/go.mod h1:dlcN2j4O8sQ+BlCaxeBu43bgr4RQ+inJ+pHwLeZg5Tw= github.com/ethersphere/langos v1.0.0/go.mod h1:dlcN2j4O8sQ+BlCaxeBu43bgr4RQ+inJ+pHwLeZg5Tw=
github.com/ethersphere/manifest v0.3.2 h1:IusNNfpqde2F7uWZ2DE9eyo9PMwUAMop3Ws1NBcdMyM= github.com/ethersphere/manifest v0.3.3 h1:Fc4nE1c28v9j2IOGHdpaU7DQLjDWSJxXjCHL0Vl/9pQ=
github.com/ethersphere/manifest v0.3.2/go.mod h1:ygAx0KLhXYmKqsjUab95RCbXf8UcO7yMDjyfP0lY76Y= github.com/ethersphere/manifest v0.3.3/go.mod h1:ygAx0KLhXYmKqsjUab95RCbXf8UcO7yMDjyfP0lY76Y=
github.com/ethersphere/sw3-bindings/v2 v2.1.0 h1:QefDtzU94UelICMPXWr7m52E2oj6r018Yc0XLoCWOxw= github.com/ethersphere/sw3-bindings/v2 v2.1.0 h1:QefDtzU94UelICMPXWr7m52E2oj6r018Yc0XLoCWOxw=
github.com/ethersphere/sw3-bindings/v2 v2.1.0/go.mod h1:ozMVBZZlAirS/FcUpFwzV60v8gC0nVbA/5ZXtCX3xCc= github.com/ethersphere/sw3-bindings/v2 v2.1.0/go.mod h1:ozMVBZZlAirS/FcUpFwzV60v8gC0nVbA/5ZXtCX3xCc=
github.com/fatih/color v1.3.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= github.com/fatih/color v1.3.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4=
......
...@@ -6,6 +6,7 @@ package entry ...@@ -6,6 +6,7 @@ package entry
import ( import (
"errors" "errors"
"math"
"github.com/ethersphere/bee/pkg/collection" "github.com/ethersphere/bee/pkg/collection"
"github.com/ethersphere/bee/pkg/encryption" "github.com/ethersphere/bee/pkg/encryption"
...@@ -33,6 +34,18 @@ func New(reference, metadata swarm.Address) *Entry { ...@@ -33,6 +34,18 @@ func New(reference, metadata swarm.Address) *Entry {
} }
} }
// CanUnmarshal reports whether data of the given size might be unmarshaled
// into an entry. The decision is based on the size alone: only the plain and
// the encrypted serialized entry sizes are accepted.
func CanUnmarshal(size int64) bool {
	// Sizes at or above MaxInt32 are rejected before narrowing to int.
	if size >= math.MaxInt32 {
		return false
	}
	n := int(size)
	return n == serializedDataSize || n == encryptedSerializedDataSize
}
// Reference implements collection.Entry // Reference implements collection.Entry
func (e *Entry) Reference() swarm.Address { func (e *Entry) Reference() swarm.Address {
return e.reference return e.reference
......
...@@ -10,6 +10,7 @@ import ( ...@@ -10,6 +10,7 @@ import (
"encoding/binary" "encoding/binary"
"errors" "errors"
"io" "io"
"sync"
"sync/atomic" "sync/atomic"
"github.com/ethersphere/bee/pkg/encryption/store" "github.com/ethersphere/bee/pkg/encryption/store"
...@@ -229,6 +230,8 @@ func (j *joiner) processChunkAddresses(fn swarm.AddressIterFunc, data []byte, su ...@@ -229,6 +230,8 @@ func (j *joiner) processChunkAddresses(fn swarm.AddressIterFunc, data []byte, su
return return
} }
var wg sync.WaitGroup
for cursor := 0; cursor < len(data); cursor += j.refLength { for cursor := 0; cursor < len(data); cursor += j.refLength {
select { select {
case <-j.ctx.Done(): case <-j.ctx.Done():
...@@ -249,7 +252,11 @@ func (j *joiner) processChunkAddresses(fn swarm.AddressIterFunc, data []byte, su ...@@ -249,7 +252,11 @@ func (j *joiner) processChunkAddresses(fn swarm.AddressIterFunc, data []byte, su
} }
func(address swarm.Address, eg *errgroup.Group) { func(address swarm.Address, eg *errgroup.Group) {
wg.Add(1)
eg.Go(func() error { eg.Go(func() error {
defer wg.Done()
ch, err := j.getter.Get(j.ctx, storage.ModeGetRequest, address) ch, err := j.getter.Get(j.ctx, storage.ModeGetRequest, address)
if err != nil { if err != nil {
return err return err
...@@ -261,6 +268,8 @@ func (j *joiner) processChunkAddresses(fn swarm.AddressIterFunc, data []byte, su ...@@ -261,6 +268,8 @@ func (j *joiner) processChunkAddresses(fn swarm.AddressIterFunc, data []byte, su
return nil return nil
}) })
}(address, eg) }(address, eg)
wg.Wait()
} }
} }
......
...@@ -12,7 +12,6 @@ import ( ...@@ -12,7 +12,6 @@ import (
"io" "io"
"io/ioutil" "io/ioutil"
mrand "math/rand" mrand "math/rand"
"sync"
"testing" "testing"
"time" "time"
...@@ -801,12 +800,9 @@ func TestJoinerIterateChunkAddresses(t *testing.T) { ...@@ -801,12 +800,9 @@ func TestJoinerIterateChunkAddresses(t *testing.T) {
} }
foundAddresses := make(map[string]struct{}) foundAddresses := make(map[string]struct{})
var foundAddressesMu sync.Mutex
err = j.IterateChunkAddresses(func(addr swarm.Address) (stop bool) { err = j.IterateChunkAddresses(func(addr swarm.Address) (stop bool) {
foundAddressesMu.Lock()
foundAddresses[addr.String()] = struct{}{} foundAddresses[addr.String()] = struct{}{}
foundAddressesMu.Unlock()
return false return false
}) })
if err != nil { if err != nil {
......
...@@ -21,6 +21,14 @@ var ( ...@@ -21,6 +21,14 @@ var (
// ErrInvalidManifestType is returned when an unknown manifest type // ErrInvalidManifestType is returned when an unknown manifest type
// is provided to the function. // is provided to the function.
ErrInvalidManifestType = errors.New("manifest: invalid type") ErrInvalidManifestType = errors.New("manifest: invalid type")
// ErrMissingReference is returned when the reference for the manifest file
// is missing.
ErrMissingReference = errors.New("manifest: missing reference")
)
var (
errStopIterator = errors.New("manifest: stop iterator")
) )
// Interface for operations with manifest. // Interface for operations with manifest.
...@@ -37,6 +45,10 @@ type Interface interface { ...@@ -37,6 +45,10 @@ type Interface interface {
HasPrefix(string) (bool, error) HasPrefix(string) (bool, error)
// Store stores the manifest, returning the resulting address. // Store stores the manifest, returning the resulting address.
Store(context.Context, storage.ModePut) (swarm.Address, error) Store(context.Context, storage.ModePut) (swarm.Address, error)
// IterateAddresses is used to iterate over chunks addresses for
// the manifest.
IterateAddresses(context.Context, swarm.AddressIterFunc) error
} }
// Entry represents a single manifest entry. // Entry represents a single manifest entry.
......
...@@ -45,6 +45,24 @@ func NewMantarayManifest( ...@@ -45,6 +45,24 @@ func NewMantarayManifest(
}, nil }, nil
} }
// NewMantarayManifestWithObfuscationKeyFn creates a new, empty mantaray-based
// manifest and registers obfuscationKeyFn with the mantaray package via
// mantaray.SetObfuscationKeyFn.
//
// NOTE: This should only be used in tests.
func NewMantarayManifestWithObfuscationKeyFn(
	encrypted bool,
	storer storage.Storer,
	obfuscationKeyFn func([]byte) (int, error),
) (Interface, error) {
	trie := mantaray.New()

	// The key function is set on the mantaray package, not on this manifest
	// instance.
	mantaray.SetObfuscationKeyFn(obfuscationKeyFn)

	return &mantarayManifest{
		trie:      trie,
		encrypted: encrypted,
		storer:    storer,
	}, nil
}
// NewMantarayManifestReference loads existing mantaray-based manifest. // NewMantarayManifestReference loads existing mantaray-based manifest.
func NewMantarayManifestReference( func NewMantarayManifestReference(
ctx context.Context, ctx context.Context,
...@@ -113,6 +131,7 @@ func (m *mantarayManifest) HasPrefix(prefix string) (bool, error) { ...@@ -113,6 +131,7 @@ func (m *mantarayManifest) HasPrefix(prefix string) (bool, error) {
func (m *mantarayManifest) Store(ctx context.Context, mode storage.ModePut) (swarm.Address, error) { func (m *mantarayManifest) Store(ctx context.Context, mode storage.ModePut) (swarm.Address, error) {
saver := newMantaraySaver(ctx, m.encrypted, m.storer, mode) saver := newMantaraySaver(ctx, m.encrypted, m.storer, mode)
m.loader = saver
err := m.trie.Save(saver) err := m.trie.Save(saver)
if err != nil { if err != nil {
...@@ -124,6 +143,53 @@ func (m *mantarayManifest) Store(ctx context.Context, mode storage.ModePut) (swa ...@@ -124,6 +143,53 @@ func (m *mantarayManifest) Store(ctx context.Context, mode storage.ModePut) (swa
return address, nil return address, nil
} }
// IterateAddresses walks the manifest trie and calls fn with the reference of
// every visited node and, for value-type nodes, with the node's entry as well.
//
// It returns ErrMissingReference when the trie has no (non-zero) reference.
// When fn asks to stop, the walk ends early and nil is returned; any other
// walk error is returned wrapped.
func (m *mantarayManifest) IterateAddresses(ctx context.Context, fn swarm.AddressIterFunc) error {
	if root := swarm.NewAddress(m.trie.Reference()); swarm.ZeroAddress.Equal(root) {
		return ErrMissingReference
	}

	visit := func(_ []byte, node *mantaray.Node, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case node == nil:
			return nil
		}

		if raw := node.Reference(); raw != nil {
			if fn(swarm.NewAddress(raw)) {
				return errStopIterator
			}
		}

		if node.IsValueType() {
			if raw := node.Entry(); raw != nil {
				if fn(swarm.NewAddress(raw)) {
					return errStopIterator
				}
			}
		}

		return nil
	}

	err := m.trie.WalkNode([]byte{}, m.loader, visit)
	// errStopIterator only signals that the caller stopped the iteration;
	// it is not reported as a failure.
	if err == nil || errors.Is(err, errStopIterator) {
		return nil
	}
	return fmt.Errorf("manifest iterate addresses: %w", err)
}
// mantarayLoadSaver implements required interface 'mantaray.LoadSaver' // mantarayLoadSaver implements required interface 'mantaray.LoadSaver'
type mantarayLoadSaver struct { type mantarayLoadSaver struct {
ctx context.Context ctx context.Context
......
...@@ -27,6 +27,7 @@ const ( ...@@ -27,6 +27,7 @@ const (
type simpleManifest struct { type simpleManifest struct {
manifest simple.Manifest manifest simple.Manifest
reference swarm.Address
encrypted bool encrypted bool
storer storage.Storer storer storage.Storer
} }
...@@ -52,6 +53,7 @@ func NewSimpleManifestReference( ...@@ -52,6 +53,7 @@ func NewSimpleManifestReference(
) (Interface, error) { ) (Interface, error) {
m := &simpleManifest{ m := &simpleManifest{
manifest: simple.NewManifest(), manifest: simple.NewManifest(),
reference: reference,
encrypted: encrypted, encrypted: encrypted,
storer: storer, storer: storer,
} }
...@@ -116,9 +118,51 @@ func (m *simpleManifest) Store(ctx context.Context, mode storage.ModePut) (swarm ...@@ -116,9 +118,51 @@ func (m *simpleManifest) Store(ctx context.Context, mode storage.ModePut) (swarm
return swarm.ZeroAddress, fmt.Errorf("manifest save error: %w", err) return swarm.ZeroAddress, fmt.Errorf("manifest save error: %w", err)
} }
m.reference = address
return address, nil return address, nil
} }
// IterateAddresses calls fn with the manifest's own reference first and then
// with the parsed reference of every entry in the manifest.
//
// It returns ErrMissingReference when the manifest has no (non-zero)
// reference. When fn asks to stop, iteration ends early and nil is returned;
// any other walk error is returned wrapped.
func (m *simpleManifest) IterateAddresses(ctx context.Context, fn swarm.AddressIterFunc) error {
	if swarm.ZeroAddress.Equal(m.reference) {
		return ErrMissingReference
	}

	// NOTE: the manifest reference is reported first so that this behaves
	// the same as the other manifest implementations.
	if fn(m.reference) {
		return nil
	}

	visitEntry := func(_ string, entry simple.Entry, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}

		addr, parseErr := swarm.ParseHexAddress(entry.Reference())
		if parseErr != nil {
			return parseErr
		}

		if fn(addr) {
			return errStopIterator
		}
		return nil
	}

	err := m.manifest.WalkEntry("", visitEntry)
	// errStopIterator only signals that the caller stopped the iteration;
	// it is not reported as a failure.
	if err == nil || errors.Is(err, errStopIterator) {
		return nil
	}
	return fmt.Errorf("manifest iterate addresses: %w", err)
}
func (m *simpleManifest) load(ctx context.Context, reference swarm.Address) error { func (m *simpleManifest) load(ctx context.Context, reference swarm.Address) error {
j, _, err := joiner.New(ctx, m.storer, reference) j, _, err := joiner.New(ctx, m.storer, reference)
if err != nil { if err != nil {
......
...@@ -107,6 +107,25 @@ func (m *MockStorer) Put(ctx context.Context, mode storage.ModePut, chs ...swarm ...@@ -107,6 +107,25 @@ func (m *MockStorer) Put(ctx context.Context, mode storage.ModePut, chs ...swarm
} }
m.store[ch.Address().String()] = ch.Data() m.store[ch.Address().String()] = ch.Data()
m.modePut[ch.Address().String()] = mode m.modePut[ch.Address().String()] = mode
// pin chunks if needed
switch mode {
case storage.ModePutUploadPin:
// if mode is set pin, increment the pin counter
var found bool
addr := ch.Address()
for i, ad := range m.pinnedAddress {
if addr.String() == ad.String() {
m.pinnedCounter[i] = m.pinnedCounter[i] + 1
found = true
}
}
if !found {
m.pinnedAddress = append(m.pinnedAddress, addr)
m.pinnedCounter = append(m.pinnedCounter, uint64(1))
}
default:
}
} }
return exist, nil return exist, nil
} }
......
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package traversal
import (
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"github.com/ethersphere/bee/pkg/collection/entry"
"github.com/ethersphere/bee/pkg/file"
"github.com/ethersphere/bee/pkg/file/joiner"
"github.com/ethersphere/bee/pkg/manifest"
"github.com/ethersphere/bee/pkg/storage"
"github.com/ethersphere/bee/pkg/swarm"
)
var (
// ErrInvalidType is returned when the supplied reference does not resolve
// to the type the called method expects, for example when
// TraverseFileAddresses is given a reference that is not a file, or
// TraverseManifestAddresses one that is not a manifest.
ErrInvalidType = errors.New("traversal: invalid type")
)
// Service is the service to find dependent chunks for an address.
//
// Every method reports the discovered chunk addresses through the supplied
// swarm.AddressIterFunc.
type Service interface {
// TraverseAddresses iterates through each address related to the supplied
// one, if possible. The reference is handled as a manifest, a file or
// plain bytes, depending on what its content turns out to be.
TraverseAddresses(context.Context, swarm.Address, swarm.AddressIterFunc) error
// TraverseBytesAddresses iterates through each address of a bytes.
TraverseBytesAddresses(context.Context, swarm.Address, swarm.AddressIterFunc) error
// TraverseFileAddresses iterates through each address of a file.
// It returns ErrInvalidType when the reference is not a file.
TraverseFileAddresses(context.Context, swarm.Address, swarm.AddressIterFunc) error
// TraverseManifestAddresses iterates through each address of a manifest,
// as well as each entry found in it.
// It returns ErrInvalidType when the reference is not a manifest.
TraverseManifestAddresses(context.Context, swarm.Address, swarm.AddressIterFunc) error
}
// traversalService is the Service implementation returned by NewService.
type traversalService struct {
// storer is the storage that joiners and manifests read chunks from.
storer storage.Storer
}
// NewService returns a traversal Service that reads chunks from storer.
func NewService(storer storage.Storer) Service {
	svc := new(traversalService)
	svc.storer = storer
	return svc
}
// TraverseAddresses reports, through chunkAddressFunc, every chunk address
// related to reference. Depending on what the content turns out to be, the
// reference is processed as a manifest, as a file, or as plain bytes.
func (s *traversalService) TraverseAddresses(
	ctx context.Context,
	reference swarm.Address,
	chunkAddressFunc swarm.AddressIterFunc,
) error {
	isFile, e, metadata, err := s.checkIsFile(ctx, reference)
	if err != nil {
		return err
	}

	// reference address could be misrepresented as file when:
	// - content size is 64 bytes (or 128 for encrypted reference)
	// - second reference exists and is JSON (and not actually file metadata)
	if !isFile {
		return s.processBytes(ctx, reference, chunkAddressFunc)
	}

	isManifest, m, err := s.checkIsManifest(ctx, reference, e, metadata)
	if err != nil {
		return err
	}

	// reference address could be misrepresented as manifest when:
	// - file content type is actually one of manifest type (manually set)
	// - content was unmarshalled
	//
	// even though content could be unmarshaled in some case, iteration
	// through addresses will not be possible
	if !isManifest {
		return s.traverseChunkAddressesAsFile(ctx, reference, chunkAddressFunc, e)
	}

	// process as manifest
	//
	// A failure while traversing a manifest node only asks the manifest to
	// stop iterating; the failure itself is discarded here.
	visitNode := func(manifestNodeAddr swarm.Address) bool {
		return s.traverseChunkAddressesFromManifest(ctx, manifestNodeAddr, chunkAddressFunc) != nil
	}
	if err := m.IterateAddresses(ctx, visitNode); err != nil {
		return fmt.Errorf("traversal: iterate chunks: %s: %w", reference, err)
	}

	if err := s.processBytes(ctx, e.Metadata(), chunkAddressFunc); err != nil {
		return err
	}

	// Finally report the root reference; its stop request is ignored as
	// there is nothing left to traverse.
	_ = chunkAddressFunc(reference)

	return nil
}
// TraverseBytesAddresses reports, through chunkAddressFunc, each chunk address
// of the bytes stored under reference. The content is not inspected for being
// a file or a manifest.
func (s *traversalService) TraverseBytesAddresses(
	ctx context.Context,
	reference swarm.Address,
	chunkAddressFunc swarm.AddressIterFunc,
) error {
	err := s.processBytes(ctx, reference, chunkAddressFunc)
	return err
}
// TraverseFileAddresses reports, through chunkAddressFunc, each chunk address
// of the file stored under reference. It returns ErrInvalidType when the
// reference does not hold a file.
func (s *traversalService) TraverseFileAddresses(
	ctx context.Context,
	reference swarm.Address,
	chunkAddressFunc swarm.AddressIterFunc,
) error {
	isFile, e, _, err := s.checkIsFile(ctx, reference)

	// reference address could be misrepresented as file when:
	// - content size is 64 bytes (or 128 for encrypted reference)
	// - second reference exists and is JSON (and not actually file metadata)
	switch {
	case err != nil:
		return err
	case !isFile:
		return ErrInvalidType
	}

	return s.traverseChunkAddressesAsFile(ctx, reference, chunkAddressFunc, e)
}
// TraverseManifestAddresses reports, through chunkAddressFunc, each chunk
// address of the manifest stored under reference, including the addresses of
// every entry found in it. It returns ErrInvalidType when the reference does
// not hold a manifest.
func (s *traversalService) TraverseManifestAddresses(
	ctx context.Context,
	reference swarm.Address,
	chunkAddressFunc swarm.AddressIterFunc,
) error {
	isFile, e, metadata, err := s.checkIsFile(ctx, reference)
	switch {
	case err != nil:
		return err
	case !isFile:
		return ErrInvalidType
	}

	isManifest, m, err := s.checkIsManifest(ctx, reference, e, metadata)
	// reference address could be misrepresented as manifest when:
	// - file content type is actually one of manifest type (manually set)
	// - content was unmarshalled
	//
	// even though content could be unmarshaled in some case, iteration
	// through addresses will not be possible
	switch {
	case err != nil:
		return err
	case !isManifest:
		return ErrInvalidType
	}

	// A failure while traversing a manifest node only asks the manifest to
	// stop iterating; the failure itself is discarded here.
	visitNode := func(manifestNodeAddr swarm.Address) bool {
		return s.traverseChunkAddressesFromManifest(ctx, manifestNodeAddr, chunkAddressFunc) != nil
	}
	if err := m.IterateAddresses(ctx, visitNode); err != nil {
		return fmt.Errorf("traversal: iterate chunks: %s: %w", reference, err)
	}

	if err := s.processBytes(ctx, e.Metadata(), chunkAddressFunc); err != nil {
		return err
	}

	// Finally report the root reference; its stop request is ignored as
	// there is nothing left to traverse.
	_ = chunkAddressFunc(reference)

	return nil
}
// traverseChunkAddressesFromManifest traverses a single address obtained from
// a manifest: as a file when checkIsFile recognizes it as one, and as plain
// bytes otherwise.
func (s *traversalService) traverseChunkAddressesFromManifest(
	ctx context.Context,
	reference swarm.Address,
	chunkAddressFunc swarm.AddressIterFunc,
) error {
	isFile, e, _, err := s.checkIsFile(ctx, reference)
	if err != nil {
		return err
	}

	if !isFile {
		return s.processBytes(ctx, reference, chunkAddressFunc)
	}

	return s.traverseChunkAddressesAsFile(ctx, reference, chunkAddressFunc, e)
}
// traverseChunkAddressesAsFile traverses reference as a file described by the
// entry e. It reports, through chunkAddressFunc, the chunk addresses of the
// file content (e.Reference()), then those of the file metadata
// (e.Metadata()), and finally reference itself.
//
// A storage.ErrNotFound failure while traversing the content is tolerated;
// any other failure, and any failure while traversing the metadata, is
// returned to the caller.
func (s *traversalService) traverseChunkAddressesAsFile(
	ctx context.Context,
	reference swarm.Address,
	chunkAddressFunc swarm.AddressIterFunc,
	e *entry.Entry,
) (err error) {
	bytesReference := e.Reference()
	err = s.processBytes(ctx, bytesReference, chunkAddressFunc)
	if err != nil {
		// possible it was custom JSON bytes, which matches entry JSON
		// but in fact is not file, and does not contain reference to
		// existing address, which is why it was not found in storage
		if !errors.Is(err, storage.ErrNotFound) {
			// Any other failure is a real error and must not be
			// reported as a successful traversal.
			return err
		}
		// ignore
	}

	metadataReference := e.Metadata()
	err = s.processBytes(ctx, metadataReference, chunkAddressFunc)
	if err != nil {
		return err
	}

	// The stop request of the last callback is ignored as there is nothing
	// left to traverse.
	_ = chunkAddressFunc(reference)

	return nil
}
// checkIsFile checks if the content is file.
//
// The content under reference is considered a file when its size matches a
// serialized entry (entry.CanUnmarshal), it unmarshals into an entry, and the
// entry's metadata reference can be read and strictly decoded as
// entry.Metadata JSON.
//
// An error is returned only when reading the entry, unmarshaling it, or
// reading the metadata bytes fails. A metadata reference that cannot be
// joined, or metadata that is not valid metadata JSON, yields isFile == false
// with a nil error. The entry (and, after a decode attempt, the metadata) is
// still returned in those cases.
func (s *traversalService) checkIsFile(
	ctx context.Context,
	reference swarm.Address,
) (isFile bool, e *entry.Entry, metadata *entry.Metadata, err error) {
	entryJoiner, span, err := joiner.New(ctx, s.storer, reference)
	if err != nil {
		return false, nil, nil, fmt.Errorf("traversal: joiner: %s: %w", reference, err)
	}

	if !entry.CanUnmarshal(span) {
		return false, nil, nil, nil
	}

	var entryBuf bytes.Buffer
	if _, err = file.JoinReadAll(ctx, entryJoiner, &entryBuf); err != nil {
		return false, nil, nil, fmt.Errorf("traversal: read entry: %s: %w", reference, err)
	}

	e = &entry.Entry{}
	if err = e.UnmarshalBinary(entryBuf.Bytes()); err != nil {
		return false, e, nil, fmt.Errorf("traversal: unmarshal entry: %s: %w", reference, err)
	}

	// NOTE: any bytes will unmarshal to addresses; we need to check metadata

	// read metadata
	metadataJoiner, _, err := joiner.New(ctx, s.storer, e.Metadata())
	if err != nil {
		// ignore: the second reference does not lead to readable content,
		// so this is not treated as a file
		return false, e, nil, nil
	}

	var metadataBuf bytes.Buffer
	if _, err = file.JoinReadAll(ctx, metadataJoiner, &metadataBuf); err != nil {
		return false, e, nil, fmt.Errorf("traversal: read metadata: %s: %w", reference, err)
	}

	metadata = &entry.Metadata{}
	dec := json.NewDecoder(&metadataBuf)
	dec.DisallowUnknownFields()
	if err = dec.Decode(metadata); err != nil {
		// may not be metadata JSON
		return false, e, metadata, nil
	}

	return true, e, metadata, nil
}
// checkIsManifest checks if the content is manifest.
//
// It tries to load a manifest of the type named by metadata.MimeType from
// e.Reference(). metadata and e must be non-nil. When the MIME type is not a
// known manifest type (manifest.ErrInvalidManifestType), isManifest is false
// and err is nil; any other load failure is returned wrapped.
func (s *traversalService) checkIsManifest(
	ctx context.Context,
	reference swarm.Address,
	e *entry.Entry,
	metadata *entry.Metadata,
) (isManifest bool, m manifest.Interface, err error) {
	// NOTE: 'encrypted' parameter only used for saving manifest
	m, err = manifest.NewManifestReference(
		ctx,
		metadata.MimeType,
		e.Reference(),
		false,
		s.storer,
	)
	if err != nil {
		// errors.Is also recognizes the sentinel when it has been wrapped.
		if errors.Is(err, manifest.ErrInvalidManifestType) {
			// ignore
			return false, m, nil
		}
		return false, m, fmt.Errorf("traversal: read manifest: %s: %w", reference, err)
	}

	return true, m, nil
}
// processBytes creates a joiner for reference and lets it report every chunk
// address it iterates over to chunkAddressFunc. Failures of either step are
// returned wrapped with the reference.
func (s *traversalService) processBytes(
	ctx context.Context,
	reference swarm.Address,
	chunkAddressFunc swarm.AddressIterFunc,
) error {
	chunkJoiner, _, err := joiner.New(ctx, s.storer, reference)
	if err != nil {
		return fmt.Errorf("traversal: joiner: %s: %w", reference, err)
	}

	if err := chunkJoiner.IterateChunkAddresses(chunkAddressFunc); err != nil {
		return fmt.Errorf("traversal: iterate chunks: %s: %w", reference, err)
	}

	return nil
}
// Copyright 2020 The Swarm Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package traversal_test
import (
"bytes"
"context"
"encoding/json"
"fmt"
"math"
"mime"
"path"
"sort"
"strings"
"testing"
"time"
"github.com/ethersphere/bee/pkg/collection/entry"
"github.com/ethersphere/bee/pkg/file/pipeline/builder"
"github.com/ethersphere/bee/pkg/manifest"
"github.com/ethersphere/bee/pkg/storage"
"github.com/ethersphere/bee/pkg/storage/mock"
"github.com/ethersphere/bee/pkg/swarm"
"github.com/ethersphere/bee/pkg/traversal"
)
// simpleData is the pattern that generated sample data is built from.
var simpleData = []byte("hello test world") // fixed, 16 bytes

// generateSampleData returns size bytes made of simpleData repeated as often
// as needed, with the last repetition cut short to fit.
func generateSampleData(size int) (b []byte) {
	// The pattern is always appended at least once, even for size 0.
	b = append(b, simpleData...)
	for len(b) < size {
		b = append(b, simpleData...)
	}
	return b[:size]
}
// TestTraversalBytes uploads sample data of various sizes as plain bytes and
// checks, via traversalCheck, the addresses reported by
// TraverseBytesAddresses against the expected hashes and hash count.
func TestTraversalBytes(t *testing.T) {
	traverseFn := func(svc traversal.Service) func(context.Context, swarm.Address, swarm.AddressIterFunc) error {
		return svc.TraverseBytesAddresses
	}

	testCases := []struct {
		dataSize            int
		expectedHashesCount int
		expectedHashes      []string
		ignoreDuplicateHash bool
	}{
		{
			dataSize:            len(simpleData),
			expectedHashesCount: 1,
			expectedHashes: []string{
				"e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a",
			},
		},
		{
			dataSize:            swarm.ChunkSize,
			expectedHashesCount: 1,
			expectedHashes: []string{
				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
			},
		},
		{
			dataSize:            swarm.ChunkSize + 1,
			expectedHashesCount: 3,
			expectedHashes: []string{
				"a1c4483d15167aeb406017942c9625464574cf70bf7e42f237094acbccdb6834", // bytes (joiner)
				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
				"dcbfb467950a28f8c5023b86d31de4ff3a337993e921ae623ae62c7190d60329", // bytes (1)
			},
		},
		{
			dataSize:            swarm.ChunkSize * 128,
			expectedHashesCount: 129,
			expectedHashes: []string{
				"5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner)
				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
			},
			ignoreDuplicateHash: true,
		},
		{
			dataSize:            swarm.ChunkSize * 129,
			expectedHashesCount: 131,
			expectedHashes: []string{
				"150665dfbd81f80f5ba00a0caa2caa34f8b94e662e1dea769fe9ce7ea170bf25", // root (joiner, chunk)
				"5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner)
				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
			},
			ignoreDuplicateHash: true,
		},
		{
			dataSize:            swarm.ChunkSize*129 - 1,
			expectedHashesCount: 131,
			expectedHashes: []string{
				"895610b2d795e7cc351a8336d46ba9ef37309d83267d272c6e257e46a78ecb7c", // root (joiner, chunk)
				"5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner)
				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
				"d18f0d81b832086376684558978cfe6773ed773178f84961c8b750fe72033a26", // bytes (4095)
			},
			ignoreDuplicateHash: true,
		},
		{
			dataSize:            swarm.ChunkSize*129 + 1,
			expectedHashesCount: 133,
			expectedHashes: []string{
				"023ee8b901702a999e9ef90ca2bc1c6db1daefb3f178b683a87b0fd613fd8e21", // root (joiner, chunk)
				"5060cfd2a34df0269b47201e1f202eb2a165d787a0c5043ceb29bb85b7567c61", // bytes (joiner [4096 * 128])
				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
				"dcbfb467950a28f8c5023b86d31de4ff3a337993e921ae623ae62c7190d60329", // bytes (1)
				"a1c4483d15167aeb406017942c9625464574cf70bf7e42f237094acbccdb6834", // bytes (joiner - [4096, 1])
			},
			ignoreDuplicateHash: true,
		},
	}

	for _, tc := range testCases {
		// Sub-test name carries the number of chunks the data spans and
		// the data size.
		chunkCount := int(math.Ceil(float64(tc.dataSize) / swarm.ChunkSize))

		t.Run(fmt.Sprintf("%d-chunk-%d-bytes", chunkCount, tc.dataSize), func(t *testing.T) {
			storer := mock.NewStorer()
			ctx := context.Background()

			data := generateSampleData(tc.dataSize)

			pipe := builder.NewPipelineBuilder(ctx, storer, storage.ModePutUpload, false)
			rootAddr, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(data), int64(len(data)))
			if err != nil {
				t.Fatal(err)
			}

			traversalCheck(t, storer, traverseFn, rootAddr, tc.expectedHashesCount, tc.expectedHashes, tc.ignoreDuplicateHash)
		})
	}
}
// TestTraversalFiles uploads sample data of various sizes as files (content,
// JSON metadata and the entry referencing both) and checks, via
// traversalCheck, the addresses reported by TraverseFileAddresses against the
// expected hashes and hash count.
func TestTraversalFiles(t *testing.T) {
	traverseFn := func(svc traversal.Service) func(context.Context, swarm.Address, swarm.AddressIterFunc) error {
		return svc.TraverseFileAddresses
	}

	testCases := []struct {
		filesSize           int
		contentType         string
		expectedHashesCount int
		expectedHashes      []string
		ignoreDuplicateHash bool
	}{
		{
			filesSize:           len(simpleData),
			contentType:         "text/plain; charset=utf-8",
			expectedHashesCount: 3,
			expectedHashes: []string{
				"06e50210b6bcebca15cfc8bc9ee3aa51ad8fa9cac41340f9f6396ada74fec78f", // root
				"999a9f2e1fd29a6691a3b8e437cbb36e34a1f67decc973dfc70928d1e7de3c3b", // metadata
				"e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a", // bytes
			},
		},
		{
			filesSize:           swarm.ChunkSize,
			contentType:         "text/plain; charset=utf-8",
			expectedHashesCount: 3,
			expectedHashes: []string{
				"29ae87fda18bee4255ef19faabe901e2cf9c1c5c4648083383255670492e814e", // root
				"e7d4d4a897cd69f5759621044402e40a3d5c903cf1e225864eef5d1f77d97680", // metadata
				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
			},
		},
		{
			filesSize:           swarm.ChunkSize + 1,
			contentType:         "text/plain; charset=utf-8",
			expectedHashesCount: 5,
			expectedHashes: []string{
				"aa4a46bfbdff91c8db555edcfa4ba18371a083fdec67120db58d7ef177815ff0", // root
				"be1f048819e744886803fbe44cf16205949b196640665077bfcacf68c323aa49", // metadata
				"a1c4483d15167aeb406017942c9625464574cf70bf7e42f237094acbccdb6834", // bytes (joiner)
				"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
				"dcbfb467950a28f8c5023b86d31de4ff3a337993e921ae623ae62c7190d60329", // bytes (1)
			},
		},
	}

	for _, tc := range testCases {
		// Sub-test name carries the number of chunks the content spans and
		// the content size.
		chunkCount := int(math.Ceil(float64(tc.filesSize) / swarm.ChunkSize))

		t.Run(fmt.Sprintf("%d-chunk-%d-bytes", chunkCount, tc.filesSize), func(t *testing.T) {
			storer := mock.NewStorer()
			ctx := context.Background()

			// upload feeds data through a fresh pipeline and returns the
			// resulting address, failing the test on error.
			upload := func(data []byte) swarm.Address {
				t.Helper()
				pipe := builder.NewPipelineBuilder(ctx, storer, storage.ModePutUpload, false)
				addr, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(data), int64(len(data)))
				if err != nil {
					t.Fatal(err)
				}
				return addr
			}

			// file content
			fr := upload(generateSampleData(tc.filesSize))

			// file metadata; the content address doubles as the file name
			m := entry.NewMetadata(fr.String())
			m.MimeType = tc.contentType
			metadataBytes, err := json.Marshal(m)
			if err != nil {
				t.Fatal(err)
			}
			mr := upload(metadataBytes)

			// file entry referencing content and metadata
			fileEntryBytes, err := entry.New(fr, mr).MarshalBinary()
			if err != nil {
				t.Fatal(err)
			}
			reference := upload(fileEntryBytes)

			traversalCheck(t, storer, traverseFn, reference, tc.expectedHashesCount, tc.expectedHashes, tc.ignoreDuplicateHash)
		})
	}
}
// file describes one file of a manifest test case in TestTraversalManifest.
type file struct {
// size is the size of the file content in bytes.
size int
// dir and name locate the file; the test cases leave dir empty and put
// any directory part into name (for example "data/1.txt").
dir string
name string
// reference is a hex-encoded address; presumably the expected address of
// the file entry — confirm against the test body.
reference string
// chunks holds the hex-encoded addresses belonging to this file.
chunks fileChunks
}
// fileChunks lists the hex-encoded chunk addresses of a test file.
type fileChunks struct {
// metadata is the address of the file's metadata.
metadata string
// content holds the addresses of the file's content chunks.
content []string
}
func TestTraversalManifest(t *testing.T) {
traverseFn := func(traversalService traversal.Service) func(context.Context, swarm.Address, swarm.AddressIterFunc) error {
return traversalService.TraverseManifestAddresses
}
var (
obfuscationKey = make([]byte, 32)
obfuscationKeyFn = func(p []byte) (n int, err error) {
n = copy(p, obfuscationKey)
return
}
)
testCases := []struct {
manifestType string
files []file
manifestHashes []string
expectedHashesCount int
ignoreDuplicateHash bool
}{
{
manifestType: manifest.ManifestSimpleContentType,
files: []file{
{
size: len(simpleData),
dir: "",
name: "hello.txt",
reference: "a7c9250614bd2d2529e7bee2e2d0df295661b7185465193dc3b54ffea30c4702",
chunks: fileChunks{
metadata: "af2f73f800821b8ca7f5d2c33d0ba6018734d809389a47993c621cc62245d9e0",
content: []string{
"e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a",
},
},
},
},
manifestHashes: []string{
"864984d3b0a0401123325ffac8ce696f3eb67ea9ba290a66e8d4e7ddb41fd1dc", // root
"90cca4ac6ec25d8fdae297f65dfa389abd2db77f1b44a623d9fcb96802a935a7", // metadata
"3665a0de7b2a63ba80fd3bb6f7c2d75b633ee4a297a0d7442cecd89c3553a4d2", // bytes
},
expectedHashesCount: 6,
},
{
manifestType: manifest.ManifestSimpleContentType,
files: []file{
{
size: len(simpleData),
dir: "",
name: "hello.txt",
reference: "a7c9250614bd2d2529e7bee2e2d0df295661b7185465193dc3b54ffea30c4702",
chunks: fileChunks{
metadata: "af2f73f800821b8ca7f5d2c33d0ba6018734d809389a47993c621cc62245d9e0",
content: []string{
"e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a",
},
},
},
{
size: swarm.ChunkSize,
dir: "",
name: "data/1.txt",
reference: "5241139a93e4c8735b62414c4a3be8d10e83c6644af320f8892cbac0bc869cab",
chunks: fileChunks{
metadata: "ec35ef758093abaeaabc3956c8eeb9739cf6e6168ce44ae912b9b4777b0e9420",
content: []string{
"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
},
},
},
{
size: swarm.ChunkSize,
dir: "",
name: "data/2.txt",
reference: "940d67638f577ad36701b7ed380ed8e1c4c14e6bb6e19c6a74b0d5ac7cb0fb55",
chunks: fileChunks{
metadata: "a05586fb3c4625e21377ce2043c362835d3eb95bd9970d84db414a0f6164f822",
content: []string{
"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
},
},
},
},
manifestHashes: []string{
"d2c4586f8791058153464064aa9b90059ad8ab9afe068df37d97f5711a0a197f", // root
"39745d382da0c21042290c59d43840a5685f461bd7da49c36a120136f49869cb", // metadata
"dc763a70a578970c001cb9c59c90615d3e5c19eb4147cc45757481e32bf72ec7", // bytes
},
expectedHashesCount: 12,
ignoreDuplicateHash: true,
},
{
manifestType: manifest.ManifestMantarayContentType,
files: []file{
{
size: len(simpleData),
dir: "",
name: "hello.txt",
reference: "a7c9250614bd2d2529e7bee2e2d0df295661b7185465193dc3b54ffea30c4702",
chunks: fileChunks{
metadata: "af2f73f800821b8ca7f5d2c33d0ba6018734d809389a47993c621cc62245d9e0",
content: []string{
"e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a",
},
},
},
},
manifestHashes: []string{
// NOTE: references will be fixed, due to custom obfuscation key function
"596c29bd00b241cb38aba10ca7005bf124baed90b613c2ff11ee891165a487fd", // root
"70501ac2caed16fc5f929977172a631ac540a5efd567cf1447bf7ee4aae4eb9f", // metadata
"486914d1449e482ff248268e99c5d7d2772281f033c07f2f74aa4cc1ce3a8fe0", // bytes - root node
"3d6a9e4eec6ebaf6ca6c6412dae6a23c76bc0c0672d259d98562368915d16b88", // bytes - node [h]
},
expectedHashesCount: 7,
},
{
manifestType: manifest.ManifestMantarayContentType,
files: []file{
{
size: len(simpleData),
dir: "",
name: "hello.txt",
reference: "a7c9250614bd2d2529e7bee2e2d0df295661b7185465193dc3b54ffea30c4702",
chunks: fileChunks{
metadata: "af2f73f800821b8ca7f5d2c33d0ba6018734d809389a47993c621cc62245d9e0",
content: []string{
"e94a5aadf259f008b7d5039420c65d692901846523f503d97d24e2f077786d9a",
},
},
},
{
size: swarm.ChunkSize,
dir: "",
name: "data/1.txt",
reference: "5241139a93e4c8735b62414c4a3be8d10e83c6644af320f8892cbac0bc869cab",
chunks: fileChunks{
metadata: "ec35ef758093abaeaabc3956c8eeb9739cf6e6168ce44ae912b9b4777b0e9420",
content: []string{
"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
},
},
},
{
size: swarm.ChunkSize,
dir: "",
name: "data/2.txt",
reference: "940d67638f577ad36701b7ed380ed8e1c4c14e6bb6e19c6a74b0d5ac7cb0fb55",
chunks: fileChunks{
metadata: "a05586fb3c4625e21377ce2043c362835d3eb95bd9970d84db414a0f6164f822",
content: []string{
"f833c17be12d68aec95eca7f9d993f7d7aaa7a9c282eb2c3d79ab26a5aeaf384", // bytes (4096)
},
},
},
},
manifestHashes: []string{
// NOTE: references will be fixed, due to custom obfuscation key function
"10a70b3a0102b94e909d08b91b98a2d8ca22c762ad7286d5451de2dd6432c218", // root
"fb2c46942a3b2148e856d778731de9c173a26bec027aa27897f32e423eb14458", // metadata
"39caaed3c9e42ea3ad9a374d37181e21c9a686367e0ae42d66c20465538d9789", // bytes - root node
"735aee067bdc02e1c1e8e88eea8b5b0535bfc9d0d36bf3a4d6fbac94a03bc233", // bytes - node [d]
"3d6a9e4eec6ebaf6ca6c6412dae6a23c76bc0c0672d259d98562368915d16b88", // bytes - node [h]
"ddb31ae6a74caf5df03e5d8bf6056e589229b4cae3087433db64a4768923f73b", // bytes - node [d]/[2]
"281dc7467f647abbfbaaf259a95ab60df8bf76ec3fbc525bfbca794d6360fa46", // bytes - node [d]/[1]
},
expectedHashesCount: 16,
ignoreDuplicateHash: true,
},
}
for _, tc := range testCases {
mediatype, _, err := mime.ParseMediaType(tc.manifestType)
if err != nil {
t.Fatal(err)
}
mediatype = strings.Split(mediatype, "/")[1]
mediatype = strings.Split(mediatype, "+")[0]
testName := fmt.Sprintf("%s-%d-files-%d-chunks", mediatype, len(tc.files), tc.expectedHashesCount)
t.Run(testName, func(t *testing.T) {
var (
mockStorer = mock.NewStorer()
)
expectedHashes := []string{}
// add hashes for files
for _, f := range tc.files {
expectedHashes = append(expectedHashes, f.reference, f.chunks.metadata)
// add hash for each content
expectedHashes = append(expectedHashes, f.chunks.content...)
}
// add hashes for manifest
expectedHashes = append(expectedHashes, tc.manifestHashes...)
ctx := context.Background()
var dirManifest manifest.Interface
switch tc.manifestType {
case manifest.ManifestSimpleContentType:
dirManifest, err = manifest.NewSimpleManifest(false, mockStorer)
if err != nil {
t.Fatal(err)
}
case manifest.ManifestMantarayContentType:
dirManifest, err = manifest.NewMantarayManifestWithObfuscationKeyFn(false, mockStorer, obfuscationKeyFn)
if err != nil {
t.Fatal(err)
}
default:
t.Fatalf("manifest: invalid type: %s", tc.manifestType)
}
// add files to manifest
for _, f := range tc.files {
bytesData := generateSampleData(f.size)
pipe := builder.NewPipelineBuilder(ctx, mockStorer, storage.ModePutUpload, false)
fr, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(bytesData), int64(len(bytesData)))
if err != nil {
t.Fatal(err)
}
fileName := f.name
if fileName == "" {
fileName = fr.String()
}
m := entry.NewMetadata(fileName)
metadataBytes, err := json.Marshal(m)
if err != nil {
t.Fatal(err)
}
pipe = builder.NewPipelineBuilder(ctx, mockStorer, storage.ModePutUpload, false)
mr, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(metadataBytes), int64(len(metadataBytes)))
if err != nil {
t.Fatal(err)
}
entrie := entry.New(fr, mr)
fileEntryBytes, err := entrie.MarshalBinary()
if err != nil {
t.Fatal(err)
}
pipe = builder.NewPipelineBuilder(ctx, mockStorer, storage.ModePutUpload, false)
reference, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(fileEntryBytes), int64(len(fileEntryBytes)))
if err != nil {
t.Fatal(err)
}
filePath := path.Join(f.dir, fileName)
err = dirManifest.Add(filePath, manifest.NewEntry(reference, nil))
if err != nil {
t.Fatal(err)
}
}
// save manifest
manifestBytesReference, err := dirManifest.Store(ctx, storage.ModePutUpload)
if err != nil {
t.Fatal(err)
}
// store the manifest metadata and get its reference
m := entry.NewMetadata(manifestBytesReference.String())
m.MimeType = dirManifest.Type()
metadataBytes, err := json.Marshal(m)
if err != nil {
t.Fatal(err)
}
pipe := builder.NewPipelineBuilder(ctx, mockStorer, storage.ModePutUpload, false)
mr, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(metadataBytes), int64(len(metadataBytes)))
if err != nil {
t.Fatal(err)
}
// now join both references (fr, mr) to create an entry and store it
e := entry.New(manifestBytesReference, mr)
fileEntryBytes, err := e.MarshalBinary()
if err != nil {
t.Fatal(err)
}
pipe = builder.NewPipelineBuilder(ctx, mockStorer, storage.ModePutUpload, false)
manifestFileReference, err := builder.FeedPipeline(ctx, pipe, bytes.NewReader(fileEntryBytes), int64(len(fileEntryBytes)))
if err != nil {
t.Fatal(err)
}
traversalCheck(t, mockStorer, traverseFn, manifestFileReference, tc.expectedHashesCount, expectedHashes, tc.ignoreDuplicateHash)
})
}
}
// traversalCheck runs the traversal selected by traverseFn, starting at
// reference, against a traversal service built on top of storer, and verifies
// the set of addresses handed to the iterator.
//
// The test fails when:
//   - the traversal itself returns an error;
//   - the iterator was not invoked exactly expectedHashesCount times;
//   - any entry of expectedHashes was never visited;
//   - ignoreDuplicateHash is false and either an address is visited twice or
//     the number of distinct visited addresses differs from len(expectedHashes).
//
// The traversal is bounded by a 10 second timeout. Note that expectedHashes is
// sorted in place.
func traversalCheck(t *testing.T,
	storer storage.Storer,
	traverseFn func(traversalService traversal.Service) func(context.Context, swarm.Address, swarm.AddressIterFunc) error,
	reference swarm.Address,
	expectedHashesCount int,
	expectedHashes []string,
	ignoreDuplicateHash bool,
) {
	t.Helper()

	// the final membership check walks the expectations in sorted order
	sort.Strings(expectedHashes)

	svc := traversal.NewService(storer)

	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()

	var (
		calls int                       // total iterator invocations, duplicates included
		seen  = map[string]struct{}{} // distinct addresses, keyed by their string form
	)

	// visit records every address reported by the traversal and never asks
	// the traversal to stop early.
	visit := func(addr swarm.Address) (stop bool) {
		calls++
		key := addr.String()
		if _, dup := seen[key]; dup && !ignoreDuplicateHash {
			t.Fatalf("address found again: %s", key)
		}
		seen[key] = struct{}{}
		return false
	}

	if err := traverseFn(svc)(ctx, reference, visit); err != nil {
		t.Fatal(err)
	}

	if calls != expectedHashesCount {
		t.Fatalf("expected to find %d addresses, got %d", expectedHashesCount, calls)
	}

	// distinct counts can only be compared when duplicates are not tolerated
	if !ignoreDuplicateHash && len(seen) != len(expectedHashes) {
		t.Fatalf("expected to find %d addresses hashes, got %d", len(expectedHashes), len(seen))
	}

	for _, want := range expectedHashes {
		if _, ok := seen[want]; !ok {
			t.Fatalf("expected address %s not found", want)
		}
	}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment