Commit 8598ed22 authored by Alok Nerurkar's avatar Alok Nerurkar Committed by GitHub

refactor(pushsync): Retry improvements (#1662)

- Don't account for failed attempts during retries
- LRU Cache to remember failed traces and select better peers
- Metrics
parent 553a1b56
...@@ -20,6 +20,7 @@ require ( ...@@ -20,6 +20,7 @@ require (
github.com/gorilla/mux v1.7.4 github.com/gorilla/mux v1.7.4
github.com/gorilla/websocket v1.4.2 github.com/gorilla/websocket v1.4.2
github.com/hashicorp/go-multierror v1.1.1 github.com/hashicorp/go-multierror v1.1.1
github.com/hashicorp/golang-lru v0.5.4
github.com/kardianos/service v1.2.0 github.com/kardianos/service v1.2.0
github.com/koron/go-ssdp v0.0.2 // indirect github.com/koron/go-ssdp v0.0.2 // indirect
github.com/kr/text v0.2.0 // indirect github.com/kr/text v0.2.0 // indirect
......
...@@ -30,11 +30,13 @@ var ( ...@@ -30,11 +30,13 @@ var (
) )
type Recorder struct { type Recorder struct {
base swarm.Address base swarm.Address
records map[string][]*Record records map[string][]*Record
recordsMu sync.Mutex recordsMu sync.Mutex
protocols []p2p.ProtocolSpec protocols []p2p.ProtocolSpec
middlewares []p2p.HandlerMiddleware middlewares []p2p.HandlerMiddleware
streamErr func(swarm.Address, string, string, string) error
protocolsWithPeers map[string]p2p.ProtocolSpec
} }
func WithProtocols(protocols ...p2p.ProtocolSpec) Option { func WithProtocols(protocols ...p2p.ProtocolSpec) Option {
...@@ -43,6 +45,12 @@ func WithProtocols(protocols ...p2p.ProtocolSpec) Option { ...@@ -43,6 +45,12 @@ func WithProtocols(protocols ...p2p.ProtocolSpec) Option {
}) })
} }
func WithPeerProtocols(protocolsWithPeers map[string]p2p.ProtocolSpec) Option {
return optionFunc(func(r *Recorder) {
r.protocolsWithPeers = protocolsWithPeers
})
}
func WithMiddlewares(middlewares ...p2p.HandlerMiddleware) Option { func WithMiddlewares(middlewares ...p2p.HandlerMiddleware) Option {
return optionFunc(func(r *Recorder) { return optionFunc(func(r *Recorder) {
r.middlewares = append(r.middlewares, middlewares...) r.middlewares = append(r.middlewares, middlewares...)
...@@ -55,6 +63,12 @@ func WithBaseAddr(a swarm.Address) Option { ...@@ -55,6 +63,12 @@ func WithBaseAddr(a swarm.Address) Option {
}) })
} }
func WithStreamError(streamErr func(swarm.Address, string, string, string) error) Option {
return optionFunc(func(r *Recorder) {
r.streamErr = streamErr
})
}
func New(opts ...Option) *Recorder { func New(opts ...Option) *Recorder {
r := &Recorder{ r := &Recorder{
records: make(map[string][]*Record), records: make(map[string][]*Record),
...@@ -73,6 +87,12 @@ func (r *Recorder) SetProtocols(protocols ...p2p.ProtocolSpec) { ...@@ -73,6 +87,12 @@ func (r *Recorder) SetProtocols(protocols ...p2p.ProtocolSpec) {
} }
func (r *Recorder) NewStream(ctx context.Context, addr swarm.Address, h p2p.Headers, protocolName, protocolVersion, streamName string) (p2p.Stream, error) { func (r *Recorder) NewStream(ctx context.Context, addr swarm.Address, h p2p.Headers, protocolName, protocolVersion, streamName string) (p2p.Stream, error) {
if r.streamErr != nil {
err := r.streamErr(addr, protocolName, protocolVersion, streamName)
if err != nil {
return nil, err
}
}
recordIn := newRecord() recordIn := newRecord()
recordOut := newRecord() recordOut := newRecord()
streamOut := newStream(recordIn, recordOut) streamOut := newStream(recordIn, recordOut)
...@@ -80,16 +100,20 @@ func (r *Recorder) NewStream(ctx context.Context, addr swarm.Address, h p2p.Head ...@@ -80,16 +100,20 @@ func (r *Recorder) NewStream(ctx context.Context, addr swarm.Address, h p2p.Head
var handler p2p.HandlerFunc var handler p2p.HandlerFunc
var headler p2p.HeadlerFunc var headler p2p.HeadlerFunc
for _, p := range r.protocols { peerHandlers, ok := r.protocolsWithPeers[addr.String()]
if p.Name == protocolName && p.Version == protocolVersion { if !ok {
for _, s := range p.StreamSpecs { for _, p := range r.protocols {
if s.Name == streamName { if p.Name == protocolName && p.Version == protocolVersion {
handler = s.Handler peerHandlers = p
headler = s.Headler
}
} }
} }
} }
for _, s := range peerHandlers.StreamSpecs {
if s.Name == streamName {
handler = s.Handler
headler = s.Headler
}
}
if handler == nil { if handler == nil {
return nil, ErrStreamNotSupported return nil, ErrStreamNotSupported
} }
......
...@@ -579,6 +579,185 @@ func TestRecorder_recordErr(t *testing.T) { ...@@ -579,6 +579,185 @@ func TestRecorder_recordErr(t *testing.T) {
}, testErr) }, testErr)
} }
func TestRecorder_withPeerProtocols(t *testing.T) {
peer1 := swarm.MustParseHexAddress("1000000000000000000000000000000000000000000000000000000000000000")
peer2 := swarm.MustParseHexAddress("2000000000000000000000000000000000000000000000000000000000000000")
recorder := streamtest.New(
streamtest.WithPeerProtocols(map[string]p2p.ProtocolSpec{
peer1.String(): newTestProtocol(func(_ context.Context, peer p2p.Peer, stream p2p.Stream) error {
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.ReadString('\n'); err != nil {
return err
}
if _, err := rw.WriteString("handler 1\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
return nil
}),
peer2.String(): newTestProtocol(func(_ context.Context, peer p2p.Peer, stream p2p.Stream) error {
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.ReadString('\n'); err != nil {
return err
}
if _, err := rw.WriteString("handler 2\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
return nil
}),
}),
)
request := func(ctx context.Context, s p2p.Streamer, address swarm.Address) error {
stream, err := s.NewStream(ctx, address, nil, testProtocolName, testProtocolVersion, testStreamName)
if err != nil {
return fmt.Errorf("new stream: %w", err)
}
defer stream.Close()
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.WriteString("req\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
_, err = rw.ReadString('\n')
return err
}
err := request(context.Background(), recorder, peer1)
if err != nil {
t.Fatal(err)
}
records, err := recorder.Records(peer1, testProtocolName, testProtocolVersion, testStreamName)
if err != nil {
t.Fatal(err)
}
testRecords(t, records, [][2]string{
{
"req\n",
"handler 1\n",
},
}, nil)
err = request(context.Background(), recorder, peer2)
if err != nil {
t.Fatal(err)
}
records, err = recorder.Records(peer2, testProtocolName, testProtocolVersion, testStreamName)
if err != nil {
t.Fatal(err)
}
testRecords(t, records, [][2]string{
{
"req\n",
"handler 2\n",
},
}, nil)
}
func TestRecorder_withStreamError(t *testing.T) {
peer1 := swarm.MustParseHexAddress("1000000000000000000000000000000000000000000000000000000000000000")
peer2 := swarm.MustParseHexAddress("2000000000000000000000000000000000000000000000000000000000000000")
testErr := errors.New("dummy stream error")
recorder := streamtest.New(
streamtest.WithPeerProtocols(map[string]p2p.ProtocolSpec{
peer1.String(): newTestProtocol(func(_ context.Context, peer p2p.Peer, stream p2p.Stream) error {
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.ReadString('\n'); err != nil {
return err
}
if _, err := rw.WriteString("handler 1\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
return nil
}),
peer2.String(): newTestProtocol(func(_ context.Context, peer p2p.Peer, stream p2p.Stream) error {
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.ReadString('\n'); err != nil {
return err
}
if _, err := rw.WriteString("handler 2\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
return nil
}),
}),
streamtest.WithStreamError(func(addr swarm.Address, _, _, _ string) error {
if addr.String() == peer1.String() {
return testErr
}
return nil
}),
)
request := func(ctx context.Context, s p2p.Streamer, address swarm.Address) error {
stream, err := s.NewStream(ctx, address, nil, testProtocolName, testProtocolVersion, testStreamName)
if err != nil {
return fmt.Errorf("new stream: %w", err)
}
defer stream.Close()
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.WriteString("req\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
_, err = rw.ReadString('\n')
return err
}
err := request(context.Background(), recorder, peer1)
if err == nil {
t.Fatal("expected error on NewStream for peer")
}
err = request(context.Background(), recorder, peer2)
if err != nil {
t.Fatal(err)
}
records, err := recorder.Records(peer2, testProtocolName, testProtocolVersion, testStreamName)
if err != nil {
t.Fatal(err)
}
testRecords(t, records, [][2]string{
{
"req\n",
"handler 2\n",
},
}, nil)
}
const ( const (
testProtocolName = "testing" testProtocolName = "testing"
testProtocolVersion = "1.0.1" testProtocolVersion = "1.0.1"
......
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
package pushsync package pushsync
var ( var (
ProtocolName = protocolName ProtocolName = protocolName
ProtocolVersion = protocolVersion ProtocolVersion = protocolVersion
StreamName = streamName StreamName = streamName
FailedRequestCache = newFailedRequestCache
) )
...@@ -10,11 +10,14 @@ import ( ...@@ -10,11 +10,14 @@ import (
) )
type metrics struct { type metrics struct {
TotalSent prometheus.Counter TotalSent prometheus.Counter
TotalReceived prometheus.Counter TotalReceived prometheus.Counter
TotalErrors prometheus.Counter TotalErrors prometheus.Counter
TotalReplicated prometheus.Counter TotalReplicated prometheus.Counter
TotalReplicatedError prometheus.Counter TotalReplicatedError prometheus.Counter
TotalSendAttempts prometheus.Counter
TotalFailedSendAttempts prometheus.Counter
FailedCacheHits *prometheus.CounterVec
} }
func newMetrics() metrics { func newMetrics() metrics {
...@@ -51,6 +54,27 @@ func newMetrics() metrics { ...@@ -51,6 +54,27 @@ func newMetrics() metrics {
Name: "total_replication_error", Name: "total_replication_error",
Help: "Total no of failed replication chunks.", Help: "Total no of failed replication chunks.",
}), }),
TotalSendAttempts: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: m.Namespace,
Subsystem: subsystem,
Name: "total_send_attempts",
Help: "Total no of attempts to push chunk.",
}),
TotalFailedSendAttempts: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: m.Namespace,
Subsystem: subsystem,
Name: "total_failed_send_attempts",
Help: "Total no of failed attempts to push chunk.",
}),
FailedCacheHits: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: m.Namespace,
Subsystem: subsystem,
Name: "failed_cache_hits",
Help: "FailedRequestCache hits",
},
[]string{"peer", "chunk"},
),
} }
} }
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment