Commit 8598ed22 authored by Alok Nerurkar's avatar Alok Nerurkar Committed by GitHub

refactor(pushsync): Retry improvements (#1662)

- Don't account for failed attempts during retries
- LRU Cache to remember failed traces and select better peers
- Metrics
parent 553a1b56
......@@ -20,6 +20,7 @@ require (
github.com/gorilla/mux v1.7.4
github.com/gorilla/websocket v1.4.2
github.com/hashicorp/go-multierror v1.1.1
github.com/hashicorp/golang-lru v0.5.4
github.com/kardianos/service v1.2.0
github.com/koron/go-ssdp v0.0.2 // indirect
github.com/kr/text v0.2.0 // indirect
......
......@@ -30,11 +30,13 @@ var (
)
type Recorder struct {
base swarm.Address
records map[string][]*Record
recordsMu sync.Mutex
protocols []p2p.ProtocolSpec
middlewares []p2p.HandlerMiddleware
base swarm.Address
records map[string][]*Record
recordsMu sync.Mutex
protocols []p2p.ProtocolSpec
middlewares []p2p.HandlerMiddleware
streamErr func(swarm.Address, string, string, string) error
protocolsWithPeers map[string]p2p.ProtocolSpec
}
func WithProtocols(protocols ...p2p.ProtocolSpec) Option {
......@@ -43,6 +45,12 @@ func WithProtocols(protocols ...p2p.ProtocolSpec) Option {
})
}
func WithPeerProtocols(protocolsWithPeers map[string]p2p.ProtocolSpec) Option {
return optionFunc(func(r *Recorder) {
r.protocolsWithPeers = protocolsWithPeers
})
}
func WithMiddlewares(middlewares ...p2p.HandlerMiddleware) Option {
return optionFunc(func(r *Recorder) {
r.middlewares = append(r.middlewares, middlewares...)
......@@ -55,6 +63,12 @@ func WithBaseAddr(a swarm.Address) Option {
})
}
func WithStreamError(streamErr func(swarm.Address, string, string, string) error) Option {
return optionFunc(func(r *Recorder) {
r.streamErr = streamErr
})
}
func New(opts ...Option) *Recorder {
r := &Recorder{
records: make(map[string][]*Record),
......@@ -73,6 +87,12 @@ func (r *Recorder) SetProtocols(protocols ...p2p.ProtocolSpec) {
}
func (r *Recorder) NewStream(ctx context.Context, addr swarm.Address, h p2p.Headers, protocolName, protocolVersion, streamName string) (p2p.Stream, error) {
if r.streamErr != nil {
err := r.streamErr(addr, protocolName, protocolVersion, streamName)
if err != nil {
return nil, err
}
}
recordIn := newRecord()
recordOut := newRecord()
streamOut := newStream(recordIn, recordOut)
......@@ -80,16 +100,20 @@ func (r *Recorder) NewStream(ctx context.Context, addr swarm.Address, h p2p.Head
var handler p2p.HandlerFunc
var headler p2p.HeadlerFunc
for _, p := range r.protocols {
if p.Name == protocolName && p.Version == protocolVersion {
for _, s := range p.StreamSpecs {
if s.Name == streamName {
handler = s.Handler
headler = s.Headler
}
peerHandlers, ok := r.protocolsWithPeers[addr.String()]
if !ok {
for _, p := range r.protocols {
if p.Name == protocolName && p.Version == protocolVersion {
peerHandlers = p
}
}
}
for _, s := range peerHandlers.StreamSpecs {
if s.Name == streamName {
handler = s.Handler
headler = s.Headler
}
}
if handler == nil {
return nil, ErrStreamNotSupported
}
......
......@@ -579,6 +579,185 @@ func TestRecorder_recordErr(t *testing.T) {
}, testErr)
}
func TestRecorder_withPeerProtocols(t *testing.T) {
peer1 := swarm.MustParseHexAddress("1000000000000000000000000000000000000000000000000000000000000000")
peer2 := swarm.MustParseHexAddress("2000000000000000000000000000000000000000000000000000000000000000")
recorder := streamtest.New(
streamtest.WithPeerProtocols(map[string]p2p.ProtocolSpec{
peer1.String(): newTestProtocol(func(_ context.Context, peer p2p.Peer, stream p2p.Stream) error {
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.ReadString('\n'); err != nil {
return err
}
if _, err := rw.WriteString("handler 1\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
return nil
}),
peer2.String(): newTestProtocol(func(_ context.Context, peer p2p.Peer, stream p2p.Stream) error {
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.ReadString('\n'); err != nil {
return err
}
if _, err := rw.WriteString("handler 2\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
return nil
}),
}),
)
request := func(ctx context.Context, s p2p.Streamer, address swarm.Address) error {
stream, err := s.NewStream(ctx, address, nil, testProtocolName, testProtocolVersion, testStreamName)
if err != nil {
return fmt.Errorf("new stream: %w", err)
}
defer stream.Close()
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.WriteString("req\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
_, err = rw.ReadString('\n')
return err
}
err := request(context.Background(), recorder, peer1)
if err != nil {
t.Fatal(err)
}
records, err := recorder.Records(peer1, testProtocolName, testProtocolVersion, testStreamName)
if err != nil {
t.Fatal(err)
}
testRecords(t, records, [][2]string{
{
"req\n",
"handler 1\n",
},
}, nil)
err = request(context.Background(), recorder, peer2)
if err != nil {
t.Fatal(err)
}
records, err = recorder.Records(peer2, testProtocolName, testProtocolVersion, testStreamName)
if err != nil {
t.Fatal(err)
}
testRecords(t, records, [][2]string{
{
"req\n",
"handler 2\n",
},
}, nil)
}
func TestRecorder_withStreamError(t *testing.T) {
peer1 := swarm.MustParseHexAddress("1000000000000000000000000000000000000000000000000000000000000000")
peer2 := swarm.MustParseHexAddress("2000000000000000000000000000000000000000000000000000000000000000")
testErr := errors.New("dummy stream error")
recorder := streamtest.New(
streamtest.WithPeerProtocols(map[string]p2p.ProtocolSpec{
peer1.String(): newTestProtocol(func(_ context.Context, peer p2p.Peer, stream p2p.Stream) error {
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.ReadString('\n'); err != nil {
return err
}
if _, err := rw.WriteString("handler 1\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
return nil
}),
peer2.String(): newTestProtocol(func(_ context.Context, peer p2p.Peer, stream p2p.Stream) error {
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.ReadString('\n'); err != nil {
return err
}
if _, err := rw.WriteString("handler 2\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
return nil
}),
}),
streamtest.WithStreamError(func(addr swarm.Address, _, _, _ string) error {
if addr.String() == peer1.String() {
return testErr
}
return nil
}),
)
request := func(ctx context.Context, s p2p.Streamer, address swarm.Address) error {
stream, err := s.NewStream(ctx, address, nil, testProtocolName, testProtocolVersion, testStreamName)
if err != nil {
return fmt.Errorf("new stream: %w", err)
}
defer stream.Close()
rw := bufio.NewReadWriter(bufio.NewReader(stream), bufio.NewWriter(stream))
if _, err := rw.WriteString("req\n"); err != nil {
return err
}
if err := rw.Flush(); err != nil {
return err
}
_, err = rw.ReadString('\n')
return err
}
err := request(context.Background(), recorder, peer1)
if err == nil {
t.Fatal("expected error on NewStream for peer")
}
err = request(context.Background(), recorder, peer2)
if err != nil {
t.Fatal(err)
}
records, err := recorder.Records(peer2, testProtocolName, testProtocolVersion, testStreamName)
if err != nil {
t.Fatal(err)
}
testRecords(t, records, [][2]string{
{
"req\n",
"handler 2\n",
},
}, nil)
}
const (
testProtocolName = "testing"
testProtocolVersion = "1.0.1"
......
......@@ -5,7 +5,8 @@
package pushsync
var (
ProtocolName = protocolName
ProtocolVersion = protocolVersion
StreamName = streamName
ProtocolName = protocolName
ProtocolVersion = protocolVersion
StreamName = streamName
FailedRequestCache = newFailedRequestCache
)
......@@ -10,11 +10,14 @@ import (
)
type metrics struct {
TotalSent prometheus.Counter
TotalReceived prometheus.Counter
TotalErrors prometheus.Counter
TotalReplicated prometheus.Counter
TotalReplicatedError prometheus.Counter
TotalSent prometheus.Counter
TotalReceived prometheus.Counter
TotalErrors prometheus.Counter
TotalReplicated prometheus.Counter
TotalReplicatedError prometheus.Counter
TotalSendAttempts prometheus.Counter
TotalFailedSendAttempts prometheus.Counter
FailedCacheHits *prometheus.CounterVec
}
func newMetrics() metrics {
......@@ -51,6 +54,27 @@ func newMetrics() metrics {
Name: "total_replication_error",
Help: "Total no of failed replication chunks.",
}),
TotalSendAttempts: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: m.Namespace,
Subsystem: subsystem,
Name: "total_send_attempts",
Help: "Total no of attempts to push chunk.",
}),
TotalFailedSendAttempts: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: m.Namespace,
Subsystem: subsystem,
Name: "total_failed_send_attempts",
Help: "Total no of failed attempts to push chunk.",
}),
FailedCacheHits: prometheus.NewCounterVec(
prometheus.CounterOpts{
Namespace: m.Namespace,
Subsystem: subsystem,
Name: "failed_cache_hits",
Help: "FailedRequestCache hits",
},
[]string{"peer", "chunk"},
),
}
}
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment