Commit bfa93925 authored by acud's avatar acud Committed by GitHub

kademlia: wait on peer before retrying after connect err (#1449)

parent f41a3666
...@@ -309,6 +309,14 @@ func (k *Kad) manage() { ...@@ -309,6 +309,14 @@ func (k *Kad) manage() {
} }
k.logger.Debugf("peer not reachable from kademlia %s: %v", bzzAddr.String(), err) k.logger.Debugf("peer not reachable from kademlia %s: %v", bzzAddr.String(), err)
k.logger.Warningf("peer not reachable when attempting to connect") k.logger.Warningf("peer not reachable when attempting to connect")
k.waitNextMu.Lock()
if _, ok := k.waitNext[peer.String()]; !ok {
// don't override existing data in the map
k.waitNext[peer.String()] = retryInfo{tryAfter: time.Now().Add(timeToRetry)}
}
k.waitNextMu.Unlock()
// continue to next // continue to next
return nil return nil
} }
...@@ -382,6 +390,14 @@ func (k *Kad) manage() { ...@@ -382,6 +390,14 @@ func (k *Kad) manage() {
} }
k.logger.Debugf("peer not reachable from kademlia %s: %v", bzzAddr.String(), err) k.logger.Debugf("peer not reachable from kademlia %s: %v", bzzAddr.String(), err)
k.logger.Warningf("peer not reachable when attempting to connect") k.logger.Warningf("peer not reachable when attempting to connect")
k.waitNextMu.Lock()
if _, ok := k.waitNext[peer.String()]; !ok {
// don't override existing data in the map
k.waitNext[peer.String()] = retryInfo{tryAfter: time.Now().Add(timeToRetry)}
}
k.waitNextMu.Unlock()
// continue to next // continue to next
return false, false, nil return false, false, nil
} }
......
...@@ -496,6 +496,7 @@ func (s *Service) Connect(ctx context.Context, addr ma.Multiaddr) (address *bzz. ...@@ -496,6 +496,7 @@ func (s *Service) Connect(ctx context.Context, addr ma.Multiaddr) (address *bzz.
if err := s.connectionBreaker.Execute(func() error { return s.host.Connect(ctx, *info) }); err != nil { if err := s.connectionBreaker.Execute(func() error { return s.host.Connect(ctx, *info) }); err != nil {
if errors.Is(err, breaker.ErrClosed) { if errors.Is(err, breaker.ErrClosed) {
s.metrics.ConnectBreakerCount.Inc()
return nil, p2p.NewConnectionBackoffError(err, s.connectionBreaker.ClosedUntil()) return nil, p2p.NewConnectionBackoffError(err, s.connectionBreaker.ClosedUntil())
} }
return nil, err return nil, err
......
...@@ -20,6 +20,7 @@ type metrics struct { ...@@ -20,6 +20,7 @@ type metrics struct {
BlocklistedPeerCount prometheus.Counter BlocklistedPeerCount prometheus.Counter
BlocklistedPeerErrCount prometheus.Counter BlocklistedPeerErrCount prometheus.Counter
DisconnectCount prometheus.Counter DisconnectCount prometheus.Counter
ConnectBreakerCount prometheus.Counter
} }
func newMetrics() metrics { func newMetrics() metrics {
...@@ -68,6 +69,12 @@ func newMetrics() metrics { ...@@ -68,6 +69,12 @@ func newMetrics() metrics {
Name: "disconnect_count", Name: "disconnect_count",
Help: "Number of peers we've disconnected from (initiated locally).", Help: "Number of peers we've disconnected from (initiated locally).",
}), }),
ConnectBreakerCount: prometheus.NewCounter(prometheus.CounterOpts{
Namespace: m.Namespace,
Subsystem: subsystem,
Name: "connect_breaker_count",
Help: "Number of times we got a closed breaker while connecting to another peer.",
}),
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment