Commit 58bc639b authored by acud's avatar acud Committed by GitHub

kademlia, debugapi, node: fix connection issues (#236)

* debugapi/peer.go - the AddPeer method on kademlia was called after connecting to the peer. This is legacy from the full topology implementation and is incorrect in this case. The resulting behavior would be that the peer would be connected to and then added only to the known peers list, instead of to both the known and connected peers lists. The kademlia would then try to iterate in manage, but the peer would never show up in the connected peers list, so it would never show up in the iterator.
* it could be that, under certain circumstances, this peer would be gossiped to the node from another node, eventually triggering the addition of the peer through AddPeer and resulting in it being in the knownPeers slice. It is difficult to say what the behavior would be in such a case, since AddPeer does not guarantee trying to connect to such a node, so we cannot guarantee in such a case that the peer will ever show up in the connectedPeers list.
* kademlia.go - moved the addition of a peer to the connected peers list to after connect has finished. This is to prevent a data race when we try to connect to a peer that has already dialed in but was not yet added to the kademlia (the Connected callback was not yet called). In this case the peer should also be added to the connected peers list. This call is of no harm, since duplicates are not allowed as part of the pslice guarantees (they are skipped and not added).
* kademlia.go - add the peer to the known peers slice on dial-in. The problem was that a peer could dial in without ever having been known to us, resulting in an inconsistency in the kademlia.
* node.go - the peer should not be disconnected when AddPeer fails in the case of kademlia (since AddPeer doesn't guarantee a connection).
parent 75f893db
......@@ -32,7 +32,7 @@ type Options struct {
Overlay swarm.Address
P2P p2p.Service
Addressbook addressbook.GetPutter
TopologyDriver topology.PeerAdder
TopologyDriver topology.Notifier
Storer storage.Storer
Logger logging.Logger
}
......
......@@ -42,9 +42,9 @@ func (s *server) peerConnectHandler(w http.ResponseWriter, r *http.Request) {
jsonhttp.InternalServerError(w, err)
return
}
if err := s.TopologyDriver.AddPeer(r.Context(), address); err != nil {
if err := s.TopologyDriver.Connected(r.Context(), address); err != nil {
_ = s.P2P.Disconnect(address)
s.Logger.Debugf("debug api: topologyDriver.AddPeer %s: %v", addr, err)
s.Logger.Debugf("debug api: topologyDriver.Connected %s: %v", addr, err)
s.Logger.Errorf("unable to connect to peer %s", addr)
jsonhttp.InternalServerError(w, err)
return
......
......@@ -105,7 +105,6 @@ func (k *Kad) manage() {
return
case <-k.manageC:
err := k.knownPeers.EachBinRev(func(peer swarm.Address, po uint8) (bool, bool, error) {
if k.connectedPeers.Exists(peer) {
return false, false, nil
}
......@@ -144,6 +143,8 @@ func (k *Kad) manage() {
return false, false, nil
}
k.connectedPeers.Add(peer, po)
k.waitNextMu.Lock()
delete(k.waitNext, peer.String())
k.waitNextMu.Unlock()
......@@ -246,10 +247,9 @@ func (k *Kad) connect(ctx context.Context, peer swarm.Address, ma ma.Multiaddr,
k.waitNextMu.Unlock()
// TODO: somehow keep track of attempts and at some point forget about the peer
return err // dont stop, continue to next peer
return err
}
k.connectedPeers.Add(peer, po)
return k.announce(ctx, peer)
}
......@@ -304,6 +304,7 @@ func (k *Kad) AddPeer(ctx context.Context, addr swarm.Address) error {
// Connected is called when a peer has dialed in.
func (k *Kad) Connected(ctx context.Context, addr swarm.Address) error {
po := uint8(swarm.Proximity(k.base.Bytes(), addr.Bytes()))
k.knownPeers.Add(addr, po)
k.connectedPeers.Add(addr, po)
k.waitNextMu.Lock()
......
......@@ -315,7 +315,6 @@ func NewBee(o Options) (*Bee, error) {
defer wg.Done()
if err := topologyDriver.AddPeer(p2pCtx, overlay); err != nil {
_ = p2ps.Disconnect(overlay)
logger.Debugf("topology add peer fail %s: %v", overlay, err)
logger.Errorf("topology add peer %s", overlay)
return
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment