// Try to stop sequencer first, but since sequencer is not healthy, we may not be able to stop it.
// In this case, it's fine to continue to try to transfer leadership to another server. This is safe because
// 1. if leadership transfer succeeded, then we'll retry and enter case !status.leader && status.healthy && status.active, which will try to stop sequencer.
// 2. even if the retries keep failing and the current server stays in active sequencing mode, this is still safe because our hook in op-node will prevent it from committing any new blocks to the network via p2p (if it is no longer the leader)
ife:=oc.stopSequencer();e!=nil{
result=multierror.Append(result,e)
}
// Try to transfer leadership to another server regardless of whether the sequencer was stopped. There are 4 scenarios here:
// 1. [sequencer stopped, leadership transfer succeeded] which is the happy case and we handed over sequencing to another server.
// 2. [sequencer stopped, leadership transfer failed] we'll enter into case status.leader && !status.healthy && !status.active and retry the leadership transfer.
// 3. [sequencer active, leadership transfer succeeded] we'll enter into case !status.leader && status.healthy && status.active and retry stopping the sequencer.
// 4. [sequencer active, leadership transfer failed] we're in the same state and will retry here again.
@@ -159,6 +205,13 @@ func (s *OpConductorTestSuite) TestControlLoop2() {
err=s.conductor.Resume(s.ctx)
s.NoError(err)
s.False(s.conductor.Paused())
// Stop
s.hmon.EXPECT().Stop().Return(nil)
s.cons.EXPECT().Shutdown().Return(nil)
err=s.conductor.Stop(s.ctx)
s.NoError(err)
s.True(s.conductor.Stopped())
}
// Scenario 3: pause -> stop
...
...
@@ -176,6 +229,289 @@ func (s *OpConductorTestSuite) TestControlLoop3() {
s.True(s.conductor.Stopped())
}
// In this test, we have a follower that is not healthy and not sequencing, it becomes leader through election and we expect it to transfer leadership to another node.
// [follower, not healthy, not sequencing] -- become leader --> [leader, not healthy, not sequencing] -- transfer leadership --> [follower, not healthy, not sequencing]
// expect to transfer leadership, go back to [follower, not healthy, not sequencing]
s.False(s.conductor.leader.Load())
s.False(s.conductor.healthy.Load())
s.False(s.conductor.seqActive.Load())
s.cons.AssertCalled(s.T(),"TransferLeader")
}
// In this test, we have a follower that is not healthy and not sequencing. It becomes healthy and we expect it to stay as follower and not start sequencing.
// [follower, not healthy, not sequencing] -- become healthy --> [follower, healthy, not sequencing]
// This test setup is the same as Scenario 3; the difference is that Scenario 3 covers only the happy case, whereas in this test we try to exhaust all the error cases.
// [follower, healthy, not sequencing] -- become leader, unsafe head does not match, retry, eventually succeed --> [leader, healthy, sequencing]
func(s*OpConductorTestSuite)TestScenario4(){
s.enableSynchronization()
// unsafe in consensus is 1 block ahead of unsafe in sequencer, we try to post the unsafe payload to sequencer and return error to allow retry
// this is normal because the latest unsafe (in consensus) might not arrive at sequencer through p2p yet
// In this test, we have a follower that is healthy and not sequencing, we send an unhealthy update to it and expect it to stay as follower and not start sequencing.
// [follower, healthy, not sequencing] -- become unhealthy --> [follower, not healthy, not sequencing]
// In this test, we have a leader that is healthy and sequencing, we send an unhealthy update to it and expect it to stop sequencing and transfer leadership.
// 1. [leader, healthy, sequencing] -- become unhealthy -->
// 2. [leader, unhealthy, sequencing] -- stop sequencing, transfer leadership --> [follower, unhealthy, not sequencing]
// In this test, we have a leader that is healthy and sequencing, we send an unhealthy update to it and expect it to stop sequencing and transfer leadership.
// However, the action we needed to take failed temporarily, so we expect it to retry until it succeeds.
// 1. [leader, healthy, sequencing] -- become unhealthy -->
// In this test, we have a leader that is healthy and sequencing, we send an unhealthy update to it and expect it to stop sequencing and transfer leadership.
// However, the action we needed to take failed temporarily, so we expect it to retry until it succeeds.
// 1. [leader, healthy, sequencing] -- become unhealthy -->