Commit 8ba2e1e1 authored by Francis Li, committed by GitHub

[op-conductor] part 2 - core control logic (#8854)

* Implement main control logic

* Add more tests
parent fe6dfa6f
......@@ -3,12 +3,15 @@ package conductor
import (
"context"
"fmt"
"math/rand"
"sync"
"sync/atomic"
"time"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rpc"
"github.com/hashicorp/go-multierror"
"github.com/hashicorp/raft"
"github.com/pkg/errors"
"github.com/ethereum-optimism/optimism/op-conductor/client"
......@@ -136,6 +139,7 @@ func (c *OpConductor) initConsensus(ctx context.Context) error {
return errors.Wrap(err, "failed to create raft consensus")
}
c.cons = cons
c.leaderUpdateCh = c.cons.LeaderCh()
return nil
}
......@@ -165,6 +169,7 @@ func (c *OpConductor) initHealthMonitor(ctx context.Context) error {
node,
p2p,
)
c.healthUpdateCh = c.hmon.Subscribe()
return nil
}
......@@ -191,7 +196,9 @@ type OpConductor struct {
healthy atomic.Bool
seqActive atomic.Bool
actionFn func() // actionFn defines the action to be executed to bring the sequencer to the desired state.
healthUpdateCh <-chan bool
leaderUpdateCh <-chan bool
actionFn func() // actionFn defines the action to be executed to bring the sequencer to the desired state.
wg sync.WaitGroup
pauseCh chan struct{}
......@@ -225,8 +232,12 @@ func (oc *OpConductor) Start(ctx context.Context) error {
// Stop implements cliapp.Lifecycle.
func (oc *OpConductor) Stop(ctx context.Context) error {
oc.log.Info("stopping OpConductor")
if oc.Stopped() {
oc.log.Info("OpConductor already stopped")
return nil
}
oc.log.Info("stopping OpConductor")
var result *multierror.Error
// close control loop
......@@ -286,16 +297,14 @@ func (oc *OpConductor) Paused() bool {
func (oc *OpConductor) loop() {
defer oc.wg.Done()
healthUpdate := oc.hmon.Subscribe()
leaderUpdate := oc.cons.LeaderCh()
for {
select {
// We process status update (health, leadership) first regardless of the paused state.
// This way we could properly bring the sequencer to the desired state when resumed.
case healthy := <-healthUpdate:
case healthy := <-oc.healthUpdateCh:
oc.handleHealthUpdate(healthy)
case leader := <-leaderUpdate:
case leader := <-oc.leaderUpdateCh:
oc.handleLeaderUpdate(leader)
case <-oc.pauseCh:
oc.paused.Store(true)
......@@ -349,5 +358,119 @@ func (oc *OpConductor) action() {
return
}
// TODO: (https://github.com/ethereum-optimism/protocol-quest/issues/47) implement
var err error
// exhaust all cases below for completeness, 3 state, 8 cases.
switch status := struct{ leader, healthy, active bool }{oc.leader.Load(), oc.healthy.Load(), oc.seqActive.Load()}; {
case !status.leader && !status.healthy && !status.active:
// if follower is not healthy and not sequencing, just log an error
oc.log.Error("server (follower) is not healthy", "server", oc.cons.ServerID())
case !status.leader && !status.healthy && status.active:
// sequencer is not leader, not healthy, but it is sequencing, stop it
err = oc.stopSequencer()
case !status.leader && status.healthy && !status.active:
// normal follower, do nothing
case !status.leader && status.healthy && status.active:
// stop sequencer, this happens when current server steps down as leader.
err = oc.stopSequencer()
case status.leader && !status.healthy && !status.active:
// transfer leadership to another node
err = oc.transferLeader()
case status.leader && !status.healthy && status.active:
var result *multierror.Error
// Try to stop sequencer first, but since sequencer is not healthy, we may not be able to stop it.
// In this case, it's fine to continue to try to transfer leadership to another server. This is safe because
// 1. if leadership transfer succeeded, then we'll retry and enter case !status.leader && !status.healthy && status.active, which will try to stop sequencer.
// 2. even if the retry continues to fail and current server stays in active sequencing mode, it would be safe because our hook in op-node will prevent it from committing any new blocks to the network via p2p (if it's not leader any more)
if e := oc.stopSequencer(); e != nil {
result = multierror.Append(result, e)
}
// try to transfer leadership to another server regardless of whether the sequencer was stopped. There are 4 scenarios here:
// 1. [sequencer stopped, leadership transfer succeeded] which is the happy case and we handed over sequencing to another server.
// 2. [sequencer stopped, leadership transfer failed] we'll enter into case status.leader && !status.healthy && !status.active and retry transfer leadership.
// 3. [sequencer active, leadership transfer succeeded] we'll enter into case !status.leader && !status.healthy && status.active and retry stop sequencer.
// 4. [sequencer active, leadership transfer failed] we're in the same state and will retry here again.
if e := oc.transferLeader(); e != nil {
result = multierror.Append(result, e)
}
err = result.ErrorOrNil()
case status.leader && status.healthy && !status.active:
// start sequencer
err = oc.startSequencer()
case status.leader && status.healthy && status.active:
// normal leader, do nothing
}
if err != nil {
oc.log.Error("failed to execute step, queueing another one to retry", "err", err)
// randomly sleep for 0-200ms to avoid excessive retry
time.Sleep(time.Duration(rand.Intn(200)) * time.Millisecond)
oc.queueAction()
}
}
// transferLeader asks the consensus layer to hand leadership to another server.
//
// Outcomes:
//   - transfer succeeded: the local leader flag is cleared and nil is returned;
//   - consensus reports raft.ErrNotLeader: there is nothing to hand over, so a warning
//     is logged and nil is returned (the leader flag is left untouched);
//   - any other failure: the error is logged and returned to the caller.
func (oc *OpConductor) transferLeader() error {
	// TransferLeader here will do round robin to try to transfer leadership to the next healthy node.
	transferErr := oc.cons.TransferLeader()
	if transferErr == nil {
		oc.leader.Store(false)
		return nil
	}

	if errors.Is(transferErr, raft.ErrNotLeader) {
		// This server does not hold leadership, so there is nothing to transfer.
		oc.log.Warn("cannot transfer leadership since current server is not the leader")
		return nil
	}

	oc.log.Error("failed to transfer leadership", "err", transferErr)
	return transferErr
}
// stopSequencer asks the sequencer control client to stop sequencing.
//
// On success the seqActive flag is cleared. On failure the error is returned wrapped
// and seqActive is left unchanged.
func (oc *OpConductor) stopSequencer() error {
	oc.log.Info(
		"stopping sequencer",
		"server", oc.cons.ServerID(),
		"leader", oc.leader.Load(),
		"healthy", oc.healthy.Load(),
		"active", oc.seqActive.Load(),
	)

	// The value returned alongside the error is not needed here.
	_, stopErr := oc.ctrl.StopSequencer(context.Background())
	if stopErr != nil {
		return errors.Wrap(stopErr, "failed to stop sequencer")
	}

	oc.seqActive.Store(false)
	return nil
}
// startSequencer starts sequencing on this server once its unsafe head agrees with consensus.
//
// It compares the latest unsafe payload held by consensus with the latest unsafe block
// reported by the sequencer control client:
//   - hashes match: StartSequencer is called at that hash and seqActive is set on success;
//   - hashes differ: the mismatch is logged and ErrUnsafeHeadMismarch is returned to allow
//     a retry. If consensus is exactly one block ahead, the consensus payload is first
//     posted to the node; a failed post is only logged.
//
// Failures of LatestUnsafeBlock and StartSequencer are returned wrapped.
func (oc *OpConductor) startSequencer() error {
	oc.log.Info(
		"starting sequencer",
		"server", oc.cons.ServerID(),
		"leader", oc.leader.Load(),
		"healthy", oc.healthy.Load(),
		"active", oc.seqActive.Load(),
	)

	// When starting sequencer, the current node must have the latest unsafe head from the
	// consensus protocol. If not, we wait for it to catch up or hand it to op-node ourselves.
	consPayload := oc.cons.LatestUnsafePayload()
	nodeBlock, err := oc.ctrl.LatestUnsafeBlock(context.Background())
	if err != nil {
		return errors.Wrap(err, "failed to get latest unsafe block from EL during startSequencer phase")
	}

	// Happy path: both sides agree on the unsafe head, so sequencing can start from it.
	if consPayload.BlockHash == nodeBlock.Hash() {
		if startErr := oc.ctrl.StartSequencer(context.Background(), consPayload.BlockHash); startErr != nil {
			return errors.Wrap(startErr, "failed to start sequencer")
		}
		oc.seqActive.Store(true)
		return nil
	}

	oc.log.Warn(
		"latest unsafe block in consensus is not the same as the one in op-node",
		"consensus_hash", consPayload.BlockHash,
		"consensus_block_num", consPayload.BlockNumber,
		"node_hash", nodeBlock.Hash(),
		"node_block_num", nodeBlock.NumberU64(),
	)

	// Only when the node is exactly one block behind (most likely due to gossip delay)
	// do we try to post the consensus head to op-node.
	oneBlockBehind := uint64(consPayload.BlockNumber)-nodeBlock.NumberU64() == 1
	if oneBlockBehind {
		if postErr := oc.ctrl.PostUnsafePayload(context.Background(), &consPayload); postErr != nil {
			oc.log.Error("failed to post unsafe head payload to op-node", "err", postErr)
		}
	}

	// return error to allow retry
	return ErrUnsafeHeadMismarch
}
......@@ -2,13 +2,16 @@ package conductor
import (
"context"
"errors"
"math/big"
"os"
"sync"
"testing"
"time"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethereum/go-ethereum/log"
"github.com/stretchr/testify/require"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
clientmocks "github.com/ethereum-optimism/optimism/op-conductor/client/mocks"
......@@ -17,20 +20,20 @@ import (
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-service/eth"
"github.com/ethereum-optimism/optimism/op-service/testlog"
"github.com/ethereum-optimism/optimism/op-service/testutils"
)
func mockConfig(t *testing.T) Config {
now := uint64(time.Now().Unix())
dir, err := os.MkdirTemp("/tmp", "")
require.NoError(t, err)
return Config{
ConsensusAddr: "127.0.0.1",
ConsensusPort: 50050,
RaftServerID: "SequencerA",
RaftStorageDir: dir,
RaftStorageDir: "/tmp/raft",
RaftBootstrap: false,
NodeRPC: "http://node:8545",
ExecutionRPC: "http://geth:8545",
Paused: false,
HealthCheck: HealthCheckConfig{
Interval: 1,
SafeInterval: 5,
......@@ -84,6 +87,9 @@ type OpConductorTestSuite struct {
ctrl *clientmocks.SequencerControl
cons *consensusmocks.Consensus
hmon *healthmocks.HealthMonitor
next chan struct{}
wg sync.WaitGroup
}
func (s *OpConductorTestSuite) SetupSuite() {
......@@ -91,30 +97,70 @@ func (s *OpConductorTestSuite) SetupSuite() {
s.log = testlog.Logger(s.T(), log.LvlDebug)
s.cfg = mockConfig(s.T())
s.version = "v0.0.1"
s.next = make(chan struct{}, 1)
}
func (s *OpConductorTestSuite) SetupTest() {
// initialize for every test so that method call count starts from 0
s.ctrl = &clientmocks.SequencerControl{}
s.cons = &consensusmocks.Consensus{}
s.hmon = &healthmocks.HealthMonitor{}
s.cons.EXPECT().ServerID().Return("SequencerA")
}
func (s *OpConductorTestSuite) SetupTest() {
conductor, err := NewOpConductor(s.ctx, &s.cfg, s.log, s.version, s.ctrl, s.cons, s.hmon)
s.NoError(err)
s.conductor = conductor
s.healthUpdateCh = make(chan bool)
s.hmon.EXPECT().Start().Return(nil)
s.hmon.EXPECT().Subscribe().Return(s.healthUpdateCh)
s.conductor.healthUpdateCh = s.healthUpdateCh
s.leaderUpdateCh = make(chan bool)
s.cons.EXPECT().LeaderCh().Return(s.leaderUpdateCh)
s.conductor.leaderUpdateCh = s.leaderUpdateCh
err = s.conductor.Start(s.ctx)
s.NoError(err)
s.False(s.conductor.Stopped())
}
// TearDownTest stops the conductor after each test and checks that it reports itself as stopped.
// The health monitor and consensus mocks are primed to accept Stop and Shutdown calls.
func (s *OpConductorTestSuite) TearDownTest() {
	s.cons.EXPECT().Shutdown().Return(nil)
	s.hmon.EXPECT().Stop().Return(nil)

	stopErr := s.conductor.Stop(s.ctx)
	s.NoError(stopErr)
	s.True(s.conductor.Stopped())
}
// enableSynchronization replaces the conductor's actionFn with a gated version so that a
// test decides when each action step runs and can inspect the state between steps.
//
// Each invocation blocks until a token arrives on s.next, runs one conductor action, and
// then signals completion through s.wg.
func (s *OpConductorTestSuite) enableSynchronization() {
	gatedAction := func() {
		<-s.next
		s.conductor.action()
		s.wg.Done()
	}
	s.conductor.actionFn = gatedAction
}
// execute releases exactly one gated action step and waits for it to finish.
//
// It registers one pending step on s.wg, puts a token on s.next, runs fn (if non-nil) to
// trigger the step, and then blocks until the step calls s.wg.Done.
func (s *OpConductorTestSuite) execute(fn func()) {
	trigger := fn
	if trigger == nil {
		// Nothing to trigger; an action is expected to be queued already.
		trigger = func() {}
	}

	s.wg.Add(1)
	s.next <- struct{}{}
	trigger()
	s.wg.Wait()
}
// updateStatusAndExecuteAction sends status on ch (a health or leader update channel) and
// lets one gated action step run, returning once that step has completed.
func (s *OpConductorTestSuite) updateStatusAndExecuteAction(ch chan bool, status bool) {
	s.execute(func() { ch <- status })
}
// executeAction lets one gated action step run without sending any status update first,
// and returns once that step has completed.
func (s *OpConductorTestSuite) executeAction() {
	var noTrigger func()
	s.execute(noTrigger)
}
// Scenario 1: pause -> resume -> stop
func (s *OpConductorTestSuite) TestControlLoop1() {
// Pause
......@@ -159,6 +205,13 @@ func (s *OpConductorTestSuite) TestControlLoop2() {
err = s.conductor.Resume(s.ctx)
s.NoError(err)
s.False(s.conductor.Paused())
// Stop
s.hmon.EXPECT().Stop().Return(nil)
s.cons.EXPECT().Shutdown().Return(nil)
err = s.conductor.Stop(s.ctx)
s.NoError(err)
s.True(s.conductor.Stopped())
}
// Scenario 3: pause -> stop
......@@ -176,6 +229,289 @@ func (s *OpConductorTestSuite) TestControlLoop3() {
s.True(s.conductor.Stopped())
}
// TestScenario1: an unhealthy, non-sequencing follower is elected leader and is expected to
// hand leadership straight to another node.
// [follower, not healthy, not sequencing] -- become leader --> [leader, not healthy, not sequencing] -- transfer leadership --> [follower, not healthy, not sequencing]
func (s *OpConductorTestSuite) TestScenario1() {
	s.enableSynchronization()
	oc := s.conductor

	// initial state: [follower, not healthy, not sequencing]
	oc.leader.Store(false)
	oc.healthy.Store(false)
	oc.seqActive.Store(false)

	s.cons.EXPECT().TransferLeader().Return(nil)

	// leadership is gained through election
	s.updateStatusAndExecuteAction(s.leaderUpdateCh, true)

	// leadership was transferred away again: [follower, not healthy, not sequencing]
	s.cons.AssertCalled(s.T(), "TransferLeader")
	s.False(oc.leader.Load())
	s.False(oc.healthy.Load())
	s.False(oc.seqActive.Load())
}
// TestScenario2: an unhealthy, non-sequencing follower becomes healthy. It must remain a
// follower and must not start sequencing.
// [follower, not healthy, not sequencing] -- become healthy --> [follower, healthy, not sequencing]
func (s *OpConductorTestSuite) TestScenario2() {
	s.enableSynchronization()
	oc := s.conductor

	// initial state: [follower, not healthy, not sequencing]
	oc.leader.Store(false)
	oc.healthy.Store(false)
	oc.seqActive.Store(false)

	// health monitor reports healthy
	s.updateStatusAndExecuteAction(s.healthUpdateCh, true)

	// only the health flag flips: [follower, healthy, not sequencing]
	s.True(oc.healthy.Load())
	s.False(oc.leader.Load())
	s.False(oc.seqActive.Load())
}
// TestScenario3: a healthy, non-sequencing follower receives a leader update and is expected
// to start sequencing. Consensus and the node agree on the unsafe head (same hash), so the
// sequencer can be started right away.
// [follower, healthy, not sequencing] -- become leader --> [leader, healthy, sequencing]
func (s *OpConductorTestSuite) TestScenario3() {
	s.enableSynchronization()
	oc := s.conductor

	// unsafe head as seen by consensus and by the node: same number, same hash
	consensusHead := eth.ExecutionPayload{
		BlockNumber: 1,
		Timestamp:   hexutil.Uint64(time.Now().Unix()),
		BlockHash:   [32]byte{1, 2, 3},
	}
	nodeHead := &testutils.MockBlockInfo{
		InfoNum:  1,
		InfoHash: [32]byte{1, 2, 3},
	}
	s.ctrl.EXPECT().StartSequencer(mock.Anything, mock.Anything).Return(nil).Times(1)
	s.ctrl.EXPECT().LatestUnsafeBlock(mock.Anything).Return(nodeHead, nil).Times(1)
	s.cons.EXPECT().LatestUnsafePayload().Return(consensusHead).Times(1)

	// precondition, taken from the conductor's state after setup: [follower, healthy, not sequencing]
	s.False(oc.leader.Load())
	s.True(oc.healthy.Load())
	s.False(oc.seqActive.Load())

	// become leader
	s.updateStatusAndExecuteAction(s.leaderUpdateCh, true)

	// [leader, healthy, sequencing]
	s.True(oc.leader.Load())
	s.True(oc.healthy.Load())
	s.True(oc.seqActive.Load())
	s.ctrl.AssertCalled(s.T(), "LatestUnsafeBlock", mock.Anything)
	s.ctrl.AssertCalled(s.T(), "StartSequencer", mock.Anything, mock.Anything)
}
// TestScenario4 uses the same setup as Scenario 3 but walks through the unsafe-head mismatch
// path instead of the happy path.
// [follower, healthy, not sequencing] -- become leader, unsafe head does not match, retry, eventually succeed --> [leader, healthy, sequencing]
func (s *OpConductorTestSuite) TestScenario4() {
	s.enableSynchronization()
	oc := s.conductor

	// Round 1: consensus is one block ahead of the node. The conductor should post the
	// consensus payload to the node and report an error so that the step is retried.
	// This is normal: the latest unsafe block (in consensus) may not have reached the
	// sequencer through p2p yet.
	consensusHead := eth.ExecutionPayload{
		BlockNumber: 2,
		Timestamp:   hexutil.Uint64(time.Now().Unix()),
		BlockHash:   [32]byte{1, 2, 3},
	}
	nodeHead := &testutils.MockBlockInfo{
		InfoNum:  1,
		InfoHash: [32]byte{2, 3, 4},
	}
	s.cons.EXPECT().LatestUnsafePayload().Return(consensusHead).Times(1)
	s.ctrl.EXPECT().LatestUnsafeBlock(mock.Anything).Return(nodeHead, nil).Times(1)
	s.ctrl.EXPECT().PostUnsafePayload(mock.Anything, mock.Anything).Return(nil).Times(1)

	s.updateStatusAndExecuteAction(s.leaderUpdateCh, true)

	// [leader, healthy, not sequencing]
	s.True(oc.leader.Load())
	s.True(oc.healthy.Load())
	s.False(oc.seqActive.Load())
	s.cons.AssertNumberOfCalls(s.T(), "LatestUnsafePayload", 1)
	s.ctrl.AssertNumberOfCalls(s.T(), "LatestUnsafeBlock", 1)
	s.ctrl.AssertNumberOfCalls(s.T(), "PostUnsafePayload", 1)
	s.ctrl.AssertNotCalled(s.T(), "StartSequencer", mock.Anything, mock.Anything)

	// Round 2: the node caught up with consensus, so the retry starts the sequencer at
	// the agreed block.
	nodeHead.InfoNum = 2
	nodeHead.InfoHash = [32]byte{1, 2, 3}
	s.cons.EXPECT().LatestUnsafePayload().Return(consensusHead).Times(1)
	s.ctrl.EXPECT().LatestUnsafeBlock(mock.Anything).Return(nodeHead, nil).Times(1)
	s.ctrl.EXPECT().StartSequencer(mock.Anything, nodeHead.InfoHash).Return(nil).Times(1)

	s.executeAction()

	// [leader, healthy, sequencing]
	s.True(oc.leader.Load())
	s.True(oc.healthy.Load())
	s.True(oc.seqActive.Load())
	s.cons.AssertNumberOfCalls(s.T(), "LatestUnsafePayload", 2)
	s.ctrl.AssertNumberOfCalls(s.T(), "LatestUnsafeBlock", 2)
	s.ctrl.AssertNumberOfCalls(s.T(), "PostUnsafePayload", 1)
	s.ctrl.AssertNumberOfCalls(s.T(), "StartSequencer", 1)
}
// TestScenario5: a healthy, non-sequencing follower receives an unhealthy update. It must
// remain a follower and must not start sequencing.
// [follower, healthy, not sequencing] -- become unhealthy --> [follower, not healthy, not sequencing]
func (s *OpConductorTestSuite) TestScenario5() {
	s.enableSynchronization()
	oc := s.conductor

	// initial state: [follower, healthy, not sequencing]
	oc.leader.Store(false)
	oc.healthy.Store(true)
	oc.seqActive.Store(false)

	// health monitor reports unhealthy
	s.updateStatusAndExecuteAction(s.healthUpdateCh, false)

	// only the health flag flips: [follower, not healthy, not sequencing]
	s.False(oc.healthy.Load())
	s.False(oc.leader.Load())
	s.False(oc.seqActive.Load())
}
// TestScenario6: a healthy, sequencing leader receives a leader update telling it that it
// stepped down. It is expected to stop sequencing.
// [leader, healthy, sequencing] -- step down as leader --> [follower, healthy, not sequencing]
func (s *OpConductorTestSuite) TestScenario6() {
	s.enableSynchronization()
	oc := s.conductor

	// initial state: [leader, healthy, sequencing]
	oc.leader.Store(true)
	oc.healthy.Store(true)
	oc.seqActive.Store(true)

	s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, nil).Times(1)

	// step down as leader
	s.updateStatusAndExecuteAction(s.leaderUpdateCh, false)

	// the sequencer was stopped: [follower, healthy, not sequencing]
	s.ctrl.AssertCalled(s.T(), "StopSequencer", mock.Anything)
	s.False(oc.leader.Load())
	s.True(oc.healthy.Load())
	s.False(oc.seqActive.Load())
}
// TestScenario7: a healthy, sequencing leader receives an unhealthy update. It is expected to
// stop sequencing and transfer leadership, both succeeding on the first attempt.
// 1. [leader, healthy, sequencing] -- become unhealthy -->
// 2. [leader, unhealthy, sequencing] -- stop sequencing, transfer leadership --> [follower, unhealthy, not sequencing]
func (s *OpConductorTestSuite) TestScenario7() {
	s.enableSynchronization()
	oc := s.conductor

	// initial state: [leader, healthy, sequencing]
	oc.leader.Store(true)
	oc.healthy.Store(true)
	oc.seqActive.Store(true)

	s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, nil).Times(1)
	s.cons.EXPECT().TransferLeader().Return(nil).Times(1)

	// health monitor reports unhealthy
	s.updateStatusAndExecuteAction(s.healthUpdateCh, false)

	// stepped down and stopped sequencing: [follower, unhealthy, not sequencing]
	s.ctrl.AssertCalled(s.T(), "StopSequencer", mock.Anything)
	s.cons.AssertCalled(s.T(), "TransferLeader")
	s.False(oc.leader.Load())
	s.False(oc.healthy.Load())
	s.False(oc.seqActive.Load())
}
// TestFailureAndRetry1: a healthy, sequencing leader becomes unhealthy and has to stop
// sequencing and transfer leadership, but the required calls fail temporarily. The conductor
// is expected to keep retrying until both have succeeded.
// 1. [leader, healthy, sequencing] -- become unhealthy -->
// 2. [leader, unhealthy, sequencing] -- stop sequencing failed, transfer leadership failed, retry -->
// 3. [leader, unhealthy, sequencing] -- stop sequencing succeeded, transfer leadership failed, retry -->
// 4. [leader, unhealthy, not sequencing] -- transfer leadership succeeded -->
// 5. [follower, unhealthy, not sequencing]
func (s *OpConductorTestSuite) TestFailureAndRetry1() {
	s.enableSynchronization()
	oc := s.conductor
	failure := errors.New("failure")

	// initial state: [leader, healthy, sequencing]
	oc.leader.Store(true)
	oc.healthy.Store(true)
	oc.seqActive.Store(true)

	// steps 1 & 2: become unhealthy; both stopping the sequencer and transferring leadership fail
	s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, failure).Times(1)
	s.cons.EXPECT().TransferLeader().Return(failure).Times(1)

	s.updateStatusAndExecuteAction(s.healthUpdateCh, false)

	// nothing but the health flag changed: [leader, unhealthy, sequencing]
	s.True(oc.leader.Load())
	s.False(oc.healthy.Load())
	s.True(oc.seqActive.Load())
	s.ctrl.AssertNumberOfCalls(s.T(), "StopSequencer", 1)
	s.cons.AssertNumberOfCalls(s.T(), "TransferLeader", 1)

	// step 3: the retry stops the sequencer, but the leadership transfer fails again
	s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, nil).Times(1)
	s.cons.EXPECT().TransferLeader().Return(failure).Times(1)

	s.executeAction()

	// [leader, unhealthy, not sequencing]
	s.True(oc.leader.Load())
	s.False(oc.healthy.Load())
	s.False(oc.seqActive.Load())
	s.ctrl.AssertNumberOfCalls(s.T(), "StopSequencer", 2)
	s.cons.AssertNumberOfCalls(s.T(), "TransferLeader", 2)

	// step 4: the next retry only needs to transfer leadership, which now succeeds
	s.cons.EXPECT().TransferLeader().Return(nil).Times(1)

	s.executeAction()

	// step 5: [follower, unhealthy, not sequencing]
	s.False(oc.leader.Load())
	s.False(oc.healthy.Load())
	s.False(oc.seqActive.Load())
	s.ctrl.AssertNumberOfCalls(s.T(), "StopSequencer", 2)
	s.cons.AssertNumberOfCalls(s.T(), "TransferLeader", 3)
}
// TestFailureAndRetry2: a healthy, sequencing leader becomes unhealthy. Stopping the sequencer
// fails temporarily while the leadership transfer succeeds, so the conductor ends up as a
// follower that is still sequencing and must retry stopping the sequencer.
// 1. [leader, healthy, sequencing] -- become unhealthy -->
// 2. [leader, unhealthy, sequencing] -- stop sequencing failed, transfer leadership succeeded, retry -->
// 3. [follower, unhealthy, sequencing] -- stop sequencing succeeded -->
// 4. [follower, unhealthy, not sequencing]
func (s *OpConductorTestSuite) TestFailureAndRetry2() {
	s.enableSynchronization()
	oc := s.conductor
	failure := errors.New("failure")

	// initial state: [leader, healthy, sequencing]
	oc.leader.Store(true)
	oc.healthy.Store(true)
	oc.seqActive.Store(true)

	// steps 1 & 2: become unhealthy; stopping the sequencer fails, the transfer succeeds
	s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, failure).Times(1)
	s.cons.EXPECT().TransferLeader().Return(nil).Times(1)

	s.updateStatusAndExecuteAction(s.healthUpdateCh, false)

	// [follower, unhealthy, sequencing]
	s.False(oc.leader.Load())
	s.False(oc.healthy.Load())
	s.True(oc.seqActive.Load())
	s.ctrl.AssertNumberOfCalls(s.T(), "StopSequencer", 1)
	s.cons.AssertNumberOfCalls(s.T(), "TransferLeader", 1)

	// step 3: the retry stops the sequencer; no further leadership transfer is attempted
	s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, nil).Times(1)

	s.executeAction()

	// step 4: [follower, unhealthy, not sequencing]
	s.False(oc.leader.Load())
	s.False(oc.healthy.Load())
	s.False(oc.seqActive.Load())
	s.ctrl.AssertNumberOfCalls(s.T(), "StopSequencer", 2)
	s.cons.AssertNumberOfCalls(s.T(), "TransferLeader", 1)
}
func TestHealthMonitor(t *testing.T) {
suite.Run(t, new(OpConductorTestSuite))
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment