Commit aa28bcfd authored by Francis Li, committed by GitHub

change health monitor to return error instead of bool (#9198)

parent 6ffd27fe
...@@ -30,9 +30,10 @@ import ( ...@@ -30,9 +30,10 @@ import (
) )
var ( var (
ErrResumeTimeout = errors.New("timeout to resume conductor") ErrResumeTimeout = errors.New("timeout to resume conductor")
ErrPauseTimeout = errors.New("timeout to pause conductor") ErrPauseTimeout = errors.New("timeout to pause conductor")
ErrUnsafeHeadMismarch = errors.New("unsafe head mismatch") ErrUnsafeHeadMismarch = errors.New("unsafe head mismatch")
ErrUnableToRetrieveUnsafeHeadFromConsensus = errors.New("unable to retrieve unsafe head from consensus")
) )
// New creates a new OpConductor instance. // New creates a new OpConductor instance.
...@@ -248,10 +249,11 @@ type OpConductor struct { ...@@ -248,10 +249,11 @@ type OpConductor struct {
hmon health.HealthMonitor hmon health.HealthMonitor
leader atomic.Bool leader atomic.Bool
healthy atomic.Bool
seqActive atomic.Bool seqActive atomic.Bool
healthy atomic.Bool
hcerr error // error from health check
healthUpdateCh <-chan bool healthUpdateCh <-chan error
leaderUpdateCh <-chan bool leaderUpdateCh <-chan bool
actionFn func() // actionFn defines the action to be executed to bring the sequencer to the desired state. actionFn func() // actionFn defines the action to be executed to bring the sequencer to the desired state.
...@@ -469,15 +471,21 @@ func (oc *OpConductor) handleLeaderUpdate(leader bool) { ...@@ -469,15 +471,21 @@ func (oc *OpConductor) handleLeaderUpdate(leader bool) {
} }
// handleHealthUpdate handles health update from health monitor. // handleHealthUpdate handles health update from health monitor.
func (oc *OpConductor) handleHealthUpdate(healthy bool) { func (oc *OpConductor) handleHealthUpdate(hcerr error) {
healthy := hcerr == nil
if !healthy { if !healthy {
oc.log.Error("Sequencer is unhealthy", "server", oc.cons.ServerID()) oc.log.Error("Sequencer is unhealthy", "server", oc.cons.ServerID(), "err", hcerr)
// always queue an action if it's unhealthy; it could be a no-op in the handler.
oc.queueAction()
} }
if healthy != oc.healthy.Load() { if healthy != oc.healthy.Load() {
oc.healthy.Store(healthy) // queue an action if health status changed.
oc.queueAction() oc.queueAction()
} }
oc.healthy.Store(healthy)
oc.hcerr = hcerr
} }
// action tries to bring the sequencer to the desired state, a retry will be queued if any action failed. // action tries to bring the sequencer to the desired state, a retry will be queued if any action failed.
...@@ -572,13 +580,40 @@ func (oc *OpConductor) startSequencer() error { ...@@ -572,13 +580,40 @@ func (oc *OpConductor) startSequencer() error {
// When starting sequencer, we need to make sure that the current node has the latest unsafe head from the consensus protocol // When starting sequencer, we need to make sure that the current node has the latest unsafe head from the consensus protocol
// If not, then we wait for the unsafe head to catch up or gossip it to op-node manually from op-conductor. // If not, then we wait for the unsafe head to catch up or gossip it to op-node manually from op-conductor.
unsafeInCons, unsafeInNode, err := oc.compareUnsafeHead(ctx)
// if there's a mismatch, try to post the unsafe head to op-node
if err != nil {
if errors.Is(err, ErrUnsafeHeadMismarch) && uint64(unsafeInCons.ExecutionPayload.BlockNumber)-unsafeInNode.NumberU64() == 1 {
// tries to post the unsafe head to op-node when head is only 1 block behind (most likely due to gossip delay)
if innerErr := oc.ctrl.PostUnsafePayload(ctx, unsafeInCons); innerErr != nil {
oc.log.Error("failed to post unsafe head payload envelope to op-node", "err", innerErr)
}
}
return err
}
if err = oc.ctrl.StartSequencer(ctx, unsafeInCons.ExecutionPayload.BlockHash); err != nil {
// cannot directly compare using errors.Is because the error is returned from a JSON-RPC server, which loses its type.
if !strings.Contains(err.Error(), driver.ErrSequencerAlreadyStarted.Error()) {
return fmt.Errorf("failed to start sequencer: %w", err)
} else {
oc.log.Warn("sequencer already started.", "err", err)
}
}
oc.seqActive.Store(true)
return nil
}
func (oc *OpConductor) compareUnsafeHead(ctx context.Context) (*eth.ExecutionPayloadEnvelope, eth.BlockInfo, error) {
unsafeInCons := oc.cons.LatestUnsafePayload() unsafeInCons := oc.cons.LatestUnsafePayload()
if unsafeInCons == nil { if unsafeInCons == nil {
return errors.New("failed to get latest unsafe block from consensus") return nil, nil, ErrUnableToRetrieveUnsafeHeadFromConsensus
} }
unsafeInNode, err := oc.ctrl.LatestUnsafeBlock(ctx) unsafeInNode, err := oc.ctrl.LatestUnsafeBlock(ctx)
if err != nil { if err != nil {
return errors.Wrap(err, "failed to get latest unsafe block from EL during startSequencer phase") return unsafeInCons, nil, errors.Wrap(err, "failed to get latest unsafe block from EL during compareUnsafeHead phase")
} }
if unsafeInCons.ExecutionPayload.BlockHash != unsafeInNode.Hash() { if unsafeInCons.ExecutionPayload.BlockHash != unsafeInNode.Hash() {
...@@ -590,26 +625,10 @@ func (oc *OpConductor) startSequencer() error { ...@@ -590,26 +625,10 @@ func (oc *OpConductor) startSequencer() error {
"node_block_num", unsafeInNode.NumberU64(), "node_block_num", unsafeInNode.NumberU64(),
) )
if uint64(unsafeInCons.ExecutionPayload.BlockNumber)-unsafeInNode.NumberU64() == 1 { return unsafeInCons, unsafeInNode, ErrUnsafeHeadMismarch
// tries to post the unsafe head to op-node when head is only 1 block behind (most likely due to gossip delay)
if err = oc.ctrl.PostUnsafePayload(ctx, unsafeInCons); err != nil {
oc.log.Error("failed to post unsafe head payload envelope to op-node", "err", err)
}
}
return ErrUnsafeHeadMismarch // return error to allow retry
} }
if err = oc.ctrl.StartSequencer(ctx, unsafeInCons.ExecutionPayload.BlockHash); err != nil { return unsafeInCons, unsafeInNode, nil
// cannot directly compare using errors.Is because the error is returned from a JSON-RPC server, which loses its type.
if !strings.Contains(err.Error(), driver.ErrSequencerAlreadyStarted.Error()) {
return fmt.Errorf("failed to start sequencer: %w", err)
} else {
oc.log.Warn("sequencer already started.", "err", err)
}
}
oc.seqActive.Store(true)
return nil
} }
func (oc *OpConductor) updateSequencerActiveStatus() error { func (oc *OpConductor) updateSequencerActiveStatus() error {
......
...@@ -17,6 +17,7 @@ import ( ...@@ -17,6 +17,7 @@ import (
clientmocks "github.com/ethereum-optimism/optimism/op-conductor/client/mocks" clientmocks "github.com/ethereum-optimism/optimism/op-conductor/client/mocks"
consensusmocks "github.com/ethereum-optimism/optimism/op-conductor/consensus/mocks" consensusmocks "github.com/ethereum-optimism/optimism/op-conductor/consensus/mocks"
"github.com/ethereum-optimism/optimism/op-conductor/health"
healthmocks "github.com/ethereum-optimism/optimism/op-conductor/health/mocks" healthmocks "github.com/ethereum-optimism/optimism/op-conductor/health/mocks"
"github.com/ethereum-optimism/optimism/op-node/rollup" "github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-service/eth" "github.com/ethereum-optimism/optimism/op-service/eth"
...@@ -80,7 +81,7 @@ type OpConductorTestSuite struct { ...@@ -80,7 +81,7 @@ type OpConductorTestSuite struct {
conductor *OpConductor conductor *OpConductor
healthUpdateCh chan bool healthUpdateCh chan error
leaderUpdateCh chan bool leaderUpdateCh chan bool
ctx context.Context ctx context.Context
...@@ -114,7 +115,7 @@ func (s *OpConductorTestSuite) SetupTest() { ...@@ -114,7 +115,7 @@ func (s *OpConductorTestSuite) SetupTest() {
s.NoError(err) s.NoError(err)
s.conductor = conductor s.conductor = conductor
s.healthUpdateCh = make(chan bool) s.healthUpdateCh = make(chan error)
s.hmon.EXPECT().Start().Return(nil) s.hmon.EXPECT().Start().Return(nil)
s.conductor.healthUpdateCh = s.healthUpdateCh s.conductor.healthUpdateCh = s.healthUpdateCh
...@@ -153,7 +154,14 @@ func (s *OpConductorTestSuite) execute(fn func()) { ...@@ -153,7 +154,14 @@ func (s *OpConductorTestSuite) execute(fn func()) {
s.wg.Wait() s.wg.Wait()
} }
func (s *OpConductorTestSuite) updateStatusAndExecuteAction(ch chan bool, status bool) { func (s *OpConductorTestSuite) updateLeaderStatusAndExecuteAction(ch chan bool, status bool) {
fn := func() {
ch <- status
}
s.execute(fn)
}
func (s *OpConductorTestSuite) updateHealthStatusAndExecuteAction(ch chan error, status error) {
fn := func() { fn := func() {
ch <- status ch <- status
} }
...@@ -172,7 +180,7 @@ func (s *OpConductorTestSuite) TestControlLoop1() { ...@@ -172,7 +180,7 @@ func (s *OpConductorTestSuite) TestControlLoop1() {
s.True(s.conductor.Paused()) s.True(s.conductor.Paused())
// Send health update, make sure it can still be consumed. // Send health update, make sure it can still be consumed.
s.healthUpdateCh <- true s.healthUpdateCh <- nil
// Resume // Resume
s.ctrl.EXPECT().SequencerActive(mock.Anything).Return(false, nil) s.ctrl.EXPECT().SequencerActive(mock.Anything).Return(false, nil)
...@@ -247,7 +255,7 @@ func (s *OpConductorTestSuite) TestScenario1() { ...@@ -247,7 +255,7 @@ func (s *OpConductorTestSuite) TestScenario1() {
s.cons.EXPECT().TransferLeader().Return(nil) s.cons.EXPECT().TransferLeader().Return(nil)
// become leader // become leader
s.updateStatusAndExecuteAction(s.leaderUpdateCh, true) s.updateLeaderStatusAndExecuteAction(s.leaderUpdateCh, true)
// expect to transfer leadership, go back to [follower, not healthy, not sequencing] // expect to transfer leadership, go back to [follower, not healthy, not sequencing]
s.False(s.conductor.leader.Load()) s.False(s.conductor.leader.Load())
...@@ -267,7 +275,7 @@ func (s *OpConductorTestSuite) TestScenario2() { ...@@ -267,7 +275,7 @@ func (s *OpConductorTestSuite) TestScenario2() {
s.conductor.seqActive.Store(false) s.conductor.seqActive.Store(false)
// become healthy // become healthy
s.updateStatusAndExecuteAction(s.healthUpdateCh, true) s.updateHealthStatusAndExecuteAction(s.healthUpdateCh, nil)
// expect to stay as follower, go to [follower, healthy, not sequencing] // expect to stay as follower, go to [follower, healthy, not sequencing]
s.False(s.conductor.leader.Load()) s.False(s.conductor.leader.Load())
...@@ -302,7 +310,7 @@ func (s *OpConductorTestSuite) TestScenario3() { ...@@ -302,7 +310,7 @@ func (s *OpConductorTestSuite) TestScenario3() {
s.False(s.conductor.seqActive.Load()) s.False(s.conductor.seqActive.Load())
// become leader // become leader
s.updateStatusAndExecuteAction(s.leaderUpdateCh, true) s.updateLeaderStatusAndExecuteAction(s.leaderUpdateCh, true)
// [leader, healthy, sequencing] // [leader, healthy, sequencing]
s.True(s.conductor.leader.Load()) s.True(s.conductor.leader.Load())
...@@ -335,7 +343,7 @@ func (s *OpConductorTestSuite) TestScenario4() { ...@@ -335,7 +343,7 @@ func (s *OpConductorTestSuite) TestScenario4() {
s.ctrl.EXPECT().LatestUnsafeBlock(mock.Anything).Return(mockBlockInfo, nil).Times(1) s.ctrl.EXPECT().LatestUnsafeBlock(mock.Anything).Return(mockBlockInfo, nil).Times(1)
s.ctrl.EXPECT().PostUnsafePayload(mock.Anything, mock.Anything).Return(nil).Times(1) s.ctrl.EXPECT().PostUnsafePayload(mock.Anything, mock.Anything).Return(nil).Times(1)
s.updateStatusAndExecuteAction(s.leaderUpdateCh, true) s.updateLeaderStatusAndExecuteAction(s.leaderUpdateCh, true)
// [leader, healthy, not sequencing] // [leader, healthy, not sequencing]
s.True(s.conductor.leader.Load()) s.True(s.conductor.leader.Load())
...@@ -376,7 +384,7 @@ func (s *OpConductorTestSuite) TestScenario5() { ...@@ -376,7 +384,7 @@ func (s *OpConductorTestSuite) TestScenario5() {
s.conductor.seqActive.Store(false) s.conductor.seqActive.Store(false)
// become unhealthy // become unhealthy
s.updateStatusAndExecuteAction(s.healthUpdateCh, false) s.updateHealthStatusAndExecuteAction(s.healthUpdateCh, health.ErrSequencerNotHealthy)
// expect to stay as follower, go to [follower, not healthy, not sequencing] // expect to stay as follower, go to [follower, not healthy, not sequencing]
s.False(s.conductor.leader.Load()) s.False(s.conductor.leader.Load())
...@@ -397,7 +405,7 @@ func (s *OpConductorTestSuite) TestScenario6() { ...@@ -397,7 +405,7 @@ func (s *OpConductorTestSuite) TestScenario6() {
s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, nil).Times(1) s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, nil).Times(1)
// step down as leader // step down as leader
s.updateStatusAndExecuteAction(s.leaderUpdateCh, false) s.updateLeaderStatusAndExecuteAction(s.leaderUpdateCh, false)
// expect to stay as follower, go to [follower, healthy, not sequencing] // expect to stay as follower, go to [follower, healthy, not sequencing]
s.False(s.conductor.leader.Load()) s.False(s.conductor.leader.Load())
...@@ -421,7 +429,7 @@ func (s *OpConductorTestSuite) TestScenario7() { ...@@ -421,7 +429,7 @@ func (s *OpConductorTestSuite) TestScenario7() {
s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, nil).Times(1) s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, nil).Times(1)
// become unhealthy // become unhealthy
s.updateStatusAndExecuteAction(s.healthUpdateCh, false) s.updateHealthStatusAndExecuteAction(s.healthUpdateCh, health.ErrSequencerNotHealthy)
// expect to step down as leader and stop sequencing // expect to step down as leader and stop sequencing
s.False(s.conductor.leader.Load()) s.False(s.conductor.leader.Load())
...@@ -451,7 +459,7 @@ func (s *OpConductorTestSuite) TestFailureAndRetry1() { ...@@ -451,7 +459,7 @@ func (s *OpConductorTestSuite) TestFailureAndRetry1() {
s.cons.EXPECT().TransferLeader().Return(err).Times(1) s.cons.EXPECT().TransferLeader().Return(err).Times(1)
s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, err).Times(1) s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, err).Times(1)
s.updateStatusAndExecuteAction(s.healthUpdateCh, false) s.updateHealthStatusAndExecuteAction(s.healthUpdateCh, health.ErrSequencerNotHealthy)
s.True(s.conductor.leader.Load()) s.True(s.conductor.leader.Load())
s.False(s.conductor.healthy.Load()) s.False(s.conductor.healthy.Load())
...@@ -503,7 +511,7 @@ func (s *OpConductorTestSuite) TestFailureAndRetry2() { ...@@ -503,7 +511,7 @@ func (s *OpConductorTestSuite) TestFailureAndRetry2() {
s.cons.EXPECT().TransferLeader().Return(nil).Times(1) s.cons.EXPECT().TransferLeader().Return(nil).Times(1)
s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, err).Times(1) s.ctrl.EXPECT().StopSequencer(mock.Anything).Return(common.Hash{}, err).Times(1)
s.updateStatusAndExecuteAction(s.healthUpdateCh, false) s.updateHealthStatusAndExecuteAction(s.healthUpdateCh, health.ErrSequencerNotHealthy)
s.False(s.conductor.leader.Load()) s.False(s.conductor.leader.Load())
s.False(s.conductor.healthy.Load()) s.False(s.conductor.healthy.Load())
......
...@@ -108,19 +108,19 @@ func (_c *HealthMonitor_Stop_Call) RunAndReturn(run func() error) *HealthMonitor ...@@ -108,19 +108,19 @@ func (_c *HealthMonitor_Stop_Call) RunAndReturn(run func() error) *HealthMonitor
} }
// Subscribe provides a mock function with given fields: // Subscribe provides a mock function with given fields:
func (_m *HealthMonitor) Subscribe() <-chan bool { func (_m *HealthMonitor) Subscribe() <-chan error {
ret := _m.Called() ret := _m.Called()
if len(ret) == 0 { if len(ret) == 0 {
panic("no return value specified for Subscribe") panic("no return value specified for Subscribe")
} }
var r0 <-chan bool var r0 <-chan error
if rf, ok := ret.Get(0).(func() <-chan bool); ok { if rf, ok := ret.Get(0).(func() <-chan error); ok {
r0 = rf() r0 = rf()
} else { } else {
if ret.Get(0) != nil { if ret.Get(0) != nil {
r0 = ret.Get(0).(<-chan bool) r0 = ret.Get(0).(<-chan error)
} }
} }
...@@ -144,12 +144,12 @@ func (_c *HealthMonitor_Subscribe_Call) Run(run func()) *HealthMonitor_Subscribe ...@@ -144,12 +144,12 @@ func (_c *HealthMonitor_Subscribe_Call) Run(run func()) *HealthMonitor_Subscribe
return _c return _c
} }
func (_c *HealthMonitor_Subscribe_Call) Return(_a0 <-chan bool) *HealthMonitor_Subscribe_Call { func (_c *HealthMonitor_Subscribe_Call) Return(_a0 <-chan error) *HealthMonitor_Subscribe_Call {
_c.Call.Return(_a0) _c.Call.Return(_a0)
return _c return _c
} }
func (_c *HealthMonitor_Subscribe_Call) RunAndReturn(run func() <-chan bool) *HealthMonitor_Subscribe_Call { func (_c *HealthMonitor_Subscribe_Call) RunAndReturn(run func() <-chan error) *HealthMonitor_Subscribe_Call {
_c.Call.Return(run) _c.Call.Return(run)
return _c return _c
} }
......
...@@ -2,6 +2,7 @@ package health ...@@ -2,6 +2,7 @@ package health
import ( import (
"context" "context"
"errors"
"sync" "sync"
"time" "time"
...@@ -12,12 +13,17 @@ import ( ...@@ -12,12 +13,17 @@ import (
"github.com/ethereum-optimism/optimism/op-service/dial" "github.com/ethereum-optimism/optimism/op-service/dial"
) )
// Sentinel errors returned by SequencerHealthMonitor.healthCheck; a nil result means healthy.
var (
// ErrSequencerNotHealthy is returned when the node was reachable but a health
// criterion failed: one of the unsafe-head checks tied to unsafeInterval, a safe
// head older than safeInterval, or a connected peer count below minPeerCount.
ErrSequencerNotHealthy = errors.New("sequencer is not healthy")
// ErrSequencerConnectionDown is returned when the SyncStatus or PeerStats call
// itself fails, so health could not be evaluated at all.
ErrSequencerConnectionDown = errors.New("cannot connect to sequencer rpc endpoints")
)
// HealthMonitor defines the interface for monitoring the health of the sequencer. // HealthMonitor defines the interface for monitoring the health of the sequencer.
// //
//go:generate mockery --name HealthMonitor --output mocks/ --with-expecter=true //go:generate mockery --name HealthMonitor --output mocks/ --with-expecter=true
type HealthMonitor interface { type HealthMonitor interface {
// Subscribe returns a channel that will be notified for every health check. // Subscribe returns a channel that will be notified for every health check.
Subscribe() <-chan bool Subscribe() <-chan error
// Start starts the health check. // Start starts the health check.
Start() error Start() error
// Stop stops the health check. // Stop stops the health check.
...@@ -33,7 +39,7 @@ func NewSequencerHealthMonitor(log log.Logger, interval, unsafeInterval, safeInt ...@@ -33,7 +39,7 @@ func NewSequencerHealthMonitor(log log.Logger, interval, unsafeInterval, safeInt
log: log, log: log,
done: make(chan struct{}), done: make(chan struct{}),
interval: interval, interval: interval,
healthUpdateCh: make(chan bool), healthUpdateCh: make(chan error),
rollupCfg: rollupCfg, rollupCfg: rollupCfg,
unsafeInterval: unsafeInterval, unsafeInterval: unsafeInterval,
safeInterval: safeInterval, safeInterval: safeInterval,
...@@ -54,7 +60,7 @@ type SequencerHealthMonitor struct { ...@@ -54,7 +60,7 @@ type SequencerHealthMonitor struct {
safeInterval uint64 safeInterval uint64
minPeerCount uint64 minPeerCount uint64
interval uint64 interval uint64
healthUpdateCh chan bool healthUpdateCh chan error
lastSeenUnsafeNum uint64 lastSeenUnsafeNum uint64
lastSeenUnsafeTime uint64 lastSeenUnsafeTime uint64
...@@ -85,7 +91,7 @@ func (hm *SequencerHealthMonitor) Stop() error { ...@@ -85,7 +91,7 @@ func (hm *SequencerHealthMonitor) Stop() error {
} }
// Subscribe implements HealthMonitor. // Subscribe implements HealthMonitor.
func (hm *SequencerHealthMonitor) Subscribe() <-chan bool { func (hm *SequencerHealthMonitor) Subscribe() <-chan error {
return hm.healthUpdateCh return hm.healthUpdateCh
} }
...@@ -111,12 +117,12 @@ func (hm *SequencerHealthMonitor) loop() { ...@@ -111,12 +117,12 @@ func (hm *SequencerHealthMonitor) loop() {
// 2. unsafe head is not too far behind now (measured by unsafeInterval) // 2. unsafe head is not too far behind now (measured by unsafeInterval)
// 3. safe head is progressing every configured batch submission interval // 3. safe head is progressing every configured batch submission interval
// 4. peer count is above the configured minimum // 4. peer count is above the configured minimum
func (hm *SequencerHealthMonitor) healthCheck() bool { func (hm *SequencerHealthMonitor) healthCheck() error {
ctx := context.Background() ctx := context.Background()
status, err := hm.node.SyncStatus(ctx) status, err := hm.node.SyncStatus(ctx)
if err != nil { if err != nil {
hm.log.Error("health monitor failed to get sync status", "err", err) hm.log.Error("health monitor failed to get sync status", "err", err)
return false return ErrSequencerConnectionDown
} }
now := uint64(time.Now().Unix()) now := uint64(time.Now().Unix())
...@@ -135,7 +141,7 @@ func (hm *SequencerHealthMonitor) healthCheck() bool { ...@@ -135,7 +141,7 @@ func (hm *SequencerHealthMonitor) healthCheck() bool {
"last_seen_unsafe_time", hm.lastSeenUnsafeTime, "last_seen_unsafe_time", hm.lastSeenUnsafeTime,
"unsafe_interval", hm.unsafeInterval, "unsafe_interval", hm.unsafeInterval,
) )
return false return ErrSequencerNotHealthy
} }
} }
if status.UnsafeL2.Number > hm.lastSeenUnsafeNum { if status.UnsafeL2.Number > hm.lastSeenUnsafeNum {
...@@ -151,7 +157,7 @@ func (hm *SequencerHealthMonitor) healthCheck() bool { ...@@ -151,7 +157,7 @@ func (hm *SequencerHealthMonitor) healthCheck() bool {
"unsafe_head_time", status.UnsafeL2.Time, "unsafe_head_time", status.UnsafeL2.Time,
"unsafe_interval", hm.unsafeInterval, "unsafe_interval", hm.unsafeInterval,
) )
return false return ErrSequencerNotHealthy
} }
if now-status.SafeL2.Time > hm.safeInterval { if now-status.SafeL2.Time > hm.safeInterval {
...@@ -162,18 +168,18 @@ func (hm *SequencerHealthMonitor) healthCheck() bool { ...@@ -162,18 +168,18 @@ func (hm *SequencerHealthMonitor) healthCheck() bool {
"safe_head_time", status.SafeL2.Time, "safe_head_time", status.SafeL2.Time,
"safe_interval", hm.safeInterval, "safe_interval", hm.safeInterval,
) )
return false return ErrSequencerNotHealthy
} }
stats, err := hm.p2p.PeerStats(ctx) stats, err := hm.p2p.PeerStats(ctx)
if err != nil { if err != nil {
hm.log.Error("health monitor failed to get peer stats", "err", err) hm.log.Error("health monitor failed to get peer stats", "err", err)
return false return ErrSequencerConnectionDown
} }
if uint64(stats.Connected) < hm.minPeerCount { if uint64(stats.Connected) < hm.minPeerCount {
hm.log.Error("peer count is below minimum", "connected", stats.Connected, "minPeerCount", hm.minPeerCount) hm.log.Error("peer count is below minimum", "connected", stats.Connected, "minPeerCount", hm.minPeerCount)
return false return ErrSequencerNotHealthy
} }
return true return nil
} }
...@@ -80,7 +80,7 @@ func (s *HealthMonitorTestSuite) TestUnhealthyLowPeerCount() { ...@@ -80,7 +80,7 @@ func (s *HealthMonitorTestSuite) TestUnhealthyLowPeerCount() {
healthUpdateCh := s.monitor.Subscribe() healthUpdateCh := s.monitor.Subscribe()
healthy := <-healthUpdateCh healthy := <-healthUpdateCh
s.False(healthy) s.NotNil(healthy)
} }
func (s *HealthMonitorTestSuite) TestUnhealthyUnsafeHeadNotProgressing() { func (s *HealthMonitorTestSuite) TestUnhealthyUnsafeHeadNotProgressing() {
...@@ -108,9 +108,9 @@ func (s *HealthMonitorTestSuite) TestUnhealthyUnsafeHeadNotProgressing() { ...@@ -108,9 +108,9 @@ func (s *HealthMonitorTestSuite) TestUnhealthyUnsafeHeadNotProgressing() {
for i := 0; i < 3; i++ { for i := 0; i < 3; i++ {
healthy := <-healthUpdateCh healthy := <-healthUpdateCh
if i < 2 { if i < 2 {
s.True(healthy) s.Nil(healthy)
} else { } else {
s.False(healthy) s.NotNil(healthy)
} }
} }
} }
...@@ -143,9 +143,9 @@ func (s *HealthMonitorTestSuite) TestUnhealthySafeHeadNotProgressing() { ...@@ -143,9 +143,9 @@ func (s *HealthMonitorTestSuite) TestUnhealthySafeHeadNotProgressing() {
for i := 0; i < 6; i++ { for i := 0; i < 6; i++ {
healthy := <-healthUpdateCh healthy := <-healthUpdateCh
if i < 5 { if i < 5 {
s.True(healthy) s.Nil(healthy)
} else { } else {
s.False(healthy) s.NotNil(healthy)
} }
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment