Commit 344d9192 authored by Francis Li, committed by GitHub

[op-conductor] make health check more robust (#10325)

parent a7ff2303
...@@ -134,7 +134,7 @@ func (hm *SequencerHealthMonitor) healthCheck() error { ...@@ -134,7 +134,7 @@ func (hm *SequencerHealthMonitor) healthCheck() error {
var timeDiff, blockDiff, expectedBlocks uint64 var timeDiff, blockDiff, expectedBlocks uint64
if hm.lastSeenUnsafeNum != 0 { if hm.lastSeenUnsafeNum != 0 {
timeDiff = now - hm.lastSeenUnsafeTime timeDiff = calculateTimeDiff(now, hm.lastSeenUnsafeTime)
blockDiff = status.UnsafeL2.Number - hm.lastSeenUnsafeNum blockDiff = status.UnsafeL2.Number - hm.lastSeenUnsafeNum
// how many blocks do we expect to see, minus 1 to account for edge case with respect to time. // how many blocks do we expect to see, minus 1 to account for edge case with respect to time.
// for example, if diff = 2.001s and block time = 2s, expecting to see 1 block could potentially cause sequencer to be considered unhealthy. // for example, if diff = 2.001s and block time = 2s, expecting to see 1 block could potentially cause sequencer to be considered unhealthy.
...@@ -160,7 +160,7 @@ func (hm *SequencerHealthMonitor) healthCheck() error { ...@@ -160,7 +160,7 @@ func (hm *SequencerHealthMonitor) healthCheck() error {
return ErrSequencerNotHealthy return ErrSequencerNotHealthy
} }
if now-status.UnsafeL2.Time > hm.unsafeInterval { if calculateTimeDiff(now, status.UnsafeL2.Time) > hm.unsafeInterval {
hm.log.Error( hm.log.Error(
"unsafe head is not progressing as expected", "unsafe head is not progressing as expected",
"now", now, "now", now,
...@@ -171,7 +171,7 @@ func (hm *SequencerHealthMonitor) healthCheck() error { ...@@ -171,7 +171,7 @@ func (hm *SequencerHealthMonitor) healthCheck() error {
return ErrSequencerNotHealthy return ErrSequencerNotHealthy
} }
if hm.safeEnabled && now-status.SafeL2.Time > hm.safeInterval { if hm.safeEnabled && calculateTimeDiff(now, status.SafeL2.Time) > hm.safeInterval {
hm.log.Error( hm.log.Error(
"safe head is not progressing as expected", "safe head is not progressing as expected",
"now", now, "now", now,
...@@ -196,6 +196,13 @@ func (hm *SequencerHealthMonitor) healthCheck() error { ...@@ -196,6 +196,13 @@ func (hm *SequencerHealthMonitor) healthCheck() error {
return nil return nil
} }
// calculateTimeDiff returns how far now is ahead of then, clamped at zero.
//
// Both arguments are unsigned, so a plain now - then would wrap around to a
// very large value whenever then is ahead of now; in that case the result is
// 0 instead.
func calculateTimeDiff(now, then uint64) uint64 {
	if now >= then {
		return now - then
	}
	return 0
}
func currentTimeProvicer() uint64 { func currentTimeProvicer() uint64 {
return uint64(time.Now().Unix()) return uint64(time.Now().Unix())
} }
...@@ -103,7 +103,7 @@ func (s *HealthMonitorTestSuite) TestUnhealthyUnsafeHeadNotProgressing() { ...@@ -103,7 +103,7 @@ func (s *HealthMonitorTestSuite) TestUnhealthyUnsafeHeadNotProgressing() {
rc := &testutils.MockRollupClient{} rc := &testutils.MockRollupClient{}
ss1 := mockSyncStatus(now, 5, now-8, 1) ss1 := mockSyncStatus(now, 5, now-8, 1)
for i := 0; i < 6; i++ { for i := 0; i < 5; i++ {
rc.ExpectSyncStatus(ss1, nil) rc.ExpectSyncStatus(ss1, nil)
} }
...@@ -168,7 +168,8 @@ func (s *HealthMonitorTestSuite) TestHealthyWithUnsafeLag() { ...@@ -168,7 +168,8 @@ func (s *HealthMonitorTestSuite) TestHealthyWithUnsafeLag() {
rc.ExpectSyncStatus(mockSyncStatus(now-10, 1, now, 1), nil) rc.ExpectSyncStatus(mockSyncStatus(now-10, 1, now, 1), nil)
rc.ExpectSyncStatus(mockSyncStatus(now-10, 1, now, 1), nil) rc.ExpectSyncStatus(mockSyncStatus(now-10, 1, now, 1), nil)
rc.ExpectSyncStatus(mockSyncStatus(now-8, 2, now, 1), nil) rc.ExpectSyncStatus(mockSyncStatus(now-8, 2, now, 1), nil)
rc.ExpectSyncStatus(mockSyncStatus(now-8, 2, now, 1), nil) // in this case now time is behind unsafe head time, this should still be considered healthy.
rc.ExpectSyncStatus(mockSyncStatus(now+5, 2, now, 1), nil)
monitor := s.SetupMonitor(now, 60, 60, rc, nil) monitor := s.SetupMonitor(now, 60, 60, rc, nil)
healthUpdateCh := monitor.Subscribe() healthUpdateCh := monitor.Subscribe()
...@@ -194,6 +195,11 @@ func (s *HealthMonitorTestSuite) TestHealthyWithUnsafeLag() { ...@@ -194,6 +195,11 @@ func (s *HealthMonitorTestSuite) TestHealthyWithUnsafeLag() {
s.Equal(lastSeenUnsafeTime+2, monitor.lastSeenUnsafeTime) s.Equal(lastSeenUnsafeTime+2, monitor.lastSeenUnsafeTime)
s.Equal(uint64(2), monitor.lastSeenUnsafeNum) s.Equal(uint64(2), monitor.lastSeenUnsafeNum)
healthy = <-healthUpdateCh
s.Nil(healthy)
s.Equal(lastSeenUnsafeTime+2, monitor.lastSeenUnsafeTime)
s.Equal(uint64(2), monitor.lastSeenUnsafeNum)
s.NoError(monitor.Stop()) s.NoError(monitor.Stop())
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment