Commit dcc4fb35 authored by Zach Howard, committed by GitHub

op-conductor: adds flag to enable safe head progression checks (#10167)

parent b625f7d2
...@@ -118,6 +118,7 @@ func NewConfig(ctx *cli.Context, log log.Logger) (*Config, error) { ...@@ -118,6 +118,7 @@ func NewConfig(ctx *cli.Context, log log.Logger) (*Config, error) {
HealthCheck: HealthCheckConfig{ HealthCheck: HealthCheckConfig{
Interval: ctx.Uint64(flags.HealthCheckInterval.Name), Interval: ctx.Uint64(flags.HealthCheckInterval.Name),
UnsafeInterval: ctx.Uint64(flags.HealthCheckUnsafeInterval.Name), UnsafeInterval: ctx.Uint64(flags.HealthCheckUnsafeInterval.Name),
SafeEnabled: ctx.Bool(flags.HealthCheckSafeEnabled.Name),
SafeInterval: ctx.Uint64(flags.HealthCheckSafeInterval.Name), SafeInterval: ctx.Uint64(flags.HealthCheckSafeInterval.Name),
MinPeerCount: ctx.Uint64(flags.HealthCheckMinPeerCount.Name), MinPeerCount: ctx.Uint64(flags.HealthCheckMinPeerCount.Name),
}, },
...@@ -138,6 +139,9 @@ type HealthCheckConfig struct { ...@@ -138,6 +139,9 @@ type HealthCheckConfig struct {
// UnsafeInterval is the interval allowed between unsafe head and now in seconds. // UnsafeInterval is the interval allowed between unsafe head and now in seconds.
UnsafeInterval uint64 UnsafeInterval uint64
// SafeEnabled is whether to enable safe head progression checks.
SafeEnabled bool
// SafeInterval is the interval between safe head progression measured in seconds. // SafeInterval is the interval between safe head progression measured in seconds.
SafeInterval uint64 SafeInterval uint64
......
...@@ -176,6 +176,7 @@ func (c *OpConductor) initHealthMonitor(ctx context.Context) error { ...@@ -176,6 +176,7 @@ func (c *OpConductor) initHealthMonitor(ctx context.Context) error {
c.cfg.HealthCheck.UnsafeInterval, c.cfg.HealthCheck.UnsafeInterval,
c.cfg.HealthCheck.SafeInterval, c.cfg.HealthCheck.SafeInterval,
c.cfg.HealthCheck.MinPeerCount, c.cfg.HealthCheck.MinPeerCount,
c.cfg.HealthCheck.SafeEnabled,
&c.cfg.RollupCfg, &c.cfg.RollupCfg,
node, node,
p2p, p2p,
......
...@@ -64,6 +64,12 @@ var ( ...@@ -64,6 +64,12 @@ var (
Usage: "Interval allowed between unsafe head and now measured in seconds", Usage: "Interval allowed between unsafe head and now measured in seconds",
EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "HEALTHCHECK_UNSAFE_INTERVAL"), EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "HEALTHCHECK_UNSAFE_INTERVAL"),
} }
// HealthCheckSafeEnabled is the boolean CLI flag ("healthcheck.safe-enabled")
// that turns on safe head progression checks. Its value is false unless set.
HealthCheckSafeEnabled = &cli.BoolFlag{
Name: "healthcheck.safe-enabled",
Usage: "Whether to enable safe head progression checks",
EnvVars: opservice.PrefixEnvVar(EnvVarPrefix, "HEALTHCHECK_SAFE_ENABLED"),
Value: false,
}
HealthCheckSafeInterval = &cli.Uint64Flag{ HealthCheckSafeInterval = &cli.Uint64Flag{
Name: "healthcheck.safe-interval", Name: "healthcheck.safe-interval",
Usage: "Interval between safe head progression measured in seconds", Usage: "Interval between safe head progression measured in seconds",
...@@ -105,6 +111,7 @@ var optionalFlags = []cli.Flag{ ...@@ -105,6 +111,7 @@ var optionalFlags = []cli.Flag{
Paused, Paused,
RPCEnableProxy, RPCEnableProxy,
RaftBootstrap, RaftBootstrap,
HealthCheckSafeEnabled,
} }
func init() { func init() {
......
...@@ -34,7 +34,7 @@ type HealthMonitor interface { ...@@ -34,7 +34,7 @@ type HealthMonitor interface {
// interval is the interval between health checks measured in seconds. // interval is the interval between health checks measured in seconds.
// safeInterval is the interval between safe head progress measured in seconds. // safeInterval is the interval between safe head progress measured in seconds.
// minPeerCount is the minimum number of peers required for the sequencer to be healthy. // minPeerCount is the minimum number of peers required for the sequencer to be healthy.
func NewSequencerHealthMonitor(log log.Logger, interval, unsafeInterval, safeInterval, minPeerCount uint64, rollupCfg *rollup.Config, node dial.RollupClientInterface, p2p p2p.API) HealthMonitor { func NewSequencerHealthMonitor(log log.Logger, interval, unsafeInterval, safeInterval, minPeerCount uint64, safeEnabled bool, rollupCfg *rollup.Config, node dial.RollupClientInterface, p2p p2p.API) HealthMonitor {
return &SequencerHealthMonitor{ return &SequencerHealthMonitor{
log: log, log: log,
done: make(chan struct{}), done: make(chan struct{}),
...@@ -42,6 +42,7 @@ func NewSequencerHealthMonitor(log log.Logger, interval, unsafeInterval, safeInt ...@@ -42,6 +42,7 @@ func NewSequencerHealthMonitor(log log.Logger, interval, unsafeInterval, safeInt
healthUpdateCh: make(chan error), healthUpdateCh: make(chan error),
rollupCfg: rollupCfg, rollupCfg: rollupCfg,
unsafeInterval: unsafeInterval, unsafeInterval: unsafeInterval,
safeEnabled: safeEnabled,
safeInterval: safeInterval, safeInterval: safeInterval,
minPeerCount: minPeerCount, minPeerCount: minPeerCount,
timeProviderFn: currentTimeProvicer, timeProviderFn: currentTimeProvicer,
...@@ -58,6 +59,7 @@ type SequencerHealthMonitor struct { ...@@ -58,6 +59,7 @@ type SequencerHealthMonitor struct {
rollupCfg *rollup.Config rollupCfg *rollup.Config
unsafeInterval uint64 unsafeInterval uint64
safeEnabled bool
safeInterval uint64 safeInterval uint64
minPeerCount uint64 minPeerCount uint64
interval uint64 interval uint64
...@@ -169,7 +171,7 @@ func (hm *SequencerHealthMonitor) healthCheck() error { ...@@ -169,7 +171,7 @@ func (hm *SequencerHealthMonitor) healthCheck() error {
return ErrSequencerNotHealthy return ErrSequencerNotHealthy
} }
if now-status.SafeL2.Time > hm.safeInterval { if hm.safeEnabled && now-status.SafeL2.Time > hm.safeInterval {
hm.log.Error( hm.log.Error(
"safe head is not progressing as expected", "safe head is not progressing as expected",
"now", now, "now", now,
......
...@@ -62,6 +62,7 @@ func (s *HealthMonitorTestSuite) SetupMonitor( ...@@ -62,6 +62,7 @@ func (s *HealthMonitorTestSuite) SetupMonitor(
rollupCfg: s.rollupCfg, rollupCfg: s.rollupCfg,
unsafeInterval: unsafeInterval, unsafeInterval: unsafeInterval,
safeInterval: safeInterval, safeInterval: safeInterval,
safeEnabled: true,
minPeerCount: s.minPeerCount, minPeerCount: s.minPeerCount,
timeProviderFn: tp.Now, timeProviderFn: tp.Now,
node: mockRollupClient, node: mockRollupClient,
...@@ -147,6 +148,13 @@ func (s *HealthMonitorTestSuite) TestUnhealthySafeHeadNotProgressing() { ...@@ -147,6 +148,13 @@ func (s *HealthMonitorTestSuite) TestUnhealthySafeHeadNotProgressing() {
} }
} }
// test that the safeEnabled flag works
monitor.safeEnabled = false
rc.ExpectSyncStatus(mockSyncStatus(now+6, 4, now, 1), nil)
rc.ExpectSyncStatus(mockSyncStatus(now+6, 4, now, 1), nil)
healthy := <-healthUpdateCh
s.Nil(healthy)
s.NoError(monitor.Stop()) s.NoError(monitor.Stop())
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment