Commit 91516a6d authored by protolambda

op-node: fix-sequencer-err-handling PR 4930 suggestions

parent 61c0b14e
...@@ -8,6 +8,7 @@ import ( ...@@ -8,6 +8,7 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/ethereum-optimism/optimism/op-node/eth" "github.com/ethereum-optimism/optimism/op-node/eth"
"github.com/ethereum-optimism/optimism/op-node/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup" "github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive" "github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-node/rollup/driver" "github.com/ethereum-optimism/optimism/op-node/rollup/driver"
...@@ -47,7 +48,7 @@ func NewL2Sequencer(t Testing, log log.Logger, l1 derive.L1Fetcher, eng L2API, c ...@@ -47,7 +48,7 @@ func NewL2Sequencer(t Testing, log log.Logger, l1 derive.L1Fetcher, eng L2API, c
} }
return &L2Sequencer{ return &L2Sequencer{
L2Verifier: *ver, L2Verifier: *ver,
sequencer: driver.NewSequencer(log, cfg, ver.derivation, attrBuilder, l1OriginSelector), sequencer: driver.NewSequencer(log, cfg, ver.derivation, attrBuilder, l1OriginSelector, metrics.NoopMetrics),
mockL1OriginSelector: l1OriginSelector, mockL1OriginSelector: l1OriginSelector,
failL2GossipUnsafeBlock: nil, failL2GossipUnsafeBlock: nil,
} }
...@@ -121,7 +122,7 @@ func (s *L2Sequencer) ActBuildToL1Head(t Testing) { ...@@ -121,7 +122,7 @@ func (s *L2Sequencer) ActBuildToL1Head(t Testing) {
// ActBuildToL1HeadUnsafe builds empty blocks until (incl.) the L1 head becomes the L1 origin of the L2 head // ActBuildToL1HeadUnsafe builds empty blocks until (incl.) the L1 head becomes the L1 origin of the L2 head
func (s *L2Sequencer) ActBuildToL1HeadUnsafe(t Testing) { func (s *L2Sequencer) ActBuildToL1HeadUnsafe(t Testing) {
for s.derivation.UnsafeL2Head().L1Origin.Number < s.l1State.L1Head().Number { for s.derivation.UnsafeL2Head().L1Origin.Number < s.l1State.L1Head().Number {
// Note: the // Note: the derivation pipeline does not run, we are just sequencing a block on top of the existing L2 chain.
s.ActL2StartBlock(t) s.ActL2StartBlock(t)
s.ActL2EndBlock(t) s.ActL2EndBlock(t)
} }
...@@ -144,6 +145,7 @@ func (s *L2Sequencer) ActBuildToL1HeadExcl(t Testing) { ...@@ -144,6 +145,7 @@ func (s *L2Sequencer) ActBuildToL1HeadExcl(t Testing) {
// ActBuildToL1HeadExclUnsafe builds empty blocks until (excl.) the L1 head becomes the L1 origin of the L2 head, without safe-head progression. // ActBuildToL1HeadExclUnsafe builds empty blocks until (excl.) the L1 head becomes the L1 origin of the L2 head, without safe-head progression.
func (s *L2Sequencer) ActBuildToL1HeadExclUnsafe(t Testing) { func (s *L2Sequencer) ActBuildToL1HeadExclUnsafe(t Testing) {
for { for {
// Note: the derivation pipeline does not run, we are just sequencing a block on top of the existing L2 chain.
nextOrigin, err := s.mockL1OriginSelector.FindL1Origin(t.Ctx(), s.derivation.UnsafeL2Head()) nextOrigin, err := s.mockL1OriginSelector.FindL1Origin(t.Ctx(), s.derivation.UnsafeL2Head())
require.NoError(t, err) require.NoError(t, err)
if nextOrigin.Number >= s.l1State.L1Head().Number { if nextOrigin.Number >= s.l1State.L1Head().Number {
......
...@@ -108,12 +108,7 @@ func TestL2Sequencer_SequencerDrift(gt *testing.T) { ...@@ -108,12 +108,7 @@ func TestL2Sequencer_SequencerDrift(gt *testing.T) {
// while the verifier-codepath only ever sees the valid post-reorg L1 chain. // while the verifier-codepath only ever sees the valid post-reorg L1 chain.
func TestL2Sequencer_SequencerOnlyReorg(gt *testing.T) { func TestL2Sequencer_SequencerOnlyReorg(gt *testing.T) {
t := NewDefaultTesting(gt) t := NewDefaultTesting(gt)
p := &e2eutils.TestParams{ dp := e2eutils.MakeDeployParams(t, defaultRollupTestParams)
MaxSequencerDrift: 20, // larger than L1 block time we simulate in this test (12)
SequencerWindowSize: 24,
ChannelTimeout: 20,
}
dp := e2eutils.MakeDeployParams(t, p)
sd := e2eutils.Setup(t, dp, defaultAlloc) sd := e2eutils.Setup(t, dp, defaultAlloc)
log := testlog.Logger(t, log.LvlDebug) log := testlog.Logger(t, log.LvlDebug)
miner, _, sequencer := setupSequencerTest(t, sd, log) miner, _, sequencer := setupSequencerTest(t, sd, log)
......
...@@ -53,6 +53,8 @@ type Metricer interface { ...@@ -53,6 +53,8 @@ type Metricer interface {
RecordUnsafePayloadsBuffer(length uint64, memSize uint64, next eth.BlockID) RecordUnsafePayloadsBuffer(length uint64, memSize uint64, next eth.BlockID)
CountSequencedTxs(count int) CountSequencedTxs(count int)
RecordL1ReorgDepth(d uint64) RecordL1ReorgDepth(d uint64)
RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID)
RecordSequencerReset()
RecordGossipEvent(evType int32) RecordGossipEvent(evType int32)
IncPeerCount() IncPeerCount()
DecPeerCount() DecPeerCount()
...@@ -86,6 +88,9 @@ type Metrics struct { ...@@ -86,6 +88,9 @@ type Metrics struct {
SequencingErrors *EventMetrics SequencingErrors *EventMetrics
PublishingErrors *EventMetrics PublishingErrors *EventMetrics
SequencerInconsistentL1Origin *EventMetrics
SequencerResets *EventMetrics
SequencerBuildingDiffDurationSeconds prometheus.Histogram SequencerBuildingDiffDurationSeconds prometheus.Histogram
SequencerBuildingDiffTotal prometheus.Counter SequencerBuildingDiffTotal prometheus.Counter
...@@ -204,6 +209,9 @@ func NewMetrics(procName string) *Metrics { ...@@ -204,6 +209,9 @@ func NewMetrics(procName string) *Metrics {
SequencingErrors: NewEventMetrics(factory, ns, "sequencing_errors", "sequencing errors"), SequencingErrors: NewEventMetrics(factory, ns, "sequencing_errors", "sequencing errors"),
PublishingErrors: NewEventMetrics(factory, ns, "publishing_errors", "p2p publishing errors"), PublishingErrors: NewEventMetrics(factory, ns, "publishing_errors", "p2p publishing errors"),
SequencerInconsistentL1Origin: NewEventMetrics(factory, ns, "sequencer_inconsistent_l1_origin", "events when the sequencer selects an inconsistent L1 origin"),
SequencerResets: NewEventMetrics(factory, ns, "sequencer_resets", "sequencer resets"),
UnsafePayloadsBufferLen: factory.NewGauge(prometheus.GaugeOpts{ UnsafePayloadsBufferLen: factory.NewGauge(prometheus.GaugeOpts{
Namespace: ns, Namespace: ns,
Name: "unsafe_payloads_buffer_len", Name: "unsafe_payloads_buffer_len",
...@@ -455,6 +463,16 @@ func (m *Metrics) RecordL1ReorgDepth(d uint64) { ...@@ -455,6 +463,16 @@ func (m *Metrics) RecordL1ReorgDepth(d uint64) {
m.L1ReorgDepth.Observe(float64(d)) m.L1ReorgDepth.Observe(float64(d))
} }
// RecordSequencerInconsistentL1Origin counts one occurrence of the sequencer
// selecting an inconsistent L1 origin, and records both block IDs as refs on the
// "l1_origin" layer: from as "inconsistent_from" and to as "inconsistent_to".
func (m *Metrics) RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID) {
m.SequencerInconsistentL1Origin.RecordEvent()
// NOTE(review): the literal 0 is presumably the ref timestamp, which eth.BlockID does not carry — confirm against recordRef.
m.recordRef("l1_origin", "inconsistent_from", from.Number, 0, from.Hash)
m.recordRef("l1_origin", "inconsistent_to", to.Number, 0, to.Hash)
}
// RecordSequencerReset records one event on the SequencerResets event metric.
func (m *Metrics) RecordSequencerReset() {
m.SequencerResets.RecordEvent()
}
func (m *Metrics) RecordGossipEvent(evType int32) { func (m *Metrics) RecordGossipEvent(evType int32) {
m.GossipEventsTotal.WithLabelValues(pb.TraceEvent_Type_name[evType]).Inc() m.GossipEventsTotal.WithLabelValues(pb.TraceEvent_Type_name[evType]).Inc()
} }
...@@ -584,6 +602,12 @@ func (n *noopMetricer) CountSequencedTxs(count int) { ...@@ -584,6 +602,12 @@ func (n *noopMetricer) CountSequencedTxs(count int) {
func (n *noopMetricer) RecordL1ReorgDepth(d uint64) { func (n *noopMetricer) RecordL1ReorgDepth(d uint64) {
} }
// RecordSequencerInconsistentL1Origin is a no-op: both block IDs are discarded.
func (n *noopMetricer) RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID) {
}
// RecordSequencerReset is a no-op.
func (n *noopMetricer) RecordSequencerReset() {
}
func (n *noopMetricer) RecordGossipEvent(evType int32) { func (n *noopMetricer) RecordGossipEvent(evType int32) {
} }
......
...@@ -25,6 +25,9 @@ type L1Fetcher interface { ...@@ -25,6 +25,9 @@ type L1Fetcher interface {
L1TransactionFetcher L1TransactionFetcher
} }
// ResettableEngineControl wraps EngineControl with reset-functionality,
// which handles reorgs like the derivation pipeline:
// by determining the last valid block references to continue from.
type ResettableEngineControl interface { type ResettableEngineControl interface {
EngineControl EngineControl
Reset() Reset()
......
...@@ -29,6 +29,7 @@ type Metrics interface { ...@@ -29,6 +29,7 @@ type Metrics interface {
RecordL1ReorgDepth(d uint64) RecordL1ReorgDepth(d uint64)
EngineMetrics EngineMetrics
SequencerMetrics
} }
type L1Chain interface { type L1Chain interface {
...@@ -88,7 +89,7 @@ func NewDriver(driverCfg *Config, cfg *rollup.Config, l2 L2Chain, l1 L1Chain, ne ...@@ -88,7 +89,7 @@ func NewDriver(driverCfg *Config, cfg *rollup.Config, l2 L2Chain, l1 L1Chain, ne
attrBuilder := derive.NewFetchingAttributesBuilder(cfg, l1, l2) attrBuilder := derive.NewFetchingAttributesBuilder(cfg, l1, l2)
engine := derivationPipeline engine := derivationPipeline
meteredEngine := NewMeteredEngine(cfg, engine, metrics, log) meteredEngine := NewMeteredEngine(cfg, engine, metrics, log)
sequencer := NewSequencer(log, cfg, meteredEngine, attrBuilder, findL1Origin) sequencer := NewSequencer(log, cfg, meteredEngine, attrBuilder, findL1Origin, metrics)
return &Driver{ return &Driver{
l1State: l1State, l1State: l1State,
......
...@@ -30,7 +30,7 @@ type MeteredEngine struct { ...@@ -30,7 +30,7 @@ type MeteredEngine struct {
buildingStartTime time.Time buildingStartTime time.Time
} }
// MeteredEngine implements derive.EngineControl // MeteredEngine implements derive.ResettableEngineControl
var _ derive.ResettableEngineControl = (*MeteredEngine)(nil) var _ derive.ResettableEngineControl = (*MeteredEngine)(nil)
func NewMeteredEngine(cfg *rollup.Config, inner derive.ResettableEngineControl, metrics EngineMetrics, log log.Logger) *MeteredEngine { func NewMeteredEngine(cfg *rollup.Config, inner derive.ResettableEngineControl, metrics EngineMetrics, log log.Logger) *MeteredEngine {
......
...@@ -24,6 +24,11 @@ type L1OriginSelectorIface interface { ...@@ -24,6 +24,11 @@ type L1OriginSelectorIface interface {
FindL1Origin(ctx context.Context, l2Head eth.L2BlockRef) (eth.L1BlockRef, error) FindL1Origin(ctx context.Context, l2Head eth.L2BlockRef) (eth.L1BlockRef, error)
} }
// SequencerMetrics is the set of metrics hooks the Sequencer reports to.
type SequencerMetrics interface {
// RecordSequencerInconsistentL1Origin reports a pair of L1 block IDs, from and to,
// involved in an inconsistent L1 origin selection.
RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID)
// RecordSequencerReset reports a sequencer reset.
RecordSequencerReset()
}
// Sequencer implements the sequencing interface of the driver: it starts and completes block building jobs. // Sequencer implements the sequencing interface of the driver: it starts and completes block building jobs.
type Sequencer struct { type Sequencer struct {
log log.Logger log log.Logger
...@@ -34,13 +39,15 @@ type Sequencer struct { ...@@ -34,13 +39,15 @@ type Sequencer struct {
attrBuilder derive.AttributesBuilder attrBuilder derive.AttributesBuilder
l1OriginSelector L1OriginSelectorIface l1OriginSelector L1OriginSelectorIface
metrics SequencerMetrics
// timeNow enables sequencer testing to mock the time // timeNow enables sequencer testing to mock the time
timeNow func() time.Time timeNow func() time.Time
nextAction time.Time nextAction time.Time
} }
func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEngineControl, attributesBuilder derive.AttributesBuilder, l1OriginSelector L1OriginSelectorIface) *Sequencer { func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEngineControl, attributesBuilder derive.AttributesBuilder, l1OriginSelector L1OriginSelectorIface, metrics SequencerMetrics) *Sequencer {
return &Sequencer{ return &Sequencer{
log: log, log: log,
config: cfg, config: cfg,
...@@ -48,6 +55,7 @@ func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEn ...@@ -48,6 +55,7 @@ func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEn
timeNow: time.Now, timeNow: time.Now,
attrBuilder: attributesBuilder, attrBuilder: attributesBuilder,
l1OriginSelector: l1OriginSelector, l1OriginSelector: l1OriginSelector,
metrics: metrics,
} }
} }
...@@ -63,6 +71,7 @@ func (d *Sequencer) StartBuildingBlock(ctx context.Context) error { ...@@ -63,6 +71,7 @@ func (d *Sequencer) StartBuildingBlock(ctx context.Context) error {
} }
if !(l2Head.L1Origin.Hash == l1Origin.ParentHash || l2Head.L1Origin.Hash == l1Origin.Hash) { if !(l2Head.L1Origin.Hash == l1Origin.ParentHash || l2Head.L1Origin.Hash == l1Origin.Hash) {
d.metrics.RecordSequencerInconsistentL1Origin(l2Head.L1Origin, l1Origin.ID())
return derive.NewResetError(fmt.Errorf("cannot build new L2 block with L1 origin %s (parent L1 %s) on current L2 head %s with L1 origin %s", l1Origin, l1Origin.ParentHash, l2Head, l2Head.L1Origin)) return derive.NewResetError(fmt.Errorf("cannot build new L2 block with L1 origin %s (parent L1 %s) on current L2 head %s with L1 origin %s", l1Origin, l1Origin.ParentHash, l2Head, l2Head.L1Origin))
} }
...@@ -169,6 +178,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP ...@@ -169,6 +178,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
return nil, err // bubble up critical errors. return nil, err // bubble up critical errors.
} else if errors.Is(err, derive.ErrReset) { } else if errors.Is(err, derive.ErrReset) {
d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err) d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err)
d.metrics.RecordSequencerReset()
d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block
if buildingID != (eth.PayloadID{}) { // cancel what we were doing if buildingID != (eth.PayloadID{}) { // cancel what we were doing
d.CancelBuildingBlock(ctx) d.CancelBuildingBlock(ctx)
...@@ -180,6 +190,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP ...@@ -180,6 +190,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
// Any unfinished block building work eventually times out, and will be cleaned up that way. // Any unfinished block building work eventually times out, and will be cleaned up that way.
} else { } else {
d.log.Error("sequencer failed to seal block with unclassified error", "err", err) d.log.Error("sequencer failed to seal block with unclassified error", "err", err)
d.nextAction = d.timeNow().Add(time.Second)
if buildingID != (eth.PayloadID{}) { // don't keep stale block building jobs around, try to cancel them if buildingID != (eth.PayloadID{}) { // don't keep stale block building jobs around, try to cancel them
d.CancelBuildingBlock(ctx) d.CancelBuildingBlock(ctx)
} }
...@@ -196,6 +207,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP ...@@ -196,6 +207,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
return nil, err return nil, err
} else if errors.Is(err, derive.ErrReset) { } else if errors.Is(err, derive.ErrReset) {
d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err) d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err)
d.metrics.RecordSequencerReset()
d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block
d.engine.Reset() d.engine.Reset()
} else if errors.Is(err, derive.ErrTemporary) { } else if errors.Is(err, derive.ErrTemporary) {
......
...@@ -17,6 +17,7 @@ import ( ...@@ -17,6 +17,7 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/ethereum-optimism/optimism/op-node/eth" "github.com/ethereum-optimism/optimism/op-node/eth"
"github.com/ethereum-optimism/optimism/op-node/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup" "github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive" "github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-node/testlog" "github.com/ethereum-optimism/optimism/op-node/testlog"
...@@ -301,7 +302,7 @@ func TestSequencerChaosMonkey(t *testing.T) { ...@@ -301,7 +302,7 @@ func TestSequencerChaosMonkey(t *testing.T) {
} }
}) })
seq := NewSequencer(log, cfg, engControl, attrBuilder, originSelector) seq := NewSequencer(log, cfg, engControl, attrBuilder, originSelector, metrics.NoopMetrics)
seq.timeNow = clockFn seq.timeNow = clockFn
// try to build 1000 blocks, with 5x as many planning attempts, to handle errors and clock problems // try to build 1000 blocks, with 5x as many planning attempts, to handle errors and clock problems
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment