Commit 91516a6d authored by protolambda's avatar protolambda

op-node: fix-sequencer-err-handling PR 4930 suggestions

parent 61c0b14e
......@@ -8,6 +8,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/ethereum-optimism/optimism/op-node/eth"
"github.com/ethereum-optimism/optimism/op-node/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-node/rollup/driver"
......@@ -47,7 +48,7 @@ func NewL2Sequencer(t Testing, log log.Logger, l1 derive.L1Fetcher, eng L2API, c
}
return &L2Sequencer{
L2Verifier: *ver,
sequencer: driver.NewSequencer(log, cfg, ver.derivation, attrBuilder, l1OriginSelector),
sequencer: driver.NewSequencer(log, cfg, ver.derivation, attrBuilder, l1OriginSelector, metrics.NoopMetrics),
mockL1OriginSelector: l1OriginSelector,
failL2GossipUnsafeBlock: nil,
}
......@@ -121,7 +122,7 @@ func (s *L2Sequencer) ActBuildToL1Head(t Testing) {
// ActBuildToL1HeadUnsafe builds empty blocks until (incl.) the L1 head becomes the L1 origin of the L2 head
func (s *L2Sequencer) ActBuildToL1HeadUnsafe(t Testing) {
for s.derivation.UnsafeL2Head().L1Origin.Number < s.l1State.L1Head().Number {
// Note: the
// Note: the derivation pipeline does not run, we are just sequencing a block on top of the existing L2 chain.
s.ActL2StartBlock(t)
s.ActL2EndBlock(t)
}
......@@ -144,6 +145,7 @@ func (s *L2Sequencer) ActBuildToL1HeadExcl(t Testing) {
// ActBuildToL1HeadExclUnsafe builds empty blocks until (excl.) the L1 head becomes the L1 origin of the L2 head, without safe-head progression.
func (s *L2Sequencer) ActBuildToL1HeadExclUnsafe(t Testing) {
for {
// Note: the derivation pipeline does not run, we are just sequencing a block on top of the existing L2 chain.
nextOrigin, err := s.mockL1OriginSelector.FindL1Origin(t.Ctx(), s.derivation.UnsafeL2Head())
require.NoError(t, err)
if nextOrigin.Number >= s.l1State.L1Head().Number {
......
......@@ -108,12 +108,7 @@ func TestL2Sequencer_SequencerDrift(gt *testing.T) {
// while the verifier-codepath only ever sees the valid post-reorg L1 chain.
func TestL2Sequencer_SequencerOnlyReorg(gt *testing.T) {
t := NewDefaultTesting(gt)
p := &e2eutils.TestParams{
MaxSequencerDrift: 20, // larger than L1 block time we simulate in this test (12)
SequencerWindowSize: 24,
ChannelTimeout: 20,
}
dp := e2eutils.MakeDeployParams(t, p)
dp := e2eutils.MakeDeployParams(t, defaultRollupTestParams)
sd := e2eutils.Setup(t, dp, defaultAlloc)
log := testlog.Logger(t, log.LvlDebug)
miner, _, sequencer := setupSequencerTest(t, sd, log)
......
......@@ -53,6 +53,8 @@ type Metricer interface {
RecordUnsafePayloadsBuffer(length uint64, memSize uint64, next eth.BlockID)
CountSequencedTxs(count int)
RecordL1ReorgDepth(d uint64)
RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID)
RecordSequencerReset()
RecordGossipEvent(evType int32)
IncPeerCount()
DecPeerCount()
......@@ -86,6 +88,9 @@ type Metrics struct {
SequencingErrors *EventMetrics
PublishingErrors *EventMetrics
SequencerInconsistentL1Origin *EventMetrics
SequencerResets *EventMetrics
SequencerBuildingDiffDurationSeconds prometheus.Histogram
SequencerBuildingDiffTotal prometheus.Counter
......@@ -204,6 +209,9 @@ func NewMetrics(procName string) *Metrics {
SequencingErrors: NewEventMetrics(factory, ns, "sequencing_errors", "sequencing errors"),
PublishingErrors: NewEventMetrics(factory, ns, "publishing_errors", "p2p publishing errors"),
SequencerInconsistentL1Origin: NewEventMetrics(factory, ns, "sequencer_inconsistent_l1_origin", "events when the sequencer selects an inconsistent L1 origin"),
SequencerResets: NewEventMetrics(factory, ns, "sequencer_resets", "sequencer resets"),
UnsafePayloadsBufferLen: factory.NewGauge(prometheus.GaugeOpts{
Namespace: ns,
Name: "unsafe_payloads_buffer_len",
......@@ -455,6 +463,16 @@ func (m *Metrics) RecordL1ReorgDepth(d uint64) {
m.L1ReorgDepth.Observe(float64(d))
}
func (m *Metrics) RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID) {
m.SequencerInconsistentL1Origin.RecordEvent()
m.recordRef("l1_origin", "inconsistent_from", from.Number, 0, from.Hash)
m.recordRef("l1_origin", "inconsistent_to", to.Number, 0, to.Hash)
}
func (m *Metrics) RecordSequencerReset() {
m.SequencerResets.RecordEvent()
}
func (m *Metrics) RecordGossipEvent(evType int32) {
m.GossipEventsTotal.WithLabelValues(pb.TraceEvent_Type_name[evType]).Inc()
}
......@@ -584,6 +602,12 @@ func (n *noopMetricer) CountSequencedTxs(count int) {
func (n *noopMetricer) RecordL1ReorgDepth(d uint64) {
}
func (n *noopMetricer) RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID) {
}
func (n *noopMetricer) RecordSequencerReset() {
}
func (n *noopMetricer) RecordGossipEvent(evType int32) {
}
......
......@@ -25,6 +25,9 @@ type L1Fetcher interface {
L1TransactionFetcher
}
// ResettableEngineControl wraps EngineControl with reset-functionality,
// which handles reorgs like the derivation pipeline:
// by determining the last valid block references to continue from.
type ResettableEngineControl interface {
EngineControl
Reset()
......
......@@ -29,6 +29,7 @@ type Metrics interface {
RecordL1ReorgDepth(d uint64)
EngineMetrics
SequencerMetrics
}
type L1Chain interface {
......@@ -88,7 +89,7 @@ func NewDriver(driverCfg *Config, cfg *rollup.Config, l2 L2Chain, l1 L1Chain, ne
attrBuilder := derive.NewFetchingAttributesBuilder(cfg, l1, l2)
engine := derivationPipeline
meteredEngine := NewMeteredEngine(cfg, engine, metrics, log)
sequencer := NewSequencer(log, cfg, meteredEngine, attrBuilder, findL1Origin)
sequencer := NewSequencer(log, cfg, meteredEngine, attrBuilder, findL1Origin, metrics)
return &Driver{
l1State: l1State,
......
......@@ -30,7 +30,7 @@ type MeteredEngine struct {
buildingStartTime time.Time
}
// MeteredEngine implements derive.EngineControl
// MeteredEngine implements derive.ResettableEngineControl
var _ derive.ResettableEngineControl = (*MeteredEngine)(nil)
func NewMeteredEngine(cfg *rollup.Config, inner derive.ResettableEngineControl, metrics EngineMetrics, log log.Logger) *MeteredEngine {
......
......@@ -24,6 +24,11 @@ type L1OriginSelectorIface interface {
FindL1Origin(ctx context.Context, l2Head eth.L2BlockRef) (eth.L1BlockRef, error)
}
type SequencerMetrics interface {
RecordSequencerInconsistentL1Origin(from eth.BlockID, to eth.BlockID)
RecordSequencerReset()
}
// Sequencer implements the sequencing interface of the driver: it starts and completes block building jobs.
type Sequencer struct {
log log.Logger
......@@ -34,13 +39,15 @@ type Sequencer struct {
attrBuilder derive.AttributesBuilder
l1OriginSelector L1OriginSelectorIface
metrics SequencerMetrics
// timeNow enables sequencer testing to mock the time
timeNow func() time.Time
nextAction time.Time
}
func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEngineControl, attributesBuilder derive.AttributesBuilder, l1OriginSelector L1OriginSelectorIface) *Sequencer {
func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEngineControl, attributesBuilder derive.AttributesBuilder, l1OriginSelector L1OriginSelectorIface, metrics SequencerMetrics) *Sequencer {
return &Sequencer{
log: log,
config: cfg,
......@@ -48,6 +55,7 @@ func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEn
timeNow: time.Now,
attrBuilder: attributesBuilder,
l1OriginSelector: l1OriginSelector,
metrics: metrics,
}
}
......@@ -63,6 +71,7 @@ func (d *Sequencer) StartBuildingBlock(ctx context.Context) error {
}
if !(l2Head.L1Origin.Hash == l1Origin.ParentHash || l2Head.L1Origin.Hash == l1Origin.Hash) {
d.metrics.RecordSequencerInconsistentL1Origin(l2Head.L1Origin, l1Origin.ID())
return derive.NewResetError(fmt.Errorf("cannot build new L2 block with L1 origin %s (parent L1 %s) on current L2 head %s with L1 origin %s", l1Origin, l1Origin.ParentHash, l2Head, l2Head.L1Origin))
}
......@@ -169,6 +178,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
return nil, err // bubble up critical errors.
} else if errors.Is(err, derive.ErrReset) {
d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err)
d.metrics.RecordSequencerReset()
d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block
if buildingID != (eth.PayloadID{}) { // cancel what we were doing
d.CancelBuildingBlock(ctx)
......@@ -180,6 +190,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
// Any unfinished block building work eventually times out, and will be cleaned up that way.
} else {
d.log.Error("sequencer failed to seal block with unclassified error", "err", err)
d.nextAction = d.timeNow().Add(time.Second)
if buildingID != (eth.PayloadID{}) { // don't keep stale block building jobs around, try to cancel them
d.CancelBuildingBlock(ctx)
}
......@@ -196,6 +207,7 @@ func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionP
return nil, err
} else if errors.Is(err, derive.ErrReset) {
d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err)
d.metrics.RecordSequencerReset()
d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block
d.engine.Reset()
} else if errors.Is(err, derive.ErrTemporary) {
......
......@@ -17,6 +17,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/ethereum-optimism/optimism/op-node/eth"
"github.com/ethereum-optimism/optimism/op-node/metrics"
"github.com/ethereum-optimism/optimism/op-node/rollup"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum-optimism/optimism/op-node/testlog"
......@@ -301,7 +302,7 @@ func TestSequencerChaosMonkey(t *testing.T) {
}
})
seq := NewSequencer(log, cfg, engControl, attrBuilder, originSelector)
seq := NewSequencer(log, cfg, engControl, attrBuilder, originSelector, metrics.NoopMetrics)
seq.timeNow = clockFn
// try to build 1000 blocks, with 5x as many planning attempts, to handle errors and clock problems
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment