Commit 61c0b14e authored by protolambda's avatar protolambda

op-node: handle sequencer errors with derivation-levels, reset when L1 origins...

op-node: handle sequencer errors with derivation-levels, reset when L1 origins are inconsistent, test reset handling
parent a340d593
......@@ -2,6 +2,7 @@ package actions
import (
"context"
"errors"
"github.com/ethereum/go-ethereum/log"
"github.com/stretchr/testify/require"
......@@ -54,6 +55,10 @@ func NewL2Sequencer(t Testing, log log.Logger, l1 derive.L1Fetcher, eng L2API, c
// ActL2StartBlock starts building of a new L2 block on top of the head
func (s *L2Sequencer) ActL2StartBlock(t Testing) {
s.ActL2StartBlockCheckErr(t, nil)
}
func (s *L2Sequencer) ActL2StartBlockCheckErr(t Testing, checkErr error) {
if !s.l2PipelineIdle {
t.InvalidAction("cannot start L2 build when derivation is not idle")
return
......@@ -64,9 +69,19 @@ func (s *L2Sequencer) ActL2StartBlock(t Testing) {
}
err := s.sequencer.StartBuildingBlock(t.Ctx())
if checkErr == nil {
require.NoError(t, err, "failed to start block building")
} else {
require.ErrorIs(t, err, checkErr, "expected typed error")
}
if errors.Is(err, derive.ErrReset) {
s.derivation.Reset()
}
if err == nil {
s.l2Building = true
}
}
// ActL2EndBlock completes a new L2 block and applies it to the L2 chain as new canonical unsafe head
......@@ -103,7 +118,16 @@ func (s *L2Sequencer) ActBuildToL1Head(t Testing) {
}
}
// ActBuildToL1HeadExcl builds empty blocks until (excl.) the L1 head becomes the L2 origin
// ActBuildToL1HeadUnsafe builds empty blocks until (incl.) the L1 head becomes the L1 origin of the L2 head
func (s *L2Sequencer) ActBuildToL1HeadUnsafe(t Testing) {
for s.derivation.UnsafeL2Head().L1Origin.Number < s.l1State.L1Head().Number {
// Note: the
s.ActL2StartBlock(t)
s.ActL2EndBlock(t)
}
}
// ActBuildToL1HeadExcl builds empty blocks until (excl.) the L1 head becomes the L1 origin of the L2 head
func (s *L2Sequencer) ActBuildToL1HeadExcl(t Testing) {
for {
s.ActL2PipelineFull(t)
......@@ -116,3 +140,16 @@ func (s *L2Sequencer) ActBuildToL1HeadExcl(t Testing) {
s.ActL2EndBlock(t)
}
}
// ActBuildToL1HeadExclUnsafe builds empty blocks until (excl.) the L1 head becomes the L1 origin of the L2 head, without safe-head progression.
func (s *L2Sequencer) ActBuildToL1HeadExclUnsafe(t Testing) {
for {
nextOrigin, err := s.mockL1OriginSelector.FindL1Origin(t.Ctx(), s.derivation.UnsafeL2Head())
require.NoError(t, err)
if nextOrigin.Number >= s.l1State.L1Head().Number {
break
}
s.ActL2StartBlock(t)
s.ActL2EndBlock(t)
}
}
......@@ -4,6 +4,8 @@ import (
"math/big"
"testing"
"github.com/ethereum-optimism/optimism/op-node/rollup/derive"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/params"
......@@ -100,3 +102,72 @@ func TestL2Sequencer_SequencerDrift(gt *testing.T) {
sequencer.ActL2StartBlock(t)
require.True(t, engine.l2ForceEmpty, "engine should not be allowed to include anything after sequencer drift is surpassed")
}
// TestL2Sequencer_SequencerOnlyReorg regression-tests a Goerli halt where the sequencer
// would build an unsafe L2 block with a L1 origin that then gets reorged out,
// while the verifier-codepath only ever sees the valid post-reorg L1 chain.
func TestL2Sequencer_SequencerOnlyReorg(gt *testing.T) {
t := NewDefaultTesting(gt)
p := &e2eutils.TestParams{
MaxSequencerDrift: 20, // larger than L1 block time we simulate in this test (12)
SequencerWindowSize: 24,
ChannelTimeout: 20,
}
dp := e2eutils.MakeDeployParams(t, p)
sd := e2eutils.Setup(t, dp, defaultAlloc)
log := testlog.Logger(t, log.LvlDebug)
miner, _, sequencer := setupSequencerTest(t, sd, log)
// Sequencer at first only recognizes the genesis as safe.
// The rest of the L1 chain will be incorporated as L1 origins into unsafe L2 blocks.
sequencer.ActL2PipelineFull(t)
// build L1 block with coinbase A
miner.ActL1SetFeeRecipient(common.Address{'A'})
miner.ActEmptyBlock(t)
// sequencer builds L2 blocks, until (incl.) it creates a L2 block with a L1 origin that has A as coinbase address
sequencer.ActL1HeadSignal(t)
sequencer.ActBuildToL1HeadUnsafe(t)
status := sequencer.SyncStatus()
require.Zero(t, status.SafeL2.L1Origin.Number, "no safe head progress")
require.Equal(t, status.HeadL1.Hash, status.UnsafeL2.L1Origin.Hash, "have head L1 origin")
// reorg out block with coinbase A, and make a block with coinbase B
miner.ActL1RewindToParent(t)
miner.ActL1SetFeeRecipient(common.Address{'B'})
miner.ActEmptyBlock(t)
// and a second block, for derivation to pick up on the new L1 chain
// (height is used as heuristic to not flip-flop between chains too frequently)
miner.ActEmptyBlock(t)
// Make the sequencer aware of the new head, and try to sync it.
// Since the safe chain never incorporated the now reorged L1 block with coinbase A,
// it will sync the new L1 chain fine.
// No batches are submitted yet however,
// so it'll keep the L2 block with the old L1 origin, since no conflict is detected.
sequencer.ActL1HeadSignal(t)
sequencer.ActL2PipelineFull(t)
// TODO: CLI-3405 we can detect the inconsistency of the L1 origin of the unsafe L2 head:
// as verifier, there is no need to wait for sequencer to recognize it.
newStatus := sequencer.SyncStatus()
require.Equal(t, status.HeadL1.Hash, newStatus.UnsafeL2.L1Origin.Hash, "still have old bad L1 origin")
require.NotEqual(t, status.HeadL1.Hash, newStatus.HeadL1.Hash, "did see the new L1 head change")
require.Equal(t, newStatus.HeadL1.Hash, newStatus.CurrentL1.Hash, "did sync the new L1 head as verifier")
// the block N+1 cannot build on the old N which still refers to the now orphaned L1 origin
require.Equal(t, status.UnsafeL2.L1Origin.Number, newStatus.HeadL1.Number-1, "seeing N+1 to attempt to build on N")
require.NotEqual(t, status.UnsafeL2.L1Origin.Hash, newStatus.HeadL1.ParentHash, "but N+1 cannot fit on N")
sequencer.ActL1HeadSignal(t)
// sequence more L2 blocks, until we actually need the next L1 origin
sequencer.ActBuildToL1HeadExclUnsafe(t)
// We expect block building to fail when the next L1 block is not consistent with the existing L1 origin
sequencer.ActL2StartBlockCheckErr(t, derive.ErrReset)
// After hitting a reset error, it reset derivation, and drops the old L1 chain
sequencer.ActL2PipelineFull(t)
require.Zero(t, sequencer.SyncStatus().UnsafeL2.L1Origin.Number, "back to genesis block with good L1 origin, drop old unsafe L2 chain with bad L1 origins")
// Can build new L2 blocks with good L1 origin
sequencer.ActBuildToL1HeadUnsafe(t)
require.Equal(t, newStatus.HeadL1.Hash, sequencer.SyncStatus().UnsafeL2.L1Origin.Hash, "build L2 chain with new correct L1 origins")
}
......@@ -25,6 +25,11 @@ type L1Fetcher interface {
L1TransactionFetcher
}
type ResettableEngineControl interface {
EngineControl
Reset()
}
type ResetableStage interface {
// Reset resets a pull stage. `base` refers to the L1 Block Reference to reset to, with corresponding configuration.
Reset(ctx context.Context, base eth.L1BlockRef, baseCfg eth.SystemConfig) error
......
......@@ -69,7 +69,7 @@ type SequencerIface interface {
StartBuildingBlock(ctx context.Context) error
CompleteBuildingBlock(ctx context.Context) (*eth.ExecutionPayload, error)
PlanNextSequencerAction() time.Duration
RunNextSequencerAction(ctx context.Context) *eth.ExecutionPayload
RunNextSequencerAction(ctx context.Context) (*eth.ExecutionPayload, error)
BuildingOnto() eth.L2BlockRef
}
......@@ -93,7 +93,6 @@ func NewDriver(driverCfg *Config, cfg *rollup.Config, l2 L2Chain, l1 L1Chain, ne
return &Driver{
l1State: l1State,
derivation: derivationPipeline,
idleDerivation: false,
stateReq: make(chan chan struct{}),
forceReset: make(chan chan struct{}, 10),
startSequencer: make(chan hashAndErrorChannel, 10),
......
......@@ -21,7 +21,7 @@ type EngineMetrics interface {
// MeteredEngine wraps an EngineControl and adds metrics such as block building time diff and sealing time
type MeteredEngine struct {
inner derive.EngineControl
inner derive.ResettableEngineControl
cfg *rollup.Config
metrics EngineMetrics
......@@ -31,9 +31,9 @@ type MeteredEngine struct {
}
// MeteredEngine implements derive.EngineControl
var _ derive.EngineControl = (*MeteredEngine)(nil)
var _ derive.ResettableEngineControl = (*MeteredEngine)(nil)
func NewMeteredEngine(cfg *rollup.Config, inner derive.EngineControl, metrics EngineMetrics, log log.Logger) *MeteredEngine {
func NewMeteredEngine(cfg *rollup.Config, inner derive.ResettableEngineControl, metrics EngineMetrics, log log.Logger) *MeteredEngine {
return &MeteredEngine{
inner: inner,
cfg: cfg,
......@@ -93,3 +93,7 @@ func (m *MeteredEngine) CancelPayload(ctx context.Context, force bool) error {
func (m *MeteredEngine) BuildingPayload() (onto eth.L2BlockRef, id eth.PayloadID, safe bool) {
return m.inner.BuildingPayload()
}
func (m *MeteredEngine) Reset() {
m.inner.Reset()
}
......@@ -2,6 +2,7 @@ package driver
import (
"context"
"errors"
"fmt"
"time"
......@@ -28,7 +29,7 @@ type Sequencer struct {
log log.Logger
config *rollup.Config
engine derive.EngineControl
engine derive.ResettableEngineControl
attrBuilder derive.AttributesBuilder
l1OriginSelector L1OriginSelectorIface
......@@ -39,7 +40,7 @@ type Sequencer struct {
nextAction time.Time
}
func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.EngineControl, attributesBuilder derive.AttributesBuilder, l1OriginSelector L1OriginSelectorIface) *Sequencer {
func NewSequencer(log log.Logger, cfg *rollup.Config, engine derive.ResettableEngineControl, attributesBuilder derive.AttributesBuilder, l1OriginSelector L1OriginSelectorIface) *Sequencer {
return &Sequencer{
log: log,
config: cfg,
......@@ -62,7 +63,7 @@ func (d *Sequencer) StartBuildingBlock(ctx context.Context) error {
}
if !(l2Head.L1Origin.Hash == l1Origin.ParentHash || l2Head.L1Origin.Hash == l1Origin.Hash) {
return fmt.Errorf("cannot build new L2 block with L1 origin %s (parent L1 %s) on current L2 head %s with L1 origin %s", l1Origin, l1Origin.ParentHash, l2Head, l2Head.L1Origin)
return derive.NewResetError(fmt.Errorf("cannot build new L2 block with L1 origin %s (parent L1 %s) on current L2 head %s with L1 origin %s", l1Origin, l1Origin.ParentHash, l2Head, l2Head.L1Origin))
}
d.log.Info("creating new block", "parent", l2Head, "l1Origin", l1Origin)
......@@ -159,29 +160,55 @@ func (d *Sequencer) BuildingOnto() eth.L2BlockRef {
// RunNextSequencerAction starts new block building work, or seals existing work,
// and is best timed by first awaiting the delay returned by PlanNextSequencerAction.
// If a new block is successfully sealed, it will be returned for publishing, nil otherwise.
func (d *Sequencer) RunNextSequencerAction(ctx context.Context) *eth.ExecutionPayload {
// Only critical errors are bubbled up, other errors are handled internally.
func (d *Sequencer) RunNextSequencerAction(ctx context.Context) (*eth.ExecutionPayload, error) {
if _, buildingID, _ := d.engine.BuildingPayload(); buildingID != (eth.PayloadID{}) {
payload, err := d.CompleteBuildingBlock(ctx)
if err != nil {
d.log.Error("sequencer failed to seal new block", "err", err)
if errors.Is(err, derive.ErrCritical) {
return nil, err // bubble up critical errors.
} else if errors.Is(err, derive.ErrReset) {
d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err)
d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block
if buildingID != (eth.PayloadID{}) { // cancel what we were doing
d.CancelBuildingBlock(ctx)
}
} else if errors.Is(err, derive.ErrTemporary) {
d.log.Error("sequencer failed temporarily to seal new block", "err", err)
d.nextAction = d.timeNow().Add(time.Second)
// We don't explicitly cancel block building jobs upon temporary errors: we may still finish the block.
// Any unfinished block building work eventually times out, and will be cleaned up that way.
} else {
d.log.Error("sequencer failed to seal block with unclassified error", "err", err)
if buildingID != (eth.PayloadID{}) { // don't keep stale block building jobs around, try to cancel them
d.CancelBuildingBlock(ctx)
}
return nil
}
return nil, nil
} else {
d.log.Info("sequencer successfully built a new block", "block", payload.ID(), "time", uint64(payload.Timestamp), "txs", len(payload.Transactions))
return payload
return payload, nil
}
} else {
err := d.StartBuildingBlock(ctx)
if err != nil {
d.log.Error("sequencer failed to start building new block", "err", err)
if errors.Is(err, derive.ErrCritical) {
return nil, err
} else if errors.Is(err, derive.ErrReset) {
d.log.Error("sequencer failed to seal new block, requiring derivation reset", "err", err)
d.nextAction = d.timeNow().Add(time.Second * time.Duration(d.config.BlockTime)) // hold off from sequencing for a full block
d.engine.Reset()
} else if errors.Is(err, derive.ErrTemporary) {
d.log.Error("sequencer temporarily failed to start building new block", "err", err)
d.nextAction = d.timeNow().Add(time.Second)
} else {
d.log.Error("sequencer failed to start building new block with unclassified error", "err", err)
d.nextAction = d.timeNow().Add(time.Second)
}
} else {
parent, buildingID, _ := d.engine.BuildingPayload() // we should have a new payload ID now that we're building a block
d.log.Info("sequencer started building new block", "payload_id", buildingID, "l2_parent_block", parent, "l2_parent_block_time", parent.Time)
}
return nil
return nil, nil
}
}
......@@ -23,6 +23,8 @@ import (
"github.com/ethereum-optimism/optimism/op-node/testutils"
)
var mockResetErr = fmt.Errorf("mock reset err: %w", derive.ErrReset)
type FakeEngineControl struct {
finalized eth.L2BlockRef
safe eth.L2BlockRef
......@@ -122,7 +124,11 @@ func (m *FakeEngineControl) resetBuildingState() {
m.buildingAttrs = nil
}
var _ derive.EngineControl = (*FakeEngineControl)(nil)
func (m *FakeEngineControl) Reset() {
m.err = nil
}
var _ derive.ResettableEngineControl = (*FakeEngineControl)(nil)
type testAttrBuilderFn func(ctx context.Context, l2Parent eth.L2BlockRef, epoch eth.BlockID) (attrs *eth.PayloadAttributes, err error)
......@@ -319,25 +325,30 @@ func TestSequencerChaosMonkey(t *testing.T) {
// reset errors
originErr = nil
attrsErr = nil
if engControl.err != mockResetErr { // the mockResetErr requires the sequencer to Reset() to recover.
engControl.err = nil
}
engControl.errTyp = derive.BlockInsertOK
// maybe make something maybe fail, or try a new L1 origin
switch rng.Intn(10) { // 40% chance to fail sequencer action (!!!)
case 0:
switch rng.Intn(20) { // 9/20 = 45% chance to fail sequencer action (!!!)
case 0, 1:
originErr = errors.New("mock origin error")
case 1:
case 2, 3:
attrsErr = errors.New("mock attributes error")
case 2:
case 4, 5:
engControl.err = errors.New("mock temporary engine error")
engControl.errTyp = derive.BlockInsertTemporaryErr
case 3:
case 6, 7:
engControl.err = errors.New("mock prestate engine error")
engControl.errTyp = derive.BlockInsertPrestateErr
case 8:
engControl.err = mockResetErr
default:
// no error
}
payload := seq.RunNextSequencerAction(context.Background())
payload, err := seq.RunNextSequencerAction(context.Background())
require.NoError(t, err)
if payload != nil {
require.Equal(t, engControl.UnsafeL2Head().ID(), payload.ID(), "head must stay in sync with emitted payloads")
var tx types.Transaction
......
......@@ -32,9 +32,6 @@ type Driver struct {
// The derivation pipeline determines the new l2Safe.
derivation DerivationPipeline
// When the derivation pipeline is waiting for new data to do anything
idleDerivation bool
// Requests to block the event loop for synchronous execution to avoid reading an inconsistent state
stateReq chan chan struct{}
......@@ -212,7 +209,11 @@ func (s *Driver) eventLoop() {
select {
case <-sequencerCh:
payload := s.sequencer.RunNextSequencerAction(ctx)
payload, err := s.sequencer.RunNextSequencerAction(ctx)
if err != nil {
s.log.Error("Sequencer critical error", "err", err)
return
}
if s.network != nil && payload != nil {
// Publishing of unsafe data via p2p is optional.
// Errors are not severe enough to change/halt sequencing but should be logged and metered.
......@@ -244,13 +245,11 @@ func (s *Driver) eventLoop() {
step()
case <-stepReqCh:
s.metrics.SetDerivationIdle(false)
s.idleDerivation = false
s.log.Debug("Derivation process step", "onto_origin", s.derivation.Origin(), "attempts", stepAttempts)
err := s.derivation.Step(context.Background())
stepAttempts += 1 // count as attempt by default. We reset to 0 if we are making healthy progress.
if err == io.EOF {
s.log.Debug("Derivation process went idle", "progress", s.derivation.Origin())
s.idleDerivation = true
stepAttempts = 0
s.metrics.SetDerivationIdle(true)
continue
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment