engine_queue.go 33.1 KB
Newer Older
protolambda's avatar
protolambda committed
1 2 3 4
package derive

import (
	"context"
5
	"errors"
protolambda's avatar
protolambda committed
6 7 8 9
	"fmt"
	"io"
	"time"

10
	"github.com/ethereum/go-ethereum"
protolambda's avatar
protolambda committed
11
	"github.com/ethereum/go-ethereum/common"
12
	"github.com/ethereum/go-ethereum/core/types"
protolambda's avatar
protolambda committed
13
	"github.com/ethereum/go-ethereum/log"
14 15

	"github.com/ethereum-optimism/optimism/op-node/rollup"
16
	"github.com/ethereum-optimism/optimism/op-node/rollup/async"
17
	"github.com/ethereum-optimism/optimism/op-node/rollup/conductor"
18
	"github.com/ethereum-optimism/optimism/op-node/rollup/sync"
19
	"github.com/ethereum-optimism/optimism/op-service/eth"
protolambda's avatar
protolambda committed
20 21
)

22 23 24 25
// AttributesWithParent bundles payload attributes with the context the EngineQueue needs to apply them.
//
//   - attributes: the payload attributes themselves, exposed through Attributes().
//   - parent: the L2 block the attributes are queued on top of; the EngineQueue compares it
//     against the pending safe head before processing the attributes.
//   - isLastInSpan: when set, a successful consolidation also promotes the resulting block
//     to the safe head (not only to the pending safe head).
type AttributesWithParent struct {
	attributes   *eth.PayloadAttributes
	parent       eth.L2BlockRef
	isLastInSpan bool
}

28 29 30 31 32 33 34 35
// NewAttributesWithParent wraps the given payload attributes together with the
// L2 parent block and the isLastInSpan marker into a new AttributesWithParent.
func NewAttributesWithParent(attributes *eth.PayloadAttributes, parent eth.L2BlockRef, isLastInSpan bool) *AttributesWithParent {
	return &AttributesWithParent{
		attributes:   attributes,
		parent:       parent,
		isLastInSpan: isLastInSpan,
	}
}

// Attributes returns the wrapped payload attributes.
func (a *AttributesWithParent) Attributes() *eth.PayloadAttributes {
	return a.attributes
}

36 37
// NextAttributesProvider is the previous pipeline stage as seen by the EngineQueue:
// it supplies the L1 origin and the next payload attributes to apply.
type NextAttributesProvider interface {
	// Origin returns the L1 block ref of the previous stage.
	// EngineQueue.Step adopts it as its own origin after verifying it.
	Origin() eth.L1BlockRef
	// NextAttributes returns the next attributes for the given L2 block
	// (EngineQueue.Step passes the pending safe head).
	// EngineQueue.Step treats an io.EOF error as "out of data" and passes it on to its caller.
	NextAttributes(context.Context, eth.L2BlockRef) (*AttributesWithParent, error)
}

41
// L2Source groups the L2 read methods: execution payload envelopes by hash or number,
// and L2 block refs by label, hash or number. It also embeds SystemConfigL2Fetcher.
// Within this file the EngineQueue uses PayloadByNumber to fetch the existing block
// that follows the pending safe head during consolidation.
type L2Source interface {
	PayloadByHash(context.Context, common.Hash) (*eth.ExecutionPayloadEnvelope, error)
	PayloadByNumber(context.Context, uint64) (*eth.ExecutionPayloadEnvelope, error)
	L2BlockRefByLabel(ctx context.Context, label eth.BlockLabel) (eth.L2BlockRef, error)
	L2BlockRefByHash(ctx context.Context, l2Hash common.Hash) (eth.L2BlockRef, error)
	L2BlockRefByNumber(ctx context.Context, num uint64) (eth.L2BlockRef, error)
	SystemConfigL2Fetcher
}

50 51 52 53 54
// Engine is the union of the ExecEngine and L2Source interfaces.
type Engine interface {
	ExecEngine
	L2Source
}

55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
// EngineState provides a read-only interface of the forkchoice state properties of the L2 Engine.
type EngineState interface {
	// Finalized returns the L2 block ref tracked as finalized.
	Finalized() eth.L2BlockRef
	// UnsafeL2Head returns the L2 block ref tracked as unsafe head.
	UnsafeL2Head() eth.L2BlockRef
	// SafeL2Head returns the L2 block ref tracked as safe head.
	SafeL2Head() eth.L2BlockRef
}

// EngineControl enables other components to build blocks with the Engine,
// while keeping the forkchoice state and payload-id management internal to
// avoid state inconsistencies between different users of the EngineControl.
type EngineControl interface {
	EngineState

	// StartPayload requests the engine to start building a block with the given attributes.
	// If updateSafe, the resulting block will be marked as a safe block.
	// On failure, the returned BlockInsertionErrType classifies the error for the caller.
	StartPayload(ctx context.Context, parent eth.L2BlockRef, attrs *AttributesWithParent, updateSafe bool) (errType BlockInsertionErrType, err error)
	// ConfirmPayload requests the engine to complete the current block. If no block is being built, or if it fails, an error is returned.
	// On failure, the returned BlockInsertionErrType classifies the error for the caller.
	ConfirmPayload(ctx context.Context, agossip async.AsyncGossiper, sequencerConductor conductor.SequencerConductor) (out *eth.ExecutionPayloadEnvelope, errTyp BlockInsertionErrType, err error)
	// CancelPayload requests the engine to stop building the current block without making it canonical.
	// This is optional, as the engine expires building jobs that are left uncompleted, but can still save resources.
	CancelPayload(ctx context.Context, force bool) error
	// BuildingPayload indicates if a payload is being built, and onto which block it is being built, and whether or not it is a safe payload.
	BuildingPayload() (onto eth.L2BlockRef, id eth.PayloadID, safe bool)
}

80 81 82 83
// LocalEngineControl extends EngineControl with the additional accessors and mutators
// the EngineQueue relies on: engine-sync status, forkchoice updates, backup-unsafe reorgs,
// unsafe payload insertion, and direct getters/setters for the tracked head blocks.
type LocalEngineControl interface {
	EngineControl
	ResetBuildingState()
	// IsEngineSyncing is consulted by EngineQueue.Step: while it returns true
	// the queue does not move on to safe attributes.
	IsEngineSyncing() bool
	// TryUpdateEngine is called on every EngineQueue.Step; an error matching
	// errNoFCUNeeded lets the step continue, any other result ends the step.
	TryUpdateEngine(ctx context.Context) error
	// TryBackupUnsafeReorg is called on every EngineQueue.Step; when the returned bool
	// is true, the step ends and the returned error (possibly nil) is passed on.
	TryBackupUnsafeReorg(ctx context.Context) (bool, error)
	// InsertUnsafePayload inserts a queued unsafe payload, with ref being the block ref decoded from it.
	InsertUnsafePayload(ctx context.Context, payload *eth.ExecutionPayloadEnvelope, ref eth.L2BlockRef) error

	PendingSafeL2Head() eth.L2BlockRef
	BackupUnsafeL2Head() eth.L2BlockRef

	SetUnsafeHead(eth.L2BlockRef)
	SetSafeHead(eth.L2BlockRef)
	SetFinalizedHead(eth.L2BlockRef)
	SetPendingSafeL2Head(eth.L2BlockRef)
	SetBackupUnsafeL2Head(block eth.L2BlockRef, triggerReorg bool)
}

98 99 100 101
// SafeHeadListener is called when the safe head is updated.
// The safe head may advance by more than one block in a single update.
// The l1Block specified is the first L1 block that includes sufficient information to derive the new safe head.
type SafeHeadListener interface {

	// Enabled reports if this safe head listener is actively using the posted data. This allows the engine queue to
	// optionally skip making calls that may be expensive to prepare.
	// Callbacks may still be made if Enabled returns false but are not guaranteed.
	Enabled() bool

	// SafeHeadUpdated indicates that the safe head has been updated in response to processing batch data.
	// The l1Block specified is the first L1 block containing all required batch data to derive newSafeHead.
	SafeHeadUpdated(newSafeHead eth.L2BlockRef, l1Block eth.BlockID) error

	// SafeHeadReset indicates that the derivation pipeline reset back to the specified safe head.
	// The L1 block that made the new safe head safe is unknown.
	SafeHeadReset(resetSafeHead eth.L2BlockRef) error
}

117 118
// maxUnsafePayloadsMemory caps the memory used for buffering unsafe payloads, in bytes (500 MiB).
const maxUnsafePayloadsMemory = 500 << 20
protolambda's avatar
protolambda committed
119

120 121 122 123 124 125 126 127 128 129 130 131 132 133 134
// finalityLookback defines the number of L1<>L2 relations to track for finalization purposes, one per L1 block.
//
// When L1 finalizes blocks, it finalizes finalityLookback blocks behind the L1 head.
// Non-finality may take longer, but when it does finalize again, it is within this range of the L1 head.
// Thus we only need to retain the L1<>L2 derivation relation data of this many L1 blocks.
//
// In the event of older finalization signals, misconfiguration, or insufficient L1<>L2 derivation relation data,
// we may miss the opportunity to finalize more L2 blocks.
// This does not cause any divergence, it just causes lagging finalization status.
//
// The beacon chain on mainnet has 32 slots per epoch,
// and new finalization events happen at most 4 epochs behind the head.
// We then add 1 to make pruning easier, by leaving room for a new item without pruning the 32*4.
const finalityLookback = 4*32 + 1

135 136 137 138
// finalityDelay is the number of L1 blocks to traverse before trying to finalize L2 blocks again.
// We do not want to do this too often, since it requires fetching an L1 block by number, which cannot be served from cached data.
const finalityDelay = 64

139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
// calcFinalityLookback returns how many L1<>L2 finality relations should be retained.
//
// Without plasma mode this is the default finalityLookback. With plasma mode enabled,
// the worst case is a commitment that is challenged on the last block of the challenge
// window, so the lookback has to span the challenge window plus the resolve window (+1).
// The plasma value is only used if it exceeds the default.
func calcFinalityLookback(cfg *rollup.Config) uint64 {
	if !cfg.UsePlasma {
		return finalityLookback
	}
	plasmaLookback := cfg.DAChallengeWindow + cfg.DAResolveWindow + 1
	if plasmaLookback <= finalityLookback {
		// the plasma windows are not longer than the default lookback
		return finalityLookback
	}
	return plasmaLookback
}

154 155 156 157 158 159 160 161
// FinalityData is one entry of the EngineQueue finality buffer: it relates an L1 block
// to the last L2 block that was derived while the queue was at that L1 block.
type FinalityData struct {
	// The last L2 block that was fully derived and inserted into the L2 engine while processing this L1 block.
	L2Block eth.L2BlockRef
	// The L1 block this stage was at when inserting the L2 block.
	// When this L1 block is finalized, the L2 chain up to this block can be fully reproduced from finalized L1 data.
	L1Block eth.BlockID
}

protolambda's avatar
protolambda committed
162 163 164 165 166
// EngineQueue queues up payload attributes to consolidate or process with the provided Engine
type EngineQueue struct {
	log log.Logger
	cfg *rollup.Config

	// ec is the engine controller used to read and update the tracked head blocks.
	ec LocalEngineControl

	// finalizedL1 is the currently perceived finalized L1 block.
	// This may be ahead of the current traversed origin when syncing.
	finalizedL1 eth.L1BlockRef

	// triedFinalizeAt tracks at which origin we last tried to finalize during sync.
	triedFinalizeAt eth.L1BlockRef

	// The queued-up attributes
	safeAttributes *AttributesWithParent
	unsafePayloads *PayloadsQueue // queue of unsafe payloads, ordered by ascending block number, may have gaps and duplicates

	// Tracks which L2 blocks were last derived from which L1 block.
	// Pruned to stay within calcFinalityLookback(cfg) entries.
	finalityData []FinalityData

	// engine is the L2 chain data source; prev is the previous stage providing origins and attributes.
	engine L2Source
	prev   NextAttributesProvider

	origin eth.L1BlockRef   // updated on resets, and whenever we read from the previous stage.
	sysCfg eth.SystemConfig // only used for pipeline resets

	metrics   Metrics
	l1Fetcher L1Fetcher

	syncCfg *sync.Config

	// lastNotifiedSafeHead is the safe head most recently reported to safeHeadNotifs,
	// used to skip notifications when the safe head did not change.
	safeHeadNotifs       SafeHeadListener // notified when safe head is updated
	lastNotifiedSafeHead eth.L2BlockRef
}

// NewEngineQueue creates a new EngineQueue, which should be Reset(origin) before use.
199
func NewEngineQueue(log log.Logger, cfg *rollup.Config, l2Source L2Source, engine LocalEngineControl, metrics Metrics, prev NextAttributesProvider, l1Fetcher L1Fetcher, syncCfg *sync.Config, safeHeadNotifs SafeHeadListener) *EngineQueue {
200
	return &EngineQueue{
201 202
		log:            log,
		cfg:            cfg,
203 204
		ec:             engine,
		engine:         l2Source,
205
		metrics:        metrics,
206
		finalityData:   make([]FinalityData, 0, calcFinalityLookback(cfg)),
207
		unsafePayloads: NewPayloadsQueue(log, maxUnsafePayloadsMemory, payloadMemSize),
208 209
		prev:           prev,
		l1Fetcher:      l1Fetcher,
210
		syncCfg:        syncCfg,
211
		safeHeadNotifs: safeHeadNotifs,
212
	}
protolambda's avatar
protolambda committed
213 214
}

215
// Origin identifies the L1 chain (incl.) that included and/or produced all the safe L2 blocks.
// It returns the origin last adopted from the previous stage in Step.
func (eq *EngineQueue) Origin() eth.L1BlockRef {
	return eq.origin
}

220 221 222 223
// SystemConfig returns the system config held by the EngineQueue.
func (eq *EngineQueue) SystemConfig() eth.SystemConfig {
	return eq.sysCfg
}

224 225
func (eq *EngineQueue) AddUnsafePayload(envelope *eth.ExecutionPayloadEnvelope) {
	if envelope == nil {
226 227
		eq.log.Warn("cannot add nil unsafe payload")
		return
protolambda's avatar
protolambda committed
228
	}
Danyal Prout's avatar
Danyal Prout committed
229

230 231
	if err := eq.unsafePayloads.Push(envelope); err != nil {
		eq.log.Warn("Could not add unsafe payload", "id", envelope.ExecutionPayload.ID(), "timestamp", uint64(envelope.ExecutionPayload.Timestamp), "err", err)
232 233 234
		return
	}
	p := eq.unsafePayloads.Peek()
235 236
	eq.metrics.RecordUnsafePayloadsBuffer(uint64(eq.unsafePayloads.Len()), eq.unsafePayloads.MemSize(), p.ExecutionPayload.ID())
	eq.log.Trace("Next unsafe payload to process", "next", p.ExecutionPayload.ID(), "timestamp", uint64(p.ExecutionPayload.Timestamp))
protolambda's avatar
protolambda committed
237 238
}

239
func (eq *EngineQueue) Finalize(l1Origin eth.L1BlockRef) {
240
	prevFinalizedL1 := eq.finalizedL1
241
	if l1Origin.Number < eq.finalizedL1.Number {
242
		eq.log.Error("ignoring old L1 finalized block signal! Is the L1 provider corrupted?", "prev_finalized_l1", prevFinalizedL1, "signaled_finalized_l1", l1Origin)
243 244
		return
	}
245 246 247 248 249 250 251 252 253

	// remember the L1 finalization signal
	eq.finalizedL1 = l1Origin

	// Sanity check: we only try to finalize L2 immediately, without fetching additional data,
	// if we are on the same chain as the signal.
	// If we are on a different chain, the signal will be ignored,
	// and tryFinalizeL1Origin() will eventually detect that we are on the wrong chain,
	// if not resetting due to reorg elsewhere already.
254 255 256 257 258 259
	for _, fd := range eq.finalityData {
		if fd.L1Block == l1Origin.ID() {
			eq.tryFinalizeL2()
			return
		}
	}
260

261
	eq.log.Info("received L1 finality signal, but missing data for immediate L2 finalization", "prev_finalized_l1", prevFinalizedL1, "signaled_finalized_l1", l1Origin)
262 263 264 265 266 267
}

// FinalizedL1 identifies the L1 chain (incl.) that included and/or produced all the finalized L2 blocks.
// This may return a zeroed ID if no finalization signals have been seen yet.
// The value is the last signal accepted by Finalize.
func (eq *EngineQueue) FinalizedL1() eth.L1BlockRef {
	return eq.finalizedL1
}

270 271 272 273 274 275
// LowestQueuedUnsafeBlock returns the block
func (eq *EngineQueue) LowestQueuedUnsafeBlock() eth.L2BlockRef {
	payload := eq.unsafePayloads.Peek()
	if payload == nil {
		return eth.L2BlockRef{}
	}
276
	ref, err := PayloadToBlockRef(eq.cfg, payload.ExecutionPayload)
277 278 279 280
	if err != nil {
		return eth.L2BlockRef{}
	}
	return ref
281 282
}

283 284 285 286
// BackupUnsafeL2Head returns the backup unsafe L2 head as reported by the engine controller.
func (eq *EngineQueue) BackupUnsafeL2Head() eth.L2BlockRef {
	return eq.ec.BackupUnsafeL2Head()
}

287 288
// isEngineSyncing reports whether the engine is syncing to the target block,
// as indicated by the engine controller.
func (eq *EngineQueue) isEngineSyncing() bool {
	return eq.ec.IsEngineSyncing()
}

292
func (eq *EngineQueue) Step(ctx context.Context) error {
293 294 295 296 297 298
	// If we don't need to call FCU to restore unsafeHead using backupUnsafe, keep going b/c
	// this was a no-op(except correcting invalid state when backupUnsafe is empty but TryBackupUnsafeReorg called).
	if fcuCalled, err := eq.ec.TryBackupUnsafeReorg(ctx); fcuCalled {
		// If we needed to perform a network call, then we should yield even if we did not encounter an error.
		return err
	}
299 300 301 302
	// If we don't need to call FCU, keep going b/c this was a no-op. If we needed to
	// perform a network call, then we should yield even if we did not encounter an error.
	if err := eq.ec.TryUpdateEngine(ctx); !errors.Is(err, errNoFCUNeeded) {
		return err
303
	}
304 305 306 307 308 309 310 311 312
	// Trying unsafe payload should be done before safe attributes
	// It allows the unsafe head can move forward while the long-range consolidation is in progress.
	if eq.unsafePayloads.Len() > 0 {
		if err := eq.tryNextUnsafePayload(ctx); err != io.EOF {
			return err
		}
		// EOF error means we can't process the next unsafe payload. Then we should process next safe attributes.
	}
	if eq.isEngineSyncing() {
313
		// The pipeline cannot move forwards if doing EL sync.
314
		return EngineELSyncing
315
	}
s7v7nislands's avatar
s7v7nislands committed
316
	if eq.safeAttributes != nil {
protolambda's avatar
protolambda committed
317 318
		return eq.tryNextSafeAttributes(ctx)
	}
319
	outOfData := false
320 321 322 323 324 325
	newOrigin := eq.prev.Origin()
	// Check if the L2 unsafe head origin is consistent with the new origin
	if err := eq.verifyNewL1Origin(ctx, newOrigin); err != nil {
		return err
	}
	eq.origin = newOrigin
326 327 328 329
	// make sure we track the last L2 safe head for every new L1 block
	if err := eq.postProcessSafeL2(); err != nil {
		return err
	}
330 331 332 333
	// try to finalize the L2 blocks we have synced so far (no-op if L1 finality is behind)
	if err := eq.tryFinalizePastL2Blocks(ctx); err != nil {
		return err
	}
334
	if next, err := eq.prev.NextAttributes(ctx, eq.ec.PendingSafeL2Head()); err == io.EOF {
335 336 337 338
		outOfData = true
	} else if err != nil {
		return err
	} else {
339
		eq.safeAttributes = next
340 341
		eq.log.Debug("Adding next safe attributes", "safe_head", eq.ec.SafeL2Head(),
			"pending_safe_head", eq.ec.PendingSafeL2Head(), "next", next)
342
		return NotEnoughData
343
	}
344

345 346 347 348 349
	if outOfData {
		return io.EOF
	} else {
		return nil
	}
protolambda's avatar
protolambda committed
350 351
}

352 353 354 355 356 357 358
// verifyNewL1Origin checks that the L2 unsafe head still has a L1 origin that is on the canonical chain.
// If the unsafe head origin is after the new L1 origin it is assumed to still be canonical.
// The check is only required when moving to a new L1 origin.
func (eq *EngineQueue) verifyNewL1Origin(ctx context.Context, newOrigin eth.L1BlockRef) error {
	if newOrigin == eq.origin {
		return nil
	}
359
	unsafeOrigin := eq.ec.UnsafeL2Head().L1Origin
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
	if newOrigin.Number == unsafeOrigin.Number && newOrigin.ID() != unsafeOrigin {
		return NewResetError(fmt.Errorf("l1 origin was inconsistent with l2 unsafe head origin, need reset to resolve: l1 origin: %v; unsafe origin: %v",
			newOrigin.ID(), unsafeOrigin))
	}
	// Avoid requesting an older block by checking against the parent hash
	if newOrigin.Number == unsafeOrigin.Number+1 && newOrigin.ParentHash != unsafeOrigin.Hash {
		return NewResetError(fmt.Errorf("l2 unsafe head origin is no longer canonical, need reset to resolve: canonical hash: %v; unsafe origin hash: %v",
			newOrigin.ParentHash, unsafeOrigin.Hash))
	}
	if newOrigin.Number > unsafeOrigin.Number+1 {
		// If unsafe origin is further behind new origin, check it's still on the canonical chain.
		canonical, err := eq.l1Fetcher.L1BlockRefByNumber(ctx, unsafeOrigin.Number)
		if err != nil {
			return NewTemporaryError(fmt.Errorf("failed to fetch canonical L1 block at slot: %v; err: %w", unsafeOrigin.Number, err))
		}
		if canonical.ID() != unsafeOrigin {
			eq.log.Error("Resetting due to origin mismatch")
			return NewResetError(fmt.Errorf("l2 unsafe head origin is no longer canonical, need reset to resolve: canonical: %v; unsafe origin: %v",
				canonical, unsafeOrigin))
		}
	}
	return nil
}

384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415
// tryFinalizePastL2Blocks attempts L2 finalization while traversing L1 blocks that are
// already covered by the known L1 finality signal (e.g. during sync).
//
// It is a no-op when no finality signal was seen yet, when the signal is behind the
// current origin, or when an attempt was made within the last finalityDelay L1 blocks.
// Before finalizing, the current origin is verified against the canonical L1 block at
// the same height: a fetch failure yields a temporary error, a mismatch a reset error.
func (eq *EngineQueue) tryFinalizePastL2Blocks(ctx context.Context) error {
	var unset eth.L1BlockRef
	switch {
	case eq.finalizedL1 == unset:
		return nil
	case eq.finalizedL1.Number < eq.origin.Number:
		// L1 finality is behind the point we are traversing: nothing to act on here,
		// we will finalize later on a new finality signal matching the recent history.
		return nil
	case eq.triedFinalizeAt != unset && eq.origin.Number <= eq.triedFinalizeAt.Number+finalityDelay:
		// We recently tried finalizing: traverse more of L1 first before trying again.
		return nil
	}

	eq.log.Info("processing L1 finality information", "l1_finalized", eq.finalizedL1, "l1_origin", eq.origin, "previous", eq.triedFinalizeAt)

	// Sanity check we are indeed on the finalizing chain, and not stuck on something else.
	// We assume that the block-by-number query is consistent with the previously received finalized chain signal
	canonical, err := eq.l1Fetcher.L1BlockRefByNumber(ctx, eq.origin.Number)
	if err != nil {
		return NewTemporaryError(fmt.Errorf("failed to check if on finalizing L1 chain: %w", err))
	}
	if canonical.Hash != eq.origin.Hash {
		return NewResetError(fmt.Errorf("need to reset, we are on %s, not on the finalizing L1 chain %s (towards %s)", eq.origin, canonical, eq.finalizedL1))
	}

	eq.tryFinalizeL2()
	return nil
}

416 417 418 419
// tryFinalizeL2 traverses the past L1 blocks, checks if any has been finalized,
// and then marks the latest fully derived L2 block from this as finalized,
// or defaults to the current finalized L2 block.
func (eq *EngineQueue) tryFinalizeL2() {
420
	if eq.finalizedL1 == (eth.L1BlockRef{}) {
421 422
		return // if no L1 information is finalized yet, then skip this
	}
423
	eq.triedFinalizeAt = eq.origin
424
	// default to keep the same finalized block
425
	finalizedL2 := eq.ec.Finalized()
426 427 428 429 430 431
	// go through the latest inclusion data, and find the last L2 block that was derived from a finalized L1 block
	for _, fd := range eq.finalityData {
		if fd.L2Block.Number > finalizedL2.Number && fd.L1Block.Number <= eq.finalizedL1.Number {
			finalizedL2 = fd.L2Block
		}
	}
432
	eq.ec.SetFinalizedHead(finalizedL2)
433 434 435 436
}

// postProcessSafeL2 buffers the L1 block the safe head was fully derived from,
// to finalize it once the L1 block, or later, finalizes.
437 438 439 440
func (eq *EngineQueue) postProcessSafeL2() error {
	if err := eq.notifyNewSafeHead(eq.ec.SafeL2Head()); err != nil {
		return err
	}
441
	// prune finality data if necessary
442 443
	if uint64(len(eq.finalityData)) >= calcFinalityLookback(eq.cfg) {
		eq.finalityData = append(eq.finalityData[:0], eq.finalityData[1:calcFinalityLookback(eq.cfg)]...)
444 445
	}
	// remember the last L2 block that we fully derived from the given finality data
446
	if len(eq.finalityData) == 0 || eq.finalityData[len(eq.finalityData)-1].L1Block.Number < eq.origin.Number {
447 448
		// append entry for new L1 block
		eq.finalityData = append(eq.finalityData, FinalityData{
449
			L2Block: eq.ec.SafeL2Head(),
450
			L1Block: eq.origin.ID(),
451
		})
452 453
		last := &eq.finalityData[len(eq.finalityData)-1]
		eq.log.Debug("extended finality-data", "last_l1", last.L1Block, "last_l2", last.L2Block)
454
	} else {
455 456
		// if it's a new L2 block that was derived from the same latest L1 block, then just update the entry
		last := &eq.finalityData[len(eq.finalityData)-1]
457 458
		if last.L2Block != eq.ec.SafeL2Head() { // avoid logging if there are no changes
			last.L2Block = eq.ec.SafeL2Head()
459 460
			eq.log.Debug("updated finality-data", "last_l1", last.L1Block, "last_l2", last.L2Block)
		}
461
	}
462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478
	return nil
}

// notifyNewSafeHead reports the given safe head, together with the current L1 origin,
// to the safe head listener. Nothing happens if this safe head was already reported.
// A listener failure is turned into a reset error.
func (eq *EngineQueue) notifyNewSafeHead(safeHead eth.L2BlockRef) error {
	if safeHead == eq.lastNotifiedSafeHead {
		// unchanged since the last notification
		return nil
	}
	notifyErr := eq.safeHeadNotifs.SafeHeadUpdated(safeHead, eq.origin.ID())
	if notifyErr != nil {
		// The safe head was already updated in the execution client, but post-processing it failed,
		// which leaves us in a potentially inconsistent state. Resetting the pipeline rolls the safe head
		// back a little (always at least 1 block), after which storing the entry is retried.
		return NewResetError(fmt.Errorf("safe head notifications failed: %w", notifyErr))
	}
	eq.lastNotifiedSafeHead = safeHead
	return nil
}
protolambda's avatar
protolambda committed
480

481 482 483
func (eq *EngineQueue) logSyncProgress(reason string) {
	eq.log.Info("Sync progress",
		"reason", reason,
484 485
		"l2_finalized", eq.ec.Finalized(),
		"l2_safe", eq.ec.SafeL2Head(),
486
		"l2_pending_safe", eq.ec.PendingSafeL2Head(),
487
		"l2_unsafe", eq.ec.UnsafeL2Head(),
488
		"l2_backup_unsafe", eq.ec.BackupUnsafeL2Head(),
489
		"l2_time", eq.ec.UnsafeL2Head().Time,
490
		"l1_derived", eq.origin,
491 492 493
	)
}

protolambda's avatar
protolambda committed
494
func (eq *EngineQueue) tryNextUnsafePayload(ctx context.Context) error {
495 496
	firstEnvelope := eq.unsafePayloads.Peek()
	first := firstEnvelope.ExecutionPayload
protolambda's avatar
protolambda committed
497

498 499
	if uint64(first.BlockNumber) <= eq.ec.SafeL2Head().Number {
		eq.log.Info("skipping unsafe payload, since it is older than safe head", "safe", eq.ec.SafeL2Head().ID(), "unsafe", first.ID(), "payload", first.ID())
500
		eq.unsafePayloads.Pop()
protolambda's avatar
protolambda committed
501 502
		return nil
	}
503 504
	if uint64(first.BlockNumber) <= eq.ec.UnsafeL2Head().Number {
		eq.log.Info("skipping unsafe payload, since it is older than unsafe head", "unsafe", eq.ec.UnsafeL2Head().ID(), "unsafe_payload", first.ID())
505 506 507
		eq.unsafePayloads.Pop()
		return nil
	}
protolambda's avatar
protolambda committed
508

509
	// Ensure that the unsafe payload builds upon the current unsafe head
510
	if first.ParentHash != eq.ec.UnsafeL2Head().Hash {
511 512
		if uint64(first.BlockNumber) == eq.ec.UnsafeL2Head().Number+1 {
			eq.log.Info("skipping unsafe payload, since it does not build onto the existing unsafe chain", "safe", eq.ec.SafeL2Head().ID(), "unsafe", first.ID(), "payload", first.ID())
513 514 515
			eq.unsafePayloads.Pop()
		}
		return io.EOF // time to go to next stage if we cannot process the first unsafe payload
protolambda's avatar
protolambda committed
516 517
	}

518
	ref, err := PayloadToBlockRef(eq.cfg, first)
protolambda's avatar
protolambda committed
519 520
	if err != nil {
		eq.log.Error("failed to decode L2 block ref from payload", "err", err)
521
		eq.unsafePayloads.Pop()
protolambda's avatar
protolambda committed
522 523 524
		return nil
	}

525
	if err := eq.ec.InsertUnsafePayload(ctx, firstEnvelope, ref); errors.Is(err, ErrTemporary) {
526 527 528 529
		eq.log.Debug("Temporary error while inserting unsafe payload", "hash", ref.Hash, "number", ref.Number, "timestamp", ref.Time, "l1Origin", ref.L1Origin)
		return err
	} else if err != nil {
		eq.log.Warn("Dropping invalid unsafe payload", "hash", ref.Hash, "number", ref.Number, "timestamp", ref.Time, "l1Origin", ref.L1Origin)
530
		eq.unsafePayloads.Pop()
531
		return err
532
	}
533
	eq.unsafePayloads.Pop()
534
	eq.log.Trace("Executed unsafe payload", "hash", ref.Hash, "number", ref.Number, "timestamp", ref.Time, "l1Origin", ref.L1Origin)
535
	eq.logSyncProgress("unsafe payload from sequencer")
536

protolambda's avatar
protolambda committed
537 538 539 540
	return nil
}

func (eq *EngineQueue) tryNextSafeAttributes(ctx context.Context) error {
protolambda's avatar
protolambda committed
541
	if eq.safeAttributes == nil { // sanity check the attributes are there
542 543
		return nil
	}
544
	// validate the safe attributes before processing them. The engine may have completed processing them through other means.
545
	if eq.ec.PendingSafeL2Head() != eq.safeAttributes.parent {
546
		// Previously the attribute's parent was the pending safe head. If the pending safe head advances so pending safe head's parent is the same as the
547
		// attribute's parent then we need to cancel the attributes.
548
		if eq.ec.PendingSafeL2Head().ParentHash == eq.safeAttributes.parent.Hash {
549
			eq.log.Warn("queued safe attributes are stale, safehead progressed",
550
				"pending_safe_head", eq.ec.PendingSafeL2Head(), "pending_safe_head_parent", eq.ec.PendingSafeL2Head().ParentID(),
551
				"attributes_parent", eq.safeAttributes.parent)
552 553
			eq.safeAttributes = nil
			return nil
554
		}
555
		// If something other than a simple advance occurred, perform a full reset
556
		return NewResetError(fmt.Errorf("pending safe head changed to %s with parent %s, conflicting with queued safe attributes on top of %s",
557
			eq.ec.PendingSafeL2Head(), eq.ec.PendingSafeL2Head().ParentID(), eq.safeAttributes.parent))
558

559
	}
560
	if eq.ec.PendingSafeL2Head().Number < eq.ec.UnsafeL2Head().Number {
protolambda's avatar
protolambda committed
561
		return eq.consolidateNextSafeAttributes(ctx)
562
	} else if eq.ec.PendingSafeL2Head().Number == eq.ec.UnsafeL2Head().Number {
protolambda's avatar
protolambda committed
563 564
		return eq.forceNextSafeAttributes(ctx)
	} else {
565
		// For some reason the unsafe head is behind the pending safe head. Log it, and correct it.
566 567
		eq.log.Error("invalid sync state, unsafe head is behind pending safe head", "unsafe", eq.ec.UnsafeL2Head(), "pending_safe", eq.ec.PendingSafeL2Head())
		eq.ec.SetUnsafeHead(eq.ec.PendingSafeL2Head())
protolambda's avatar
protolambda committed
568 569 570 571 572 573 574 575 576 577 578
		return nil
	}
}

// consolidateNextSafeAttributes tries to match the next safe attributes against the existing unsafe chain,
// to avoid extra processing or unnecessary unwinding of the chain.
// However, if the attributes do not match, they will be forced with forceNextSafeAttributes.
func (eq *EngineQueue) consolidateNextSafeAttributes(ctx context.Context) error {
	ctx, cancel := context.WithTimeout(ctx, time.Second*10)
	defer cancel()

579
	envelope, err := eq.engine.PayloadByNumber(ctx, eq.ec.PendingSafeL2Head().Number+1)
protolambda's avatar
protolambda committed
580
	if err != nil {
581 582 583 584
		if errors.Is(err, ethereum.NotFound) {
			// engine may have restarted, or inconsistent safe head. We need to reset
			return NewResetError(fmt.Errorf("expected engine was synced and had unsafe block to reconcile, but cannot find the block: %w", err))
		}
585
		return NewTemporaryError(fmt.Errorf("failed to get existing unsafe payload to compare against derived attributes from L1: %w", err))
protolambda's avatar
protolambda committed
586
	}
587
	if err := AttributesMatchBlock(eq.cfg, eq.safeAttributes.attributes, eq.ec.PendingSafeL2Head().Hash, envelope, eq.log); err != nil {
588
		eq.log.Warn("L2 reorg: existing unsafe block does not match derived attributes from L1", "err", err, "unsafe", eq.ec.UnsafeL2Head(), "pending_safe", eq.ec.PendingSafeL2Head(), "safe", eq.ec.SafeL2Head())
protolambda's avatar
protolambda committed
589 590 591
		// geth cannot wind back a chain without reorging to a new, previously non-canonical, block
		return eq.forceNextSafeAttributes(ctx)
	}
592
	ref, err := PayloadToBlockRef(eq.cfg, envelope.ExecutionPayload)
protolambda's avatar
protolambda committed
593
	if err != nil {
594
		return NewResetError(fmt.Errorf("failed to decode L2 block ref from payload: %w", err))
protolambda's avatar
protolambda committed
595
	}
596
	eq.ec.SetPendingSafeL2Head(ref)
597
	if eq.safeAttributes.isLastInSpan {
598
		eq.ec.SetSafeHead(ref)
599 600 601
		if err := eq.postProcessSafeL2(); err != nil {
			return err
		}
602
	}
protolambda's avatar
protolambda committed
603
	// unsafe head stays the same, we did not reorg the chain.
s7v7nislands's avatar
s7v7nislands committed
604
	eq.safeAttributes = nil
605
	eq.logSyncProgress("reconciled with L1")
606

protolambda's avatar
protolambda committed
607 608 609 610 611
	return nil
}

// forceNextSafeAttributes inserts the provided attributes, reorging away any conflicting unsafe chain.
func (eq *EngineQueue) forceNextSafeAttributes(ctx context.Context) error {
s7v7nislands's avatar
s7v7nislands committed
612
	if eq.safeAttributes == nil {
protolambda's avatar
protolambda committed
613 614
		return nil
	}
615
	attrs := eq.safeAttributes.attributes
616
	lastInSpan := eq.safeAttributes.isLastInSpan
617
	errType, err := eq.StartPayload(ctx, eq.ec.PendingSafeL2Head(), eq.safeAttributes, true)
618
	if err == nil {
619
		_, errType, err = eq.ec.ConfirmPayload(ctx, async.NoOpGossiper{}, &conductor.NoOpConductor{})
620
	}
621 622 623 624 625 626
	if err != nil {
		switch errType {
		case BlockInsertTemporaryErr:
			// RPC errors are recoverable, we can retry the buffered payload attributes later.
			return NewTemporaryError(fmt.Errorf("temporarily cannot insert new safe block: %w", err))
		case BlockInsertPrestateErr:
627
			_ = eq.CancelPayload(ctx, true)
628 629
			return NewResetError(fmt.Errorf("need reset to resolve pre-state problem: %w", err))
		case BlockInsertPayloadErr:
630
			_ = eq.CancelPayload(ctx, true)
631 632 633
			eq.log.Warn("could not process payload derived from L1 data, dropping batch", "err", err)
			// Count the number of deposits to see if the tx list is deposit only.
			depositCount := 0
634 635
			for _, tx := range attrs.Transactions {
				if len(tx) > 0 && tx[0] == types.DepositTxType {
636
					depositCount += 1
637
				}
638
			}
639 640 641
			// Deposit transaction execution errors are suppressed in the execution engine, but if the
			// block is somehow invalid, there is nothing we can do to recover & we should exit.
			// TODO: Can this be triggered by an empty batch with invalid data (like parent hash or gas limit?)
642
			if len(attrs.Transactions) == depositCount {
643
				eq.log.Error("deposit only block was invalid", "parent", eq.safeAttributes.parent, "err", err)
644
				return NewCriticalError(fmt.Errorf("failed to process block with only deposit transactions: %w", err))
645
			}
646
			// drop the payload without inserting it
s7v7nislands's avatar
s7v7nislands committed
647
			eq.safeAttributes = nil
648
			// Revert the pending safe head to the safe head.
649
			eq.ec.SetPendingSafeL2Head(eq.ec.SafeL2Head())
650
			// suppress the error b/c we want to retry with the next batch from the batch queue
651 652
			// If there is no valid batch the node will eventually force a deposit only block. If
			// the deposit only block fails, this will return the critical error above.
653

654 655 656 657
			// Try to restore to previous known unsafe chain.
			eq.ec.SetBackupUnsafeL2Head(eq.ec.BackupUnsafeL2Head(), true)

			return nil
658 659
		default:
			return NewCriticalError(fmt.Errorf("unknown InsertHeadBlock error type %d: %w", errType, err))
660
		}
protolambda's avatar
protolambda committed
661
	}
s7v7nislands's avatar
s7v7nislands committed
662
	eq.safeAttributes = nil
663
	eq.logSyncProgress("processed safe block derived from L1")
664
	if lastInSpan {
665 666 667
		if err := eq.postProcessSafeL2(); err != nil {
			return err
		}
668
	}
669 670 671 672

	return nil
}

673 674
func (eq *EngineQueue) StartPayload(ctx context.Context, parent eth.L2BlockRef, attrs *AttributesWithParent, updateSafe bool) (errType BlockInsertionErrType, err error) {
	return eq.ec.StartPayload(ctx, parent, attrs, updateSafe)
675 676
}

677 678
func (eq *EngineQueue) ConfirmPayload(ctx context.Context, agossip async.AsyncGossiper, sequencerConductor conductor.SequencerConductor) (out *eth.ExecutionPayloadEnvelope, errTyp BlockInsertionErrType, err error) {
	return eq.ec.ConfirmPayload(ctx, agossip, sequencerConductor)
679 680 681
}

func (eq *EngineQueue) CancelPayload(ctx context.Context, force bool) error {
682
	return eq.ec.CancelPayload(ctx, force)
protolambda's avatar
protolambda committed
683 684
}

685
func (eq *EngineQueue) BuildingPayload() (onto eth.L2BlockRef, id eth.PayloadID, safe bool) {
686
	return eq.ec.BuildingPayload()
687 688
}

pengin7384's avatar
pengin7384 committed
689
// Reset walks the L2 chain backwards until it finds an L2 block whose L1 origin is canonical.
protolambda's avatar
protolambda committed
690
// The unsafe head is set to the head of the L2 chain, unless the existing safe head is not canonical.
691
func (eq *EngineQueue) Reset(ctx context.Context, _ eth.L1BlockRef, _ eth.SystemConfig) error {
692
	result, err := sync.FindL2Heads(ctx, eq.cfg, eq.l1Fetcher, eq.engine, eq.log, eq.syncCfg)
693
	if err != nil {
694
		return NewTemporaryError(fmt.Errorf("failed to find the L2 Heads to start from: %w", err))
protolambda's avatar
protolambda committed
695
	}
protolambda's avatar
protolambda committed
696
	finalized, safe, unsafe := result.Finalized, result.Safe, result.Unsafe
697
	l1Origin, err := eq.l1Fetcher.L1BlockRefByHash(ctx, safe.L1Origin.Hash)
protolambda's avatar
protolambda committed
698
	if err != nil {
699
		return NewTemporaryError(fmt.Errorf("failed to fetch the new L1 progress: origin: %v; err: %w", safe.L1Origin, err))
protolambda's avatar
protolambda committed
700
	}
701
	if safe.Time < l1Origin.Time {
702 703
		return NewResetError(fmt.Errorf("cannot reset block derivation to start at L2 block %s with time %d older than its L1 origin %s with time %d, time invariant is broken",
			safe, safe.Time, l1Origin, l1Origin.Time))
704
	}
705

706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724
	// Walk back L2 chain to find the L1 origin that is old enough to start buffering channel data from.
	pipelineL2 := safe
	for {
		afterL2Genesis := pipelineL2.Number > eq.cfg.Genesis.L2.Number
		afterL1Genesis := pipelineL2.L1Origin.Number > eq.cfg.Genesis.L1.Number
		afterChannelTimeout := pipelineL2.L1Origin.Number+eq.cfg.ChannelTimeout > l1Origin.Number
		if afterL2Genesis && afterL1Genesis && afterChannelTimeout {
			parent, err := eq.engine.L2BlockRefByHash(ctx, pipelineL2.ParentHash)
			if err != nil {
				return NewResetError(fmt.Errorf("failed to fetch L2 parent block %s", pipelineL2.ParentID()))
			}
			pipelineL2 = parent
		} else {
			break
		}
	}
	pipelineOrigin, err := eq.l1Fetcher.L1BlockRefByHash(ctx, pipelineL2.L1Origin.Hash)
	if err != nil {
		return NewTemporaryError(fmt.Errorf("failed to fetch the new L1 progress: origin: %s; err: %w", pipelineL2.L1Origin, err))
725
	}
726
	l1Cfg, err := eq.engine.SystemConfigByL2Hash(ctx, pipelineL2.Hash)
727
	if err != nil {
Joshua Gutow's avatar
Joshua Gutow committed
728
		return NewTemporaryError(fmt.Errorf("failed to fetch L1 config of L2 block %s: %w", pipelineL2.ID(), err))
729
	}
730
	eq.log.Debug("Reset engine queue", "safeHead", safe, "unsafe", unsafe, "safe_timestamp", safe.Time, "unsafe_timestamp", unsafe.Time, "l1Origin", l1Origin)
731 732 733 734
	eq.ec.SetUnsafeHead(unsafe)
	eq.ec.SetSafeHead(safe)
	eq.ec.SetPendingSafeL2Head(safe)
	eq.ec.SetFinalizedHead(finalized)
735
	eq.ec.SetBackupUnsafeL2Head(eth.L2BlockRef{}, false)
Joshua Gutow's avatar
Joshua Gutow committed
736
	eq.safeAttributes = nil
737
	eq.ec.ResetBuildingState()
738
	eq.finalityData = eq.finalityData[:0]
739
	// note: finalizedL1 and triedFinalizeAt do not reset, since these do not change between reorgs.
740
	// note: we do not clear the unsafe payloads queue; if the payloads are not applicable anymore the parent hash checks will clear out the old payloads.
741
	eq.origin = pipelineOrigin
742
	eq.sysCfg = l1Cfg
743 744 745 746
	eq.lastNotifiedSafeHead = safe
	if err := eq.safeHeadNotifs.SafeHeadReset(safe); err != nil {
		return err
	}
747
	if eq.safeHeadNotifs.Enabled() && safe.Number == eq.cfg.Genesis.L2.Number && safe.Hash == eq.cfg.Genesis.L2.Hash {
748 749 750 751 752 753 754 755 756 757 758 759 760
		// The rollup genesis block is always safe by definition. So if the pipeline resets this far back we know
		// we will process all safe head updates and can record genesis as always safe from L1 genesis.
		// Note that it is not safe to use cfg.Genesis.L1 here as it is the block immediately before the L2 genesis
		// but the contracts may have been deployed earlier than that, allowing creating a dispute game
		// with a L1 head prior to cfg.Genesis.L1
		l1Genesis, err := eq.l1Fetcher.L1BlockRefByNumber(ctx, 0)
		if err != nil {
			return fmt.Errorf("failed to retrieve L1 genesis: %w", err)
		}
		if err := eq.safeHeadNotifs.SafeHeadUpdated(safe, l1Genesis.ID()); err != nil {
			return err
		}
	}
761
	eq.logSyncProgress("reset derivation work")
762
	return io.EOF
protolambda's avatar
protolambda committed
763
}
clabby's avatar
clabby committed
764

765 766
// UnsafeL2SyncTarget retrieves the first queued-up L2 unsafe payload, or a zeroed reference if there is none.
func (eq *EngineQueue) UnsafeL2SyncTarget() eth.L2BlockRef {
clabby's avatar
clabby committed
767
	if first := eq.unsafePayloads.Peek(); first != nil {
768
		ref, err := PayloadToBlockRef(eq.cfg, first.ExecutionPayload)
769 770 771 772
		if err != nil {
			return eth.L2BlockRef{}
		}
		return ref
clabby's avatar
clabby committed
773
	} else {
774
		return eth.L2BlockRef{}
clabby's avatar
clabby committed
775 776
	}
}