engine_queue.go 33.8 KB
Newer Older
protolambda's avatar
protolambda committed
1 2 3 4
package derive

import (
	"context"
5
	"errors"
protolambda's avatar
protolambda committed
6 7 8 9
	"fmt"
	"io"
	"time"

10
	"github.com/ethereum/go-ethereum"
protolambda's avatar
protolambda committed
11
	"github.com/ethereum/go-ethereum/common"
12
	"github.com/ethereum/go-ethereum/core/types"
protolambda's avatar
protolambda committed
13
	"github.com/ethereum/go-ethereum/log"
14 15 16

	"github.com/ethereum-optimism/optimism/op-node/rollup"
	"github.com/ethereum-optimism/optimism/op-node/rollup/sync"
17
	"github.com/ethereum-optimism/optimism/op-service/eth"
protolambda's avatar
protolambda committed
18 19
)

20 21 22 23 24
// attributesWithParent pairs queued payload attributes with the L2 block
// that was the safe head at the time the attributes were queued.
type attributesWithParent struct {
	attributes *eth.PayloadAttributes
	parent     eth.L2BlockRef
}

25 26 27 28 29
// NextAttributesProvider is the previous stage that the EngineQueue pulls payload attributes from.
type NextAttributesProvider interface {
	// Origin returns the L1 block the provider is currently at.
	// EngineQueue.Step adopts it as its own origin after verifying it.
	Origin() eth.L1BlockRef
	// NextAttributes returns the next payload attributes for the given L2 block
	// (EngineQueue.Step passes its current safe head).
	// EngineQueue.Step propagates io.EOF from this call as "no attributes available".
	NextAttributes(context.Context, eth.L2BlockRef) (*eth.PayloadAttributes, error)
}

protolambda's avatar
protolambda committed
30 31 32 33 34 35
type Engine interface {
	GetPayload(ctx context.Context, payloadId eth.PayloadID) (*eth.ExecutionPayload, error)
	ForkchoiceUpdate(ctx context.Context, state *eth.ForkchoiceState, attr *eth.PayloadAttributes) (*eth.ForkchoiceUpdatedResult, error)
	NewPayload(ctx context.Context, payload *eth.ExecutionPayload) (*eth.PayloadStatusV1, error)
	PayloadByHash(context.Context, common.Hash) (*eth.ExecutionPayload, error)
	PayloadByNumber(context.Context, uint64) (*eth.ExecutionPayload, error)
36
	L2BlockRefByLabel(ctx context.Context, label eth.BlockLabel) (eth.L2BlockRef, error)
protolambda's avatar
protolambda committed
37
	L2BlockRefByHash(ctx context.Context, l2Hash common.Hash) (eth.L2BlockRef, error)
38
	SystemConfigL2Fetcher
protolambda's avatar
protolambda committed
39 40
}

41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
// EngineState provides a read-only interface of the forkchoice state properties of the L2 Engine.
type EngineState interface {
	// Finalized returns the finalized L2 block.
	Finalized() eth.L2BlockRef
	// UnsafeL2Head returns the unsafe L2 head.
	UnsafeL2Head() eth.L2BlockRef
	// SafeL2Head returns the safe L2 head.
	SafeL2Head() eth.L2BlockRef
}

// EngineControl enables other components to build blocks with the Engine,
// while keeping the forkchoice state and payload-id management internal to
// avoid state inconsistencies between different users of the EngineControl.
type EngineControl interface {
	EngineState

	// StartPayload requests the engine to start building a block with the given attributes.
	// If updateSafe, the resulting block will be marked as a safe block.
	// On failure, errType classifies the error so the caller can decide how to react.
	StartPayload(ctx context.Context, parent eth.L2BlockRef, attrs *eth.PayloadAttributes, updateSafe bool) (errType BlockInsertionErrType, err error)
	// ConfirmPayload requests the engine to complete the current block. If no block is being built, or if it fails, an error is returned.
	// On failure, errTyp classifies the error so the caller can decide how to react.
	ConfirmPayload(ctx context.Context) (out *eth.ExecutionPayload, errTyp BlockInsertionErrType, err error)
	// CancelPayload requests the engine to stop building the current block without making it canonical.
	// This is optional, as the engine expires building jobs that are left uncompleted, but can still save resources.
	// NOTE(review): the exact effect of force is not visible here — confirm against the implementation.
	CancelPayload(ctx context.Context, force bool) error
	// BuildingPayload indicates if a payload is being built, and onto which block it is being built, and whether or not it is a safe payload.
	BuildingPayload() (onto eth.L2BlockRef, id eth.PayloadID, safe bool)
}

66 67
// maxUnsafePayloadsMemory is the maximum memory used for buffering unsafe payloads.
// It is passed to NewPayloadsQueue as the queue's size limit
// (500 * 1024 * 1024, presumably bytes — confirm against PayloadsQueue).
const maxUnsafePayloadsMemory = 500 * 1024 * 1024
protolambda's avatar
protolambda committed
68

69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
// finalityLookback defines the amount of L1<>L2 relations to track for finalization purposes, one per L1 block.
//
// When L1 finalizes blocks, it finalizes finalityLookback blocks behind the L1 head.
// Non-finality may take longer, but when it does finalize again, it is within this range of the L1 head.
// Thus we only need to retain the L1<>L2 derivation relation data of this many L1 blocks.
//
// In the event of older finalization signals, misconfiguration, or insufficient L1<>L2 derivation relation data,
// we may miss the opportunity to finalize more L2 blocks.
// This does not cause any divergence, it just causes lagging finalization status.
//
// The beacon chain on mainnet has 32 slots per epoch,
// and new finalization events happen at most 4 epochs behind the head.
// We then add 1 to make pruning easier, by leaving room for a new item without pruning the 32*4.
const finalityLookback = 4*32 + 1

84 85 86 87
// finalityDelay is the number of L1 blocks to traverse before trying to finalize L2 blocks again.
// We do not want to do this too often, since it requires fetching an L1 block by number,
// which cannot rely on cached data.
const finalityDelay = 64

88 89 90 91 92 93 94 95
// FinalityData records, for one L1 block, the last L2 block that was derived while the
// EngineQueue was at that L1 block. It is used to translate L1 finality into L2 finality.
type FinalityData struct {
	// The last L2 block that was fully derived and inserted into the L2 engine while processing this L1 block.
	L2Block eth.L2BlockRef
	// The L1 block this stage was at when inserting the L2 block.
	// When this L1 block is finalized, the L2 chain up to this block can be fully reproduced from finalized L1 data.
	L1Block eth.BlockID
}

protolambda's avatar
protolambda committed
96 97 98 99 100 101 102 103 104
// EngineQueue queues up payload attributes to consolidate or process with the provided Engine
type EngineQueue struct {
	log log.Logger
	cfg *rollup.Config

	finalized  eth.L2BlockRef
	safeHead   eth.L2BlockRef
	unsafeHead eth.L2BlockRef

105 106 107 108
	// Target L2 block the engine is currently syncing to.
	// If the engine p2p sync is enabled, it can be different with unsafeHead. Otherwise, it must be same with unsafeHead.
	engineSyncTarget eth.L2BlockRef

109 110 111 112
	buildingOnto eth.L2BlockRef
	buildingID   eth.PayloadID
	buildingSafe bool

113 114 115 116 117
	// Track when the rollup node changes the forkchoice without engine action,
	// e.g. on a reset after a reorg, or after consolidating a block.
	// This update may repeat if the engine returns a temporary error.
	needForkchoiceUpdate bool

118 119
	// finalizedL1 is the currently perceived finalized L1 block.
	// This may be ahead of the current traversed origin when syncing.
120
	finalizedL1 eth.L1BlockRef
protolambda's avatar
protolambda committed
121

122 123 124
	// triedFinalizeAt tracks at which origin we last tried to finalize during sync.
	triedFinalizeAt eth.L1BlockRef

125
	// The queued-up attributes
126 127
	safeAttributes *attributesWithParent
	unsafePayloads *PayloadsQueue // queue of unsafe payloads, ordered by ascending block number, may have gaps and duplicates
protolambda's avatar
protolambda committed
128

129 130 131
	// Tracks which L2 blocks where last derived from which L1 block. At most finalityLookback large.
	finalityData []FinalityData

protolambda's avatar
protolambda committed
132
	engine Engine
133 134
	prev   NextAttributesProvider

135 136
	origin eth.L1BlockRef   // updated on resets, and whenever we read from the previous stage.
	sysCfg eth.SystemConfig // only used for pipeline resets
137

138 139
	metrics   Metrics
	l1Fetcher L1Fetcher
140 141

	syncCfg *sync.Config
protolambda's avatar
protolambda committed
142 143
}

144 145
// Compile-time check that *EngineQueue implements EngineControl.
var _ EngineControl = (*EngineQueue)(nil)

protolambda's avatar
protolambda committed
146
// NewEngineQueue creates a new EngineQueue, which should be Reset(origin) before use.
147
func NewEngineQueue(log log.Logger, cfg *rollup.Config, engine Engine, metrics Metrics, prev NextAttributesProvider, l1Fetcher L1Fetcher, syncCfg *sync.Config) *EngineQueue {
148
	return &EngineQueue{
149 150 151 152 153 154 155 156
		log:            log,
		cfg:            cfg,
		engine:         engine,
		metrics:        metrics,
		finalityData:   make([]FinalityData, 0, finalityLookback),
		unsafePayloads: NewPayloadsQueue(maxUnsafePayloadsMemory, payloadMemSize),
		prev:           prev,
		l1Fetcher:      l1Fetcher,
157
		syncCfg:        syncCfg,
158
	}
protolambda's avatar
protolambda committed
159 160
}

161
// Origin identifies the L1 chain (incl.) that included and/or produced all the safe L2 blocks.
162 163
func (eq *EngineQueue) Origin() eth.L1BlockRef {
	return eq.origin
protolambda's avatar
protolambda committed
164 165
}

166 167 168 169
// SystemConfig returns the system config currently held by the queue.
func (eq *EngineQueue) SystemConfig() eth.SystemConfig {
	return eq.sysCfg
}

protolambda's avatar
protolambda committed
170 171
func (eq *EngineQueue) SetUnsafeHead(head eth.L2BlockRef) {
	eq.unsafeHead = head
172
	eq.metrics.RecordL2Ref("l2_unsafe", head)
protolambda's avatar
protolambda committed
173 174
}

175 176 177 178 179
// SetEngineSyncTarget overrides the tracked engine sync target and records it in the
// "l2_engineSyncTarget" metric.
func (eq *EngineQueue) SetEngineSyncTarget(head eth.L2BlockRef) {
	eq.engineSyncTarget = head
	eq.metrics.RecordL2Ref("l2_engineSyncTarget", head)
}

protolambda's avatar
protolambda committed
180
func (eq *EngineQueue) AddUnsafePayload(payload *eth.ExecutionPayload) {
181 182 183
	if payload == nil {
		eq.log.Warn("cannot add nil unsafe payload")
		return
protolambda's avatar
protolambda committed
184
	}
185 186 187 188 189 190 191
	if err := eq.unsafePayloads.Push(payload); err != nil {
		eq.log.Warn("Could not add unsafe payload", "id", payload.ID(), "timestamp", uint64(payload.Timestamp), "err", err)
		return
	}
	p := eq.unsafePayloads.Peek()
	eq.metrics.RecordUnsafePayloadsBuffer(uint64(eq.unsafePayloads.Len()), eq.unsafePayloads.MemSize(), p.ID())
	eq.log.Trace("Next unsafe payload to process", "next", p.ID(), "timestamp", uint64(p.Timestamp))
protolambda's avatar
protolambda committed
192 193
}

194 195 196 197 198
func (eq *EngineQueue) Finalize(l1Origin eth.L1BlockRef) {
	if l1Origin.Number < eq.finalizedL1.Number {
		eq.log.Error("ignoring old L1 finalized block signal! Is the L1 provider corrupted?", "prev_finalized_l1", eq.finalizedL1, "signaled_finalized_l1", l1Origin)
		return
	}
199 200 201 202 203 204 205 206 207

	// remember the L1 finalization signal
	eq.finalizedL1 = l1Origin

	// Sanity check: we only try to finalize L2 immediately, without fetching additional data,
	// if we are on the same chain as the signal.
	// If we are on a different chain, the signal will be ignored,
	// and tryFinalizeL1Origin() will eventually detect that we are on the wrong chain,
	// if not resetting due to reorg elsewhere already.
208 209 210 211 212 213
	for _, fd := range eq.finalityData {
		if fd.L1Block == l1Origin.ID() {
			eq.tryFinalizeL2()
			return
		}
	}
214 215

	eq.log.Info("received L1 finality signal, but missing data for immediate L2 finalization", "prev_finalized_l1", eq.finalizedL1, "signaled_finalized_l1", l1Origin)
216 217 218 219 220 221
}

// FinalizedL1 identifies the L1 chain (incl.) that included and/or produced all the finalized L2 blocks.
// This may return a zeroed ID if no finalization signals have been seen yet.
func (eq *EngineQueue) FinalizedL1() eth.L1BlockRef {
	return eq.finalizedL1
protolambda's avatar
protolambda committed
222 223 224 225 226 227 228 229 230 231 232 233 234 235
}

// Finalized returns the L2 block currently tracked as finalized.
func (eq *EngineQueue) Finalized() eth.L2BlockRef {
	return eq.finalized
}

// UnsafeL2Head returns the L2 block currently tracked as the unsafe head.
func (eq *EngineQueue) UnsafeL2Head() eth.L2BlockRef {
	return eq.unsafeHead
}

// SafeL2Head returns the L2 block currently tracked as the safe head.
func (eq *EngineQueue) SafeL2Head() eth.L2BlockRef {
	return eq.safeHead
}

236 237 238 239 240 241 242 243 244
// EngineSyncTarget returns the L2 block the engine is currently syncing to.
func (eq *EngineQueue) EngineSyncTarget() eth.L2BlockRef {
	return eq.engineSyncTarget
}

// isEngineSyncing reports whether the engine is still syncing towards its target,
// i.e. whether the sync target and the unsafe head are different blocks (compared by hash).
func (eq *EngineQueue) isEngineSyncing() bool {
	target, head := eq.engineSyncTarget.Hash, eq.unsafeHead.Hash
	return target != head
}

245
func (eq *EngineQueue) Step(ctx context.Context) error {
246 247 248
	if eq.needForkchoiceUpdate {
		return eq.tryUpdateEngine(ctx)
	}
249 250 251 252 253 254 255 256 257 258 259 260
	// Trying unsafe payload should be done before safe attributes
	// It allows the unsafe head can move forward while the long-range consolidation is in progress.
	if eq.unsafePayloads.Len() > 0 {
		if err := eq.tryNextUnsafePayload(ctx); err != io.EOF {
			return err
		}
		// EOF error means we can't process the next unsafe payload. Then we should process next safe attributes.
	}
	if eq.isEngineSyncing() {
		// Make pipeline first focus to sync unsafe blocks to engineSyncTarget
		return EngineP2PSyncing
	}
s7v7nislands's avatar
s7v7nislands committed
261
	if eq.safeAttributes != nil {
protolambda's avatar
protolambda committed
262 263
		return eq.tryNextSafeAttributes(ctx)
	}
264
	outOfData := false
265 266 267 268 269 270 271
	newOrigin := eq.prev.Origin()
	// Check if the L2 unsafe head origin is consistent with the new origin
	if err := eq.verifyNewL1Origin(ctx, newOrigin); err != nil {
		return err
	}
	eq.origin = newOrigin
	eq.postProcessSafeL2() // make sure we track the last L2 safe head for every new L1 block
272 273 274 275
	// try to finalize the L2 blocks we have synced so far (no-op if L1 finality is behind)
	if err := eq.tryFinalizePastL2Blocks(ctx); err != nil {
		return err
	}
276 277 278 279 280
	if next, err := eq.prev.NextAttributes(ctx, eq.safeHead); err == io.EOF {
		outOfData = true
	} else if err != nil {
		return err
	} else {
281 282 283 284 285
		eq.safeAttributes = &attributesWithParent{
			attributes: next,
			parent:     eq.safeHead,
		}
		eq.log.Debug("Adding next safe attributes", "safe_head", eq.safeHead, "next", next)
286
		return NotEnoughData
287
	}
288

289 290 291 292 293
	if outOfData {
		return io.EOF
	} else {
		return nil
	}
protolambda's avatar
protolambda committed
294 295
}

296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327
// verifyNewL1Origin makes sure the L1 origin of the L2 unsafe head is still canonical
// with respect to newOrigin. Nothing is checked when the origin did not change, and an
// unsafe head origin that lies after newOrigin is assumed to still be canonical.
//
// A reset error is returned when the unsafe head origin conflicts with the canonical chain,
// a temporary error when the canonical L1 block could not be fetched.
func (eq *EngineQueue) verifyNewL1Origin(ctx context.Context, newOrigin eth.L1BlockRef) error {
	if eq.origin == newOrigin {
		return nil
	}
	l2Origin := eq.unsafeHead.L1Origin

	// Same height: both must be the very same block.
	sameHeight := newOrigin.Number == l2Origin.Number
	if sameHeight && newOrigin.ID() != l2Origin {
		return NewResetError(fmt.Errorf("l1 origin was inconsistent with l2 unsafe head origin, need reset to resolve: l1 origin: %v; unsafe origin: %v",
			newOrigin.ID(), l2Origin))
	}

	// Direct child: the parent hash is enough, which avoids requesting an older block.
	directChild := newOrigin.Number == l2Origin.Number+1
	if directChild && newOrigin.ParentHash != l2Origin.Hash {
		return NewResetError(fmt.Errorf("l2 unsafe head origin is no longer canonical, need reset to resolve: canonical hash: %v; unsafe origin hash: %v",
			newOrigin.ParentHash, l2Origin.Hash))
	}

	if newOrigin.Number <= l2Origin.Number+1 {
		return nil
	}

	// The unsafe origin is further behind the new origin: look up the canonical block at its height.
	onChain, err := eq.l1Fetcher.L1BlockRefByNumber(ctx, l2Origin.Number)
	if err != nil {
		return NewTemporaryError(fmt.Errorf("failed to fetch canonical L1 block at slot: %v; err: %w", l2Origin.Number, err))
	}
	if onChain.ID() == l2Origin {
		return nil
	}
	eq.log.Error("Resetting due to origin mismatch")
	return NewResetError(fmt.Errorf("l2 unsafe head origin is no longer canonical, need reset to resolve: canonical: %v; unsafe origin: %v",
		onChain, l2Origin))
}

328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359
// tryFinalizePastL2Blocks tries to finalize already-derived L2 blocks while the traversed
// origin is at or behind the finalized L1 block (e.g. during sync).
//
// It first confirms, via a block-by-number lookup, that the current origin is on the canonical
// L1 chain. A failed lookup yields a temporary error, a hash mismatch a reset error.
func (eq *EngineQueue) tryFinalizePastL2Blocks(ctx context.Context) error {
	var unset eth.L1BlockRef
	switch {
	case eq.finalizedL1 == unset:
		// No finality signal has been received yet.
		return nil
	case eq.finalizedL1.Number < eq.origin.Number:
		// L1 finality is behind the block being traversed: nothing to act on here,
		// a later finality signal matching the recent history will finalize instead.
		return nil
	case eq.triedFinalizeAt != unset && eq.origin.Number <= eq.triedFinalizeAt.Number+finalityDelay:
		// Tried recently: traverse more of L1 before trying again.
		return nil
	}

	eq.log.Info("processing L1 finality information", "l1_finalized", eq.finalizedL1, "l1_origin", eq.origin, "previous", eq.triedFinalizeAt)

	// Sanity check that we are on the finalizing chain and not stuck on something else.
	// The block-by-number query is assumed to be consistent with the received finality signal.
	canonical, err := eq.l1Fetcher.L1BlockRefByNumber(ctx, eq.origin.Number)
	if err != nil {
		return NewTemporaryError(fmt.Errorf("failed to check if on finalizing L1 chain: %w", err))
	}
	if canonical.Hash != eq.origin.Hash {
		return NewResetError(fmt.Errorf("need to reset, we are on %s, not on the finalizing L1 chain %s (towards %s)", eq.origin, canonical, eq.finalizedL1))
	}

	eq.tryFinalizeL2()
	return nil
}

360 361 362 363
// tryFinalizeL2 traverses the past L1 blocks, checks if any has been finalized,
// and then marks the latest fully derived L2 block from this as finalized,
// or defaults to the current finalized L2 block.
func (eq *EngineQueue) tryFinalizeL2() {
364
	if eq.finalizedL1 == (eth.L1BlockRef{}) {
365 366
		return // if no L1 information is finalized yet, then skip this
	}
367
	eq.triedFinalizeAt = eq.origin
368 369 370 371 372 373
	// default to keep the same finalized block
	finalizedL2 := eq.finalized
	// go through the latest inclusion data, and find the last L2 block that was derived from a finalized L1 block
	for _, fd := range eq.finalityData {
		if fd.L2Block.Number > finalizedL2.Number && fd.L1Block.Number <= eq.finalizedL1.Number {
			finalizedL2 = fd.L2Block
374
			eq.needForkchoiceUpdate = true
375 376 377
		}
	}
	eq.finalized = finalizedL2
378
	eq.metrics.RecordL2Ref("l2_finalized", finalizedL2)
379 380 381 382 383 384 385 386 387 388
}

// postProcessSafeL2 buffers the L1 block the safe head was fully derived from,
// to finalize it once the L1 block, or later, finalizes.
func (eq *EngineQueue) postProcessSafeL2() {
	// prune finality data if necessary
	if len(eq.finalityData) >= finalityLookback {
		eq.finalityData = append(eq.finalityData[:0], eq.finalityData[1:finalityLookback]...)
	}
	// remember the last L2 block that we fully derived from the given finality data
389
	if len(eq.finalityData) == 0 || eq.finalityData[len(eq.finalityData)-1].L1Block.Number < eq.origin.Number {
390 391 392
		// append entry for new L1 block
		eq.finalityData = append(eq.finalityData, FinalityData{
			L2Block: eq.safeHead,
393
			L1Block: eq.origin.ID(),
394
		})
395 396
		last := &eq.finalityData[len(eq.finalityData)-1]
		eq.log.Debug("extended finality-data", "last_l1", last.L1Block, "last_l2", last.L2Block)
397
	} else {
398 399 400 401 402 403
		// if it's a new L2 block that was derived from the same latest L1 block, then just update the entry
		last := &eq.finalityData[len(eq.finalityData)-1]
		if last.L2Block != eq.safeHead { // avoid logging if there are no changes
			last.L2Block = eq.safeHead
			eq.log.Debug("updated finality-data", "last_l1", last.L1Block, "last_l2", last.L2Block)
		}
404 405
	}
}
protolambda's avatar
protolambda committed
406

407 408 409 410 411 412
func (eq *EngineQueue) logSyncProgress(reason string) {
	eq.log.Info("Sync progress",
		"reason", reason,
		"l2_finalized", eq.finalized,
		"l2_safe", eq.safeHead,
		"l2_unsafe", eq.unsafeHead,
413
		"l2_engineSyncTarget", eq.engineSyncTarget,
414
		"l2_time", eq.unsafeHead.Time,
415
		"l1_derived", eq.origin,
416 417 418
	)
}

419 420 421
// tryUpdateEngine attempts to update the engine with the current forkchoice state of the rollup node,
// this is a no-op if the nodes already agree on the forkchoice state.
func (eq *EngineQueue) tryUpdateEngine(ctx context.Context) error {
422 423 424
	if eq.unsafeHead.Hash != eq.engineSyncTarget.Hash {
		eq.log.Warn("Attempting to update forkchoice state while engine is P2P syncing")
	}
425
	fc := eth.ForkchoiceState{
426
		HeadBlockHash:      eq.engineSyncTarget.Hash,
427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447
		SafeBlockHash:      eq.safeHead.Hash,
		FinalizedBlockHash: eq.finalized.Hash,
	}
	_, err := eq.engine.ForkchoiceUpdate(ctx, &fc, nil)
	if err != nil {
		var inputErr eth.InputError
		if errors.As(err, &inputErr) {
			switch inputErr.Code {
			case eth.InvalidForkchoiceState:
				return NewResetError(fmt.Errorf("forkchoice update was inconsistent with engine, need reset to resolve: %w", inputErr.Unwrap()))
			default:
				return NewTemporaryError(fmt.Errorf("unexpected error code in forkchoice-updated response: %w", err))
			}
		} else {
			return NewTemporaryError(fmt.Errorf("failed to sync forkchoice with engine: %w", err))
		}
	}
	eq.needForkchoiceUpdate = false
	return nil
}

448 449 450
// checkNewPayloadStatus checks returned status of engine_newPayloadV1 request for next unsafe payload.
// It returns true if the status is acceptable.
func (eq *EngineQueue) checkNewPayloadStatus(status eth.ExecutePayloadStatus) bool {
451
	if eq.syncCfg.EngineSync {
452 453 454 455 456 457 458 459 460
		// Allow SYNCING and ACCEPTED if engine P2P sync is enabled
		return status == eth.ExecutionValid || status == eth.ExecutionSyncing || status == eth.ExecutionAccepted
	}
	return status == eth.ExecutionValid
}

// checkForkchoiceUpdatedStatus checks returned status of engine_forkchoiceUpdatedV1 request for next unsafe payload.
// It returns true if the status is acceptable.
func (eq *EngineQueue) checkForkchoiceUpdatedStatus(status eth.ExecutePayloadStatus) bool {
461
	if eq.syncCfg.EngineSync {
462 463 464 465 466 467
		// Allow SYNCING if engine P2P sync is enabled
		return status == eth.ExecutionValid || status == eth.ExecutionSyncing
	}
	return status == eth.ExecutionValid
}

protolambda's avatar
protolambda committed
468
func (eq *EngineQueue) tryNextUnsafePayload(ctx context.Context) error {
469
	first := eq.unsafePayloads.Peek()
protolambda's avatar
protolambda committed
470 471 472

	if uint64(first.BlockNumber) <= eq.safeHead.Number {
		eq.log.Info("skipping unsafe payload, since it is older than safe head", "safe", eq.safeHead.ID(), "unsafe", first.ID(), "payload", first.ID())
473
		eq.unsafePayloads.Pop()
protolambda's avatar
protolambda committed
474 475
		return nil
	}
476 477 478 479 480
	if uint64(first.BlockNumber) <= eq.unsafeHead.Number {
		eq.log.Info("skipping unsafe payload, since it is older than unsafe head", "unsafe", eq.unsafeHead.ID(), "unsafe_payload", first.ID())
		eq.unsafePayloads.Pop()
		return nil
	}
protolambda's avatar
protolambda committed
481

482
	// Ensure that the unsafe payload builds upon the current unsafe head
483
	if !eq.syncCfg.EngineSync && first.ParentHash != eq.unsafeHead.Hash {
484 485 486 487 488
		if uint64(first.BlockNumber) == eq.unsafeHead.Number+1 {
			eq.log.Info("skipping unsafe payload, since it does not build onto the existing unsafe chain", "safe", eq.safeHead.ID(), "unsafe", first.ID(), "payload", first.ID())
			eq.unsafePayloads.Pop()
		}
		return io.EOF // time to go to next stage if we cannot process the first unsafe payload
protolambda's avatar
protolambda committed
489 490 491 492 493
	}

	ref, err := PayloadToBlockRef(first, &eq.cfg.Genesis)
	if err != nil {
		eq.log.Error("failed to decode L2 block ref from payload", "err", err)
494
		eq.unsafePayloads.Pop()
protolambda's avatar
protolambda committed
495 496 497
		return nil
	}

498 499 500 501
	status, err := eq.engine.NewPayload(ctx, first)
	if err != nil {
		return NewTemporaryError(fmt.Errorf("failed to update insert payload: %w", err))
	}
502
	if !eq.checkNewPayloadStatus(status.Status) {
503 504 505 506
		eq.unsafePayloads.Pop()
		return NewTemporaryError(fmt.Errorf("cannot process unsafe payload: new - %v; parent: %v; err: %w",
			first.ID(), first.ParentID(), eth.NewPayloadErr(first, status)))
	}
protolambda's avatar
protolambda committed
507

508
	// Mark the new payload as valid
protolambda's avatar
protolambda committed
509
	fc := eth.ForkchoiceState{
510
		HeadBlockHash:      first.BlockHash,
protolambda's avatar
protolambda committed
511 512 513 514 515
		SafeBlockHash:      eq.safeHead.Hash, // this should guarantee we do not reorg past the safe head
		FinalizedBlockHash: eq.finalized.Hash,
	}
	fcRes, err := eq.engine.ForkchoiceUpdate(ctx, &fc, nil)
	if err != nil {
516 517 518 519 520 521 522 523 524 525 526
		var inputErr eth.InputError
		if errors.As(err, &inputErr) {
			switch inputErr.Code {
			case eth.InvalidForkchoiceState:
				return NewResetError(fmt.Errorf("pre-unsafe-block forkchoice update was inconsistent with engine, need reset to resolve: %w", inputErr.Unwrap()))
			default:
				return NewTemporaryError(fmt.Errorf("unexpected error code in forkchoice-updated response: %w", err))
			}
		} else {
			return NewTemporaryError(fmt.Errorf("failed to update forkchoice to prepare for new unsafe payload: %w", err))
		}
protolambda's avatar
protolambda committed
527
	}
528
	if !eq.checkForkchoiceUpdatedStatus(fcRes.PayloadStatus.Status) {
529
		eq.unsafePayloads.Pop()
530
		return NewTemporaryError(fmt.Errorf("cannot prepare unsafe chain for new payload: new - %v; parent: %v; err: %w",
531
			first.ID(), first.ParentID(), eth.ForkchoiceUpdateErr(fcRes.PayloadStatus)))
protolambda's avatar
protolambda committed
532
	}
533

534 535 536 537 538 539 540
	eq.engineSyncTarget = ref
	eq.metrics.RecordL2Ref("l2_engineSyncTarget", ref)
	// unsafeHead should be updated only if the payload status is VALID
	if fcRes.PayloadStatus.Status == eth.ExecutionValid {
		eq.unsafeHead = ref
		eq.metrics.RecordL2Ref("l2_unsafe", ref)
	}
541
	eq.unsafePayloads.Pop()
542
	eq.log.Trace("Executed unsafe payload", "hash", ref.Hash, "number", ref.Number, "timestamp", ref.Time, "l1Origin", ref.L1Origin)
543
	eq.logSyncProgress("unsafe payload from sequencer")
544

protolambda's avatar
protolambda committed
545 546 547 548
	return nil
}

func (eq *EngineQueue) tryNextSafeAttributes(ctx context.Context) error {
protolambda's avatar
protolambda committed
549
	if eq.safeAttributes == nil { // sanity check the attributes are there
550 551
		return nil
	}
552
	// validate the safe attributes before processing them. The engine may have completed processing them through other means.
553
	if eq.safeHead != eq.safeAttributes.parent {
554 555 556 557 558 559 560
		// Previously the attribute's parent was the safe head. If the safe head advances so safe head's parent is the same as the
		// attribute's parent then we need to cancel the attributes.
		if eq.safeHead.ParentHash == eq.safeAttributes.parent.Hash {
			eq.log.Warn("queued safe attributes are stale, safehead progressed",
				"safe_head", eq.safeHead, "safe_head_parent", eq.safeHead.ParentID(), "attributes_parent", eq.safeAttributes.parent)
			eq.safeAttributes = nil
			return nil
561
		}
562 563 564
		// If something other than a simple advance occurred, perform a full reset
		return NewResetError(fmt.Errorf("safe head changed to %s with parent %s, conflicting with queued safe attributes on top of %s",
			eq.safeHead, eq.safeHead.ParentID(), eq.safeAttributes.parent))
565

566
	}
protolambda's avatar
protolambda committed
567 568 569 570 571 572 573 574
	if eq.safeHead.Number < eq.unsafeHead.Number {
		return eq.consolidateNextSafeAttributes(ctx)
	} else if eq.safeHead.Number == eq.unsafeHead.Number {
		return eq.forceNextSafeAttributes(ctx)
	} else {
		// For some reason the unsafe head is behind the safe head. Log it, and correct it.
		eq.log.Error("invalid sync state, unsafe head is behind safe head", "unsafe", eq.unsafeHead, "safe", eq.safeHead)
		eq.unsafeHead = eq.safeHead
575
		eq.engineSyncTarget = eq.safeHead
576
		eq.metrics.RecordL2Ref("l2_unsafe", eq.unsafeHead)
577
		eq.metrics.RecordL2Ref("l2_engineSyncTarget", eq.unsafeHead)
protolambda's avatar
protolambda committed
578 579 580 581 582 583 584 585 586 587 588 589 590
		return nil
	}
}

// consolidateNextSafeAttributes tries to match the next safe attributes against the existing unsafe chain,
// to avoid extra processing or unnecessary unwinding of the chain.
// However, if the attributes do not match, they will be forced with forceNextSafeAttributes.
func (eq *EngineQueue) consolidateNextSafeAttributes(ctx context.Context) error {
	ctx, cancel := context.WithTimeout(ctx, time.Second*10)
	defer cancel()

	payload, err := eq.engine.PayloadByNumber(ctx, eq.safeHead.Number+1)
	if err != nil {
591 592 593 594
		if errors.Is(err, ethereum.NotFound) {
			// engine may have restarted, or inconsistent safe head. We need to reset
			return NewResetError(fmt.Errorf("expected engine was synced and had unsafe block to reconcile, but cannot find the block: %w", err))
		}
595
		return NewTemporaryError(fmt.Errorf("failed to get existing unsafe payload to compare against derived attributes from L1: %w", err))
protolambda's avatar
protolambda committed
596
	}
597
	if err := AttributesMatchBlock(eq.safeAttributes.attributes, eq.safeHead.Hash, payload, eq.log); err != nil {
598
		eq.log.Warn("L2 reorg: existing unsafe block does not match derived attributes from L1", "err", err, "unsafe", eq.unsafeHead, "safe", eq.safeHead)
protolambda's avatar
protolambda committed
599 600 601 602 603
		// geth cannot wind back a chain without reorging to a new, previously non-canonical, block
		return eq.forceNextSafeAttributes(ctx)
	}
	ref, err := PayloadToBlockRef(payload, &eq.cfg.Genesis)
	if err != nil {
604
		return NewResetError(fmt.Errorf("failed to decode L2 block ref from payload: %w", err))
protolambda's avatar
protolambda committed
605 606
	}
	eq.safeHead = ref
607
	eq.needForkchoiceUpdate = true
608
	eq.metrics.RecordL2Ref("l2_safe", ref)
protolambda's avatar
protolambda committed
609
	// unsafe head stays the same, we did not reorg the chain.
s7v7nislands's avatar
s7v7nislands committed
610
	eq.safeAttributes = nil
611
	eq.postProcessSafeL2()
612
	eq.logSyncProgress("reconciled with L1")
613

protolambda's avatar
protolambda committed
614 615 616 617 618
	return nil
}

// forceNextSafeAttributes inserts the provided attributes, reorging away any conflicting unsafe chain.
func (eq *EngineQueue) forceNextSafeAttributes(ctx context.Context) error {
s7v7nislands's avatar
s7v7nislands committed
619
	if eq.safeAttributes == nil {
protolambda's avatar
protolambda committed
620 621
		return nil
	}
622
	attrs := eq.safeAttributes.attributes
623 624 625 626
	errType, err := eq.StartPayload(ctx, eq.safeHead, attrs, true)
	if err == nil {
		_, errType, err = eq.ConfirmPayload(ctx)
	}
627 628 629 630 631 632
	if err != nil {
		switch errType {
		case BlockInsertTemporaryErr:
			// RPC errors are recoverable, we can retry the buffered payload attributes later.
			return NewTemporaryError(fmt.Errorf("temporarily cannot insert new safe block: %w", err))
		case BlockInsertPrestateErr:
633
			_ = eq.CancelPayload(ctx, true)
634 635
			return NewResetError(fmt.Errorf("need reset to resolve pre-state problem: %w", err))
		case BlockInsertPayloadErr:
636
			_ = eq.CancelPayload(ctx, true)
637 638 639
			eq.log.Warn("could not process payload derived from L1 data, dropping batch", "err", err)
			// Count the number of deposits to see if the tx list is deposit only.
			depositCount := 0
640 641
			for _, tx := range attrs.Transactions {
				if len(tx) > 0 && tx[0] == types.DepositTxType {
642
					depositCount += 1
643
				}
644
			}
645 646 647
			// Deposit transaction execution errors are suppressed in the execution engine, but if the
			// block is somehow invalid, there is nothing we can do to recover & we should exit.
			// TODO: Can this be triggered by an empty batch with invalid data (like parent hash or gas limit?)
648 649 650
			if len(attrs.Transactions) == depositCount {
				eq.log.Error("deposit only block was invalid", "parent", eq.safeHead, "err", err)
				return NewCriticalError(fmt.Errorf("failed to process block with only deposit transactions: %w", err))
651
			}
652
			// drop the payload without inserting it
s7v7nislands's avatar
s7v7nislands committed
653
			eq.safeAttributes = nil
654
			// suppress the error b/c we want to retry with the next batch from the batch queue
655 656
			// If there is no valid batch the node will eventually force a deposit only block. If
			// the deposit only block fails, this will return the critical error above.
657 658
			return nil

659 660
		default:
			return NewCriticalError(fmt.Errorf("unknown InsertHeadBlock error type %d: %w", errType, err))
661
		}
protolambda's avatar
protolambda committed
662
	}
s7v7nislands's avatar
s7v7nislands committed
663
	eq.safeAttributes = nil
664 665 666 667 668 669
	eq.logSyncProgress("processed safe block derived from L1")

	return nil
}

func (eq *EngineQueue) StartPayload(ctx context.Context, parent eth.L2BlockRef, attrs *eth.PayloadAttributes, updateSafe bool) (errType BlockInsertionErrType, err error) {
670 671 672
	if eq.isEngineSyncing() {
		return BlockInsertTemporaryErr, fmt.Errorf("engine is in progess of p2p sync")
	}
673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696
	if eq.buildingID != (eth.PayloadID{}) {
		eq.log.Warn("did not finish previous block building, starting new building now", "prev_onto", eq.buildingOnto, "prev_payload_id", eq.buildingID, "new_onto", parent)
		// TODO: maybe worth it to force-cancel the old payload ID here.
	}
	fc := eth.ForkchoiceState{
		HeadBlockHash:      parent.Hash,
		SafeBlockHash:      eq.safeHead.Hash,
		FinalizedBlockHash: eq.finalized.Hash,
	}
	id, errTyp, err := StartPayload(ctx, eq.engine, fc, attrs)
	if err != nil {
		return errTyp, err
	}
	eq.buildingID = id
	eq.buildingSafe = updateSafe
	eq.buildingOnto = parent
	return BlockInsertOK, nil
}

func (eq *EngineQueue) ConfirmPayload(ctx context.Context) (out *eth.ExecutionPayload, errTyp BlockInsertionErrType, err error) {
	if eq.buildingID == (eth.PayloadID{}) {
		return nil, BlockInsertPrestateErr, fmt.Errorf("cannot complete payload building: not currently building a payload")
	}
	if eq.buildingOnto.Hash != eq.unsafeHead.Hash { // E.g. when safe-attributes consolidation fails, it will drop the existing work.
697
		eq.log.Warn("engine is building block that reorgs previous unsafe head", "onto", eq.buildingOnto, "unsafe", eq.unsafeHead)
698 699 700 701 702 703 704 705 706 707
	}
	fc := eth.ForkchoiceState{
		HeadBlockHash:      common.Hash{}, // gets overridden
		SafeBlockHash:      eq.safeHead.Hash,
		FinalizedBlockHash: eq.finalized.Hash,
	}
	payload, errTyp, err := ConfirmPayload(ctx, eq.log, eq.engine, fc, eq.buildingID, eq.buildingSafe)
	if err != nil {
		return nil, errTyp, fmt.Errorf("failed to complete building on top of L2 chain %s, id: %s, error (%d): %w", eq.buildingOnto, eq.buildingID, errTyp, err)
	}
protolambda's avatar
protolambda committed
708 709
	ref, err := PayloadToBlockRef(payload, &eq.cfg.Genesis)
	if err != nil {
710
		return nil, BlockInsertPayloadErr, NewResetError(fmt.Errorf("failed to decode L2 block ref from payload: %w", err))
protolambda's avatar
protolambda committed
711
	}
712

protolambda's avatar
protolambda committed
713
	eq.unsafeHead = ref
714
	eq.engineSyncTarget = ref
715
	eq.metrics.RecordL2Ref("l2_unsafe", ref)
716
	eq.metrics.RecordL2Ref("l2_engineSyncTarget", ref)
717

718 719 720 721 722 723 724 725 726 727
	if eq.buildingSafe {
		eq.safeHead = ref
		eq.postProcessSafeL2()
		eq.metrics.RecordL2Ref("l2_safe", ref)
	}
	eq.resetBuildingState()
	return payload, BlockInsertOK, nil
}

func (eq *EngineQueue) CancelPayload(ctx context.Context, force bool) error {
728 729 730
	if eq.buildingID == (eth.PayloadID{}) { // only cancel if there is something to cancel.
		return nil
	}
731 732 733 734 735 736 737 738 739 740
	// the building job gets wrapped up as soon as the payload is retrieved, there's no explicit cancel in the Engine API
	eq.log.Error("cancelling old block sealing job", "payload", eq.buildingID)
	_, err := eq.engine.GetPayload(ctx, eq.buildingID)
	if err != nil {
		eq.log.Error("failed to cancel block building job", "payload", eq.buildingID, "err", err)
		if !force {
			return err
		}
	}
	eq.resetBuildingState()
protolambda's avatar
protolambda committed
741 742 743
	return nil
}

744 745 746 747 748 749 750 751 752 753
// BuildingPayload returns the tracked block-building state: the block being built onto,
// the payload ID of the job, and whether the job was started with updateSafe set.
// All three are zero values when no job is being tracked.
func (eq *EngineQueue) BuildingPayload() (onto eth.L2BlockRef, id eth.PayloadID, safe bool) {
	onto = eq.buildingOnto
	id = eq.buildingID
	safe = eq.buildingSafe
	return onto, id, safe
}

// resetBuildingState clears the tracked block-building job by zeroing the payload ID,
// the block being built onto, and the safe flag.
func (eq *EngineQueue) resetBuildingState() {
	eq.buildingSafe = false
	eq.buildingOnto = eth.L2BlockRef{}
	eq.buildingID = eth.PayloadID{}
}

pengin7384's avatar
pengin7384 committed
754
// Reset walks the L2 chain backwards until it finds an L2 block whose L1 origin is canonical.
protolambda's avatar
protolambda committed
755
// The unsafe head is set to the head of the L2 chain, unless the existing safe head is not canonical.
756
func (eq *EngineQueue) Reset(ctx context.Context, _ eth.L1BlockRef, _ eth.SystemConfig) error {
757
	result, err := sync.FindL2Heads(ctx, eq.cfg, eq.l1Fetcher, eq.engine, eq.log, eq.syncCfg)
758
	if err != nil {
759
		return NewTemporaryError(fmt.Errorf("failed to find the L2 Heads to start from: %w", err))
protolambda's avatar
protolambda committed
760
	}
protolambda's avatar
protolambda committed
761
	finalized, safe, unsafe := result.Finalized, result.Safe, result.Unsafe
762
	l1Origin, err := eq.l1Fetcher.L1BlockRefByHash(ctx, safe.L1Origin.Hash)
protolambda's avatar
protolambda committed
763
	if err != nil {
764
		return NewTemporaryError(fmt.Errorf("failed to fetch the new L1 progress: origin: %v; err: %w", safe.L1Origin, err))
protolambda's avatar
protolambda committed
765
	}
766
	if safe.Time < l1Origin.Time {
767 768
		return NewResetError(fmt.Errorf("cannot reset block derivation to start at L2 block %s with time %d older than its L1 origin %s with time %d, time invariant is broken",
			safe, safe.Time, l1Origin, l1Origin.Time))
769
	}
770

771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789
	// Walk back L2 chain to find the L1 origin that is old enough to start buffering channel data from.
	pipelineL2 := safe
	for {
		afterL2Genesis := pipelineL2.Number > eq.cfg.Genesis.L2.Number
		afterL1Genesis := pipelineL2.L1Origin.Number > eq.cfg.Genesis.L1.Number
		afterChannelTimeout := pipelineL2.L1Origin.Number+eq.cfg.ChannelTimeout > l1Origin.Number
		if afterL2Genesis && afterL1Genesis && afterChannelTimeout {
			parent, err := eq.engine.L2BlockRefByHash(ctx, pipelineL2.ParentHash)
			if err != nil {
				return NewResetError(fmt.Errorf("failed to fetch L2 parent block %s", pipelineL2.ParentID()))
			}
			pipelineL2 = parent
		} else {
			break
		}
	}
	pipelineOrigin, err := eq.l1Fetcher.L1BlockRefByHash(ctx, pipelineL2.L1Origin.Hash)
	if err != nil {
		return NewTemporaryError(fmt.Errorf("failed to fetch the new L1 progress: origin: %s; err: %w", pipelineL2.L1Origin, err))
790
	}
791
	l1Cfg, err := eq.engine.SystemConfigByL2Hash(ctx, pipelineL2.Hash)
792
	if err != nil {
Joshua Gutow's avatar
Joshua Gutow committed
793
		return NewTemporaryError(fmt.Errorf("failed to fetch L1 config of L2 block %s: %w", pipelineL2.ID(), err))
794
	}
795
	eq.log.Debug("Reset engine queue", "safeHead", safe, "unsafe", unsafe, "safe_timestamp", safe.Time, "unsafe_timestamp", unsafe.Time, "l1Origin", l1Origin)
796
	eq.unsafeHead = unsafe
797
	eq.engineSyncTarget = unsafe
798
	eq.safeHead = safe
Joshua Gutow's avatar
Joshua Gutow committed
799
	eq.safeAttributes = nil
800
	eq.finalized = finalized
801
	eq.resetBuildingState()
802
	eq.needForkchoiceUpdate = true
803
	eq.finalityData = eq.finalityData[:0]
804
	// note: finalizedL1 and triedFinalizeAt do not reset, since these do not change between reorgs.
805
	// note: we do not clear the unsafe payloads queue; if the payloads are not applicable anymore the parent hash checks will clear out the old payloads.
806
	eq.origin = pipelineOrigin
807
	eq.sysCfg = l1Cfg
808
	eq.metrics.RecordL2Ref("l2_finalized", finalized)
809 810
	eq.metrics.RecordL2Ref("l2_safe", safe)
	eq.metrics.RecordL2Ref("l2_unsafe", unsafe)
811
	eq.metrics.RecordL2Ref("l2_engineSyncTarget", unsafe)
812
	eq.logSyncProgress("reset derivation work")
813
	return io.EOF
protolambda's avatar
protolambda committed
814
}
clabby's avatar
clabby committed
815

816 817
// UnsafeL2SyncTarget retrieves the first queued-up L2 unsafe payload, or a zeroed reference if there is none.
func (eq *EngineQueue) UnsafeL2SyncTarget() eth.L2BlockRef {
clabby's avatar
clabby committed
818
	if first := eq.unsafePayloads.Peek(); first != nil {
819 820 821 822 823
		ref, err := PayloadToBlockRef(first, &eq.cfg.Genesis)
		if err != nil {
			return eth.L2BlockRef{}
		}
		return ref
clabby's avatar
clabby committed
824
	} else {
825
		return eth.L2BlockRef{}
clabby's avatar
clabby committed
826 827
	}
}