engine_queue.go 34.8 KB
Newer Older
protolambda's avatar
protolambda committed
1 2 3 4
package derive

import (
	"context"
5
	"errors"
protolambda's avatar
protolambda committed
6 7 8 9
	"fmt"
	"io"
	"time"

10
	"github.com/ethereum/go-ethereum"
protolambda's avatar
protolambda committed
11
	"github.com/ethereum/go-ethereum/common"
12
	"github.com/ethereum/go-ethereum/core/types"
protolambda's avatar
protolambda committed
13
	"github.com/ethereum/go-ethereum/log"
14 15 16

	"github.com/ethereum-optimism/optimism/op-node/rollup"
	"github.com/ethereum-optimism/optimism/op-node/rollup/sync"
17
	"github.com/ethereum-optimism/optimism/op-service/eth"
protolambda's avatar
protolambda committed
18 19
)

20 21 22 23
type AttributesWithParent struct {
	attributes   *eth.PayloadAttributes
	parent       eth.L2BlockRef
	isLastInSpan bool
24 25
}

26 27
type NextAttributesProvider interface {
	Origin() eth.L1BlockRef
28
	NextAttributes(context.Context, eth.L2BlockRef) (*AttributesWithParent, error)
29 30
}

protolambda's avatar
protolambda committed
31 32 33 34 35 36
type Engine interface {
	GetPayload(ctx context.Context, payloadId eth.PayloadID) (*eth.ExecutionPayload, error)
	ForkchoiceUpdate(ctx context.Context, state *eth.ForkchoiceState, attr *eth.PayloadAttributes) (*eth.ForkchoiceUpdatedResult, error)
	NewPayload(ctx context.Context, payload *eth.ExecutionPayload) (*eth.PayloadStatusV1, error)
	PayloadByHash(context.Context, common.Hash) (*eth.ExecutionPayload, error)
	PayloadByNumber(context.Context, uint64) (*eth.ExecutionPayload, error)
37
	L2BlockRefByLabel(ctx context.Context, label eth.BlockLabel) (eth.L2BlockRef, error)
protolambda's avatar
protolambda committed
38
	L2BlockRefByHash(ctx context.Context, l2Hash common.Hash) (eth.L2BlockRef, error)
39
	L2BlockRefByNumber(ctx context.Context, num uint64) (eth.L2BlockRef, error)
40
	SystemConfigL2Fetcher
protolambda's avatar
protolambda committed
41 42
}

43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67
// EngineState provides a read-only interface of the forkchoice state properties of the L2 Engine.
type EngineState interface {
	// Finalized returns the finalized L2 block.
	Finalized() eth.L2BlockRef
	// UnsafeL2Head returns the unsafe head of the L2 chain.
	UnsafeL2Head() eth.L2BlockRef
	// SafeL2Head returns the safe head of the L2 chain.
	SafeL2Head() eth.L2BlockRef
}

// EngineControl enables other components to build blocks with the Engine,
// while keeping the forkchoice state and payload-id management internal to
// avoid state inconsistencies between different users of the EngineControl.
type EngineControl interface {
	// EngineState gives read access to the finalized, unsafe and safe L2 heads.
	EngineState

	// StartPayload requests the engine to start building a block with the given attributes.
	// If updateSafe, the resulting block will be marked as a safe block.
	// On failure, errType classifies the returned error.
	StartPayload(ctx context.Context, parent eth.L2BlockRef, attrs *eth.PayloadAttributes, updateSafe bool) (errType BlockInsertionErrType, err error)
	// ConfirmPayload requests the engine to complete the current block. If no block is being built, or if it fails, an error is returned.
	// On failure, errTyp classifies the returned error.
	ConfirmPayload(ctx context.Context) (out *eth.ExecutionPayload, errTyp BlockInsertionErrType, err error)
	// CancelPayload requests the engine to stop building the current block without making it canonical.
	// This is optional, as the engine expires building jobs that are left uncompleted, but can still save resources.
	CancelPayload(ctx context.Context, force bool) error
	// BuildingPayload indicates if a payload is being built, and onto which block it is being built, and whether or not it is a safe payload.
	BuildingPayload() (onto eth.L2BlockRef, id eth.PayloadID, safe bool)
}

68 69
// maxUnsafePayloadsMemory is the maximum amount of memory used for buffering unsafe payloads.
// It is passed as the size limit of the unsafe-payloads queue (500 MiB, assuming the queue counts bytes).
const maxUnsafePayloadsMemory = 500 * 1024 * 1024
protolambda's avatar
protolambda committed
70

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
// finalityLookback defines the number of L1<>L2 relations to track for finalization purposes, one per L1 block.
//
// When L1 finalizes blocks, it finalizes finalityLookback blocks behind the L1 head.
// Non-finality may take longer, but when it does finalize again, it is within this range of the L1 head.
// Thus we only need to retain the L1<>L2 derivation relation data of this many L1 blocks.
//
// In the event of older finalization signals, misconfiguration, or insufficient L1<>L2 derivation relation data,
// we may miss the opportunity to finalize more L2 blocks.
// This does not cause any divergence, it just causes a lagging finalization status.
//
// The beacon chain on mainnet has 32 slots per epoch,
// and new finalization events happen at most 4 epochs behind the head.
// We add 1 to make pruning easier, by leaving room for a new item without pruning the 32*4.
const finalityLookback = 4*32 + 1

86 87 88 89
// finalityDelay is the number of L1 blocks to traverse before trying to finalize L2 blocks again.
// We do not want to do this too often, since each attempt requires fetching an L1 block by number,
// which cannot be answered from cached data.
const finalityDelay = 64

90 91 92 93 94 95 96 97
// FinalityData relates an L1 block to the last L2 block that was derived while processing it.
// The EngineQueue keeps a bounded history of these entries to finalize L2 blocks once L1 finalizes.
type FinalityData struct {
	// The last L2 block that was fully derived and inserted into the L2 engine while processing this L1 block.
	L2Block eth.L2BlockRef
	// The L1 block this stage was at when inserting the L2 block.
	// When this L1 block is finalized, the L2 chain up to this block can be fully reproduced from finalized L1 data.
	L1Block eth.BlockID
}

protolambda's avatar
protolambda committed
98 99 100 101 102 103 104 105 106
// EngineQueue queues up payload attributes to consolidate or process with the provided Engine
type EngineQueue struct {
	log log.Logger
	cfg *rollup.Config

	finalized  eth.L2BlockRef
	safeHead   eth.L2BlockRef
	unsafeHead eth.L2BlockRef

107 108 109 110
	// L2 block processed from the batch, but not consolidated to the safe block yet.
	// Consolidation will be pending until the entire batch is processed successfully, to guarantee the span batch atomicity.
	pendingSafeHead eth.L2BlockRef

111 112 113 114
	// Target L2 block the engine is currently syncing to.
	// If the engine p2p sync is enabled, it can be different with unsafeHead. Otherwise, it must be same with unsafeHead.
	engineSyncTarget eth.L2BlockRef

115 116 117 118
	buildingOnto eth.L2BlockRef
	buildingID   eth.PayloadID
	buildingSafe bool

119 120 121 122 123
	// Track when the rollup node changes the forkchoice without engine action,
	// e.g. on a reset after a reorg, or after consolidating a block.
	// This update may repeat if the engine returns a temporary error.
	needForkchoiceUpdate bool

124 125
	// finalizedL1 is the currently perceived finalized L1 block.
	// This may be ahead of the current traversed origin when syncing.
126
	finalizedL1 eth.L1BlockRef
protolambda's avatar
protolambda committed
127

128 129 130
	// triedFinalizeAt tracks at which origin we last tried to finalize during sync.
	triedFinalizeAt eth.L1BlockRef

131
	// The queued-up attributes
132
	safeAttributes *AttributesWithParent
133
	unsafePayloads *PayloadsQueue // queue of unsafe payloads, ordered by ascending block number, may have gaps and duplicates
protolambda's avatar
protolambda committed
134

135 136 137
	// Tracks which L2 blocks where last derived from which L1 block. At most finalityLookback large.
	finalityData []FinalityData

protolambda's avatar
protolambda committed
138
	engine Engine
139 140
	prev   NextAttributesProvider

141 142
	origin eth.L1BlockRef   // updated on resets, and whenever we read from the previous stage.
	sysCfg eth.SystemConfig // only used for pipeline resets
143

144 145
	metrics   Metrics
	l1Fetcher L1Fetcher
146 147

	syncCfg *sync.Config
protolambda's avatar
protolambda committed
148 149
}

150 151
var _ EngineControl = (*EngineQueue)(nil)

protolambda's avatar
protolambda committed
152
// NewEngineQueue creates a new EngineQueue, which should be Reset(origin) before use.
153
func NewEngineQueue(log log.Logger, cfg *rollup.Config, engine Engine, metrics Metrics, prev NextAttributesProvider, l1Fetcher L1Fetcher, syncCfg *sync.Config) *EngineQueue {
154
	return &EngineQueue{
155 156 157 158 159 160 161 162
		log:            log,
		cfg:            cfg,
		engine:         engine,
		metrics:        metrics,
		finalityData:   make([]FinalityData, 0, finalityLookback),
		unsafePayloads: NewPayloadsQueue(maxUnsafePayloadsMemory, payloadMemSize),
		prev:           prev,
		l1Fetcher:      l1Fetcher,
163
		syncCfg:        syncCfg,
164
	}
protolambda's avatar
protolambda committed
165 166
}

167
// Origin identifies the L1 chain (incl.) that included and/or produced all the safe L2 blocks.
168 169
func (eq *EngineQueue) Origin() eth.L1BlockRef {
	return eq.origin
protolambda's avatar
protolambda committed
170 171
}

172 173 174 175
// SystemConfig returns the system configuration held by the queue.
func (eq *EngineQueue) SystemConfig() eth.SystemConfig {
	return eq.sysCfg
}

protolambda's avatar
protolambda committed
176 177
func (eq *EngineQueue) SetUnsafeHead(head eth.L2BlockRef) {
	eq.unsafeHead = head
178
	eq.metrics.RecordL2Ref("l2_unsafe", head)
protolambda's avatar
protolambda committed
179 180
}

181 182 183 184 185
// SetEngineSyncTarget overrides the block the engine is syncing to,
// and records it in the "l2_engineSyncTarget" metric.
func (eq *EngineQueue) SetEngineSyncTarget(head eth.L2BlockRef) {
	eq.engineSyncTarget = head
	eq.metrics.RecordL2Ref("l2_engineSyncTarget", head)
}

protolambda's avatar
protolambda committed
186
func (eq *EngineQueue) AddUnsafePayload(payload *eth.ExecutionPayload) {
187 188 189
	if payload == nil {
		eq.log.Warn("cannot add nil unsafe payload")
		return
protolambda's avatar
protolambda committed
190
	}
Danyal Prout's avatar
Danyal Prout committed
191

192 193 194 195 196 197 198
	if err := eq.unsafePayloads.Push(payload); err != nil {
		eq.log.Warn("Could not add unsafe payload", "id", payload.ID(), "timestamp", uint64(payload.Timestamp), "err", err)
		return
	}
	p := eq.unsafePayloads.Peek()
	eq.metrics.RecordUnsafePayloadsBuffer(uint64(eq.unsafePayloads.Len()), eq.unsafePayloads.MemSize(), p.ID())
	eq.log.Trace("Next unsafe payload to process", "next", p.ID(), "timestamp", uint64(p.Timestamp))
protolambda's avatar
protolambda committed
199 200
}

201 202 203 204 205
func (eq *EngineQueue) Finalize(l1Origin eth.L1BlockRef) {
	if l1Origin.Number < eq.finalizedL1.Number {
		eq.log.Error("ignoring old L1 finalized block signal! Is the L1 provider corrupted?", "prev_finalized_l1", eq.finalizedL1, "signaled_finalized_l1", l1Origin)
		return
	}
206 207 208 209 210 211 212 213 214

	// remember the L1 finalization signal
	eq.finalizedL1 = l1Origin

	// Sanity check: we only try to finalize L2 immediately, without fetching additional data,
	// if we are on the same chain as the signal.
	// If we are on a different chain, the signal will be ignored,
	// and tryFinalizeL1Origin() will eventually detect that we are on the wrong chain,
	// if not resetting due to reorg elsewhere already.
215 216 217 218 219 220
	for _, fd := range eq.finalityData {
		if fd.L1Block == l1Origin.ID() {
			eq.tryFinalizeL2()
			return
		}
	}
221 222

	eq.log.Info("received L1 finality signal, but missing data for immediate L2 finalization", "prev_finalized_l1", eq.finalizedL1, "signaled_finalized_l1", l1Origin)
223 224 225 226 227 228
}

// FinalizedL1 identifies the L1 chain (incl.) that included and/or produced all the finalized L2 blocks.
// This may return a zeroed ID if no finalization signals have been seen yet.
func (eq *EngineQueue) FinalizedL1() eth.L1BlockRef {
	return eq.finalizedL1
protolambda's avatar
protolambda committed
229 230 231 232 233 234 235 236 237 238 239 240 241 242
}

// Finalized returns the finalized L2 block, as tracked by the queue.
func (eq *EngineQueue) Finalized() eth.L2BlockRef {
	return eq.finalized
}

// UnsafeL2Head returns the unsafe L2 head, as tracked by the queue.
func (eq *EngineQueue) UnsafeL2Head() eth.L2BlockRef {
	return eq.unsafeHead
}

// SafeL2Head returns the safe L2 head, as tracked by the queue.
func (eq *EngineQueue) SafeL2Head() eth.L2BlockRef {
	return eq.safeHead
}

243 244 245 246
// PendingSafeL2Head returns the pending safe L2 head:
// the last block processed from a batch that is not necessarily consolidated into the safe head yet.
func (eq *EngineQueue) PendingSafeL2Head() eth.L2BlockRef {
	return eq.pendingSafeHead
}

247 248 249 250 251 252 253 254 255
// EngineSyncTarget returns the L2 block the engine is currently syncing to.
func (eq *EngineQueue) EngineSyncTarget() eth.L2BlockRef {
	return eq.engineSyncTarget
}

// isEngineSyncing reports whether the engine is still syncing to the target block,
// i.e. whether the unsafe head and the engine sync target have different hashes.
func (eq *EngineQueue) isEngineSyncing() bool {
	return eq.unsafeHead.Hash != eq.engineSyncTarget.Hash
}

256
func (eq *EngineQueue) Step(ctx context.Context) error {
257 258 259
	if eq.needForkchoiceUpdate {
		return eq.tryUpdateEngine(ctx)
	}
260 261 262 263 264 265 266 267 268 269 270 271
	// Trying unsafe payload should be done before safe attributes
	// It allows the unsafe head can move forward while the long-range consolidation is in progress.
	if eq.unsafePayloads.Len() > 0 {
		if err := eq.tryNextUnsafePayload(ctx); err != io.EOF {
			return err
		}
		// EOF error means we can't process the next unsafe payload. Then we should process next safe attributes.
	}
	if eq.isEngineSyncing() {
		// Make pipeline first focus to sync unsafe blocks to engineSyncTarget
		return EngineP2PSyncing
	}
s7v7nislands's avatar
s7v7nislands committed
272
	if eq.safeAttributes != nil {
protolambda's avatar
protolambda committed
273 274
		return eq.tryNextSafeAttributes(ctx)
	}
275
	outOfData := false
276 277 278 279 280 281 282
	newOrigin := eq.prev.Origin()
	// Check if the L2 unsafe head origin is consistent with the new origin
	if err := eq.verifyNewL1Origin(ctx, newOrigin); err != nil {
		return err
	}
	eq.origin = newOrigin
	eq.postProcessSafeL2() // make sure we track the last L2 safe head for every new L1 block
283 284 285 286
	// try to finalize the L2 blocks we have synced so far (no-op if L1 finality is behind)
	if err := eq.tryFinalizePastL2Blocks(ctx); err != nil {
		return err
	}
287
	if next, err := eq.prev.NextAttributes(ctx, eq.pendingSafeHead); err == io.EOF {
288 289 290 291
		outOfData = true
	} else if err != nil {
		return err
	} else {
292 293 294
		eq.safeAttributes = next
		eq.log.Debug("Adding next safe attributes", "safe_head", eq.safeHead,
			"pending_safe_head", eq.pendingSafeHead, "next", next)
295
		return NotEnoughData
296
	}
297

298 299 300 301 302
	if outOfData {
		return io.EOF
	} else {
		return nil
	}
protolambda's avatar
protolambda committed
303 304
}

305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
// verifyNewL1Origin checks that the L1 origin of the L2 unsafe head is still on the canonical chain,
// now that the queue is about to move to newOrigin.
// An unsafe head origin that is ahead of newOrigin is assumed to still be canonical.
// Nothing is checked if newOrigin equals the current origin.
//
// A reset error is returned when the unsafe head origin conflicts with the canonical chain,
// and a temporary error when the canonical L1 block cannot be fetched.
func (eq *EngineQueue) verifyNewL1Origin(ctx context.Context, newOrigin eth.L1BlockRef) error {
	if eq.origin == newOrigin {
		return nil
	}
	headOrigin := eq.unsafeHead.L1Origin

	// Same height: both must be the very same block.
	if headOrigin.Number == newOrigin.Number && headOrigin != newOrigin.ID() {
		return NewResetError(fmt.Errorf("l1 origin was inconsistent with l2 unsafe head origin, need reset to resolve: l1 origin: %v; unsafe origin: %v",
			newOrigin.ID(), headOrigin))
	}

	// Direct child: avoid requesting an older block by checking against the parent hash.
	if headOrigin.Number+1 == newOrigin.Number && headOrigin.Hash != newOrigin.ParentHash {
		return NewResetError(fmt.Errorf("l2 unsafe head origin is no longer canonical, need reset to resolve: canonical hash: %v; unsafe origin hash: %v",
			newOrigin.ParentHash, headOrigin.Hash))
	}

	if newOrigin.Number <= headOrigin.Number+1 {
		return nil
	}

	// The unsafe origin is further behind the new origin: check it's still on the canonical chain.
	onChain, err := eq.l1Fetcher.L1BlockRefByNumber(ctx, headOrigin.Number)
	if err != nil {
		return NewTemporaryError(fmt.Errorf("failed to fetch canonical L1 block at slot: %v; err: %w", headOrigin.Number, err))
	}
	if onChain.ID() == headOrigin {
		return nil
	}
	eq.log.Error("Resetting due to origin mismatch")
	return NewResetError(fmt.Errorf("l2 unsafe head origin is no longer canonical, need reset to resolve: canonical: %v; unsafe origin: %v",
		onChain, headOrigin))
}

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
// tryFinalizePastL2Blocks tries to finalize L2 blocks while the queue is still traversing
// L1 blocks at or below the finalized L1 block (e.g. during sync).
//
// Before finalizing, it verifies that the current origin is the canonical L1 block at its height.
// It returns a temporary error if that block cannot be fetched, and a reset error on a mismatch.
func (eq *EngineQueue) tryFinalizePastL2Blocks(ctx context.Context) error {
	var unset eth.L1BlockRef
	switch {
	case eq.finalizedL1 == unset:
		// No finality signal was seen yet.
		return nil
	case eq.finalizedL1.Number < eq.origin.Number:
		// L1 finality is behind the block we are traversing: nothing to act on here,
		// we will finalize later on a new finality signal matching the recent history.
		return nil
	case eq.triedFinalizeAt != unset && eq.origin.Number <= eq.triedFinalizeAt.Number+finalityDelay:
		// We recently tried finalizing: traverse more of L1 first before trying again.
		return nil
	}

	eq.log.Info("processing L1 finality information", "l1_finalized", eq.finalizedL1, "l1_origin", eq.origin, "previous", eq.triedFinalizeAt)

	// Sanity check we are indeed on the finalizing chain, and not stuck on something else.
	// We assume that the block-by-number query is consistent with the previously received finalized chain signal
	canonical, err := eq.l1Fetcher.L1BlockRefByNumber(ctx, eq.origin.Number)
	if err != nil {
		return NewTemporaryError(fmt.Errorf("failed to check if on finalizing L1 chain: %w", err))
	}
	if canonical.Hash != eq.origin.Hash {
		return NewResetError(fmt.Errorf("need to reset, we are on %s, not on the finalizing L1 chain %s (towards %s)", eq.origin, canonical, eq.finalizedL1))
	}

	eq.tryFinalizeL2()
	return nil
}

369 370 371 372
// tryFinalizeL2 traverses the past L1 blocks, checks if any has been finalized,
// and then marks the latest fully derived L2 block from this as finalized,
// or defaults to the current finalized L2 block.
func (eq *EngineQueue) tryFinalizeL2() {
373
	if eq.finalizedL1 == (eth.L1BlockRef{}) {
374 375
		return // if no L1 information is finalized yet, then skip this
	}
376
	eq.triedFinalizeAt = eq.origin
377 378 379 380 381 382
	// default to keep the same finalized block
	finalizedL2 := eq.finalized
	// go through the latest inclusion data, and find the last L2 block that was derived from a finalized L1 block
	for _, fd := range eq.finalityData {
		if fd.L2Block.Number > finalizedL2.Number && fd.L1Block.Number <= eq.finalizedL1.Number {
			finalizedL2 = fd.L2Block
383
			eq.needForkchoiceUpdate = true
384 385 386
		}
	}
	eq.finalized = finalizedL2
387
	eq.metrics.RecordL2Ref("l2_finalized", finalizedL2)
388 389 390 391 392 393 394 395 396 397
}

// postProcessSafeL2 buffers the L1 block the safe head was fully derived from,
// to finalize it once the L1 block, or later, finalizes.
func (eq *EngineQueue) postProcessSafeL2() {
	// prune finality data if necessary
	if len(eq.finalityData) >= finalityLookback {
		eq.finalityData = append(eq.finalityData[:0], eq.finalityData[1:finalityLookback]...)
	}
	// remember the last L2 block that we fully derived from the given finality data
398
	if len(eq.finalityData) == 0 || eq.finalityData[len(eq.finalityData)-1].L1Block.Number < eq.origin.Number {
399 400 401
		// append entry for new L1 block
		eq.finalityData = append(eq.finalityData, FinalityData{
			L2Block: eq.safeHead,
402
			L1Block: eq.origin.ID(),
403
		})
404 405
		last := &eq.finalityData[len(eq.finalityData)-1]
		eq.log.Debug("extended finality-data", "last_l1", last.L1Block, "last_l2", last.L2Block)
406
	} else {
407 408 409 410 411 412
		// if it's a new L2 block that was derived from the same latest L1 block, then just update the entry
		last := &eq.finalityData[len(eq.finalityData)-1]
		if last.L2Block != eq.safeHead { // avoid logging if there are no changes
			last.L2Block = eq.safeHead
			eq.log.Debug("updated finality-data", "last_l1", last.L1Block, "last_l2", last.L2Block)
		}
413 414
	}
}
protolambda's avatar
protolambda committed
415

416 417 418 419 420
func (eq *EngineQueue) logSyncProgress(reason string) {
	eq.log.Info("Sync progress",
		"reason", reason,
		"l2_finalized", eq.finalized,
		"l2_safe", eq.safeHead,
421
		"l2_safe_pending", eq.pendingSafeHead,
422
		"l2_unsafe", eq.unsafeHead,
423
		"l2_engineSyncTarget", eq.engineSyncTarget,
424
		"l2_time", eq.unsafeHead.Time,
425
		"l1_derived", eq.origin,
426 427 428
	)
}

429 430 431
// tryUpdateEngine attempts to update the engine with the current forkchoice state of the rollup node,
// this is a no-op if the nodes already agree on the forkchoice state.
func (eq *EngineQueue) tryUpdateEngine(ctx context.Context) error {
432 433 434
	if eq.unsafeHead.Hash != eq.engineSyncTarget.Hash {
		eq.log.Warn("Attempting to update forkchoice state while engine is P2P syncing")
	}
435
	fc := eth.ForkchoiceState{
436
		HeadBlockHash:      eq.engineSyncTarget.Hash,
437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
		SafeBlockHash:      eq.safeHead.Hash,
		FinalizedBlockHash: eq.finalized.Hash,
	}
	_, err := eq.engine.ForkchoiceUpdate(ctx, &fc, nil)
	if err != nil {
		var inputErr eth.InputError
		if errors.As(err, &inputErr) {
			switch inputErr.Code {
			case eth.InvalidForkchoiceState:
				return NewResetError(fmt.Errorf("forkchoice update was inconsistent with engine, need reset to resolve: %w", inputErr.Unwrap()))
			default:
				return NewTemporaryError(fmt.Errorf("unexpected error code in forkchoice-updated response: %w", err))
			}
		} else {
			return NewTemporaryError(fmt.Errorf("failed to sync forkchoice with engine: %w", err))
		}
	}
	eq.needForkchoiceUpdate = false
	return nil
}

458 459 460
// checkNewPayloadStatus checks returned status of engine_newPayloadV1 request for next unsafe payload.
// It returns true if the status is acceptable.
func (eq *EngineQueue) checkNewPayloadStatus(status eth.ExecutePayloadStatus) bool {
461
	if eq.syncCfg.EngineSync {
462 463 464 465 466 467 468 469 470
		// Allow SYNCING and ACCEPTED if engine P2P sync is enabled
		return status == eth.ExecutionValid || status == eth.ExecutionSyncing || status == eth.ExecutionAccepted
	}
	return status == eth.ExecutionValid
}

// checkForkchoiceUpdatedStatus checks returned status of engine_forkchoiceUpdatedV1 request for next unsafe payload.
// It returns true if the status is acceptable.
func (eq *EngineQueue) checkForkchoiceUpdatedStatus(status eth.ExecutePayloadStatus) bool {
471
	if eq.syncCfg.EngineSync {
472 473 474 475 476 477
		// Allow SYNCING if engine P2P sync is enabled
		return status == eth.ExecutionValid || status == eth.ExecutionSyncing
	}
	return status == eth.ExecutionValid
}

protolambda's avatar
protolambda committed
478
func (eq *EngineQueue) tryNextUnsafePayload(ctx context.Context) error {
479
	first := eq.unsafePayloads.Peek()
protolambda's avatar
protolambda committed
480 481 482

	if uint64(first.BlockNumber) <= eq.safeHead.Number {
		eq.log.Info("skipping unsafe payload, since it is older than safe head", "safe", eq.safeHead.ID(), "unsafe", first.ID(), "payload", first.ID())
483
		eq.unsafePayloads.Pop()
protolambda's avatar
protolambda committed
484 485
		return nil
	}
486 487 488 489 490
	if uint64(first.BlockNumber) <= eq.unsafeHead.Number {
		eq.log.Info("skipping unsafe payload, since it is older than unsafe head", "unsafe", eq.unsafeHead.ID(), "unsafe_payload", first.ID())
		eq.unsafePayloads.Pop()
		return nil
	}
protolambda's avatar
protolambda committed
491

492
	// Ensure that the unsafe payload builds upon the current unsafe head
493
	if !eq.syncCfg.EngineSync && first.ParentHash != eq.unsafeHead.Hash {
494 495 496 497 498
		if uint64(first.BlockNumber) == eq.unsafeHead.Number+1 {
			eq.log.Info("skipping unsafe payload, since it does not build onto the existing unsafe chain", "safe", eq.safeHead.ID(), "unsafe", first.ID(), "payload", first.ID())
			eq.unsafePayloads.Pop()
		}
		return io.EOF // time to go to next stage if we cannot process the first unsafe payload
protolambda's avatar
protolambda committed
499 500 501 502 503
	}

	ref, err := PayloadToBlockRef(first, &eq.cfg.Genesis)
	if err != nil {
		eq.log.Error("failed to decode L2 block ref from payload", "err", err)
504
		eq.unsafePayloads.Pop()
protolambda's avatar
protolambda committed
505 506 507
		return nil
	}

508 509 510 511
	status, err := eq.engine.NewPayload(ctx, first)
	if err != nil {
		return NewTemporaryError(fmt.Errorf("failed to update insert payload: %w", err))
	}
512
	if !eq.checkNewPayloadStatus(status.Status) {
513 514 515 516
		eq.unsafePayloads.Pop()
		return NewTemporaryError(fmt.Errorf("cannot process unsafe payload: new - %v; parent: %v; err: %w",
			first.ID(), first.ParentID(), eth.NewPayloadErr(first, status)))
	}
protolambda's avatar
protolambda committed
517

518
	// Mark the new payload as valid
protolambda's avatar
protolambda committed
519
	fc := eth.ForkchoiceState{
520
		HeadBlockHash:      first.BlockHash,
protolambda's avatar
protolambda committed
521 522 523 524 525
		SafeBlockHash:      eq.safeHead.Hash, // this should guarantee we do not reorg past the safe head
		FinalizedBlockHash: eq.finalized.Hash,
	}
	fcRes, err := eq.engine.ForkchoiceUpdate(ctx, &fc, nil)
	if err != nil {
526 527 528 529 530 531 532 533 534 535 536
		var inputErr eth.InputError
		if errors.As(err, &inputErr) {
			switch inputErr.Code {
			case eth.InvalidForkchoiceState:
				return NewResetError(fmt.Errorf("pre-unsafe-block forkchoice update was inconsistent with engine, need reset to resolve: %w", inputErr.Unwrap()))
			default:
				return NewTemporaryError(fmt.Errorf("unexpected error code in forkchoice-updated response: %w", err))
			}
		} else {
			return NewTemporaryError(fmt.Errorf("failed to update forkchoice to prepare for new unsafe payload: %w", err))
		}
protolambda's avatar
protolambda committed
537
	}
538
	if !eq.checkForkchoiceUpdatedStatus(fcRes.PayloadStatus.Status) {
539
		eq.unsafePayloads.Pop()
540
		return NewTemporaryError(fmt.Errorf("cannot prepare unsafe chain for new payload: new - %v; parent: %v; err: %w",
541
			first.ID(), first.ParentID(), eth.ForkchoiceUpdateErr(fcRes.PayloadStatus)))
protolambda's avatar
protolambda committed
542
	}
543

544 545 546 547 548 549 550
	eq.engineSyncTarget = ref
	eq.metrics.RecordL2Ref("l2_engineSyncTarget", ref)
	// unsafeHead should be updated only if the payload status is VALID
	if fcRes.PayloadStatus.Status == eth.ExecutionValid {
		eq.unsafeHead = ref
		eq.metrics.RecordL2Ref("l2_unsafe", ref)
	}
551
	eq.unsafePayloads.Pop()
552
	eq.log.Trace("Executed unsafe payload", "hash", ref.Hash, "number", ref.Number, "timestamp", ref.Time, "l1Origin", ref.L1Origin)
553
	eq.logSyncProgress("unsafe payload from sequencer")
554

protolambda's avatar
protolambda committed
555 556 557 558
	return nil
}

func (eq *EngineQueue) tryNextSafeAttributes(ctx context.Context) error {
protolambda's avatar
protolambda committed
559
	if eq.safeAttributes == nil { // sanity check the attributes are there
560 561
		return nil
	}
562
	// validate the safe attributes before processing them. The engine may have completed processing them through other means.
563 564
	if eq.pendingSafeHead != eq.safeAttributes.parent {
		// Previously the attribute's parent was the pending safe head. If the pending safe head advances so pending safe head's parent is the same as the
565
		// attribute's parent then we need to cancel the attributes.
566
		if eq.pendingSafeHead.ParentHash == eq.safeAttributes.parent.Hash {
567
			eq.log.Warn("queued safe attributes are stale, safehead progressed",
568 569
				"pending_safe_head", eq.pendingSafeHead, "pending_safe_head_parent", eq.pendingSafeHead.ParentID(),
				"attributes_parent", eq.safeAttributes.parent)
570 571
			eq.safeAttributes = nil
			return nil
572
		}
573
		// If something other than a simple advance occurred, perform a full reset
574 575
		return NewResetError(fmt.Errorf("pending safe head changed to %s with parent %s, conflicting with queued safe attributes on top of %s",
			eq.pendingSafeHead, eq.pendingSafeHead.ParentID(), eq.safeAttributes.parent))
576

577
	}
578
	if eq.pendingSafeHead.Number < eq.unsafeHead.Number {
protolambda's avatar
protolambda committed
579
		return eq.consolidateNextSafeAttributes(ctx)
580
	} else if eq.pendingSafeHead.Number == eq.unsafeHead.Number {
protolambda's avatar
protolambda committed
581 582
		return eq.forceNextSafeAttributes(ctx)
	} else {
583 584 585 586
		// For some reason the unsafe head is behind the pending safe head. Log it, and correct it.
		eq.log.Error("invalid sync state, unsafe head is behind pending safe head", "unsafe", eq.unsafeHead, "pending_safe", eq.pendingSafeHead)
		eq.unsafeHead = eq.pendingSafeHead
		eq.engineSyncTarget = eq.pendingSafeHead
587
		eq.metrics.RecordL2Ref("l2_unsafe", eq.unsafeHead)
588
		eq.metrics.RecordL2Ref("l2_engineSyncTarget", eq.unsafeHead)
protolambda's avatar
protolambda committed
589 590 591 592 593 594 595 596 597 598 599
		return nil
	}
}

// consolidateNextSafeAttributes tries to match the next safe attributes against the existing unsafe chain,
// to avoid extra processing or unnecessary unwinding of the chain.
// However, if the attributes do not match, they will be forced with forceNextSafeAttributes.
func (eq *EngineQueue) consolidateNextSafeAttributes(ctx context.Context) error {
	ctx, cancel := context.WithTimeout(ctx, time.Second*10)
	defer cancel()

600
	payload, err := eq.engine.PayloadByNumber(ctx, eq.pendingSafeHead.Number+1)
protolambda's avatar
protolambda committed
601
	if err != nil {
602 603 604 605
		if errors.Is(err, ethereum.NotFound) {
			// engine may have restarted, or inconsistent safe head. We need to reset
			return NewResetError(fmt.Errorf("expected engine was synced and had unsafe block to reconcile, but cannot find the block: %w", err))
		}
606
		return NewTemporaryError(fmt.Errorf("failed to get existing unsafe payload to compare against derived attributes from L1: %w", err))
protolambda's avatar
protolambda committed
607
	}
608 609
	if err := AttributesMatchBlock(eq.safeAttributes.attributes, eq.pendingSafeHead.Hash, payload, eq.log); err != nil {
		eq.log.Warn("L2 reorg: existing unsafe block does not match derived attributes from L1", "err", err, "unsafe", eq.unsafeHead, "pending_safe", eq.pendingSafeHead, "safe", eq.safeHead)
protolambda's avatar
protolambda committed
610 611 612 613 614
		// geth cannot wind back a chain without reorging to a new, previously non-canonical, block
		return eq.forceNextSafeAttributes(ctx)
	}
	ref, err := PayloadToBlockRef(payload, &eq.cfg.Genesis)
	if err != nil {
615
		return NewResetError(fmt.Errorf("failed to decode L2 block ref from payload: %w", err))
protolambda's avatar
protolambda committed
616
	}
617 618 619 620 621 622 623
	eq.pendingSafeHead = ref
	if eq.safeAttributes.isLastInSpan {
		eq.safeHead = ref
		eq.needForkchoiceUpdate = true
		eq.metrics.RecordL2Ref("l2_safe", ref)
		eq.postProcessSafeL2()
	}
protolambda's avatar
protolambda committed
624
	// unsafe head stays the same, we did not reorg the chain.
s7v7nislands's avatar
s7v7nislands committed
625
	eq.safeAttributes = nil
626
	eq.logSyncProgress("reconciled with L1")
627

protolambda's avatar
protolambda committed
628 629 630 631 632
	return nil
}

// forceNextSafeAttributes inserts the provided attributes, reorging away any conflicting unsafe chain.
func (eq *EngineQueue) forceNextSafeAttributes(ctx context.Context) error {
s7v7nislands's avatar
s7v7nislands committed
633
	if eq.safeAttributes == nil {
protolambda's avatar
protolambda committed
634 635
		return nil
	}
636
	attrs := eq.safeAttributes.attributes
637
	errType, err := eq.StartPayload(ctx, eq.pendingSafeHead, attrs, true)
638 639 640
	if err == nil {
		_, errType, err = eq.ConfirmPayload(ctx)
	}
641 642 643 644 645 646
	if err != nil {
		switch errType {
		case BlockInsertTemporaryErr:
			// RPC errors are recoverable, we can retry the buffered payload attributes later.
			return NewTemporaryError(fmt.Errorf("temporarily cannot insert new safe block: %w", err))
		case BlockInsertPrestateErr:
647
			_ = eq.CancelPayload(ctx, true)
648 649
			return NewResetError(fmt.Errorf("need reset to resolve pre-state problem: %w", err))
		case BlockInsertPayloadErr:
650
			_ = eq.CancelPayload(ctx, true)
651 652 653
			eq.log.Warn("could not process payload derived from L1 data, dropping batch", "err", err)
			// Count the number of deposits to see if the tx list is deposit only.
			depositCount := 0
654 655
			for _, tx := range attrs.Transactions {
				if len(tx) > 0 && tx[0] == types.DepositTxType {
656
					depositCount += 1
657
				}
658
			}
659 660 661
			// Deposit transaction execution errors are suppressed in the execution engine, but if the
			// block is somehow invalid, there is nothing we can do to recover & we should exit.
			// TODO: Can this be triggered by an empty batch with invalid data (like parent hash or gas limit?)
662
			if len(attrs.Transactions) == depositCount {
663
				eq.log.Error("deposit only block was invalid", "parent", eq.safeAttributes.parent, "err", err)
664
				return NewCriticalError(fmt.Errorf("failed to process block with only deposit transactions: %w", err))
665
			}
666
			// drop the payload without inserting it
s7v7nislands's avatar
s7v7nislands committed
667
			eq.safeAttributes = nil
668 669
			// Revert the pending safe head to the safe head.
			eq.pendingSafeHead = eq.safeHead
670
			// suppress the error b/c we want to retry with the next batch from the batch queue
671 672
			// If there is no valid batch the node will eventually force a deposit only block. If
			// the deposit only block fails, this will return the critical error above.
673 674
			return nil

675 676
		default:
			return NewCriticalError(fmt.Errorf("unknown InsertHeadBlock error type %d: %w", errType, err))
677
		}
protolambda's avatar
protolambda committed
678
	}
s7v7nislands's avatar
s7v7nislands committed
679
	eq.safeAttributes = nil
680 681 682 683 684 685
	eq.logSyncProgress("processed safe block derived from L1")

	return nil
}

func (eq *EngineQueue) StartPayload(ctx context.Context, parent eth.L2BlockRef, attrs *eth.PayloadAttributes, updateSafe bool) (errType BlockInsertionErrType, err error) {
686 687 688
	if eq.isEngineSyncing() {
		return BlockInsertTemporaryErr, fmt.Errorf("engine is in progess of p2p sync")
	}
689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712
	if eq.buildingID != (eth.PayloadID{}) {
		eq.log.Warn("did not finish previous block building, starting new building now", "prev_onto", eq.buildingOnto, "prev_payload_id", eq.buildingID, "new_onto", parent)
		// TODO: maybe worth it to force-cancel the old payload ID here.
	}
	fc := eth.ForkchoiceState{
		HeadBlockHash:      parent.Hash,
		SafeBlockHash:      eq.safeHead.Hash,
		FinalizedBlockHash: eq.finalized.Hash,
	}
	id, errTyp, err := StartPayload(ctx, eq.engine, fc, attrs)
	if err != nil {
		return errTyp, err
	}
	eq.buildingID = id
	eq.buildingSafe = updateSafe
	eq.buildingOnto = parent
	return BlockInsertOK, nil
}

func (eq *EngineQueue) ConfirmPayload(ctx context.Context) (out *eth.ExecutionPayload, errTyp BlockInsertionErrType, err error) {
	if eq.buildingID == (eth.PayloadID{}) {
		return nil, BlockInsertPrestateErr, fmt.Errorf("cannot complete payload building: not currently building a payload")
	}
	if eq.buildingOnto.Hash != eq.unsafeHead.Hash { // E.g. when safe-attributes consolidation fails, it will drop the existing work.
713
		eq.log.Warn("engine is building block that reorgs previous unsafe head", "onto", eq.buildingOnto, "unsafe", eq.unsafeHead)
714 715 716 717 718 719
	}
	fc := eth.ForkchoiceState{
		HeadBlockHash:      common.Hash{}, // gets overridden
		SafeBlockHash:      eq.safeHead.Hash,
		FinalizedBlockHash: eq.finalized.Hash,
	}
720 721 722
	// Update the safe head if the payload is built with the last attributes in the batch.
	updateSafe := eq.buildingSafe && eq.safeAttributes != nil && eq.safeAttributes.isLastInSpan
	payload, errTyp, err := ConfirmPayload(ctx, eq.log, eq.engine, fc, eq.buildingID, updateSafe)
723 724 725
	if err != nil {
		return nil, errTyp, fmt.Errorf("failed to complete building on top of L2 chain %s, id: %s, error (%d): %w", eq.buildingOnto, eq.buildingID, errTyp, err)
	}
protolambda's avatar
protolambda committed
726 727
	ref, err := PayloadToBlockRef(payload, &eq.cfg.Genesis)
	if err != nil {
728
		return nil, BlockInsertPayloadErr, NewResetError(fmt.Errorf("failed to decode L2 block ref from payload: %w", err))
protolambda's avatar
protolambda committed
729
	}
730

protolambda's avatar
protolambda committed
731
	eq.unsafeHead = ref
732
	eq.engineSyncTarget = ref
733
	eq.metrics.RecordL2Ref("l2_unsafe", ref)
734
	eq.metrics.RecordL2Ref("l2_engineSyncTarget", ref)
735

736
	if eq.buildingSafe {
737 738 739 740 741 742
		eq.pendingSafeHead = ref
		if updateSafe {
			eq.safeHead = ref
			eq.postProcessSafeL2()
			eq.metrics.RecordL2Ref("l2_safe", ref)
		}
743 744 745 746 747 748
	}
	eq.resetBuildingState()
	return payload, BlockInsertOK, nil
}

func (eq *EngineQueue) CancelPayload(ctx context.Context, force bool) error {
749 750 751
	if eq.buildingID == (eth.PayloadID{}) { // only cancel if there is something to cancel.
		return nil
	}
752 753 754 755 756 757 758 759 760 761
	// the building job gets wrapped up as soon as the payload is retrieved, there's no explicit cancel in the Engine API
	eq.log.Error("cancelling old block sealing job", "payload", eq.buildingID)
	_, err := eq.engine.GetPayload(ctx, eq.buildingID)
	if err != nil {
		eq.log.Error("failed to cancel block building job", "payload", eq.buildingID, "err", err)
		if !force {
			return err
		}
	}
	eq.resetBuildingState()
protolambda's avatar
protolambda committed
762 763 764
	return nil
}

765 766 767 768 769 770 771 772 773 774
// BuildingPayload reports the block-building job currently tracked by the queue: the block it
// builds onto, its payload ID, and whether it was started as a safe build.
// All three are zero values when no job is tracked.
func (eq *EngineQueue) BuildingPayload() (onto eth.L2BlockRef, id eth.PayloadID, safe bool) {
	onto = eq.buildingOnto
	id = eq.buildingID
	safe = eq.buildingSafe
	return onto, id, safe
}

// resetBuildingState forgets the tracked block-building job by zeroing the payload ID,
// the block it builds onto, and the safe-build flag.
func (eq *EngineQueue) resetBuildingState() {
	var (
		noID   eth.PayloadID
		noOnto eth.L2BlockRef
	)
	eq.buildingSafe = false
	eq.buildingOnto = noOnto
	eq.buildingID = noID
}

pengin7384's avatar
pengin7384 committed
775
// Reset walks the L2 chain backwards until it finds an L2 block whose L1 origin is canonical.
protolambda's avatar
protolambda committed
776
// The unsafe head is set to the head of the L2 chain, unless the existing safe head is not canonical.
777
func (eq *EngineQueue) Reset(ctx context.Context, _ eth.L1BlockRef, _ eth.SystemConfig) error {
778
	result, err := sync.FindL2Heads(ctx, eq.cfg, eq.l1Fetcher, eq.engine, eq.log, eq.syncCfg)
779
	if err != nil {
780
		return NewTemporaryError(fmt.Errorf("failed to find the L2 Heads to start from: %w", err))
protolambda's avatar
protolambda committed
781
	}
protolambda's avatar
protolambda committed
782
	finalized, safe, unsafe := result.Finalized, result.Safe, result.Unsafe
783
	l1Origin, err := eq.l1Fetcher.L1BlockRefByHash(ctx, safe.L1Origin.Hash)
protolambda's avatar
protolambda committed
784
	if err != nil {
785
		return NewTemporaryError(fmt.Errorf("failed to fetch the new L1 progress: origin: %v; err: %w", safe.L1Origin, err))
protolambda's avatar
protolambda committed
786
	}
787
	if safe.Time < l1Origin.Time {
788 789
		return NewResetError(fmt.Errorf("cannot reset block derivation to start at L2 block %s with time %d older than its L1 origin %s with time %d, time invariant is broken",
			safe, safe.Time, l1Origin, l1Origin.Time))
790
	}
791

792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810
	// Walk back L2 chain to find the L1 origin that is old enough to start buffering channel data from.
	pipelineL2 := safe
	for {
		afterL2Genesis := pipelineL2.Number > eq.cfg.Genesis.L2.Number
		afterL1Genesis := pipelineL2.L1Origin.Number > eq.cfg.Genesis.L1.Number
		afterChannelTimeout := pipelineL2.L1Origin.Number+eq.cfg.ChannelTimeout > l1Origin.Number
		if afterL2Genesis && afterL1Genesis && afterChannelTimeout {
			parent, err := eq.engine.L2BlockRefByHash(ctx, pipelineL2.ParentHash)
			if err != nil {
				return NewResetError(fmt.Errorf("failed to fetch L2 parent block %s", pipelineL2.ParentID()))
			}
			pipelineL2 = parent
		} else {
			break
		}
	}
	pipelineOrigin, err := eq.l1Fetcher.L1BlockRefByHash(ctx, pipelineL2.L1Origin.Hash)
	if err != nil {
		return NewTemporaryError(fmt.Errorf("failed to fetch the new L1 progress: origin: %s; err: %w", pipelineL2.L1Origin, err))
811
	}
812
	l1Cfg, err := eq.engine.SystemConfigByL2Hash(ctx, pipelineL2.Hash)
813
	if err != nil {
Joshua Gutow's avatar
Joshua Gutow committed
814
		return NewTemporaryError(fmt.Errorf("failed to fetch L1 config of L2 block %s: %w", pipelineL2.ID(), err))
815
	}
816
	eq.log.Debug("Reset engine queue", "safeHead", safe, "unsafe", unsafe, "safe_timestamp", safe.Time, "unsafe_timestamp", unsafe.Time, "l1Origin", l1Origin)
817
	eq.unsafeHead = unsafe
818
	eq.engineSyncTarget = unsafe
819
	eq.safeHead = safe
820
	eq.pendingSafeHead = safe
Joshua Gutow's avatar
Joshua Gutow committed
821
	eq.safeAttributes = nil
822
	eq.finalized = finalized
823
	eq.resetBuildingState()
824
	eq.needForkchoiceUpdate = true
825
	eq.finalityData = eq.finalityData[:0]
826
	// note: finalizedL1 and triedFinalizeAt do not reset, since these do not change between reorgs.
827
	// note: we do not clear the unsafe payloads queue; if the payloads are not applicable anymore the parent hash checks will clear out the old payloads.
828
	eq.origin = pipelineOrigin
829
	eq.sysCfg = l1Cfg
830
	eq.metrics.RecordL2Ref("l2_finalized", finalized)
831 832
	eq.metrics.RecordL2Ref("l2_safe", safe)
	eq.metrics.RecordL2Ref("l2_unsafe", unsafe)
833
	eq.metrics.RecordL2Ref("l2_engineSyncTarget", unsafe)
834
	eq.logSyncProgress("reset derivation work")
835
	return io.EOF
protolambda's avatar
protolambda committed
836
}
clabby's avatar
clabby committed
837

838 839
// UnsafeL2SyncTarget retrieves the first queued-up L2 unsafe payload, or a zeroed reference if there is none.
func (eq *EngineQueue) UnsafeL2SyncTarget() eth.L2BlockRef {
clabby's avatar
clabby committed
840
	if first := eq.unsafePayloads.Peek(); first != nil {
841 842 843 844 845
		ref, err := PayloadToBlockRef(first, &eq.cfg.Genesis)
		if err != nil {
			return eth.L2BlockRef{}
		}
		return ref
clabby's avatar
clabby committed
846
	} else {
847
		return eth.L2BlockRef{}
clabby's avatar
clabby committed
848 849
	}
}