From 68dcaf412f1b9a3037c5d34fc2ee5f33a36cf033 Mon Sep 17 00:00:00 2001
From: Tei Im <tei.im@testinprod.io>
Date: Wed, 28 Jun 2023 19:46:54 +0900
Subject: [PATCH] op-node: Fix reorg depth check, Add skip-sanity-check flag

---
 op-node/flags/flags.go                |  8 +++++++
 op-node/rollup/derive/engine_queue.go |  2 +-
 op-node/rollup/sync/config.go         |  2 ++
 op-node/rollup/sync/start.go          | 23 +++++++++++--------
 op-node/rollup/sync/start_test.go     | 33 ++++++++++++++++++++++++++-
 op-node/service.go                    |  1 +
 6 files changed, 58 insertions(+), 11 deletions(-)

diff --git a/op-node/flags/flags.go b/op-node/flags/flags.go
index 1f8c9c9e1..64df1c9fd 100644
--- a/op-node/flags/flags.go
+++ b/op-node/flags/flags.go
@@ -221,6 +221,13 @@ var (
 		Required: false,
 		Value:    false,
 	}
+	SkipSanityCheck = &cli.BoolFlag{
+		Name:     "skip-sanity-check",
+		Usage:    "Skip chain sanity check on pipeline reset",
+		EnvVars:  prefixEnvVars("SKIP_SANITY_CHECK"),
+		Required: false,
+		Value:    false,
+	}
 )
 
 var requiredFlags = []cli.Flag{
@@ -260,6 +267,7 @@ var optionalFlags = []cli.Flag{
 	BackupL2UnsafeSyncRPC,
 	BackupL2UnsafeSyncRPCTrustRPC,
 	L2EngineP2PEnabled,
+	SkipSanityCheck,
 }
 
 // Flags contains the list of configuration options available to the binary.
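Note (not part of the patch): the new flag defaults to false and follows the same shape as the other optional booleans above, so it can be toggled on the command line or through the environment. A minimal standalone urfave/cli/v2 sketch of that behaviour; the OP_NODE_SKIP_SANITY_CHECK environment variable name is an assumption about what prefixEnvVars produces, and the program name is made up:

package main

import (
	"fmt"
	"os"

	"github.com/urfave/cli/v2"
)

func main() {
	// Hypothetical stand-in for the SkipSanityCheck flag defined above; the
	// real definition derives its env var via prefixEnvVars("SKIP_SANITY_CHECK").
	skip := &cli.BoolFlag{
		Name:    "skip-sanity-check",
		Usage:   "Skip chain sanity check on pipeline reset",
		EnvVars: []string{"OP_NODE_SKIP_SANITY_CHECK"}, // assumed prefix
		Value:   false,
	}
	app := &cli.App{
		Name:  "flag-demo",
		Flags: []cli.Flag{skip},
		Action: func(ctx *cli.Context) error {
			// Prints false unless --skip-sanity-check or the env var is set.
			fmt.Println("skip-sanity-check =", ctx.Bool(skip.Name))
			return nil
		},
	}
	if err := app.Run(os.Args); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}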
diff --git a/op-node/rollup/derive/engine_queue.go b/op-node/rollup/derive/engine_queue.go
index 915e440d1..523ebe0b0 100644
--- a/op-node/rollup/derive/engine_queue.go
+++ b/op-node/rollup/derive/engine_queue.go
@@ -749,7 +749,7 @@ func (eq *EngineQueue) resetBuildingState() {
 // Reset walks the L2 chain backwards until it finds an L2 block whose L1 origin is canonical.
 // The unsafe head is set to the head of the L2 chain, unless the existing safe head is not canonical.
 func (eq *EngineQueue) Reset(ctx context.Context, _ eth.L1BlockRef, _ eth.SystemConfig) error {
-	result, err := sync.FindL2Heads(ctx, eq.cfg, eq.l1Fetcher, eq.engine, eq.log)
+	result, err := sync.FindL2Heads(ctx, eq.cfg, eq.l1Fetcher, eq.engine, eq.log, eq.syncCfg)
 	if err != nil {
 		return NewTemporaryError(fmt.Errorf("failed to find the L2 Heads to start from: %w", err))
 	}
diff --git a/op-node/rollup/sync/config.go b/op-node/rollup/sync/config.go
index 2441b00d9..4b83170b8 100644
--- a/op-node/rollup/sync/config.go
+++ b/op-node/rollup/sync/config.go
@@ -3,4 +3,6 @@ package sync
 type Config struct {
 	// EngineP2PEnabled is true when the EngineQueue can trigger execution engine P2P sync.
 	EngineP2PEnabled bool `json:"engine_p2p_enabled"`
+	// SkipSanityCheck is true when the EngineQueue skips the chain sanity check on pipeline reset.
+	SkipSanityCheck bool `json:"skip_sanity_check"`
 }
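Note (not part of the patch): the new field travels the same path as EngineP2PEnabled: NewSyncConfig builds the struct from CLI flags, the EngineQueue holds a reference to it, and FindL2Heads now receives it as an extra argument. A compressed plumbing sketch with stand-in types (not the real op-node packages):

package main

import "fmt"

// Stand-ins for the real op-node types, only to show the wiring.
type SyncConfig struct {
	EngineP2PEnabled bool
	SkipSanityCheck  bool
}

type EngineQueue struct {
	syncCfg *SyncConfig
}

// Reset mirrors how the real EngineQueue forwards its sync config on every
// pipeline reset: sync.FindL2Heads(ctx, cfg, l1, l2, log, eq.syncCfg).
func (eq *EngineQueue) Reset() {
	findL2Heads(eq.syncCfg)
}

func findL2Heads(syncCfg *SyncConfig) {
	if syncCfg.SkipSanityCheck {
		fmt.Println("will jump to the safe head once the highest canonical-origin block is found")
	}
}

func main() {
	// NewSyncConfig reads the flag once at startup; the zero value keeps the
	// existing behaviour (full sanity check on every reset).
	cfg := &SyncConfig{SkipSanityCheck: true}
	(&EngineQueue{syncCfg: cfg}).Reset()
}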
diff --git a/op-node/rollup/sync/start.go b/op-node/rollup/sync/start.go
index 91118222d..38e2cae5a 100644
--- a/op-node/rollup/sync/start.go
+++ b/op-node/rollup/sync/start.go
@@ -102,7 +102,7 @@ func currentHeads(ctx context.Context, cfg *rollup.Config, l2 L2Chain) (*FindHea
 // Plausible: meaning that the blockhash of the L2 block's L1 origin
 // (as reported in the L1 Attributes deposit within the L2 block) is not canonical at another height in the L1 chain,
 // and the same holds for all its ancestors.
-func FindL2Heads(ctx context.Context, cfg *rollup.Config, l1 L1Chain, l2 L2Chain, lgr log.Logger) (result *FindHeadsResult, err error) {
+func FindL2Heads(ctx context.Context, cfg *rollup.Config, l1 L1Chain, l2 L2Chain, lgr log.Logger, syncCfg *Config) (result *FindHeadsResult, err error) {
 	// Fetch current L2 forkchoice state
 	result, err = currentHeads(ctx, cfg, l2)
 	if err != nil {
@@ -170,18 +170,18 @@ func FindL2Heads(ctx context.Context, cfg *rollup.Config, l1 L1Chain, l2 L2Chain
 		if (n.Number == result.Finalized.Number) && (n.Hash != result.Finalized.Hash) {
 			return nil, fmt.Errorf("%w: finalized %s, got: %s", ReorgFinalizedErr, result.Finalized, n)
 		}
-		// Check we are not reorging L2 incredibly deep
-		if n.L1Origin.Number+(MaxReorgSeqWindows*cfg.SeqWindowSize) < prevUnsafe.L1Origin.Number {
-			// If the reorg depth is too large, something is fishy.
-			// This can legitimately happen if L1 goes down for a while. But in that case,
-			// restarting the L2 node with a bigger configured MaxReorgDepth is an acceptable
-			// stopgap solution.
-			return nil, fmt.Errorf("%w: traversed back to L2 block %s, but too deep compared to previous unsafe block %s", TooDeepReorgErr, n, prevUnsafe)
-		}
 
 		// If we don't have a usable unsafe head, then set it
 		if result.Unsafe == (eth.L2BlockRef{}) {
 			result.Unsafe = n
+			// Check we are not reorging L2 incredibly deep
+			if n.L1Origin.Number+(MaxReorgSeqWindows*cfg.SeqWindowSize) < prevUnsafe.L1Origin.Number {
+				// If the reorg depth is too large, something is fishy.
+				// This can legitimately happen if L1 goes down for a while. But in that case,
+				// restarting the L2 node with a bigger configured MaxReorgSeqWindows is an acceptable
+				// stopgap solution.
+				return nil, fmt.Errorf("%w: traversed back to L2 block %s, but too deep compared to previous unsafe block %s", TooDeepReorgErr, n, prevUnsafe)
+			}
 		}
 
 		if ahead {
@@ -212,6 +212,11 @@ func FindL2Heads(ctx context.Context, cfg *rollup.Config, l1 L1Chain, l2 L2Chain
 			return result, nil
 		}
 
+		if syncCfg.SkipSanityCheck && highestL2WithCanonicalL1Origin.Hash == n.Hash {
+			lgr.Info("Found highest L2 block with canonical L1 origin. Skipping further sanity checks and jumping to the safe head")
+			n = result.Safe
+			continue
+		}
 		// Pull L2 parent for next iteration
 		parent, err := l2.L2BlockRefByHash(ctx, n.ParentHash)
 		if err != nil {
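Note (not part of the patch): the relocated check keeps the same condition, n.L1Origin.Number + MaxReorgSeqWindows*cfg.SeqWindowSize < prevUnsafe.L1Origin.Number, but it now fires only when the unsafe head is first chosen instead of on every ancestor visited on the way down to the safe head. A small worked example; the constants 5 and 1 below are illustrative stand-ins for MaxReorgSeqWindows and SeqWindowSize, not the real values:

package main

import "fmt"

// tooDeep mirrors the shape of the condition used by FindL2Heads, with
// illustrative constants standing in for MaxReorgSeqWindows and cfg.SeqWindowSize.
func tooDeep(nOrigin, prevUnsafeOrigin uint64) bool {
	const maxReorgSeqWindows, seqWindowSize = 5, 1
	return nOrigin+maxReorgSeqWindows*seqWindowSize < prevUnsafeOrigin
}

func main() {
	// In the "long traverse to safe head" test below, the previous unsafe head
	// 'H' has L1 origin number 7 and the walk ends at safe head 'B' with origin 1.
	fmt.Println(tooDeep(6, 7)) // false: the newly chosen unsafe head 'G' is not a deep reorg
	fmt.Println(tooDeep(1, 7)) // true: checking every ancestor down to 'B' used to raise TooDeepReorgErr
}

The two new test cases below exercise both sides: "long traverse to safe head" must no longer error, while "reorg too deep" still does, because there the newly chosen unsafe head itself is rolled back past the limit.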
diff --git a/op-node/rollup/sync/start_test.go b/op-node/rollup/sync/start_test.go
index 376ce7767..1b820d22e 100644
--- a/op-node/rollup/sync/start_test.go
+++ b/op-node/rollup/sync/start_test.go
@@ -76,7 +76,7 @@ func (c *syncStartTestCase) Run(t *testing.T) {
 	}
 	lgr := log.New()
 	lgr.SetHandler(log.DiscardHandler())
-	result, err := FindL2Heads(context.Background(), cfg, chain, chain, lgr)
+	result, err := FindL2Heads(context.Background(), cfg, chain, chain, lgr, &Config{})
 	if c.ExpectedErr != nil {
 		require.ErrorIs(t, err, c.ExpectedErr, "expected error")
 		return
@@ -286,6 +286,37 @@ func TestFindSyncStart(t *testing.T) {
 			SafeL2Head:     'D',
 			ExpectedErr:    WrongChainErr,
 		},
+		{
+			// FindL2Heads() keeps walking back to the safe head after finding the canonical unsafe head;
+			// TooDeepReorgErr must not be raised.
+			Name:           "long traverse to safe head",
+			GenesisL1Num:   0,
+			L1:             "abcdefgh",
+			L2:             "ABCDEFGH",
+			NewL1:          "abcdefgx",
+			PreFinalizedL2: 'B',
+			PreSafeL2:      'B',
+			GenesisL1:      'a',
+			GenesisL2:      'A',
+			UnsafeL2Head:   'G',
+			SeqWindowSize:  1,
+			SafeL2Head:     'B',
+			ExpectedErr:    nil,
+		},
+		{
+			// L2 reorg is too deep
+			Name:           "reorg too deep",
+			GenesisL1Num:   0,
+			L1:             "abcdefgh",
+			L2:             "ABCDEFGH",
+			NewL1:          "abijklmn",
+			PreFinalizedL2: 'B',
+			PreSafeL2:      'B',
+			GenesisL1:      'a',
+			GenesisL2:      'A',
+			SeqWindowSize:  1,
+			ExpectedErr:    TooDeepReorgErr,
+		},
 	}
 
 	for _, testCase := range testCases {
diff --git a/op-node/service.go b/op-node/service.go
index 352b6b498..90d2f7b88 100644
--- a/op-node/service.go
+++ b/op-node/service.go
@@ -216,5 +216,6 @@ func NewSnapshotLogger(ctx *cli.Context) (log.Logger, error) {
 func NewSyncConfig(ctx *cli.Context) *sync.Config {
 	return &sync.Config{
 		EngineP2PEnabled: ctx.Bool(flags.L2EngineP2PEnabled.Name),
+		SkipSanityCheck:  ctx.Bool(flags.SkipSanityCheck.Name),
 	}
 }
-- 
2.23.0