op-batcher: syncActions is aware of safe=unsafe edge case (#13292)

* add additional test case for computeSyncActions * fixup test * check for unsafe = safe edge case & replace oldestUnsafeBlock with nextSafeBlock * add test case for no progress and safe=unsafe * refine log * rename variable * harmonize log ordering and labels * tighten up test behaviour for expectedLogs * add test case: no progress + unsafe=safe + blocks in state and fix behaviour so we don't try to fetch unsafe blocks if there aren't any, even when there are blocks in state * typo

op-batcher: syncActions is aware of safe=unsafe edge case (#13292)
* add additional test case for computeSyncActions * fixup test * check for unsafe = safe edge case & replace oldestUnsafeBlock with nextSafeBlock * add test case for no progress and safe=unsafe * refine log * rename variable * harmonize log ordering and labels * tighten up test behaviour for expectedLogs * add test case: no progress + unsafe=safe + blocks in state and fix behaviour so we don't try to fetch unsafe blocks if there aren't any, even when there are blocks in state * typo
53034d28 · George Knee · GitHub · 2824b3b2 · 53034d28 · 53034d28
Commit 53034d28 authored Dec 09, 2024 by George Knee Committed by GitHub Dec 09, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 89 additions and 27 deletions

sync_actions.go op-batcher/batcher/sync_actions.go +29 -20

sync_actions_test.go op-batcher/batcher/sync_actions_test.go +60 -7

No files found.
--- a/op-batcher/batcher/sync_actions.go
+++ b/op-batcher/batcher/sync_actions.go
@@ -48,14 +48,17 @@ func computeSyncActions[T channelStatuser](newSyncStatus eth.SyncStatus, prevCur
 		return syncActions{}, true
 	}

+	var allUnsafeBlocks *inclusiveBlockRange
+	if newSyncStatus.UnsafeL2.Number > newSyncStatus.SafeL2.Number {
+		allUnsafeBlocks = &inclusiveBlockRange{newSyncStatus.SafeL2.Number + 1, newSyncStatus.UnsafeL2.Number}
+	}
+
 	// PART 2: checks involving only the oldest block in the state
 	oldestBlockInState, hasBlocks := blocks.Peek()
-	oldestUnsafeBlockNum := newSyncStatus.SafeL2.Number + 1
-	youngestUnsafeBlockNum := newSyncStatus.UnsafeL2.Number

 	if !hasBlocks {
 		s := syncActions{
-			blocksToLoad: &inclusiveBlockRange{oldestUnsafeBlockNum, youngestUnsafeBlockNum},
+			blocksToLoad: allUnsafeBlocks,
 		}
 		l.Info("no blocks in state", "syncActions", s)
 		return s, false
@@ -63,17 +66,21 @@ func computeSyncActions[T channelStatuser](newSyncStatus eth.SyncStatus, prevCur

 	// These actions apply in multiple unhappy scenarios below, where
 	// we detect that the existing state is invalidated
-	// and we need to start over from the sequencer's oldest
-	// unsafe (and not safe) block.
+	// and we need to start over, loading all unsafe blocks
+	// from the sequencer.
 	startAfresh := syncActions{
 		clearState:   &newSyncStatus.SafeL2.L1Origin,
-		blocksToLoad: &inclusiveBlockRange{oldestUnsafeBlockNum, youngestUnsafeBlockNum},
+		blocksToLoad: allUnsafeBlocks,
 	}

 	oldestBlockInStateNum := oldestBlockInState.NumberU64()
+	nextSafeBlockNum := newSyncStatus.SafeL2.Number + 1

-	if oldestUnsafeBlockNum < oldestBlockInStateNum {
-		l.Warn("oldest unsafe block is below oldest block in state", "syncActions", startAfresh, "oldestBlockInState", oldestBlockInState, "newSafeBlock", newSyncStatus.SafeL2)
+	if nextSafeBlockNum < oldestBlockInStateNum {
+		l.Warn("next safe block is below oldest block in state",
+			"syncActions", startAfresh,
+			"oldestBlockInState", oldestBlockInState,
+			"safeL2", newSyncStatus.SafeL2)
 		return startAfresh, false
 	}

@@ -81,25 +88,25 @@ func computeSyncActions[T channelStatuser](newSyncStatus eth.SyncStatus, prevCur
 	newestBlockInState := blocks[blocks.Len()-1]
 	newestBlockInStateNum := newestBlockInState.NumberU64()

-	numBlocksToDequeue := oldestUnsafeBlockNum - oldestBlockInStateNum
+	numBlocksToDequeue := nextSafeBlockNum - oldestBlockInStateNum

 	if numBlocksToDequeue > uint64(blocks.Len()) {
 		// This could happen if the batcher restarted.
 		// The sequencer may have derived the safe chain
 		// from channels sent by a previous batcher instance.
-		l.Warn("oldest unsafe block above newest block in state, clearing channel manager state",
-			"oldestUnsafeBlockNum", oldestUnsafeBlockNum,
+		l.Warn("safe head above newest block in state, clearing channel manager state",
+			"syncActions", startAfresh,
+			"safeL2", newSyncStatus.SafeL2,
 			"newestBlockInState", eth.ToBlockID(newestBlockInState),
-			"syncActions",
-			startAfresh)
+		)
 		return startAfresh, false
 	}

 	if numBlocksToDequeue > 0 && blocks[numBlocksToDequeue-1].Hash() != newSyncStatus.SafeL2.Hash {
 		l.Warn("safe chain reorg, clearing channel manager state",
+			"syncActions", startAfresh,
 			"existingBlock", eth.ToBlockID(blocks[numBlocksToDequeue-1]),
-			"newSafeBlock", newSyncStatus.SafeL2,
-			"syncActions", startAfresh)
+			"safeL2", newSyncStatus.SafeL2)
 		return startAfresh, false
 	}

@@ -114,9 +121,9 @@ func computeSyncActions[T channelStatuser](newSyncStatus eth.SyncStatus, prevCur
 			// that the derivation pipeline may have stalled
 			// e.g. because of Holocene strict ordering rules.
 			l.Warn("sequencer did not make expected progress",
+				"syncActions", startAfresh,
 				"existingBlock", ch.LatestL2(),
-				"newSafeBlock", newSyncStatus.SafeL2,
-				"syncActions", startAfresh)
+				"safeL2", newSyncStatus.SafeL2)
 			return startAfresh, false
 		}
 	}
@@ -132,12 +139,14 @@ func computeSyncActions[T channelStatuser](newSyncStatus eth.SyncStatus, prevCur
 		numChannelsToPrune++
 	}

-	start := newestBlockInStateNum + 1
-	end := youngestUnsafeBlockNum
+	var allUnsafeBlocksAboveState *inclusiveBlockRange
+	if newSyncStatus.UnsafeL2.Number > newestBlockInStateNum {
+		allUnsafeBlocksAboveState = &inclusiveBlockRange{newestBlockInStateNum + 1, newSyncStatus.UnsafeL2.Number}
+	}

 	return syncActions{
 		blocksToPrune:   int(numBlocksToDequeue),
 		channelsToPrune: numChannelsToPrune,
-		blocksToLoad:    &inclusiveBlockRange{start, end},
+		blocksToLoad:    allUnsafeBlocksAboveState,
 	}, false
 }
--- a/op-batcher/batcher/sync_actions_test.go
+++ b/op-batcher/batcher/sync_actions_test.go
@@ -55,6 +55,8 @@ func TestBatchSubmitter_computeSyncActions(t *testing.T) {
 		timedOut:       false,
 	}

+	happyCaseLogs := []string{} // in the happy case we expect no logs
+
 	type TestCase struct {
 		name string
 		// inputs
@@ -105,7 +107,7 @@ func TestBatchSubmitter_computeSyncActions(t *testing.T) {
 				clearState:   &eth.BlockID{Number: 1},
 				blocksToLoad: &inclusiveBlockRange{101, 109},
 			},
-			expectedLogs: []string{"oldest unsafe block is below oldest block in state"},
+			expectedLogs: []string{"next safe block is below oldest block in state"},
 		},
 		{name: "unexpectedly good progress",
 			// This can happen if another batcher instance got some blocks
@@ -123,7 +125,7 @@ func TestBatchSubmitter_computeSyncActions(t *testing.T) {
 				clearState:   &eth.BlockID{Number: 1},
 				blocksToLoad: &inclusiveBlockRange{105, 109},
 			},
-			expectedLogs: []string{"oldest unsafe block above newest block in state"},
+			expectedLogs: []string{"safe head above newest block in state"},
 		},
 		{name: "safe chain reorg",
 			// This can happen if there is an L1 reorg, the safe chain is at an acceptable
@@ -161,6 +163,23 @@ func TestBatchSubmitter_computeSyncActions(t *testing.T) {
 			},
 			expectedLogs: []string{"sequencer did not make expected progress"},
 		},
+		{name: "failed to make expected progress (unsafe=safe)",
+			// Edge case where unsafe = safe
+			newSyncStatus: eth.SyncStatus{
+				HeadL1:    eth.BlockRef{Number: 3},
+				CurrentL1: eth.BlockRef{Number: 2},
+				SafeL2:    eth.L2BlockRef{Number: 101, Hash: block101.Hash(), L1Origin: eth.BlockID{Number: 1}},
+				UnsafeL2:  eth.L2BlockRef{Number: 101},
+			},
+			prevCurrentL1: eth.BlockRef{Number: 1},
+			blocks:        queue.Queue[*types.Block]{block102, block103},
+			channels:      []channelStatuser{channel103},
+			expected: syncActions{
+				clearState: &eth.BlockID{Number: 1},
+				// no blocks to load since there are no unsafe blocks
+			},
+			expectedLogs: []string{"sequencer did not make expected progress"},
+		},
 		{name: "no progress",
 			// This can happen if we have a long channel duration
 			// and we didn't submit or have any txs confirmed since
@@ -192,6 +211,7 @@ func TestBatchSubmitter_computeSyncActions(t *testing.T) {
 			expected: syncActions{
 				blocksToLoad: &inclusiveBlockRange{104, 109},
 			},
+			expectedLogs: []string{"no blocks in state"},
 		},
 		{name: "happy path",
 			// This happens when the safe chain is being progressed as expected:
@@ -225,10 +245,39 @@ func TestBatchSubmitter_computeSyncActions(t *testing.T) {
 				channelsToPrune: 1,
 				blocksToLoad:    &inclusiveBlockRange{105, 109},
 			},
+			expectedLogs: happyCaseLogs,
+		},
+		{name: "no progress + unsafe=safe",
+			newSyncStatus: eth.SyncStatus{
+				HeadL1:    eth.BlockRef{Number: 5},
+				CurrentL1: eth.BlockRef{Number: 2},
+				SafeL2:    eth.L2BlockRef{Number: 100},
+				UnsafeL2:  eth.L2BlockRef{Number: 100},
+			},
+			prevCurrentL1: eth.BlockRef{Number: 1},
+			blocks:        queue.Queue[*types.Block]{},
+			channels:      []channelStatuser{},
+			expected:      syncActions{},
+			expectedLogs:  []string{"no blocks in state"},
+		},
+		{name: "no progress + unsafe=safe + blocks in state",
+			newSyncStatus: eth.SyncStatus{
+				HeadL1:    eth.BlockRef{Number: 5},
+				CurrentL1: eth.BlockRef{Number: 2},
+				SafeL2:    eth.L2BlockRef{Number: 101, Hash: block101.Hash()},
+				UnsafeL2:  eth.L2BlockRef{Number: 101},
+			},
+			prevCurrentL1: eth.BlockRef{Number: 1},
+			blocks:        queue.Queue[*types.Block]{block101},
+			channels:      []channelStatuser{},
+			expected: syncActions{
+				blocksToPrune: 1,
+			},
+			expectedLogs: happyCaseLogs,
 		},
 	}

-	for _, tc := range testCases {
+	for _, tc := range testCases[len(testCases)-1:] {

 		t.Run(tc.name, func(t *testing.T) {
 			l, h := testlog.CaptureLogger(t, log.LevelDebug)
@@ -237,11 +286,15 @@ func TestBatchSubmitter_computeSyncActions(t *testing.T) {
 				tc.newSyncStatus, tc.prevCurrentL1, tc.blocks, tc.channels, l,
 			)

-			require.Equal(t, tc.expected, result)
+			require.Equal(t, tc.expected, result, "unexpected actions")
 			require.Equal(t, tc.expectedSeqOutOfSync, outOfSync)
-			for _, e := range tc.expectedLogs {
-				r := h.FindLog(testlog.NewMessageContainsFilter(e))
-				require.NotNil(t, r, "could not find log message containing '%s'", e)
+			if tc.expectedLogs == nil {
+				require.Empty(t, h.Logs, "expected no logs but found some", "logs", h.Logs)
+			} else {
+				for _, e := range tc.expectedLogs {
+					r := h.FindLog(testlog.NewMessageContainsFilter(e))
+					require.NotNil(t, r, "could not find log message containing '%s'", e)
+				}
 			}
 		})
 	}