Commit c3493a02 authored by protolambda, committed by GitHub

op-node/specs: refactor batch queue, add parent_hash to batches, extend testing (#3221)

* op-node,specs: batches track parent-hash, refactor+test batch queue, update specs

* op-node,specs: implement review suggestions

* op-node,specs: implement suggestions/fixes based on review from mark
Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
parent 63373dc8
@@ -2,6 +2,7 @@ package derive
import (
"context"
"fmt"
"io"
"time"
@@ -63,9 +64,14 @@ func (aq *AttributesQueue) Step(ctx context.Context, outer Progress) error {
}
batch := aq.batches[0]
safeL2Head := aq.next.SafeL2Head()
// sanity check parent hash
if batch.ParentHash != safeL2Head.Hash {
return NewCriticalError(fmt.Errorf("valid batch has bad parent hash %s, expected %s", batch.ParentHash, safeL2Head.Hash))
}
fetchCtx, cancel := context.WithTimeout(ctx, 20*time.Second)
defer cancel()
attrs, err := PreparePayloadAttributes(fetchCtx, aq.config, aq.dl, aq.next.SafeL2Head(), batch.Timestamp, batch.Epoch())
attrs, err := PreparePayloadAttributes(fetchCtx, aq.config, aq.dl, safeL2Head, batch.Timestamp, batch.Epoch())
if err != nil {
return err
}
@@ -68,6 +68,7 @@ func TestAttributesQueue_Step(t *testing.T) {
out.ExpectSafeL2Head(safeHead)
batch := &BatchData{BatchV1{
ParentHash: safeHead.Hash,
EpochNum: rollup.Epoch(l1Info.InfoNum),
EpochHash: l1Info.InfoHash,
Timestamp: safeHead.Time + cfg.BlockTime,
@@ -35,9 +35,10 @@ const (
)
type BatchV1 struct {
EpochNum rollup.Epoch // aka l1 num
EpochHash common.Hash // block hash
Timestamp uint64
ParentHash common.Hash // parent L2 block hash
EpochNum rollup.Epoch // aka l1 num
EpochHash common.Hash // block hash
Timestamp uint64
// no feeRecipient address input, all fees go to a L2 contract
Transactions []hexutil.Bytes
}
This diff is collapsed.
This diff is collapsed.
@@ -3,6 +3,8 @@ package derive
import (
"testing"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/stretchr/testify/assert"
)
@@ -11,6 +13,7 @@ func TestBatchRoundTrip(t *testing.T) {
batches := []*BatchData{
{
BatchV1: BatchV1{
ParentHash: common.Hash{},
EpochNum: 0,
Timestamp: 0,
Transactions: []hexutil.Bytes{},
@@ -18,6 +21,7 @@ func TestBatchRoundTrip(t *testing.T) {
},
{
BatchV1: BatchV1{
ParentHash: common.Hash{31: 0x42},
EpochNum: 1,
Timestamp: 1647026951,
Transactions: []hexutil.Bytes{[]byte{0, 0, 0}, []byte{0x76, 0xfd, 0x7c}},
package derive

import (
	"errors"
	"fmt"

	"github.com/ethereum-optimism/optimism/op-node/eth"
	"github.com/ethereum-optimism/optimism/op-node/rollup"
	"github.com/ethereum/go-ethereum/core/types"
	"github.com/ethereum/go-ethereum/log"
)

var DifferentEpoch = errors.New("batch is of different epoch")

func FilterBatches(log log.Logger, config *rollup.Config, epoch eth.BlockID, minL2Time uint64, maxL2Time uint64, batches []*BatchData) (out []*BatchData) {
	uniqueTime := make(map[uint64]struct{})
	for _, batch := range batches {
		if err := ValidBatch(batch, config, epoch, minL2Time, maxL2Time); err != nil {
			if err == DifferentEpoch {
				log.Trace("ignoring batch of different epoch", "expected_epoch", epoch,
					"epoch", batch.Epoch(), "timestamp", batch.Timestamp, "txs", len(batch.Transactions))
			} else {
				log.Warn("filtered batch", "expected_epoch", epoch, "min", minL2Time, "max", maxL2Time,
					"epoch", batch.Epoch(), "timestamp", batch.Timestamp, "txs", len(batch.Transactions), "err", err)
			}
			continue
		}
		// Check if we have already seen a batch for this L2 block
		if _, ok := uniqueTime[batch.Timestamp]; ok {
			log.Warn("duplicate batch", "epoch", batch.Epoch(), "timestamp", batch.Timestamp, "txs", len(batch.Transactions))
			// block already exists, batch is duplicate (first batch persists, others are ignored)
			continue
		}
		uniqueTime[batch.Timestamp] = struct{}{}
		out = append(out, batch)
	}
	return
}

func ValidBatch(batch *BatchData, config *rollup.Config, epoch eth.BlockID, minL2Time uint64, maxL2Time uint64) error {
	if batch.EpochNum != rollup.Epoch(epoch.Number) {
		// Batch was tagged for past or future epoch,
		// i.e. it was included too late or depends on the given L1 block to be processed first.
		// This is a very common error, batches may just be buffered for a later epoch.
		return DifferentEpoch
	}
	if batch.EpochHash != epoch.Hash {
		return fmt.Errorf("batch was meant for alternative L1 chain")
	}
	if (batch.Timestamp-config.Genesis.L2Time)%config.BlockTime != 0 {
		return fmt.Errorf("bad timestamp %d, not a multiple of the block time", batch.Timestamp)
	}
	if batch.Timestamp < minL2Time {
		return fmt.Errorf("old batch: %d < %d", batch.Timestamp, minL2Time)
	}
	// limit timestamp upper bound to avoid huge amount of empty blocks
	if batch.Timestamp >= maxL2Time {
		return fmt.Errorf("batch too far into future: %d > %d", batch.Timestamp, maxL2Time)
	}
	for i, txBytes := range batch.Transactions {
		if len(txBytes) == 0 {
			return fmt.Errorf("transaction data must not be empty, but tx %d is empty", i)
		}
		if txBytes[0] == types.DepositTxType {
			return fmt.Errorf("sequencers may not embed any deposits into batch data, but tx %d has one", i)
		}
	}
	return nil
}

// FillMissingBatches turns a collection of batches to the input batches for a series of blocks
func FillMissingBatches(batches []*BatchData, epoch eth.BlockID, blockTime, minL2Time, nextL1Time uint64) []*BatchData {
	m := make(map[uint64]*BatchData)
	// The number of L2 blocks per sequencing window is variable, we do not immediately fill to maxL2Time:
	// - ensure at least 1 block
	// - fill up to the next L1 block timestamp, if higher, to keep up with L1 time
	// - fill up to the last valid batch, to keep up with L2 time
	newHeadL2Timestamp := minL2Time
	if nextL1Time > newHeadL2Timestamp+1 {
		newHeadL2Timestamp = nextL1Time - 1
	}
	for _, b := range batches {
		m[b.Timestamp] = b
		if b.Timestamp > newHeadL2Timestamp {
			newHeadL2Timestamp = b.Timestamp
		}
	}
	var out []*BatchData
	for t := minL2Time; t <= newHeadL2Timestamp; t += blockTime {
		b, ok := m[t]
		if ok {
			out = append(out, b)
		} else {
			out = append(out,
				&BatchData{
					BatchV1{
						EpochNum:  rollup.Epoch(epoch.Number),
						EpochHash: epoch.Hash,
						Timestamp: t,
					},
				})
		}
	}
	return out
}

type BatchWithL1InclusionBlock struct {
	L1InclusionBlock eth.L1BlockRef
	Batch            *BatchData
}

type BatchValidity uint8

const (
	// BatchDrop indicates that the batch is invalid, and will always be in the future, unless we reorg
	BatchDrop = iota
	// BatchAccept indicates that the batch is valid and should be processed
	BatchAccept
	// BatchUndecided indicates we are lacking L1 information until we can proceed batch filtering
	BatchUndecided
	// BatchFuture indicates that the batch may be valid, but cannot be processed yet and should be checked again later
	BatchFuture
)

// CheckBatch checks if the given batch can be applied on top of the given l2SafeHead, given the contextual L1 blocks the batch was included in.
// The first entry of the l1Blocks should match the origin of the l2SafeHead. One or more consecutive l1Blocks should be provided.
// In case of only a single L1 block, the decision whether a batch is valid may have to stay undecided.
func CheckBatch(cfg *rollup.Config, log log.Logger, l1Blocks []eth.L1BlockRef, l2SafeHead eth.L2BlockRef, batch *BatchWithL1InclusionBlock) BatchValidity {
	// add details to the log
	log = log.New(
		"batch_timestamp", batch.Batch.Timestamp,
		"parent_hash", batch.Batch.ParentHash,
		"batch_epoch", batch.Batch.Epoch(),
		"txs", len(batch.Batch.Transactions),
	)
	// sanity check we have consistent inputs
	if len(l1Blocks) == 0 {
		log.Warn("missing L1 block input, cannot proceed with batch checking")
		return BatchUndecided
	}
	epoch := l1Blocks[0]
	if epoch.Hash != l2SafeHead.L1Origin.Hash {
		log.Warn("safe L2 head L1 origin does not match batch first l1 block (current epoch)",
			"safe_l2", l2SafeHead, "safe_origin", l2SafeHead.L1Origin, "epoch", epoch)
		return BatchUndecided
	}
	nextTimestamp := l2SafeHead.Time + cfg.BlockTime
	if batch.Batch.Timestamp > nextTimestamp {
		log.Trace("received out-of-order batch for future processing after next batch", "next_timestamp", nextTimestamp)
		return BatchFuture
	}
	if batch.Batch.Timestamp < nextTimestamp {
		log.Warn("dropping batch with old timestamp", "min_timestamp", nextTimestamp)
		return BatchDrop
	}
	// dependent on above timestamp check. If the timestamp is correct, then it must build on top of the safe head.
	if batch.Batch.ParentHash != l2SafeHead.Hash {
		log.Warn("ignoring batch with mismatching parent hash", "current_safe_head", l2SafeHead.Hash)
		return BatchDrop
	}
	// Filter out batches that were included too late.
	if uint64(batch.Batch.EpochNum)+cfg.SeqWindowSize < batch.L1InclusionBlock.Number {
		log.Warn("batch was included too late, sequence window expired")
		return BatchDrop
	}
	// Check the L1 origin of the batch
	batchOrigin := epoch
	if uint64(batch.Batch.EpochNum) < epoch.Number {
		log.Warn("dropped batch, epoch is too old", "minimum", epoch.ID())
		// batch epoch too old
		return BatchDrop
	} else if uint64(batch.Batch.EpochNum) == epoch.Number {
		// Batch is sticking to the current epoch, continue.
	} else if uint64(batch.Batch.EpochNum) == epoch.Number+1 {
		// With only 1 l1Block we cannot look at the next L1 Origin.
		// Note: This means that we are unable to determine validity of a batch
		// without more information. In this case we should bail out until we have
		// more information otherwise the eager algorithm may diverge from a non-eager
		// algorithm.
		if len(l1Blocks) < 2 {
			log.Info("eager batch wants to advance epoch, but could not without more L1 blocks", "current_epoch", epoch.ID())
			return BatchUndecided
		}
		batchOrigin = l1Blocks[1]
	} else {
		log.Warn("batch is for future epoch too far ahead, while it has the next timestamp, so it must be invalid", "current_epoch", epoch.ID())
		return BatchDrop
	}
	if batch.Batch.EpochHash != batchOrigin.Hash {
		log.Warn("batch is for different L1 chain, epoch hash does not match", "expected", batchOrigin.ID())
		return BatchDrop
	}
	// If we ran out of sequencer time drift, then we drop the batch and produce an empty batch instead,
	// as the sequencer is not allowed to include anything past this point without moving to the next epoch.
	if max := batchOrigin.Time + cfg.MaxSequencerDrift; batch.Batch.Timestamp > max {
		log.Warn("batch exceeded sequencer time drift, sequencer must adopt new L1 origin to include transactions again", "max_time", max)
		return BatchDrop
	}
	// We can do this check earlier, but it's a more intensive one, so we do this last.
	for i, txBytes := range batch.Batch.Transactions {
		if len(txBytes) == 0 {
			log.Warn("transaction data must not be empty, but found empty tx", "tx_index", i)
			return BatchDrop
		}
		if txBytes[0] == types.DepositTxType {
			log.Warn("sequencers may not embed any deposits into batch data, but found tx that has one", "tx_index", i)
			return BatchDrop
		}
	}
	return BatchAccept
}
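For context only (not part of the commit): a minimal sketch of how a buffering stage might consume CheckBatch results, with the first `accept`-ed batch (in L1 inclusion order) applied, `future`/`undecided` batches retained, and `drop`-ed batches discarded. The function name stepSketch and its l1Window/buffered parameters are hypothetical; the real consumer is the refactored batch queue, whose diff is not expanded on this page.

// Hypothetical sketch, assuming it lives alongside CheckBatch in the derive package.
package derive

import (
	"github.com/ethereum-optimism/optimism/op-node/eth"
	"github.com/ethereum-optimism/optimism/op-node/rollup"
	"github.com/ethereum/go-ethereum/log"
)

// stepSketch applies CheckBatch to buffered batches: the first accepted batch wins,
// future/undecided batches are kept for a later attempt, dropped batches are forgotten.
func stepSketch(cfg *rollup.Config, logger log.Logger, l1Window []eth.L1BlockRef, safeHead eth.L2BlockRef,
	buffered []*BatchWithL1InclusionBlock) (next *BatchWithL1InclusionBlock, remaining []*BatchWithL1InclusionBlock) {
	for _, b := range buffered {
		switch CheckBatch(cfg, logger, l1Window, safeHead, b) {
		case BatchAccept:
			if next == nil {
				next = b // first accepted batch (by L1 inclusion order) is applied
			}
			// any further accepted batch for the same L2 block is ignored
		case BatchFuture, BatchUndecided:
			remaining = append(remaining, b) // keep buffered, check again later
		case BatchDrop:
			// invalid now and in the future (absent a reorg): remove from the buffer
		}
	}
	return next, remaining
}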
This diff is collapsed.
@@ -162,6 +162,7 @@ func blockToBatch(block *types.Block, w io.Writer) error {
}
batch := &BatchData{BatchV1{
ParentHash: block.ParentHash(),
EpochNum: rollup.Epoch(l1Info.Number),
EpochHash: l1Info.BlockHash,
Timestamp: block.Time(),
@@ -77,7 +77,7 @@ type DerivationPipeline struct {
func NewDerivationPipeline(log log.Logger, cfg *rollup.Config, l1Fetcher L1Fetcher, engine Engine) *DerivationPipeline {
eng := NewEngineQueue(log, cfg, engine)
attributesQueue := NewAttributesQueue(log, cfg, l1Fetcher, eng)
batchQueue := NewBatchQueue(log, cfg, l1Fetcher, attributesQueue)
batchQueue := NewBatchQueue(log, cfg, attributesQueue)
chInReader := NewChannelInReader(log, batchQueue)
bank := NewChannelBank(log, cfg, chInReader)
dataSrc := NewCalldataSource(log, cfg, l1Fetcher)
@@ -30,7 +30,7 @@ type Config struct {
// Note: When L1 has many 1 second consecutive blocks, and L2 grows at fixed 2 seconds,
// the L2 time may still grow beyond this difference.
MaxSequencerDrift uint64 `json:"max_sequencer_drift"`
// Number of epochs (L1 blocks) per sequencing window
// Number of epochs (L1 blocks) per sequencing window, including the epoch L1 origin block itself
SeqWindowSize uint64 `json:"seq_window_size"`
// Number of seconds (w.r.t. L1 time) that a frame can be valid when included in L1
ChannelTimeout uint64 `json:"channel_timeout"`
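For illustration only (not part of the diff): the updated SeqWindowSize comment above means the window counted by SeqWindowSize starts at, and includes, the epoch's own L1 origin block. A hypothetical helper under that assumption:

// sequencingWindow is a hypothetical helper: the sequencing window of an epoch whose
// L1 origin has number epochNum covers seqWindowSize L1 blocks, starting at (and
// including) the origin block itself.
func sequencingWindow(epochNum, seqWindowSize uint64) (firstL1Block, lastL1Block uint64) {
	return epochNum, epochNum + seqWindowSize - 1
}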
@@ -390,16 +392,18 @@ contain.
Recall that a batch contains a list of transactions to be included in a specific L2 block.
A batch is encoded as `batch_version ++ content`, where `content` depends on the version:
A batch is encoded as `batch_version ++ content`, where `content` depends on the `batch_version` (an illustrative encoding sketch follows the field list below):
| `batch_version` | `content` |
| --------------- | --------------------------------------------------------------------- |
| 0 | `rlp_encode([epoch_number, epoch_hash, timestamp, transaction_list])` |
| `batch_version` | `content` |
| --------------- |------------------------------------------------------------------------------------|
| 0 | `rlp_encode([parent_hash, epoch_number, epoch_hash, timestamp, transaction_list])` |
where:
- `batch_version` is a single byte, prefixed before the RLP contents, similar to transaction typing.
- `rlp_encode` is a function that encodes a batch according to the [RLP format], and `[x, y, z]` denotes a list
containing items `x`, `y` and `z`
- `parent_hash` is the block hash of the previous L2 block
- `epoch_number` and `epoch_hash` are the number and hash of the L1 block corresponding to the [sequencing
epoch][g-sequencing-epoch] of the L2 block
- `timestamp` is the timestamp of the L2 block
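For illustration only (not part of the spec diff): a standalone sketch of the version-0 encoding above using go-ethereum's RLP package. The batchV0Content struct and encodeBatchV0 helper are hypothetical names; the op-node's actual implementation lives in its BatchData type.

package main

import (
	"bytes"
	"fmt"

	"github.com/ethereum/go-ethereum/common"
	"github.com/ethereum/go-ethereum/common/hexutil"
	"github.com/ethereum/go-ethereum/rlp"
)

const batchV0Type = 0 // the batch_version prefix byte

// batchV0Content mirrors the field order of the version-0 content:
// [parent_hash, epoch_number, epoch_hash, timestamp, transaction_list]
type batchV0Content struct {
	ParentHash   common.Hash
	EpochNum     uint64
	EpochHash    common.Hash
	Timestamp    uint64
	Transactions []hexutil.Bytes
}

// encodeBatchV0 prefixes the version byte and RLP-encodes the content fields as a list,
// similar to transaction typing.
func encodeBatchV0(c *batchV0Content) ([]byte, error) {
	var buf bytes.Buffer
	buf.WriteByte(batchV0Type)
	if err := rlp.Encode(&buf, c); err != nil {
		return nil, err
	}
	return buf.Bytes(), nil
}

func main() {
	out, err := encodeBatchV0(&batchV0Content{
		ParentHash: common.Hash{31: 0x42},
		EpochNum:   1,
		Timestamp:  1647026951,
	})
	fmt.Println(hexutil.Encode(out), err)
}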
@@ -552,28 +554,67 @@ Note that the presence of any gaps in the batches derived from L1 means that this
[sequencing window][g-sequencing-window] before it can generate empty batches (because the missing batch(es) could have
data in the last L1 block of the window in the worst case).
We also ignore invalid batches, which do not satisfy one of the following constraints:
- The timestamp is aligned to the [block time][g-block-time]:
`(batch.timestamp - genesis_l2_timestamp) % block_time == 0`
- The timestamp is within the allowed range: `min_l2_timestamp <= batch.timestamp < max_l2_timestamp`, where
- all these values are denominated in seconds
- `min_l2_timestamp = prev_l2_timestamp + l2_block_time`
- `prev_l2_timestamp` is the timestamp of the previous L2 block: the last block of the previous epoch,
or the L2 genesis block timestamp if there is no previous epoch.
- `l2_block_time` is a configurable parameter of the time between L2 blocks (on Optimism, 2s)
- `max_l2_timestamp = max(l1_timestamp + max_sequencer_drift, min_l2_timestamp + l2_block_time)`
- `l1_timestamp` is the timestamp of the L1 block associated with the L2 block's epoch
- `max_sequencer_drift` is the maximum amount of time an L2 block's timestamp is allowed to get ahead of the
timestamp of its [L1 origin][g-l1-origin]
- Note that we always have `min_l2_timestamp >= l1_timestamp`, i.e. a L2 block timestamp is always equal or ahead of
the timestamp of its [L1 origin][g-l1-origin].
- The batch is the first batch with `batch.timestamp` in this sequencing window, i.e. one batch per L2 block number.
- The batch only contains sequenced transactions, i.e. it must NOT contain any [deposited-type transactions][
g-deposit-tx-type].
> **TODO** specify `max_sequencer_drift` (see TODO above) (current thinking: on the order of 10 minutes, we've been
> using 2-4 minutes in testnets)
A batch can have 4 different forms of validity:
- `drop`: the batch is invalid, and will remain invalid unless we reorg; it can be removed from the buffer.
- `accept`: the batch is valid and should be processed.
- `undecided`: we lack the L1 information needed to proceed with batch filtering.
- `future`: the batch may be valid, but cannot be processed yet and should be checked again later.
Batches are processed in order of their inclusion on L1: if multiple batches can be `accept`-ed, the first is applied.
Batch validity is derived as follows:
Definitions:
- `batch` as defined in the [Batch format section][batch-format].
- `epoch = safe_l2_head.l1_origin`: an [L1 origin][g-l1-origin] coupled to the batch, with properties:
`number` (L1 block number), `hash` (L1 block hash), and `timestamp` (L1 block timestamp).
- `inclusion_block_number` is the L1 block number when `batch` was first *fully* derived,
i.e. decoded and output by the previous stage.
- `next_timestamp = safe_l2_head.timestamp + block_time` is the expected L2 timestamp the next batch should have,
see [block time information][g-block-time].
- `next_epoch` may not be known yet, but would be the L1 block after `epoch` if available.
- `batch_origin` is either `epoch` or `next_epoch`, depending on validation.
Note that processing of a batch can be deferred until `batch.timestamp <= next_timestamp`,
since `future` batches will have to be retained anyway.
Rules, in validation order:
- `batch.timestamp > next_timestamp` -> `future`: i.e. the batch must be ready to process.
- `batch.timestamp < next_timestamp` -> `drop`: i.e. the batch must not be too old.
- `batch.parent_hash != safe_l2_head.hash` -> `drop`: i.e. the parent hash must be equal to the L2 safe head block hash.
- `batch.epoch_num + sequence_window_size < inclusion_block_number` -> `drop`: i.e. the batch must be included timely.
- `batch.epoch_num < epoch.number` -> `drop`: i.e. the batch origin must not be older than that of the L2 safe head.
- `batch.epoch_num == epoch.number`: define `batch_origin` as `epoch`.
- `batch.epoch_num == epoch.number+1`:
- If `next_epoch` is not known -> `undecided`:
i.e. a batch that changes the L1 origin cannot be processed until we have the L1 origin data.
- If known, then define `batch_origin` as `next_epoch`
- `batch.epoch_num > epoch.number+1` -> `drop`: i.e. the L1 origin cannot change by more than one L1 block per L2 block.
- `batch.epoch_hash != batch_origin.hash` -> `drop`: i.e. a batch must reference a canonical L1 origin,
to prevent batches from being replayed onto unexpected L1 chains.
- `batch.timestamp > batch_origin.time + max_sequencer_drift` -> `drop`: i.e. a batch that does not adopt the next L1
origin in time will be dropped, in favor of an empty batch that can advance the L1 origin.
- `batch.transactions`: `drop` if the `batch.transactions` list contains a transaction
that is invalid or that is exclusively derived by other means:
- any transaction that is empty (zero length byte string)
- any [deposited transactions][g-deposit-tx-type] (identified by the transaction type prefix byte)
If no batch can be `accept`-ed, and the stage has completed buffering of all batches that can fully be read from the L1
block at height `epoch.number + sequence_window_size`, and the `next_epoch` is available,
then an empty batch can be derived with the following properties (a code sketch follows this list):
- `parent_hash = safe_l2_head.hash`
- `timestamp = next_timestamp`
- `transactions` is empty, i.e. no sequencer transactions. Deposited transactions may be added in the next stage.
- If `next_timestamp < next_epoch.time`: the current L1 origin is repeated, to preserve the L2 time invariant.
- `epoch_num = epoch.number`
- `epoch_hash = epoch.hash`
- Otherwise,
- `epoch_num = next_epoch.number`
- `epoch_hash = next_epoch.hash`
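A minimal sketch (not the batch queue's actual code) of deriving the empty batch with the properties above, assuming the caller has already verified that the sequencing window is exhausted. deriveEmptyBatch and its arguments are hypothetical, reusing the op-node derive package types (rollup, eth, BatchData, BatchV1) shown earlier in this commit.

// deriveEmptyBatch is a hypothetical helper: it builds the empty batch described above.
// nextEpoch is nil when the next L1 origin is not yet known, in which case no batch can be derived.
func deriveEmptyBatch(cfg *rollup.Config, safeL2Head eth.L2BlockRef, epoch eth.L1BlockRef, nextEpoch *eth.L1BlockRef) *BatchData {
	if nextEpoch == nil {
		return nil // next_epoch must be available before an empty batch may be generated
	}
	nextTimestamp := safeL2Head.Time + cfg.BlockTime
	origin := epoch
	if nextTimestamp >= nextEpoch.Time {
		origin = *nextEpoch // time to advance the L1 origin, preserving the L2 time invariant otherwise
	}
	return &BatchData{BatchV1{
		ParentHash:   safeL2Head.Hash,
		EpochNum:     rollup.Epoch(origin.Number),
		EpochHash:    origin.Hash,
		Timestamp:    nextTimestamp,
		Transactions: nil, // no sequencer transactions; deposits are added in a later stage
	}}
}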
### Payload Attributes Derivation