txmgr: Restructure internals

cd2975c6 · Joshua Gutow · 0fad37ea · cd2975c6 · cd2975c6 · cd2975c6
Commit cd2975c6 authored Mar 31, 2023 by Joshua Gutow
8 changed files
--- a/op-e2e/migration_test.go
+++ b/op-e2e/migration_test.go
@@ -346,6 +346,7 @@ func TestMigration(t *testing.T) {
 			NumConfirmations:          1,
 			ResubmissionTimeout:       5 * time.Second,
 			SafeAbortNonceTooLowCount: 3,
+			TxNotInMempoolTimeout:     2 * time.Minute,
 		},
 		LogConfig: oplog.CLIConfig{
 			Level:  "info",
@@ -371,6 +372,7 @@ func TestMigration(t *testing.T) {
 			NumConfirmations:          1,
 			ResubmissionTimeout:       3 * time.Second,
 			SafeAbortNonceTooLowCount: 3,
+			TxNotInMempoolTimeout:     2 * time.Minute,
 		},
 		LogConfig: oplog.CLIConfig{
 			Level:  "info",

--- a/op-e2e/setup.go
+++ b/op-e2e/setup.go
@@ -580,6 +580,7 @@ func (cfg SystemConfig) Start(_opts ...SystemConfigOption) (*System, error) {
 			ResubmissionTimeout:       3 * time.Second,
 			ReceiptQueryInterval:      50 * time.Millisecond,
 			NetworkTimeout:            2 * time.Second,
+			TxNotInMempoolTimeout:     2 * time.Minute,
 		},
 		AllowNonFinalized: cfg.NonFinalizedProposals,
 		LogConfig: oplog.CLIConfig{
@@ -615,6 +616,7 @@ func (cfg SystemConfig) Start(_opts ...SystemConfigOption) (*System, error) {
 			ResubmissionTimeout:       3 * time.Second,
 			ReceiptQueryInterval:      50 * time.Millisecond,
 			NetworkTimeout:            2 * time.Second,
+			TxNotInMempoolTimeout:     2 * time.Minute,
 		},
 		LogConfig: oplog.CLIConfig{
 			Level:  "info",

--- a/op-service/txmgr/cli.go
+++ b/op-service/txmgr/cli.go
@@ -28,6 +28,7 @@ const (
 	ResubmissionTimeoutFlagName       = "resubmission-timeout"
 	NetworkTimeoutFlagName            = "network-timeout"
 	TxSendTimeoutFlagName             = "txmgr.send-timeout"
+	TxNotInMempoolTimeoutFlagName     = "txmgr.not-in-mempool-timeout"
 	ReceiptQueryIntervalFlagName      = "txmgr.receipt-query-interval"
 )

@@ -95,6 +96,12 @@ func CLIFlags(envPrefix string) []cli.Flag {
 			Value:  0,
 			EnvVar: opservice.PrefixEnvVar(envPrefix, "TXMGR_TX_SEND_TIMEOUT"),
 		},
+		cli.DurationFlag{
+			Name:   TxNotInMempoolTimeoutFlagName,
+			Usage:  "Timeout for aborting a tx send if the tx does not make it to the mempool.",
+			Value:  2 * time.Minute,
+			EnvVar: opservice.PrefixEnvVar(envPrefix, "TXMGR_TX_NOT_IN_MEMPOOL_TIMEOUT"),
+		},
 		cli.DurationFlag{
 			Name:   ReceiptQueryIntervalFlagName,
 			Usage:  "Frequency to poll for receipts",
@@ -118,6 +125,7 @@ type CLIConfig struct {
 	ReceiptQueryInterval      time.Duration
 	NetworkTimeout            time.Duration
 	TxSendTimeout             time.Duration
+	TxNotInMempoolTimeout     time.Duration
 }

 func (m CLIConfig) Check() error {
@@ -125,16 +133,22 @@ func (m CLIConfig) Check() error {
 		return errors.New("must provide a L1 RPC url")
 	}
 	if m.NumConfirmations == 0 {
-		return errors.New("num confirmations must not be 0")
+		return errors.New("NumConfirmations must not be 0")
 	}
 	if m.NetworkTimeout == 0 {
-		return errors.New("must provide a network timeout")
+		return errors.New("must provide NetworkTimeout")
 	}
 	if m.ResubmissionTimeout == 0 {
-		return errors.New("must provide a resumbission interval")
+		return errors.New("must provide ResubmissionTimeout")
 	}
 	if m.ReceiptQueryInterval == 0 {
-		return errors.New("must provide a receipt query interval")
+		return errors.New("must provide ReceiptQueryInterval")
+	}
+	if m.TxNotInMempoolTimeout == 0 {
+		return errors.New("must provide TxNotInMempoolTimeout")
+	}
+	if m.SafeAbortNonceTooLowCount == 0 {
+		return errors.New("SafeAbortNonceTooLowCount must not be 0")
 	}
 	if err := m.SignerCLIConfig.Check(); err != nil {
 		return err
@@ -157,6 +171,7 @@ func ReadCLIConfig(ctx *cli.Context) CLIConfig {
 		ReceiptQueryInterval:      ctx.GlobalDuration(ReceiptQueryIntervalFlagName),
 		NetworkTimeout:            ctx.GlobalDuration(NetworkTimeoutFlagName),
 		TxSendTimeout:             ctx.GlobalDuration(TxSendTimeoutFlagName),
+		TxNotInMempoolTimeout:     ctx.GlobalDuration(TxNotInMempoolTimeoutFlagName),
 	}
 }

@@ -197,6 +212,7 @@ func NewConfig(cfg CLIConfig, l log.Logger) (Config, error) {
 		ResubmissionTimeout:       cfg.ResubmissionTimeout,
 		ChainID:                   chainID,
 		TxSendTimeout:             cfg.TxSendTimeout,
+		TxNotInMempoolTimeout:     cfg.TxNotInMempoolTimeout,
 		NetworkTimeout:            cfg.NetworkTimeout,
 		ReceiptQueryInterval:      cfg.ReceiptQueryInterval,
 		NumConfirmations:          cfg.NumConfirmations,
@@ -222,6 +238,10 @@ type Config struct {
 	// By default it is unbounded. If set, this is recommended to be at least 20 minutes.
 	TxSendTimeout time.Duration

+	// TxNotInMempoolTimeout is how long to wait before aborting a transaction send if the transaction does not
+	// make it to the mempool. If the tx is in the mempool, TxSendTimeout is used instead.
+	TxNotInMempoolTimeout time.Duration
+
 	// NetworkTimeout is the allowed duration for a single network request.
 	// This is intended to be used for network requests that can be replayed.
 	NetworkTimeout time.Duration

--- a/op-service/txmgr/send_state.go
+++ b/op-service/txmgr/send_state.go
@@ -3,6 +3,7 @@ package txmgr
 import (
 	"strings"
 	"sync"
+	"time"

 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/core"
@@ -12,48 +13,47 @@ import (
 // this context, a txn may correspond to multiple different txn hashes due to
 // varying gas prices, though we treat them all as the same logical txn. This
 // struct is primarily used to determine whether or not the txmgr should abort a
-// given txn and retry with a higher nonce.
+// given txn.
 type SendState struct {
-	minedTxs         map[common.Hash]struct{}
-	nonceTooLowCount uint64
-	mu               sync.RWMutex
+	minedTxs map[common.Hash]struct{}
+	mu       sync.RWMutex

-	safeAbortNonceTooLowCount uint64
+	// Abort tracking: observed nonce too low errors and the mempool deadline
+	nonceTooLowCount    uint64
+	txInMempoolDeadline time.Time // deadline to abort at if no transactions are in the mempool
+
+	// Counts of the different types of errors
+	successFullPublishCount   uint64 // nil error => tx made it to the mempool
+	safeAbortNonceTooLowCount uint64 // config: nonce too low count at which to abort
 }

 // NewSendState parameterizes a new SendState from the passed
 // safeAbortNonceTooLowCount.
-func NewSendState(safeAbortNonceTooLowCount uint64) *SendState {
+func NewSendState(safeAbortNonceTooLowCount uint64, unableToSendTimeout time.Duration) *SendState {
 	if safeAbortNonceTooLowCount == 0 {
 		panic("txmgr: safeAbortNonceTooLowCount cannot be zero")
 	}

 	return &SendState{
 		minedTxs:                  make(map[common.Hash]struct{}),
-		nonceTooLowCount:          0,
 		safeAbortNonceTooLowCount: safeAbortNonceTooLowCount,
+		txInMempoolDeadline:       time.Now().Add(unableToSendTimeout),
 	}
 }

 // ProcessSendError should be invoked with the error returned for each
 // publication. It is safe to call this method with nil or arbitrary errors.
-// Currently it only acts on errors containing the ErrNonceTooLow message.
 func (s *SendState) ProcessSendError(err error) {
-	// Nothing to do.
-	if err == nil {
-		return
-	}
-
-	// Only concerned with ErrNonceTooLow.
-	if !strings.Contains(err.Error(), core.ErrNonceTooLow.Error()) {
-		return
-	}
-
 	s.mu.Lock()
 	defer s.mu.Unlock()

-	// Record this nonce too low observation.
-	s.nonceTooLowCount++
+	// Record the type of error
+	switch {
+	case err == nil:
+		s.successFullPublishCount++
+	case strings.Contains(err.Error(), core.ErrNonceTooLow.Error()):
+		s.nonceTooLowCount++
+	}
 }

 // TxMined records that the txn with txnHash has been mined and is await
@@ -85,8 +85,9 @@ func (s *SendState) TxNotMined(txHash common.Hash) {
 }

 // ShouldAbortImmediately returns true if the txmgr should give up on trying a
-// given txn with the target nonce. For now, this only happens if we see an
-// extended period of getting ErrNonceTooLow without having a txn mined.
+// given txn with the target nonce.
+// This occurs when the set of errors recorded indicates that no further progress can be made
+// on this transaction.
 func (s *SendState) ShouldAbortImmediately() bool {
 	s.mu.RLock()
 	defer s.mu.RUnlock()
@@ -96,9 +97,14 @@ func (s *SendState) ShouldAbortImmediately() bool {
 		return false
 	}

-	// Only abort if we've observed enough ErrNonceTooLow to meet our safe abort
-	// threshold.
-	return s.nonceTooLowCount >= s.safeAbortNonceTooLowCount
+	// If we have exceeded the nonce too low count, abort
+	if s.nonceTooLowCount >= s.safeAbortNonceTooLowCount ||
+		// If we have not published a transaction in the allotted time, abort
+		(s.successFullPublishCount == 0 && time.Now().After(s.txInMempoolDeadline)) {
+		return true
+	}
+
+	return false
 }

 // IsWaitingForConfirmation returns true if we have at least one confirmation on

--- a/op-service/txmgr/send_state_test.go
+++ b/op-service/txmgr/send_state_test.go
@@ -3,6 +3,7 @@ package txmgr_test
 import (
 	"errors"
 	"testing"
+	"time"

 	"github.com/stretchr/testify/require"

@@ -11,14 +12,18 @@ import (
 	"github.com/ethereum/go-ethereum/core"
 )

-const testSafeAbortNonceTooLowCount = 3
-
 var (
 	testHash = common.HexToHash("0x01")
 )

+const testSafeAbortNonceTooLowCount = 3
+
 func newSendState() *txmgr.SendState {
-	return txmgr.NewSendState(testSafeAbortNonceTooLowCount)
+	return newSendStateWithTimeout(time.Hour)
+}
+
+func newSendStateWithTimeout(t time.Duration) *txmgr.SendState {
+	return txmgr.NewSendState(testSafeAbortNonceTooLowCount, t)
 }

 func processNSendErrors(sendState *txmgr.SendState, err error, n int) {
@@ -160,3 +165,20 @@ func TestSendStateIsNotWaitingForConfirmationAfterTxUnmined(t *testing.T) {
 	sendState.TxNotMined(testHash)
 	require.False(t, sendState.IsWaitingForConfirmation())
 }
+
+// TestSendStateTimeoutAbort ensures that the send state aborts once the tx pool timeout
+// has passed and no successful transaction publish has been recorded.
+func TestSendStateTimeoutAbort(t *testing.T) {
+	sendState := newSendStateWithTimeout(10 * time.Millisecond)
+	time.Sleep(20 * time.Millisecond)
+	require.True(t, sendState.ShouldAbortImmediately(), "Should abort after timing out")
+}
+
+// TestSendStateNoTimeoutAbortIfPublishedTx ensures that the send state does not abort after
+// the timeout if a transaction was published successfully.
+func TestSendStateNoTimeoutAbortIfPublishedTx(t *testing.T) {
+	sendState := newSendStateWithTimeout(10 * time.Millisecond)
+	sendState.ProcessSendError(nil)
+	time.Sleep(20 * time.Millisecond)
+	require.False(t, sendState.ShouldAbortImmediately(), "Should not abort if published transcation successfully")
+}
--- a/op-service/txmgr/txmgr.go
+++ b/op-service/txmgr/txmgr.go
--- a/op-service/txmgr/txmgr_test.go
+++ b/op-service/txmgr/txmgr_test.go
@@ -3,6 +3,7 @@ package txmgr
 import (
 	"context"
 	"errors"
+	"fmt"
 	"math/big"
 	"sync"
 	"testing"
@@ -20,6 +21,10 @@ import (

 type sendTransactionFunc func(ctx context.Context, tx *types.Transaction) error

+func testSendState() *SendState {
+	return NewSendState(100, time.Hour)
+}
+
 // testHarness houses the necessary resources to test the SimpleTxManager.
 type testHarness struct {
 	cfg       Config
@@ -68,6 +73,7 @@ func configWithNumConfs(numConfirmations uint64) Config {
 		ReceiptQueryInterval:      50 * time.Millisecond,
 		NumConfirmations:          numConfirmations,
 		SafeAbortNonceTooLowCount: 3,
+		TxNotInMempoolTimeout:     1 * time.Hour,
 		Signer: func(ctx context.Context, from common.Address, tx *types.Transaction) (*types.Transaction, error) {
 			return tx, nil
 		},
@@ -530,7 +536,7 @@ func TestWaitMinedReturnsReceiptOnFirstSuccess(t *testing.T) {

 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
-	receipt, err := h.mgr.waitMined(ctx, tx, nil)
+	receipt, err := h.mgr.waitMined(ctx, tx, testSendState())
 	require.Nil(t, err)
 	require.NotNil(t, receipt)
 	require.Equal(t, receipt.TxHash, txHash)
@@ -549,7 +555,7 @@ func TestWaitMinedCanBeCanceled(t *testing.T) {
 	// Create an unimined tx.
 	tx := types.NewTx(&types.LegacyTx{})

-	receipt, err := h.mgr.waitMined(ctx, tx, nil)
+	receipt, err := h.mgr.waitMined(ctx, tx, NewSendState(10, time.Hour))
 	require.Equal(t, err, context.DeadlineExceeded)
 	require.Nil(t, receipt)
 }
@@ -570,7 +576,7 @@ func TestWaitMinedMultipleConfs(t *testing.T) {
 	txHash := tx.Hash()
 	h.backend.mine(&txHash, new(big.Int))

-	receipt, err := h.mgr.waitMined(ctx, tx, nil)
+	receipt, err := h.mgr.waitMined(ctx, tx, NewSendState(10, time.Hour))
 	require.Equal(t, err, context.DeadlineExceeded)
 	require.Nil(t, receipt)

@@ -579,7 +585,7 @@ func TestWaitMinedMultipleConfs(t *testing.T) {

 	// Mine an empty block, tx should now be confirmed.
 	h.backend.mine(nil, nil)
-	receipt, err = h.mgr.waitMined(ctx, tx, nil)
+	receipt, err = h.mgr.waitMined(ctx, tx, NewSendState(10, time.Hour))
 	require.Nil(t, err)
 	require.NotNil(t, receipt)
 	require.Equal(t, txHash, receipt.TxHash)
@@ -692,7 +698,7 @@ func TestWaitMinedReturnsReceiptAfterFailure(t *testing.T) {

 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 	defer cancel()
-	receipt, err := mgr.waitMined(ctx, tx, nil)
+	receipt, err := mgr.waitMined(ctx, tx, testSendState())
 	require.Nil(t, err)
 	require.NotNil(t, receipt)
 	require.Equal(t, receipt.TxHash, txHash)
@@ -724,8 +730,7 @@ func doGasPriceIncrease(t *testing.T, txTipCap, txFeeCap, newTip, newBaseFee int
 		GasTipCap: big.NewInt(txTipCap),
 		GasFeeCap: big.NewInt(txFeeCap),
 	})
-	newTx, err := mgr.increaseGasPrice(context.Background(), tx)
-	require.NoError(t, err)
+	newTx := mgr.increaseGasPrice(context.Background(), tx)
 	return tx, newTx
 }

@@ -831,11 +836,32 @@ func TestIncreaseGasPriceNotExponential(t *testing.T) {
 	// Run IncreaseGasPrice a bunch of times in a row to simulate a very fast resubmit loop.
 	for i := 0; i < 20; i++ {
 		ctx := context.Background()
-		newTx, err := mgr.increaseGasPrice(ctx, tx)
-		require.NoError(t, err)
+		newTx := mgr.increaseGasPrice(ctx, tx)
 		require.True(t, newTx.GasFeeCap().Cmp(feeCap) == 0, "new tx fee cap must be equal L1")
 		require.True(t, newTx.GasTipCap().Cmp(borkedBackend.gasTip) == 0, "new tx tip must be equal L1")
 		tx = newTx
 	}

 }
+
+func TestErrStringMatch(t *testing.T) {
+	tests := []struct {
+		err    error
+		target error
+		match  bool
+	}{
+		{err: nil, target: nil, match: true},
+		{err: errors.New("exists"), target: nil, match: false},
+		{err: nil, target: errors.New("exists"), match: false},
+		{err: errors.New("exact match"), target: errors.New("exact match"), match: true},
+		{err: errors.New("partial: match"), target: errors.New("match"), match: true},
+	}
+
+	for i, test := range tests {
+		i := i
+		test := test
+		t.Run(fmt.Sprint(i), func(t *testing.T) {
+			require.Equal(t, test.match, errStringMatch(test.err, test.target))
+		})
+	}
+}
--- a/ops-bedrock/docker-compose.yml
+++ b/ops-bedrock/docker-compose.yml
@@ -123,7 +123,6 @@ services:
      OP_BATCHER_L1_ETH_RPC: http://l1:8545
      OP_BATCHER_L2_ETH_RPC: http://l2:8545
      OP_BATCHER_ROLLUP_RPC: http://op-node:8545
-      TX_MANAGER_TIMEOUT: 10m
      OFFLINE_GAS_ESTIMATION: false
      OP_BATCHER_MAX_CHANNEL_DURATION: 1
      OP_BATCHER_MAX_L1_TX_SIZE_BYTES: 120000