Commit 1e157a72 authored by protolambda

op-challenger: cleanup service lifecycle, improve subscription ctx usage

parent 2e35a8f6
@@ -312,6 +312,10 @@ func (bs *BatcherService) Stop(ctx context.Context) error {
 			result = errors.Join(result, fmt.Errorf("failed to close balance metricer: %w", err))
 		}
 	}
+
+	if bs.TxManager != nil {
+		bs.TxManager.Close()
+	}
 	if bs.metricsSrv != nil {
 		if err := bs.metricsSrv.Stop(ctx); err != nil {
 			result = errors.Join(result, fmt.Errorf("failed to stop metrics server: %w", err))
...
@@ -2,22 +2,19 @@ package op_challenger
 
 import (
 	"context"
-	"fmt"
+
+	"github.com/ethereum/go-ethereum/log"
 
 	"github.com/ethereum-optimism/optimism/op-challenger/config"
 	"github.com/ethereum-optimism/optimism/op-challenger/game"
-	"github.com/ethereum/go-ethereum/log"
+	"github.com/ethereum-optimism/optimism/op-service/cliapp"
 )
 
-// Main is the programmatic entry-point for running op-challenger
-func Main(ctx context.Context, logger log.Logger, cfg *config.Config) error {
+// Main is the programmatic entry-point for running op-challenger with a given configuration.
+func Main(ctx context.Context, logger log.Logger, cfg *config.Config) (cliapp.Lifecycle, error) {
 	if err := cfg.Check(); err != nil {
-		return err
+		return nil, err
 	}
-	service, err := game.NewService(ctx, logger, cfg)
-	if err != nil {
-		return fmt.Errorf("failed to create the fault service: %w", err)
-	}
-	return service.MonitorGame(ctx)
+	srv, err := game.NewService(ctx, logger, cfg)
+	return srv, err
 }
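Main now returns a cliapp.Lifecycle instead of blocking until the monitor loop exits. For orientation, the contract the service must satisfy looks roughly like the sketch below; this is inferred from the Start/Stop/Stopped methods added to game.Service later in this commit, and the exact op-service/cliapp definition may differ in detail:

```go
package cliapp

import "context"

// Lifecycle is the shape of a long-running service as the CLI runner sees it:
// construction is separated from starting, and shutdown is an explicit,
// context-bounded call rather than an implicitly cancelled context.
type Lifecycle interface {
	// Start begins background work; it must not block on the service's main loop.
	Start(ctx context.Context) error
	// Stop shuts the service down within the bounds of ctx (e.g. a timeout).
	Stop(ctx context.Context) error
	// Stopped reports whether Stop has completed.
	Stopped() bool
}
```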
@@ -12,6 +12,7 @@ import (
 func TestMainShouldReturnErrorWhenConfigInvalid(t *testing.T) {
 	cfg := &config.Config{}
-	err := Main(context.Background(), testlog.Logger(t, log.LvlInfo), cfg)
+	app, err := Main(context.Background(), testlog.Logger(t, log.LvlInfo), cfg)
 	require.ErrorIs(t, err, cfg.Check())
+	require.Nil(t, app)
 }
@@ -4,16 +4,18 @@ import (
 	"context"
 	"os"
 
-	op_challenger "github.com/ethereum-optimism/optimism/op-challenger"
-	opservice "github.com/ethereum-optimism/optimism/op-service"
-	"github.com/ethereum/go-ethereum/log"
 	"github.com/urfave/cli/v2"
 
+	"github.com/ethereum/go-ethereum/log"
+
+	challenger "github.com/ethereum-optimism/optimism/op-challenger"
 	"github.com/ethereum-optimism/optimism/op-challenger/config"
 	"github.com/ethereum-optimism/optimism/op-challenger/flags"
 	"github.com/ethereum-optimism/optimism/op-challenger/version"
+	opservice "github.com/ethereum-optimism/optimism/op-service"
 	"github.com/ethereum-optimism/optimism/op-service/cliapp"
 	oplog "github.com/ethereum-optimism/optimism/op-service/log"
+	"github.com/ethereum-optimism/optimism/op-service/opio"
 )
 
 var (
@@ -26,14 +28,15 @@ var VersionWithMeta = opservice.FormatVersion(version.Version, GitCommit, GitDat
 func main() {
 	args := os.Args
-	if err := run(args, op_challenger.Main); err != nil {
+	ctx := opio.WithInterruptBlocker(context.Background())
+	if err := run(ctx, args, challenger.Main); err != nil {
 		log.Crit("Application failed", "err", err)
 	}
 }
 
-type ConfigAction func(ctx context.Context, log log.Logger, config *config.Config) error
+type ConfiguredLifecycle func(ctx context.Context, log log.Logger, config *config.Config) (cliapp.Lifecycle, error)
 
-func run(args []string, action ConfigAction) error {
+func run(ctx context.Context, args []string, action ConfiguredLifecycle) error {
 	oplog.SetupDefaults()
 	app := cli.NewApp()
@@ -42,20 +45,20 @@ func run(args []string, action ConfigAction) error {
 	app.Name = "op-challenger"
 	app.Usage = "Challenge outputs"
 	app.Description = "Ensures that on chain outputs are correct."
-	app.Action = func(ctx *cli.Context) error {
+	app.Action = cliapp.LifecycleCmd(func(ctx *cli.Context, close context.CancelCauseFunc) (cliapp.Lifecycle, error) {
 		logger, err := setupLogging(ctx)
 		if err != nil {
-			return err
+			return nil, err
 		}
 		logger.Info("Starting op-challenger", "version", VersionWithMeta)
 
 		cfg, err := flags.NewConfigFromCLI(ctx)
 		if err != nil {
-			return err
+			return nil, err
 		}
 		return action(ctx.Context, logger, cfg)
-	}
-	return app.Run(args)
+	})
+	return app.RunContext(ctx, args)
 }
 
 func setupLogging(ctx *cli.Context) (log.Logger, error) {
...
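cliapp.LifecycleCmd turns the setup function into a regular cli action: it builds the Lifecycle, starts it, then blocks until the CLI context is interrupted before stopping it. A stripped-down, illustrative stand-in (the real implementation additionally plumbs opio interrupt blocking and the context.CancelCauseFunc through; lifecycleCmd and the one-minute stop budget here are assumptions for the sketch):

```go
package main

import (
	"context"
	"time"

	"github.com/urfave/cli/v2"
)

type lifecycle interface {
	Start(ctx context.Context) error
	Stop(ctx context.Context) error
}

// lifecycleCmd is a hypothetical, minimal stand-in for cliapp.LifecycleCmd.
func lifecycleCmd(setup func(c *cli.Context) (lifecycle, error)) cli.ActionFunc {
	return func(c *cli.Context) error {
		svc, err := setup(c)
		if err != nil {
			return err
		}
		if err := svc.Start(c.Context); err != nil {
			return err
		}
		<-c.Context.Done() // block until interrupt / cancellation of the CLI context
		stopCtx, cancel := context.WithTimeout(context.Background(), time.Minute)
		defer cancel()
		return svc.Stop(stopCtx)
	}
}
```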
@@ -2,15 +2,19 @@ package main
 
 import (
 	"context"
+	"errors"
 	"fmt"
 	"testing"
 	"time"
 
-	"github.com/ethereum-optimism/optimism/op-challenger/config"
-	"github.com/ethereum-optimism/optimism/op-service/txmgr"
+	"github.com/stretchr/testify/require"
+
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/log"
-	"github.com/stretchr/testify/require"
+
+	"github.com/ethereum-optimism/optimism/op-challenger/config"
+	"github.com/ethereum-optimism/optimism/op-service/cliapp"
+	"github.com/ethereum-optimism/optimism/op-service/txmgr"
 )
 
 var (
@@ -36,7 +40,7 @@ func TestLogLevel(t *testing.T) {
 	for _, lvl := range []string{"trace", "debug", "info", "error", "crit"} {
 		lvl := lvl
 		t.Run("AcceptValid_"+lvl, func(t *testing.T) {
-			logger, _, err := runWithArgs(addRequiredArgs(config.TraceTypeAlphabet, "--log.level", lvl))
+			logger, _, err := dryRunWithArgs(addRequiredArgs(config.TraceTypeAlphabet, "--log.level", lvl))
 			require.NoError(t, err)
 			require.NotNil(t, logger)
 		})
@@ -431,25 +435,29 @@ func TestCannonL2Genesis(t *testing.T) {
 }
 
 func verifyArgsInvalid(t *testing.T, messageContains string, cliArgs []string) {
-	_, _, err := runWithArgs(cliArgs)
+	_, _, err := dryRunWithArgs(cliArgs)
 	require.ErrorContains(t, err, messageContains)
 }
 
 func configForArgs(t *testing.T, cliArgs []string) config.Config {
-	_, cfg, err := runWithArgs(cliArgs)
+	_, cfg, err := dryRunWithArgs(cliArgs)
 	require.NoError(t, err)
 	return cfg
 }
 
-func runWithArgs(cliArgs []string) (log.Logger, config.Config, error) {
+func dryRunWithArgs(cliArgs []string) (log.Logger, config.Config, error) {
 	cfg := new(config.Config)
 	var logger log.Logger
 	fullArgs := append([]string{"op-challenger"}, cliArgs...)
-	err := run(fullArgs, func(ctx context.Context, log log.Logger, config *config.Config) error {
+	testErr := errors.New("dry-run")
+	err := run(context.Background(), fullArgs, func(ctx context.Context, log log.Logger, config *config.Config) (cliapp.Lifecycle, error) {
 		logger = log
 		cfg = config
-		return nil
+		return nil, testErr
 	})
+	if errors.Is(err, testErr) { // expected error
+		err = nil
+	}
 	return logger, *cfg, err
 }
...
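The renamed dryRunWithArgs leans on Go's sentinel-error idiom: the callback aborts the run with a private error, and errors.Is filters that expected abort back out so only genuine flag-parsing failures surface. The idiom in isolation (names are illustrative):

```go
package main

import "errors"

var errDryRun = errors.New("dry-run") // sentinel: signals an intentional early exit

func dryRun(run func() error) error {
	err := run()
	if errors.Is(err, errDryRun) {
		return nil // the sentinel is expected, not a failure
	}
	return err // a real error, or nil
}
```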
@@ -232,6 +232,9 @@ func (m *mockTxManager) From() common.Address {
 	return m.from
 }
 
+func (m *mockTxManager) Close() {
+}
+
 type mockContract struct {
 	calls     int
 	callFails bool
...
@@ -4,6 +4,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"sync"
 	"time"
 
 	"github.com/ethereum-optimism/optimism/op-challenger/game/scheduler"
@@ -39,6 +40,7 @@ type gameMonitor struct {
 	allowedGames []common.Address
 	l1HeadsSub   ethereum.Subscription
 	l1Source     *headSource
+	runState     sync.Mutex
 }
 
 type MinimalSubscriber interface {
@@ -126,8 +128,10 @@ func (m *gameMonitor) onNewL1Head(ctx context.Context, sig eth.L1BlockRef) {
 	}
 }
 
-func (m *gameMonitor) resubscribeFunction(ctx context.Context) event.ResubscribeErrFunc {
-	return func(innerCtx context.Context, err error) (event.Subscription, error) {
+func (m *gameMonitor) resubscribeFunction() event.ResubscribeErrFunc {
+	// The ctx is cancelled as soon as the subscription is returned,
+	// but is only used to create the subscription, and does not affect the returned subscription.
+	return func(ctx context.Context, err error) (event.Subscription, error) {
 		if err != nil {
 			m.logger.Warn("resubscribing after failed L1 subscription", "err", err)
 		}
@@ -135,18 +139,21 @@ func (m *gameMonitor) resubscribeFunction(ctx context.Context) event.Resubscribe
 	}
 }
 
-func (m *gameMonitor) MonitorGames(ctx context.Context) error {
-	m.l1HeadsSub = event.ResubscribeErr(time.Second*10, m.resubscribeFunction(ctx))
-	for {
-		select {
-		case <-ctx.Done():
-			m.l1HeadsSub.Unsubscribe()
-			return nil
-		case err, ok := <-m.l1HeadsSub.Err():
-			if !ok {
-				return err
-			}
-			m.logger.Error("L1 subscription error", "err", err)
-		}
-	}
+func (m *gameMonitor) StartMonitoring() {
+	m.runState.Lock()
+	defer m.runState.Unlock()
+	if m.l1HeadsSub != nil {
+		return // already started
+	}
+	m.l1HeadsSub = event.ResubscribeErr(time.Second*10, m.resubscribeFunction())
+}
+
+func (m *gameMonitor) StopMonitoring() {
+	m.runState.Lock()
+	defer m.runState.Unlock()
+	if m.l1HeadsSub == nil {
+		return // already stopped
+	}
+	m.l1HeadsSub.Unsubscribe()
+	m.l1HeadsSub = nil
}
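StartMonitoring can be this small because event.ResubscribeErr owns the whole retry loop: the returned subscription re-establishes itself with capped backoff whenever it fails, until Unsubscribe is called, so no foreground select loop is needed anymore. A self-contained toy demonstrating those semantics (the ticker subscription is invented for the example):

```go
package main

import (
	"context"
	"fmt"
	"time"

	"github.com/ethereum/go-ethereum/event"
)

// newTickerSub is a toy subscription that emits timestamps until quit closes.
func newTickerSub(out chan<- time.Time) event.Subscription {
	return event.NewSubscription(func(quit <-chan struct{}) error {
		t := time.NewTicker(100 * time.Millisecond)
		defer t.Stop()
		for {
			select {
			case now := <-t.C:
				out <- now
			case <-quit:
				return nil
			}
		}
	})
}

func main() {
	out := make(chan time.Time)
	// ResubscribeErr re-creates the subscription (backoff capped at 10s)
	// whenever it ends with an error. The ctx handed to the factory is
	// cancelled once the factory returns, so it must only guard setup.
	sub := event.ResubscribeErr(10*time.Second, func(ctx context.Context, lastErr error) (event.Subscription, error) {
		if lastErr != nil {
			fmt.Println("resubscribing after error:", lastErr)
		}
		return newTickerSub(out), nil
	})
	defer sub.Unsubscribe()
	fmt.Println("first tick:", <-out)
}
```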
@@ -84,8 +84,9 @@ func TestMonitorGames(t *testing.T) {
 			cancel()
 		}()
 
-		err := monitor.MonitorGames(ctx)
-		require.NoError(t, err)
+		monitor.StartMonitoring()
+		<-ctx.Done()
+		monitor.StopMonitoring()
 		require.Len(t, sched.scheduled, 1)
 		require.Equal(t, []common.Address{addr1, addr2}, sched.scheduled[0])
 	})
@@ -129,8 +130,9 @@ func TestMonitorGames(t *testing.T) {
 			cancel()
 		}()
 
-		err := monitor.MonitorGames(ctx)
-		require.NoError(t, err)
+		monitor.StartMonitoring()
+		<-ctx.Done()
+		monitor.StopMonitoring()
 		require.NotEmpty(t, sched.scheduled) // We might get more than one update scheduled.
 		require.Equal(t, []common.Address{addr1, addr2}, sched.scheduled[0])
 	})
...
@@ -4,6 +4,11 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"io"
+	"sync/atomic"
+
+	"github.com/ethereum/go-ethereum/ethclient"
+	"github.com/ethereum/go-ethereum/log"
 
 	"github.com/ethereum-optimism/optimism/op-challenger/config"
 	"github.com/ethereum-optimism/optimism/op-challenger/game/fault"
@@ -13,14 +18,14 @@ import (
 	"github.com/ethereum-optimism/optimism/op-challenger/game/scheduler"
 	"github.com/ethereum-optimism/optimism/op-challenger/metrics"
 	"github.com/ethereum-optimism/optimism/op-challenger/version"
-	opClient "github.com/ethereum-optimism/optimism/op-service/client"
+	"github.com/ethereum-optimism/optimism/op-service/client"
 	"github.com/ethereum-optimism/optimism/op-service/clock"
 	"github.com/ethereum-optimism/optimism/op-service/dial"
 	"github.com/ethereum-optimism/optimism/op-service/httputil"
+	opmetrics "github.com/ethereum-optimism/optimism/op-service/metrics"
 	oppprof "github.com/ethereum-optimism/optimism/op-service/pprof"
 	"github.com/ethereum-optimism/optimism/op-service/sources/batching"
 	"github.com/ethereum-optimism/optimism/op-service/txmgr"
-	"github.com/ethereum/go-ethereum/log"
 )
 
 type Service struct {
@@ -29,101 +34,196 @@ type Service struct {
 	monitor *gameMonitor
 	sched   *scheduler.Scheduler
 
+	txMgr *txmgr.SimpleTxManager
+
+	loader *loader.GameLoader
+
+	l1Client   *ethclient.Client
+	pollClient client.RPC
+
 	pprofSrv   *httputil.HTTPServer
 	metricsSrv *httputil.HTTPServer
+
+	balanceMetricer io.Closer
+
+	stopped atomic.Bool
 }
 
-func (s *Service) Stop(ctx context.Context) error {
-	var result error
-	if s.sched != nil {
-		result = errors.Join(result, s.sched.Close())
-	}
-	if s.pprofSrv != nil {
-		result = errors.Join(result, s.pprofSrv.Stop(ctx))
-	}
-	if s.metricsSrv != nil {
-		result = errors.Join(result, s.metricsSrv.Stop(ctx))
-	}
-	return result
-}
-
-// NewService creates a new Service.
-func NewService(ctx context.Context, logger log.Logger, cfg *config.Config) (*Service, error) {
-	cl := clock.SystemClock
-	m := metrics.NewMetrics()
-	txMgr, err := txmgr.NewSimpleTxManager("challenger", logger, &m.TxMetrics, cfg.TxMgrConfig)
-	if err != nil {
-		return nil, fmt.Errorf("failed to create the transaction manager: %w", err)
-	}
-
-	l1Client, err := dial.DialEthClientWithTimeout(ctx, dial.DefaultDialTimeout, logger, cfg.L1EthRpc)
-	if err != nil {
-		return nil, fmt.Errorf("failed to dial L1: %w", err)
-	}
-
-	s := &Service{
-		logger:  logger,
-		metrics: m,
-	}
-
-	pprofConfig := cfg.PprofConfig
-	if pprofConfig.Enabled {
-		logger.Debug("starting pprof", "addr", pprofConfig.ListenAddr, "port", pprofConfig.ListenPort)
-		pprofSrv, err := oppprof.StartServer(pprofConfig.ListenAddr, pprofConfig.ListenPort)
-		if err != nil {
-			return nil, errors.Join(fmt.Errorf("failed to start pprof server: %w", err), s.Stop(ctx))
-		}
-		s.pprofSrv = pprofSrv
-		logger.Info("started pprof server", "addr", pprofSrv.Addr())
-	}
-
-	metricsCfg := cfg.MetricsConfig
-	if metricsCfg.Enabled {
-		logger.Debug("starting metrics server", "addr", metricsCfg.ListenAddr, "port", metricsCfg.ListenPort)
-		metricsSrv, err := m.Start(metricsCfg.ListenAddr, metricsCfg.ListenPort)
-		if err != nil {
-			return nil, errors.Join(fmt.Errorf("failed to start metrics server: %w", err), s.Stop(ctx))
-		}
-		logger.Info("started metrics server", "addr", metricsSrv.Addr())
-		s.metricsSrv = metricsSrv
-		m.StartBalanceMetrics(ctx, logger, l1Client, txMgr.From())
-	}
-
-	factoryContract, err := contracts.NewDisputeGameFactoryContract(cfg.GameFactoryAddress, batching.NewMultiCaller(l1Client.Client(), batching.DefaultBatchSize))
-	if err != nil {
-		return nil, errors.Join(fmt.Errorf("failed to bind the fault dispute game factory contract: %w", err), s.Stop(ctx))
-	}
-	loader := loader.NewGameLoader(factoryContract)
-
-	gameTypeRegistry := registry.NewGameTypeRegistry()
-	fault.RegisterGameTypes(gameTypeRegistry, ctx, logger, m, cfg, txMgr, l1Client)
-	disk := newDiskManager(cfg.Datadir)
-	s.sched = scheduler.NewScheduler(
-		logger,
-		m,
-		disk,
-		cfg.MaxConcurrency,
-		gameTypeRegistry.CreatePlayer)
-
-	pollClient, err := opClient.NewRPCWithClient(ctx, logger, cfg.L1EthRpc, opClient.NewBaseRPCClient(l1Client.Client()), cfg.PollInterval)
-	if err != nil {
-		return nil, errors.Join(fmt.Errorf("failed to create RPC client: %w", err), s.Stop(ctx))
-	}
-	s.monitor = newGameMonitor(logger, cl, loader, s.sched, cfg.GameWindow, l1Client.BlockNumber, cfg.GameAllowlist, pollClient)
-
-	m.RecordInfo(version.SimpleWithMeta)
-	m.RecordUp()
-	return s, nil
-}
-
-// MonitorGame monitors the fault dispute game and attempts to progress it.
-func (s *Service) MonitorGame(ctx context.Context) error {
-	s.sched.Start(ctx)
-	err := s.monitor.MonitorGames(ctx)
-	// The other ctx is the close-trigger.
-	// We need to refactor Service more to allow for graceful/force-shutdown granularity.
-	err = errors.Join(err, s.Stop(context.Background()))
-	return err
-}
+// NewService creates a new Service.
+func NewService(ctx context.Context, logger log.Logger, cfg *config.Config) (*Service, error) {
+	s := &Service{
+		logger:  logger,
+		metrics: metrics.NewMetrics(),
+	}
+
+	if err := s.initFromConfig(ctx, cfg); err != nil {
+		// upon initialization error we can try to close any of the service components that may have started already.
+		return nil, errors.Join(fmt.Errorf("failed to init challenger game service: %w", err), s.Stop(ctx))
+	}
+
+	return s, nil
+}
+
+func (s *Service) initFromConfig(ctx context.Context, cfg *config.Config) error {
+	if err := s.initTxManager(cfg); err != nil {
+		return err
+	}
+	if err := s.initL1Client(ctx, cfg); err != nil {
+		return err
+	}
+	if err := s.initPollClient(ctx, cfg); err != nil {
+		return err
+	}
+	if err := s.initPProfServer(&cfg.PprofConfig); err != nil {
+		return err
+	}
+	if err := s.initMetricsServer(&cfg.MetricsConfig); err != nil {
+		return err
+	}
+	if err := s.initGameLoader(cfg); err != nil {
+		return err
+	}
+	s.initScheduler(ctx, cfg)
+	s.initMonitor(cfg)
+
+	s.metrics.RecordInfo(version.SimpleWithMeta)
+	s.metrics.RecordUp()
+	return nil
+}
+
+func (s *Service) initTxManager(cfg *config.Config) error {
+	txMgr, err := txmgr.NewSimpleTxManager("challenger", s.logger, s.metrics, cfg.TxMgrConfig)
+	if err != nil {
+		return fmt.Errorf("failed to create the transaction manager: %w", err)
+	}
+	s.txMgr = txMgr
+	return nil
+}
+
+func (s *Service) initL1Client(ctx context.Context, cfg *config.Config) error {
+	l1Client, err := dial.DialEthClientWithTimeout(ctx, dial.DefaultDialTimeout, s.logger, cfg.L1EthRpc)
+	if err != nil {
+		return fmt.Errorf("failed to dial L1: %w", err)
+	}
+	s.l1Client = l1Client
+	return nil
+}
+
+func (s *Service) initPollClient(ctx context.Context, cfg *config.Config) error {
+	pollClient, err := client.NewRPCWithClient(ctx, s.logger, cfg.L1EthRpc, client.NewBaseRPCClient(s.l1Client.Client()), cfg.PollInterval)
+	if err != nil {
+		return fmt.Errorf("failed to create RPC client: %w", err)
+	}
+	s.pollClient = pollClient
+	return nil
+}
+
+func (s *Service) initPProfServer(cfg *oppprof.CLIConfig) error {
+	if !cfg.Enabled {
+		return nil
+	}
+	s.logger.Debug("starting pprof", "addr", cfg.ListenAddr, "port", cfg.ListenPort)
+	pprofSrv, err := oppprof.StartServer(cfg.ListenAddr, cfg.ListenPort)
+	if err != nil {
+		return fmt.Errorf("failed to start pprof server: %w", err)
+	}
+	s.pprofSrv = pprofSrv
+	s.logger.Info("started pprof server", "addr", pprofSrv.Addr())
+	return nil
+}
+
+func (s *Service) initMetricsServer(cfg *opmetrics.CLIConfig) error {
+	if !cfg.Enabled {
+		return nil
+	}
+	s.logger.Debug("starting metrics server", "addr", cfg.ListenAddr, "port", cfg.ListenPort)
+	m, ok := s.metrics.(opmetrics.RegistryMetricer)
+	if !ok {
+		return fmt.Errorf("metrics were enabled, but metricer %T does not expose registry for metrics-server", s.metrics)
+	}
+	metricsSrv, err := opmetrics.StartServer(m.Registry(), cfg.ListenAddr, cfg.ListenPort)
+	if err != nil {
+		return fmt.Errorf("failed to start metrics server: %w", err)
+	}
+	s.logger.Info("started metrics server", "addr", metricsSrv.Addr())
+	s.metricsSrv = metricsSrv
+	s.balanceMetricer = s.metrics.StartBalanceMetrics(s.logger, s.l1Client, s.txMgr.From())
+	return nil
+}
+
+func (s *Service) initGameLoader(cfg *config.Config) error {
+	factoryContract, err := contracts.NewDisputeGameFactoryContract(cfg.GameFactoryAddress,
+		batching.NewMultiCaller(s.l1Client.Client(), batching.DefaultBatchSize))
+	if err != nil {
+		return fmt.Errorf("failed to bind the fault dispute game factory contract: %w", err)
+	}
+	s.loader = loader.NewGameLoader(factoryContract)
+	return nil
+}
+
+func (s *Service) initScheduler(ctx context.Context, cfg *config.Config) {
+	gameTypeRegistry := registry.NewGameTypeRegistry()
+	fault.RegisterGameTypes(gameTypeRegistry, ctx, s.logger, s.metrics, cfg, s.txMgr, s.l1Client)
+	disk := newDiskManager(cfg.Datadir)
+	s.sched = scheduler.NewScheduler(s.logger, s.metrics, disk, cfg.MaxConcurrency, gameTypeRegistry.CreatePlayer)
+}
+
+func (s *Service) initMonitor(cfg *config.Config) {
+	cl := clock.SystemClock
+	s.monitor = newGameMonitor(s.logger, cl, s.loader, s.sched, cfg.GameWindow, s.l1Client.BlockNumber, cfg.GameAllowlist, s.pollClient)
+}
+
+func (s *Service) Start(ctx context.Context) error {
+	s.logger.Info("starting scheduler")
+	s.sched.Start(ctx)
+	s.logger.Info("starting monitoring")
+	s.monitor.StartMonitoring()
+	s.logger.Info("challenger game service start completed")
+	return nil
+}
+
+func (s *Service) Stopped() bool {
+	return s.stopped.Load()
+}
+
+func (s *Service) Stop(ctx context.Context) error {
+	s.logger.Info("stopping challenger game service")
+	var result error
+	if s.sched != nil {
+		if err := s.sched.Close(); err != nil {
+			result = errors.Join(result, fmt.Errorf("failed to close scheduler: %w", err))
+		}
+	}
+	if s.monitor != nil {
+		s.monitor.StopMonitoring()
+	}
+	if s.pprofSrv != nil {
+		if err := s.pprofSrv.Stop(ctx); err != nil {
+			result = errors.Join(result, fmt.Errorf("failed to close pprof server: %w", err))
+		}
+	}
+	if s.balanceMetricer != nil {
+		if err := s.balanceMetricer.Close(); err != nil {
+			result = errors.Join(result, fmt.Errorf("failed to close balance metricer: %w", err))
+		}
+	}
+	if s.txMgr != nil {
+		s.txMgr.Close()
+	}
+	if s.l1Client != nil {
+		s.l1Client.Close()
+	}
+	if s.metricsSrv != nil {
+		if err := s.metricsSrv.Stop(ctx); err != nil {
+			result = errors.Join(result, fmt.Errorf("failed to close metrics server: %w", err))
+		}
+	}
+	s.stopped.Store(true)
+	s.logger.Info("stopped challenger game service", "err", result)
+	return result
+}
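The restructure settles on one pattern throughout: every init step assigns its component to the struct before the next step can fail, so an error anywhere can be answered with a single best-effort Stop over whatever already exists, joined to the original error. The skeleton, reduced to one component (Thing and its field are hypothetical):

```go
package service

import (
	"context"
	"errors"
	"fmt"
	"io"
)

type Thing struct {
	conn io.Closer // set by initConn; nil until then
}

func NewThing(ctx context.Context) (*Thing, error) {
	t := &Thing{}
	if err := t.initConn(ctx); err != nil {
		// Join the init failure with any cleanup failure; Stop tolerates a
		// partially initialized struct because every field is nil-checked.
		return nil, errors.Join(fmt.Errorf("failed to init: %w", err), t.Stop(ctx))
	}
	return t, nil
}

func (t *Thing) initConn(ctx context.Context) error {
	// dial something and assign t.conn before returning nil
	return nil
}

func (t *Thing) Stop(ctx context.Context) error {
	var result error
	if t.conn != nil {
		if err := t.conn.Close(); err != nil {
			result = errors.Join(result, fmt.Errorf("failed to close conn: %w", err))
		}
	}
	return result
}
```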
 package metrics
 
 import (
-	"context"
+	"io"
 
 	"github.com/ethereum/go-ethereum/common"
 	"github.com/ethereum/go-ethereum/ethclient"
@@ -19,6 +19,8 @@ type Metricer interface {
 	RecordInfo(version string)
 	RecordUp()
 
+	StartBalanceMetrics(l log.Logger, client *ethclient.Client, account common.Address) io.Closer
+
 	// Record Tx metrics
 	txmetrics.TxMetricer
@@ -128,17 +130,11 @@ func (m *Metrics) Start(host string, port int) (*httputil.HTTPServer, error) {
 }
 
 func (m *Metrics) StartBalanceMetrics(
-	ctx context.Context,
 	l log.Logger,
 	client *ethclient.Client,
 	account common.Address,
-) {
-	// TODO(7684): util was refactored to close, but ctx is still being used by caller for shutdown
-	balanceMetric := opmetrics.LaunchBalanceMetrics(l, m.registry, m.ns, client, account)
-	go func() {
-		<-ctx.Done()
-		_ = balanceMetric.Close()
-	}()
+) io.Closer {
+	return opmetrics.LaunchBalanceMetrics(l, m.registry, m.ns, client, account)
 }
 
 // RecordInfo sets a pseudo-metric that contains versioning and
...
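The signature change moves balance-metric shutdown from "cancel a context and hope the goroutine notices" to an explicit io.Closer owned by the caller. The two halves of the new contract, excerpted from the challenger service earlier in this diff:

```go
// during init: keep the closer returned by the metricer
s.balanceMetricer = s.metrics.StartBalanceMetrics(s.logger, s.l1Client, s.txMgr.From())

// during Stop: close it deterministically and surface any error
if s.balanceMetricer != nil {
	if err := s.balanceMetricer.Close(); err != nil {
		result = errors.Join(result, fmt.Errorf("failed to close balance metricer: %w", err))
	}
}
```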
 package metrics
 
 import (
+	"io"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethclient"
+	"github.com/ethereum/go-ethereum/log"
+
 	txmetrics "github.com/ethereum-optimism/optimism/op-service/txmgr/metrics"
 )
@@ -8,6 +14,10 @@ type NoopMetricsImpl struct {
 	txmetrics.NoopTxMetrics
 }
 
+func (i *NoopMetricsImpl) StartBalanceMetrics(l log.Logger, client *ethclient.Client, account common.Address) io.Closer {
+	return nil
+}
+
 var NoopMetrics Metricer = new(NoopMetricsImpl)
 
 func (*NoopMetricsImpl) RecordInfo(version string) {}
...
@@ -54,6 +54,8 @@ func (f fakeTxMgr) BlockNumber(_ context.Context) (uint64, error) {
 func (f fakeTxMgr) Send(_ context.Context, _ txmgr.TxCandidate) (*types.Receipt, error) {
 	panic("unimplemented")
 }
+func (f fakeTxMgr) Close() {
+}
 
 func NewL2Proposer(t Testing, log log.Logger, cfg *ProposerCfg, l1 *ethclient.Client, rollupCl *sources.RollupClient) *L2Proposer {
 	proposerConfig := proposer.ProposerConfig{
...
@@ -11,16 +11,19 @@ import (
 	"testing"
 	"time"
 
-	op_challenger "github.com/ethereum-optimism/optimism/op-challenger"
+	"github.com/stretchr/testify/require"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/core"
+	"github.com/ethereum/go-ethereum/log"
+
+	challenger "github.com/ethereum-optimism/optimism/op-challenger"
 	"github.com/ethereum-optimism/optimism/op-challenger/config"
 	"github.com/ethereum-optimism/optimism/op-e2e/e2eutils"
 	"github.com/ethereum-optimism/optimism/op-e2e/e2eutils/wait"
 	"github.com/ethereum-optimism/optimism/op-node/rollup"
+	"github.com/ethereum-optimism/optimism/op-service/cliapp"
 	"github.com/ethereum-optimism/optimism/op-service/testlog"
-	"github.com/ethereum/go-ethereum/common"
-	"github.com/ethereum/go-ethereum/core"
-	"github.com/ethereum/go-ethereum/log"
-	"github.com/stretchr/testify/require"
 )
 
 type Helper struct {
@@ -28,8 +31,7 @@ type Helper struct {
 	t       *testing.T
 	require *require.Assertions
 	dir     string
-	cancel  func()
-	errors  chan error
+	chl     cliapp.Lifecycle
 }
 
 type Option func(config2 *config.Config)
@@ -127,20 +129,16 @@ func NewChallenger(t *testing.T, ctx context.Context, l1Endpoint string, name st
 	log := testlog.Logger(t, log.LvlDebug).New("role", name)
 	log.Info("Creating challenger", "l1", l1Endpoint)
 	cfg := NewChallengerConfig(t, l1Endpoint, options...)
 
-	errCh := make(chan error, 1)
-	ctx, cancel := context.WithCancel(ctx)
-	go func() {
-		defer close(errCh)
-		errCh <- op_challenger.Main(ctx, log, cfg)
-	}()
+	chl, err := challenger.Main(ctx, log, cfg)
+	require.NoError(t, err, "must init challenger")
+	require.NoError(t, chl.Start(ctx), "must start challenger")
+
 	return &Helper{
 		log:     log,
 		t:       t,
 		require: require.New(t),
 		dir:     cfg.Datadir,
-		cancel:  cancel,
-		errors:  errCh,
+		chl:     chl,
 	}
 }
@@ -179,16 +177,9 @@ func NewChallengerConfig(t *testing.T, l1Endpoint string, options ...Option) *co
 }
 
 func (h *Helper) Close() error {
-	h.cancel()
-	select {
-	case <-time.After(1 * time.Minute):
-		return errors.New("timed out while stopping challenger")
-	case err := <-h.errors:
-		if !errors.Is(err, context.Canceled) {
-			return err
-		}
-		return nil
-	}
+	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
+	defer cancel()
+	return h.chl.Stop(ctx)
 }
 
 type GameAddr interface {
...
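Helper.Close drops the cancel-and-race-an-error-channel dance for a single bounded Stop call. The shape generalizes to any Lifecycle-style service (a sketch; stopWithTimeout is a made-up helper name):

```go
package helpers

import (
	"context"
	"time"
)

// stopWithTimeout gives a service a fixed budget to wind down cleanly; if the
// service honors its ctx, an overrun surfaces as the service's own error.
func stopWithTimeout(svc interface{ Stop(context.Context) error }, budget time.Duration) error {
	ctx, cancel := context.WithTimeout(context.Background(), budget)
	defer cancel()
	return svc.Stop(ctx)
}
```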
@@ -174,7 +174,7 @@ func (n *OpNode) initL1(ctx context.Context, cfg *Config) error {
 		if err != nil {
 			n.log.Warn("resubscribing after failed L1 subscription", "err", err)
 		}
-		return eth.WatchHeadChanges(n.resourcesCtx, n.l1Source, n.OnNewL1Head)
+		return eth.WatchHeadChanges(ctx, n.l1Source, n.OnNewL1Head)
 	})
 	go func() {
 		err, ok := <-n.l1HeadsSub.Err()
@@ -186,9 +186,9 @@ func (n *OpNode) initL1(ctx context.Context, cfg *Config) error {
 	// Poll for the safe L1 block and finalized block,
 	// which only change once per epoch at most and may be delayed.
-	n.l1SafeSub = eth.PollBlockChanges(n.resourcesCtx, n.log, n.l1Source, n.OnNewL1Safe, eth.Safe,
+	n.l1SafeSub = eth.PollBlockChanges(n.log, n.l1Source, n.OnNewL1Safe, eth.Safe,
 		cfg.L1EpochPollInterval, time.Second*10)
-	n.l1FinalizedSub = eth.PollBlockChanges(n.resourcesCtx, n.log, n.l1Source, n.OnNewL1Finalized, eth.Finalized,
+	n.l1FinalizedSub = eth.PollBlockChanges(n.log, n.l1Source, n.OnNewL1Finalized, eth.Finalized,
 		cfg.L1EpochPollInterval, time.Second*10)
 	return nil
 }
@@ -582,6 +582,14 @@ func (n *OpNode) Stop(ctx context.Context) error {
 	if n.l1HeadsSub != nil {
 		n.l1HeadsSub.Unsubscribe()
 	}
+	// stop polling for L1 safe-head changes
+	if n.l1SafeSub != nil {
+		n.l1SafeSub.Unsubscribe()
+	}
+	// stop polling for L1 finalized-head changes
+	if n.l1FinalizedSub != nil {
+		n.l1FinalizedSub.Unsubscribe()
+	}
 	// close L2 driver
 	if n.l2Driver != nil {
...
@@ -279,6 +279,11 @@ func (ps *ProposerService) Stop(ctx context.Context) error {
 			result = errors.Join(result, fmt.Errorf("failed to close balance metricer: %w", err))
 		}
 	}
+
+	if ps.TxManager != nil {
+		ps.TxManager.Close()
+	}
+
 	if ps.metricsSrv != nil {
 		if err := ps.metricsSrv.Stop(ctx); err != nil {
 			result = errors.Join(result, fmt.Errorf("failed to stop metrics server: %w", err))
...
@@ -17,7 +17,8 @@ type NewHeadSource interface {
 	SubscribeNewHead(ctx context.Context, ch chan<- *types.Header) (ethereum.Subscription, error)
 }
 
-// WatchHeadChanges wraps a new-head subscription from NewHeadSource to feed the given Tracker
+// WatchHeadChanges wraps a new-head subscription from NewHeadSource to feed the given Tracker.
+// The ctx is only used to create the subscription, and does not affect the returned subscription.
 func WatchHeadChanges(ctx context.Context, src NewHeadSource, fn HeadSignalFn) (ethereum.Subscription, error) {
 	headChanges := make(chan *types.Header, 10)
 	sub, err := src.SubscribeNewHead(ctx, headChanges)
@@ -25,22 +26,33 @@ func WatchHeadChanges(ctx context.Context, src NewHeadSource, fn HeadSignalFn) (
 		return nil, err
 	}
 	return event.NewSubscription(func(quit <-chan struct{}) error {
+		eventsCtx, eventsCancel := context.WithCancel(context.Background())
 		defer sub.Unsubscribe()
+		defer eventsCancel()
+		// We can handle a quit signal while fn is running, by closing the ctx.
+		go func() {
+			select {
+			case <-quit:
+				eventsCancel()
+			case <-eventsCtx.Done(): // don't wait for quit signal if we closed for other reasons.
+				return
+			}
+		}()
 		for {
 			select {
 			case header := <-headChanges:
-				fn(ctx, L1BlockRef{
+				fn(eventsCtx, L1BlockRef{
 					Hash:       header.Hash(),
 					Number:     header.Number.Uint64(),
 					ParentHash: header.ParentHash,
 					Time:       header.Time,
 				})
-			case err := <-sub.Err():
-				return err
-			case <-ctx.Done():
-				return ctx.Err()
-			case <-quit:
+			case <-eventsCtx.Done():
 				return nil
+			case err := <-sub.Err(): // if the underlying subscription fails, stop
+				return err
 			}
 		}
 	}), nil
@@ -53,7 +65,7 @@ type L1BlockRefsSource interface {
 
 // PollBlockChanges opens a polling loop to fetch the L1 block reference with the given label,
 // on provided interval and with request timeout. Results are returned with provided callback fn,
 // which may block to pause/back-pressure polling.
-func PollBlockChanges(ctx context.Context, log log.Logger, src L1BlockRefsSource, fn HeadSignalFn,
+func PollBlockChanges(log log.Logger, src L1BlockRefsSource, fn HeadSignalFn,
 	label BlockLabel, interval time.Duration, timeout time.Duration) ethereum.Subscription {
 	return event.NewSubscription(func(quit <-chan struct{}) error {
 		if interval <= 0 {
@@ -61,22 +73,32 @@ func PollBlockChanges(ctx context.Context, log log.Logger, src L1BlockRefsSource
 			<-quit
 			return nil
 		}
+		eventsCtx, eventsCancel := context.WithCancel(context.Background())
+		defer eventsCancel()
+		// We can handle a quit signal while fn is running, by closing the ctx.
+		go func() {
+			select {
+			case <-quit:
+				eventsCancel()
+			case <-eventsCtx.Done(): // don't wait for quit signal if we closed for other reasons.
+				return
+			}
+		}()
 		ticker := time.NewTicker(interval)
 		defer ticker.Stop()
 		for {
 			select {
 			case <-ticker.C:
-				reqCtx, reqCancel := context.WithTimeout(ctx, timeout)
+				reqCtx, reqCancel := context.WithTimeout(eventsCtx, timeout)
 				ref, err := src.L1BlockRefByLabel(reqCtx, label)
 				reqCancel()
 				if err != nil {
 					log.Warn("failed to poll L1 block", "label", label, "err", err)
 				} else {
-					fn(ctx, ref)
+					fn(eventsCtx, ref)
 				}
-			case <-ctx.Done():
-				return ctx.Err()
-			case <-quit:
+			case <-eventsCtx.Done():
 				return nil
 			}
 		}
...
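Both WatchHeadChanges and PollBlockChanges now bridge event.NewSubscription's quit channel into a locally-owned context, so a callback running mid-call observes the cancellation and the bridge goroutine never leaks. The pattern in isolation, as a self-contained sketch (newBridgedSub and the int work channel are invented for the example):

```go
package main

import (
	"context"
	"fmt"

	"github.com/ethereum/go-ethereum/event"
)

// newBridgedSub shows the quit-to-context bridge used by the producers above:
// quit cancels eventsCtx, and eventsCtx.Done() doubles as the loop-exit
// condition, so in-flight callbacks see the cancellation too.
func newBridgedSub(work <-chan int, fn func(ctx context.Context, v int)) event.Subscription {
	return event.NewSubscription(func(quit <-chan struct{}) error {
		eventsCtx, eventsCancel := context.WithCancel(context.Background())
		defer eventsCancel()
		go func() {
			select {
			case <-quit:
				eventsCancel() // translate quit into ctx cancellation
			case <-eventsCtx.Done(): // producer exited for another reason
			}
		}()
		for {
			select {
			case v := <-work:
				fn(eventsCtx, v)
			case <-eventsCtx.Done():
				return nil
			}
		}
	})
}

func main() {
	work := make(chan int)
	sub := newBridgedSub(work, func(ctx context.Context, v int) { fmt.Println("got", v) })
	sub.Unsubscribe() // triggers quit; the bridge cancels eventsCtx and the loop returns
}
```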
@@ -43,6 +43,11 @@ func (_m *TxManager) BlockNumber(ctx context.Context) (uint64, error) {
 	return r0, r1
 }
 
+// Close provides a mock function with given fields:
+func (_m *TxManager) Close() {
+	_m.Called()
+}
+
 // From provides a mock function with given fields:
 func (_m *TxManager) From() common.Address {
 	ret := _m.Called()
...
@@ -49,6 +49,9 @@ type TxManager interface {
 	// BlockNumber returns the most recent block number from the underlying network.
 	BlockNumber(ctx context.Context) (uint64, error)
+
+	// Close the underlying connection
+	Close()
 }
 
 // ETHBackend is the set of methods that the transaction manager uses to resubmit gas & determine
@@ -80,6 +83,8 @@ type ETHBackend interface {
 	// EstimateGas returns an estimate of the amount of gas needed to execute the given
 	// transaction against the current pending block.
 	EstimateGas(ctx context.Context, msg ethereum.CallMsg) (uint64, error)
+	// Close the underlying eth connection
+	Close()
 }
 
 // SimpleTxManager is a implementation of TxManager that performs linear fee
@@ -131,6 +136,10 @@ func (m *SimpleTxManager) BlockNumber(ctx context.Context) (uint64, error) {
 	return m.backend.BlockNumber(ctx)
 }
 
+func (m *SimpleTxManager) Close() {
+	m.backend.Close()
+}
+
 // TxCandidate is a transaction candidate that can be submitted to ask the
 // [TxManager] to construct a transaction with gas price bounds.
 type TxCandidate struct {
...
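Close ripples down the stack: SimpleTxManager.Close just delegates to its ETHBackend, and the production backend is an *ethclient.Client, which already exposes Close to hang up the underlying RPC connection. That is why only the test doubles in this commit needed new stubs. A one-line compile-time check of that relationship:

```go
package main

import "github.com/ethereum/go-ethereum/ethclient"

// *ethclient.Client already provides Close(), so it satisfies the Close
// requirement newly added to txmgr's ETHBackend interface.
var _ interface{ Close() } = (*ethclient.Client)(nil)

func main() {}
```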
@@ -261,6 +261,9 @@ func (b *mockBackend) TransactionReceipt(ctx context.Context, txHash common.Hash
 	}, nil
 }
 
+func (b *mockBackend) Close() {
+}
+
 // TestTxMgrConfirmAtMinGasPrice asserts that Send returns the min gas price tx
 // if the tx is mined instantly.
 func TestTxMgrConfirmAtMinGasPrice(t *testing.T) {
@@ -755,6 +758,9 @@ func (b *failingBackend) ChainID(ctx context.Context) (*big.Int, error) {
 	return nil, errors.New("unimplemented")
 }
 
+func (b *failingBackend) Close() {
+}
+
 // TestWaitMinedReturnsReceiptAfterFailure asserts that WaitMined is able to
 // recover from failed calls to the backend. It uses the failedBackend to
 // simulate an rpc call failure, followed by the successful return of a receipt.
...