Commit 27d2f163 authored by Adrian Sutton's avatar Adrian Sutton Committed by GitHub

op-challenger: Fix highest acted L1 block metric for completed games (#10549)

* op-challenger: Fix highest acted L1 block metric for completed games

Adds an e2e test to prove it really does work.

* op-challenger: Add more context to comment.
parent fa88d9c3
...@@ -3,6 +3,7 @@ package op_challenger ...@@ -3,6 +3,7 @@ package op_challenger
import ( import (
"context" "context"
"github.com/ethereum-optimism/optimism/op-challenger/metrics"
"github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/log"
"github.com/ethereum-optimism/optimism/op-challenger/config" "github.com/ethereum-optimism/optimism/op-challenger/config"
...@@ -11,9 +12,9 @@ import ( ...@@ -11,9 +12,9 @@ import (
) )
// Main is the programmatic entry-point for running op-challenger with a given configuration. // Main is the programmatic entry-point for running op-challenger with a given configuration.
func Main(ctx context.Context, logger log.Logger, cfg *config.Config) (cliapp.Lifecycle, error) { func Main(ctx context.Context, logger log.Logger, cfg *config.Config, m metrics.Metricer) (cliapp.Lifecycle, error) {
if err := cfg.Check(); err != nil { if err := cfg.Check(); err != nil {
return nil, err return nil, err
} }
return game.NewService(ctx, logger, cfg) return game.NewService(ctx, logger, cfg, m)
} }
...@@ -5,6 +5,7 @@ import ( ...@@ -5,6 +5,7 @@ import (
"testing" "testing"
"github.com/ethereum-optimism/optimism/op-challenger/config" "github.com/ethereum-optimism/optimism/op-challenger/config"
"github.com/ethereum-optimism/optimism/op-challenger/metrics"
"github.com/ethereum-optimism/optimism/op-service/testlog" "github.com/ethereum-optimism/optimism/op-service/testlog"
"github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/log"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
...@@ -12,7 +13,7 @@ import ( ...@@ -12,7 +13,7 @@ import (
func TestMainShouldReturnErrorWhenConfigInvalid(t *testing.T) { func TestMainShouldReturnErrorWhenConfigInvalid(t *testing.T) {
cfg := &config.Config{} cfg := &config.Config{}
app, err := Main(context.Background(), testlog.Logger(t, log.LevelInfo), cfg) app, err := Main(context.Background(), testlog.Logger(t, log.LevelInfo), cfg, metrics.NoopMetrics)
require.ErrorIs(t, err, cfg.Check()) require.ErrorIs(t, err, cfg.Check())
require.Nil(t, app) require.Nil(t, app)
} }
...@@ -4,6 +4,7 @@ import ( ...@@ -4,6 +4,7 @@ import (
"context" "context"
"os" "os"
"github.com/ethereum-optimism/optimism/op-challenger/metrics"
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
"github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/log"
...@@ -29,7 +30,9 @@ var VersionWithMeta = opservice.FormatVersion(version.Version, GitCommit, GitDat ...@@ -29,7 +30,9 @@ var VersionWithMeta = opservice.FormatVersion(version.Version, GitCommit, GitDat
func main() { func main() {
args := os.Args args := os.Args
ctx := opio.WithInterruptBlocker(context.Background()) ctx := opio.WithInterruptBlocker(context.Background())
if err := run(ctx, args, challenger.Main); err != nil { if err := run(ctx, args, func(ctx context.Context, l log.Logger, config *config.Config) (cliapp.Lifecycle, error) {
return challenger.Main(ctx, l, config, metrics.NewMetrics())
}); err != nil {
log.Crit("Application failed", "err", err) log.Crit("Application failed", "err", err)
} }
} }
......
...@@ -143,12 +143,12 @@ func (c *coordinator) createJob(ctx context.Context, game types.GameMetadata, bl ...@@ -143,12 +143,12 @@ func (c *coordinator) createJob(ctx context.Context, game types.GameMetadata, bl
state.player = player state.player = player
state.status = player.Status() state.status = player.Status()
} }
state.inflight = true
if state.status != types.GameStatusInProgress { if state.status != types.GameStatusInProgress {
c.logger.Debug("Not rescheduling resolved game", "game", game.Proxy, "status", state.status) c.logger.Debug("Not rescheduling resolved game", "game", game.Proxy, "status", state.status)
state.lastProcessedBlockNum = blockNumber state.lastProcessedBlockNum = blockNumber
return nil, nil return nil, nil
} }
state.inflight = true
return newJob(blockNumber, game.Proxy, state.player, state.status), nil return newJob(blockNumber, game.Proxy, state.player, state.status), nil
} }
......
...@@ -204,29 +204,63 @@ func TestDeleteDataForResolvedGames(t *testing.T) { ...@@ -204,29 +204,63 @@ func TestDeleteDataForResolvedGames(t *testing.T) {
require.True(t, disk.gameDirExists[gameAddr1], "game 1 data should be preserved (not resolved)") require.True(t, disk.gameDirExists[gameAddr1], "game 1 data should be preserved (not resolved)")
require.False(t, disk.gameDirExists[gameAddr2], "game 2 data should be deleted") require.False(t, disk.gameDirExists[gameAddr2], "game 2 data should be deleted")
require.True(t, disk.gameDirExists[gameAddr3], "game 3 data should be preserved (inflight)") // Game 3 never got marked as in-flight because it was already resolved so got skipped.
// We shouldn't be able to have a known-resolved game that is also in-flight because we always skip processing it.
require.False(t, disk.gameDirExists[gameAddr3], "game 3 data should be deleted")
} }
func TestSchedule_RecordActedL1Block(t *testing.T) { func TestSchedule_RecordActedL1Block(t *testing.T) {
c, workQueue, _, _, _, _ := setupCoordinatorTest(t, 10) c, workQueue, _, _, _, _ := setupCoordinatorTest(t, 10)
gameAddr3 := common.Address{0xcc} gameAddr1 := common.Address{0xaa}
gameAddr2 := common.Address{0xcc}
ctx := context.Background() ctx := context.Background()
// The first game should be tracked // The first game should be tracked
require.NoError(t, c.schedule(ctx, asGames(gameAddr3), 1)) require.NoError(t, c.schedule(ctx, asGames(gameAddr1, gameAddr2), 1))
// Process the result // Process the result
require.Len(t, workQueue, 1) require.Len(t, workQueue, 2)
j := <-workQueue j := <-workQueue
require.Equal(t, gameAddr1, j.addr)
j.status = types.GameStatusDefenderWon j.status = types.GameStatusDefenderWon
require.NoError(t, c.processResult(j)) require.NoError(t, c.processResult(j))
j = <-workQueue
require.Equal(t, gameAddr2, j.addr)
j.status = types.GameStatusInProgress
require.NoError(t, c.processResult(j))
// Schedule another block
require.NoError(t, c.schedule(ctx, asGames(gameAddr1, gameAddr2), 2))
// Process the result (only the in-progress game gets rescheduled)
require.Len(t, workQueue, 1)
j = <-workQueue
require.Equal(t, gameAddr2, j.addr)
require.Equal(t, uint64(2), j.block)
j.status = types.GameStatusInProgress
require.NoError(t, c.processResult(j))
// Schedule a third block
require.NoError(t, c.schedule(ctx, asGames(gameAddr1, gameAddr2), 3))
// Process the result (only the in-progress game gets rescheduled)
// This is deliberately done a third time, because there was actually a bug where it worked for the first two
// cycles and failed on the third. This was because the first cycle the game status was unknown so it was processed
// the second cycle was the first time the game was known to be complete so was skipped but crucially it left it
// marked as in-flight. On the third update the was incorrectly skipped as in-flight and the l1 block number
// wasn't updated. From then on the block number would never be updated.
require.Len(t, workQueue, 1)
j = <-workQueue
require.Equal(t, gameAddr2, j.addr)
require.Equal(t, uint64(3), j.block)
j.status = types.GameStatusInProgress
require.NoError(t, c.processResult(j))
// Schedule so that the metric is updated // Schedule so that the metric is updated
require.NoError(t, c.schedule(ctx, asGames(gameAddr3), 2)) require.NoError(t, c.schedule(ctx, asGames(gameAddr1, gameAddr2), 4))
// Verify that the block number is recorded by the metricer as acted upon // Verify that the block number is recorded by the metricer as acted upon
// The one game is now complete so its block number is updated immediately because it doesn't get scheduled require.Equal(t, uint64(3), c.m.(*stubSchedulerMetrics).actedL1Blocks)
require.Equal(t, uint64(2), c.m.(*stubSchedulerMetrics).actedL1Blocks)
} }
func TestSchedule_RecordActedL1BlockMultipleGames(t *testing.T) { func TestSchedule_RecordActedL1BlockMultipleGames(t *testing.T) {
......
...@@ -69,12 +69,12 @@ type Service struct { ...@@ -69,12 +69,12 @@ type Service struct {
} }
// NewService creates a new Service. // NewService creates a new Service.
func NewService(ctx context.Context, logger log.Logger, cfg *config.Config) (*Service, error) { func NewService(ctx context.Context, logger log.Logger, cfg *config.Config, m metrics.Metricer) (*Service, error) {
s := &Service{ s := &Service{
systemClock: clock.SystemClock, systemClock: clock.SystemClock,
l1Clock: clock.NewSimpleClock(), l1Clock: clock.NewSimpleClock(),
logger: logger, logger: logger,
metrics: metrics.NewMetrics(), metrics: m,
} }
if err := s.initFromConfig(ctx, cfg); err != nil { if err := s.initFromConfig(ctx, cfg); err != nil {
......
...@@ -12,6 +12,7 @@ import ( ...@@ -12,6 +12,7 @@ import (
"time" "time"
"github.com/ethereum-optimism/optimism/op-service/metrics" "github.com/ethereum-optimism/optimism/op-service/metrics"
"github.com/ethereum/go-ethereum/ethclient"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/ethereum/go-ethereum/common" "github.com/ethereum/go-ethereum/common"
...@@ -39,15 +40,17 @@ type Helper struct { ...@@ -39,15 +40,17 @@ type Helper struct {
require *require.Assertions require *require.Assertions
dir string dir string
chl cliapp.Lifecycle chl cliapp.Lifecycle
metrics *CapturingMetrics
} }
func NewHelper(log log.Logger, t *testing.T, require *require.Assertions, dir string, chl cliapp.Lifecycle) *Helper { func NewHelper(log log.Logger, t *testing.T, require *require.Assertions, dir string, chl cliapp.Lifecycle, m *CapturingMetrics) *Helper {
return &Helper{ return &Helper{
log: log, log: log,
t: t, t: t,
require: require, require: require,
dir: dir, dir: dir,
chl: chl, chl: chl,
metrics: m,
} }
} }
...@@ -134,17 +137,13 @@ func NewChallenger(t *testing.T, ctx context.Context, sys EndpointProvider, name ...@@ -134,17 +137,13 @@ func NewChallenger(t *testing.T, ctx context.Context, sys EndpointProvider, name
log := testlog.Logger(t, log.LevelDebug).New("role", name) log := testlog.Logger(t, log.LevelDebug).New("role", name)
log.Info("Creating challenger") log.Info("Creating challenger")
cfg := NewChallengerConfig(t, sys, "sequencer", options...) cfg := NewChallengerConfig(t, sys, "sequencer", options...)
chl, err := challenger.Main(ctx, log, cfg) cfg.MetricsConfig.Enabled = false // Don't start the metrics server
m := NewCapturingMetrics()
chl, err := challenger.Main(ctx, log, cfg, m)
require.NoError(t, err, "must init challenger") require.NoError(t, err, "must init challenger")
require.NoError(t, chl.Start(ctx), "must start challenger") require.NoError(t, chl.Start(ctx), "must start challenger")
return &Helper{ return NewHelper(log, t, require.New(t), cfg.Datadir, chl, m)
log: log,
t: t,
require: require.New(t),
dir: cfg.Datadir,
chl: chl,
}
} }
func NewChallengerConfig(t *testing.T, sys EndpointProvider, l2NodeName string, options ...Option) *config.Config { func NewChallengerConfig(t *testing.T, sys EndpointProvider, l2NodeName string, options ...Option) *config.Config {
...@@ -234,3 +233,21 @@ func (h *Helper) WaitForGameDataDeletion(ctx context.Context, games ...GameAddr) ...@@ -234,3 +233,21 @@ func (h *Helper) WaitForGameDataDeletion(ctx context.Context, games ...GameAddr)
func (h *Helper) gameDataDir(addr common.Address) string { func (h *Helper) gameDataDir(addr common.Address) string {
return filepath.Join(h.dir, "game-"+addr.Hex()) return filepath.Join(h.dir, "game-"+addr.Hex())
} }
func (h *Helper) WaitL1HeadActedOn(ctx context.Context, client *ethclient.Client) {
l1Head, err := client.BlockNumber(ctx)
h.require.NoError(err)
h.WaitForHighestActedL1Block(ctx, l1Head)
}
func (h *Helper) WaitForHighestActedL1Block(ctx context.Context, head uint64) {
timedCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
defer cancel()
var actual uint64
err := wait.For(timedCtx, time.Second, func() (bool, error) {
actual = h.metrics.HighestActedL1Block.Load()
h.log.Info("Waiting for highest acted L1 block", "target", head, "actual", actual)
return actual >= head, nil
})
h.require.NoErrorf(err, "Highest acted L1 block did not reach %v, was: %v", head, actual)
}
package challenger
import (
"sync/atomic"
"github.com/ethereum-optimism/optimism/op-challenger/metrics"
)
type CapturingMetrics struct {
metrics.NoopMetricsImpl
HighestActedL1Block atomic.Uint64
}
func NewCapturingMetrics() *CapturingMetrics {
return &CapturingMetrics{}
}
var _ metrics.Metricer = (*CapturingMetrics)(nil)
func (c *CapturingMetrics) RecordActedL1Block(block uint64) {
c.HighestActedL1Block.Store(block)
}
...@@ -293,3 +293,27 @@ func TestOutputAlphabetGame_FreeloaderEarnsNothing(t *testing.T) { ...@@ -293,3 +293,27 @@ func TestOutputAlphabetGame_FreeloaderEarnsNothing(t *testing.T) {
amt := game.Credit(ctx, freeloaderOpts.From) amt := game.Credit(ctx, freeloaderOpts.From)
require.True(t, amt.BitLen() == 0, "freeloaders should not be rewarded") require.True(t, amt.BitLen() == 0, "freeloaders should not be rewarded")
} }
func TestHighestActedL1BlockMetric(t *testing.T) {
op_e2e.InitParallel(t)
ctx := context.Background()
sys, l1Client := StartFaultDisputeSystem(t)
t.Cleanup(sys.Close)
disputeGameFactory := disputegame.NewFactoryHelper(t, ctx, sys)
honestChallenger := disputeGameFactory.StartChallenger(ctx, "Honest", challenger.WithAlphabet(), challenger.WithPrivKey(sys.Cfg.Secrets.Alice))
game1 := disputeGameFactory.StartOutputAlphabetGame(ctx, "sequencer", 1, common.Hash{0xaa})
sys.AdvanceTime(game1.MaxClockDuration(ctx))
require.NoError(t, wait.ForNextBlock(ctx, l1Client))
game1.WaitForGameStatus(ctx, disputegame.StatusDefenderWins)
disputeGameFactory.StartOutputAlphabetGame(ctx, "sequencer", 2, common.Hash{0xaa})
disputeGameFactory.StartOutputAlphabetGame(ctx, "sequencer", 3, common.Hash{0xaa})
honestChallenger.WaitL1HeadActedOn(ctx, l1Client)
require.NoError(t, wait.ForNextBlock(ctx, l1Client))
honestChallenger.WaitL1HeadActedOn(ctx, l1Client)
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment