Commit 25b5f1cc authored by OptimismBot, committed by GitHub

Merge pull request #5674 from ethereum-optimism/felipe/moar-consensus-metrics

feat(proxyd): moar consensus metrics
parents 915b275a 971f6fcd
...@@ -211,7 +211,10 @@ func NewConsensusPoller(bg *BackendGroup, opts ...ConsensusOpt) *ConsensusPoller ...@@ -211,7 +211,10 @@ func NewConsensusPoller(bg *BackendGroup, opts ...ConsensusOpt) *ConsensusPoller
// UpdateBackend refreshes the consensus state of a single backend // UpdateBackend refreshes the consensus state of a single backend
func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) { func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if cp.IsBanned(be) { banned := cp.IsBanned(be)
RecordConsensusBackendBanned(be, banned)
if banned {
log.Debug("skipping backend banned", "backend", be.Name) log.Debug("skipping backend banned", "backend", be.Name)
return return
} }
...@@ -220,6 +223,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) { ...@@ -220,6 +223,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if !be.Online() || !be.IsHealthy() { if !be.Online() || !be.IsHealthy() {
log.Warn("backend banned - not online or not healthy", "backend", be.Name) log.Warn("backend banned - not online or not healthy", "backend", be.Name)
cp.Ban(be) cp.Ban(be)
return
} }
// if backend is not in sync we'll check again after ban // if backend is not in sync we'll check again after ban
...@@ -227,7 +231,9 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) { ...@@ -227,7 +231,9 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
if err != nil || !inSync { if err != nil || !inSync {
log.Warn("backend banned - not in sync", "backend", be.Name) log.Warn("backend banned - not in sync", "backend", be.Name)
cp.Ban(be) cp.Ban(be)
return
} }
RecordConsensusBackendInSync(be, inSync)
// if backend exhausted rate limit we'll skip it for now // if backend exhausted rate limit we'll skip it for now
if be.IsRateLimited() { if be.IsRateLimited() {
...@@ -241,6 +247,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) { ...@@ -241,6 +247,7 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
log.Warn("error updating backend", "name", be.Name, "err", err) log.Warn("error updating backend", "name", be.Name, "err", err)
return return
} }
RecordConsensusBackendPeerCount(be, peerCount)
} }
latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest") latestBlockNumber, latestBlockHash, err := cp.fetchBlock(ctx, be, "latest")
...@@ -249,15 +256,17 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) { ...@@ -249,15 +256,17 @@ func (cp *ConsensusPoller) UpdateBackend(ctx context.Context, be *Backend) {
return return
} }
changed := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash) changed, updateDelay := cp.setBackendState(be, peerCount, latestBlockNumber, latestBlockHash)
if changed { if changed {
RecordBackendLatestBlock(be, latestBlockNumber) RecordBackendLatestBlock(be, latestBlockNumber)
RecordConsensusBackendUpdateDelay(be, updateDelay)
log.Debug("backend state updated", log.Debug("backend state updated",
"name", be.Name, "name", be.Name,
"peerCount", peerCount, "peerCount", peerCount,
"latestBlockNumber", latestBlockNumber, "latestBlockNumber", latestBlockNumber,
"latestBlockHash", latestBlockHash) "latestBlockHash", latestBlockHash,
"updateDelay", updateDelay)
} }
} }
...@@ -388,11 +397,15 @@ func (cp *ConsensusPoller) UpdateBackendGroupConsensus(ctx context.Context) { ...@@ -388,11 +397,15 @@ func (cp *ConsensusPoller) UpdateBackendGroupConsensus(ctx context.Context) {
} }
cp.tracker.SetConsensusBlockNumber(proposedBlock) cp.tracker.SetConsensusBlockNumber(proposedBlock)
RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
cp.consensusGroupMux.Lock() cp.consensusGroupMux.Lock()
cp.consensusGroup = consensusBackends cp.consensusGroup = consensusBackends
cp.consensusGroupMux.Unlock() cp.consensusGroupMux.Unlock()
RecordGroupConsensusLatestBlock(cp.backendGroup, proposedBlock)
RecordGroupConsensusCount(cp.backendGroup, len(consensusBackends))
RecordGroupConsensusFilteredCount(cp.backendGroup, len(filteredBackendsNames))
RecordGroupTotalCount(cp.backendGroup, len(cp.backendGroup.Backends))
log.Debug("group state", "proposedBlock", proposedBlock, "consensusBackends", strings.Join(consensusBackendsNames, ", "), "filteredBackends", strings.Join(filteredBackendsNames, ", ")) log.Debug("group state", "proposedBlock", proposedBlock, "consensusBackends", strings.Join(consensusBackendsNames, ", "), "filteredBackends", strings.Join(filteredBackendsNames, ", "))
} }
...@@ -497,13 +510,14 @@ func (cp *ConsensusPoller) getBackendState(be *Backend) (peerCount uint64, block ...@@ -497,13 +510,14 @@ func (cp *ConsensusPoller) getBackendState(be *Backend) (peerCount uint64, block
return return
} }
func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool) { func (cp *ConsensusPoller) setBackendState(be *Backend, peerCount uint64, blockNumber hexutil.Uint64, blockHash string) (changed bool, updateDelay time.Duration) {
bs := cp.backendState[be] bs := cp.backendState[be]
bs.backendStateMux.Lock() bs.backendStateMux.Lock()
changed = bs.latestBlockHash != blockHash changed = bs.latestBlockHash != blockHash
bs.peerCount = peerCount bs.peerCount = peerCount
bs.latestBlockNumber = blockNumber bs.latestBlockNumber = blockNumber
bs.latestBlockHash = blockHash bs.latestBlockHash = blockHash
updateDelay = time.Since(bs.lastUpdate)
bs.lastUpdate = time.Now() bs.lastUpdate = time.Now()
bs.backendStateMux.Unlock() bs.backendStateMux.Unlock()
return return
......
...@@ -4,6 +4,7 @@ import ( ...@@ -4,6 +4,7 @@ import (
"context" "context"
"strconv" "strconv"
"strings" "strings"
"time"
"github.com/ethereum/go-ethereum/common/hexutil" "github.com/ethereum/go-ethereum/common/hexutil"
...@@ -260,6 +261,62 @@ var ( ...@@ -260,6 +261,62 @@ var (
}, []string{ }, []string{
"backend_name", "backend_name",
}) })
// consensusGroupCount is a gauge labeled by backend group name.
// RecordGroupConsensusCount sets it to the number of backends currently
// in the consensus group.
consensusGroupCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "group_consensus_count",
Help: "Consensus group serving traffic count",
}, []string{
"backend_group_name",
})
// consensusGroupFilteredCount is a gauge labeled by backend group name.
// RecordGroupConsensusFilteredCount sets it to the number of backends that
// were filtered out of the consensus group.
consensusGroupFilteredCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "group_consensus_filtered_count",
Help: "Consensus group filtered out from serving traffic count",
}, []string{
"backend_group_name",
})
// consensusGroupTotalCount is a gauge labeled by backend group name.
// RecordGroupTotalCount sets it to the total number of backends in the group.
consensusGroupTotalCount = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "group_consensus_total_count",
Help: "Total count of candidates to be part of consensus group",
}, []string{
"backend_group_name",
})
// consensusBannedBackends is a per-backend boolean gauge:
// RecordConsensusBackendBanned sets it to 1 when the backend is banned, 0 otherwise.
consensusBannedBackends = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_banned",
Help: "Bool gauge for banned backends",
}, []string{
"backend_name",
})
// consensusPeerCountBackend is a per-backend gauge holding the peer count
// passed to RecordConsensusBackendPeerCount.
consensusPeerCountBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_peer_count",
Help: "Peer count",
}, []string{
"backend_name",
})
// consensusInSyncBackend is a per-backend boolean gauge:
// RecordConsensusBackendInSync sets it to 1 when the backend is in sync, 0 otherwise.
consensusInSyncBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_in_sync",
Help: "Bool gauge for backends in sync",
}, []string{
"backend_name",
})
// consensusUpdateDelayBackend is a per-backend gauge.
// RecordConsensusBackendUpdateDelay sets it to the given delay expressed in
// whole milliseconds.
consensusUpdateDelayBackend = promauto.NewGaugeVec(prometheus.GaugeOpts{
Namespace: MetricsNamespace,
Name: "consensus_backend_update_delay",
Help: "Delay (ms) for backend update",
}, []string{
"backend_name",
})
) )
func RecordRedisError(source string) { func RecordRedisError(source string) {
...@@ -321,10 +378,46 @@ func RecordBatchSize(size int) { ...@@ -321,10 +378,46 @@ func RecordBatchSize(size int) {
batchSizeHistogram.Observe(float64(size)) batchSizeHistogram.Observe(float64(size))
} }
// RecordGroupConsensusLatestBlock publishes blockNumber on the
// consensusLatestBlock gauge, using the group's name as the label value.
func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) {
	gauge := consensusLatestBlock.WithLabelValues(group.Name)
	gauge.Set(float64(blockNumber))
}
// RecordGroupConsensusCount publishes count on the consensusGroupCount gauge,
// using the group's name as the label value.
func RecordGroupConsensusCount(group *BackendGroup, count int) {
	gauge := consensusGroupCount.WithLabelValues(group.Name)
	gauge.Set(float64(count))
}
// RecordGroupConsensusFilteredCount publishes count on the
// consensusGroupFilteredCount gauge, using the group's name as the label value.
func RecordGroupConsensusFilteredCount(group *BackendGroup, count int) {
	gauge := consensusGroupFilteredCount.WithLabelValues(group.Name)
	gauge.Set(float64(count))
}
// RecordGroupTotalCount publishes count on the consensusGroupTotalCount gauge,
// using the group's name as the label value.
func RecordGroupTotalCount(group *BackendGroup, count int) {
	gauge := consensusGroupTotalCount.WithLabelValues(group.Name)
	gauge.Set(float64(count))
}
func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) { func RecordBackendLatestBlock(be *Backend, blockNumber hexutil.Uint64) {
backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber)) backendLatestBlockBackend.WithLabelValues(be.Name).Set(float64(blockNumber))
} }
func RecordGroupConsensusLatestBlock(group *BackendGroup, blockNumber hexutil.Uint64) { func RecordConsensusBackendBanned(be *Backend, banned bool) {
consensusLatestBlock.WithLabelValues(group.Name).Set(float64(blockNumber)) v := float64(0)
if banned {
v = float64(1)
}
consensusBannedBackends.WithLabelValues(be.Name).Set(v)
}
// RecordConsensusBackendPeerCount publishes peerCount on the
// consensusPeerCountBackend gauge, using the backend's name as the label value.
func RecordConsensusBackendPeerCount(be *Backend, peerCount uint64) {
	gauge := consensusPeerCountBackend.WithLabelValues(be.Name)
	gauge.Set(float64(peerCount))
}
// RecordConsensusBackendInSync publishes the backend's sync status on the
// consensusInSyncBackend gauge as a boolean value: 1 when inSync is true,
// 0 otherwise. The backend's name is used as the label value.
func RecordConsensusBackendInSync(be *Backend, inSync bool) {
	var flag float64 // zero value stands for "not in sync"
	if inSync {
		flag = 1
	}
	gauge := consensusInSyncBackend.WithLabelValues(be.Name)
	gauge.Set(flag)
}
func RecordConsensusBackendUpdateDelay(be *Backend, delay time.Duration) {
consensusUpdateDelayBackend.WithLabelValues(be.Name).Set(float64(delay.Milliseconds()))
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment