Commit 4a5e1832 authored by smartcontracts, committed by GitHub

feat(fd): use better metric labels (#3488)

Improves and simplifies metrics by using better metric labels.
parent c427f0c0
---
'@eth-optimism/fault-detector': minor
---
Updates metrics to use better labels.
...@@ -22,11 +22,9 @@ type Options = { ...@@ -22,11 +22,9 @@ type Options = {
} }
type Metrics = { type Metrics = {
highestCheckedBatchIndex: Gauge highestBatchIndex: Gauge
highestKnownBatchIndex: Gauge
isCurrentlyMismatched: Gauge isCurrentlyMismatched: Gauge
l1NodeConnectionFailures: Gauge nodeConnectionFailures: Gauge
l2NodeConnectionFailures: Gauge
} }
type State = { type State = {
...@@ -64,25 +62,19 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> { ...@@ -64,25 +62,19 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
}, },
}, },
metricsSpec: { metricsSpec: {
highestCheckedBatchIndex: { highestBatchIndex: {
type: Gauge, type: Gauge,
desc: 'Highest good batch index', desc: 'Highest batch indices (checked and known)',
}, labels: ['type'],
highestKnownBatchIndex: {
type: Gauge,
desc: 'Highest known batch index',
}, },
isCurrentlyMismatched: { isCurrentlyMismatched: {
type: Gauge, type: Gauge,
desc: '0 if state is ok, 1 if state is mismatched', desc: '0 if state is ok, 1 if state is mismatched',
}, },
l1NodeConnectionFailures: { nodeConnectionFailures: {
type: Gauge, type: Gauge,
desc: 'Number of times L1 node connection has failed', desc: 'Number of times node connection has failed',
}, labels: ['layer', 'section'],
l2NodeConnectionFailures: {
type: Gauge,
desc: 'Number of times L2 node connection has failed',
}, },
}, },
}) })
...@@ -148,7 +140,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> { ...@@ -148,7 +140,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
node: 'l1', node: 'l1',
section: 'getTotalBatches', section: 'getTotalBatches',
}) })
this.metrics.l1NodeConnectionFailures.inc() this.metrics.nodeConnectionFailures.inc({
layer: 'l1',
section: 'getTotalBatches',
})
await sleep(15000) await sleep(15000)
return return
} }
...@@ -157,7 +152,12 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> { ...@@ -157,7 +152,12 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
await sleep(15000) await sleep(15000)
return return
} else { } else {
this.metrics.highestKnownBatchIndex.set(latestBatchIndex) this.metrics.highestBatchIndex.set(
{
type: 'known',
},
latestBatchIndex
)
} }
this.logger.info(`checking batch`, { this.logger.info(`checking batch`, {
...@@ -177,7 +177,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> { ...@@ -177,7 +177,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
node: 'l1', node: 'l1',
section: 'findEventForStateBatch', section: 'findEventForStateBatch',
}) })
this.metrics.l1NodeConnectionFailures.inc() this.metrics.nodeConnectionFailures.inc({
layer: 'l1',
section: 'findEventForStateBatch',
})
await sleep(15000) await sleep(15000)
return return
} }
...@@ -191,7 +194,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> { ...@@ -191,7 +194,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
node: 'l1', node: 'l1',
section: 'getTransaction', section: 'getTransaction',
}) })
this.metrics.l1NodeConnectionFailures.inc() this.metrics.nodeConnectionFailures.inc({
layer: 'l1',
section: 'getTransaction',
})
await sleep(15000) await sleep(15000)
return return
} }
...@@ -214,7 +220,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> { ...@@ -214,7 +220,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
node: 'l2', node: 'l2',
section: 'getBlockNumber', section: 'getBlockNumber',
}) })
this.metrics.l2NodeConnectionFailures.inc() this.metrics.nodeConnectionFailures.inc({
layer: 'l2',
section: 'getBlockNumber',
})
await sleep(15000) await sleep(15000)
return return
} }
...@@ -246,7 +255,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> { ...@@ -246,7 +255,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
node: 'l2', node: 'l2',
section: 'getBlockRange', section: 'getBlockRange',
}) })
this.metrics.l2NodeConnectionFailures.inc() this.metrics.nodeConnectionFailures.inc({
layer: 'l2',
section: 'getBlockRange',
})
await sleep(15000) await sleep(15000)
return return
} }
...@@ -276,7 +288,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> { ...@@ -276,7 +288,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
} }
this.state.highestCheckedBatchIndex++ this.state.highestCheckedBatchIndex++
this.metrics.highestCheckedBatchIndex.set( this.metrics.highestBatchIndex.set(
{
type: 'checked',
},
this.state.highestCheckedBatchIndex this.state.highestCheckedBatchIndex
) )
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment