Commit f981b8da authored by Kelvin Fichter's avatar Kelvin Fichter

fix(rhc): fix bug in healthcheck dockerfile

Fixes a bug in the replica-healthcheck dockerfile. Also includes
replica-healthcheck as part of the ops setup by default to make this
stuff easier to catch and debug in the future. We will likely still want
to add integration tests that query the metrics defined in the
replica-healthcheck to confirm that everything is working as expected.
parent a3b12065
---
'@eth-optimism/replica-healthcheck': patch
---
Fixes a bug in the replica-healthcheck Dockerfile
---
'@eth-optimism/common-ts': patch
---
Properly exposes metrics as part of a metrics server at port 7300
...@@ -197,6 +197,25 @@ services: ...@@ -197,6 +197,25 @@ services:
- ${REPLICA_HTTP_PORT:-8549}:8545 - ${REPLICA_HTTP_PORT:-8549}:8545
- ${REPLICA_WS_PORT:-8550}:8546 - ${REPLICA_WS_PORT:-8550}:8546
replica-healthcheck:
depends_on:
- l2geth
- replica
deploy:
replicas: 1
build:
context: ..
dockerfile: ./ops/docker/Dockerfile.packages
target: replica-healthcheck
image: ethereumoptimism/replica-healthcheck:${DOCKER_TAG_REPLICA_HEALTHCHECK:-latest}
entrypoint: ./healthcheck.sh
environment:
HEALTHCHECK__REFERENCE_RPC_PROVIDER: http://l2geth:8545
HEALTHCHECK__TARGET_RPC_PROVIDER: http://replica:8545
RETRIES: 60
ports:
- ${HEALTHCHECK_HTTP_PORT:-7300}:7300
integration_tests: integration_tests:
deploy: deploy:
replicas: 0 replicas: 0
......
...@@ -61,5 +61,6 @@ CMD ["npm", "run", "start"] ...@@ -61,5 +61,6 @@ CMD ["npm", "run", "start"]
FROM base as replica-healthcheck FROM base as replica-healthcheck
WORKDIR /opts/optimism/packages/replica-healthcheck WORKDIR /opt/optimism/packages/replica-healthcheck
COPY ./ops/scripts/healthcheck.sh .
ENTRYPOINT ["npm", "run", "start"] ENTRYPOINT ["npm", "run", "start"]
#!/bin/bash
# Entrypoint wrapper for the replica-healthcheck service.
#
# Waits until both the reference and the target RPC endpoints respond
# successfully to an HTTP request, then replaces this shell with `yarn start`.
#
# Environment:
#   HEALTHCHECK__REFERENCE_RPC_PROVIDER  URL of the reference RPC endpoint (required)
#   HEALTHCHECK__TARGET_RPC_PROVIDER     URL of the target RPC endpoint (required)
#   RETRIES                              max curl retries per endpoint (default: 60)
set -e

RETRIES=${RETRIES:-60}

# Fail fast with an explicit message instead of letting curl complain about a
# missing URL argument when one of the endpoints is not configured.
: "${HEALTHCHECK__REFERENCE_RPC_PROVIDER:?must be set to the reference RPC URL}"
: "${HEALTHCHECK__TARGET_RPC_PROVIDER:?must be set to the target RPC URL}"

# Block until the RPC endpoint at URL $1 responds, retrying up to $RETRIES
# times with a one second delay (connection-refused errors are retried too).
# Returns curl's non-zero exit status if the endpoint never comes up, which
# aborts the script because of `set -e`.
wait_for_rpc() {
  curl \
    --fail \
    --show-error \
    --silent \
    --output /dev/null \
    --retry-connrefused \
    --retry "$RETRIES" \
    --retry-delay 1 \
    "$1"
}

# wait for reference RPC to be up
wait_for_rpc "$HEALTHCHECK__REFERENCE_RPC_PROVIDER"

# wait for target RPC to be up
wait_for_rpc "$HEALTHCHECK__TARGET_RPC_PROVIDER"

# go: exec so the service replaces this shell and receives signals directly
exec yarn start
/* Imports: External */ import { Server } from 'net'
import Config from 'bcfg' import Config from 'bcfg'
import * as dotenv from 'dotenv' import * as dotenv from 'dotenv'
import { Command, Option } from 'commander' import { Command, Option } from 'commander'
import { ValidatorSpec, Spec, cleanEnv } from 'envalid' import { ValidatorSpec, Spec, cleanEnv } from 'envalid'
import { sleep } from '@eth-optimism/core-utils' import { sleep } from '@eth-optimism/core-utils'
import snakeCase from 'lodash/snakeCase' import snakeCase from 'lodash/snakeCase'
import express from 'express'
import prometheus, { Registry } from 'prom-client'
/* Imports: Internal */
import { Logger } from '../common/logger' import { Logger } from '../common/logger'
import { Metric } from './metrics' import { Metric } from './metrics'
...@@ -82,6 +84,26 @@ export abstract class BaseServiceV2< ...@@ -82,6 +84,26 @@ export abstract class BaseServiceV2<
*/ */
protected readonly metrics: TMetrics protected readonly metrics: TMetrics
/**
* Registry for prometheus metrics.
*/
protected readonly metricsRegistry: Registry
/**
* Metrics server.
*/
protected metricsServer: Server
/**
* Port for the metrics server.
*/
protected readonly metricsServerPort: number
/**
* Hostname for the metrics server.
*/
protected readonly metricsServerHostname: string
/** /**
* @param params Options for the construction of the service. * @param params Options for the construction of the service.
* @param params.name Name for the service. This name will determine the prefix used for logging, * @param params.name Name for the service. This name will determine the prefix used for logging,
...@@ -93,6 +115,8 @@ export abstract class BaseServiceV2< ...@@ -93,6 +115,8 @@ export abstract class BaseServiceV2<
* @param params.options Options to pass to the service. * @param params.options Options to pass to the service.
* @param params.loops Whether or not the service should loop. Defaults to true. * @param params.loops Whether or not the service should loop. Defaults to true.
* @param params.loopIntervalMs Loop interval in milliseconds. Defaults to zero. * @param params.loopIntervalMs Loop interval in milliseconds. Defaults to zero.
* @param params.metricsServerPort Port for the metrics server. Defaults to 7300.
* @param params.metricsServerHostname Hostname for the metrics server. Defaults to 0.0.0.0.
*/ */
constructor(params: { constructor(params: {
name: string name: string
...@@ -101,6 +125,8 @@ export abstract class BaseServiceV2< ...@@ -101,6 +125,8 @@ export abstract class BaseServiceV2<
options?: Partial<TOptions> options?: Partial<TOptions>
loop?: boolean loop?: boolean
loopIntervalMs?: number loopIntervalMs?: number
metricsServerPort?: number
metricsServerHostname?: string
}) { }) {
this.loop = params.loop !== undefined ? params.loop : true this.loop = params.loop !== undefined ? params.loop : true
this.loopIntervalMs = this.loopIntervalMs =
...@@ -203,6 +229,11 @@ export abstract class BaseServiceV2< ...@@ -203,6 +229,11 @@ export abstract class BaseServiceV2<
return acc return acc
}, {}) as TMetrics }, {}) as TMetrics
// Create the metrics server.
this.metricsRegistry = prometheus.register
this.metricsServerPort = params.metricsServerPort || 7300
this.metricsServerHostname = params.metricsServerHostname || '0.0.0.0'
this.logger = new Logger({ name: params.name }) this.logger = new Logger({ name: params.name })
// Gracefully handle stop signals. // Gracefully handle stop signals.
...@@ -222,6 +253,33 @@ export abstract class BaseServiceV2< ...@@ -222,6 +253,33 @@ export abstract class BaseServiceV2<
public async run(): Promise<void> { public async run(): Promise<void> {
this.done = false this.done = false
// Start the metrics server if not yet running.
if (!this.metricsServer) {
this.logger.info('starting metrics server')
await new Promise((resolve) => {
const app = express()
app.get('/metrics', async (_, res) => {
res.status(200).send(await this.metricsRegistry.metrics())
})
this.metricsServer = app.listen(
this.metricsServerPort,
this.metricsServerHostname,
() => {
resolve(null)
}
)
})
this.logger.info(`metrics started`, {
port: this.metricsServerPort,
hostname: this.metricsServerHostname,
route: '/metrics',
})
}
if (this.init) { if (this.init) {
this.logger.info('initializing service') this.logger.info('initializing service')
await this.init() await this.init()
...@@ -267,7 +325,18 @@ export abstract class BaseServiceV2< ...@@ -267,7 +325,18 @@ export abstract class BaseServiceV2<
while (!this.done) { while (!this.done) {
await sleep(1000) await sleep(1000)
} }
this.logger.info('main loop finished, goodbye!')
// Shut down the metrics server if it's running.
if (this.metricsServer) {
this.logger.info('stopping metrics server')
await new Promise((resolve) => {
this.metricsServer.close(() => {
resolve(null)
})
})
this.logger.info('metrics server stopped')
this.metricsServer = undefined
}
} }
/** /**
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment