Commit ed9b67c8 authored by OptimismBot, committed by GitHub

Merge pull request #5727 from ethereum-optimism/hamdi/fault.detector.logging

[fault-detector] extra logging
parents 5b31a02b cb4f076f
 import { Contract, BigNumber } from 'ethers'
+import { Logger } from '@eth-optimism/common-ts'

 export interface OutputOracle<TSubmissionEventArgs> {
   contract: Contract

@@ -39,7 +40,7 @@ const getCache = (
 } => {
   if (!caches[address]) {
     caches[address] = {
-      highestBlock: 0,
+      highestBlock: -1,
       eventCache: new Map(),
     }
   }

@@ -54,15 +55,28 @@ const getCache = (
  * @param filter Event filter to use.
  */
 export const updateOracleCache = async <TSubmissionEventArgs>(
-  oracle: OutputOracle<TSubmissionEventArgs>
+  oracle: OutputOracle<TSubmissionEventArgs>,
+  logger?: Logger
 ): Promise<void> => {
   const cache = getCache(oracle.contract.address)
-  let currentBlock = cache.highestBlock
-  const endingBlock = await oracle.contract.provider.getBlockNumber()
-  let step = endingBlock - currentBlock
+  const endBlock = await oracle.contract.provider.getBlockNumber()
+  logger?.info('visiting uncached oracle events for range', {
+    node: 'l1',
+    cachedUntilBlock: cache.highestBlock,
+    latestBlock: endBlock,
+  })
+
   let failures = 0
-  while (currentBlock < endingBlock) {
+  let currentBlock = cache.highestBlock + 1
+  let step = endBlock - currentBlock
+  while (currentBlock < endBlock) {
     try {
+      logger?.info('polling events for range', {
+        node: 'l1',
+        startBlock: currentBlock,
+        blockRangeSize: step,
+      })
       const events = await oracle.contract.queryFilter(
         oracle.filter,
         currentBlock,

@@ -83,7 +97,13 @@ export const updateOracleCache = async <TSubmissionEventArgs>(
       // Update the current block and increase the step size for the next iteration.
       currentBlock += step
       step = Math.ceil(step * 2)
-    } catch {
+    } catch (err) {
+      logger?.error('error fetching events', {
+        err,
+        node: 'l1',
+        section: 'getLogs',
+      })
       // Might happen if we're querying too large an event range.
       step = Math.floor(step / 2)

@@ -97,13 +117,15 @@ export const updateOracleCache = async <TSubmissionEventArgs>(
       // We've failed 3 times in a row, we're probably stuck.
       if (failures >= 3) {
+        logger?.fatal('unable to fetch oracle events', { err })
         throw new Error('failed to update event cache')
       }
     }
   }

   // Update the highest block.
-  cache.highestBlock = endingBlock
+  cache.highestBlock = endBlock
+  logger?.info('done caching oracle events')
 }

 /**

@@ -115,7 +137,8 @@ export const updateOracleCache = async <TSubmissionEventArgs>(
  */
 export const findEventForStateBatch = async <TSubmissionEventArgs>(
   oracle: OutputOracle<TSubmissionEventArgs>,
-  index: number
+  index: number,
+  logger?: Logger
 ): Promise<PartialEvent> => {
   const cache = getCache(oracle.contract.address)

@@ -125,10 +148,12 @@ export const findEventForStateBatch = async <TSubmissionEventArgs>(
   }

   // Update the event cache if we don't have the event.
-  await updateOracleCache(oracle)
+  logger?.info('event not cached for index. warming cache...', { index })
+  await updateOracleCache(oracle, logger)

   // Event better be in cache now!
   if (cache.eventCache[index] === undefined) {
+    logger?.fatal('expected event for index!', { index })
     throw new Error(`unable to find event for batch ${index}`)
   }

@@ -143,7 +168,8 @@ export const findEventForStateBatch = async <TSubmissionEventArgs>(
  */
 export const findFirstUnfinalizedStateBatchIndex = async <TSubmissionEventArgs>(
   oracle: OutputOracle<TSubmissionEventArgs>,
-  fpw: number
+  fpw: number,
+  logger?: Logger
 ): Promise<number> => {
   const latestBlock = await oracle.contract.provider.getBlock('latest')
   const totalBatches = (await oracle.getTotalElements()).toNumber()

@@ -153,7 +179,7 @@ export const findFirstUnfinalizedStateBatchIndex = async <TSubmissionEventArgs>(
   let hi = totalBatches
   while (lo !== hi) {
     const mid = Math.floor((lo + hi) / 2)
-    const event = await findEventForStateBatch(oracle, mid)
+    const event = await findEventForStateBatch(oracle, mid, logger)
     const block = await oracle.contract.provider.getBlock(event.blockNumber)

     if (block.timestamp + fpw < latestBlock.timestamp) {
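The cache-warming loop touched above scans the oracle's submission events in adaptive block ranges: the range roughly doubles after each successful queryFilter call and is halved when the provider rejects a query (typically because the range is too large), giving up after three failures in a row. A minimal standalone sketch of that pattern with ethers v5 follows; the function name, the onEvents handling comment, and the parameter names are illustrative, not part of this diff.

import { Contract } from 'ethers'

// Sketch only: the adaptive block-range scan pattern used by updateOracleCache above.
// `scanOracleEvents` and its parameter names are hypothetical.
export const scanOracleEvents = async (
  contract: Contract,
  filter: Parameters<Contract['queryFilter']>[0],
  fromBlock: number
): Promise<void> => {
  const endBlock = await contract.provider.getBlockNumber()
  let currentBlock = fromBlock
  let step = endBlock - currentBlock
  let failures = 0
  while (currentBlock < endBlock) {
    try {
      // Query at most `step` blocks starting at currentBlock.
      const events = await contract.queryFilter(filter, currentBlock, currentBlock + step)
      // ... cache or otherwise process `events` here ...
      // Success: advance the cursor and widen the window for the next query.
      currentBlock += step
      step = Math.ceil(step * 2)
    } catch (err) {
      // Failure: likely too large a range for the RPC node, so shrink the window.
      step = Math.max(1, Math.floor(step / 2))
      failures++
      if (failures >= 3) {
        // Three failures in a row: assume we are stuck and give up.
        throw new Error('failed to scan oracle events')
      }
    }
  }
}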
@@ -72,7 +72,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
     startBatchIndex: {
       validator: validators.num,
       default: -1,
-      desc: 'Batch index to start checking from',
+      desc: 'Batch index to start checking from. For bedrock chains, this is the L2 height to start from',
       public: true,
     },
     bedrock: {
@@ -124,7 +124,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
    * - Legacy: StateCommitmentChain to query for output roots.
    *
    * @param l2ChainId op chain id
-   * @returns OEL1ContractsLike  set of L1 contracts with only the required addresses set
+   * @returns OEL1ContractsLike set of L1 contracts with only the required addresses set
    */
   async getOEL1Contracts(l2ChainId: number): Promise<OEL1ContractsLike> {
     // CrossChainMessenger requires all address to be defined. Default to `AddressZero` to ignore unused contracts
@@ -219,6 +219,8 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
     // We use this a lot, a bit cleaner to pull out to the top level of the state object.
     this.state.fpw = await this.state.messenger.getChallengePeriodSeconds()
+    this.logger.info(`fault proof window is ${this.state.fpw} seconds`)
+
     if (this.options.bedrock) {
       const oo = this.state.messenger.contracts.l1.L2OutputOracle
       this.state.oo = {

@@ -238,25 +240,25 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
     }

     // Populate the event cache.
-    this.logger.info(`warming event cache, this might take a while...`)
-    await updateOracleCache(this.state.oo)
+    this.logger.info('warming event cache, this might take a while...')
+    await updateOracleCache(this.state.oo, this.logger)

     // Figure out where to start syncing from.
     if (this.options.startBatchIndex === -1) {
-      this.logger.info(`finding appropriate starting height`)
+      this.logger.info('finding appropriate starting unfinalized batch')
       const firstUnfinalized = await findFirstUnfinalizedStateBatchIndex(
         this.state.oo,
-        this.state.fpw
+        this.state.fpw,
+        this.logger
       )

       // We may not have an unfinalized batches in the case where no batches have been submitted
       // for the entire duration of the FPW. We generally do not expect this to happen on mainnet,
       // but it happens often on testnets because the FPW is very short.
       if (firstUnfinalized === undefined) {
-        this.logger.info(`no unfinalized batches found, starting from latest`)
-        this.state.currentBatchIndex = (
-          await this.state.oo.getTotalElements()
-        ).toNumber()
+        this.logger.info('no unfinalized batches found. skipping all batches.')
+        const totalBatches = await this.state.oo.getTotalElements()
+        this.state.currentBatchIndex = totalBatches.toNumber() - 1
       } else {
         this.state.currentBatchIndex = firstUnfinalized
       }

@@ -264,8 +266,8 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
       this.state.currentBatchIndex = this.options.startBatchIndex
     }

-    this.logger.info('starting height', {
-      startBatchIndex: this.state.currentBatchIndex,
+    this.logger.info('starting batch', {
+      batchIndex: this.state.currentBatchIndex,
     })

     // Set the initial metrics.

@@ -285,49 +287,50 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
     let latestBatchIndex: number
     try {
-      latestBatchIndex = (await this.state.oo.getTotalElements()).toNumber()
+      const totalBatches = await this.state.oo.getTotalElements()
+      latestBatchIndex = totalBatches.toNumber() - 1
     } catch (err) {
-      this.logger.error(`got error when connecting to node`, {
+      this.logger.error('failed to query total # of batches', {
         error: err,
         node: 'l1',
-        section: 'getTotalBatches',
+        section: 'getTotalElements',
       })
       this.metrics.nodeConnectionFailures.inc({
         layer: 'l1',
-        section: 'getTotalBatches',
+        section: 'getTotalElements',
       })
       await sleep(15000)
       return
     }

-    if (this.state.currentBatchIndex >= latestBatchIndex) {
+    if (this.state.currentBatchIndex > latestBatchIndex) {
+      this.logger.info('batch index is ahead of the oracle. waiting...', {
+        batchIndex: this.state.currentBatchIndex,
+        latestBatchIndex,
+      })
       await sleep(15000)
       return
-    } else {
-      this.metrics.highestBatchIndex.set(
-        {
-          type: 'known',
-        },
-        latestBatchIndex
-      )
     }

-    this.logger.info(`checking batch`, {
+    this.metrics.highestBatchIndex.set({ type: 'known' }, latestBatchIndex)
+
+    this.logger.info('checking batch', {
       batchIndex: this.state.currentBatchIndex,
-      latestIndex: latestBatchIndex,
+      latestBatchIndex,
     })

     let event: PartialEvent
     try {
       event = await findEventForStateBatch(
         this.state.oo,
-        this.state.currentBatchIndex
+        this.state.currentBatchIndex,
+        this.logger
       )
     } catch (err) {
-      this.logger.error(`got error when connecting to node`, {
+      this.logger.error('failed to fetch event associated with batch', {
         error: err,
         node: 'l1',
         section: 'findEventForStateBatch',
+        batchIndex: this.state.currentBatchIndex,
       })
       this.metrics.nodeConnectionFailures.inc({
         layer: 'l1',

@@ -341,7 +344,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
     try {
       latestBlock = await this.options.l2RpcProvider.getBlockNumber()
     } catch (err) {
-      this.logger.error(`got error when connecting to node`, {
+      this.logger.error('failed to query L2 block height', {
        error: err,
        node: 'l2',
        section: 'getBlockNumber',

@@ -355,27 +358,29 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
     }

     if (this.options.bedrock) {
-      if (latestBlock < event.args.l2BlockNumber.toNumber()) {
-        this.logger.info(`node is behind, waiting for sync`, {
-          batchEnd: event.args.l2BlockNumber.toNumber(),
-          latestBlock,
+      const outputBlockNumber = event.args.l2BlockNumber.toNumber()
+      if (latestBlock < outputBlockNumber) {
+        this.logger.info('L2 node is behind, waiting for sync...', {
+          l2BlockHeight: latestBlock,
+          outputBlock: outputBlockNumber,
         })
         return
       }

-      let targetBlock: any
+      let outputBlock: any
       try {
-        targetBlock = await (
+        outputBlock = await (
           this.options.l2RpcProvider as ethers.providers.JsonRpcProvider
         ).send('eth_getBlockByNumber', [
-          toRpcHexString(event.args.l2BlockNumber.toNumber()),
+          toRpcHexString(outputBlockNumber),
           false,
         ])
       } catch (err) {
-        this.logger.error(`got error when connecting to node`, {
+        this.logger.error('failed to fetch output block', {
           error: err,
           node: 'l2',
           section: 'getBlock',
+          block: outputBlockNumber,
         })
         this.metrics.nodeConnectionFailures.inc({
           layer: 'l2',

@@ -392,13 +397,14 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
         ).send('eth_getProof', [
           this.state.messenger.contracts.l2.BedrockMessagePasser.address,
           [],
-          toRpcHexString(event.args.l2BlockNumber.toNumber()),
+          toRpcHexString(outputBlockNumber),
         ])
       } catch (err) {
-        this.logger.error(`got error when connecting to node`, {
+        this.logger.error('failed to fetch message passer proof', {
           error: err,
           node: 'l2',
           section: 'getProof',
+          block: outputBlockNumber,
         })
         this.metrics.nodeConnectionFailures.inc({
           layer: 'l2',

@@ -412,22 +418,22 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
         ['uint256', 'bytes32', 'bytes32', 'bytes32'],
         [
           0,
-          targetBlock.stateRoot,
+          outputBlock.stateRoot,
           messagePasserProofResponse.storageHash,
-          targetBlock.hash,
+          outputBlock.hash,
         ]
       )

       if (outputRoot !== event.args.outputRoot) {
         this.state.diverged = true
         this.metrics.isCurrentlyMismatched.set(1)
-        this.logger.error(`state root mismatch`, {
-          blockNumber: targetBlock.number,
+        this.logger.error('state root mismatch', {
+          blockNumber: outputBlock.number,
           expectedStateRoot: event.args.outputRoot,
           actualStateRoot: outputRoot,
           finalizationTime: dateformat(
             new Date(
-              (ethers.BigNumber.from(targetBlock.timestamp).toNumber() +
+              (ethers.BigNumber.from(outputBlock.timestamp).toNumber() +
                 this.state.fpw) *
                 1000
             ),

@@ -443,7 +449,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
           event.transactionHash
         )
       } catch (err) {
-        this.logger.error(`got error when connecting to node`, {
+        this.logger.error('failed to acquire batch transaction', {
           error: err,
           node: 'l1',
           section: 'getTransaction',

@@ -466,9 +472,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
       const batchEnd = batchStart + batchSize

       if (latestBlock < batchEnd) {
-        this.logger.info(`node is behind, waiting for sync`, {
-          batchEnd,
-          latestBlock,
+        this.logger.info('L2 node is behind. waiting for sync...', {
+          batchBlockStart: batchStart,
+          batchBlockEnd: batchEnd,
+          l2BlockHeight: latestBlock,
         })
         return
       }

@@ -487,7 +494,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
           false,
         ])
       } catch (err) {
-        this.logger.error(`got error when connecting to node`, {
+        this.logger.error('failed to query for blocks in batch', {
           error: err,
           node: 'l2',
           section: 'getBlockRange',

@@ -507,7 +514,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
         if (blocks[i].stateRoot !== stateRoot) {
           this.state.diverged = true
           this.metrics.isCurrentlyMismatched.set(1)
-          this.logger.error(`state root mismatch`, {
+          this.logger.error('state root mismatch', {
             blockNumber: blocks[i].number,
             expectedStateRoot: blocks[i].stateRoot,
             actualStateRoot: stateRoot,

@@ -533,9 +540,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
       timeMs: elapsedMs,
     })
     this.metrics.highestBatchIndex.set(
-      {
-        type: 'checked',
-      },
+      { type: 'checked' },
       this.state.currentBatchIndex
    )
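For reference, the bedrock branch touched above verifies an output by recomputing the output root from the L2 block's state root, the storage root of the message passer contract (the storageHash returned by eth_getProof on BedrockMessagePasser), and the block hash, then comparing the result with event.args.outputRoot. A hedged sketch of that computation with ethers v5 follows; it assumes the root is keccak256 over the ABI-encoded tuple shown in the diff, and computeOutputRoot is an illustrative helper name, not part of the service code.

import { ethers } from 'ethers'

// Sketch only: recompute a bedrock output root from its preimage.
// Inputs correspond to block.stateRoot and block.hash from eth_getBlockByNumber
// and storageHash from eth_getProof, as used in the diff above.
const computeOutputRoot = (
  stateRoot: string,
  messagePasserStorageRoot: string,
  blockHash: string
): string => {
  return ethers.utils.keccak256(
    ethers.utils.defaultAbiCoder.encode(
      ['uint256', 'bytes32', 'bytes32', 'bytes32'],
      // The version field is 0, matching the value used in the service code above.
      [0, stateRoot, messagePasserStorageRoot, blockHash]
    )
  )
}

// The detector reports 'state root mismatch' when this value differs from
// event.args.outputRoot for the batch being checked.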