Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
N
nebula
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
exchain
nebula
Commits
ed9b67c8
Unverified
Commit
ed9b67c8
authored
May 18, 2023
by
OptimismBot
Committed by
GitHub
May 18, 2023
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #5727 from ethereum-optimism/hamdi/fault.detector.logging
[fault-detector] extra logging
parents
5b31a02b
cb4f076f
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
96 additions
and
65 deletions
+96
-65
helpers.ts
packages/fault-detector/src/helpers.ts
+38
-12
service.ts
packages/fault-detector/src/service.ts
+58
-53
No files found.
packages/fault-detector/src/helpers.ts
View file @
ed9b67c8
import
{
Contract
,
BigNumber
}
from
'
ethers
'
import
{
Logger
}
from
'
@eth-optimism/common-ts
'
export
interface
OutputOracle
<
TSubmissionEventArgs
>
{
contract
:
Contract
...
...
@@ -39,7 +40,7 @@ const getCache = (
}
=>
{
if
(
!
caches
[
address
])
{
caches
[
address
]
=
{
highestBlock
:
0
,
highestBlock
:
-
1
,
eventCache
:
new
Map
(),
}
}
...
...
@@ -54,15 +55,28 @@ const getCache = (
* @param filter Event filter to use.
*/
export
const
updateOracleCache
=
async
<
TSubmissionEventArgs
>
(
oracle
:
OutputOracle
<
TSubmissionEventArgs
>
oracle
:
OutputOracle
<
TSubmissionEventArgs
>
,
logger
?:
Logger
):
Promise
<
void
>
=>
{
const
cache
=
getCache
(
oracle
.
contract
.
address
)
let
currentBlock
=
cache
.
highestBlock
const
endingBlock
=
await
oracle
.
contract
.
provider
.
getBlockNumber
()
let
step
=
endingBlock
-
currentBlock
const
endBlock
=
await
oracle
.
contract
.
provider
.
getBlockNumber
()
logger
?.
info
(
'
visiting uncached oracle events for range
'
,
{
node
:
'
l1
'
,
cachedUntilBlock
:
cache
.
highestBlock
,
latestBlock
:
endBlock
,
})
let
failures
=
0
while
(
currentBlock
<
endingBlock
)
{
let
currentBlock
=
cache
.
highestBlock
+
1
let
step
=
endBlock
-
currentBlock
while
(
currentBlock
<
endBlock
)
{
try
{
logger
?.
info
(
'
polling events for range
'
,
{
node
:
'
l1
'
,
startBlock
:
currentBlock
,
blockRangeSize
:
step
,
})
const
events
=
await
oracle
.
contract
.
queryFilter
(
oracle
.
filter
,
currentBlock
,
...
...
@@ -83,7 +97,13 @@ export const updateOracleCache = async <TSubmissionEventArgs>(
// Update the current block and increase the step size for the next iteration.
currentBlock
+=
step
step
=
Math
.
ceil
(
step
*
2
)
}
catch
{
}
catch
(
err
)
{
logger
?.
error
(
'
error fetching events
'
,
{
err
,
node
:
'
l1
'
,
section
:
'
getLogs
'
,
})
// Might happen if we're querying too large an event range.
step
=
Math
.
floor
(
step
/
2
)
...
...
@@ -97,13 +117,15 @@ export const updateOracleCache = async <TSubmissionEventArgs>(
// We've failed 3 times in a row, we're probably stuck.
if
(
failures
>=
3
)
{
logger
?.
fatal
(
'
unable to fetch oracle events
'
,
{
err
})
throw
new
Error
(
'
failed to update event cache
'
)
}
}
}
// Update the highest block.
cache
.
highestBlock
=
endingBlock
cache
.
highestBlock
=
endBlock
logger
?.
info
(
'
done caching oracle events
'
)
}
/**
...
...
@@ -115,7 +137,8 @@ export const updateOracleCache = async <TSubmissionEventArgs>(
*/
export
const
findEventForStateBatch
=
async
<
TSubmissionEventArgs
>
(
oracle
:
OutputOracle
<
TSubmissionEventArgs
>
,
index
:
number
index
:
number
,
logger
?:
Logger
):
Promise
<
PartialEvent
>
=>
{
const
cache
=
getCache
(
oracle
.
contract
.
address
)
...
...
@@ -125,10 +148,12 @@ export const findEventForStateBatch = async <TSubmissionEventArgs>(
}
// Update the event cache if we don't have the event.
await
updateOracleCache
(
oracle
)
logger
?.
info
(
'
event not cached for index. warming cache...
'
,
{
index
})
await
updateOracleCache
(
oracle
,
logger
)
// Event better be in cache now!
if
(
cache
.
eventCache
[
index
]
===
undefined
)
{
logger
?.
fatal
(
'
expected event for index!
'
,
{
index
})
throw
new
Error
(
`unable to find event for batch
${
index
}
`
)
}
...
...
@@ -143,7 +168,8 @@ export const findEventForStateBatch = async <TSubmissionEventArgs>(
*/
export
const
findFirstUnfinalizedStateBatchIndex
=
async
<
TSubmissionEventArgs
>
(
oracle
:
OutputOracle
<
TSubmissionEventArgs
>
,
fpw
:
number
fpw
:
number
,
logger
?:
Logger
):
Promise
<
number
>
=>
{
const
latestBlock
=
await
oracle
.
contract
.
provider
.
getBlock
(
'
latest
'
)
const
totalBatches
=
(
await
oracle
.
getTotalElements
()).
toNumber
()
...
...
@@ -153,7 +179,7 @@ export const findFirstUnfinalizedStateBatchIndex = async <TSubmissionEventArgs>(
let
hi
=
totalBatches
while
(
lo
!==
hi
)
{
const
mid
=
Math
.
floor
((
lo
+
hi
)
/
2
)
const
event
=
await
findEventForStateBatch
(
oracle
,
mid
)
const
event
=
await
findEventForStateBatch
(
oracle
,
mid
,
logger
)
const
block
=
await
oracle
.
contract
.
provider
.
getBlock
(
event
.
blockNumber
)
if
(
block
.
timestamp
+
fpw
<
latestBlock
.
timestamp
)
{
...
...
packages/fault-detector/src/service.ts
View file @
ed9b67c8
...
...
@@ -72,7 +72,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
startBatchIndex
:
{
validator
:
validators
.
num
,
default
:
-
1
,
desc
:
'
Batch index to start checking from
'
,
desc
:
'
Batch index to start checking from
. For bedrock chains, this is the L2 height to start from
'
,
public
:
true
,
},
bedrock
:
{
...
...
@@ -124,7 +124,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
* - Legacy: StateCommitmentChain to query for output roots.
*
* @param l2ChainId op chain id
* @returns OEL1ContractsLike
set of L1 contracts with only the required addresses set
* @returns OEL1ContractsLike set of L1 contracts with only the required addresses set
*/
async
getOEL1Contracts
(
l2ChainId
:
number
):
Promise
<
OEL1ContractsLike
>
{
// CrossChainMessenger requires all address to be defined. Default to `AddressZero` to ignore unused contracts
...
...
@@ -219,6 +219,8 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
// We use this a lot, a bit cleaner to pull out to the top level of the state object.
this
.
state
.
fpw
=
await
this
.
state
.
messenger
.
getChallengePeriodSeconds
()
this
.
logger
.
info
(
`fault proof window is
${
this
.
state
.
fpw
}
seconds`
)
if
(
this
.
options
.
bedrock
)
{
const
oo
=
this
.
state
.
messenger
.
contracts
.
l1
.
L2OutputOracle
this
.
state
.
oo
=
{
...
...
@@ -238,25 +240,25 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
}
// Populate the event cache.
this
.
logger
.
info
(
`warming event cache, this might take a while...`
)
await
updateOracleCache
(
this
.
state
.
oo
)
this
.
logger
.
info
(
'
warming event cache, this might take a while...
'
)
await
updateOracleCache
(
this
.
state
.
oo
,
this
.
logger
)
// Figure out where to start syncing from.
if
(
this
.
options
.
startBatchIndex
===
-
1
)
{
this
.
logger
.
info
(
`finding appropriate starting height`
)
this
.
logger
.
info
(
'
finding appropriate starting unfinalized batch
'
)
const
firstUnfinalized
=
await
findFirstUnfinalizedStateBatchIndex
(
this
.
state
.
oo
,
this
.
state
.
fpw
this
.
state
.
fpw
,
this
.
logger
)
// We may not have an unfinalized batches in the case where no batches have been submitted
// for the entire duration of the FPW. We generally do not expect this to happen on mainnet,
// but it happens often on testnets because the FPW is very short.
if
(
firstUnfinalized
===
undefined
)
{
this
.
logger
.
info
(
`no unfinalized batches found, starting from latest`
)
this
.
state
.
currentBatchIndex
=
(
await
this
.
state
.
oo
.
getTotalElements
()
).
toNumber
()
this
.
logger
.
info
(
'
no unfinalized batches found. skipping all batches.
'
)
const
totalBatches
=
await
this
.
state
.
oo
.
getTotalElements
()
this
.
state
.
currentBatchIndex
=
totalBatches
.
toNumber
()
-
1
}
else
{
this
.
state
.
currentBatchIndex
=
firstUnfinalized
}
...
...
@@ -264,8 +266,8 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
this
.
state
.
currentBatchIndex
=
this
.
options
.
startBatchIndex
}
this
.
logger
.
info
(
'
starting
height
'
,
{
startB
atchIndex
:
this
.
state
.
currentBatchIndex
,
this
.
logger
.
info
(
'
starting
batch
'
,
{
b
atchIndex
:
this
.
state
.
currentBatchIndex
,
})
// Set the initial metrics.
...
...
@@ -285,49 +287,50 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
let
latestBatchIndex
:
number
try
{
latestBatchIndex
=
(
await
this
.
state
.
oo
.
getTotalElements
()).
toNumber
()
const
totalBatches
=
await
this
.
state
.
oo
.
getTotalElements
()
latestBatchIndex
=
totalBatches
.
toNumber
()
-
1
}
catch
(
err
)
{
this
.
logger
.
error
(
`got error when connecting to node`
,
{
this
.
logger
.
error
(
'
failed to query total # of batches
'
,
{
error
:
err
,
node
:
'
l1
'
,
section
:
'
getTotal
Batche
s
'
,
section
:
'
getTotal
Element
s
'
,
})
this
.
metrics
.
nodeConnectionFailures
.
inc
({
layer
:
'
l1
'
,
section
:
'
getTotal
Batche
s
'
,
section
:
'
getTotal
Element
s
'
,
})
await
sleep
(
15000
)
return
}
if
(
this
.
state
.
currentBatchIndex
>=
latestBatchIndex
)
{
if
(
this
.
state
.
currentBatchIndex
>
latestBatchIndex
)
{
this
.
logger
.
info
(
'
batch index is ahead of the oracle. waiting...
'
,
{
batchIndex
:
this
.
state
.
currentBatchIndex
,
latestBatchIndex
,
})
await
sleep
(
15000
)
return
}
else
{
this
.
metrics
.
highestBatchIndex
.
set
(
{
type
:
'
known
'
,
},
latestBatchIndex
)
}
this
.
logger
.
info
(
`checking batch`
,
{
this
.
metrics
.
highestBatchIndex
.
set
({
type
:
'
known
'
},
latestBatchIndex
)
this
.
logger
.
info
(
'
checking batch
'
,
{
batchIndex
:
this
.
state
.
currentBatchIndex
,
latest
Index
:
latest
BatchIndex
,
latestBatchIndex
,
})
let
event
:
PartialEvent
try
{
event
=
await
findEventForStateBatch
(
this
.
state
.
oo
,
this
.
state
.
currentBatchIndex
this
.
state
.
currentBatchIndex
,
this
.
logger
)
}
catch
(
err
)
{
this
.
logger
.
error
(
`got error when connecting to node`
,
{
this
.
logger
.
error
(
'
failed to fetch event associated with batch
'
,
{
error
:
err
,
node
:
'
l1
'
,
section
:
'
findEventForStateBatch
'
,
batchIndex
:
this
.
state
.
currentBatchIndex
,
})
this
.
metrics
.
nodeConnectionFailures
.
inc
({
layer
:
'
l1
'
,
...
...
@@ -341,7 +344,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
try
{
latestBlock
=
await
this
.
options
.
l2RpcProvider
.
getBlockNumber
()
}
catch
(
err
)
{
this
.
logger
.
error
(
`got error when connecting to node`
,
{
this
.
logger
.
error
(
'
failed to query L2 block height
'
,
{
error
:
err
,
node
:
'
l2
'
,
section
:
'
getBlockNumber
'
,
...
...
@@ -355,27 +358,29 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
}
if
(
this
.
options
.
bedrock
)
{
if
(
latestBlock
<
event
.
args
.
l2BlockNumber
.
toNumber
())
{
this
.
logger
.
info
(
`node is behind, waiting for sync`
,
{
batchEnd
:
event
.
args
.
l2BlockNumber
.
toNumber
(),
latestBlock
,
const
outputBlockNumber
=
event
.
args
.
l2BlockNumber
.
toNumber
()
if
(
latestBlock
<
outputBlockNumber
)
{
this
.
logger
.
info
(
'
L2 node is behind, waiting for sync...
'
,
{
l2BlockHeight
:
latestBlock
,
outputBlock
:
outputBlockNumber
,
})
return
}
let
targe
tBlock
:
any
let
outpu
tBlock
:
any
try
{
targe
tBlock
=
await
(
outpu
tBlock
=
await
(
this
.
options
.
l2RpcProvider
as
ethers
.
providers
.
JsonRpcProvider
).
send
(
'
eth_getBlockByNumber
'
,
[
toRpcHexString
(
event
.
args
.
l2BlockNumber
.
toNumber
()
),
toRpcHexString
(
outputBlockNumber
),
false
,
])
}
catch
(
err
)
{
this
.
logger
.
error
(
`got error when connecting to node`
,
{
this
.
logger
.
error
(
'
failed to fetch output block
'
,
{
error
:
err
,
node
:
'
l2
'
,
section
:
'
getBlock
'
,
block
:
outputBlockNumber
,
})
this
.
metrics
.
nodeConnectionFailures
.
inc
({
layer
:
'
l2
'
,
...
...
@@ -392,13 +397,14 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
).
send
(
'
eth_getProof
'
,
[
this
.
state
.
messenger
.
contracts
.
l2
.
BedrockMessagePasser
.
address
,
[],
toRpcHexString
(
event
.
args
.
l2BlockNumber
.
toNumber
()
),
toRpcHexString
(
outputBlockNumber
),
])
}
catch
(
err
)
{
this
.
logger
.
error
(
`got error when connecting to node`
,
{
this
.
logger
.
error
(
'
failed to fetch message passer proof
'
,
{
error
:
err
,
node
:
'
l2
'
,
section
:
'
getProof
'
,
block
:
outputBlockNumber
,
})
this
.
metrics
.
nodeConnectionFailures
.
inc
({
layer
:
'
l2
'
,
...
...
@@ -412,22 +418,22 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
[
'
uint256
'
,
'
bytes32
'
,
'
bytes32
'
,
'
bytes32
'
],
[
0
,
targe
tBlock
.
stateRoot
,
outpu
tBlock
.
stateRoot
,
messagePasserProofResponse
.
storageHash
,
targe
tBlock
.
hash
,
outpu
tBlock
.
hash
,
]
)
if
(
outputRoot
!==
event
.
args
.
outputRoot
)
{
this
.
state
.
diverged
=
true
this
.
metrics
.
isCurrentlyMismatched
.
set
(
1
)
this
.
logger
.
error
(
`state root mismatch`
,
{
blockNumber
:
targe
tBlock
.
number
,
this
.
logger
.
error
(
'
state root mismatch
'
,
{
blockNumber
:
outpu
tBlock
.
number
,
expectedStateRoot
:
event
.
args
.
outputRoot
,
actualStateRoot
:
outputRoot
,
finalizationTime
:
dateformat
(
new
Date
(
(
ethers
.
BigNumber
.
from
(
targe
tBlock
.
timestamp
).
toNumber
()
+
(
ethers
.
BigNumber
.
from
(
outpu
tBlock
.
timestamp
).
toNumber
()
+
this
.
state
.
fpw
)
*
1000
),
...
...
@@ -443,7 +449,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
event
.
transactionHash
)
}
catch
(
err
)
{
this
.
logger
.
error
(
`got error when connecting to node`
,
{
this
.
logger
.
error
(
'
failed to acquire batch transaction
'
,
{
error
:
err
,
node
:
'
l1
'
,
section
:
'
getTransaction
'
,
...
...
@@ -466,9 +472,10 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
const
batchEnd
=
batchStart
+
batchSize
if
(
latestBlock
<
batchEnd
)
{
this
.
logger
.
info
(
`node is behind, waiting for sync`
,
{
batchEnd
,
latestBlock
,
this
.
logger
.
info
(
'
L2 node is behind. waiting for sync...
'
,
{
batchBlockStart
:
batchStart
,
batchBlockEnd
:
batchEnd
,
l2BlockHeight
:
latestBlock
,
})
return
}
...
...
@@ -487,7 +494,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
false
,
])
}
catch
(
err
)
{
this
.
logger
.
error
(
`got error when connecting to node`
,
{
this
.
logger
.
error
(
'
failed to query for blocks in batch
'
,
{
error
:
err
,
node
:
'
l2
'
,
section
:
'
getBlockRange
'
,
...
...
@@ -507,7 +514,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
if
(
blocks
[
i
].
stateRoot
!==
stateRoot
)
{
this
.
state
.
diverged
=
true
this
.
metrics
.
isCurrentlyMismatched
.
set
(
1
)
this
.
logger
.
error
(
`state root mismatch`
,
{
this
.
logger
.
error
(
'
state root mismatch
'
,
{
blockNumber
:
blocks
[
i
].
number
,
expectedStateRoot
:
blocks
[
i
].
stateRoot
,
actualStateRoot
:
stateRoot
,
...
...
@@ -533,9 +540,7 @@ export class FaultDetector extends BaseServiceV2<Options, Metrics, State> {
timeMs
:
elapsedMs
,
})
this
.
metrics
.
highestBatchIndex
.
set
(
{
type
:
'
checked
'
,
},
{
type
:
'
checked
'
},
this
.
state
.
currentBatchIndex
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment