Commit 5a2ac1b4 authored by Axel Kingsley, committed by GitHub

Tool: Receipt Reference Aggregator (#9219)

* Data Puller

* comment edits

* remove redundant getTx over RPC

* fix

* Add Write and Merge Functionality

* lint fix

* add gob tooling

* run timer

* batching ; fit and finish ; rebase

* drain results

* fit and finish 2

* error handle

* improved resiliency ; remove ENV

* filename changes
parent 7b732eaa
@@ -4,6 +4,9 @@ op-version-check:
ecotone-scalar:
	go build -o ./bin/ecotone-scalar ./cmd/ecotone-scalar/main.go
receipt-reference-builder:
	go build -o ./bin/receipt-reference-builder ./cmd/receipt-reference-builder/*.go
test:
	go test ./...
# Receipt Reference Tool
The Receipt Reference Tool is a data-pulling tool for operational use by Superchain operators whose chains have post-Bedrock, pre-Canyon activity.
## Data Collection
### Pull
The `pull` subcommand manages a collection of workers which request blocks from an RPC endpoint and check each block for user deposit transactions. The transactions found are collected into an aggregate data structure and written out.
### Merge
The `merge` subcommand takes a list of files, confirms that there is no gap in the processed block ranges, and then merges the aggregates into a single file.
### Convert
The `convert` subcommand targets a single file and writes it as a new file in the requested format.
### Print
`print` is a debug subcommand to read in a file and print it to screen.
## Data Spec
The output data of this tool is an "aggregate". Each aggregate contains the following attributes:
- Start Block, End Block
- Chain ID
- Results Map:
  - Key: Block Number
  - Value: slice of Nonces
Transaction nonces are appended to a block's slice in the order the transactions appear in the block, and a nonce is only included if it belongs to a user deposit.
Blocks which contain no deposit transactions have no key in the map.
Users of this data can easily check whether the data applies to their network (using the Chain ID), whether it covers a given block (using Start and End), and what the nonces of the user deposits in that block were.
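For illustration, a small aggregate serialized as JSON might look like the following. The field names come from the struct tags in the code; the chain ID, block numbers, and nonces here are made up:
```json
{
  "results": {
    "4100000": [12, 13],
    "4100002": [7]
  },
  "chainId": 10,
  "start": 4100000,
  "end": 4100100
}
```
Block `4100000` saw two user deposits and block `4100002` saw one; blocks without deposits (such as `4100001`) have no key at all.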
## Best Practices
This tool is designed with a static range of blocks in mind, on the order of 10 million blocks. To collect such a large body of data in one place, the tool is built for parallel execution and retries.
To maximize parallel efficiency, a higher `-workers` count uses more RPC requests per second, and a larger `-batch-size` groups more RPC requests together per network exchange. I am using 5 workers with 100 requests per batch.
To avoid throwing away work already done, errors encountered by workers are logged but do not stop the aggregation process. Failed jobs are reinserted into the work queue with no maximum retry count, and workers back off when they encounter failures. This allows an RPC endpoint to become temporarily unavailable while aggregation stays persistent.
Even at high speed, collecting this much data can take several hours. You may benefit from planning several smaller runs and merging the results with the `merge` subcommand as they become available, as in the sketch below.
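As a sketch, a split collection might look like the following. This assumes the `pull` subcommand accepts the flags defined alongside the other commands (its implementation is in the collapsed portion of this diff); the RPC URL and block numbers are placeholders:
```sh
./bin/receipt-reference-builder pull --rpc-url http://localhost:8545 \
  --start 4100000 --end 4150000 --workers 5 --batch-size 100 -o part1.json
./bin/receipt-reference-builder pull --rpc-url http://localhost:8545 \
  --start 4150000 --end 4200000 --workers 5 --batch-size 100 -o part2.json
./bin/receipt-reference-builder merge -f part1.json -f part2.json -o merged.json
./bin/receipt-reference-builder convert -f merged.json --output-format gob -o merged.gob
```
The RPC URL may also be supplied via the `OP_CHAIN_OPS_RECEIPT_REFERENCE_BUILDER_RPC_URL` environment variable.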
package main
import (
"errors"
"github.com/ethereum/go-ethereum/log"
"github.com/urfave/cli/v2"
)
var convertCommand = &cli.Command{
Name: "convert",
Usage: "convert an aggregate from one format to another",
Flags: []cli.Flag{FilesFlag, OutputFlag, InputFormatFlag, OutputFormatFlag},
Action: convert,
}
func convert(ctx *cli.Context) error {
log := log.New()
files := ctx.StringSlice("files")
if len(files) != 1 {
return errors.New("only one file is supported")
}
if ctx.String("input-format") == ctx.String("output-format") {
log.Info("no conversion needed. specify different input and output formats")
return nil
}
	r, ok := formats[ctx.String("input-format")]
	if !ok {
		return errors.New("invalid input format")
	}
	w, ok := formats[ctx.String("output-format")]
	if !ok {
		return errors.New("invalid output format")
	}
for _, f := range files {
a, err := r.readAggregate(f)
if err != nil {
log.Error("failed to read aggregate", "file", f, "err", err)
return err
}
err = w.writeAggregate(a, ctx.String("output"))
if err != nil {
log.Error("failed to write aggregate", "file", f, "err", err)
return err
}
}
return nil
}
package main
import (
"os"
"time"
"github.com/mattn/go-isatty"
"github.com/urfave/cli/v2"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/log"
opservice "github.com/ethereum-optimism/optimism/op-service"
)
const EnvPrefix = "OP_CHAIN_OPS_RECEIPT_REFERENCE_BUILDER"
var (
StartFlag = &cli.Uint64Flag{
Name: "start",
Usage: "the first block to include in data collection. INCLUSIVE",
}
EndFlag = &cli.Uint64Flag{
Name: "end",
Usage: "the last block of the collection range. EXCLUSIVE",
}
RPCURLFlag = &cli.StringFlag{
Name: "rpc-url",
Usage: "RPC URL to connect to",
EnvVars: opservice.PrefixEnvVar(EnvPrefix, "RPC_URL"),
}
BackoffFlag = &cli.DurationFlag{
Name: "backoff",
Value: 30 * time.Second,
Usage: "how long to wait when a worker errors before retrying",
}
WorkerFlag = &cli.Uint64Flag{
Name: "workers",
Value: 1,
Usage: "how many workers to use to fetch txs",
}
BatchSizeFlag = &cli.Uint64Flag{
Name: "batch-size",
Value: 50,
Usage: "how many blocks to batch together for each worker",
}
OutputFlag = &cli.StringFlag{
Name: "output",
Aliases: []string{"o"},
Usage: "the file to write the results to",
}
FilesFlag = &cli.StringSliceFlag{
Name: "files",
Aliases: []string{"f"},
Usage: "the set of files to merge",
}
InputFormatFlag = &cli.StringFlag{
Name: "input-format",
Aliases: []string{"if"},
Value: "json",
Usage: "the format to read aggregate files: json, gob",
}
OutputFormatFlag = &cli.StringFlag{
Name: "output-format",
Aliases: []string{"of"},
Value: "json",
Usage: "the format to write the results in. Options: json, gob",
}
formats = map[string]aggregateReaderWriter{
"json": jsonAggregateReaderWriter{},
"gob": gobAggregateReaderWriter{},
}
	// systemAddress is the sender of system deposit transactions (the L1 attributes depositor)
	systemAddress = common.HexToAddress("0xDeaDDEaDDeAdDeAdDEAdDEaddeAddEAdDEAd0001")
	// depositType is the deposit transaction type (0x7E)
	depositType = uint8(126)
)
func main() {
log.Root().SetHandler(log.StreamHandler(os.Stderr, log.TerminalFormat(isatty.IsTerminal(os.Stderr.Fd()))))
app := &cli.App{
Name: "receipt-reference-builder",
Usage: "Used to generate reference data for deposit receipts of pre-canyon blocks",
Flags: []cli.Flag{},
Writer: os.Stdout,
}
app.Commands = []*cli.Command{
pullCommand,
mergeCommand,
convertCommand,
printCommand,
}
if err := app.Run(os.Args); err != nil {
log.Crit("critical error", "err", err)
}
}
type result struct {
BlockNumber uint64 `json:"blockNumber"`
Nonces []uint64 `json:"nonces"`
}
type aggregate struct {
Results map[uint64][]uint64 `json:"results"`
ChainID uint64 `json:"chainId"`
First uint64 `json:"start"`
Last uint64 `json:"end"`
}
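// Illustrative only, not part of this change: the pull implementation is in the
// collapsed diff, but the README's "user deposit" rule amounts to a check like
// this: a deposit-typed transaction whose sender is not the system address.
func isUserDeposit(txType uint8, sender common.Address) bool {
	return txType == depositType && sender != systemAddress
}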
package main
import (
"errors"
"sort"
"github.com/ethereum/go-ethereum/log"
"github.com/urfave/cli/v2"
)
var mergeCommand = &cli.Command{
Name: "merge",
Usage: "Merge two or more output files into a single file. Later files take precedence per key",
Flags: []cli.Flag{FilesFlag, OutputFlag, InputFormatFlag, OutputFormatFlag},
Action: merge,
}
// merge merges two or more aggregate files into a single file
func merge(ctx *cli.Context) error {
log := log.New()
files := ctx.StringSlice("files")
if len(files) < 2 {
return errors.New("need at least two files to merge")
}
log.Info("merging", "files", files)
reader, ok := formats[ctx.String("input-format")]
if !ok {
log.Error("Invalid Input Format. Defaulting to JSON", "Format", ctx.String("input-format"))
reader = formats["json"]
}
writer, ok := formats[ctx.String("output-format")]
if !ok {
log.Error("Invalid Output Format. Defaulting to JSON", "Format", ctx.String("output-format"))
writer = formats["json"]
}
aggregates := []aggregate{}
for _, f := range files {
a, err := reader.readAggregate(f)
if err != nil {
log.Error("failed to read aggregate", "file", f, "err", err)
return err
}
aggregates = append(aggregates, a)
}
// sort the aggregates by first block
sort.Sort(ByFirst(aggregates))
// check that the block ranges don't have a gap
err := checkBlockRanges(aggregates)
if err != nil {
log.Error("error evaluating block ranges", "err", err)
return err
}
// merge the aggregates
merged := aggregates[0]
log.Info("aggregates info", "aggs", aggregates, "len", len(aggregates))
for _, a := range aggregates[1:] {
merged = mergeAggregates(merged, a, log)
}
// write the merged aggregate
err = writer.writeAggregate(merged, ctx.String("output"))
if err != nil {
log.Error("failed to write aggregate", "err", err)
return err
}
return nil
}
type ByFirst []aggregate
func (a ByFirst) Len() int { return len(a) }
func (a ByFirst) Less(i, j int) bool { return a[i].First < a[j].First }
func (a ByFirst) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// checkBlockRanges checks that the block ranges cover a contiguous span with no gaps (overlaps are allowed)
// this function assumes the aggregates are sorted by first block
func checkBlockRanges(aggregates []aggregate) error {
last := aggregates[0].Last
for _, a := range aggregates[1:] {
if a.First > last+1 {
return errors.New("gap in block ranges")
}
last = a.Last
}
return nil
}
// mergeAggregates merges two aggregates; entries from a2 overwrite a1's on key collision
// this function assumes the aggregates are sorted by first block
func mergeAggregates(a1, a2 aggregate, log log.Logger) aggregate {
log.Info("merging", "a1", a1, "a2", a2)
// merge the results
for k, v := range a2.Results {
a1.Results[k] = v
}
a1.Last = a2.Last
log.Info("result", "aggregate", a1)
return a1
}
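// Illustrative only, not part of this change: a minimal sketch of how the
// helpers above compose. The chain ID, block numbers, and nonces are made up.
func exampleMergeUsage(log log.Logger) {
	a1 := aggregate{ChainID: 10, First: 100, Last: 199, Results: map[uint64][]uint64{150: {0, 1}}}
	a2 := aggregate{ChainID: 10, First: 200, Last: 299, Results: map[uint64][]uint64{250: {2}}}
	aggs := []aggregate{a2, a1}
	// sort by first block so the range check and merge see ordered ranges
	sort.Sort(ByFirst(aggs))
	if err := checkBlockRanges(aggs); err != nil {
		log.Error("gap in block ranges", "err", err)
		return
	}
	merged := mergeAggregates(aggs[0], aggs[1], log)
	// merged now spans blocks 100-299 and contains the keys 150 and 250
	_ = merged
}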
package main
import (
"github.com/ethereum/go-ethereum/log"
"github.com/urfave/cli/v2"
)
var printCommand = &cli.Command{
Name: "print",
Usage: "read an aggregate file and print it to stdout",
Flags: []cli.Flag{FilesFlag, InputFormatFlag},
Action: print,
}
func print(ctx *cli.Context) error {
log := log.New()
files := ctx.StringSlice("files")
	r, ok := formats[ctx.String("input-format")]
	if !ok {
		log.Error("Invalid Input Format. Defaulting to JSON", "Format", ctx.String("input-format"))
		r = formats["json"]
	}
for _, f := range files {
a, err := r.readAggregate(f)
if err != nil {
log.Error("failed to read aggregate", "file", f, "err", err)
return err
}
log.Info("aggregate", "aggregate", a)
}
return nil
}
(One file's diff is collapsed and not shown.)
package main
import (
"encoding/gob"
"encoding/json"
"fmt"
"os"
)
type aggregateReaderWriter interface {
writeAggregate(a aggregate, o string) error
readAggregate(f string) (aggregate, error)
}
type jsonAggregateReaderWriter struct{}
// writeAggregate writes the aggregate to a file in json format
// if the output file is not specified, it will create a file based on the block range
func (w jsonAggregateReaderWriter) writeAggregate(a aggregate, o string) error {
if o == "" {
o = fmt.Sprintf("%d.%d-%d.json", a.ChainID, a.First, a.Last)
}
// write the results to a file
aggregateJson, err := json.Marshal(a)
if err != nil {
return err
}
err = os.WriteFile(o, aggregateJson, 0644)
return err
}
// readAggregate reads the aggregate from a file in json format
func (w jsonAggregateReaderWriter) readAggregate(f string) (aggregate, error) {
// read the file
aggregateJson, err := os.ReadFile(f)
if err != nil {
return aggregate{}, err
}
var a aggregate
err = json.Unmarshal(aggregateJson, &a)
if err != nil {
return aggregate{}, err
}
return a, nil
}
type gobAggregateReaderWriter struct{}
// writeAggregate writes the aggregate to a file in gob format
// if the output file is not specified, it will create a file based on the block range
func (w gobAggregateReaderWriter) writeAggregate(a aggregate, o string) error {
if o == "" {
o = fmt.Sprintf("%d.%d-%d.gob", a.ChainID, a.First, a.Last)
}
file, err := os.Create(o)
if err != nil {
return err
}
defer file.Close()
encoder := gob.NewEncoder(file)
err = encoder.Encode(&a)
return err
}
// readAggregate reads the aggregate from a file in gob format
func (w gobAggregateReaderWriter) readAggregate(f string) (aggregate, error) {
file, err := os.Open(f)
if err != nil {
return aggregate{}, err
}
defer file.Close()
a := aggregate{}
decoder := gob.NewDecoder(file)
err = decoder.Decode(&a)
return a, err
}
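// Illustrative only, not part of this change: round-tripping an aggregate
// through the reader/writer implementations above. Filenames are made up.
func exampleRoundTrip() error {
	a := aggregate{
		Results: map[uint64][]uint64{42: {0}},
		ChainID: 10,
		First:   40,
		Last:    50,
	}
	var rw aggregateReaderWriter = gobAggregateReaderWriter{}
	if err := rw.writeAggregate(a, "example.gob"); err != nil {
		return err
	}
	back, err := rw.readAggregate("example.gob")
	if err != nil {
		return err
	}
	// re-encode as JSON, mirroring what the convert subcommand does
	return jsonAggregateReaderWriter{}.writeAggregate(back, "example.json")
}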