Commit 5a2ac1b4 authored by Axel Kingsley, committed by GitHub

Tool: Receipt Reference Aggregator (#9219)

* Data Puller

* comment edits

* remove redundant getTx over RPC

* fix

* Add Write and Merge Functionality

* lint fix

* add gob tooling

* run timer

* batching ; fit and finish ; rebase

* drain results

* fit and finish 2

* error handle

* improved resiliency ; remove ENV

* filename changes
parent 7b732eaa
@@ -4,6 +4,9 @@ op-version-check:
ecotone-scalar:
	go build -o ./bin/ecotone-scalar ./cmd/ecotone-scalar/main.go
receipt-reference-builder:
	go build -o ./bin/receipt-reference-builder ./cmd/receipt-reference-builder/*.go
test:
	go test ./...
# Receipt Reference Tool
The Receipt Reference Tool is a data-pulling tool for operational use by Superchain operators whose chains have post-Bedrock, pre-Canyon activity.
## Data Collection
### Pull
The `pull` subcommand manages a collection of workers which request blocks from an RPC endpoint and check each block for user deposit transactions. The transactions found are collected into an aggregate data structure and written out.
### Merge
The `merge` subcommand takes a list of files, confirms that there is no gap in the processed block ranges, and then merges the aggregates into a single file.
### Convert
The `convert` subcommand targets a single file and writes it as a new file in the requested format.
### Print
`print` is a debug subcommand to read in a file and print it to screen.
## Data Spec
The output data of this tool is an "aggregate". Each aggregate contains the following attributes:
- Start Block, End Block
- Chain ID
- Results Map:
  - Key: Block Number
  - Value: slice of Nonces
Transaction nonces are appended to a block's slice in the order the transactions appear in the block, and a nonce is only included if it belongs to a user deposit.
Blocks which contain no deposit transactions have no key in the map.
Users of this data can easily check whether the data applies to their network (using the Chain ID), whether it covers a given block (using Start and End), and what the nonces of the user deposits in that block were.
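For illustration, a small aggregate serialized as JSON might look like the following. The field names come from the struct tags in the code; the chain ID, block numbers, and nonces here are made up:
```json
{
  "results": {
    "4100000": [12, 13],
    "4100002": [7]
  },
  "chainId": 10,
  "start": 4100000,
  "end": 4100100
}
```
Block `4100000` saw two user deposits and block `4100002` saw one; blocks without deposits (such as `4100001`) have no key at all.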
## Best Practices
This tool is designed with a static range of blocks in mind, on the order of 10 million blocks. To collect such a large body of data in one place, the tool is built for parallel execution and retries.
To maximize parallel efficiency, a higher `-workers` count uses more RPC requests per second, and a larger `-batch-size` groups more RPC requests together per network exchange. I am using 5 workers with 100 requests per batch.
To avoid throwing away work already done, errors encountered by workers are logged but do not stop the aggregation process. Failed jobs are reinserted into the work queue with no maximum retry count, and workers back off when they encounter failures. This allows an RPC endpoint to become temporarily unavailable while aggregation stays persistent.
Even at high speed, collecting this much data can take several hours. You may benefit from planning several smaller runs and merging the results with the `merge` subcommand as they become available, as in the sketch below.
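As a sketch, a split collection might look like the following. This assumes the `pull` subcommand accepts the flags defined alongside the other commands (its implementation is in the collapsed portion of this diff); the RPC URL and block numbers are placeholders:
```sh
./bin/receipt-reference-builder pull --rpc-url http://localhost:8545 \
  --start 4100000 --end 4150000 --workers 5 --batch-size 100 -o part1.json
./bin/receipt-reference-builder pull --rpc-url http://localhost:8545 \
  --start 4150000 --end 4200000 --workers 5 --batch-size 100 -o part2.json
./bin/receipt-reference-builder merge -f part1.json -f part2.json -o merged.json
./bin/receipt-reference-builder convert -f merged.json --output-format gob -o merged.gob
```
The RPC URL may also be supplied via the `OP_CHAIN_OPS_RECEIPT_REFERENCE_BUILDER_RPC_URL` environment variable.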
package main
import (
"errors"
"github.com/ethereum/go-ethereum/log"
"github.com/urfave/cli/v2"
)
var convertCommand = &cli.Command{
Name: "convert",
Usage: "convert an aggregate from one format to another",
Flags: []cli.Flag{FilesFlag, OutputFlag, InputFormatFlag, OutputFormatFlag},
Action: convert,
}
func convert(ctx *cli.Context) error {
log := log.New()
files := ctx.StringSlice("files")
if len(files) != 1 {
return errors.New("only one file is supported")
}
if ctx.String("input-format") == ctx.String("output-format") {
log.Info("no conversion needed. specify different input and output formats")
return nil
}
	r, ok := formats[ctx.String("input-format")]
	if !ok {
		return errors.New("invalid input format")
	}
	w, ok := formats[ctx.String("output-format")]
	if !ok {
		return errors.New("invalid output format")
	}
for _, f := range files {
a, err := r.readAggregate(f)
if err != nil {
log.Error("failed to read aggregate", "file", f, "err", err)
return err
}
err = w.writeAggregate(a, ctx.String("output"))
if err != nil {
log.Error("failed to write aggregate", "file", f, "err", err)
return err
}
}
return nil
}
package main
import (
"os"
"time"
"github.com/mattn/go-isatty"
"github.com/urfave/cli/v2"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/log"
opservice "github.com/ethereum-optimism/optimism/op-service"
)
const EnvPrefix = "OP_CHAIN_OPS_RECEIPT_REFERENCE_BUILDER"
var (
StartFlag = &cli.Uint64Flag{
Name: "start",
Usage: "the first block to include in data collection. INCLUSIVE",
}
EndFlag = &cli.Uint64Flag{
Name: "end",
Usage: "the last block of the collection range. EXCLUSIVE",
}
RPCURLFlag = &cli.StringFlag{
Name: "rpc-url",
Usage: "RPC URL to connect to",
EnvVars: opservice.PrefixEnvVar(EnvPrefix, "RPC_URL"),
}
BackoffFlag = &cli.DurationFlag{
Name: "backoff",
Value: 30 * time.Second,
Usage: "how long to wait when a worker errors before retrying",
}
WorkerFlag = &cli.Uint64Flag{
Name: "workers",
Value: 1,
Usage: "how many workers to use to fetch txs",
}
BatchSizeFlag = &cli.Uint64Flag{
Name: "batch-size",
Value: 50,
Usage: "how many blocks to batch together for each worker",
}
OutputFlag = &cli.StringFlag{
Name: "output",
Aliases: []string{"o"},
Usage: "the file to write the results to",
}
FilesFlag = &cli.StringSliceFlag{
Name: "files",
Aliases: []string{"f"},
Usage: "the set of files to merge",
}
InputFormatFlag = &cli.StringFlag{
Name: "input-format",
Aliases: []string{"if"},
Value: "json",
Usage: "the format to read aggregate files: json, gob",
}
OutputFormatFlag = &cli.StringFlag{
Name: "output-format",
Aliases: []string{"of"},
Value: "json",
Usage: "the format to write the results in. Options: json, gob",
}
formats = map[string]aggregateReaderWriter{
"json": jsonAggregateReaderWriter{},
"gob": gobAggregateReaderWriter{},
}
	// systemAddress is the sender of system deposit transactions (the L1 attributes depositor)
	systemAddress = common.HexToAddress("0xDeaDDEaDDeAdDeAdDEAdDEaddeAddEAdDEAd0001")
	// depositType is the deposit transaction type (0x7E)
	depositType = uint8(126)
)
func main() {
log.Root().SetHandler(log.StreamHandler(os.Stderr, log.TerminalFormat(isatty.IsTerminal(os.Stderr.Fd()))))
app := &cli.App{
Name: "receipt-reference-builder",
Usage: "Used to generate reference data for deposit receipts of pre-canyon blocks",
Flags: []cli.Flag{},
Writer: os.Stdout,
}
app.Commands = []*cli.Command{
pullCommand,
mergeCommand,
convertCommand,
printCommand,
}
if err := app.Run(os.Args); err != nil {
log.Crit("critical error", "err", err)
}
}
type result struct {
BlockNumber uint64 `json:"blockNumber"`
Nonces []uint64 `json:"nonces"`
}
type aggregate struct {
Results map[uint64][]uint64 `json:"results"`
ChainID uint64 `json:"chainId"`
First uint64 `json:"start"`
Last uint64 `json:"end"`
}
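// Illustrative only, not part of this change: the pull implementation is in the
// collapsed diff, but the README's "user deposit" rule amounts to a check like
// this: a deposit-typed transaction whose sender is not the system address.
func isUserDeposit(txType uint8, sender common.Address) bool {
	return txType == depositType && sender != systemAddress
}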
package main
import (
"errors"
"sort"
"github.com/ethereum/go-ethereum/log"
"github.com/urfave/cli/v2"
)
var mergeCommand = &cli.Command{
Name: "merge",
Usage: "Merge two or more output files into a single file. Later files take precedence per key",
Flags: []cli.Flag{FilesFlag, OutputFlag, InputFormatFlag, OutputFormatFlag},
Action: merge,
}
// merge merges two or more aggregate files into a single file
func merge(ctx *cli.Context) error {
log := log.New()
files := ctx.StringSlice("files")
if len(files) < 2 {
return errors.New("need at least two files to merge")
}
log.Info("merging", "files", files)
reader, ok := formats[ctx.String("input-format")]
if !ok {
log.Error("Invalid Input Format. Defaulting to JSON", "Format", ctx.String("input-format"))
reader = formats["json"]
}
writer, ok := formats[ctx.String("output-format")]
if !ok {
log.Error("Invalid Output Format. Defaulting to JSON", "Format", ctx.String("output-format"))
writer = formats["json"]
}
aggregates := []aggregate{}
for _, f := range files {
a, err := reader.readAggregate(f)
if err != nil {
log.Error("failed to read aggregate", "file", f, "err", err)
return err
}
aggregates = append(aggregates, a)
}
// sort the aggregates by first block
sort.Sort(ByFirst(aggregates))
// check that the block ranges don't have a gap
err := checkBlockRanges(aggregates)
if err != nil {
log.Error("error evaluating block ranges", "err", err)
return err
}
// merge the aggregates
merged := aggregates[0]
log.Info("aggregates info", "aggs", aggregates, "len", len(aggregates))
for _, a := range aggregates[1:] {
merged = mergeAggregates(merged, a, log)
}
// write the merged aggregate
err = writer.writeAggregate(merged, ctx.String("output"))
if err != nil {
log.Error("failed to write aggregate", "err", err)
return err
}
return nil
}
type ByFirst []aggregate
func (a ByFirst) Len() int { return len(a) }
func (a ByFirst) Less(i, j int) bool { return a[i].First < a[j].First }
func (a ByFirst) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
// checkBlockRanges checks that the block ranges cover a contiguous span with no gaps (overlaps are allowed)
// this function assumes the aggregates are sorted by first block
func checkBlockRanges(aggregates []aggregate) error {
last := aggregates[0].Last
for _, a := range aggregates[1:] {
if a.First > last+1 {
return errors.New("gap in block ranges")
}
last = a.Last
}
return nil
}
// mergeAggregates merges two aggregates; entries from a2 overwrite a1's on key collision
// this function assumes the aggregates are sorted by first block
func mergeAggregates(a1, a2 aggregate, log log.Logger) aggregate {
log.Info("merging", "a1", a1, "a2", a2)
// merge the results
for k, v := range a2.Results {
a1.Results[k] = v
}
a1.Last = a2.Last
log.Info("result", "aggregate", a1)
return a1
}
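// Illustrative only, not part of this change: a minimal sketch of how the
// helpers above compose. The chain ID, block numbers, and nonces are made up.
func exampleMergeUsage(log log.Logger) {
	a1 := aggregate{ChainID: 10, First: 100, Last: 199, Results: map[uint64][]uint64{150: {0, 1}}}
	a2 := aggregate{ChainID: 10, First: 200, Last: 299, Results: map[uint64][]uint64{250: {2}}}
	aggs := []aggregate{a2, a1}
	// sort by first block so the range check and merge see ordered ranges
	sort.Sort(ByFirst(aggs))
	if err := checkBlockRanges(aggs); err != nil {
		log.Error("gap in block ranges", "err", err)
		return
	}
	merged := mergeAggregates(aggs[0], aggs[1], log)
	// merged now spans blocks 100-299 and contains the keys 150 and 250
	_ = merged
}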
package main
import (
"github.com/ethereum/go-ethereum/log"
"github.com/urfave/cli/v2"
)
var printCommand = &cli.Command{
Name: "print",
Usage: "read an aggregate file and print it to stdout",
Flags: []cli.Flag{FilesFlag, InputFormatFlag},
Action: print,
}
func print(ctx *cli.Context) error {
log := log.New()
files := ctx.StringSlice("files")
	r, ok := formats[ctx.String("input-format")]
	if !ok {
		log.Error("Invalid Input Format. Defaulting to JSON", "Format", ctx.String("input-format"))
		r = formats["json"]
	}
for _, f := range files {
a, err := r.readAggregate(f)
if err != nil {
log.Error("failed to read aggregate", "file", f, "err", err)
return err
}
log.Info("aggregate", "aggregate", a)
}
return nil
}
(One file's diff is collapsed and not shown.)
package main
import (
"encoding/gob"
"encoding/json"
"fmt"
"os"
)
type aggregateReaderWriter interface {
writeAggregate(a aggregate, o string) error
readAggregate(f string) (aggregate, error)
}
type jsonAggregateReaderWriter struct{}
// writeAggregate writes the aggregate to a file in json format
// if the output file is not specified, it will create a file based on the block range
func (w jsonAggregateReaderWriter) writeAggregate(a aggregate, o string) error {
if o == "" {
o = fmt.Sprintf("%d.%d-%d.json", a.ChainID, a.First, a.Last)
}
// write the results to a file
aggregateJson, err := json.Marshal(a)
if err != nil {
return err
}
err = os.WriteFile(o, aggregateJson, 0644)
return err
}
// readAggregate reads the aggregate from a file in json format
func (w jsonAggregateReaderWriter) readAggregate(f string) (aggregate, error) {
// read the file
aggregateJson, err := os.ReadFile(f)
if err != nil {
return aggregate{}, err
}
var a aggregate
err = json.Unmarshal(aggregateJson, &a)
if err != nil {
return aggregate{}, err
}
return a, nil
}
type gobAggregateReaderWriter struct{}
// writeAggregate writes the aggregate to a file in gob format
// if the output file is not specified, it will create a file based on the block range
func (w gobAggregateReaderWriter) writeAggregate(a aggregate, o string) error {
if o == "" {
o = fmt.Sprintf("%d.%d-%d.gob", a.ChainID, a.First, a.Last)
}
file, err := os.Create(o)
if err != nil {
return err
}
defer file.Close()
encoder := gob.NewEncoder(file)
err = encoder.Encode(&a)
return err
}
// readAggregate reads the aggregate from a file in gob format
func (w gobAggregateReaderWriter) readAggregate(f string) (aggregate, error) {
file, err := os.Open(f)
if err != nil {
return aggregate{}, err
}
defer file.Close()
a := aggregate{}
decoder := gob.NewDecoder(file)
err = decoder.Decode(&a)
return a, err
}
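// Illustrative only, not part of this change: round-tripping an aggregate
// through the reader/writer implementations above. Filenames are made up.
func exampleRoundTrip() error {
	a := aggregate{
		Results: map[uint64][]uint64{42: {0}},
		ChainID: 10,
		First:   40,
		Last:    50,
	}
	var rw aggregateReaderWriter = gobAggregateReaderWriter{}
	if err := rw.writeAggregate(a, "example.gob"); err != nil {
		return err
	}
	back, err := rw.readAggregate("example.gob")
	if err != nil {
		return err
	}
	// re-encode as JSON, mirroring what the convert subcommand does
	return jsonAggregateReaderWriter{}.writeAggregate(back, "example.json")
}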