proxyd: Request-scoped context for fast batch RPC short-circuits (#2443)

* proxyd: Request-scoped context for fast batch RPC short-circuits
* add batch RPC short-circuit metric

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>

proxyd: Request-scoped context for fast batch RPC short-circuits (#2443)
* proxyd: Request-scoped context for fast batch RPC short-circuits
* add batch RPC short-circuit metric

Co-authored-by: mergify[bot] <37929162+mergify[bot]@users.noreply.github.com>
ae112021 · Murphy Law · GitHub · aeda5301 · ae112021 · ae112021
Commit ae112021 authored Apr 11, 2022 by Murphy Law Committed by GitHub Apr 11, 2022
8 changed files
--- a/.changeset/shiny-fishes-buy.md
+++ b/.changeset/shiny-fishes-buy.md
+---
+'@eth-optimism/proxyd': patch
+---
+proxyd: Request-scoped context for fast batch RPC short-circuiting
--- a/go/proxyd/backend.go
+++ b/go/proxyd/backend.go
@@ -66,6 +66,11 @@ var (
 		Code:    JSONRPCErrorInternal - 14,
 		Message: "too many RPC calls in batch request",
 	}
+	ErrGatewayTimeout = &RPCErr{
+		Code:          JSONRPCErrorInternal - 15,
+		Message:       "gateway timeout",
+		HTTPErrorCode: 504,
+	}
 )
 func ErrInvalidRequest(msg string) *RPCErr {
@@ -217,7 +222,7 @@ func (b *Backend) Forward(ctx context.Context, req *RPCReq) (*RPCRes, error) {
 			)
 			respTimer.ObserveDuration()
 			RecordRPCError(ctx, b.Name, req.Method, err)
-			time.Sleep(calcBackoff(i))
+			sleepContext(ctx, calcBackoff(i))
 			continue
 		}
 		respTimer.ObserveDuration()
@@ -331,7 +336,7 @@ func (b *Backend) setOffline() {
 func (b *Backend) doForward(ctx context.Context, rpcReq *RPCReq) (*RPCRes, error) {
 	body := mustMarshalJSON(rpcReq)
-	httpReq, err := http.NewRequest("POST", b.rpcURL, bytes.NewReader(body))
+	httpReq, err := http.NewRequestWithContext(ctx, "POST", b.rpcURL, bytes.NewReader(body))
 	if err != nil {
 		return nil, wrapErr(err, "error creating backend request")
 	}
@@ -681,3 +686,10 @@ func formatWSError(err error) []byte {
 	}
 	return m
 }
+// sleepContext blocks until duration has elapsed or ctx is done, whichever
+// happens first. Forward uses it for retry backoff so that a cancelled or
+// timed-out request stops waiting immediately instead of sleeping out the
+// full backoff interval.
+func sleepContext(ctx context.Context, duration time.Duration) {
+	// Use an explicit timer instead of time.After so it can be stopped when
+	// ctx finishes first; otherwise the timer stays pending until it fires.
+	timer := time.NewTimer(duration)
+	defer timer.Stop()
+
+	select {
+	case <-ctx.Done():
+	case <-timer.C:
+	}
+}
--- a/go/proxyd/config.go
+++ b/go/proxyd/config.go
@@ -12,6 +12,9 @@ type ServerConfig struct {
 	WSHost           string `toml:"ws_host"`
 	WSPort           int    `toml:"ws_port"`
 	MaxBodySizeBytes int64  `toml:"max_body_size_bytes"`
+	// TimeoutSeconds specifies the maximum time spent serving an HTTP request. Note that it isn't used for websocket connections.
+	TimeoutSeconds int `toml:"timeout_seconds"`
 }
 type CacheConfig struct {

--- a/go/proxyd/integration_tests/batch_timeout_test.go
+++ b/go/proxyd/integration_tests/batch_timeout_test.go
+package integration_tests
+import (
+	"net/http"
+	"os"
+	"testing"
+	"time"
+	"github.com/ethereum-optimism/optimism/go/proxyd"
+	"github.com/stretchr/testify/require"
+)
+const (
+	batchTimeoutResponse = `{"error":{"code":-32015,"message":"gateway timeout"},"id":null,"jsonrpc":"2.0"}`
+)
+// TestBatchTimeout checks that a batch RPC request is short-circuited once
+// the server-level request timeout expires. It starts proxyd with the
+// "batch_timeout" test config (server.timeout_seconds = 1) in front of a mock
+// backend whose handler sleeps for two seconds, sends a batch of two
+// eth_chainId calls, and expects an HTTP 504 carrying the "gateway timeout"
+// JSON-RPC error. It also expects the mock backend to have recorded exactly
+// one request — presumably meaning neither a retry nor the second batch entry
+// was forwarded after the deadline passed.
+func TestBatchTimeout(t *testing.T) {
+	slowBackend := NewMockBackend(nil)
+	defer slowBackend.Close()
+	require.NoError(t, os.Setenv("SLOW_BACKEND_RPC_URL", slowBackend.URL()))
+	config := ReadConfig("batch_timeout")
+	client := NewProxydClient("http://127.0.0.1:8545")
+	shutdown, err := proxyd.Start(config)
+	require.NoError(t, err)
+	defer shutdown()
+	slowBackend.SetHandler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		// Keep in sync with testdata/batch_timeout.toml: the sleep duration should be
+		// at least double the server.timeout_seconds config to prevent flakes.
+		time.Sleep(time.Second * 2)
+		SingleResponseHandler(200, goodResponse)(w, r)
+	}))
+	res, statusCode, err := client.SendBatchRPC(
+		NewRPCReq("1", "eth_chainId", nil),
+		NewRPCReq("1", "eth_chainId", nil),
+	)
+	require.NoError(t, err)
+	require.Equal(t, 504, statusCode)
+	RequireEqualJSON(t, []byte(batchTimeoutResponse), res)
+	require.Equal(t, 1, len(slowBackend.Requests()))
+}
--- a/go/proxyd/integration_tests/testdata/batch_timeout.toml
+++ b/go/proxyd/integration_tests/testdata/batch_timeout.toml
+[server]
+rpc_port = 8545
+timeout_seconds = 1
+[backend]
+response_timeout_seconds = 1
+max_retries = 3
+[backends]
+[backends.slow]
+rpc_url = "$SLOW_BACKEND_RPC_URL"
+ws_url = "$SLOW_BACKEND_RPC_URL"
+[backend_groups]
+[backend_groups.main]
+backends = ["slow"]
+[rpc_method_mappings]
+eth_chainId = "main"
--- a/go/proxyd/metrics.go
+++ b/go/proxyd/metrics.go
@@ -193,6 +193,12 @@ var (
 		"key",
 	})
+	batchRPCShortCircuitsTotal = promauto.NewCounter(prometheus.CounterOpts{
+		Namespace: MetricsNamespace,
+		Name:      "batch_rpc_short_circuits_total",
+		Help:      "Count of total batch RPC short-circuits.",
+	})
 	rpcSpecialErrors = []string{
 		"nonce too low",
 		"gas price too high",

--- a/go/proxyd/proxyd.go
+++ b/go/proxyd/proxyd.go
@@ -211,6 +211,7 @@ func Start(config *Config) (func(), error) {
 		config.RPCMethodMappings,
 		config.Server.MaxBodySizeBytes,
 		resolvedAuth,
+		secondsToDuration(config.Server.TimeoutSeconds),
 		rpcCache,
 	)

--- a/go/proxyd/server.go
+++ b/go/proxyd/server.go
@@ -26,6 +26,7 @@ const (
 	ContextKeyXForwardedFor = "x_forwarded_for"
 	MaxBatchRPCCalls        = 100
 	cacheStatusHdr          = "X-Proxyd-Cache-Status"
+	defaultServerTimeout    = time.Second * 10
 )
 type Server struct {
@@ -35,6 +36,7 @@ type Server struct {
 	rpcMethodMappings  map[string]string
 	maxBodySize        int64
 	authenticatedPaths map[string]string
+	timeout            time.Duration
 	upgrader           *websocket.Upgrader
 	rpcServer          *http.Server
 	wsServer           *http.Server
@@ -48,6 +50,7 @@ func NewServer(
 	rpcMethodMappings map[string]string,
 	maxBodySize int64,
 	authenticatedPaths map[string]string,
+	timeout time.Duration,
 	cache RPCCache,
 ) *Server {
 	if cache == nil {
@@ -58,6 +61,10 @@ func NewServer(
 		maxBodySize = math.MaxInt64
 	}
+	if timeout == 0 {
+		timeout = defaultServerTimeout
+	}
 	return &Server{
 		backendGroups:      backendGroups,
 		wsBackendGroup:     wsBackendGroup,
@@ -65,6 +72,7 @@ func NewServer(
 		rpcMethodMappings:  rpcMethodMappings,
 		maxBodySize:        maxBodySize,
 		authenticatedPaths: authenticatedPaths,
+		timeout:            timeout,
 		cache:              cache,
 		upgrader: &websocket.Upgrader{
 			HandshakeTimeout: 5 * time.Second,
@@ -123,6 +131,9 @@ func (s *Server) HandleRPC(w http.ResponseWriter, r *http.Request) {
 	if ctx == nil {
 		return
 	}
+	var cancel context.CancelFunc
+	ctx, cancel = context.WithTimeout(ctx, s.timeout)
+	defer cancel()
 	log.Info(
 		"received RPC request",
@@ -162,6 +173,19 @@ func (s *Server) HandleRPC(w http.ResponseWriter, r *http.Request) {
 		batchRes := make([]*RPCRes, len(reqs))
 		var batchContainsCached bool
 		for i := 0; i < len(reqs); i++ {
+			if ctx.Err() == context.DeadlineExceeded {
+				log.Info(
+					"short-circuiting batch RPC",
+					"req_id", GetReqID(ctx),
+					"auth", GetAuthCtx(ctx),
+					"index", i,
+					"batch_size", len(reqs),
+				)
+				batchRPCShortCircuitsTotal.Inc()
+				writeRPCError(ctx, w, nil, ErrGatewayTimeout)
+				return
+			}
 			req, err := ParseRPCReq(reqs[i])
 			if err != nil {
 				log.Info("error parsing RPC call", "source", "rpc", "err", err)