Commit e70e762b authored by Mark Tyneway, committed by GitHub

Merge pull request #1660 from mslipper/feature/proxy-method-metrics

Add proxy method metrics
parents e7159d5d 8cc824e5
---
'@eth-optimism/proxyd': minor
---
Updates proxyd to include additional error metrics.
package proxyd package proxyd
import ( import (
"bytes" "bytes"
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/log"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promauto"
"io" "io"
"io/ioutil" "io/ioutil"
"math" "math"
"math/rand" "math/rand"
"net/http" "net/http"
"sync/atomic" "sync/atomic"
"time" "time"
) )
const ( const (
JSONRPCVersion = "2.0" JSONRPCVersion = "2.0"
) )
var ( var (
ErrNoBackend = errors.New("no backend available for method") ErrNoBackend = errors.New("no backend available for method")
ErrBackendsInconsistent = errors.New("backends inconsistent, try again") ErrBackendsInconsistent = errors.New("backends inconsistent, try again")
ErrBackendOffline = errors.New("backend offline") ErrBackendOffline = errors.New("backend offline")
backendRequestsCtr = promauto.NewCounterVec(prometheus.CounterOpts{ backendRequestsCtr = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "proxyd", Namespace: "proxyd",
Name: "backend_requests_total", Name: "backend_requests_total",
Help: "Count of backend requests.", Help: "Count of backend requests.",
}, []string{ }, []string{
"name", "backend_name",
}) "method_name",
})
backendErrorsCtr = promauto.NewCounterVec(prometheus.CounterOpts{
Namespace: "proxyd", backendErrorsCtr = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "backend_errors_total", Namespace: "proxyd",
Help: "Count of backend errors.", Name: "backend_errors_total",
}, []string{ Help: "Count of backend errors.",
"name", }, []string{
}) "backend_name",
"method_name",
backendPermanentErrorsCtr = promauto.NewCounterVec(prometheus.CounterOpts{ })
Namespace: "proxyd",
Name: "backend_permanent_errors_total", backendPermanentErrorsCtr = promauto.NewCounterVec(prometheus.CounterOpts{
Help: "Count of backend errors that mark a backend as offline.", Namespace: "proxyd",
}, []string{ Name: "backend_permanent_errors_total",
"name", Help: "Count of backend errors that mark a backend as offline.",
}) }, []string{
"backend_name",
"method_name",
})
backendResponseTimeSummary = promauto.NewSummaryVec(prometheus.SummaryOpts{
Namespace: "proxyd",
Name: "backend_response_time_seconds",
Help: "Summary of backend response times broken down by backend and method name.",
Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
}, []string{
"backend_name",
"method_name",
})
) )
type Backend struct { type Backend struct {
Name string Name string
authUsername string authUsername string
authPassword string authPassword string
baseURL string baseURL string
client *http.Client client *http.Client
maxRetries int maxRetries int
maxResponseSize int64 maxResponseSize int64
lastPermError int64 lastPermError int64
unhealthyRetryInterval int64 unhealthyRetryInterval int64
} }
type BackendOpt func(b *Backend) type BackendOpt func(b *Backend)
func WithBasicAuth(username, password string) BackendOpt { func WithBasicAuth(username, password string) BackendOpt {
return func(b *Backend) { return func(b *Backend) {
b.authUsername = username b.authUsername = username
b.authPassword = password b.authPassword = password
} }
} }
func WithTimeout(timeout time.Duration) BackendOpt { func WithTimeout(timeout time.Duration) BackendOpt {
return func(b *Backend) { return func(b *Backend) {
b.client.Timeout = timeout b.client.Timeout = timeout
} }
} }
func WithMaxRetries(retries int) BackendOpt { func WithMaxRetries(retries int) BackendOpt {
return func(b *Backend) { return func(b *Backend) {
b.maxRetries = retries b.maxRetries = retries
} }
} }
func WithMaxResponseSize(size int64) BackendOpt { func WithMaxResponseSize(size int64) BackendOpt {
return func(b *Backend) { return func(b *Backend) {
b.maxResponseSize = size b.maxResponseSize = size
} }
} }
func WithUnhealthyRetryInterval(interval int64) BackendOpt { func WithUnhealthyRetryInterval(interval int64) BackendOpt {
return func(b *Backend) { return func(b *Backend) {
b.unhealthyRetryInterval = interval b.unhealthyRetryInterval = interval
} }
} }
func NewBackend(name, baseURL string, opts ...BackendOpt) *Backend { func NewBackend(name, baseURL string, opts ...BackendOpt) *Backend {
backend := &Backend{ backend := &Backend{
Name: name, Name: name,
baseURL: baseURL, baseURL: baseURL,
maxResponseSize: math.MaxInt64, maxResponseSize: math.MaxInt64,
client: &http.Client{ client: &http.Client{
Timeout: 5 * time.Second, Timeout: 5 * time.Second,
}, },
} }
for _, opt := range opts { for _, opt := range opts {
opt(backend) opt(backend)
} }
return backend return backend
} }
func (b *Backend) Forward(body []byte) (*RPCRes, error) { func (b *Backend) Forward(req *RPCReq) (*RPCRes, error) {
if time.Now().Unix()-atomic.LoadInt64(&b.lastPermError) < b.unhealthyRetryInterval { if time.Now().Unix()-atomic.LoadInt64(&b.lastPermError) < b.unhealthyRetryInterval {
return nil, ErrBackendOffline return nil, ErrBackendOffline
} }
var lastError error var lastError error
// <= to account for the first attempt not technically being // <= to account for the first attempt not technically being
// a retry // a retry
for i := 0; i <= b.maxRetries; i++ { for i := 0; i <= b.maxRetries; i++ {
resB, err := b.doForward(body) resB, err := b.doForward(req)
if err != nil { if err != nil {
lastError = err lastError = err
log.Warn("backend request failed, trying again", "err", err, "name", b.Name) log.Warn("backend request failed, trying again", "err", err, "name", b.Name)
time.Sleep(calcBackoff(i)) time.Sleep(calcBackoff(i))
continue continue
} }
res := new(RPCRes) res := new(RPCRes)
// don't mark the backend down if they give us a bad response body // don't mark the backend down if they give us a bad response body
if err := json.Unmarshal(resB, res); err != nil { if err := json.Unmarshal(resB, res); err != nil {
return nil, wrapErr(err, "error unmarshaling JSON") return nil, wrapErr(err, "error unmarshaling JSON")
} }
return res, nil return res, nil
} }
atomic.StoreInt64(&b.lastPermError, time.Now().Unix()) atomic.StoreInt64(&b.lastPermError, time.Now().Unix())
backendPermanentErrorsCtr.WithLabelValues(b.Name).Inc() backendPermanentErrorsCtr.WithLabelValues(b.Name, req.Method).Inc()
return nil, wrapErr(lastError, "permanent error forwarding request") return nil, wrapErr(lastError, "permanent error forwarding request")
} }
func (b *Backend) doForward(body []byte) ([]byte, error) { func (b *Backend) doForward(rpcReq *RPCReq) ([]byte, error) {
req, err := http.NewRequest("POST", b.baseURL, bytes.NewReader(body)) body, err := json.Marshal(rpcReq)
if err != nil { if err != nil {
backendErrorsCtr.WithLabelValues(b.Name).Inc() return nil, wrapErr(err, "error marshaling request in forward")
return nil, wrapErr(err, "error creating backend request") }
}
httpReq, err := http.NewRequest("POST", b.baseURL, bytes.NewReader(body))
if b.authPassword != "" { if err != nil {
req.SetBasicAuth(b.authUsername, b.authPassword) backendErrorsCtr.WithLabelValues(b.Name, rpcReq.Method).Inc()
} return nil, wrapErr(err, "error creating backend request")
}
res, err := b.client.Do(req)
if err != nil { if b.authPassword != "" {
backendErrorsCtr.WithLabelValues(b.Name).Inc() httpReq.SetBasicAuth(b.authUsername, b.authPassword)
return nil, wrapErr(err, "error in backend request") }
}
timer := prometheus.NewTimer(backendResponseTimeSummary.WithLabelValues(b.Name, rpcReq.Method))
if res.StatusCode != 200 { defer timer.ObserveDuration()
backendErrorsCtr.WithLabelValues(b.Name).Inc() defer backendRequestsCtr.WithLabelValues(b.Name, rpcReq.Method).Inc()
return nil, fmt.Errorf("response code %d", res.StatusCode) res, err := b.client.Do(httpReq)
} if err != nil {
backendErrorsCtr.WithLabelValues(b.Name, rpcReq.Method).Inc()
defer res.Body.Close() return nil, wrapErr(err, "error in backend request")
resB, err := ioutil.ReadAll(io.LimitReader(res.Body, b.maxResponseSize)) }
if err != nil {
backendErrorsCtr.WithLabelValues(b.Name).Inc() if res.StatusCode != 200 {
return nil, wrapErr(err, "error reading response body") backendErrorsCtr.WithLabelValues(b.Name, rpcReq.Method).Inc()
} return nil, fmt.Errorf("response code %d", res.StatusCode)
}
backendRequestsCtr.WithLabelValues(b.Name).Inc()
return resB, nil defer res.Body.Close()
resB, err := ioutil.ReadAll(io.LimitReader(res.Body, b.maxResponseSize))
if err != nil {
backendErrorsCtr.WithLabelValues(b.Name, rpcReq.Method).Inc()
return nil, wrapErr(err, "error reading response body")
}
return resB, nil
} }
type BackendGroup struct { type BackendGroup struct {
Name string Name string
backends []*Backend backends []*Backend
i int64 i int64
} }
func (b *BackendGroup) Forward(body []byte) (*RPCRes, error) { func (b *BackendGroup) Forward(rpcReq *RPCReq) (*RPCRes, error) {
var outRes *RPCRes var outRes *RPCRes
for _, back := range b.backends { for _, back := range b.backends {
res, err := back.Forward(body) res, err := back.Forward(rpcReq)
if err == ErrBackendOffline { if err == ErrBackendOffline {
log.Debug("skipping offline backend", "name", back.Name) log.Debug("skipping offline backend", "name", back.Name)
continue continue
} }
if err != nil { if err != nil {
log.Error("error forwarding request to backend", "err", err, "name", b.Name) log.Error("error forwarding request to backend", "err", err, "name", b.Name)
continue continue
} }
outRes = res outRes = res
break break
} }
if outRes == nil { if outRes == nil {
return nil, errors.New("no backends available") return nil, errors.New("no backends available")
} }
return outRes, nil return outRes, nil
} }
type MethodMapping struct { type MethodMapping struct {
methods map[string]*BackendGroup methods map[string]*BackendGroup
} }
func NewMethodMapping(methods map[string]*BackendGroup) *MethodMapping { func NewMethodMapping(methods map[string]*BackendGroup) *MethodMapping {
return &MethodMapping{methods: methods} return &MethodMapping{methods: methods}
} }
func (m *MethodMapping) BackendGroupFor(method string) (*BackendGroup, error) { func (m *MethodMapping) BackendGroupFor(method string) (*BackendGroup, error) {
group := m.methods[method] group := m.methods[method]
if group == nil { if group == nil {
return nil, ErrNoBackend return nil, ErrNoBackend
} }
return group, nil return group, nil
} }
func calcBackoff(i int) time.Duration { func calcBackoff(i int) time.Duration {
jitter := float64(rand.Int63n(250)) jitter := float64(rand.Int63n(250))
ms := math.Min(math.Pow(2, float64(i))*1000+jitter, 10000) ms := math.Min(math.Pow(2, float64(i))*1000+jitter, 10000)
return time.Duration(ms) * time.Millisecond return time.Duration(ms) * time.Millisecond
} }
package proxyd package proxyd
import ( import (
"encoding/json" "encoding/json"
"errors" "errors"
"fmt" "fmt"
"github.com/ethereum/go-ethereum/log" "github.com/ethereum/go-ethereum/log"
"github.com/gorilla/mux" "github.com/gorilla/mux"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/client_golang/prometheus/promauto"
"github.com/rs/cors" "github.com/rs/cors"
"io" "io"
"io/ioutil" "io/ioutil"
"net/http" "net/http"
"time"
) )
var ( var (
...@@ -22,17 +21,11 @@ var ( ...@@ -22,17 +21,11 @@ var (
Help: "Count of total HTTP requests.", Help: "Count of total HTTP requests.",
}) })
httpRequestDurationHisto = promauto.NewHistogram(prometheus.HistogramOpts{ httpRequestDurationSummary = promauto.NewSummary(prometheus.SummaryOpts{
Namespace: "proxyd", Namespace: "proxyd",
Name: "http_request_duration_histogram_seconds", Name: "http_request_duration_seconds",
Help: "Histogram of HTTP request durations.", Help: "Summary of HTTP request durations, in seconds.",
Buckets: []float64{ Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
0,
0.1,
0.25,
0.75,
1,
},
}) })
rpcRequestsCtr = promauto.NewCounterVec(prometheus.CounterOpts{ rpcRequestsCtr = promauto.NewCounterVec(prometheus.CounterOpts{
...@@ -96,8 +89,8 @@ func (s *Server) ListenAndServe(host string, port int) error { ...@@ -96,8 +89,8 @@ func (s *Server) ListenAndServe(host string, port int) error {
hdlr.HandleFunc("/healthz", s.HandleHealthz).Methods("GET") hdlr.HandleFunc("/healthz", s.HandleHealthz).Methods("GET")
hdlr.HandleFunc("/", s.HandleRPC).Methods("POST") hdlr.HandleFunc("/", s.HandleRPC).Methods("POST")
c := cors.New(cors.Options{ c := cors.New(cors.Options{
AllowedOrigins: []string{"*"}, AllowedOrigins: []string{"*"},
}) })
addr := fmt.Sprintf("%s:%d", host, port) addr := fmt.Sprintf("%s:%d", host, port)
server := &http.Server{ server := &http.Server{
Handler: instrumentedHdlr(c.Handler(hdlr)), Handler: instrumentedHdlr(c.Handler(hdlr)),
...@@ -142,7 +135,7 @@ func (s *Server) HandleRPC(w http.ResponseWriter, r *http.Request) { ...@@ -142,7 +135,7 @@ func (s *Server) HandleRPC(w http.ResponseWriter, r *http.Request) {
return return
} }
backendRes, err := group.Forward(body) backendRes, err := group.Forward(req)
if err != nil { if err != nil {
log.Error("error forwarding RPC request", "group", group.Name, "method", req.Method, "err", err) log.Error("error forwarding RPC request", "group", group.Name, "method", req.Method, "err", err)
rpcErrorsCtr.WithLabelValues("-32603").Inc() rpcErrorsCtr.WithLabelValues("-32603").Inc()
...@@ -181,9 +174,8 @@ func writeRPCError(w http.ResponseWriter, id *int, code int, msg string) { ...@@ -181,9 +174,8 @@ func writeRPCError(w http.ResponseWriter, id *int, code int, msg string) {
func instrumentedHdlr(h http.Handler) http.HandlerFunc { func instrumentedHdlr(h http.Handler) http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
httpRequestsCtr.Inc() httpRequestsCtr.Inc()
start := time.Now() timer := prometheus.NewTimer(httpRequestDurationSummary)
defer timer.ObserveDuration()
h.ServeHTTP(w, r) h.ServeHTTP(w, r)
dur := time.Since(start)
httpRequestDurationHisto.Observe(float64(dur) / float64(time.Second))
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment