Commit 23054ecb authored by Joshua Gutow's avatar Joshua Gutow

op-service,batcher,proposer: Retry when dialing RPC URLs

This adds retries when dialing ethclient or rollupclient RPC URLs.
Because this now does up to 10 exponential backoffs, the dial timeout
has been increased to 15s (though this may not be enough).

I have also changed the DialClient API so that it no longer takes a context.
This is because we are not sharing the context, and if we do want to pass in
a context, we should probably set the timeout outside the dial function.
parent dc01f9bb
...@@ -50,17 +50,17 @@ func NewBatchSubmitterFromCLIConfig(cfg CLIConfig, l log.Logger, m metrics.Metri ...@@ -50,17 +50,17 @@ func NewBatchSubmitterFromCLIConfig(cfg CLIConfig, l log.Logger, m metrics.Metri
// Connect to L1 and L2 providers. Perform these last since they are the // Connect to L1 and L2 providers. Perform these last since they are the
// most expensive. // most expensive.
l1Client, err := opclient.DialEthClientWithTimeout(ctx, cfg.L1EthRpc, opclient.DefaultDialTimeout) l1Client, err := opclient.DialEthClientWithTimeout(opclient.DefaultDialTimeout, l, cfg.L1EthRpc)
if err != nil { if err != nil {
return nil, err return nil, err
} }
l2Client, err := opclient.DialEthClientWithTimeout(ctx, cfg.L2EthRpc, opclient.DefaultDialTimeout) l2Client, err := opclient.DialEthClientWithTimeout(opclient.DefaultDialTimeout, l, cfg.L2EthRpc)
if err != nil { if err != nil {
return nil, err return nil, err
} }
rollupClient, err := opclient.DialRollupClientWithTimeout(ctx, cfg.RollupRpc, opclient.DefaultDialTimeout) rollupClient, err := opclient.DialRollupClientWithTimeout(opclient.DefaultDialTimeout, l, cfg.RollupRpc)
if err != nil { if err != nil {
return nil, err return nil, err
} }
......
...@@ -157,13 +157,12 @@ func NewL2OutputSubmitterConfigFromCLIConfig(cfg CLIConfig, l log.Logger, m metr ...@@ -157,13 +157,12 @@ func NewL2OutputSubmitterConfigFromCLIConfig(cfg CLIConfig, l log.Logger, m metr
} }
// Connect to L1 and L2 providers. Perform these last since they are the most expensive. // Connect to L1 and L2 providers. Perform these last since they are the most expensive.
ctx := context.Background() l1Client, err := opclient.DialEthClientWithTimeout(opclient.DefaultDialTimeout, l, cfg.L1EthRpc)
l1Client, err := opclient.DialEthClientWithTimeout(ctx, cfg.L1EthRpc, opclient.DefaultDialTimeout)
if err != nil { if err != nil {
return nil, err return nil, err
} }
rollupClient, err := opclient.DialRollupClientWithTimeout(ctx, cfg.RollupRpc, opclient.DefaultDialTimeout) rollupClient, err := opclient.DialRollupClientWithTimeout(opclient.DefaultDialTimeout, l, cfg.RollupRpc)
if err != nil { if err != nil {
return nil, err return nil, err
} }
......
package client
import (
"context"
"fmt"
"net"
"net/url"
"time"
"github.com/ethereum-optimism/optimism/op-node/client"
"github.com/ethereum-optimism/optimism/op-node/sources"
"github.com/ethereum-optimism/optimism/op-service/backoff"
"github.com/ethereum/go-ethereum/ethclient"
"github.com/ethereum/go-ethereum/log"
"github.com/ethereum/go-ethereum/rpc"
)
const (
	// DefaultDialTimeout is a default timeout for dialing a client.
	DefaultDialTimeout = 30 * time.Second

	// defaultRetryCount is the attempt limit handed to backoff.DoCtx when dialing.
	defaultRetryCount = 30

	// defaultRetryTime is the fixed delay handed to backoff.Fixed when dialing.
	defaultRetryTime = time.Second
)
// DialEthClientWithTimeout dials the RPC endpoint at url and wraps the
// resulting connection in an ethclient.Client.
//
// Dialing goes through dialRPCClientWithBackoff under a fresh background
// context that expires after timeout. If that helper returns an error, the
// error is passed back unchanged and the client is nil.
func DialEthClientWithTimeout(timeout time.Duration, log log.Logger, url string) (*ethclient.Client, error) {
	dialCtx, stop := context.WithTimeout(context.Background(), timeout)
	defer stop()

	rpcCl, dialErr := dialRPCClientWithBackoff(dialCtx, log, url)
	if dialErr != nil {
		return nil, dialErr
	}
	ethCl := ethclient.NewClient(rpcCl)
	return ethCl, nil
}
// DialRollupClientWithTimeout dials the RPC endpoint at url and wraps the
// resulting connection in a sources.RollupClient.
//
// Dialing goes through dialRPCClientWithBackoff under a fresh background
// context that expires after timeout. If that helper returns an error, the
// error is passed back unchanged and the client is nil.
func DialRollupClientWithTimeout(timeout time.Duration, log log.Logger, url string) (*sources.RollupClient, error) {
	dialCtx, stop := context.WithTimeout(context.Background(), timeout)
	defer stop()

	conn, dialErr := dialRPCClientWithBackoff(dialCtx, log, url)
	if dialErr != nil {
		return nil, dialErr
	}
	base := client.NewBaseRPCClient(conn)
	return sources.NewRollupClient(base), nil
}
// dialRPCClientWithBackoff dials the JSON-RPC endpoint at addr, retrying
// through backoff.DoCtx with a backoff.Fixed(defaultRetryTime) strategy and a
// limit of defaultRetryCount.
//
// It returns the client from the first rpc.DialOptions call that reports no
// error. If backoff.DoCtx itself returns an error, that error is returned and
// the client is nil.
//
// NOTE(review): the log parameter is currently unused, so failed attempts are
// not logged.
func dialRPCClientWithBackoff(ctx context.Context, log log.Logger, addr string) (*rpc.Client, error) {
	var dialed *rpc.Client

	// attempt performs a single dial and records the client on success.
	attempt := func() error {
		rpcCl, dialErr := rpc.DialOptions(ctx, addr)
		if dialErr != nil {
			return fmt.Errorf("failed to dial address (%s): %w", addr, dialErr)
		}
		dialed = rpcCl
		return nil
	}

	if err := backoff.DoCtx(ctx, defaultRetryCount, backoff.Fixed(defaultRetryTime), attempt); err != nil {
		return nil, err
	}
	return dialed, nil
}
// IsURLAvailable reports whether a TCP connection to the host named in address
// can be opened within one second.
//
// address is parsed as a URL. When it carries no explicit port, the default
// port of its scheme is dialed instead: 80 for http and ws, 443 for https and
// wss. The function returns false when address cannot be parsed, when it has
// no host component, when no port is given and the scheme has no known
// default, or when the connection attempt fails or times out.
func IsURLAvailable(address string) bool {
	u, err := url.Parse(address)
	if err != nil || u.Host == "" {
		return false
	}

	hostPort := u.Host
	if u.Port() == "" {
		// net.DialTimeout rejects an address without a port, so fall back to
		// the well-known port of the scheme.
		var port string
		switch u.Scheme {
		case "http", "ws":
			port = "80"
		case "https", "wss":
			port = "443"
		default:
			return false
		}
		hostPort = net.JoinHostPort(u.Hostname(), port)
	}

	conn, err := net.DialTimeout("tcp", hostPort, 1*time.Second)
	if err != nil {
		return false
	}
	// Only reachability matters here; a failure to close the probe
	// connection does not change the answer.
	_ = conn.Close()
	return true
}
package client
import (
	"net"
	"net/http"
	"testing"

	"github.com/stretchr/testify/require"
)
// TestIsURLAvailable checks that IsURLAvailable reports true for an address
// with a bound listener and false once nothing is listening there.
func TestIsURLAvailable(t *testing.T) {
	// Bind an ephemeral port before probing: the socket already accepts
	// connections when the check runs, and no fixed port number can collide
	// with another process.
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	require.NoError(t, err)
	t.Cleanup(func() { _ = listener.Close() })

	served := make(chan struct{})
	go func() {
		defer close(served)
		// http.Serve returns once the listener is closed, which ends this goroutine.
		_ = http.Serve(listener, nil)
	}()

	address := "http://" + listener.Addr().String()
	require.True(t, IsURLAvailable(address))

	// Release the port, wait for the server goroutine to exit, then probe the
	// same address again.
	require.NoError(t, listener.Close())
	<-served
	require.False(t, IsURLAvailable(address))
}
package client
import (
"context"
"time"
"github.com/ethereum/go-ethereum/ethclient"
)
// DialEthClientWithTimeout dials the RPC endpoint at url through
// ethclient.DialContext, using a context derived from ctx that is cancelled
// after timeout. The client and error from ethclient.DialContext are returned
// as-is.
func DialEthClientWithTimeout(ctx context.Context, url string, timeout time.Duration) (*ethclient.Client, error) {
	dialCtx, stop := context.WithTimeout(ctx, timeout)
	defer stop()

	ethCl, dialErr := ethclient.DialContext(dialCtx, url)
	return ethCl, dialErr
}
package client
import (
"context"
"time"
"github.com/ethereum-optimism/optimism/op-node/client"
"github.com/ethereum-optimism/optimism/op-node/sources"
"github.com/ethereum/go-ethereum/rpc"
)
// DialRollupClientWithTimeout dials the RPC endpoint at url through
// rpc.DialContext, using a context derived from ctx that is cancelled after
// timeout, and wraps the connection in a sources.RollupClient. A dial error is
// returned unchanged together with a nil client.
func DialRollupClientWithTimeout(ctx context.Context, url string, timeout time.Duration) (*sources.RollupClient, error) {
	dialCtx, stop := context.WithTimeout(ctx, timeout)
	defer stop()

	conn, dialErr := rpc.DialContext(dialCtx, url)
	if dialErr != nil {
		return nil, dialErr
	}
	base := client.NewBaseRPCClient(conn)
	return sources.NewRollupClient(base), nil
}
package client
import (
"time"
)
// DefaultDialTimeout is the timeout callers pass when dialing a client and
// they have no more specific value to use.
const DefaultDialTimeout time.Duration = 5 * time.Second
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment