Commit bdf314a7 authored by Francis Li's avatar Francis Li Committed by GitHub

Improve conductor initialization error handling. (#8906)

parent d8f88772
......@@ -73,13 +73,18 @@ func NewOpConductor(
oc.paused.Store(cfg.Paused)
oc.stopped.Store(false)
// do not rely on the default context, use a dedicated context for shutdown.
oc.shutdownCtx, oc.shutdownCancel = context.WithCancel(context.Background())
err := oc.init(ctx)
if err != nil {
log.Error("failed to initialize OpConductor", "err", err)
// ensure we always close the resources if we fail to initialize the conductor.
if closeErr := oc.Stop(ctx); closeErr != nil {
return nil, multierror.Append(err, closeErr)
closeErr := oc.Stop(ctx)
if closeErr != nil {
err = multierror.Append(err, closeErr)
}
return nil, err
}
return oc, nil
......@@ -252,7 +257,6 @@ func (oc *OpConductor) Start(ctx context.Context) error {
return errors.Wrap(err, "failed to start JSON-RPC server")
}
oc.shutdownCtx, oc.shutdownCancel = context.WithCancel(ctx)
oc.wg.Add(1)
go oc.loop()
......@@ -274,17 +278,23 @@ func (oc *OpConductor) Stop(ctx context.Context) error {
oc.shutdownCancel()
oc.wg.Wait()
if err := oc.rpcServer.Stop(); err != nil {
result = multierror.Append(result, errors.Wrap(err, "failed to stop rpc server"))
if oc.rpcServer != nil {
if err := oc.rpcServer.Stop(); err != nil {
result = multierror.Append(result, errors.Wrap(err, "failed to stop rpc server"))
}
}
// stop health check
if err := oc.hmon.Stop(); err != nil {
result = multierror.Append(result, errors.Wrap(err, "failed to stop health monitor"))
if oc.hmon != nil {
if err := oc.hmon.Stop(); err != nil {
result = multierror.Append(result, errors.Wrap(err, "failed to stop health monitor"))
}
}
if err := oc.cons.Shutdown(); err != nil {
result = multierror.Append(result, errors.Wrap(err, "failed to shutdown consensus"))
if oc.cons != nil {
if err := oc.cons.Shutdown(); err != nil {
result = multierror.Append(result, errors.Wrap(err, "failed to shutdown consensus"))
}
}
if result.ErrorOrNil() != nil {
......
......@@ -11,6 +11,7 @@ import (
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/common/hexutil"
"github.com/ethereum/go-ethereum/log"
"github.com/hashicorp/go-multierror"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/suite"
......@@ -512,6 +513,14 @@ func (s *OpConductorTestSuite) TestFailureAndRetry2() {
s.cons.AssertNumberOfCalls(s.T(), "TransferLeader", 1)
}
func (s *OpConductorTestSuite) TestHandleInitError() {
// This will cause an error in the init function, which should cause the conductor to stop successfully without issues.
_, err := New(s.ctx, &s.cfg, s.log, s.version)
_, ok := err.(*multierror.Error)
// error should not be a multierror, this means that init failed, but Stop() succeeded, which is what we expect.
s.False(ok)
}
func TestHealthMonitor(t *testing.T) {
suite.Run(t, new(OpConductorTestSuite))
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment