mirror of
https://github.com/tendermint/tendermint.git
synced 2026-01-05 13:05:09 +00:00
libs/service: regularize Stop semantics and concurrency primitives (#7809)
This commit is contained in:
@@ -3,7 +3,7 @@ package service
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"sync/atomic"
|
||||
"sync"
|
||||
|
||||
"github.com/tendermint/tendermint/libs/log"
|
||||
)
|
||||
@@ -30,9 +30,6 @@ type Service interface {
|
||||
// Return true if the service is running
|
||||
IsRunning() bool
|
||||
|
||||
// String representation of the service
|
||||
String() string
|
||||
|
||||
// Wait blocks until the service is stopped.
|
||||
Wait()
|
||||
}
|
||||
@@ -40,8 +37,6 @@ type Service interface {
|
||||
// Implementation describes the implementation that the
|
||||
// BaseService implementation wraps.
|
||||
type Implementation interface {
|
||||
Service
|
||||
|
||||
// Called by the Service's Start method.
|
||||
OnStart(context.Context) error
|
||||
|
||||
@@ -57,12 +52,7 @@ Users can override the OnStart/OnStop methods. In the absence of errors, these
|
||||
methods are guaranteed to be called at most once. If OnStart returns an error,
|
||||
service won't be marked as started, so the user can call Start again.
|
||||
|
||||
A call to Reset will panic, unless OnReset is overridden, allowing
|
||||
OnStart/OnStop to be called again.
|
||||
|
||||
The caller must ensure that Start and Stop are not called concurrently.
|
||||
|
||||
It is ok to call Stop without calling Start first.
|
||||
It is safe, but an error, to call Stop without calling Start first.
|
||||
|
||||
Typical usage:
|
||||
|
||||
@@ -80,23 +70,21 @@ Typical usage:
|
||||
}
|
||||
|
||||
func (fs *FooService) OnStart(ctx context.Context) error {
|
||||
fs.BaseService.OnStart() // Always call the overridden method.
|
||||
// initialize private fields
|
||||
// start subroutines, etc.
|
||||
}
|
||||
|
||||
func (fs *FooService) OnStop() error {
|
||||
fs.BaseService.OnStop() // Always call the overridden method.
|
||||
// close/destroy private fields
|
||||
// stop subroutines, etc.
|
||||
}
|
||||
*/
|
||||
type BaseService struct {
|
||||
logger log.Logger
|
||||
name string
|
||||
started uint32 // atomic
|
||||
stopped uint32 // atomic
|
||||
quit chan struct{}
|
||||
logger log.Logger
|
||||
name string
|
||||
mtx sync.Mutex
|
||||
quit <-chan (struct{})
|
||||
cancel context.CancelFunc
|
||||
|
||||
// The "subclass" of BaseService
|
||||
impl Implementation
|
||||
@@ -107,7 +95,6 @@ func NewBaseService(logger log.Logger, name string, impl Implementation) *BaseSe
|
||||
return &BaseService{
|
||||
logger: logger,
|
||||
name: name,
|
||||
quit: make(chan struct{}),
|
||||
impl: impl,
|
||||
}
|
||||
}
|
||||
@@ -116,83 +103,101 @@ func NewBaseService(logger log.Logger, name string, impl Implementation) *BaseSe
|
||||
// returned if the service is already running or stopped. To restart a
|
||||
// stopped service, call Reset.
|
||||
func (bs *BaseService) Start(ctx context.Context) error {
|
||||
if atomic.CompareAndSwapUint32(&bs.started, 0, 1) {
|
||||
if atomic.LoadUint32(&bs.stopped) == 1 {
|
||||
bs.logger.Error("not starting service; already stopped", "service", bs.name, "impl", bs.impl.String())
|
||||
atomic.StoreUint32(&bs.started, 0)
|
||||
return ErrAlreadyStopped
|
||||
}
|
||||
bs.mtx.Lock()
|
||||
defer bs.mtx.Unlock()
|
||||
|
||||
bs.logger.Info("starting service", "service", bs.name, "impl", bs.impl.String())
|
||||
if bs.quit != nil {
|
||||
return ErrAlreadyStarted
|
||||
}
|
||||
|
||||
select {
|
||||
case <-bs.quit:
|
||||
return ErrAlreadyStopped
|
||||
default:
|
||||
bs.logger.Info("starting service", "service", bs.name, "impl", bs.name)
|
||||
if err := bs.impl.OnStart(ctx); err != nil {
|
||||
// revert flag
|
||||
atomic.StoreUint32(&bs.started, 0)
|
||||
return err
|
||||
}
|
||||
|
||||
// we need a separate context to ensure that we start
|
||||
// a thread that will get cleaned up and that the
|
||||
// Stop/Wait functions work as expected.
|
||||
srvCtx, cancel := context.WithCancel(context.Background())
|
||||
bs.cancel = cancel
|
||||
bs.quit = srvCtx.Done()
|
||||
|
||||
go func(ctx context.Context) {
|
||||
select {
|
||||
case <-bs.quit:
|
||||
// someone else explicitly called stop
|
||||
// and then we shouldn't.
|
||||
case <-srvCtx.Done():
|
||||
// this means stop was called manually
|
||||
return
|
||||
case <-ctx.Done():
|
||||
// if nothing is running, no need to
|
||||
// shut down again.
|
||||
if !bs.impl.IsRunning() {
|
||||
return
|
||||
}
|
||||
|
||||
// the context was canceled and we
|
||||
// should stop.
|
||||
if err := bs.Stop(); err != nil {
|
||||
bs.logger.Error("stopped service",
|
||||
"err", err.Error(),
|
||||
"service", bs.name,
|
||||
"impl", bs.impl.String())
|
||||
}
|
||||
|
||||
bs.logger.Info("stopped service",
|
||||
"service", bs.name,
|
||||
"impl", bs.impl.String())
|
||||
_ = bs.Stop()
|
||||
}
|
||||
|
||||
bs.logger.Info("stopped service",
|
||||
"service", bs.name)
|
||||
}(ctx)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return ErrAlreadyStarted
|
||||
}
|
||||
|
||||
// Stop implements Service by calling OnStop (if defined) and closing quit
|
||||
// channel. An error will be returned if the service is already stopped.
|
||||
func (bs *BaseService) Stop() error {
|
||||
if atomic.CompareAndSwapUint32(&bs.stopped, 0, 1) {
|
||||
if atomic.LoadUint32(&bs.started) == 0 {
|
||||
bs.logger.Error("not stopping service; not started yet", "service", bs.name, "impl", bs.impl.String())
|
||||
atomic.StoreUint32(&bs.stopped, 0)
|
||||
return ErrNotStarted
|
||||
}
|
||||
bs.mtx.Lock()
|
||||
defer bs.mtx.Unlock()
|
||||
|
||||
bs.logger.Info("stopping service", "service", bs.name, "impl", bs.impl.String())
|
||||
if bs.quit == nil {
|
||||
return ErrNotStarted
|
||||
}
|
||||
|
||||
select {
|
||||
case <-bs.quit:
|
||||
return ErrAlreadyStopped
|
||||
default:
|
||||
bs.logger.Info("stopping service", "service", bs.name)
|
||||
bs.impl.OnStop()
|
||||
close(bs.quit)
|
||||
bs.cancel()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
return ErrAlreadyStopped
|
||||
}
|
||||
|
||||
// IsRunning implements Service by returning true or false depending on the
|
||||
// service's state.
|
||||
func (bs *BaseService) IsRunning() bool {
|
||||
return atomic.LoadUint32(&bs.started) == 1 && atomic.LoadUint32(&bs.stopped) == 0
|
||||
bs.mtx.Lock()
|
||||
defer bs.mtx.Unlock()
|
||||
|
||||
if bs.quit == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
select {
|
||||
case <-bs.quit:
|
||||
return false
|
||||
default:
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func (bs *BaseService) getWait() <-chan struct{} {
|
||||
bs.mtx.Lock()
|
||||
defer bs.mtx.Unlock()
|
||||
|
||||
if bs.quit == nil {
|
||||
out := make(chan struct{})
|
||||
close(out)
|
||||
return out
|
||||
}
|
||||
|
||||
return bs.quit
|
||||
}
|
||||
|
||||
// Wait blocks until the service is stopped.
|
||||
func (bs *BaseService) Wait() { <-bs.quit }
|
||||
func (bs *BaseService) Wait() {
	// Go through getWait rather than reading bs.quit directly so the
	// channel is fetched under the mutex.
	<-bs.getWait()
}
|
||||
|
||||
// String implements Service by returning a string representation of the service.
|
||||
func (bs *BaseService) String() string {
	return bs.name
}
|
||||
|
||||
@@ -2,45 +2,135 @@ package service
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/fortytw2/leaktest"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"github.com/tendermint/tendermint/libs/log"
|
||||
)
|
||||
|
||||
type testService struct {
|
||||
started bool
|
||||
stopped bool
|
||||
multiStopped bool
|
||||
mu sync.Mutex
|
||||
BaseService
|
||||
}
|
||||
|
||||
func (testService) OnStop() {}
|
||||
func (testService) OnStart(context.Context) error {
|
||||
func (t *testService) OnStop() {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
if t.stopped == true {
|
||||
t.multiStopped = true
|
||||
}
|
||||
t.stopped = true
|
||||
}
|
||||
func (t *testService) OnStart(context.Context) error {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
|
||||
t.started = true
|
||||
return nil
|
||||
}
|
||||
|
||||
func TestBaseServiceWait(t *testing.T) {
|
||||
func (t *testService) isStarted() bool {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
return t.started
|
||||
}
|
||||
|
||||
func (t *testService) isStopped() bool {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
return t.stopped
|
||||
}
|
||||
|
||||
func (t *testService) isMultiStopped() bool {
|
||||
t.mu.Lock()
|
||||
defer t.mu.Unlock()
|
||||
return t.multiStopped
|
||||
}
|
||||
|
||||
func TestBaseService(t *testing.T) {
|
||||
t.Cleanup(leaktest.Check(t))
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
logger := log.NewTestingLogger(t)
|
||||
logger := log.NewNopLogger()
|
||||
|
||||
ts := &testService{}
|
||||
ts.BaseService = *NewBaseService(logger, "TestService", ts)
|
||||
err := ts.Start(ctx)
|
||||
require.NoError(t, err)
|
||||
t.Run("Wait", func(t *testing.T) {
|
||||
wctx, wcancel := context.WithCancel(ctx)
|
||||
defer wcancel()
|
||||
ts := &testService{}
|
||||
ts.BaseService = *NewBaseService(logger, t.Name(), ts)
|
||||
err := ts.Start(wctx)
|
||||
require.NoError(t, err)
|
||||
require.True(t, ts.isStarted())
|
||||
|
||||
waitFinished := make(chan struct{})
|
||||
go func() {
|
||||
ts.Wait()
|
||||
waitFinished <- struct{}{}
|
||||
}()
|
||||
waitFinished := make(chan struct{})
|
||||
wcancel()
|
||||
go func() {
|
||||
ts.Wait()
|
||||
close(waitFinished)
|
||||
}()
|
||||
|
||||
go cancel()
|
||||
select {
|
||||
case <-waitFinished:
|
||||
assert.True(t, ts.isStopped(), "failed to stop")
|
||||
assert.False(t, ts.IsRunning(), "is not running")
|
||||
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
t.Fatal("expected Wait() to finish within 100 ms.")
|
||||
}
|
||||
})
|
||||
t.Run("ManualStop", func(t *testing.T) {
|
||||
ts := &testService{}
|
||||
ts.BaseService = *NewBaseService(logger, t.Name(), ts)
|
||||
require.False(t, ts.IsRunning())
|
||||
require.False(t, ts.isStarted())
|
||||
require.NoError(t, ts.Start(ctx))
|
||||
|
||||
require.True(t, ts.isStarted())
|
||||
|
||||
require.NoError(t, ts.Stop())
|
||||
require.True(t, ts.isStopped())
|
||||
require.False(t, ts.IsRunning())
|
||||
})
|
||||
t.Run("MultiStop", func(t *testing.T) {
|
||||
t.Run("SingleThreaded", func(t *testing.T) {
|
||||
ts := &testService{}
|
||||
ts.BaseService = *NewBaseService(logger, t.Name(), ts)
|
||||
|
||||
require.NoError(t, ts.Start(ctx))
|
||||
require.True(t, ts.isStarted())
|
||||
require.NoError(t, ts.Stop())
|
||||
require.True(t, ts.isStopped())
|
||||
require.False(t, ts.isMultiStopped())
|
||||
require.Error(t, ts.Stop())
|
||||
require.False(t, ts.isMultiStopped())
|
||||
})
|
||||
t.Run("MultiThreaded", func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
ts := &testService{}
|
||||
ts.BaseService = *NewBaseService(logger, t.Name(), ts)
|
||||
|
||||
require.NoError(t, ts.Start(ctx))
|
||||
require.True(t, ts.isStarted())
|
||||
|
||||
go func() { _ = ts.Stop() }()
|
||||
go cancel()
|
||||
|
||||
ts.Wait()
|
||||
|
||||
require.True(t, ts.isStopped())
|
||||
require.False(t, ts.isMultiStopped())
|
||||
})
|
||||
|
||||
})
|
||||
|
||||
select {
|
||||
case <-waitFinished:
|
||||
// all good
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
t.Fatal("expected Wait() to finish within 100 ms.")
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user