statesync/event: emit statesync start/end event (#6700)

This commit is contained in:
JayT106
2021-07-22 02:16:50 -04:00
committed by GitHub
parent 478f5321ad
commit e70445f942
12 changed files with 373 additions and 64 deletions

View File

@@ -63,26 +63,25 @@ type nodeImpl struct {
isListening bool
// services
eventBus *types.EventBus // pub/sub for services
stateStore sm.Store
blockStore *store.BlockStore // store the blockchain to disk
bcReactor service.Service // for fast-syncing
mempoolReactor service.Service // for gossipping transactions
mempool mempool.Mempool
stateSync bool // whether the node should state sync on startup
stateSyncReactor *statesync.Reactor // for hosting and restoring state sync snapshots
stateSyncProvider statesync.StateProvider // provides state data for bootstrapping a node
consensusState *cs.State // latest consensus state
consensusReactor *cs.Reactor // for participating in the consensus
pexReactor *pex.Reactor // for exchanging peer addresses
pexReactorV2 *pex.ReactorV2 // for exchanging peer addresses
evidenceReactor *evidence.Reactor
evidencePool *evidence.Pool // tracking evidence
proxyApp proxy.AppConns // connection to the application
rpcListeners []net.Listener // rpc servers
eventSinks []indexer.EventSink
indexerService *indexer.Service
prometheusSrv *http.Server
eventBus *types.EventBus // pub/sub for services
stateStore sm.Store
blockStore *store.BlockStore // store the blockchain to disk
bcReactor service.Service // for fast-syncing
mempoolReactor service.Service // for gossipping transactions
mempool mempool.Mempool
stateSync bool // whether the node should state sync on startup
stateSyncReactor *statesync.Reactor // for hosting and restoring state sync snapshots
consensusState *cs.State // latest consensus state
consensusReactor *cs.Reactor // for participating in the consensus
pexReactor *pex.Reactor // for exchanging peer addresses
pexReactorV2 *pex.ReactorV2 // for exchanging peer addresses
evidenceReactor *evidence.Reactor
evidencePool *evidence.Pool // tracking evidence
proxyApp proxy.AppConns // connection to the application
rpcListeners []net.Listener // rpc servers
eventSinks []indexer.EventSink
indexerService *indexer.Service
prometheusSrv *http.Server
}
// newDefaultNode returns a Tendermint node with default settings for the
@@ -663,9 +662,15 @@ func (n *nodeImpl) OnStart() error {
return fmt.Errorf("unable to derive state: %w", err)
}
err = startStateSync(n.stateSyncReactor, bcR, n.consensusReactor, n.stateSyncProvider,
n.config.StateSync, n.config.FastSyncMode, n.stateStore, n.blockStore, state, n.eventBus)
ssc := n.config.StateSync
sp, err := constructStateProvider(ssc, state, n.Logger.With("module", "light"))
if err != nil {
return fmt.Errorf("failed to set up light client state provider: %w", err)
}
if err := startStateSync(n.stateSyncReactor, bcR, n.consensusReactor, sp,
ssc, n.config.FastSyncMode, state.InitialHeight, n.eventBus); err != nil {
return fmt.Errorf("failed to start state sync: %w", err)
}
}
@@ -1027,54 +1032,57 @@ func (n *nodeImpl) NodeInfo() types.NodeInfo {
}
// startStateSync starts an asynchronous state sync process, then switches to fast sync mode.
func startStateSync(ssR *statesync.Reactor, bcR cs.FastSyncReactor, conR *cs.Reactor,
stateProvider statesync.StateProvider, config *cfg.StateSyncConfig, fastSync bool,
stateStore sm.Store, blockStore *store.BlockStore, state sm.State, eventbus *types.EventBus) error {
ssR.Logger.Info("starting state sync...")
func startStateSync(
ssR statesync.SyncReactor,
bcR cs.FastSyncReactor,
conR cs.ConsSyncReactor,
sp statesync.StateProvider,
config *cfg.StateSyncConfig,
fastSync bool,
stateInitHeight int64,
eb *types.EventBus,
) error {
stateSyncLogger := eb.Logger.With("module", "statesync")
if stateProvider == nil {
var err error
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
stateProvider, err = statesync.NewLightClientStateProvider(
ctx,
state.ChainID, state.Version, state.InitialHeight,
config.RPCServers, light.TrustOptions{
Period: config.TrustPeriod,
Height: config.TrustHeight,
Hash: config.TrustHashBytes(),
}, ssR.Logger.With("module", "light"))
if err != nil {
return fmt.Errorf("failed to set up light client state provider: %w", err)
}
stateSyncLogger.Info("starting state sync...")
// at the beginning of the statesync start, we use the initialHeight as the event height
// because of the statesync doesn't have the concreate state height before fetched the snapshot.
d := types.EventDataStateSyncStatus{Complete: false, Height: stateInitHeight}
if err := eb.PublishEventStateSyncStatus(d); err != nil {
stateSyncLogger.Error("failed to emit the statesync start event", "err", err)
}
go func() {
state, err := ssR.Sync(context.TODO(), stateProvider, config.DiscoveryTime)
state, err := ssR.Sync(context.TODO(), sp, config.DiscoveryTime)
if err != nil {
ssR.Logger.Error("state sync failed", "err", err)
stateSyncLogger.Error("state sync failed", "err", err)
return
}
err = ssR.Backfill(state)
if err != nil {
ssR.Logger.Error("backfill failed; node has insufficient history to verify all evidence;"+
if err := ssR.Backfill(state); err != nil {
stateSyncLogger.Error("backfill failed; node has insufficient history to verify all evidence;"+
" proceeding optimistically...", "err", err)
}
conR.Metrics.StateSyncing.Set(0)
conR.SetStateSyncingMetrics(0)
d := types.EventDataStateSyncStatus{Complete: true, Height: state.LastBlockHeight}
if err := eb.PublishEventStateSyncStatus(d); err != nil {
stateSyncLogger.Error("failed to emit the statesync start event", "err", err)
}
if fastSync {
// FIXME Very ugly to have these metrics bleed through here.
conR.Metrics.FastSyncing.Set(1)
err = bcR.SwitchToFastSync(state)
if err != nil {
ssR.Logger.Error("failed to switch to fast sync", "err", err)
conR.SetFastSyncingMetrics(1)
if err := bcR.SwitchToFastSync(state); err != nil {
stateSyncLogger.Error("failed to switch to fast sync", "err", err)
return
}
d := types.EventDataFastSyncStatus{Complete: false, Height: state.LastBlockHeight}
if err := eventbus.PublishEventFastSyncStatus(d); err != nil {
ssR.Logger.Error("failed to emit the fastsync starting event", "err", err)
if err := eb.PublishEventFastSyncStatus(d); err != nil {
stateSyncLogger.Error("failed to emit the fastsync starting event", "err", err)
}
} else {
@@ -1266,3 +1274,24 @@ func getChannelsFromShim(reactorShim *p2p.ReactorShim) map[p2p.ChannelID]*p2p.Ch
return channels
}
func constructStateProvider(
ssc *cfg.StateSyncConfig,
state sm.State,
logger log.Logger,
) (statesync.StateProvider, error) {
ctx, cancel := context.WithTimeout(context.TODO(), 10*time.Second)
defer cancel()
to := light.TrustOptions{
Period: ssc.TrustPeriod,
Height: ssc.TrustHeight,
Hash: ssc.TrustHashBytes(),
}
return statesync.NewLightClientStateProvider(
ctx,
state.ChainID, state.Version, state.InitialHeight,
ssc.RPCServers, to, logger,
)
}

View File

@@ -21,11 +21,16 @@ import (
"github.com/tendermint/tendermint/crypto"
"github.com/tendermint/tendermint/crypto/ed25519"
"github.com/tendermint/tendermint/crypto/tmhash"
consmocks "github.com/tendermint/tendermint/internal/consensus/mocks"
ssmocks "github.com/tendermint/tendermint/internal/statesync/mocks"
"github.com/tendermint/tendermint/internal/evidence"
"github.com/tendermint/tendermint/internal/mempool"
mempoolv0 "github.com/tendermint/tendermint/internal/mempool/v0"
statesync "github.com/tendermint/tendermint/internal/statesync"
"github.com/tendermint/tendermint/internal/test/factory"
"github.com/tendermint/tendermint/libs/log"
tmpubsub "github.com/tendermint/tendermint/libs/pubsub"
tmrand "github.com/tendermint/tendermint/libs/rand"
tmtime "github.com/tendermint/tendermint/libs/time"
"github.com/tendermint/tendermint/privval"
@@ -626,6 +631,12 @@ func state(nVals int, height int64) (sm.State, dbm.DB, []types.PrivValidator) {
}
func TestLoadStateFromGenesis(t *testing.T) {
_ = loadStatefromGenesis(t)
}
func loadStatefromGenesis(t *testing.T) sm.State {
t.Helper()
stateDB := dbm.NewMemDB()
stateStore := sm.NewStore(stateDB)
config := cfg.ResetTestRoot("load_state_from_genesis")
@@ -642,4 +653,68 @@ func TestLoadStateFromGenesis(t *testing.T) {
)
require.NoError(t, err)
require.NotNil(t, state)
return state
}
func TestNodeStartStateSync(t *testing.T) {
mockSSR := &statesync.MockSyncReactor{}
mockFSR := &consmocks.FastSyncReactor{}
mockCSR := &consmocks.ConsSyncReactor{}
mockSP := &ssmocks.StateProvider{}
state := loadStatefromGenesis(t)
config := cfg.ResetTestRoot("load_state_from_genesis")
eventBus, err := createAndStartEventBus(log.TestingLogger())
defer func() {
err := eventBus.Stop()
require.NoError(t, err)
}()
require.NoError(t, err)
require.NotNil(t, eventBus)
sub, err := eventBus.Subscribe(context.Background(), "test-client", types.EventQueryStateSyncStatus, 10)
require.NoError(t, err)
require.NotNil(t, sub)
cfgSS := config.StateSync
mockSSR.On("Sync", context.TODO(), mockSP, cfgSS.DiscoveryTime).Return(state, nil).
On("Backfill", state).Return(nil)
mockCSR.On("SetStateSyncingMetrics", float64(0)).Return().
On("SwitchToConsensus", state, true).Return()
require.NoError(t,
startStateSync(mockSSR, mockFSR, mockCSR, mockSP, config.StateSync, false, state.InitialHeight, eventBus))
for cnt := 0; cnt < 2; {
select {
case <-time.After(3 * time.Second):
t.Errorf("StateSyncStatus timeout")
case msg := <-sub.Out():
if cnt == 0 {
ensureStateSyncStatus(t, msg, false, state.InitialHeight)
cnt++
} else {
// the state height = 0 because we are not actually update the state in this test
ensureStateSyncStatus(t, msg, true, 0)
cnt++
}
}
}
mockSSR.AssertNumberOfCalls(t, "Sync", 1)
mockSSR.AssertNumberOfCalls(t, "Backfill", 1)
mockCSR.AssertNumberOfCalls(t, "SetStateSyncingMetrics", 1)
mockCSR.AssertNumberOfCalls(t, "SwitchToConsensus", 1)
}
func ensureStateSyncStatus(t *testing.T, msg tmpubsub.Message, complete bool, height int64) {
t.Helper()
status, ok := msg.Data().(types.EventDataStateSyncStatus)
require.True(t, ok)
require.Equal(t, complete, status.Complete)
require.Equal(t, height, status.Height)
}