add state sync reactor (#4705)

Fixes #828. Adds state sync, as outlined in [ADR-053](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-053-state-sync-prototype.md). See related PRs in Cosmos SDK (https://github.com/cosmos/cosmos-sdk/pull/5803) and Gaia (https://github.com/cosmos/gaia/pull/327).

This is split out of the previous PR #4645, and branched off of the ABCI interface in #4704. 

* Adds a new P2P reactor which exchanges snapshots with peers, and bootstraps an empty local node from remote snapshots when requested.

* Adds a new configuration section `[statesync]` that enables state sync and configures the light client. Also enables `statesync:info` logging by default.

* Integrates state sync into node startup. Does not support the v2 blockchain reactor, since it needs some reorganization to defer startup.
This commit is contained in:
Erik Grinaker
2020-04-29 10:47:00 +02:00
committed by GitHub
parent 569981325a
commit 511ab6717c
32 changed files with 4145 additions and 106 deletions

View File

@@ -112,13 +112,13 @@ func TestByzantine(t *testing.T) {
// note these must be started before the byz
for i := 1; i < N; i++ {
cr := reactors[i].(*Reactor)
cr.SwitchToConsensus(cr.conS.GetState(), 0)
cr.SwitchToConsensus(cr.conS.GetState(), false)
}
// start the byzantine state machine
byzR := reactors[0].(*ByzantineReactor)
s := byzR.reactor.conS.GetState()
byzR.reactor.SwitchToConsensus(s, 0)
byzR.reactor.SwitchToConsensus(s, false)
// byz proposer sends one block to peers[0]
// and the other block to peers[1] and peers[2].
@@ -268,8 +268,8 @@ func (br *ByzantineReactor) AddPeer(peer p2p.Peer) {
peer.Set(types.PeerStateKey, peerState)
// Send our state to peer.
// If we're fast_syncing, broadcast a RoundStepMessage later upon SwitchToConsensus().
if !br.reactor.fastSync {
// If we're syncing, broadcast a RoundStepMessage later upon SwitchToConsensus().
if !br.reactor.waitSync {
br.reactor.sendNewRoundStepMessage(peer)
}
}

View File

@@ -55,6 +55,8 @@ type Metrics struct {
CommittedHeight metrics.Gauge
// Whether or not a node is fast syncing. 1 if yes, 0 if no.
FastSyncing metrics.Gauge
// Whether or not a node is state syncing. 1 if yes, 0 if no.
StateSyncing metrics.Gauge
// Number of blockparts transmitted by peer.
BlockParts metrics.Counter
@@ -174,6 +176,12 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "fast_syncing",
Help: "Whether or not a node is fast syncing. 1 if yes, 0 if no.",
}, labels).With(labelsAndValues...),
StateSyncing: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "state_syncing",
Help: "Whether or not a node is state syncing. 1 if yes, 0 if no.",
}, labels).With(labelsAndValues...),
BlockParts: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
@@ -208,6 +216,7 @@ func NopMetrics() *Metrics {
TotalTxs: discard.NewGauge(),
CommittedHeight: discard.NewGauge(),
FastSyncing: discard.NewGauge(),
StateSyncing: discard.NewGauge(),
BlockParts: discard.NewCounter(),
}
}

View File

@@ -41,23 +41,22 @@ type Reactor struct {
conS *State
mtx sync.RWMutex
fastSync bool
waitSync bool
eventBus *types.EventBus
metrics *Metrics
Metrics *Metrics
}
type ReactorOption func(*Reactor)
// NewReactor returns a new Reactor with the given
// consensusState.
func NewReactor(consensusState *State, fastSync bool, options ...ReactorOption) *Reactor {
func NewReactor(consensusState *State, waitSync bool, options ...ReactorOption) *Reactor {
conR := &Reactor{
conS: consensusState,
fastSync: fastSync,
metrics: NopMetrics(),
waitSync: waitSync,
Metrics: NopMetrics(),
}
conR.updateFastSyncingMetric()
conR.BaseReactor = *p2p.NewBaseReactor("Consensus", conR)
for _, option := range options {
@@ -70,14 +69,14 @@ func NewReactor(consensusState *State, fastSync bool, options ...ReactorOption)
// OnStart implements BaseService by subscribing to events, which later will be
// broadcasted to other peers and starting state if we're not in fast sync.
func (conR *Reactor) OnStart() error {
conR.Logger.Info("Reactor ", "fastSync", conR.FastSync())
conR.Logger.Info("Reactor ", "waitSync", conR.WaitSync())
// start routine that computes peer statistics for evaluating peer quality
go conR.peerStatsRoutine()
conR.subscribeToBroadcastEvents()
if !conR.FastSync() {
if !conR.WaitSync() {
err := conR.conS.Start()
if err != nil {
return err
@@ -92,14 +91,14 @@ func (conR *Reactor) OnStart() error {
func (conR *Reactor) OnStop() {
conR.unsubscribeFromBroadcastEvents()
conR.conS.Stop()
if !conR.FastSync() {
if !conR.WaitSync() {
conR.conS.Wait()
}
}
// SwitchToConsensus switches from fast_sync mode to consensus mode.
// It resets the state, turns off fast_sync, and starts the consensus state-machine
func (conR *Reactor) SwitchToConsensus(state sm.State, blocksSynced uint64) {
func (conR *Reactor) SwitchToConsensus(state sm.State, skipWAL bool) {
conR.Logger.Info("SwitchToConsensus")
conR.conS.reconstructLastCommit(state)
// NOTE: The line below causes broadcastNewRoundStepRoutine() to
@@ -107,12 +106,12 @@ func (conR *Reactor) SwitchToConsensus(state sm.State, blocksSynced uint64) {
conR.conS.updateToState(state)
conR.mtx.Lock()
conR.fastSync = false
conR.waitSync = false
conR.mtx.Unlock()
conR.metrics.FastSyncing.Set(0)
conR.Metrics.FastSyncing.Set(0)
conR.Metrics.StateSyncing.Set(0)
if blocksSynced > 0 {
// dont bother with the WAL if we fast synced
if skipWAL {
conR.conS.doWALCatchup = false
}
err := conR.conS.Start()
@@ -187,7 +186,7 @@ func (conR *Reactor) AddPeer(peer p2p.Peer) {
// Send our state to peer.
// If we're fast_syncing, broadcast a RoundStepMessage later upon SwitchToConsensus().
if !conR.FastSync() {
if !conR.WaitSync() {
conR.sendNewRoundStepMessage(peer)
}
}
@@ -284,8 +283,8 @@ func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
}
case DataChannel:
if conR.FastSync() {
conR.Logger.Info("Ignoring message received during fastSync", "msg", msg)
if conR.WaitSync() {
conR.Logger.Info("Ignoring message received during sync", "msg", msg)
return
}
switch msg := msg.(type) {
@@ -296,15 +295,15 @@ func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
ps.ApplyProposalPOLMessage(msg)
case *BlockPartMessage:
ps.SetHasProposalBlockPart(msg.Height, msg.Round, msg.Part.Index)
conR.metrics.BlockParts.With("peer_id", string(src.ID())).Add(1)
conR.Metrics.BlockParts.With("peer_id", string(src.ID())).Add(1)
conR.conS.peerMsgQueue <- msgInfo{msg, src.ID()}
default:
conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
}
case VoteChannel:
if conR.FastSync() {
conR.Logger.Info("Ignoring message received during fastSync", "msg", msg)
if conR.WaitSync() {
conR.Logger.Info("Ignoring message received during sync", "msg", msg)
return
}
switch msg := msg.(type) {
@@ -325,8 +324,8 @@ func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
}
case VoteSetBitsChannel:
if conR.FastSync() {
conR.Logger.Info("Ignoring message received during fastSync", "msg", msg)
if conR.WaitSync() {
conR.Logger.Info("Ignoring message received during sync", "msg", msg)
return
}
switch msg := msg.(type) {
@@ -366,11 +365,11 @@ func (conR *Reactor) SetEventBus(b *types.EventBus) {
conR.conS.SetEventBus(b)
}
// FastSync returns whether the consensus reactor is in fast-sync mode.
func (conR *Reactor) FastSync() bool {
// WaitSync returns whether the consensus reactor is waiting for state/fast sync.
func (conR *Reactor) WaitSync() bool {
conR.mtx.RLock()
defer conR.mtx.RUnlock()
return conR.fastSync
return conR.waitSync
}
//--------------------------------------
@@ -886,19 +885,9 @@ func (conR *Reactor) StringIndented(indent string) string {
return s
}
func (conR *Reactor) updateFastSyncingMetric() {
var fastSyncing float64
if conR.fastSync {
fastSyncing = 1
} else {
fastSyncing = 0
}
conR.metrics.FastSyncing.Set(fastSyncing)
}
// ReactorMetrics sets the metrics
func ReactorMetrics(metrics *Metrics) ReactorOption {
return func(conR *Reactor) { conR.metrics = metrics }
return func(conR *Reactor) { conR.Metrics = metrics }
}
//-----------------------------------------------------------------------------

View File

@@ -75,7 +75,7 @@ func startConsensusNet(t *testing.T, css []*State, n int) (
// TODO: is this still true with new pubsub?
for i := 0; i < n; i++ {
s := reactors[i].conS.GetState()
reactors[i].SwitchToConsensus(s, 0)
reactors[i].SwitchToConsensus(s, false)
}
return reactors, blocksSubs, eventBuses
}