add state sync reactor (#4705)
Fixes #828. Adds state sync, as outlined in [ADR-053](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-053-state-sync-prototype.md). See the related PRs in the Cosmos SDK (https://github.com/cosmos/cosmos-sdk/pull/5803) and Gaia (https://github.com/cosmos/gaia/pull/327). This is split out of the previous PR #4645, and branched off of the ABCI interface in #4704.

* Adds a new P2P reactor which exchanges snapshots with peers, and bootstraps an empty local node from remote snapshots when requested.
* Adds a new configuration section `[statesync]` that enables state sync and configures the light client. Also enables `statesync:info` logging by default.
* Integrates state sync into node startup. Does not support the v2 blockchain reactor, since it needs some reorganization to defer startup.
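For orientation before the diff, here is a minimal sketch of how node startup is expected to use the changed consensus-reactor constructor. The `stateSyncEnabled`/`fastSyncEnabled` flags and the helper itself are hypothetical; only `NewReactor`'s `waitSync` parameter comes from this change.

```go
// Sketch only: hypothetical node-startup wiring. Everything except the
// waitSync parameter of consensus.NewReactor is an assumption, not code
// from this PR.
package node

import (
	cs "github.com/tendermint/tendermint/consensus"
)

func makeConsensusReactor(consensusState *cs.State, stateSyncEnabled, fastSyncEnabled bool) *cs.Reactor {
	// The old fastSync flag becomes the more general waitSync: the consensus
	// state machine is held back whenever the node will state sync and/or
	// fast sync first, and is only started once SwitchToConsensus is called
	// with the synced state.
	waitSync := stateSyncEnabled || fastSyncEnabled
	return cs.NewReactor(consensusState, waitSync)
}
```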
```diff
@@ -112,13 +112,13 @@ func TestByzantine(t *testing.T) {
 	// note these must be started before the byz
 	for i := 1; i < N; i++ {
 		cr := reactors[i].(*Reactor)
-		cr.SwitchToConsensus(cr.conS.GetState(), 0)
+		cr.SwitchToConsensus(cr.conS.GetState(), false)
 	}
 
 	// start the byzantine state machine
 	byzR := reactors[0].(*ByzantineReactor)
 	s := byzR.reactor.conS.GetState()
-	byzR.reactor.SwitchToConsensus(s, 0)
+	byzR.reactor.SwitchToConsensus(s, false)
 
 	// byz proposer sends one block to peers[0]
 	// and the other block to peers[1] and peers[2].
@@ -268,8 +268,8 @@ func (br *ByzantineReactor) AddPeer(peer p2p.Peer) {
 	peer.Set(types.PeerStateKey, peerState)
 
 	// Send our state to peer.
-	// If we're fast_syncing, broadcast a RoundStepMessage later upon SwitchToConsensus().
-	if !br.reactor.fastSync {
+	// If we're syncing, broadcast a RoundStepMessage later upon SwitchToConsensus().
+	if !br.reactor.waitSync {
 		br.reactor.sendNewRoundStepMessage(peer)
 	}
 }
```
```diff
@@ -55,6 +55,8 @@ type Metrics struct {
 	CommittedHeight metrics.Gauge
 	// Whether or not a node is fast syncing. 1 if yes, 0 if no.
 	FastSyncing metrics.Gauge
+	// Whether or not a node is state syncing. 1 if yes, 0 if no.
+	StateSyncing metrics.Gauge
 
 	// Number of blockparts transmitted by peer.
 	BlockParts metrics.Counter
@@ -174,6 +176,12 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
 			Name:      "fast_syncing",
 			Help:      "Whether or not a node is fast syncing. 1 if yes, 0 if no.",
 		}, labels).With(labelsAndValues...),
+		StateSyncing: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
+			Namespace: namespace,
+			Subsystem: MetricsSubsystem,
+			Name:      "state_syncing",
+			Help:      "Whether or not a node is state syncing. 1 if yes, 0 if no.",
+		}, labels).With(labelsAndValues...),
 		BlockParts: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
 			Namespace: namespace,
 			Subsystem: MetricsSubsystem,
@@ -208,6 +216,7 @@ func NopMetrics() *Metrics {
 		TotalTxs:        discard.NewGauge(),
 		CommittedHeight: discard.NewGauge(),
 		FastSyncing:     discard.NewGauge(),
+		StateSyncing:    discard.NewGauge(),
 		BlockParts:      discard.NewCounter(),
 	}
 }
```
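A hedged sketch of how the new gauge can be wired and driven. The `"tendermint"` namespace and the point at which `Set(1)` is called are assumptions for illustration; `PrometheusMetrics`, `ReactorMetrics`, the exported `Metrics` field, and `StateSyncing` itself are from this diff.

```go
// Sketch only: wiring consensus metrics and driving the new state_syncing gauge.
package node // hypothetical placement

import (
	cs "github.com/tendermint/tendermint/consensus"
)

func newConsensusReactorWithMetrics(consensusState *cs.State) *cs.Reactor {
	metrics := cs.PrometheusMetrics("tendermint") // namespace assumed
	conR := cs.NewReactor(consensusState, true, cs.ReactorMetrics(metrics))

	// Assumed call site: mark the node as state syncing while it bootstraps
	// from a snapshot; SwitchToConsensus later resets the gauge to 0.
	conR.Metrics.StateSyncing.Set(1)
	return conR
}
```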
```diff
@@ -41,23 +41,22 @@ type Reactor struct {
 	conS *State
 
 	mtx      sync.RWMutex
-	fastSync bool
+	waitSync bool
 	eventBus *types.EventBus
 
-	metrics *Metrics
+	Metrics *Metrics
 }
 
 type ReactorOption func(*Reactor)
 
 // NewReactor returns a new Reactor with the given
 // consensusState.
-func NewReactor(consensusState *State, fastSync bool, options ...ReactorOption) *Reactor {
+func NewReactor(consensusState *State, waitSync bool, options ...ReactorOption) *Reactor {
 	conR := &Reactor{
 		conS:     consensusState,
-		fastSync: fastSync,
-		metrics:  NopMetrics(),
+		waitSync: waitSync,
+		Metrics:  NopMetrics(),
 	}
-	conR.updateFastSyncingMetric()
 	conR.BaseReactor = *p2p.NewBaseReactor("Consensus", conR)
 
 	for _, option := range options {
@@ -70,14 +69,14 @@ func NewReactor(consensusState *State, fastSync bool, options ...ReactorOption)
 // OnStart implements BaseService by subscribing to events, which later will be
 // broadcasted to other peers and starting state if we're not in fast sync.
 func (conR *Reactor) OnStart() error {
-	conR.Logger.Info("Reactor ", "fastSync", conR.FastSync())
+	conR.Logger.Info("Reactor ", "waitSync", conR.WaitSync())
 
 	// start routine that computes peer statistics for evaluating peer quality
 	go conR.peerStatsRoutine()
 
 	conR.subscribeToBroadcastEvents()
 
-	if !conR.FastSync() {
+	if !conR.WaitSync() {
 		err := conR.conS.Start()
 		if err != nil {
 			return err
@@ -92,14 +91,14 @@ func (conR *Reactor) OnStart() error {
 func (conR *Reactor) OnStop() {
 	conR.unsubscribeFromBroadcastEvents()
 	conR.conS.Stop()
-	if !conR.FastSync() {
+	if !conR.WaitSync() {
 		conR.conS.Wait()
 	}
 }
 
 // SwitchToConsensus switches from fast_sync mode to consensus mode.
 // It resets the state, turns off fast_sync, and starts the consensus state-machine
-func (conR *Reactor) SwitchToConsensus(state sm.State, blocksSynced uint64) {
+func (conR *Reactor) SwitchToConsensus(state sm.State, skipWAL bool) {
 	conR.Logger.Info("SwitchToConsensus")
 	conR.conS.reconstructLastCommit(state)
 	// NOTE: The line below causes broadcastNewRoundStepRoutine() to
@@ -107,12 +106,12 @@ func (conR *Reactor) SwitchToConsensus(state sm.State, blocksSynced uint64) {
 	conR.conS.updateToState(state)
 
 	conR.mtx.Lock()
-	conR.fastSync = false
+	conR.waitSync = false
 	conR.mtx.Unlock()
-	conR.metrics.FastSyncing.Set(0)
+	conR.Metrics.FastSyncing.Set(0)
+	conR.Metrics.StateSyncing.Set(0)
 
-	if blocksSynced > 0 {
-		// dont bother with the WAL if we fast synced
+	if skipWAL {
 		conR.conS.doWALCatchup = false
 	}
 	err := conR.conS.Start()
```
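For callers, the old numeric argument maps mechanically onto the new boolean. A hedged sketch of an assumed fast-sync completion handler follows; only the `SwitchToConsensus(state, skipWAL)` signature above is from this diff.

```go
// Sketch only: an assumed fast-sync completion handler. Only the
// SwitchToConsensus(state, skipWAL) signature is from this diff.
package node // hypothetical placement

import (
	cs "github.com/tendermint/tendermint/consensus"
	sm "github.com/tendermint/tendermint/state"
)

func onFastSyncDone(conR *cs.Reactor, state sm.State, blocksSynced uint64) {
	// Previously the reactor received blocksSynced and skipped WAL catch-up
	// when it was > 0; now the caller makes that decision and passes a bool.
	conR.SwitchToConsensus(state, blocksSynced > 0)
}
```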
```diff
@@ -187,7 +186,7 @@ func (conR *Reactor) AddPeer(peer p2p.Peer) {
 
 	// Send our state to peer.
 	// If we're fast_syncing, broadcast a RoundStepMessage later upon SwitchToConsensus().
-	if !conR.FastSync() {
+	if !conR.WaitSync() {
 		conR.sendNewRoundStepMessage(peer)
 	}
 }
@@ -284,8 +283,8 @@ func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
 		}
 
 	case DataChannel:
-		if conR.FastSync() {
-			conR.Logger.Info("Ignoring message received during fastSync", "msg", msg)
+		if conR.WaitSync() {
+			conR.Logger.Info("Ignoring message received during sync", "msg", msg)
 			return
 		}
 		switch msg := msg.(type) {
@@ -296,15 +295,15 @@ func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
 			ps.ApplyProposalPOLMessage(msg)
 		case *BlockPartMessage:
 			ps.SetHasProposalBlockPart(msg.Height, msg.Round, msg.Part.Index)
-			conR.metrics.BlockParts.With("peer_id", string(src.ID())).Add(1)
+			conR.Metrics.BlockParts.With("peer_id", string(src.ID())).Add(1)
 			conR.conS.peerMsgQueue <- msgInfo{msg, src.ID()}
 		default:
 			conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
 		}
 
 	case VoteChannel:
-		if conR.FastSync() {
-			conR.Logger.Info("Ignoring message received during fastSync", "msg", msg)
+		if conR.WaitSync() {
+			conR.Logger.Info("Ignoring message received during sync", "msg", msg)
 			return
 		}
 		switch msg := msg.(type) {
@@ -325,8 +324,8 @@ func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
 		}
 
 	case VoteSetBitsChannel:
-		if conR.FastSync() {
-			conR.Logger.Info("Ignoring message received during fastSync", "msg", msg)
+		if conR.WaitSync() {
+			conR.Logger.Info("Ignoring message received during sync", "msg", msg)
 			return
 		}
 		switch msg := msg.(type) {
@@ -366,11 +365,11 @@ func (conR *Reactor) SetEventBus(b *types.EventBus) {
 	conR.conS.SetEventBus(b)
 }
 
-// FastSync returns whether the consensus reactor is in fast-sync mode.
-func (conR *Reactor) FastSync() bool {
+// WaitSync returns whether the consensus reactor is waiting for state/fast sync.
+func (conR *Reactor) WaitSync() bool {
 	conR.mtx.RLock()
 	defer conR.mtx.RUnlock()
-	return conR.fastSync
+	return conR.waitSync
 }
 
 //--------------------------------------
@@ -886,19 +885,9 @@ func (conR *Reactor) StringIndented(indent string) string {
 	return s
 }
 
-func (conR *Reactor) updateFastSyncingMetric() {
-	var fastSyncing float64
-	if conR.fastSync {
-		fastSyncing = 1
-	} else {
-		fastSyncing = 0
-	}
-	conR.metrics.FastSyncing.Set(fastSyncing)
-}
-
 // ReactorMetrics sets the metrics
 func ReactorMetrics(metrics *Metrics) ReactorOption {
-	return func(conR *Reactor) { conR.metrics = metrics }
+	return func(conR *Reactor) { conR.Metrics = metrics }
 }
 
 //-----------------------------------------------------------------------------
```
```diff
@@ -75,7 +75,7 @@ func startConsensusNet(t *testing.T, css []*State, n int) (
 	// TODO: is this still true with new pubsub?
 	for i := 0; i < n; i++ {
 		s := reactors[i].conS.GetState()
-		reactors[i].SwitchToConsensus(s, 0)
+		reactors[i].SwitchToConsensus(s, false)
 	}
 	return reactors, blocksSubs, eventBuses
 }
```