add state sync reactor (#4705)

Fixes #828. Adds state sync, as outlined in [ADR-053](https://github.com/tendermint/tendermint/blob/master/docs/architecture/adr-053-state-sync-prototype.md). See related PRs in Cosmos SDK (https://github.com/cosmos/cosmos-sdk/pull/5803) and Gaia (https://github.com/cosmos/gaia/pull/327).

This is split out of the previous PR #4645, and branched off of the ABCI interface in #4704. 

* Adds a new P2P reactor which exchanges snapshots with peers, and bootstraps an empty local node from remote snapshots when requested.

* Adds a new configuration section `[statesync]` that enables state sync and configures the light client. Also enables `statesync:info` logging by default.

* Integrates state sync into node startup. Does not support the v2 blockchain reactor, since it needs some reorganization to defer startup.
This commit is contained in:
Erik Grinaker
2020-04-29 10:47:00 +02:00
committed by GitHub
parent 569981325a
commit 511ab6717c
32 changed files with 4145 additions and 106 deletions

View File

@@ -1,13 +1,13 @@
package config
import (
"encoding/hex"
"errors"
"fmt"
"net/http"
"os"
"path/filepath"
"time"
"github.com/pkg/errors"
)
const (
@@ -65,6 +65,7 @@ type Config struct {
RPC *RPCConfig `mapstructure:"rpc"`
P2P *P2PConfig `mapstructure:"p2p"`
Mempool *MempoolConfig `mapstructure:"mempool"`
StateSync *StateSyncConfig `mapstructure:"statesync"`
FastSync *FastSyncConfig `mapstructure:"fastsync"`
Consensus *ConsensusConfig `mapstructure:"consensus"`
TxIndex *TxIndexConfig `mapstructure:"tx_index"`
@@ -78,6 +79,7 @@ func DefaultConfig() *Config {
RPC: DefaultRPCConfig(),
P2P: DefaultP2PConfig(),
Mempool: DefaultMempoolConfig(),
StateSync: DefaultStateSyncConfig(),
FastSync: DefaultFastSyncConfig(),
Consensus: DefaultConsensusConfig(),
TxIndex: DefaultTxIndexConfig(),
@@ -92,6 +94,7 @@ func TestConfig() *Config {
RPC: TestRPCConfig(),
P2P: TestP2PConfig(),
Mempool: TestMempoolConfig(),
StateSync: TestStateSyncConfig(),
FastSync: TestFastSyncConfig(),
Consensus: TestConsensusConfig(),
TxIndex: TestTxIndexConfig(),
@@ -116,24 +119,27 @@ func (cfg *Config) ValidateBasic() error {
return err
}
if err := cfg.RPC.ValidateBasic(); err != nil {
return errors.Wrap(err, "Error in [rpc] section")
return fmt.Errorf("error in [rpc] section: %w", err)
}
if err := cfg.P2P.ValidateBasic(); err != nil {
return errors.Wrap(err, "Error in [p2p] section")
return fmt.Errorf("error in [p2p] section: %w", err)
}
if err := cfg.Mempool.ValidateBasic(); err != nil {
return errors.Wrap(err, "Error in [mempool] section")
return fmt.Errorf("error in [mempool] section: %w", err)
}
if err := cfg.StateSync.ValidateBasic(); err != nil {
return fmt.Errorf("error in [statesync] section: %w", err)
}
if err := cfg.FastSync.ValidateBasic(); err != nil {
return errors.Wrap(err, "Error in [fastsync] section")
return fmt.Errorf("error in [fastsync] section: %w", err)
}
if err := cfg.Consensus.ValidateBasic(); err != nil {
return errors.Wrap(err, "Error in [consensus] section")
return fmt.Errorf("error in [consensus] section: %w", err)
}
return errors.Wrap(
cfg.Instrumentation.ValidateBasic(),
"Error in [instrumentation] section",
)
if err := cfg.Instrumentation.ValidateBasic(); err != nil {
return fmt.Errorf("error in [instrumentation] section: %w", err)
}
return nil
}
//-----------------------------------------------------------------------------
@@ -292,7 +298,7 @@ func DefaultLogLevel() string {
// DefaultPackageLogLevels returns a default log level setting so all packages
// log at "error", while the `main`, `state` and `statesync` packages log at "info"
func DefaultPackageLogLevels() string {
return fmt.Sprintf("main:info,state:info,*:%s", DefaultLogLevel())
return fmt.Sprintf("main:info,state:info,statesync:info,*:%s", DefaultLogLevel())
}
//-----------------------------------------------------------------------------
@@ -701,6 +707,69 @@ func (cfg *MempoolConfig) ValidateBasic() error {
return nil
}
//-----------------------------------------------------------------------------
// StateSyncConfig
// StateSyncConfig defines the configuration for the Tendermint state sync service.
//
// The mapstructure tags are the key names of the [statesync] configuration
// section. TrustHash holds a hex-encoded hash; use TrustHashBytes to obtain
// the decoded bytes. None of the fields besides Enable are checked by
// ValidateBasic unless Enable is true.
type StateSyncConfig struct {
	Enable      bool          `mapstructure:"enable"`
	TempDir     string        `mapstructure:"temp_dir"`
	RPCServers  []string      `mapstructure:"rpc_servers"`
	TrustPeriod time.Duration `mapstructure:"trust_period"`
	TrustHeight int64         `mapstructure:"trust_height"`
	TrustHash   string        `mapstructure:"trust_hash"`
}

// TrustHashBytes returns TrustHash decoded from its hexadecimal form.
//
// It panics if TrustHash is not valid hex. ValidateBasic rejects such values
// when state sync is enabled, so the configuration should be validated before
// this method is called.
func (cfg *StateSyncConfig) TrustHashBytes() []byte {
	hash, err := hex.DecodeString(cfg.TrustHash)
	if err != nil {
		panic(err)
	}
	return hash
}

// DefaultStateSyncConfig returns a default configuration for the state sync service
func DefaultStateSyncConfig() *StateSyncConfig {
	return &StateSyncConfig{}
}

// TestStateSyncConfig returns a default configuration for the state sync service
func TestStateSyncConfig() *StateSyncConfig {
	return DefaultStateSyncConfig()
}

// ValidateBasic performs basic validation.
//
// A configuration with Enable set to false is always accepted. Otherwise at
// least two non-empty rpc_servers entries, a positive trust_period, a positive
// trust_height and a non-empty, hex-encoded trust_hash are required. Error
// messages refer to settings by their configuration key names.
func (cfg *StateSyncConfig) ValidateBasic() error {
	if !cfg.Enable {
		return nil
	}

	if len(cfg.RPCServers) == 0 {
		return errors.New("rpc_servers is required")
	}
	if len(cfg.RPCServers) < 2 {
		return errors.New("at least two rpc_servers entries are required")
	}
	for _, server := range cfg.RPCServers {
		if len(server) == 0 {
			return errors.New("found empty rpc_servers entry")
		}
	}

	if cfg.TrustPeriod <= 0 {
		return errors.New("trust_period is required")
	}
	if cfg.TrustHeight <= 0 {
		return errors.New("trust_height is required")
	}
	if len(cfg.TrustHash) == 0 {
		return errors.New("trust_hash is required")
	}
	// Decode once here so that TrustHashBytes cannot panic on a validated,
	// enabled configuration.
	if _, err := hex.DecodeString(cfg.TrustHash); err != nil {
		return fmt.Errorf("invalid trust_hash: %w", err)
	}
	return nil
}
//-----------------------------------------------------------------------------
// FastSyncConfig

View File

@@ -6,6 +6,7 @@ import (
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestDefaultConfig(t *testing.T) {
@@ -122,6 +123,11 @@ func TestMempoolConfigValidateBasic(t *testing.T) {
}
}
// TestStateSyncConfigValidateBasic checks StateSyncConfig.ValidateBasic for the
// disabled test configuration, for an enabled configuration with nothing else
// set, and for an enabled configuration with every required field populated.
func TestStateSyncConfigValidateBasic(t *testing.T) {
	// The test configuration has state sync disabled and must be accepted.
	cfg := TestStateSyncConfig()
	require.NoError(t, cfg.ValidateBasic())

	// Enabling state sync without any light client settings must be rejected.
	cfg.Enable = true
	require.Error(t, cfg.ValidateBasic())

	// Two RPC servers plus trust period, height and a hex hash are sufficient.
	cfg.RPCServers = []string{"localhost:26657", "localhost:26658"}
	cfg.TrustPeriod = time.Hour
	cfg.TrustHeight = 1
	cfg.TrustHash = "0a0b"
	require.NoError(t, cfg.ValidateBasic())
}
func TestFastSyncConfigValidateBasic(t *testing.T) {
cfg := TestFastSyncConfig()
assert.NoError(t, cfg.ValidateBasic())

View File

@@ -315,6 +315,30 @@ cache_size = {{ .Mempool.CacheSize }}
# NOTE: the max size of a tx transmitted over the network is {max_tx_bytes} + {amino overhead}.
max_tx_bytes = {{ .Mempool.MaxTxBytes }}
##### state sync configuration options #####
[statesync]
# State sync rapidly bootstraps a new node by discovering, fetching, and restoring a state machine
# snapshot from peers instead of fetching and replaying historical blocks. Requires some peers in
# the network to take and serve state machine snapshots. State sync is not attempted if the node
# has any local state (LastBlockHeight > 0). The node will have a truncated block history,
# starting from the height of the snapshot.
enable = {{ .StateSync.Enable }}
# RPC servers (comma-separated) for light client verification of the synced state machine and
# retrieval of state data for node bootstrapping. Also needs a trusted height and corresponding
# header hash obtained from a trusted source, and a period during which validators can be trusted.
#
# For Cosmos SDK-based chains, trust_period should usually be about 2/3 of the unbonding time (~2
# weeks) during which they can be financially punished (slashed) for misbehavior.
rpc_servers = ""
trust_height = {{ .StateSync.TrustHeight }}
trust_hash = "{{ .StateSync.TrustHash }}"
trust_period = "{{ .StateSync.TrustPeriod }}"
# Temporary directory for state sync snapshot chunks, defaults to the OS tempdir (typically /tmp).
# Will create a new, randomly named directory within, and remove it when done.
temp_dir = "{{ .StateSync.TempDir }}"
##### fast sync configuration options #####
[fastsync]