Merge branch 'master' into callum/4728-check-evidence

This commit is contained in:
Callum Waters
2020-04-29 14:04:04 +02:00
committed by GitHub
54 changed files with 10344 additions and 626 deletions

View File

@@ -14,6 +14,8 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi
- Apps
- [abci] [\#4704](https://github.com/tendermint/tendermint/pull/4704) Add ABCI methods `ListSnapshots`, `LoadSnapshotChunk`, `OfferSnapshot`, and `ApplySnapshotChunk` for state sync snapshots. `ABCIVersion` bumped to 0.17.0.
- P2P Protocol
- Go API
@@ -25,6 +27,7 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi
### FEATURES:
- [statesync] Add state sync support, where a new node can be rapidly bootstrapped by fetching state snapshots from peers instead of replaying blocks. See the `[statesync]` config section.
- [evidence] [\#4532](https://github.com/tendermint/tendermint/pull/4532) Handle evidence from light clients (@melekes)
- [lite2] [\#4532](https://github.com/tendermint/tendermint/pull/4532) Submit conflicting headers, if any, to a full node & all witnesses (@melekes)
@@ -41,4 +44,6 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi
### BUG FIXES:
- [blockchain/v2] [\#4761](https://github.com/tendermint/tendermint/pull/4761) Fix excessive CPU usage caused by spinning on closed channels (@erikgrinaker)
- [blockchain/v2] Respect `fast_sync` option (@erikgrinaker)
- [light] [\#4741](https://github.com/tendermint/tendermint/pull/4741) Correctly return `ErrSignedHeaderNotFound` and `ErrValidatorSetNotFound` on corresponding RPC errors (@erikgrinaker)

View File

@@ -35,6 +35,10 @@ type Client interface {
InitChainAsync(types.RequestInitChain) *ReqRes
BeginBlockAsync(types.RequestBeginBlock) *ReqRes
EndBlockAsync(types.RequestEndBlock) *ReqRes
ListSnapshotsAsync(types.RequestListSnapshots) *ReqRes
OfferSnapshotAsync(types.RequestOfferSnapshot) *ReqRes
LoadSnapshotChunkAsync(types.RequestLoadSnapshotChunk) *ReqRes
ApplySnapshotChunkAsync(types.RequestApplySnapshotChunk) *ReqRes
FlushSync() error
EchoSync(msg string) (*types.ResponseEcho, error)
@@ -47,6 +51,10 @@ type Client interface {
InitChainSync(types.RequestInitChain) (*types.ResponseInitChain, error)
BeginBlockSync(types.RequestBeginBlock) (*types.ResponseBeginBlock, error)
EndBlockSync(types.RequestEndBlock) (*types.ResponseEndBlock, error)
ListSnapshotsSync(types.RequestListSnapshots) (*types.ResponseListSnapshots, error)
OfferSnapshotSync(types.RequestOfferSnapshot) (*types.ResponseOfferSnapshot, error)
LoadSnapshotChunkSync(types.RequestLoadSnapshotChunk) (*types.ResponseLoadSnapshotChunk, error)
ApplySnapshotChunkSync(types.RequestApplySnapshotChunk) (*types.ResponseApplySnapshotChunk, error)
}
//----------------------------------------

View File

@@ -223,6 +223,42 @@ func (cli *grpcClient) EndBlockAsync(params types.RequestEndBlock) *ReqRes {
return cli.finishAsyncCall(req, &types.Response{Value: &types.Response_EndBlock{EndBlock: res}})
}
func (cli *grpcClient) ListSnapshotsAsync(params types.RequestListSnapshots) *ReqRes {
req := types.ToRequestListSnapshots(params)
res, err := cli.client.ListSnapshots(context.Background(), req.GetListSnapshots(), grpc.WaitForReady(true))
if err != nil {
cli.StopForError(err)
}
return cli.finishAsyncCall(req, &types.Response{Value: &types.Response_ListSnapshots{ListSnapshots: res}})
}
func (cli *grpcClient) OfferSnapshotAsync(params types.RequestOfferSnapshot) *ReqRes {
req := types.ToRequestOfferSnapshot(params)
res, err := cli.client.OfferSnapshot(context.Background(), req.GetOfferSnapshot(), grpc.WaitForReady(true))
if err != nil {
cli.StopForError(err)
}
return cli.finishAsyncCall(req, &types.Response{Value: &types.Response_OfferSnapshot{OfferSnapshot: res}})
}
func (cli *grpcClient) LoadSnapshotChunkAsync(params types.RequestLoadSnapshotChunk) *ReqRes {
req := types.ToRequestLoadSnapshotChunk(params)
res, err := cli.client.LoadSnapshotChunk(context.Background(), req.GetLoadSnapshotChunk(), grpc.WaitForReady(true))
if err != nil {
cli.StopForError(err)
}
return cli.finishAsyncCall(req, &types.Response{Value: &types.Response_LoadSnapshotChunk{LoadSnapshotChunk: res}})
}
func (cli *grpcClient) ApplySnapshotChunkAsync(params types.RequestApplySnapshotChunk) *ReqRes {
req := types.ToRequestApplySnapshotChunk(params)
res, err := cli.client.ApplySnapshotChunk(context.Background(), req.GetApplySnapshotChunk(), grpc.WaitForReady(true))
if err != nil {
cli.StopForError(err)
}
return cli.finishAsyncCall(req, &types.Response{Value: &types.Response_ApplySnapshotChunk{ApplySnapshotChunk: res}})
}
func (cli *grpcClient) finishAsyncCall(req *types.Request, res *types.Response) *ReqRes {
reqres := NewReqRes(req)
reqres.Response = res // Set response
@@ -304,3 +340,25 @@ func (cli *grpcClient) EndBlockSync(params types.RequestEndBlock) (*types.Respon
reqres := cli.EndBlockAsync(params)
return reqres.Response.GetEndBlock(), cli.Error()
}
func (cli *grpcClient) ListSnapshotsSync(params types.RequestListSnapshots) (*types.ResponseListSnapshots, error) {
reqres := cli.ListSnapshotsAsync(params)
return reqres.Response.GetListSnapshots(), cli.Error()
}
func (cli *grpcClient) OfferSnapshotSync(params types.RequestOfferSnapshot) (*types.ResponseOfferSnapshot, error) {
reqres := cli.OfferSnapshotAsync(params)
return reqres.Response.GetOfferSnapshot(), cli.Error()
}
func (cli *grpcClient) LoadSnapshotChunkSync(
params types.RequestLoadSnapshotChunk) (*types.ResponseLoadSnapshotChunk, error) {
reqres := cli.LoadSnapshotChunkAsync(params)
return reqres.Response.GetLoadSnapshotChunk(), cli.Error()
}
func (cli *grpcClient) ApplySnapshotChunkSync(
params types.RequestApplySnapshotChunk) (*types.ResponseApplySnapshotChunk, error) {
reqres := cli.ApplySnapshotChunkAsync(params)
return reqres.Response.GetApplySnapshotChunk(), cli.Error()
}

View File

@@ -158,6 +158,50 @@ func (app *localClient) EndBlockAsync(req types.RequestEndBlock) *ReqRes {
)
}
func (app *localClient) ListSnapshotsAsync(req types.RequestListSnapshots) *ReqRes {
app.mtx.Lock()
defer app.mtx.Unlock()
res := app.Application.ListSnapshots(req)
return app.callback(
types.ToRequestListSnapshots(req),
types.ToResponseListSnapshots(res),
)
}
func (app *localClient) OfferSnapshotAsync(req types.RequestOfferSnapshot) *ReqRes {
app.mtx.Lock()
defer app.mtx.Unlock()
res := app.Application.OfferSnapshot(req)
return app.callback(
types.ToRequestOfferSnapshot(req),
types.ToResponseOfferSnapshot(res),
)
}
func (app *localClient) LoadSnapshotChunkAsync(req types.RequestLoadSnapshotChunk) *ReqRes {
app.mtx.Lock()
defer app.mtx.Unlock()
res := app.Application.LoadSnapshotChunk(req)
return app.callback(
types.ToRequestLoadSnapshotChunk(req),
types.ToResponseLoadSnapshotChunk(res),
)
}
func (app *localClient) ApplySnapshotChunkAsync(req types.RequestApplySnapshotChunk) *ReqRes {
app.mtx.Lock()
defer app.mtx.Unlock()
res := app.Application.ApplySnapshotChunk(req)
return app.callback(
types.ToRequestApplySnapshotChunk(req),
types.ToResponseApplySnapshotChunk(res),
)
}
//-------------------------------------------------------
func (app *localClient) FlushSync() error {
@@ -240,6 +284,40 @@ func (app *localClient) EndBlockSync(req types.RequestEndBlock) (*types.Response
return &res, nil
}
func (app *localClient) ListSnapshotsSync(req types.RequestListSnapshots) (*types.ResponseListSnapshots, error) {
app.mtx.Lock()
defer app.mtx.Unlock()
res := app.Application.ListSnapshots(req)
return &res, nil
}
func (app *localClient) OfferSnapshotSync(req types.RequestOfferSnapshot) (*types.ResponseOfferSnapshot, error) {
app.mtx.Lock()
defer app.mtx.Unlock()
res := app.Application.OfferSnapshot(req)
return &res, nil
}
func (app *localClient) LoadSnapshotChunkSync(
req types.RequestLoadSnapshotChunk) (*types.ResponseLoadSnapshotChunk, error) {
app.mtx.Lock()
defer app.mtx.Unlock()
res := app.Application.LoadSnapshotChunk(req)
return &res, nil
}
func (app *localClient) ApplySnapshotChunkSync(
req types.RequestApplySnapshotChunk) (*types.ResponseApplySnapshotChunk, error) {
app.mtx.Lock()
defer app.mtx.Unlock()
res := app.Application.ApplySnapshotChunk(req)
return &res, nil
}
//-------------------------------------------------------
func (app *localClient) callback(req *types.Request, res *types.Response) *ReqRes {

View File

@@ -266,6 +266,22 @@ func (cli *socketClient) EndBlockAsync(req types.RequestEndBlock) *ReqRes {
return cli.queueRequest(types.ToRequestEndBlock(req))
}
func (cli *socketClient) ListSnapshotsAsync(req types.RequestListSnapshots) *ReqRes {
return cli.queueRequest(types.ToRequestListSnapshots(req))
}
func (cli *socketClient) OfferSnapshotAsync(req types.RequestOfferSnapshot) *ReqRes {
return cli.queueRequest(types.ToRequestOfferSnapshot(req))
}
func (cli *socketClient) LoadSnapshotChunkAsync(req types.RequestLoadSnapshotChunk) *ReqRes {
return cli.queueRequest(types.ToRequestLoadSnapshotChunk(req))
}
func (cli *socketClient) ApplySnapshotChunkAsync(req types.RequestApplySnapshotChunk) *ReqRes {
return cli.queueRequest(types.ToRequestApplySnapshotChunk(req))
}
//----------------------------------------
func (cli *socketClient) FlushSync() error {
@@ -337,6 +353,32 @@ func (cli *socketClient) EndBlockSync(req types.RequestEndBlock) (*types.Respons
return reqres.Response.GetEndBlock(), cli.Error()
}
func (cli *socketClient) ListSnapshotsSync(req types.RequestListSnapshots) (*types.ResponseListSnapshots, error) {
reqres := cli.queueRequest(types.ToRequestListSnapshots(req))
cli.FlushSync()
return reqres.Response.GetListSnapshots(), cli.Error()
}
func (cli *socketClient) OfferSnapshotSync(req types.RequestOfferSnapshot) (*types.ResponseOfferSnapshot, error) {
reqres := cli.queueRequest(types.ToRequestOfferSnapshot(req))
cli.FlushSync()
return reqres.Response.GetOfferSnapshot(), cli.Error()
}
func (cli *socketClient) LoadSnapshotChunkSync(
req types.RequestLoadSnapshotChunk) (*types.ResponseLoadSnapshotChunk, error) {
reqres := cli.queueRequest(types.ToRequestLoadSnapshotChunk(req))
cli.FlushSync()
return reqres.Response.GetLoadSnapshotChunk(), cli.Error()
}
func (cli *socketClient) ApplySnapshotChunkSync(
req types.RequestApplySnapshotChunk) (*types.ResponseApplySnapshotChunk, error) {
reqres := cli.queueRequest(types.ToRequestApplySnapshotChunk(req))
cli.FlushSync()
return reqres.Response.GetApplySnapshotChunk(), cli.Error()
}
//----------------------------------------
func (cli *socketClient) queueRequest(req *types.Request) *ReqRes {

View File

@@ -144,6 +144,26 @@ func (app *PersistentKVStoreApplication) EndBlock(req types.RequestEndBlock) typ
return types.ResponseEndBlock{ValidatorUpdates: app.ValUpdates}
}
func (app *PersistentKVStoreApplication) ListSnapshots(
req types.RequestListSnapshots) types.ResponseListSnapshots {
return types.ResponseListSnapshots{}
}
func (app *PersistentKVStoreApplication) LoadSnapshotChunk(
req types.RequestLoadSnapshotChunk) types.ResponseLoadSnapshotChunk {
return types.ResponseLoadSnapshotChunk{}
}
func (app *PersistentKVStoreApplication) OfferSnapshot(
req types.RequestOfferSnapshot) types.ResponseOfferSnapshot {
return types.ResponseOfferSnapshot{Result: types.ResponseOfferSnapshot_abort}
}
func (app *PersistentKVStoreApplication) ApplySnapshotChunk(
req types.RequestApplySnapshotChunk) types.ResponseApplySnapshotChunk {
return types.ResponseApplySnapshotChunk{Result: types.ResponseApplySnapshotChunk_abort}
}
//---------------------------------------------
// update validators

View File

@@ -224,6 +224,18 @@ func (s *SocketServer) handleRequest(req *types.Request, responses chan<- *types
case *types.Request_EndBlock:
res := s.app.EndBlock(*r.EndBlock)
responses <- types.ToResponseEndBlock(res)
case *types.Request_ListSnapshots:
res := s.app.ListSnapshots(*r.ListSnapshots)
responses <- types.ToResponseListSnapshots(res)
case *types.Request_OfferSnapshot:
res := s.app.OfferSnapshot(*r.OfferSnapshot)
responses <- types.ToResponseOfferSnapshot(res)
case *types.Request_LoadSnapshotChunk:
res := s.app.LoadSnapshotChunk(*r.LoadSnapshotChunk)
responses <- types.ToResponseLoadSnapshotChunk(res)
case *types.Request_ApplySnapshotChunk:
res := s.app.ApplySnapshotChunk(*r.ApplySnapshotChunk)
responses <- types.ToResponseApplySnapshotChunk(res)
default:
responses <- types.ToResponseException("Unknown request")
}

View File

@@ -23,6 +23,12 @@ type Application interface {
DeliverTx(RequestDeliverTx) ResponseDeliverTx // Deliver a tx for full processing
EndBlock(RequestEndBlock) ResponseEndBlock // Signals the end of a block, returns changes to the validator set
Commit() ResponseCommit // Commit the state and return the application Merkle root hash
// State Sync Connection
ListSnapshots(RequestListSnapshots) ResponseListSnapshots // List available snapshots
OfferSnapshot(RequestOfferSnapshot) ResponseOfferSnapshot // Offer a snapshot to the application
LoadSnapshotChunk(RequestLoadSnapshotChunk) ResponseLoadSnapshotChunk // Load a snapshot chunk
ApplySnapshotChunk(RequestApplySnapshotChunk) ResponseApplySnapshotChunk // Apply a shapshot chunk
}
//-------------------------------------------------------
@@ -73,6 +79,22 @@ func (BaseApplication) EndBlock(req RequestEndBlock) ResponseEndBlock {
return ResponseEndBlock{}
}
func (BaseApplication) ListSnapshots(req RequestListSnapshots) ResponseListSnapshots {
return ResponseListSnapshots{}
}
func (BaseApplication) OfferSnapshot(req RequestOfferSnapshot) ResponseOfferSnapshot {
return ResponseOfferSnapshot{}
}
func (BaseApplication) LoadSnapshotChunk(req RequestLoadSnapshotChunk) ResponseLoadSnapshotChunk {
return ResponseLoadSnapshotChunk{}
}
func (BaseApplication) ApplySnapshotChunk(req RequestApplySnapshotChunk) ResponseApplySnapshotChunk {
return ResponseApplySnapshotChunk{}
}
//-------------------------------------------------------
// GRPCApplication is a GRPC wrapper for Application
@@ -136,3 +158,27 @@ func (app *GRPCApplication) EndBlock(ctx context.Context, req *RequestEndBlock)
res := app.app.EndBlock(*req)
return &res, nil
}
func (app *GRPCApplication) ListSnapshots(
ctx context.Context, req *RequestListSnapshots) (*ResponseListSnapshots, error) {
res := app.app.ListSnapshots(*req)
return &res, nil
}
func (app *GRPCApplication) OfferSnapshot(
ctx context.Context, req *RequestOfferSnapshot) (*ResponseOfferSnapshot, error) {
res := app.app.OfferSnapshot(*req)
return &res, nil
}
func (app *GRPCApplication) LoadSnapshotChunk(
ctx context.Context, req *RequestLoadSnapshotChunk) (*ResponseLoadSnapshotChunk, error) {
res := app.app.LoadSnapshotChunk(*req)
return &res, nil
}
func (app *GRPCApplication) ApplySnapshotChunk(
ctx context.Context, req *RequestApplySnapshotChunk) (*ResponseApplySnapshotChunk, error) {
res := app.app.ApplySnapshotChunk(*req)
return &res, nil
}

View File

@@ -135,6 +135,30 @@ func ToRequestEndBlock(req RequestEndBlock) *Request {
}
}
func ToRequestListSnapshots(req RequestListSnapshots) *Request {
return &Request{
Value: &Request_ListSnapshots{&req},
}
}
func ToRequestOfferSnapshot(req RequestOfferSnapshot) *Request {
return &Request{
Value: &Request_OfferSnapshot{&req},
}
}
func ToRequestLoadSnapshotChunk(req RequestLoadSnapshotChunk) *Request {
return &Request{
Value: &Request_LoadSnapshotChunk{&req},
}
}
func ToRequestApplySnapshotChunk(req RequestApplySnapshotChunk) *Request {
return &Request{
Value: &Request_ApplySnapshotChunk{&req},
}
}
//----------------------------------------
func ToResponseException(errStr string) *Response {
@@ -208,3 +232,27 @@ func ToResponseEndBlock(res ResponseEndBlock) *Response {
Value: &Response_EndBlock{&res},
}
}
func ToResponseListSnapshots(res ResponseListSnapshots) *Response {
return &Response{
Value: &Response_ListSnapshots{&res},
}
}
func ToResponseOfferSnapshot(res ResponseOfferSnapshot) *Response {
return &Response{
Value: &Response_OfferSnapshot{&res},
}
}
func ToResponseLoadSnapshotChunk(res ResponseLoadSnapshotChunk) *Response {
return &Response{
Value: &Response_LoadSnapshotChunk{&res},
}
}
func ToResponseApplySnapshotChunk(res ResponseApplySnapshotChunk) *Response {
return &Response{
Value: &Response_ApplySnapshotChunk{&res},
}
}

File diff suppressed because it is too large Load Diff

View File

@@ -27,17 +27,21 @@ option (gogoproto.testgen_all) = true;
message Request {
oneof value {
RequestEcho echo = 2;
RequestFlush flush = 3;
RequestInfo info = 4;
RequestSetOption set_option = 5;
RequestInitChain init_chain = 6;
RequestQuery query = 7;
RequestBeginBlock begin_block = 8;
RequestCheckTx check_tx = 9;
RequestDeliverTx deliver_tx = 19;
RequestEndBlock end_block = 11;
RequestCommit commit = 12;
RequestEcho echo = 2;
RequestFlush flush = 3;
RequestInfo info = 4;
RequestSetOption set_option = 5;
RequestInitChain init_chain = 6;
RequestQuery query = 7;
RequestBeginBlock begin_block = 8;
RequestCheckTx check_tx = 9;
RequestDeliverTx deliver_tx = 19;
RequestEndBlock end_block = 11;
RequestCommit commit = 12;
RequestListSnapshots list_snapshots = 13;
RequestOfferSnapshot offer_snapshot = 14;
RequestLoadSnapshotChunk load_snapshot_chunk = 15;
RequestApplySnapshotChunk apply_snapshot_chunk = 16;
}
}
@@ -101,23 +105,51 @@ message RequestEndBlock {
message RequestCommit {}
// lists available snapshots
message RequestListSnapshots {
}
// offers a snapshot to the application
message RequestOfferSnapshot {
Snapshot snapshot = 1; // snapshot offered by peers
bytes app_hash = 2; // light client-verified app hash for snapshot height
}
// loads a snapshot chunk
message RequestLoadSnapshotChunk {
uint64 height = 1;
uint32 format = 2;
uint32 chunk = 3;
}
// Applies a snapshot chunk
message RequestApplySnapshotChunk {
uint32 index = 1;
bytes chunk = 2;
string sender = 3;
}
//----------------------------------------
// Response types
message Response {
oneof value {
ResponseException exception = 1;
ResponseEcho echo = 2;
ResponseFlush flush = 3;
ResponseInfo info = 4;
ResponseSetOption set_option = 5;
ResponseInitChain init_chain = 6;
ResponseQuery query = 7;
ResponseBeginBlock begin_block = 8;
ResponseCheckTx check_tx = 9;
ResponseDeliverTx deliver_tx = 10;
ResponseEndBlock end_block = 11;
ResponseCommit commit = 12;
ResponseException exception = 1;
ResponseEcho echo = 2;
ResponseFlush flush = 3;
ResponseInfo info = 4;
ResponseSetOption set_option = 5;
ResponseInitChain init_chain = 6;
ResponseQuery query = 7;
ResponseBeginBlock begin_block = 8;
ResponseCheckTx check_tx = 9;
ResponseDeliverTx deliver_tx = 10;
ResponseEndBlock end_block = 11;
ResponseCommit commit = 12;
ResponseListSnapshots list_snapshots = 13;
ResponseOfferSnapshot offer_snapshot = 14;
ResponseLoadSnapshotChunk load_snapshot_chunk = 15;
ResponseApplySnapshotChunk apply_snapshot_chunk = 16;
}
}
@@ -210,6 +242,40 @@ message ResponseCommit {
int64 retain_height = 3;
}
message ResponseListSnapshots {
repeated Snapshot snapshots = 1;
}
message ResponseOfferSnapshot {
Result result = 1;
enum Result {
accept = 0; // Snapshot accepted, apply chunks
abort = 1; // Abort all snapshot restoration
reject = 2; // Reject this specific snapshot, and try a different one
reject_format = 3; // Reject all snapshots of this format, and try a different one
reject_sender = 4; // Reject all snapshots from the sender(s), and try a different one
}
}
message ResponseLoadSnapshotChunk {
bytes chunk = 1;
}
message ResponseApplySnapshotChunk {
Result result = 1;
repeated uint32 refetch_chunks = 2; // Chunks to refetch and reapply (regardless of result)
repeated string reject_senders = 3; // Chunk senders to reject and ban (regardless of result)
enum Result {
accept = 0; // Chunk successfully accepted
abort = 1; // Abort all snapshot restoration
retry = 2; // Retry chunk, combine with refetch and reject as appropriate
retry_snapshot = 3; // Retry snapshot, combine with refetch and reject as appropriate
reject_snapshot = 4; // Reject this snapshot, try a different one but keep sender rejections
}
}
//----------------------------------------
// Misc.
@@ -336,6 +402,17 @@ message Evidence {
int64 total_voting_power = 5;
}
//----------------------------------------
// State Sync Types
message Snapshot {
uint64 height = 1; // The height at which the snapshot was taken
uint32 format = 2; // The application-specific snapshot format
uint32 chunks = 3; // Number of chunks in the snapshot
bytes hash = 4; // Arbitrary snapshot hash - should be equal only for identical snapshots
bytes metadata = 5; // Arbitrary application metadata
}
//----------------------------------------
// Service Definition
@@ -351,4 +428,8 @@ service ABCIApplication {
rpc InitChain(RequestInitChain) returns (ResponseInitChain);
rpc BeginBlock(RequestBeginBlock) returns (ResponseBeginBlock);
rpc EndBlock(RequestEndBlock) returns (ResponseEndBlock);
rpc ListSnapshots(RequestListSnapshots) returns (ResponseListSnapshots);
rpc OfferSnapshot(RequestOfferSnapshot) returns (ResponseOfferSnapshot);
rpc LoadSnapshotChunk(RequestLoadSnapshotChunk) returns (ResponseLoadSnapshotChunk);
rpc ApplySnapshotChunk(RequestApplySnapshotChunk) returns (ResponseApplySnapshotChunk);
}

File diff suppressed because it is too large Load Diff

View File

@@ -41,7 +41,7 @@ const (
type consensusReactor interface {
// for when we switch from blockchain reactor and fast sync to
// the consensus machine
SwitchToConsensus(sm.State, uint64)
SwitchToConsensus(state sm.State, skipWAL bool)
}
type peerError struct {
@@ -115,11 +115,25 @@ func (bcR *BlockchainReactor) OnStart() error {
if err != nil {
return err
}
go bcR.poolRoutine()
go bcR.poolRoutine(false)
}
return nil
}
// SwitchToFastSync is called by the state sync reactor when switching to fast sync.
func (bcR *BlockchainReactor) SwitchToFastSync(state sm.State) error {
bcR.fastSync = true
bcR.initialState = state
bcR.pool.height = state.LastBlockHeight + 1
err := bcR.pool.Start()
if err != nil {
return err
}
go bcR.poolRoutine(true)
return nil
}
// OnStop implements service.Service.
func (bcR *BlockchainReactor) OnStop() {
bcR.pool.Stop()
@@ -213,7 +227,7 @@ func (bcR *BlockchainReactor) Receive(chID byte, src p2p.Peer, msgBytes []byte)
// Handle messages from the poolReactor telling the reactor what to do.
// NOTE: Don't sleep in the FOR_LOOP or otherwise slow it down!
func (bcR *BlockchainReactor) poolRoutine() {
func (bcR *BlockchainReactor) poolRoutine(stateSynced bool) {
trySyncTicker := time.NewTicker(trySyncIntervalMS * time.Millisecond)
statusUpdateTicker := time.NewTicker(statusUpdateIntervalSeconds * time.Second)
@@ -273,7 +287,7 @@ FOR_LOOP:
bcR.pool.Stop()
conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor)
if ok {
conR.SwitchToConsensus(state, blocksSynced)
conR.SwitchToConsensus(state, blocksSynced > 0 || stateSynced)
}
// else {
// should only happen during testing

View File

@@ -44,7 +44,7 @@ var (
type consensusReactor interface {
// for when we switch from blockchain reactor and fast sync to
// the consensus machine
SwitchToConsensus(sm.State, uint64)
SwitchToConsensus(state sm.State, skipWAL bool)
}
// BlockchainReactor handles long-term catchup syncing.
@@ -57,7 +57,8 @@ type BlockchainReactor struct {
blockExec *sm.BlockExecutor
store *store.BlockStore
fastSync bool
fastSync bool
stateSynced bool
fsm *BcReactorFSM
blocksSynced uint64
@@ -154,6 +155,19 @@ func (bcR *BlockchainReactor) OnStop() {
_ = bcR.Stop()
}
// SwitchToFastSync is called by the state sync reactor when switching to fast sync.
func (bcR *BlockchainReactor) SwitchToFastSync(state sm.State) error {
bcR.fastSync = true
bcR.initialState = state
bcR.state = state
bcR.stateSynced = true
bcR.fsm = NewFSM(state.LastBlockHeight+1, bcR)
bcR.fsm.SetLogger(bcR.Logger)
go bcR.poolRoutine()
return nil
}
// GetChannels implements Reactor
func (bcR *BlockchainReactor) GetChannels() []*p2p.ChannelDescriptor {
return []*p2p.ChannelDescriptor{
@@ -474,7 +488,7 @@ func (bcR *BlockchainReactor) sendBlockRequest(peerID p2p.ID, height int64) erro
func (bcR *BlockchainReactor) switchToConsensus() {
conR, ok := bcR.Switch.Reactor("CONSENSUS").(consensusReactor)
if ok {
conR.SwitchToConsensus(bcR.state, bcR.blocksSynced)
conR.SwitchToConsensus(bcR.state, bcR.blocksSynced > 0 || bcR.stateSynced)
bcR.eventsFromFSMCh <- bcFsmMessage{event: syncFinishedEv}
}
// else {

View File

@@ -167,7 +167,7 @@ type consensusReactorTest struct {
mtx sync.Mutex
}
func (conR *consensusReactorTest) SwitchToConsensus(state sm.State, blocksSynced uint64) {
func (conR *consensusReactorTest) SwitchToConsensus(state sm.State, blocksSynced bool) {
conR.mtx.Lock()
defer conR.mtx.Unlock()
conR.switchedToConsensus = true

View File

@@ -16,7 +16,7 @@ type iIO interface {
broadcastStatusRequest(base int64, height int64)
trySwitchToConsensus(state state.State, blocksSynced int)
trySwitchToConsensus(state state.State, skipWAL bool)
}
type switchIO struct {
@@ -37,7 +37,7 @@ const (
type consensusReactor interface {
// for when we switch from blockchain reactor and fast sync to
// the consensus machine
SwitchToConsensus(state.State, int)
SwitchToConsensus(state state.State, skipWAL bool)
}
func (sio *switchIO) sendBlockRequest(peerID p2p.ID, height int64) error {
@@ -97,10 +97,10 @@ func (sio *switchIO) sendBlockNotFound(height int64, peerID p2p.ID) error {
return nil
}
func (sio *switchIO) trySwitchToConsensus(state state.State, blocksSynced int) {
func (sio *switchIO) trySwitchToConsensus(state state.State, skipWAL bool) {
conR, ok := sio.sw.Reactor("CONSENSUS").(consensusReactor)
if ok {
conR.SwitchToConsensus(state, blocksSynced)
conR.SwitchToConsensus(state, skipWAL)
}
}

View File

@@ -129,8 +129,8 @@ type blockStore interface {
type BlockchainReactor struct {
p2p.BaseReactor
fastSync bool // if true, enable fast sync on start
events chan Event // XXX: Rename eventsFromPeers
stopDemux chan struct{}
scheduler *Routine
processor *Routine
logger log.Logger
@@ -157,7 +157,7 @@ type blockApplier interface {
// XXX: unify naming in this package around tmState
// XXX: V1 stores a copy of state as initialState, which is never mutated. Is that nessesary?
func newReactor(state state.State, store blockStore, reporter behaviour.Reporter,
blockApplier blockApplier, bufferSize int) *BlockchainReactor {
blockApplier blockApplier, bufferSize int, fastSync bool) *BlockchainReactor {
scheduler := newScheduler(state.LastBlockHeight, time.Now())
pContext := newProcessorContext(store, blockApplier, state)
// TODO: Fix naming to just newProcesssor
@@ -166,12 +166,12 @@ func newReactor(state state.State, store blockStore, reporter behaviour.Reporter
return &BlockchainReactor{
events: make(chan Event, bufferSize),
stopDemux: make(chan struct{}),
scheduler: newRoutine("scheduler", scheduler.handle, bufferSize),
processor: newRoutine("processor", processor.handle, bufferSize),
store: store,
reporter: reporter,
logger: log.NewNopLogger(),
fastSync: fastSync,
}
}
@@ -182,7 +182,7 @@ func NewBlockchainReactor(
store blockStore,
fastSync bool) *BlockchainReactor {
reporter := behaviour.NewMockReporter()
return newReactor(state, store, reporter, blockApplier, 1000)
return newReactor(state, store, reporter, blockApplier, 1000, fastSync)
}
// SetSwitch implements Reactor interface.
@@ -226,9 +226,11 @@ func (r *BlockchainReactor) SetLogger(logger log.Logger) {
// Start implements cmn.Service interface
func (r *BlockchainReactor) Start() error {
r.reporter = behaviour.NewSwitchReporter(r.BaseReactor.Switch)
go r.scheduler.start()
go r.processor.start()
go r.demux()
if r.fastSync {
go r.scheduler.start()
go r.processor.start()
go r.demux()
}
return nil
}
@@ -306,19 +308,29 @@ func (r *BlockchainReactor) demux() {
processBlockFreq = 20 * time.Millisecond
doProcessBlockCh = make(chan struct{}, 1)
doProcessBlockTk = time.NewTicker(processBlockFreq)
)
defer doProcessBlockTk.Stop()
var (
prunePeerFreq = 1 * time.Second
doPrunePeerCh = make(chan struct{}, 1)
doPrunePeerTk = time.NewTicker(prunePeerFreq)
)
defer doPrunePeerTk.Stop()
var (
scheduleFreq = 20 * time.Millisecond
doScheduleCh = make(chan struct{}, 1)
doScheduleTk = time.NewTicker(scheduleFreq)
)
defer doScheduleTk.Stop()
var (
statusFreq = 10 * time.Second
doStatusCh = make(chan struct{}, 1)
doStatusTk = time.NewTicker(statusFreq)
)
defer doStatusTk.Stop()
// XXX: Extract timers to make testing atemporal
for {
@@ -355,14 +367,20 @@ func (r *BlockchainReactor) demux() {
case <-doStatusCh:
r.io.broadcastStatusRequest(r.store.Base(), r.SyncHeight())
// Events from peers
case event := <-r.events:
// Events from peers. Closing the channel signals event loop termination.
case event, ok := <-r.events:
if !ok {
r.logger.Info("Stopping event processing")
return
}
switch event := event.(type) {
case bcStatusResponse:
r.setMaxPeerHeight(event.height)
r.scheduler.send(event)
case bcAddNewPeer, bcRemovePeer, bcBlockResponse, bcNoBlockResponse:
r.scheduler.send(event)
default:
r.logger.Error("Received unknown event", "event", fmt.Sprintf("%T", event))
}
// Incremental events form scheduler
@@ -378,6 +396,9 @@ func (r *BlockchainReactor) demux() {
case scFinishedEv:
r.processor.send(event)
r.scheduler.stop()
case noOpEvent:
default:
r.logger.Error("Received unknown scheduler event", "event", fmt.Sprintf("%T", event))
}
// Incremental events from processor
@@ -395,22 +416,30 @@ func (r *BlockchainReactor) demux() {
case pcBlockVerificationFailure:
r.scheduler.send(event)
case pcFinished:
r.io.trySwitchToConsensus(event.tmState, event.blocksSynced)
r.io.trySwitchToConsensus(event.tmState, event.blocksSynced > 0)
r.processor.stop()
case noOpEvent:
default:
r.logger.Error("Received unknown processor event", "event", fmt.Sprintf("%T", event))
}
// Terminal events from scheduler
// Terminal event from scheduler
case err := <-r.scheduler.final():
r.logger.Info(fmt.Sprintf("scheduler final %s", err))
// send the processor stop?
switch err {
case nil:
r.logger.Info("Scheduler stopped")
default:
r.logger.Error("Scheduler aborted with error", "err", err)
}
// Terminal event from processor
case event := <-r.processor.final():
r.logger.Info(fmt.Sprintf("processor final %s", event))
case <-r.stopDemux:
r.logger.Info("demuxing stopped")
return
case err := <-r.processor.final():
switch err {
case nil:
r.logger.Info("Processor stopped")
default:
r.logger.Error("Processor aborted with error", "err", err)
}
}
}
}
@@ -421,7 +450,6 @@ func (r *BlockchainReactor) Stop() error {
r.scheduler.stop()
r.processor.stop()
close(r.stopDemux)
close(r.events)
r.logger.Info("reactor stopped")

View File

@@ -118,7 +118,7 @@ func (sio *mockSwitchIo) sendBlockNotFound(height int64, peerID p2p.ID) error {
return nil
}
func (sio *mockSwitchIo) trySwitchToConsensus(state sm.State, blocksSynced int) {
func (sio *mockSwitchIo) trySwitchToConsensus(state sm.State, skipWAL bool) {
sio.mtx.Lock()
defer sio.mtx.Unlock()
sio.switchedToConsensus = true
@@ -157,7 +157,7 @@ func newTestReactor(p testReactorParams) *BlockchainReactor {
sm.SaveState(db, state)
}
r := newReactor(state, store, reporter, appl, p.bufferSize)
r := newReactor(state, store, reporter, appl, p.bufferSize, true)
logger := log.TestingLogger()
r.SetLogger(logger.With("module", "blockchain"))

View File

@@ -69,9 +69,11 @@ func (rt *Routine) start() {
for {
events, err := rt.queue.Get(1)
if err != nil {
rt.logger.Info(fmt.Sprintf("%s: stopping\n", rt.name))
rt.terminate(fmt.Errorf("stopped"))
if err == queue.ErrDisposed {
rt.terminate(nil)
return
} else if err != nil {
rt.terminate(err)
return
}
oEvent, err := rt.handle(events[0].(Event))
@@ -131,6 +133,7 @@ func (rt *Routine) final() chan error {
// XXX: Maybe get rid of this
func (rt *Routine) terminate(reason error) {
close(rt.out)
// We don't close the rt.out channel here, to avoid spinning on the closed channel
// in the event loop.
rt.fin <- reason
}

View File

@@ -1,13 +1,13 @@
package config
import (
"encoding/hex"
"errors"
"fmt"
"net/http"
"os"
"path/filepath"
"time"
"github.com/pkg/errors"
)
const (
@@ -65,6 +65,7 @@ type Config struct {
RPC *RPCConfig `mapstructure:"rpc"`
P2P *P2PConfig `mapstructure:"p2p"`
Mempool *MempoolConfig `mapstructure:"mempool"`
StateSync *StateSyncConfig `mapstructure:"statesync"`
FastSync *FastSyncConfig `mapstructure:"fastsync"`
Consensus *ConsensusConfig `mapstructure:"consensus"`
TxIndex *TxIndexConfig `mapstructure:"tx_index"`
@@ -78,6 +79,7 @@ func DefaultConfig() *Config {
RPC: DefaultRPCConfig(),
P2P: DefaultP2PConfig(),
Mempool: DefaultMempoolConfig(),
StateSync: DefaultStateSyncConfig(),
FastSync: DefaultFastSyncConfig(),
Consensus: DefaultConsensusConfig(),
TxIndex: DefaultTxIndexConfig(),
@@ -92,6 +94,7 @@ func TestConfig() *Config {
RPC: TestRPCConfig(),
P2P: TestP2PConfig(),
Mempool: TestMempoolConfig(),
StateSync: TestStateSyncConfig(),
FastSync: TestFastSyncConfig(),
Consensus: TestConsensusConfig(),
TxIndex: TestTxIndexConfig(),
@@ -116,24 +119,27 @@ func (cfg *Config) ValidateBasic() error {
return err
}
if err := cfg.RPC.ValidateBasic(); err != nil {
return errors.Wrap(err, "Error in [rpc] section")
return fmt.Errorf("error in [rpc] section: %w", err)
}
if err := cfg.P2P.ValidateBasic(); err != nil {
return errors.Wrap(err, "Error in [p2p] section")
return fmt.Errorf("error in [p2p] section: %w", err)
}
if err := cfg.Mempool.ValidateBasic(); err != nil {
return errors.Wrap(err, "Error in [mempool] section")
return fmt.Errorf("error in [mempool] section: %w", err)
}
if err := cfg.StateSync.ValidateBasic(); err != nil {
return fmt.Errorf("error in [statesync] section: %w", err)
}
if err := cfg.FastSync.ValidateBasic(); err != nil {
return errors.Wrap(err, "Error in [fastsync] section")
return fmt.Errorf("error in [fastsync] section: %w", err)
}
if err := cfg.Consensus.ValidateBasic(); err != nil {
return errors.Wrap(err, "Error in [consensus] section")
return fmt.Errorf("error in [consensus] section: %w", err)
}
return errors.Wrap(
cfg.Instrumentation.ValidateBasic(),
"Error in [instrumentation] section",
)
if err := cfg.Instrumentation.ValidateBasic(); err != nil {
return fmt.Errorf("error in [instrumentation] section: %w", err)
}
return nil
}
//-----------------------------------------------------------------------------
@@ -292,7 +298,7 @@ func DefaultLogLevel() string {
// DefaultPackageLogLevels returns a default log level setting so all packages
// log at "error", while the `state` and `main` packages log at "info"
func DefaultPackageLogLevels() string {
return fmt.Sprintf("main:info,state:info,*:%s", DefaultLogLevel())
return fmt.Sprintf("main:info,state:info,statesync:info,*:%s", DefaultLogLevel())
}
//-----------------------------------------------------------------------------
@@ -701,6 +707,69 @@ func (cfg *MempoolConfig) ValidateBasic() error {
return nil
}
//-----------------------------------------------------------------------------
// StateSyncConfig
// StateSyncConfig defines the configuration for the Tendermint state sync service
type StateSyncConfig struct {
Enable bool `mapstructure:"enable"`
TempDir string `mapstructure:"temp_dir"`
RPCServers []string `mapstructure:"rpc_servers"`
TrustPeriod time.Duration `mapstructure:"trust_period"`
TrustHeight int64 `mapstructure:"trust_height"`
TrustHash string `mapstructure:"trust_hash"`
}
func (cfg *StateSyncConfig) TrustHashBytes() []byte {
// validated in ValidateBasic, so we can safely panic here
bytes, err := hex.DecodeString(cfg.TrustHash)
if err != nil {
panic(err)
}
return bytes
}
// DefaultStateSyncConfig returns a default configuration for the state sync service
func DefaultStateSyncConfig() *StateSyncConfig {
return &StateSyncConfig{}
}
// TestFastSyncConfig returns a default configuration for the state sync service
func TestStateSyncConfig() *StateSyncConfig {
return DefaultStateSyncConfig()
}
// ValidateBasic performs basic validation.
func (cfg *StateSyncConfig) ValidateBasic() error {
if cfg.Enable {
if len(cfg.RPCServers) == 0 {
return errors.New("rpc_servers is required")
}
if len(cfg.RPCServers) < 2 {
return errors.New("at least two rpc_servers entries is required")
}
for _, server := range cfg.RPCServers {
if len(server) == 0 {
return errors.New("found empty rpc_servers entry")
}
}
if cfg.TrustPeriod <= 0 {
return errors.New("trusted_period is required")
}
if cfg.TrustHeight <= 0 {
return errors.New("trusted_height is required")
}
if len(cfg.TrustHash) == 0 {
return errors.New("trusted_hash is required")
}
_, err := hex.DecodeString(cfg.TrustHash)
if err != nil {
return fmt.Errorf("invalid trusted_hash: %w", err)
}
}
return nil
}
//-----------------------------------------------------------------------------
// FastSyncConfig

View File

@@ -6,6 +6,7 @@ import (
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestDefaultConfig(t *testing.T) {
@@ -122,6 +123,11 @@ func TestMempoolConfigValidateBasic(t *testing.T) {
}
}
func TestStateSyncConfigValidateBasic(t *testing.T) {
cfg := TestStateSyncConfig()
require.NoError(t, cfg.ValidateBasic())
}
func TestFastSyncConfigValidateBasic(t *testing.T) {
cfg := TestFastSyncConfig()
assert.NoError(t, cfg.ValidateBasic())

View File

@@ -315,6 +315,30 @@ cache_size = {{ .Mempool.CacheSize }}
# NOTE: the max size of a tx transmitted over the network is {max_tx_bytes} + {amino overhead}.
max_tx_bytes = {{ .Mempool.MaxTxBytes }}
##### state sync configuration options #####
[statesync]
# State sync rapidly bootstraps a new node by discovering, fetching, and restoring a state machine
# snapshot from peers instead of fetching and replaying historical blocks. Requires some peers in
# the network to take and serve state machine snapshots. State sync is not attempted if the node
# has any local state (LastBlockHeight > 0). The node will have a truncated block history,
# starting from the height of the snapshot.
enable = {{ .StateSync.Enable }}
# RPC servers (comma-separated) for light client verification of the synced state machine and
# retrieval of state data for node bootstrapping. Also needs a trusted height and corresponding
# header hash obtained from a trusted source, and a period during which validators can be trusted.
#
# For Cosmos SDK-based chains, trust_period should usually be about 2/3 of the unbonding time (~2
# weeks) during which they can be financially punished (slashed) for misbehavior.
rpc_servers = ""
trust_height = {{ .StateSync.TrustHeight }}
trust_hash = "{{ .StateSync.TrustHash }}"
trust_period = "{{ .StateSync.TrustPeriod }}"
# Temporary directory for state sync snapshot chunks, defaults to the OS tempdir (typically /tmp).
# Will create a new, randomly named directory within, and remove it when done.
temp_dir = "{{ .StateSync.TempDir }}"
##### fast sync configuration options #####
[fastsync]

View File

@@ -112,13 +112,13 @@ func TestByzantine(t *testing.T) {
// note these must be started before the byz
for i := 1; i < N; i++ {
cr := reactors[i].(*Reactor)
cr.SwitchToConsensus(cr.conS.GetState(), 0)
cr.SwitchToConsensus(cr.conS.GetState(), false)
}
// start the byzantine state machine
byzR := reactors[0].(*ByzantineReactor)
s := byzR.reactor.conS.GetState()
byzR.reactor.SwitchToConsensus(s, 0)
byzR.reactor.SwitchToConsensus(s, false)
// byz proposer sends one block to peers[0]
// and the other block to peers[1] and peers[2].
@@ -268,8 +268,8 @@ func (br *ByzantineReactor) AddPeer(peer p2p.Peer) {
peer.Set(types.PeerStateKey, peerState)
// Send our state to peer.
// If we're fast_syncing, broadcast a RoundStepMessage later upon SwitchToConsensus().
if !br.reactor.fastSync {
// If we're syncing, broadcast a RoundStepMessage later upon SwitchToConsensus().
if !br.reactor.waitSync {
br.reactor.sendNewRoundStepMessage(peer)
}
}

View File

@@ -55,6 +55,8 @@ type Metrics struct {
CommittedHeight metrics.Gauge
// Whether or not a node is fast syncing. 1 if yes, 0 if no.
FastSyncing metrics.Gauge
// Whether or not a node is state syncing. 1 if yes, 0 if no.
StateSyncing metrics.Gauge
// Number of blockparts transmitted by peer.
BlockParts metrics.Counter
@@ -174,6 +176,12 @@ func PrometheusMetrics(namespace string, labelsAndValues ...string) *Metrics {
Name: "fast_syncing",
Help: "Whether or not a node is fast syncing. 1 if yes, 0 if no.",
}, labels).With(labelsAndValues...),
StateSyncing: prometheus.NewGaugeFrom(stdprometheus.GaugeOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
Name: "state_syncing",
Help: "Whether or not a node is state syncing. 1 if yes, 0 if no.",
}, labels).With(labelsAndValues...),
BlockParts: prometheus.NewCounterFrom(stdprometheus.CounterOpts{
Namespace: namespace,
Subsystem: MetricsSubsystem,
@@ -208,6 +216,7 @@ func NopMetrics() *Metrics {
TotalTxs: discard.NewGauge(),
CommittedHeight: discard.NewGauge(),
FastSyncing: discard.NewGauge(),
StateSyncing: discard.NewGauge(),
BlockParts: discard.NewCounter(),
}
}

View File

@@ -41,23 +41,22 @@ type Reactor struct {
conS *State
mtx sync.RWMutex
fastSync bool
waitSync bool
eventBus *types.EventBus
metrics *Metrics
Metrics *Metrics
}
type ReactorOption func(*Reactor)
// NewReactor returns a new Reactor with the given
// consensusState.
func NewReactor(consensusState *State, fastSync bool, options ...ReactorOption) *Reactor {
func NewReactor(consensusState *State, waitSync bool, options ...ReactorOption) *Reactor {
conR := &Reactor{
conS: consensusState,
fastSync: fastSync,
metrics: NopMetrics(),
waitSync: waitSync,
Metrics: NopMetrics(),
}
conR.updateFastSyncingMetric()
conR.BaseReactor = *p2p.NewBaseReactor("Consensus", conR)
for _, option := range options {
@@ -70,14 +69,14 @@ func NewReactor(consensusState *State, fastSync bool, options ...ReactorOption)
// OnStart implements BaseService by subscribing to events, which later will be
// broadcasted to other peers and starting state if we're not in fast sync.
func (conR *Reactor) OnStart() error {
conR.Logger.Info("Reactor ", "fastSync", conR.FastSync())
conR.Logger.Info("Reactor ", "waitSync", conR.WaitSync())
// start routine that computes peer statistics for evaluating peer quality
go conR.peerStatsRoutine()
conR.subscribeToBroadcastEvents()
if !conR.FastSync() {
if !conR.WaitSync() {
err := conR.conS.Start()
if err != nil {
return err
@@ -92,14 +91,14 @@ func (conR *Reactor) OnStart() error {
func (conR *Reactor) OnStop() {
conR.unsubscribeFromBroadcastEvents()
conR.conS.Stop()
if !conR.FastSync() {
if !conR.WaitSync() {
conR.conS.Wait()
}
}
// SwitchToConsensus switches from fast_sync mode to consensus mode.
// It resets the state, turns off fast_sync, and starts the consensus state-machine
func (conR *Reactor) SwitchToConsensus(state sm.State, blocksSynced uint64) {
func (conR *Reactor) SwitchToConsensus(state sm.State, skipWAL bool) {
conR.Logger.Info("SwitchToConsensus")
conR.conS.reconstructLastCommit(state)
// NOTE: The line below causes broadcastNewRoundStepRoutine() to
@@ -107,12 +106,12 @@ func (conR *Reactor) SwitchToConsensus(state sm.State, blocksSynced uint64) {
conR.conS.updateToState(state)
conR.mtx.Lock()
conR.fastSync = false
conR.waitSync = false
conR.mtx.Unlock()
conR.metrics.FastSyncing.Set(0)
conR.Metrics.FastSyncing.Set(0)
conR.Metrics.StateSyncing.Set(0)
if blocksSynced > 0 {
// dont bother with the WAL if we fast synced
if skipWAL {
conR.conS.doWALCatchup = false
}
err := conR.conS.Start()
@@ -187,7 +186,7 @@ func (conR *Reactor) AddPeer(peer p2p.Peer) {
// Send our state to peer.
// If we're fast_syncing, broadcast a RoundStepMessage later upon SwitchToConsensus().
if !conR.FastSync() {
if !conR.WaitSync() {
conR.sendNewRoundStepMessage(peer)
}
}
@@ -284,8 +283,8 @@ func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
}
case DataChannel:
if conR.FastSync() {
conR.Logger.Info("Ignoring message received during fastSync", "msg", msg)
if conR.WaitSync() {
conR.Logger.Info("Ignoring message received during sync", "msg", msg)
return
}
switch msg := msg.(type) {
@@ -296,15 +295,15 @@ func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
ps.ApplyProposalPOLMessage(msg)
case *BlockPartMessage:
ps.SetHasProposalBlockPart(msg.Height, msg.Round, msg.Part.Index)
conR.metrics.BlockParts.With("peer_id", string(src.ID())).Add(1)
conR.Metrics.BlockParts.With("peer_id", string(src.ID())).Add(1)
conR.conS.peerMsgQueue <- msgInfo{msg, src.ID()}
default:
conR.Logger.Error(fmt.Sprintf("Unknown message type %v", reflect.TypeOf(msg)))
}
case VoteChannel:
if conR.FastSync() {
conR.Logger.Info("Ignoring message received during fastSync", "msg", msg)
if conR.WaitSync() {
conR.Logger.Info("Ignoring message received during sync", "msg", msg)
return
}
switch msg := msg.(type) {
@@ -325,8 +324,8 @@ func (conR *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
}
case VoteSetBitsChannel:
if conR.FastSync() {
conR.Logger.Info("Ignoring message received during fastSync", "msg", msg)
if conR.WaitSync() {
conR.Logger.Info("Ignoring message received during sync", "msg", msg)
return
}
switch msg := msg.(type) {
@@ -366,11 +365,11 @@ func (conR *Reactor) SetEventBus(b *types.EventBus) {
conR.conS.SetEventBus(b)
}
// FastSync returns whether the consensus reactor is in fast-sync mode.
func (conR *Reactor) FastSync() bool {
// WaitSync returns whether the consensus reactor is waiting for state/fast sync.
func (conR *Reactor) WaitSync() bool {
conR.mtx.RLock()
defer conR.mtx.RUnlock()
return conR.fastSync
return conR.waitSync
}
//--------------------------------------
@@ -886,19 +885,9 @@ func (conR *Reactor) StringIndented(indent string) string {
return s
}
func (conR *Reactor) updateFastSyncingMetric() {
var fastSyncing float64
if conR.fastSync {
fastSyncing = 1
} else {
fastSyncing = 0
}
conR.metrics.FastSyncing.Set(fastSyncing)
}
// ReactorMetrics sets the metrics
func ReactorMetrics(metrics *Metrics) ReactorOption {
return func(conR *Reactor) { conR.metrics = metrics }
return func(conR *Reactor) { conR.Metrics = metrics }
}
//-----------------------------------------------------------------------------

View File

@@ -75,7 +75,7 @@ func startConsensusNet(t *testing.T, css []*State, n int) (
// TODO: is this still true with new pubsub?
for i := 0; i < n; i++ {
s := reactors[i].conS.GetState()
reactors[i].SwitchToConsensus(s, 0)
reactors[i].SwitchToConsensus(s, false)
}
return reactors, blocksSubs, eventBuses
}

View File

@@ -1,5 +1,7 @@
# ADR 053: State Sync Prototype
State sync is now [merged](https://github.com/tendermint/tendermint/pull/4705). Up-to-date ABCI documentation is [available](https://github.com/tendermint/spec/pull/90), refer to it rather than this ADR for details.
This ADR outlines the plan for an initial state sync prototype, and is subject to change as we gain feedback and experience. It builds on discussions and findings in [ADR-042](./adr-042-state-sync.md), see that for background information.
## Changelog
@@ -22,6 +24,8 @@ This ADR outlines the plan for an initial state sync prototype, and is subject t
* ABCI: chunks are now 0-indexed, for parity with `chunk_hashes` array.
* Reduced maximum chunk size to 16 MB, and increased snapshot message size to 4 MB.
* 2020-04-29: Update with final released ABCI interface (Erik Grinaker)
## Context
State sync will allow a new node to receive a snapshot of the application state without downloading blocks or going through consensus. This bootstraps the node significantly faster than the current fast sync system, which replays all historical blocks.
@@ -48,10 +52,11 @@ A node can have multiple snapshots taken at various heights. Snapshots can be ta
```proto
message Snapshot {
uint64 height = 1; // The height at which the snapshot was taken
uint32 format = 2; // The application-specific snapshot format
repeated bytes chunk_hashes = 3; // SHA-256 checksums of all chunks, in order
bytes metadata = 4; // Arbitrary application metadata
uint64 height = 1; // The height at which the snapshot was taken
uint32 format = 2; // The application-specific snapshot format
uint32 chunks = 3; // Number of chunks in the snapshot
bytes hash = 4; // Arbitrary snapshot hash - should be equal only for identical snapshots
bytes metadata = 5; // Arbitrary application metadata
}
```
@@ -64,50 +69,57 @@ Chunks are exchanged simply as `bytes`, and cannot be larger than 16 MB. `Snapsh
message RequestListSnapshots {}
message ResponseListSnapshots {
repeated Snapshot snapshots = 1;
repeated Snapshot snapshots = 1;
}
// Offers a snapshot to the application
message RequestOfferSnapshot {
Snapshot snapshot = 1;
bytes app_hash = 2;
}
Snapshot snapshot = 1; // snapshot offered by peers
bytes app_hash = 2; // light client-verified app hash for snapshot height
}
message ResponseOfferSnapshot {
bool accepted = 1;
Reason reason = 2;
Result result = 1;
enum Reason { // Reason why snapshot was rejected
unknown = 0; // Unknown or generic reason
invalid_height = 1; // Height is rejected: avoid this height
invalid_format = 2; // Format is rejected: avoid this format
}
enum Result {
accept = 0; // Snapshot accepted, apply chunks
abort = 1; // Abort all snapshot restoration
reject = 2; // Reject this specific snapshot, and try a different one
reject_format = 3; // Reject all snapshots of this format, and try a different one
reject_sender = 4; // Reject all snapshots from the sender(s), and try a different one
}
}
// Loads a snapshot chunk
message RequestLoadSnapshotChunk {
uint64 height = 1;
uint32 format = 2;
uint32 chunk = 3; // Zero-indexed
uint64 height = 1;
uint32 format = 2;
uint32 chunk = 3; // Zero-indexed
}
message ResponseLoadSnapshotChunk {
bytes chunk = 1;
bytes chunk = 1;
}
// Applies a snapshot chunk
message RequestApplySnapshotChunk {
bytes chunk = 1;
}
uint32 index = 1;
bytes chunk = 2;
string sender = 3;
}
message ResponseApplySnapshotChunk {
bool applied = 1;
Reason reason = 2; // Reason why chunk failed
Result result = 1;
repeated uint32 refetch_chunks = 2; // Chunks to refetch and reapply (regardless of result)
repeated string reject_senders = 3; // Chunk senders to reject and ban (regardless of result)
enum Reason { // Reason why chunk failed
unknown = 0; // Unknown or generic reason
verify_failed = 1; // Snapshot verification failed
}
enum Result {
accept = 0; // Chunk successfully accepted
abort = 1; // Abort all snapshot restoration
retry = 2; // Retry chunk, combine with refetch and reject as appropriate
retry_snapshot = 3; // Retry snapshot, combine with refetch and reject as appropriate
reject_snapshot = 4; // Reject this snapshot, try a different one but keep sender rejections
}
}
```
@@ -141,15 +153,15 @@ When starting an empty node with state sync and fast sync enabled, snapshots are
4. The node requests available snapshots via P2P from peers, via `RequestListSnapshots`. Peers will return the 10 most recent snapshots, one message per snapshot.
5. The node aggregates snapshots from multiple peers, ordered by height and format (in reverse). If there are `chunk_hashes` mismatches between different snapshots, the one hosted by the largest amount of peers is chosen. The node iterates over all snapshots in reverse order by height and format until it finds one that satisfies all of the following conditions:
5. The node aggregates snapshots from multiple peers, ordered by height and format (in reverse). If there are mismatches between different snapshots, the one hosted by the largest amount of peers is chosen. The node iterates over all snapshots in reverse order by height and format until it finds one that satisfies all of the following conditions:
* The snapshot height's block is considered trustworthy by the light client (i.e. snapshot height is greater than trusted header and within unbonding period of the latest trustworthy block).
* The snapshot's height or format hasn't been explicitly rejected by an earlier `RequestOfferSnapshot` call (via `invalid_height` or `invalid_format`).
* The snapshot's height or format hasn't been explicitly rejected by an earlier `RequestOfferSnapshot`.
* The application accepts the `RequestOfferSnapshot` call.
6. The node downloads chunks in parallel from multiple peers, via `RequestLoadSnapshotChunk`, and both the sender and receiver verifies their checksums. Chunk messages cannot exceed 16 MB.
6. The node downloads chunks in parallel from multiple peers, via `RequestLoadSnapshotChunk`. Chunk messages cannot exceed 16 MB.
7. The node passes chunks sequentially to the app via `RequestApplySnapshotChunk`.
@@ -191,38 +203,6 @@ Taking consistent snapshots of IAVL trees is greatly simplified by them being ve
Snapshots must also be garbage collected after some configurable time, e.g. by keeping the latest `n` snapshots.
## Experimental Prototype
An experimental but functional state sync prototype is available in the `erik/statesync-prototype` branches of the Tendermint, IAVL, Cosmos SDK, and Gaia repositories. To fetch the necessary branches:
```sh
$ mkdir statesync
$ cd statesync
$ git clone git@github.com:tendermint/tendermint -b erik/statesync-prototype
$ git clone git@github.com:tendermint/iavl -b erik/statesync-prototype
$ git clone git@github.com:cosmos/cosmos-sdk -b erik/statesync-prototype
$ git clone git@github.com:cosmos/gaia -b erik/statesync-prototype
```
To spin up three nodes of a four-node testnet:
```sh
$ cd gaia
$ ./tools/start.sh
```
Wait for the first snapshot to be taken at height 3, then (in a separate terminal) start the fourth node with state sync enabled:
```sh
$ ./tools/sync.sh
```
To stop the testnet, run:
```sh
$ ./tools/stop.sh
```
## Resolved Questions
* Is it OK for state-synced nodes to not have historical blocks nor historical IAVL versions?
@@ -265,46 +245,6 @@ $ ./tools/stop.sh
> No, just use a max message size.
## Implementation Plan
### Core Tasks
* **Tendermint:** light client P2P transport [#4456](https://github.com/tendermint/tendermint/issues/4456)
* **IAVL:** export/import API [#210](https://github.com/tendermint/iavl/issues/210)
* **Cosmos SDK:** snapshotting, scheduling, and pruning [#5689](https://github.com/cosmos/cosmos-sdk/issues/5689)
* **Tendermint:** support starting with a truncated block history
* **Tendermint:** state sync reactor and ABCI interface [#828](https://github.com/tendermint/tendermint/issues/828)
* **Cosmos SDK:** snapshot ABCI implementation [#5690](https://github.com/cosmos/cosmos-sdk/issues/5690)
### Nice-to-Haves
* **Tendermint:** staged reactor startup (state sync → fast sync → block replay → wal replay → consensus)
> Let's do a time-boxed prototype (a few days) and see how much work it will be.
* Notify P2P peers about channel changes [#4394](https://github.com/tendermint/tendermint/issues/4394)
* Check peers have certain channels [#1148](https://github.com/tendermint/tendermint/issues/1148)
* **Tendermint:** prune blockchain history [#3652](https://github.com/tendermint/tendermint/issues/3652)
* **Tendermint:** allow genesis to start from non-zero height [#2543](https://github.com/tendermint/tendermint/issues/2543)
### Follow-up Tasks
* **Tendermint:** light client verification for fast sync [#4457](https://github.com/tendermint/tendermint/issues/4457)
* **Tendermint:** allow start with only blockstore [#3713](https://github.com/tendermint/tendermint/issues/3713)
* **Tendermint:** node should go back to fast-syncing when lagging significantly [#129](https://github.com/tendermint/tendermint/issues/129)
* **Tendermint:** backfill historical blocks [#4629](https://github.com/tendermint/tendermint/issues/4629)
## Status
Accepted

2
go.mod
View File

@@ -17,7 +17,7 @@ require (
github.com/libp2p/go-buffer-pool v0.0.2
github.com/magiconair/properties v1.8.1
github.com/pkg/errors v0.9.1
github.com/prometheus/client_golang v1.5.1
github.com/prometheus/client_golang v1.6.0
github.com/rcrowley/go-metrics v0.0.0-20200313005456-10cdbea86bc0
github.com/rs/cors v1.7.0
github.com/snikch/goodman v0.0.0-20171125024755-10e37e294daa

13
go.sum
View File

@@ -325,8 +325,8 @@ github.com/prometheus/client_golang v0.9.3/go.mod h1:/TN21ttK/J9q6uSwhBd54HahCDf
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.3.0 h1:miYCvYqFXtl/J9FIy8eNpBfYthAEFg+Ys0XyUVEcDsc=
github.com/prometheus/client_golang v1.3.0/go.mod h1:hJaj2vgQTGQmVCsAACORcieXFeDPbaTKGT+JTgUa3og=
github.com/prometheus/client_golang v1.5.1 h1:bdHYieyGlH+6OLEk2YQha8THib30KP0/yD0YH9m6xcA=
github.com/prometheus/client_golang v1.5.1/go.mod h1:e9GMxYsXl05ICDXkRhurwBS4Q3OK1iX/F2sw+iXX5zU=
github.com/prometheus/client_golang v1.6.0 h1:YVPodQOcK15POxhgARIvnDRVpLcuK8mglnMrWfyrw6A=
github.com/prometheus/client_golang v1.6.0/go.mod h1:ZLOG9ck3JLRdB5MgO8f+lLTe83AXG6ro35rLTxvnIl4=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910 h1:idejC8f05m9MGOsuEi1ATq9shN03HrxNkD/luQvxCv8=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190115171406-56726106282f/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
@@ -355,6 +355,8 @@ github.com/prometheus/procfs v0.0.0-20190507164030-5867b95ac084/go.mod h1:TjEm7z
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.8 h1:+fpWZdT24pJBiqJdAwYBjPSk+5YmQzYNPYzQsdzLkt8=
github.com/prometheus/procfs v0.0.8/go.mod h1:7Qr8sr6344vo1JqZ6HhLceV9o3AJ1Ff+GxbHq6oeK9A=
github.com/prometheus/procfs v0.0.11 h1:DhHlBtkHWPYi8O2y31JkK0TF+DGM+51OopZjH/Ia5qI=
github.com/prometheus/procfs v0.0.11/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
github.com/prometheus/tsdb v0.7.1/go.mod h1:qhTCs0VvXwvX/y3TZrWD7rabWM+ijKTux40TwIPHuXU=
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a h1:9ZKAASQSHhDYGoxY8uLVpewe1GDZ2vu2Tr/vTdVAkFQ=
github.com/rcrowley/go-metrics v0.0.0-20181016184325-3113b8401b8a/go.mod h1:bCqnVzQkZxMG4s8nGwiZ5l3QUCyqpo9Y+/ZMZ9VjZe4=
@@ -511,10 +513,11 @@ golang.org/x/sys v0.0.0-20190726091711-fc99dfbffb4e/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20190826190057-c7b8b68b1456/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191220142924-d4481acd189f h1:68K/z8GLUxV76xGSqwTWw2gyk/jwn79LUL43rES2g8o=
golang.org/x/sys v0.0.0-20191220142924-d4481acd189f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82 h1:ywK/j/KkyTHcdyYSZNXGjMwgmDSfjglYZ3vStQ/gSCU=
golang.org/x/sys v0.0.0-20200122134326-e047566fdf82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200106162015-b016eb3dc98e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd h1:xhmwyvizuTgC2qz7ZlMluP20uW+C3Rm0FD/WLDX8884=
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f h1:gWF768j/LaZugp8dyS4UwsslYCYz9XgFxvlgsn0n9H8=
golang.org/x/sys v0.0.0-20200420163511-1957bb5e6d1f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
@@ -563,8 +566,6 @@ google.golang.org/grpc v1.26.0 h1:2dTRdpdFEEhJYQD8EMLB61nnrzSCTbG38PhqdhvOltg=
google.golang.org/grpc v1.26.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk=
google.golang.org/grpc v1.28.0 h1:bO/TA4OxCOummhSf10siHuG7vJOiwh7SpRpFZDkOgl4=
google.golang.org/grpc v1.28.0/go.mod h1:rpkK4SK4GF4Ach/+MFLZUBavHOvF2JJB5uozKKal+60=
google.golang.org/grpc v1.29.0 h1:2pJjwYOdkZ9HlN4sWRYBg9ttH5bCOlsueaM+b/oYjwo=
google.golang.org/grpc v1.29.0/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
google.golang.org/grpc v1.29.1 h1:EC2SB8S04d2r73uptxphDSUG+kTKVgjRPF+N3xpxRB4=
google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk=
google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8=

View File

@@ -30,6 +30,7 @@ import (
"github.com/tendermint/tendermint/libs/log"
tmpubsub "github.com/tendermint/tendermint/libs/pubsub"
"github.com/tendermint/tendermint/libs/service"
lite "github.com/tendermint/tendermint/lite2"
mempl "github.com/tendermint/tendermint/mempool"
"github.com/tendermint/tendermint/p2p"
"github.com/tendermint/tendermint/p2p/pex"
@@ -43,6 +44,7 @@ import (
"github.com/tendermint/tendermint/state/txindex"
"github.com/tendermint/tendermint/state/txindex/kv"
"github.com/tendermint/tendermint/state/txindex/null"
"github.com/tendermint/tendermint/statesync"
"github.com/tendermint/tendermint/store"
"github.com/tendermint/tendermint/types"
tmtime "github.com/tendermint/tendermint/types/time"
@@ -123,6 +125,12 @@ func DefaultMetricsProvider(config *cfg.InstrumentationConfig) MetricsProvider {
// Option sets a parameter for the node.
type Option func(*Node)
// Temporary interface for switching to fast sync, we should get rid of v0 and v1 reactors.
// See: https://github.com/tendermint/tendermint/issues/4595
type fastSyncReactor interface {
SwitchToFastSync(sm.State) error
}
// CustomReactors allows you to add custom reactors (name -> p2p.Reactor) to
// the node's Switch.
//
@@ -134,6 +142,7 @@ type Option func(*Node)
// - CONSENSUS
// - EVIDENCE
// - PEX
// - STATESYNC
func CustomReactors(reactors map[string]p2p.Reactor) Option {
return func(n *Node) {
for name, reactor := range reactors {
@@ -147,6 +156,15 @@ func CustomReactors(reactors map[string]p2p.Reactor) Option {
}
}
// StateProvider overrides the state provider used by state sync to retrieve trusted app hashes and
// build a State object for bootstrapping the node.
// WARNING: this interface is considered unstable and subject to change.
func StateProvider(stateProvider statesync.StateProvider) Option {
return func(n *Node) {
n.stateSyncProvider = stateProvider
}
}
//------------------------------------------------------------------------------
// Node is the highest level interface to a full Tendermint node.
@@ -168,21 +186,24 @@ type Node struct {
isListening bool
// services
eventBus *types.EventBus // pub/sub for services
stateDB dbm.DB
blockStore *store.BlockStore // store the blockchain to disk
bcReactor p2p.Reactor // for fast-syncing
mempoolReactor *mempl.Reactor // for gossipping transactions
mempool mempl.Mempool
consensusState *cs.State // latest consensus state
consensusReactor *cs.Reactor // for participating in the consensus
pexReactor *pex.Reactor // for exchanging peer addresses
evidencePool *evidence.Pool // tracking evidence
proxyApp proxy.AppConns // connection to the application
rpcListeners []net.Listener // rpc servers
txIndexer txindex.TxIndexer
indexerService *txindex.IndexerService
prometheusSrv *http.Server
eventBus *types.EventBus // pub/sub for services
stateDB dbm.DB
blockStore *store.BlockStore // store the blockchain to disk
bcReactor p2p.Reactor // for fast-syncing
mempoolReactor *mempl.Reactor // for gossipping transactions
mempool mempl.Mempool
stateSync bool // whether the node should state sync on startup
stateSyncReactor *statesync.Reactor // for hosting and restoring state sync snapshots
stateSyncProvider statesync.StateProvider // provides state data for bootstrapping a node
consensusState *cs.State // latest consensus state
consensusReactor *cs.Reactor // for participating in the consensus
pexReactor *pex.Reactor // for exchanging peer addresses
evidencePool *evidence.Pool // tracking evidence
proxyApp proxy.AppConns // connection to the application
rpcListeners []net.Listener // rpc servers
txIndexer txindex.TxIndexer
indexerService *txindex.IndexerService
prometheusSrv *http.Server
}
func initDBs(config *cfg.Config, dbProvider DBProvider) (blockStore *store.BlockStore, stateDB dbm.DB, err error) {
@@ -369,7 +390,7 @@ func createConsensusReactor(config *cfg.Config,
evidencePool *evidence.Pool,
privValidator types.PrivValidator,
csMetrics *cs.Metrics,
fastSync bool,
waitSync bool,
eventBus *types.EventBus,
consensusLogger log.Logger) (*consensus.Reactor, *consensus.State) {
@@ -386,7 +407,7 @@ func createConsensusReactor(config *cfg.Config,
if privValidator != nil {
consensusState.SetPrivValidator(privValidator)
}
consensusReactor := cs.NewReactor(consensusState, fastSync, cs.ReactorMetrics(csMetrics))
consensusReactor := cs.NewReactor(consensusState, waitSync, cs.ReactorMetrics(csMetrics))
consensusReactor.SetLogger(consensusLogger)
// services which will be publishing and/or subscribing for messages (events)
// consensusReactor will set it on consensusState and blockExecutor
@@ -469,6 +490,7 @@ func createSwitch(config *cfg.Config,
peerFilters []p2p.PeerFilterFunc,
mempoolReactor *mempl.Reactor,
bcReactor p2p.Reactor,
stateSyncReactor *statesync.Reactor,
consensusReactor *consensus.Reactor,
evidenceReactor *evidence.Reactor,
nodeInfo p2p.NodeInfo,
@@ -486,6 +508,7 @@ func createSwitch(config *cfg.Config,
sw.AddReactor("BLOCKCHAIN", bcReactor)
sw.AddReactor("CONSENSUS", consensusReactor)
sw.AddReactor("EVIDENCE", evidenceReactor)
sw.AddReactor("STATESYNC", stateSyncReactor)
sw.SetNodeInfo(nodeInfo)
sw.SetNodeKey(nodeKey)
@@ -542,6 +565,59 @@ func createPEXReactorAndAddToSwitch(addrBook pex.AddrBook, config *cfg.Config,
return pexReactor
}
// startStateSync starts an asynchronous state sync process, then switches to fast sync mode.
func startStateSync(ssR *statesync.Reactor, bcR fastSyncReactor, conR *consensus.Reactor,
stateProvider statesync.StateProvider, config *cfg.StateSyncConfig, fastSync bool,
stateDB dbm.DB, blockStore *store.BlockStore) error {
ssR.Logger.Info("Starting state sync")
state := sm.LoadState(stateDB)
if stateProvider == nil {
var err error
stateProvider, err = statesync.NewLightClientStateProvider(state.ChainID, state.Version,
config.RPCServers, lite.TrustOptions{
Period: config.TrustPeriod,
Height: config.TrustHeight,
Hash: config.TrustHashBytes(),
}, ssR.Logger.With("module", "lite"))
if err != nil {
return fmt.Errorf("failed to set up light client state provider: %w", err)
}
}
go func() {
state, commit, err := ssR.Sync(stateProvider)
if err != nil {
ssR.Logger.Error("State sync failed", "err", err)
return
}
err = sm.BootstrapState(stateDB, state)
if err != nil {
ssR.Logger.Error("Failed to bootstrap node with new state", "err", err)
return
}
err = blockStore.SaveSeenCommit(state.LastBlockHeight, commit)
if err != nil {
ssR.Logger.Error("Failed to store last seen commit", "err", err)
return
}
if fastSync {
// FIXME Very ugly to have these metrics bleed through here.
conR.Metrics.StateSyncing.Set(0)
conR.Metrics.FastSyncing.Set(1)
err = bcR.SwitchToFastSync(state)
if err != nil {
ssR.Logger.Error("Failed to switch to fast sync", "err", err)
return
}
} else {
conR.SwitchToConsensus(state, true)
}
}()
return nil
}
// NewNode returns a new, ready to go, Tendermint Node.
func NewNode(config *cfg.Config,
privValidator types.PrivValidator,
@@ -584,18 +660,6 @@ func NewNode(config *cfg.Config,
return nil, err
}
// Create the handshaker, which calls RequestInfo, sets the AppVersion on the state,
// and replays any blocks as necessary to sync tendermint with the app.
consensusLogger := logger.With("module", "consensus")
if err := doHandshake(stateDB, state, blockStore, genDoc, eventBus, proxyApp, consensusLogger); err != nil {
return nil, err
}
// Reload the state. It will have the Version.Consensus.App set by the
// Handshake, and may have other modifications as well (ie. depending on
// what happened during block replay).
state = sm.LoadState(stateDB)
// If an address is provided, listen on the socket for a connection from an
// external signing process.
if config.PrivValidatorListenAddr != "" {
@@ -611,11 +675,34 @@ func NewNode(config *cfg.Config,
return nil, errors.Wrap(err, "can't get pubkey")
}
logNodeStartupInfo(state, pubKey, logger, consensusLogger)
// Decide whether to fast-sync or not
// Determine whether we should do state and/or fast sync.
// We don't fast-sync when the only validator is us.
fastSync := config.FastSyncMode && !onlyValidatorIsUs(state, pubKey)
stateSync := config.StateSync.Enable && !onlyValidatorIsUs(state, pubKey)
if stateSync && state.LastBlockHeight > 0 {
logger.Info("Found local state with non-zero height, skipping state sync")
stateSync = false
}
// Don't check fastSync == true, since the v2 reactor has a bug where it fast syncs regardless.
if stateSync && config.FastSync.Version == "v2" {
return nil, errors.New("state sync is not supported with blockchain v2 reactor")
}
// Create the handshaker, which calls RequestInfo, sets the AppVersion on the state,
// and replays any blocks as necessary to sync tendermint with the app.
consensusLogger := logger.With("module", "consensus")
if !stateSync {
if err := doHandshake(stateDB, state, blockStore, genDoc, eventBus, proxyApp, consensusLogger); err != nil {
return nil, err
}
// Reload the state. It will have the Version.Consensus.App set by the
// Handshake, and may have other modifications as well (ie. depending on
// what happened during block replay).
state = sm.LoadState(stateDB)
}
logNodeStartupInfo(state, pubKey, logger, consensusLogger)
csMetrics, p2pMetrics, memplMetrics, smMetrics := metricsProvider(genDoc.ChainID)
@@ -638,18 +725,32 @@ func NewNode(config *cfg.Config,
sm.BlockExecutorWithMetrics(smMetrics),
)
// Make BlockchainReactor
bcReactor, err := createBlockchainReactor(config, state, blockExec, blockStore, fastSync, logger)
// Make BlockchainReactor. Don't start fast sync if we're doing a state sync first.
bcReactor, err := createBlockchainReactor(config, state, blockExec, blockStore, fastSync && !stateSync, logger)
if err != nil {
return nil, errors.Wrap(err, "could not create blockchain reactor")
}
// Make ConsensusReactor
// Make ConsensusReactor. Don't enable fully if doing a state sync and/or fast sync first.
// FIXME We need to update metrics here, since other reactors don't have access to them.
if stateSync {
csMetrics.StateSyncing.Set(1)
} else if fastSync {
csMetrics.FastSyncing.Set(1)
}
consensusReactor, consensusState := createConsensusReactor(
config, state, blockExec, blockStore, mempool, evidencePool,
privValidator, csMetrics, fastSync, eventBus, consensusLogger,
privValidator, csMetrics, stateSync || fastSync, eventBus, consensusLogger,
)
// Set up state sync reactor, and schedule a sync if requested.
// FIXME The way we do phased startups (e.g. replay -> fast sync -> consensus) is very messy,
// we should clean this whole thing up. See:
// https://github.com/tendermint/tendermint/issues/4644
stateSyncReactor := statesync.NewReactor(proxyApp.Snapshot(), proxyApp.Query(),
config.StateSync.TempDir)
stateSyncReactor.SetLogger(logger.With("module", "statesync"))
nodeInfo, err := makeNodeInfo(config, nodeKey, txIndexer, genDoc, state)
if err != nil {
return nil, err
@@ -662,7 +763,7 @@ func NewNode(config *cfg.Config,
p2pLogger := logger.With("module", "p2p")
sw := createSwitch(
config, transport, p2pMetrics, peerFilters, mempoolReactor, bcReactor,
consensusReactor, evidenceReactor, nodeInfo, nodeKey, p2pLogger,
stateSyncReactor, consensusReactor, evidenceReactor, nodeInfo, nodeKey, p2pLogger,
)
err = sw.AddPersistentPeers(splitAndTrimEmpty(config.P2P.PersistentPeers, ",", " "))
@@ -721,6 +822,8 @@ func NewNode(config *cfg.Config,
mempool: mempool,
consensusState: consensusState,
consensusReactor: consensusReactor,
stateSyncReactor: stateSyncReactor,
stateSync: stateSync,
pexReactor: pexReactor,
evidencePool: evidencePool,
proxyApp: proxyApp,
@@ -791,6 +894,19 @@ func (n *Node) OnStart() error {
return errors.Wrap(err, "could not dial peers from persistent_peers field")
}
// Run state sync
if n.stateSync {
bcR, ok := n.bcReactor.(fastSyncReactor)
if !ok {
return fmt.Errorf("this blockchain reactor does not support switching from state sync")
}
err := startStateSync(n.stateSyncReactor, bcR, n.consensusReactor, n.stateSyncProvider,
n.config.StateSync, n.config.FastSyncMode, n.stateDB, n.blockStore)
if err != nil {
return fmt.Errorf("failed to start state sync: %w", err)
}
}
return nil
}
@@ -1106,6 +1222,7 @@ func makeNodeInfo(
cs.StateChannel, cs.DataChannel, cs.VoteChannel, cs.VoteSetBitsChannel,
mempl.MempoolChannel,
evidence.EvidenceChannel,
statesync.SnapshotChannel, statesync.ChunkChannel,
},
Moniker: config.Moniker,
Other: p2p.DefaultNodeInfoOther{

331
p2p/mocks/peer.go Normal file
View File

@@ -0,0 +1,331 @@
// Code generated by mockery v1.0.0. DO NOT EDIT.
package mocks
import (
log "github.com/tendermint/tendermint/libs/log"
conn "github.com/tendermint/tendermint/p2p/conn"
mock "github.com/stretchr/testify/mock"
net "net"
p2p "github.com/tendermint/tendermint/p2p"
)
// Peer is an autogenerated mock type for the Peer type
type Peer struct {
mock.Mock
}
// CloseConn provides a mock function with given fields:
func (_m *Peer) CloseConn() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// FlushStop provides a mock function with given fields:
func (_m *Peer) FlushStop() {
_m.Called()
}
// Get provides a mock function with given fields: _a0
func (_m *Peer) Get(_a0 string) interface{} {
ret := _m.Called(_a0)
var r0 interface{}
if rf, ok := ret.Get(0).(func(string) interface{}); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(interface{})
}
}
return r0
}
// ID provides a mock function with given fields:
func (_m *Peer) ID() p2p.ID {
ret := _m.Called()
var r0 p2p.ID
if rf, ok := ret.Get(0).(func() p2p.ID); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(p2p.ID)
}
return r0
}
// IsOutbound provides a mock function with given fields:
func (_m *Peer) IsOutbound() bool {
ret := _m.Called()
var r0 bool
if rf, ok := ret.Get(0).(func() bool); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(bool)
}
return r0
}
// IsPersistent provides a mock function with given fields:
func (_m *Peer) IsPersistent() bool {
ret := _m.Called()
var r0 bool
if rf, ok := ret.Get(0).(func() bool); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(bool)
}
return r0
}
// IsRunning provides a mock function with given fields:
func (_m *Peer) IsRunning() bool {
ret := _m.Called()
var r0 bool
if rf, ok := ret.Get(0).(func() bool); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(bool)
}
return r0
}
// NodeInfo provides a mock function with given fields:
func (_m *Peer) NodeInfo() p2p.NodeInfo {
ret := _m.Called()
var r0 p2p.NodeInfo
if rf, ok := ret.Get(0).(func() p2p.NodeInfo); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(p2p.NodeInfo)
}
}
return r0
}
// OnReset provides a mock function with given fields:
func (_m *Peer) OnReset() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// OnStart provides a mock function with given fields:
func (_m *Peer) OnStart() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// OnStop provides a mock function with given fields:
func (_m *Peer) OnStop() {
_m.Called()
}
// Quit provides a mock function with given fields:
func (_m *Peer) Quit() <-chan struct{} {
ret := _m.Called()
var r0 <-chan struct{}
if rf, ok := ret.Get(0).(func() <-chan struct{}); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(<-chan struct{})
}
}
return r0
}
// RemoteAddr provides a mock function with given fields:
func (_m *Peer) RemoteAddr() net.Addr {
ret := _m.Called()
var r0 net.Addr
if rf, ok := ret.Get(0).(func() net.Addr); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(net.Addr)
}
}
return r0
}
// RemoteIP provides a mock function with given fields:
func (_m *Peer) RemoteIP() net.IP {
ret := _m.Called()
var r0 net.IP
if rf, ok := ret.Get(0).(func() net.IP); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(net.IP)
}
}
return r0
}
// Reset provides a mock function with given fields:
func (_m *Peer) Reset() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// Send provides a mock function with given fields: _a0, _a1
func (_m *Peer) Send(_a0 byte, _a1 []byte) bool {
ret := _m.Called(_a0, _a1)
var r0 bool
if rf, ok := ret.Get(0).(func(byte, []byte) bool); ok {
r0 = rf(_a0, _a1)
} else {
r0 = ret.Get(0).(bool)
}
return r0
}
// Set provides a mock function with given fields: _a0, _a1
func (_m *Peer) Set(_a0 string, _a1 interface{}) {
_m.Called(_a0, _a1)
}
// SetLogger provides a mock function with given fields: _a0
func (_m *Peer) SetLogger(_a0 log.Logger) {
_m.Called(_a0)
}
// SocketAddr provides a mock function with given fields:
func (_m *Peer) SocketAddr() *p2p.NetAddress {
ret := _m.Called()
var r0 *p2p.NetAddress
if rf, ok := ret.Get(0).(func() *p2p.NetAddress); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*p2p.NetAddress)
}
}
return r0
}
// Start provides a mock function with given fields:
func (_m *Peer) Start() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// Status provides a mock function with given fields:
func (_m *Peer) Status() conn.ConnectionStatus {
ret := _m.Called()
var r0 conn.ConnectionStatus
if rf, ok := ret.Get(0).(func() conn.ConnectionStatus); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(conn.ConnectionStatus)
}
return r0
}
// Stop provides a mock function with given fields:
func (_m *Peer) Stop() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// String provides a mock function with given fields:
func (_m *Peer) String() string {
ret := _m.Called()
var r0 string
if rf, ok := ret.Get(0).(func() string); ok {
r0 = rf()
} else {
r0 = ret.Get(0).(string)
}
return r0
}
// TrySend provides a mock function with given fields: _a0, _a1
func (_m *Peer) TrySend(_a0 byte, _a1 []byte) bool {
ret := _m.Called(_a0, _a1)
var r0 bool
if rf, ok := ret.Get(0).(func(byte, []byte) bool); ok {
r0 = rf(_a0, _a1)
} else {
r0 = ret.Get(0).(bool)
}
return r0
}

View File

@@ -12,6 +12,8 @@ import (
tmconn "github.com/tendermint/tendermint/p2p/conn"
)
//go:generate mockery -case underscore -name Peer
const metricsTickerDuration = 10 * time.Second
// Peer is an interface representing a peer connected on a reactor.

View File

@@ -5,6 +5,8 @@ import (
"github.com/tendermint/tendermint/abci/types"
)
//go:generate mockery -case underscore -name AppConnConsensus|AppConnMempool|AppConnQuery|AppConnSnapshot
//----------------------------------------------------------------------------------------
// Enforce which abci msgs can be sent on a connection at the type level
@@ -40,6 +42,15 @@ type AppConnQuery interface {
// SetOptionSync(key string, value string) (res types.Result)
}
type AppConnSnapshot interface {
Error() error
ListSnapshotsSync(types.RequestListSnapshots) (*types.ResponseListSnapshots, error)
OfferSnapshotSync(types.RequestOfferSnapshot) (*types.ResponseOfferSnapshot, error)
LoadSnapshotChunkSync(types.RequestLoadSnapshotChunk) (*types.ResponseLoadSnapshotChunk, error)
ApplySnapshotChunkSync(types.RequestApplySnapshotChunk) (*types.ResponseApplySnapshotChunk, error)
}
//-----------------------------------------------------------------------------------------
// Implements AppConnConsensus (subset of abcicli.Client)
@@ -142,3 +153,38 @@ func (app *appConnQuery) InfoSync(req types.RequestInfo) (*types.ResponseInfo, e
func (app *appConnQuery) QuerySync(reqQuery types.RequestQuery) (*types.ResponseQuery, error) {
return app.appConn.QuerySync(reqQuery)
}
//------------------------------------------------
// Implements AppConnSnapshot (subset of abcicli.Client)
type appConnSnapshot struct {
appConn abcicli.Client
}
func NewAppConnSnapshot(appConn abcicli.Client) AppConnSnapshot {
return &appConnSnapshot{
appConn: appConn,
}
}
func (app *appConnSnapshot) Error() error {
return app.appConn.Error()
}
func (app *appConnSnapshot) ListSnapshotsSync(req types.RequestListSnapshots) (*types.ResponseListSnapshots, error) {
return app.appConn.ListSnapshotsSync(req)
}
func (app *appConnSnapshot) OfferSnapshotSync(req types.RequestOfferSnapshot) (*types.ResponseOfferSnapshot, error) {
return app.appConn.OfferSnapshotSync(req)
}
func (app *appConnSnapshot) LoadSnapshotChunkSync(
req types.RequestLoadSnapshotChunk) (*types.ResponseLoadSnapshotChunk, error) {
return app.appConn.LoadSnapshotChunkSync(req)
}
func (app *appConnSnapshot) ApplySnapshotChunkSync(
req types.RequestApplySnapshotChunk) (*types.ResponseApplySnapshotChunk, error) {
return app.appConn.ApplySnapshotChunkSync(req)
}

View File

@@ -0,0 +1,142 @@
// Code generated by mockery v1.0.0. DO NOT EDIT.
package mocks
import (
mock "github.com/stretchr/testify/mock"
abcicli "github.com/tendermint/tendermint/abci/client"
types "github.com/tendermint/tendermint/abci/types"
)
// AppConnConsensus is an autogenerated mock type for the AppConnConsensus type
type AppConnConsensus struct {
mock.Mock
}
// BeginBlockSync provides a mock function with given fields: _a0
func (_m *AppConnConsensus) BeginBlockSync(_a0 types.RequestBeginBlock) (*types.ResponseBeginBlock, error) {
ret := _m.Called(_a0)
var r0 *types.ResponseBeginBlock
if rf, ok := ret.Get(0).(func(types.RequestBeginBlock) *types.ResponseBeginBlock); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseBeginBlock)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(types.RequestBeginBlock) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// CommitSync provides a mock function with given fields:
func (_m *AppConnConsensus) CommitSync() (*types.ResponseCommit, error) {
ret := _m.Called()
var r0 *types.ResponseCommit
if rf, ok := ret.Get(0).(func() *types.ResponseCommit); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseCommit)
}
}
var r1 error
if rf, ok := ret.Get(1).(func() error); ok {
r1 = rf()
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// DeliverTxAsync provides a mock function with given fields: _a0
func (_m *AppConnConsensus) DeliverTxAsync(_a0 types.RequestDeliverTx) *abcicli.ReqRes {
ret := _m.Called(_a0)
var r0 *abcicli.ReqRes
if rf, ok := ret.Get(0).(func(types.RequestDeliverTx) *abcicli.ReqRes); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*abcicli.ReqRes)
}
}
return r0
}
// EndBlockSync provides a mock function with given fields: _a0
func (_m *AppConnConsensus) EndBlockSync(_a0 types.RequestEndBlock) (*types.ResponseEndBlock, error) {
ret := _m.Called(_a0)
var r0 *types.ResponseEndBlock
if rf, ok := ret.Get(0).(func(types.RequestEndBlock) *types.ResponseEndBlock); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseEndBlock)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(types.RequestEndBlock) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// Error provides a mock function with given fields:
func (_m *AppConnConsensus) Error() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// InitChainSync provides a mock function with given fields: _a0
func (_m *AppConnConsensus) InitChainSync(_a0 types.RequestInitChain) (*types.ResponseInitChain, error) {
ret := _m.Called(_a0)
var r0 *types.ResponseInitChain
if rf, ok := ret.Get(0).(func(types.RequestInitChain) *types.ResponseInitChain); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseInitChain)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(types.RequestInitChain) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// SetResponseCallback provides a mock function with given fields: _a0
func (_m *AppConnConsensus) SetResponseCallback(_a0 abcicli.Callback) {
_m.Called(_a0)
}

View File

@@ -0,0 +1,80 @@
// Code generated by mockery v1.0.0. DO NOT EDIT.
package mocks
import (
mock "github.com/stretchr/testify/mock"
abcicli "github.com/tendermint/tendermint/abci/client"
types "github.com/tendermint/tendermint/abci/types"
)
// AppConnMempool is an autogenerated mock type for the AppConnMempool type
type AppConnMempool struct {
mock.Mock
}
// CheckTxAsync provides a mock function with given fields: _a0
func (_m *AppConnMempool) CheckTxAsync(_a0 types.RequestCheckTx) *abcicli.ReqRes {
ret := _m.Called(_a0)
var r0 *abcicli.ReqRes
if rf, ok := ret.Get(0).(func(types.RequestCheckTx) *abcicli.ReqRes); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*abcicli.ReqRes)
}
}
return r0
}
// Error provides a mock function with given fields:
func (_m *AppConnMempool) Error() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// FlushAsync provides a mock function with given fields:
func (_m *AppConnMempool) FlushAsync() *abcicli.ReqRes {
ret := _m.Called()
var r0 *abcicli.ReqRes
if rf, ok := ret.Get(0).(func() *abcicli.ReqRes); ok {
r0 = rf()
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*abcicli.ReqRes)
}
}
return r0
}
// FlushSync provides a mock function with given fields:
func (_m *AppConnMempool) FlushSync() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// SetResponseCallback provides a mock function with given fields: _a0
func (_m *AppConnMempool) SetResponseCallback(_a0 abcicli.Callback) {
_m.Called(_a0)
}

View File

@@ -0,0 +1,97 @@
// Code generated by mockery v1.0.0. DO NOT EDIT.
package mocks
import (
mock "github.com/stretchr/testify/mock"
types "github.com/tendermint/tendermint/abci/types"
)
// AppConnQuery is an autogenerated mock type for the AppConnQuery type
type AppConnQuery struct {
mock.Mock
}
// EchoSync provides a mock function with given fields: _a0
func (_m *AppConnQuery) EchoSync(_a0 string) (*types.ResponseEcho, error) {
ret := _m.Called(_a0)
var r0 *types.ResponseEcho
if rf, ok := ret.Get(0).(func(string) *types.ResponseEcho); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseEcho)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(string) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// Error provides a mock function with given fields:
func (_m *AppConnQuery) Error() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// InfoSync provides a mock function with given fields: _a0
func (_m *AppConnQuery) InfoSync(_a0 types.RequestInfo) (*types.ResponseInfo, error) {
ret := _m.Called(_a0)
var r0 *types.ResponseInfo
if rf, ok := ret.Get(0).(func(types.RequestInfo) *types.ResponseInfo); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseInfo)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(types.RequestInfo) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// QuerySync provides a mock function with given fields: _a0
func (_m *AppConnQuery) QuerySync(_a0 types.RequestQuery) (*types.ResponseQuery, error) {
ret := _m.Called(_a0)
var r0 *types.ResponseQuery
if rf, ok := ret.Get(0).(func(types.RequestQuery) *types.ResponseQuery); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseQuery)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(types.RequestQuery) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}

View File

@@ -0,0 +1,120 @@
// Code generated by mockery v1.0.0. DO NOT EDIT.
package mocks
import (
mock "github.com/stretchr/testify/mock"
types "github.com/tendermint/tendermint/abci/types"
)
// AppConnSnapshot is an autogenerated mock type for the AppConnSnapshot type
type AppConnSnapshot struct {
mock.Mock
}
// ApplySnapshotChunkSync provides a mock function with given fields: _a0
func (_m *AppConnSnapshot) ApplySnapshotChunkSync(_a0 types.RequestApplySnapshotChunk) (*types.ResponseApplySnapshotChunk, error) {
ret := _m.Called(_a0)
var r0 *types.ResponseApplySnapshotChunk
if rf, ok := ret.Get(0).(func(types.RequestApplySnapshotChunk) *types.ResponseApplySnapshotChunk); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseApplySnapshotChunk)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(types.RequestApplySnapshotChunk) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// Error provides a mock function with given fields:
func (_m *AppConnSnapshot) Error() error {
ret := _m.Called()
var r0 error
if rf, ok := ret.Get(0).(func() error); ok {
r0 = rf()
} else {
r0 = ret.Error(0)
}
return r0
}
// ListSnapshotsSync provides a mock function with given fields: _a0
func (_m *AppConnSnapshot) ListSnapshotsSync(_a0 types.RequestListSnapshots) (*types.ResponseListSnapshots, error) {
ret := _m.Called(_a0)
var r0 *types.ResponseListSnapshots
if rf, ok := ret.Get(0).(func(types.RequestListSnapshots) *types.ResponseListSnapshots); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseListSnapshots)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(types.RequestListSnapshots) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// LoadSnapshotChunkSync provides a mock function with given fields: _a0
func (_m *AppConnSnapshot) LoadSnapshotChunkSync(_a0 types.RequestLoadSnapshotChunk) (*types.ResponseLoadSnapshotChunk, error) {
ret := _m.Called(_a0)
var r0 *types.ResponseLoadSnapshotChunk
if rf, ok := ret.Get(0).(func(types.RequestLoadSnapshotChunk) *types.ResponseLoadSnapshotChunk); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseLoadSnapshotChunk)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(types.RequestLoadSnapshotChunk) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// OfferSnapshotSync provides a mock function with given fields: _a0
func (_m *AppConnSnapshot) OfferSnapshotSync(_a0 types.RequestOfferSnapshot) (*types.ResponseOfferSnapshot, error) {
ret := _m.Called(_a0)
var r0 *types.ResponseOfferSnapshot
if rf, ok := ret.Get(0).(func(types.RequestOfferSnapshot) *types.ResponseOfferSnapshot); ok {
r0 = rf(_a0)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.ResponseOfferSnapshot)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(types.RequestOfferSnapshot) error); ok {
r1 = rf(_a0)
} else {
r1 = ret.Error(1)
}
return r0, r1
}

View File

@@ -1,7 +1,7 @@
package proxy
import (
"github.com/pkg/errors"
"fmt"
"github.com/tendermint/tendermint/libs/service"
)
@@ -15,6 +15,7 @@ type AppConns interface {
Mempool() AppConnMempool
Consensus() AppConnConsensus
Query() AppConnQuery
Snapshot() AppConnSnapshot
}
func NewAppConns(clientCreator ClientCreator) AppConns {
@@ -33,6 +34,7 @@ type multiAppConn struct {
mempoolConn AppConnMempool
consensusConn AppConnConsensus
queryConn AppConnQuery
snapshotConn AppConnSnapshot
clientCreator ClientCreator
}
@@ -61,37 +63,53 @@ func (app *multiAppConn) Query() AppConnQuery {
return app.queryConn
}
// Returns the snapshot Connection
func (app *multiAppConn) Snapshot() AppConnSnapshot {
return app.snapshotConn
}
func (app *multiAppConn) OnStart() error {
// query connection
querycli, err := app.clientCreator.NewABCIClient()
if err != nil {
return errors.Wrap(err, "Error creating ABCI client (query connection)")
return fmt.Errorf("error creating ABCI client (query connection): %w", err)
}
querycli.SetLogger(app.Logger.With("module", "abci-client", "connection", "query"))
if err := querycli.Start(); err != nil {
return errors.Wrap(err, "Error starting ABCI client (query connection)")
return fmt.Errorf("error starting ABCI client (query connection): %w", err)
}
app.queryConn = NewAppConnQuery(querycli)
// snapshot connection
snapshotcli, err := app.clientCreator.NewABCIClient()
if err != nil {
return fmt.Errorf("error creating ABCI client (snapshot connection): %w", err)
}
snapshotcli.SetLogger(app.Logger.With("module", "abci-client", "connection", "snapshot"))
if err := snapshotcli.Start(); err != nil {
return fmt.Errorf("error starting ABCI client (snapshot connection): %w", err)
}
app.snapshotConn = NewAppConnSnapshot(snapshotcli)
// mempool connection
memcli, err := app.clientCreator.NewABCIClient()
if err != nil {
return errors.Wrap(err, "Error creating ABCI client (mempool connection)")
return fmt.Errorf("error creating ABCI client (mempool connection): %w", err)
}
memcli.SetLogger(app.Logger.With("module", "abci-client", "connection", "mempool"))
if err := memcli.Start(); err != nil {
return errors.Wrap(err, "Error starting ABCI client (mempool connection)")
return fmt.Errorf("error starting ABCI client (mempool connection): %w", err)
}
app.mempoolConn = NewAppConnMempool(memcli)
// consensus connection
concli, err := app.clientCreator.NewABCIClient()
if err != nil {
return errors.Wrap(err, "Error creating ABCI client (consensus connection)")
return fmt.Errorf("error creating ABCI client (consensus connection): %w", err)
}
concli.SetLogger(app.Logger.With("module", "abci-client", "connection", "consensus"))
if err := concli.Start(); err != nil {
return errors.Wrap(err, "Error starting ABCI client (consensus connection)")
return fmt.Errorf("error starting ABCI client (consensus connection): %w", err)
}
app.consensusConn = NewAppConnConsensus(concli)

View File

@@ -31,7 +31,7 @@ func Status(ctx *rpctypes.Context) (*ctypes.ResultStatus, error) {
}
var latestHeight int64
if consensusReactor.FastSync() {
if consensusReactor.WaitSync() {
latestHeight = blockStore.Height()
} else {
latestHeight = consensusState.GetLastHeight()
@@ -66,7 +66,7 @@ func Status(ctx *rpctypes.Context) (*ctypes.ResultStatus, error) {
EarliestAppHash: earliestAppHash,
EarliestBlockHeight: earliestBlockHeight,
EarliestBlockTime: time.Unix(0, earliestBlockTimeNano),
CatchingUp: consensusReactor.FastSync(),
CatchingUp: consensusReactor.WaitSync(),
},
ValidatorInfo: ctypes.ValidatorInfo{
Address: pubKey.Address(),

View File

@@ -117,6 +117,16 @@ func saveState(db dbm.DB, state State, key []byte) {
}
}
// BootstrapState saves a new state, used e.g. by state sync when starting from non-zero height.
func BootstrapState(db dbm.DB, state State) error {
height := state.LastBlockHeight
saveValidatorsInfo(db, height, height, state.LastValidators)
saveValidatorsInfo(db, height+1, height+1, state.Validators)
saveValidatorsInfo(db, height+2, height+2, state.NextValidators)
saveConsensusParamsInfo(db, height+1, height+1, state.ConsensusParams)
return db.SetSync(stateKey, state.Bytes())
}
//------------------------------------------------------------------------
// ABCIResponses retains the responses

321
statesync/chunks.go Normal file
View File

@@ -0,0 +1,321 @@
package statesync
import (
"errors"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strconv"
"sync"
"time"
"github.com/tendermint/tendermint/p2p"
)
// errDone is returned by chunkQueue.Next() when all chunks have been returned.
var errDone = errors.New("chunk queue has completed")
// chunk contains data for a chunk.
type chunk struct {
Height uint64
Format uint32
Index uint32
Chunk []byte
Sender p2p.ID
}
// chunkQueue manages chunks for a state sync process, ordering them if requested. It acts as an
// iterator over all chunks, but callers can request chunks to be retried, optionally after
// refetching.
type chunkQueue struct {
sync.Mutex
snapshot *snapshot // if this is nil, the queue has been closed
dir string // temp dir for on-disk chunk storage
chunkFiles map[uint32]string // path to temporary chunk file
chunkSenders map[uint32]p2p.ID // the peer who sent the given chunk
chunkAllocated map[uint32]bool // chunks that have been allocated via Allocate()
chunkReturned map[uint32]bool // chunks returned via Next()
waiters map[uint32][]chan<- uint32 // signals WaitFor() waiters about chunk arrival
}
// newChunkQueue creates a new chunk queue for a snapshot, using a temp dir for storage.
// Callers must call Close() when done.
func newChunkQueue(snapshot *snapshot, tempDir string) (*chunkQueue, error) {
dir, err := ioutil.TempDir(tempDir, "tm-statesync")
if err != nil {
return nil, fmt.Errorf("unable to create temp dir for state sync chunks: %w", err)
}
if snapshot.Chunks == 0 {
return nil, errors.New("snapshot has no chunks")
}
return &chunkQueue{
snapshot: snapshot,
dir: dir,
chunkFiles: make(map[uint32]string, snapshot.Chunks),
chunkSenders: make(map[uint32]p2p.ID, snapshot.Chunks),
chunkAllocated: make(map[uint32]bool, snapshot.Chunks),
chunkReturned: make(map[uint32]bool, snapshot.Chunks),
waiters: make(map[uint32][]chan<- uint32),
}, nil
}
// Add adds a chunk to the queue. It ignores chunks that already exist, returning false.
func (q *chunkQueue) Add(chunk *chunk) (bool, error) {
if chunk == nil || chunk.Chunk == nil {
return false, errors.New("cannot add nil chunk")
}
q.Lock()
defer q.Unlock()
if q.snapshot == nil {
return false, nil // queue is closed
}
if chunk.Height != q.snapshot.Height {
return false, fmt.Errorf("invalid chunk height %v, expected %v", chunk.Height, q.snapshot.Height)
}
if chunk.Format != q.snapshot.Format {
return false, fmt.Errorf("invalid chunk format %v, expected %v", chunk.Format, q.snapshot.Format)
}
if chunk.Index >= q.snapshot.Chunks {
return false, fmt.Errorf("received unexpected chunk %v", chunk.Index)
}
if q.chunkFiles[chunk.Index] != "" {
return false, nil
}
path := filepath.Join(q.dir, strconv.FormatUint(uint64(chunk.Index), 10))
err := ioutil.WriteFile(path, chunk.Chunk, 0644)
if err != nil {
return false, fmt.Errorf("failed to save chunk %v to file %v: %w", chunk.Index, path, err)
}
q.chunkFiles[chunk.Index] = path
q.chunkSenders[chunk.Index] = chunk.Sender
// Signal any waiters that the chunk has arrived.
for _, waiter := range q.waiters[chunk.Index] {
waiter <- chunk.Index
close(waiter)
}
delete(q.waiters, chunk.Index)
return true, nil
}
// Allocate allocates a chunk to the caller, making it responsible for fetching it. Returns
// errDone once no chunks are left or the queue is closed.
func (q *chunkQueue) Allocate() (uint32, error) {
q.Lock()
defer q.Unlock()
if q.snapshot == nil {
return 0, errDone
}
if uint32(len(q.chunkAllocated)) >= q.snapshot.Chunks {
return 0, errDone
}
for i := uint32(0); i < q.snapshot.Chunks; i++ {
if !q.chunkAllocated[i] {
q.chunkAllocated[i] = true
return i, nil
}
}
return 0, errDone
}
// Close closes the chunk queue, cleaning up all temporary files.
func (q *chunkQueue) Close() error {
q.Lock()
defer q.Unlock()
if q.snapshot == nil {
return nil
}
for _, waiters := range q.waiters {
for _, waiter := range waiters {
close(waiter)
}
}
q.waiters = nil
q.snapshot = nil
err := os.RemoveAll(q.dir)
if err != nil {
return fmt.Errorf("failed to clean up state sync tempdir %v: %w", q.dir, err)
}
return nil
}
// Discard discards a chunk. It will be removed from the queue, available for allocation, and can
// be added and returned via Next() again. If the chunk is not already in the queue this does
// nothing, to avoid it being allocated to multiple fetchers.
func (q *chunkQueue) Discard(index uint32) error {
q.Lock()
defer q.Unlock()
return q.discard(index)
}
// discard discards a chunk, scheduling it for refetching. The caller must hold the mutex lock.
func (q *chunkQueue) discard(index uint32) error {
if q.snapshot == nil {
return nil
}
path := q.chunkFiles[index]
if path == "" {
return nil
}
err := os.Remove(path)
if err != nil {
return fmt.Errorf("failed to remove chunk %v: %w", index, err)
}
delete(q.chunkFiles, index)
delete(q.chunkReturned, index)
delete(q.chunkAllocated, index)
return nil
}
// DiscardSender discards all *unreturned* chunks from a given sender. If the caller wants to
// discard already returned chunks, this can be done via Discard().
func (q *chunkQueue) DiscardSender(peerID p2p.ID) error {
q.Lock()
defer q.Unlock()
for index, sender := range q.chunkSenders {
if sender == peerID && !q.chunkReturned[index] {
err := q.discard(index)
if err != nil {
return err
}
delete(q.chunkSenders, index)
}
}
return nil
}
// GetSender returns the sender of the chunk with the given index, or empty if not found.
func (q *chunkQueue) GetSender(index uint32) p2p.ID {
q.Lock()
defer q.Unlock()
return q.chunkSenders[index]
}
// Has checks whether a chunk exists in the queue.
func (q *chunkQueue) Has(index uint32) bool {
q.Lock()
defer q.Unlock()
return q.chunkFiles[index] != ""
}
// load loads a chunk from disk, or nil if the chunk is not in the queue. The caller must hold the
// mutex lock.
func (q *chunkQueue) load(index uint32) (*chunk, error) {
path, ok := q.chunkFiles[index]
if !ok {
return nil, nil
}
body, err := ioutil.ReadFile(path)
if err != nil {
return nil, fmt.Errorf("failed to load chunk %v: %w", index, err)
}
return &chunk{
Height: q.snapshot.Height,
Format: q.snapshot.Format,
Index: index,
Chunk: body,
Sender: q.chunkSenders[index],
}, nil
}
// Next returns the next chunk from the queue, or errDone if all chunks have been returned. It
// blocks until the chunk is available. Concurrent Next() calls may return the same chunk.
func (q *chunkQueue) Next() (*chunk, error) {
q.Lock()
var chunk *chunk
index, err := q.nextUp()
if err == nil {
chunk, err = q.load(index)
if err == nil {
q.chunkReturned[index] = true
}
}
q.Unlock()
if chunk != nil || err != nil {
return chunk, err
}
select {
case _, ok := <-q.WaitFor(index):
if !ok {
return nil, errDone // queue closed
}
case <-time.After(chunkTimeout):
return nil, errTimeout
}
q.Lock()
defer q.Unlock()
chunk, err = q.load(index)
if err != nil {
return nil, err
}
q.chunkReturned[index] = true
return chunk, nil
}
// nextUp returns the next chunk to be returned, or errDone if all chunks have been returned. The
// caller must hold the mutex lock.
func (q *chunkQueue) nextUp() (uint32, error) {
if q.snapshot == nil {
return 0, errDone
}
for i := uint32(0); i < q.snapshot.Chunks; i++ {
if !q.chunkReturned[i] {
return i, nil
}
}
return 0, errDone
}
// Retry schedules a chunk to be retried, without refetching it.
func (q *chunkQueue) Retry(index uint32) {
q.Lock()
defer q.Unlock()
delete(q.chunkReturned, index)
}
// RetryAll schedules all chunks to be retried, without refetching them.
func (q *chunkQueue) RetryAll() {
q.Lock()
defer q.Unlock()
q.chunkReturned = make(map[uint32]bool)
}
// Size returns the total number of chunks for the snapshot and queue, or 0 when closed.
func (q *chunkQueue) Size() uint32 {
q.Lock()
defer q.Unlock()
if q.snapshot == nil {
return 0
}
return q.snapshot.Chunks
}
// WaitFor returns a channel that receives a chunk index when it arrives in the queue, or
// immediately if it has already arrived. The channel is closed without a value if the queue is
// closed or if the chunk index is not valid.
func (q *chunkQueue) WaitFor(index uint32) <-chan uint32 {
q.Lock()
defer q.Unlock()
ch := make(chan uint32, 1)
switch {
case q.snapshot == nil:
close(ch)
case index >= q.snapshot.Chunks:
close(ch)
case q.chunkFiles[index] != "":
ch <- index
close(ch)
default:
if q.waiters[index] == nil {
q.waiters[index] = make([]chan<- uint32, 0)
}
q.waiters[index] = append(q.waiters[index], ch)
}
return ch
}

551
statesync/chunks_test.go Normal file
View File

@@ -0,0 +1,551 @@
package statesync
import (
"io/ioutil"
"os"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"github.com/tendermint/tendermint/p2p"
)
func setupChunkQueue(t *testing.T) (*chunkQueue, func()) {
snapshot := &snapshot{
Height: 3,
Format: 1,
Chunks: 5,
Hash: []byte{7},
Metadata: nil,
}
queue, err := newChunkQueue(snapshot, "")
require.NoError(t, err)
teardown := func() {
err := queue.Close()
require.NoError(t, err)
}
return queue, teardown
}
func TestNewChunkQueue_TempDir(t *testing.T) {
snapshot := &snapshot{
Height: 3,
Format: 1,
Chunks: 5,
Hash: []byte{7},
Metadata: nil,
}
dir, err := ioutil.TempDir("", "newchunkqueue")
require.NoError(t, err)
defer os.RemoveAll(dir)
queue, err := newChunkQueue(snapshot, dir)
require.NoError(t, err)
files, err := ioutil.ReadDir(dir)
require.NoError(t, err)
assert.Len(t, files, 1)
err = queue.Close()
require.NoError(t, err)
files, err = ioutil.ReadDir(dir)
require.NoError(t, err)
assert.Len(t, files, 0)
}
func TestChunkQueue(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
// Adding the first chunk should be fine
added, err := queue.Add(&chunk{Height: 3, Format: 1, Index: 0, Chunk: []byte{3, 1, 0}})
require.NoError(t, err)
assert.True(t, added)
// Adding the last chunk should also be fine
added, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 4, Chunk: []byte{3, 1, 4}})
require.NoError(t, err)
assert.True(t, added)
// Adding the first or last chunks again should return false
added, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 0, Chunk: []byte{3, 1, 0}})
require.NoError(t, err)
assert.False(t, added)
added, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 4, Chunk: []byte{3, 1, 4}})
require.NoError(t, err)
assert.False(t, added)
// Adding the remaining chunks in reverse should be fine
added, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 3, Chunk: []byte{3, 1, 3}})
require.NoError(t, err)
assert.True(t, added)
added, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 2, Chunk: []byte{3, 1, 2}})
require.NoError(t, err)
assert.True(t, added)
added, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 1, Chunk: []byte{3, 1, 1}})
require.NoError(t, err)
assert.True(t, added)
// At this point, we should be able to retrieve them all via Next
for i := 0; i < 5; i++ {
c, err := queue.Next()
require.NoError(t, err)
assert.Equal(t, &chunk{Height: 3, Format: 1, Index: uint32(i), Chunk: []byte{3, 1, byte(i)}}, c)
}
_, err = queue.Next()
require.Error(t, err)
assert.Equal(t, errDone, err)
// It should still be possible to try to add chunks (which will be ignored)
added, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 0, Chunk: []byte{3, 1, 0}})
require.NoError(t, err)
assert.False(t, added)
// After closing the queue it will also return false
err = queue.Close()
require.NoError(t, err)
added, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 0, Chunk: []byte{3, 1, 0}})
require.NoError(t, err)
assert.False(t, added)
// Closing the queue again should also be fine
err = queue.Close()
require.NoError(t, err)
}
func TestChunkQueue_Add_ChunkErrors(t *testing.T) {
testcases := map[string]struct {
chunk *chunk
}{
"nil chunk": {nil},
"nil body": {&chunk{Height: 3, Format: 1, Index: 0, Chunk: nil}},
"wrong height": {&chunk{Height: 9, Format: 1, Index: 0, Chunk: []byte{3, 1, 0}}},
"wrong format": {&chunk{Height: 3, Format: 9, Index: 0, Chunk: []byte{3, 1, 0}}},
"invalid index": {&chunk{Height: 3, Format: 1, Index: 5, Chunk: []byte{3, 1, 0}}},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
_, err := queue.Add(tc.chunk)
require.Error(t, err)
})
}
}
func TestChunkQueue_Allocate(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
for i := uint32(0); i < queue.Size(); i++ {
index, err := queue.Allocate()
require.NoError(t, err)
assert.EqualValues(t, i, index)
}
_, err := queue.Allocate()
require.Error(t, err)
assert.Equal(t, errDone, err)
for i := uint32(0); i < queue.Size(); i++ {
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: i, Chunk: []byte{byte(i)}})
require.NoError(t, err)
}
// After all chunks have been allocated and retrieved, discarding a chunk will reallocate it.
err = queue.Discard(2)
require.NoError(t, err)
index, err := queue.Allocate()
require.NoError(t, err)
assert.EqualValues(t, 2, index)
_, err = queue.Allocate()
require.Error(t, err)
assert.Equal(t, errDone, err)
// Discarding a chunk the closing the queue will return errDone.
err = queue.Discard(2)
require.NoError(t, err)
err = queue.Close()
require.NoError(t, err)
_, err = queue.Allocate()
require.Error(t, err)
assert.Equal(t, errDone, err)
}
func TestChunkQueue_Discard(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
// Add a few chunks to the queue and fetch a couple
_, err := queue.Add(&chunk{Height: 3, Format: 1, Index: 0, Chunk: []byte{byte(0)}})
require.NoError(t, err)
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 1, Chunk: []byte{byte(1)}})
require.NoError(t, err)
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 2, Chunk: []byte{byte(2)}})
require.NoError(t, err)
c, err := queue.Next()
require.NoError(t, err)
assert.EqualValues(t, 0, c.Index)
c, err = queue.Next()
require.NoError(t, err)
assert.EqualValues(t, 1, c.Index)
// Discarding the first chunk and re-adding it should cause it to be returned
// immediately by Next(), before procceeding with chunk 2
err = queue.Discard(0)
require.NoError(t, err)
added, err := queue.Add(&chunk{Height: 3, Format: 1, Index: 0, Chunk: []byte{byte(0)}})
require.NoError(t, err)
assert.True(t, added)
c, err = queue.Next()
require.NoError(t, err)
assert.EqualValues(t, 0, c.Index)
c, err = queue.Next()
require.NoError(t, err)
assert.EqualValues(t, 2, c.Index)
// Discard then allocate, add and fetch all chunks
for i := uint32(0); i < queue.Size(); i++ {
err := queue.Discard(i)
require.NoError(t, err)
}
for i := uint32(0); i < queue.Size(); i++ {
_, err := queue.Allocate()
require.NoError(t, err)
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: i, Chunk: []byte{byte(i)}})
require.NoError(t, err)
c, err = queue.Next()
require.NoError(t, err)
assert.EqualValues(t, i, c.Index)
}
// Discarding a non-existent chunk does nothing.
err = queue.Discard(99)
require.NoError(t, err)
// When discard a couple of chunks, we should be able to allocate, add, and fetch them again.
err = queue.Discard(3)
require.NoError(t, err)
err = queue.Discard(1)
require.NoError(t, err)
index, err := queue.Allocate()
require.NoError(t, err)
assert.EqualValues(t, 1, index)
index, err = queue.Allocate()
require.NoError(t, err)
assert.EqualValues(t, 3, index)
added, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 3, Chunk: []byte{3}})
require.NoError(t, err)
assert.True(t, added)
added, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 1, Chunk: []byte{1}})
require.NoError(t, err)
assert.True(t, added)
chunk, err := queue.Next()
require.NoError(t, err)
assert.EqualValues(t, 1, chunk.Index)
chunk, err = queue.Next()
require.NoError(t, err)
assert.EqualValues(t, 3, chunk.Index)
_, err = queue.Next()
require.Error(t, err)
assert.Equal(t, errDone, err)
// After closing the queue, discarding does nothing
err = queue.Close()
require.NoError(t, err)
err = queue.Discard(2)
require.NoError(t, err)
}
func TestChunkQueue_DiscardSender(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
// Allocate and add all chunks to the queue
senders := []p2p.ID{"a", "b", "c"}
for i := uint32(0); i < queue.Size(); i++ {
_, err := queue.Allocate()
require.NoError(t, err)
_, err = queue.Add(&chunk{
Height: 3,
Format: 1,
Index: i,
Chunk: []byte{byte(i)},
Sender: senders[int(i)%len(senders)],
})
require.NoError(t, err)
}
// Fetch the first three chunks
for i := uint32(0); i < 3; i++ {
_, err := queue.Next()
require.NoError(t, err)
}
// Discarding an unknown sender should do nothing
err := queue.DiscardSender("x")
require.NoError(t, err)
_, err = queue.Allocate()
assert.Equal(t, errDone, err)
// Discarding sender b should discard chunk 4, but not chunk 1 which has already been
// returned.
err = queue.DiscardSender("b")
require.NoError(t, err)
index, err := queue.Allocate()
require.NoError(t, err)
assert.EqualValues(t, 4, index)
_, err = queue.Allocate()
assert.Equal(t, errDone, err)
}
func TestChunkQueue_GetSender(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
_, err := queue.Add(&chunk{Height: 3, Format: 1, Index: 0, Chunk: []byte{1}, Sender: p2p.ID("a")})
require.NoError(t, err)
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 1, Chunk: []byte{2}, Sender: p2p.ID("b")})
require.NoError(t, err)
assert.EqualValues(t, "a", queue.GetSender(0))
assert.EqualValues(t, "b", queue.GetSender(1))
assert.EqualValues(t, "", queue.GetSender(2))
// After the chunk has been processed, we should still know who the sender was
chunk, err := queue.Next()
require.NoError(t, err)
require.NotNil(t, chunk)
require.EqualValues(t, 0, chunk.Index)
assert.EqualValues(t, "a", queue.GetSender(0))
}
func TestChunkQueue_Next(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
// Next should block waiting for the next chunks, even when given out of order.
chNext := make(chan *chunk, 10)
go func() {
for {
c, err := queue.Next()
if err == errDone {
close(chNext)
break
}
require.NoError(t, err)
chNext <- c
}
}()
assert.Empty(t, chNext)
_, err := queue.Add(&chunk{Height: 3, Format: 1, Index: 1, Chunk: []byte{3, 1, 1}, Sender: p2p.ID("b")})
require.NoError(t, err)
select {
case <-chNext:
assert.Fail(t, "channel should be empty")
default:
}
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 0, Chunk: []byte{3, 1, 0}, Sender: p2p.ID("a")})
require.NoError(t, err)
assert.Equal(t,
&chunk{Height: 3, Format: 1, Index: 0, Chunk: []byte{3, 1, 0}, Sender: p2p.ID("a")},
<-chNext)
assert.Equal(t,
&chunk{Height: 3, Format: 1, Index: 1, Chunk: []byte{3, 1, 1}, Sender: p2p.ID("b")},
<-chNext)
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 4, Chunk: []byte{3, 1, 4}, Sender: p2p.ID("e")})
require.NoError(t, err)
select {
case <-chNext:
assert.Fail(t, "channel should be empty")
default:
}
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 2, Chunk: []byte{3, 1, 2}, Sender: p2p.ID("c")})
require.NoError(t, err)
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 3, Chunk: []byte{3, 1, 3}, Sender: p2p.ID("d")})
require.NoError(t, err)
assert.Equal(t,
&chunk{Height: 3, Format: 1, Index: 2, Chunk: []byte{3, 1, 2}, Sender: p2p.ID("c")},
<-chNext)
assert.Equal(t,
&chunk{Height: 3, Format: 1, Index: 3, Chunk: []byte{3, 1, 3}, Sender: p2p.ID("d")},
<-chNext)
assert.Equal(t,
&chunk{Height: 3, Format: 1, Index: 4, Chunk: []byte{3, 1, 4}, Sender: p2p.ID("e")},
<-chNext)
_, ok := <-chNext
assert.False(t, ok, "channel should be closed")
// Calling next on a finished queue should return done
_, err = queue.Next()
assert.Equal(t, errDone, err)
}
func TestChunkQueue_Next_Closed(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
// Calling Next on a closed queue should return done
_, err := queue.Add(&chunk{Height: 3, Format: 1, Index: 1, Chunk: []byte{3, 1, 1}})
require.NoError(t, err)
err = queue.Close()
require.NoError(t, err)
_, err = queue.Next()
assert.Equal(t, errDone, err)
}
func TestChunkQueue_Retry(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
// Allocate and add all chunks to the queue
for i := uint32(0); i < queue.Size(); i++ {
_, err := queue.Allocate()
require.NoError(t, err)
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: i, Chunk: []byte{byte(i)}})
require.NoError(t, err)
_, err = queue.Next()
require.NoError(t, err)
}
// Retrying a couple of chunks makes Next() return them, but they are not allocatable
queue.Retry(3)
queue.Retry(1)
_, err := queue.Allocate()
assert.Equal(t, errDone, err)
chunk, err := queue.Next()
require.NoError(t, err)
assert.EqualValues(t, 1, chunk.Index)
chunk, err = queue.Next()
require.NoError(t, err)
assert.EqualValues(t, 3, chunk.Index)
_, err = queue.Next()
assert.Equal(t, errDone, err)
}
func TestChunkQueue_RetryAll(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
// Allocate and add all chunks to the queue
for i := uint32(0); i < queue.Size(); i++ {
_, err := queue.Allocate()
require.NoError(t, err)
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: i, Chunk: []byte{byte(i)}})
require.NoError(t, err)
_, err = queue.Next()
require.NoError(t, err)
}
_, err := queue.Next()
assert.Equal(t, errDone, err)
queue.RetryAll()
_, err = queue.Allocate()
assert.Equal(t, errDone, err)
for i := uint32(0); i < queue.Size(); i++ {
chunk, err := queue.Next()
require.NoError(t, err)
assert.EqualValues(t, i, chunk.Index)
}
_, err = queue.Next()
assert.Equal(t, errDone, err)
}
func TestChunkQueue_Size(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
assert.EqualValues(t, 5, queue.Size())
err := queue.Close()
require.NoError(t, err)
assert.EqualValues(t, 0, queue.Size())
}
func TestChunkQueue_WaitFor(t *testing.T) {
queue, teardown := setupChunkQueue(t)
defer teardown()
waitFor1 := queue.WaitFor(1)
waitFor4 := queue.WaitFor(4)
// Adding 0 and 2 should not trigger waiters
_, err := queue.Add(&chunk{Height: 3, Format: 1, Index: 0, Chunk: []byte{3, 1, 0}})
require.NoError(t, err)
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 2, Chunk: []byte{3, 1, 2}})
require.NoError(t, err)
select {
case <-waitFor1:
require.Fail(t, "WaitFor(1) should not trigger on 0 or 2")
case <-waitFor4:
require.Fail(t, "WaitFor(4) should not trigger on 0 or 2")
default:
}
// Adding 1 should trigger WaitFor(1), but not WaitFor(4). The channel should be closed.
_, err = queue.Add(&chunk{Height: 3, Format: 1, Index: 1, Chunk: []byte{3, 1, 1}})
require.NoError(t, err)
assert.EqualValues(t, 1, <-waitFor1)
_, ok := <-waitFor1
assert.False(t, ok)
select {
case <-waitFor4:
require.Fail(t, "WaitFor(4) should not trigger on 0 or 2")
default:
}
// Fetch the first chunk. At this point, waiting for either 0 (retrieved from pool) or 1
// (queued in pool) should immediately return true.
c, err := queue.Next()
require.NoError(t, err)
assert.EqualValues(t, 0, c.Index)
w := queue.WaitFor(0)
assert.EqualValues(t, 0, <-w)
_, ok = <-w
assert.False(t, ok)
w = queue.WaitFor(1)
assert.EqualValues(t, 1, <-w)
_, ok = <-w
assert.False(t, ok)
// Close the queue. This should cause the waiter for 4 to close, and also cause any future
// waiters to get closed channels.
err = queue.Close()
require.NoError(t, err)
_, ok = <-waitFor4
assert.False(t, ok)
w = queue.WaitFor(3)
_, ok = <-w
assert.False(t, ok)
}

129
statesync/messages.go Normal file
View File

@@ -0,0 +1,129 @@
package statesync
import (
"errors"
"fmt"
amino "github.com/tendermint/go-amino"
"github.com/tendermint/tendermint/types"
)
const (
// snapshotMsgSize is the maximum size of a snapshotResponseMessage
snapshotMsgSize = int(4e6)
// chunkMsgSize is the maximum size of a chunkResponseMessage
chunkMsgSize = int(16e6)
// maxMsgSize is the maximum size of any message
maxMsgSize = chunkMsgSize
)
var cdc = amino.NewCodec()
func init() {
cdc.RegisterInterface((*Message)(nil), nil)
cdc.RegisterConcrete(&snapshotsRequestMessage{}, "tendermint/SnapshotsRequestMessage", nil)
cdc.RegisterConcrete(&snapshotsResponseMessage{}, "tendermint/SnapshotsResponseMessage", nil)
cdc.RegisterConcrete(&chunkRequestMessage{}, "tendermint/ChunkRequestMessage", nil)
cdc.RegisterConcrete(&chunkResponseMessage{}, "tendermint/ChunkResponseMessage", nil)
types.RegisterBlockAmino(cdc)
}
// decodeMsg decodes a message.
func decodeMsg(bz []byte) (Message, error) {
if len(bz) > maxMsgSize {
return nil, fmt.Errorf("msg exceeds max size (%d > %d)", len(bz), maxMsgSize)
}
var msg Message
err := cdc.UnmarshalBinaryBare(bz, &msg)
if err != nil {
return nil, err
}
return msg, nil
}
// Message is a message sent and received by the reactor.
type Message interface {
ValidateBasic() error
}
// snapshotsRequestMessage requests recent snapshots from a peer.
type snapshotsRequestMessage struct{}
// ValidateBasic implements Message.
func (m *snapshotsRequestMessage) ValidateBasic() error {
if m == nil {
return errors.New("nil message")
}
return nil
}
// SnapshotResponseMessage contains information about a single snapshot.
type snapshotsResponseMessage struct {
Height uint64
Format uint32
Chunks uint32
Hash []byte
Metadata []byte
}
// ValidateBasic implements Message.
func (m *snapshotsResponseMessage) ValidateBasic() error {
if m == nil {
return errors.New("nil message")
}
if m.Height == 0 {
return errors.New("height cannot be 0")
}
if len(m.Hash) == 0 {
return errors.New("snapshot has no hash")
}
if m.Chunks == 0 {
return errors.New("snapshot has no chunks")
}
return nil
}
// chunkRequestMessage requests a single chunk from a peer.
type chunkRequestMessage struct {
Height uint64
Format uint32
Index uint32
}
// ValidateBasic implements Message.
func (m *chunkRequestMessage) ValidateBasic() error {
if m == nil {
return errors.New("nil message")
}
if m.Height == 0 {
return errors.New("height cannot be 0")
}
return nil
}
// chunkResponseMessage contains a single chunk from a peer.
type chunkResponseMessage struct {
Height uint64
Format uint32
Index uint32
Chunk []byte
Missing bool
}
// ValidateBasic implements Message.
func (m *chunkResponseMessage) ValidateBasic() error {
if m == nil {
return errors.New("nil message")
}
if m.Height == 0 {
return errors.New("height cannot be 0")
}
if m.Missing && len(m.Chunk) > 0 {
return errors.New("missing chunk cannot have contents")
}
if !m.Missing && m.Chunk == nil {
return errors.New("chunk cannot be nil")
}
return nil
}

106
statesync/messages_test.go Normal file
View File

@@ -0,0 +1,106 @@
package statesync
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestSnapshotsRequestMessage_ValidateBasic(t *testing.T) {
testcases := map[string]struct {
msg *snapshotsRequestMessage
valid bool
}{
"nil": {nil, false},
"valid": {&snapshotsRequestMessage{}, true},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
err := tc.msg.ValidateBasic()
if tc.valid {
require.NoError(t, err)
} else {
require.Error(t, err)
}
})
}
}
func TestSnapshotsResponseMessage_ValidateBasic(t *testing.T) {
testcases := map[string]struct {
msg *snapshotsResponseMessage
valid bool
}{
"nil": {nil, false},
"valid": {&snapshotsResponseMessage{Height: 1, Format: 1, Chunks: 2, Hash: []byte{1}}, true},
"0 height": {&snapshotsResponseMessage{Height: 0, Format: 1, Chunks: 2, Hash: []byte{1}}, false},
"0 format": {&snapshotsResponseMessage{Height: 1, Format: 0, Chunks: 2, Hash: []byte{1}}, true},
"0 chunks": {&snapshotsResponseMessage{Height: 1, Format: 1, Hash: []byte{1}}, false},
"no hash": {&snapshotsResponseMessage{Height: 1, Format: 1, Chunks: 2, Hash: []byte{}}, false},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
err := tc.msg.ValidateBasic()
if tc.valid {
require.NoError(t, err)
} else {
require.Error(t, err)
}
})
}
}
func TestChunkRequestMessage_ValidateBasic(t *testing.T) {
testcases := map[string]struct {
msg *chunkRequestMessage
valid bool
}{
"nil": {nil, false},
"valid": {&chunkRequestMessage{Height: 1, Format: 1, Index: 1}, true},
"0 height": {&chunkRequestMessage{Height: 0, Format: 1, Index: 1}, false},
"0 format": {&chunkRequestMessage{Height: 1, Format: 0, Index: 1}, true},
"0 chunk": {&chunkRequestMessage{Height: 1, Format: 1, Index: 0}, true},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
err := tc.msg.ValidateBasic()
if tc.valid {
require.NoError(t, err)
} else {
require.Error(t, err)
}
})
}
}
func TestChunkResponseMessage_ValidateBasic(t *testing.T) {
testcases := map[string]struct {
msg *chunkResponseMessage
valid bool
}{
"nil message": {nil, false},
"valid": {&chunkResponseMessage{Height: 1, Format: 1, Index: 1, Chunk: []byte{1}}, true},
"0 height": {&chunkResponseMessage{Height: 0, Format: 1, Index: 1, Chunk: []byte{1}}, false},
"0 format": {&chunkResponseMessage{Height: 1, Format: 0, Index: 1, Chunk: []byte{1}}, true},
"0 chunk": {&chunkResponseMessage{Height: 1, Format: 1, Index: 0, Chunk: []byte{1}}, true},
"empty body": {&chunkResponseMessage{Height: 1, Format: 1, Index: 1, Chunk: []byte{}}, true},
"nil body": {&chunkResponseMessage{Height: 1, Format: 1, Index: 1, Chunk: nil}, false},
"missing": {&chunkResponseMessage{Height: 1, Format: 1, Index: 1, Missing: true}, true},
"missing with empty": {&chunkResponseMessage{Height: 1, Format: 1, Index: 1, Missing: true, Chunk: []byte{}}, true},
"missing with body": {&chunkResponseMessage{Height: 1, Format: 1, Index: 1, Missing: true, Chunk: []byte{1}}, false},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
err := tc.msg.ValidateBasic()
if tc.valid {
require.NoError(t, err)
} else {
require.Error(t, err)
}
})
}
}

View File

@@ -0,0 +1,82 @@
// Code generated by mockery v1.0.0. DO NOT EDIT.
package mocks
import (
mock "github.com/stretchr/testify/mock"
state "github.com/tendermint/tendermint/state"
types "github.com/tendermint/tendermint/types"
)
// StateProvider is an autogenerated mock type for the StateProvider type
type StateProvider struct {
mock.Mock
}
// AppHash provides a mock function with given fields: height
func (_m *StateProvider) AppHash(height uint64) ([]byte, error) {
ret := _m.Called(height)
var r0 []byte
if rf, ok := ret.Get(0).(func(uint64) []byte); ok {
r0 = rf(height)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).([]byte)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(uint64) error); ok {
r1 = rf(height)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// Commit provides a mock function with given fields: height
func (_m *StateProvider) Commit(height uint64) (*types.Commit, error) {
ret := _m.Called(height)
var r0 *types.Commit
if rf, ok := ret.Get(0).(func(uint64) *types.Commit); ok {
r0 = rf(height)
} else {
if ret.Get(0) != nil {
r0 = ret.Get(0).(*types.Commit)
}
}
var r1 error
if rf, ok := ret.Get(1).(func(uint64) error); ok {
r1 = rf(height)
} else {
r1 = ret.Error(1)
}
return r0, r1
}
// State provides a mock function with given fields: height
func (_m *StateProvider) State(height uint64) (state.State, error) {
ret := _m.Called(height)
var r0 state.State
if rf, ok := ret.Get(0).(func(uint64) state.State); ok {
r0 = rf(height)
} else {
r0 = ret.Get(0).(state.State)
}
var r1 error
if rf, ok := ret.Get(1).(func(uint64) error); ok {
r1 = rf(height)
} else {
r1 = ret.Error(1)
}
return r0, r1
}

261
statesync/reactor.go Normal file
View File

@@ -0,0 +1,261 @@
package statesync
import (
"errors"
"sort"
"sync"
abci "github.com/tendermint/tendermint/abci/types"
"github.com/tendermint/tendermint/p2p"
"github.com/tendermint/tendermint/proxy"
sm "github.com/tendermint/tendermint/state"
"github.com/tendermint/tendermint/types"
)
const (
// SnapshotChannel exchanges snapshot metadata
SnapshotChannel = byte(0x60)
// ChunkChannel exchanges chunk contents
ChunkChannel = byte(0x61)
// recentSnapshots is the number of recent snapshots to send and receive per peer.
recentSnapshots = 10
)
// Reactor handles state sync, both restoring snapshots for the local node and serving snapshots
// for other nodes.
type Reactor struct {
p2p.BaseReactor
conn proxy.AppConnSnapshot
connQuery proxy.AppConnQuery
tempDir string
// This will only be set when a state sync is in progress. It is used to feed received
// snapshots and chunks into the sync.
mtx sync.RWMutex
syncer *syncer
}
// NewReactor creates a new state sync reactor.
func NewReactor(conn proxy.AppConnSnapshot, connQuery proxy.AppConnQuery, tempDir string) *Reactor {
r := &Reactor{
conn: conn,
connQuery: connQuery,
}
r.BaseReactor = *p2p.NewBaseReactor("StateSync", r)
return r
}
// GetChannels implements p2p.Reactor.
func (r *Reactor) GetChannels() []*p2p.ChannelDescriptor {
return []*p2p.ChannelDescriptor{
{
ID: SnapshotChannel,
Priority: 3,
SendQueueCapacity: 10,
RecvMessageCapacity: snapshotMsgSize,
},
{
ID: ChunkChannel,
Priority: 1,
SendQueueCapacity: 4,
RecvMessageCapacity: chunkMsgSize,
},
}
}
// OnStart implements p2p.Reactor.
func (r *Reactor) OnStart() error {
return nil
}
// AddPeer implements p2p.Reactor.
func (r *Reactor) AddPeer(peer p2p.Peer) {
r.mtx.RLock()
defer r.mtx.RUnlock()
if r.syncer != nil {
r.syncer.AddPeer(peer)
}
}
// RemovePeer implements p2p.Reactor.
func (r *Reactor) RemovePeer(peer p2p.Peer, reason interface{}) {
r.mtx.RLock()
defer r.mtx.RUnlock()
if r.syncer != nil {
r.syncer.RemovePeer(peer)
}
}
// Receive implements p2p.Reactor.
func (r *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
if !r.IsRunning() {
return
}
msg, err := decodeMsg(msgBytes)
if err != nil {
r.Logger.Error("Error decoding message", "src", src, "chId", chID, "msg", msg, "err", err, "bytes", msgBytes)
r.Switch.StopPeerForError(src, err)
return
}
err = msg.ValidateBasic()
if err != nil {
r.Logger.Error("Invalid message", "peer", src, "msg", msg, "err", err)
r.Switch.StopPeerForError(src, err)
return
}
switch chID {
case SnapshotChannel:
switch msg := msg.(type) {
case *snapshotsRequestMessage:
snapshots, err := r.recentSnapshots(recentSnapshots)
if err != nil {
r.Logger.Error("Failed to fetch snapshots", "err", err)
return
}
for _, snapshot := range snapshots {
r.Logger.Debug("Advertising snapshot", "height", snapshot.Height,
"format", snapshot.Format, "peer", src.ID())
src.Send(chID, cdc.MustMarshalBinaryBare(&snapshotsResponseMessage{
Height: snapshot.Height,
Format: snapshot.Format,
Chunks: snapshot.Chunks,
Hash: snapshot.Hash,
Metadata: snapshot.Metadata,
}))
}
case *snapshotsResponseMessage:
r.mtx.RLock()
defer r.mtx.RUnlock()
if r.syncer == nil {
r.Logger.Debug("Received unexpected snapshot, no state sync in progress")
return
}
r.Logger.Debug("Received snapshot", "height", msg.Height, "format", msg.Format, "peer", src.ID())
_, err := r.syncer.AddSnapshot(src, &snapshot{
Height: msg.Height,
Format: msg.Format,
Chunks: msg.Chunks,
Hash: msg.Hash,
Metadata: msg.Metadata,
})
if err != nil {
r.Logger.Error("Failed to add snapshot", "height", msg.Height, "format", msg.Format,
"peer", src.ID(), "err", err)
return
}
default:
r.Logger.Error("Received unknown message %T", msg)
}
case ChunkChannel:
switch msg := msg.(type) {
case *chunkRequestMessage:
r.Logger.Debug("Received chunk request", "height", msg.Height, "format", msg.Format,
"chunk", msg.Index, "peer", src.ID())
resp, err := r.conn.LoadSnapshotChunkSync(abci.RequestLoadSnapshotChunk{
Height: msg.Height,
Format: msg.Format,
Chunk: msg.Index,
})
if err != nil {
r.Logger.Error("Failed to load chunk", "height", msg.Height, "format", msg.Format,
"chunk", msg.Index, "err", err)
return
}
r.Logger.Debug("Sending chunk", "height", msg.Height, "format", msg.Format,
"chunk", msg.Index, "peer", src.ID())
src.Send(ChunkChannel, cdc.MustMarshalBinaryBare(&chunkResponseMessage{
Height: msg.Height,
Format: msg.Format,
Index: msg.Index,
Chunk: resp.Chunk,
Missing: resp.Chunk == nil,
}))
case *chunkResponseMessage:
r.mtx.RLock()
defer r.mtx.RUnlock()
if r.syncer == nil {
r.Logger.Debug("Received unexpected chunk, no state sync in progress", "peer", src.ID())
return
}
r.Logger.Debug("Received chunk, adding to sync", "height", msg.Height, "format", msg.Format,
"chunk", msg.Index, "peer", src.ID())
_, err := r.syncer.AddChunk(&chunk{
Height: msg.Height,
Format: msg.Format,
Index: msg.Index,
Chunk: msg.Chunk,
Sender: src.ID(),
})
if err != nil {
r.Logger.Error("Failed to add chunk", "height", msg.Height, "format", msg.Format,
"chunk", msg.Index, "err", err)
return
}
default:
r.Logger.Error("Received unknown message %T", msg)
}
default:
r.Logger.Error("Received message on invalid channel %x", chID)
}
}
// recentSnapshots fetches the n most recent snapshots from the app
func (r *Reactor) recentSnapshots(n uint32) ([]*snapshot, error) {
resp, err := r.conn.ListSnapshotsSync(abci.RequestListSnapshots{})
if err != nil {
return nil, err
}
sort.Slice(resp.Snapshots, func(i, j int) bool {
a := resp.Snapshots[i]
b := resp.Snapshots[j]
switch {
case a.Height > b.Height:
return true
case a.Height == b.Height && a.Format > b.Format:
return true
default:
return false
}
})
snapshots := make([]*snapshot, 0, n)
for i, s := range resp.Snapshots {
if i >= recentSnapshots {
break
}
snapshots = append(snapshots, &snapshot{
Height: s.Height,
Format: s.Format,
Chunks: s.Chunks,
Hash: s.Hash,
Metadata: s.Metadata,
})
}
return snapshots, nil
}
// Sync runs a state sync, returning the new state and last commit at the snapshot height.
// The caller must store the state and commit in the state database and block store.
func (r *Reactor) Sync(stateProvider StateProvider) (sm.State, *types.Commit, error) {
r.mtx.Lock()
if r.syncer != nil {
r.mtx.Unlock()
return sm.State{}, nil, errors.New("a state sync is already in progress")
}
r.syncer = newSyncer(r.Logger, r.conn, r.connQuery, stateProvider, r.tempDir)
r.mtx.Unlock()
state, commit, err := r.syncer.SyncAny(defaultDiscoveryTime)
r.mtx.Lock()
r.syncer = nil
r.mtx.Unlock()
return state, commit, err
}

148
statesync/reactor_test.go Normal file
View File

@@ -0,0 +1,148 @@
package statesync
import (
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
abci "github.com/tendermint/tendermint/abci/types"
"github.com/tendermint/tendermint/p2p"
p2pmocks "github.com/tendermint/tendermint/p2p/mocks"
proxymocks "github.com/tendermint/tendermint/proxy/mocks"
)
func TestReactor_Receive_ChunkRequestMessage(t *testing.T) {
testcases := map[string]struct {
request *chunkRequestMessage
chunk []byte
expectResponse *chunkResponseMessage
}{
"chunk is returned": {
&chunkRequestMessage{Height: 1, Format: 1, Index: 1},
[]byte{1, 2, 3},
&chunkResponseMessage{Height: 1, Format: 1, Index: 1, Chunk: []byte{1, 2, 3}}},
"empty chunk is returned, as nil": {
&chunkRequestMessage{Height: 1, Format: 1, Index: 1},
[]byte{},
&chunkResponseMessage{Height: 1, Format: 1, Index: 1, Chunk: nil}},
"nil (missing) chunk is returned as missing": {
&chunkRequestMessage{Height: 1, Format: 1, Index: 1},
nil,
&chunkResponseMessage{Height: 1, Format: 1, Index: 1, Missing: true},
},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
// Mock ABCI connection to return local snapshots
conn := &proxymocks.AppConnSnapshot{}
conn.On("LoadSnapshotChunkSync", abci.RequestLoadSnapshotChunk{
Height: tc.request.Height,
Format: tc.request.Format,
Chunk: tc.request.Index,
}).Return(&abci.ResponseLoadSnapshotChunk{Chunk: tc.chunk}, nil)
// Mock peer to store response, if found
peer := &p2pmocks.Peer{}
peer.On("ID").Return(p2p.ID("id"))
var response *chunkResponseMessage
if tc.expectResponse != nil {
peer.On("Send", ChunkChannel, mock.Anything).Run(func(args mock.Arguments) {
msg, err := decodeMsg(args[1].([]byte))
require.NoError(t, err)
response = msg.(*chunkResponseMessage)
}).Return(true)
}
// Start a reactor and send a chunkRequestMessage, then wait for and check response
r := NewReactor(conn, nil, "")
err := r.Start()
require.NoError(t, err)
defer r.Stop()
r.Receive(ChunkChannel, peer, cdc.MustMarshalBinaryBare(tc.request))
time.Sleep(100 * time.Millisecond)
assert.Equal(t, tc.expectResponse, response)
conn.AssertExpectations(t)
peer.AssertExpectations(t)
})
}
}
func TestReactor_Receive_SnapshotRequestMessage(t *testing.T) {
testcases := map[string]struct {
snapshots []*abci.Snapshot
expectResponses []*snapshotsResponseMessage
}{
"no snapshots": {nil, []*snapshotsResponseMessage{}},
">10 unordered snapshots": {
[]*abci.Snapshot{
{Height: 1, Format: 2, Chunks: 7, Hash: []byte{1, 2}, Metadata: []byte{1}},
{Height: 2, Format: 2, Chunks: 7, Hash: []byte{2, 2}, Metadata: []byte{2}},
{Height: 3, Format: 2, Chunks: 7, Hash: []byte{3, 2}, Metadata: []byte{3}},
{Height: 1, Format: 1, Chunks: 7, Hash: []byte{1, 1}, Metadata: []byte{4}},
{Height: 2, Format: 1, Chunks: 7, Hash: []byte{2, 1}, Metadata: []byte{5}},
{Height: 3, Format: 1, Chunks: 7, Hash: []byte{3, 1}, Metadata: []byte{6}},
{Height: 1, Format: 4, Chunks: 7, Hash: []byte{1, 4}, Metadata: []byte{7}},
{Height: 2, Format: 4, Chunks: 7, Hash: []byte{2, 4}, Metadata: []byte{8}},
{Height: 3, Format: 4, Chunks: 7, Hash: []byte{3, 4}, Metadata: []byte{9}},
{Height: 1, Format: 3, Chunks: 7, Hash: []byte{1, 3}, Metadata: []byte{10}},
{Height: 2, Format: 3, Chunks: 7, Hash: []byte{2, 3}, Metadata: []byte{11}},
{Height: 3, Format: 3, Chunks: 7, Hash: []byte{3, 3}, Metadata: []byte{12}},
},
[]*snapshotsResponseMessage{
{Height: 3, Format: 4, Chunks: 7, Hash: []byte{3, 4}, Metadata: []byte{9}},
{Height: 3, Format: 3, Chunks: 7, Hash: []byte{3, 3}, Metadata: []byte{12}},
{Height: 3, Format: 2, Chunks: 7, Hash: []byte{3, 2}, Metadata: []byte{3}},
{Height: 3, Format: 1, Chunks: 7, Hash: []byte{3, 1}, Metadata: []byte{6}},
{Height: 2, Format: 4, Chunks: 7, Hash: []byte{2, 4}, Metadata: []byte{8}},
{Height: 2, Format: 3, Chunks: 7, Hash: []byte{2, 3}, Metadata: []byte{11}},
{Height: 2, Format: 2, Chunks: 7, Hash: []byte{2, 2}, Metadata: []byte{2}},
{Height: 2, Format: 1, Chunks: 7, Hash: []byte{2, 1}, Metadata: []byte{5}},
{Height: 1, Format: 4, Chunks: 7, Hash: []byte{1, 4}, Metadata: []byte{7}},
{Height: 1, Format: 3, Chunks: 7, Hash: []byte{1, 3}, Metadata: []byte{10}},
},
},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
// Mock ABCI connection to return local snapshots
conn := &proxymocks.AppConnSnapshot{}
conn.On("ListSnapshotsSync", abci.RequestListSnapshots{}).Return(&abci.ResponseListSnapshots{
Snapshots: tc.snapshots,
}, nil)
// Mock peer to catch responses and store them in a slice
responses := []*snapshotsResponseMessage{}
peer := &p2pmocks.Peer{}
if len(tc.expectResponses) > 0 {
peer.On("ID").Return(p2p.ID("id"))
peer.On("Send", SnapshotChannel, mock.Anything).Run(func(args mock.Arguments) {
msg, err := decodeMsg(args[1].([]byte))
require.NoError(t, err)
responses = append(responses, msg.(*snapshotsResponseMessage))
}).Return(true)
}
// Start a reactor and send a SnapshotsRequestMessage, then wait for and check responses
r := NewReactor(conn, nil, "")
err := r.Start()
require.NoError(t, err)
defer r.Stop()
r.Receive(SnapshotChannel, peer, cdc.MustMarshalBinaryBare(&snapshotsRequestMessage{}))
time.Sleep(100 * time.Millisecond)
assert.Equal(t, tc.expectResponses, responses)
conn.AssertExpectations(t)
peer.AssertExpectations(t)
})
}
}

263
statesync/snapshots.go Normal file
View File

@@ -0,0 +1,263 @@
package statesync
import (
"crypto/sha256"
"fmt"
"math/rand"
"sort"
"sync"
"github.com/tendermint/tendermint/p2p"
)
// snapshotKey is a snapshot key used for lookups.
type snapshotKey [sha256.Size]byte
// snapshot contains data about a snapshot.
type snapshot struct {
Height uint64
Format uint32
Chunks uint32
Hash []byte
Metadata []byte
trustedAppHash []byte // populated by light client
}
// Key generates a snapshot key, used for lookups. It takes into account not only the height and
// format, but also the chunks, hash, and metadata in case peers have generated snapshots in a
// non-deterministic manner. All fields must be equal for the snapshot to be considered the same.
func (s *snapshot) Key() snapshotKey {
// Hash.Write() never returns an error.
hasher := sha256.New()
hasher.Write([]byte(fmt.Sprintf("%v:%v:%v", s.Height, s.Format, s.Chunks)))
hasher.Write(s.Hash)
hasher.Write(s.Metadata)
var key snapshotKey
copy(key[:], hasher.Sum(nil))
return key
}
// snapshotPool discovers and aggregates snapshots across peers.
type snapshotPool struct {
stateProvider StateProvider
sync.Mutex
snapshots map[snapshotKey]*snapshot
snapshotPeers map[snapshotKey]map[p2p.ID]p2p.Peer
// indexes for fast searches
formatIndex map[uint32]map[snapshotKey]bool
heightIndex map[uint64]map[snapshotKey]bool
peerIndex map[p2p.ID]map[snapshotKey]bool
// blacklists for rejected items
formatBlacklist map[uint32]bool
peerBlacklist map[p2p.ID]bool
snapshotBlacklist map[snapshotKey]bool
}
// newSnapshotPool creates a new snapshot pool. The state source is used for
func newSnapshotPool(stateProvider StateProvider) *snapshotPool {
return &snapshotPool{
stateProvider: stateProvider,
snapshots: make(map[snapshotKey]*snapshot),
snapshotPeers: make(map[snapshotKey]map[p2p.ID]p2p.Peer),
formatIndex: make(map[uint32]map[snapshotKey]bool),
heightIndex: make(map[uint64]map[snapshotKey]bool),
peerIndex: make(map[p2p.ID]map[snapshotKey]bool),
formatBlacklist: make(map[uint32]bool),
peerBlacklist: make(map[p2p.ID]bool),
snapshotBlacklist: make(map[snapshotKey]bool),
}
}
// Add adds a snapshot to the pool, unless the peer has already sent recentSnapshots snapshots. It
// returns true if this was a new, non-blacklisted snapshot. The snapshot height is verified using
// the light client, and the expected app hash is set for the snapshot.
func (p *snapshotPool) Add(peer p2p.Peer, snapshot *snapshot) (bool, error) {
appHash, err := p.stateProvider.AppHash(snapshot.Height)
if err != nil {
return false, err
}
snapshot.trustedAppHash = appHash
key := snapshot.Key()
p.Lock()
defer p.Unlock()
switch {
case p.formatBlacklist[snapshot.Format]:
return false, nil
case p.peerBlacklist[peer.ID()]:
return false, nil
case p.snapshotBlacklist[key]:
return false, nil
case len(p.peerIndex[peer.ID()]) >= recentSnapshots:
return false, nil
}
if p.snapshotPeers[key] == nil {
p.snapshotPeers[key] = make(map[p2p.ID]p2p.Peer)
}
p.snapshotPeers[key][peer.ID()] = peer
if p.peerIndex[peer.ID()] == nil {
p.peerIndex[peer.ID()] = make(map[snapshotKey]bool)
}
p.peerIndex[peer.ID()][key] = true
if p.snapshots[key] != nil {
return false, nil
}
p.snapshots[key] = snapshot
if p.formatIndex[snapshot.Format] == nil {
p.formatIndex[snapshot.Format] = make(map[snapshotKey]bool)
}
p.formatIndex[snapshot.Format][key] = true
if p.heightIndex[snapshot.Height] == nil {
p.heightIndex[snapshot.Height] = make(map[snapshotKey]bool)
}
p.heightIndex[snapshot.Height][key] = true
return true, nil
}
// Best returns the "best" currently known snapshot, if any.
func (p *snapshotPool) Best() *snapshot {
ranked := p.Ranked()
if len(ranked) == 0 {
return nil
}
return ranked[0]
}
// GetPeer returns a random peer for a snapshot, if any.
func (p *snapshotPool) GetPeer(snapshot *snapshot) p2p.Peer {
peers := p.GetPeers(snapshot)
if len(peers) == 0 {
return nil
}
return peers[rand.Intn(len(peers))]
}
// GetPeers returns the peers for a snapshot.
func (p *snapshotPool) GetPeers(snapshot *snapshot) []p2p.Peer {
key := snapshot.Key()
p.Lock()
defer p.Unlock()
peers := make([]p2p.Peer, 0, len(p.snapshotPeers[key]))
for _, peer := range p.snapshotPeers[key] {
peers = append(peers, peer)
}
// sort results, for testability (otherwise order is random, so tests randomly fail)
sort.Slice(peers, func(a int, b int) bool {
return peers[a].ID() < peers[b].ID()
})
return peers
}
// Ranked returns a list of snapshots ranked by preference. The current heuristic is very naïve,
// preferring the snapshot with the greatest height, then greatest format, then greatest number of
// peers. This can be improved quite a lot.
func (p *snapshotPool) Ranked() []*snapshot {
p.Lock()
defer p.Unlock()
candidates := make([]*snapshot, 0, len(p.snapshots))
for _, snapshot := range p.snapshots {
candidates = append(candidates, snapshot)
}
sort.Slice(candidates, func(i, j int) bool {
a := candidates[i]
b := candidates[j]
switch {
case a.Height > b.Height:
return true
case a.Height < b.Height:
return false
case a.Format > b.Format:
return true
case a.Format < b.Format:
return false
case len(p.snapshotPeers[a.Key()]) > len(p.snapshotPeers[b.Key()]):
return true
default:
return false
}
})
return candidates
}
// Reject rejects a snapshot. Rejected snapshots will never be used again.
func (p *snapshotPool) Reject(snapshot *snapshot) {
key := snapshot.Key()
p.Lock()
defer p.Unlock()
p.snapshotBlacklist[key] = true
p.removeSnapshot(key)
}
// RejectFormat rejects a snapshot format. It will never be used again.
func (p *snapshotPool) RejectFormat(format uint32) {
p.Lock()
defer p.Unlock()
p.formatBlacklist[format] = true
for key := range p.formatIndex[format] {
p.removeSnapshot(key)
}
}
// RejectPeer rejects a peer. It will never be used again.
func (p *snapshotPool) RejectPeer(peerID p2p.ID) {
if peerID == "" {
return
}
p.Lock()
defer p.Unlock()
p.removePeer(peerID)
p.peerBlacklist[peerID] = true
}
// RemovePeer removes a peer from the pool, and any snapshots that no longer have peers.
func (p *snapshotPool) RemovePeer(peerID p2p.ID) {
p.Lock()
defer p.Unlock()
p.removePeer(peerID)
}
// removePeer removes a peer. The caller must hold the mutex lock.
func (p *snapshotPool) removePeer(peerID p2p.ID) {
for key := range p.peerIndex[peerID] {
delete(p.snapshotPeers[key], peerID)
if len(p.snapshotPeers[key]) == 0 {
p.removeSnapshot(key)
}
}
delete(p.peerIndex, peerID)
}
// removeSnapshot removes a snapshot. The caller must hold the mutex lock.
func (p *snapshotPool) removeSnapshot(key snapshotKey) {
snapshot := p.snapshots[key]
if snapshot == nil {
return
}
delete(p.snapshots, key)
delete(p.formatIndex[snapshot.Format], key)
delete(p.heightIndex[snapshot.Height], key)
for peerID := range p.snapshotPeers[key] {
delete(p.peerIndex[peerID], key)
}
delete(p.snapshotPeers, key)
}

326
statesync/snapshots_test.go Normal file
View File

@@ -0,0 +1,326 @@
package statesync
import (
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
"github.com/tendermint/tendermint/p2p"
p2pmocks "github.com/tendermint/tendermint/p2p/mocks"
"github.com/tendermint/tendermint/statesync/mocks"
)
func TestSnapshot_Key(t *testing.T) {
testcases := map[string]struct {
modify func(*snapshot)
}{
"new height": {func(s *snapshot) { s.Height = 9 }},
"new format": {func(s *snapshot) { s.Format = 9 }},
"new chunk count": {func(s *snapshot) { s.Chunks = 9 }},
"new hash": {func(s *snapshot) { s.Hash = []byte{9} }},
"no metadata": {func(s *snapshot) { s.Metadata = nil }},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
s := snapshot{
Height: 3,
Format: 1,
Chunks: 7,
Hash: []byte{1, 2, 3},
Metadata: []byte{255},
}
before := s.Key()
tc.modify(&s)
after := s.Key()
assert.NotEqual(t, before, after)
})
}
}
func TestSnapshotPool_Add(t *testing.T) {
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", uint64(1)).Return([]byte("app_hash"), nil)
peer := &p2pmocks.Peer{}
peer.On("ID").Return(p2p.ID("id"))
// Adding to the pool should work
pool := newSnapshotPool(stateProvider)
added, err := pool.Add(peer, &snapshot{
Height: 1,
Format: 1,
Chunks: 1,
Hash: []byte{1},
})
require.NoError(t, err)
assert.True(t, added)
// Adding again from a different peer should return false
otherPeer := &p2pmocks.Peer{}
otherPeer.On("ID").Return(p2p.ID("other"))
added, err = pool.Add(peer, &snapshot{
Height: 1,
Format: 1,
Chunks: 1,
Hash: []byte{1},
})
require.NoError(t, err)
assert.False(t, added)
// The pool should have populated the snapshot with the trusted app hash
snapshot := pool.Best()
require.NotNil(t, snapshot)
assert.Equal(t, []byte("app_hash"), snapshot.trustedAppHash)
stateProvider.AssertExpectations(t)
}
func TestSnapshotPool_GetPeer(t *testing.T) {
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
pool := newSnapshotPool(stateProvider)
s := &snapshot{Height: 1, Format: 1, Chunks: 1, Hash: []byte{1}}
peerA := &p2pmocks.Peer{}
peerA.On("ID").Return(p2p.ID("a"))
peerB := &p2pmocks.Peer{}
peerB.On("ID").Return(p2p.ID("b"))
_, err := pool.Add(peerA, s)
require.NoError(t, err)
_, err = pool.Add(peerB, s)
require.NoError(t, err)
_, err = pool.Add(peerA, &snapshot{Height: 2, Format: 1, Chunks: 1, Hash: []byte{1}})
require.NoError(t, err)
// GetPeer currently picks a random peer, so lets run it until we've seen both.
seenA := false
seenB := false
for !seenA || !seenB {
peer := pool.GetPeer(s)
switch peer.ID() {
case p2p.ID("a"):
seenA = true
case p2p.ID("b"):
seenB = true
}
}
// GetPeer should return nil for an unknown snapshot
peer := pool.GetPeer(&snapshot{Height: 9, Format: 9})
assert.Nil(t, peer)
}
func TestSnapshotPool_GetPeers(t *testing.T) {
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
pool := newSnapshotPool(stateProvider)
s := &snapshot{Height: 1, Format: 1, Chunks: 1, Hash: []byte{1}}
peerA := &p2pmocks.Peer{}
peerA.On("ID").Return(p2p.ID("a"))
peerB := &p2pmocks.Peer{}
peerB.On("ID").Return(p2p.ID("b"))
_, err := pool.Add(peerA, s)
require.NoError(t, err)
_, err = pool.Add(peerB, s)
require.NoError(t, err)
_, err = pool.Add(peerA, &snapshot{Height: 2, Format: 1, Chunks: 1, Hash: []byte{2}})
require.NoError(t, err)
peers := pool.GetPeers(s)
assert.Len(t, peers, 2)
assert.EqualValues(t, "a", peers[0].ID())
assert.EqualValues(t, "b", peers[1].ID())
}
func TestSnapshotPool_Ranked_Best(t *testing.T) {
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
pool := newSnapshotPool(stateProvider)
// snapshots in expected order (best to worst). Highest height wins, then highest format.
// Snapshots with different chunk hashes are considered different, and the most peers is
// tie-breaker.
expectSnapshots := []struct {
snapshot *snapshot
peers []string
}{
{&snapshot{Height: 2, Format: 2, Chunks: 4, Hash: []byte{1, 3}}, []string{"a", "b", "c"}},
{&snapshot{Height: 2, Format: 2, Chunks: 5, Hash: []byte{1, 2}}, []string{"a"}},
{&snapshot{Height: 2, Format: 1, Chunks: 3, Hash: []byte{1, 2}}, []string{"a", "b"}},
{&snapshot{Height: 1, Format: 2, Chunks: 5, Hash: []byte{1, 2}}, []string{"a", "b"}},
{&snapshot{Height: 1, Format: 1, Chunks: 4, Hash: []byte{1, 2}}, []string{"a", "b", "c"}},
}
// Add snapshots in reverse order, to make sure the pool enforces some order.
for i := len(expectSnapshots) - 1; i >= 0; i-- {
for _, peerID := range expectSnapshots[i].peers {
peer := &p2pmocks.Peer{}
peer.On("ID").Return(p2p.ID(peerID))
_, err := pool.Add(peer, expectSnapshots[i].snapshot)
require.NoError(t, err)
}
}
// Ranked should return the snapshots in the same order
ranked := pool.Ranked()
assert.Len(t, ranked, len(expectSnapshots))
for i := range ranked {
assert.Equal(t, expectSnapshots[i].snapshot, ranked[i])
}
// Check that best snapshots are returned in expected order
for i := range expectSnapshots {
snapshot := expectSnapshots[i].snapshot
require.Equal(t, snapshot, pool.Best())
pool.Reject(snapshot)
}
assert.Nil(t, pool.Best())
}
func TestSnapshotPool_Reject(t *testing.T) {
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
pool := newSnapshotPool(stateProvider)
peer := &p2pmocks.Peer{}
peer.On("ID").Return(p2p.ID("id"))
snapshots := []*snapshot{
{Height: 2, Format: 2, Chunks: 1, Hash: []byte{1, 2}},
{Height: 2, Format: 1, Chunks: 1, Hash: []byte{1, 2}},
{Height: 1, Format: 2, Chunks: 1, Hash: []byte{1, 2}},
{Height: 1, Format: 1, Chunks: 1, Hash: []byte{1, 2}},
}
for _, s := range snapshots {
_, err := pool.Add(peer, s)
require.NoError(t, err)
}
pool.Reject(snapshots[0])
assert.Equal(t, snapshots[1:], pool.Ranked())
added, err := pool.Add(peer, snapshots[0])
require.NoError(t, err)
assert.False(t, added)
added, err = pool.Add(peer, &snapshot{Height: 3, Format: 3, Chunks: 1, Hash: []byte{1}})
require.NoError(t, err)
assert.True(t, added)
}
// nolint: dupl
func TestSnapshotPool_RejectFormat(t *testing.T) {
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
pool := newSnapshotPool(stateProvider)
peer := &p2pmocks.Peer{}
peer.On("ID").Return(p2p.ID("id"))
snapshots := []*snapshot{
{Height: 2, Format: 2, Chunks: 1, Hash: []byte{1, 2}},
{Height: 2, Format: 1, Chunks: 1, Hash: []byte{1, 2}},
{Height: 1, Format: 2, Chunks: 1, Hash: []byte{1, 2}},
{Height: 1, Format: 1, Chunks: 1, Hash: []byte{1, 2}},
}
for _, s := range snapshots {
_, err := pool.Add(peer, s)
require.NoError(t, err)
}
pool.RejectFormat(1)
assert.Equal(t, []*snapshot{snapshots[0], snapshots[2]}, pool.Ranked())
added, err := pool.Add(peer, &snapshot{Height: 3, Format: 1, Chunks: 1, Hash: []byte{1}})
require.NoError(t, err)
assert.False(t, added)
assert.Equal(t, []*snapshot{snapshots[0], snapshots[2]}, pool.Ranked())
added, err = pool.Add(peer, &snapshot{Height: 3, Format: 3, Chunks: 1, Hash: []byte{1}})
require.NoError(t, err)
assert.True(t, added)
}
func TestSnapshotPool_RejectPeer(t *testing.T) {
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
pool := newSnapshotPool(stateProvider)
peerA := &p2pmocks.Peer{}
peerA.On("ID").Return(p2p.ID("a"))
peerB := &p2pmocks.Peer{}
peerB.On("ID").Return(p2p.ID("b"))
s1 := &snapshot{Height: 1, Format: 1, Chunks: 1, Hash: []byte{1}}
s2 := &snapshot{Height: 2, Format: 1, Chunks: 1, Hash: []byte{2}}
s3 := &snapshot{Height: 3, Format: 1, Chunks: 1, Hash: []byte{2}}
_, err := pool.Add(peerA, s1)
require.NoError(t, err)
_, err = pool.Add(peerA, s2)
require.NoError(t, err)
_, err = pool.Add(peerB, s2)
require.NoError(t, err)
_, err = pool.Add(peerB, s3)
require.NoError(t, err)
pool.RejectPeer(peerA.ID())
assert.Empty(t, pool.GetPeers(s1))
peers2 := pool.GetPeers(s2)
assert.Len(t, peers2, 1)
assert.EqualValues(t, "b", peers2[0].ID())
peers3 := pool.GetPeers(s2)
assert.Len(t, peers3, 1)
assert.EqualValues(t, "b", peers3[0].ID())
// it should no longer be possible to add the peer back
_, err = pool.Add(peerA, s1)
require.NoError(t, err)
assert.Empty(t, pool.GetPeers(s1))
}
func TestSnapshotPool_RemovePeer(t *testing.T) {
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
pool := newSnapshotPool(stateProvider)
peerA := &p2pmocks.Peer{}
peerA.On("ID").Return(p2p.ID("a"))
peerB := &p2pmocks.Peer{}
peerB.On("ID").Return(p2p.ID("b"))
s1 := &snapshot{Height: 1, Format: 1, Chunks: 1, Hash: []byte{1}}
s2 := &snapshot{Height: 2, Format: 1, Chunks: 1, Hash: []byte{2}}
_, err := pool.Add(peerA, s1)
require.NoError(t, err)
_, err = pool.Add(peerA, s2)
require.NoError(t, err)
_, err = pool.Add(peerB, s1)
require.NoError(t, err)
pool.RemovePeer(peerA.ID())
peers1 := pool.GetPeers(s1)
assert.Len(t, peers1, 1)
assert.EqualValues(t, "b", peers1[0].ID())
peers2 := pool.GetPeers(s2)
assert.Empty(t, peers2)
// it should still be possible to add the peer back
_, err = pool.Add(peerA, s1)
require.NoError(t, err)
peers1 = pool.GetPeers(s1)
assert.Len(t, peers1, 2)
assert.EqualValues(t, "a", peers1[0].ID())
assert.EqualValues(t, "b", peers1[1].ID())
}

179
statesync/stateprovider.go Normal file
View File

@@ -0,0 +1,179 @@
package statesync
import (
"fmt"
"strings"
"sync"
"time"
dbm "github.com/tendermint/tm-db"
"github.com/tendermint/tendermint/libs/log"
lite "github.com/tendermint/tendermint/lite2"
liteprovider "github.com/tendermint/tendermint/lite2/provider"
litehttp "github.com/tendermint/tendermint/lite2/provider/http"
literpc "github.com/tendermint/tendermint/lite2/rpc"
litedb "github.com/tendermint/tendermint/lite2/store/db"
rpchttp "github.com/tendermint/tendermint/rpc/client/http"
sm "github.com/tendermint/tendermint/state"
"github.com/tendermint/tendermint/types"
)
//go:generate mockery -case underscore -name StateProvider
// StateProvider is a provider of trusted state data for bootstrapping a node. This refers
// to the state.State object, not the state machine.
type StateProvider interface {
// AppHash returns the app hash after the given height has been committed.
AppHash(height uint64) ([]byte, error)
// Commit returns the commit at the given height.
Commit(height uint64) (*types.Commit, error)
// State returns a state object at the given height.
State(height uint64) (sm.State, error)
}
// lightClientStateProvider is a state provider using the light client.
type lightClientStateProvider struct {
sync.Mutex // lite.Client is not concurrency-safe
lc *lite.Client
version sm.Version
providers map[liteprovider.Provider]string
}
// NewLightClientStateProvider creates a new StateProvider using a light client and RPC clients.
func NewLightClientStateProvider(
chainID string,
version sm.Version,
servers []string,
trustOptions lite.TrustOptions,
logger log.Logger,
) (StateProvider, error) {
if len(servers) < 2 {
return nil, fmt.Errorf("at least 2 RPC servers are required, got %v", len(servers))
}
providers := make([]liteprovider.Provider, 0, len(servers))
providerRemotes := make(map[liteprovider.Provider]string)
for _, server := range servers {
client, err := rpcClient(server)
if err != nil {
return nil, fmt.Errorf("failed to set up RPC client: %w", err)
}
provider := litehttp.NewWithClient(chainID, client)
providers = append(providers, provider)
// We store the RPC addresses keyed by provider, so we can find the address of the primary
// provider used by the light client and use it to fetch consensus parameters.
providerRemotes[provider] = server
}
lc, err := lite.NewClient(chainID, trustOptions, providers[0], providers[1:],
litedb.New(dbm.NewMemDB(), ""), lite.Logger(logger), lite.MaxRetryAttempts(5))
if err != nil {
return nil, err
}
return &lightClientStateProvider{
lc: lc,
version: version,
providers: providerRemotes,
}, nil
}
// AppHash implements StateProvider.
func (s *lightClientStateProvider) AppHash(height uint64) ([]byte, error) {
s.Lock()
defer s.Unlock()
// We have to fetch the next height, which contains the app hash for the previous height.
header, err := s.lc.VerifyHeaderAtHeight(int64(height+1), time.Now())
if err != nil {
return nil, err
}
return header.AppHash, nil
}
// Commit implements StateProvider.
func (s *lightClientStateProvider) Commit(height uint64) (*types.Commit, error) {
s.Lock()
defer s.Unlock()
header, err := s.lc.VerifyHeaderAtHeight(int64(height), time.Now())
if err != nil {
return nil, err
}
return header.Commit, nil
}
// State implements StateProvider.
func (s *lightClientStateProvider) State(height uint64) (sm.State, error) {
s.Lock()
defer s.Unlock()
state := sm.State{
ChainID: s.lc.ChainID(),
Version: s.version,
}
// We need to verify up until h+2, to get the validator set. This also prefetches the headers
// for h and h+1 in the typical case where the trusted header is after the snapshot height.
_, err := s.lc.VerifyHeaderAtHeight(int64(height+2), time.Now())
if err != nil {
return sm.State{}, err
}
header, err := s.lc.VerifyHeaderAtHeight(int64(height), time.Now())
if err != nil {
return sm.State{}, err
}
nextHeader, err := s.lc.VerifyHeaderAtHeight(int64(height+1), time.Now())
if err != nil {
return sm.State{}, err
}
state.LastBlockHeight = header.Height
state.LastBlockTime = header.Time
state.LastBlockID = header.Commit.BlockID
state.AppHash = nextHeader.AppHash
state.LastResultsHash = nextHeader.LastResultsHash
state.LastValidators, _, err = s.lc.TrustedValidatorSet(int64(height))
if err != nil {
return sm.State{}, err
}
state.Validators, _, err = s.lc.TrustedValidatorSet(int64(height + 1))
if err != nil {
return sm.State{}, err
}
state.NextValidators, _, err = s.lc.TrustedValidatorSet(int64(height + 2))
if err != nil {
return sm.State{}, err
}
state.LastHeightValidatorsChanged = int64(height)
// We'll also need to fetch consensus params via RPC, using light client verification.
primaryURL, ok := s.providers[s.lc.Primary()]
if !ok || primaryURL == "" {
return sm.State{}, fmt.Errorf("could not find address for primary light client provider")
}
primaryRPC, err := rpcClient(primaryURL)
if err != nil {
return sm.State{}, fmt.Errorf("unable to create RPC client: %w", err)
}
rpcclient := literpc.NewClient(primaryRPC, s.lc)
result, err := rpcclient.ConsensusParams(&nextHeader.Height)
if err != nil {
return sm.State{}, fmt.Errorf("unable to fetch consensus parameters for height %v: %w",
nextHeader.Height, err)
}
state.ConsensusParams = result.ConsensusParams
return state, nil
}
// rpcClient sets up a new RPC client
func rpcClient(server string) (*rpchttp.HTTP, error) {
if !strings.Contains(server, "://") {
server = "http://" + server
}
c, err := rpchttp.New(server, "/websocket")
if err != nil {
return nil, err
}
return c, nil
}

442
statesync/syncer.go Normal file
View File

@@ -0,0 +1,442 @@
package statesync
import (
"bytes"
"context"
"errors"
"fmt"
"sync"
"time"
abci "github.com/tendermint/tendermint/abci/types"
"github.com/tendermint/tendermint/libs/log"
"github.com/tendermint/tendermint/p2p"
"github.com/tendermint/tendermint/proxy"
sm "github.com/tendermint/tendermint/state"
"github.com/tendermint/tendermint/types"
"github.com/tendermint/tendermint/version"
)
const (
// defaultDiscoveryTime is the time to spend discovering snapshots.
defaultDiscoveryTime = 20 * time.Second
// chunkFetchers is the number of concurrent chunk fetchers to run.
chunkFetchers = 4
// chunkTimeout is the timeout while waiting for the next chunk from the chunk queue.
chunkTimeout = 2 * time.Minute
// requestTimeout is the timeout before rerequesting a chunk, possibly from a different peer.
chunkRequestTimeout = 10 * time.Second
)
var (
// errAbort is returned by Sync() when snapshot restoration is aborted.
errAbort = errors.New("state sync aborted")
// errRetrySnapshot is returned by Sync() when the snapshot should be retried.
errRetrySnapshot = errors.New("retry snapshot")
// errRejectSnapshot is returned by Sync() when the snapshot is rejected.
errRejectSnapshot = errors.New("snapshot was rejected")
// errRejectFormat is returned by Sync() when the snapshot format is rejected.
errRejectFormat = errors.New("snapshot format was rejected")
// errRejectSender is returned by Sync() when the snapshot sender is rejected.
errRejectSender = errors.New("snapshot sender was rejected")
// errVerifyFailed is returned by Sync() when app hash or last height verification fails.
errVerifyFailed = errors.New("verification failed")
// errTimeout is returned by Sync() when we've waited too long to receive a chunk.
errTimeout = errors.New("timed out waiting for chunk")
// errNoSnapshots is returned by SyncAny() if no snapshots are found and discovery is disabled.
errNoSnapshots = errors.New("no suitable snapshots found")
)
// syncer runs a state sync against an ABCI app. Use either SyncAny() to automatically attempt to
// sync all snapshots in the pool (pausing to discover new ones), or Sync() to sync a specific
// snapshot. Snapshots and chunks are fed via AddSnapshot() and AddChunk() as appropriate.
type syncer struct {
logger log.Logger
stateProvider StateProvider
conn proxy.AppConnSnapshot
connQuery proxy.AppConnQuery
snapshots *snapshotPool
tempDir string
mtx sync.RWMutex
chunks *chunkQueue
}
// newSyncer creates a new syncer.
func newSyncer(logger log.Logger, conn proxy.AppConnSnapshot, connQuery proxy.AppConnQuery,
stateProvider StateProvider, tempDir string) *syncer {
return &syncer{
logger: logger,
stateProvider: stateProvider,
conn: conn,
connQuery: connQuery,
snapshots: newSnapshotPool(stateProvider),
tempDir: tempDir,
}
}
// AddChunk adds a chunk to the chunk queue, if any. It returns false if the chunk has already
// been added to the queue, or an error if there's no sync in progress.
func (s *syncer) AddChunk(chunk *chunk) (bool, error) {
s.mtx.RLock()
defer s.mtx.RUnlock()
if s.chunks == nil {
return false, errors.New("no state sync in progress")
}
added, err := s.chunks.Add(chunk)
if err != nil {
return false, err
}
if added {
s.logger.Debug("Added chunk to queue", "height", chunk.Height, "format", chunk.Format,
"chunk", chunk.Index)
} else {
s.logger.Debug("Ignoring duplicate chunk in queue", "height", chunk.Height, "format", chunk.Format,
"chunk", chunk.Index)
}
return added, nil
}
// AddSnapshot adds a snapshot to the snapshot pool. It returns true if a new, previously unseen
// snapshot was accepted and added.
func (s *syncer) AddSnapshot(peer p2p.Peer, snapshot *snapshot) (bool, error) {
added, err := s.snapshots.Add(peer, snapshot)
if err != nil {
return false, err
}
if added {
s.logger.Info("Discovered new snapshot", "height", snapshot.Height, "format", snapshot.Format,
"hash", fmt.Sprintf("%X", snapshot.Hash))
}
return added, nil
}
// AddPeer adds a peer to the pool. For now we just keep it simple and send a single request
// to discover snapshots, later we may want to do retries and stuff.
func (s *syncer) AddPeer(peer p2p.Peer) {
s.logger.Debug("Requesting snapshots from peer", "peer", peer.ID())
peer.Send(SnapshotChannel, cdc.MustMarshalBinaryBare(&snapshotsRequestMessage{}))
}
// RemovePeer removes a peer from the pool.
func (s *syncer) RemovePeer(peer p2p.Peer) {
s.logger.Debug("Removing peer from sync", "peer", peer.ID())
s.snapshots.RemovePeer(peer.ID())
}
// SyncAny tries to sync any of the snapshots in the snapshot pool, waiting to discover further
// snapshots if none were found and discoveryTime > 0. It returns the latest state and block commit
// which the caller must use to bootstrap the node.
func (s *syncer) SyncAny(discoveryTime time.Duration) (sm.State, *types.Commit, error) {
if discoveryTime > 0 {
s.logger.Info(fmt.Sprintf("Discovering snapshots for %v", discoveryTime))
time.Sleep(discoveryTime)
}
// The app may ask us to retry a snapshot restoration, in which case we need to reuse
// the snapshot and chunk queue from the previous loop iteration.
var (
snapshot *snapshot
chunks *chunkQueue
err error
)
for {
// If not nil, we're going to retry restoration of the same snapshot.
if snapshot == nil {
snapshot = s.snapshots.Best()
chunks = nil
}
if snapshot == nil {
if discoveryTime == 0 {
return sm.State{}, nil, errNoSnapshots
}
s.logger.Info(fmt.Sprintf("Discovering snapshots for %v", discoveryTime))
time.Sleep(discoveryTime)
continue
}
if chunks == nil {
chunks, err = newChunkQueue(snapshot, s.tempDir)
if err != nil {
return sm.State{}, nil, fmt.Errorf("failed to create chunk queue: %w", err)
}
defer chunks.Close() // in case we forget to close it elsewhere
}
newState, commit, err := s.Sync(snapshot, chunks)
switch {
case err == nil:
return newState, commit, nil
case errors.Is(err, errAbort):
return sm.State{}, nil, err
case errors.Is(err, errRetrySnapshot):
chunks.RetryAll()
s.logger.Info("Retrying snapshot", "height", snapshot.Height, "format", snapshot.Format,
"hash", fmt.Sprintf("%X", snapshot.Hash))
continue
case errors.Is(err, errTimeout):
s.snapshots.Reject(snapshot)
s.logger.Error("Timed out waiting for snapshot chunks, rejected snapshot",
"height", snapshot.Height, "format", snapshot.Format, "hash", fmt.Sprintf("%X", snapshot.Hash))
case errors.Is(err, errRejectSnapshot):
s.snapshots.Reject(snapshot)
s.logger.Info("Snapshot rejected", "height", snapshot.Height, "format", snapshot.Format,
"hash", fmt.Sprintf("%X", snapshot.Hash))
case errors.Is(err, errRejectFormat):
s.snapshots.RejectFormat(snapshot.Format)
s.logger.Info("Snapshot format rejected", "format", snapshot.Format)
case errors.Is(err, errRejectSender):
s.logger.Info("Snapshot senders rejected", "height", snapshot.Height, "format", snapshot.Format,
"hash", fmt.Sprintf("%X", snapshot.Hash))
for _, peer := range s.snapshots.GetPeers(snapshot) {
s.snapshots.RejectPeer(peer.ID())
s.logger.Info("Snapshot sender rejected", "peer", peer.ID())
}
default:
return sm.State{}, nil, fmt.Errorf("snapshot restoration failed: %w", err)
}
// Discard snapshot and chunks for next iteration
err = chunks.Close()
if err != nil {
s.logger.Error("Failed to clean up chunk queue", "err", err)
}
snapshot = nil
chunks = nil
}
}
// Sync executes a sync for a specific snapshot, returning the latest state and block commit which
// the caller must use to bootstrap the node.
func (s *syncer) Sync(snapshot *snapshot, chunks *chunkQueue) (sm.State, *types.Commit, error) {
s.mtx.Lock()
if s.chunks != nil {
s.mtx.Unlock()
return sm.State{}, nil, errors.New("a state sync is already in progress")
}
s.chunks = chunks
s.mtx.Unlock()
defer func() {
s.mtx.Lock()
s.chunks = nil
s.mtx.Unlock()
}()
// Offer snapshot to ABCI app.
err := s.offerSnapshot(snapshot)
if err != nil {
return sm.State{}, nil, err
}
// Spawn chunk fetchers. They will terminate when the chunk queue is closed or context cancelled.
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
for i := int32(0); i < chunkFetchers; i++ {
go s.fetchChunks(ctx, snapshot, chunks)
}
// Optimistically build new state, so we don't discover any light client failures at the end.
state, err := s.stateProvider.State(snapshot.Height)
if err != nil {
return sm.State{}, nil, fmt.Errorf("failed to build new state: %w", err)
}
commit, err := s.stateProvider.Commit(snapshot.Height)
if err != nil {
return sm.State{}, nil, fmt.Errorf("failed to fetch commit: %w", err)
}
// Restore snapshot
err = s.applyChunks(chunks)
if err != nil {
return sm.State{}, nil, err
}
// Verify app and update app version
appVersion, err := s.verifyApp(snapshot)
if err != nil {
return sm.State{}, nil, err
}
state.Version.Consensus.App = version.Protocol(appVersion)
// Done! 🎉
s.logger.Info("Snapshot restored", "height", snapshot.Height, "format", snapshot.Format,
"hash", fmt.Sprintf("%X", snapshot.Hash))
return state, commit, nil
}
// offerSnapshot offers a snapshot to the app. It returns various errors depending on the app's
// response, or nil if the snapshot was accepted.
func (s *syncer) offerSnapshot(snapshot *snapshot) error {
s.logger.Info("Offering snapshot to ABCI app", "height", snapshot.Height,
"format", snapshot.Format, "hash", fmt.Sprintf("%X", snapshot.Hash))
resp, err := s.conn.OfferSnapshotSync(abci.RequestOfferSnapshot{
Snapshot: &abci.Snapshot{
Height: snapshot.Height,
Format: snapshot.Format,
Chunks: snapshot.Chunks,
Hash: snapshot.Hash,
Metadata: snapshot.Metadata,
},
AppHash: snapshot.trustedAppHash,
})
if err != nil {
return fmt.Errorf("failed to offer snapshot: %w", err)
}
switch resp.Result {
case abci.ResponseOfferSnapshot_accept:
s.logger.Info("Snapshot accepted, restoring", "height", snapshot.Height,
"format", snapshot.Format, "hash", fmt.Sprintf("%X", snapshot.Hash))
return nil
case abci.ResponseOfferSnapshot_abort:
return errAbort
case abci.ResponseOfferSnapshot_reject:
return errRejectSnapshot
case abci.ResponseOfferSnapshot_reject_format:
return errRejectFormat
case abci.ResponseOfferSnapshot_reject_sender:
return errRejectSender
default:
return fmt.Errorf("invalid ResponseOfferSnapshot result %v", resp.Result)
}
}
// applyChunks applies chunks to the app. It returns various errors depending on the app's
// response, or nil once the snapshot is fully restored.
func (s *syncer) applyChunks(chunks *chunkQueue) error {
for {
chunk, err := chunks.Next()
if err == errDone {
return nil
} else if err != nil {
return fmt.Errorf("failed to fetch chunk: %w", err)
}
resp, err := s.conn.ApplySnapshotChunkSync(abci.RequestApplySnapshotChunk{
Index: chunk.Index,
Chunk: chunk.Chunk,
Sender: string(chunk.Sender),
})
if err != nil {
return fmt.Errorf("failed to apply chunk %v: %w", chunk.Index, err)
}
s.logger.Info("Applied snapshot chunk to ABCI app", "height", chunk.Height,
"format", chunk.Format, "chunk", chunk.Index, "total", chunks.Size())
// Discard and refetch any chunks as requested by the app
for _, index := range resp.RefetchChunks {
err := chunks.Discard(index)
if err != nil {
return fmt.Errorf("failed to discard chunk %v: %w", index, err)
}
}
// Reject any senders as requested by the app
for _, sender := range resp.RejectSenders {
if sender != "" {
s.snapshots.RejectPeer(p2p.ID(sender))
err := chunks.DiscardSender(p2p.ID(sender))
if err != nil {
return fmt.Errorf("failed to reject sender: %w", err)
}
}
}
switch resp.Result {
case abci.ResponseApplySnapshotChunk_accept:
case abci.ResponseApplySnapshotChunk_abort:
return errAbort
case abci.ResponseApplySnapshotChunk_retry:
chunks.Retry(chunk.Index)
case abci.ResponseApplySnapshotChunk_retry_snapshot:
return errRetrySnapshot
case abci.ResponseApplySnapshotChunk_reject_snapshot:
return errRejectSnapshot
default:
return fmt.Errorf("unknown ResponseApplySnapshotChunk result %v", resp.Result)
}
}
}
// fetchChunks requests chunks from peers, receiving allocations from the chunk queue. Chunks
// will be received from the reactor via syncer.AddChunks() to chunkQueue.Add().
func (s *syncer) fetchChunks(ctx context.Context, snapshot *snapshot, chunks *chunkQueue) {
for {
index, err := chunks.Allocate()
if err == errDone {
// Keep checking until the context is cancelled (restore is done), in case any
// chunks need to be refetched.
select {
case <-ctx.Done():
return
default:
}
time.Sleep(2 * time.Second)
continue
}
if err != nil {
s.logger.Error("Failed to allocate chunk from queue", "err", err)
return
}
s.logger.Info("Fetching snapshot chunk", "height", snapshot.Height,
"format", snapshot.Format, "chunk", index, "total", chunks.Size())
ticker := time.NewTicker(chunkRequestTimeout)
defer ticker.Stop()
s.requestChunk(snapshot, index)
select {
case <-chunks.WaitFor(index):
case <-ticker.C:
s.requestChunk(snapshot, index)
case <-ctx.Done():
return
}
ticker.Stop()
}
}
// requestChunk requests a chunk from a peer.
func (s *syncer) requestChunk(snapshot *snapshot, chunk uint32) {
peer := s.snapshots.GetPeer(snapshot)
if peer == nil {
s.logger.Error("No valid peers found for snapshot", "height", snapshot.Height,
"format", snapshot.Format, "hash", snapshot.Hash)
return
}
s.logger.Debug("Requesting snapshot chunk", "height", snapshot.Height,
"format", snapshot.Format, "chunk", chunk, "peer", peer.ID())
peer.Send(ChunkChannel, cdc.MustMarshalBinaryBare(&chunkRequestMessage{
Height: snapshot.Height,
Format: snapshot.Format,
Index: chunk,
}))
}
// verifyApp verifies the sync, checking the app hash and last block height. It returns the
// app version, which should be returned as part of the initial state.
func (s *syncer) verifyApp(snapshot *snapshot) (uint64, error) {
resp, err := s.connQuery.InfoSync(proxy.RequestInfo)
if err != nil {
return 0, fmt.Errorf("failed to query ABCI app for appHash: %w", err)
}
if !bytes.Equal(snapshot.trustedAppHash, resp.LastBlockAppHash) {
s.logger.Error("appHash verification failed",
"expected", fmt.Sprintf("%X", snapshot.trustedAppHash),
"actual", fmt.Sprintf("%X", resp.LastBlockAppHash))
return 0, errVerifyFailed
}
if uint64(resp.LastBlockHeight) != snapshot.Height {
s.logger.Error("ABCI app reported unexpected last block height",
"expected", snapshot.Height, "actual", resp.LastBlockHeight)
return 0, errVerifyFailed
}
s.logger.Info("Verified ABCI app", "height", snapshot.Height,
"appHash", fmt.Sprintf("%X", snapshot.trustedAppHash))
return resp.AppVersion, nil
}

639
statesync/syncer_test.go Normal file
View File

@@ -0,0 +1,639 @@
package statesync
import (
"errors"
"sync"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
"github.com/stretchr/testify/require"
abci "github.com/tendermint/tendermint/abci/types"
"github.com/tendermint/tendermint/libs/log"
"github.com/tendermint/tendermint/p2p"
p2pmocks "github.com/tendermint/tendermint/p2p/mocks"
"github.com/tendermint/tendermint/proxy"
proxymocks "github.com/tendermint/tendermint/proxy/mocks"
sm "github.com/tendermint/tendermint/state"
"github.com/tendermint/tendermint/statesync/mocks"
"github.com/tendermint/tendermint/types"
"github.com/tendermint/tendermint/version"
)
// Sets up a basic syncer that can be used to test OfferSnapshot requests
func setupOfferSyncer(t *testing.T) (*syncer, *proxymocks.AppConnSnapshot) {
connQuery := &proxymocks.AppConnQuery{}
connSnapshot := &proxymocks.AppConnSnapshot{}
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
syncer := newSyncer(log.NewNopLogger(), connSnapshot, connQuery, stateProvider, "")
return syncer, connSnapshot
}
// Sets up a simple peer mock with an ID
func simplePeer(id string) *p2pmocks.Peer {
peer := &p2pmocks.Peer{}
peer.On("ID").Return(p2p.ID(id))
return peer
}
func TestSyncer_SyncAny(t *testing.T) {
state := sm.State{
ChainID: "chain",
Version: sm.Version{
Consensus: version.Consensus{
Block: version.BlockProtocol,
App: 0,
},
Software: version.TMCoreSemVer,
},
LastBlockHeight: 1,
LastBlockID: types.BlockID{Hash: []byte("blockhash")},
LastBlockTime: time.Now(),
LastResultsHash: []byte("last_results_hash"),
AppHash: []byte("app_hash"),
LastValidators: &types.ValidatorSet{Proposer: &types.Validator{Address: []byte("val1")}},
Validators: &types.ValidatorSet{Proposer: &types.Validator{Address: []byte("val2")}},
NextValidators: &types.ValidatorSet{Proposer: &types.Validator{Address: []byte("val3")}},
ConsensusParams: *types.DefaultConsensusParams(),
LastHeightConsensusParamsChanged: 1,
}
commit := &types.Commit{BlockID: types.BlockID{Hash: []byte("blockhash")}}
chunks := []*chunk{
{Height: 1, Format: 1, Index: 0, Chunk: []byte{1, 1, 0}},
{Height: 1, Format: 1, Index: 1, Chunk: []byte{1, 1, 1}},
{Height: 1, Format: 1, Index: 2, Chunk: []byte{1, 1, 2}},
}
s := &snapshot{Height: 1, Format: 1, Chunks: 3, Hash: []byte{1, 2, 3}}
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", uint64(1)).Return(state.AppHash, nil)
stateProvider.On("AppHash", uint64(2)).Return([]byte("app_hash_2"), nil)
stateProvider.On("Commit", uint64(1)).Return(commit, nil)
stateProvider.On("State", uint64(1)).Return(state, nil)
connSnapshot := &proxymocks.AppConnSnapshot{}
connQuery := &proxymocks.AppConnQuery{}
syncer := newSyncer(log.NewNopLogger(), connSnapshot, connQuery, stateProvider, "")
// Adding a chunk should error when no sync is in progress
_, err := syncer.AddChunk(&chunk{Height: 1, Format: 1, Index: 0, Chunk: []byte{1}})
require.Error(t, err)
// Adding a couple of peers should trigger snapshot discovery messages
peerA := &p2pmocks.Peer{}
peerA.On("ID").Return(p2p.ID("a"))
peerA.On("Send", SnapshotChannel, cdc.MustMarshalBinaryBare(&snapshotsRequestMessage{})).Return(true)
syncer.AddPeer(peerA)
peerA.AssertExpectations(t)
peerB := &p2pmocks.Peer{}
peerB.On("ID").Return(p2p.ID("b"))
peerB.On("Send", SnapshotChannel, cdc.MustMarshalBinaryBare(&snapshotsRequestMessage{})).Return(true)
syncer.AddPeer(peerB)
peerB.AssertExpectations(t)
// Both peers report back with snapshots. One of them also returns a snapshot we don't want, in
// format 2, which will be rejected by the ABCI application.
new, err := syncer.AddSnapshot(peerA, s)
require.NoError(t, err)
assert.True(t, new)
new, err = syncer.AddSnapshot(peerB, s)
require.NoError(t, err)
assert.False(t, new)
new, err = syncer.AddSnapshot(peerB, &snapshot{Height: 2, Format: 2, Chunks: 3, Hash: []byte{1}})
require.NoError(t, err)
assert.True(t, new)
// We start a sync, with peers sending back chunks when requested. We first reject the snapshot
// with height 2 format 2, and accept the snapshot at height 1.
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: &abci.Snapshot{
Height: 2,
Format: 2,
Chunks: 3,
Hash: []byte{1},
},
AppHash: []byte("app_hash_2"),
}).Return(&abci.ResponseOfferSnapshot{Result: abci.ResponseOfferSnapshot_reject_format}, nil)
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: &abci.Snapshot{
Height: s.Height,
Format: s.Format,
Chunks: s.Chunks,
Hash: s.Hash,
Metadata: s.Metadata,
},
AppHash: []byte("app_hash"),
}).Times(2).Return(&abci.ResponseOfferSnapshot{Result: abci.ResponseOfferSnapshot_accept}, nil)
chunkRequests := make(map[uint32]int)
chunkRequestsMtx := sync.Mutex{}
onChunkRequest := func(args mock.Arguments) {
msg := &chunkRequestMessage{}
err := cdc.UnmarshalBinaryBare(args[1].([]byte), &msg)
require.NoError(t, err)
require.EqualValues(t, 1, msg.Height)
require.EqualValues(t, 1, msg.Format)
require.LessOrEqual(t, msg.Index, uint32(len(chunks)))
added, err := syncer.AddChunk(chunks[msg.Index])
require.NoError(t, err)
assert.True(t, added)
chunkRequestsMtx.Lock()
chunkRequests[msg.Index]++
chunkRequestsMtx.Unlock()
}
peerA.On("Send", ChunkChannel, mock.Anything).Maybe().Run(onChunkRequest).Return(true)
peerB.On("Send", ChunkChannel, mock.Anything).Maybe().Run(onChunkRequest).Return(true)
// The first time we're applying chunk 2 we tell it to retry the snapshot and discard chunk 1,
// which should cause it to keep the existing chunk 0 and 2, and restart restoration from
// beginning. We also wait for a little while, to exercise the retry logic in fetchChunks().
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 2, Chunk: []byte{1, 1, 2},
}).Once().Run(func(args mock.Arguments) { time.Sleep(2 * time.Second) }).Return(
&abci.ResponseApplySnapshotChunk{
Result: abci.ResponseApplySnapshotChunk_retry_snapshot,
RefetchChunks: []uint32{1},
}, nil)
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 0, Chunk: []byte{1, 1, 0},
}).Times(2).Return(&abci.ResponseApplySnapshotChunk{Result: abci.ResponseApplySnapshotChunk_accept}, nil)
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 1, Chunk: []byte{1, 1, 1},
}).Times(2).Return(&abci.ResponseApplySnapshotChunk{Result: abci.ResponseApplySnapshotChunk_accept}, nil)
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 2, Chunk: []byte{1, 1, 2},
}).Once().Return(&abci.ResponseApplySnapshotChunk{Result: abci.ResponseApplySnapshotChunk_accept}, nil)
connQuery.On("InfoSync", proxy.RequestInfo).Return(&abci.ResponseInfo{
AppVersion: 9,
LastBlockHeight: 1,
LastBlockAppHash: []byte("app_hash"),
}, nil)
newState, lastCommit, err := syncer.SyncAny(0)
require.NoError(t, err)
chunkRequestsMtx.Lock()
assert.Equal(t, map[uint32]int{0: 1, 1: 2, 2: 1}, chunkRequests)
chunkRequestsMtx.Unlock()
// The syncer should have updated the state app version from the ABCI info response.
expectState := state
expectState.Version.Consensus.App = 9
assert.Equal(t, expectState, newState)
assert.Equal(t, commit, lastCommit)
connSnapshot.AssertExpectations(t)
connQuery.AssertExpectations(t)
peerA.AssertExpectations(t)
peerB.AssertExpectations(t)
}
func TestSyncer_SyncAny_noSnapshots(t *testing.T) {
syncer, _ := setupOfferSyncer(t)
_, _, err := syncer.SyncAny(0)
assert.Equal(t, errNoSnapshots, err)
}
func TestSyncer_SyncAny_abort(t *testing.T) {
syncer, connSnapshot := setupOfferSyncer(t)
s := &snapshot{Height: 1, Format: 1, Chunks: 3, Hash: []byte{1, 2, 3}}
syncer.AddSnapshot(simplePeer("id"), s)
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: toABCI(s), AppHash: []byte("app_hash"),
}).Once().Return(&abci.ResponseOfferSnapshot{Result: abci.ResponseOfferSnapshot_abort}, nil)
_, _, err := syncer.SyncAny(0)
assert.Equal(t, errAbort, err)
connSnapshot.AssertExpectations(t)
}
func TestSyncer_SyncAny_reject(t *testing.T) {
syncer, connSnapshot := setupOfferSyncer(t)
// s22 is tried first, then s12, then s11, then errNoSnapshots
s22 := &snapshot{Height: 2, Format: 2, Chunks: 3, Hash: []byte{1, 2, 3}}
s12 := &snapshot{Height: 1, Format: 2, Chunks: 3, Hash: []byte{1, 2, 3}}
s11 := &snapshot{Height: 1, Format: 1, Chunks: 3, Hash: []byte{1, 2, 3}}
syncer.AddSnapshot(simplePeer("id"), s22)
syncer.AddSnapshot(simplePeer("id"), s12)
syncer.AddSnapshot(simplePeer("id"), s11)
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: toABCI(s22), AppHash: []byte("app_hash"),
}).Once().Return(&abci.ResponseOfferSnapshot{Result: abci.ResponseOfferSnapshot_reject}, nil)
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: toABCI(s12), AppHash: []byte("app_hash"),
}).Once().Return(&abci.ResponseOfferSnapshot{Result: abci.ResponseOfferSnapshot_reject}, nil)
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: toABCI(s11), AppHash: []byte("app_hash"),
}).Once().Return(&abci.ResponseOfferSnapshot{Result: abci.ResponseOfferSnapshot_reject}, nil)
_, _, err := syncer.SyncAny(0)
assert.Equal(t, errNoSnapshots, err)
connSnapshot.AssertExpectations(t)
}
func TestSyncer_SyncAny_reject_format(t *testing.T) {
syncer, connSnapshot := setupOfferSyncer(t)
// s22 is tried first, which reject s22 and s12, then s11 will abort.
s22 := &snapshot{Height: 2, Format: 2, Chunks: 3, Hash: []byte{1, 2, 3}}
s12 := &snapshot{Height: 1, Format: 2, Chunks: 3, Hash: []byte{1, 2, 3}}
s11 := &snapshot{Height: 1, Format: 1, Chunks: 3, Hash: []byte{1, 2, 3}}
syncer.AddSnapshot(simplePeer("id"), s22)
syncer.AddSnapshot(simplePeer("id"), s12)
syncer.AddSnapshot(simplePeer("id"), s11)
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: toABCI(s22), AppHash: []byte("app_hash"),
}).Once().Return(&abci.ResponseOfferSnapshot{Result: abci.ResponseOfferSnapshot_reject_format}, nil)
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: toABCI(s11), AppHash: []byte("app_hash"),
}).Once().Return(&abci.ResponseOfferSnapshot{Result: abci.ResponseOfferSnapshot_abort}, nil)
_, _, err := syncer.SyncAny(0)
assert.Equal(t, errAbort, err)
connSnapshot.AssertExpectations(t)
}
func TestSyncer_SyncAny_reject_sender(t *testing.T) {
syncer, connSnapshot := setupOfferSyncer(t)
peerA := simplePeer("a")
peerB := simplePeer("b")
peerC := simplePeer("c")
// sbc will be offered first, which will be rejected with reject_sender, causing all snapshots
// submitted by both b and c (i.e. sb, sc, sbc) to be rejected. Finally, sa will reject and
// errNoSnapshots is returned.
sa := &snapshot{Height: 1, Format: 1, Chunks: 3, Hash: []byte{1, 2, 3}}
sb := &snapshot{Height: 2, Format: 1, Chunks: 3, Hash: []byte{1, 2, 3}}
sc := &snapshot{Height: 3, Format: 1, Chunks: 3, Hash: []byte{1, 2, 3}}
sbc := &snapshot{Height: 4, Format: 1, Chunks: 3, Hash: []byte{1, 2, 3}}
_, err := syncer.AddSnapshot(peerA, sa)
require.NoError(t, err)
_, err = syncer.AddSnapshot(peerB, sb)
require.NoError(t, err)
_, err = syncer.AddSnapshot(peerC, sc)
require.NoError(t, err)
_, err = syncer.AddSnapshot(peerB, sbc)
require.NoError(t, err)
_, err = syncer.AddSnapshot(peerC, sbc)
require.NoError(t, err)
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: toABCI(sbc), AppHash: []byte("app_hash"),
}).Once().Return(&abci.ResponseOfferSnapshot{Result: abci.ResponseOfferSnapshot_reject_sender}, nil)
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: toABCI(sa), AppHash: []byte("app_hash"),
}).Once().Return(&abci.ResponseOfferSnapshot{Result: abci.ResponseOfferSnapshot_reject}, nil)
_, _, err = syncer.SyncAny(0)
assert.Equal(t, errNoSnapshots, err)
connSnapshot.AssertExpectations(t)
}
func TestSyncer_SyncAny_abciError(t *testing.T) {
syncer, connSnapshot := setupOfferSyncer(t)
errBoom := errors.New("boom")
s := &snapshot{Height: 1, Format: 1, Chunks: 3, Hash: []byte{1, 2, 3}}
syncer.AddSnapshot(simplePeer("id"), s)
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: toABCI(s), AppHash: []byte("app_hash"),
}).Once().Return(nil, errBoom)
_, _, err := syncer.SyncAny(0)
assert.True(t, errors.Is(err, errBoom))
connSnapshot.AssertExpectations(t)
}
func TestSyncer_offerSnapshot(t *testing.T) {
unknownErr := errors.New("unknown error")
boom := errors.New("boom")
testcases := map[string]struct {
result abci.ResponseOfferSnapshot_Result
err error
expectErr error
}{
"accept": {abci.ResponseOfferSnapshot_accept, nil, nil},
"abort": {abci.ResponseOfferSnapshot_abort, nil, errAbort},
"reject": {abci.ResponseOfferSnapshot_reject, nil, errRejectSnapshot},
"reject_format": {abci.ResponseOfferSnapshot_reject_format, nil, errRejectFormat},
"reject_sender": {abci.ResponseOfferSnapshot_reject_sender, nil, errRejectSender},
"error": {0, boom, boom},
"unknown result": {9, nil, unknownErr},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
syncer, connSnapshot := setupOfferSyncer(t)
s := &snapshot{Height: 1, Format: 1, Chunks: 3, Hash: []byte{1, 2, 3}, trustedAppHash: []byte("app_hash")}
connSnapshot.On("OfferSnapshotSync", abci.RequestOfferSnapshot{
Snapshot: toABCI(s),
AppHash: []byte("app_hash"),
}).Return(&abci.ResponseOfferSnapshot{Result: tc.result}, tc.err)
err := syncer.offerSnapshot(s)
if tc.expectErr == unknownErr {
require.Error(t, err)
} else {
unwrapped := errors.Unwrap(err)
if unwrapped != nil {
err = unwrapped
}
assert.Equal(t, tc.expectErr, err)
}
})
}
}
func TestSyncer_applyChunks_Results(t *testing.T) {
unknownErr := errors.New("unknown error")
boom := errors.New("boom")
testcases := map[string]struct {
result abci.ResponseApplySnapshotChunk_Result
err error
expectErr error
}{
"accept": {abci.ResponseApplySnapshotChunk_accept, nil, nil},
"abort": {abci.ResponseApplySnapshotChunk_abort, nil, errAbort},
"retry": {abci.ResponseApplySnapshotChunk_retry, nil, nil},
"retry_snapshot": {abci.ResponseApplySnapshotChunk_retry_snapshot, nil, errRetrySnapshot},
"reject_snapshot": {abci.ResponseApplySnapshotChunk_reject_snapshot, nil, errRejectSnapshot},
"error": {0, boom, boom},
"unknown result": {9, nil, unknownErr},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
connQuery := &proxymocks.AppConnQuery{}
connSnapshot := &proxymocks.AppConnSnapshot{}
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
syncer := newSyncer(log.NewNopLogger(), connSnapshot, connQuery, stateProvider, "")
body := []byte{1, 2, 3}
chunks, err := newChunkQueue(&snapshot{Height: 1, Format: 1, Chunks: 1}, "")
chunks.Add(&chunk{Height: 1, Format: 1, Index: 0, Chunk: body})
require.NoError(t, err)
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 0, Chunk: body,
}).Once().Return(&abci.ResponseApplySnapshotChunk{Result: tc.result}, tc.err)
if tc.result == abci.ResponseApplySnapshotChunk_retry {
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 0, Chunk: body,
}).Once().Return(&abci.ResponseApplySnapshotChunk{
Result: abci.ResponseApplySnapshotChunk_accept}, nil)
}
err = syncer.applyChunks(chunks)
if tc.expectErr == unknownErr {
require.Error(t, err)
} else {
unwrapped := errors.Unwrap(err)
if unwrapped != nil {
err = unwrapped
}
assert.Equal(t, tc.expectErr, err)
}
connSnapshot.AssertExpectations(t)
})
}
}
func TestSyncer_applyChunks_RefetchChunks(t *testing.T) {
// Discarding chunks via refetch_chunks should work the same for all results
testcases := map[string]struct {
result abci.ResponseApplySnapshotChunk_Result
}{
"accept": {abci.ResponseApplySnapshotChunk_accept},
"abort": {abci.ResponseApplySnapshotChunk_abort},
"retry": {abci.ResponseApplySnapshotChunk_retry},
"retry_snapshot": {abci.ResponseApplySnapshotChunk_retry_snapshot},
"reject_snapshot": {abci.ResponseApplySnapshotChunk_reject_snapshot},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
connQuery := &proxymocks.AppConnQuery{}
connSnapshot := &proxymocks.AppConnSnapshot{}
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
syncer := newSyncer(log.NewNopLogger(), connSnapshot, connQuery, stateProvider, "")
chunks, err := newChunkQueue(&snapshot{Height: 1, Format: 1, Chunks: 3}, "")
require.NoError(t, err)
added, err := chunks.Add(&chunk{Height: 1, Format: 1, Index: 0, Chunk: []byte{0}})
require.True(t, added)
require.NoError(t, err)
added, err = chunks.Add(&chunk{Height: 1, Format: 1, Index: 1, Chunk: []byte{1}})
require.True(t, added)
require.NoError(t, err)
added, err = chunks.Add(&chunk{Height: 1, Format: 1, Index: 2, Chunk: []byte{2}})
require.True(t, added)
require.NoError(t, err)
// The first two chunks are accepted, before the last one asks for 1 to be refetched
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 0, Chunk: []byte{0},
}).Once().Return(&abci.ResponseApplySnapshotChunk{Result: abci.ResponseApplySnapshotChunk_accept}, nil)
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 1, Chunk: []byte{1},
}).Once().Return(&abci.ResponseApplySnapshotChunk{Result: abci.ResponseApplySnapshotChunk_accept}, nil)
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 2, Chunk: []byte{2},
}).Once().Return(&abci.ResponseApplySnapshotChunk{
Result: tc.result,
RefetchChunks: []uint32{1},
}, nil)
// Since removing the chunk will cause Next() to block, we spawn a goroutine, then
// check the queue contents, and finally close the queue to end the goroutine.
// We don't really care about the result of applyChunks, since it has separate test.
go func() {
syncer.applyChunks(chunks)
}()
time.Sleep(50 * time.Millisecond)
assert.True(t, chunks.Has(0))
assert.False(t, chunks.Has(1))
assert.True(t, chunks.Has(2))
err = chunks.Close()
require.NoError(t, err)
})
}
}
func TestSyncer_applyChunks_RejectSenders(t *testing.T) {
// Banning chunks senders via ban_chunk_senders should work the same for all results
testcases := map[string]struct {
result abci.ResponseApplySnapshotChunk_Result
}{
"accept": {abci.ResponseApplySnapshotChunk_accept},
"abort": {abci.ResponseApplySnapshotChunk_abort},
"retry": {abci.ResponseApplySnapshotChunk_retry},
"retry_snapshot": {abci.ResponseApplySnapshotChunk_retry_snapshot},
"reject_snapshot": {abci.ResponseApplySnapshotChunk_reject_snapshot},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
connQuery := &proxymocks.AppConnQuery{}
connSnapshot := &proxymocks.AppConnSnapshot{}
stateProvider := &mocks.StateProvider{}
stateProvider.On("AppHash", mock.Anything).Return([]byte("app_hash"), nil)
syncer := newSyncer(log.NewNopLogger(), connSnapshot, connQuery, stateProvider, "")
// Set up three peers across two snapshots, and ask for one of them to be banned.
// It should be banned from all snapshots.
peerA := simplePeer("a")
peerB := simplePeer("b")
peerC := simplePeer("c")
s1 := &snapshot{Height: 1, Format: 1, Chunks: 3}
s2 := &snapshot{Height: 2, Format: 1, Chunks: 3}
syncer.AddSnapshot(peerA, s1)
syncer.AddSnapshot(peerA, s2)
syncer.AddSnapshot(peerB, s1)
syncer.AddSnapshot(peerB, s2)
syncer.AddSnapshot(peerC, s1)
syncer.AddSnapshot(peerC, s2)
chunks, err := newChunkQueue(s1, "")
require.NoError(t, err)
added, err := chunks.Add(&chunk{Height: 1, Format: 1, Index: 0, Chunk: []byte{0}, Sender: peerA.ID()})
require.True(t, added)
require.NoError(t, err)
added, err = chunks.Add(&chunk{Height: 1, Format: 1, Index: 1, Chunk: []byte{1}, Sender: peerB.ID()})
require.True(t, added)
require.NoError(t, err)
added, err = chunks.Add(&chunk{Height: 1, Format: 1, Index: 2, Chunk: []byte{2}, Sender: peerC.ID()})
require.True(t, added)
require.NoError(t, err)
// The first two chunks are accepted, before the last one asks for b sender to be rejected
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 0, Chunk: []byte{0}, Sender: "a",
}).Once().Return(&abci.ResponseApplySnapshotChunk{Result: abci.ResponseApplySnapshotChunk_accept}, nil)
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 1, Chunk: []byte{1}, Sender: "b",
}).Once().Return(&abci.ResponseApplySnapshotChunk{Result: abci.ResponseApplySnapshotChunk_accept}, nil)
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 2, Chunk: []byte{2}, Sender: "c",
}).Once().Return(&abci.ResponseApplySnapshotChunk{
Result: tc.result,
RejectSenders: []string{string(peerB.ID())},
}, nil)
// On retry, the last chunk will be tried again, so we just accept it then.
if tc.result == abci.ResponseApplySnapshotChunk_retry {
connSnapshot.On("ApplySnapshotChunkSync", abci.RequestApplySnapshotChunk{
Index: 2, Chunk: []byte{2}, Sender: "c",
}).Once().Return(&abci.ResponseApplySnapshotChunk{Result: abci.ResponseApplySnapshotChunk_accept}, nil)
}
// We don't really care about the result of applyChunks, since it has separate test.
// However, it will block on e.g. retry result, so we spawn a goroutine that will
// be shut down when the chunk queue closes.
go func() {
syncer.applyChunks(chunks)
}()
time.Sleep(50 * time.Millisecond)
s1peers := syncer.snapshots.GetPeers(s1)
assert.Len(t, s1peers, 2)
assert.EqualValues(t, "a", s1peers[0].ID())
assert.EqualValues(t, "c", s1peers[1].ID())
syncer.snapshots.GetPeers(s1)
assert.Len(t, s1peers, 2)
assert.EqualValues(t, "a", s1peers[0].ID())
assert.EqualValues(t, "c", s1peers[1].ID())
err = chunks.Close()
require.NoError(t, err)
})
}
}
func TestSyncer_verifyApp(t *testing.T) {
boom := errors.New("boom")
s := &snapshot{Height: 3, Format: 1, Chunks: 5, Hash: []byte{1, 2, 3}, trustedAppHash: []byte("app_hash")}
testcases := map[string]struct {
response *abci.ResponseInfo
err error
expectErr error
}{
"verified": {&abci.ResponseInfo{
LastBlockHeight: 3,
LastBlockAppHash: []byte("app_hash"),
AppVersion: 9,
}, nil, nil},
"invalid height": {&abci.ResponseInfo{
LastBlockHeight: 5,
LastBlockAppHash: []byte("app_hash"),
AppVersion: 9,
}, nil, errVerifyFailed},
"invalid hash": {&abci.ResponseInfo{
LastBlockHeight: 3,
LastBlockAppHash: []byte("xxx"),
AppVersion: 9,
}, nil, errVerifyFailed},
"error": {nil, boom, boom},
}
for name, tc := range testcases {
tc := tc
t.Run(name, func(t *testing.T) {
connQuery := &proxymocks.AppConnQuery{}
connSnapshot := &proxymocks.AppConnSnapshot{}
stateProvider := &mocks.StateProvider{}
syncer := newSyncer(log.NewNopLogger(), connSnapshot, connQuery, stateProvider, "")
connQuery.On("InfoSync", proxy.RequestInfo).Return(tc.response, tc.err)
version, err := syncer.verifyApp(s)
unwrapped := errors.Unwrap(err)
if unwrapped != nil {
err = unwrapped
}
assert.Equal(t, tc.expectErr, err)
if err == nil {
assert.Equal(t, tc.response.AppVersion, version)
}
})
}
}
func toABCI(s *snapshot) *abci.Snapshot {
return &abci.Snapshot{
Height: s.Height,
Format: s.Format,
Chunks: s.Chunks,
Hash: s.Hash,
Metadata: s.Metadata,
}
}

View File

@@ -333,6 +333,12 @@ func (bs *BlockStore) saveState() {
bsJSON.Save(bs.db)
}
// SaveSeenCommit saves a seen commit, used by e.g. the state sync reactor when bootstrapping node.
func (bs *BlockStore) SaveSeenCommit(height int64, seenCommit *types.Commit) error {
seenCommitBytes := cdc.MustMarshalBinaryBare(seenCommit)
return bs.db.Set(calcSeenCommitKey(height), seenCommitBytes)
}
//-----------------------------------------------------------------------------
func calcBlockMetaKey(height int64) []byte {

View File

@@ -23,7 +23,7 @@ const (
TMCoreSemVer = "0.33.4"
// ABCISemVer is the semantic version of the ABCI library
ABCISemVer = "0.16.2"
ABCISemVer = "0.17.0"
ABCIVersion = ABCISemVer
)