mirror of
https://github.com/tendermint/tendermint.git
synced 2026-01-09 22:47:24 +00:00
light: correctly handle contexts (backport -> v0.34.x) (#6685)
This commit is contained in:
@@ -24,3 +24,6 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi
|
||||
|
||||
### BUG FIXES
|
||||
|
||||
- [light] [\#6685](https://github.com/tendermint/tendermint/pull/6685) fix bug
|
||||
with incorrecly handling contexts which occasionally froze state sync.
|
||||
|
||||
|
||||
@@ -996,6 +996,9 @@ func (c *Client) lightBlockFromPrimary(ctx context.Context, height int64) (*type
|
||||
// Everything went smoothly. We reset the lightBlockRequests and return the light block
|
||||
return l, nil
|
||||
|
||||
case context.Canceled, context.DeadlineExceeded:
|
||||
return l, err
|
||||
|
||||
case provider.ErrNoResponse, provider.ErrLightBlockNotFound, provider.ErrHeightTooHigh:
|
||||
// we find a new witness to replace the primary
|
||||
c.logger.Debug("error from light block request from primary, replacing...",
|
||||
|
||||
@@ -18,8 +18,9 @@ var (
|
||||
// This is very brittle, see: https://github.com/tendermint/tendermint/issues/4740
|
||||
regexpMissingHeight = regexp.MustCompile(`height \d+ is not available`)
|
||||
regexpTooHigh = regexp.MustCompile(`height \d+ must be less than or equal to`)
|
||||
regexpTimedOut = regexp.MustCompile(`Timeout exceeded`)
|
||||
|
||||
maxRetryAttempts = 10
|
||||
maxRetryAttempts = 5
|
||||
timeout uint = 5 // sec.
|
||||
)
|
||||
|
||||
@@ -119,45 +120,51 @@ func (p *http) validatorSet(ctx context.Context, height *int64) (*types.Validato
|
||||
total = -1
|
||||
)
|
||||
|
||||
OUTER_LOOP:
|
||||
for len(vals) != total && page <= maxPages {
|
||||
for attempt := 1; attempt <= maxRetryAttempts; attempt++ {
|
||||
res, err := p.client.Validators(ctx, height, &page, &perPage)
|
||||
if err != nil {
|
||||
// TODO: standardize errors on the RPC side
|
||||
if regexpTooHigh.MatchString(err.Error()) {
|
||||
return nil, provider.ErrHeightTooHigh
|
||||
switch {
|
||||
case err == nil:
|
||||
// Validate response.
|
||||
if len(res.Validators) == 0 {
|
||||
return nil, provider.ErrBadLightBlock{
|
||||
Reason: fmt.Errorf("validator set is empty (height: %d, page: %d, per_page: %d)",
|
||||
height, page, perPage),
|
||||
}
|
||||
}
|
||||
if res.Total <= 0 {
|
||||
return nil, provider.ErrBadLightBlock{
|
||||
Reason: fmt.Errorf("total number of vals is <= 0: %d (height: %d, page: %d, per_page: %d)",
|
||||
res.Total, height, page, perPage),
|
||||
}
|
||||
}
|
||||
|
||||
if regexpMissingHeight.MatchString(err.Error()) {
|
||||
return nil, provider.ErrLightBlockNotFound
|
||||
}
|
||||
// if we have exceeded retry attempts then return no response error
|
||||
if attempt == maxRetryAttempts {
|
||||
return nil, provider.ErrNoResponse
|
||||
}
|
||||
// else we wait and try again with exponential backoff
|
||||
total = res.Total
|
||||
vals = append(vals, res.Validators...)
|
||||
page++
|
||||
continue OUTER_LOOP
|
||||
|
||||
case regexpTooHigh.MatchString(err.Error()):
|
||||
return nil, provider.ErrHeightTooHigh
|
||||
|
||||
case regexpMissingHeight.MatchString(err.Error()):
|
||||
return nil, provider.ErrLightBlockNotFound
|
||||
|
||||
// if we have exceeded retry attempts then return no response error
|
||||
case attempt == maxRetryAttempts:
|
||||
return nil, provider.ErrNoResponse
|
||||
|
||||
case regexpTimedOut.MatchString(err.Error()):
|
||||
// we wait and try again with exponential backoff
|
||||
time.Sleep(backoffTimeout(uint16(attempt)))
|
||||
continue
|
||||
|
||||
// context canceled or connection refused we return the error
|
||||
default:
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Validate response.
|
||||
if len(res.Validators) == 0 {
|
||||
return nil, provider.ErrBadLightBlock{
|
||||
Reason: fmt.Errorf("validator set is empty (height: %d, page: %d, per_page: %d)",
|
||||
height, page, perPage),
|
||||
}
|
||||
}
|
||||
if res.Total <= 0 {
|
||||
return nil, provider.ErrBadLightBlock{
|
||||
Reason: fmt.Errorf("total number of vals is <= 0: %d (height: %d, page: %d, per_page: %d)",
|
||||
res.Total, height, page, perPage),
|
||||
}
|
||||
}
|
||||
|
||||
total = res.Total
|
||||
vals = append(vals, res.Validators...)
|
||||
page++
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
@@ -171,20 +178,25 @@ func (p *http) validatorSet(ctx context.Context, height *int64) (*types.Validato
|
||||
func (p *http) signedHeader(ctx context.Context, height *int64) (*types.SignedHeader, error) {
|
||||
for attempt := 1; attempt <= maxRetryAttempts; attempt++ {
|
||||
commit, err := p.client.Commit(ctx, height)
|
||||
if err != nil {
|
||||
// TODO: standardize errors on the RPC side
|
||||
if regexpTooHigh.MatchString(err.Error()) {
|
||||
return nil, provider.ErrHeightTooHigh
|
||||
}
|
||||
switch {
|
||||
case err == nil:
|
||||
return &commit.SignedHeader, nil
|
||||
|
||||
if regexpMissingHeight.MatchString(err.Error()) {
|
||||
return nil, provider.ErrLightBlockNotFound
|
||||
}
|
||||
case regexpTooHigh.MatchString(err.Error()):
|
||||
return nil, provider.ErrHeightTooHigh
|
||||
|
||||
case regexpMissingHeight.MatchString(err.Error()):
|
||||
return nil, provider.ErrLightBlockNotFound
|
||||
|
||||
case regexpTimedOut.MatchString(err.Error()):
|
||||
// we wait and try again with exponential backoff
|
||||
time.Sleep(backoffTimeout(uint16(attempt)))
|
||||
continue
|
||||
|
||||
// either context was cancelled or connection refused.
|
||||
default:
|
||||
return nil, err
|
||||
}
|
||||
return &commit.SignedHeader, nil
|
||||
}
|
||||
return nil, provider.ErrNoResponse
|
||||
}
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
@@ -32,27 +33,17 @@ func TestNewProvider(t *testing.T) {
|
||||
require.Equal(t, fmt.Sprintf("%s", c), "http{http://153.200.0.1}")
|
||||
}
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
func TestProvider(t *testing.T) {
|
||||
app := kvstore.NewApplication()
|
||||
app.RetainBlocks = 9
|
||||
app.RetainBlocks = 10
|
||||
node := rpctest.StartTendermint(app)
|
||||
|
||||
code := m.Run()
|
||||
|
||||
rpctest.StopTendermint(node)
|
||||
os.Exit(code)
|
||||
}
|
||||
|
||||
func TestProvider(t *testing.T) {
|
||||
cfg := rpctest.GetConfig()
|
||||
defer os.RemoveAll(cfg.RootDir)
|
||||
rpcAddr := cfg.RPC.ListenAddress
|
||||
genDoc, err := types.GenesisDocFromFile(cfg.GenesisFile())
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
require.NoError(t, err)
|
||||
chainID := genDoc.ChainID
|
||||
t.Log("chainID:", chainID)
|
||||
|
||||
c, err := rpchttp.New(rpcAddr, "/websocket")
|
||||
require.Nil(t, err)
|
||||
@@ -66,26 +57,37 @@ func TestProvider(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
// let's get the highest block
|
||||
sh, err := p.LightBlock(context.Background(), 0)
|
||||
lb, err := p.LightBlock(context.Background(), 0)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, sh.Height < 1000)
|
||||
require.NotNil(t, lb)
|
||||
assert.True(t, lb.Height < 1000)
|
||||
|
||||
// let's check this is valid somehow
|
||||
assert.Nil(t, sh.ValidateBasic(chainID))
|
||||
assert.Nil(t, lb.ValidateBasic(chainID))
|
||||
|
||||
// historical queries now work :)
|
||||
lower := sh.Height - 3
|
||||
sh, err = p.LightBlock(context.Background(), lower)
|
||||
lower := lb.Height - 3
|
||||
lb, err = p.LightBlock(context.Background(), lower)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, lower, sh.Height)
|
||||
assert.Equal(t, lower, lb.Height)
|
||||
|
||||
// fetching missing heights (both future and pruned) should return appropriate errors
|
||||
lb, err := p.LightBlock(context.Background(), 1000)
|
||||
lb, err = p.LightBlock(context.Background(), 1000)
|
||||
require.Error(t, err)
|
||||
require.Nil(t, lb)
|
||||
assert.Equal(t, provider.ErrHeightTooHigh, err)
|
||||
|
||||
_, err = p.LightBlock(context.Background(), 1)
|
||||
require.Error(t, err)
|
||||
require.Nil(t, lb)
|
||||
assert.Equal(t, provider.ErrLightBlockNotFound, err)
|
||||
|
||||
// stop the full node and check that a no response error is returned
|
||||
rpctest.StopTendermint(node)
|
||||
time.Sleep(10 * time.Second)
|
||||
lb, err = p.LightBlock(context.Background(), lower+2)
|
||||
// we should see a connection refused
|
||||
require.Error(t, err)
|
||||
require.Contains(t, err.Error(), "connection refused")
|
||||
require.Nil(t, lb)
|
||||
}
|
||||
|
||||
@@ -142,7 +142,7 @@ func StartTendermint(app abci.Application, opts ...func(*Options)) *nm.Node {
|
||||
// cleans up test/config files.
|
||||
func StopTendermint(node *nm.Node) {
|
||||
if err := node.Stop(); err != nil {
|
||||
node.Logger.Error("Error when tryint to stop node", "err", err)
|
||||
node.Logger.Error("Error when trying to stop node", "err", err)
|
||||
}
|
||||
node.Wait()
|
||||
os.RemoveAll(node.Config().RootDir)
|
||||
|
||||
@@ -154,6 +154,7 @@ func (r *Reactor) Receive(chID byte, src p2p.Peer, msgBytes []byte) {
|
||||
Hash: msg.Hash,
|
||||
Metadata: msg.Metadata,
|
||||
})
|
||||
// TODO: We may want to consider punishing the peer for certain errors
|
||||
if err != nil {
|
||||
r.Logger.Error("Failed to add snapshot", "height", msg.Height, "format", msg.Format,
|
||||
"peer", src.ID(), "err", err)
|
||||
|
||||
@@ -213,6 +213,10 @@ func (s *syncer) SyncAny(discoveryTime time.Duration, retryHook func()) (sm.Stat
|
||||
s.logger.Info("Snapshot sender rejected", "peer", peer.ID())
|
||||
}
|
||||
|
||||
case errors.Is(err, context.DeadlineExceeded):
|
||||
s.logger.Info("Timed out validating snapshot, rejecting", "height", snapshot.Height, "err", err)
|
||||
s.snapshots.Reject(snapshot)
|
||||
|
||||
default:
|
||||
return sm.State{}, nil, fmt.Errorf("snapshot restoration failed: %w", err)
|
||||
}
|
||||
@@ -256,7 +260,7 @@ func (s *syncer) Sync(snapshot *snapshot, chunks *chunkQueue) (sm.State, *types.
|
||||
go s.fetchChunks(ctx, snapshot, chunks)
|
||||
}
|
||||
|
||||
pctx, pcancel := context.WithTimeout(context.Background(), 10*time.Second)
|
||||
pctx, pcancel := context.WithTimeout(context.Background(), 15*time.Second)
|
||||
defer pcancel()
|
||||
|
||||
// Optimistically build new state, so we don't discover any light client failures at the end.
|
||||
|
||||
Reference in New Issue
Block a user