mirror of
https://github.com/tendermint/tendermint.git
synced 2026-02-10 05:50:19 +00:00
make some tweaks to error handling
This commit is contained in:
@@ -307,7 +307,7 @@ func (r *Reactor) Sync(ctx context.Context) (sm.State, error) {
|
||||
|
||||
err = r.Backfill(ctx, state)
|
||||
if err != nil {
|
||||
return sm.State{}, err
|
||||
r.Logger.Error("backfill failed. Proceeding optimistically...", "err", err)
|
||||
}
|
||||
|
||||
return state, nil
|
||||
|
||||
@@ -45,9 +45,6 @@ var (
|
||||
errTimeout = errors.New("timed out waiting for chunk")
|
||||
// errNoSnapshots is returned by SyncAny() if no snapshots are found and discovery is disabled.
|
||||
errNoSnapshots = errors.New("no suitable snapshots found")
|
||||
// errStateProviderTimeout is returned by Sync() when the timeout for retrieving
|
||||
// tendermint state or the commit is exceeded
|
||||
errStateProviderTimeout = errors.New("state provider timed out")
|
||||
)
|
||||
|
||||
// syncer runs a state sync against an ABCI app. Use either SyncAny() to automatically attempt to
|
||||
@@ -153,8 +150,6 @@ func (s *syncer) SyncAny(
|
||||
discoveryTime time.Duration,
|
||||
requestSnapshots func(),
|
||||
) (sm.State, *types.Commit, error) {
|
||||
s.logger.Info("in sync any")
|
||||
|
||||
if discoveryTime != 0 && discoveryTime < minimumDiscoveryTime {
|
||||
discoveryTime = minimumDiscoveryTime
|
||||
}
|
||||
@@ -182,7 +177,6 @@ func (s *syncer) SyncAny(
|
||||
if discoveryTime == 0 {
|
||||
return sm.State{}, nil, errNoSnapshots
|
||||
}
|
||||
// requestSnapshots()
|
||||
s.logger.Info(fmt.Sprintf("Discovering snapshots for %v", discoveryTime))
|
||||
time.Sleep(discoveryTime)
|
||||
continue
|
||||
@@ -195,9 +189,7 @@ func (s *syncer) SyncAny(
|
||||
defer chunks.Close() // in case we forget to close it elsewhere
|
||||
}
|
||||
|
||||
s.logger.Info("starting sync")
|
||||
newState, commit, err := s.Sync(ctx, snapshot, chunks)
|
||||
s.logger.Info("after sync")
|
||||
switch {
|
||||
case err == nil:
|
||||
return newState, commit, nil
|
||||
@@ -233,10 +225,6 @@ func (s *syncer) SyncAny(
|
||||
s.logger.Info("Snapshot sender rejected", "peer", peer)
|
||||
}
|
||||
|
||||
case errors.Is(err, errStateProviderTimeout):
|
||||
s.logger.Info("Timed out retrieving state and commit, rejecting and retrying...", "height", snapshot.Height)
|
||||
s.snapshots.Reject(snapshot)
|
||||
|
||||
default:
|
||||
return sm.State{}, nil, fmt.Errorf("snapshot restoration failed: %w", err)
|
||||
}
|
||||
@@ -273,10 +261,13 @@ func (s *syncer) Sync(ctx context.Context, snapshot *snapshot, chunks *chunkQueu
|
||||
// Fetch the app hash corresponding to the snapshot
|
||||
appHash, err := s.stateProvider.AppHash(hctx, snapshot.Height)
|
||||
if err != nil {
|
||||
if err == context.DeadlineExceeded && ctx.Err() == nil {
|
||||
return sm.State{}, nil, errStateProviderTimeout
|
||||
// check if the main context was triggered
|
||||
if ctx.Err() != nil {
|
||||
return sm.State{}, nil, ctx.Err()
|
||||
}
|
||||
return sm.State{}, nil, fmt.Errorf("failed to get app hash: %w", err)
|
||||
s.logger.Info("failed to get and verify tendermint state. Dropping snapshot and trying again",
|
||||
"err", err, "height", snapshot.Height)
|
||||
return sm.State{}, nil, errRejectSnapshot
|
||||
}
|
||||
snapshot.trustedAppHash = appHash
|
||||
|
||||
@@ -299,21 +290,23 @@ func (s *syncer) Sync(ctx context.Context, snapshot *snapshot, chunks *chunkQueu
|
||||
// Optimistically build new state, so we don't discover any light client failures at the end.
|
||||
state, err := s.stateProvider.State(pctx, snapshot.Height)
|
||||
if err != nil {
|
||||
// check if the provider context exceeded the 10 second deadline
|
||||
if err == context.DeadlineExceeded && ctx.Err() == nil {
|
||||
return sm.State{}, nil, errStateProviderTimeout
|
||||
// check if the main context was triggered
|
||||
if ctx.Err() != nil {
|
||||
return sm.State{}, nil, ctx.Err()
|
||||
}
|
||||
|
||||
return sm.State{}, nil, fmt.Errorf("failed to build new state: %w", err)
|
||||
s.logger.Info("failed to get and verify tendermint state. Dropping snapshot and trying again",
|
||||
"err", err, "height", snapshot.Height)
|
||||
return sm.State{}, nil, errRejectSnapshot
|
||||
}
|
||||
commit, err := s.stateProvider.Commit(pctx, snapshot.Height)
|
||||
if err != nil {
|
||||
// check if the provider context exceeded the 10 second deadline
|
||||
if err == context.DeadlineExceeded && ctx.Err() == nil {
|
||||
return sm.State{}, nil, errStateProviderTimeout
|
||||
if ctx.Err() != nil {
|
||||
return sm.State{}, nil, ctx.Err()
|
||||
}
|
||||
|
||||
return sm.State{}, nil, fmt.Errorf("failed to fetch commit: %w", err)
|
||||
s.logger.Info("failed to get and verify commit. Dropping snapshot and trying again",
|
||||
"err", err, "height", snapshot.Height)
|
||||
return sm.State{}, nil, errRejectSnapshot
|
||||
}
|
||||
|
||||
// Restore snapshot
|
||||
|
||||
@@ -75,7 +75,7 @@ mode = "full"
|
||||
start_at = 1010
|
||||
# FIXME: should be v2, disabled due to flake
|
||||
block_sync = "v0"
|
||||
persistent_peers = ["validator01", "validator02", "validator03", "validator04", "validator05"]
|
||||
persistent_peers = ["validator01", "validator02", "validator03", "validator04"]
|
||||
perturb = ["restart"]
|
||||
retain_blocks = 7
|
||||
state_sync = "rpc"
|
||||
|
||||
@@ -85,17 +85,11 @@ func Start(testnet *e2e.Testnet) error {
|
||||
|
||||
networkHeight = node.StartAt
|
||||
|
||||
logger.Info("Waiting for network to advance before starting catch up node",
|
||||
"node", node.Name,
|
||||
"height", networkHeight)
|
||||
|
||||
if _, _, err := waitForHeight(testnet, networkHeight); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
logger.Info("Starting catch up node", "node", node.Name, "height", node.StartAt)
|
||||
|
||||
if err := execCompose(testnet.Dir, "up", "-d", node.Name); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user