blocksync: retry requests after timeout (backport #9518) (#9533)

* blocksync: retry requests after timeout (#9518)

* blocksync: retry requests after timeout

* Minimize changes to re-send block request after timeout

* TO REVERT: reduce queue capacity

* Add reset

* Revert "TO REVERT: reduce queue capacity"

This reverts commit dd0fee56924c958bed2ab7733e1917eb88fb5957.

* 30 seconds

* don't reset the timer

* Update blocksync/pool.go

Co-authored-by: Callum Waters <cmwaters19@gmail.com>

Co-authored-by: Sergio Mena <sergio@informal.systems>
Co-authored-by: Callum Waters <cmwaters19@gmail.com>
(cherry picked from commit a371b1e3a8)

* Add changelog entry

Co-authored-by: William Banfield <4561443+williambanfield@users.noreply.github.com>
Co-authored-by: Sergio Mena <sergio@informal.systems>
This commit is contained in:
mergify[bot]
2022-10-10 15:24:19 +02:00
committed by GitHub
parent cfb87ab6f2
commit 155110007b
2 changed files with 9 additions and 1 deletions

View File

@@ -51,3 +51,4 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi
- [consensus] \#9229 fix round number of `enterPropose` when handling `RoundStepNewRound` timeout. (@fatcat22)
- [docker] \#9073 enable cross platform build using docker buildx
- [docker] \#9462 ensure Docker image uses consistent version of Go
- [blocksync] \#9518 handle the case when the sending queue is full: retry block request after a timeout

View File

@@ -32,6 +32,7 @@ const (
maxTotalRequesters = 600
maxPendingRequests = maxTotalRequesters
maxPendingRequestsPerPeer = 20
requestRetrySeconds = 30
// Minimum recv rate to ensure we're receiving blocks from a peer fast
// enough. If a peer is not sending us data at that rate or faster, we
@@ -602,7 +603,7 @@ OUTER_LOOP:
}
peer = bpr.pool.pickIncrAvailablePeer(bpr.height)
if peer == nil {
// log.Info("No peers available", "height", height)
bpr.Logger.Debug("No peers currently available; will retry shortly", "height", bpr.height)
time.Sleep(requestIntervalMS * time.Millisecond)
continue PICK_PEER_LOOP
}
@@ -612,6 +613,7 @@ OUTER_LOOP:
bpr.peerID = peer.id
bpr.mtx.Unlock()
to := time.NewTimer(requestRetrySeconds * time.Second)
// Send request and wait.
bpr.pool.sendRequest(bpr.height, peer.id)
WAIT_LOOP:
@@ -624,6 +626,11 @@ OUTER_LOOP:
return
case <-bpr.Quit():
return
case <-to.C:
bpr.Logger.Debug("Retrying block request after timeout", "height", bpr.height, "peer", bpr.peerID)
// Simulate a redo
bpr.reset()
continue OUTER_LOOP
case peerID := <-bpr.redoCh:
if peerID == bpr.peerID {
bpr.reset()