diff --git a/CHANGELOG_PENDING.md b/CHANGELOG_PENDING.md index 55640b08d..afb3464a3 100644 --- a/CHANGELOG_PENDING.md +++ b/CHANGELOG_PENDING.md @@ -51,3 +51,4 @@ Friendly reminder, we have a [bug bounty program](https://hackerone.com/tendermi - [consensus] \#9229 fix round number of `enterPropose` when handling `RoundStepNewRound` timeout. (@fatcat22) - [docker] \#9073 enable cross platform build using docker buildx - [docker] \#9462 ensure Docker image uses consistent version of Go +- [blocksync] \#9518 handle the case when the sending queue is full: retry block request after a timeout diff --git a/blocksync/pool.go b/blocksync/pool.go index 1a89cbe7d..57ba94ce8 100644 --- a/blocksync/pool.go +++ b/blocksync/pool.go @@ -32,6 +32,7 @@ const ( maxTotalRequesters = 600 maxPendingRequests = maxTotalRequesters maxPendingRequestsPerPeer = 20 + requestRetrySeconds = 30 // Minimum recv rate to ensure we're receiving blocks from a peer fast // enough. If a peer is not sending us data at at least that rate, we @@ -602,7 +603,7 @@ OUTER_LOOP: } peer = bpr.pool.pickIncrAvailablePeer(bpr.height) if peer == nil { - // log.Info("No peers available", "height", height) + bpr.Logger.Debug("No peers currently available; will retry shortly", "height", bpr.height) time.Sleep(requestIntervalMS * time.Millisecond) continue PICK_PEER_LOOP } @@ -612,6 +613,7 @@ OUTER_LOOP: bpr.peerID = peer.id bpr.mtx.Unlock() + to := time.NewTimer(requestRetrySeconds * time.Second) // Send request and wait. bpr.pool.sendRequest(bpr.height, peer.id) WAIT_LOOP: @@ -624,6 +626,11 @@ OUTER_LOOP: return case <-bpr.Quit(): return + case <-to.C: + bpr.Logger.Debug("Retrying block request after timeout", "height", bpr.height, "peer", bpr.peerID) + // Simulate a redo + bpr.reset() + continue OUTER_LOOP case peerID := <-bpr.redoCh: if peerID == bpr.peerID { bpr.reset()