statesync: tune backfill process (#6565)

This PR makes some tweaks to backfill after running e2e tests:
- Separates sync and backfill into two distinct processes that the node calls. The reason is that if sync fails the node should fail, but if backfill fails it is still possible to proceed.
- Removes peers who don't have the block at a height from the local peer list. As the process goes backwards, a node that doesn't have a block at a given height is likely pruning blocks and thus won't have any prior ones either.
- Sleeps when we've run out of peers, then tries again.
2026-01-08 14:21:14 +00:00 · 2021-06-11 17:26:18 +02:00
parent 10fa151749
commit 74af343f28
7 changed files with 67 additions and 57 deletions
--- a/node/node.go
+++ b/node/node.go
@@ -1054,20 +1054,21 @@ func startStateSync(ssR *statesync.Reactor, bcR fastSyncReactor, conR *cs.Reacto
 	}

 	go func() {
-		err := ssR.Sync(stateProvider, config.DiscoveryTime)
+		state, err := ssR.Sync(stateProvider, config.DiscoveryTime)
 		if err != nil {
 			ssR.Logger.Error("state sync failed", "err", err)
 			return
 		}

-		state, err := stateStore.Load()
+		err = ssR.Backfill(state)
 		if err != nil {
-			ssR.Logger.Error("failed to load state after statesync", "err", err)
+			ssR.Logger.Error("backfill failed; node has insufficient history to verify all evidence;"+
+				" proceeding optimistically...", "err", err)
 		}

+		conR.Metrics.StateSyncing.Set(0)
 		if fastSync {
 			// FIXME Very ugly to have these metrics bleed through here.
-			conR.Metrics.StateSyncing.Set(0)
 			conR.Metrics.FastSyncing.Set(1)
 			err = bcR.SwitchToFastSync(state)
 			if err != nil {