mirror of
https://github.com/tendermint/tendermint.git
synced 2026-01-04 04:04:00 +00:00
light: improve provider handling (#6053)
Introduces heuristics that track the amount of no responses or unavailable blocks a provider has for more robust provider handling by the light client. Use concurrent calls to all witnesses when a new primary is needed.
This commit is contained in:
315
light/client.go
315
light/client.go
@@ -6,6 +6,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/tendermint/tendermint/libs/log"
|
||||
@@ -126,8 +127,10 @@ type Client struct {
|
||||
pruningSize uint16
|
||||
// See ConfirmationFunction option
|
||||
confirmationFn func(action string) bool
|
||||
|
||||
quit chan struct{}
|
||||
// The light client keeps track of how many times it has requested a light
|
||||
// block from it's providers. When this exceeds the amount of witnesses the
|
||||
// light client will just return the last error sent by the providers
|
||||
// repeatRequests uint16
|
||||
|
||||
logger log.Logger
|
||||
}
|
||||
@@ -161,14 +164,14 @@ func NewClient(
|
||||
}
|
||||
|
||||
if c.latestTrustedBlock != nil {
|
||||
c.logger.Info("Checking trusted light block using options")
|
||||
c.logger.Info("checking trusted light block using options")
|
||||
if err := c.checkTrustedHeaderUsingOptions(ctx, trustOptions); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
if c.latestTrustedBlock == nil || c.latestTrustedBlock.Height < trustOptions.Height {
|
||||
c.logger.Info("Downloading trusted light block using options")
|
||||
c.logger.Info("downloading trusted light block using options")
|
||||
if err := c.initializeWithTrustOptions(ctx, trustOptions); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -199,7 +202,6 @@ func NewClientFromTrustedStore(
|
||||
trustedStore: trustedStore,
|
||||
pruningSize: defaultPruningSize,
|
||||
confirmationFn: func(action string) bool { return true },
|
||||
quit: make(chan struct{}),
|
||||
logger: log.NewNopLogger(),
|
||||
}
|
||||
|
||||
@@ -237,7 +239,7 @@ func (c *Client) restoreTrustedLightBlock() error {
|
||||
return fmt.Errorf("can't get last trusted light block: %w", err)
|
||||
}
|
||||
c.latestTrustedBlock = trustedBlock
|
||||
c.logger.Info("Restored trusted light block", "height", lastHeight)
|
||||
c.logger.Info("restored trusted light block", "height", lastHeight)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -273,7 +275,7 @@ func (c *Client) checkTrustedHeaderUsingOptions(ctx context.Context, options Tru
|
||||
case options.Height == c.latestTrustedBlock.Height:
|
||||
primaryHash = options.Hash
|
||||
case options.Height < c.latestTrustedBlock.Height:
|
||||
c.logger.Info("Client initialized with old header (trusted is more recent)",
|
||||
c.logger.Info("client initialized with old header (trusted is more recent)",
|
||||
"old", options.Height,
|
||||
"trustedHeight", c.latestTrustedBlock.Height,
|
||||
"trustedHash", c.latestTrustedBlock.Hash())
|
||||
@@ -299,11 +301,11 @@ func (c *Client) checkTrustedHeaderUsingOptions(ctx context.Context, options Tru
|
||||
}
|
||||
|
||||
if !bytes.Equal(primaryHash, c.latestTrustedBlock.Hash()) {
|
||||
c.logger.Info("Prev. trusted header's hash (h1) doesn't match hash from primary provider (h2)",
|
||||
c.logger.Info("previous trusted header's hash (h1) doesn't match hash from primary provider (h2)",
|
||||
"h1", c.latestTrustedBlock.Hash(), "h2", primaryHash)
|
||||
|
||||
action := fmt.Sprintf(
|
||||
"Prev. trusted header's hash %X doesn't match hash %X from primary provider. Remove all the stored light blocks?",
|
||||
"Previous trusted header's hash %X doesn't match hash %X from primary provider. Remove all the stored light blocks?",
|
||||
c.latestTrustedBlock.Hash(), primaryHash)
|
||||
if c.confirmationFn(action) {
|
||||
err := c.Cleanup()
|
||||
@@ -415,7 +417,7 @@ func (c *Client) Update(ctx context.Context, now time.Time) (*types.LightBlock,
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.logger.Info("Advanced to new state", "height", latestBlock.Height, "hash", latestBlock.Hash())
|
||||
c.logger.Info("advanced to new state", "height", latestBlock.Height, "hash", latestBlock.Hash())
|
||||
return latestBlock, nil
|
||||
}
|
||||
|
||||
@@ -440,7 +442,7 @@ func (c *Client) VerifyLightBlockAtHeight(ctx context.Context, height int64, now
|
||||
// Check if the light block is already verified.
|
||||
h, err := c.TrustedLightBlock(height)
|
||||
if err == nil {
|
||||
c.logger.Info("Header has already been verified", "height", height, "hash", h.Hash())
|
||||
c.logger.Debug("header has already been verified", "height", height, "hash", h.Hash())
|
||||
// Return already trusted light block
|
||||
return h, nil
|
||||
}
|
||||
@@ -497,7 +499,7 @@ func (c *Client) VerifyHeader(ctx context.Context, newHeader *types.Header, now
|
||||
if !bytes.Equal(l.Hash(), newHeader.Hash()) {
|
||||
return fmt.Errorf("existing trusted header %X does not match newHeader %X", l.Hash(), newHeader.Hash())
|
||||
}
|
||||
c.logger.Info("Header has already been verified",
|
||||
c.logger.Debug("header has already been verified",
|
||||
"height", newHeader.Height, "hash", newHeader.Hash())
|
||||
return nil
|
||||
}
|
||||
@@ -516,7 +518,7 @@ func (c *Client) VerifyHeader(ctx context.Context, newHeader *types.Header, now
|
||||
}
|
||||
|
||||
func (c *Client) verifyLightBlock(ctx context.Context, newLightBlock *types.LightBlock, now time.Time) error {
|
||||
c.logger.Info("VerifyHeader", "height", newLightBlock.Height, "hash", newLightBlock.Hash())
|
||||
c.logger.Info("verify light block", "height", newLightBlock.Height, "hash", newLightBlock.Hash())
|
||||
|
||||
var (
|
||||
verifyFunc func(ctx context.Context, trusted *types.LightBlock, new *types.LightBlock, now time.Time) error
|
||||
@@ -561,7 +563,7 @@ func (c *Client) verifyLightBlock(ctx context.Context, newLightBlock *types.Ligh
|
||||
err = verifyFunc(ctx, closestBlock, newLightBlock, now)
|
||||
}
|
||||
if err != nil {
|
||||
c.logger.Error("Can't verify", "err", err)
|
||||
c.logger.Error("failed to verify", "err", err)
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -595,7 +597,7 @@ func (c *Client) verifySequential(
|
||||
}
|
||||
|
||||
// 2) Verify them
|
||||
c.logger.Debug("Verify adjacent newLightBlock against verifiedBlock",
|
||||
c.logger.Debug("verify adjacent newLightBlock against verifiedBlock",
|
||||
"trustedHeight", verifiedBlock.Height,
|
||||
"trustedHash", verifiedBlock.Hash(),
|
||||
"newHeight", interimBlock.Height,
|
||||
@@ -610,32 +612,21 @@ func (c *Client) verifySequential(
|
||||
case ErrInvalidHeader:
|
||||
// If the target header is invalid, return immediately.
|
||||
if err.To == newLightBlock.Height {
|
||||
c.logger.Debug("Target header is invalid", "err", err)
|
||||
c.logger.Debug("target header is invalid", "err", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// If some intermediate header is invalid, replace the primary and try
|
||||
// again.
|
||||
c.logger.Error("primary sent invalid header -> replacing", "err", err, "primary", c.primary)
|
||||
replaceErr := c.replacePrimaryProvider()
|
||||
if replaceErr != nil {
|
||||
c.logger.Error("Can't replace primary", "err", replaceErr)
|
||||
// return original error
|
||||
return err
|
||||
}
|
||||
// If some intermediate header is invalid, remove the primary and try again.
|
||||
c.logger.Error("primary sent invalid header -> removing", "err", err, "primary", c.primary)
|
||||
|
||||
replacementBlock, fErr := c.lightBlockFromPrimary(ctx, newLightBlock.Height)
|
||||
if fErr != nil {
|
||||
c.logger.Error("Can't fetch light block from primary", "err", fErr)
|
||||
// return original error
|
||||
replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true)
|
||||
if removeErr != nil {
|
||||
c.logger.Debug("failed to replace primary. Returning original error", "err", removeErr)
|
||||
return err
|
||||
}
|
||||
|
||||
if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) {
|
||||
c.logger.Error("Replacement provider has a different light block",
|
||||
"newHash", newLightBlock.Hash(),
|
||||
"replHash", replacementBlock.Hash())
|
||||
// return original error
|
||||
c.logger.Debug("replaced primary but new primary has a different block to the initial one")
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -686,7 +677,7 @@ func (c *Client) verifySkipping(
|
||||
)
|
||||
|
||||
for {
|
||||
c.logger.Debug("Verify non-adjacent newHeader against verifiedBlock",
|
||||
c.logger.Debug("verify non-adjacent newHeader against verifiedBlock",
|
||||
"trustedHeight", verifiedBlock.Height,
|
||||
"trustedHash", verifiedBlock.Hash(),
|
||||
"newHeight", blockCache[depth].Height,
|
||||
@@ -716,10 +707,20 @@ func (c *Client) verifySkipping(
|
||||
pivotHeight := verifiedBlock.Height + (blockCache[depth].Height-verifiedBlock.
|
||||
Height)*verifySkippingNumerator/verifySkippingDenominator
|
||||
interimBlock, providerErr := source.LightBlock(ctx, pivotHeight)
|
||||
if providerErr != nil {
|
||||
switch providerErr {
|
||||
case nil:
|
||||
blockCache = append(blockCache, interimBlock)
|
||||
|
||||
// if the error is benign, the client does not need to replace the primary
|
||||
case provider.ErrLightBlockNotFound, provider.ErrNoResponse:
|
||||
return nil, err
|
||||
|
||||
// all other errors such as ErrBadLightBlock or ErrUnreliableProvider are seen as malevolent and the
|
||||
// provider is removed
|
||||
default:
|
||||
return nil, ErrVerificationFailed{From: verifiedBlock.Height, To: pivotHeight, Reason: providerErr}
|
||||
}
|
||||
blockCache = append(blockCache, interimBlock)
|
||||
|
||||
}
|
||||
depth++
|
||||
|
||||
@@ -744,32 +745,20 @@ func (c *Client) verifySkippingAgainstPrimary(
|
||||
// If the target header is invalid, return immediately.
|
||||
invalidHeaderHeight := err.(ErrVerificationFailed).To
|
||||
if invalidHeaderHeight == newLightBlock.Height {
|
||||
c.logger.Debug("Target header is invalid", "err", err)
|
||||
c.logger.Debug("target header is invalid", "err", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// If some intermediate header is invalid, replace the primary and try
|
||||
// again.
|
||||
// If some intermediate header is invalid, remove the primary and try again.
|
||||
c.logger.Error("primary sent invalid header -> replacing", "err", err, "primary", c.primary)
|
||||
replaceErr := c.replacePrimaryProvider()
|
||||
if replaceErr != nil {
|
||||
c.logger.Error("Can't replace primary", "err", replaceErr)
|
||||
// return original error
|
||||
return err
|
||||
}
|
||||
|
||||
replacementBlock, fErr := c.lightBlockFromPrimary(ctx, newLightBlock.Height)
|
||||
if fErr != nil {
|
||||
c.logger.Error("Can't fetch light block from primary", "err", fErr)
|
||||
// return original error
|
||||
replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true)
|
||||
if removeErr != nil {
|
||||
c.logger.Error("failed to replace primary. Returning original error", "err", removeErr)
|
||||
return err
|
||||
}
|
||||
|
||||
if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) {
|
||||
c.logger.Error("Replacement provider has a different light block",
|
||||
"newHash", newLightBlock.Hash(),
|
||||
"replHash", replacementBlock.Hash())
|
||||
// return original error
|
||||
c.logger.Debug("replaced primary but new primary has a different block to the initial one. Returning original error")
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -835,7 +824,7 @@ func (c *Client) Witnesses() []provider.Provider {
|
||||
// Cleanup removes all the data (headers and validator sets) stored. Note: the
|
||||
// client must be stopped at this point.
|
||||
func (c *Client) Cleanup() error {
|
||||
c.logger.Info("Removing all light blocks")
|
||||
c.logger.Info("removing all light blocks")
|
||||
c.latestTrustedBlock = nil
|
||||
return c.trustedStore.Prune(0)
|
||||
}
|
||||
@@ -908,21 +897,31 @@ func (c *Client) backwards(
|
||||
return fmt.Errorf("failed to obtain the header at height #%d: %w", verifiedHeader.Height-1, err)
|
||||
}
|
||||
interimHeader = interimBlock.Header
|
||||
c.logger.Debug("Verify newHeader against verifiedHeader",
|
||||
c.logger.Debug("verify newHeader against verifiedHeader",
|
||||
"trustedHeight", verifiedHeader.Height,
|
||||
"trustedHash", verifiedHeader.Hash(),
|
||||
"newHeight", interimHeader.Height,
|
||||
"newHash", interimHeader.Hash())
|
||||
if err := VerifyBackwards(interimHeader, verifiedHeader); err != nil {
|
||||
c.logger.Error("primary sent invalid header -> replacing", "err", err, "primary", c.primary)
|
||||
if replaceErr := c.replacePrimaryProvider(); replaceErr != nil {
|
||||
c.logger.Error("Can't replace primary", "err", replaceErr)
|
||||
// return original error
|
||||
return fmt.Errorf("verify backwards from %d to %d failed: %w",
|
||||
verifiedHeader.Height, interimHeader.Height, err)
|
||||
// verification has failed
|
||||
c.logger.Error("backwards verification failed, replacing primary...", "err", err, "primary", c.primary)
|
||||
|
||||
// the client tries to see if it can get a witness to continue with the request
|
||||
newPrimarysBlock, replaceErr := c.findNewPrimary(ctx, newHeader.Height, true)
|
||||
if replaceErr != nil {
|
||||
c.logger.Debug("failed to replace primary. Returning original error", "err", replaceErr)
|
||||
return err
|
||||
}
|
||||
// we need to verify the header at the same height again
|
||||
continue
|
||||
|
||||
// before continuing we must check that they have the same target header to validate
|
||||
if !bytes.Equal(newPrimarysBlock.Hash(), newHeader.Hash()) {
|
||||
c.logger.Debug("replaced primary but new primary has a different block to the initial one")
|
||||
// return the original error
|
||||
return err
|
||||
}
|
||||
|
||||
// try again with the new primary
|
||||
return c.backwards(ctx, verifiedHeader, newPrimarysBlock.Header)
|
||||
}
|
||||
verifiedHeader = interimHeader
|
||||
}
|
||||
@@ -930,55 +929,145 @@ func (c *Client) backwards(
|
||||
return nil
|
||||
}
|
||||
|
||||
// NOTE: requires a providerMutex locked.
|
||||
func (c *Client) removeWitness(idx int) {
|
||||
switch len(c.witnesses) {
|
||||
case 0:
|
||||
panic(fmt.Sprintf("wanted to remove %d element from empty witnesses slice", idx))
|
||||
case 1:
|
||||
c.witnesses = make([]provider.Provider, 0)
|
||||
default:
|
||||
c.witnesses[idx] = c.witnesses[len(c.witnesses)-1]
|
||||
c.witnesses = c.witnesses[:len(c.witnesses)-1]
|
||||
}
|
||||
}
|
||||
|
||||
// replaceProvider takes the first alternative provider and promotes it as the
|
||||
// primary provider.
|
||||
func (c *Client) replacePrimaryProvider() error {
|
||||
c.providerMutex.Lock()
|
||||
defer c.providerMutex.Unlock()
|
||||
|
||||
if len(c.witnesses) <= 1 {
|
||||
return ErrNoWitnesses
|
||||
}
|
||||
c.primary = c.witnesses[0]
|
||||
c.witnesses = c.witnesses[1:]
|
||||
c.logger.Info("Replacing primary with the first witness", "new_primary", c.primary)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// lightBlockFromPrimary retrieves the lightBlock from the primary provider
|
||||
// at the specified height. Handles dropout by the primary provider by swapping
|
||||
// with an alternative provider.
|
||||
// at the specified height. This method also handles provider behavior as follows:
|
||||
//
|
||||
// 1. If the provider does not respond or does not have the block, it tries again
|
||||
// with a different provider
|
||||
// 2. If all providers return the same error, the light client forwards the error to
|
||||
// where the initial request came from
|
||||
// 3. If the provider provides an invalid light block, is deemed unreliable or returns
|
||||
// any other error, the primary is permanently dropped and is replaced by a witness.
|
||||
func (c *Client) lightBlockFromPrimary(ctx context.Context, height int64) (*types.LightBlock, error) {
|
||||
c.providerMutex.Lock()
|
||||
l, err := c.primary.LightBlock(ctx, height)
|
||||
c.providerMutex.Unlock()
|
||||
if err != nil {
|
||||
c.logger.Debug("Error on light block request from primary", "error", err, "primary", c.primary)
|
||||
replaceErr := c.replacePrimaryProvider()
|
||||
if replaceErr != nil {
|
||||
return nil, fmt.Errorf("%v. Tried to replace primary but: %w", err.Error(), replaceErr)
|
||||
}
|
||||
// replace primary and request a light block again
|
||||
return c.lightBlockFromPrimary(ctx, height)
|
||||
|
||||
switch err {
|
||||
case nil:
|
||||
// Everything went smoothly. We reset the lightBlockRequests and return the light block
|
||||
return l, nil
|
||||
|
||||
case provider.ErrNoResponse, provider.ErrLightBlockNotFound:
|
||||
// we find a new witness to replace the primary
|
||||
c.logger.Debug("error from light block request from primary, replacing...", "error", err, "primary", c.primary)
|
||||
return c.findNewPrimary(ctx, height, false)
|
||||
|
||||
default:
|
||||
// The light client has most likely received either provider.ErrUnreliableProvider or provider.ErrBadLightBlock
|
||||
// These errors mean that the light client should drop the primary and try with another provider instead
|
||||
c.logger.Error("error from light block request from primary, removing...", "error", err, "primary", c.primary)
|
||||
return c.findNewPrimary(ctx, height, true)
|
||||
}
|
||||
return l, err
|
||||
}
|
||||
|
||||
// compareFirstHeaderWithWitnesses compares h with all witnesses. If any
|
||||
// NOTE: requires a providerMutex lock
|
||||
func (c *Client) removeWitnesses(indexes []int) error {
|
||||
// check that we will still have witnesses remaining
|
||||
if len(c.witnesses) <= len(indexes) {
|
||||
return ErrNoWitnesses
|
||||
}
|
||||
|
||||
// we need to make sure that we remove witnesses by index in the reverse
|
||||
// order so as to not affect the indexes themselves
|
||||
sort.Ints(indexes)
|
||||
for i := len(indexes) - 1; i >= 0; i-- {
|
||||
c.witnesses[indexes[i]] = c.witnesses[len(c.witnesses)-1]
|
||||
c.witnesses = c.witnesses[:len(c.witnesses)-1]
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
type witnessResponse struct {
|
||||
lb *types.LightBlock
|
||||
witnessIndex int
|
||||
err error
|
||||
}
|
||||
|
||||
// findNewPrimary concurrently sends a light block request, promoting the first witness to return
|
||||
// a valid light block as the new primary. The remove option indicates whether the primary should be
|
||||
// entire removed or just appended to the back of the witnesses list. This method also handles witness
|
||||
// errors. If no witness is available, it returns the last error of the witness.
|
||||
func (c *Client) findNewPrimary(ctx context.Context, height int64, remove bool) (*types.LightBlock, error) {
|
||||
c.providerMutex.Lock()
|
||||
defer c.providerMutex.Unlock()
|
||||
|
||||
if len(c.witnesses) <= 1 {
|
||||
return nil, ErrNoWitnesses
|
||||
}
|
||||
|
||||
var (
|
||||
witnessResponsesC = make(chan witnessResponse, len(c.witnesses))
|
||||
witnessesToRemove []int
|
||||
lastError error
|
||||
wg sync.WaitGroup
|
||||
)
|
||||
|
||||
// send out a light block request to all witnesses
|
||||
subctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
for index := range c.witnesses {
|
||||
wg.Add(1)
|
||||
go func(witnessIndex int, witnessResponsesC chan witnessResponse) {
|
||||
defer wg.Done()
|
||||
|
||||
lb, err := c.witnesses[witnessIndex].LightBlock(subctx, height)
|
||||
witnessResponsesC <- witnessResponse{lb, witnessIndex, err}
|
||||
}(index, witnessResponsesC)
|
||||
}
|
||||
|
||||
// process all the responses as they come in
|
||||
for i := 0; i < cap(witnessResponsesC); i++ {
|
||||
response := <-witnessResponsesC
|
||||
switch response.err {
|
||||
// success! We have found a new primary
|
||||
case nil:
|
||||
cancel() // cancel all remaining requests to other witnesses
|
||||
|
||||
wg.Wait() // wait for all goroutines to finish
|
||||
|
||||
// if we are not intending on removing the primary then append the old primary to the end of the witness slice
|
||||
if !remove {
|
||||
c.witnesses = append(c.witnesses, c.primary)
|
||||
}
|
||||
|
||||
// promote respondent as the new primary
|
||||
c.logger.Debug("found new primary", "primary", c.witnesses[response.witnessIndex])
|
||||
c.primary = c.witnesses[response.witnessIndex]
|
||||
|
||||
// add promoted witness to the list of witnesses to be removed
|
||||
witnessesToRemove = append(witnessesToRemove, response.witnessIndex)
|
||||
|
||||
// remove witnesses marked as bad (the client must do this before we alter the witness slice and change the indexes
|
||||
// of witnesses). Removal is done in descending order
|
||||
if err := c.removeWitnesses(witnessesToRemove); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// return the light block that new primary responded with
|
||||
return response.lb, nil
|
||||
|
||||
// process benign errors by logging them only
|
||||
case provider.ErrNoResponse, provider.ErrLightBlockNotFound:
|
||||
lastError = response.err
|
||||
c.logger.Debug("error on light block request from witness",
|
||||
"error", response.err, "primary", c.witnesses[response.witnessIndex])
|
||||
continue
|
||||
|
||||
// process malevolent errors like ErrUnreliableProvider and ErrBadLightBlock by removing the witness
|
||||
default:
|
||||
lastError = response.err
|
||||
c.logger.Error("error on light block request from witness, removing...",
|
||||
"error", response.err, "primary", c.witnesses[response.witnessIndex])
|
||||
witnessesToRemove = append(witnessesToRemove, response.witnessIndex)
|
||||
}
|
||||
}
|
||||
|
||||
return nil, lastError
|
||||
}
|
||||
|
||||
// compareFirstHeaderWithWitnesses concurrently compares h with all witnesses. If any
|
||||
// witness reports a different header than h, the function returns an error.
|
||||
func (c *Client) compareFirstHeaderWithWitnesses(ctx context.Context, h *types.SignedHeader) error {
|
||||
compareCtx, cancel := context.WithCancel(ctx)
|
||||
@@ -1006,26 +1095,20 @@ func (c *Client) compareFirstHeaderWithWitnesses(ctx context.Context, h *types.S
|
||||
case nil:
|
||||
continue
|
||||
case errConflictingHeaders:
|
||||
c.logger.Error(fmt.Sprintf(`Witness #%d has a different header. Please check primary is correct
|
||||
and remove witness. Otherwise, use the different primary`, e.WitnessIndex), "witness", c.witnesses[e.WitnessIndex])
|
||||
c.logger.Error(fmt.Sprintf(`witness #%d has a different header. Please check primary is correct
|
||||
and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witness", c.witnesses[e.WitnessIndex])
|
||||
return err
|
||||
case errBadWitness:
|
||||
// If witness sent us an invalid header, then remove it. If it didn't
|
||||
// respond or couldn't find the block, then we ignore it and move on to
|
||||
// the next witness.
|
||||
if _, ok := e.Reason.(provider.ErrBadLightBlock); ok {
|
||||
c.logger.Info("Witness sent us invalid header / vals -> removing it", "witness", c.witnesses[e.WitnessIndex])
|
||||
c.logger.Info("Witness sent an invalid light block, removing...", "witness", c.witnesses[e.WitnessIndex])
|
||||
witnessesToRemove = append(witnessesToRemove, e.WitnessIndex)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// we need to make sure that we remove witnesses by index in the reverse
|
||||
// order so as to not affect the indexes themselves
|
||||
sort.Ints(witnessesToRemove)
|
||||
for i := len(witnessesToRemove) - 1; i >= 0; i-- {
|
||||
c.removeWitness(witnessesToRemove[i])
|
||||
}
|
||||
|
||||
return nil
|
||||
// remove all witnesses that misbehaved
|
||||
return c.removeWitnesses(witnessesToRemove)
|
||||
}
|
||||
|
||||
@@ -740,7 +740,7 @@ func TestClient_Concurrency(t *testing.T) {
|
||||
defer wg.Done()
|
||||
|
||||
// NOTE: Cleanup, Stop, VerifyLightBlockAtHeight and Verify are not supposed
|
||||
// to be concurrenly safe.
|
||||
// to be concurrently safe.
|
||||
|
||||
assert.Equal(t, chainID, c.ChainID())
|
||||
|
||||
@@ -774,8 +774,12 @@ func TestClientReplacesPrimaryWithWitnessIfPrimaryIsUnavailable(t *testing.T) {
|
||||
_, err = c.Update(ctx, bTime.Add(2*time.Hour))
|
||||
require.NoError(t, err)
|
||||
|
||||
// the primary should no longer be the deadNode
|
||||
assert.NotEqual(t, c.Primary(), deadNode)
|
||||
assert.Equal(t, 1, len(c.Witnesses()))
|
||||
|
||||
// we should still have the dead node as a witness because it
|
||||
// hasn't repeatedly been unresponsive yet
|
||||
assert.Equal(t, 2, len(c.Witnesses()))
|
||||
}
|
||||
|
||||
func TestClient_BackwardsVerification(t *testing.T) {
|
||||
|
||||
@@ -5,7 +5,6 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"github.com/tendermint/tendermint/light/provider"
|
||||
@@ -35,7 +34,7 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig
|
||||
lastVerifiedHeader = primaryTrace[len(primaryTrace)-1].SignedHeader
|
||||
witnessesToRemove = make([]int, 0)
|
||||
)
|
||||
c.logger.Debug("Running detector against trace", "endBlockHeight", lastVerifiedHeader.Height,
|
||||
c.logger.Debug("running detector against trace", "endBlockHeight", lastVerifiedHeader.Height,
|
||||
"endBlockHash", lastVerifiedHeader.Hash, "length", len(primaryTrace))
|
||||
|
||||
c.providerMutex.Lock()
|
||||
@@ -75,7 +74,7 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig
|
||||
now,
|
||||
)
|
||||
if err != nil {
|
||||
c.logger.Info("Error validating witness's divergent header", "witness", supportingWitness, "err", err)
|
||||
c.logger.Info("error validating witness's divergent header", "witness", supportingWitness, "err", err)
|
||||
witnessesToRemove = append(witnessesToRemove, e.WitnessIndex)
|
||||
continue
|
||||
}
|
||||
@@ -83,7 +82,7 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig
|
||||
// We are suspecting that the primary is faulty, hence we hold the witness as the source of truth
|
||||
// and generate evidence against the primary that we can send to the witness
|
||||
primaryEv := newLightClientAttackEvidence(primaryBlock, witnessTrace[len(witnessTrace)-1], witnessTrace[0])
|
||||
c.logger.Error("Attempted attack detected. Sending evidence againt primary by witness", "ev", primaryEv,
|
||||
c.logger.Error("ATTEMPTED ATTACK DETECTED. Sending evidence againt primary by witness", "ev", primaryEv,
|
||||
"primary", c.primary, "witness", supportingWitness)
|
||||
c.sendEvidence(ctx, primaryEv, supportingWitness)
|
||||
|
||||
@@ -117,22 +116,17 @@ func (c *Client) detectDivergence(ctx context.Context, primaryTrace []*types.Lig
|
||||
return ErrLightClientAttack
|
||||
|
||||
case errBadWitness:
|
||||
c.logger.Info("Witness returned an error during header comparison", "witness", c.witnesses[e.WitnessIndex],
|
||||
"err", err)
|
||||
// if witness sent us an invalid header, then remove it. If it didn't respond or couldn't find the block, then we
|
||||
// ignore it and move on to the next witness
|
||||
if _, ok := e.Reason.(provider.ErrBadLightBlock); ok {
|
||||
c.logger.Info("Witness sent us invalid header / vals -> removing it", "witness", c.witnesses[e.WitnessIndex])
|
||||
witnessesToRemove = append(witnessesToRemove, e.WitnessIndex)
|
||||
}
|
||||
c.logger.Info("witness returned an error during header comparison, removing...",
|
||||
"witness", c.witnesses[e.WitnessIndex], "err", err)
|
||||
witnessesToRemove = append(witnessesToRemove, e.WitnessIndex)
|
||||
default:
|
||||
c.logger.Debug("error in light block request to witness", "err", err)
|
||||
}
|
||||
}
|
||||
|
||||
// we need to make sure that we remove witnesses by index in the reverse
|
||||
// order so as to not affect the indexes themselves
|
||||
sort.Ints(witnessesToRemove)
|
||||
for i := len(witnessesToRemove) - 1; i >= 0; i-- {
|
||||
c.removeWitness(witnessesToRemove[i])
|
||||
// remove witnesses that have misbehaved
|
||||
if err := c.removeWitnesses(witnessesToRemove); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// 1. If we had at least one witness that returned the same header then we
|
||||
@@ -156,7 +150,17 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro
|
||||
witness provider.Provider, witnessIndex int) {
|
||||
|
||||
lightBlock, err := witness.LightBlock(ctx, h.Height)
|
||||
if err != nil {
|
||||
switch err {
|
||||
case nil:
|
||||
break
|
||||
|
||||
case provider.ErrNoResponse, provider.ErrLightBlockNotFound:
|
||||
errc <- err
|
||||
return
|
||||
|
||||
default:
|
||||
// all other errors (i.e. invalid block or unreliable provider) we mark the witness as bad
|
||||
// and remove it
|
||||
errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex}
|
||||
return
|
||||
}
|
||||
@@ -165,7 +169,7 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro
|
||||
errc <- errConflictingHeaders{Block: lightBlock, WitnessIndex: witnessIndex}
|
||||
}
|
||||
|
||||
c.logger.Debug("Matching header received by witness", "height", h.Height, "witness", witnessIndex)
|
||||
c.logger.Debug("matching header received by witness", "height", h.Height, "witness", witnessIndex)
|
||||
errc <- nil
|
||||
}
|
||||
|
||||
@@ -173,7 +177,7 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro
|
||||
func (c *Client) sendEvidence(ctx context.Context, ev *types.LightClientAttackEvidence, receiver provider.Provider) {
|
||||
err := receiver.ReportEvidence(ctx, ev)
|
||||
if err != nil {
|
||||
c.logger.Error("Failed to report evidence to provider", "ev", ev, "provider", receiver)
|
||||
c.logger.Error("failed to report evidence to provider", "ev", ev, "provider", receiver)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -253,5 +253,5 @@ func TestClientDivergentTraces3(t *testing.T) {
|
||||
|
||||
_, err = c.VerifyLightBlockAtHeight(ctx, 10, bTime.Add(1*time.Hour))
|
||||
assert.Error(t, err)
|
||||
assert.Equal(t, 0, len(c.Witnesses()))
|
||||
assert.Equal(t, 1, len(c.Witnesses()))
|
||||
}
|
||||
|
||||
@@ -7,15 +7,15 @@ import (
|
||||
|
||||
var (
|
||||
// ErrLightBlockNotFound is returned when a provider can't find the
|
||||
// requested header.
|
||||
// requested header. The light client will not remove the provider
|
||||
ErrLightBlockNotFound = errors.New("light block not found")
|
||||
// ErrNoResponse is returned if the provider doesn't respond to the
|
||||
// request in a gieven time
|
||||
// request in a given time. The light client will not remove the provider
|
||||
ErrNoResponse = errors.New("client failed to respond")
|
||||
)
|
||||
|
||||
// ErrBadLightBlock is returned when a provider returns an invalid
|
||||
// light block.
|
||||
// light block. The light client will remove the provider.
|
||||
type ErrBadLightBlock struct {
|
||||
Reason error
|
||||
}
|
||||
@@ -23,3 +23,14 @@ type ErrBadLightBlock struct {
|
||||
func (e ErrBadLightBlock) Error() string {
|
||||
return fmt.Sprintf("client provided bad signed header: %s", e.Reason.Error())
|
||||
}
|
||||
|
||||
// ErrUnreliableProvider is a generic error that indicates that the provider isn't
|
||||
// behaving in a reliable manner to the light client. The light client will
|
||||
// remove the provider
|
||||
type ErrUnreliableProvider struct {
|
||||
Reason string
|
||||
}
|
||||
|
||||
func (e ErrUnreliableProvider) Error() string {
|
||||
return fmt.Sprintf("client deemed unreliable: %s", e.Reason)
|
||||
}
|
||||
|
||||
@@ -18,8 +18,10 @@ import (
|
||||
)
|
||||
|
||||
var defaultOptions = Options{
|
||||
MaxRetryAttempts: 5,
|
||||
Timeout: 3 * time.Second,
|
||||
MaxRetryAttempts: 5,
|
||||
Timeout: 3 * time.Second,
|
||||
NoBlockThreshold: 5,
|
||||
NoResponseThreshold: 5,
|
||||
}
|
||||
|
||||
// http provider uses an RPC client to obtain the necessary information.
|
||||
@@ -27,14 +29,37 @@ type http struct {
|
||||
chainID string
|
||||
client rpcclient.RemoteClient
|
||||
|
||||
maxRetryAttempts int
|
||||
// httt provider heuristics
|
||||
|
||||
// The provider tracks the amount of times that the
|
||||
// client doesn't respond. If this exceeds the threshold
|
||||
// then the provider will return an unreliable provider error
|
||||
noResponseThreshold uint16
|
||||
noResponseCount uint16
|
||||
|
||||
// The provider tracks the amount of time the client
|
||||
// doesn't have a block. If this exceeds the threshold
|
||||
// then the provider will return an unreliable provider error
|
||||
noBlockThreshold uint16
|
||||
noBlockCount uint16
|
||||
|
||||
// In a single request, the provider attempts multiple times
|
||||
// with exponential backoff to reach the client. If this
|
||||
// exceeds the maxRetry attempts, this result in a ErrNoResponse
|
||||
maxRetryAttempts uint16
|
||||
}
|
||||
|
||||
type Options struct {
|
||||
// -1 means no limit
|
||||
MaxRetryAttempts int
|
||||
// 0 means no retries
|
||||
MaxRetryAttempts uint16
|
||||
// 0 means no timeout.
|
||||
Timeout time.Duration
|
||||
// The amount of requests that a client doesn't have the block
|
||||
// for before the provider deems the client unreliable
|
||||
NoBlockThreshold uint16
|
||||
// The amount of requests that a client doesn't respond to
|
||||
// before the provider deems the client unreliable
|
||||
NoResponseThreshold uint16
|
||||
}
|
||||
|
||||
// New creates a HTTP provider, which is using the rpchttp.HTTP client under
|
||||
@@ -67,9 +92,11 @@ func NewWithClient(chainID string, client rpcclient.RemoteClient) provider.Provi
|
||||
// NewWithClient allows you to provide a custom client.
|
||||
func NewWithClientAndOptions(chainID string, client rpcclient.RemoteClient, options Options) provider.Provider {
|
||||
return &http{
|
||||
client: client,
|
||||
chainID: chainID,
|
||||
maxRetryAttempts: options.MaxRetryAttempts,
|
||||
client: client,
|
||||
chainID: chainID,
|
||||
maxRetryAttempts: options.MaxRetryAttempts,
|
||||
noResponseThreshold: options.NoResponseThreshold,
|
||||
noBlockThreshold: options.NoBlockThreshold,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -130,7 +157,8 @@ func (p *http) validatorSet(ctx context.Context, height *int64) (*types.Validato
|
||||
for len(vals) != total && page <= maxPages {
|
||||
// create another for loop to control retries. If p.maxRetryAttempts
|
||||
// is negative we will keep repeating.
|
||||
for attempt := 0; attempt != p.maxRetryAttempts+1; attempt++ {
|
||||
attempt := uint16(0)
|
||||
for {
|
||||
res, err := p.client.Validators(ctx, height, &page, &perPage)
|
||||
switch e := err.(type) {
|
||||
case nil: // success!! Now we validate the response
|
||||
@@ -149,8 +177,13 @@ func (p *http) validatorSet(ctx context.Context, height *int64) (*types.Validato
|
||||
|
||||
case *url.Error:
|
||||
if e.Timeout() {
|
||||
// if we have exceeded retry attempts then return a no response error
|
||||
if attempt == p.maxRetryAttempts {
|
||||
return nil, p.noResponse()
|
||||
}
|
||||
attempt++
|
||||
// request timed out: we wait and try again with exponential backoff
|
||||
time.Sleep(backoffTimeout(uint16(attempt)))
|
||||
time.Sleep(backoffTimeout(attempt))
|
||||
continue
|
||||
}
|
||||
return nil, provider.ErrBadLightBlock{Reason: e}
|
||||
@@ -188,7 +221,7 @@ func (p *http) validatorSet(ctx context.Context, height *int64) (*types.Validato
|
||||
func (p *http) signedHeader(ctx context.Context, height *int64) (*types.SignedHeader, error) {
|
||||
// create a for loop to control retries. If p.maxRetryAttempts
|
||||
// is negative we will keep repeating.
|
||||
for attempt := 0; attempt != p.maxRetryAttempts+1; attempt++ {
|
||||
for attempt := uint16(0); attempt != p.maxRetryAttempts+1; attempt++ {
|
||||
commit, err := p.client.Commit(ctx, height)
|
||||
switch e := err.(type) {
|
||||
case nil: // success!!
|
||||
@@ -197,24 +230,20 @@ func (p *http) signedHeader(ctx context.Context, height *int64) (*types.SignedHe
|
||||
case *url.Error:
|
||||
if e.Timeout() {
|
||||
// we wait and try again with exponential backoff
|
||||
time.Sleep(backoffTimeout(uint16(attempt)))
|
||||
time.Sleep(backoffTimeout(attempt))
|
||||
continue
|
||||
}
|
||||
return nil, provider.ErrBadLightBlock{Reason: e}
|
||||
|
||||
case *rpctypes.RPCError:
|
||||
// Check if we got something other than internal error. This shouldn't happen unless the RPC module
|
||||
// or light client has been tampered with. If we do get this error, stop the connection with the
|
||||
// peer and return an error
|
||||
if e.Code != -32603 {
|
||||
return nil, provider.ErrBadLightBlock{Reason: errors.New(e.Data)}
|
||||
// check if the error indicates that the peer doesn't have the block
|
||||
if strings.Contains(e.Data, ctypes.ErrHeightNotAvailable.Error()) ||
|
||||
strings.Contains(e.Data, ctypes.ErrHeightExceedsChainHead.Error()) {
|
||||
return nil, p.noBlock()
|
||||
}
|
||||
|
||||
// check if the error indicates that the peer doesn't have the block
|
||||
if strings.Contains(err.Error(), ctypes.ErrHeightNotAvailable.Error()) ||
|
||||
strings.Contains(err.Error(), ctypes.ErrHeightExceedsChainHead.Error()) {
|
||||
return nil, provider.ErrLightBlockNotFound
|
||||
}
|
||||
// for every other error, the provider returns a bad block
|
||||
return nil, provider.ErrBadLightBlock{Reason: errors.New(e.Data)}
|
||||
|
||||
default:
|
||||
// If we don't know the error then by default we return a bad light block error and
|
||||
@@ -222,7 +251,27 @@ func (p *http) signedHeader(ctx context.Context, height *int64) (*types.SignedHe
|
||||
return nil, provider.ErrBadLightBlock{Reason: e}
|
||||
}
|
||||
}
|
||||
return nil, provider.ErrNoResponse
|
||||
return nil, p.noResponse()
|
||||
}
|
||||
|
||||
func (p *http) noResponse() error {
|
||||
p.noResponseCount++
|
||||
if p.noResponseCount > p.noResponseThreshold {
|
||||
return provider.ErrUnreliableProvider{
|
||||
Reason: fmt.Sprintf("failed to respond after %d attempts", p.noResponseCount),
|
||||
}
|
||||
}
|
||||
return provider.ErrNoResponse
|
||||
}
|
||||
|
||||
func (p *http) noBlock() error {
|
||||
p.noBlockCount++
|
||||
if p.noBlockCount > p.noBlockThreshold {
|
||||
return provider.ErrUnreliableProvider{
|
||||
Reason: fmt.Sprintf("failed to provide a block after %d attempts", p.noBlockCount),
|
||||
}
|
||||
}
|
||||
return provider.ErrLightBlockNotFound
|
||||
}
|
||||
|
||||
func validateHeight(height int64) (*int64, error) {
|
||||
|
||||
@@ -66,25 +66,33 @@ func TestProvider(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
|
||||
// let's get the highest block
|
||||
sh, err := p.LightBlock(context.Background(), 0)
|
||||
lb, err := p.LightBlock(context.Background(), 0)
|
||||
require.NoError(t, err)
|
||||
assert.True(t, sh.Height < 1000)
|
||||
assert.True(t, lb.Height < 1000)
|
||||
|
||||
// let's check this is valid somehow
|
||||
assert.Nil(t, sh.ValidateBasic(chainID))
|
||||
assert.Nil(t, lb.ValidateBasic(chainID))
|
||||
|
||||
// historical queries now work :)
|
||||
lower := sh.Height - 3
|
||||
sh, err = p.LightBlock(context.Background(), lower)
|
||||
lower := lb.Height - 3
|
||||
lb, err = p.LightBlock(context.Background(), lower)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, lower, sh.Height)
|
||||
assert.Equal(t, lower, lb.Height)
|
||||
|
||||
// // fetching missing heights (both future and pruned) should return appropriate errors
|
||||
_, err = p.LightBlock(context.Background(), 1000)
|
||||
lb, err = p.LightBlock(context.Background(), 1000)
|
||||
require.Error(t, err)
|
||||
require.Nil(t, lb)
|
||||
assert.Equal(t, provider.ErrLightBlockNotFound, err)
|
||||
|
||||
_, err = p.LightBlock(context.Background(), 1)
|
||||
require.Error(t, err)
|
||||
assert.Equal(t, provider.ErrLightBlockNotFound, err)
|
||||
|
||||
// if the provider is unable to provide four more blocks then we should return
|
||||
// an unreliable peer error
|
||||
for i := 0; i < 4; i++ {
|
||||
_, err = p.LightBlock(context.Background(), 1)
|
||||
}
|
||||
assert.IsType(t, provider.ErrUnreliableProvider{}, err)
|
||||
}
|
||||
|
||||
@@ -2,15 +2,12 @@ package mock
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
|
||||
"github.com/tendermint/tendermint/light/provider"
|
||||
"github.com/tendermint/tendermint/types"
|
||||
)
|
||||
|
||||
var errNoResp = errors.New("no response from provider")
|
||||
|
||||
type deadMock struct {
|
||||
id string
|
||||
}
|
||||
@@ -25,9 +22,9 @@ func (p *deadMock) String() string {
|
||||
}
|
||||
|
||||
func (p *deadMock) LightBlock(_ context.Context, height int64) (*types.LightBlock, error) {
|
||||
return nil, errNoResp
|
||||
return nil, provider.ErrNoResponse
|
||||
}
|
||||
|
||||
func (p *deadMock) ReportEvidence(_ context.Context, ev types.Evidence) error {
|
||||
return errNoResp
|
||||
return provider.ErrNoResponse
|
||||
}
|
||||
|
||||
@@ -130,7 +130,7 @@ func VerifyAdjacent(
|
||||
|
||||
// Check the validator hashes are the same
|
||||
if !bytes.Equal(untrustedHeader.ValidatorsHash, trustedHeader.NextValidatorsHash) {
|
||||
err := fmt.Errorf("expected old header next validators (%X) to match those from new header (%X)",
|
||||
err := fmt.Errorf("expected old header's next validators (%X) to match those from new header (%X)",
|
||||
trustedHeader.NextValidatorsHash,
|
||||
untrustedHeader.ValidatorsHash,
|
||||
)
|
||||
|
||||
Reference in New Issue
Block a user