mirror of
https://github.com/tendermint/tendermint.git
synced 2026-04-26 18:45:07 +00:00
light: improve error handling and allow providers to be added (#6733)
This commit is contained in:
@@ -85,7 +85,7 @@ func (p *snapshotPool) Add(peerID types.NodeID, snapshot *snapshot) (bool, error
|
||||
|
||||
appHash, err := p.stateProvider.AppHash(ctx, snapshot.Height)
|
||||
if err != nil {
|
||||
return false, err
|
||||
return false, fmt.Errorf("failed to get app hash: %w", err)
|
||||
}
|
||||
snapshot.trustedAppHash = appHash
|
||||
key := snapshot.Key()
|
||||
|
||||
141
light/client.go
141
light/client.go
@@ -478,7 +478,7 @@ func (c *Client) VerifyHeader(ctx context.Context, newHeader *types.Header, now
|
||||
}
|
||||
|
||||
if !bytes.Equal(l.Hash(), newHeader.Hash()) {
|
||||
return fmt.Errorf("light block header %X does not match newHeader %X", l.Hash(), newHeader.Hash())
|
||||
return fmt.Errorf("header from primary %X does not match newHeader %X", l.Hash(), newHeader.Hash())
|
||||
}
|
||||
|
||||
return c.verifyLightBlock(ctx, l, now)
|
||||
@@ -586,7 +586,7 @@ func (c *Client) verifySequential(
|
||||
}
|
||||
|
||||
// If some intermediate header is invalid, remove the primary and try again.
|
||||
c.logger.Error("primary sent invalid header -> removing", "err", err, "primary", c.primary)
|
||||
c.logger.Info("primary sent invalid header -> removing", "err", err, "primary", c.primary)
|
||||
|
||||
replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true)
|
||||
if removeErr != nil {
|
||||
@@ -630,6 +630,10 @@ func (c *Client) verifySequential(
|
||||
// requested from source is kept such that when a verification is made, and the
|
||||
// light client tries again to verify the new light block in the middle, the light
|
||||
// client does not need to ask for all the same light blocks again.
|
||||
//
|
||||
// If this function errors, it should always wrap it in a `ErrVerifcationFailed`
|
||||
// struct so that the calling function can determine where it failed and handle
|
||||
// it accordingly.
|
||||
func (c *Client) verifySkipping(
|
||||
ctx context.Context,
|
||||
source provider.Provider,
|
||||
@@ -690,20 +694,10 @@ func (c *Client) verifySkipping(
|
||||
// schedule what the next height we need to fetch is
|
||||
pivotHeight := c.schedule(verifiedBlock.Height, blockCache[depth].Height)
|
||||
interimBlock, providerErr := source.LightBlock(ctx, pivotHeight)
|
||||
switch providerErr {
|
||||
case nil:
|
||||
blockCache = append(blockCache, interimBlock)
|
||||
|
||||
// if the error is benign, the client does not need to replace the primary
|
||||
case provider.ErrLightBlockNotFound, provider.ErrNoResponse, provider.ErrHeightTooHigh:
|
||||
return nil, err
|
||||
|
||||
// all other errors such as ErrBadLightBlock or ErrUnreliableProvider are seen as malevolent and the
|
||||
// provider is removed
|
||||
default:
|
||||
if providerErr != nil {
|
||||
return nil, ErrVerificationFailed{From: verifiedBlock.Height, To: pivotHeight, Reason: providerErr}
|
||||
}
|
||||
|
||||
blockCache = append(blockCache, interimBlock)
|
||||
}
|
||||
depth++
|
||||
|
||||
@@ -729,33 +723,8 @@ func (c *Client) verifySkippingAgainstPrimary(
|
||||
now time.Time) error {
|
||||
|
||||
trace, err := c.verifySkipping(ctx, c.primary, trustedBlock, newLightBlock, now)
|
||||
|
||||
switch errors.Unwrap(err).(type) {
|
||||
case ErrInvalidHeader:
|
||||
// If the target header is invalid, return immediately.
|
||||
invalidHeaderHeight := err.(ErrVerificationFailed).To
|
||||
if invalidHeaderHeight == newLightBlock.Height {
|
||||
c.logger.Debug("target header is invalid", "err", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// If some intermediate header is invalid, remove the primary and try again.
|
||||
c.logger.Error("primary sent invalid header -> replacing", "err", err, "primary", c.primary)
|
||||
replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, true)
|
||||
if removeErr != nil {
|
||||
c.logger.Error("failed to replace primary. Returning original error", "err", removeErr)
|
||||
return err
|
||||
}
|
||||
|
||||
if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) {
|
||||
c.logger.Debug("replaced primary but new primary has a different block to the initial one. Returning original error")
|
||||
return err
|
||||
}
|
||||
|
||||
// attempt to verify the header again
|
||||
return c.verifySkippingAgainstPrimary(ctx, trustedBlock, replacementBlock, now)
|
||||
case nil:
|
||||
// Compare header with the witnesses to ensure it's not a fork.
|
||||
if err == nil {
|
||||
// Success! Now compare the header with the witnesses to ensure it's not a fork.
|
||||
// More witnesses we have, more chance to notice one.
|
||||
//
|
||||
// CORRECTNESS ASSUMPTION: there's at least 1 correct full node
|
||||
@@ -763,11 +732,62 @@ func (c *Client) verifySkippingAgainstPrimary(
|
||||
if cmpErr := c.detectDivergence(ctx, trace, now); cmpErr != nil {
|
||||
return cmpErr
|
||||
}
|
||||
default:
|
||||
}
|
||||
|
||||
var e = &ErrVerificationFailed{}
|
||||
// all errors from verify skipping should be `ErrVerificationFailed`
|
||||
// if it's not we just return the error directly
|
||||
if !errors.As(err, e) {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
replace := true
|
||||
switch e.Reason.(type) {
|
||||
// Verification returned an invalid header
|
||||
case ErrInvalidHeader:
|
||||
// If it was the target header, return immediately.
|
||||
if e.To == newLightBlock.Height {
|
||||
c.logger.Debug("target header is invalid", "err", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// If some intermediate header is invalid, remove the primary and try
|
||||
// again.
|
||||
|
||||
// An intermediate header expired. We can no longer validate it as there is
|
||||
// no longer the ability to punish invalid blocks as evidence of misbehavior
|
||||
case ErrOldHeaderExpired:
|
||||
return err
|
||||
|
||||
// This happens if there was a problem in finding the next block or a
|
||||
// context was canceled.
|
||||
default:
|
||||
if errors.Is(e.Reason, context.Canceled) || errors.Is(e.Reason, context.DeadlineExceeded) {
|
||||
return e.Reason
|
||||
}
|
||||
|
||||
if !c.providerShouldBeRemoved(e.Reason) {
|
||||
replace = false
|
||||
}
|
||||
}
|
||||
|
||||
// if we've reached here we're attempting to retry verification with a
|
||||
// different provider
|
||||
c.logger.Info("primary returned error", "err", e, "primary", c.primary, "replace", replace)
|
||||
|
||||
replacementBlock, removeErr := c.findNewPrimary(ctx, newLightBlock.Height, replace)
|
||||
if removeErr != nil {
|
||||
c.logger.Error("failed to replace primary. Returning original error", "err", removeErr)
|
||||
return e.Reason
|
||||
}
|
||||
|
||||
if !bytes.Equal(replacementBlock.Hash(), newLightBlock.Hash()) {
|
||||
c.logger.Debug("replaced primary but new primary has a different block to the initial one. Returning original error")
|
||||
return e.Reason
|
||||
}
|
||||
|
||||
// attempt to verify the header again from the trusted block
|
||||
return c.verifySkippingAgainstPrimary(ctx, trustedBlock, replacementBlock, now)
|
||||
}
|
||||
|
||||
// LastTrustedHeight returns a last trusted height. -1 and nil are returned if
|
||||
@@ -811,6 +831,15 @@ func (c *Client) Witnesses() []provider.Provider {
|
||||
return c.witnesses
|
||||
}
|
||||
|
||||
// AddProvider adds a providers to the light clients set
|
||||
//
|
||||
// NOTE: The light client does not check for uniqueness
|
||||
func (c *Client) AddProvider(p provider.Provider) {
|
||||
c.providerMutex.Lock()
|
||||
defer c.providerMutex.Unlock()
|
||||
c.witnesses = append(c.witnesses, p)
|
||||
}
|
||||
|
||||
// Cleanup removes all the data (headers and validator sets) stored. Note: the
|
||||
// client must be stopped at this point.
|
||||
func (c *Client) Cleanup() error {
|
||||
@@ -865,7 +894,7 @@ func (c *Client) backwards(
|
||||
"newHash", interimHeader.Hash())
|
||||
if err := VerifyBackwards(interimHeader, verifiedHeader); err != nil {
|
||||
// verification has failed
|
||||
c.logger.Error("backwards verification failed, replacing primary...", "err", err, "primary", c.primary)
|
||||
c.logger.Info("backwards verification failed, replacing primary...", "err", err, "primary", c.primary)
|
||||
|
||||
// the client tries to see if it can get a witness to continue with the request
|
||||
newPrimarysBlock, replaceErr := c.findNewPrimary(ctx, newHeader.Height, true)
|
||||
@@ -915,14 +944,14 @@ func (c *Client) lightBlockFromPrimary(ctx context.Context, height int64) (*type
|
||||
|
||||
case provider.ErrNoResponse, provider.ErrLightBlockNotFound, provider.ErrHeightTooHigh:
|
||||
// we find a new witness to replace the primary
|
||||
c.logger.Debug("error from light block request from primary, replacing...",
|
||||
c.logger.Info("error from light block request from primary, replacing...",
|
||||
"error", err, "height", height, "primary", c.primary)
|
||||
return c.findNewPrimary(ctx, height, false)
|
||||
|
||||
default:
|
||||
// The light client has most likely received either provider.ErrUnreliableProvider or provider.ErrBadLightBlock
|
||||
// These errors mean that the light client should drop the primary and try with another provider instead
|
||||
c.logger.Error("error from light block request from primary, removing...",
|
||||
c.logger.Info("error from light block request from primary, removing...",
|
||||
"error", err, "height", height, "primary", c.primary)
|
||||
return c.findNewPrimary(ctx, height, true)
|
||||
}
|
||||
@@ -1022,7 +1051,7 @@ func (c *Client) findNewPrimary(ctx context.Context, height int64, remove bool)
|
||||
// process benign errors by logging them only
|
||||
case provider.ErrNoResponse, provider.ErrLightBlockNotFound, provider.ErrHeightTooHigh:
|
||||
lastError = response.err
|
||||
c.logger.Debug("error on light block request from witness",
|
||||
c.logger.Info("error on light block request from witness",
|
||||
"error", response.err, "primary", c.witnesses[response.witnessIndex])
|
||||
continue
|
||||
|
||||
@@ -1066,13 +1095,13 @@ func (c *Client) compareFirstHeaderWithWitnesses(ctx context.Context, h *types.S
|
||||
case nil:
|
||||
continue
|
||||
case errConflictingHeaders:
|
||||
c.logger.Error(fmt.Sprintf(`witness #%d has a different header. Please check primary is correct
|
||||
and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witness", c.witnesses[e.WitnessIndex])
|
||||
c.logger.Error(`witness has a different header. Please check primary is
|
||||
correct and remove witness. Otherwise, use a different primary`,
|
||||
"Witness", c.witnesses[e.WitnessIndex], "ExpHeader", h.Hash(), "GotHeader", e.Block.Hash())
|
||||
return err
|
||||
case errBadWitness:
|
||||
// If witness sent us an invalid header, then remove it
|
||||
c.logger.Info("witness sent an invalid light block or didn't respond, removing...",
|
||||
"witness", c.witnesses[e.WitnessIndex],
|
||||
c.logger.Info("witness returned an error, removing...",
|
||||
"err", err)
|
||||
witnessesToRemove = append(witnessesToRemove, e.WitnessIndex)
|
||||
default:
|
||||
@@ -1082,7 +1111,7 @@ and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witne
|
||||
}
|
||||
|
||||
// the witness either didn't respond or didn't have the block. We ignore it.
|
||||
c.logger.Debug("unable to compare first header with witness",
|
||||
c.logger.Debug("unable to compare first header with witness, ignoring",
|
||||
"err", err)
|
||||
}
|
||||
|
||||
@@ -1091,3 +1120,11 @@ and remove witness. Otherwise, use a different primary`, e.WitnessIndex), "witne
|
||||
// remove all witnesses that misbehaved
|
||||
return c.removeWitnesses(witnessesToRemove)
|
||||
}
|
||||
|
||||
// providerShouldBeRemoved analyzes the nature of the error and whether the provider
|
||||
// should be removed from the light clients set
|
||||
func (c *Client) providerShouldBeRemoved(err error) bool {
|
||||
return errors.As(err, &provider.ErrUnreliableProvider{}) ||
|
||||
errors.As(err, &provider.ErrBadLightBlock{}) ||
|
||||
errors.Is(err, provider.ErrConnectionClosed)
|
||||
}
|
||||
|
||||
@@ -589,6 +589,36 @@ func TestClient_Concurrency(t *testing.T) {
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
func TestClient_AddProviders(t *testing.T) {
|
||||
c, err := light.NewClient(
|
||||
ctx,
|
||||
chainID,
|
||||
trustOptions,
|
||||
fullNode,
|
||||
[]provider.Provider{fullNode},
|
||||
dbs.New(dbm.NewMemDB()),
|
||||
light.Logger(log.TestingLogger()),
|
||||
)
|
||||
require.NoError(t, err)
|
||||
|
||||
closeCh := make(chan struct{})
|
||||
go func() {
|
||||
// run verification concurrently to make sure it doesn't dead lock
|
||||
_, err = c.VerifyLightBlockAtHeight(ctx, 2, bTime.Add(2*time.Hour))
|
||||
require.NoError(t, err)
|
||||
close(closeCh)
|
||||
}()
|
||||
|
||||
// NOTE: the light client doesn't check uniqueness of providers
|
||||
c.AddProvider(fullNode)
|
||||
require.Len(t, c.Witnesses(), 2)
|
||||
select {
|
||||
case <-closeCh:
|
||||
case <-time.After(5 * time.Second):
|
||||
t.Fatal("concurent light block verification failed to finish in 5s")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClientReplacesPrimaryWithWitnessIfPrimaryIsUnavailable(t *testing.T) {
|
||||
c, err := light.NewClient(
|
||||
ctx,
|
||||
|
||||
@@ -131,7 +131,11 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro
|
||||
var isTargetHeight bool
|
||||
isTargetHeight, lightBlock, err = c.getTargetBlockOrLatest(ctx, h.Height, witness)
|
||||
if err != nil {
|
||||
errc <- err
|
||||
if c.providerShouldBeRemoved(err) {
|
||||
errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex}
|
||||
} else {
|
||||
errc <- err
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -155,7 +159,11 @@ func (c *Client) compareNewHeaderWithWitness(ctx context.Context, errc chan erro
|
||||
time.Sleep(2*c.maxClockDrift + c.maxBlockLag)
|
||||
isTargetHeight, lightBlock, err = c.getTargetBlockOrLatest(ctx, h.Height, witness)
|
||||
if err != nil {
|
||||
errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex}
|
||||
if c.providerShouldBeRemoved(err) {
|
||||
errc <- errBadWitness{Reason: err, WitnessIndex: witnessIndex}
|
||||
} else {
|
||||
errc <- err
|
||||
}
|
||||
return
|
||||
}
|
||||
if isTargetHeight {
|
||||
|
||||
@@ -50,8 +50,9 @@ func VerifyNonAdjacent(
|
||||
return err
|
||||
}
|
||||
|
||||
if HeaderExpired(trustedHeader, trustingPeriod, now) {
|
||||
return ErrOldHeaderExpired{trustedHeader.Time.Add(trustingPeriod), now}
|
||||
// check if the untrusted header is within the trust period
|
||||
if HeaderExpired(untrustedHeader, trustingPeriod, now) {
|
||||
return ErrOldHeaderExpired{untrustedHeader.Time.Add(trustingPeriod), now}
|
||||
}
|
||||
|
||||
if err := verifyNewHeaderAndVals(
|
||||
@@ -117,8 +118,9 @@ func VerifyAdjacent(
|
||||
return errors.New("headers must be adjacent in height")
|
||||
}
|
||||
|
||||
if HeaderExpired(trustedHeader, trustingPeriod, now) {
|
||||
return ErrOldHeaderExpired{trustedHeader.Time.Add(trustingPeriod), now}
|
||||
// check if the untrusted header is within the trust period
|
||||
if HeaderExpired(untrustedHeader, trustingPeriod, now) {
|
||||
return ErrOldHeaderExpired{untrustedHeader.Time.Add(trustingPeriod), now}
|
||||
}
|
||||
|
||||
if err := verifyNewHeaderAndVals(
|
||||
@@ -192,7 +194,10 @@ func HeaderExpired(h *types.SignedHeader, trustingPeriod time.Duration, now time
|
||||
// of the trusted header
|
||||
//
|
||||
// For any of these cases ErrInvalidHeader is returned.
|
||||
// NOTE: This does not check whether the trusted header has expired or not.
|
||||
// NOTE: This does not check whether the trusted or untrusted header has expired
|
||||
// or not. These checks are not necessary because the detector never runs during
|
||||
// backwards verification and thus evidence that needs to be within a certain
|
||||
// time bound is never sent.
|
||||
func VerifyBackwards(untrustedHeader, trustedHeader *types.Header) error {
|
||||
if err := untrustedHeader.ValidateBasic(); err != nil {
|
||||
return ErrInvalidHeader{err}
|
||||
@@ -220,6 +225,8 @@ func VerifyBackwards(untrustedHeader, trustedHeader *types.Header) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// NOTE: This function assumes that untrustedHeader is after trustedHeader.
|
||||
// Do not use for backwards verification.
|
||||
func verifyNewHeaderAndVals(
|
||||
untrustedHeader *types.SignedHeader,
|
||||
untrustedVals *types.ValidatorSet,
|
||||
|
||||
@@ -64,7 +64,7 @@ func TestVerifyAdjacentHeaders(t *testing.T) {
|
||||
keys.GenSignedHeader(chainID, nextHeight, bTime.Add(-1*time.Hour), nil, vals, vals,
|
||||
hash("app_hash"), hash("cons_hash"), hash("results_hash"), 0, len(keys)),
|
||||
vals,
|
||||
3 * time.Hour,
|
||||
4 * time.Hour,
|
||||
bTime.Add(2 * time.Hour),
|
||||
nil,
|
||||
"to be after old header time",
|
||||
@@ -146,7 +146,7 @@ func TestVerifyAdjacentHeaders(t *testing.T) {
|
||||
hash("app_hash"), hash("cons_hash"), hash("results_hash"), 0, len(keys)),
|
||||
keys.ToValidators(10, 1),
|
||||
1 * time.Hour,
|
||||
bTime.Add(1 * time.Hour),
|
||||
bTime.Add(2 * time.Hour),
|
||||
nil,
|
||||
"old header has expired",
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user