Files
tendermint/test/e2e/runner/perturb.go
Sam Kleinman 8addf99f90 e2e: tweak sleep for perturbations (#6723)
This tweaks sleeps around perturbations, based on a theory that our
tests with "kill" perturbations restart the nodes fast enough that the
peers haven't marked them down when they try to reconnect. In my local
test runs, this clears out *most* of the test failures that I've seen,
except for one evidence-related test-harness problem (which should be
handled separately.)
2021-07-14 21:07:25 +00:00

83 lines
2.4 KiB
Go

package main
import (
"fmt"
"time"
rpctypes "github.com/tendermint/tendermint/rpc/core/types"
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
)
// Perturb applies every perturbation configured on each node of a running
// testnet, one at a time. It stops at, and returns, the first error reported
// by PerturbNode; if all perturbations succeed it returns nil.
func Perturb(testnet *e2e.Testnet) error {
	for _, n := range testnet.Nodes {
		for _, p := range n.Perturbations {
			if _, err := PerturbNode(n, p); err != nil {
				return err
			}
			// Pause after each perturbation to give the network some time
			// to recover before the next one is applied.
			time.Sleep(15 * time.Second)
		}
	}
	return nil
}
// PerturbNode applies a single perturbation to the given node and then waits
// for the node to come back, returning the status reported by waitForNode.
//
// An unrecognized perturbation yields an error. For nodes in seed mode the
// function returns (nil, nil) right after the perturbation, without waiting.
func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.ResultStatus, error) {
	tn := node.Testnet
	name := node.Name

	switch perturbation {
	case e2e.PerturbationDisconnect:
		// Detach the node's container from the docker network, wait, and
		// attach it again.
		logger.Info(fmt.Sprintf("Disconnecting node %v...", name))
		network := tn.Name + "_" + tn.Name
		err := execDocker("network", "disconnect", network, name)
		if err != nil {
			return nil, err
		}
		time.Sleep(10 * time.Second)
		err = execDocker("network", "connect", network, name)
		if err != nil {
			return nil, err
		}

	case e2e.PerturbationKill:
		// SIGKILL the node, wait briefly, then start it again.
		logger.Info(fmt.Sprintf("Killing node %v...", name))
		err := execCompose(tn.Dir, "kill", "-s", "SIGKILL", name)
		if err != nil {
			return nil, err
		}
		time.Sleep(2 * time.Second)
		err = execCompose(tn.Dir, "start", name)
		if err != nil {
			return nil, err
		}

	case e2e.PerturbationPause:
		// Pause the node, wait, then unpause it.
		logger.Info(fmt.Sprintf("Pausing node %v...", name))
		err := execCompose(tn.Dir, "pause", name)
		if err != nil {
			return nil, err
		}
		time.Sleep(10 * time.Second)
		err = execCompose(tn.Dir, "unpause", name)
		if err != nil {
			return nil, err
		}

	case e2e.PerturbationRestart:
		logger.Info(fmt.Sprintf("Restarting node %v...", name))
		err := execCompose(tn.Dir, "restart", name)
		if err != nil {
			return nil, err
		}

	default:
		return nil, fmt.Errorf("unexpected perturbation %q", perturbation)
	}

	// Seed nodes do not expose an RPC endpoint, so there is no way to assert
	// that the node recovered; return without checking.
	if node.Mode == e2e.ModeSeed {
		return nil, nil
	}

	// Wait up to two minutes for the node to respond again.
	recovered, err := waitForNode(node, 0, 2*time.Minute)
	if err != nil {
		return nil, err
	}
	logger.Info(fmt.Sprintf("Node %v recovered at height %v", name, recovered.SyncInfo.LatestBlockHeight))
	return recovered, nil
}