mirror of
https://github.com/tendermint/tendermint.git
synced 2026-02-07 12:30:45 +00:00
This tweaks the sleeps around perturbations, based on a theory that our tests with "kill" perturbations restart the nodes fast enough that the peers haven't marked the node as down by the time it tries to reconnect. In my local test runs, this clears out *most* of the test failures that I've seen, except for one evidence-related test-harness problem (which should be handled separately).
83 lines
2.4 KiB
Go
83 lines
2.4 KiB
Go
package main
|
|
|
|
import (
|
|
"fmt"
|
|
"time"
|
|
|
|
rpctypes "github.com/tendermint/tendermint/rpc/core/types"
|
|
e2e "github.com/tendermint/tendermint/test/e2e/pkg"
|
|
)
|
|
|
|
// Perturbs a running testnet.
|
|
func Perturb(testnet *e2e.Testnet) error {
|
|
for _, node := range testnet.Nodes {
|
|
for _, perturbation := range node.Perturbations {
|
|
_, err := PerturbNode(node, perturbation)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
time.Sleep(15 * time.Second) // give network some time to recover between each
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// PerturbNode perturbs a node with a given perturbation, returning its status
|
|
// after recovering.
|
|
func PerturbNode(node *e2e.Node, perturbation e2e.Perturbation) (*rpctypes.ResultStatus, error) {
|
|
testnet := node.Testnet
|
|
switch perturbation {
|
|
case e2e.PerturbationDisconnect:
|
|
logger.Info(fmt.Sprintf("Disconnecting node %v...", node.Name))
|
|
if err := execDocker("network", "disconnect", testnet.Name+"_"+testnet.Name, node.Name); err != nil {
|
|
return nil, err
|
|
}
|
|
time.Sleep(10 * time.Second)
|
|
if err := execDocker("network", "connect", testnet.Name+"_"+testnet.Name, node.Name); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
case e2e.PerturbationKill:
|
|
logger.Info(fmt.Sprintf("Killing node %v...", node.Name))
|
|
if err := execCompose(testnet.Dir, "kill", "-s", "SIGKILL", node.Name); err != nil {
|
|
return nil, err
|
|
}
|
|
time.Sleep(2 * time.Second)
|
|
if err := execCompose(testnet.Dir, "start", node.Name); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
case e2e.PerturbationPause:
|
|
logger.Info(fmt.Sprintf("Pausing node %v...", node.Name))
|
|
if err := execCompose(testnet.Dir, "pause", node.Name); err != nil {
|
|
return nil, err
|
|
}
|
|
time.Sleep(10 * time.Second)
|
|
if err := execCompose(testnet.Dir, "unpause", node.Name); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
case e2e.PerturbationRestart:
|
|
logger.Info(fmt.Sprintf("Restarting node %v...", node.Name))
|
|
if err := execCompose(testnet.Dir, "restart", node.Name); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
default:
|
|
return nil, fmt.Errorf("unexpected perturbation %q", perturbation)
|
|
}
|
|
|
|
// Seed nodes do not have an RPC endpoint exposed so we cannot assert that
|
|
// the node recovered. All we can do is hope.
|
|
if node.Mode == e2e.ModeSeed {
|
|
return nil, nil
|
|
}
|
|
|
|
status, err := waitForNode(node, 0, 2*time.Minute)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
logger.Info(fmt.Sprintf("Node %v recovered at height %v", node.Name, status.SyncInfo.LatestBlockHeight))
|
|
return status, nil
|
|
}
|