gossip: Better check for gossip stabilization on startup

This is a backport of Apache CASSANDRA-9401
(2b1e6aba405002ce86d5badf4223de9751bf867d)

To detect gossip stabilization, it is better to check that the number of
nodes in the endpoint_state_map is no longer changing.

Fixes #2853
Message-Id: <e9f901ac9cadf5935c9c473433dd93e9d02cb748.1506666004.git.asias@scylladb.com>

(cherry picked from commit c0b965ee56)
This commit is contained in:
Asias He
2017-09-29 14:20:31 +08:00
parent 09da4f3d08
commit 025af9d297

View File

@@ -1764,21 +1764,23 @@ future<> gossiper::wait_for_gossip_to_settle() {
static constexpr int32_t GOSSIP_SETTLE_POLL_SUCCESSES_REQUIRED = 3;
int32_t total_polls = 0;
int32_t num_okay = 0;
int32_t ep_size = endpoint_state_map.size();
logger.info("Waiting for gossip to settle before accepting client requests...");
sleep(GOSSIP_SETTLE_MIN_WAIT_MS).get();
while (num_okay < GOSSIP_SETTLE_POLL_SUCCESSES_REQUIRED) {
sleep(GOSSIP_SETTLE_POLL_INTERVAL_MS).get();
int32_t current_size = endpoint_state_map.size();
total_polls++;
// Make sure 5 gossip rounds are completed sucessfully
if (_nr_run > 5) {
logger.debug("Gossip looks settled. gossip round completed: {}", _nr_run);
if (current_size == ep_size) {
logger.debug("Gossip looks settled");
num_okay++;
} else {
logger.info("Gossip not settled after {} polls.", total_polls);
num_okay = 0;
}
ep_size = current_size;
if (force_after > 0 && total_polls > force_after) {
logger.warn("Gossip not settled but startup forced by cassandra.skip_wait_for_gossip_to_settle.", total_polls);
logger.warn("Gossip not settled but startup forced by skip_wait_for_gossip_to_settle. Gossp total polls: {}", total_polls);
break;
}
}