gossip: Add is_safe_for_bootstrap
Make the following tests pass:
bootstrap_test.py:TestBootstrap.shutdown_wiped_node_cannot_join_test
bootstrap_test.py:TestBootstrap.killed_wiped_node_cannot_join_test
1) start node2
2) wait until the cql connection with node2 is ready
3) stop node2
4) delete data and commitlog directory for node2
5) start node2
In step 5), node2 will do the bootstrap process since its data,
including the system table, is wiped. It will think it is a completely
new node and can possibly stream from the wrong node and violate
consistency.
To fix, we reject the boot if we find that the node was in SHUTDOWN or
STATUS_NORMAL.
CASSANDRA-9765
Message-Id: <47bc23f4ce1487a60c5b4fbe5bfe9514337480a8.1452158975.git.asias@scylladb.com>
This commit is contained in:
@@ -1594,6 +1594,27 @@ future<> gossiper::wait_for_gossip_to_settle() {
|
||||
logger.info("No gossip backlog; proceeding");
|
||||
}
|
||||
});
|
||||
|
||||
/**
 * Decides whether the node at `endpoint` may go through bootstrap, judging by
 * the gossip state currently held for that address.
 *
 * Returns true when no endpoint state is known for the address or when that
 * state is a dead state. Otherwise returns false if the gossip status is
 * empty, STATUS_NORMAL or SHUTDOWN, and true for any other status.
 */
bool gossiper::is_safe_for_bootstrap(inet_address endpoint) {
    auto state = get_endpoint_state_for_endpoint(endpoint);

    // No previous state at all: nothing can conflict with a fresh bootstrap.
    if (!state) {
        return true;
    }
    // The node was previously removed from the cluster: also fine.
    if (is_dead_state(*state)) {
        return true;
    }

    sstring status = get_gossip_status(*state);

    logger.debug("is_safe_for_bootstrap: node {} status {}", endpoint, status);

    // The statuses below are not allowed to join the cluster, as it would not be safe.

    // Failed bootstrap, but the node did start gossiping.
    if (status == sstring("")) {
        return false;
    }
    // Node is legitimately in the cluster, or it was stopped with kill -9.
    if (status == sstring(versioned_value::STATUS_NORMAL)) {
        return false;
    }
    // Node was shut down.
    if (status == sstring(versioned_value::SHUTDOWN)) {
        return false;
    }

    return true;
}
|
||||
|
||||
} // namespace gms
|
||||
|
||||
@@ -347,6 +347,7 @@ public:
|
||||
future<int> get_current_heart_beat_version(inet_address endpoint);
|
||||
|
||||
bool is_gossip_only_member(inet_address endpoint);
|
||||
bool is_safe_for_bootstrap(inet_address endpoint);
|
||||
private:
|
||||
/**
|
||||
* Returns true if the chosen target was also a seed. False otherwise
|
||||
|
||||
@@ -1103,8 +1103,7 @@ future<> storage_service::check_for_endpoint_collision() {
|
||||
do {
|
||||
gossiper.do_shadow_round().get();
|
||||
auto addr = get_broadcast_address();
|
||||
auto eps = gossiper.get_endpoint_state_for_endpoint(addr);
|
||||
if (eps && !gossiper.is_dead_state(*eps) && !gossiper.is_gossip_only_member(addr)) {
|
||||
if (!gossiper.is_safe_for_bootstrap(addr)) {
|
||||
throw std::runtime_error(sprint("A node with address %s already exists, cancelling join. "
|
||||
"Use cassandra.replace_address if you want to replace this node.", addr));
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user