mirror of
https://github.com/scylladb/scylladb.git
synced 2026-06-03 05:26:58 +00:00
storage_service: make shadow round mandatory during bootstrap/replace
It is unsafe to bootstrap or perform replace without performing the shadow round, which is used to obtain features from the existing cluster and verify that we support all enabled features. Before this patch, I could easily produce the following scenario: 1. bootstrap first node in the cluster 2. shut it down 3. start bootstrapping second node, pointing to the first as seed 4. the second node skips shadow round because it gets `rpc::closed_error` when trying to connect to first node. 5. the node then passes the feature check (!) and proceeds to the next step, where it waits for nodes to show up in gossiper 6. we now restart the first node, and the second node finishes bootstrap The shadow round must be mandatory during bootstrap/replace, which is what this patch does. On restart it can remain optional as it was until now. In fact it should be completely unnecessary during restart, but since we did it until now (as best-effort), we can keep doing it.
This commit is contained in:
@@ -2005,8 +2005,8 @@ future<> gossiper::advertise_to_nodes(generation_for_nodes advertise_to_nodes) {
|
||||
});
|
||||
}
|
||||
|
||||
future<> gossiper::do_shadow_round(std::unordered_set<gms::inet_address> nodes) {
|
||||
return seastar::async([this, g = this->shared_from_this(), nodes = std::move(nodes)] () mutable {
|
||||
future<> gossiper::do_shadow_round(std::unordered_set<gms::inet_address> nodes, mandatory is_mandatory) {
|
||||
return seastar::async([this, g = this->shared_from_this(), nodes = std::move(nodes), is_mandatory] () mutable {
|
||||
nodes.erase(get_broadcast_address());
|
||||
gossip_get_endpoint_states_request request{{
|
||||
gms::application_state::STATUS,
|
||||
@@ -2046,7 +2046,7 @@ future<> gossiper::do_shadow_round(std::unordered_set<gms::inet_address> nodes)
|
||||
if (!nodes_talked.empty()) {
|
||||
break;
|
||||
}
|
||||
if (nodes_down == nodes.size()) {
|
||||
if (nodes_down == nodes.size() && !is_mandatory) {
|
||||
logger.warn("All nodes={} are down for get_endpoint_states verb. Skip ShadowRound.", nodes);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -589,10 +589,11 @@ public:
|
||||
gms::advertise_myself advertise = gms::advertise_myself::yes);
|
||||
|
||||
public:
|
||||
using mandatory = bool_class<class mandatory_tag>;
|
||||
/**
|
||||
* Do a single 'shadow' round of gossip, where we do not modify any state
|
||||
*/
|
||||
future<> do_shadow_round(std::unordered_set<gms::inet_address> nodes);
|
||||
future<> do_shadow_round(std::unordered_set<gms::inet_address> nodes, mandatory is_mandatory);
|
||||
|
||||
private:
|
||||
void build_seeds_list();
|
||||
|
||||
@@ -2846,7 +2846,7 @@ future<> storage_service::join_token_ring(sharded<db::system_distributed_keyspac
|
||||
} else {
|
||||
auto local_features = _feature_service.supported_feature_set();
|
||||
slogger.info("Performing gossip shadow round, initial_contact_nodes={}", initial_contact_nodes);
|
||||
co_await _gossiper.do_shadow_round(initial_contact_nodes);
|
||||
co_await _gossiper.do_shadow_round(initial_contact_nodes, gms::gossiper::mandatory::no);
|
||||
if (!_raft_topology_change_enabled) {
|
||||
_gossiper.check_knows_remote_features(local_features, loaded_peer_features);
|
||||
}
|
||||
@@ -4146,7 +4146,7 @@ future<> storage_service::check_for_endpoint_collision(std::unordered_set<gms::i
|
||||
auto local_features = _feature_service.supported_feature_set();
|
||||
do {
|
||||
slogger.info("Performing gossip shadow round");
|
||||
_gossiper.do_shadow_round(initial_contact_nodes).get();
|
||||
_gossiper.do_shadow_round(initial_contact_nodes, gms::gossiper::mandatory::yes).get();
|
||||
if (!_raft_topology_change_enabled) {
|
||||
_gossiper.check_knows_remote_features(local_features, loaded_peer_features);
|
||||
}
|
||||
@@ -4225,7 +4225,7 @@ storage_service::prepare_replacement_info(std::unordered_set<gms::inet_address>
|
||||
|
||||
// make magic happen
|
||||
slogger.info("Performing gossip shadow round");
|
||||
co_await _gossiper.do_shadow_round(initial_contact_nodes);
|
||||
co_await _gossiper.do_shadow_round(initial_contact_nodes, gms::gossiper::mandatory::yes);
|
||||
if (!_raft_topology_change_enabled) {
|
||||
auto local_features = _feature_service.supported_feature_set();
|
||||
_gossiper.check_knows_remote_features(local_features, loaded_peer_features);
|
||||
|
||||
Reference in New Issue
Block a user