mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-30 03:30:49 +00:00
locator: extend rf-rack validation functions
Extend the locator function assert_rf_rack_valid_keyspace to accept arbitrary topology dc-rack maps and nodes instead of using the current token metadata. This allows us to add a new variant of the function that checks rf-rack validity given a topology change that we want to apply. we will use it to check that rf-rack validity will be maintained before applying the topology change. The possible topology changes for the check are node add and node remove / decommission. These operations can change the number of normal racks - if a new node is added to a new rack, or the last node is removed from a rack.
This commit is contained in:
@@ -1423,7 +1423,9 @@ const effective_replication_map_ptr& token_metadata_guard::get_erm() const {
|
||||
return g ? (**g).get_erm() : get<effective_replication_map_ptr>(_guard);
|
||||
}
|
||||
|
||||
void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars) {
|
||||
static void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars,
|
||||
const std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<host_id>>>& dc_rack_map,
|
||||
std::function<bool(host_id)> is_normal_token_owner) {
|
||||
tablet_logger.debug("[assert_rf_rack_valid_keyspace]: Starting verifying that keyspace '{}' is RF-rack-valid", ks);
|
||||
|
||||
// Any keyspace that does NOT use tablets is RF-rack-valid.
|
||||
@@ -1436,8 +1438,6 @@ void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr
|
||||
SCYLLA_ASSERT(ars.get_type() == replication_strategy_type::network_topology);
|
||||
const auto& nts = *static_cast<const network_topology_strategy*>(std::addressof(ars));
|
||||
|
||||
const auto& dc_rack_map = tmptr->get_topology().get_datacenter_racks();
|
||||
|
||||
for (const auto& dc : nts.get_datacenters()) {
|
||||
if (!dc_rack_map.contains(dc)) {
|
||||
on_internal_error(tablet_logger, seastar::format(
|
||||
@@ -1453,9 +1453,7 @@ void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr
|
||||
for (const auto& [_, rack_nodes] : rack_map) {
|
||||
// We must ignore zero-token nodes because they don't take part in replication.
|
||||
// Verify that this rack has at least one normal node.
|
||||
const bool normal_rack = std::ranges::any_of(rack_nodes, [tmptr] (host_id host_id) {
|
||||
return tmptr->is_normal_token_owner(host_id);
|
||||
});
|
||||
const bool normal_rack = std::ranges::any_of(rack_nodes, is_normal_token_owner);
|
||||
if (normal_rack) {
|
||||
++normal_rack_count;
|
||||
}
|
||||
@@ -1486,6 +1484,52 @@ void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr
|
||||
tablet_logger.debug("[assert_rf_rack_valid_keyspace]: Keyspace '{}' has been verified to be RF-rack-valid", ks);
|
||||
}
|
||||
|
||||
void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars) {
|
||||
assert_rf_rack_valid_keyspace(ks, tmptr, ars,
|
||||
tmptr->get_topology().get_datacenter_racks(),
|
||||
[&tmptr] (host_id host) {
|
||||
return tmptr->is_normal_token_owner(host);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars, rf_rack_topology_operation op) {
|
||||
auto dc_rack_map = tmptr->get_topology().get_datacenter_racks();
|
||||
|
||||
switch (op.tag) {
|
||||
case rf_rack_topology_operation::type::add:
|
||||
// include the new node only if it's added to an existing DC.
|
||||
if (dc_rack_map.contains(op.dc)) {
|
||||
tablet_logger.debug("[assert_rf_rack_valid_keyspace]: Including new node {} in DC '{}' and rack '{}'", op.node_id, op.dc, op.rack);
|
||||
dc_rack_map[op.dc][op.rack].insert(op.node_id);
|
||||
}
|
||||
break;
|
||||
case rf_rack_topology_operation::type::remove:
|
||||
// remove the node from the map, if it exists.
|
||||
tablet_logger.debug("[assert_rf_rack_valid_keyspace]: Excluding removed node {} from DC '{}' and rack '{}'", op.node_id, op.dc, op.rack);
|
||||
if (dc_rack_map.contains(op.dc) && dc_rack_map[op.dc].contains(op.rack)) {
|
||||
dc_rack_map[op.dc][op.rack].erase(op.node_id);
|
||||
if (dc_rack_map[op.dc][op.rack].empty()) {
|
||||
dc_rack_map[op.dc].erase(op.rack);
|
||||
if (dc_rack_map[op.dc].empty()) {
|
||||
dc_rack_map.erase(op.dc);
|
||||
}
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
assert_rf_rack_valid_keyspace(ks, tmptr, ars,
|
||||
dc_rack_map,
|
||||
[&tmptr, &op] (host_id host) {
|
||||
if (op.tag == rf_rack_topology_operation::type::add && host == op.node_id) {
|
||||
return true;
|
||||
}
|
||||
return tmptr->is_normal_token_owner(host);
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
rack_list get_allowed_racks(const locator::token_metadata& tm, const sstring& dc) {
|
||||
auto& topo = tm.get_topology();
|
||||
auto normal_nodes = [&] (const sstring& rack) {
|
||||
|
||||
@@ -880,6 +880,23 @@ class abstract_replication_strategy;
|
||||
/// * The keyspace need not exist. We use its name purely for informational reasons (in error messages).
|
||||
void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr, const abstract_replication_strategy&);
|
||||
|
||||
struct rf_rack_topology_operation {
|
||||
enum class type {
|
||||
add,
|
||||
remove // node remove or decommission
|
||||
};
|
||||
type tag;
|
||||
host_id node_id;
|
||||
sstring dc;
|
||||
sstring rack;
|
||||
};
|
||||
|
||||
// Verify that the provided keyspace corresponding to the provided replication strategy will be RF-rack-valid
|
||||
// after the provided topology change operation is applied.
|
||||
// The operation is either adding a node, or removing / decommissioning a node.
|
||||
// The added/removed node should be a normal token owning node. Nodes that don't own tokens don't affect RF-rack-validity.
|
||||
void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars, rf_rack_topology_operation op);
|
||||
|
||||
/// Returns the list of racks that can be used for placing replicas in a given DC.
|
||||
rack_list get_allowed_racks(const locator::token_metadata&, const sstring& dc);
|
||||
|
||||
|
||||
@@ -3599,6 +3599,53 @@ void database::check_rf_rack_validity(const locator::token_metadata_ptr tmptr) c
|
||||
}
|
||||
}
|
||||
|
||||
bool database::check_rf_rack_validity_with_topology_change(locator::token_metadata_ptr tmptr, locator::rf_rack_topology_operation change) const {
|
||||
// if it's already invalid before the topology change, it's allowed to remain invalid
|
||||
try {
|
||||
check_rf_rack_validity(tmptr);
|
||||
} catch (...) {
|
||||
return true;
|
||||
}
|
||||
|
||||
const auto& keyspaces = get_keyspaces();
|
||||
std::vector<std::string_view> invalid_keyspaces{};
|
||||
bool valid = true;
|
||||
|
||||
for (const auto& [name, info] : keyspaces) {
|
||||
try {
|
||||
locator::assert_rf_rack_valid_keyspace(name, tmptr, info.get_replication_strategy(), change);
|
||||
} catch (...) {
|
||||
if (enforce_rf_rack_validity_for_keyspace(info)) {
|
||||
valid = false;
|
||||
}
|
||||
|
||||
invalid_keyspaces.push_back(std::string_view(name));
|
||||
}
|
||||
}
|
||||
|
||||
if (invalid_keyspaces.size() != 0) {
|
||||
const auto ks_list = invalid_keyspaces
|
||||
| std::views::join_with(std::string_view(", "))
|
||||
| std::ranges::to<std::string>();
|
||||
|
||||
auto op_str = [&] {
|
||||
switch (change.tag) {
|
||||
case locator::rf_rack_topology_operation::type::add: return "joining";
|
||||
case locator::rf_rack_topology_operation::type::remove: return "removed";
|
||||
}
|
||||
}();
|
||||
|
||||
dblog.warn("The {} node with DC='{}' and rack='{}' makes some existing keyspaces RF-rack-invalid, i.e. the replication factor "
|
||||
"does not match the number of racks in one of the datacenters. That may reduce "
|
||||
"availability in case of a failure (cf. "
|
||||
"https://docs.scylladb.com/manual/stable/reference/glossary.html#term-RF-rack-valid-keyspace). "
|
||||
"Those keyspaces are: {}",
|
||||
op_str, change.dc, change.rack, ks_list);
|
||||
}
|
||||
|
||||
return valid;
|
||||
}
|
||||
|
||||
utils::chunked_vector<uint64_t> compute_random_sorted_ints(uint64_t max_value, uint64_t n_values) {
|
||||
static thread_local std::minstd_rand rng{std::random_device{}()};
|
||||
std::uniform_int_distribution<uint64_t> dist(0, max_value);
|
||||
|
||||
@@ -2125,6 +2125,18 @@ public:
|
||||
// must contain a complete list of racks and data centers in the cluster.
|
||||
void check_rf_rack_validity(const locator::token_metadata_ptr) const;
|
||||
|
||||
// Verifies whether all keyspaces that require RF-rack-validity (as determined by enforce_rf_rack_validity_for_keyspace)
|
||||
// would remain RF-rack-valid after applying a topology change.
|
||||
//
|
||||
// Returns true if all such keyspaces would remain RF-rack-valid after the change, and false otherwise.
|
||||
// For keyspaces that would become RF-rack-invalid but do not require enforcement,
|
||||
// a warning is printed, but this does not affect the return value.
|
||||
//
|
||||
// Preconditions:
|
||||
// * The provided locator::topology instance (from the passed locator::token_metadata_ptr)
|
||||
// must contain a complete list of racks and data centers in the cluster.
|
||||
bool check_rf_rack_validity_with_topology_change(locator::token_metadata_ptr, locator::rf_rack_topology_operation) const;
|
||||
|
||||
private:
|
||||
// SSTable sampling might require considerable amounts of memory,
|
||||
// so we want to limit the number of concurrent sampling operations.
|
||||
|
||||
Reference in New Issue
Block a user