mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-31 03:56:42 +00:00
test: tablets: Avoid infinite loop in rebalance_tablets()
If there is a bug in the tablet scheduler which makes it never converge for a given state of topology, rebalance_tablets() will never complete and will generate a huge amount of logs. This patch adds a sanity limit so that we fail earlier. This was observed in one of the test_load_balancing_with_random_load runs in CI. Fixes scylladb/scylladb#17894. Closes scylladb/scylladb#17916
This commit is contained in:
committed by
Kamil Braun
parent
bc42a5a092
commit
baf12b0b2f
@@ -769,18 +769,35 @@ void apply_plan_as_in_progress(token_metadata& tm, const migration_plan& plan) {
|
||||
apply_resize_plan(tm, plan);
|
||||
}
|
||||
|
||||
static
|
||||
size_t get_tablet_count(const tablet_metadata& tm) {
|
||||
size_t count = 0;
|
||||
for (auto& [table, tmap] : tm.all_tables()) {
|
||||
count += std::accumulate(tmap.tablets().begin(), tmap.tablets().end(), size_t(0),
|
||||
[] (size_t accumulator, const locator::tablet_info& info) {
|
||||
return accumulator + info.replicas.size();
|
||||
});
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
static
|
||||
void rebalance_tablets(tablet_allocator& talloc, shared_token_metadata& stm, locator::load_stats_ptr load_stats = {}, std::unordered_set<host_id> skiplist = {}) {
|
||||
while (true) {
|
||||
// Sanity limit to avoid infinite loops.
|
||||
// The x10 factor is arbitrary, it's there to account for more complex schedules than direct migration.
|
||||
auto max_iterations = 1 + get_tablet_count(stm.get()->tablets()) * 10;
|
||||
|
||||
for (size_t i = 0; i < max_iterations; ++i) {
|
||||
auto plan = talloc.balance_tablets(stm.get(), load_stats, std::move(skiplist)).get();
|
||||
if (plan.empty()) {
|
||||
break;
|
||||
return;
|
||||
}
|
||||
stm.mutate_token_metadata([&] (token_metadata& tm) {
|
||||
apply_plan(tm, plan);
|
||||
return make_ready_future<>();
|
||||
}).get();
|
||||
}
|
||||
throw std::runtime_error("rebalance_tablets(): convergence not reached within limit");
|
||||
}
|
||||
|
||||
static
|
||||
|
||||
Reference in New Issue
Block a user