From baf12b0b2f9d910d9c8a93707fd67c670dca5de2 Mon Sep 17 00:00:00 2001 From: Tomasz Grabiec Date: Wed, 20 Mar 2024 11:23:46 +0100 Subject: [PATCH] test: tablets: Avoid infinite loop in rebalance_tablets() If there is a bug in the tablet scheduler which makes it never converge for a given state of topology, rebalance_tablets() will never complete and will generate a huge amount of logs. This patch adds a sanity limit so that we fail earlier. This was observed in one of the test_load_balancing_with_random_load runs in CI. Fixes scylladb/scylladb#17894. Closes scylladb/scylladb#17916 --- test/boost/tablets_test.cc | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/test/boost/tablets_test.cc b/test/boost/tablets_test.cc index 05cae63ec8..6476fc7712 100644 --- a/test/boost/tablets_test.cc +++ b/test/boost/tablets_test.cc @@ -769,18 +769,35 @@ void apply_plan_as_in_progress(token_metadata& tm, const migration_plan& plan) { apply_resize_plan(tm, plan); } +static +size_t get_tablet_count(const tablet_metadata& tm) { + size_t count = 0; + for (auto& [table, tmap] : tm.all_tables()) { + count += std::accumulate(tmap.tablets().begin(), tmap.tablets().end(), size_t(0), + [] (size_t accumulator, const locator::tablet_info& info) { + return accumulator + info.replicas.size(); + }); + } + return count; +} + static void rebalance_tablets(tablet_allocator& talloc, shared_token_metadata& stm, locator::load_stats_ptr load_stats = {}, std::unordered_set skiplist = {}) { - while (true) { + // Sanity limit to avoid infinite loops. + // The x10 factor is arbitrary, it's there to account for more complex schedules than direct migration. 
+ auto max_iterations = 1 + get_tablet_count(stm.get()->tablets()) * 10; + + for (size_t i = 0; i < max_iterations; ++i) { auto plan = talloc.balance_tablets(stm.get(), load_stats, std::move(skiplist)).get(); if (plan.empty()) { - break; + return; } stm.mutate_token_metadata([&] (token_metadata& tm) { apply_plan(tm, plan); return make_ready_future<>(); }).get(); } + throw std::runtime_error("rebalance_tablets(): convergence not reached within limit"); } static