From e5928497cec4b33b29ee200bc47a97863f15dab3 Mon Sep 17 00:00:00 2001 From: Aleksandra Martyniuk Date: Fri, 23 Jan 2026 15:42:10 +0100 Subject: [PATCH] service: tasks: scan all tablets in tablet_virtual_task::wait Currently, for repair tasks tablet_virtual_task::wait gathers the ids of tablets that are to be repaired. The gathered set is later used to check if the repair is still ongoing. However, if the tablets are resized (split or merged), the gathered set becomes irrelevant. Thus, we may end up with an invalid tablet id error being thrown. Wait until repair is done for all tablets in the table. --- service/task_manager_module.cc | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/service/task_manager_module.cc b/service/task_manager_module.cc index c44ab2eee0..183065994e 100644 --- a/service/task_manager_module.cc +++ b/service/task_manager_module.cc @@ -150,6 +150,7 @@ future> tablet_virtual_task::wait(tasks::task_ tasks::tmlogger.info("tablet_virtual_task: wait until tablet operation is finished"); co_await utils::get_local_injector().inject("tablet_virtual_task_wait", utils::wait_for_message(60s)); + while (true) { co_await _ss._topology_state_machine.event.wait([&] { if (!_ss.get_token_metadata().tablets().has_tablet_map(table)) { return true; @@ -160,12 +161,29 @@ future> tablet_virtual_task::wait(tasks::task_ } else if (tablet_id_opt.has_value()) { // Migration task. return tmap.get_tablet_info(tablet_id_opt.value()).migration_task_info.tablet_task_id.uuid() != id.uuid(); } else { // Repair task. 
- return std::all_of(res->tablets.begin(), res->tablets.end(), [&] (const locator::tablet_id& tablet) { - return tmap.get_tablet_info(tablet).repair_task_info.tablet_task_id.uuid() != id.uuid(); - }); + return true; } }); + if (!is_repair_task(task_type)) { + break; + } + + auto tmptr = _ss.get_token_metadata_ptr(); + if (!_ss.get_token_metadata().tablets().has_tablet_map(table)) { + break; + } + auto& tmap = tmptr->tablets().get_tablet_map(table); + bool repair_still_running = false; + co_await tmap.for_each_tablet([&] (locator::tablet_id tid, const locator::tablet_info& info) { + repair_still_running = repair_still_running || (info.repair_task_info.is_valid() && info.repair_task_info.tablet_task_id.uuid() == id.uuid()); + return make_ready_future(); + }); + if (!repair_still_running) { + break; + } + } + res->status.state = tasks::task_manager::task_state::done; // Failed repair task is retried. if (!_ss.get_token_metadata().tablets().has_tablet_map(table)) { res->status.end_time = db_clock::now();