From f2b1cbe9981ef4e32f3d432744cbae91fb1dc023 Mon Sep 17 00:00:00 2001
From: Petr Gusev
Date: Wed, 27 May 2026 12:05:24 +0200
Subject: [PATCH] strong consistency/groups_manager: handle timeout in update() wait-for-leader loop

The wait-for-leader loop in groups_manager::update() uses
abort_on_expiry with a 60-second timeout. If the timeout fires,
co_await w->future throws an exception that propagates unhandled out of
the server_control_op coroutine, leaving the group in an indeterminate
state.

Use coroutine::as_future to catch the exception, log a warning, and
break out of the loop gracefully. The group will still be reported as
started (allowing other operations to proceed) even if the leader
wasn't found within the timeout.
---
 service/strong_consistency/groups_manager.cc | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/service/strong_consistency/groups_manager.cc b/service/strong_consistency/groups_manager.cc
index a275f9ad60..2c9db7c301 100644
--- a/service/strong_consistency/groups_manager.cc
+++ b/service/strong_consistency/groups_manager.cc
@@ -409,7 +409,12 @@ void groups_manager::update(token_metadata_ptr new_tm) {
     auto srv = raft_server(state, state.gate->hold());
     auto res = srv.begin_mutate(aoe.abort_source());
     if (auto w = get_if(&res)) {
-        co_await std::move(w->future);
+        auto f = co_await coroutine::as_future(std::move(w->future));
+        if (f.failed()) {
+            logger.warn("update(): waiting for leader timed out for tablet {}, "
+                "group id {}: {}", tablet, id, f.get_exception());
+            break;
+        }
     } else {
         break;
     }