topology_coordinator: handle seastar::abort_requested_exception alongside raft::request_aborted

In several exception handlers, only raft::request_aborted was being
caught and rethrown, while seastar::abort_requested_exception was
falling through to the generic catch(...) block. This caused the
exception to be incorrectly treated as a failure that triggers
rollback, instead of being recognized as an abort signal.

For example, during tablet draining, the error log showed:
"tablets draining failed with seastar::abort_requested_exception
(abort requested). Aborting the topology operation"

This change adds seastar::abort_requested_exception handling
alongside raft::request_aborted in all places where it was missing.
When rethrown, these exceptions propagate up to the main run() loop,
where handle_topology_coordinator_error() recognizes them as normal
abort signals and allows the coordinator to exit gracefully without
triggering unnecessary rollback operations. The one exception is
fence_previous_coordinator(), where the exception is not rethrown:
it is logged at debug level and the loop is exited.

Fixes: scylladb/scylladb#27255

(cherry picked from commit 37e3dacf33)
This commit is contained in:
Emil Maskovsky
2025-11-27 16:09:18 +01:00
parent 838ef92141
commit bfce02ce7e

View File

@@ -2066,6 +2066,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
throw;
} catch (raft::request_aborted&) {
throw;
} catch (seastar::abort_requested_exception&) {
throw;
} catch (...) {
rtlogger.error("transition_state::join_group0, "
"global_token_metadata_barrier failed, error {}",
@@ -2211,6 +2213,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
throw;
} catch (raft::request_aborted&) {
throw;
} catch (seastar::abort_requested_exception&) {
throw;
} catch (...) {
rtlogger.error("transition_state::commit_cdc_generation, "
"raft_topology_cmd::command::barrier failed, error {}", std::current_exception());
@@ -2306,6 +2310,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
throw;
} catch (raft::request_aborted&) {
throw;
} catch (seastar::abort_requested_exception&) {
throw;
} catch (...) {
rtlogger.error("tablets draining failed with {}. Aborting the topology operation", std::current_exception());
_rollback = fmt::format("Failed to drain tablets: {}", std::current_exception());
@@ -2323,6 +2329,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
throw;
} catch (raft::request_aborted&) {
throw;
} catch (seastar::abort_requested_exception&) {
throw;
} catch (...) {
rtlogger.error("transition_state::write_both_read_old, "
"global_token_metadata_barrier failed, error {}",
@@ -2371,6 +2379,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
throw;
} catch (raft::request_aborted&) {
throw;
} catch (seastar::abort_requested_exception&) {
throw;
} catch (...) {
rtlogger.error("send_raft_topology_cmd(stream_ranges) failed with exception"
" (node state is {}): {}", state, std::current_exception());
@@ -2403,6 +2413,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
throw;
} catch (raft::request_aborted&) {
throw;
} catch (seastar::abort_requested_exception&) {
throw;
} catch (...) {
rtlogger.error("transition_state::write_both_read_new, "
"global_token_metadata_barrier failed, error {}",
@@ -2542,6 +2554,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
throw;
} catch (raft::request_aborted&) {
throw;
} catch (seastar::abort_requested_exception&) {
throw;
} catch (...) {
rtlogger.error("transition_state::left_token_ring, "
"raft_topology_cmd::command::barrier failed, error {}",
@@ -2624,6 +2638,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
throw;
} catch (raft::request_aborted&) {
throw;
} catch (seastar::abort_requested_exception&) {
throw;
} catch(...) {
rtlogger.warn("failed to run barrier_and_drain during rollback of {} after {} failure: {}",
node.id, state, std::current_exception());
@@ -2705,6 +2721,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
throw;
} catch (raft::request_aborted&) {
throw;
} catch (seastar::abort_requested_exception&) {
throw;
} catch(...) {
wait_for_ip_error = std::current_exception();
rtlogger.warn("raft_topology_cmd::command::wait_for_ip failed, error {}",
@@ -2827,7 +2845,9 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
rtbuilder.done();
} catch (term_changed_error&) {
throw;
} catch (raft::request_aborted& e) {
} catch (raft::request_aborted&) {
throw;
} catch (seastar::abort_requested_exception&) {
throw;
} catch (...) {
rtlogger.error("send_raft_topology_cmd(stream_ranges) failed with exception"
@@ -3430,6 +3450,10 @@ future<> topology_coordinator::fence_previous_coordinator() {
// Abort was requested. Break the loop
rtlogger.debug("request to fence previous coordinator was aborted");
break;
} catch (seastar::abort_requested_exception&) {
// Abort was requested. Break the loop
rtlogger.debug("request to fence previous coordinator was aborted");
break;
} catch (...) {
rtlogger.error("failed to fence previous coordinator {}", std::current_exception());
}