mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-29 11:10:40 +00:00
Merge '[Backport 2025.1] topology_coordinator: handle seastar::abort_requested_exception alongside raft::request_aborted' from Scylladb[bot]
In several exception handlers, only `raft::request_aborted` was caught and rethrown, while `seastar::abort_requested_exception` fell through to the generic `catch(...)` block. As a result, the exception was incorrectly treated as a failure that triggers rollback, instead of being recognized as an abort signal.
For example, during tablet draining, the error log showed: "tablets draining failed with seastar::abort_requested_exception (abort requested). Aborting the topology operation"
This change adds `seastar::abort_requested_exception` handling alongside `raft::request_aborted` in all places where it was missing. When rethrown, these exceptions propagate up to the main `run()` loop where `handle_topology_coordinator_error()` recognizes them as normal abort signals and allows the coordinator to exit gracefully without triggering unnecessary rollback operations.
Fixes: scylladb/scylladb#27255
No backport: The problem was only seen in tests and not reported in customer tickets, so it's enough to fix it in the main branch.
- (cherry picked from commit 37e3dacf33)
Parent PR: #27314
Closes scylladb/scylladb#27660
* https://github.com/scylladb/scylladb:
topology_coordinator: handle seastar::abort_requested_exception alongside raft::request_aborted
topology_coordinator: consistently rethrow `raft::request_aborted` for direct/global commands
This commit is contained in:
@@ -2004,6 +2004,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
throw;
|
||||
} catch (raft::request_aborted&) {
|
||||
throw;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
throw;
|
||||
} catch (...) {
|
||||
rtlogger.error("transition_state::join_group0, "
|
||||
"global_token_metadata_barrier failed, error {}",
|
||||
@@ -2140,6 +2142,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
throw;
|
||||
} catch (raft::request_aborted&) {
|
||||
throw;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
throw;
|
||||
} catch (...) {
|
||||
rtlogger.error("transition_state::commit_cdc_generation, "
|
||||
"raft_topology_cmd::command::barrier failed, error {}", std::current_exception());
|
||||
@@ -2220,6 +2224,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
throw;
|
||||
} catch (raft::request_aborted&) {
|
||||
throw;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
throw;
|
||||
} catch (...) {
|
||||
rtlogger.error("tablets draining failed with {}. Aborting the topology operation", std::current_exception());
|
||||
_rollback = fmt::format("Failed to drain tablets: {}", std::current_exception());
|
||||
@@ -2237,6 +2243,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
throw;
|
||||
} catch (raft::request_aborted&) {
|
||||
throw;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
throw;
|
||||
} catch (...) {
|
||||
rtlogger.error("transition_state::write_both_read_old, "
|
||||
"global_token_metadata_barrier failed, error {}",
|
||||
@@ -2303,6 +2311,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
throw;
|
||||
} catch (raft::request_aborted&) {
|
||||
throw;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
throw;
|
||||
} catch (...) {
|
||||
rtlogger.error("send_raft_topology_cmd(stream_ranges) failed with exception"
|
||||
" (node state is {}): {}", state, std::current_exception());
|
||||
@@ -2335,6 +2345,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
throw;
|
||||
} catch (raft::request_aborted&) {
|
||||
throw;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
throw;
|
||||
} catch (...) {
|
||||
rtlogger.error("transition_state::write_both_read_new, "
|
||||
"global_token_metadata_barrier failed, error {}",
|
||||
@@ -2478,6 +2490,8 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
throw;
|
||||
} catch (raft::request_aborted&) {
|
||||
throw;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
throw;
|
||||
} catch (...) {
|
||||
rtlogger.error("transition_state::left_token_ring, "
|
||||
"raft_topology_cmd::command::barrier failed, error {}",
|
||||
@@ -2552,6 +2566,10 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
node.guard = co_await exec_global_command(std::move(node.guard),raft_topology_cmd::command::barrier_and_drain, get_excluded_nodes(node), drop_guard_and_retake::yes);
|
||||
} catch (term_changed_error&) {
|
||||
throw;
|
||||
} catch (raft::request_aborted&) {
|
||||
throw;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
throw;
|
||||
} catch(...) {
|
||||
rtlogger.warn("failed to run barrier_and_drain during rollback of {} after {} failure: {}",
|
||||
node.id, state, std::current_exception());
|
||||
@@ -2629,6 +2647,10 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
}
|
||||
} catch (term_changed_error&) {
|
||||
throw;
|
||||
} catch (raft::request_aborted&) {
|
||||
throw;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
throw;
|
||||
} catch(...) {
|
||||
wait_for_ip_error = std::current_exception();
|
||||
rtlogger.warn("raft_topology_cmd::command::wait_for_ip failed, error {}",
|
||||
@@ -2751,6 +2773,10 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
rtbuilder.done();
|
||||
} catch (term_changed_error&) {
|
||||
throw;
|
||||
} catch (raft::request_aborted&) {
|
||||
throw;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
throw;
|
||||
} catch (...) {
|
||||
rtlogger.error("send_raft_topology_cmd(stream_ranges) failed with exception"
|
||||
" (node state is rebuilding): {}", std::current_exception());
|
||||
@@ -3338,6 +3364,10 @@ future<> topology_coordinator::fence_previous_coordinator() {
|
||||
// Abort was requested. Break the loop
|
||||
rtlogger.debug("request to fence previous coordinator was aborted");
|
||||
break;
|
||||
} catch (seastar::abort_requested_exception&) {
|
||||
// Abort was requested. Break the loop
|
||||
rtlogger.debug("request to fence previous coordinator was aborted");
|
||||
break;
|
||||
} catch (...) {
|
||||
rtlogger.error("failed to fence previous coordinator {}", std::current_exception());
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user