From 7ea8fa459cbf647fb227af193fdec7807907a6e8 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Tue, 7 Nov 2023 11:19:58 +0200 Subject: [PATCH] storage_service: topology coordinator: do not retry the metadata barrier forever in left_token_ring state Handle the barrier failure by sleeping for a "ring delay" and continuing. The purpose of the barrier is to wait for unfinished writes to the decommissioned node to complete. If the barrier fails, we give them some time to complete and then proceed with the node decommission. The worst thing that may happen is that some writes will fail because the node will be shut down. --- service/storage_service.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 7de67ab95b..96ef5317e4 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -2261,6 +2261,7 @@ class topology_coordinator { // be able to become a voter - we'll be banned from the cluster.) } + bool barrier_failed = false; // Wait until other nodes observe the new token ring and stop sending writes to this node. try { node = retake_node(co_await global_token_metadata_barrier(std::move(node.guard), get_excluded_nodes(node)), node.id); @@ -2272,7 +2273,14 @@ class topology_coordinator { slogger.error("raft topology: node_state::left_token_ring (node: {}), " "global_token_metadata_barrier failed, error {}", node.id, std::current_exception()); - break; + barrier_failed = true; + } + + if (barrier_failed) { + // If barrier above failed it means there may be unfinished writes to a decommissioned node. + // Lets wait for the ring delay for those writes to complete and new topology to propagate + // before continuing. + co_await sleep_abortable(_ring_delay, _as); } // Tell the node to shut down.