From 22368b13f2b6c7abcbc04431dbb26aa25ec13771 Mon Sep 17 00:00:00 2001
From: Gleb Natapov' via ScyllaDB development
Date: Sun, 29 Sep 2024 11:21:38 +0300
Subject: [PATCH] api: introduce raft stepdown REST API

Also provide test.py util function to trigger it. Can be useful for testing.
---
 api/api-doc/raft.json | 32 ++++++++++++++++++++++++++++++++
 api/raft.cc           | 41 +++++++++++++++++++++++++++++++++++++++++
 test/topology/util.py |  4 ++++
 3 files changed, 77 insertions(+)

diff --git a/api/api-doc/raft.json b/api/api-doc/raft.json
index 971475eb9a..77e8698b49 100644
--- a/api/api-doc/raft.json
+++ b/api/api-doc/raft.json
@@ -94,6 +94,38 @@
                     ]
                 }
             ]
+        },
+        {
+            "path":"/raft/trigger_stepdown/",
+            "operations":[
+                {
+                    "method":"POST",
+                    "summary":"Triggers stepdown of a leader for given Raft group or group0 if not provided (returns an error if the node is not a leader)",
+                    "type":"string",
+                    "nickname":"trigger_stepdown",
+                    "produces":[
+                        "application/json"
+                    ],
+                    "parameters":[
+                        {
+                            "name":"group_id",
+                            "description":"The ID of the group which leader should stepdown",
+                            "required":false,
+                            "allowMultiple":false,
+                            "type":"string",
+                            "paramType":"query"
+                        },
+                        {
+                            "name":"timeout",
+                            "description":"Timeout in seconds after which the endpoint returns a failure. If not provided, 60s is used.",
+                            "required":false,
+                            "allowMultiple":false,
+                            "type":"long",
+                            "paramType":"query"
+                        }
+                    ]
+                }
+            ]
         }
     ]
 }
diff --git a/api/raft.cc b/api/raft.cc
index bdf83b2358..1dcd1254fc 100644
--- a/api/raft.cc
+++ b/api/raft.cc
@@ -11,6 +11,7 @@
 
 #include "api/api-doc/raft.json.hh"
 #include "service/raft/raft_group_registry.hh"
+#include "service/raft/raft_address_map.hh"
 #include "log.hh"
 
 using namespace seastar::httpd;
@@ -126,3 +127,42 @@ void set_raft(http_context&, httpd::routes& r, sharded<service::raft_group_registry>& raft_gr) {
     });
+
+    // POST /raft/trigger_stepdown: ask the Raft server of the requested group
+    // (group0 when no group_id query parameter is given) to step down.
+    r::trigger_stepdown.set(r, [&raft_gr] (std::unique_ptr<http::request> req) -> future<json::json_return_type> {
+        auto timeout = get_request_timeout(*req);
+        auto dur = timeout.value ? *timeout.value - lowres_clock::now() : std::chrono::seconds(60);
+        // Express the remaining time as a number of Raft ticks before passing it to stepdown().
+        const auto stepdown_timeout_ticks = dur / service::raft_tick_interval;
+        auto timeout_dur = raft::logical_clock::duration(stepdown_timeout_ticks);
+
+        if (!req->query_parameters.contains("group_id")) {
+            // Stepdown on group 0 by default
+            co_await raft_gr.invoke_on(0, [timeout_dur] (service::raft_group_registry& raft_gr) {
+                apilog.info("Triggering stepdown for group0");
+                return raft_gr.group0().stepdown(timeout_dur);
+            });
+            co_return json_void{};
+        }
+        // group_id is a query parameter (see raft.json and the check above),
+        // so it has to be read with get_query_param(), not get_path_param().
+        raft::group_id gid{utils::UUID{req->get_query_param("group_id")}};
+
+        std::atomic<bool> found_srv{false};
+        co_await raft_gr.invoke_on_all([gid, timeout_dur, &found_srv] (service::raft_group_registry& raft_gr) -> future<> {
+            auto* srv = raft_gr.find_server(gid);
+            if (!srv) {
+                co_return;
+            }
+
+            found_srv = true;
+            apilog.info("Triggering stepdown for group {}", gid);
+            co_await srv->stepdown(timeout_dur);
+        });
+
+        if (!found_srv) {
+            throw std::runtime_error{fmt::format("Server for group ID {} not found", gid)};
+        }
+        co_return json_void{};
+    });
 }
 
@@ -131,6 +171,7 @@ void unset_raft(http_context&, httpd::routes& r) {
     r::trigger_snapshot.unset(r);
     r::get_leader_host.unset(r);
     r::read_barrier.unset(r);
+    r::trigger_stepdown.unset(r);
 }
 
 }
diff --git a/test/topology/util.py b/test/topology/util.py
index afa3c7acd2..5cdac13ed8 100644
--- a/test/topology/util.py
+++ b/test/topology/util.py
@@ -398,6 +398,10 @@ async def trigger_snapshot(manager, server: ServerInfo) -> None:
     host = cql.cluster.metadata.get_host(server.ip_addr)
     await manager.api.client.post(f"/raft/trigger_snapshot/{group0_id}", host=server.ip_addr)
 
+async def trigger_stepdown(manager, server: ServerInfo) -> None:
+    """Trigger a Raft stepdown on `server` through the REST API (no group_id is sent, so the handler uses group0)."""
+    await manager.api.client.post("/raft/trigger_stepdown", host=server.ip_addr)
+
 async def get_coordinator_host_ids(manager: ManagerClient) -> list[str]: