diff --git a/service/raft/raft_group0.cc b/service/raft/raft_group0.cc index c39849ad3c..dc444ad215 100644 --- a/service/raft/raft_group0.cc +++ b/service/raft/raft_group0.cc @@ -33,6 +33,7 @@ #include #include #include +#include #include "idl/group0.dist.hh" @@ -469,6 +470,13 @@ struct group0_members { const raft::server& _group0_server; const raft_address_map& _address_map; + raft::config_member_set get_members() const { + return _group0_server.get_configuration().current; + } + + std::optional get_inet_addr(const raft::config_member& member) const { + return _address_map.find(member.addr.id); + } std::vector get_inet_addrs(seastar::compat::source_location l = seastar::compat::source_location::current()) const { @@ -576,6 +584,10 @@ future<> raft_group0::setup_group0( co_await join_group0(std::move(seeds), false /* non-voter */, ss, qp, mm, cdc_gen_service); group0_log.info("setup_group0: successfully joined group 0."); + utils::get_local_injector().inject("stop_after_joining_group0", [&] { + throw std::runtime_error{"injection: stop_after_joining_group0"}; + }); + if (replace_info) { // Insert the replaced node's (Raft ID, IP address) pair into `raft_address_map`. // In general, the mapping won't be obtained through the regular gossiping route: @@ -1192,10 +1204,7 @@ static future wait_for_peers_to_enter_synchronize_state( for (sleep_with_exponential_backoff sleep;; co_await sleep(as)) { // We fetch the config again on every attempt to handle the possibility of removing failed nodes. - auto current_config = members0.get_inet_addrs(); - if (current_config.empty()) { - continue; - } + auto current_members_set = members0.get_members(); ::tracker tracker; auto retry = make_lw_shared(false); @@ -1209,10 +1218,20 @@ static future wait_for_peers_to_enter_synchronize_state( } (void) [] (netw::messaging_service& ms, abort_source& as, gate::holder pause_shutdown, - std::vector current_config, + raft::config_member_set current_members_set, group0_members members0, lw_shared_ptr> entered_synchronize, lw_shared_ptr retry, ::tracker tracker) -> future<> { - co_await max_concurrent_for_each(current_config, max_concurrency, [&] (const gms::inet_address& node) -> future<> { + co_await max_concurrent_for_each(current_members_set, max_concurrency, [&] (const raft::config_member& member) -> future<> { + auto node_opt = members0.get_inet_addr(member); + + if (!node_opt.has_value()) { + upgrade_log.warn("wait_for_peers_to_enter_synchronize_state: cannot resolve the IP of {}", member); + *retry = true; + co_return; + } + + auto node = *node_opt; + if (entered_synchronize->contains(node)) { co_return; } @@ -1249,7 +1268,7 @@ static future wait_for_peers_to_enter_synchronize_state( }); tracker.set_value(false); - }(ms, as, pause_shutdown, std::move(current_config), entered_synchronize, retry, tracker); + }(ms, as, pause_shutdown, std::move(current_members_set), members0, entered_synchronize, retry, tracker); auto finish_early = co_await tracker.get(); if (finish_early) { diff --git a/test/topology_experimental_raft/suite.yaml b/test/topology_experimental_raft/suite.yaml index 0097efd0be..7b828049e4 100644 --- a/test/topology_experimental_raft/suite.yaml +++ b/test/topology_experimental_raft/suite.yaml @@ -6,3 +6,5 @@ extra_scylla_config_options: authenticator: AllowAllAuthenticator authorizer: AllowAllAuthorizer experimental_features: ['raft'] +skip_in_release: + - test_blocked_bootstrap diff --git a/test/topology_experimental_raft/test_blocked_bootstrap.py b/test/topology_experimental_raft/test_blocked_bootstrap.py new file mode 100644 index 0000000000..ec10f04b0f --- /dev/null +++ b/test/topology_experimental_raft/test_blocked_bootstrap.py @@ -0,0 +1,51 @@ +# Copyright (C) 2023-present ScyllaDB +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# +from test.pylib.scylla_cluster import ReplaceConfig +from test.pylib.manager_client import ManagerClient + +import pytest +import logging + +logger = logging.getLogger(__name__) + + +@pytest.mark.asyncio +async def test_blocked_bootstrap(manager: ManagerClient): + """ + Scenario: + 1. Start a cluster with nodes node1, node2, node3 + 2. Start node4 replacing node node2 + 3. Stop node node4 after it joined group0 but before it advertised itself in gossip + 4. Start node5 replacing node node2 + + Test simulates the behavior described in #13543. + + Test passes only if `wait_for_peers_to_enter_synchronize_state` doesn't need to + resolve all IPs to return early. If not, node5 will hang trying to resolve the + IP of node4: + ``` + raft_group0_upgrade - : failed to resolve IP addresses of some of the cluster members ([node4's host ID]) + ``` + """ + servers = [await manager.server_add() for _ in range(3)] + + logger.info(f"Stopping node {servers[0]}") + await manager.server_stop_gracefully(servers[0].server_id) + + logger.info(f"Replacing node {servers[0]}") + replace_cfg = ReplaceConfig(replaced_id = servers[0].server_id, reuse_ip_addr = False, use_host_id = False) + + try: + await manager.server_add(replace_cfg, config={ + 'error_injections_at_startup': ['stop_after_joining_group0'] + }) + except: + # Node stops before it advertised itself in gossip, so manager.server_add throws an exception + pass + else: + assert False, "Node should stop before it advertised itself in gossip" + + logger.info(f"Replacing node {servers[0]}") + await manager.server_add(replace_cfg)