mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-01 21:55:50 +00:00
raft_group0: join_group0: fix join hang when node joins group 0 before post_server_start
A joining node hung forever if the topology coordinator added it to the
group 0 configuration before the node reached `post_server_start`. In
that case, `server->get_configuration().contains(my_id)` returned true
and the node broke out of the join loop early, skipping
`post_server_start`. `_join_node_group0_started` was therefore never set,
so the node's `join_node_response` RPC handler blocked indefinitely.
Meanwhile the topology coordinator's `respond_to_joining_node` call
(which has no timeout) hung forever waiting for the reply that never came.
Fix by only taking the early-break path when not starting as a follower
(i.e. when the node is the discovery leader or is restarting). A joining
node must always reach `post_server_start`.
We also provide a regression test. It takes 6s in dev mode.
Fixes SCYLLADB-959
Closes scylladb/scylladb#29266
(cherry picked from commit b9f82f6f23)
Closes scylladb/scylladb#29291
Closes scylladb/scylladb#29308
This commit is contained in:
@@ -555,6 +555,7 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
|
||||
group0_id = g0_info.group0_id;
|
||||
raft::server_address my_addr{my_id, {}};
|
||||
|
||||
bool starting_server_as_follower = false;
|
||||
if (server == nullptr) {
|
||||
// This is the first time discovery is run. Create and start a Raft server for group 0 on this node.
|
||||
raft::configuration initial_configuration;
|
||||
@@ -582,6 +583,7 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
|
||||
// trigger an empty snapshot transfer.
|
||||
nontrivial_snapshot = true;
|
||||
} else {
|
||||
starting_server_as_follower = true;
|
||||
co_await handshaker->pre_server_start(g0_info);
|
||||
}
|
||||
|
||||
@@ -610,7 +612,9 @@ future<> raft_group0::join_group0(std::vector<gms::inet_address> seeds, shared_p
|
||||
}
|
||||
|
||||
SCYLLA_ASSERT(server);
|
||||
if (server->get_configuration().contains(my_id)) {
|
||||
co_await utils::get_local_injector().inject("join_group0_pause_before_config_check",
|
||||
utils::wait_for_message(std::chrono::minutes{5}));
|
||||
if (!starting_server_as_follower && server->get_configuration().contains(my_id)) {
|
||||
// True if we started a new group or completed a configuration change initiated earlier.
|
||||
group0_log.info("server {} already in group 0 (id {}) as {}", my_id, group0_id,
|
||||
server->get_configuration().can_vote(my_id)? "voter" : "non-voter");
|
||||
|
||||
71
test/cluster/test_bootstrap_with_quick_group0_join.py
Normal file
71
test/cluster/test_bootstrap_with_quick_group0_join.py
Normal file
@@ -0,0 +1,71 @@
|
||||
#
|
||||
# Copyright (C) 2026-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
||||
#
|
||||
import logging
|
||||
import asyncio
|
||||
import time
|
||||
|
||||
import pytest
|
||||
|
||||
from test.cluster.conftest import skip_mode
|
||||
from test.cluster.util import get_current_group0_config
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.rest_client import read_barrier
|
||||
from test.pylib.util import wait_for
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@skip_mode('release', 'error injections are not supported in release mode')
|
||||
async def test_bootstrap_with_quick_group0_join(manager: ManagerClient):
|
||||
"""Regression test for https://scylladb.atlassian.net/browse/SCYLLADB-959.
|
||||
|
||||
The bug was that when the bootstrapping node joined group0 before reaching
|
||||
post_server_start, it skipped post_server_start and thus hung forever.
|
||||
|
||||
The test simulates the scenario by starting the second node with the
|
||||
join_group0_pause_before_config_check injection. Without the fix, the
|
||||
startup times out.
|
||||
"""
|
||||
logger.info("Adding first server")
|
||||
s1 = await manager.server_add()
|
||||
|
||||
logger.info("Adding second server with join_group0_pause_before_config_check enabled")
|
||||
s2 = await manager.server_add(start=False, config={
|
||||
'error_injections_at_startup': ['join_group0_pause_before_config_check']
|
||||
})
|
||||
|
||||
logger.info(f"Starting {s2}")
|
||||
start_task = asyncio.create_task(manager.server_start(s2.server_id))
|
||||
|
||||
s2_log = await manager.server_open_log(s2.server_id)
|
||||
|
||||
await s2_log.wait_for("join_group0_pause_before_config_check: waiting for message", timeout=60)
|
||||
|
||||
s1_host_id = await manager.get_host_id(s1.server_id)
|
||||
s2_host_id = await manager.get_host_id(s2.server_id)
|
||||
|
||||
async def s2_in_group0_config_on_s1():
|
||||
config = await get_current_group0_config(manager, s1)
|
||||
ids = {m[0] for m in config}
|
||||
assert s1_host_id in ids # sanity check
|
||||
return True if s2_host_id in ids else None
|
||||
|
||||
# Note: we would like to wait for s2 to see itself in the group0 config, but we can't execute
|
||||
# get_current_group0_config for s2, as s2 doesn't handle CQL requests at this point. As a workaround, we wait for s1
|
||||
# to see s2 and then perform a read barrier on s2.
|
||||
logger.info(f"Waiting for {s1} to see {s2} in the group0 config")
|
||||
await wait_for(s2_in_group0_config_on_s1, deadline=time.time() + 60, period=0.1)
|
||||
|
||||
logger.info(f"Performing read barrier on {s2} to make sure it sees itself in the group0 config")
|
||||
await read_barrier(manager.api, s2.ip_addr)
|
||||
|
||||
logger.info(f"Unblocking {s2}")
|
||||
await manager.api.message_injection(s2.ip_addr, 'join_group0_pause_before_config_check')
|
||||
|
||||
logger.info(f"Waiting for {s2} to complete bootstrap")
|
||||
await asyncio.wait_for(start_task, timeout=60)
|
||||
Reference in New Issue
Block a user