raft: Handle non-critical config update errors when changing status

to voter.

When a node is bootstrapped and joins a cluster as a non-voter, errors can occur while committing
a new Raft record, for instance, if the Raft leader changes during this time. These errors are not
critical and should not cause a node crash, as the action can be retried.

Fixes scylladb/scylladb#20814

(cherry picked from commit 8c48f7ad62)
This commit is contained in:
Sergey Zolotukhin
2025-01-09 17:51:38 +01:00
committed by GitHub Action
parent 36ff3e8f5f
commit 2ea97d8c19
2 changed files with 9 additions and 2 deletions

View File

@@ -825,7 +825,15 @@ future<> raft_group0::finish_setup_after_join(service::storage_service& ss, cql3
// Just bootstrapped and joined as non-voter. Become a voter.
auto pause_shutdown = _shutdown_gate.hold();
raft::server_address my_addr{my_id, {}};
co_await _raft_gr.group0().modify_config({{my_addr, true}}, {}, &_abort_source);
co_await run_op_with_retry(_abort_source, [this, my_addr]() -> future<operation_result> {
try {
co_await _raft_gr.group0().modify_config({{my_addr, true}}, {}, &_abort_source);
} catch (const raft::commit_status_unknown& e) {
group0_log.info("finish_setup_after_join({}): modify_config returned \"{}\", retrying", my_addr, e);
co_return operation_result::failure;
}
co_return operation_result::success;
}, "finish_setup_after_join->modify_config", {});
group0_log.info("finish_setup_after_join: became a group 0 voter.");
// No need to run `upgrade_to_group0()` since we must have bootstrapped with Raft

View File

@@ -17,7 +17,6 @@ from test.pylib.util import wait_for_cql_and_get_hosts
logger = logging.getLogger(__name__)
@pytest.mark.xfail(reason="issue #20814")
@pytest.mark.asyncio
async def test_error_while_becoming_voter(request: pytest.FixtureRequest, manager: ManagerClient) -> None:
"""