From 0cb63fb669a73d18391e86a1cc0b7aa96862ced3 Mon Sep 17 00:00:00 2001 From: Dario Mirovic Date: Tue, 31 Mar 2026 02:25:15 +0200 Subject: [PATCH] test: cluster: wait for full config reload in audit live-update path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit _apply_config_to_running_servers used wait_for_config (REST API poll) to confirm live config updates. The REST API reads from shard 0 only, so it can return before broadcast_to_all_shards() completes — other shards may still have stale audit config, generating unexpected entries. Additionally, server_remove_config_option for absent keys sent separate SIGHUPs before server_update_config, and the single wait_for_config at the end could match a completion from an earlier SIGHUP. Wait for "completed re-reading configuration file" in the server log after each SIGHUP-producing operation. This message is logged only after both read_config() and broadcast_to_all_shards() finish, guaranteeing all shards have the new config. Each operation gets its own mark+wait so no stale completion is matched. Fixes SCYLLADB-1277 --- test/cluster/test_audit.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/test/cluster/test_audit.py b/test/cluster/test_audit.py index daf16f7173..649f9978c9 100644 --- a/test/cluster/test_audit.py +++ b/test/cluster/test_audit.py @@ -33,7 +33,6 @@ from test.cluster.dtest.dtest_class import create_ks, wait_for from test.cluster.dtest.tools.assertions import assert_invalid from test.cluster.dtest.tools.data import rows_to_list, run_in_parallel -from test.cluster.test_config import wait_for_config from test.pylib.manager_client import ManagerClient from test.pylib.rest_client import read_barrier @@ -113,11 +112,10 @@ class AuditTester: for k in AUTH_CONFIG: await self.manager.server_remove_config_option(srv.server_id, k) - # Remove absent keys so the server reverts to compiled-in defaults. - for k in absent_keys: - await self.manager.server_remove_config_option(srv.server_id, k) - if needs_restart: + # Remove absent keys so the server reverts to compiled-in defaults. + for k in absent_keys: + await self.manager.server_remove_config_option(srv.server_id, k) await self.manager.server_stop_gracefully(srv.server_id) full_cfg = self._build_server_config(needed, enable_compact_storage, user) await self.manager.server_update_config(srv.server_id, config_options=full_cfg) @@ -127,10 +125,17 @@ class AuditTester: # Server stays up — only push live-updatable keys. live_cfg = {k: v for k, v in needed.items() if k in LIVE_AUDIT_KEYS} live_cfg["enable_create_table_with_compact_storage"] = enable_compact_storage + log_file = await self.manager.server_open_log(srv.server_id) + # Each remove/update sends a SIGHUP. Wait for each one's + # "completed re-reading configuration file" before the next + # so we never match a stale message. + for k in absent_keys: + from_mark = await log_file.mark() + await self.manager.server_remove_config_option(srv.server_id, k) + await log_file.wait_for(r"completed re-reading configuration file", from_mark=from_mark, timeout=60) + from_mark = await log_file.mark() await self.manager.server_update_config(srv.server_id, config_options=live_cfg) - for key in LIVE_AUDIT_KEYS: - if key in live_cfg: - await wait_for_config(self.manager, srv, key, live_cfg[key]) + await log_file.wait_for(r"completed re-reading configuration file", from_mark=from_mark, timeout=60) async def _start_fresh_servers(self, needed: dict[str, str], enable_compact_storage: bool,