From d59d7defeeac9bb114dde030c3cbb9de3614f30c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dawid=20M=C4=99drek?= Date: Tue, 10 Feb 2026 16:43:21 +0100 Subject: [PATCH] test: cluster: Fix test_sync_point The test had a few shortcomings that made it flaky or simply wrong: 1. We were verifying that hints were written by checking the size of in-flight hints. However, that could potentially lead to problems in rare situations. For instance, if all of the hints failed to be written to disk, the size of in-flight hints would drop to zero, but creating a sync point would correspond to the empty state. In such a situation, we should fail immediately and indicate what the cause was. 2. A sync point corresponds to the hints that have already been written to disk. The number of those is tracked by the metric `written`. It's a much more reliable way to make sure that hints have been written to the commitlog. That ensures that the sync point we'll create will really correspond to those hints. 3. The auxiliary function `wait_for` used in the test works like this: it executes the passed callback and looks at the result. If it's `None`, it retries it. Otherwise, the callback is deemed to have finished its execution and no further retries will be attempted. Before this commit, we simply returned a bool, and so the code was wrong. We improve it. Note that this fixes scylladb/scylladb#28203, which was a manifestation of scylladb/scylladb#25879. We created a sync point that corresponded to the empty state, and so it immediately resolved, even when node 3 was still dead. Refs scylladb/scylladb#25879 Fixes scylladb/scylladb#28203 (cherry picked from commit a256ba7de00da10905ce5cf758ecf176fb738e21) --- test/cluster/test_hints.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/test/cluster/test_hints.py b/test/cluster/test_hints.py index 68ba3cc3b8..db245fa560 100644 --- a/test/cluster/test_hints.py +++ b/test/cluster/test_hints.py @@ -134,12 +134,17 @@ async def test_sync_point(manager: ManagerClient): # Mutations need to be applied to hinted handoff's commitlog before we create the sync point. # Otherwise, the sync point will correspond to no hints at all. - async def check_no_hints_in_progress_node1() -> bool: - hints = await get_hint_metrics(manager.metrics, node1.ip_addr, "size_of_hints_in_progress") - return hints == 0 + async def check_written_hints(min_count: int) -> bool: + errors = await get_hint_metrics(manager.metrics, node1.ip_addr, "errors") + assert errors == 0, "Writing hints to disk failed" + + hints = await get_hint_metrics(manager.metrics, node1.ip_addr, "written") + if hints >= min_count: + return True + return None deadline = time.time() + 30 - await wait_for(check_no_hints_in_progress_node1, deadline) + await wait_for(lambda: check_written_hints(2 * mutation_count), deadline) sync_point1 = await create_sync_point(manager.api.client, node1.ip_addr)