From ffcce1ffc83fdbf760614c69e7da3bc4c7247bb3 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Thu, 25 Dec 2025 16:19:31 +0200 Subject: [PATCH] test/boost: fix flaky test node_view_update_backlog The boost test view_schema_test.cc::node_view_update_backlog can be flaky if the test machine has a hiccup of 100ms, and this patch fixes it: The test is a unit test for db::view::node_update_backlog, which is supposed to cache the backlog calculation for a given interval. The test asks to cache the backlog for 100ms, and then without sleeping at all tries to fetch a value again and expects the unchanged cached value to be returned. However, if the test run experiences a context switch of 100ms, it can fail, and it did once as reported in #27876. The fix is to change the interval in this test from 100ms to something much larger, like 10 seconds. We don't sleep this amount - we just need the second fetch to happen *before* 10 seconds have passed, so there's no harm in using a very large interval. However, the second half of this test wants to check that after the interval is over, we do get a new backlog calculation. So for the second half of this test we can and should use a shorter interval - e.g., 10ms. We don't care if the test machine is slow or context switched, for this half of the test we want to sleep *more* than 10ms, and that's easy. The fixed test is faster than the old one (10ms instead of 100ms) and more reliable on a shared test machine. Fixes #27876. 
Signed-off-by: Nadav Har'El Closes scylladb/scylladb#27878 --- test/boost/view_schema_test.cc | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/test/boost/view_schema_test.cc b/test/boost/view_schema_test.cc index e54932f1b8..20a4028e67 100644 --- a/test/boost/view_schema_test.cc +++ b/test/boost/view_schema_test.cc @@ -2599,7 +2599,16 @@ SEASTAR_TEST_CASE(test_unselected_column) { } SEASTAR_THREAD_TEST_CASE(node_view_update_backlog) { - db::view::node_update_backlog b(2, 100ms); + // This test was originally written assuming we have (at least) two + // shards and the test doesn't run on shard 1... + BOOST_ASSERT(this_shard_id() != 1); + BOOST_ASSERT(smp::count >= 2); + + // First, check that a db::view::node_update_backlog object doesn't + // recalculate the backlog if the interval hasn't yet passed (we use + // a long 10 second interval that will certainly not pass during this + // test). + db::view::node_update_backlog b(2, 10s); auto backlog = [] (size_t size) { return db::view::update_backlog{size, 1000}; }; smp::submit_to(0, [&b, &backlog] { b.add(backlog(10)); @@ -2610,12 +2619,20 @@ SEASTAR_THREAD_TEST_CASE(node_view_update_backlog) { b.fetch(); }).get(); BOOST_REQUIRE(b.load() == backlog(10)); - sleep(101ms).get(); - smp::submit_to(1, [&b, &backlog] { - b.add(backlog(100)); - b.fetch(); + // Second, check that the backlog *is* recalculated if the interval + // has passed. We use a very short interval (10ms) and sleep a bit more + // to make sure it has passed. + db::view::node_update_backlog b2(2, 10ms); + smp::submit_to(0, [&b2, &backlog] { + b2.add(backlog(10)); + b2.fetch(); }).get(); - BOOST_REQUIRE(b.load() == backlog(100)); + sleep(11ms).get(); + smp::submit_to(1, [&b2, &backlog] { + b2.add(backlog(100)); + b2.fetch(); + }).get(); + BOOST_REQUIRE(b2.load() == backlog(100)); } SEASTAR_TEST_CASE(hide_ttl_and_writetime_for_virtual_columns) {