From ffcce1ffc83fdbf760614c69e7da3bc4c7247bb3 Mon Sep 17 00:00:00 2001
From: Nadav Har'El <nyh@scylladb.com>
Date: Thu, 25 Dec 2025 16:19:31 +0200
Subject: [PATCH] test/boost: fix flaky test node_view_update_backlog

The boost test view_schema_test.cc::node_view_update_backlog can be
flaky if the test machine has a hiccup of 100ms, and this patch fixes
it:

The test is a unit test for db::view::node_update_backlog, which is
supposed to cache the backlog calculation for a given interval. The
test asks to cache the backlog for 100ms, and then without sleeping
at all tries to fetch a value again and expects the unchanged cached
value to be returned. However, if the test run experiences a context
switch of 100ms, it can fail, and it did once as reported in #27876.

The fix is to change the interval in this test from 100ms to something
much larger, like 10 seconds. We don't sleep this amount - we just need
the second fetch to happen *before* 10 seconds has passed, so there's
no harm in using a very large interval.

However, the second half of this test wants to check that after the
interval is over, we do get a new backlog calculation. So for the
second half of this test we can and should use a shorter interval -
e.g., 10ms. We don't care if the test machine is slow or context switched,
for this half of the test we want to sleep *more* than 10ms, and
that's easy.

The fixed test is faster than the old one (10ms instead of 100ms) and
more reliable on a shared test machine.

Fixes #27876.

Signed-off-by: Nadav Har'El <nyh@scylladb.com>

Closes scylladb/scylladb#27878
---
 test/boost/view_schema_test.cc | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/test/boost/view_schema_test.cc b/test/boost/view_schema_test.cc
index e54932f1b8..20a4028e67 100644
--- a/test/boost/view_schema_test.cc
+++ b/test/boost/view_schema_test.cc
@@ -2599,7 +2599,16 @@ SEASTAR_TEST_CASE(test_unselected_column) {
 }
 
 SEASTAR_THREAD_TEST_CASE(node_view_update_backlog) {
-    db::view::node_update_backlog b(2, 100ms);
+    // This test was originally written assuming we have (at least) two
+    // shards and the test doesn't run on shard 1...
+    BOOST_ASSERT(this_shard_id() != 1);
+    BOOST_ASSERT(smp::count >= 2);
+
+    // First, check that a db::view::node_update_backlog object doesn't
+    // recalculate the backlog if the interval hasn't yet passed (we use
+    // a long 10 second interval that will certainly not pass during this
+    // test).
+    db::view::node_update_backlog b(2, 10s);
     auto backlog = [] (size_t size) { return db::view::update_backlog{size, 1000}; };
     smp::submit_to(0, [&b, &backlog] {
         b.add(backlog(10));
@@ -2610,12 +2619,20 @@ SEASTAR_THREAD_TEST_CASE(node_view_update_backlog) {
         b.fetch();
     }).get();
     BOOST_REQUIRE(b.load() == backlog(10));
-    sleep(101ms).get();
-    smp::submit_to(1, [&b, &backlog] {
-        b.add(backlog(100));
-        b.fetch();
+    // Second, check that the backlog *is* recalculated if the interval
+    // has passed. We use a very short interval (10ms) and sleep a bit more
+    // to make sure it has passed.
+    db::view::node_update_backlog b2(2, 10ms);
+    smp::submit_to(0, [&b2, &backlog] {
+        b2.add(backlog(10));
+        b2.fetch();
     }).get();
-    BOOST_REQUIRE(b.load() == backlog(100));
+    sleep(11ms).get();
+    smp::submit_to(1, [&b2, &backlog] {
+        b2.add(backlog(100));
+        b2.fetch();
+    }).get();
+    BOOST_REQUIRE(b2.load() == backlog(100));
 }
 
 SEASTAR_TEST_CASE(hide_ttl_and_writetime_for_virtual_columns) {