diff --git a/service/storage_service.cc b/service/storage_service.cc index 7df55c85a9..d1177f6f3c 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -6035,10 +6035,12 @@ future storage_service::raft_topology_cmd_handler(raft version, current_version))); } + rtlogger.info("raft_topology_cmd::barrier_and_drain version {}: waiting for stale token metadata versions to be released", version); co_await ss._shared_token_metadata.stale_versions_in_use(); + rtlogger.info("raft_topology_cmd::barrier_and_drain version {}: stale versions released, draining closing sessions", version); co_await get_topology_session_manager().drain_closing_sessions(); - rtlogger.info("raft_topology_cmd::barrier_and_drain done"); + rtlogger.info("raft_topology_cmd::barrier_and_drain version {}: done", version); }); co_await utils::get_local_injector().inject("raft_topology_barrier_and_drain_fail", [this] (auto& handler) -> future<> { diff --git a/test/cluster/test_alternator.py b/test/cluster/test_alternator.py index 1c9cc1d341..06622cd40c 100644 --- a/test/cluster/test_alternator.py +++ b/test/cluster/test_alternator.py @@ -1156,7 +1156,7 @@ async def test_alternator_invalid_shard_for_lwt(manager: ManagerClient): # The next barrier must be for the write_both_read_new, we need a guarantee # that the src_shard observed it logger.info("Waiting for the next barrier") - await log.wait_for(re.escape(f"[shard {src_shard}: gms] raft_topology - raft_topology_cmd::barrier_and_drain done"), + await log.wait_for(f"\\[shard {src_shard}: gms\\] raft_topology - raft_topology_cmd::barrier_and_drain.*done", from_mark=m) # Now we have a guarantee that a new barrier succeeded on the src_shard, diff --git a/test/cluster/test_tablets_lwt.py b/test/cluster/test_tablets_lwt.py index 7d6e9a085b..aec7508f76 100644 --- a/test/cluster/test_tablets_lwt.py +++ b/test/cluster/test_tablets_lwt.py @@ -994,7 +994,7 @@ async def test_tablets_merge_waits_for_lwt(manager: ManagerClient): logger.info("Wait for the global barrier to start draining on shard0") await log0.wait_for("\\[shard 0: gms\\] raft_topology - Got raft_topology_cmd::barrier_and_drain", from_mark=m) # Just to confirm that the guard still holds the erm - matches = await log0.grep("\\[shard 0: gms\\] raft_topology - raft_topology_cmd::barrier_and_drain done", from_mark=m) + matches = await log0.grep("\\[shard 0: gms\\] raft_topology - raft_topology_cmd::barrier_and_drain.*done", from_mark=m) assert len(matches) == 0 # Before the fix, the tablet migration global barrier did not wait for the LWT operation.