diff --git a/service/tablet_allocator.cc b/service/tablet_allocator.cc index a0aa421885..938e8a38b5 100644 --- a/service/tablet_allocator.cc +++ b/service/tablet_allocator.cc @@ -1849,7 +1849,8 @@ public: auto mig_streaming_info = get_migration_streaming_info(topo, ti, mig); // The node being shrunk may be excluded/down and lack complete tablet stats. // Since we're removing a replica (not placing one), accurate load data isn't needed. - if (_load_sketch->has_node(replica->host)) { + auto* rep_node = topo.find_node(replica->host); + if (_load_sketch->has_node(replica->host) && !(rep_node && rep_node->is_excluded())) { unload(*_load_sketch, replica->host, replica->shard, source_tablets); } if (can_accept_load(nodes, mig_streaming_info)) { diff --git a/service/topology_coordinator.cc b/service/topology_coordinator.cc index 6972004410..424f92b22d 100644 --- a/service/topology_coordinator.cc +++ b/service/topology_coordinator.cc @@ -4437,7 +4437,8 @@ future<> topology_coordinator::refresh_tablet_load_stats() { locator::load_stats node_stats; if (!_gossiper.is_alive(dst)) { - if (_load_stats_per_node.contains(dst)) { + if (_load_stats_per_node.contains(dst) && + !utils::get_local_injector().enter("force_down_node_load_stats_invalid")) { node_stats = _load_stats_per_node[dst]; } else { rtlogger.debug("raft topology: Unable to refresh table load on {} because it's down.", dst); diff --git a/test/cluster/test_tablets.py b/test/cluster/test_tablets.py index 9aa6381dc1..6c73290f65 100644 --- a/test/cluster/test_tablets.py +++ b/test/cluster/test_tablets.py @@ -1745,6 +1745,7 @@ async def test_excludenode(manager: ManagerClient): await manager.remove_node(live_node.server_id, server_id=node_to_remove.server_id) @pytest.mark.asyncio +@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode') async def test_excludenode_shrink_rf(manager: ManagerClient): """ Verifies that ALTER keyspace removing replicas from a DC succeeds when the only @@ -1762,6 +1763,11 @@ async def test_excludenode_shrink_rf(manager: ManagerClient): dc1_servers = await manager.servers_add(servers_num=2, config=config, cmdline=cmdline, property_file={'dc': 'dc1', 'rack': 'rack1'}) dc2_server = await manager.server_add(config=config, cmdline=cmdline, property_file={'dc': 'dc2', 'rack': 'rack2'}) + # Force incomplete load_stats for the down node on every potential topology + # coordinator, so the shrink plan deterministically hits the excluded-node + # path instead of relying on stale cached stats. + await asyncio.gather(*(manager.api.enable_injection(s.ip_addr, "force_down_node_load_stats_invalid", one_shot=False) for s in dc1_servers)) + cql = manager.get_cql() async with new_test_keyspace(manager, "WITH replication = { 'class': 'NetworkTopologyStrategy', " "'dc1': 1, 'dc2': 1} AND tablets = { 'initial': 4 }") as ks: @@ -1778,7 +1784,7 @@ async def test_excludenode_shrink_rf(manager: ManagerClient): # ALTER keyspace to remove dc2 from replication. # This should succeed without timing out, even though the excluded node # has tablet replicas that need to be shrunk away. - await cql.run_async(f"ALTER KEYSPACE {ks} WITH REPLICATION = {{ 'class': 'NetworkTopologyStrategy', 'dc1': 1, 'dc2': 0}}") + await asyncio.wait_for(cql.run_async(f"ALTER KEYSPACE {ks} WITH REPLICATION = {{ 'class': 'NetworkTopologyStrategy', 'dc1': 1, 'dc2': 0}}"), timeout=120) await asyncio.gather(*(read_barrier(manager.api, s.ip_addr) for s in dc1_servers))