diff --git a/gms/gossiper.cc b/gms/gossiper.cc index f7a58b6b0d..002362a7c9 100644 --- a/gms/gossiper.cc +++ b/gms/gossiper.cc @@ -621,6 +621,13 @@ future<> gossiper::do_apply_state_locally(locator::host_id node, endpoint_state // If there is a generation tie, attempt to break it by heartbeat version. auto permit = co_await lock_endpoint(node, null_permit_id); auto es = get_endpoint_state_ptr(node); + + // If remote state update does not contain a host id, check whether the endpoint still + // exists in the `_endpoint_state_map` since after a preemption point it could have been deleted. + if (!remote_state.get_host_id() && !es) { + throw std::runtime_error(format("Entry for host id {} does not exist in the endpoint state map.", node)); + } + if (es) { endpoint_state local_state = *es; auto local_generation = local_state.get_heart_beat_state().get_generation(); @@ -2170,7 +2177,11 @@ future<> gossiper::do_shadow_round(std::unordered_set nodes, }); for (auto& response : responses) { - co_await apply_state_locally_in_shadow_round(std::move(response.endpoint_state_map)); + try { + co_await apply_state_locally_in_shadow_round(std::move(response.endpoint_state_map)); + } catch (const std::exception& exception) { + logger.warn("Error while applying node state {}", exception.what()); + } } if (!nodes_talked.empty()) { break; diff --git a/test/cluster/test_gossiper_race.py b/test/cluster/test_gossiper_race.py index 10d0259075..d42bc39937 100644 --- a/test/cluster/test_gossiper_race.py +++ b/test/cluster/test_gossiper_race.py @@ -15,7 +15,6 @@ from test.pylib.manager_client import ManagerClient @pytest.mark.asyncio @skip_mode('release', 'error injections are not supported in release mode') -@pytest.mark.xfail(reason="https://github.com/scylladb/scylladb/issues/25621") async def test_gossiper_race_on_decommission(manager: ManagerClient): """ Test for gossiper race scenario (https://github.com/scylladb/scylladb/issues/25621):