diff --git a/service/storage_proxy.cc b/service/storage_proxy.cc index 81d8a022d0..a7273d75fe 100644 --- a/service/storage_proxy.cc +++ b/service/storage_proxy.cc @@ -6955,7 +6955,7 @@ future<> storage_proxy::drain_on_shutdown() { //NOTE: the thread is spawned here because there are delicate lifetime issues to consider // and writing them down with plain futures is error-prone. return async([this] { - cancel_write_handlers([] (const abstract_write_response_handler&) { return true; }); + cancel_all_write_response_handlers().get(); _hints_resource_manager.stop().get(); }); } @@ -6979,4 +6979,13 @@ future> storage_proxy::describ return locator::describe_ring(_db.local(), _remote->gossiper(), keyspace, include_only_local_dc); } +future<> storage_proxy::cancel_all_write_response_handlers() { + while (!_response_handlers.empty()) { + _response_handlers.begin()->second->timeout_cb(); + + if (!_response_handlers.empty()) { + co_await maybe_yield(); + } + } +} } diff --git a/service/storage_proxy.hh b/service/storage_proxy.hh index 5d5aa515f5..9b44537750 100644 --- a/service/storage_proxy.hh +++ b/service/storage_proxy.hh @@ -521,6 +521,8 @@ public: bool is_me(gms::inet_address addr) const noexcept; bool is_me(const locator::effective_replication_map& erm, locator::host_id id) const noexcept; + future<> cancel_all_write_response_handlers(); + private: bool only_me(const locator::effective_replication_map& erm, const host_id_vector_replica_set& replicas) const noexcept; diff --git a/test/cluster/test_unfinished_writes_during_shutdown.py b/test/cluster/test_unfinished_writes_during_shutdown.py index 377b6a6949..2ec36a83ce 100644 --- a/test/cluster/test_unfinished_writes_during_shutdown.py +++ b/test/cluster/test_unfinished_writes_during_shutdown.py @@ -24,7 +24,6 @@ from test.cluster.conftest import skip_mode logger = logging.getLogger(__name__) @pytest.mark.asyncio -@pytest.mark.xfail(reason="#23665") @skip_mode('release', 'error injections are not supported in release mode') async def test_unfinished_writes_during_shutdown(request: pytest.FixtureRequest, manager: ManagerClient) -> None: """ Test a simultaneous topology change and write query during shutdown, which may cause the node to get stuck (see https://github.com/scylladb/scylladb/issues/23665).