From 85b42e362b91d5a7b71a2a1999dfec8a3fa66bed Mon Sep 17 00:00:00 2001 From: Asias He Date: Fri, 6 Nov 2015 14:58:55 +0800 Subject: [PATCH] storage_service: Fix hang of decommission nodetool decommission hangs forever due to a recursive lock. decommission() with api lock shutdown_client_servers() with api lock stop_rpc_server() with api lock stop_native_transport() Fix it by calling helpers for stop_rpc_server and stop_native_transport without the lock. --- service/storage_service.cc | 38 +++++++++++++++++++++++--------------- service/storage_service.hh | 3 +++ 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 1bc0bda1a7..5aa67dc473 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -1469,15 +1469,19 @@ future<> storage_service::start_rpc_server() { }); } +future<> storage_service::do_stop_rpc_server() { + auto tserver = _thrift_server; + _thrift_server = {}; + if (tserver) { + // FIXME: thrift_server::stop() doesn't kill existing connections and wait for them + return tserver->stop(); + } + return make_ready_future<>(); +} + future<> storage_service::stop_rpc_server() { return run_with_write_api_lock([] (storage_service& ss) { - auto tserver = ss._thrift_server; - ss._thrift_server = {}; - if (tserver) { - // FIXME: thrift_server::stop() doesn't kill existing connections and wait for them - return tserver->stop(); - } - return make_ready_future<>(); + return ss.do_stop_rpc_server(); }); } @@ -1514,15 +1518,19 @@ future<> storage_service::start_native_transport() { }); } +future<> storage_service::do_stop_native_transport() { + auto cserver = _cql_server; + _cql_server = {}; + if (cserver) { + // FIXME: cql_server::stop() doesn't kill existing connections and wait for them + return cserver->stop(); + } + return make_ready_future<>(); +} + future<> storage_service::stop_native_transport() { return run_with_write_api_lock([] (storage_service& ss) { - auto cserver = ss._cql_server; - ss._cql_server = {}; - if (cserver) { - // FIXME: cql_server::stop() doesn't kill existing connections and wait for them - return cserver->stop(); - } - return make_ready_future<>(); + return ss.do_stop_native_transport(); }); } @@ -2232,7 +2240,7 @@ shared_ptr& storage_service::get_load_broadcaster() { } future<> storage_service::shutdown_client_servers() { - return stop_rpc_server().then([this] { return stop_native_transport(); }); + return do_stop_rpc_server().then([this] { return do_stop_native_transport(); }); } std::unordered_multimap> diff --git a/service/storage_service.hh b/service/storage_service.hh index b06abc43bd..9609cfdc8c 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -282,6 +282,9 @@ public: future is_native_transport_running(); +private: + future<> do_stop_rpc_server(); + future<> do_stop_native_transport(); #if 0 public void stopTransports() {