From 968b07052d6886fd316217db5ff6c0bcb957a43f Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 2 Mar 2022 15:02:30 +0300 Subject: [PATCH 1/8] storage_service: Relax repeating set_mode-s In several places the call to set_mode(...) is used as a (format-less) replecement for regular logging. Mode doesn't really change there, because it had been changed before. Patch all those places to use regular logging, next patches will make full use of it. Signed-off-by: Pavel Emelyanov --- service/storage_service.cc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 115ae230a1..641517fc52 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -449,7 +449,7 @@ void storage_service::join_token_ring(std::chrono::milliseconds delay) { if (tmptr->is_member(get_broadcast_address())) { throw std::runtime_error("This node is already a member of the token ring; bootstrap aborted. (If replacing a dead node, remove the old one from the ring first.)"); } - set_mode(mode::JOINING, "getting bootstrap token", true); + slogger.info("getting bootstrap token"); if (resume_bootstrap) { _bootstrap_tokens = db::system_keyspace::get_saved_tokens().get0(); if (!_bootstrap_tokens.empty()) { @@ -483,7 +483,7 @@ void storage_service::join_token_ring(std::chrono::milliseconds delay) { } else { sleep_abortable(get_ring_delay(), _abort_source).get(); } - set_mode(mode::JOINING, format("Replacing a node with token(s): {}", _bootstrap_tokens), true); + slogger.info("Replacing a node with token(s): {}", _bootstrap_tokens); // _bootstrap_tokens was previously set in prepare_to_join using tokens gossiped by the replaced node } maybe_start_sys_dist_ks(); @@ -667,7 +667,7 @@ void storage_service::bootstrap() { { gms::application_state::STATUS, versioned_value::bootstrapping(_bootstrap_tokens) }, }).get(); - set_mode(mode::JOINING, format("sleeping {} ms for pending range setup", get_ring_delay().count()), true); + slogger.info("sleeping {} ms for pending range setup", get_ring_delay().count()); _gossiper.wait_for_range_setup().get(); } else { // Even with RBNO bootstrap we need to announce the new CDC generation immediately after it's created. @@ -677,7 +677,7 @@ void storage_service::bootstrap() { } } else { // Wait until we know tokens of existing node before announcing replacing status. - set_mode(mode::JOINING, fmt::format("Wait until local node knows tokens of peer nodes"), true); + slogger.info("Wait until local node knows tokens of peer nodes"); _gossiper.wait_for_range_setup().get(); auto replace_addr = get_replace_address(); slogger.debug("Removing replaced endpoint {} from system.peers", *replace_addr); @@ -691,7 +691,7 @@ void storage_service::bootstrap() { } }).get(); - set_mode(mode::JOINING, "Starting to bootstrap...", true); + slogger.info("Starting to bootstrap..."); if (is_replacing()) { run_replace_ops(); } else { @@ -2607,10 +2607,10 @@ future<> storage_service::do_drain() { return bm.drain(); }); - set_mode(mode::DRAINING, "shutting down migration manager", false); + slogger.debug("shutting down migration manager"); co_await _migration_manager.invoke_on_all(&service::migration_manager::drain); - set_mode(mode::DRAINING, "flushing column families", false); + slogger.debug("flushing column families"); co_await _db.invoke_on_all(&replica::database::drain); } @@ -2758,7 +2758,7 @@ void storage_service::unbootstrap() { // Start with BatchLog replay, which may create hints but no writes since this is no longer a valid endpoint. get_batchlog_manager().local().do_batch_log_replay().get(); - set_mode(mode::LEAVING, "streaming hints to other nodes", true); + slogger.info("streaming hints to other nodes"); // wait for the transfer runnables to signal the latch. slogger.debug("waiting for stream acks."); From c385fe7d791854315121036cb32c83ac19e9752e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 4 Mar 2022 12:46:44 +0300 Subject: [PATCH 2/8] storage_service: Make get_operation_mode() return mode itself Now it reports back formatted mode. For future convenience it's needed to return the raw value, all the more so the mode enum class is already public. Signed-off-by: Pavel Emelyanov --- api/storage_service.cc | 2 +- service/storage_service.cc | 5 ++--- service/storage_service.hh | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/api/storage_service.cc b/api/storage_service.cc index 253e4b1096..1bfb621f0d 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -763,7 +763,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded req) { return ss.local().get_operation_mode().then([] (auto mode) { - return make_ready_future(mode); + return make_ready_future(format("{}", mode)); }); }); diff --git a/service/storage_service.cc b/service/storage_service.cc index 641517fc52..b1440952e6 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -1737,10 +1737,9 @@ future>> storage_service::descr }); }; -future storage_service::get_operation_mode() { +future storage_service::get_operation_mode() { return run_with_no_api_lock([] (storage_service& ss) { - auto mode = ss._operation_mode; - return make_ready_future(format("{}", mode)); + return make_ready_future(ss._operation_mode); }); } diff --git a/service/storage_service.hh b/service/storage_service.hh index c422fff12c..76128b8ef4 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -761,7 +761,7 @@ public: void node_ops_cmd_check(gms::inet_address coordinator, const node_ops_cmd_request& req); future<> node_ops_cmd_heartbeat_updater(const node_ops_cmd& cmd, utils::UUID uuid, std::list nodes, lw_shared_ptr heartbeat_updater_done); - future get_operation_mode(); + future get_operation_mode(); future is_starting(); From ca03fd3145114b30ef14e2c1e6ac482930100bbc Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Fri, 4 Mar 2022 12:59:56 +0300 Subject: [PATCH 3/8] storage_service: Replace is_starting() with get_operation_mode() This is trivial change, since the only user is in API and the get_operation_mode + mode values are at hand. One thing to pay attention to -- the new method checks the mode to be <= STARTING, not for equality. Now this is equivalent change, but next patch will introduce NONE mode that should be reported as is_starting() too. Signed-off-by: Pavel Emelyanov --- api/storage_service.cc | 4 ++-- service/storage_service.cc | 7 ------- service/storage_service.hh | 2 -- 3 files changed, 2 insertions(+), 11 deletions(-) diff --git a/api/storage_service.cc b/api/storage_service.cc index 1bfb621f0d..62c9381273 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -768,8 +768,8 @@ void set_storage_service(http_context& ctx, routes& r, sharded req) { - return ss.local().is_starting().then([] (auto starting) { - return make_ready_future(starting); + return ss.local().get_operation_mode().then([] (auto mode) { + return make_ready_future(mode <= service::storage_service::mode::STARTING); }); }); diff --git a/service/storage_service.cc b/service/storage_service.cc index b1440952e6..ea30f768fa 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -1743,13 +1743,6 @@ future storage_service::get_operation_mode() { }); } -future storage_service::is_starting() { - return run_with_no_api_lock([] (storage_service& ss) { - auto mode = ss._operation_mode; - return mode == storage_service::mode::STARTING; - }); -} - future storage_service::is_gossip_running() { return run_with_no_api_lock([] (storage_service& ss) { return ss._gossiper.is_enabled(); diff --git a/service/storage_service.hh b/service/storage_service.hh index 76128b8ef4..bba86cbe78 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -763,8 +763,6 @@ public: future get_operation_mode(); - future is_starting(); - /** * Shuts node off to writes, empties memtables and the commit log. * There are two differences between drain and the normal shutdown hook: From ffbfa3b542e88d628307bc694cb888c2900cb5bd Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 2 Mar 2022 14:14:20 +0300 Subject: [PATCH 4/8] storage_service: Remove _joined and is_joined() The is_joined() status can be get with get_operation_mode(). Since it indicates that the operation mode is JOINING, NORMAL or anything above, the operation mode the enum class should be shuffled to get the simple >= comparison. Another needed change is to set mode few steps earlier than it happens now to cover the non-bootstrap startup case. And the third change is to partially revert the d49aa7ab that made the .is_joined() method be future-less. Nowadays the is_joined() is called only from the API which is happy with being future-full in all other storage service state checks. Signed-off-by: Pavel Emelyanov --- api/storage_service.cc | 4 +++- service/storage_service.cc | 8 ++------ service/storage_service.hh | 9 +-------- 3 files changed, 6 insertions(+), 15 deletions(-) diff --git a/api/storage_service.cc b/api/storage_service.cc index 62c9381273..6c96bfba00 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -850,7 +850,9 @@ void set_storage_service(http_context& ctx, routes& r, sharded req) { - return make_ready_future(ss.local().is_joined()); + return ss.local().get_operation_mode().then([] (auto mode) { + return make_ready_future(mode >= service::storage_service::mode::JOINING); + }); }); ss::set_stream_throughput_mb_per_sec.set(r, [](std::unique_ptr req) { diff --git a/service/storage_service.cc b/service/storage_service.cc index ea30f768fa..897b9cd99d 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -413,11 +413,7 @@ future<> storage_service::wait_for_ring_to_settle(std::chrono::milliseconds dela // Runs inside seastar::async context void storage_service::join_token_ring(std::chrono::milliseconds delay) { - // This function only gets called on shard 0, but we want to set _joined - // on all shards, so this variable can be later read locally. - container().invoke_on_all([] (auto&& ss) { - ss._joined = true; - }).get(); + set_mode(mode::JOINING, "joining the ring", true); _group0->join_group0().get(); @@ -437,7 +433,7 @@ void storage_service::join_token_ring(std::chrono::milliseconds delay) { } else { db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::IN_PROGRESS).get(); } - set_mode(mode::JOINING, "waiting for ring information", true); + slogger.info("waiting for ring information"); // if our schema hasn't matched yet, keep sleeping until it does // (post CASSANDRA-1391 we don't expect this to be necessary very often, but it doesn't hurt to be careful) diff --git a/service/storage_service.hh b/service/storage_service.hh index bba86cbe78..28925fb839 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -279,10 +279,8 @@ private: bool _initialized = false; - bool _joined = false; - public: - enum class mode { STARTING, NORMAL, JOINING, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED }; + enum class mode { STARTING, JOINING, NORMAL, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED }; private: mode _operation_mode = mode::STARTING; friend std::ostream& operator<<(std::ostream& os, const mode& mode); @@ -398,11 +396,6 @@ private: void join_token_ring(std::chrono::milliseconds); void maybe_start_sys_dist_ks(); public: - inline bool is_joined() const { - // Every time we set _joined, we do it on all shards, so we can read its - // value locally. - return _joined; - } future<> rebuild(sstring source_dc); From dbaca825ec5e32b4594720e31ee5e1ab13e0196d Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 2 Mar 2022 14:37:30 +0300 Subject: [PATCH 5/8] storage_service: Remove _initialized and is_initialized() This bit is hairy. First, it indicates that the storage service entered the init_server() method. But, once the node is up and running it also indicates whether the gossiper is enabled or not via the APi call. To rely on the operation mode, first, the NONE mode is introduced at which the server starts. Then in init_server() is switches to STARTING. Second change is to stop using the bit in enable/disable gossiper API call, instead -- check the gossiper.is_enabled() itself. To keep the is_initialized API call compatible, when the operation mode is NORMAL it would return true/false according to the status of the gossiper. This change is simple because storage service API handlers already have the gossiper instance hanging around. Signed-off-by: Pavel Emelyanov --- api/storage_service.cc | 10 +++++++--- service/storage_service.cc | 22 +++++++--------------- service/storage_service.hh | 7 ++----- 3 files changed, 16 insertions(+), 23 deletions(-) diff --git a/api/storage_service.cc b/api/storage_service.cc index 6c96bfba00..07f27e30e6 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -839,9 +839,13 @@ void set_storage_service(http_context& ctx, routes& r, sharded(json_void()); }); - ss::is_initialized.set(r, [&ss](std::unique_ptr req) { - return ss.local().is_initialized().then([] (bool initialized) { - return make_ready_future(initialized); + ss::is_initialized.set(r, [&ss, &g](std::unique_ptr req) { + return ss.local().get_operation_mode().then([&g] (auto mode) { + bool is_initialized = mode >= service::storage_service::mode::STARTING; + if (mode == service::storage_service::mode::NORMAL) { + is_initialized = g.is_enabled(); + } + return make_ready_future(is_initialized); }); }); diff --git a/service/storage_service.cc b/service/storage_service.cc index 897b9cd99d..a222287393 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -138,6 +138,7 @@ enum class node_external_status { static node_external_status map_operation_mode(storage_service::mode m) { switch (m) { + case storage_service::mode::NONE: return node_external_status::STARTING; case storage_service::mode::STARTING: return node_external_status::STARTING; case storage_service::mode::JOINING: return node_external_status::JOINING; case storage_service::mode::NORMAL: return node_external_status::NORMAL; @@ -1304,7 +1305,7 @@ future<> storage_service::init_server(cql3::query_processor& qp) { assert(this_shard_id() == 0); return seastar::async([this, &qp] { - _initialized = true; + set_mode(mode::STARTING, "preparing to join ring", true); _group0 = std::make_unique(_abort_source, _raft_gr, _messaging.local(), _gossiper, qp, _migration_manager.local()); @@ -1655,6 +1656,7 @@ future> storage_service::effective_ownership( } static const std::map mode_names = { + {storage_service::mode::NONE, "STARTING"}, {storage_service::mode::STARTING, "STARTING"}, {storage_service::mode::NORMAL, "NORMAL"}, {storage_service::mode::JOINING, "JOINING"}, @@ -1748,7 +1750,7 @@ future storage_service::is_gossip_running() { future<> storage_service::start_gossiping() { return run_with_api_lock(sstring("start_gossiping"), [] (storage_service& ss) { return seastar::async([&ss] { - if (!ss._initialized) { + if (!ss._gossiper.is_enabled()) { slogger.warn("Starting gossip by operator request"); ss._gossiper.container().invoke_on_all(&gms::gossiper::start).get(); auto undo = defer([&ss] { ss._gossiper.container().invoke_on_all(&gms::gossiper::stop).get(); }); @@ -1760,9 +1762,7 @@ future<> storage_service::start_gossiping() { db::system_keyspace::get_local_tokens().get0(), cdc_gen_ts).get(); ss._gossiper.force_newer_generation(); - ss._gossiper.start_gossiping(utils::get_generation_number()).then([&ss] { - ss._initialized = true; - }).get(); + ss._gossiper.start_gossiping(utils::get_generation_number()).get(); undo.cancel(); } }); @@ -1771,11 +1771,9 @@ future<> storage_service::start_gossiping() { future<> storage_service::stop_gossiping() { return run_with_api_lock(sstring("stop_gossiping"), [] (storage_service& ss) { - if (ss._initialized) { + if (ss._gossiper.is_enabled()) { slogger.warn("Stopping gossip by operator request"); - return ss._gossiper.container().invoke_on_all(&gms::gossiper::stop).then([&ss] { - ss._initialized = false; - }); + return ss._gossiper.container().invoke_on_all(&gms::gossiper::stop); } return make_ready_future<>(); }); @@ -2641,12 +2639,6 @@ int32_t storage_service::get_exception_count() { return 0; } -future storage_service::is_initialized() { - return run_with_no_api_lock([] (storage_service& ss) { - return ss._initialized; - }); -} - // Runs inside seastar::async context std::unordered_multimap storage_service::get_changed_ranges_for_leaving(sstring keyspace_name, inet_address endpoint) { // First get all ranges the leaving endpoint is responsible for diff --git a/service/storage_service.hh b/service/storage_service.hh index 28925fb839..000b4fe10f 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -277,12 +277,10 @@ private: /* Are we starting this node in bootstrap mode? */ bool _is_bootstrap_mode = false; - bool _initialized = false; - public: - enum class mode { STARTING, JOINING, NORMAL, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED }; + enum class mode { NONE, STARTING, JOINING, NORMAL, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED }; private: - mode _operation_mode = mode::STARTING; + mode _operation_mode = mode::NONE; friend std::ostream& operator<<(std::ostream& os, const mode& mode); /* Used for tracking drain progress */ @@ -340,7 +338,6 @@ private: future<> wait_for_ring_to_settle(std::chrono::milliseconds delay); public: - future is_initialized(); future<> check_for_endpoint_collision(std::unordered_set initial_contact_nodes, const std::unordered_map& loaded_peer_features); From 74212286f81ed738b93f93b54c26a523f7b499ec Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 2 Mar 2022 14:53:51 +0300 Subject: [PATCH 6/8] storage_service: Remove _is_bootstrap_mode This "state" is the sub-state of the STARTING mode that's activated when the storage_service::bootstrap() is called. Instead of the separate boolean the new mode can be used. To stop it from reverting the BOOTSTRAP mode back to JOINING some calls to set_mode() should be converted into regular logging. Signed-off-by: Pavel Emelyanov --- service/storage_service.cc | 7 ++++--- service/storage_service.hh | 9 +-------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index a222287393..d2f8ea8b3a 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -140,6 +140,7 @@ static node_external_status map_operation_mode(storage_service::mode m) { switch (m) { case storage_service::mode::NONE: return node_external_status::STARTING; case storage_service::mode::STARTING: return node_external_status::STARTING; + case storage_service::mode::BOOTSTRAP: return node_external_status::JOINING; case storage_service::mode::JOINING: return node_external_status::JOINING; case storage_service::mode::NORMAL: return node_external_status::NORMAL; case storage_service::mode::LEAVING: return node_external_status::LEAVING; @@ -607,10 +608,9 @@ std::list storage_service::get_ignore_dead_nodes_for_replace( // Runs inside seastar::async context void storage_service::bootstrap() { - _is_bootstrap_mode = true; - auto x = seastar::defer([this] { _is_bootstrap_mode = false; }); auto bootstrap_rbno = is_repair_based_node_ops_enabled(streaming::stream_reason::bootstrap); + set_mode(mode::BOOTSTRAP, true); slogger.debug("bootstrap: rbno={} replacing={}", bootstrap_rbno, is_replacing()); if (!is_replacing()) { // Wait until we know tokens of existing node before announcing join status. @@ -1660,6 +1660,7 @@ static const std::map mode_names = { {storage_service::mode::STARTING, "STARTING"}, {storage_service::mode::NORMAL, "NORMAL"}, {storage_service::mode::JOINING, "JOINING"}, + {storage_service::mode::BOOTSTRAP, "BOOTSTRAP"}, {storage_service::mode::LEAVING, "LEAVING"}, {storage_service::mode::DECOMMISSIONED, "DECOMMISSIONED"}, {storage_service::mode::MOVING, "MOVING"}, @@ -3521,7 +3522,7 @@ future storage_service::is_cleanup_allowed(sstring keyspace) { return container().invoke_on(0, [keyspace = std::move(keyspace)] (storage_service& ss) { auto my_address = ss.get_broadcast_address(); auto pending_ranges = ss.get_token_metadata().has_pending_ranges(keyspace, my_address); - bool is_bootstrap_mode = ss._is_bootstrap_mode; + bool is_bootstrap_mode = ss._operation_mode == mode::BOOTSTRAP; slogger.debug("is_cleanup_allowed: keyspace={}, is_bootstrap_mode={}, pending_ranges={}", keyspace, is_bootstrap_mode, pending_ranges); return !is_bootstrap_mode && !pending_ranges; diff --git a/service/storage_service.hh b/service/storage_service.hh index 000b4fe10f..14a3a22b76 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -274,11 +274,8 @@ private: std::optional _removing_node; - /* Are we starting this node in bootstrap mode? */ - bool _is_bootstrap_mode = false; - public: - enum class mode { NONE, STARTING, JOINING, NORMAL, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED }; + enum class mode { NONE, STARTING, JOINING, BOOTSTRAP, NORMAL, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED }; private: mode _operation_mode = mode::NONE; friend std::ostream& operator<<(std::ostream& os, const mode& mode); @@ -407,10 +404,6 @@ private: void bootstrap(); public: - bool is_bootstrap_mode() const { - return _is_bootstrap_mode; - } - /** * Return the rpc address associated with an endpoint as a string. * @param endpoint The endpoint to get rpc address for From 0941098b3924aec4e15e917aebdbe3bb19ff083b Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 2 Mar 2022 14:47:52 +0300 Subject: [PATCH 7/8] storage_service: Remove _ms_stopped This boolean protects do_stop_ms from re-entrability. However, this method is only called from stop_transport() which handles re-entring itself, so the _ms_stopped can be just removed. Signed-off-by: Pavel Emelyanov --- service/storage_service.cc | 4 ---- service/storage_service.hh | 1 - 2 files changed, 5 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index d2f8ea8b3a..7a99089e86 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -1781,10 +1781,6 @@ future<> storage_service::stop_gossiping() { } future<> storage_service::do_stop_ms() { - if (_ms_stopped) { - return make_ready_future<>(); - } - _ms_stopped = true; return _messaging.invoke_on_all([] (auto& ms) { return ms.shutdown(); }).then([] { diff --git a/service/storage_service.hh b/service/storage_service.hh index 14a3a22b76..2bef9801fc 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -152,7 +152,6 @@ private: sharded& _repair; sharded& _stream_manager; sstring _operation_in_progress; - bool _ms_stopped = false; seastar::metrics::metric_groups _metrics; using client_shutdown_hook = noncopyable_function; std::vector _protocol_servers; From 190385551c61639c4427de02ff2384ef72934231 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 2 Mar 2022 15:05:57 +0300 Subject: [PATCH 8/8] storage_service: Relax operation modes switch The set_mode() tries to combine mode switching and extended logging, but there are no places left that do need this flexibility. It's simpler and nicer to make set_mode() _just_ switch the mode and log some generic "entering ... mode" message. Signed-off-by: Pavel Emelyanov --- service/storage_service.cc | 33 ++++++++++++++++----------------- service/storage_service.hh | 3 +-- 2 files changed, 17 insertions(+), 19 deletions(-) diff --git a/service/storage_service.cc b/service/storage_service.cc index 7a99089e86..28fd57e875 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -415,7 +415,7 @@ future<> storage_service::wait_for_ring_to_settle(std::chrono::milliseconds dela // Runs inside seastar::async context void storage_service::join_token_ring(std::chrono::milliseconds delay) { - set_mode(mode::JOINING, "joining the ring", true); + set_mode(mode::JOINING); _group0->join_group0().get(); @@ -563,7 +563,7 @@ void storage_service::join_token_ring(std::chrono::milliseconds delay) { // start participating in the ring. set_gossip_tokens(_gossiper, _bootstrap_tokens, _cdc_gen_id).get(); - set_mode(mode::NORMAL, "node is now in normal status", true); + set_mode(mode::NORMAL); if (get_token_metadata().sorted_tokens().empty()) { auto err = format("join_token_ring: Sorted token in token_metadata is empty"); @@ -610,7 +610,7 @@ std::list storage_service::get_ignore_dead_nodes_for_replace( void storage_service::bootstrap() { auto bootstrap_rbno = is_repair_based_node_ops_enabled(streaming::stream_reason::bootstrap); - set_mode(mode::BOOTSTRAP, true); + set_mode(mode::BOOTSTRAP); slogger.debug("bootstrap: rbno={} replacing={}", bootstrap_rbno, is_replacing()); if (!is_replacing()) { // Wait until we know tokens of existing node before announcing join status. @@ -1305,7 +1305,7 @@ future<> storage_service::init_server(cql3::query_processor& qp) { assert(this_shard_id() == 0); return seastar::async([this, &qp] { - set_mode(mode::STARTING, "preparing to join ring", true); + set_mode(mode::STARTING); _group0 = std::make_unique(_abort_source, _raft_gr, _messaging.local(), _gossiper, qp, _migration_manager.local()); @@ -1673,16 +1673,15 @@ std::ostream& operator<<(std::ostream& os, const storage_service::mode& m) { return os; } -void storage_service::set_mode(mode m, bool log) { - set_mode(m, "", log); -} - -void storage_service::set_mode(mode m, sstring msg, bool log) { - _operation_mode = m; - if (log) { - slogger.info("{}: {}", m, msg); +void storage_service::set_mode(mode m) { + if (m != _operation_mode) { + slogger.info("entering {} mode", m); + _operation_mode = m; } else { - slogger.debug("{}: {}", m, msg); + // This shouldn't happen, but it's too much for an assert, + // so -- just emit a warning in the hope that it will be + // noticed, reported and fixed + slogger.warn("re-entering {} mode", m); } } @@ -1949,7 +1948,7 @@ future<> storage_service::decommission() { // StageManager.shutdownNow(); db::system_keyspace::set_bootstrap_state(db::system_keyspace::bootstrap_state::DECOMMISSIONED).get(); slogger.info("DECOMMISSIONING: set_bootstrap_state done"); - ss.set_mode(mode::DECOMMISSIONED, true); + ss.set_mode(mode::DECOMMISSIONED); slogger.info("DECOMMISSIONING: done"); // let op be responsible for killing the process }); @@ -2573,10 +2572,10 @@ future<> storage_service::drain() { return make_ready_future<>(); } - ss.set_mode(mode::DRAINING, "starting drain process", true); + ss.set_mode(mode::DRAINING); return ss.do_drain().then([&ss] { ss._drain_finished.set_value(); - ss.set_mode(mode::DRAINED, true); + ss.set_mode(mode::DRAINED); }); }); } @@ -2727,7 +2726,7 @@ void storage_service::unbootstrap() { ranges_to_stream.emplace(keyspace_name, std::move(ranges_mm)); } - set_mode(mode::LEAVING, "replaying batch log and streaming data to other nodes", true); + set_mode(mode::LEAVING); auto stream_success = stream_ranges(ranges_to_stream); // Wait for batch log to complete before streaming hints. diff --git a/service/storage_service.hh b/service/storage_service.hh index 2bef9801fc..093d5b4f3b 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -393,8 +393,7 @@ public: future<> rebuild(sstring source_dc); private: - void set_mode(mode m, bool log); - void set_mode(mode m, sstring msg, bool log); + void set_mode(mode m); void mark_existing_views_as_built(); // Stream data for which we become a new replica.