From 2c34d9fcd8a90b7547c28c4c5caebaafcdb15beb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Thu, 18 Jan 2024 17:39:05 +0100 Subject: [PATCH 01/15] docs: update maintenance socket documentation to use WhiteListRoundRobinPolicy After https://github.com/scylladb/python-driver/pull/287, the user can use WhiteListRoundRobinPolicy to connect to the node by maintenance socket. --- docs/operating-scylla/admin-tools/maintenance-socket.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/operating-scylla/admin-tools/maintenance-socket.rst b/docs/operating-scylla/admin-tools/maintenance-socket.rst index 60aa447ac4..3ac99f7f7a 100644 --- a/docs/operating-scylla/admin-tools/maintenance-socket.rst +++ b/docs/operating-scylla/admin-tools/maintenance-socket.rst @@ -26,10 +26,10 @@ With python driver from cassandra.cluster import Cluster from cassandra.connection import UnixSocketEndPoint - from cassandra.policies import HostFilterPolicy, RoundRobinPolicy + from cassandra.policies import WhiteListRoundRobinPolicy - socket = "/cql.m" - cluster = Cluster([UnixSocketEndPoint(socket)], + socket = UnixSocketEndPoint("/cql.m") + cluster = Cluster([socket], # Driver tries to connect to other nodes in the cluster, so we need to filter them out. - load_balancing_policy=HostFilterPolicy(RoundRobinPolicy(), lambda h: h.address == socket)) + load_balancing_policy=WhiteListRoundRobinPolicy([socket])) session = cluster.connect() From 81ef9fc91e9e7939c4c8fa3d54a2257809e5741d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Thu, 25 Jan 2024 15:10:15 +0100 Subject: [PATCH 02/15] docs: add cqlsh usage to maintenance socket documentation After https://github.com/scylladb/scylla-cqlsh/pull/67, the user can use cqlsh to connect to the node by maintenance socket. 
--- docs/operating-scylla/admin-tools/maintenance-socket.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/docs/operating-scylla/admin-tools/maintenance-socket.rst b/docs/operating-scylla/admin-tools/maintenance-socket.rst index 3ac99f7f7a..f35d7e6759 100644 --- a/docs/operating-scylla/admin-tools/maintenance-socket.rst +++ b/docs/operating-scylla/admin-tools/maintenance-socket.rst @@ -33,3 +33,10 @@ With python driver # Driver tries to connect to other nodes in the cluster, so we need to filter them out. load_balancing_policy=WhiteListRoundRobinPolicy([socket])) session = cluster.connect() + +With :doc:`CQLSh` +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +.. code-block:: console + + cqlsh /cql.m From e6a83b9819e0aac197aa79cb70cbffde0f7b348e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Fri, 22 Dec 2023 14:33:13 +0100 Subject: [PATCH 03/15] db/config: add maintenance mode flag --- db/config.cc | 1 + db/config.hh | 1 + 2 files changed, 2 insertions(+) diff --git a/db/config.cc b/db/config.cc index 13cc57f994..8cf3bc7c7a 100644 --- a/db/config.cc +++ b/db/config.cc @@ -763,6 +763,7 @@ db::config::config(std::shared_ptr exts) "\tworkdir the node will open the maintenance socket on the path /cql.m,\n" "\t where is a path defined by the workdir configuration option\n" "\t the node will open the maintenance socket on the path ") + , maintenance_mode(this, "maintenance_mode", value_status::Used, false, "If set to true, the node will not connect to other nodes. It will only serve requests to its local data.") , native_transport_port_ssl(this, "native_transport_port_ssl", value_status::Used, 9142, "Port on which the CQL TLS native transport listens for clients." 
"Enabling client encryption and keeping native_transport_port_ssl disabled will use encryption" diff --git a/db/config.hh index 2a1b438d10..9cc1746f9b 100644 --- a/db/config.hh +++ b/db/config.hh @@ -278,6 +278,7 @@ public: named_value start_native_transport; named_value native_transport_port; named_value maintenance_socket; + named_value maintenance_mode; named_value native_transport_port_ssl; named_value native_shard_aware_transport_port; named_value native_shard_aware_transport_port_ssl; From 8b2f0e38d9167d779d11f95f368e7cd69679043a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Mon, 22 Jan 2024 11:47:30 +0100 Subject: [PATCH 04/15] service/maintenance_mode: move maintenance_socket_enabled definition to separate file --- auth/service.cc | 4 ++-- auth/service.hh | 7 ++++--- db/config.hh | 2 -- main.cc | 12 ++++++------ service/maintenance_mode.hh | 15 +++++++++++++++ test/lib/cql_test_env.cc | 2 +- transport/controller.cc | 2 +- transport/controller.hh | 5 +++-- transport/server.cc | 4 ++-- transport/server.hh | 5 +++-- 10 files changed, 37 insertions(+), 21 deletions(-) create mode 100644 service/maintenance_mode.hh diff --git a/auth/service.cc index 42c1cc08ea..45791f63b8 100644 --- a/auth/service.cc +++ b/auth/service.cc @@ -128,7 +128,7 @@ service::service( std::unique_ptr z, std::unique_ptr a, std::unique_ptr r, - db::maintenance_socket_enabled used_by_maintenance_socket) + maintenance_socket_enabled used_by_maintenance_socket) : _loading_cache_config(std::move(c)) , _permissions_cache(nullptr) , _qp(qp) @@ -150,7 +150,7 @@ service::service( ::service::migration_notifier& mn, ::service::migration_manager& mm, const service_config& sc, - db::maintenance_socket_enabled used_by_maintenance_socket) + maintenance_socket_enabled used_by_maintenance_socket) : service( std::move(c), qp, diff --git a/auth/service.hh index c1fd4b21cb..7a022b7f98 100644 --- a/auth/service.hh +++ 
b/auth/service.hh @@ -26,6 +26,7 @@ #include "utils/observable.hh" #include "utils/serialized_action.hh" #include "db/config.hh" +#include "service/maintenance_mode.hh" namespace cql3 { class query_processor; @@ -94,7 +95,7 @@ class service final : public seastar::peering_sharded_service { utils::observer _permissions_cache_update_interval_in_ms_observer; utils::observer _permissions_cache_validity_in_ms_observer; - db::maintenance_socket_enabled _used_by_maintenance_socket; + maintenance_socket_enabled _used_by_maintenance_socket; public: service( @@ -104,7 +105,7 @@ public: std::unique_ptr, std::unique_ptr, std::unique_ptr, - db::maintenance_socket_enabled); + maintenance_socket_enabled); /// /// This constructor is intended to be used when the class is sharded via \ref seastar::sharded. In that case, the @@ -117,7 +118,7 @@ public: ::service::migration_notifier&, ::service::migration_manager&, const service_config&, - db::maintenance_socket_enabled); + maintenance_socket_enabled); future<> start(::service::migration_manager&); diff --git a/db/config.hh b/db/config.hh index 9cc1746f9b..8682a52236 100644 --- a/db/config.hh +++ b/db/config.hh @@ -126,8 +126,6 @@ struct replication_strategy_restriction_t { constexpr unsigned default_murmur3_partitioner_ignore_msb_bits = 12; -using maintenance_socket_enabled = bool_class; - class config final : public utils::config_file { public: config(); diff --git a/main.cc b/main.cc index 5846890f89..d7850e8f86 100644 --- a/main.cc +++ b/main.cc @@ -1685,11 +1685,11 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl maintenance_auth_config.authenticator_java_name = sstring{auth::allow_all_authenticator_name}; maintenance_auth_config.role_manager_java_name = sstring{auth::maintenance_socket_role_manager_name}; - maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, db::maintenance_socket_enabled::yes).get(); + 
maintenance_auth_service.start(perm_cache_config, std::ref(qp), std::ref(mm_notifier), std::ref(mm), maintenance_auth_config, maintenance_socket_enabled::yes).get(); scheduling_group_key_config maintenance_cql_sg_stats_cfg = - make_scheduling_group_key_config(db::maintenance_socket_enabled::yes); - cql_transport::controller cql_maintenance_server_ctl(maintenance_auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, scheduling_group_key_create(maintenance_cql_sg_stats_cfg).get0(), db::maintenance_socket_enabled::yes); + make_scheduling_group_key_config(maintenance_socket_enabled::yes); + cql_transport::controller cql_maintenance_server_ctl(maintenance_auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, scheduling_group_key_create(maintenance_cql_sg_stats_cfg).get0(), maintenance_socket_enabled::yes); std::any stop_maintenance_auth_service; std::any stop_maintenance_cql; @@ -1778,7 +1778,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl auth_config.authenticator_java_name = qualified_authenticator_name; auth_config.role_manager_java_name = qualified_role_manager_name; - auth_service.start(std::move(perm_cache_config), std::ref(qp), std::ref(mm_notifier), std::ref(mm), auth_config, db::maintenance_socket_enabled::no).get(); + auth_service.start(std::move(perm_cache_config), std::ref(qp), std::ref(mm_notifier), std::ref(mm), auth_config, maintenance_socket_enabled::no).get(); std::any stop_auth_service; start_auth_service(auth_service, stop_auth_service, "auth service"); @@ -1877,8 +1877,8 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl notify_set.notify_all(configurable::system_state::started).get(); - scheduling_group_key_config cql_sg_stats_cfg = make_scheduling_group_key_config(db::maintenance_socket_enabled::no); - cql_transport::controller cql_server_ctl(auth_service, mm_notifier, 
gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, scheduling_group_key_create(cql_sg_stats_cfg).get0(), db::maintenance_socket_enabled::no); + scheduling_group_key_config cql_sg_stats_cfg = make_scheduling_group_key_config(maintenance_socket_enabled::no); + cql_transport::controller cql_server_ctl(auth_service, mm_notifier, gossiper, qp, service_memory_limiter, sl_controller, lifecycle_notifier, *cfg, scheduling_group_key_create(cql_sg_stats_cfg).get0(), maintenance_socket_enabled::no); ss.local().register_protocol_server(cql_server_ctl); diff --git a/service/maintenance_mode.hh b/service/maintenance_mode.hh new file mode 100644 index 0000000000..3ebeddafe0 --- /dev/null +++ b/service/maintenance_mode.hh @@ -0,0 +1,15 @@ +/* + * Copyright (C) 2024-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: AGPL-3.0-or-later + */ + +#pragma once + +#include + +using namespace seastar; + +using maintenance_socket_enabled = bool_class; diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index d7ec97f5de..d782ad7a59 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -925,7 +925,7 @@ private: auth_config.authenticator_java_name = qualified_authenticator_name; auth_config.role_manager_java_name = qualified_role_manager_name; - _auth_service.start(perm_cache_config, std::ref(_qp), std::ref(_mnotifier), std::ref(_mm), auth_config, db::maintenance_socket_enabled::no).get(); + _auth_service.start(perm_cache_config, std::ref(_qp), std::ref(_mnotifier), std::ref(_mm), auth_config, maintenance_socket_enabled::no).get(); _auth_service.invoke_on_all([this] (auth::service& auth) { return auth.start(_mm.local()); }).get(); diff --git a/transport/controller.cc b/transport/controller.cc index 010b0bf7f9..9983617878 100644 --- a/transport/controller.cc +++ b/transport/controller.cc @@ -26,7 +26,7 @@ static logging::logger logger("cql_server_controller"); controller::controller(sharded& auth, sharded& mn, sharded& gossiper, 
sharded& qp, sharded& ml, sharded& sl_controller, sharded& elc_notif, - const db::config& cfg, scheduling_group_key cql_opcode_stats_key, db::maintenance_socket_enabled used_by_maintenance_socket) + const db::config& cfg, scheduling_group_key cql_opcode_stats_key, maintenance_socket_enabled used_by_maintenance_socket) : _ops_sem(1) , _auth_service(auth) , _mnotifier(mn) diff --git a/transport/controller.hh b/transport/controller.hh index 459e8d39ce..edd627592c 100644 --- a/transport/controller.hh +++ b/transport/controller.hh @@ -15,6 +15,7 @@ #include "db/config.hh" #include "protocol_server.hh" +#include "service/maintenance_mode.hh" using namespace seastar; @@ -58,13 +59,13 @@ class controller : public protocol_server { future<> subscribe_server(sharded& server); future<> unsubscribe_server(sharded& server); - db::maintenance_socket_enabled _used_by_maintenance_socket; + maintenance_socket_enabled _used_by_maintenance_socket; public: controller(sharded&, sharded&, sharded&, sharded&, sharded&, sharded&, sharded&, - const db::config& cfg, scheduling_group_key cql_opcode_stats_key, db::maintenance_socket_enabled used_by_maintenance_socket); + const db::config& cfg, scheduling_group_key cql_opcode_stats_key, maintenance_socket_enabled used_by_maintenance_socket); virtual sstring name() const override; virtual sstring protocol() const override; virtual sstring protocol_version() const override; diff --git a/transport/server.cc b/transport/server.cc index 89f2a27dd2..af989db8f3 100644 --- a/transport/server.cc +++ b/transport/server.cc @@ -188,7 +188,7 @@ event::event_type parse_event_type(const sstring& value) } } -cql_sg_stats::cql_sg_stats(db::maintenance_socket_enabled used_by_maintenance_socket) +cql_sg_stats::cql_sg_stats(maintenance_socket_enabled used_by_maintenance_socket) : _cql_requests_stats(static_cast(cql_binary_opcode::OPCODES_COUNT)) { if (used_by_maintenance_socket) { @@ -236,7 +236,7 @@ void cql_sg_stats::register_metrics() 
cql_server::cql_server(distributed& qp, auth::service& auth_service, service::memory_limiter& ml, cql_server_config config, const db::config& db_cfg, qos::service_level_controller& sl_controller, gms::gossiper& g, scheduling_group_key stats_key, - db::maintenance_socket_enabled used_by_maintenance_socket) + maintenance_socket_enabled used_by_maintenance_socket) : server("CQLServer", clogger) , _query_processor(qp) , _config(std::move(config)) diff --git a/transport/server.hh b/transport/server.hh index deab212e45..fc414bbb34 100644 --- a/transport/server.hh +++ b/transport/server.hh @@ -33,6 +33,7 @@ #include "exceptions/coordinator_result.hh" #include "db/operation_type.hh" #include "db/config.hh" +#include "service/maintenance_mode.hh" namespace cql3 { @@ -123,7 +124,7 @@ struct cql_sg_stats { uint64_t response_size = 0; }; - cql_sg_stats(db::maintenance_socket_enabled); + cql_sg_stats(maintenance_socket_enabled); request_kind_stats& get_cql_opcode_stats(cql_binary_opcode op) { return _cql_requests_stats[static_cast(op)]; } void register_metrics(); private: @@ -170,7 +171,7 @@ public: qos::service_level_controller& sl_controller, gms::gossiper& g, scheduling_group_key stats_key, - db::maintenance_socket_enabled used_by_maintenance_socket); + maintenance_socket_enabled used_by_maintenance_socket); public: using response = cql_transport::response; using result_with_foreign_response_ptr = exceptions::coordinator_result>>; From d4c22fc86ce081c39ba258da4017f280a18e7ab5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Fri, 12 Jan 2024 11:08:24 +0100 Subject: [PATCH 05/15] service/maintenance_mode: add maintenance_mode_enabled bool class --- service/maintenance_mode.hh | 1 + 1 file changed, 1 insertion(+) diff --git a/service/maintenance_mode.hh b/service/maintenance_mode.hh index 3ebeddafe0..6c713cec6e 100644 --- a/service/maintenance_mode.hh +++ b/service/maintenance_mode.hh @@ -13,3 +13,4 @@ using namespace seastar; using 
maintenance_socket_enabled = bool_class; +using maintenance_mode_enabled = bool_class; From c530756837f84527b039d1aac508aa89587adabc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Fri, 19 Jan 2024 16:41:38 +0100 Subject: [PATCH 06/15] storage_service: add MAINTENANCE option to mode enum join_cluster and start_maintenance_mode are incompatible. To make sure that only one is called when the node starts, add the MAINTENANCE option. start_maintenance_mode sets _operation_mode to MAINTENANCE. join_cluster sets _operation_mode to STARTING. set_mode will result in an internal error if: * it tries to set MAINTENANCE mode when the _operation_mode is other than NONE, i.e. start_maintenance_mode is called after join_cluster (or it is called during the drain, but it also shouldn't happen). * it tries to set STARTING mode when the mode is set to MAINTENANCE, i.e. join_cluster is called after start_maintenance_mode. --- api/storage_service.cc | 4 ++-- service/storage_service.cc | 15 +++++++++++++-- service/storage_service.hh | 3 ++- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/api/storage_service.cc b/api/storage_service.cc index 902380d285..628a65586d 100644 --- a/api/storage_service.cc +++ b/api/storage_service.cc @@ -1001,7 +1001,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded req) { return ss.local().get_operation_mode().then([&ss] (auto mode) { - bool is_initialized = mode >= service::storage_service::mode::STARTING; + bool is_initialized = mode >= service::storage_service::mode::STARTING && mode != service::storage_service::mode::MAINTENANCE; if (mode == service::storage_service::mode::NORMAL) { is_initialized = ss.local().gossiper().is_enabled(); } @@ -1015,7 +1015,7 @@ void set_storage_service(http_context& ctx, routes& r, sharded req) { return ss.local().get_operation_mode().then([] (auto mode) { - return make_ready_future(mode >= service::storage_service::mode::JOINING); + return make_ready_future(mode 
>= service::storage_service::mode::JOINING && mode != service::storage_service::mode::MAINTENANCE); }); }); diff --git a/service/storage_service.cc b/service/storage_service.cc index 73aef33be4..a2ecbad1bf 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -175,7 +175,8 @@ enum class node_external_status { DECOMMISSIONED = 5, DRAINING = 6, DRAINED = 7, - MOVING = 8 //deprecated + MOVING = 8, //deprecated + MAINTENANCE = 9 }; static node_external_status map_operation_mode(storage_service::mode m) { @@ -190,6 +191,7 @@ static node_external_status map_operation_mode(storage_service::mode m) { case storage_service::mode::DRAINING: return node_external_status::DRAINING; case storage_service::mode::DRAINED: return node_external_status::DRAINED; case storage_service::mode::MOVING: return node_external_status::MOVING; + case storage_service::mode::MAINTENANCE: return node_external_status::MAINTENANCE; } return node_external_status::UNKNOWN; } @@ -203,7 +205,7 @@ void storage_service::register_metrics() { namespace sm = seastar::metrics; _metrics.add_group("node", { sm::make_gauge("operation_mode", sm::description("The operation mode of the current node. UNKNOWN = 0, STARTING = 1, JOINING = 2, NORMAL = 3, " - "LEAVING = 4, DECOMMISSIONED = 5, DRAINING = 6, DRAINED = 7, MOVING = 8"), [this] { + "LEAVING = 4, DECOMMISSIONED = 5, DRAINING = 6, DRAINED = 7, MOVING = 8, MAINTENANCE = 9"), [this] { return static_cast>(map_operation_mode(_operation_mode)); }), }); @@ -2797,6 +2799,15 @@ future> storage_service::effective_ownership( } void storage_service::set_mode(mode m) { + if (m == mode::MAINTENANCE && _operation_mode != mode::NONE) { + // Prevent from calling `start_maintenance_mode` after `join_cluster`. 
+ on_fatal_internal_error(slogger, format("Node should enter maintenance mode only from mode::NONE (current mode: {})", _operation_mode)); + } + if (m == mode::STARTING && _operation_mode == mode::MAINTENANCE) { + // Prevent from calling `join_cluster` after `start_maintenance_mode`. + on_fatal_internal_error(slogger, "Node in the maintenance mode cannot enter the starting mode"); + } + if (m != _operation_mode) { slogger.info("entering {} mode", m); _operation_mode = m; diff --git a/service/storage_service.hh b/service/storage_service.hh index e43ad4ad1a..e9b901982c 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -283,7 +283,7 @@ private: public: std::chrono::milliseconds get_ring_delay(); - enum class mode { NONE, STARTING, JOINING, BOOTSTRAP, NORMAL, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED }; + enum class mode { NONE, STARTING, JOINING, BOOTSTRAP, NORMAL, LEAVING, DECOMMISSIONED, MOVING, DRAINING, DRAINED, MAINTENANCE }; private: mode _operation_mode = mode::NONE; /* Used for tracking drain progress */ @@ -837,6 +837,7 @@ struct fmt::formatter : fmt::formatter Date: Fri, 22 Dec 2023 18:27:46 +0100 Subject: [PATCH 07/15] service/storage_service: add start_maintenance_mode() method In the maintenance mode, other nodes won't be available thus we disabled joining the token ring and the token metadata won't be populated with the local node's endpoint. When a CQL query is executed it checks the `token_metadata` structure and fails if it is empty. Add a method that initialises `token_metadata` with the local node as the only node in the token ring. 
--- service/storage_service.cc | 8 ++++++++ service/storage_service.hh | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/service/storage_service.cc b/service/storage_service.cc index a2ecbad1bf..fd8c78c08c 100644 --- a/service/storage_service.cc +++ b/service/storage_service.cc @@ -5874,6 +5874,14 @@ bool storage_service::is_repair_based_node_ops_enabled(streaming::stream_reason return global_enabled && enabled_set.contains(reason); } +future<> storage_service::start_maintenance_mode() { + set_mode(mode::MAINTENANCE); + + return mutate_token_metadata([this] (mutable_token_metadata_ptr token_metadata) -> future<> { + return token_metadata->update_normal_tokens({ dht::token{} }, get_token_metadata_ptr()->get_topology().my_host_id()); + }, acquire_merge_lock::yes); +} + node_ops_meta_data::node_ops_meta_data( node_ops_id ops_uuid, gms::inet_address coordinator, diff --git a/service/storage_service.hh b/service/storage_service.hh index e9b901982c..faed9212c2 100644 --- a/service/storage_service.hh +++ b/service/storage_service.hh @@ -785,6 +785,14 @@ public: future<> move_tablet(table_id, dht::token, locator::tablet_replica src, locator::tablet_replica dst); future<> set_tablet_balancing_enabled(bool); + // In the maintenance mode, other nodes won't be available thus we disabled joining + // the token ring and the token metadata won't be populated with the local node's endpoint. + // When a CQL query is executed it checks the `token_metadata` structure and fails if it is empty. + // + // This method initialises `token_metadata` with the local node as the only node in the token ring. + // It is incompatible with the `join_cluster` method. + future<> start_maintenance_mode(); + private: // Synchronizes the local node state (token_metadata, system.peers/system.local tables, // gossiper) to align it with the other raft topology nodes. 
From c08266cfe540aeb2631943f485fad91bf13f0a0e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Thu, 18 Jan 2024 17:35:10 +0100 Subject: [PATCH 08/15] raft_group0_client: disable group0 operations in the maintenance mode In maintenance mode, the node doesn't communicate with other nodes, so it doesn't start or apply group0 operations. Users can still try to start it, e.g. change the schema, and the node can't allow it. Init _upgrade_state with recovery in the maintenance mode. Throw an error if the group0 operation is started in maintenance mode. --- main.cc | 2 +- service/raft/raft_group0_client.cc | 12 +++++++++--- service/raft/raft_group0_client.hh | 5 ++++- test/lib/cql_test_env.cc | 2 +- 4 files changed, 15 insertions(+), 6 deletions(-) diff --git a/main.cc b/main.cc index d7850e8f86..f7210250f6 100644 --- a/main.cc +++ b/main.cc @@ -1359,7 +1359,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl // group0 client exists only on shard 0. // The client has to be created before `stop_raft` since during // destruction it has to exist until raft_gr.stop() completes. 
- service::raft_group0_client group0_client{raft_gr.local(), sys_ks.local()}; + service::raft_group0_client group0_client{raft_gr.local(), sys_ks.local(), maintenance_mode_enabled{cfg->maintenance_mode()}}; service::raft_group0 group0_service{ stop_signal.as_local_abort_source(), raft_gr.local(), messaging, diff --git a/service/raft/raft_group0_client.cc b/service/raft/raft_group0_client.cc index c71243e7d8..e715854b12 100644 --- a/service/raft/raft_group0_client.cc +++ b/service/raft/raft_group0_client.cc @@ -240,6 +240,10 @@ future raft_group0_client::start_operation(seastar::abort_source* on_internal_error(logger, "start_group0_operation: must run on shard 0"); } + if (_maintenance_mode) { + throw exceptions::configuration_exception{"cannot start group0 operation in the maintenance mode"}; + } + auto [upgrade_lock_holder, upgrade_state] = co_await get_group0_upgrade_state(); switch (upgrade_state) { case group0_upgrade_state::use_post_raft_procedures: { @@ -329,8 +333,8 @@ group0_command raft_group0_client::prepare_command(Command change, std::string_v return group0_cmd; } -raft_group0_client::raft_group0_client(service::raft_group_registry& raft_gr, db::system_keyspace& sys_ks) - : _raft_gr(raft_gr), _sys_ks(sys_ks) { +raft_group0_client::raft_group0_client(service::raft_group_registry& raft_gr, db::system_keyspace& sys_ks, maintenance_mode_enabled maintenance_mode) + : _raft_gr(raft_gr), _sys_ks(sys_ks), _maintenance_mode(maintenance_mode) { } future<> raft_group0_client::init() { @@ -355,7 +359,9 @@ future<> raft_group0_client::init() { return service::group0_upgrade_state::recovery; }; - _upgrade_state = value(co_await _sys_ks.load_group0_upgrade_state()); + _upgrade_state = _maintenance_mode + ? 
group0_upgrade_state::recovery + : value(co_await _sys_ks.load_group0_upgrade_state()); if (_upgrade_state == group0_upgrade_state::recovery) { logger.warn("RECOVERY mode."); } diff --git a/service/raft/raft_group0_client.hh b/service/raft/raft_group0_client.hh index c1659e9478..98c7d9474e 100644 --- a/service/raft/raft_group0_client.hh +++ b/service/raft/raft_group0_client.hh @@ -24,6 +24,7 @@ #include "gc_clock.hh" #include "service/raft/group0_state_machine.hh" #include "db/system_keyspace.hh" +#include "service/maintenance_mode.hh" namespace service { // Obtaining this object means that all previously finished operations on group 0 are visible on this node. @@ -83,6 +84,8 @@ class raft_group0_client { std::unordered_map> _results; + maintenance_mode_enabled _maintenance_mode; + // Guard manages the result of a single query. If it is created for a particular query, // then `group0_state_machine` will save the result of that query and it can be returned by the guard. // Guard manages the lifetime of the _results entry. It creates and destroys the entry, which state machine puts the result in. @@ -99,7 +102,7 @@ class raft_group0_client { }; public: - raft_group0_client(service::raft_group_registry&, db::system_keyspace&); + raft_group0_client(service::raft_group_registry&, db::system_keyspace&, maintenance_mode_enabled); // Call after `system_keyspace` is initialized. 
future<> init(); diff --git a/test/lib/cql_test_env.cc b/test/lib/cql_test_env.cc index d782ad7a59..b7f0747185 100644 --- a/test/lib/cql_test_env.cc +++ b/test/lib/cql_test_env.cc @@ -741,7 +741,7 @@ private: auto stop_forward_service = defer([this] { _forward_service.stop().get(); }); // gropu0 client exists only on shard 0 - service::raft_group0_client group0_client(_group0_registry.local(), _sys_ks.local()); + service::raft_group0_client group0_client(_group0_registry.local(), _sys_ks.local(), maintenance_mode_enabled::no); _mm.start(std::ref(_mnotifier), std::ref(_feature_service), std::ref(_ms), std::ref(_proxy), std::ref(_gossiper), std::ref(group0_client), std::ref(_sys_ks)).get(); auto stop_mm = defer([this] { _mm.stop().get(); }); From d8de209dcfe2ef370e700bb698cf5308cce0b0fe Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Fri, 29 Sep 2023 12:35:52 +0200 Subject: [PATCH 09/15] message_service: add sanity check that rpc connections are not created in the maintenance mode In maintenance mode, a node shouldn't be able to communicate with other nodes. To make sure this does not happen, the sanity check is added. 
--- main.cc | 1 + message/messaging_service.cc | 3 +++ message/messaging_service.hh | 3 +++ 3 files changed, 7 insertions(+) diff --git a/main.cc b/main.cc index f7210250f6..b095db79c5 100644 --- a/main.cc +++ b/main.cc @@ -1246,6 +1246,7 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl if (snitch.local()->prefer_local()) { mscfg.preferred_ips = sys_ks.local().get_preferred_ips().get0(); } + mscfg.maintenance_mode = maintenance_mode_enabled{cfg->maintenance_mode()}; const auto& seo = cfg->server_encryption_options(); auto encrypt = utils::get_or_default(seo, "internode_encryption", "none"); diff --git a/message/messaging_service.cc b/message/messaging_service.cc index d69e7a0931..732f4842df 100644 --- a/message/messaging_service.cc +++ b/message/messaging_service.cc @@ -802,6 +802,9 @@ gms::inet_address messaging_service::get_public_endpoint_for(const gms::inet_add shared_ptr messaging_service::get_rpc_client(messaging_verb verb, msg_addr id) { assert(!_shutting_down); + if (_cfg.maintenance_mode) { + on_internal_error(mlogger, "This node is in maintenance mode, it shouldn't contact other nodes"); + } auto idx = get_rpc_client_idx(verb); auto it = _clients[idx].find(id); diff --git a/message/messaging_service.hh b/message/messaging_service.hh index 98c38ebea5..4fa49b5092 100644 --- a/message/messaging_service.hh +++ b/message/messaging_service.hh @@ -8,6 +8,7 @@ #pragma once +#include "db/config.hh" #include "messaging_service_fwd.hh" #include "msg_addr.hh" #include @@ -22,6 +23,7 @@ #include "streaming/stream_fwd.hh" #include "locator/host_id.hh" #include "service/session.hh" +#include "service/maintenance_mode.hh" #include #include @@ -281,6 +283,7 @@ public: bool listen_on_broadcast_address = false; size_t rpc_memory_limit = 1'000'000; std::unordered_map preferred_ips; + maintenance_mode_enabled maintenance_mode = maintenance_mode_enabled::no; }; struct scheduling_config { From 617adde9c91025dac01ff1eeafd6e3afe4bd0c3d Mon Sep 
17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Thu, 18 Jan 2024 22:05:59 +0100 Subject: [PATCH 10/15] main: move some REST routes initialization before joining group0 Move REST endpoints that don't need connection with other nodes, before joining the group0. This way, they can be initialized in the maintenance mode. Move `snapshot_ctl` along with routes because of snapshots API and tasks API. Its constructor is a noop, so it is safe to move it. --- main.cc | 35 ++++++++++++++++++----------------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/main.cc b/main.cc index b095db79c5..0913f00c08 100644 --- a/main.cc +++ b/main.cc @@ -1700,6 +1700,24 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl start_cql(cql_maintenance_server_ctl, stop_maintenance_cql, "maintenance native server"); } + snapshot_ctl.start(std::ref(db)).get(); + auto stop_snapshot_ctl = defer_verbose_shutdown("snapshots", [&snapshot_ctl] { + snapshot_ctl.stop().get(); + }); + + api::set_server_snapshot(ctx, snapshot_ctl).get(); + auto stop_api_snapshots = defer_verbose_shutdown("snapshots API", [&ctx] { + api::unset_server_snapshot(ctx).get(); + }); + + api::set_server_tasks_compaction_module(ctx, ss, snapshot_ctl).get(); + auto stop_tasks_api = defer_verbose_shutdown("tasks API", [&ctx] { + api::unset_server_tasks_compaction_module(ctx).get(); + }); + + //FIXME: discarded future + (void)api::set_server_cache(ctx); + sys_dist_ks.start(std::ref(qp), std::ref(mm), std::ref(proxy)).get(); auto stop_sdks = defer_verbose_shutdown("system distributed keyspace", [] { sys_dist_ks.invoke_on_all(&db::system_distributed_keyspace::stop).get(); @@ -1789,21 +1807,6 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl api::unset_server_authorization_cache(ctx).get(); }); - snapshot_ctl.start(std::ref(db)).get(); - auto stop_snapshot_ctl = defer_verbose_shutdown("snapshots", [&snapshot_ctl] { - 
snapshot_ctl.stop().get(); - }); - - api::set_server_snapshot(ctx, snapshot_ctl).get(); - auto stop_api_snapshots = defer_verbose_shutdown("snapshots API", [&ctx] { - api::unset_server_snapshot(ctx).get(); - }); - - api::set_server_tasks_compaction_module(ctx, ss, snapshot_ctl).get(); - auto stop_tasks_api = defer_verbose_shutdown("tasks API", [&ctx] { - api::unset_server_tasks_compaction_module(ctx).get(); - }); - supervisor::notify("starting batchlog manager"); db::batchlog_manager_config bm_cfg; bm_cfg.write_request_timeout = cfg->write_request_timeout_in_ms() * 1ms; @@ -1838,8 +1841,6 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl view_backlog_broker.stop().get(); }); - //FIXME: discarded future - (void)api::set_server_cache(ctx); startlog.info("Waiting for gossip to settle before accepting client requests..."); gossiper.local().wait_for_gossip_to_settle().get(); api::set_server_gossip_settle(ctx, gossiper).get(); From 0bdbd6e8f52ea065761933ad2585266dc94ae793 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Fri, 22 Dec 2023 18:33:44 +0100 Subject: [PATCH 11/15] main: add maintenance mode In maintenance mode: * Group0 doesn't start and the node doesn't join the token ring to behave as a dead node to others, * Group0 operations are disabled and result in an error, * Only the maintenance socket listens for CQL requests, * The storage service initialises token_metadata with the local node as the only node on the token ring. Maintenance mode is enabled by passing the --maintenance-mode flag. Maintenance mode starts before the group0 is initialised. 
--- main.cc | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/main.cc b/main.cc index 0913f00c08..a0b129c6dd 100644 --- a/main.cc +++ b/main.cc @@ -1718,6 +1718,28 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl //FIXME: discarded future (void)api::set_server_cache(ctx); + if (cfg->maintenance_mode()) { + startlog.info("entering maintenance mode."); + + ss.local().start_maintenance_mode().get(); + + seastar::set_abort_on_ebadf(cfg->abort_on_ebadf()); + api::set_server_done(ctx).get(); + { + auto do_drain = defer_verbose_shutdown("local storage", [&ss] { + // Flush all memtables and stop ongoing compactions + ss.local().drain_on_shutdown().get(); + }); + + startlog.info("Scylla version {} initialization completed (maintenance mode).", scylla_version()); + stop_signal.wait().get(); + startlog.info("Signal received; shutting down"); + } + startlog.info("Scylla version {} shutdown complete.", scylla_version()); + _exit(0); + return 0; + } + sys_dist_ks.start(std::ref(qp), std::ref(mm), std::ref(proxy)).get(); auto stop_sdks = defer_verbose_shutdown("system distributed keyspace", [] { sys_dist_ks.invoke_on_all(&db::system_distributed_keyspace::stop).get(); From 9c07a189e84003e9916ce6d46492285bd6b1bed4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Fri, 22 Dec 2023 14:09:15 +0100 Subject: [PATCH 12/15] docs: add maintenance mode documentation --- docs/operating-scylla/_common/tools_index.rst | 1 + docs/operating-scylla/admin-tools/index.rst | 1 + .../admin-tools/maintenance-mode.rst | 23 +++++++++++++++++++ 3 files changed, 25 insertions(+) create mode 100644 docs/operating-scylla/admin-tools/maintenance-mode.rst diff --git a/docs/operating-scylla/_common/tools_index.rst b/docs/operating-scylla/_common/tools_index.rst index 71f01b880f..1d78c15490 100644 --- a/docs/operating-scylla/_common/tools_index.rst +++ b/docs/operating-scylla/_common/tools_index.rst @@ -16,6 +16,7 @@ * 
:doc:`perftune` - performance configuration. * :doc:`SELECT * FROM MUTATION_FRAGMENTS() Statement ` - dump the underlying mutation data from tables. * :doc:`Maintenance socket ` - a Unix domain socket for full-permission CQL connection. +* :doc:`Maintenance mode ` - a mode for performing maintenance tasks on an offline Scylla node. Run each tool with ``-h``, ``--help`` for full options description. diff --git a/docs/operating-scylla/admin-tools/index.rst b/docs/operating-scylla/admin-tools/index.rst index e05a30bac0..ef8b5c5688 100644 --- a/docs/operating-scylla/admin-tools/index.rst +++ b/docs/operating-scylla/admin-tools/index.rst @@ -21,6 +21,7 @@ Admin Tools Virtual Tables SELECT * FROM MUTATION_FRAGMENTS() Statement Maintenance socket + Maintenance mode .. panel-box:: :title: Admin Tools diff --git a/docs/operating-scylla/admin-tools/maintenance-mode.rst b/docs/operating-scylla/admin-tools/maintenance-mode.rst new file mode 100644 index 0000000000..44b7b66f8e --- /dev/null +++ b/docs/operating-scylla/admin-tools/maintenance-mode.rst @@ -0,0 +1,23 @@ +Maintenance mode +================ + +In this mode, the node is not reachable from the outside, i.e. + + * it refuses all incoming RPC connections, + * it does not join the cluster, thus + + * all operations that need synchronizing with other nodes are disabled (e.g. schema changes), + * all cluster-wide operations are disabled for this node (e.g. repair), + * other nodes see this node as dead, + * cannot read or write data from/to other nodes, + * it does not open Alternator and Redis transport ports and the TCP CQL port. + +The only way to make CQL queries is to use :doc:`the maintenance socket `. The node serves only local data. + +To start the node in maintenance mode, use the `--maintenance-mode true` flag or set `maintenance_mode: true` in the configuration file. 
+ +REST API works as usual, but some routes are disabled: + + * authorization_cache + * failure_detector + * hinted_hand_off_manager From 77a656bfd65eb2bda5ce6c7f6c9a20fab6b62414 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Thu, 18 Jan 2024 17:38:11 +0100 Subject: [PATCH 13/15] test.py: when connecting to node in maintenance mode use maintenance socket A node in maintenance mode does not have the regular CQL port open. To connect to the node, the scylla cluster needs to use the node's maintenance socket. --- test/pylib/scylla_cluster.py | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/test/pylib/scylla_cluster.py b/test/pylib/scylla_cluster.py index 7144df3178..fdd92755a1 100644 --- a/test/pylib/scylla_cluster.py +++ b/test/pylib/scylla_cluster.py @@ -41,6 +41,7 @@ from cassandra.cluster import Session # pylint: disable=no-name-in-mod from cassandra.cluster import ExecutionProfile # pylint: disable=no-name-in-module from cassandra.cluster import EXEC_PROFILE_DEFAULT # pylint: disable=no-name-in-module from cassandra.policies import WhiteListRoundRobinPolicy # type: ignore +from cassandra.connection import UnixSocketEndPoint class ReplaceConfig(NamedTuple): @@ -355,6 +356,19 @@ class ScyllaServer: except Exception as exc: # pylint: disable=broad-except return f"Exception when reading server log {self.log_filename}: {exc}" + def in_maintenance_mode(self) -> bool: + """Return True if the server is in maintenance mode""" + return self.config.get("maintenance_mode", False) + + def maintenance_socket(self) -> Optional[str]: + """Return the maintenance socket path""" + maintenance_socket_option = self.config["maintenance_socket"] + if maintenance_socket_option == "workdir": + return (self.workdir / "cql.m").absolute().as_posix() + elif maintenance_socket_option == "ignore": + return None + return maintenance_socket_option + async def cql_is_up(self) -> CqlUpState: """Test that CQL is 
serving (a check we use at start up).""" caslog = logging.getLogger('cassandra') @@ -368,8 +382,19 @@ class ScyllaServer: # words, even after CQL port is up, Scylla may still be # initializing. When the role is ready, queries begin to # work, so rely on this "side effect". - profile = ExecutionProfile(load_balancing_policy=WhiteListRoundRobinPolicy([self.rpc_address]), - request_timeout=self.TOPOLOGY_TIMEOUT) + in_maintenance_mode = self.in_maintenance_mode() + + if in_maintenance_mode: + maintenance_socket = self.maintenance_socket() + if maintenance_socket is None: + raise RuntimeError("Can't check CQL in maintenance mode without a maintenance socket") + profile = ExecutionProfile(load_balancing_policy=WhiteListRoundRobinPolicy([UnixSocketEndPoint(maintenance_socket)]), + request_timeout=self.TOPOLOGY_TIMEOUT) + contact_points = [UnixSocketEndPoint(maintenance_socket)] + else: + profile = ExecutionProfile(load_balancing_policy=WhiteListRoundRobinPolicy([self.rpc_address]), + request_timeout=self.TOPOLOGY_TIMEOUT) + contact_points=[self.rpc_address] connected = False try: # In a cluster setup, it's possible that the CQL @@ -377,7 +402,7 @@ class ScyllaServer: # point, so make sure we execute the checks strictly via # this connection with Cluster(execution_profiles={EXEC_PROFILE_DEFAULT: profile}, - contact_points=[self.rpc_address], + contact_points=contact_points, # This is the latest version Scylla supports protocol_version=4, control_connection_timeout=self.TOPOLOGY_TIMEOUT, @@ -389,7 +414,7 @@ class ScyllaServer: session.execute("SELECT key FROM system.local where key = 'local'") self.control_cluster = Cluster(execution_profiles= {EXEC_PROFILE_DEFAULT: profile}, - contact_points=[self.rpc_address], + contact_points=contact_points, control_connection_timeout=self.TOPOLOGY_TIMEOUT, auth_provider=auth) self.control_connection = self.control_cluster.connect() From ca35e352f53e105a778b36e0b3cc19766a34677b Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Fri, 19 Jan 2024 17:48:04 +0100 Subject: [PATCH 14/15] test.py: generalize usage of cluster_con Add an option to pass a load_balancing_policy. Change the hosts type to a list of IPs or cassandra.EndPoint. --- test/topology/conftest.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/test/topology/conftest.py b/test/topology/conftest.py index 1570e71387..5959a2d99b 100644 --- a/test/topology/conftest.py +++ b/test/topology/conftest.py @@ -24,6 +24,7 @@ from cassandra.policies import TokenAwarePolicy # type: from cassandra.policies import WhiteListRoundRobinPolicy # type: ignore from cassandra.connection import DRIVER_NAME # type: ignore # pylint: disable=no-name-in-module from cassandra.connection import DRIVER_VERSION # type: ignore # pylint: disable=no-name-in-module +from cassandra.connection import EndPoint # type: ignore # pylint: disable=no-name-in-module Session.run_async = run_async # patch Session for convenience @@ -84,12 +85,12 @@ class CustomConnection(Cluster.connection_class): # cluster_con helper: set up client object for communicating with the CQL API. -def cluster_con(hosts: List[IPAddress], port: int, use_ssl: bool, auth_provider=None): +def cluster_con(hosts: List[IPAddress | EndPoint], port: int, use_ssl: bool, auth_provider=None, load_balancing_policy=RoundRobinPolicy()): """Create a CQL Cluster connection object according to configuration. 
It does not .connect() yet.""" assert len(hosts) > 0, "python driver connection needs at least one host to connect to" profile = ExecutionProfile( - load_balancing_policy=RoundRobinPolicy(), + load_balancing_policy=load_balancing_policy, consistency_level=ConsistencyLevel.LOCAL_QUORUM, serial_consistency_level=ConsistencyLevel.LOCAL_SERIAL, # The default timeouts should have been more than enough, but in some From 763911af5b5f735f842169b630e144b1fe6c0243 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miko=C5=82aj=20Grzebieluch?= Date: Thu, 18 Jan 2024 17:43:08 +0100 Subject: [PATCH 15/15] test.py: add test for maintenance mode The test checks that in maintenance mode server A is not available for other nodes and for clients. It is possible to connect by the maintenance socket to server A and perform local CQL operations. --- test/topology_custom/test_maintenance_mode.py | 95 +++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 test/topology_custom/test_maintenance_mode.py diff --git a/test/topology_custom/test_maintenance_mode.py b/test/topology_custom/test_maintenance_mode.py new file mode 100644 index 0000000000..32a06e5bc8 --- /dev/null +++ b/test/topology_custom/test_maintenance_mode.py @@ -0,0 +1,95 @@ +# +# Copyright (C) 2024-present ScyllaDB +# +# SPDX-License-Identifier: AGPL-3.0-or-later +# + +from cassandra.protocol import ConfigurationException +from cassandra.connection import UnixSocketEndPoint +from cassandra.policies import WhiteListRoundRobinPolicy +from test.pylib.manager_client import ManagerClient +from test.topology.conftest import cluster_con + +import pytest +import logging +import socket + +logger = logging.getLogger(__name__) + +@pytest.mark.asyncio +async def test_maintenance_mode(manager: ManagerClient): + """ + The test checks that in maintenance mode server A is not available for other nodes and for clients. + It is possible to connect by the maintenance socket to server A and perform local CQL operations. 
+ """ + + server_a, server_b = await manager.server_add(), await manager.server_add() + workdir = await manager.server_get_workdir(server_a.server_id) + socket_endpoint = UnixSocketEndPoint(workdir + "/cql.m") + + cluster = cluster_con([server_b.ip_addr], 9042, False) + cql = cluster.connect() + + await cql.run_async("CREATE KEYSPACE ks WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1}") + await cql.run_async("CREATE TABLE ks.t (k int PRIMARY KEY, v int)") + + # Token ranges of the server A + # [(start_token, end_token)] + ranges = [(int(row[0]), int(row[1])) for row in await cql.run_async(f"""SELECT start_token, end_token, endpoint + FROM system.token_ring WHERE keyspace_name = 'ks' + AND endpoint = '{server_a.ip_addr}' ALLOW FILTERING""")] + + # Insert data to the cluster and find a key that is stored on server A. + for i in range(256): + await cql.run_async(f"INSERT INTO ks.t (k, v) VALUES ({i}, {i})") + + # [(key, token of this key)] + keys_with_tokens = [(int(row[0]), int(row[1])) for row in await cql.run_async("SELECT k, token(k) FROM ks.t")] + key_on_server_a = None + + for key, token in keys_with_tokens: + for start, end in ranges: + if (start < end and start < token <= end) or (start >= end and (token <= end or start < token)): + key_on_server_a = key + + if key_on_server_a is None: + # There is only a chance ~(1/2)^256 that all keys are stored on the server B + # In this case we skip the test + pytest.skip("All keys are stored on the server B") + + # Start server A in maintenance mode + await manager.server_stop_gracefully(server_a.server_id) + await manager.server_update_config(server_a.server_id, "maintenance_mode", "true") + await manager.server_start(server_a.server_id) + + # Check that the regular CQL port is not available + assert socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect_ex((server_a.ip_addr, 9042)) != 0 + + maintenance_cluster = cluster_con([socket_endpoint], 9042, False, + 
load_balancing_policy=WhiteListRoundRobinPolicy([socket_endpoint])) + maintenance_cql = maintenance_cluster.connect() + + # Check that local data is available in maintenance mode + res = await maintenance_cql.run_async(f"SELECT v FROM ks.t WHERE k = {key_on_server_a}") + assert res[0][0] == key_on_server_a + + # Check that group0 operations are disabled + with pytest.raises(ConfigurationException): + await maintenance_cql.run_async(f"CREATE TABLE ks.t2 (k int PRIMARY KEY, v int)") + + await maintenance_cql.run_async(f"UPDATE ks.t SET v = {key_on_server_a + 1} WHERE k = {key_on_server_a}") + + # Ensure that server B recognizes server A as being shutdown, not as being alive. + res = await cql.run_async(f"SELECT status FROM system.cluster_status WHERE peer = '{server_a.ip_addr}'") + assert res[0][0] == "shutdown" + + await manager.server_stop_gracefully(server_a.server_id) + + # Restart in normal mode to see if the changes made in maintenance mode are persisted + await manager.server_update_config(server_a.server_id, "maintenance_mode", "false") + await manager.server_start(server_a.server_id, wait_others=1) + + res = await cql.run_async(f"SELECT v FROM ks.t WHERE k = {key_on_server_a}") + assert res[0][0] == key_on_server_a + 1 + +