Merge 'Add maintenance mode' from Mikołaj Grzebieluch

In this mode, the node is not reachable from the outside, i.e.
* it refuses all incoming RPC connections,
* it does not join the cluster, thus
  * all group0 operations are disabled (e.g. schema changes),
  * all cluster-wide operations are disabled for this node (e.g. repair),
  * other nodes see this node as dead,
* it cannot read or write data from/to other nodes,
* it does not open Alternator and Redis transport ports and the TCP CQL port.

The only way to make CQL queries is to use the maintenance socket. The node serves only local data.

To start the node in maintenance mode, use the `--maintenance-mode true` flag or set `maintenance_mode: true` in the configuration file.

REST API works as usual, but some routes are disabled:
* authorization_cache
* failure_detector
* hinted_hand_off_manager

This PR also updates the maintenance socket documentation:
* add cqlsh usage to the documentation
* update the documentation to use `WhiteListRoundRobinPolicy`

Fixes #5489.

Closes scylladb/scylladb#15346

* github.com:scylladb/scylladb:
  test.py: add test for maintenance mode
  test.py: generalize usage of cluster_con
  test.py: when connecting to node in maintenance mode use maintenance socket
  docs: add maintenance mode documentation
  main: add maintenance mode
  main: move some REST routes initialization before joining group0
  message_service: add sanity check that rpc connections are not created in the maintenance mode
  raft_group0_client: disable group0 operations in the maintenance mode
  service/storage_service: add start_maintenance_mode() method
  storage_service: add MAINTENANCE option to mode enum
  service/maintenance_mode: add maintenance_mode_enabled bool class
  service/maintenance_mode: move maintenance_socket_enabled definition to separate file
  db/config: add maintenance mode flag
  docs: add cqlsh usage to maintenance socket documentation
  docs: update maintenance socket documentation to use WhiteListRoundRobinPolicy
This commit is contained in:
Kamil Braun
2024-01-26 11:02:34 +01:00
25 changed files with 298 additions and 59 deletions

View File

@@ -741,7 +741,7 @@ private:
auto stop_forward_service = defer([this] { _forward_service.stop().get(); });
// group0 client exists only on shard 0
service::raft_group0_client group0_client(_group0_registry.local(), _sys_ks.local());
service::raft_group0_client group0_client(_group0_registry.local(), _sys_ks.local(), maintenance_mode_enabled::no);
_mm.start(std::ref(_mnotifier), std::ref(_feature_service), std::ref(_ms), std::ref(_proxy), std::ref(_gossiper), std::ref(group0_client), std::ref(_sys_ks)).get();
auto stop_mm = defer([this] { _mm.stop().get(); });
@@ -925,7 +925,7 @@ private:
auth_config.authenticator_java_name = qualified_authenticator_name;
auth_config.role_manager_java_name = qualified_role_manager_name;
_auth_service.start(perm_cache_config, std::ref(_qp), std::ref(_mnotifier), std::ref(_mm), auth_config, db::maintenance_socket_enabled::no).get();
_auth_service.start(perm_cache_config, std::ref(_qp), std::ref(_mnotifier), std::ref(_mm), auth_config, maintenance_socket_enabled::no).get();
_auth_service.invoke_on_all([this] (auth::service& auth) {
return auth.start(_mm.local());
}).get();

View File

@@ -42,6 +42,7 @@ from cassandra.cluster import Session # pylint: disable=no-name-in-mod
from cassandra.cluster import ExecutionProfile # pylint: disable=no-name-in-module
from cassandra.cluster import EXEC_PROFILE_DEFAULT # pylint: disable=no-name-in-module
from cassandra.policies import WhiteListRoundRobinPolicy # type: ignore
from cassandra.connection import UnixSocketEndPoint
class ReplaceConfig(NamedTuple):
@@ -362,6 +363,19 @@ class ScyllaServer:
except Exception as exc: # pylint: disable=broad-except
return f"Exception when reading server log {self.log_filename}: {exc}"
def in_maintenance_mode(self) -> bool:
"""Return True if the server is in maintenance mode"""
# Missing key defaults to False. NOTE(review): the raw config value is
# returned as-is — the test below stores the string "true", which is
# truthy but not a bool; confirm callers only use it in boolean context.
return self.config.get("maintenance_mode", False)
def maintenance_socket(self) -> Optional[str]:
"""Return the maintenance socket path"""
# Option semantics (from the branches below):
#   "workdir" -> absolute path of the "cql.m" socket file in the server's workdir
#   "ignore"  -> no maintenance socket available (None)
#   any other value is used verbatim as the socket path
maintenance_socket_option = self.config["maintenance_socket"]
if maintenance_socket_option == "workdir":
return (self.workdir / "cql.m").absolute().as_posix()
elif maintenance_socket_option == "ignore":
return None
return maintenance_socket_option
async def cql_is_up(self) -> CqlUpState:
"""Test that CQL is serving (a check we use at start up)."""
caslog = logging.getLogger('cassandra')
@@ -375,8 +389,19 @@ class ScyllaServer:
# words, even after CQL port is up, Scylla may still be
# initializing. When the role is ready, queries begin to
# work, so rely on this "side effect".
profile = ExecutionProfile(load_balancing_policy=WhiteListRoundRobinPolicy([self.rpc_address]),
request_timeout=self.TOPOLOGY_TIMEOUT)
in_maintenance_mode = self.in_maintenance_mode()
if in_maintenance_mode:
maintenance_socket = self.maintenance_socket()
if maintenance_socket is None:
raise RuntimeError("Can't check CQL in maintenance mode without a maintenance socket")
profile = ExecutionProfile(load_balancing_policy=WhiteListRoundRobinPolicy([UnixSocketEndPoint(maintenance_socket)]),
request_timeout=self.TOPOLOGY_TIMEOUT)
contact_points = [UnixSocketEndPoint(maintenance_socket)]
else:
profile = ExecutionProfile(load_balancing_policy=WhiteListRoundRobinPolicy([self.rpc_address]),
request_timeout=self.TOPOLOGY_TIMEOUT)
contact_points=[self.rpc_address]
connected = False
try:
# In a cluster setup, it's possible that the CQL
@@ -384,7 +409,7 @@ class ScyllaServer:
# point, so make sure we execute the checks strictly via
# this connection
with Cluster(execution_profiles={EXEC_PROFILE_DEFAULT: profile},
contact_points=[self.rpc_address],
contact_points=contact_points,
# This is the latest version Scylla supports
protocol_version=4,
control_connection_timeout=self.TOPOLOGY_TIMEOUT,
@@ -396,7 +421,7 @@ class ScyllaServer:
session.execute("SELECT key FROM system.local where key = 'local'")
self.control_cluster = Cluster(execution_profiles=
{EXEC_PROFILE_DEFAULT: profile},
contact_points=[self.rpc_address],
contact_points=contact_points,
control_connection_timeout=self.TOPOLOGY_TIMEOUT,
auth_provider=auth)
self.control_connection = self.control_cluster.connect()

View File

@@ -24,6 +24,7 @@ from cassandra.policies import TokenAwarePolicy # type:
from cassandra.policies import WhiteListRoundRobinPolicy # type: ignore
from cassandra.connection import DRIVER_NAME # type: ignore # pylint: disable=no-name-in-module
from cassandra.connection import DRIVER_VERSION # type: ignore # pylint: disable=no-name-in-module
from cassandra.connection import EndPoint # type: ignore # pylint: disable=no-name-in-module
Session.run_async = run_async # patch Session for convenience
@@ -84,12 +85,12 @@ class CustomConnection(Cluster.connection_class):
# cluster_con helper: set up client object for communicating with the CQL API.
def cluster_con(hosts: List[IPAddress], port: int, use_ssl: bool, auth_provider=None):
def cluster_con(hosts: List[IPAddress | EndPoint], port: int, use_ssl: bool, auth_provider=None, load_balancing_policy=RoundRobinPolicy()):
"""Create a CQL Cluster connection object according to configuration.
It does not .connect() yet."""
assert len(hosts) > 0, "python driver connection needs at least one host to connect to"
profile = ExecutionProfile(
load_balancing_policy=RoundRobinPolicy(),
load_balancing_policy=load_balancing_policy,
consistency_level=ConsistencyLevel.LOCAL_QUORUM,
serial_consistency_level=ConsistencyLevel.LOCAL_SERIAL,
# The default timeouts should have been more than enough, but in some

View File

@@ -0,0 +1,95 @@
#
# Copyright (C) 2024-present ScyllaDB
#
# SPDX-License-Identifier: AGPL-3.0-or-later
#
from cassandra.protocol import ConfigurationException
from cassandra.connection import UnixSocketEndPoint
from cassandra.policies import WhiteListRoundRobinPolicy
from test.pylib.manager_client import ManagerClient
from test.topology.conftest import cluster_con
import pytest
import logging
import socket
logger = logging.getLogger(__name__)
@pytest.mark.asyncio
async def test_maintenance_mode(manager: ManagerClient):
"""
The test checks that in maintenance mode server A is not available for other nodes and for clients.
It is possible to connect by the maintenance socket to server A and perform local CQL operations.
"""
# Two-node cluster; server A is the one restarted in maintenance mode later.
server_a, server_b = await manager.server_add(), await manager.server_add()
workdir = await manager.server_get_workdir(server_a.server_id)
# Unix-domain endpoint of server A's maintenance CQL socket ("cql.m" in its workdir).
socket_endpoint = UnixSocketEndPoint(workdir + "/cql.m")
# Regular CQL connection goes through server B only.
cluster = cluster_con([server_b.ip_addr], 9042, False)
cql = cluster.connect()
await cql.run_async("CREATE KEYSPACE ks WITH REPLICATION = {'class': 'SimpleStrategy', 'replication_factor': 1}")
await cql.run_async("CREATE TABLE ks.t (k int PRIMARY KEY, v int)")
# Token ranges of the server A
# [(start_token, end_token)]
ranges = [(int(row[0]), int(row[1])) for row in await cql.run_async(f"""SELECT start_token, end_token, endpoint
FROM system.token_ring WHERE keyspace_name = 'ks'
AND endpoint = '{server_a.ip_addr}' ALLOW FILTERING""")]
# Insert data to the cluster and find a key that is stored on server A.
for i in range(256):
await cql.run_async(f"INSERT INTO ks.t (k, v) VALUES ({i}, {i})")
# [(key, token of this key)]
keys_with_tokens = [(int(row[0]), int(row[1])) for row in await cql.run_async("SELECT k, token(k) FROM ks.t")]
key_on_server_a = None
for key, token in keys_with_tokens:
for start, end in ranges:
# Second disjunct handles the wrap-around range (start >= end).
# NOTE(review): no break after a match — the last matching key wins,
# which is fine since any key owned by server A suffices here.
if (start < end and start < token <= end) or (start >= end and (token <= end or start < token)):
key_on_server_a = key
if key_on_server_a is None:
# There is only a chance ~(1/2)^256 that all keys are stored on the server B
# In this case we skip the test
pytest.skip("All keys are stored on the server B")
# Start server A in maintenance mode
await manager.server_stop_gracefully(server_a.server_id)
await manager.server_update_config(server_a.server_id, "maintenance_mode", "true")
await manager.server_start(server_a.server_id)
# Check that the regular CQL port is not available
# NOTE(review): the probe socket is never closed explicitly; relies on GC.
assert socket.socket(socket.AF_INET, socket.SOCK_STREAM).connect_ex((server_a.ip_addr, 9042)) != 0
# Connect through the Unix-domain maintenance socket; the whitelist policy
# pins the driver to that single endpoint.
maintenance_cluster = cluster_con([socket_endpoint], 9042, False,
load_balancing_policy=WhiteListRoundRobinPolicy([socket_endpoint]))
maintenance_cql = maintenance_cluster.connect()
# Check that local data is available in maintenance mode
res = await maintenance_cql.run_async(f"SELECT v FROM ks.t WHERE k = {key_on_server_a}")
assert res[0][0] == key_on_server_a
# Check that group0 operations are disabled
with pytest.raises(ConfigurationException):
await maintenance_cql.run_async(f"CREATE TABLE ks.t2 (k int PRIMARY KEY, v int)")
# Plain data writes, unlike schema changes, are still allowed locally.
await maintenance_cql.run_async(f"UPDATE ks.t SET v = {key_on_server_a + 1} WHERE k = {key_on_server_a}")
# Ensure that server B recognizes server A as being shutdown, not as being alive.
res = await cql.run_async(f"SELECT status FROM system.cluster_status WHERE peer = '{server_a.ip_addr}'")
assert res[0][0] == "shutdown"
await manager.server_stop_gracefully(server_a.server_id)
# Restart in normal mode to see if the changes made in maintenance mode are persisted
await manager.server_update_config(server_a.server_id, "maintenance_mode", "false")
await manager.server_start(server_a.server_id, wait_others=1)
# The update made over the maintenance socket must survive the restart.
res = await cql.run_async(f"SELECT v FROM ks.t WHERE k = {key_on_server_a}")
assert res[0][0] == key_on_server_a + 1