set ssl_* columns in system.clients

Depends on https://github.com/scylladb/seastar/pull/2651

Missing columns have been present since probably forever -
they were added to the schema but never assigned any value:
```
cqlsh> select * from system.clients;
------------------+------------------------
...
 ssl_cipher_suite | null
 ssl_enabled      | null
 ssl_protocol     | null
...
```

This patch sets values of these columns:
- with a TLS connection, the 3 TLS-related fields are filled in,
- without TLS, `ssl_enabled` is set to `false` and other columns are
  `null`,
- if there's an error while inspecting TLS values, the connection is
  dropped.

We want to save the TLS info of a connection just after accepting it,
but without waiting for a TLS handshake to complete, so once the
connection is accepted, we're inspecting it in the background for the
server to be able to accept next connections immediately.
Later, when we construct system.clients virtual table, the previously
saved data can be instantaneously assigned to client_data, which is a
struct representing a row in system.clients table. This way we don't
slow down constructing this table by more than necessary, which is
relevant for cases with plenty of connections.

Fixes: #9216

Closes scylladb/scylladb#22961
This commit is contained in:
Piotr Smaron
2025-07-22 15:02:10 +02:00
committed by Nadav Har'El
parent 3c0032deb4
commit bdb90ee15c
4 changed files with 89 additions and 10 deletions

View File

@@ -353,18 +353,19 @@ server::listen(socket_address addr, std::shared_ptr<seastar::tls::credentials_bu
lo.lba = server_socket::load_balancing_algorithm::port;
}
server_socket ss;
bool is_tls = false;
try {
ss = builder
? seastar::tls::listen(_credentials, addr, lo)
? is_tls = true, seastar::tls::listen(_credentials, addr, lo)
: seastar::listen(addr, lo);
} catch (...) {
throw std::runtime_error(format("{} error while listening on {} -> {}", _server_name, addr, std::current_exception()));
}
_listeners.emplace_back(std::move(ss));
_listeners_stopped = when_all(std::move(_listeners_stopped), do_accepts(_listeners.size() - 1, keepalive, addr)).discard_result();
_listeners_stopped = when_all(std::move(_listeners_stopped), do_accepts(_listeners.size() - 1, keepalive, addr, is_tls)).discard_result();
}
future<> server::do_accepts(int which, bool keepalive, socket_address server_addr) {
future<> server::do_accepts(int which, bool keepalive, socket_address server_addr, bool is_tls) {
while (!_gate.is_closed()) {
seastar::gate::holder holder(_gate);
bool shed = false;
@@ -404,10 +405,26 @@ future<> server::do_accepts(int which, bool keepalive, socket_address server_add
conn->shutdown();
continue;
}
conn->_ssl_enabled = is_tls;
// Move the processing into the background.
(void)futurize_invoke([this, conn] {
// Block while monitoring for lifetime/errors.
return conn->process().then_wrapped([this, conn] (auto f) {
(void)futurize_invoke([this, conn, is_tls] {
return (is_tls
? tls::get_protocol_version(conn->_fd).then([conn](const sstring& protocol) {
return tls::get_cipher_suite(conn->_fd).then(
[conn, protocol](const sstring& cipher_suite) mutable {
conn->_ssl_protocol = protocol;
conn->_ssl_cipher_suite = cipher_suite;
return make_ready_future<bool>(true);
});
}).handle_exception([this, conn](std::exception_ptr ep) {
_logger.warn("Inspecting TLS connection failed: {}", ep);
return make_ready_future<bool>(false);
})
: make_ready_future<bool>(true)
).then([conn] (bool ok){
// Block while monitoring for lifetime/errors.
return ok ? conn->process() : make_ready_future<>();
}).then_wrapped([this, conn](auto f) {
try {
f.get();
} catch (...) {

View File

@@ -60,6 +60,10 @@ protected:
seastar::named_gate _pending_requests_gate;
seastar::gate::holder _hold_server;
bool _ssl_enabled = false;
std::optional<sstring> _ssl_cipher_suite = std::nullopt;
std::optional<sstring> _ssl_protocol = std::nullopt;;
private:
future<> process_until_tenant_switch();
bool shutdown_input();
@@ -144,7 +148,7 @@ public:
std::function<server&()> get_shard_instance = {}
);
future<> do_accepts(int which, bool keepalive, socket_address server_addr);
future<> do_accepts(int which, bool keepalive, socket_address server_addr, bool is_tls);
protected:
virtual seastar::shared_ptr<connection> make_connection(socket_address server_addr, connected_socket&& fd, socket_address addr, named_semaphore& sem, semaphore_units<named_semaphore_exception_factory> initial_sem_units) = 0;

View File

@@ -10,8 +10,25 @@
import pytest
import ssl
import cassandra.cluster
from contextlib import contextmanager
import re
import ssl
# This function normalizes the SSL cipher suite name (a string),
# which we need to do because tests use python library and scylla server uses C library,
# and both the python's and the C's library naming conventions are different,
# so we need some translation in order to compare them.
def normalize_cipher(cipher_name: str) -> str:
if cipher_name.startswith("TLS_"):
cipher_name = cipher_name[len("TLS_"):] # Remove leading "TLS_" if present.
cipher_name = cipher_name.replace("_WITH_", "-")
cipher_name = cipher_name.replace("_", "-")
# Remove hyphen between letters and digits: e.g. convert "AES-256" to "AES256"
cipher_name = re.sub(r'([A-Z]+)-(\d+)', r'\1\2', cipher_name)
return cipher_name
# Test that TLS 1.2 is supported (because this is what "cqlsh --ssl" uses
# by default), and that other TLS version are either supported - or if
@@ -46,6 +63,39 @@ def test_tls_versions(cql):
assert 'protocol version' in str(e) or 'no protocols available' in str(e)
print(f"{ssl_version} not supported")
# a regression test for #9216
def test_system_clients_stores_tls_info(cql):
if not cql.cluster.ssl_context:
table_result = cql.execute(f"SELECT * FROM system.clients")
for row in table_result:
assert not row.ssl_enabled
assert row.ssl_protocol is None
assert row.ssl_cipher_suite is None
if cql.cluster.ssl_context:
# TLS v1.2 must be supported, because this is the default version that
# "cqlsh --ssl" uses. If this fact changes in the future, we may need
# to reconsider this test.
with try_connect(cql.cluster, ssl.TLSVersion.TLSv1_2) as session:
# As of time of writing, python driver spawns 5 to 6 connections for this single session,
# and some connections may already be past the TLS init phase, while others not, when we query system.clients,
# so we need to retry until all connections are initialized and have their TLS info recorded in system.clients,
# otherwise we'd end up with some connections e.g. having their ssl_enabled=True but other fields still None.
expected_ciphers = [normalize_cipher(cipher['name']) for cipher in ssl.create_default_context().get_ciphers()]
for _ in range(1000): # try for up to 1000 * 0.01s = 10s seconds
rows = session.execute(f"SELECT * FROM system.clients")
if rows and all(
row.ssl_enabled
and row.ssl_protocol == 'TLS1.2'
and normalize_cipher(row.ssl_cipher_suite) in expected_ciphers
for row in rows
):
return
time.sleep(0.01)
pytest.fail(f"Not all connections have TLS data set correctly in system.clients after 10s seconds")
@contextmanager
def try_connect(orig_cluster, ssl_version):
ssl_context=ssl.SSLContext(ssl.PROTOCOL_TLS)
ssl_context.minimum_version = ssl_version
@@ -63,8 +113,11 @@ def try_connect(orig_cluster, ssl_version):
# so let's increase them to 60 seconds. See issue #11289.
connect_timeout = 60,
control_connection_timeout = 60)
cluster.connect()
cluster.shutdown()
try:
session = cluster.connect()
yield session
finally:
cluster.shutdown()
# Test that if we try to connect to an SSL port with *unencrypted* CQL,
# it doesn't work.

View File

@@ -680,6 +680,11 @@ client_data cql_server::connection::make_client_data() const {
cd.connection_stage = client_connection_stage::authenticating;
}
cd.scheduling_group_name = _current_scheduling_group.name();
cd.ssl_enabled = _ssl_enabled;
cd.ssl_protocol = _ssl_protocol;
cd.ssl_cipher_suite = _ssl_cipher_suite;
return cd;
}