From da6fe14035adb5fa7f094333161d567461ac3f76 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 24 Mar 2026 18:01:28 +0300 Subject: [PATCH 1/2] transport: test that connection_stage is READY after auth via all process_startup paths The cert-auth path in process_startup (introduced in 20e9619bb1) was missing _ready = true, _authenticating = false, update_scheduling_group() and on_connection_ready(). The result is that connections authenticated via certificate show connection_stage = AUTHENTICATING in system.clients forever, run under the wrong service-level scheduling group, and hold the uninitialized-connections semaphore slot for the lifetime of the connection. Add a parametrized cluster test that verifies all three process_startup branches result in connection_stage = READY: - allow_all: AllowAllAuthenticator (no-auth path) - password: PasswordAuthenticator (SASL/process_auth_response path) - cert_bypass: CertificateAuthenticator with transport_early_auth_bypass error injection (cert-auth path -- the buggy one) The injection is added to certificate_authenticator::authenticate() so tests can bypass actual TLS certificate parsing while still exercising the cert-auth code path in process_startup. The cert_bypass case is marked xfail until the bug is fixed. Signed-off-by: Pavel Emelyanov Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- auth/certificate_authenticator.cc | 4 + .../auth_cluster/test_connection_stage.py | 100 ++++++++++++++++++ 2 files changed, 104 insertions(+) create mode 100644 test/cluster/auth_cluster/test_connection_stage.py diff --git a/auth/certificate_authenticator.cc b/auth/certificate_authenticator.cc index c416c1ff59..c0457eacc0 100644 --- a/auth/certificate_authenticator.cc +++ b/auth/certificate_authenticator.cc @@ -14,6 +14,7 @@ #include #include "utils/to_string.hh" +#include "utils/error_injection.hh" #include "data_dictionary/data_dictionary.hh" #include "cql3/query_processor.hh" #include "db/config.hh" @@ -105,6 +106,9 @@ auth::authentication_option_set auth::certificate_authenticator::alterable_optio } future> auth::certificate_authenticator::authenticate(session_dn_func f) const { + if (auto user = utils::get_local_injector().inject_parameter("transport_early_auth_bypass")) { + co_return auth::authenticated_user{sstring(*user)}; + } if (!f) { co_return std::nullopt; } diff --git a/test/cluster/auth_cluster/test_connection_stage.py b/test/cluster/auth_cluster/test_connection_stage.py new file mode 100644 index 0000000000..b825e3b6de --- /dev/null +++ b/test/cluster/auth_cluster/test_connection_stage.py @@ -0,0 +1,100 @@ +# +# Copyright (C) 2026-present ScyllaDB +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0 +# + +""" +Tests that system.clients shows connection_stage = 'READY' after a successful +authentication, regardless of which auth path was taken in process_startup: + + 1. No-auth path (AllowAllAuthenticator): require_authentication() = false, + connection is immediately marked ready. + + 2. SASL/password path (PasswordAuthenticator): client sends AUTH_RESPONSE, + process_auth_response() marks the connection ready. + + 3. Certificate-bypass path (CertificateAuthenticator with + transport_early_auth_bypass injection): authenticate(session_dn_func) + returns a user immediately, process_startup() marks the connection ready + without a SASL round-trip. + + This is the path introduced by commit 20e9619bb1 that was missing the + _ready = true / update_scheduling_group() / on_connection_ready() calls. +""" + +import logging +import time +import pytest +from cassandra.auth import PlainTextAuthProvider +from test.pylib.manager_client import ManagerClient +from test.pylib.util import wait_for_cql_and_get_hosts, wait_for + + +logger = logging.getLogger(__name__) + + +def make_server_config(auth_type: str) -> dict: + """Return the scylla config dict for a given auth type.""" + if auth_type == "allow_all": + return { + "authenticator": "AllowAllAuthenticator", + "authorizer": "AllowAllAuthorizer", + } + if auth_type == "password": + return { + "authenticator": "PasswordAuthenticator", + "authorizer": "CassandraAuthorizer", + } + if auth_type == "cert_bypass": + # CertificateAuthenticator with transport_early_auth_bypass active + # from the very first connection (including the server-add readiness + # check), so no TLS socket is required. The injection makes + # certificate_authenticator::authenticate() return the cassandra + # superuser immediately, bypassing the TLS certificate check. + return { + "authenticator": "CertificateAuthenticator", + "authorizer": "CassandraAuthorizer", + # Minimal valid config; regex is never reached due to injection. + "auth_certificate_role_queries": [{"source": "SUBJECT", "query": "CN=(.+)"}], + "error_injections_at_startup": [ + {"name": "transport_early_auth_bypass", "value": "cassandra"}, + ], + } + raise ValueError(f"Unknown auth_type: {auth_type!r}") + + +@pytest.mark.asyncio +@pytest.mark.parametrize("auth_type", [ + "allow_all", + "password", + pytest.param("cert_bypass", marks=pytest.mark.xfail(reason="bug in process_startup cert-auth path", strict=True)), +]) +@pytest.mark.skip_mode("release", reason="error injections are not supported in release mode") +async def test_connection_stage_ready_after_auth(manager: ManagerClient, auth_type: str) -> None: + """After a successful authentication via any code path in process_startup, + the connection must be reported as READY in system.clients.""" + server = await manager.server_add(config=make_server_config(auth_type)) + + # CertificateAuthenticator bypassed by injection returns a user without a + # SASL challenge, so the driver must not attempt a password handshake. + auth_provider = PlainTextAuthProvider(username="cassandra", password="cassandra") \ + if auth_type == "password" else None + + await manager.driver_connect(server=server, auth_provider=auth_provider) + cql = manager.get_cql() + await wait_for_cql_and_get_hosts(cql, [server], time.time() + 60) + + async def all_connections_ready(): + rows = list(cql.execute( + "SELECT connection_stage FROM system.clients WHERE client_type = 'cql' ALLOW FILTERING" + )) + if not rows: + return None + if any(r.connection_stage != "READY" for r in rows): + return None + return rows + + rows = await wait_for(all_connections_ready, time.time() + 30) + assert rows, "No CQL connections found in system.clients" + logger.info("auth_type=%s: all %d connection(s) are READY", auth_type, len(rows)) From 2d8540f1eedeacd5a58aa5a19cfe7bc1d7df8549 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 24 Mar 2026 18:02:46 +0300 Subject: [PATCH 2/2] transport: fix process_startup cert-auth path missing connection-ready setup When authenticate() returns a user directly (certificate-based auth, introduced in 20e9619bb1), process_startup was missing the same post-authentication bookkeeping that the no-auth and SASL paths perform: - update_scheduling_group(): without it, the connection runs under the default scheduling group instead of the one mapped to the user's service level. - _authenticating = false / _ready = true: without them, system.clients reports connection_stage = AUTHENTICATING forever instead of READY. - on_connection_ready(): without it, the connection never releases its slot in the uninitialized-connections concurrency semaphore (acquired at connection creation), leaking one unit per cert-authenticated connection for the lifetime of the connection. The omission was introduced when on_connection_ready() was added to the else and SASL branches in 474e84199c but the cert-auth branch was missed. Fixes: 20e9619bb1 ("auth: support certificate-based authentication") Signed-off-by: Pavel Emelyanov Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- test/cluster/auth_cluster/test_connection_stage.py | 2 +- transport/server.cc | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/test/cluster/auth_cluster/test_connection_stage.py b/test/cluster/auth_cluster/test_connection_stage.py index b825e3b6de..095987e217 100644 --- a/test/cluster/auth_cluster/test_connection_stage.py +++ b/test/cluster/auth_cluster/test_connection_stage.py @@ -68,7 +68,7 @@ def make_server_config(auth_type: str) -> dict: @pytest.mark.parametrize("auth_type", [ "allow_all", "password", - pytest.param("cert_bypass", marks=pytest.mark.xfail(reason="bug in process_startup cert-auth path", strict=True)), + "cert_bypass", ]) @pytest.mark.skip_mode("release", reason="error injections are not supported in release mode") async def test_connection_stage_ready_after_auth(manager: ManagerClient, auth_type: str) -> None: diff --git a/transport/server.cc b/transport/server.cc index 76256224d2..5caf2d69c0 100644 --- a/transport/server.cc +++ b/transport/server.cc @@ -1405,6 +1405,10 @@ future> cql_server::connection::process_st client_state.set_login(std::move(*opt_user)); co_await client_state.check_user_can_login(); client_state.maybe_update_per_service_level_params(); + update_scheduling_group(); + _authenticating = false; + _ready = true; + on_connection_ready(); res = make_ready(stream, trace_state); } else { res = make_autheticate(stream, a.qualified_java_name(), trace_state);