Merge 'auth: move passwords::check call to alien thread' from Andrzej Jackowski

Analysis of customer stalls revealed that the function `detail::hash_with_salt` (invoked by `passwords::check`) often blocks the reactor. Internally, this function uses the external `crypt_r` function to compute password hashes, which is CPU-intensive.

This PR addresses the issue in two ways:
1) `sha-512` is now the only password hashing scheme for new passwords (it was already the common-case).
2) `passwords::check` is moved to a dedicated alien thread.

Regarding point 1: before this change, the following hashing schemes were supported by     `identify_best_supported_scheme()`: bcrypt_y, bcrypt_a, SHA-512, SHA-256, and MD5. The reason for this was that the `crypt_r` function used for password hashing comes from an external library (currently `libxcrypt`), and the supported hashing algorithms vary depending on the library in use. However:
- The bcrypt schemes never worked properly because their prefixes lack the required round count (e.g. `$2y$` instead of `$2y$05$`). Moreover, bcrypt is slower than SHA-512, so it  not good idea to fix or use it.
- SHA-256 and SHA-512 both belong to the SHA-2 family. Libraries that support one almost always support the other, so it’s very unlikely to find SHA-256 without SHA-512.
- MD5 is no longer considered secure for password hashing.

Regarding point 2: the `passwords::check` call now runs on a shared alien thread created at database startup. An `std::mutex` synchronizes that thread with the shards. In theory this could introduce a frequent lock contention, but in practice each shard handles only a few hundred new connections per second—even during storms. There is already `_conns_cpu_concurrency_semaphore` in `generic_server` limits the number of concurrent connection handlers.

Fixes https://github.com/scylladb/scylladb/issues/24524

Backport not needed, as it is a new feature.

Closes scylladb/scylladb#24924

* github.com:scylladb/scylladb:
  main: utils: add thread names to alien workers
  auth: move passwords::check call to alien thread
  test: wait for 3 clients with given username in test_service_level_api
  auth: refactor password checking in password_authenticator
  auth: make SHA-512 the only password hashing scheme for new passwords
  auth: whitespace change in identify_best_supported_scheme()
  auth: require scheme as parameter for `generate_salt`
  auth: check password hashing scheme support on authenticator start

(cherry picked from commit c762425ea7)
This commit is contained in:
Avi Kivity
2025-07-16 13:15:54 +03:00
parent 2bbf3cf669
commit 0900a88884
19 changed files with 110 additions and 57 deletions

View File

@@ -29,7 +29,7 @@ BOOST_AUTO_TEST_CASE(passwords_are_salted) {
std::unordered_set<sstring> observed_passwords{};
for (int i = 0; i < 10; ++i) {
const sstring e = auth::passwords::hash(cleartext, rng_for_salt);
const sstring e = auth::passwords::hash(cleartext, rng_for_salt, auth::passwords::scheme::sha_512);
BOOST_REQUIRE(!observed_passwords.contains(e));
observed_passwords.insert(e);
}
@@ -47,7 +47,7 @@ BOOST_AUTO_TEST_CASE(correct_passwords_authenticate) {
};
for (const char* p : passwords) {
BOOST_REQUIRE(auth::passwords::check(p, auth::passwords::hash(p, rng_for_salt)));
BOOST_REQUIRE(auth::passwords::check(p, auth::passwords::hash(p, rng_for_salt, auth::passwords::scheme::sha_512)));
}
}
@@ -55,6 +55,6 @@ BOOST_AUTO_TEST_CASE(correct_passwords_authenticate) {
// A hashed password that does not match the password in cleartext does not authenticate.
//
BOOST_AUTO_TEST_CASE(incorrect_passwords_do_not_authenticate) {
const sstring hashed_password = auth::passwords::hash("actual_password", rng_for_salt);
const sstring hashed_password = auth::passwords::hash("actual_password", rng_for_salt,auth::passwords::scheme::sha_512);
BOOST_REQUIRE(!auth::passwords::check("password_guess", hashed_password));
}

View File

@@ -48,6 +48,17 @@ def count_opened_connections_from_table(cql):
return result
def wait_for_clients(cql, username, clients_num, wait_s = 1, timeout_s = 30):
start_time = time.time()
while time.time() - start_time < timeout_s:
result = cql.execute(f"SELECT COUNT(*) FROM system.clients WHERE username='{username}' ALLOW FILTERING")
if result.one()[0] == clients_num:
return
else:
time.sleep(wait_s)
raise RuntimeError(f"Awaiting for {clients_num} clients timed out.")
def wait_until_all_connections_authenticated(cql, wait_s = 1, timeout_s = 30):
start_time = time.time()
while time.time() - start_time < timeout_s:
@@ -85,6 +96,7 @@ def test_count_opened_cql_connections(cql):
try:
with new_session(cql, user):
wait_for_clients(cql, user, 3) # 3 from smp=2 + control connection
wait_until_all_connections_authenticated(cql)
verify_scheduling_group_assignment(cql, user, sl, get_shard_count(cql))
@@ -120,6 +132,7 @@ def test_switch_tenants(cql):
try:
with new_session(cql, user) as user_session:
wait_for_clients(cql, user, 3) # 3 from smp=2 + control connection
wait_until_all_connections_authenticated(cql)
verify_scheduling_group_assignment(cql, user, sl1, shard_count)

View File

@@ -1079,7 +1079,11 @@ private:
auth_config.authenticator_java_name = qualified_authenticator_name;
auth_config.role_manager_java_name = qualified_role_manager_name;
_auth_service.start(perm_cache_config, std::ref(_qp), std::ref(group0_client), std::ref(_mnotifier), std::ref(_mm), auth_config, maintenance_socket_enabled::no).get();
const uint64_t niceness = 19;
auto hashing_worker = utils::alien_worker(startlog, niceness, "pwd-hash");
_auth_service.start(perm_cache_config, std::ref(_qp), std::ref(group0_client), std::ref(_mnotifier), std::ref(_mm), auth_config, maintenance_socket_enabled::no, std::ref(hashing_worker)).get();
_auth_service.invoke_on_all([this] (auth::service& auth) {
return auth.start(_mm.local(), _sys_ks.local());
}).get();