From 797bc28aaee47c1b8563bf90e0902a194097a5fe Mon Sep 17 00:00:00 2001 From: Marcin Maliszkiewicz Date: Tue, 5 May 2026 15:43:06 +0200 Subject: [PATCH 1/4] auth: remove unused permissions field from cache role_record The permissions field in role_record was populated by fetch_role() but never read. Authorization uses cached_permissions instead, which is loaded via the permission_loader callback. Remove the dead field and its fetch code. The removed code also did not check for missing columns before accessing the permissions set, which could crash on ghost rows left by the removed auth v2 migration. The migration used INSERT (creating row markers), and when permissions were later revoked, the row marker survived while the permissions column became null. --- auth/cache.cc | 13 ------------- auth/cache.hh | 1 - 2 files changed, 14 deletions(-) diff --git a/auth/cache.cc b/auth/cache.cc index bed290a288..e1e92e470e 100644 --- a/auth/cache.cc +++ b/auth/cache.cc @@ -190,19 +190,6 @@ future> cache::fetch_role(const role_name_t& r co_await coroutine::maybe_yield(); } } - // permissions - { - static const sstring q = format("SELECT role, resource, permissions FROM {}.{} WHERE role = ?", db::system_keyspace::NAME, PERMISSIONS_CF); - auto rs = co_await fetch(q); - for (const auto& r : *rs) { - auto resource = r.get_as("resource"); - auto perms_strings = r.get_set("permissions"); - std::unordered_set perms_set(perms_strings.begin(), perms_strings.end()); - auto pset = permissions::from_strings(perms_set); - rec->permissions[std::move(resource)] = std::move(pset); - co_await coroutine::maybe_yield(); - } - } co_return rec; } diff --git a/auth/cache.hh b/auth/cache.hh index d63c02466f..305ec01f3f 100644 --- a/auth/cache.hh +++ b/auth/cache.hh @@ -44,7 +44,6 @@ public: std::unordered_set members; sstring salted_hash; std::unordered_map attributes; - std::unordered_map permissions; private: friend cache; // cached permissions include effects of role's inheritance From c44625ebdf3e44d8302f5d22a96c0a892a4929c5 Mon Sep 17 00:00:00 2001 From: Marcin Maliszkiewicz Date: Tue, 5 May 2026 15:43:20 +0200 Subject: [PATCH 2/4] auth: add defensive has() guard for role_attributes value column Add a has() check before accessing the value column in role_attributes to tolerate ghost rows with missing regular columns. In practice this is unlikely to be a problem since attributes are not typically revoked, but the guard is added for consistency and defensive programming. --- auth/cache.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/auth/cache.cc b/auth/cache.cc index e1e92e470e..856c29ac73 100644 --- a/auth/cache.cc +++ b/auth/cache.cc @@ -185,6 +185,9 @@ future> cache::fetch_role(const role_name_t& r static const sstring q = format("SELECT role, name, value FROM {}.{} WHERE role = ?", db::system_keyspace::NAME, ROLE_ATTRIBUTES_CF); auto rs = co_await fetch(q); for (const auto& r : *rs) { + if (!r.has("value")) { + continue; + } rec->attributes[r.get_as("name")] = r.get_as("value"); co_await coroutine::maybe_yield(); From df69a5c79b7b4da90f238eb8c47f072d070a6552 Mon Sep 17 00:00:00 2001 From: Marcin Maliszkiewicz Date: Tue, 5 May 2026 15:32:28 +0200 Subject: [PATCH 3/4] auth: tolerate missing permissions column in authorize() Ghost rows in role_permissions with a live row marker but no permissions column can occur when permissions created via INSERT (e.g. by the removed auth v2 migration) are later revoked. The row marker survives the revoke, leaving a row visible to queries but with permissions=null. Add a has() guard before accessing the permissions column, matching the pattern already used in list_all(). Return NONE permissions for such ghost rows instead of crashing. --- auth/default_authorizer.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/auth/default_authorizer.cc b/auth/default_authorizer.cc index 02c96d3778..d00e518ef6 100644 --- a/auth/default_authorizer.cc +++ b/auth/default_authorizer.cc @@ -76,7 +76,11 @@ default_authorizer::authorize(const role_or_anonymous& maybe_role, const resourc if (results->empty()) { co_return permissions::NONE; } - co_return permissions::from_strings(results->one().get_set(PERMISSIONS_NAME)); + const auto& row = results->one(); + if (!row.has(PERMISSIONS_NAME)) { + co_return permissions::NONE; + } + co_return permissions::from_strings(row.get_set(PERMISSIONS_NAME)); } future<> From 5c5306c692abc044c054044c5f42e7696876c08d Mon Sep 17 00:00:00 2001 From: Marcin Maliszkiewicz Date: Tue, 5 May 2026 15:08:24 +0200 Subject: [PATCH 4/4] test: add reproducer for auth cache crash on missing permissions column --- .../test_permissions_removal_restart.py | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 test/cluster/auth_cluster/test_permissions_removal_restart.py diff --git a/test/cluster/auth_cluster/test_permissions_removal_restart.py b/test/cluster/auth_cluster/test_permissions_removal_restart.py new file mode 100644 index 0000000000..d17f4d3e7b --- /dev/null +++ b/test/cluster/auth_cluster/test_permissions_removal_restart.py @@ -0,0 +1,65 @@ +# +# Copyright (C) 2026-present ScyllaDB +# +# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 +# + +import pytest +import logging + +from test.pylib.manager_client import ManagerClient +from test.cluster.auth_cluster import extra_scylla_config_options as auth_config + +logger = logging.getLogger(__name__) + + +@pytest.mark.asyncio +async def test_permissions_removal_and_restart(manager: ManagerClient) -> None: + """Test that a node boots successfully when role_permissions contains a + ghost row with role and resource set but the permissions column missing. + + The auth v2 migration (now removed) used INSERT to copy permission rows + from the legacy table, which created CQL row markers. Normal GRANT uses + UPDATE, which only writes collection cells without row markers. When + permissions were later revoked, the collection cells were tombstoned but + the row marker from the migration INSERT persisted. That leaves a row + with role and resource but no permissions column. + + This test simulates that scenario: + 1. INSERT permissions with row marker (simulating auth v2 migration) + 2. REVOKE ALL permissions (tombstones the cells, marker survives) + 3. Restart and verify the node boots successfully + """ + servers = await manager.servers_add(1, config=auth_config) + cql, _ = await manager.get_ready_cql(servers) + server = servers[0] + + await cql.run_async("CREATE ROLE scylla_admin WITH PASSWORD = 'x' AND LOGIN = true") + await cql.run_async("CREATE ROLE scylla_manager WITH PASSWORD = 'x' AND LOGIN = true") + + # Simulate auth v2 migration: INSERT creates a row marker alongside the + # permission cells, unlike GRANT which uses UPDATE (no row marker). + await cql.run_async( + "INSERT INTO system.role_permissions (role, resource, permissions) " + "VALUES ('scylla_admin', 'roles/scylla_manager', {'ALTER', 'AUTHORIZE', 'DROP'})") + + # Revoke all permissions — tombstones the collection cells, but the + # row marker from the INSERT survives, creating a ghost row. + await cql.run_async("REVOKE ALL ON ROLE scylla_manager FROM scylla_admin") + + # Additional check: a row with an explicitly empty permissions set. + await cql.run_async("CREATE ROLE test_empty_perms WITH PASSWORD = 'x' AND LOGIN = true") + await cql.run_async( + "INSERT INTO system.role_permissions (role, resource) " + "VALUES ('test_empty_perms', 'roles/scylla_manager')") + + # Restart — the auth cache loads the ghost row and must not crash + logger.info("Restarting node") + await manager.server_stop_gracefully(server.server_id) + await manager.server_start(server.server_id) + + await manager.driver_connect() + cql, _ = await manager.get_ready_cql(servers) + rows = await cql.run_async("SELECT * FROM system.local") + assert len(rows) == 1, "Node should be functional after restart" + logger.info("Node restarted successfully")