diff --git a/audit/CMakeLists.txt b/audit/CMakeLists.txt index d6001676f0..abd65bd965 100644 --- a/audit/CMakeLists.txt +++ b/audit/CMakeLists.txt @@ -7,7 +7,8 @@ target_sources(scylla_audit audit_cf_storage_helper.cc audit_composite_storage_helper.cc audit_rule.cc - audit_syslog_storage_helper.cc) + audit_syslog_storage_helper.cc + preprocessed_audit_rules.cc) target_include_directories(scylla_audit PUBLIC ${CMAKE_SOURCE_DIR}) diff --git a/audit/preprocessed_audit_rules.cc b/audit/preprocessed_audit_rules.cc new file mode 100644 index 0000000000..2da6cb4f20 --- /dev/null +++ b/audit/preprocessed_audit_rules.cc @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2026-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 + */ + +#include "audit/preprocessed_audit_rules.hh" +#include "audit/audit_rule.hh" + +#include + +namespace audit { + +preprocessed_audit_rules::preprocessed_audit_rules(std::vector rules) noexcept + : _rules(std::move(rules)) +{ } + +future<> preprocessed_audit_rules::refresh_rules(std::vector rules) { + _rules = std::move(rules); + _role_to_matching_rules.clear(); + _table_to_matching_rules.clear(); + ++_cache_generation; + co_await rebuild_cache(); +} + +void preprocessed_audit_rules::add_known_role(const sstring& role) { + auto [it, inserted] = _known_roles.insert(role); + if (inserted) { + ++_cache_generation; + _role_to_matching_rules[role] = compute_role_bits(_rules, role); + } +} + +void preprocessed_audit_rules::remove_known_role(const sstring& role) { + if (_known_roles.erase(role)) { + ++_cache_generation; + _role_to_matching_rules.erase(role); + } +} + +void preprocessed_audit_rules::add_known_table(const sstring& keyspace, const sstring& table) { + auto [it, inserted] = _known_tables.emplace(keyspace, table); + if (inserted) { + ++_cache_generation; + _table_to_matching_rules[known_table{keyspace, table}] = compute_table_bits(_rules, keyspace, table); + } +} + +void preprocessed_audit_rules::remove_known_table(const sstring& keyspace, const sstring& table) { + if (_known_tables.erase(known_table{keyspace, table})) { + ++_cache_generation; + _table_to_matching_rules.erase(known_table{keyspace, table}); + } +} + +preprocessed_audit_rules::rule_bitset +preprocessed_audit_rules::compute_role_bits(const std::vector& rules, const sstring& role) const { + rule_bitset bits(rules.size()); + for (size_t i = 0; i < rules.size(); ++i) { + if (matches_role(rules[i], role)) { + bits.set(i); + } + } + return bits; +} + +preprocessed_audit_rules::rule_bitset +preprocessed_audit_rules::compute_table_bits(const std::vector& rules, const sstring& keyspace, const sstring& table) const { + rule_bitset bits(rules.size()); + sstring qt = qualified_table_name(keyspace, table); + for (size_t i = 0; i < rules.size(); ++i) { + if (matches_qualified_table(rules[i], qt)) { + bits.set(i); + } + } + return bits; +} + +audit_sink_set preprocessed_audit_rules::collect_sinks(const rule_bitset& bits, + statement_category category) const { + audit_sink_set result; + for (auto i = bits.find_first(); i != rule_bitset::npos; i = bits.find_next(i)) { + const auto& rule = _rules[i]; + if (matches_category(rule, category)) { + result.add(rule_sinks(rule)); + } + } + return result; +} + +future<> preprocessed_audit_rules::rebuild_cache() { + // Retry loop: if rules, roles, or tables change while we yield during + // the compute phase, the generation counter will have advanced and we + // discard the stale result and rebuild from the updated snapshot. + while (true) { + // Snapshot current state to detect concurrent modifications after yielding. + // These copies are lightweight (just names, not schema objects) and bounded + // by the number of audit rules, roles, and tables — expected to be modest. + auto rules = _rules; + auto known_roles = _known_roles; + auto known_tables = _known_tables; + auto generation = _cache_generation; + + std::unordered_map + role_to_matching_rules; + std::unordered_map> + table_to_matching_rules; + + // Precompute per-entity rule bitsets. Each iteration invokes fnmatch + // for every rule pattern, making this more expensive than the copies above. + if (!rules.empty()) { + for (const auto& role : known_roles) { + role_to_matching_rules[role] = compute_role_bits(rules, role); + co_await coroutine::maybe_yield(); + } + for (const auto& [ks, tbl] : known_tables) { + table_to_matching_rules[known_table{ks, tbl}] = compute_table_bits(rules, ks, tbl); + co_await coroutine::maybe_yield(); + } + } + + if (generation == _cache_generation) { + _role_to_matching_rules = std::move(role_to_matching_rules); + _table_to_matching_rules = std::move(table_to_matching_rules); + co_return; + } + } +} + +future<> preprocessed_audit_rules::replace_known_entities(std::unordered_set roles, known_table_set tables) { + _known_roles = std::move(roles); + _known_tables = std::move(tables); + _role_to_matching_rules.clear(); + _table_to_matching_rules.clear(); + ++_cache_generation; + co_await rebuild_cache(); +} + +audit_sink_set preprocessed_audit_rules::matching_sinks(statement_category category, + std::string_view keyspace, + std::string_view table, + std::string_view role) const { + bool table_scoped = is_table_scoped_category(category); + + // Look up role in the precomputed map. + auto role_it = _role_to_matching_rules.find(role); + if (role_it == _role_to_matching_rules.end()) { + // Unknown role — slow path: evaluate all rules with fnmatch. + audit_sink_set result; + for (const auto& rule : _rules) { + if (matches_rule(rule, category, keyspace, table, role)) { + result.add(rule_sinks(rule)); + } + } + return result; + } + + if (!table_scoped || keyspace.empty()) { + // Table-independent categories (AUTH, ADMIN, DCL) or operations with + // empty keyspace (e.g., batch operations spanning multiple tables): + // only role matching matters. + return collect_sinks(role_it->second, category); + } + + // Table-scoped categories (DML, DDL, QUERY): intersect role and table bitsets. + auto table_it = _table_to_matching_rules.find( + std::pair{keyspace, table}); + if (table_it == _table_to_matching_rules.end()) { + // Unknown table — slow path: evaluate all rules with fnmatch. + audit_sink_set result; + for (const auto& rule : _rules) { + if (matches_rule(rule, category, keyspace, table, role)) { + result.add(rule_sinks(rule)); + } + } + return result; + } + + // Fast path: intersect precomputed bitsets and check category. + // Iterates inline instead of using operator& to avoid heap-allocating a temporary dynamic_bitset. + audit_sink_set result; + const auto& role_bits = role_it->second; + const auto& table_bits = table_it->second; + for (auto i = role_bits.find_first(); i != rule_bitset::npos; i = role_bits.find_next(i)) { + if (table_bits.test(i) && matches_category(_rules[i], category)) { + result.add(rule_sinks(_rules[i])); + } + } + return result; +} + +} // namespace audit diff --git a/audit/preprocessed_audit_rules.hh b/audit/preprocessed_audit_rules.hh new file mode 100644 index 0000000000..6c00545c0e --- /dev/null +++ b/audit/preprocessed_audit_rules.hh @@ -0,0 +1,79 @@ +/* + * Copyright (C) 2026-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 + */ +#pragma once + +#include "absl-flat_hash_map.hh" +#include "audit/audit_rule.hh" +#include "seastarx.hh" +#include "utils/hash.hh" +#include +#include +#include + +#include +#include +#include +#include + +namespace audit { + +class preprocessed_audit_rules { +public: + using known_table = std::pair; // (keyspace, table) + using known_table_set = std::unordered_set; + using rule_bitset = boost::dynamic_bitset; + + preprocessed_audit_rules() noexcept = default; + explicit preprocessed_audit_rules(std::vector rules) noexcept; + + future<> refresh_rules(std::vector rules); + + void add_known_role(const sstring& role); + void remove_known_role(const sstring& role); + + void add_known_table(const sstring& keyspace, const sstring& table); + void remove_known_table(const sstring& keyspace, const sstring& table); + + /// Replace known roles and tables and rebuild the cache, yielding + /// between entities to avoid reactor stalls. + future<> replace_known_entities(std::unordered_set roles, known_table_set tables); + + audit_sink_set matching_sinks(statement_category category, std::string_view keyspace, + std::string_view table, std::string_view role) const; + + const std::vector& rules() const noexcept { return _rules; } + const std::unordered_set& known_roles() const noexcept { return _known_roles; } + +private: + rule_bitset compute_role_bits(const std::vector& rules, const sstring& role) const; + rule_bitset compute_table_bits(const std::vector& rules, const sstring& keyspace, const sstring& table) const; + + audit_sink_set collect_sinks(const rule_bitset& bits, statement_category category) const; + + /// Rebuild the cache from snapshots and swap it in if no concurrent + /// cache input changed while yielding. + future<> rebuild_cache(); + + std::vector _rules; + std::unordered_set _known_roles; + known_table_set _known_tables; + size_t _cache_generation = 0; + + /// For each known role, a bitset indicating which rules match that role. + /// Uses transparent hash/equal to avoid allocating an sstring on lookup. + std::unordered_map + _role_to_matching_rules; + + /// For each known table, a bitset indicating which rules match that table. + /// utils::tuple_hash and std::equal_to<> are transparent, so lookups can + /// use a pair without copying into sstring. + std::unordered_map> + _table_to_matching_rules; +}; + +} // namespace audit diff --git a/configure.py b/configure.py index 2c8ee8bf04..ae774393b4 100755 --- a/configure.py +++ b/configure.py @@ -1312,6 +1312,7 @@ scylla_core = (['message/messaging_service.cc', 'audit/audit_cf_storage_helper.cc', 'audit/audit_composite_storage_helper.cc', 'audit/audit_rule.cc', + 'audit/preprocessed_audit_rules.cc', 'audit/audit_syslog_storage_helper.cc', 'tombstone_gc_options.cc', 'tombstone_gc.cc',