audit: add preprocessed rule matching cache

Running fnmatch on every audit event would hurt hot-path
latency.

Precompute per-role and per-table bitsets and intersect
them at query time. Rebuild from snapshots with a
generation counter to avoid partial state after yielding.
Unknown roles/tables fall back to linear fnmatch until
metadata notifications populate the cache.

Refs SCYLLADB-1430
This commit is contained in:
Andrzej Jackowski
2026-03-27 15:47:04 +01:00
parent 6354daa8d7
commit 97fb2f01ff
4 changed files with 276 additions and 1 deletions

View File

@@ -7,7 +7,8 @@ target_sources(scylla_audit
audit_cf_storage_helper.cc
audit_composite_storage_helper.cc
audit_rule.cc
audit_syslog_storage_helper.cc)
audit_syslog_storage_helper.cc
preprocessed_audit_rules.cc)
target_include_directories(scylla_audit
PUBLIC
${CMAKE_SOURCE_DIR})

View File

@@ -0,0 +1,194 @@
/*
* Copyright (C) 2026-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
*/
#include "audit/preprocessed_audit_rules.hh"
#include "audit/audit_rule.hh"
#include <seastar/coroutine/maybe_yield.hh>
namespace audit {
// Takes ownership of the supplied rules. Only the rule list is initialized
// here; the known-role/known-table sets and both bitset maps are left
// default-constructed (empty).
preprocessed_audit_rules::preprocessed_audit_rules(std::vector<audit_rule> rules) noexcept
    : _rules(std::move(rules)) {
}
// Installs a new rule set and recomputes the per-role / per-table bitsets.
//
// All statements before the co_await are plain synchronous code with no
// suspension point between them: the bitsets computed for the old rules are
// discarded together with those rules, and the generation counter is advanced
// so that rebuild_cache() can tell its snapshot apart from older ones.
future<> preprocessed_audit_rules::refresh_rules(std::vector<audit_rule> rules) {
    _table_to_matching_rules.clear();
    _role_to_matching_rules.clear();
    _cache_generation += 1;
    _rules = std::move(rules);

    co_await rebuild_cache();
}
// Registers `role` as known and computes its rule bitset right away.
// Does nothing when the role was already registered.
void preprocessed_audit_rules::add_known_role(const sstring& role) {
    const bool newly_added = _known_roles.insert(role).second;
    if (!newly_added) {
        return;
    }

    // The set of cache inputs changed; advance the generation so an
    // in-progress rebuild_cache() notices it.
    ++_cache_generation;

    auto role_bits = compute_role_bits(_rules, role);
    _role_to_matching_rules[role] = std::move(role_bits);
}
// Forgets `role` and drops its precomputed bitset.
// Does nothing when the role was not registered.
void preprocessed_audit_rules::remove_known_role(const sstring& role) {
    const auto erased_count = _known_roles.erase(role);
    if (erased_count == 0) {
        return;
    }

    ++_cache_generation;
    _role_to_matching_rules.erase(role);
}
void preprocessed_audit_rules::add_known_table(const sstring& keyspace, const sstring& table) {
auto [it, inserted] = _known_tables.emplace(keyspace, table);
if (inserted) {
++_cache_generation;
_table_to_matching_rules[known_table{keyspace, table}] = compute_table_bits(_rules, keyspace, table);
}
}
void preprocessed_audit_rules::remove_known_table(const sstring& keyspace, const sstring& table) {
if (_known_tables.erase(known_table{keyspace, table})) {
++_cache_generation;
_table_to_matching_rules.erase(known_table{keyspace, table});
}
}
// Builds a bitset with one bit per entry of `rules`; bit i is set when
// matches_role() accepts rules[i] for the given role.
preprocessed_audit_rules::rule_bitset
preprocessed_audit_rules::compute_role_bits(const std::vector<audit_rule>& rules, const sstring& role) const {
    rule_bitset matched(rules.size());

    size_t idx = 0;
    for (const auto& rule : rules) {
        if (matches_role(rule, role)) {
            matched.set(idx);
        }
        ++idx;
    }

    return matched;
}
// Builds a bitset with one bit per entry of `rules`; bit i is set when
// matches_qualified_table() accepts rules[i] for the qualified name derived
// from (keyspace, table).
preprocessed_audit_rules::rule_bitset
preprocessed_audit_rules::compute_table_bits(const std::vector<audit_rule>& rules, const sstring& keyspace, const sstring& table) const {
    rule_bitset matched(rules.size());

    // The qualified name is the same for every rule, so compute it once.
    sstring qualified = qualified_table_name(keyspace, table);

    size_t idx = 0;
    for (const auto& rule : rules) {
        if (matches_qualified_table(rule, qualified)) {
            matched.set(idx);
        }
        ++idx;
    }

    return matched;
}
// Walks the set bits of `bits` (each bit index is an index into _rules) and
// accumulates the sinks of every such rule that also matches `category`.
audit_sink_set preprocessed_audit_rules::collect_sinks(const rule_bitset& bits,
        statement_category category) const {
    audit_sink_set sinks;

    auto pos = bits.find_first();
    while (pos != rule_bitset::npos) {
        const auto& candidate = _rules[pos];
        if (matches_category(candidate, category)) {
            sinks.add(rule_sinks(candidate));
        }
        pos = bits.find_next(pos);
    }

    return sinks;
}
// Recomputes both bitset maps from a snapshot of the current inputs and
// installs them, retrying until a pass completes without the inputs changing.
//
// The compute phase yields, so rules, roles, or tables may be modified while
// it runs. Every such modification advances _cache_generation; a pass whose
// starting generation no longer matches is thrown away and repeated against a
// fresh snapshot.
future<> preprocessed_audit_rules::rebuild_cache() {
    for (;;) {
        // Capture the inputs for this pass. The copies hold only names (not
        // schema objects) and are bounded by the number of audit rules, roles,
        // and tables, which is expected to be modest.
        const auto generation_at_start = _cache_generation;
        const auto rules_snapshot = _rules;
        const auto roles_snapshot = _known_roles;
        const auto tables_snapshot = _known_tables;

        // Staging maps, same types as the members they will replace.
        decltype(_role_to_matching_rules) fresh_role_bits;
        decltype(_table_to_matching_rules) fresh_table_bits;

        // Each entity runs fnmatch against every rule pattern, which costs
        // more than the snapshot copies above; yield after each entity.
        if (!rules_snapshot.empty()) {
            for (const auto& role : roles_snapshot) {
                fresh_role_bits[role] = compute_role_bits(rules_snapshot, role);
                co_await coroutine::maybe_yield();
            }
            for (const auto& table_key : tables_snapshot) {
                fresh_table_bits[table_key] =
                        compute_table_bits(rules_snapshot, table_key.first, table_key.second);
                co_await coroutine::maybe_yield();
            }
        }

        if (generation_at_start != _cache_generation) {
            // Inputs changed while we were yielding: discard and start over.
            continue;
        }

        _role_to_matching_rules = std::move(fresh_role_bits);
        _table_to_matching_rules = std::move(fresh_table_bits);
        co_return;
    }
}
// Swaps in a complete new set of known roles and tables, then recomputes the
// bitsets for them.
//
// The existing bitsets are cleared and the generation counter is advanced
// before awaiting rebuild_cache(), with no suspension point in between.
future<> preprocessed_audit_rules::replace_known_entities(std::unordered_set<sstring> roles, known_table_set tables) {
    _table_to_matching_rules.clear();
    _role_to_matching_rules.clear();
    _cache_generation += 1;
    _known_tables = std::move(tables);
    _known_roles = std::move(roles);

    co_await rebuild_cache();
}
// Returns the accumulated sinks of every rule that applies to the given
// (category, keyspace, table, role).
//
// Uses the precomputed bitsets when the role (and, for table-scoped
// categories with a non-empty keyspace, the table) is present in the cache;
// otherwise evaluates every rule with matches_rule().
audit_sink_set preprocessed_audit_rules::matching_sinks(statement_category category,
        std::string_view keyspace,
        std::string_view table,
        std::string_view role) const {
    // Slow path shared by the "unknown role" and "unknown table" cases:
    // evaluate all rules with fnmatch.
    const auto evaluate_all_rules = [&] {
        audit_sink_set sinks;
        for (const auto& rule : _rules) {
            if (matches_rule(rule, category, keyspace, table, role)) {
                sinks.add(rule_sinks(rule));
            }
        }
        return sinks;
    };

    const bool table_scoped = is_table_scoped_category(category);

    // Look up the role in the precomputed map.
    const auto role_entry = _role_to_matching_rules.find(role);
    if (role_entry == _role_to_matching_rules.end()) {
        return evaluate_all_rules();
    }
    const auto& role_bits = role_entry->second;

    // Table-independent categories (AUTH, ADMIN, DCL) or operations with an
    // empty keyspace (e.g., batch operations spanning multiple tables):
    // only role matching matters.
    const bool role_only = keyspace.empty() || !table_scoped;
    if (role_only) {
        return collect_sinks(role_bits, category);
    }

    // Table-scoped categories (DML, DDL, QUERY): the table bitset is needed too.
    const auto table_entry = _table_to_matching_rules.find(
            std::pair<std::string_view, std::string_view>{keyspace, table});
    if (table_entry == _table_to_matching_rules.end()) {
        return evaluate_all_rules();
    }
    const auto& table_bits = table_entry->second;

    // Fast path: intersect the two bitsets and check the category. The
    // intersection is done bit by bit rather than with operator& so that no
    // temporary dynamic_bitset is heap-allocated.
    audit_sink_set sinks;
    for (auto idx = role_bits.find_first(); idx != rule_bitset::npos; idx = role_bits.find_next(idx)) {
        if (!table_bits.test(idx)) {
            continue;
        }
        const auto& rule = _rules[idx];
        if (matches_category(rule, category)) {
            sinks.add(rule_sinks(rule));
        }
    }
    return sinks;
}
} // namespace audit

View File

@@ -0,0 +1,79 @@
/*
* Copyright (C) 2026-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
*/
#pragma once
#include "absl-flat_hash_map.hh"
#include "audit/audit_rule.hh"
#include "seastarx.hh"
#include "utils/hash.hh"
#include <boost/dynamic_bitset.hpp>
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include <string_view>
#include <unordered_map>
#include <unordered_set>
#include <vector>
namespace audit {
/// Audit rule list together with a lookup cache: for every known role and
/// every known (keyspace, table) it stores a bitset in which bit i tells
/// whether `_rules[i]` matches that role / table. `matching_sinks()` uses the
/// bitsets when the needed entries are present and otherwise evaluates every
/// rule individually.
class preprocessed_audit_rules {
public:
using known_table = std::pair<sstring, sstring>; // (keyspace, table)
using known_table_set = std::unordered_set<known_table, utils::tuple_hash>;
/// One bit per rule; the bit index is the rule's position in the rule vector.
using rule_bitset = boost::dynamic_bitset<uint64_t>;
preprocessed_audit_rules() noexcept = default;
/// Store `rules`; no bitsets are computed by the constructor.
explicit preprocessed_audit_rules(std::vector<audit_rule> rules) noexcept;
/// Replace the rule set, discard the current bitsets and rebuild them for
/// the known roles and tables.
future<> refresh_rules(std::vector<audit_rule> rules);
/// Register a role and compute its bitset immediately. No-op if the role
/// is already known.
void add_known_role(const sstring& role);
/// Unregister a role and drop its bitset. No-op if the role is not known.
void remove_known_role(const sstring& role);
/// Register a table and compute its bitset immediately. No-op if the table
/// is already known.
void add_known_table(const sstring& keyspace, const sstring& table);
/// Unregister a table and drop its bitset. No-op if the table is not known.
void remove_known_table(const sstring& keyspace, const sstring& table);
/// Replace known roles and tables and rebuild the cache, yielding
/// between entities to avoid reactor stalls.
future<> replace_known_entities(std::unordered_set<sstring> roles, known_table_set tables);
/// Collect the sinks of all rules matching the given category, keyspace,
/// table and role. Falls back to evaluating every rule when the role, or
/// (for table-scoped categories with a non-empty keyspace) the table, has
/// no cached bitset.
audit_sink_set matching_sinks(statement_category category, std::string_view keyspace,
std::string_view table, std::string_view role) const;
const std::vector<audit_rule>& rules() const noexcept { return _rules; }
const std::unordered_set<sstring>& known_roles() const noexcept { return _known_roles; }
private:
/// Bitset over `rules` with bit i set when rules[i] matches `role`.
rule_bitset compute_role_bits(const std::vector<audit_rule>& rules, const sstring& role) const;
/// Bitset over `rules` with bit i set when rules[i] matches the qualified
/// name of (keyspace, table).
rule_bitset compute_table_bits(const std::vector<audit_rule>& rules, const sstring& keyspace, const sstring& table) const;
/// Accumulate the sinks of the rules selected by `bits` that also match
/// `category`.
audit_sink_set collect_sinks(const rule_bitset& bits, statement_category category) const;
/// Rebuild the cache from snapshots and swap it in if no concurrent
/// cache input changed while yielding.
future<> rebuild_cache();
std::vector<audit_rule> _rules;
std::unordered_set<sstring> _known_roles;
known_table_set _known_tables;
/// Incremented whenever the rules, known roles or known tables change.
/// rebuild_cache() compares the value before and after its yielding
/// compute phase to decide whether its result may be installed.
size_t _cache_generation = 0;
/// For each known role, a bitset indicating which rules match that role.
/// Uses transparent hash/equal to avoid allocating an sstring on lookup.
std::unordered_map<sstring, rule_bitset, sstring_hash, sstring_eq>
_role_to_matching_rules;
/// For each known table, a bitset indicating which rules match that table.
/// utils::tuple_hash and std::equal_to<> are transparent, so lookups can
/// use a pair<string_view, string_view> without copying into sstring.
std::unordered_map<known_table, rule_bitset, utils::tuple_hash, std::equal_to<>>
_table_to_matching_rules;
};
} // namespace audit

View File

@@ -1312,6 +1312,7 @@ scylla_core = (['message/messaging_service.cc',
'audit/audit_cf_storage_helper.cc',
'audit/audit_composite_storage_helper.cc',
'audit/audit_rule.cc',
'audit/preprocessed_audit_rules.cc',
'audit/audit_syslog_storage_helper.cc',
'tombstone_gc_options.cc',
'tombstone_gc.cc',