mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-22 15:52:13 +00:00
audit: add preprocessed rule matching cache
Running fnmatch on every audit event would hurt hot-path latency. Precompute per-role and per-table bitsets and intersect them at query time. Rebuild from snapshots with a generation counter to avoid partial state after yielding. Unknown roles/tables fall back to linear fnmatch until metadata notifications populate the cache. Refs SCYLLADB-1430
This commit is contained in:
@@ -7,7 +7,8 @@ target_sources(scylla_audit
|
||||
audit_cf_storage_helper.cc
|
||||
audit_composite_storage_helper.cc
|
||||
audit_rule.cc
|
||||
audit_syslog_storage_helper.cc)
|
||||
audit_syslog_storage_helper.cc
|
||||
preprocessed_audit_rules.cc)
|
||||
target_include_directories(scylla_audit
|
||||
PUBLIC
|
||||
${CMAKE_SOURCE_DIR})
|
||||
|
||||
194
audit/preprocessed_audit_rules.cc
Normal file
194
audit/preprocessed_audit_rules.cc
Normal file
@@ -0,0 +1,194 @@
|
||||
/*
|
||||
* Copyright (C) 2026-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
||||
*/
|
||||
|
||||
#include "audit/preprocessed_audit_rules.hh"
|
||||
#include "audit/audit_rule.hh"
|
||||
|
||||
#include <seastar/coroutine/maybe_yield.hh>
|
||||
|
||||
namespace audit {
|
||||
|
||||
/// Construct a matcher that owns `rules`.
///
/// Only the rule list is initialised here; the known-role / known-table sets
/// and both lookup caches start out empty.
preprocessed_audit_rules::preprocessed_audit_rules(std::vector<audit_rule> rules) noexcept
    : _rules(std::move(rules)) {
}
|
||||
|
||||
/// Install a new rule list and recompute the per-role / per-table caches.
///
/// The cached bitsets index into the rule list, so they are dropped
/// synchronously, before the first possible yield, together with the rule
/// swap. Until rebuild_cache() publishes fresh maps, matching_sinks() finds no
/// cache entry and evaluates every rule directly. Bumping the generation makes
/// a rebuild that is already in flight discard its result and start over.
future<> preprocessed_audit_rules::refresh_rules(std::vector<audit_rule> rules) {
    _role_to_matching_rules.clear();
    _table_to_matching_rules.clear();
    _rules = std::move(rules);
    ++_cache_generation;
    co_await rebuild_cache();
}
|
||||
|
||||
void preprocessed_audit_rules::add_known_role(const sstring& role) {
|
||||
auto [it, inserted] = _known_roles.insert(role);
|
||||
if (inserted) {
|
||||
++_cache_generation;
|
||||
_role_to_matching_rules[role] = compute_role_bits(_rules, role);
|
||||
}
|
||||
}
|
||||
|
||||
void preprocessed_audit_rules::remove_known_role(const sstring& role) {
|
||||
if (_known_roles.erase(role)) {
|
||||
++_cache_generation;
|
||||
_role_to_matching_rules.erase(role);
|
||||
}
|
||||
}
|
||||
|
||||
void preprocessed_audit_rules::add_known_table(const sstring& keyspace, const sstring& table) {
|
||||
auto [it, inserted] = _known_tables.emplace(keyspace, table);
|
||||
if (inserted) {
|
||||
++_cache_generation;
|
||||
_table_to_matching_rules[known_table{keyspace, table}] = compute_table_bits(_rules, keyspace, table);
|
||||
}
|
||||
}
|
||||
|
||||
void preprocessed_audit_rules::remove_known_table(const sstring& keyspace, const sstring& table) {
|
||||
if (_known_tables.erase(known_table{keyspace, table})) {
|
||||
++_cache_generation;
|
||||
_table_to_matching_rules.erase(known_table{keyspace, table});
|
||||
}
|
||||
}
|
||||
|
||||
/// Build a bitset with one bit per entry of `rules`; bit i is set when
/// matches_role(rules[i], role) holds.
preprocessed_audit_rules::rule_bitset
preprocessed_audit_rules::compute_role_bits(const std::vector<audit_rule>& rules, const sstring& role) const {
    rule_bitset matched(rules.size());
    size_t index = 0;
    for (const auto& rule : rules) {
        if (matches_role(rule, role)) {
            matched.set(index);
        }
        ++index;
    }
    return matched;
}
|
||||
|
||||
/// Build a bitset with one bit per entry of `rules`; bit i is set when
/// rules[i] matches the qualified name of (`keyspace`, `table`).
preprocessed_audit_rules::rule_bitset
preprocessed_audit_rules::compute_table_bits(const std::vector<audit_rule>& rules, const sstring& keyspace, const sstring& table) const {
    // The qualified name is the same for every rule, so build it once.
    const sstring qualified = qualified_table_name(keyspace, table);
    rule_bitset matched(rules.size());
    size_t index = 0;
    for (const auto& rule : rules) {
        if (matches_qualified_table(rule, qualified)) {
            matched.set(index);
        }
        ++index;
    }
    return matched;
}
|
||||
|
||||
/// Walk the set bits of `bits` and, for every corresponding rule that also
/// matches `category`, add that rule's sinks to the returned set.
///
/// `bits` must have been computed against the current `_rules`, since each set
/// bit is used as an index into it.
audit_sink_set preprocessed_audit_rules::collect_sinks(const rule_bitset& bits,
        statement_category category) const {
    audit_sink_set sinks;
    auto pos = bits.find_first();
    while (pos != rule_bitset::npos) {
        const auto& candidate = _rules[pos];
        if (matches_category(candidate, category)) {
            sinks.add(rule_sinks(candidate));
        }
        pos = bits.find_next(pos);
    }
    return sinks;
}
|
||||
|
||||
/// Recompute both lookup caches from the current rules, roles and tables.
///
/// The work is done on private copies and yields between entities, so other
/// code may change the rules or the known roles/tables while it runs. Every
/// such change advances `_cache_generation`; if the generation observed at
/// snapshot time no longer matches once the computation finishes, the result
/// is stale and is thrown away, and the whole computation is retried from a
/// fresh snapshot. The members are only overwritten once a computation
/// completes without any concurrent change.
future<> preprocessed_audit_rules::rebuild_cache() {
    while (true) {
        // Snapshot the inputs. These copies hold only rules and names, and are
        // expected to be modest in size.
        const auto rules = _rules;
        const auto known_roles = _known_roles;
        const auto known_tables = _known_tables;
        const auto generation = _cache_generation;

        // Derive the scratch map types from the members so the two can never
        // drift apart (hash / equality functors included).
        decltype(_role_to_matching_rules) role_to_matching_rules;
        decltype(_table_to_matching_rules) table_to_matching_rules;

        // Precompute per-entity rule bitsets. Each entity is tested against
        // every rule pattern (fnmatch), which is the expensive part, hence
        // the yield after each one. With no rules there is nothing to cache.
        if (!rules.empty()) {
            // Sizes are known up front: reserve once instead of rehashing
            // while inserting.
            role_to_matching_rules.reserve(known_roles.size());
            for (const auto& role : known_roles) {
                // Keys come from a set, so emplace always inserts; this avoids
                // operator[]'s default-constructed bitset followed by assignment.
                role_to_matching_rules.emplace(role, compute_role_bits(rules, role));
                co_await coroutine::maybe_yield();
            }

            table_to_matching_rules.reserve(known_tables.size());
            for (const auto& entry : known_tables) {
                table_to_matching_rules.emplace(entry, compute_table_bits(rules, entry.first, entry.second));
                co_await coroutine::maybe_yield();
            }
        }

        // Publish only if nothing changed while we were yielding; otherwise
        // loop and rebuild from the updated state.
        if (generation == _cache_generation) {
            _role_to_matching_rules = std::move(role_to_matching_rules);
            _table_to_matching_rules = std::move(table_to_matching_rules);
            co_return;
        }
    }
}
|
||||
|
||||
/// Replace the complete sets of known roles and tables, then recompute the
/// caches.
///
/// The existing cache entries describe the previous entity sets, so they are
/// dropped synchronously before the rebuild starts. Bumping the generation
/// makes a rebuild that is already in flight discard its result.
future<> preprocessed_audit_rules::replace_known_entities(std::unordered_set<sstring> roles, known_table_set tables) {
    _role_to_matching_rules.clear();
    _table_to_matching_rules.clear();
    _known_roles = std::move(roles);
    _known_tables = std::move(tables);
    ++_cache_generation;
    co_await rebuild_cache();
}
|
||||
|
||||
/// Return the sinks of every rule that applies to the given statement.
///
/// Resolution order:
///  1. Role has no cache entry: evaluate every rule directly (slow path).
///  2. Category is not table-scoped, or `keyspace` is empty: only the role's
///     cached bitset and the category are consulted.
///  3. Table has no cache entry: evaluate every rule directly (slow path).
///  4. Otherwise: rules set in both the role's and the table's bitset are
///     filtered by category (fast path).
audit_sink_set preprocessed_audit_rules::matching_sinks(statement_category category,
        std::string_view keyspace,
        std::string_view table,
        std::string_view role) const {
    const bool table_scoped = is_table_scoped_category(category);

    // Slow path, shared by the "unknown role" and "unknown table" cases:
    // test every rule against the full (category, keyspace, table, role) tuple.
    const auto evaluate_all_rules = [&] {
        audit_sink_set result;
        for (const auto& rule : _rules) {
            if (matches_rule(rule, category, keyspace, table, role)) {
                result.add(rule_sinks(rule));
            }
        }
        return result;
    };

    // Look up role in the precomputed map.
    const auto role_it = _role_to_matching_rules.find(role);
    if (role_it == _role_to_matching_rules.end()) {
        return evaluate_all_rules();
    }

    if (!table_scoped || keyspace.empty()) {
        // Table-independent categories (AUTH, ADMIN, DCL) or operations with
        // empty keyspace (e.g., batch operations spanning multiple tables):
        // only role matching matters.
        return collect_sinks(role_it->second, category);
    }

    // Table-scoped categories (DML, DDL, QUERY): intersect role and table bitsets.
    // The lookup key is a pair of string_views, so no sstring is built here.
    const auto table_it = _table_to_matching_rules.find(
            std::pair<std::string_view, std::string_view>{keyspace, table});
    if (table_it == _table_to_matching_rules.end()) {
        return evaluate_all_rules();
    }

    // Fast path: intersect precomputed bitsets and check category.
    // Iterates inline instead of using operator& to avoid heap-allocating a
    // temporary dynamic_bitset.
    audit_sink_set result;
    const auto& role_bits = role_it->second;
    const auto& table_bits = table_it->second;
    for (auto i = role_bits.find_first(); i != rule_bitset::npos; i = role_bits.find_next(i)) {
        if (table_bits.test(i) && matches_category(_rules[i], category)) {
            result.add(rule_sinks(_rules[i]));
        }
    }
    return result;
}
|
||||
|
||||
} // namespace audit
|
||||
79
audit/preprocessed_audit_rules.hh
Normal file
79
audit/preprocessed_audit_rules.hh
Normal file
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (C) 2026-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
||||
*/
|
||||
#pragma once
|
||||
|
||||
#include "absl-flat_hash_map.hh"
|
||||
#include "audit/audit_rule.hh"
|
||||
#include "seastarx.hh"
|
||||
#include "utils/hash.hh"
|
||||
#include <boost/dynamic_bitset.hpp>
|
||||
#include <seastar/core/future.hh>
|
||||
#include <seastar/core/sstring.hh>
|
||||
|
||||
#include <string_view>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
namespace audit {
|
||||
|
||||
class preprocessed_audit_rules {
|
||||
public:
|
||||
using known_table = std::pair<sstring, sstring>; // (keyspace, table)
|
||||
using known_table_set = std::unordered_set<known_table, utils::tuple_hash>;
|
||||
using rule_bitset = boost::dynamic_bitset<uint64_t>;
|
||||
|
||||
preprocessed_audit_rules() noexcept = default;
|
||||
explicit preprocessed_audit_rules(std::vector<audit_rule> rules) noexcept;
|
||||
|
||||
future<> refresh_rules(std::vector<audit_rule> rules);
|
||||
|
||||
void add_known_role(const sstring& role);
|
||||
void remove_known_role(const sstring& role);
|
||||
|
||||
void add_known_table(const sstring& keyspace, const sstring& table);
|
||||
void remove_known_table(const sstring& keyspace, const sstring& table);
|
||||
|
||||
/// Replace known roles and tables and rebuild the cache, yielding
|
||||
/// between entities to avoid reactor stalls.
|
||||
future<> replace_known_entities(std::unordered_set<sstring> roles, known_table_set tables);
|
||||
|
||||
audit_sink_set matching_sinks(statement_category category, std::string_view keyspace,
|
||||
std::string_view table, std::string_view role) const;
|
||||
|
||||
const std::vector<audit_rule>& rules() const noexcept { return _rules; }
|
||||
const std::unordered_set<sstring>& known_roles() const noexcept { return _known_roles; }
|
||||
|
||||
private:
|
||||
rule_bitset compute_role_bits(const std::vector<audit_rule>& rules, const sstring& role) const;
|
||||
rule_bitset compute_table_bits(const std::vector<audit_rule>& rules, const sstring& keyspace, const sstring& table) const;
|
||||
|
||||
audit_sink_set collect_sinks(const rule_bitset& bits, statement_category category) const;
|
||||
|
||||
/// Rebuild the cache from snapshots and swap it in if no concurrent
|
||||
/// cache input changed while yielding.
|
||||
future<> rebuild_cache();
|
||||
|
||||
std::vector<audit_rule> _rules;
|
||||
std::unordered_set<sstring> _known_roles;
|
||||
known_table_set _known_tables;
|
||||
size_t _cache_generation = 0;
|
||||
|
||||
/// For each known role, a bitset indicating which rules match that role.
|
||||
/// Uses transparent hash/equal to avoid allocating an sstring on lookup.
|
||||
std::unordered_map<sstring, rule_bitset, sstring_hash, sstring_eq>
|
||||
_role_to_matching_rules;
|
||||
|
||||
/// For each known table, a bitset indicating which rules match that table.
|
||||
/// utils::tuple_hash and std::equal_to<> are transparent, so lookups can
|
||||
/// use a pair<string_view, string_view> without copying into sstring.
|
||||
std::unordered_map<known_table, rule_bitset, utils::tuple_hash, std::equal_to<>>
|
||||
_table_to_matching_rules;
|
||||
};
|
||||
|
||||
} // namespace audit
|
||||
@@ -1312,6 +1312,7 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'audit/audit_cf_storage_helper.cc',
|
||||
'audit/audit_composite_storage_helper.cc',
|
||||
'audit/audit_rule.cc',
|
||||
'audit/preprocessed_audit_rules.cc',
|
||||
'audit/audit_syslog_storage_helper.cc',
|
||||
'tombstone_gc_options.cc',
|
||||
'tombstone_gc.cc',
|
||||
|
||||
Reference in New Issue
Block a user