From 32cfa778f74feb4d860145144aeba0a97e8a9c7d Mon Sep 17 00:00:00 2001 From: Andrzej Jackowski Date: Mon, 23 Mar 2026 17:33:41 +0100 Subject: [PATCH] audit: define audit_rule type with parsing and validation Audit rules provide more granular control over which statements are audited, filtering by tables, roles, and categories. Typos in sink or category names should be caught at parse time rather than silently disabling rules at runtime. Define the audit_rule struct with JSON parsing, validation of sink and category names, serialization, and fmt support. Move statement_category, category_set, and category_to_string out of audit.hh/audit.cc so the rule type is self-contained. Refs SCYLLADB-1430 --- audit/CMakeLists.txt | 1 + audit/audit.cc | 13 ---- audit/audit.hh | 11 ---- audit/audit_rule.cc | 151 +++++++++++++++++++++++++++++++++++++++++++ audit/audit_rule.hh | 78 ++++++++++++++++++++++ configure.py | 1 + 6 files changed, 231 insertions(+), 24 deletions(-) create mode 100644 audit/audit_rule.cc create mode 100644 audit/audit_rule.hh diff --git a/audit/CMakeLists.txt b/audit/CMakeLists.txt index 96df108a4d..d6001676f0 100644 --- a/audit/CMakeLists.txt +++ b/audit/CMakeLists.txt @@ -6,6 +6,7 @@ target_sources(scylla_audit audit.cc audit_cf_storage_helper.cc audit_composite_storage_helper.cc + audit_rule.cc audit_syslog_storage_helper.cc) target_include_directories(scylla_audit PUBLIC diff --git a/audit/audit.cc b/audit/audit.cc index 25c1b6890e..b20fac1a8e 100644 --- a/audit/audit.cc +++ b/audit/audit.cc @@ -69,19 +69,6 @@ static std::unique_ptr create_storage_helper(const std::set(std::move(helpers)); } -static sstring category_to_string(statement_category category) -{ - switch (category) { - case statement_category::QUERY: return "QUERY"; - case statement_category::DML: return "DML"; - case statement_category::DDL: return "DDL"; - case statement_category::DCL: return "DCL"; - case statement_category::AUTH: return "AUTH"; - case statement_category::ADMIN: return 
"ADMIN"; - } - return ""; -} - sstring audit_info::category_string() const { return category_to_string(_category); } diff --git a/audit/audit.hh b/audit/audit.hh index b5833c8189..6e17599576 100644 --- a/audit/audit.hh +++ b/audit/audit.hh @@ -60,17 +60,6 @@ public: } }; -enum class statement_category { - QUERY, DML, DDL, DCL, AUTH, ADMIN -}; - -using category_set = enum_set>; - // Holds the audit metadata for a single request: the operation category, // target keyspace/table, and the query string to be logged. class audit_info { diff --git a/audit/audit_rule.cc b/audit/audit_rule.cc new file mode 100644 index 0000000000..c637aaf547 --- /dev/null +++ b/audit/audit_rule.cc @@ -0,0 +1,151 @@ +/* + * Copyright (C) 2026-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 + */ + +#include "audit/audit_rule.hh" +#include "audit/audit.hh" +#include "utils/rjson.hh" + +#include + +namespace audit { + +sstring category_to_string(statement_category category) { /* Returns the upper-case name of the given category; falls through to an empty string if no case matches. */ + switch (category) { + case statement_category::QUERY: return "QUERY"; + case statement_category::DML: return "DML"; + case statement_category::DDL: return "DDL"; + case statement_category::DCL: return "DCL"; + case statement_category::AUTH: return "AUTH"; + case statement_category::ADMIN: return "ADMIN"; + } + return ""; +} + +static statement_category string_to_category(std::string_view s) { /* Exact, case-sensitive reverse of category_to_string; any other input throws audit_exception naming the offending string. */ + if (s == "QUERY") return statement_category::QUERY; + if (s == "DML") return statement_category::DML; + if (s == "DDL") return statement_category::DDL; + if (s == "DCL") return statement_category::DCL; + if (s == "AUTH") return statement_category::AUTH; + if (s == "ADMIN") return statement_category::ADMIN; + throw audit_exception(fmt::format( + "Bad configuration: invalid category '{}' in audit rule", s)); +} + +namespace { + +rjson::value string_vec_to_json(const std::vector& vec) { /* Appends each string of vec, in order, to a new JSON array and returns it. */ + rjson::value arr = rjson::empty_array(); + for (const auto& s : vec) { + rjson::push_back(arr, 
rjson::from_string(s)); + } + return arr; +} + +std::vector json_array_to_string_vec(const rjson::value& arr, const sstring& field_name) { /* Collects the string elements of arr in order. field_name is used only to label the audit_exception thrown when arr is not an array or holds a non-string element. */ + if (!arr.IsArray()) { + throw audit_exception(fmt::format( + "Bad configuration: '{}' must be a JSON array", field_name)); + } + std::vector result; + for (const auto& elem : arr.GetArray()) { + if (!elem.IsString()) { + throw audit_exception(fmt::format( + "Bad configuration: '{}' array elements must be strings", field_name)); + } + result.emplace_back(rjson::to_string_view(elem)); + } + return result; +} + +} // anonymous namespace + +category_set parse_categories(const std::vector& categories) { /* Converts every listed name with string_to_category and sets it in the result; an unrecognized name therefore propagates audit_exception. An empty list yields a default-constructed set. */ + category_set result; + for (const auto& cat : categories) { + result.set(string_to_category(cat)); + } + return result; +} + +void validate_audit_rule(const audit_rule& rule) { /* Checks only the sinks field; throws audit_exception on the first violation and returns normally otherwise. */ + // Sinks: must be non-empty, each must be "table" or "syslog" + if (rule.sinks.empty()) { + throw audit_exception("Bad configuration: 'sinks' must be non-empty in audit rule"); + } + for (const auto& sink : rule.sinks) { + if (sink != "table" && sink != "syslog") { + throw audit_exception(fmt::format( + "Bad configuration: invalid sink '{}' in audit rule (must be 'table' or 'syslog')", sink)); + } + } +} + +std::vector parse_audit_rules_from_json(const sstring& json_str) { /* Parses a JSON array of rule objects into audit_rule values, in input order. An empty string yields no rules. Malformed JSON, a non-array root, a non-object element, a missing required field, a wrongly typed field, an unknown category or a rule rejected by validate_audit_rule raises audit_exception. */ + if (json_str.empty()) { + return {}; + } + + rjson::value parsed; + try { + parsed = rjson::parse(json_str); + } catch (const rjson::error& e) { + throw audit_exception(fmt::format( + "Bad configuration: failed to parse audit_rules JSON: {}", e.what())); + } + + if (!parsed.IsArray()) { + throw audit_exception("Bad configuration: audit_rules must be a JSON array"); + } + + std::vector rules; + for (const auto& elem : parsed.GetArray()) { + if (!elem.IsObject()) { + throw audit_exception("Bad configuration: each audit rule must be a JSON object"); + } + + for (const auto& field : audit_rule_required_fields) { + if (!rjson::find(elem, field)) { + throw audit_exception(fmt::format( + 
"Bad configuration: audit rule missing required field '{}'", field)); + } + } + + audit_rule rule; /* The loop over audit_rule_required_fields above already rejected objects lacking any of the four keys, so the lookups dereferenced below are the ones that were just checked. */ + rule.sinks = json_array_to_string_vec(*rjson::find(elem, "sinks"), "sinks"); + rule.categories = parse_categories(json_array_to_string_vec(*rjson::find(elem, "categories"), "categories")); + rule.qualified_table_names = json_array_to_string_vec(*rjson::find(elem, "qualified_table_names"), "qualified_table_names"); + rule.roles = json_array_to_string_vec(*rjson::find(elem, "roles"), "roles"); + + validate_audit_rule(rule); + rules.push_back(std::move(rule)); + } + + return rules; +} + +sstring audit_rules_to_json_string(const std::vector& rules) { /* Serializes rules to a JSON array of objects carrying the keys sinks, categories, qualified_table_names and roles; categories are written as their category_to_string names. These are the same four keys parse_audit_rules_from_json reads. */ + rjson::value arr = rjson::empty_array(); + + for (const auto& rule : rules) { + rjson::value obj = rjson::empty_object(); + rjson::add_with_string_name(obj, "sinks", string_vec_to_json(rule.sinks)); + rjson::value cat_arr = rjson::empty_array(); + for (auto cat : rule.categories) { + rjson::push_back(cat_arr, rjson::from_string(category_to_string(cat))); + } + rjson::add_with_string_name(obj, "categories", std::move(cat_arr)); + rjson::add_with_string_name(obj, "qualified_table_names", string_vec_to_json(rule.qualified_table_names)); + rjson::add_with_string_name(obj, "roles", string_vec_to_json(rule.roles)); + rjson::push_back(arr, std::move(obj)); + } + + return rjson::print(arr); +} + +} // namespace audit diff --git a/audit/audit_rule.hh b/audit/audit_rule.hh new file mode 100644 index 0000000000..cf6e56aba6 --- /dev/null +++ b/audit/audit_rule.hh @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2026-present ScyllaDB + */ + +/* + * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1 + */ +#pragma once + +#include "seastarx.hh" +#include "enum_set.hh" +#include + +#include +#include +#include +#include +#include + +namespace audit { + +enum class statement_category { /* NOTE(review): this patch removes the enum from audit.hh and that hunk adds no include of this header; confirm audit.hh and audit.cc still see statement_category and category_to_string. */ + QUERY, DML, DDL, DCL, AUTH, ADMIN +}; + +using category_set = enum_set>; + +sstring category_to_string(statement_category category); + +/// Required 
field names for an audit rule (used by both JSON and YAML parsers). +inline constexpr std::array audit_rule_required_fields = { + "sinks", "categories", "qualified_table_names", "roles" +}; + +struct audit_rule { /* One audit rule: the sinks it names plus the categories, qualified table names and roles it lists. The commit description says rules filter audited statements by these three. */ + std::vector sinks; + category_set categories; + std::vector qualified_table_names; + std::vector roles; + + bool operator==(const audit_rule& other) const { /* Member-wise equality; the category sets are compared through their mask() values. */ + return sinks == other.sinks + && categories.mask() == other.categories.mask() + && qualified_table_names == other.qualified_table_names + && roles == other.roles; + } +}; + +std::vector parse_audit_rules_from_json(const sstring& json_str); + +sstring audit_rules_to_json_string(const std::vector& rules); + +category_set parse_categories(const std::vector& categories); + +void validate_audit_rule(const audit_rule& rule); + +} // namespace audit + +template<> +struct fmt::formatter { + constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); } /* Returns the start of the spec unchanged, so no format specifiers are interpreted. */ + auto format(const audit::audit_rule& rule, fmt::format_context& ctx) const { /* Writes audit_rule{sinks=[...], categories=[...], qualified_table_names=[...], roles=[...]} with entries separated by a comma and no space; categories use their category_to_string names. */ + auto out = fmt::format_to(ctx.out(), "audit_rule{{sinks=[{}], categories=[", fmt::join(rule.sinks, ",")); + bool first = true; + for (auto cat : rule.categories) { + if (!first) { out = fmt::format_to(out, ","); } + out = fmt::format_to(out, "{}", audit::category_to_string(cat)); + first = false; + } + return fmt::format_to(out, "], qualified_table_names=[{}], roles=[{}]}}", + fmt::join(rule.qualified_table_names, ","), fmt::join(rule.roles, ",")); + } +}; diff --git a/configure.py b/configure.py index 7f759b812d..4f6dde9bd8 100755 --- a/configure.py +++ b/configure.py @@ -1310,6 +1310,7 @@ scylla_core = (['message/messaging_service.cc', 'audit/audit.cc', 'audit/audit_cf_storage_helper.cc', 'audit/audit_composite_storage_helper.cc', + 'audit/audit_rule.cc', 'audit/audit_syslog_storage_helper.cc', 'tombstone_gc_options.cc', 'tombstone_gc.cc',