Files
scylladb/test/boost/alternator_unit_test.cc
Szymon Malewski b75c6c9ef7 alternator: adds expression cache implementation
Every expression in Alternator's requests is parsed from string to adequate structure.
This patch implements a caching structure (input expression strings mapping to parsed 'template' structures), which will be used for handling requests in following commits.
If the reqested expression is valid (parsable) the cache will always return a value - if it is not already in the cache it will be created and stored.
The cache has limited (live configurable) size - when it is reached, the least recently used entry is removed.
The copy of the template in cache is returned - individual instances still need to be resolved (placeholders substituted with names and values).
Invalid requests will have no effect on the cache - the parser throws an exception.
Caching is implemented for all expression types. Internally it is based on helper structure `lru_string_map`.
Basic metrics (total count of hits and misses for each expression type and number of evictions) are implemented.
Metrics are used in boost unit tests.
2025-09-28 04:27:44 +02:00

434 lines
20 KiB
C++

/*
* Copyright (C) 2020-present ScyllaDB
*/
/*
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
*/
#include "test/lib/scylla_test_case.hh"
#include <seastar/util/defer.hh>
#include <seastar/core/memory.hh>
#include "utils/base64.hh"
#include "utils/rjson.hh"
#include "alternator/serialization.hh"
#include "alternator/expressions.hh"
#include <seastar/core/coroutine.hh>
#include <seastar/core/sleep.hh>
static std::map<std::string, std::string> strings {
{"", ""},
{"a", "YQ=="},
{"ab", "YWI="},
{"abc", "YWJj"},
{"abcd", "YWJjZA=="},
{"abcde", "YWJjZGU="},
{"abcdef", "YWJjZGVm"},
{"abcdefg", "YWJjZGVmZw=="},
{"abcdefgh", "YWJjZGVmZ2g="},
};
BOOST_AUTO_TEST_CASE(test_base64_encode_decode) {
for (auto& [str, encoded] : strings) {
BOOST_REQUIRE_EQUAL(base64_encode(to_bytes_view(str)), encoded);
auto decoded = base64_decode(encoded);
BOOST_REQUIRE_EQUAL(to_bytes_view(str), bytes_view(decoded));
}
}
BOOST_AUTO_TEST_CASE(test_base64_decoded_len) {
for (auto& [str, encoded] : strings) {
BOOST_REQUIRE_EQUAL(str.size(), base64_decoded_len(encoded));
}
}
BOOST_AUTO_TEST_CASE(test_base64_begins_with) {
for (auto& [str, encoded] : strings) {
for (size_t i = 0; i < str.size(); ++i) {
std::string prefix(str.c_str(), i);
std::string encoded_prefix = base64_encode(to_bytes_view(prefix));
BOOST_REQUIRE(base64_begins_with(encoded, encoded_prefix));
}
}
std::string str1 = "ABCDEFGHIJKL123456";
std::string str2 = "ABCDEFGHIJKL1234567";
std::string str3 = "ABCDEFGHIJKL12345678";
std::string encoded_str1 = base64_encode(to_bytes_view(str1));
std::string encoded_str2 = base64_encode(to_bytes_view(str2));
std::string encoded_str3 = base64_encode(to_bytes_view(str3));
std::vector<std::string> non_prefixes = {
"B", "AC", "ABD", "ACD", "ABCE", "ABCEG", "ABCDEFGHIJKLM", "ABCDEFGHIJKL123456789"
};
for (auto& non_prefix : non_prefixes) {
std::string encoded_non_prefix = base64_encode(to_bytes_view(non_prefix));
BOOST_REQUIRE(!base64_begins_with(encoded_str1, encoded_non_prefix));
BOOST_REQUIRE(!base64_begins_with(encoded_str2, encoded_non_prefix));
BOOST_REQUIRE(!base64_begins_with(encoded_str3, encoded_non_prefix));
}
}
BOOST_AUTO_TEST_CASE(test_allocator_fail_gracefully) {
// Allocation size is set to a ridiculously high value to ensure
// that it will immediately fail - trying to lazily allocate just
// a little more than total memory may still succeed.
static size_t too_large_alloc_size = memory::stats().total_memory() * 1024 * 1024;
rjson::allocator allocator;
// Impossible allocation should throw
BOOST_REQUIRE_THROW(allocator.Malloc(too_large_alloc_size), rjson::error);
// So should impossible reallocation
void* memory = allocator.Malloc(1);
auto release = defer([memory] { rjson::allocator::Free(memory); });
BOOST_REQUIRE_THROW(allocator.Realloc(memory, 1, too_large_alloc_size), rjson::error);
// Internal rapidjson stack should also throw
// and also be destroyed gracefully later
rapidjson::internal::Stack stack(&allocator, 0);
BOOST_REQUIRE_THROW(stack.Push<char>(too_large_alloc_size), rjson::error);
}
// Test the alternator::internal::magnitude_and_precision() function which we
// use to used to check if a number exceeds DynamoDB's limits on magnitude and
// precision (for issue #6794). This just tests the internal implementation -
// we also have end-to-end tests trying to insert various numbers with bad
// magnitude and precision to the database in test/alternator/test_number.py.
BOOST_AUTO_TEST_CASE(test_magnitude_and_precision) {
struct expected {
const char* number;
int magnitude;
int precision;
};
std::vector<expected> tests = {
// number magnitude, precision
{"0", 0, 0},
{"0e10", 0, 0},
{"0e-10", 0, 0},
{"0e+10", 0, 0},
{"0.0", 0, 0},
{"0.00e10", 0, 0},
{"1", 0, 1},
{"12.", 1, 2},
{"1.1", 0, 2},
{"12.3", 1, 3},
{"12.300", 1, 3},
{"0.3", -1, 1},
{".3", -1, 1},
{"3e-1", -1, 1},
{"0.00012", -4, 2},
{"1.2e-4", -4, 2},
{"1.2E-4", -4, 2},
{"12.345e50", 51, 5},
{"12.345e-50",-49, 5},
{"123000000", 8, 3},
{"123000000.000e+5", 13, 3},
{"10.01", 1, 4},
{"1.001e1", 1, 4},
{"1e5", 5, 1},
{"1e+5", 5, 1},
{"1e-5", -5, 1},
{"123e-7", -5, 3},
// These are important edge cases: DynamoDB considers 1e126 to be
// overflowing but 9.9999e125 is considered to have magnitude 125
// and ok. Conversely, 1e-131 is underflowing and 0.9e-130 is too.
{"9.99999e125", 125, 6},
{"0.99999e-130", -131, 5},
{"0.9e-130", -131, 1},
// Although 1e1000 is not allowed, 0e0000 is allowed - it's just 0.
{"0e1000", 0, 0},
};
// prefixes that should do nothing to a number
std::vector<std::string> prefixes = {
"",
"0",
"+",
"-",
"+0000",
"-0000"
};
for (expected test : tests) {
for (std::string prefix : prefixes) {
std::string number = prefix + test.number;
auto res = alternator::internal::get_magnitude_and_precision(number);
BOOST_CHECK_MESSAGE(res.magnitude == test.magnitude,
seastar::format("{}: expected magnitude {}, got {}", number, test.magnitude, res.magnitude));
BOOST_CHECK_MESSAGE(res.precision == test.precision,
seastar::format("{}: expected precision {}, got {}", number, test.precision, res.precision));
}
}
// Huge exponents like 1e1000000 are not guaranteed to return that
// specific number as magnitude, but is guaranteed to return some
// other high magnitude that the caller can complain is excessive.
auto res = alternator::internal::get_magnitude_and_precision("1e1000000");
BOOST_CHECK(res.magnitude > 1000);
res = alternator::internal::get_magnitude_and_precision("1e-1000000");
BOOST_CHECK(res.magnitude < -1000);
// Even if an exponent so huge that it doesn't even fit in a 32-bit
// integer, we shouldn't fail to recognize its excessive magnitude:
res = alternator::internal::get_magnitude_and_precision("1e1000000000000");
BOOST_CHECK(res.magnitude > 1000);
res = alternator::internal::get_magnitude_and_precision("1e-1000000000000");
BOOST_CHECK(res.magnitude < -1000);
}
// parsed expression cache tests:
// ANTLR3 leaks memory when it tries to recover from missing token.
// - it creates a "fake" token, if it allows to continue parsing.
// Leak was reported by ASAN, when running this test in debug mode -
// the test passed but the leak is discovered when the test file exits.
// Reproduces #25878
BOOST_AUTO_TEST_CASE(missing_tokens_memory_leak) {
BOOST_REQUIRE_THROW(alternator::parse_update_expression("SET a :v"), alternator::expressions_syntax_error); // missing '='
BOOST_REQUIRE_THROW(alternator::parse_update_expression("DELETE a v"), alternator::expressions_syntax_error); // missing ':'
BOOST_REQUIRE_THROW(alternator::parse_update_expression("ADD a v"), alternator::expressions_syntax_error); // missing ':'
BOOST_REQUIRE_THROW(alternator::parse_condition_expression("size(a < 5", "Test"), alternator::expressions_syntax_error); // missing ')'
BOOST_REQUIRE_THROW(alternator::parse_condition_expression("a IN :x)", "Test"), alternator::expressions_syntax_error); // missing '('
BOOST_REQUIRE_THROW(alternator::parse_condition_expression("a IN (:x", "Test"), alternator::expressions_syntax_error); // missing ')'
BOOST_REQUIRE_THROW(alternator::parse_condition_expression("a BETWEEN :x AN :y", "Test"), alternator::expressions_syntax_error); // missing 'AND'
BOOST_REQUIRE_THROW(alternator::parse_condition_expression("a BETWEEN :x :y", "Test"), alternator::expressions_syntax_error); // missing 'AND'
BOOST_REQUIRE_THROW(alternator::parse_projection_expression("a[0.b"), alternator::expressions_syntax_error); // missing ']'
}
// Tests of inputs that cause exceptions inside the expression parser.
// ANTR3 itself doesn't use exceptions, but we do in additional checks.
// Apart from correct response, which may be tested in Python tests,
// main concern here is if this can cause memory leaks
// similar to issue in the above test.
BOOST_AUTO_TEST_CASE(exception_at_expression_parsing) {
// std::stoi throws std::out_of_range if the number is too big
BOOST_REQUIRE_THROW(alternator::parse_projection_expression("a[99999999999999999]") , alternator::expressions_syntax_error);
// Path depth limit exceeded should throw expressions_syntax_error
// alternator::parsed::path::depth_limit is private, so try with some arbitrary long path:
std::string long_path = "a";
for (int i = 0; i < 100; ++i) {
long_path += ".a";
}
BOOST_REQUIRE_THROW(alternator::parse_projection_expression(long_path), alternator::expressions_syntax_error);
// Appending duplicate update actions throws expressions_syntax_error
BOOST_REQUIRE_THROW(alternator::parse_update_expression("SET a = :v SET b = :w"), alternator::expressions_syntax_error);
// Single non-function condition throws expressions_syntax_error
BOOST_REQUIRE_THROW(alternator::parse_condition_expression("a OR b", "TEST"), alternator::expressions_syntax_error);
}
using exp_type = alternator::stats::expression_types;
static int exp_type_i(exp_type type) {
if (static_cast<int>(type) >= exp_type::NUM_EXPRESSION_TYPES)
BOOST_FAIL("Invalid expression type");
return static_cast<int>(type);
}
static std::string_view str(exp_type type) {
constexpr static std::string_view exp_type_s[exp_type::NUM_EXPRESSION_TYPES] = { "projection", "update", "condition" };
return exp_type_s[exp_type_i(type)];
};
static uint64_t& hits_counter(alternator::stats& stats, exp_type type) {
return stats.expression_cache.requests[exp_type_i(type)].hits;
}
static uint64_t& misses_counter(alternator::stats& stats, exp_type type) {
return stats.expression_cache.requests[exp_type_i(type)].misses;
}
enum class expecting_exception { yes, no };
static expecting_exception hit(alternator::stats& stats, exp_type type) {
hits_counter(stats, type)++;
return expecting_exception::no;
}
static expecting_exception miss(alternator::stats& stats, exp_type type) {
misses_counter(stats, type)++;
return expecting_exception::no;
}
static expecting_exception eviction_miss(alternator::stats& stats, exp_type type) {
stats.expression_cache.evictions++;
return miss(stats, type);
}
static expecting_exception invalid(alternator::stats& stats, exp_type type) {
return expecting_exception::yes;
}
struct test_cache {
alternator::stats stats;
alternator::stats expected_stats;
utils::updateable_value_source<uint32_t> max_cache_entries;
std::unique_ptr<alternator::parsed::expression_cache> cache;
test_cache(int size) : max_cache_entries(size), cache(std::make_unique<alternator::parsed::expression_cache>(alternator::parsed::expression_cache::config{
.max_cache_entries = utils::updateable_value<uint32_t>(max_cache_entries)
}, stats)) {}
std::string validate_stats(const std::string& msg) {
for (int t = 0; t < exp_type::NUM_EXPRESSION_TYPES; t++) {
exp_type type = static_cast<exp_type>(t);
if(hits_counter(stats, type) != hits_counter(expected_stats, type)) {
return format("{}: expected {} {} hits, got {}", msg, hits_counter(expected_stats, type), str(type), hits_counter(stats, type));
}
if(misses_counter(stats, type) != misses_counter(expected_stats, type)) {
return format("{}: expected {} {} misses, got {}", msg, misses_counter(expected_stats, type), str(type), misses_counter(stats, type));
}
}
if(stats.expression_cache.evictions != expected_stats.expression_cache.evictions) {
return format("{}: expected {} evictions, got {}", msg, expected_stats.expression_cache.evictions, stats.expression_cache.evictions);
}
return std::string();
}
void check_stats(const std::string& msg) {
std::string v = validate_stats(msg);
BOOST_REQUIRE_MESSAGE(v.empty(), v);
}
seastar::future<> wait_check_stats(const std::string& msg) {
for (int attempt = 0; attempt < 100; attempt++) {
std::string v = validate_stats(msg);
if (v.empty()) {
co_return;
}
co_await seastar::sleep(std::chrono::milliseconds(10));
}
check_stats(msg); // Final check after all attempts
}
void try_parse(const std::string& expr, exp_type type, expecting_exception (*expected_cache_behavior)(alternator::stats&, exp_type)) {
try {
switch (type) {
case exp_type::PROJECTION_EXPRESSION:
(void)(cache->parse_projection_expression(expr));
break;
case exp_type::UPDATE_EXPRESSION:
(void)(cache->parse_update_expression(expr));
break;
case exp_type::CONDITION_EXPRESSION:
(void)(cache->parse_condition_expression(expr, "Test"));
break;
default:
BOOST_FAIL("Invalid expression type");
}
if (expected_cache_behavior(expected_stats, type) == expecting_exception::yes) {
BOOST_FAIL(format("Expected exception for {} expression: {}, but none was thrown.", str(type), expr));
}
} catch (const alternator::expressions_syntax_error& ex) {
if (expected_cache_behavior(expected_stats, type) == expecting_exception::no) {
BOOST_FAIL(format("Unexpected syntax exception for {} expression: {}, {}", str(type), expr, ex.what()));
}
} catch (const std::exception& ex) {
BOOST_FAIL(format("Unexpected exception for {} expression: {}, {}", str(type), expr, ex.what()));
}
check_stats(format("after parsing {} expression: {}", str(type), expr));
}
};
// Basic cache functionality test: hits, misses, evictions.
SEASTAR_TEST_CASE(test_parsed_expression_cache) {
test_cache cache(3);
// New entries
cache.try_parse("a", exp_type::PROJECTION_EXPRESSION, miss);
cache.try_parse("a", exp_type::PROJECTION_EXPRESSION, hit);
cache.try_parse("SET a=:v", exp_type::UPDATE_EXPRESSION, miss);
cache.try_parse("SET a=:v", exp_type::UPDATE_EXPRESSION, hit);
cache.try_parse("a=:v", exp_type::CONDITION_EXPRESSION, miss);
cache.try_parse("a=:v", exp_type::CONDITION_EXPRESSION, hit);
// Cache full - evicting old entrires
cache.try_parse("b", exp_type::PROJECTION_EXPRESSION, eviction_miss);
cache.try_parse("b", exp_type::PROJECTION_EXPRESSION, hit);
cache.try_parse("SET b=:v", exp_type::UPDATE_EXPRESSION, eviction_miss);
cache.try_parse("SET b=:v", exp_type::UPDATE_EXPRESSION, hit);
cache.try_parse("b=:v", exp_type::CONDITION_EXPRESSION, eviction_miss);
cache.try_parse("b=:v", exp_type::CONDITION_EXPRESSION, hit);
// Keys existing in cache, but invalid (for a given type) - raise exception
cache.try_parse("b", exp_type::UPDATE_EXPRESSION, invalid);
cache.try_parse("b", exp_type::CONDITION_EXPRESSION, invalid);
cache.try_parse("SET b=:v", exp_type::PROJECTION_EXPRESSION, invalid);
cache.try_parse("SET b=:v", exp_type::CONDITION_EXPRESSION, invalid);
cache.try_parse("b=:v", exp_type::PROJECTION_EXPRESSION, invalid);
cache.try_parse("b=:v", exp_type::UPDATE_EXPRESSION, invalid);
// Invalid expressions should not affect cache state
cache.try_parse("b", exp_type::PROJECTION_EXPRESSION, hit);
cache.try_parse("SET b=:v", exp_type::UPDATE_EXPRESSION, hit);
cache.try_parse("b=:v", exp_type::CONDITION_EXPRESSION, hit);
co_return;
}
// Test that same strings can't be parsed to different expression types.
SEASTAR_TEST_CASE(test_parsed_expression_cache_invalid_requests) {
test_cache cache(2000);
auto inv_expr = {"", " ", "SET", "set", ":v", "1"};
for (auto expr : inv_expr) {
cache.try_parse(expr, exp_type::PROJECTION_EXPRESSION, invalid);
cache.try_parse(expr, exp_type::UPDATE_EXPRESSION, invalid);
cache.try_parse(expr, exp_type::CONDITION_EXPRESSION, invalid);
}
auto projection = {"a", "a, b", "a.b", "a.#b", "#a[1]", "a[1].b"};
for (auto expr : projection) {
cache.try_parse(expr, exp_type::UPDATE_EXPRESSION, invalid);
cache.try_parse(expr, exp_type::CONDITION_EXPRESSION, invalid);
cache.try_parse(expr, exp_type::PROJECTION_EXPRESSION, miss);
}
auto condition = {"a=:v", "size(a)", "a IN (:v)", "a > :v", "a = :v AND b = :w", "a = :v OR b = :w", "NOT a = :v", "(a = :v)"};
for (auto expr : condition) {
cache.try_parse(expr, exp_type::PROJECTION_EXPRESSION, invalid);
cache.try_parse(expr, exp_type::UPDATE_EXPRESSION, invalid);
cache.try_parse(expr, exp_type::CONDITION_EXPRESSION, miss);
}
auto update = {"SET a=:v", "SET a=:v, b = :1", "ADD a[1] :v", "REMOVE a[1]", "DELETE a :v", "DELETE a :v, b :w REMOVE c", "SET a=:v REMOVE b ADD c :w"};
for (auto expr : update) {
cache.try_parse(expr, exp_type::PROJECTION_EXPRESSION, invalid);
cache.try_parse(expr, exp_type::CONDITION_EXPRESSION, invalid);
cache.try_parse(expr, exp_type::UPDATE_EXPRESSION, miss);
}
co_return;
}
// Test resizing the cache at runtime.
SEASTAR_TEST_CASE(test_parsed_expression_cache_resize) {
test_cache cache(3);
cache.try_parse("a", exp_type::PROJECTION_EXPRESSION, miss);
cache.try_parse("b", exp_type::PROJECTION_EXPRESSION, miss);
cache.try_parse("c", exp_type::PROJECTION_EXPRESSION, miss);
cache.try_parse("d", exp_type::PROJECTION_EXPRESSION, eviction_miss);
cache.max_cache_entries.set(4);
cache.try_parse("e", exp_type::PROJECTION_EXPRESSION, miss);
cache.max_cache_entries.set(2);
cache.expected_stats.expression_cache.evictions += 2;
cache.check_stats("after resizing cache to 2 entries");
cache.max_cache_entries.set(0);
cache.expected_stats.expression_cache.evictions += 2;
cache.check_stats("after disabling cache");
// for resizes down with more then 3000 evictions the change may be asynchronous
size_t large_size = 30000;
size_t first_reduce = 75*large_size/100;
cache.max_cache_entries.set(large_size);
for (size_t i = 0; i < large_size; i++) {
cache.try_parse(seastar::format("expr{}", i), exp_type::PROJECTION_EXPRESSION, miss);
co_await maybe_yield();
}
cache.max_cache_entries.set(first_reduce);
cache.expected_stats.expression_cache.evictions += (large_size - first_reduce);
co_await cache.wait_check_stats("async, after resizing cache");
for (size_t i = 0; i < first_reduce; i++) {
cache.try_parse(seastar::format("expr{}", i), exp_type::PROJECTION_EXPRESSION, eviction_miss);
co_await maybe_yield();
}
cache.max_cache_entries.set(0);
cache.expected_stats.expression_cache.evictions += first_reduce;
co_await cache.wait_check_stats("async, after disabling cache");
cache.max_cache_entries.set(large_size);
for (size_t i = 0; i < large_size; i++) {
cache.try_parse(seastar::format("expr{}", i), exp_type::PROJECTION_EXPRESSION, miss);
co_await maybe_yield();
}
cache.max_cache_entries.set(1000);
co_await cache.cache->stop();
cache.cache.reset();
co_return;
}