To reduce test executable size and speed up compilation time, compile unit tests into a single executable. Here is a file size comparison of the unit test executable: - Before applying the patch $ du -h --exclude='*.o' --exclude='*.o.d' build/release/test/boost/ build/debug/test/boost/ 11G build/release/test/boost/ 29G build/debug/test/boost/ - After applying the patch $ du -h --exclude='*.o' --exclude='*.o.d' build/release/test/boost/ build/debug/test/boost/ 5.5G build/release/test/boost/ 19G build/debug/test/boost/ It reduces executable sizes by 5.5GB on release and by 10GB on debug. Closes #9155 Closes scylladb/scylladb#21443
224 lines
12 KiB
C++
224 lines
12 KiB
C++
/*
|
|
* Copyright (C) 2023-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
*/
|
|
|
|
#include <fmt/ranges.h>
|
|
#undef SEASTAR_TESTING_MAIN
|
|
#include <seastar/testing/test_case.hh>
|
|
#include "test/lib/cql_test_env.hh"
|
|
#include "test/lib/log.hh"
|
|
#include "db/config.hh"
|
|
|
|
BOOST_AUTO_TEST_SUITE(cache_algorithm_test)
|
|
|
|
// These tests are slow, and tuned to a particular amount of memory
|
|
// (and --memory is ignored in debug mode).
|
|
// Hence they are not run in debug.
|
|
#ifndef SEASTAR_DEBUG
|
|
|
|
// The problem with naive index caching is that every uncached read drags a full
|
|
// index page into the cache. But the index page can be orders of magnitude bigger
|
|
// than the ingested row. Depending on the workload, this can effectively bloat the
|
|
// memory usage of every cached row by orders of magnitude, and ruin cache
|
|
// effectiveness.
|
|
//
|
|
// This test checks for the above problem.
|
|
//
|
|
// The table created by the test has the following properties:
|
|
// - There is only one column in the schema -- the partition key --
|
|
// because this results in the biggest `index page size`:`row size` ratio,
|
|
// due to details of the SSTable format.
|
|
// This makes the effects drastic, and so easy to test.
|
|
// - The total size of all keys is at least 2 times greater than the size of RAM.
|
|
// This ensures that most of the index is uncached. This is necessary for
|
|
// the issue to become visible.
|
|
// - The size of user data (1000 per partition) is significantly bigger
|
|
// than several hundred bytes, to make various constant overheads
|
|
// (per-cell, per-row, per-partition) smaller than the size of user data.
|
|
// This simplifies reasoning about the test.
|
|
// In particular, it should ensure that each index page contains about 2000 keys,
|
|
// so it has size about 2 MiB.
|
|
//
|
|
// After populating this table, the test reads (sequentially) a subset of 1000 rows
|
|
// multiple times. Since the total size of this hot subset (including overheads) is
|
|
// only about 1 MiB, the test expects it to be perfectly cached.
|
|
// This should be true unless index cache is flooding the cache.
|
|
SEASTAR_TEST_CASE(test_index_doesnt_flood_cache_in_small_partition_workload) {
    cql_test_config cfg;
    // Prevents an unnecessary sleep at the end of the test.
    cfg.db_config->task_ttl_seconds.set(0);
    // Scylla refreshes query permissions periodically.
    // This causes background queries to system_auth.roles, which in turn can cause
    // spurious cache misses to appear in cache_tracker stats, which breaks
    // the assumptions of the test.
    // The lines below effectively disable the permission refresh to get rid of that problem.
    cfg.db_config->permissions_validity_in_ms.set(uint32_t(-1));
    cfg.db_config->permissions_update_interval_in_ms.set(uint32_t(-1));
    // As of this writing, uncommenting the below should make the test fail.
    // cfg.db_config->index_cache_fraction.set(1.0);
    return do_with_cql_env_thread([] (cql_test_env& e) {
        // We disable compactions because they cause confusing cache mispopulations.
        e.execute_cql("CREATE TABLE ks.t(pk blob PRIMARY KEY) WITH compaction = { 'class' : 'NullCompactionStrategy' };").get();
        auto insert_query = e.prepare("INSERT INTO ks.t(pk) VALUES (?)").get();
        auto select_query = e.prepare("SELECT * FROM t WHERE pk = ?").get();

        constexpr uint64_t pk_number = 600000;
        constexpr uint64_t pk_size = 1000;
        // Sanity check. The test assumes that the total index size is significantly bigger than RAM.
        BOOST_REQUIRE_GT(pk_size * pk_number, 2 * seastar::memory::stats().total_memory());

        // A bijection between uint64_t and blobs of size pk_size.
        // The blob is zero-filled and `x` is serialized at its beginning with write<uint64_t>.
        // std::max() guarantees that the blob is never too small to hold `x`.
        auto make_key = [pk_size] (uint64_t x) {
            bytes b(std::max(pk_size, sizeof(x)), '\0');
            auto i = b.begin();
            write<uint64_t>(i, x);
            return b;
        };

        // Populate the table.
        for (size_t i = 0; i < pk_number; ++i) {
            e.execute_prepared(insert_query, {{cql3::raw_value::make_value(make_key(i))}}).get();
        }
        // Flushing makes reasoning easier.
        e.db().invoke_on_all(&replica::database::flush_all_memtables).get();

        constexpr uint64_t hot_subset_size = 1000;
        // Sanity check. The test assumes that the total *hot* index size is significantly bigger than RAM.
        //
        // data_summary_ratio is the target `data file size : summary file size` ratio.
        // In a table containing only primary keys, the approximate size of an index page is `pk_size * data_summary_ratio`.
        //
        // The sanity check here is that the maximum total size of the touched index pages is much greater than RAM.
        // (Maximum is reached when each hot row lands on a different index page.)
        //
        // None of the inputs of this check change between retries, so it is done once, before the retry loop.
        const uint64_t data_summary_ratio = static_cast<uint64_t>(1 / e.local_db().get_config().sstable_summary_ratio());
        BOOST_REQUIRE_GT(hot_subset_size * pk_size * data_summary_ratio, 2 * seastar::memory::stats().total_memory());

        // Reads the current number of partition misses from the row cache tracker stats.
        auto get_misses = [&e] { return e.local_db().row_cache_tracker().get_stats().partition_misses; };

        int retries = 0;
        constexpr int max_retries = 100;
    retry:
        // Warm-up pass: read the hot subset once.
        // Each retry works on its own window of keys: [hot_subset_size * retries, hot_subset_size * (retries + 1)).
        for (size_t i = 0; i < hot_subset_size; ++i) {
            e.execute_prepared(select_query, {{cql3::raw_value::make_value(make_key(hot_subset_size * retries + i))}}).get();
        }

        // Baselines are taken only after the warm-up pass, because the warm-up is expected to miss.
        const uint64_t misses_before = get_misses();
        const uint64_t reads_before = e.local_db().row_cache_tracker().get_stats().reads;
        uint64_t reads_expected = reads_before;

        // The rows we just read have a small total size. They should be perfectly cached.
        for (size_t repeat = 0; repeat < 3; ++repeat) {
            for (size_t i = 0; i < hot_subset_size; ++i) {
                e.execute_prepared(select_query, {{cql3::raw_value::make_value(make_key(hot_subset_size * retries + i))}}).get();
                ++reads_expected;
                // If the rows were perfectly cached, there were no new misses.
                const uint64_t reads_after = e.local_db().row_cache_tracker().get_stats().reads_done;
                // Sample the miss counter once, so that the value which is compared
                // is exactly the value which is reported in the warning below.
                const uint64_t misses_after = get_misses();
                if (misses_after != misses_before) {
                    // Cache misses are allowed only if they were done by something outside of the test.
                    BOOST_REQUIRE_GT(reads_after, reads_expected);
                    if (retries < max_retries) {
                        ++retries;
                        testlog.warn("Detected extra cache misses (actual={}, expected={}, repeat={}, i={}), but they can be explained by extra reads (after={}, before={}, expected={}) done by something in the background, so retrying. (retries={})", misses_after, misses_before, repeat, i, reads_after, reads_before, reads_expected, retries);
                        goto retry;
                    } else {
                        BOOST_FAIL("Test failed due to too much background noise");
                    }
                }
                BOOST_REQUIRE_EQUAL(misses_after, misses_before);
            }
        }
    }, std::move(cfg));
}
|
|
|
|
// The previous test checks that index_cache_fraction doesn't allow index cache to
|
|
// flood the memory.
|
|
//
|
|
// This test checks that it doesn't completely kill caching.
|
|
SEASTAR_TEST_CASE(test_index_is_cached_in_big_partition_workload) {
    cql_test_config cfg;
    // A zero task TTL spares us a pointless sleep when the test finishes.
    cfg.db_config->task_ttl_seconds.set(0);
    // Permission refreshes run periodically and query system_auth.roles in the background.
    // Those queries may show up as spurious cache misses in the cache_tracker stats
    // and invalidate the assumptions made below, so the refresh is effectively
    // switched off by the two settings that follow.
    cfg.db_config->permissions_validity_in_ms.set(uint32_t(-1));
    cfg.db_config->permissions_update_interval_in_ms.set(uint32_t(-1));
    // As of this writing, uncommenting the below should make the test fail.
    // cfg.db_config->index_cache_fraction.set(0.0);
    return do_with_cql_env_thread([] (cql_test_env& e) {
        // Compactions are turned off, since they lead to confusing cache mispopulations.
        e.execute_cql("CREATE TABLE ks.t(pk bigint, ck bigint, v blob, primary key (pk, ck)) WITH compaction = { 'class' : 'NullCompactionStrategy' };").get();
        auto insert_stmt = e.prepare("INSERT INTO ks.t(pk, ck, v) VALUES (?, ?, ?)").get();
        auto select_stmt = e.prepare("SELECT * FROM t WHERE pk = ? AND ck = ?").get();

        constexpr uint64_t pk_number = 10;
        constexpr uint64_t ck_number = 600;
        constexpr uint64_t v_size = 100000;
        // Sanity check: the whole table is assumed to be significantly bigger than RAM.
        BOOST_REQUIRE_GT(pk_number * ck_number * v_size, 2 * seastar::memory::stats().total_memory());

        // Turns a uint64_t into a bound query value: a blob of sizeof(uint64_t) bytes,
        // fully overwritten by write<uint64_t>.
        auto to_value = [] (uint64_t x) {
            bytes buf(bytes::initialized_later(), sizeof(x));
            auto out = buf.begin();
            write<uint64_t>(out, x);
            return cql3::raw_value::make_value(buf);
        };

        // Selects every (pk, ck) pair once, iterating over ck in the outer loop,
        // and returns the number of SELECTs it has executed.
        auto read_all_rows = [&] () -> uint64_t {
            uint64_t issued = 0;
            for (uint64_t ck = 0; ck != ck_number; ++ck) {
                for (uint64_t pk = 0; pk != pk_number; ++pk) {
                    e.execute_prepared(select_stmt, {{to_value(pk)}, {to_value(ck)}}).get();
                    ++issued;
                }
            }
            return issued;
        };

        // Fill the table, one partition after another.
        for (uint64_t pk = 0; pk != pk_number; ++pk) {
            for (uint64_t ck = 0; ck != ck_number; ++ck) {
                e.execute_prepared(insert_stmt, {{to_value(pk)}, {to_value(ck)}, {cql3::raw_value::make_value(bytes(v_size, 0))}}).get();
            }
        }
        // Having everything flushed makes the reasoning easier.
        e.db().invoke_on_all(&replica::database::flush_all_memtables).get();

        // First full pass: populates the index cache.
        read_all_rows();

        // Reads the current number of partition index cache misses.
        auto get_misses = [&e] { return e.local_db().row_cache_tracker().get_partition_index_cache_stats().misses; };

        constexpr int max_retries = 100;
        for (int retries = 0;;) {
            // The index is small and gets used once every few reads, so a full pass
            // over the table is expected to be served without a single index cache miss.
            const uint64_t misses_before = get_misses();
            const uint64_t reads_before = e.local_db().row_cache_tracker().get_stats().reads;
            const uint64_t reads_expected = reads_before + read_all_rows();
            const uint64_t reads_after = e.local_db().row_cache_tracker().get_stats().reads_done;
            const uint64_t misses_after = get_misses();

            if (misses_after == misses_before) {
                BOOST_REQUIRE_EQUAL(get_misses(), misses_before);
                break;
            }

            // Misses are tolerated only when something outside of the test has done extra reads.
            BOOST_REQUIRE_GT(reads_after, reads_expected);
            if (retries >= max_retries) {
                BOOST_FAIL("Test failed due to too much background noise");
            }
            ++retries;
            testlog.warn("Detected extra cache misses (actual={}, expected={}), but they can be explained by extra reads (after={}, before={}, expected={}) done by something in the background, so retrying. (retries={})", misses_after, misses_before, reads_after, reads_before, reads_expected, retries);
        }
    }, std::move(cfg));
}
|
|
|
|
#endif
|
|
BOOST_AUTO_TEST_SUITE_END()
|