From 2a7939ee4dab5c87d13475aef11b49f46db652e7 Mon Sep 17 00:00:00 2001
From: "Raphael S. Carvalho"
Date: Wed, 2 Feb 2022 15:28:20 -0300
Subject: [PATCH] tests: Add compaction controller test

There's no automated test for controller, it's time to have one.
Let's start with a basic one that verifies the assumption that
perfectly compacted tiers should produce 0 backlog.

Signed-off-by: Raphael S. Carvalho
---
 test/boost/sstable_compaction_test.cc | 122 ++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/test/boost/sstable_compaction_test.cc b/test/boost/sstable_compaction_test.cc
index 1d2253cab0..4e42288db6 100644
--- a/test/boost/sstable_compaction_test.cc
+++ b/test/boost/sstable_compaction_test.cc
@@ -4811,3 +4811,125 @@ SEASTAR_TEST_CASE(test_major_does_not_miss_data_in_memtable) {
             .produces_end_of_stream();
     });
 }
+
+// Checks that perfectly size-tiered tables produce no compaction backlog.
+//
+// For each target table count, the disk budget is filled with tables that hold a
+// single sstable per tier (tier N is 4^N * estimated_flush_size bytes). The
+// compaction manager backlog, normalized by the available memory, is recorded
+// for every round and required to be exactly zero.
+SEASTAR_TEST_CASE(simple_backlog_controller_test) {
+    return test_env::do_with_async([] (test_env& env) {
+        /////////////
+        // settings
+        static constexpr float disk_memory_ratio = 78.125; /* AWS I3en is ~78.125 */
+        static constexpr uint64_t available_memory_per_shard = 8'000'000'000; /* AWS I3en */
+        static constexpr float target_disk_usage = 0.50;
+
+        const uint64_t available_disk_size_per_shard = disk_memory_ratio * available_memory_per_shard;
+        const uint64_t available_memory = available_memory_per_shard * 0.92; /* 8% is reserved for the OS */
+        const uint64_t estimated_flush_size = double(available_memory) * 0.05; /* flush threshold is 5% of available shard mem */
+        const uint64_t all_tables_disk_usage = double(available_disk_size_per_shard) * target_disk_usage;
+
+        auto as = abort_source();
+        compaction_manager::compaction_scheduling_group csg = { default_scheduling_group(), default_priority_class() };
+        compaction_manager::maintenance_scheduling_group msg = { default_scheduling_group(), default_priority_class() };
+        auto manager = compaction_manager(csg, msg, available_memory, as);
+
+        // Adds to table t an sstable whose reported data size is data_size, and
+        // logs how much the table's backlog changed as a result.
+        auto add_sstable = [&env, gen = make_lw_shared<unsigned>(1)] (replica::table& t, uint64_t data_size) {
+            auto sst = env.make_sstable(t.schema(), "", (*gen)++, la, big);
+            auto key = make_local_key(t.schema());
+            sstables::test(sst).set_values_for_leveled_strategy(data_size, 0 /*level*/, 0 /*max ts*/, key, key);
+            assert(sst->data_size() == data_size);
+            auto backlog_before = t.get_compaction_strategy().get_backlog_tracker().backlog();
+            t.add_sstable_and_update_cache(sst).get();
+            testlog.debug("\tNew sstable of size={}; Backlog diff={};",
+                          sstables::pretty_printed_data_size(data_size),
+                          t.get_compaction_strategy().get_backlog_tracker().backlog() - backlog_before);
+        };
+
+        auto tracker = make_lw_shared<cache_tracker>();
+        cell_locker_stats cl_stats;
+        // Creates a started table, attached to `manager`, that uses size-tiered compaction.
+        auto create_table = [&] () {
+            simple_schema ss;
+            auto s = ss.schema();
+
+            replica::column_family::config cfg = column_family_test_config(env.manager(), env.semaphore());
+            cfg.datadir = "";
+            cfg.enable_disk_writes = true;
+            cfg.enable_cache = false;
+            auto t = make_lw_shared<replica::table>(s, cfg, replica::table::no_commitlog(), manager, cl_stats, *tracker);
+            t->mark_ready_for_writes();
+            t->start();
+            t->set_compaction_strategy(sstables::compaction_strategy_type::size_tiered);
+            return t;
+        };
+
+        // Size of the single sstable placed in a tier; each tier is 4x the previous one.
+        auto get_size_for_tier = [&] (unsigned tier) -> uint64_t {
+            return std::pow(4, tier) * estimated_flush_size;
+        };
+        // Number of tiers for a table of target_size: ceil(log4(target_size / flush size)).
+        // At least one tier is reported so the table-filling loop below always consumes space.
+        auto get_total_tiers = [&] (uint64_t target_size) -> unsigned {
+            double inv_log_4 = 1.0f / std::log(4);
+            double tiers = std::ceil(std::log(double(target_size) / estimated_flush_size) * inv_log_4);
+            return std::max(1.0, tiers);
+        };
+        auto normalize_backlog = [&] (double backlog) -> double {
+            return backlog / available_memory;
+        };
+
+        struct result {
+            unsigned table_count;
+            uint64_t per_table_max_disk_usage;
+            double normalized_backlog;
+        };
+        std::vector<result> results;
+
+        std::vector<unsigned> target_table_count_s = { 1, 2, 5, 10, 20 };
+        for (auto target_table_count : target_table_count_s) {
+            // Integer ceiling division: std::ceil() on an already-truncated quotient is a no-op.
+            const uint64_t per_table_max_disk_usage = (all_tables_disk_usage + target_table_count - 1) / target_table_count;
+
+            testlog.info("Creating tables, with max size={}", sstables::pretty_printed_data_size(per_table_max_disk_usage));
+
+            std::vector<lw_shared_ptr<replica::table>> tables;
+            uint64_t tables_total_size = 0;
+
+            for (uint64_t t_idx = 0, available_space = all_tables_disk_usage; available_space >= estimated_flush_size; t_idx++) {
+                auto table_target_size = std::min(available_space, per_table_max_disk_usage);
+                auto tiers = get_total_tiers(table_target_size);
+
+                auto t = create_table();
+                for (unsigned tier_idx = 0; tier_idx < tiers; tier_idx++) {
+                    auto tier_size = get_size_for_tier(tier_idx);
+                    if (tier_size > available_space) {
+                        break;
+                    }
+                    add_sstable(*t, tier_size);
+                    // tier_size <= available_space is guaranteed by the check above.
+                    available_space -= tier_size;
+                }
+
+                auto table_size = t->get_stats().live_disk_space_used;
+                testlog.debug("T{}: {} tiers, with total size={}", t_idx, tiers, sstables::pretty_printed_data_size(table_size));
+                tables.push_back(t);
+                tables_total_size += table_size;
+            }
+            testlog.debug("Created {} tables, with total size={}", tables.size(), sstables::pretty_printed_data_size(tables_total_size));
+            results.push_back(result{ static_cast<unsigned>(tables.size()), per_table_max_disk_usage, normalize_backlog(manager.backlog()) });
+            for (auto& t : tables) {
+                t->stop().get();
+            }
+        }
+        for (auto& r : results) {
+            testlog.info("Tables={} with max size={} -> NormalizedBacklog={}", r.table_count, sstables::pretty_printed_data_size(r.per_table_max_disk_usage), r.normalized_backlog);
+            // Expect 0 backlog as tiers are all perfectly compacted
+            BOOST_REQUIRE(r.normalized_backlog == 0.0f);
+        }
+    });
+}