replica: memtable: don't forget memtable memory allocation statistics

A memtable object contains two logalloc::allocating_section members
that track memory allocation requirements during reads and writes.
Because these are local to the memtable, each time we seal a memtable
and create a new one, these statistics are forgotten. As a result
we may have to re-learn the typical size of reads and writes, incurring
a small performance penalty.

The solution is to move the allocating_section object to the memtable_list
container. The workload is the same across all memtables of the same
table, so we don't lose discrimination here.

The performance penalty may be increased later if we log changes to
memory reserve thresholds, including a backtrace, so this change reduces
the odds of incurring such a penalty.

Closes scylladb/scylladb#15737
This commit is contained in:
Avi Kivity
2023-10-17 16:25:28 +03:00
committed by Tomasz Grabiec
parent c8cb70918b
commit 7d5e22b43b
5 changed files with 34 additions and 13 deletions

View File

@@ -1956,7 +1956,9 @@ future<> memtable_list::flush() {
}
lw_shared_ptr<memtable> memtable_list::new_memtable() {
return make_lw_shared<memtable>(_current_schema(), *_dirty_memory_manager, _table_stats, this, _compaction_scheduling_group);
return make_lw_shared<memtable>(_current_schema(), *_dirty_memory_manager,
_read_section, _allocating_section,
_table_stats, this, _compaction_scheduling_group);
}
// Synchronously swaps the active memtable with a new, empty one,

View File

@@ -169,6 +169,8 @@ private:
seal_immediate_fn_type _seal_immediate_fn;
std::function<schema_ptr()> _current_schema;
replica::dirty_memory_manager* _dirty_memory_manager;
logalloc::allocating_section _read_section;
logalloc::allocating_section _allocating_section;
std::optional<shared_future<>> _flush_coalescing;
seastar::scheduling_group _compaction_scheduling_group;
replica::table_stats& _table_stats;

View File

@@ -114,7 +114,10 @@ void memtable::memtable_encoding_stats_collector::update(const ::schema& s, cons
}
}
memtable::memtable(schema_ptr schema, dirty_memory_manager& dmm, replica::table_stats& table_stats,
memtable::memtable(schema_ptr schema, dirty_memory_manager& dmm,
logalloc::allocating_section& read_section,
logalloc::allocating_section& allocating_section,
replica::table_stats& table_stats,
memtable_list* memtable_list, seastar::scheduling_group compaction_scheduling_group)
: dirty_memory_manager_logalloc::size_tracked_region()
, _dirty_mgr(dmm)
@@ -122,6 +125,8 @@ memtable::memtable(schema_ptr schema, dirty_memory_manager& dmm, replica::table_
[this] (size_t freed) { remove_flushed_memory(freed); })
, _memtable_list(memtable_list)
, _schema(std::move(schema))
, _read_section(read_section)
, _allocating_section(allocating_section)
, partitions(dht::raw_token_less_comparator{})
, _table_stats(table_stats) {
logalloc::region::listen(&dmm.region_group());
@@ -129,9 +134,12 @@ memtable::memtable(schema_ptr schema, dirty_memory_manager& dmm, replica::table_
static thread_local dirty_memory_manager mgr_for_tests;
static thread_local replica::table_stats stats_for_tests;
static thread_local logalloc::allocating_section memtable_read_section_for_tests;
static thread_local logalloc::allocating_section memtable_allocating_section_for_tests;
memtable::memtable(schema_ptr schema)
: memtable(std::move(schema), mgr_for_tests, stats_for_tests)
: memtable(std::move(schema), mgr_for_tests,
memtable_read_section_for_tests, memtable_allocating_section_for_tests, stats_for_tests)
{ }
memtable::~memtable() {

View File

@@ -111,8 +111,8 @@ private:
mutation_cleaner _cleaner;
memtable_list *_memtable_list;
schema_ptr _schema;
logalloc::allocating_section _read_section;
logalloc::allocating_section _allocating_section;
logalloc::allocating_section& _read_section;
logalloc::allocating_section& _allocating_section;
partitions_type partitions;
size_t nr_partitions = 0;
db::replay_position _replay_position;
@@ -171,7 +171,10 @@ private:
void clear() noexcept;
uint64_t dirty_size() const;
public:
explicit memtable(schema_ptr schema, dirty_memory_manager&, replica::table_stats& table_stats, memtable_list *memtable_list = nullptr,
explicit memtable(schema_ptr schema, dirty_memory_manager&,
logalloc::allocating_section& read_section,
logalloc::allocating_section& allocating_section,
replica::table_stats& table_stats, memtable_list *memtable_list = nullptr,
seastar::scheduling_group compaction_scheduling_group = seastar::current_scheduling_group());
// Used for testing that want to control the flush process.
explicit memtable(schema_ptr schema);

View File

@@ -169,9 +169,9 @@ SEASTAR_TEST_CASE(test_memtable_flush_reader) {
return seastar::async([] {
tests::reader_concurrency_semaphore_wrapper semaphore;
auto make_memtable = [] (replica::dirty_memory_manager& mgr, replica::table_stats& tbl_stats, std::vector<mutation> muts) {
auto make_memtable = [] (replica::dirty_memory_manager& mgr, logalloc::allocating_section& read_section, logalloc::allocating_section& allocating_section, replica::table_stats& tbl_stats, std::vector<mutation> muts) {
assert(!muts.empty());
auto mt = make_lw_shared<replica::memtable>(muts.front().schema(), mgr, tbl_stats);
auto mt = make_lw_shared<replica::memtable>(muts.front().schema(), mgr, read_section, allocating_section, tbl_stats);
for (auto& m : muts) {
mt->apply(m);
}
@@ -181,6 +181,8 @@ SEASTAR_TEST_CASE(test_memtable_flush_reader) {
auto test_random_streams = [&] (random_mutation_generator&& gen) {
for (auto i = 0; i < 4; i++) {
replica::table_stats tbl_stats;
logalloc::allocating_section read_section;
logalloc::allocating_section allocating_section;
replica::dirty_memory_manager mgr;
const auto muts = gen(4);
const auto now = gc_clock::now();
@@ -190,7 +192,7 @@ SEASTAR_TEST_CASE(test_memtable_flush_reader) {
}
testlog.info("Simple read");
auto mt = make_memtable(mgr, tbl_stats, muts);
auto mt = make_memtable(mgr, read_section, allocating_section, tbl_stats, muts);
assert_that(mt->make_flush_reader(gen.schema(), semaphore.make_permit()))
.produces_compacted(compacted_muts[0], now)
@@ -200,7 +202,7 @@ SEASTAR_TEST_CASE(test_memtable_flush_reader) {
.produces_end_of_stream();
testlog.info("Read with next_partition() calls between partition");
mt = make_memtable(mgr, tbl_stats, muts);
mt = make_memtable(mgr, read_section, allocating_section, tbl_stats, muts);
assert_that(mt->make_flush_reader(gen.schema(), semaphore.make_permit()))
.next_partition()
.produces_compacted(compacted_muts[0], now)
@@ -214,7 +216,7 @@ SEASTAR_TEST_CASE(test_memtable_flush_reader) {
.produces_end_of_stream();
testlog.info("Read with next_partition() calls inside partitions");
mt = make_memtable(mgr, tbl_stats, muts);
mt = make_memtable(mgr, read_section, allocating_section, tbl_stats, muts);
assert_that(mt->make_flush_reader(gen.schema(), semaphore.make_permit()))
.produces_compacted(compacted_muts[0], now)
.produces_partition_start(muts[1].decorated_key(), muts[1].partition().partition_tombstone())
@@ -293,9 +295,11 @@ SEASTAR_TEST_CASE(test_unspooled_dirty_accounting_on_flush) {
tests::reader_concurrency_semaphore_wrapper semaphore;
replica::dirty_memory_manager mgr;
logalloc::allocating_section read_section;
logalloc::allocating_section allocating_section;
replica::table_stats tbl_stats;
auto mt = make_lw_shared<replica::memtable>(s, mgr, tbl_stats);
auto mt = make_lw_shared<replica::memtable>(s, mgr, read_section, allocating_section, tbl_stats);
std::vector<mutation> ring = make_ring(s, 3);
std::vector<mutation> current_ring;
@@ -428,9 +432,11 @@ SEASTAR_TEST_CASE(test_segment_migration_during_flush) {
tests::reader_concurrency_semaphore_wrapper semaphore;
replica::table_stats tbl_stats;
logalloc::allocating_section read_section;
logalloc::allocating_section allocating_section;
replica::dirty_memory_manager mgr;
auto mt = make_lw_shared<replica::memtable>(s, mgr, tbl_stats);
auto mt = make_lw_shared<replica::memtable>(s, mgr, read_section, allocating_section, tbl_stats);
const int rows_per_partition = 300;
const int partitions = 3;