From 63f1969e08e3e4dde4ac3b0cdddf352ed57034e6 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 13 Aug 2024 12:30:38 +0300 Subject: [PATCH 1/7] sstable_directory: Load sstable once when sorting In order to decide which list to put sstable into, the sort_sstable() first calls get_shards_for_this_sstable() which loads the sstable anyway. If loaded shards contain only the current one (which is the common case) sstable is loaded again. In fact, if the sstable happens to be remote it's loaded anyway to get its open info. Fix that by loading sstable, then getting shards directly from it. Signed-off-by: Pavel Emelyanov --- sstables/sstable_directory.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index 0327885f41..bf50195f09 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -211,18 +211,19 @@ future sstable_directory::get_open_info_for_this_ssta future<> sstable_directory::sort_sstable(sstables::entry_descriptor desc, process_flags flags) { - auto shards = co_await get_shards_for_this_sstable(desc, flags); + auto sst = co_await load_sstable(desc, flags); + auto shards = sst->get_shards_for_this_sstable(); if (shards.size() == 1) { if (shards[0] == this_shard_id()) { dirlog.trace("{} identified as a local unshared SSTable", sstable_filename(desc)); - _unshared_local_sstables.push_back(co_await load_sstable(std::move(desc), flags)); + _unshared_local_sstables.push_back(std::move(sst)); } else { dirlog.trace("{} identified as a remote unshared SSTable, shard={}", sstable_filename(desc), shards[0]); _unshared_remote_sstables[shards[0]].push_back(std::move(desc)); } } else { dirlog.trace("{} identified as a shared SSTable, shards={}", sstable_filename(desc), shards); - _shared_sstable_info.push_back(co_await get_open_info_for_this_sstable(desc)); + _shared_sstable_info.push_back(co_await sst->get_open_info()); } } From ad3725fbbd3e6bf4a20081018078bf2e6b12fe32 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 13 Aug 2024 12:34:36 +0300 Subject: [PATCH 2/7] sstable_directory: Remove unused helpers After previous patch some wrappers around load_sstable() became unused. Signed-off-by: Pavel Emelyanov --- sstables/sstable_directory.cc | 12 ------------ sstables/sstable_directory.hh | 5 ----- 2 files changed, 17 deletions(-) diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index bf50195f09..04cf2f35eb 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -197,18 +197,6 @@ sstable_directory::process_descriptor(sstables::entry_descriptor desc, process_f } } -future> sstable_directory::get_shards_for_this_sstable(const sstables::entry_descriptor& desc, process_flags flags) const { - auto sst = _manager.make_sstable(_schema, _table_dir, *_storage_opts, desc.generation, _state, desc.version, desc.format, gc_clock::now(), _error_handler_gen); - co_await sst->load_owner_shards(_sharder); - validate(sst, flags); - co_return sst->get_shards_for_this_sstable(); -} - -future sstable_directory::get_open_info_for_this_sstable(const sstables::entry_descriptor& desc) const { - auto sst = co_await load_sstable(std::move(desc)); - co_return co_await sst->get_open_info(); -} - future<> sstable_directory::sort_sstable(sstables::entry_descriptor desc, process_flags flags) { auto sst = co_await load_sstable(desc, flags); diff --git a/sstables/sstable_directory.hh b/sstables/sstable_directory.hh index 044e85851b..e2b3968457 100644 --- a/sstables/sstable_directory.hh +++ b/sstables/sstable_directory.hh @@ -187,11 +187,6 @@ private: // Returns filename for a SSTable from its entry_descriptor. sstring sstable_filename(const sstables::entry_descriptor& desc) const; - // Compute owner of shards for a particular SSTable. - future> get_shards_for_this_sstable(const sstables::entry_descriptor& desc, process_flags flags) const; - // Retrieves sstables::foreign_sstable_open_info for a particular SSTable. - future get_open_info_for_this_sstable(const sstables::entry_descriptor& desc) const; - sstable_directory(sstables_manager& manager, schema_ptr schema, std::variant, const dht::sharder*> sharder, From 369f9111b8b133563c59afa9695d898e70f8fb49 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 13 Aug 2024 12:38:24 +0300 Subject: [PATCH 3/7] sstable_directory: Keep loaded sst in local var This will make next patch shorter. Signed-off-by: Pavel Emelyanov --- sstables/sstable_directory.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index 04cf2f35eb..e61fd66452 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -192,8 +192,9 @@ sstable_directory::process_descriptor(sstables::entry_descriptor desc, process_f if (flags.sort_sstables_according_to_owner) { co_await sort_sstable(std::move(desc), flags); } else { + auto sst = co_await load_sstable(std::move(desc), flags); dirlog.debug("Added {} to unsorted sstables list", sstable_filename(desc)); - _unsorted_sstables.push_back(co_await load_sstable(std::move(desc), flags)); + _unsorted_sstables.push_back(std::move(sst)); } } From aa40aeb72fa3659e826f652f0e8dae0a78b3d360 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 13 Aug 2024 12:36:30 +0300 Subject: [PATCH 4/7] sstable_directory: Log sst->get_filename(), not sstable_filename(desc) There are some places that log sstable Data file name via sstable descriptor. After previous patching all those loggers have sstable at hand and can use sstable::get_filename() instead. Signed-off-by: Pavel Emelyanov --- sstables/sstable_directory.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index e61fd66452..4e93598c80 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -193,7 +193,7 @@ sstable_directory::process_descriptor(sstables::entry_descriptor desc, process_f co_await sort_sstable(std::move(desc), flags); } else { auto sst = co_await load_sstable(std::move(desc), flags); - dirlog.debug("Added {} to unsorted sstables list", sstable_filename(desc)); + dirlog.debug("Added {} to unsorted sstables list", sst->get_filename()); _unsorted_sstables.push_back(std::move(sst)); } } @@ -204,14 +204,14 @@ sstable_directory::sort_sstable(sstables::entry_descriptor desc, process_flags f auto shards = sst->get_shards_for_this_sstable(); if (shards.size() == 1) { if (shards[0] == this_shard_id()) { - dirlog.trace("{} identified as a local unshared SSTable", sstable_filename(desc)); + dirlog.trace("{} identified as a local unshared SSTable", sst->get_filename()); _unshared_local_sstables.push_back(std::move(sst)); } else { - dirlog.trace("{} identified as a remote unshared SSTable, shard={}", sstable_filename(desc), shards[0]); + dirlog.trace("{} identified as a remote unshared SSTable, shard={}", sst->get_filename(), shards[0]); _unshared_remote_sstables[shards[0]].push_back(std::move(desc)); } } else { - dirlog.trace("{} identified as a shared SSTable, shards={}", sstable_filename(desc), shards); + dirlog.trace("{} identified as a shared SSTable, shards={}", sst->get_filename(), shards); _shared_sstable_info.push_back(co_await sst->get_open_info()); } } From d8cb175fb71648db3b96db15d083124a3018ca8c Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 13 Aug 2024 12:38:56 +0300 Subject: [PATCH 5/7] sstable_directory: Remove unused sstable_filename(desc) helper It's unused after previous patch. Signed-off-by: Pavel Emelyanov --- sstables/sstable_directory.cc | 4 ---- sstables/sstable_directory.hh | 3 --- 2 files changed, 7 deletions(-) diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index 4e93598c80..562379083a 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -216,10 +216,6 @@ sstable_directory::sort_sstable(sstables::entry_descriptor desc, process_flags f } } -sstring sstable_directory::sstable_filename(const sstables::entry_descriptor& desc) const { - return sstable::filename(make_path(_table_dir, _state).native(), _schema->ks_name(), _schema->cf_name(), desc.version, desc.generation, desc.format, component_type::Data); -} - generation_type sstable_directory::highest_generation_seen() const { return _max_generation_seen; diff --git a/sstables/sstable_directory.hh b/sstables/sstable_directory.hh index e2b3968457..e725ebd60b 100644 --- a/sstables/sstable_directory.hh +++ b/sstables/sstable_directory.hh @@ -184,9 +184,6 @@ private: // Sort the sstable according to owner future<> sort_sstable(sstables::entry_descriptor desc, process_flags flags); - // Returns filename for a SSTable from its entry_descriptor. - sstring sstable_filename(const sstables::entry_descriptor& desc) const; - sstable_directory(sstables_manager& manager, schema_ptr schema, std::variant, const dht::sharder*> sharder, From da4a5df33999cc728d01996fde54253ddb6273db Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 13 Aug 2024 13:22:30 +0300 Subject: [PATCH 6/7] sstable_directory: Squash sort_sstable() with process_descriptor() The latter (caller) loads sstable, so does the former, so load it once and then put it in either list/set, depending on flags and shard info. Signed-off-by: Pavel Emelyanov --- sstables/sstable_directory.cc | 12 ++++-------- sstables/sstable_directory.hh | 3 --- 2 files changed, 4 insertions(+), 11 deletions(-) diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index 562379083a..2b0030790c 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -189,18 +189,14 @@ sstable_directory::process_descriptor(sstables::entry_descriptor desc, process_f _max_version_seen = desc.version; } - if (flags.sort_sstables_according_to_owner) { - co_await sort_sstable(std::move(desc), flags); - } else { - auto sst = co_await load_sstable(std::move(desc), flags); + auto sst = co_await load_sstable(desc, flags); + + if (!flags.sort_sstables_according_to_owner) { dirlog.debug("Added {} to unsorted sstables list", sst->get_filename()); _unsorted_sstables.push_back(std::move(sst)); + co_return; } -} -future<> -sstable_directory::sort_sstable(sstables::entry_descriptor desc, process_flags flags) { - auto sst = co_await load_sstable(desc, flags); auto shards = sst->get_shards_for_this_sstable(); if (shards.size() == 1) { if (shards[0] == this_shard_id()) { diff --git a/sstables/sstable_directory.hh b/sstables/sstable_directory.hh index e725ebd60b..9f04ccdaee 100644 --- a/sstables/sstable_directory.hh +++ b/sstables/sstable_directory.hh @@ -181,9 +181,6 @@ private: future<> load_foreign_sstables(sstable_entry_descriptor_vector info_vec); - // Sort the sstable according to owner - future<> sort_sstable(sstables::entry_descriptor desc, process_flags flags); - sstable_directory(sstables_manager& manager, schema_ptr schema, std::variant, const dht::sharder*> sharder, From d3870304a9fec9f038f407d58d93db04697f7eaf Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 13 Aug 2024 13:26:06 +0300 Subject: [PATCH 7/7] sstable_directory: Open-code load_sstable() into process_descriptor() There are two load_sstable() overloads, and one of them is only used inside process_descriptor(). What this loading helper does is, in fact, processes given descriptor, so it's worth having it open-coded into its caller. Signed-off-by: Pavel Emelyanov --- sstables/sstable_directory.cc | 18 +++++++----------- sstables/sstable_directory.hh | 1 - 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index 2b0030790c..9665301a87 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -173,23 +173,19 @@ future sstable_directory::load_sstable(sstables::entry co_return sst; } -future sstable_directory::load_sstable(sstables::entry_descriptor desc, process_flags flags) const { - auto sst = co_await load_sstable(std::move(desc), flags.sstable_open_config); - validate(sst, flags); - if (flags.need_mutate_level) { - dirlog.trace("Mutating {} to level 0\n", sst->get_filename()); - co_await sst->mutate_sstable_level(0); - } - co_return sst; -} - future<> sstable_directory::process_descriptor(sstables::entry_descriptor desc, process_flags flags) { if (desc.version > _max_version_seen) { _max_version_seen = desc.version; } - auto sst = co_await load_sstable(desc, flags); + auto sst = co_await load_sstable(desc, flags.sstable_open_config); + validate(sst, flags); + + if (flags.need_mutate_level) { + dirlog.trace("Mutating {} to level 0\n", sst->get_filename()); + co_await sst->mutate_sstable_level(0); + } if (!flags.sort_sstables_according_to_owner) { dirlog.debug("Added {} to unsorted sstables list", sst->get_filename()); diff --git a/sstables/sstable_directory.hh b/sstables/sstable_directory.hh index 9f04ccdaee..06dd0a7de0 100644 --- a/sstables/sstable_directory.hh +++ b/sstables/sstable_directory.hh @@ -177,7 +177,6 @@ private: future<> process_descriptor(sstables::entry_descriptor desc, process_flags flags); void validate(sstables::shared_sstable sst, process_flags flags) const; future load_sstable(sstables::entry_descriptor desc, sstables::sstable_open_config cfg = {}) const; - future load_sstable(sstables::entry_descriptor desc, process_flags flags) const; future<> load_foreign_sstables(sstable_entry_descriptor_vector info_vec);