table: Fix inefficiency when rebuilding statistics with compaction groups

Whenever any compaction group has its SSTable set updated, the table's
rebuild_statistics() is called, and it inefficiently iterates through
the SSTable sets of all compaction groups.

Now each compaction group keeps track of its own statistics, so that
the table's rebuild_statistics() only needs to sum them up.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
This commit is contained in:
Raphael S. Carvalho
2023-02-02 17:10:11 -03:00
parent 529a1239a9
commit 55a8421e3d
3 changed files with 38 additions and 16 deletions

View File

@@ -45,6 +45,8 @@ class compaction_group {
// have not been deleted yet, so must not GC any tombstones in other sstables
// that may delete data in these sstables:
std::vector<sstables::shared_sstable> _sstables_compacted_but_not_deleted;
// Running sum of bytes_on_disk() for the sstables in the main set.
// Incremented by add_sstable() and recomputed from scratch by set_main_sstables().
uint64_t _main_set_disk_space_used = 0;
// Running sum of bytes_on_disk() for the sstables in the maintenance set.
// Incremented by add_maintenance_sstable() and recomputed from scratch by
// set_maintenance_sstables().
uint64_t _maintenance_set_disk_space_used = 0;
private:
// Adds new sstable to the set of sstables
// Doesn't update the cache. The cache must be synchronized in order for reads to see
@@ -57,6 +59,7 @@ private:
enable_backlog_tracker backlog_tracker);
// Update compaction backlog tracker with the same changes applied to the underlying sstable set.
void backlog_tracker_adjust_charges(const std::vector<sstables::shared_sstable>& old_sstables, const std::vector<sstables::shared_sstable>& new_sstables);
// Returns the sum of bytes_on_disk() over every sstable in the given set.
static uint64_t calculate_disk_space_used_for(const sstables::sstable_set& set);
public:
compaction_group(table& t, dht::token_range token_range);
@@ -110,6 +113,10 @@ public:
compaction_backlog_tracker& get_backlog_tracker();
// Number of sstables in the main set plus the maintenance set.
size_t live_sstable_count() const noexcept;
// Bytes on disk used by the main and maintenance sets combined.
uint64_t live_disk_space_used() const noexcept;
// live_disk_space_used() plus the bytes on disk of the sstables that were
// compacted but not deleted yet.
uint64_t total_disk_space_used() const noexcept;
compaction::table_state& as_table_state() const noexcept;
};

View File

@@ -543,7 +543,6 @@ private:
// Tells whether the cache is enabled for this table: it must be enabled in
// the table config and in the schema's caching options.
bool cache_enabled() const {
    if (!_config.enable_cache) {
        // Disabled in the config; the schema is not consulted.
        return false;
    }
    return _schema->caching_options().enabled();
}
void update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable) noexcept;
future<> do_add_sstable_and_update_cache(sstables::shared_sstable sst, sstables::offstrategy offstrategy);
// Helpers which add sstable on behalf of a compaction group and refreshes compound set.
void add_sstable(compaction_group& cg, sstables::shared_sstable sstable);

View File

@@ -407,12 +407,6 @@ void table::notify_bootstrap_or_replace_end() {
trigger_offstrategy_compaction();
}
// Accounts for one more sstable in the table-wide statistics: bumps the live
// sstable count and adds the sstable's size to both the live and the total
// disk space counters.
void table::update_stats_for_new_sstable(uint64_t disk_space_used_by_sstable) noexcept {
    ++_stats.live_sstable_count;
    _stats.total_disk_space_used += disk_space_used_by_sstable;
    _stats.live_disk_space_used += disk_space_used_by_sstable;
}
// Registers a single sstable with the backlog tracker, expressed as a
// replacement of "no sstables" by a list containing only that sstable.
inline void table::add_sstable_to_backlog_tracker(compaction_backlog_tracker& tracker, sstables::shared_sstable sstable) {
    std::vector<sstables::shared_sstable> added;
    added.push_back(std::move(sstable));
    tracker.replace_sstables({}, std::move(added));
}
@@ -438,14 +432,13 @@ compaction_group::do_add_sstable(lw_shared_ptr<sstables::sstable_set> sstables,
if (backlog_tracker) {
table::add_sstable_to_backlog_tracker(get_backlog_tracker(), sstable);
}
// update sstable set last in case either updating
// staging sstables or backlog tracker throws
_t.update_stats_for_new_sstable(sstable->bytes_on_disk());
return new_sstables;
}
// Adds an sstable to the main set (with backlog tracking enabled) and accounts
// for its size in the main set's disk space counter.
void compaction_group::add_sstable(sstables::shared_sstable sstable) {
    // Read the size up front: the sstable pointer is moved away in the next statement.
    const auto added_bytes = sstable->bytes_on_disk();
    _main_sstables = do_add_sstable(_main_sstables, std::move(sstable), enable_backlog_tracker::yes);
    // Only reached once the set was successfully replaced.
    _main_set_disk_space_used += added_bytes;
}
const lw_shared_ptr<sstables::sstable_set>& compaction_group::main_sstables() const noexcept {
@@ -454,10 +447,13 @@ const lw_shared_ptr<sstables::sstable_set>& compaction_group::main_sstables() co
// Replaces the main sstable set wholesale and recomputes its disk space
// counter from the sstables of the newly installed set.
void compaction_group::set_main_sstables(lw_shared_ptr<sstables::sstable_set> new_main_sstables) {
    _main_sstables = std::move(new_main_sstables);
    const sstables::sstable_set& installed = *_main_sstables;
    _main_set_disk_space_used = calculate_disk_space_used_for(installed);
}
// Adds an sstable to the maintenance set (backlog tracking disabled) and
// accounts for its size in the maintenance set's disk space counter.
void compaction_group::add_maintenance_sstable(sstables::shared_sstable sst) {
    // Read the size up front: the sstable pointer is moved away in the next statement.
    const auto added_bytes = sst->bytes_on_disk();
    _maintenance_sstables = do_add_sstable(_maintenance_sstables, std::move(sst), enable_backlog_tracker::no);
    // Only reached once the set was successfully replaced.
    _maintenance_set_disk_space_used += added_bytes;
}
const lw_shared_ptr<sstables::sstable_set>& compaction_group::maintenance_sstables() const noexcept {
@@ -466,6 +462,7 @@ const lw_shared_ptr<sstables::sstable_set>& compaction_group::maintenance_sstabl
// Replaces the maintenance sstable set wholesale and recomputes its disk space
// counter from the sstables of the newly installed set.
void compaction_group::set_maintenance_sstables(lw_shared_ptr<sstables::sstable_set> new_maintenance_sstables) {
    _maintenance_sstables = std::move(new_maintenance_sstables);
    const sstables::sstable_set& installed = *_maintenance_sstables;
    _maintenance_set_disk_space_used = calculate_disk_space_used_for(installed);
}
void table::add_sstable(compaction_group& cg, sstables::shared_sstable sstable) {
@@ -987,18 +984,37 @@ void table::set_metrics() {
}
}
// Number of sstables currently held by this group: main set plus maintenance set.
size_t compaction_group::live_sstable_count() const noexcept {
    // FIXME: switch to sstable_set::size() once available.
    const auto main_count = _main_sstables->all()->size();
    const auto maintenance_count = _maintenance_sstables->all()->size();
    return main_count + maintenance_count;
}
// Bytes on disk used by this group's main and maintenance sets, taken from the
// per-set counters rather than by walking the sets.
uint64_t compaction_group::live_disk_space_used() const noexcept {
    uint64_t live_bytes = _main_set_disk_space_used;
    live_bytes += _maintenance_set_disk_space_used;
    return live_bytes;
}
uint64_t compaction_group::total_disk_space_used() const noexcept {
return live_disk_space_used() + boost::accumulate(_sstables_compacted_but_not_deleted | boost::adaptors::transformed(std::mem_fn(&sstables::sstable::bytes_on_disk)), uint64_t(0));
}
// Walks every sstable in `set` and returns the sum of their bytes_on_disk().
uint64_t compaction_group::calculate_disk_space_used_for(const sstables::sstable_set& set) {
    uint64_t total_bytes = 0;
    set.for_each_sstable([&total_bytes] (const sstables::shared_sstable& sstable) {
        total_bytes += sstable->bytes_on_disk();
    });
    return total_bytes;
}
// Recomputes the table-wide sstable statistics from scratch by summing the
// statistics that each compaction group already tracks for itself.
//
// The sstable sets are deliberately not walked here. Each group's
// live_disk_space_used() covers its main and maintenance sets, and its
// total_disk_space_used() additionally covers the sstables that were compacted
// but not deleted yet. Iterating the sets or the compacted-undeleted lists on
// top of that would count every sstable twice.
void table::rebuild_statistics() {
    _stats.live_disk_space_used = 0;
    _stats.live_sstable_count = 0;
    _stats.total_disk_space_used = 0;

    for (const compaction_group_ptr& cg : compaction_groups()) {
        _stats.live_disk_space_used += cg->live_disk_space_used();
        _stats.total_disk_space_used += cg->total_disk_space_used();
        _stats.live_sstable_count += cg->live_sstable_count();
    }
}