compaction: Prevent cleanup and regular from compacting the same sstable

Due to regression introduced by 463d0ab, regular can compact in parallel a sstable
being compacted by cleanup, scrub or upgrade.

This redundancy causes resources to be wasted, write amplification is increased
and so does the operation time, etc.

That's a potential source of data resurrection because the now-owned data from
a sstable being compacted by both cleanup and regular will still exist in the
node afterwards, so resurrection can happen if node regains ownership.

Fixes #8155.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20210225172641.787022-1-raphaelsc@scylladb.com>
This commit is contained in:
Raphael S. Carvalho
2021-02-25 14:26:41 -03:00
committed by Avi Kivity
parent cb0b8d1903
commit 2cf0c4bbf1

View File

@@ -632,10 +632,11 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
_tasks.push_back(task);
auto sstables = std::make_unique<std::vector<sstables::shared_sstable>>(get_func(*cf));
auto compacting = make_lw_shared<compacting_sstable_registration>(this, *sstables);
auto sstables_ptr = sstables.get();
_stats.pending_tasks += sstables->size();
task->compaction_done = do_until([sstables_ptr] { return sstables_ptr->empty(); }, [this, task, options, sstables_ptr] () mutable {
task->compaction_done = do_until([sstables_ptr] { return sstables_ptr->empty(); }, [this, task, options, sstables_ptr, compacting] () mutable {
// FIXME: lock cf here
if (!can_proceed(task)) {
@@ -645,7 +646,7 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
auto sst = sstables_ptr->back();
sstables_ptr->pop_back();
return repeat([this, task, options, sst = std::move(sst)] () mutable {
return repeat([this, task, options, sst = std::move(sst), compacting] () mutable {
column_family& cf = *task->compacting_cf;
auto sstable_level = sst->get_sstable_level();
auto run_identifier = sst->run_identifier();
@@ -653,7 +654,6 @@ future<> compaction_manager::rewrite_sstables(column_family* cf, sstables::compa
auto descriptor = sstables::compaction_descriptor({ sst }, cf.get_sstable_set(), service::get_local_compaction_priority(),
sstable_level, sstables::compaction_descriptor::default_max_sstable_bytes, run_identifier, options);
auto compacting = make_lw_shared<compacting_sstable_registration>(this, descriptor.sstables);
// Releases reference to cleaned sstable such that respective used disk space can be freed.
descriptor.release_exhausted = [compacting] (const std::vector<sstables::shared_sstable>& exhausted_sstables) {
compacting->release_compacting(exhausted_sstables);