mirror of
https://github.com/scylladb/scylladb.git
synced 2026-06-02 13:06:57 +00:00
compaction: fail resharding when out of space prevention is activated
When out-of-space prevention is activated, the compaction manager is
drained and disabled. This caused resharding to silently succeed without
actually processing any SSTables, because:
1. run_custom_job() calls start_compaction() which returns nullopt when
is_disabled() is true, and run_custom_job() would just return
immediately — appearing as a successful no-op.
2. reshard() used throw_if_stopping::no, so even within the compaction
task executor, stopping would be silently swallowed rather than
propagated as an exception.
The SSTable loader interprets a successful return from resharding as
"all SSTables processed", so it proceeds without error, leaving
the unprocessed SSTables orphaned and their data missing from the table.
Fix this with two changes:
- run_custom_job(): when start_compaction() returns nullopt, check
is_disabled() and throw via make_disabled_exception() rather than
returning silently. This ensures callers are always informed when
a job was skipped because compaction is disabled (e.g. due to disk
space pressure), as opposed to a benign skip (e.g. table removed).
- reshard(): change throw_if_stopping::no to throw_if_stopping::yes.
Resharding is mandatory for correct SSTable loading — unlike reshape
which is optional and can be safely skipped, resharding failure must
be propagated to the caller so the loader does not proceed with
incomplete data.
Fixes https://scylladb.atlassian.net/browse/SCYLLADB-2085.
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Closes scylladb/scylladb#30041
(cherry picked from commit ea3615de1e)
Closes scylladb/scylladb#30155
This commit is contained in:
committed by
Botond Dénes
parent
36d3c3dad9
commit
908d3fb53a
@@ -710,6 +710,9 @@ protected:
|
||||
future<> compaction_manager::run_custom_job(compaction_group_view& t, compaction_type type, const char* desc, noncopyable_function<future<>(compaction_data&, compaction_progress_monitor&)> job, tasks::task_info info, throw_if_stopping do_throw_if_stopping) {
|
||||
auto gh = start_compaction(t);
|
||||
if (!gh) {
|
||||
if (is_disabled()) {
|
||||
co_return co_await coroutine::return_exception_ptr(make_disabled_exception(t));
|
||||
}
|
||||
co_return;
|
||||
}
|
||||
|
||||
|
||||
@@ -170,8 +170,12 @@ future<> reshard(sstables::sstable_directory& dir, sstables::sstable_directory::
|
||||
on_internal_error(tasks::tmlogger, format("No compaction group found for table {}.{}", table.schema()->ks_name(), table.schema()->cf_name()));
|
||||
}
|
||||
auto& t = cg->view_for_unrepaired_data();
|
||||
// Resharding is mandatory for SSTable loading — if it cannot proceed (e.g. due to
|
||||
// out-of-space prevention disabling compaction), we must fail loudly rather than
|
||||
// silently skipping and leaving SSTables orphaned.
|
||||
auto& cm = table.get_compaction_manager();
|
||||
co_await coroutine::parallel_for_each(buckets, [&] (std::vector<sstables::shared_sstable>& sstlist) mutable {
|
||||
return table.get_compaction_manager().run_custom_job(t, compaction_type::Reshard, "Reshard compaction", [&] (compaction_data& info, compaction_progress_monitor& progress_monitor) -> future<> {
|
||||
return cm.run_custom_job(t, compaction_type::Reshard, "Reshard compaction", [&] (compaction_data& info, compaction_progress_monitor& progress_monitor) -> future<> {
|
||||
auto erm = table.get_effective_replication_map(); // keep alive around compaction.
|
||||
|
||||
compaction_descriptor desc(sstlist);
|
||||
@@ -184,7 +188,7 @@ future<> reshard(sstables::sstable_directory& dir, sstables::sstable_directory::
|
||||
// input sstables are moved, to guarantee their resources are released once we're done
|
||||
// resharding them.
|
||||
co_await when_all_succeed(dir.collect_output_unshared_sstables(std::move(result.new_sstables), sstables::sstable_directory::can_be_remote::yes), dir.remove_sstables(std::move(sstlist))).discard_result();
|
||||
}, parent_info, throw_if_stopping::no);
|
||||
}, parent_info, throw_if_stopping::yes);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user