Files
scylladb/compaction/task_manager_module.cc
Raphael S. Carvalho 83c70ac04f utils: Extract pretty printers into a header
Can be easily reused elsewhere.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
2023-06-26 21:58:20 -03:00

363 lines
16 KiB
C++

/*
* Copyright (C) 2023-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#include "compaction/task_manager_module.hh"
#include "compaction/compaction_manager.hh"
#include "replica/database.hh"
#include "sstables/sstables.hh"
#include "sstables/sstable_directory.hh"
#include "utils/pretty_printers.hh"
namespace compaction {
struct table_tasks_info {
tasks::task_manager::task_ptr task;
table_info ti;
table_tasks_info(tasks::task_manager::task_ptr t, table_info info)
: task(t)
, ti(info)
{}
};
future<> run_on_table(sstring op, replica::database& db, std::string keyspace, table_info ti, std::function<future<> (replica::table&)> func) {
std::exception_ptr ex;
tasks::tmlogger.debug("Starting {} on {}.{}", op, keyspace, ti.name);
try {
co_await func(db.find_column_family(ti.id));
} catch (const replica::no_such_column_family& e) {
tasks::tmlogger.warn("Skipping {} of {}.{}: {}", op, keyspace, ti.name, e.what());
} catch (...) {
ex = std::current_exception();
tasks::tmlogger.error("Failed {} of {}.{}: {}", op, keyspace, ti.name, ex);
}
if (ex) {
co_await coroutine::return_exception_ptr(std::move(ex));
}
}
// Run on all tables, skipping dropped tables
future<> run_on_existing_tables(sstring op, replica::database& db, std::string keyspace, const std::vector<table_info> local_tables, std::function<future<> (replica::table&)> func) {
for (const auto& ti : local_tables) {
co_await run_on_table(op, db, keyspace, ti, func);
}
}
future<> wait_for_your_turn(seastar::condition_variable& cv, tasks::task_manager::task_ptr& current_task, tasks::task_id id) {
co_await cv.wait([&] {
return current_task && current_task->id() == id;
});
}
future<> run_table_tasks(replica::database& db, std::vector<table_tasks_info> table_tasks, seastar::condition_variable& cv, tasks::task_manager::task_ptr& current_task, bool sort) {
std::exception_ptr ex;
// While compaction is run on one table, the size of tables may significantly change.
// Thus, they are sorted before each invidual compaction and the smallest table is chosen.
while (!table_tasks.empty()) {
try {
if (sort) {
// Major compact smaller tables first, to increase chances of success if low on space.
// Tables will be kept in descending order.
std::ranges::sort(table_tasks, std::greater<>(), [&] (const table_tasks_info& tti) {
try {
return db.find_column_family(tti.ti.id).get_stats().live_disk_space_used;
} catch (const replica::no_such_column_family& e) {
return int64_t(-1);
}
});
}
// Task responsible for the smallest table.
current_task = table_tasks.back().task;
table_tasks.pop_back();
cv.broadcast();
co_await current_task->done();
} catch (...) {
ex = std::current_exception();
current_task = nullptr;
cv.broken(ex);
break;
}
}
if (ex) {
// Wait for all tasks even on failure.
for (auto& tti: table_tasks) {
co_await tti.task->done();
}
co_await coroutine::return_exception_ptr(std::move(ex));
}
}
future<> major_keyspace_compaction_task_impl::run() {
co_await _db.invoke_on_all([&] (replica::database& db) -> future<> {
tasks::task_info parent_info{_status.id, _status.shard};
auto& module = db.get_compaction_manager().get_task_manager_module();
auto task = co_await module.make_and_start_task<shard_major_keyspace_compaction_task_impl>(parent_info, _status.keyspace, _status.id, db, _table_infos);
co_await task->done();
});
}
tasks::is_internal shard_major_keyspace_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> shard_major_keyspace_compaction_task_impl::run() {
seastar::condition_variable cv;
tasks::task_manager::task_ptr current_task;
tasks::task_info parent_info{_status.id, _status.shard};
std::vector<table_tasks_info> table_tasks;
for (auto& ti : _local_tables) {
table_tasks.emplace_back(co_await _module->make_and_start_task<table_major_keyspace_compaction_task_impl>(parent_info, _status.keyspace, ti.name, _status.id, _db, ti, cv, current_task), ti);
}
co_await run_table_tasks(_db, std::move(table_tasks), cv, current_task, true);
}
tasks::is_internal table_major_keyspace_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> table_major_keyspace_compaction_task_impl::run() {
co_await wait_for_your_turn(_cv, _current_task, _status.id);
tasks::task_info info{_status.id, _status.shard};
co_await run_on_table("force_keyspace_compaction", _db, _status.keyspace, _ti, [info] (replica::table& t) {
return t.compact_all_sstables(info);
});
}
future<> cleanup_keyspace_compaction_task_impl::run() {
co_await _db.invoke_on_all([&] (replica::database& db) -> future<> {
auto& module = db.get_compaction_manager().get_task_manager_module();
auto task = co_await module.make_and_start_task<shard_cleanup_keyspace_compaction_task_impl>({_status.id, _status.shard}, _status.keyspace, _status.id, db, _table_infos);
co_await task->done();
});
}
tasks::is_internal shard_cleanup_keyspace_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> shard_cleanup_keyspace_compaction_task_impl::run() {
seastar::condition_variable cv;
tasks::task_manager::task_ptr current_task;
tasks::task_info parent_info{_status.id, _status.shard};
std::vector<table_tasks_info> table_tasks;
for (auto& ti : _local_tables) {
table_tasks.emplace_back(co_await _module->make_and_start_task<table_cleanup_keyspace_compaction_task_impl>(parent_info, _status.keyspace, ti.name, _status.id, _db, ti, cv, current_task), ti);
}
co_await run_table_tasks(_db, std::move(table_tasks), cv, current_task, true);
}
tasks::is_internal table_cleanup_keyspace_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> table_cleanup_keyspace_compaction_task_impl::run() {
co_await wait_for_your_turn(_cv, _current_task, _status.id);
auto owned_ranges_ptr = compaction::make_owned_ranges_ptr(_db.get_keyspace_local_ranges(_status.keyspace));
co_await run_on_table("force_keyspace_cleanup", _db, _status.keyspace, _ti, [&] (replica::table& t) {
return t.perform_cleanup_compaction(owned_ranges_ptr);
});
}
future<> offstrategy_keyspace_compaction_task_impl::run() {
_needed = co_await _db.map_reduce0([&] (replica::database& db) -> future<bool> {
bool needed = false;
tasks::task_info parent_info{_status.id, _status.shard};
auto& module = db.get_compaction_manager().get_task_manager_module();
auto task = co_await module.make_and_start_task<shard_offstrategy_keyspace_compaction_task_impl>(parent_info, _status.keyspace, _status.id, db, _table_infos, needed);
co_await task->done();
co_return needed;
}, false, std::plus<bool>());
}
tasks::is_internal shard_offstrategy_keyspace_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> shard_offstrategy_keyspace_compaction_task_impl::run() {
seastar::condition_variable cv;
tasks::task_manager::task_ptr current_task;
tasks::task_info parent_info{_status.id, _status.shard};
std::vector<table_tasks_info> table_tasks;
for (auto& ti : _table_infos) {
table_tasks.emplace_back(co_await _module->make_and_start_task<table_offstrategy_keyspace_compaction_task_impl>(parent_info, _status.keyspace, ti.name, _status.id, _db, ti, cv, current_task, _needed), ti);
}
co_await run_table_tasks(_db, std::move(table_tasks), cv, current_task, false);
}
tasks::is_internal table_offstrategy_keyspace_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> table_offstrategy_keyspace_compaction_task_impl::run() {
co_await wait_for_your_turn(_cv, _current_task, _status.id);
co_await run_on_table("perform_keyspace_offstrategy_compaction", _db, _status.keyspace, _ti, [this] (replica::table& t) -> future<> {
_needed |= co_await t.perform_offstrategy_compaction();
});
}
future<> upgrade_sstables_compaction_task_impl::run() {
co_await _db.invoke_on_all([&] (replica::database& db) -> future<> {
tasks::task_info parent_info{_status.id, _status.shard};
auto& compaction_module = db.get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<shard_upgrade_sstables_compaction_task_impl>(parent_info, _status.keyspace, _status.id, db, _table_infos, _exclude_current_version);
co_await task->done();
});
}
tasks::is_internal shard_upgrade_sstables_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> shard_upgrade_sstables_compaction_task_impl::run() {
seastar::condition_variable cv;
tasks::task_manager::task_ptr current_task;
tasks::task_info parent_info{_status.id, _status.shard};
std::vector<table_tasks_info> table_tasks;
for (auto& ti : _table_infos) {
table_tasks.emplace_back(co_await _module->make_and_start_task<table_upgrade_sstables_compaction_task_impl>(parent_info, _status.keyspace, ti.name, _status.id, _db, ti, cv, current_task, _exclude_current_version), ti);
}
co_await run_table_tasks(_db, std::move(table_tasks), cv, current_task, false);
}
tasks::is_internal table_upgrade_sstables_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> table_upgrade_sstables_compaction_task_impl::run() {
co_await wait_for_your_turn(_cv, _current_task, _status.id);
auto owned_ranges_ptr = compaction::make_owned_ranges_ptr(_db.get_keyspace_local_ranges(_status.keyspace));
co_await run_on_table("upgrade_sstables", _db, _status.keyspace, _ti, [&] (replica::table& t) -> future<> {
return t.parallel_foreach_table_state([&] (compaction::table_state& ts) -> future<> {
return t.get_compaction_manager().perform_sstable_upgrade(owned_ranges_ptr, ts, _exclude_current_version);
});
});
}
future<> scrub_sstables_compaction_task_impl::run() {
_stats = co_await _db.map_reduce0([&] (replica::database& db) -> future<sstables::compaction_stats> {
sstables::compaction_stats stats;
tasks::task_info parent_info{_status.id, _status.shard};
auto& compaction_module = db.get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<shard_scrub_sstables_compaction_task_impl>(parent_info, _status.keyspace, _status.id, db, _column_families, _opts, stats);
co_await task->done();
co_return stats;
}, sstables::compaction_stats{}, std::plus<sstables::compaction_stats>());
}
tasks::is_internal shard_scrub_sstables_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> shard_scrub_sstables_compaction_task_impl::run() {
_stats = co_await map_reduce(_column_families, [&] (sstring cfname) -> future<sstables::compaction_stats> {
sstables::compaction_stats stats{};
tasks::task_info parent_info{_status.id, _status.shard};
auto& compaction_module = _db.get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<table_scrub_sstables_compaction_task_impl>(parent_info, _status.keyspace, cfname, _status.id, _db, _opts, stats);
co_await task->done();
co_return stats;
}, sstables::compaction_stats{}, std::plus<sstables::compaction_stats>());
}
tasks::is_internal table_scrub_sstables_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> table_scrub_sstables_compaction_task_impl::run() {
auto& cm = _db.get_compaction_manager();
auto& cf = _db.find_column_family(_status.keyspace, _status.table);
co_await cf.parallel_foreach_table_state([&] (compaction::table_state& ts) mutable -> future<> {
auto r = co_await cm.perform_sstable_scrub(ts, _opts);
_stats += r.value_or(sstables::compaction_stats{});
});
}
future<> table_reshaping_compaction_task_impl::run() {
auto start = std::chrono::steady_clock::now();
auto total_size = co_await _dir.map_reduce0([&] (sstables::sstable_directory& d) -> future<uint64_t> {
uint64_t total_shard_size;
tasks::task_info parent_info{_status.id, _status.shard};
auto& compaction_module = _db.local().get_compaction_manager().get_task_manager_module();
auto task = co_await compaction_module.make_and_start_task<shard_reshaping_compaction_task_impl>(parent_info, _status.keyspace, _status.table, _status.id, d, _db, _mode, _creator, _filter, total_shard_size);
co_await task->done();
co_return total_shard_size;
}, uint64_t(0), std::plus<uint64_t>());
if (total_size > 0) {
auto duration = std::chrono::duration_cast<std::chrono::duration<float>>(std::chrono::steady_clock::now() - start);
dblog.info("Reshaped {} in {:.2f} seconds, {}", utils::pretty_printed_data_size(total_size), duration.count(), utils::pretty_printed_throughput(total_size, duration));
}
}
tasks::is_internal shard_reshaping_compaction_task_impl::is_internal() const noexcept {
return tasks::is_internal::yes;
}
future<> shard_reshaping_compaction_task_impl::run() {
auto& table = _db.local().find_column_family(_status.keyspace, _status.table);
uint64_t reshaped_size = 0;
while (true) {
auto reshape_candidates = boost::copy_range<std::vector<sstables::shared_sstable>>(_dir.get_unshared_local_sstables()
| boost::adaptors::filtered([&filter = _filter] (const auto& sst) {
return filter(sst);
}));
auto desc = table.get_compaction_strategy().get_reshaping_job(std::move(reshape_candidates), table.schema(), _mode);
if (desc.sstables.empty()) {
break;
}
if (!reshaped_size) {
dblog.info("Table {}.{} with compaction strategy {} found SSTables that need reshape. Starting reshape process", table.schema()->ks_name(), table.schema()->cf_name(), table.get_compaction_strategy().name());
}
std::vector<sstables::shared_sstable> sstlist;
for (auto& sst : desc.sstables) {
reshaped_size += sst->data_size();
sstlist.push_back(sst);
}
desc.creator = _creator;
std::exception_ptr ex;
try {
co_await table.get_compaction_manager().run_custom_job(table.as_table_state(), sstables::compaction_type::Reshape, "Reshape compaction", [&dir = _dir, &table, sstlist = std::move(sstlist), desc = std::move(desc)] (sstables::compaction_data& info) mutable -> future<> {
sstables::compaction_result result = co_await sstables::compact_sstables(std::move(desc), info, table.as_table_state());
co_await dir.remove_unshared_sstables(std::move(sstlist));
co_await dir.collect_output_unshared_sstables(std::move(result.new_sstables), sstables::sstable_directory::can_be_remote::no);
});
} catch (...) {
ex = std::current_exception();
}
if (ex != nullptr) {
try {
std::rethrow_exception(std::move(ex));
} catch (sstables::compaction_stopped_exception& e) {
dblog.info("Table {}.{} with compaction strategy {} had reshape successfully aborted.", table.schema()->ks_name(), table.schema()->cf_name(), table.get_compaction_strategy().name());
break;
} catch (...) {
dblog.info("Reshape failed for Table {}.{} with compaction strategy {} due to {}", table.schema()->ks_name(), table.schema()->cf_name(), table.get_compaction_strategy().name(), std::current_exception());
break;
}
}
co_await coroutine::maybe_yield();
}
_total_shard_size = reshaped_size;
}
}