// mirror of https://github.com/scylladb/scylladb.git
// synced 2026-04-20 16:40:35 +00:00
/*
|
|
*
|
|
* Modified by ScyllaDB
|
|
* Copyright (C) 2020-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.1 and Apache-2.0)
|
|
*
|
|
* Copyright (C) 2020-present ScyllaDB
|
|
*/
|
|
|
|
#include <algorithm>
|
|
#include <stdexcept>
|
|
#include <seastar/core/coroutine.hh>
|
|
#include <seastar/coroutine/maybe_yield.hh>
|
|
#include <seastar/coroutine/switch_to.hh>
|
|
#include <seastar/coroutine/parallel_for_each.hh>
|
|
#include "db/snapshot-ctl.hh"
|
|
#include "db/snapshot/backup_task.hh"
|
|
#include "db/schema_tables.hh"
|
|
#include "index/secondary_index_manager.hh"
|
|
#include "replica/database.hh"
|
|
#include "replica/global_table_ptr.hh"
|
|
#include "replica/schema_describe_helper.hh"
|
|
#include "sstables/sstables_manager.hh"
|
|
#include "service/storage_proxy.hh"
|
|
|
|
// Logger shared by all snapshot and backup operations in this file.
logging::logger snap_log("snapshots");
|
|
|
|
namespace db {
|
|
|
|
// Wires the snapshot controller into the database, storage proxy, task
// manager and sstables storage manager, and registers the "snapshot"
// task-manager module so snapshot/backup tasks are externally trackable.
snapshot_ctl::snapshot_ctl(sharded<replica::database>& db, sharded<service::storage_proxy>& sp, tasks::task_manager& tm, sstables::storage_manager& sstm, config cfg)
    : _config(std::move(cfg))
    , _db(db)
    , _sp(sp)
    , _ops("snapshot_ctl")  // gate tracking in-flight snapshot/backup operations
    , _task_manager_module(make_shared<snapshot::task_manager_module>(tm))
    , _storage_manager(sstm)
{
    tm.register_module("snapshot", _task_manager_module);
}
|
|
|
|
// Shuts the controller down: first drain and close all ongoing
// snapshot/backup operations, then stop the task-manager module.
future<> snapshot_ctl::stop() {
    return disable_all_operations().then([this] {
        return _task_manager_module->stop();
    });
}
|
|
|
|
// Closes the operations gate, waiting for any in-flight snapshot or backup
// tasks to complete. Idempotent: a no-op if the gate is already closed.
future<> snapshot_ctl::disable_all_operations() {
    if (_ops.is_closed()) {
        co_return;
    }
    if (_ops.get_count() != 0) {
        snap_log.info("Waiting for snapshot/backup tasks to finish");
    }
    co_await _ops.close();
}
|
|
|
|
// Verifies that no table in keyspace `ks_name` (optionally restricted to
// the tables listed in `filter`) already has a snapshot named `name`.
// Throws std::runtime_error on the first snapshot found to exist.
future<> snapshot_ctl::check_snapshot_not_exist(sstring ks_name, sstring name, std::optional<std::vector<sstring>> filter) {
    auto& ks = _db.local().find_keyspace(ks_name);
    return parallel_for_each(ks.metadata()->cf_meta_data(), [this, ks_name = std::move(ks_name), name = std::move(name), filter = std::move(filter)] (auto& pair) {
        auto& cf_name = pair.first;
        // Skip tables not selected by the caller-provided filter.
        // (Was an iterator-pair std::find; the file otherwise uses std::ranges.)
        if (filter && !std::ranges::contains(*filter, cf_name)) {
            return make_ready_future<>();
        }
        auto& cf = _db.local().find_column_family(pair.second);
        return cf.snapshot_exists(name).then([ks_name = std::move(ks_name), name] (bool exists) {
            if (exists) {
                throw std::runtime_error(format("Keyspace {}: snapshot {} already exists.", ks_name, name));
            }
        });
    });
}
|
|
|
|
// Runs a snapshot-mutating operation `f` with the required serialization:
// the call is tracked by the local shard's _ops gate (so stop() waits for
// it), then forwarded to shard 0, where it runs holding the write side of
// the controller's read-write lock — excluding concurrent list operations
// and other modifications.
future<> snapshot_ctl::run_snapshot_modify_operation(noncopyable_function<future<>()>&& f) {
    return with_gate(_ops, [f = std::move(f), this] () mutable {
        return container().invoke_on(0, [f = std::move(f)] (snapshot_ctl& snap) mutable {
            return with_lock(snap._lock.for_write(), std::move(f));
        });
    });
}
|
|
|
|
// Takes a snapshot named `tag` of the given keyspaces (all keyspaces when
// the list is empty). Throws std::runtime_error if no tag is supplied or
// if the tag already exists in any target keyspace.
future<> snapshot_ctl::take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts) {
    if (tag.empty()) {
        throw std::runtime_error("You must supply a snapshot name.");
    }

    // An empty keyspace list means "snapshot every keyspace".
    // (Was `size() == 0` followed by a stray `;` after the block.)
    if (keyspace_names.empty()) {
        std::ranges::copy(_db.local().get_keyspaces() | std::views::keys, std::back_inserter(keyspace_names));
    }

    return run_snapshot_modify_operation([tag = std::move(tag), keyspace_names = std::move(keyspace_names), opts, this] () mutable {
        return do_take_snapshot(std::move(tag), std::move(keyspace_names), opts);
    });
}
|
|
|
|
// Implementation of take_snapshot(); runs on shard 0 under the modify lock.
// First validates that the tag is unused in every requested keyspace, then
// creates the snapshot in all of them.
future<> snapshot_ctl::do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts) {
    // Validation pass: fail before any snapshot has been taken.
    co_await coroutine::parallel_for_each(keyspace_names, [tag, this] (const sstring& keyspace) {
        return check_snapshot_not_exist(keyspace, tag);
    });
    // Creation pass: snapshot each keyspace on all shards in parallel.
    co_await coroutine::parallel_for_each(keyspace_names, [this, tag = std::move(tag), opts] (const sstring& keyspace) {
        return replica::database::snapshot_keyspace_on_all_shards(_db, keyspace, tag, opts);
    });
}
|
|
|
|
// Takes a snapshot named `tag` of the named tables in a single keyspace.
// All three arguments are mandatory; validation happens before the
// operation is scheduled.
future<> snapshot_ctl::take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
    // Validate in this order — keyspace, tables, tag — so callers see the
    // same error they historically did when several arguments are missing.
    if (ks_name.empty()) { throw std::runtime_error("You must supply a keyspace name"); }
    if (tables.empty()) { throw std::runtime_error("You must supply a table name"); }
    if (tag.empty()) { throw std::runtime_error("You must supply a snapshot name."); }

    auto op = [this, ks_name = std::move(ks_name), tables = std::move(tables), tag = std::move(tag), opts] () mutable {
        return do_take_column_family_snapshot(std::move(ks_name), std::move(tables), std::move(tag), opts);
    };
    return run_snapshot_modify_operation(std::move(op));
}
|
|
|
|
// Takes a cluster-wide snapshot named `tag`. Tables may only be named when
// exactly one keyspace is given; an empty keyspace list means "all
// keyspaces".
future<> snapshot_ctl::take_cluster_column_family_snapshot(std::vector<sstring> ks_names, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
    if (tag.empty()) { throw std::invalid_argument("You must supply a snapshot name."); }
    // Naming tables only makes sense for a single keyspace.
    if (ks_names.size() != 1 && !tables.empty()) { throw std::invalid_argument("Cannot name tables when doing multiple keyspaces snapshot"); }
    // No keyspaces given: snapshot them all.
    if (ks_names.empty()) {
        std::ranges::copy(_db.local().get_keyspaces() | std::views::keys, std::back_inserter(ks_names));
    }

    auto op = [this, ks_names = std::move(ks_names), tables = std::move(tables), tag = std::move(tag), opts] () mutable {
        return do_take_cluster_column_family_snapshot(std::move(ks_names), std::move(tables), std::move(tag), opts);
    };
    return run_snapshot_modify_operation(std::move(op));
}
|
|
|
|
// Implementation of take_cluster_column_family_snapshot(); runs on shard 0
// under the modify lock.
//
// When no tables are named, every table of every listed keyspace is
// snapshotted; otherwise the caller has validated there is exactly one
// keyspace, and only its named tables are snapshotted.
future<> snapshot_ctl::do_take_cluster_column_family_snapshot(std::vector<sstring> ks_names, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
    if (tables.empty()) {
        co_await coroutine::parallel_for_each(ks_names, [tag, this] (const auto& ks_name) {
            return check_snapshot_not_exist(ks_name, tag);
        });
        // An empty table name in the multimap means "all tables in the
        // keyspace".
        co_await _sp.local().snapshot_keyspace(
            ks_names | std::views::transform([&](auto& ks) { return std::make_pair(ks, sstring{}); })
                     | std::ranges::to<std::unordered_multimap>(),
            tag, opts
        );
        co_return;
    }
    // (A stray `;` after the if-block above was removed.)

    // take_cluster_column_family_snapshot() guarantees a single keyspace
    // when tables are named; borrow it instead of copying.
    const auto& ks = ks_names.front();
    co_await check_snapshot_not_exist(ks, tag, tables);

    co_await _sp.local().snapshot_keyspace(
        tables | std::views::transform([&](auto& cf) { return std::make_pair(ks, cf); })
               | std::ranges::to<std::unordered_multimap>(),
        tag, opts
    );
}
|
|
|
|
// Maps a user-facing name to the on-disk table name used by snapshots.
// If `name` is a real table in `ks_name`, it is returned unchanged. If it
// is a logical index name whose index is backed by a materialized view,
// the view's backing-table name is returned instead. Anything else
// rethrows the original no_such_column_family.
sstring snapshot_ctl::resolve_table_name(const sstring& ks_name, const sstring& name) const {
    try {
        // Throws no_such_column_family when no table has this name.
        _db.local().find_uuid(ks_name, name);
        return name;
    } catch (const data_dictionary::no_such_column_family&) {
        // The name may be a logical index name (e.g. "myindex").
        // Only indexes with a backing view have a separate backing table
        // that can be snapshotted. Custom indexes such as vector indexes
        // do not, so keep rejecting them here rather than mapping them to
        // a synthetic name.
        auto schema = _db.local().find_indexed_table(ks_name, name);
        if (schema) {
            const auto& im = schema->all_indices().at(name);
            if (db::schema_tables::view_should_exist(im)) {
                return secondary_index::index_table_name(name);
            }
        }
        // Not a table and not a view-backed index: propagate the original
        // no_such_column_family to the caller.
        throw;
    }
}
|
|
|
|
// Implementation of take_column_family_snapshot(); runs on shard 0 under
// the modify lock. Logical index names are first mapped to their backing
// table names, then the snapshot is validated and taken on all shards.
future<> snapshot_ctl::do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts) {
    // In-place rewrite of each requested name to its snapshot-able table name.
    std::ranges::transform(tables, tables.begin(), [&] (const sstring& t) {
        return resolve_table_name(ks_name, t);
    });
    co_await check_snapshot_not_exist(ks_name, tag, tables);
    co_await replica::database::snapshot_tables_on_all_shards(_db, ks_name, std::move(tables), std::move(tag), opts);
}
|
|
|
|
// Removes snapshots named `tag` from the given keyspaces, optionally
// restricted to a single table (or logical index) name `cf_name`.
// Runs as a modify operation: on shard 0, under the write lock.
future<> snapshot_ctl::clear_snapshot(sstring tag, std::vector<sstring> keyspace_names, sstring cf_name) {
    co_return co_await run_snapshot_modify_operation([this, tag = std::move(tag), keyspace_names = std::move(keyspace_names), cf_name = std::move(cf_name)] (this auto) -> future<> {
        // clear_snapshot enumerates keyspace_names and uses cf_name as a
        // filter in each. When cf_name needs resolution (e.g. logical index
        // name -> backing table name), the result may differ per keyspace,
        // so resolve and clear individually.
        if (!cf_name.empty() && !keyspace_names.empty()) {
            std::vector<std::pair<sstring, sstring>> resolved_targets;
            resolved_targets.reserve(keyspace_names.size());

            // Resolve every keyspace first so a later failure doesn't delete
            // snapshots that were already matched in earlier keyspaces.
            for (const auto& ks_name : keyspace_names) {
                resolved_targets.emplace_back(ks_name, resolve_table_name(ks_name, cf_name));
            }
            // All names resolved; now it is safe to delete one by one.
            for (auto& [ks_name, resolved_cf_name] : resolved_targets) {
                co_await _db.local().clear_snapshot(tag, {ks_name}, std::move(resolved_cf_name));
            }
            co_return;
        }
        // No table filter, or no keyspace list: delegate wholesale.
        co_await _db.local().clear_snapshot(std::move(tag), std::move(keyspace_names), cf_name);
    });
}
|
|
|
|
// Lists every snapshot present in the local data directories, keyed by
// snapshot name. Tables backing secondary-index views are reported under
// their user-facing index name rather than the internal backing-table name.
future<std::unordered_map<sstring, snapshot_ctl::db_snapshot_details>>
snapshot_ctl::get_snapshot_details() {
    using snapshot_map = std::unordered_map<sstring, db_snapshot_details>;

    co_return co_await run_snapshot_list_operation(coroutine::lambda([this] () -> future<snapshot_map> {
        auto details = co_await _db.local().get_snapshot_details();

        for (auto& [snapshot_name, snapshot_details] : details) {
            for (auto& entry : snapshot_details) {
                auto found = _db.local().as_data_dictionary().try_find_table(entry.ks, entry.cf);
                if (found && found->schema()->is_view()) {
                    // A view backing a secondary index is an implementation
                    // detail; present the index name instead.
                    auto helper = replica::make_schema_describe_helper(
                        found->schema(), _db.local().as_data_dictionary());
                    if (helper.type == schema_describe_helper::type::index) {
                        entry.cf = secondary_index::index_name_from_table_name(entry.cf);
                    }
                }
            }
        }

        co_return details;
    }));
}
|
|
|
|
// Returns the total "live" size, in bytes, of every snapshot of every
// table on this node, as reported by the per-table snapshot details.
future<int64_t> snapshot_ctl::true_snapshots_size() {
    co_return co_await run_snapshot_list_operation(coroutine::lambda([this] () -> future<int64_t> {
        int64_t total = 0;
        for (auto& [name, details] : co_await _db.local().get_snapshot_details()) {
            // std::accumulate was used here without a visible <numeric>
            // include; std::ranges::fold_left lives in <algorithm>, which
            // this file already includes.
            total += std::ranges::fold_left(details, int64_t(0), [] (int64_t sum, const auto& d) { return sum + d.details.live; });
        }
        co_return total;
    }));
}
|
|
|
|
// Starts an asynchronous backup task that uploads the sstables of snapshot
// `snapshot_name` of `keyspace`.`table` to an object store reachable at
// `endpoint`, under `bucket`/`prefix`. Returns the task-manager id of the
// new task so the caller can track or abort it. When `move_files` is true
// the task is created with the flag that requests removal of the local
// files after upload.
future<tasks::task_id> snapshot_ctl::start_backup(sstring endpoint, sstring bucket, sstring prefix, sstring keyspace, sstring table, sstring snapshot_name, bool move_files) {
    // Backup tasks are coordinated on shard 0; hop there if called elsewhere.
    if (this_shard_id() != 0) {
        co_return co_await container().invoke_on(0, [&](auto& local) {
            return local.start_backup(endpoint, bucket, prefix, keyspace, table, snapshot_name, move_files);
        });
    }

    // Run the remainder in the dedicated backup scheduling group so the
    // upload work is isolated from foreground load.
    co_await coroutine::switch_to(_config.backup_sched_group);
    snap_log.info("Backup sstables from {}({}) to {}", keyspace, snapshot_name, endpoint);
    auto global_table = co_await get_table_on_all_shards(_db, keyspace, table);
    auto& storage_options = global_table->get_storage_options();
    // Only tables stored on local disk have snapshot directories to upload.
    if (!storage_options.is_local_type()) {
        throw std::invalid_argument("not able to backup a non-local table");
    }
    auto& local_storage_options = std::get<data_dictionary::storage_options::local>(storage_options.value);
    //
    // The keyspace data directories and their snapshots are arranged as follows:
    //
    // <data dir>
    // |- <keyspace name1>
    // |  |- <column family name1>
    // |     |- snapshots
    // |        |- <snapshot name1>
    // |           |- <snapshot file1>
    // |           |- <snapshot file2>
    // |           |- ...
    // |        |- <snapshot name2>
    // |        |- ...
    // |  |- <column family name2>
    // |     |- ...
    // |- <keyspace name2>
    //    |- ...
    //
    // Hence the snapshot lives under <table dir>/snapshots/<snapshot name>.
    auto dir = (local_storage_options.dir /
                sstables::snapshots_dir /
                std::string_view(snapshot_name));
    auto task = co_await _task_manager_module->make_and_start_task<::db::snapshot::backup_task_impl>(
        {}, *this, _storage_manager.container(), std::move(endpoint), std::move(bucket), std::move(prefix), keyspace, dir, global_table->schema()->id(), move_files);
    co_return task->id();
}
|
|
|
|
// Returns the total on-disk size, in bytes, of all snapshots of the single
// table `ks`.`cf` on this node.
future<int64_t> snapshot_ctl::true_snapshots_size(sstring ks, sstring cf) {
    co_return co_await run_snapshot_list_operation(coroutine::lambda([this, ks = std::move(ks), cf = std::move(cf)] () -> future<int64_t> {
        auto snapshots = co_await _db.local().find_column_family(ks, cf).get_snapshot_details();
        int64_t sum = 0;
        for (const auto& [tag, details] : snapshots) {
            sum += details.total;
        }
        co_return sum;
    }));
}
|
|
|
|
}
|