scylladb/db/snapshot-ctl.hh
Calle Wilund a5df2e79a7 storage_service: Wait for snapshot/backup before decommission
Fixes: SCYLLADB-244

Disables snapshot control so that any active ops finish/fail
before proceeding with decommission.
Note: snapshot control is provided as an argument, not a member ref,
because storage_service is used from both main and cql_test_env
(the latter has no snapshot_ctl to provide).

The snapshot lockout could be done at the API level, but we want to
run pre-checks before it.

Note: this just disables backup/snapshot fully. It could be re-enabled
after decommission, but that seems somewhat pointless.

v2:
* Add log message to snapshot shutdown
* Make test use log waiting instead of timeouts

Closes scylladb/scylladb#28980
2026-03-16 17:12:57 +02:00
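
For illustration, the change lets the decommission path drain snapshot
activity up front, before running its own pre-checks. A minimal sketch of
the idea, assuming a hypothetical run_decommission helper (the actual
storage_service wiring is not part of this file):

#include <seastar/core/coroutine.hh>
#include "db/snapshot-ctl.hh"

// Hypothetical sketch, not the actual storage_service code: decommission
// first disables snapshot/backup so that in-flight operations finish or
// fail, then proceeds with the topology change.
seastar::future<> run_decommission(seastar::sharded<db::snapshot_ctl>& snap_ctl) {
    co_await snap_ctl.local().disable_all_operations();
    // ... pre-checks and the decommission itself follow here ...
}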


/*
 *
 * Modified by ScyllaDB
 * Copyright (C) 2020-present ScyllaDB
 */

/*
 * SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
 *
 * Copyright (C) 2020-present ScyllaDB
 */
#pragma once

#include <vector>

#include <seastar/core/sharded.hh>
#include <seastar/core/future.hh>
#include <seastar/core/gate.hh>
#include <seastar/core/rwlock.hh>

#include "replica/database_fwd.hh"
#include "tasks/task_manager.hh"

using namespace seastar;

namespace sstables { class storage_manager; }
namespace service { class storage_proxy; }

namespace db {

namespace snapshot {

class task_manager_module : public tasks::task_manager::module {
public:
    task_manager_module(tasks::task_manager& tm) noexcept : tasks::task_manager::module(tm, "snapshot") {}
};

class backup_task_impl;

} // snapshot namespace
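
// Options applied when taking a snapshot; expires_at, when set, gives the
// snapshot an expiry time.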
struct snapshot_options {
    bool skip_flush = false;
    gc_clock::time_point created_at = gc_clock::now();
    std::optional<gc_clock::time_point> expires_at;
};

class snapshot_ctl : public peering_sharded_service<snapshot_ctl> {
public:
    struct table_snapshot_details {
        int64_t total;
        int64_t live;
    };

    struct table_snapshot_details_ext {
        sstring ks;
        sstring cf;
        table_snapshot_details details;
    };

    struct config {
        seastar::scheduling_group backup_sched_group;
    };

    using db_snapshot_details = std::vector<table_snapshot_details_ext>;

    snapshot_ctl(sharded<replica::database>& db, sharded<service::storage_proxy>&, tasks::task_manager& tm, sstables::storage_manager& sstm, config cfg);

    future<> stop();

    sharded<replica::database>& db() { return _db; }

    /**
     * Takes the snapshot for all keyspaces. A snapshot name must be specified.
     *
     * @param tag the tag given to the snapshot; may not be null or empty
     */
    future<> take_snapshot(sstring tag, snapshot_options opts = {}) {
        return take_snapshot(tag, {}, opts);
    }

    /**
     * Takes the snapshot for the given keyspaces. A snapshot name must be specified.
     *
     * @param tag the tag given to the snapshot; may not be null or empty
     * @param keyspace_names the names of the keyspaces to snapshot; empty means "all"
     */
    future<> take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts = {});

    /**
     * Takes a snapshot of multiple tables in one keyspace. A snapshot name must be specified.
     *
     * @param ks_name the keyspace which holds the specified tables
     * @param tables a vector of table names to snapshot
     * @param tag the tag given to the snapshot; may not be null or empty
     */
    future<> take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});

    /**
     * Takes a snapshot of multiple tables, a whole keyspace, or all keyspaces,
     * using a global, cluster-wide, topology-coordinated operation.
     * A snapshot name must be specified.
     *
     * @param ks_names the keyspaces to snapshot
     * @param tables optional - a vector of table names to snapshot
     * @param tag the tag given to the snapshot; may not be null or empty
     */
    future<> take_cluster_column_family_snapshot(std::vector<sstring> ks_names, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});

    /**
     * Removes the snapshot with the given tag from the given keyspaces.
     * If no tag is specified, all snapshots are removed.
     * If a cf_name is specified, only snapshots of that table are removed.
     */
    future<> clear_snapshot(sstring tag, std::vector<sstring> keyspace_names, sstring cf_name);

    future<tasks::task_id> start_backup(sstring endpoint, sstring bucket, sstring prefix, sstring keyspace, sstring table, sstring snapshot_name, bool move_files);

    future<std::unordered_map<sstring, db_snapshot_details>> get_snapshot_details();

    future<int64_t> true_snapshots_size();
    future<int64_t> true_snapshots_size(sstring ks, sstring cf);
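
    /**
     * Disables all snapshot and backup operations: new operations are
     * rejected, and the returned future resolves once active ones have
     * finished or failed. Called before decommission (see the commit
     * message above); operations are not re-enabled afterwards.
     */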
    future<> disable_all_operations();

private:
    config _config;
    sharded<replica::database>& _db;
    sharded<service::storage_proxy>& _sp;
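    // Synchronization for snapshot operations: list operations take _lock
    // for read (see run_snapshot_list_operation below); _ops is the gate
    // that disable_all_operations() closes before decommission.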
    seastar::rwlock _lock;
    seastar::named_gate _ops;
    shared_ptr<snapshot::task_manager_module> _task_manager_module;
    sstables::storage_manager& _storage_manager;

    future<> check_snapshot_not_exist(sstring ks_name, sstring name, std::optional<std::vector<sstring>> filter = {});

    future<> run_snapshot_modify_operation(noncopyable_function<future<>()>&&);
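
    // List (read-only) operations enter the _ops gate and take the read
    // lock on shard 0, so they run concurrently with each other but are
    // held off by shutdown and by in-progress modifications.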
    template <typename Func>
    std::invoke_result_t<Func> run_snapshot_list_operation(Func&& f) {
        return with_gate(_ops, [f = std::move(f), this] () {
            return container().invoke_on(0, [f = std::move(f)] (snapshot_ctl& snap) mutable {
                return with_lock(snap._lock.for_read(), std::move(f));
            });
        });
    }

    friend class snapshot::backup_task_impl;

    future<> do_take_snapshot(sstring tag, std::vector<sstring> keyspace_names, snapshot_options opts = {});
    future<> do_take_column_family_snapshot(sstring ks_name, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
    future<> do_take_cluster_column_family_snapshot(std::vector<sstring> ks_names, std::vector<sstring> tables, sstring tag, snapshot_options opts = {});
};

} // namespace db
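
The header only shows the read side of this pattern; run_snapshot_modify_operation
and disable_all_operations are defined out of line. A minimal sketch of how they
plausibly fit together, assuming the usual Seastar gate/rwlock idioms; the real
definitions in snapshot-ctl.cc may differ (for instance, v2 adds a log message to
the shutdown, and the gate may need closing on every shard):

// Plausible out-of-line definitions (sketch only, not the actual source).
#include "db/snapshot-ctl.hh"

namespace db {

future<> snapshot_ctl::run_snapshot_modify_operation(noncopyable_function<future<>()>&& f) {
    // Mirror of run_snapshot_list_operation, but taking the write lock so
    // modifications are serialized against listings and each other.
    return with_gate(_ops, [f = std::move(f), this] () mutable {
        return container().invoke_on(0, [f = std::move(f)] (snapshot_ctl& snap) mutable {
            return with_lock(snap._lock.for_write(), std::move(f));
        });
    });
}

future<> snapshot_ctl::disable_all_operations() {
    // Closing the gate makes further with_gate() calls fail with
    // gate_closed_exception and resolves once in-flight operations have
    // completed -- the finish/fail behavior the commit relies on.
    return _ops.close();
}

} // namespace db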