mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-21 00:50:35 +00:00
The mechanics of the restore are like this - A /storage_service/tablets/restore API is called with (keyspace, table, endpoint, bucket, manifests) parameters - First, it populates the system_distributed.snapshot_sstables table with the data read from the manifests - Then it emplaces a bunch of tablet transitions (of a new "restore" kind), one for each tablet - The topology coordinator handles the "restore" transition by calling a new RESTORE_TABLET RPC against all the current tablet replicas - Each replica handles the RPC verb by - Reading the snapshot_sstables table - Filtering the read sstable infos against current node and tablet being handled - Downloading and attaching the filtered sstables This PR includes system_distributed.snapshot_sstables table from @robertbindar and preparation work from @kreuzerkrieg that extracts raw sstables downloading and attaching from existing generic sstables loading code. This is the first step towards SCYLLADB-197 and lacks many things. In particular - the API only works for single-DC cluster - the caller needs to "lock" tablet boundaries with min/max tablet count - not abortable - no progress tracking - sub-optimal (re-kicking API on restore will re-download everything again) - not re-attachable (if API node dies, restoration proceeds, but the caller cannot "wait" for it to complete via other node) - nodes download sstables in maintenance/streaming sched group (should be moved to maintenance/backup) Other follow-up items: - have an actual swagger object specification for `backup_location` Closes #28436 Closes #28657 Closes #28773 Closes scylladb/scylladb#28763 * github.com:scylladb/scylladb: test: Add test for backup vs migration race test: Restore resilience test sstables_loader: Fail tablet-restore task if not all sstables were downloaded sstables_loader: mark sstables as downloaded after attaching sstables_loader: return shared_sstable from attach_sstable db: add update_sstable_download_status method db: add downloaded column to 
snapshot_sstables db: extract snapshot_sstables TTL into class constant test: Add a test for tablet-aware restore tablets: Implement tablet-aware cluster-wide restore messaging: Add RESTORE_TABLET RPC verb sstables_loader: Add method to download and attach sstables for a tablet tablets: Add restore_config to tablet_transition_info sstables_loader: Add restore_tablets task skeleton test: Add rest_client helper to kick newly introduced API endpoint api: Add /storage_service/tablets/restore endpoint skeleton sstables_loader: Add keyspace and table arguments to manfiest loading helper sstables_loader_helpers: just reformat the code sstables_loader_helpers: generalize argument and variable names sstables_loader_helpers: generalize get_sstables_for_tablet sstables_loader_helpers: add token getters for tablet filtering sstables_loader_helpers: remove underscores from struct members sstables_loader: move download_sstable and get_sstables_for_tablet sstables_loader: extract single-tablet SST filtering sstables_loader: make download_sstable static sstables_loader: fix formating of the new `download_sstable` function sstables_loader: extract single SST download into a function sstables_loader: add shard_id to minimal_sst_info sstables_loader: add function for parsing backup manifests split utility functions for creating test data from database_test export make_storage_options_config from lib/test_services rjson: Add helpers for conversions to dht::token and sstable_id Add system_distributed_keyspace.snapshot_sstables add get_system_distributed_keyspace to cql_test_env code: Add system_distributed_keyspace dependency to sstables_loader storage_service: Export export handle_raft_rpc() helper storage_service: Export do_tablet_operation() storage_service: Split transit_tablet() into two tablets: Add braces around tablet_transition_kind::repair switch
162 lines
7.1 KiB
C++
162 lines
7.1 KiB
C++
/*
|
|
* Copyright (C) 2018-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "schema/schema_fwd.hh"
|
|
#include "service/qos/qos_common.hh"
|
|
#include "utils/UUID.hh"
|
|
#include "utils/chunked_vector.hh"
|
|
#include "cdc/generation_id.hh"
|
|
#include "db/consistency_level_type.hh"
|
|
#include "locator/host_id.hh"
|
|
#include "dht/token.hh"
|
|
#include "sstables/types.hh"
|
|
|
|
#include <seastar/core/future.hh>
|
|
#include <seastar/core/sstring.hh>
|
|
#include <seastar/util/bool_class.hh>
|
|
|
|
#include <optional>
|
|
#include <unordered_map>
|
|
|
|
// Forward declaration: this header only holds a reference to the query processor.
namespace cql3 {
|
|
class query_processor;
|
|
}
|
|
|
|
// Forward declarations of CDC types that appear in the function declarations below.
namespace cdc {
|
|
class stream_id;
|
|
class topology_description;
|
|
class streams_version;
|
|
} // namespace cdc
|
|
|
|
// Forward declarations: this header only holds references to these services.
namespace service {
|
|
class storage_proxy;
|
|
class migration_manager;
|
|
}
|
|
|
|
|
|
namespace db {
|
|
|
|
// Strongly-typed yes/no flag (a bool_class instantiation) recording the download status
// of a snapshot sstable entry; spelled is_downloaded::yes / is_downloaded::no.
using is_downloaded = bool_class<class is_downloaded_tag>;
|
|
|
|
// Per-sstable metadata record; this is the element type returned by
// system_distributed_keyspace::get_snapshot_sstables().
struct snapshot_sstable_entry {
|
|
// Identifier of the sstable this entry describes.
sstables::sstable_id sstable_id;
|
|
// NOTE(review): presumably the first token covered by the sstable — confirm.
dht::token first_token;
|
|
// NOTE(review): presumably the last token covered by the sstable — confirm.
dht::token last_token;
|
|
// NOTE(review): presumably the name of the sstable's TOC component — confirm against the manifest loader.
sstring toc_name;
|
|
// NOTE(review): presumably the storage prefix under which the sstable's files live — confirm.
sstring prefix;
|
|
// Download status of the sstable; defaults to "not downloaded".
is_downloaded downloaded{is_downloaded::no};
|
|
};
|
|
|
|
// Interface to the tables of the `system_distributed` and `system_distributed_everywhere`
// keyspaces (see NAME / NAME_EVERYWHERE): CDC generation descriptions, service levels and
// the per-sstable snapshot metadata used by backup/restore.
// NOTE(review): the object keeps references to the query processor, migration manager and
// storage proxy, presumably bound from the constructor arguments — they must outlive it; confirm.
class system_distributed_keyspace {
|
|
public:
|
|
// Keyspace names.
static constexpr auto NAME = "system_distributed";
|
|
static constexpr auto NAME_EVERYWHERE = "system_distributed_everywhere";
|
|
|
|
// Table names. NOTE(review): presumably both reside in `system_distributed` — confirm.
static constexpr auto VIEW_BUILD_STATUS = "view_build_status";
|
|
static constexpr auto SERVICE_LEVELS = "service_levels";
|
|
|
|
/* Nodes use this table to communicate new CDC stream generations to other nodes. */
|
|
static constexpr auto CDC_TOPOLOGY_DESCRIPTION = "cdc_generation_descriptions";
|
|
|
|
/* Nodes use this table to communicate new CDC stream generations to other nodes.
|
|
* Resides in system_distributed_everywhere. */
|
|
static constexpr auto CDC_GENERATIONS_V2 = "cdc_generation_descriptions_v2";
|
|
|
|
/* This table is used by CDC clients to learn about available CDC streams. */
|
|
static constexpr auto CDC_DESC_V2 = "cdc_streams_descriptions_v2";
|
|
|
|
/* Used by CDC clients to learn CDC generation timestamps. */
|
|
static constexpr auto CDC_TIMESTAMPS = "cdc_generation_timestamps";
|
|
|
|
/* Previous version of the "cdc_streams_descriptions_v2" table.
|
|
* We use it in the upgrade procedure to ensure that CDC generations appearing
|
|
* in the old table also appear in the new table, if necessary. */
|
|
static constexpr auto CDC_DESC_V1 = "cdc_streams_descriptions";
|
|
|
|
/* This table is used by the backup and restore code to store per-sstable metadata.
|
|
* The data the coordinator node puts in this table comes from the snapshot manifests. */
|
|
static constexpr auto SNAPSHOT_SSTABLES = "snapshot_sstables";
|
|
|
|
// Three days expressed in seconds (259200).
// NOTE(review): presumably the TTL applied to rows written by insert_snapshot_sstable() — confirm in the implementation.
// NOTE(review): std::chrono::days is C++20 and needs <chrono>, which this header does not include directly.
static constexpr uint64_t SNAPSHOT_SSTABLES_TTL_SECONDS = std::chrono::seconds(std::chrono::days(3)).count();
|
|
|
|
/* Information required to modify/query some system_distributed tables, passed from the caller. */
|
|
struct context {
|
|
/* How many different token owners (endpoints) are there in the token ring? */
|
|
size_t num_token_owners;
|
|
};
|
|
private:
|
|
// Collaborating services, held by reference (not owned).
cql3::query_processor& _qp;
|
|
service::migration_manager& _mm;
|
|
service::storage_proxy& _sp;
|
|
|
|
// Reported by started(); initially false.
bool _started = false;
|
|
// NOTE(review): presumably remembers that a schema sync of the CDC timestamps table was already forced — confirm in the .cc.
bool _forced_cdc_timestamps_schema_sync = false;
|
|
|
|
public:
|
|
// Schemas of the tables handled by this class.
// NOTE(review): presumably split by keyspace (NAME vs NAME_EVERYWHERE) — confirm.
static std::vector<schema_ptr> all_distributed_tables();
|
|
static std::vector<schema_ptr> all_everywhere_tables();
|
|
|
|
system_distributed_keyspace(cql3::query_processor&, service::migration_manager&, service::storage_proxy&);
|
|
|
|
// Asynchronous lifecycle hooks.
future<> start();
|
|
future<> start_workload_prioritization();
|
|
future<> stop();
|
|
|
|
// Returns the current value of the internal "started" flag.
bool started() const { return _started; }
|
|
|
|
// CDC generation accessors, keyed by a generation UUID.
// read_cdc_generation() yields std::nullopt-capable result, i.e. the description may be absent.
future<> insert_cdc_generation(utils::UUID, const cdc::topology_description&, context);
|
|
future<std::optional<cdc::topology_description>> read_cdc_generation(utils::UUID);
|
|
|
|
// CDC stream description accessors, keyed by a generation timestamp.
future<> create_cdc_desc(db_clock::time_point, const cdc::topology_description&, context);
|
|
future<bool> cdc_desc_exists(db_clock::time_point, context);
|
|
|
|
// Reads and builds generation map - a map from generation timestamps to vector of all stream ids for that generation.
|
|
// Generations with timestamp >= `not_older_than` are returned, plus the one just before it (the straddling generation).
|
|
// Returns empty map if there are no generations with timestamp >= `not_older_than`.
|
|
// NOTE: there's a sibling `read_cdc_for_tablets_versioned_streams`, that reads the same data for tables backed by tablets. The data returned is the same.
|
|
// NOTE: currently used only by alternator
|
|
future<std::map<db_clock::time_point, cdc::streams_version>> cdc_get_versioned_streams(db_clock::time_point not_older_than, context);
|
|
|
|
// Read current generation timestamp for the given table. Throws runtime_error (see `cql3::untyped_result_set::one()`) if table not found.
|
|
// NOTE: there's a sibling `read_cdc_for_tablets_current_generation_timestamp` in `system_keyspace`, that does the same for tables backed up by tablets.
|
|
// NOTE: currently used only by alternator
// NOTE(review): the text above mentions "the given table", but this declaration takes only a `context` — confirm the wording.
|
|
future<db_clock::time_point> cdc_current_generation_timestamp(context);
|
|
|
|
// Service level accessors (read all / read one by name / create-or-update / drop).
// NOTE(review): "create-or-update" semantics of set_service_level() are assumed from the name — confirm.
future<qos::service_levels_info> get_service_levels(qos::query_context ctx) const;
|
|
future<qos::service_levels_info> get_service_level(sstring service_level_name) const;
|
|
future<> set_service_level(sstring service_level_name, qos::service_level_options slo) const;
|
|
future<> drop_service_level(sstring service_level_name) const;
|
|
|
|
/* Inserts a single SSTable entry for a given snapshot, keyspace, table, datacenter,
* and rack, together with its `downloaded` flag. This declaration has no TTL parameter;
* NOTE(review): the row TTL presumably comes from SNAPSHOT_SSTABLES_TTL_SECONDS - confirm
* in the implementation. Uses consistency level `EACH_QUORUM` by default.*/
|
|
future<> insert_snapshot_sstable(sstring snapshot_name, sstring ks, sstring table, sstring dc, sstring rack, sstables::sstable_id sstable_id, dht::token first_token, dht::token last_token, sstring toc_name, sstring prefix, is_downloaded downloaded, db::consistency_level cl = db::consistency_level::EACH_QUORUM);
|
|
|
|
/* Retrieves all SSTable entries for a given snapshot, keyspace, table, datacenter, and rack.
* If `start_token` and `end_token` are provided, only entries whose `first_token` is in the range [`start_token`, `end_token`] will be returned.
* Returns a vector of `snapshot_sstable_entry` structs, whose fields are `sstable_id`, `first_token`, `last_token`,
* `toc_name`, `prefix`, and `downloaded`. Uses consistency level `LOCAL_QUORUM` by default.
* Note that `cl` precedes the optional token bounds in the parameter list. */
|
|
future<utils::chunked_vector<snapshot_sstable_entry>> get_snapshot_sstables(sstring snapshot_name, sstring ks, sstring table, sstring dc, sstring rack, db::consistency_level cl = db::consistency_level::LOCAL_QUORUM, std::optional<dht::token> start_token = std::nullopt, std::optional<dht::token> end_token = std::nullopt) const;
|
|
|
|
// Updates the `downloaded` status of the SSTable entry addressed by snapshot, keyspace,
// table, datacenter, rack and `sstable_id`. Unlike the insert/get methods above, no
// consistency level can be passed here.
// NOTE(review): `start_token` presumably has to match the entry's `first_token` (likely part of the row key) — confirm.
future<> update_sstable_download_status(sstring snapshot_name,
|
|
sstring ks,
|
|
sstring table,
|
|
sstring dc,
|
|
sstring rack,
|
|
sstables::sstable_id sstable_id,
|
|
dht::token start_token,
|
|
is_downloaded downloaded) const;
|
|
|
|
private:
|
|
// Internal helper taking a list of table schemas.
// NOTE(review): presumably creates any of them that do not exist yet — confirm in the .cc.
future<> create_tables(std::vector<schema_ptr> tables);
|
|
};
|
|
|
|
}
|