mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-12 19:02:12 +00:00
Merge 'Simplify some Raft tables' from Kamil Braun
Rename `system.raft_config` to `system.raft_snapshot_config` to make it clearer what the table stores. Remove the `my_server_id` partition key column from `system.raft_snapshot_config` and the corresponding `server_id` column from `system.raft_snapshots`, both of which stored the Raft server ID of the local node. These columns are unnecessary: all servers running on a given node in different groups use the same ID - the Raft ID of the node, which is equal to its Host ID. Multiple servers will never run in a single Raft group on the same node. Closes #12513 * github.com:scylladb/scylladb: db: system_keyspace: remove (my_)server_id column from RAFT_SNAPSHOTS and RAFT_SNAPSHOT_CONFIG db: system_keyspace: rename 'raft_config' to 'raft_snapshot_config'
This commit is contained in:
@@ -2881,7 +2881,7 @@ static const std::unordered_set<sstring>& system_ks_null_shard_tables() {
|
||||
SCYLLA_TABLE_SCHEMA_HISTORY,
|
||||
db::system_keyspace::RAFT,
|
||||
db::system_keyspace::RAFT_SNAPSHOTS,
|
||||
db::system_keyspace::RAFT_CONFIG,
|
||||
db::system_keyspace::RAFT_SNAPSHOT_CONFIG,
|
||||
db::system_keyspace::GROUP0_HISTORY,
|
||||
db::system_keyspace::DISCOVERY,
|
||||
db::system_keyspace::BROADCAST_KV_STORE,
|
||||
|
||||
@@ -219,10 +219,6 @@ schema_ptr system_keyspace::raft_snapshots() {
|
||||
auto id = generate_legacy_id(NAME, RAFT_SNAPSHOTS);
|
||||
return schema_builder(NAME, RAFT_SNAPSHOTS, std::optional(id))
|
||||
.with_column("group_id", timeuuid_type, column_kind::partition_key)
|
||||
// To be able to start multiple raft servers inside one raft group
|
||||
// on the same node, we need to include the server_id in the
|
||||
// partition key, as well.
|
||||
.with_column("server_id", uuid_type, column_kind::partition_key)
|
||||
.with_column("snapshot_id", uuid_type)
|
||||
// Index and term of last entry in the snapshot
|
||||
.with_column("idx", long_type)
|
||||
@@ -237,12 +233,11 @@ schema_ptr system_keyspace::raft_snapshots() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
schema_ptr system_keyspace::raft_config() {
|
||||
schema_ptr system_keyspace::raft_snapshot_config() {
|
||||
static thread_local auto schema = [] {
|
||||
auto id = generate_legacy_id(system_keyspace::NAME, RAFT_CONFIG);
|
||||
return schema_builder(system_keyspace::NAME, RAFT_CONFIG, std::optional(id))
|
||||
auto id = generate_legacy_id(system_keyspace::NAME, RAFT_SNAPSHOT_CONFIG);
|
||||
return schema_builder(system_keyspace::NAME, RAFT_SNAPSHOT_CONFIG, std::optional(id))
|
||||
.with_column("group_id", timeuuid_type, column_kind::partition_key)
|
||||
.with_column("my_server_id", uuid_type, column_kind::partition_key)
|
||||
.with_column("server_id", uuid_type, column_kind::clustering_key)
|
||||
.with_column("disposition", ascii_type, column_kind::clustering_key) // can be 'CURRENT` or `PREVIOUS'
|
||||
.with_column("can_vote", boolean_type)
|
||||
@@ -2692,7 +2687,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
||||
v3::cdc_local(),
|
||||
});
|
||||
if (cfg.consistent_cluster_management()) {
|
||||
r.insert(r.end(), {raft(), raft_snapshots(), raft_config(), group0_history(), discovery()});
|
||||
r.insert(r.end(), {raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery()});
|
||||
|
||||
if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) {
|
||||
r.insert(r.end(), {broadcast_kv_store()});
|
||||
|
||||
@@ -93,7 +93,7 @@ class system_keyspace : public seastar::peering_sharded_service<system_keyspace>
|
||||
sharded<replica::database>& _db;
|
||||
std::unique_ptr<local_cache> _cache;
|
||||
|
||||
static schema_ptr raft_config();
|
||||
static schema_ptr raft_snapshot_config();
|
||||
static schema_ptr local();
|
||||
static schema_ptr peers();
|
||||
static schema_ptr peer_events();
|
||||
@@ -135,7 +135,7 @@ public:
|
||||
static constexpr auto SCYLLA_LOCAL = "scylla_local";
|
||||
static constexpr auto RAFT = "raft";
|
||||
static constexpr auto RAFT_SNAPSHOTS = "raft_snapshots";
|
||||
static constexpr auto RAFT_CONFIG = "raft_config";
|
||||
static constexpr auto RAFT_SNAPSHOT_CONFIG = "raft_snapshot_config";
|
||||
static constexpr auto REPAIR_HISTORY = "repair_history";
|
||||
static constexpr auto GROUP0_HISTORY = "group0_history";
|
||||
static constexpr auto DISCOVERY = "discovery";
|
||||
@@ -198,7 +198,7 @@ public:
|
||||
static schema_ptr batchlog();
|
||||
};
|
||||
|
||||
static constexpr const char* extra_durable_tables[] = { PAXOS, SCYLLA_LOCAL, RAFT, RAFT_SNAPSHOTS, RAFT_CONFIG, DISCOVERY, BROADCAST_KV_STORE };
|
||||
static constexpr const char* extra_durable_tables[] = { PAXOS, SCYLLA_LOCAL, RAFT, RAFT_SNAPSHOTS, RAFT_SNAPSHOT_CONFIG, DISCOVERY, BROADCAST_KV_STORE };
|
||||
|
||||
static bool is_extra_durable(const sstring& name);
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ There are three such tables:
|
||||
works fine with many groups.
|
||||
- `raft_snapshots`, a supporting table storing the so-called
|
||||
snapshot descriptors,
|
||||
- `raft_config`, a normalized part of raft
|
||||
- `raft_snapshot_config`, a normalized part of raft
|
||||
`raft_snapshots`, storing the cluster configuration
|
||||
at the time of taking the snapshot. May be out of date with
|
||||
the real cluster configuration, e.g. when configuration
|
||||
@@ -98,8 +98,8 @@ Raft group 0 has an id (UUID) just like any other group. After a
|
||||
node boots, this id is persisted in `scylla_local` system table.
|
||||
If this id is present, the node can start a Raft instance for
|
||||
the group using the last saved state in `raft`, `raft_snapshots`
|
||||
and `raft_config` system tables, which are all retrievable by
|
||||
group id.
|
||||
and `raft_snapshot_config` system tables, which are all retrievable
|
||||
by group id.
|
||||
If a persisted id is missing, it means the node is bootstrapping
|
||||
and hasn't joined Raft yet.
|
||||
|
||||
|
||||
@@ -122,15 +122,15 @@ future<raft::snapshot_descriptor> raft_sys_table_storage::load_snapshot_descript
|
||||
utils::UUID snapshot_id = id_row.get_as<utils::UUID>("snapshot_id");
|
||||
|
||||
// Fetch raft log index and term for the latest snapshot descriptor
|
||||
static const auto load_snp_info_cql = format("SELECT idx, term FROM system.{} WHERE group_id = ? AND server_id = ?",
|
||||
static const auto load_snp_info_cql = format("SELECT idx, term FROM system.{} WHERE group_id = ?",
|
||||
db::system_keyspace::RAFT_SNAPSHOTS);
|
||||
::shared_ptr<cql3::untyped_result_set> snp_rs = co_await _qp.execute_internal(load_snp_info_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes);
|
||||
::shared_ptr<cql3::untyped_result_set> snp_rs = co_await _qp.execute_internal(load_snp_info_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes);
|
||||
// Should be only one matching row, since each individual server can only
|
||||
// have a single snapshot installed at a time
|
||||
const auto& snp_row = snp_rs->one();
|
||||
// Fetch current and previous raft configurations for the snapshot
|
||||
static const auto load_cfg_cql = format("SELECT server_id, disposition, can_vote FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_CONFIG);
|
||||
::shared_ptr<cql3::untyped_result_set> cfg_rs = co_await _qp.execute_internal(load_cfg_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes);
|
||||
static const auto load_cfg_cql = format("SELECT server_id, disposition, can_vote FROM system.{} WHERE group_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG);
|
||||
::shared_ptr<cql3::untyped_result_set> cfg_rs = co_await _qp.execute_internal(load_cfg_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes);
|
||||
|
||||
raft::configuration cfg;
|
||||
|
||||
@@ -155,27 +155,27 @@ future<raft::snapshot_descriptor> raft_sys_table_storage::load_snapshot_descript
|
||||
future<> raft_sys_table_storage::store_snapshot_descriptor(const raft::snapshot_descriptor& snap, size_t preserve_log_entries) {
|
||||
// TODO: check that snap.idx refers to an already persisted entry
|
||||
return execute_with_linearization_point([this, &snap, preserve_log_entries] () -> future<> {
|
||||
static const auto store_snp_cql = format("INSERT INTO system.{} (group_id, server_id, snapshot_id, idx, term) VALUES (?, ?, ?, ?, ?)",
|
||||
static const auto store_snp_cql = format("INSERT INTO system.{} (group_id, snapshot_id, idx, term) VALUES (?, ?, ?, ?)",
|
||||
db::system_keyspace::RAFT_SNAPSHOTS);
|
||||
co_await _qp.execute_internal(
|
||||
store_snp_cql,
|
||||
{_group_id.id, _server_id.id, snap.id.id, int64_t(snap.idx), int64_t(snap.term)},
|
||||
{_group_id.id, snap.id.id, int64_t(snap.idx), int64_t(snap.term)},
|
||||
cql3::query_processor::cache_internal::yes
|
||||
);
|
||||
// remove old configs
|
||||
static const auto delete_raft_cfg_cql = format("DELETE FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_CONFIG);
|
||||
co_await _qp.execute_internal(delete_raft_cfg_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes);
|
||||
static const auto delete_raft_cfg_cql = format("DELETE FROM system.{} WHERE group_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG);
|
||||
co_await _qp.execute_internal(delete_raft_cfg_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes);
|
||||
// store current and previous raft configurations
|
||||
static const auto store_raft_cfg_cql = format("INSERT INTO system.{} (group_id, my_server_id, server_id, disposition, can_vote) VALUES (?, ?, ?, ?, ?)",
|
||||
db::system_keyspace::RAFT_CONFIG);
|
||||
static const auto store_raft_cfg_cql = format("INSERT INTO system.{} (group_id, server_id, disposition, can_vote) VALUES (?, ?, ?, ?)",
|
||||
db::system_keyspace::RAFT_SNAPSHOT_CONFIG);
|
||||
for (const raft::config_member& srv : snap.config.current) {
|
||||
co_await _qp.execute_internal(store_raft_cfg_cql,
|
||||
{_group_id.id, _server_id.id, srv.addr.id.id, "CURRENT", srv.can_vote},
|
||||
{_group_id.id, srv.addr.id.id, "CURRENT", srv.can_vote},
|
||||
cql3::query_processor::cache_internal::yes);
|
||||
}
|
||||
for (const raft::config_member& srv : snap.config.previous) {
|
||||
co_await _qp.execute_internal(store_raft_cfg_cql,
|
||||
{_group_id.id, _server_id.id, srv.addr.id.id, "PREVIOUS", srv.can_vote},
|
||||
{_group_id.id, srv.addr.id.id, "PREVIOUS", srv.can_vote},
|
||||
cql3::query_processor::cache_internal::yes);
|
||||
}
|
||||
// Also update the latest snapshot id in `system.raft` table
|
||||
|
||||
@@ -909,7 +909,7 @@ SEASTAR_TEST_CASE(test_schema_tables_use_null_sharder) {
|
||||
BOOST_REQUIRE(it != cf_metadata.end());
|
||||
BOOST_REQUIRE_EQUAL(it->second->get_sharder().shard_count(), 1);
|
||||
|
||||
it = cf_metadata.find("raft_config");
|
||||
it = cf_metadata.find("raft_snapshot_config");
|
||||
BOOST_REQUIRE(it != cf_metadata.end());
|
||||
BOOST_REQUIRE_EQUAL(it->second->get_sharder().shard_count(), 1);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user