Merge 'Simplify some Raft tables' from Kamil Braun

Rename `system.raft_config` to `system.raft_snapshot_config` to make it clearer
what the table stores.

Remove the `my_server_id` partition key column from
`system.raft_snapshot_config` and a corresponding column from
`system.raft_snapshots` which would store the Raft server ID of the local node.
It's unnecessary: all servers running on a given node in different groups will
use the same ID - the Raft ID of the node, which is equal to its Host ID. There
will never be multiple servers running in a single Raft group on the same node.

Closes #12513

* github.com:scylladb/scylladb:
  db: system_keyspace: remove (my_)server_id column from RAFT_SNAPSHOTS and RAFT_SNAPSHOT_CONFIG
  db: system_keyspace: rename 'raft_config' to 'raft_snapshot_config'
This commit is contained in:
Tomasz Grabiec
2023-01-13 00:23:19 +01:00
6 changed files with 24 additions and 29 deletions

View File

@@ -2881,7 +2881,7 @@ static const std::unordered_set<sstring>& system_ks_null_shard_tables() {
SCYLLA_TABLE_SCHEMA_HISTORY,
db::system_keyspace::RAFT,
db::system_keyspace::RAFT_SNAPSHOTS,
db::system_keyspace::RAFT_CONFIG,
db::system_keyspace::RAFT_SNAPSHOT_CONFIG,
db::system_keyspace::GROUP0_HISTORY,
db::system_keyspace::DISCOVERY,
db::system_keyspace::BROADCAST_KV_STORE,

View File

@@ -219,10 +219,6 @@ schema_ptr system_keyspace::raft_snapshots() {
auto id = generate_legacy_id(NAME, RAFT_SNAPSHOTS);
return schema_builder(NAME, RAFT_SNAPSHOTS, std::optional(id))
.with_column("group_id", timeuuid_type, column_kind::partition_key)
// To be able to start multiple raft servers inside one raft group
// on the same node, we need to include the server_id in the
// partition key, as well.
.with_column("server_id", uuid_type, column_kind::partition_key)
.with_column("snapshot_id", uuid_type)
// Index and term of last entry in the snapshot
.with_column("idx", long_type)
@@ -237,12 +233,11 @@ schema_ptr system_keyspace::raft_snapshots() {
return schema;
}
schema_ptr system_keyspace::raft_config() {
schema_ptr system_keyspace::raft_snapshot_config() {
static thread_local auto schema = [] {
auto id = generate_legacy_id(system_keyspace::NAME, RAFT_CONFIG);
return schema_builder(system_keyspace::NAME, RAFT_CONFIG, std::optional(id))
auto id = generate_legacy_id(system_keyspace::NAME, RAFT_SNAPSHOT_CONFIG);
return schema_builder(system_keyspace::NAME, RAFT_SNAPSHOT_CONFIG, std::optional(id))
.with_column("group_id", timeuuid_type, column_kind::partition_key)
.with_column("my_server_id", uuid_type, column_kind::partition_key)
.with_column("server_id", uuid_type, column_kind::clustering_key)
.with_column("disposition", ascii_type, column_kind::clustering_key) // can be 'CURRENT' or 'PREVIOUS'
.with_column("can_vote", boolean_type)
@@ -2692,7 +2687,7 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
v3::cdc_local(),
});
if (cfg.consistent_cluster_management()) {
r.insert(r.end(), {raft(), raft_snapshots(), raft_config(), group0_history(), discovery()});
r.insert(r.end(), {raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery()});
if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) {
r.insert(r.end(), {broadcast_kv_store()});

View File

@@ -93,7 +93,7 @@ class system_keyspace : public seastar::peering_sharded_service<system_keyspace>
sharded<replica::database>& _db;
std::unique_ptr<local_cache> _cache;
static schema_ptr raft_config();
static schema_ptr raft_snapshot_config();
static schema_ptr local();
static schema_ptr peers();
static schema_ptr peer_events();
@@ -135,7 +135,7 @@ public:
static constexpr auto SCYLLA_LOCAL = "scylla_local";
static constexpr auto RAFT = "raft";
static constexpr auto RAFT_SNAPSHOTS = "raft_snapshots";
static constexpr auto RAFT_CONFIG = "raft_config";
static constexpr auto RAFT_SNAPSHOT_CONFIG = "raft_snapshot_config";
static constexpr auto REPAIR_HISTORY = "repair_history";
static constexpr auto GROUP0_HISTORY = "group0_history";
static constexpr auto DISCOVERY = "discovery";
@@ -198,7 +198,7 @@ public:
static schema_ptr batchlog();
};
static constexpr const char* extra_durable_tables[] = { PAXOS, SCYLLA_LOCAL, RAFT, RAFT_SNAPSHOTS, RAFT_CONFIG, DISCOVERY, BROADCAST_KV_STORE };
static constexpr const char* extra_durable_tables[] = { PAXOS, SCYLLA_LOCAL, RAFT, RAFT_SNAPSHOTS, RAFT_SNAPSHOT_CONFIG, DISCOVERY, BROADCAST_KV_STORE };
static bool is_extra_durable(const sstring& name);

View File

@@ -40,7 +40,7 @@ There are three such tables:
works fine with many groups.
- `raft_snapshots`, a supporting table storing the so-called
snapshot descriptors,
- `raft_config`, a normalized part of raft
- `raft_snapshot_config`, a normalized part of raft
`raft_snapshots`, storing the cluster configuration
at the time of taking the snapshot. May be out of date with
the real cluster configuration, e.g. when configuration
@@ -98,8 +98,8 @@ Raft group 0 has an id (UUID) just like any other group. After a
node boots, this id is persisted in `scylla_local` system table.
If this id is present, the node can start a Raft instance for
the group using the last saved state in `raft`, `raft_snapshots`
and `raft_config` system tables, which are all retrievable by
group id.
and `raft_snapshot_config` system tables, which are all retrievable
by group id.
If a persisted id is missing, it means the node is bootstrapping
and hasn't joined Raft yet.

View File

@@ -122,15 +122,15 @@ future<raft::snapshot_descriptor> raft_sys_table_storage::load_snapshot_descript
utils::UUID snapshot_id = id_row.get_as<utils::UUID>("snapshot_id");
// Fetch raft log index and term for the latest snapshot descriptor
static const auto load_snp_info_cql = format("SELECT idx, term FROM system.{} WHERE group_id = ? AND server_id = ?",
static const auto load_snp_info_cql = format("SELECT idx, term FROM system.{} WHERE group_id = ?",
db::system_keyspace::RAFT_SNAPSHOTS);
::shared_ptr<cql3::untyped_result_set> snp_rs = co_await _qp.execute_internal(load_snp_info_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes);
::shared_ptr<cql3::untyped_result_set> snp_rs = co_await _qp.execute_internal(load_snp_info_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes);
// Should be only one matching row, since each individual server can only
// have a single snapshot installed at a time
const auto& snp_row = snp_rs->one();
// Fetch current and previous raft configurations for the snapshot
static const auto load_cfg_cql = format("SELECT server_id, disposition, can_vote FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_CONFIG);
::shared_ptr<cql3::untyped_result_set> cfg_rs = co_await _qp.execute_internal(load_cfg_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes);
static const auto load_cfg_cql = format("SELECT server_id, disposition, can_vote FROM system.{} WHERE group_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG);
::shared_ptr<cql3::untyped_result_set> cfg_rs = co_await _qp.execute_internal(load_cfg_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes);
raft::configuration cfg;
@@ -155,27 +155,27 @@ future<raft::snapshot_descriptor> raft_sys_table_storage::load_snapshot_descript
future<> raft_sys_table_storage::store_snapshot_descriptor(const raft::snapshot_descriptor& snap, size_t preserve_log_entries) {
// TODO: check that snap.idx refers to an already persisted entry
return execute_with_linearization_point([this, &snap, preserve_log_entries] () -> future<> {
static const auto store_snp_cql = format("INSERT INTO system.{} (group_id, server_id, snapshot_id, idx, term) VALUES (?, ?, ?, ?, ?)",
static const auto store_snp_cql = format("INSERT INTO system.{} (group_id, snapshot_id, idx, term) VALUES (?, ?, ?, ?)",
db::system_keyspace::RAFT_SNAPSHOTS);
co_await _qp.execute_internal(
store_snp_cql,
{_group_id.id, _server_id.id, snap.id.id, int64_t(snap.idx), int64_t(snap.term)},
{_group_id.id, snap.id.id, int64_t(snap.idx), int64_t(snap.term)},
cql3::query_processor::cache_internal::yes
);
// remove old configs
static const auto delete_raft_cfg_cql = format("DELETE FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_CONFIG);
co_await _qp.execute_internal(delete_raft_cfg_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes);
static const auto delete_raft_cfg_cql = format("DELETE FROM system.{} WHERE group_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG);
co_await _qp.execute_internal(delete_raft_cfg_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes);
// store current and previous raft configurations
static const auto store_raft_cfg_cql = format("INSERT INTO system.{} (group_id, my_server_id, server_id, disposition, can_vote) VALUES (?, ?, ?, ?, ?)",
db::system_keyspace::RAFT_CONFIG);
static const auto store_raft_cfg_cql = format("INSERT INTO system.{} (group_id, server_id, disposition, can_vote) VALUES (?, ?, ?, ?)",
db::system_keyspace::RAFT_SNAPSHOT_CONFIG);
for (const raft::config_member& srv : snap.config.current) {
co_await _qp.execute_internal(store_raft_cfg_cql,
{_group_id.id, _server_id.id, srv.addr.id.id, "CURRENT", srv.can_vote},
{_group_id.id, srv.addr.id.id, "CURRENT", srv.can_vote},
cql3::query_processor::cache_internal::yes);
}
for (const raft::config_member& srv : snap.config.previous) {
co_await _qp.execute_internal(store_raft_cfg_cql,
{_group_id.id, _server_id.id, srv.addr.id.id, "PREVIOUS", srv.can_vote},
{_group_id.id, srv.addr.id.id, "PREVIOUS", srv.can_vote},
cql3::query_processor::cache_internal::yes);
}
// Also update the latest snapshot id in `system.raft` table

View File

@@ -909,7 +909,7 @@ SEASTAR_TEST_CASE(test_schema_tables_use_null_sharder) {
BOOST_REQUIRE(it != cf_metadata.end());
BOOST_REQUIRE_EQUAL(it->second->get_sharder().shard_count(), 1);
it = cf_metadata.find("raft_config");
it = cf_metadata.find("raft_snapshot_config");
BOOST_REQUIRE(it != cf_metadata.end());
BOOST_REQUIRE_EQUAL(it->second->get_sharder().shard_count(), 1);