From bed555d1e54c7e7fb545c2b6af1940aac14f4aa2 Mon Sep 17 00:00:00 2001 From: Kamil Braun Date: Thu, 12 Jan 2023 16:21:25 +0100 Subject: [PATCH 1/2] db: system_keyspace: rename 'raft_config' to 'raft_snapshot_config' Make it clear that the table stores the snapshot configuration, which is not necessarily the currently operating configuration (the last one appended to the log). In the future we plan to have a separate virtual table for showing the currently operating configuration, perhaps we will call it `system.raft_config`. --- db/schema_tables.cc | 2 +- db/system_keyspace.cc | 8 ++++---- db/system_keyspace.hh | 6 +++--- docs/dev/raft-in-scylla.md | 6 +++--- service/raft/raft_sys_table_storage.cc | 6 +++--- test/boost/schema_change_test.cc | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/db/schema_tables.cc b/db/schema_tables.cc index 5f6f9e20d3..3c335db8c6 100644 --- a/db/schema_tables.cc +++ b/db/schema_tables.cc @@ -2881,7 +2881,7 @@ static const std::unordered_set& system_ks_null_shard_tables() { SCYLLA_TABLE_SCHEMA_HISTORY, db::system_keyspace::RAFT, db::system_keyspace::RAFT_SNAPSHOTS, - db::system_keyspace::RAFT_CONFIG, + db::system_keyspace::RAFT_SNAPSHOT_CONFIG, db::system_keyspace::GROUP0_HISTORY, db::system_keyspace::DISCOVERY, db::system_keyspace::BROADCAST_KV_STORE, diff --git a/db/system_keyspace.cc b/db/system_keyspace.cc index 0bdf565ba6..a9fb5b7804 100644 --- a/db/system_keyspace.cc +++ b/db/system_keyspace.cc @@ -237,10 +237,10 @@ schema_ptr system_keyspace::raft_snapshots() { return schema; } -schema_ptr system_keyspace::raft_config() { +schema_ptr system_keyspace::raft_snapshot_config() { static thread_local auto schema = [] { - auto id = generate_legacy_id(system_keyspace::NAME, RAFT_CONFIG); - return schema_builder(system_keyspace::NAME, RAFT_CONFIG, std::optional(id)) + auto id = generate_legacy_id(system_keyspace::NAME, RAFT_SNAPSHOT_CONFIG); + return schema_builder(system_keyspace::NAME, RAFT_SNAPSHOT_CONFIG, std::optional(id)) .with_column("group_id", timeuuid_type, column_kind::partition_key) .with_column("my_server_id", uuid_type, column_kind::partition_key) .with_column("server_id", uuid_type, column_kind::clustering_key) @@ -2692,7 +2692,7 @@ std::vector system_keyspace::all_tables(const db::config& cfg) { v3::cdc_local(), }); if (cfg.consistent_cluster_management()) { - r.insert(r.end(), {raft(), raft_snapshots(), raft_config(), group0_history(), discovery()}); + r.insert(r.end(), {raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery()}); if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) { r.insert(r.end(), {broadcast_kv_store()}); diff --git a/db/system_keyspace.hh b/db/system_keyspace.hh index 3dfa5ae06a..60dc338135 100644 --- a/db/system_keyspace.hh +++ b/db/system_keyspace.hh @@ -93,7 +93,7 @@ class system_keyspace : public seastar::peering_sharded_service sharded& _db; std::unique_ptr _cache; - static schema_ptr raft_config(); + static schema_ptr raft_snapshot_config(); static schema_ptr local(); static schema_ptr peers(); static schema_ptr peer_events(); @@ -135,7 +135,7 @@ public: static constexpr auto SCYLLA_LOCAL = "scylla_local"; static constexpr auto RAFT = "raft"; static constexpr auto RAFT_SNAPSHOTS = "raft_snapshots"; - static constexpr auto RAFT_CONFIG = "raft_config"; + static constexpr auto RAFT_SNAPSHOT_CONFIG = "raft_snapshot_config"; static constexpr auto REPAIR_HISTORY = "repair_history"; static constexpr auto GROUP0_HISTORY = "group0_history"; static constexpr auto DISCOVERY = "discovery"; @@ -198,7 +198,7 @@ public: static schema_ptr batchlog(); }; - static constexpr const char* extra_durable_tables[] = { PAXOS, SCYLLA_LOCAL, RAFT, RAFT_SNAPSHOTS, RAFT_CONFIG, DISCOVERY, BROADCAST_KV_STORE }; + static constexpr const char* extra_durable_tables[] = { PAXOS, SCYLLA_LOCAL, RAFT, RAFT_SNAPSHOTS, RAFT_SNAPSHOT_CONFIG, DISCOVERY, BROADCAST_KV_STORE }; static bool is_extra_durable(const sstring& name); diff --git a/docs/dev/raft-in-scylla.md b/docs/dev/raft-in-scylla.md index 68e590c63f..15ae08a392 100644 --- a/docs/dev/raft-in-scylla.md +++ b/docs/dev/raft-in-scylla.md @@ -40,7 +40,7 @@ There are three such tables: works fine with many groups. - `raft_snapshots`, a supporting table storing the so-called snapshot descriptors, - - `raft_config`, a normalized part of raft + - `raft_snapshot_config`, a normalized part of raft `raft_snapshots`, storing the cluster configuration at the time of taking the snapshot. May be out of date with the real cluster configuration, e.g. when configuration @@ -98,8 +98,8 @@ Raft group 0 has an id (UUID) just like any other group. After a node boots, this id is persisted in `scylla_local` system table. If this id is present, the node can start a Raft instance for the group using the last saved state in `raft`, `raft_snapshots` -and `raft_config` system tables, which are all retrievable by -group id. +and `raft_snapshot_config` system tables, which are all retrievable +by group id. If a persisted id is missing, it means the node is bootstrapping and haven't joined Raft yet. diff --git a/service/raft/raft_sys_table_storage.cc b/service/raft/raft_sys_table_storage.cc index 6c9000f904..f39868889d 100644 --- a/service/raft/raft_sys_table_storage.cc +++ b/service/raft/raft_sys_table_storage.cc @@ -129,7 +129,7 @@ future raft_sys_table_storage::load_snapshot_descript // have a single snapshot installed at a time const auto& snp_row = snp_rs->one(); // Fetch current and previous raft configurations for the snapshot - static const auto load_cfg_cql = format("SELECT server_id, disposition, can_vote FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_CONFIG); + static const auto load_cfg_cql = format("SELECT server_id, disposition, can_vote FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG); ::shared_ptr cfg_rs = co_await _qp.execute_internal(load_cfg_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes); raft::configuration cfg; @@ -163,11 +163,11 @@ future<> raft_sys_table_storage::store_snapshot_descriptor(const raft::snapshot_ cql3::query_processor::cache_internal::yes ); // remove old configs - static const auto delete_raft_cfg_cql = format("DELETE FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_CONFIG); + static const auto delete_raft_cfg_cql = format("DELETE FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG); co_await _qp.execute_internal(delete_raft_cfg_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes); // store current and previous raft configurations static const auto store_raft_cfg_cql = format("INSERT INTO system.{} (group_id, my_server_id, server_id, disposition, can_vote) VALUES (?, ?, ?, ?, ?)", - db::system_keyspace::RAFT_CONFIG); + db::system_keyspace::RAFT_SNAPSHOT_CONFIG); for (const raft::config_member& srv : snap.config.current) { co_await _qp.execute_internal(store_raft_cfg_cql, {_group_id.id, _server_id.id, srv.addr.id.id, "CURRENT", srv.can_vote}, diff --git a/test/boost/schema_change_test.cc b/test/boost/schema_change_test.cc index 04075bb698..023f11a685 100644 --- a/test/boost/schema_change_test.cc +++ b/test/boost/schema_change_test.cc @@ -909,7 +909,7 @@ SEASTAR_TEST_CASE(test_schema_tables_use_null_sharder) { BOOST_REQUIRE(it != cf_metadata.end()); BOOST_REQUIRE_EQUAL(it->second->get_sharder().shard_count(), 1); - it = cf_metadata.find("raft_config"); + it = cf_metadata.find("raft_snapshot_config"); BOOST_REQUIRE(it != cf_metadata.end()); BOOST_REQUIRE_EQUAL(it->second->get_sharder().shard_count(), 1); From be390285b6a95d3166a522cfc495ccd61b06b6bf Mon Sep 17 00:00:00 2001 From: Kamil Braun Date: Thu, 12 Jan 2023 16:36:30 +0100 Subject: [PATCH 2/2] db: system_keyspace: remove (my_)server_id column from RAFT_SNAPSHOTS and RAFT_SNAPSHOT_CONFIG A single node will run a single Raft server in any given Raft group, so this column is not necessary. --- db/system_keyspace.cc | 5 ----- service/raft/raft_sys_table_storage.cc | 22 +++++++++++----------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/db/system_keyspace.cc b/db/system_keyspace.cc index a9fb5b7804..47c0d6991e 100644 --- a/db/system_keyspace.cc +++ b/db/system_keyspace.cc @@ -219,10 +219,6 @@ schema_ptr system_keyspace::raft_snapshots() { auto id = generate_legacy_id(NAME, RAFT_SNAPSHOTS); return schema_builder(NAME, RAFT_SNAPSHOTS, std::optional(id)) .with_column("group_id", timeuuid_type, column_kind::partition_key) - // To be able to start multiple raft servers inside one raft group - // on the same node, we need to include the server_id in the - // partition key, as well. - .with_column("server_id", uuid_type, column_kind::partition_key) .with_column("snapshot_id", uuid_type) // Index and term of last entry in the snapshot .with_column("idx", long_type) @@ -242,7 +238,6 @@ schema_ptr system_keyspace::raft_snapshot_config() { auto id = generate_legacy_id(system_keyspace::NAME, RAFT_SNAPSHOT_CONFIG); return schema_builder(system_keyspace::NAME, RAFT_SNAPSHOT_CONFIG, std::optional(id)) .with_column("group_id", timeuuid_type, column_kind::partition_key) - .with_column("my_server_id", uuid_type, column_kind::partition_key) .with_column("server_id", uuid_type, column_kind::clustering_key) .with_column("disposition", ascii_type, column_kind::clustering_key) // can be 'CURRENT` or `PREVIOUS' .with_column("can_vote", boolean_type) diff --git a/service/raft/raft_sys_table_storage.cc b/service/raft/raft_sys_table_storage.cc index f39868889d..57495c513b 100644 --- a/service/raft/raft_sys_table_storage.cc +++ b/service/raft/raft_sys_table_storage.cc @@ -122,15 +122,15 @@ future raft_sys_table_storage::load_snapshot_descript utils::UUID snapshot_id = id_row.get_as("snapshot_id"); // Fetch raft log index and term for the latest snapshot descriptor - static const auto load_snp_info_cql = format("SELECT idx, term FROM system.{} WHERE group_id = ? AND server_id = ?", + static const auto load_snp_info_cql = format("SELECT idx, term FROM system.{} WHERE group_id = ?", db::system_keyspace::RAFT_SNAPSHOTS); - ::shared_ptr snp_rs = co_await _qp.execute_internal(load_snp_info_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes); + ::shared_ptr snp_rs = co_await _qp.execute_internal(load_snp_info_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes); // Should be only one matching row, since each individual server can only // have a single snapshot installed at a time const auto& snp_row = snp_rs->one(); // Fetch current and previous raft configurations for the snapshot - static const auto load_cfg_cql = format("SELECT server_id, disposition, can_vote FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG); - ::shared_ptr cfg_rs = co_await _qp.execute_internal(load_cfg_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes); + static const auto load_cfg_cql = format("SELECT server_id, disposition, can_vote FROM system.{} WHERE group_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG); + ::shared_ptr cfg_rs = co_await _qp.execute_internal(load_cfg_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes); raft::configuration cfg; @@ -155,27 +155,27 @@ future raft_sys_table_storage::load_snapshot_descript future<> raft_sys_table_storage::store_snapshot_descriptor(const raft::snapshot_descriptor& snap, size_t preserve_log_entries) { // TODO: check that snap.idx refers to an already persisted entry return execute_with_linearization_point([this, &snap, preserve_log_entries] () -> future<> { - static const auto store_snp_cql = format("INSERT INTO system.{} (group_id, server_id, snapshot_id, idx, term) VALUES (?, ?, ?, ?, ?)", + static const auto store_snp_cql = format("INSERT INTO system.{} (group_id, snapshot_id, idx, term) VALUES (?, ?, ?, ?)", db::system_keyspace::RAFT_SNAPSHOTS); co_await _qp.execute_internal( store_snp_cql, - {_group_id.id, _server_id.id, snap.id.id, int64_t(snap.idx), int64_t(snap.term)}, + {_group_id.id, snap.id.id, int64_t(snap.idx), int64_t(snap.term)}, cql3::query_processor::cache_internal::yes ); // remove old configs - static const auto delete_raft_cfg_cql = format("DELETE FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG); - co_await _qp.execute_internal(delete_raft_cfg_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes); + static const auto delete_raft_cfg_cql = format("DELETE FROM system.{} WHERE group_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG); + co_await _qp.execute_internal(delete_raft_cfg_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes); // store current and previous raft configurations - static const auto store_raft_cfg_cql = format("INSERT INTO system.{} (group_id, my_server_id, server_id, disposition, can_vote) VALUES (?, ?, ?, ?, ?)", + static const auto store_raft_cfg_cql = format("INSERT INTO system.{} (group_id, server_id, disposition, can_vote) VALUES (?, ?, ?, ?)", db::system_keyspace::RAFT_SNAPSHOT_CONFIG); for (const raft::config_member& srv : snap.config.current) { co_await _qp.execute_internal(store_raft_cfg_cql, - {_group_id.id, _server_id.id, srv.addr.id.id, "CURRENT", srv.can_vote}, + {_group_id.id, srv.addr.id.id, "CURRENT", srv.can_vote}, cql3::query_processor::cache_internal::yes); } for (const raft::config_member& srv : snap.config.previous) { co_await _qp.execute_internal(store_raft_cfg_cql, - {_group_id.id, _server_id.id, srv.addr.id.id, "PREVIOUS", srv.can_vote}, + {_group_id.id, srv.addr.id.id, "PREVIOUS", srv.can_vote}, cql3::query_processor::cache_internal::yes); } // Also update the latest snapshot id in `system.raft` table