diff --git a/db/schema_tables.cc b/db/schema_tables.cc index 5f6f9e20d3..3c335db8c6 100644 --- a/db/schema_tables.cc +++ b/db/schema_tables.cc @@ -2881,7 +2881,7 @@ static const std::unordered_set& system_ks_null_shard_tables() { SCYLLA_TABLE_SCHEMA_HISTORY, db::system_keyspace::RAFT, db::system_keyspace::RAFT_SNAPSHOTS, - db::system_keyspace::RAFT_CONFIG, + db::system_keyspace::RAFT_SNAPSHOT_CONFIG, db::system_keyspace::GROUP0_HISTORY, db::system_keyspace::DISCOVERY, db::system_keyspace::BROADCAST_KV_STORE, diff --git a/db/system_keyspace.cc b/db/system_keyspace.cc index 0bdf565ba6..47c0d6991e 100644 --- a/db/system_keyspace.cc +++ b/db/system_keyspace.cc @@ -219,10 +219,6 @@ schema_ptr system_keyspace::raft_snapshots() { auto id = generate_legacy_id(NAME, RAFT_SNAPSHOTS); return schema_builder(NAME, RAFT_SNAPSHOTS, std::optional(id)) .with_column("group_id", timeuuid_type, column_kind::partition_key) - // To be able to start multiple raft servers inside one raft group - // on the same node, we need to include the server_id in the - // partition key, as well. - .with_column("server_id", uuid_type, column_kind::partition_key) .with_column("snapshot_id", uuid_type) // Index and term of last entry in the snapshot .with_column("idx", long_type) @@ -237,12 +233,11 @@ schema_ptr system_keyspace::raft_snapshots() { return schema; } -schema_ptr system_keyspace::raft_config() { +schema_ptr system_keyspace::raft_snapshot_config() { static thread_local auto schema = [] { - auto id = generate_legacy_id(system_keyspace::NAME, RAFT_CONFIG); - return schema_builder(system_keyspace::NAME, RAFT_CONFIG, std::optional(id)) + auto id = generate_legacy_id(system_keyspace::NAME, RAFT_SNAPSHOT_CONFIG); + return schema_builder(system_keyspace::NAME, RAFT_SNAPSHOT_CONFIG, std::optional(id)) .with_column("group_id", timeuuid_type, column_kind::partition_key) - .with_column("my_server_id", uuid_type, column_kind::partition_key) .with_column("server_id", uuid_type, column_kind::clustering_key) .with_column("disposition", ascii_type, column_kind::clustering_key) // can be 'CURRENT` or `PREVIOUS' .with_column("can_vote", boolean_type) @@ -2692,7 +2687,7 @@ std::vector system_keyspace::all_tables(const db::config& cfg) { v3::cdc_local(), }); if (cfg.consistent_cluster_management()) { - r.insert(r.end(), {raft(), raft_snapshots(), raft_config(), group0_history(), discovery()}); + r.insert(r.end(), {raft(), raft_snapshots(), raft_snapshot_config(), group0_history(), discovery()}); if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) { r.insert(r.end(), {broadcast_kv_store()}); diff --git a/db/system_keyspace.hh b/db/system_keyspace.hh index 3dfa5ae06a..60dc338135 100644 --- a/db/system_keyspace.hh +++ b/db/system_keyspace.hh @@ -93,7 +93,7 @@ class system_keyspace : public seastar::peering_sharded_service sharded& _db; std::unique_ptr _cache; - static schema_ptr raft_config(); + static schema_ptr raft_snapshot_config(); static schema_ptr local(); static schema_ptr peers(); static schema_ptr peer_events(); @@ -135,7 +135,7 @@ public: static constexpr auto SCYLLA_LOCAL = "scylla_local"; static constexpr auto RAFT = "raft"; static constexpr auto RAFT_SNAPSHOTS = "raft_snapshots"; - static constexpr auto RAFT_CONFIG = "raft_config"; + static constexpr auto RAFT_SNAPSHOT_CONFIG = "raft_snapshot_config"; static constexpr auto REPAIR_HISTORY = "repair_history"; static constexpr auto GROUP0_HISTORY = "group0_history"; static constexpr auto DISCOVERY = "discovery"; @@ -198,7 +198,7 @@ public: static schema_ptr batchlog(); }; - static constexpr const char* extra_durable_tables[] = { PAXOS, SCYLLA_LOCAL, RAFT, RAFT_SNAPSHOTS, RAFT_CONFIG, DISCOVERY, BROADCAST_KV_STORE }; + static constexpr const char* extra_durable_tables[] = { PAXOS, SCYLLA_LOCAL, RAFT, RAFT_SNAPSHOTS, RAFT_SNAPSHOT_CONFIG, DISCOVERY, BROADCAST_KV_STORE }; static bool is_extra_durable(const sstring& name); diff --git a/docs/dev/raft-in-scylla.md b/docs/dev/raft-in-scylla.md index 68e590c63f..15ae08a392 100644 --- a/docs/dev/raft-in-scylla.md +++ b/docs/dev/raft-in-scylla.md @@ -40,7 +40,7 @@ There are three such tables: works fine with many groups. - `raft_snapshots`, a supporting table storing the so-called snapshot descriptors, - - `raft_config`, a normalized part of raft + - `raft_snapshot_config`, a normalized part of raft `raft_snapshots`, storing the cluster configuration at the time of taking the snapshot. May be out of date with the real cluster configuration, e.g. when configuration @@ -98,8 +98,8 @@ Raft group 0 has an id (UUID) just like any other group. After a node boots, this id is persisted in `scylla_local` system table. If this id is present, the node can start a Raft instance for the group using the last saved state in `raft`, `raft_snapshots` -and `raft_config` system tables, which are all retrievable by -group id. +and `raft_snapshot_config` system tables, which are all retrievable +by group id. If a persisted id is missing, it means the node is bootstrapping and haven't joined Raft yet. diff --git a/service/raft/raft_sys_table_storage.cc b/service/raft/raft_sys_table_storage.cc index 6c9000f904..57495c513b 100644 --- a/service/raft/raft_sys_table_storage.cc +++ b/service/raft/raft_sys_table_storage.cc @@ -122,15 +122,15 @@ future raft_sys_table_storage::load_snapshot_descript utils::UUID snapshot_id = id_row.get_as("snapshot_id"); // Fetch raft log index and term for the latest snapshot descriptor - static const auto load_snp_info_cql = format("SELECT idx, term FROM system.{} WHERE group_id = ? AND server_id = ?", + static const auto load_snp_info_cql = format("SELECT idx, term FROM system.{} WHERE group_id = ?", db::system_keyspace::RAFT_SNAPSHOTS); - ::shared_ptr snp_rs = co_await _qp.execute_internal(load_snp_info_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes); + ::shared_ptr snp_rs = co_await _qp.execute_internal(load_snp_info_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes); // Should be only one matching row, since each individual server can only // have a single snapshot installed at a time const auto& snp_row = snp_rs->one(); // Fetch current and previous raft configurations for the snapshot - static const auto load_cfg_cql = format("SELECT server_id, disposition, can_vote FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_CONFIG); - ::shared_ptr cfg_rs = co_await _qp.execute_internal(load_cfg_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes); + static const auto load_cfg_cql = format("SELECT server_id, disposition, can_vote FROM system.{} WHERE group_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG); + ::shared_ptr cfg_rs = co_await _qp.execute_internal(load_cfg_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes); raft::configuration cfg; @@ -155,27 +155,27 @@ future raft_sys_table_storage::load_snapshot_descript future<> raft_sys_table_storage::store_snapshot_descriptor(const raft::snapshot_descriptor& snap, size_t preserve_log_entries) { // TODO: check that snap.idx refers to an already persisted entry return execute_with_linearization_point([this, &snap, preserve_log_entries] () -> future<> { - static const auto store_snp_cql = format("INSERT INTO system.{} (group_id, server_id, snapshot_id, idx, term) VALUES (?, ?, ?, ?, ?)", + static const auto store_snp_cql = format("INSERT INTO system.{} (group_id, snapshot_id, idx, term) VALUES (?, ?, ?, ?)", db::system_keyspace::RAFT_SNAPSHOTS); co_await _qp.execute_internal( store_snp_cql, - {_group_id.id, _server_id.id, snap.id.id, int64_t(snap.idx), int64_t(snap.term)}, + {_group_id.id, snap.id.id, int64_t(snap.idx), int64_t(snap.term)}, cql3::query_processor::cache_internal::yes ); // remove old configs - static const auto delete_raft_cfg_cql = format("DELETE FROM system.{} WHERE group_id = ? AND my_server_id = ?", db::system_keyspace::RAFT_CONFIG); - co_await _qp.execute_internal(delete_raft_cfg_cql, {_group_id.id, _server_id.id}, cql3::query_processor::cache_internal::yes); + static const auto delete_raft_cfg_cql = format("DELETE FROM system.{} WHERE group_id = ?", db::system_keyspace::RAFT_SNAPSHOT_CONFIG); + co_await _qp.execute_internal(delete_raft_cfg_cql, {_group_id.id}, cql3::query_processor::cache_internal::yes); // store current and previous raft configurations - static const auto store_raft_cfg_cql = format("INSERT INTO system.{} (group_id, my_server_id, server_id, disposition, can_vote) VALUES (?, ?, ?, ?, ?)", - db::system_keyspace::RAFT_CONFIG); + static const auto store_raft_cfg_cql = format("INSERT INTO system.{} (group_id, server_id, disposition, can_vote) VALUES (?, ?, ?, ?)", + db::system_keyspace::RAFT_SNAPSHOT_CONFIG); for (const raft::config_member& srv : snap.config.current) { co_await _qp.execute_internal(store_raft_cfg_cql, - {_group_id.id, _server_id.id, srv.addr.id.id, "CURRENT", srv.can_vote}, + {_group_id.id, srv.addr.id.id, "CURRENT", srv.can_vote}, cql3::query_processor::cache_internal::yes); } for (const raft::config_member& srv : snap.config.previous) { co_await _qp.execute_internal(store_raft_cfg_cql, - {_group_id.id, _server_id.id, srv.addr.id.id, "PREVIOUS", srv.can_vote}, + {_group_id.id, srv.addr.id.id, "PREVIOUS", srv.can_vote}, cql3::query_processor::cache_internal::yes); } // Also update the latest snapshot id in `system.raft` table diff --git a/test/boost/schema_change_test.cc b/test/boost/schema_change_test.cc index 04075bb698..023f11a685 100644 --- a/test/boost/schema_change_test.cc +++ b/test/boost/schema_change_test.cc @@ -909,7 +909,7 @@ SEASTAR_TEST_CASE(test_schema_tables_use_null_sharder) { BOOST_REQUIRE(it != cf_metadata.end()); BOOST_REQUIRE_EQUAL(it->second->get_sharder().shard_count(), 1); - it = cf_metadata.find("raft_config"); + it = cf_metadata.find("raft_snapshot_config"); BOOST_REQUIRE(it != cf_metadata.end()); BOOST_REQUIRE_EQUAL(it->second->get_sharder().shard_count(), 1);