Add three new system tables for storing raft state for strongly consistent tablets, corresponding to the tables for group0: - system.raft_groups: Stores the raft log, term/vote, snapshot_id, and commit_idx for each tablet's raft group. - system.raft_groups_snapshots: Stores snapshot descriptors (index, term) for each group. - system.raft_groups_snapshot_config: Stores the raft configuration (current and previous voters) for each snapshot. These tables use a (shard, group_id) composite partition key with the newly added raft_groups_partitioner and raft_groups_sharder, ensuring data is co-located with the tablet replica that owns the raft group. The tables are only created when the STRONGLY_CONSISTENT_TABLES experimental feature is enabled.
119 lines
4.6 KiB
C++
119 lines
4.6 KiB
C++
/*
|
|
* Copyright (C) 2023-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include "types/types.hh"
|
|
#include "mutation/timestamp.hh"
|
|
#include "locator/tablets.hh"
|
|
#include "schema/schema_fwd.hh"
|
|
#include "mutation/mutation.hh"
|
|
#include "mutation/canonical_mutation.hh"
|
|
#include "replica/database_fwd.hh"
|
|
|
|
#include <seastar/core/future.hh>
|
|
|
|
#include <vector>
|
|
|
|
|
|
// Forward declaration: this header only refers to cql3::query_processor by
// reference in function signatures, so the complete type is not needed here.
namespace cql3 {
|
|
|
|
class query_processor;
|
|
|
|
}
|
|
|
|
// Forward declaration: this header only refers to gms::feature_service by
// const reference in a function signature, so the complete type is not
// needed here.
namespace gms {
|
|
|
|
class feature_service;
|
|
|
|
}
|
|
|
|
namespace replica {
|
|
|
|
/// Returns the `data_type` used for a tablet replica set.
/// NOTE(review): presumably the type of the cells decoded by
/// tablet_replica_set_from_cell() - confirm against the definition.
data_type get_replica_set_type();
|
|
|
|
/// Returns the `data_type` used for tablet info.
/// NOTE(review): the exact layout of this type is defined elsewhere and is
/// not visible from this header - confirm before relying on it.
data_type get_tablet_info_type();
|
|
|
|
/// Records whether strongly consistent tables are enabled.
/// NOTE(review): the definition is not visible here; presumably this sets
/// state that the schema factories below consult, so it would have to be
/// called before them - confirm scope and ordering requirements.
void set_strongly_consistent_tables_enabled(bool enabled);
|
|
|
|
/// Builds and returns a schema.
/// NOTE(review): presumably the schema of system.tablets, the table the
/// other comments in this header refer to - confirm.
schema_ptr make_tablets_schema();
|
|
|
|
/// Builds and returns the schema of a raft table called `name`.
/// NOTE(review): `is_group0` presumably selects between the group0 layout
/// and the layout used for per-tablet raft groups - confirm.
schema_ptr make_raft_schema(sstring name, bool is_group0);
|
|
/// Builds and returns the schema of a raft snapshots table called `name`.
/// NOTE(review): `is_group0` presumably selects between the group0 layout
/// and the layout used for per-tablet raft groups - confirm.
schema_ptr make_raft_snapshots_schema(sstring name, bool is_group0);
|
|
/// Builds and returns the schema of a raft snapshot config table called `name`.
/// NOTE(review): `is_group0` presumably selects between the group0 layout
/// and the layout used for per-tablet raft groups - confirm.
schema_ptr make_raft_snapshot_config_schema(sstring name, bool is_group0);
|
|
|
|
/// NOTE(review): judging by the name, registers the user types needed by the
/// tablet repair scheduler for keyspace `ks` in `db` - confirm against the
/// definition. `db` is taken by non-const reference, so it may be modified.
void tablet_add_repair_scheduler_user_types(const sstring& ks, replica::database& db);
|
|
|
|
/// Converts a tablet replica set into a vector of `data_value`s.
/// NOTE(review): presumably one element per replica, and the inverse of
/// tablet_replica_set_from_cell() - confirm.
std::vector<data_value> replicas_to_data_value(const locator::tablet_replica_set& replicas);
|
|
|
|
/// Converts information in tablet_map to mutations of system.tablets.
|
|
///
|
|
/// The mutations will delete any older tablet information for the same table.
|
|
/// The provided timestamp should be strictly monotonically increasing
|
|
/// between calls for the overriding to work correctly.
///
/// The unnamed `table_id` together with `keyspace_name` and `table_name`
/// describe the table, and the unnamed `api::timestamp_type` is the timestamp
/// referred to above.
/// NOTE(review): the mutations are presumably delivered through
/// `process_mutation` (which returns a future), one call per mutation -
/// confirm against the definition.
/// NOTE(review): how `features` affects the generated mutations is not
/// visible from this header - confirm.
|
|
future<> tablet_map_to_mutations(const locator::tablet_map&,
|
|
table_id,
|
|
const sstring& keyspace_name,
|
|
const sstring& table_name,
|
|
api::timestamp_type,
|
|
const gms::feature_service& features,
|
|
std::function<future<>(mutation)> process_mutation);
|
|
|
|
/// Builds a single mutation for a table that is related to `base_table`.
/// NOTE(review): judging by the name, the mutation records in system.tablets
/// that the table (first `table_id`, `keyspace_name`, `table_name`) is
/// co-located with `base_table` instead of carrying its own tablet map -
/// confirm against the definition.
mutation colocated_tablet_map_to_mutation(table_id,
|
|
const sstring& keyspace_name,
|
|
const sstring& table_name,
|
|
table_id base_table,
|
|
api::timestamp_type);
|
|
|
|
/// Builds a mutation for the given table and timestamp.
/// NOTE(review): judging by the name, the mutation removes the table's tablet
/// map from system.tablets - confirm against the definition.
mutation make_drop_tablet_map_mutation(table_id, api::timestamp_type);
|
|
|
|
/// Stores a given tablet_metadata in system.tablets.
|
|
///
|
|
/// Overrides tablet maps for tables present in the given tablet metadata.
|
|
/// Does not delete tablet maps for tables which are absent in the given tablet metadata.
|
|
/// The provided timestamp should be strictly monotonically increasing
|
|
/// between calls for tablet map overriding to work correctly.
|
|
/// The timestamp must be greater than api::min_timestamp.
///
/// The timestamp is the unnamed `api::timestamp_type` argument.
/// The returned future signals completion of the operation.
|
|
future<> save_tablet_metadata(replica::database&, const locator::tablet_metadata&, api::timestamp_type);
|
|
|
|
/// Extract a tablet metadata change hint from the tablet mutations.
|
|
///
|
|
/// Mutations which don't mutate the tablet table are ignored.
///
/// NOTE(review): the condition under which an empty optional is returned is
/// not visible here; presumably when none of the mutations touch the tablet
/// table - confirm against the definition.
|
|
std::optional<locator::tablet_metadata_change_hint> get_tablet_metadata_change_hint(const utils::chunked_vector<canonical_mutation>&);
|
|
|
|
/// Update the tablet metadata change hint, with the changes represented by the tablet mutation.
|
|
///
|
|
/// If the mutation belongs to another table, no updates are done.
///
/// The hint is passed by non-const reference and updated in place; nothing
/// is returned.
|
|
void update_tablet_metadata_change_hint(locator::tablet_metadata_change_hint&, const mutation&);
|
|
|
|
/// Reads the replica set from the given cell value.
/// NOTE(review): the cell is presumably expected to hold a value of the type
/// returned by get_replica_set_type() - confirm.
|
|
locator::tablet_replica_set tablet_replica_set_from_cell(const data_value&);
|
|
|
|
/// Reads tablet metadata from system.tablets.
/// The metadata is returned by value through the future.
|
|
future<locator::tablet_metadata> read_tablet_metadata(cql3::query_processor&);
|
|
|
|
/// Reads the set of hosts referenced by tablet replicas.
/// The hosts are returned as a set of host ids through the future.
|
|
future<std::unordered_set<locator::host_id>> read_required_hosts(cql3::query_processor&);
|
|
|
|
/// Update tablet metadata from system.tablets, based on the provided hint.
|
|
///
|
|
/// The hint is used to determine what has changed and only reload the changed
|
|
/// parts from disk, updating the passed-in metadata in-place accordingly.
///
/// The metadata updated in place is the `locator::tablet_metadata&` argument;
/// the hint itself is taken by const reference and is not modified.
|
|
future<> update_tablet_metadata(replica::database& db, cql3::query_processor&, locator::tablet_metadata&, const locator::tablet_metadata_change_hint&);
|
|
|
|
/// Reads tablet metadata from system.tablets in the form of mutations.
/// Unlike the callback of tablet_map_to_mutations(), `process_mutation` here
/// is synchronous: it returns void, not a future.
/// NOTE(review): presumably invoked once per mutation read - confirm.
|
|
future<> read_tablet_mutations(seastar::sharded<database>&, std::function<void(canonical_mutation)> process_mutation);
|
|
|
|
/// Reads tablet transition stage (if any)
/// The result is an optional, so "no stage" is reported as an empty optional.
/// NOTE(review): the tablet is presumably identified by its table (`tid`)
/// and its last token (`last_token`) - confirm against the definition.
|
|
future<std::optional<locator::tablet_transition_stage>> read_tablet_transition_stage(cql3::query_processor& qp, table_id tid, dht::token last_token);
|
|
|
|
/// Validates changes to system.tablets represented by mutations
/// `tm` is the tablet metadata the `mutations` are checked against.
/// NOTE(review): the function returns void, so an invalid change is
/// presumably reported by throwing - confirm the exception type against the
/// definition.
|
|
void validate_tablet_metadata_change(const locator::tablet_metadata& tm, const utils::chunked_vector<canonical_mutation>& mutations);
|
|
|
|
} // namespace replica
|