Files
scylladb/locator/topology.hh
Avi Kivity 0ae22a09d4 LICENSE: Update to version 1.1
Updated terms of non-commercial use (must be a never-customer).
2026-04-12 19:46:33 +03:00

531 lines
16 KiB
C++

/*
*
* Modified by ScyllaDB
* Copyright (C) 2022-present ScyllaDB
*/
/*
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.1 and Apache-2.0)
*/
#pragma once
#include <boost/functional/hash.hpp>
#include <algorithm>
#include <cstddef>
#include <functional>
#include <unordered_set>
#include <unordered_map>
#include <iostream>
#include <random>
#include <seastar/core/future.hh>
#include <seastar/core/sstring.hh>
#include <seastar/util/bool_class.hh>
#include "locator/types.hh"
#include "inet_address_vectors.hh"
// NOTE(review): a using-directive at header scope is visible to every file
// that includes this header. The declarations below depend on it for
// unqualified names such as sstring, bool_class and future, so it cannot be
// dropped without qualifying those uses first.
using namespace seastar;
// Declared at global scope; locator::topology names it as a friend
// (`friend struct ::sort_by_proximity_topology;`).
struct sort_by_proximity_topology;
namespace locator {
// Forward declaration: locator::node holds a `const locator::topology*`
// before the full topology definition is available.
class topology;
}
namespace locator {
// Forward declaration so that node_holder can be named before node is defined.
class node;
// Owning handle to a node; this is the type returned by node::make() and
// node::clone() and stored by topology.
using node_holder = std::unique_ptr<node>;
// Re-export of seastar::shard_id into namespace locator.
using shard_id = seastar::shard_id;
// A list of rack names.
using rack_list = std::vector<sstring>;
// Describes one entry of a locator::topology: its host id, dc/rack location,
// lifecycle state, shard count and the `excluded` / `draining` flags.
//
// A node can be neither copied nor moved, and two nodes compare equal only
// when they are the very same object. Apart from set_excluded() and
// set_draining(), every mutator is private and therefore reachable only from
// the befriended topology class.
class node {
public:
    using this_node = bool_class<struct this_node_tag>;
    using idx_type = int;

    // Lifecycle state of the node. The trailing comments give the group
    // reported by is_joining() / is_leaving().
    enum class state {
        none = 0,
        bootstrapping,          // (joining)
        replacing,              // (joining)
        normal,
        being_decommissioned,   // (leaving)
        being_removed,          // (leaving)
        being_replaced,         // (leaving)
        left                    // after decommissioned, removed, replaced
    };

private:
    const locator::topology* _topology;
    locator::host_id _host_id;
    endpoint_dc_rack _dc_rack;
    state _state;
    shard_id _shard_count = 0;
    bool _excluded = false;
    bool _draining = false;
    // Is this node the `localhost` instance
    this_node _is_this_node;
    // Negative means "unassigned" (see idx()).
    idx_type _idx = -1;

public:
    node(const locator::topology* topology,
         locator::host_id id,
         endpoint_dc_rack dc_rack,
         state state,
         shard_id shard_count = 0,
         bool excluded = false,
         this_node is_this_node = this_node::no,
         idx_type idx = -1,
         bool draining = false);

    node(const node&) = delete;
    node(node&&) = delete;

    // Identity comparison: true only when both operands are the same object.
    bool operator==(const node& other) const noexcept {
        return this == &other;
    }

    const locator::topology* topology() const noexcept {
        return _topology;
    }

    const locator::host_id& host_id() const noexcept {
        return _host_id;
    }

    const endpoint_dc_rack& dc_rack() const noexcept {
        return _dc_rack;
    }

    const sstring& dc() const noexcept {
        return _dc_rack.dc;
    }

    const sstring& rack() const noexcept {
        return _dc_rack.rack;
    }

    // Is this "localhost"?
    this_node is_this_node() const noexcept { return _is_this_node; }

    // idx < 0 means "unassigned"
    idx_type idx() const noexcept { return _idx; }

    state get_state() const noexcept { return _state; }

    // True while the node is bootstrapping or replacing another node.
    bool is_joining() const noexcept {
        return _state == state::bootstrapping || _state == state::replacing;
    }

    bool is_bootstrapping() const noexcept {
        return _state == state::bootstrapping;
    }

    bool is_normal() const noexcept {
        return _state == state::normal;
    }

    // Excluded nodes are still part of topology, possibly present in replica sets, but
    // are not going to be up again and can be ignored in barriers.
    bool is_excluded() const noexcept {
        return _excluded;
    }

    void set_excluded(bool excluded) noexcept {
        _excluded = excluded;
    }

    // Indicates that the tablet scheduler should move tablet replicas away
    // from this node because it's undergoing decommission or removenode operation.
    bool is_draining() const noexcept {
        return _draining;
    }

    void set_draining(bool draining) noexcept {
        _draining = draining;
    }

    // True while the node is being decommissioned, removed or replaced.
    bool is_leaving() const noexcept {
        return _state == state::being_decommissioned
            || _state == state::being_removed
            || _state == state::being_replaced;
    }

    // A member is a node that is either normal or leaving.
    bool is_member() const noexcept {
        return is_normal() || is_leaving();
    }

    bool left() const noexcept {
        return _state == state::left;
    }

    bool is_none() const noexcept {
        return _state == state::none;
    }

    shard_id get_shard_count() const noexcept { return _shard_count; }

    // Textual name of a state value (defined out of line).
    static std::string to_string(state);

private:
    // Factory with the same parameters as the constructor, returning an
    // owning node_holder. Private: only topology may create nodes this way.
    static node_holder make(const locator::topology* topology,
                            locator::host_id id,
                            endpoint_dc_rack dc_rack,
                            state state,
                            shard_id shard_count = 0,
                            bool excluded = false,
                            node::this_node is_this_node = this_node::no,
                            idx_type idx = -1,
                            bool draining = false);
    node_holder clone() const;

    void set_topology(const locator::topology* topology) noexcept { _topology = topology; }
    void set_idx(idx_type idx) noexcept { _idx = idx; }
    void set_state(state state) noexcept { _state = state; }
    void set_shard_count(shard_id shard_count) noexcept { _shard_count = shard_count; }

    friend class topology;
};
// Registry of the nodes known to this process, indexed by host_id, by
// datacenter and by datacenter/rack, together with the local node's
// configuration.
//
// The class declares move operations only, so it is not copyable; a copy is
// produced asynchronously through clone_gently().
class topology {
public:
    // Identity and location of the local node, plus tuning switches.
    struct config {
        inet_address this_endpoint;
        inet_address this_cql_address;  // corresponds to broadcast_rpc_address
        host_id this_host_id;
        endpoint_dc_rack local_dc_rack;
        bool disable_proximity_sorting = false;

        bool operator==(const config&) const = default;
    };

    // Tag type selecting the shallow-copy constructor overload.
    struct shallow_copy{};
    topology(shallow_copy, config cfg);
    topology(config cfg);
    topology(topology&&) noexcept;
    topology& operator=(topology&&) noexcept;

    future<topology> clone_gently() const;
    future<> clear_gently() noexcept;

public:
    const config& get_config() const noexcept { return _cfg; }

    void set_host_id_cfg(host_id this_host_id);

    // Returns the node object representing the local node, or nullptr when
    // none is set.
    const node* this_node() const noexcept {
        return _this_node;
    }

    // Adds a node with given host_id, DC/rack, state and shard count.
    const node& add_node(host_id id, const endpoint_dc_rack& dr, node::state state,
                         shard_id shard_count = 0);

    // Optionally updates node's current host_id, DC/rack, state, or shard count.
    // Note: the host_id may be updated from null to non-null after a new node gets a new, random host_id,
    // or a peer node host_id may be updated when the node is replaced with another node using the same ip address.
    void update_node(node& node,
                     std::optional<host_id> opt_id,
                     std::optional<endpoint_dc_rack> opt_dr,
                     std::optional<node::state> opt_st,
                     std::optional<shard_id> opt_shard_count = std::nullopt);

    // Removes a node using its host_id
    // Returns true iff the node was found and removed.
    bool remove_node(host_id id);

    // Looks up a node by its host_id.
    // Returns a pointer to the node if found, or nullptr otherwise.
    const node* find_node(host_id id) const noexcept;
    node* find_node(host_id id) noexcept;

    // Like find_node(host_id), but throws std::runtime_error (with a
    // backtrace attached) when the node is not found.
    const node& get_node(host_id id) const {
        auto n = find_node(id);
        if (!n) {
            throw_with_backtrace<std::runtime_error>(format("Node {} not found in topology", id));
        }
        return *n;
    }

    // Finds a node by its index
    // Returns a pointer to the node if found, or nullptr otherwise.
    const node* find_node(node::idx_type idx) const noexcept;

    // Returns true if a node with given host_id is found
    bool has_node(host_id id) const noexcept;

    /**
     * Stores current DC/rack assignment for the node identified by `id`.
     *
     * Adds or updates a node with given host_id.
     */
    const node& add_or_update_endpoint(host_id id, std::optional<endpoint_dc_rack> opt_dr = std::nullopt,
                                       std::optional<node::state> opt_st = std::nullopt,
                                       std::optional<shard_id> shard_count = std::nullopt);

    bool remove_endpoint(locator::host_id ep);

    // DC name -> host ids in that DC.
    const std::unordered_map<sstring,
                             std::unordered_set<host_id>>&
    get_datacenter_endpoints() const {
        return _dc_endpoints;
    }

    std::unordered_map<sstring, std::unordered_set<host_id>>
    get_datacenter_host_ids() const;

    // DC name -> nodes in that DC.
    const std::unordered_map<sstring,
                             std::unordered_set<std::reference_wrapper<const node>>>&
    get_datacenter_nodes() const {
        return _dc_nodes;
    }

    // DC name -> (rack name -> nodes in that rack).
    const std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<std::reference_wrapper<const node>>>>&
    get_datacenter_rack_nodes() const noexcept {
        return _dc_rack_nodes;
    }

    // DC name -> (rack name -> host ids in that rack).
    const std::unordered_map<sstring,
                             std::unordered_map<sstring,
                                                std::unordered_set<host_id>>>&
    get_datacenter_racks() const {
        return _dc_racks;
    }

    const std::unordered_set<sstring>& get_datacenters() const noexcept {
        return _datacenters;
    }

    // Get dc/rack location of this node
    // Falls back to the configured local_dc_rack while no local node object is set.
    const endpoint_dc_rack& get_location() const noexcept {
        return _this_node ? _this_node->dc_rack() : _cfg.local_dc_rack;
    }

    // Get dc/rack location of a node identified by host_id
    // The specified node must exist.
    // When find_node() does not find it, the lookup is delegated to
    // get_location_slow(), which is defined out of line.
    const endpoint_dc_rack& get_location(host_id id) const {
        if (auto node = find_node(id)) {
            return node->dc_rack();
        }
        return get_location_slow(id);
    }

    // Get datacenter of this node
    const sstring& get_datacenter() const noexcept {
        return get_location().dc;
    }

    // Get datacenter of a node identified by host_id
    // The specified node must exist.
    const sstring& get_datacenter(host_id id) const {
        return get_location(id).dc;
    }

    // Get rack of this node
    const sstring& get_rack() const noexcept {
        return get_location().rack;
    }

    // Get rack of a node identified by host_id
    // The specified node must exist.
    const sstring& get_rack(host_id id) const {
        return get_location(id).rack;
    }

    utils::UUID get_rack_uuid() const;

    // Returns a predicate that is true for host ids located in the local
    // datacenter. The predicate keeps a copy of the local DC name and a
    // pointer to this topology, so it must not outlive the topology.
    auto get_local_dc_filter() const noexcept {
        return [ this, local_dc = get_datacenter() ] (auto ep) {
            return get_datacenter(ep) == local_dc;
        };
    }

    // Counts the elements of `endpoints` that are in the local datacenter.
    // Uses the range-based algorithm so that every type satisfying the
    // std::ranges::range constraint is handled, including ranges without
    // member begin()/end() or with a sentinel end type.
    template <std::ranges::range Range>
    inline size_t count_local_endpoints(const Range& endpoints) const {
        return static_cast<size_t>(std::ranges::count_if(endpoints, get_local_dc_filter()));
    }

    bool can_sort_by_proximity() const noexcept {
        return _sort_by_proximity;
    }

    /**
     * This method will sort the addresses list by proximity to the given host_id,
     * if `can_sort_by_proximity()`.
     */
    void sort_by_proximity(locator::host_id address, host_id_vector_replica_set& addresses) const;

    /**
     * Unconditionally sort the addresses list by proximity to the given host_id,
     * assuming `can_sort_by_proximity`.
     */
    void do_sort_by_proximity(locator::host_id address, host_id_vector_replica_set& addresses) const;

    /**
     * Calculates topology-distance between two endpoints.
     *
     * The closest nodes to a given node are:
     * 1. The node itself
     * 2. Nodes in the same RACK
     * 3. Nodes in the same DC
     */
    static int distance(const locator::host_id& address, const endpoint_dc_rack& loc, const locator::host_id& address1, const endpoint_dc_rack& loc1) noexcept;

    // Executes a function for each node in a state other than "none" and "left".
    void for_each_node(std::function<void(const node&)> func) const;

    // Returns references to all nodes in a state other than "none" and "left".
    std::unordered_set<std::reference_wrapper<const node>> get_nodes() const;

    // Returns ids of all nodes in a state other than "none" and "left".
    std::unordered_set<locator::host_id> get_all_host_ids() const;

    host_id my_host_id() const noexcept {
        return _cfg.this_host_id;
    }

    inet_address my_address() const noexcept {
        return _cfg.this_endpoint;
    }

    inet_address my_cql_address() const noexcept {
        return _cfg.this_cql_address;
    }

    // True when `id` equals the configured local host id.
    bool is_me(const locator::host_id& id) const noexcept {
        return id == my_host_id();
    }

private:
    using random_engine_type = std::mt19937_64;

    bool is_configured_this_node(const node&) const;
    const node& add_node(node_holder node);
    void remove_node(const node& node);

    static std::string debug_format(const node*);

    void index_node(const node& node);
    void unindex_node(const node& node);
    node_holder pop_node(const node& node);

    // Strips constness from a node pointer handed out through the const API.
    static node* make_mutable(const node* nptr) {
        return const_cast<node*>(nptr);
    }

    void seed_random_engine(random_engine_type::result_type);

    const endpoint_dc_rack& get_location_slow(host_id id) const;

    unsigned _shard;
    config _cfg;
    const node* _this_node = nullptr;
    std::vector<node_holder> _nodes;
    std::unordered_map<host_id, std::reference_wrapper<const node>> _nodes_by_host_id;
    std::unordered_map<sstring, std::unordered_set<std::reference_wrapper<const node>>> _dc_nodes;
    std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<std::reference_wrapper<const node>>>> _dc_rack_nodes;

    /** multi-map: DC -> endpoints in that DC */
    std::unordered_map<sstring,
                       std::unordered_set<host_id>>
        _dc_endpoints;

    /** map: DC -> (multi-map: rack -> endpoints in that rack) */
    std::unordered_map<sstring,
                       std::unordered_map<sstring,
                                          std::unordered_set<host_id>>>
        _dc_racks;

    bool _sort_by_proximity = true;

    std::unordered_set<sstring> _datacenters;

    mutable random_engine_type _random_engine;

    friend class token_metadata_impl;
    friend struct ::sort_by_proximity_topology;

public:
    // NOTE(review): judging by the names these are test-only entry points;
    // confirm against the implementation file.
    void test_compare_endpoints(const locator::host_id& address, const locator::host_id& a1, const locator::host_id& a2) const;
    void test_sort_by_proximity(const locator::host_id& address, const host_id_vector_replica_set& nodes) const;
};
} // namespace locator
namespace std {
template<>
struct hash<std::reference_wrapper<const locator::node>> {
std::size_t operator()(const std::reference_wrapper<const locator::node>& ref) const {
return std::hash<const locator::node*>()(&ref.get());
}
};
// Compares two reference_wrapper<const locator::node> values by applying
// locator::node's operator== to the referenced nodes.
template<>
struct equal_to<std::reference_wrapper<const locator::node>> {
    bool operator()(const std::reference_wrapper<const locator::node>& lhs,
                    const std::reference_wrapper<const locator::node>& rhs) const {
        const locator::node& first = lhs.get();
        const locator::node& second = rhs.get();
        return first == second;
    }
};
// Stream insertion for a node and for a node state; both are defined out of
// line.
// NOTE(review): these overloads are declared inside namespace std. The C++
// standard only permits adding template specializations there, so consider
// moving them to namespace locator (argument-dependent lookup would still
// find them) together with their definitions.
std::ostream& operator<<(std::ostream& out, const locator::node& node);
std::ostream& operator<<(std::ostream& out, const locator::node::state& state);
} // namespace std
// fmt formatter for locator::node.
// An empty format spec prints only the node's host_id; the `v` spec
// (as in "{:v}") selects the verbose multi-field form.
template <>
struct fmt::formatter<locator::node> : fmt::formatter<string_view> {
    bool verbose = false;

    // Consumes an optional 'v'. Any other character found before the closing
    // brace is rejected with fmt::format_error.
    constexpr auto parse(fmt::format_parse_context& ctx) {
        auto cursor = ctx.begin();
        const auto last = ctx.end();
        if (cursor == last) {
            return cursor;
        }
        if (*cursor == 'v') {
            verbose = true;
            ++cursor;
        }
        if (cursor != last && *cursor != '}') {
            throw fmt::format_error("invalid format specifier");
        }
        return cursor;
    }

    // Writes the node to the context's output iterator in the form chosen by parse().
    template <typename FormatContext>
    auto format(const locator::node& node, FormatContext& ctx) const {
        if (verbose) {
            return fmt::format_to(ctx.out(), " idx={} host_id={} dc={} rack={} state={} shards={} excluded={} draining={} this_node={}",
                    node.idx(),
                    node.host_id(),
                    node.dc(),
                    node.rack(),
                    locator::node::to_string(node.get_state()),
                    node.get_shard_count(),
                    node.is_excluded(),
                    node.is_draining(),
                    static_cast<bool>(node.is_this_node()));
        }
        return fmt::format_to(ctx.out(), "{}", node.host_id());
    }
};
// fmt formatter for locator::node::state: prints the name produced by
// locator::node::to_string(). parse() is inherited from the string_view
// formatter; format() does not consult the parsed spec.
template <>
struct fmt::formatter<locator::node::state> : fmt::formatter<string_view> {
    template <typename FormatContext>
    auto format(const locator::node::state& state, FormatContext& ctx) const {
        const std::string state_name = locator::node::to_string(state);
        return fmt::format_to(ctx.out(), "{}", state_name);
    }
};