mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-23 01:50:35 +00:00
When a topology barrier is blocked for longer than the configured threshold (2s), stale versions are marked as stalled, and when they are released they report a backtrace to the logs. This should help identify what was holding on to the token metadata pointer for too long. Example log: token_metadata - topology version 30 held for 299.159 [s] past expiry, released at: 0x2397ae1 0x23a36b6 ... Closes scylladb/scylladb#17427
1295 lines
44 KiB
C++
1295 lines
44 KiB
C++
/*
|
|
* Copyright (C) 2015-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
*/
|
|
|
|
#include "token_metadata.hh"
|
|
#include <optional>
|
|
#include "locator/snitch_base.hh"
|
|
#include "locator/abstract_replication_strategy.hh"
|
|
#include "locator/tablets.hh"
|
|
#include "log.hh"
|
|
#include "partition_range_compat.hh"
|
|
#include <unordered_map>
|
|
#include <algorithm>
|
|
#include <boost/icl/interval.hpp>
|
|
#include <boost/icl/interval_map.hpp>
|
|
#include <seastar/core/coroutine.hh>
|
|
#include <seastar/coroutine/maybe_yield.hh>
|
|
#include <boost/range/adaptors.hpp>
|
|
#include <seastar/core/smp.hh>
|
|
#include "utils/stall_free.hh"
|
|
|
|
namespace locator {
|
|
|
|
static logging::logger tlogger("token_metadata");
|
|
|
|
/// Erases every entry of the associative `container` whose mapped value
/// (`it->second`) compares equal to `value`.
///
/// `value` is received as a copy, so it stays valid even if the caller
/// passed a reference to an element that is erased here.
template <typename C, typename V>
static void remove_by_value(C& container, V value) {
    auto it = container.begin();
    while (it != container.end()) {
        if (it->second == value) {
            // erase() returns the iterator following the removed entry.
            it = container.erase(it);
        } else {
            ++it;
        }
    }
}
|
|
|
|
class token_metadata_impl final {
|
|
private:
|
|
/**
|
|
* Maintains token to endpoint map of every node in the cluster.
|
|
* Each Token is associated with exactly one Address, but each Address may have
|
|
* multiple tokens. Hence, the BiMultiValMap collection.
|
|
*/
|
|
// FIXME: have to be BiMultiValMap
|
|
std::unordered_map<token, host_id> _token_to_endpoint_map;
|
|
|
|
// Track the unique set of nodes in _token_to_endpoint_map
|
|
std::unordered_set<host_id> _normal_token_owners;
|
|
|
|
std::unordered_map<token, host_id> _bootstrap_tokens;
|
|
std::unordered_set<host_id> _leaving_endpoints;
|
|
// The map between the existing node to be replaced and the replacing node
|
|
std::unordered_map<host_id, host_id> _replacing_endpoints;
|
|
|
|
std::optional<topology_change_info> _topology_change_info;
|
|
|
|
std::vector<token> _sorted_tokens;
|
|
|
|
tablet_metadata _tablets;
|
|
|
|
topology _topology;
|
|
|
|
token_metadata::read_new_t _read_new = token_metadata::read_new_t::no;
|
|
|
|
long _ring_version = 0;
|
|
static thread_local long _static_ring_version;
|
|
|
|
// Zero means that token_metadata versions are not supported,
|
|
// this will be used in RPC handling to decide whether we
|
|
// need to apply fencing or not.
|
|
// The initial valid version is 1;
|
|
token_metadata::version_t _version = 0;
|
|
token_metadata::version_tracker_t _version_tracker;
|
|
|
|
// Note: if any member is added to this class
|
|
// clone_async() must be updated to copy that member.
|
|
|
|
void sort_tokens();
|
|
|
|
const tablet_metadata& tablets() const { return _tablets; }
|
|
tablet_metadata& tablets() { return _tablets; }
|
|
|
|
void set_tablets(tablet_metadata&& tablets) {
|
|
_tablets = std::move(tablets);
|
|
invalidate_cached_rings();
|
|
}
|
|
|
|
struct shallow_copy {};
|
|
public:
|
|
token_metadata_impl(shallow_copy, const token_metadata_impl& o) noexcept
|
|
: _topology(topology::config{})
|
|
{}
|
|
token_metadata_impl(token_metadata::config cfg) noexcept : _topology(std::move(cfg.topo_cfg)) {};
|
|
token_metadata_impl(const token_metadata_impl&) = delete; // it's too huge for direct copy, use clone_async()
|
|
token_metadata_impl(token_metadata_impl&&) noexcept = default;
|
|
const std::vector<token>& sorted_tokens() const;
|
|
future<> update_normal_tokens(std::unordered_set<token> tokens, host_id endpoint);
|
|
const token& first_token(const token& start) const;
|
|
size_t first_token_index(const token& start) const;
|
|
std::optional<host_id> get_endpoint(const token& token) const;
|
|
std::vector<token> get_tokens(const host_id& addr) const;
|
|
const std::unordered_map<token, host_id>& get_token_to_endpoint() const {
|
|
return _token_to_endpoint_map;
|
|
}
|
|
|
|
const std::unordered_set<host_id>& get_leaving_endpoints() const {
|
|
return _leaving_endpoints;
|
|
}
|
|
|
|
const std::unordered_map<token, host_id>& get_bootstrap_tokens() const {
|
|
return _bootstrap_tokens;
|
|
}
|
|
|
|
void update_topology(host_id id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> shard_count = std::nullopt) {
|
|
_topology.add_or_update_endpoint(id, std::nullopt, std::move(opt_dr), std::move(opt_st), std::move(shard_count));
|
|
}
|
|
|
|
/**
|
|
* Creates an iterable range of the sorted tokens starting at the token next
|
|
* after the given one.
|
|
*
|
|
* @param start A token that will define the beginning of the range
|
|
*
|
|
* @return The requested range (see the description above)
|
|
*/
|
|
boost::iterator_range<token_metadata::tokens_iterator> ring_range(const token& start) const;
|
|
|
|
boost::iterator_range<token_metadata::tokens_iterator> ring_range(dht::ring_position_view pos) const;
|
|
|
|
topology& get_topology() {
|
|
return _topology;
|
|
}
|
|
|
|
const topology& get_topology() const {
|
|
return _topology;
|
|
}
|
|
|
|
void debug_show() const;
|
|
|
|
/**
|
|
* Store an end-point to host ID mapping. Each ID must be unique, and
|
|
* cannot be changed after the fact.
|
|
*
|
|
* @param hostId
|
|
* @param endpoint
|
|
*/
|
|
void update_host_id(const host_id& host_id, inet_address endpoint);
|
|
|
|
/** Return the unique host ID for an end-point. */
|
|
host_id get_host_id(inet_address endpoint) const;
|
|
|
|
/// Return the unique host ID for an end-point or nullopt if not found.
|
|
std::optional<host_id> get_host_id_if_known(inet_address endpoint) const;
|
|
|
|
/** Return the end-point for a unique host ID or nullopt if not found.*/
|
|
std::optional<inet_address> get_endpoint_for_host_id_if_known(host_id) const;
|
|
|
|
/** Return the end-point for a unique host ID.*/
|
|
inet_address get_endpoint_for_host_id(host_id) const;
|
|
|
|
/** @return a copy of the endpoint-to-id map for read-only operations */
|
|
std::unordered_map<inet_address, host_id> get_endpoint_to_host_id_map_for_reading() const;
|
|
|
|
void add_bootstrap_token(token t, host_id endpoint);
|
|
|
|
void add_bootstrap_tokens(std::unordered_set<token> tokens, host_id endpoint);
|
|
|
|
void remove_bootstrap_tokens(std::unordered_set<token> tokens);
|
|
|
|
void add_leaving_endpoint(host_id endpoint);
|
|
void del_leaving_endpoint(host_id endpoint);
|
|
public:
|
|
void remove_endpoint(host_id endpoint);
|
|
|
|
bool is_normal_token_owner(host_id endpoint) const;
|
|
|
|
bool is_leaving(host_id endpoint) const;
|
|
|
|
// Is this node being replaced by another node
|
|
bool is_being_replaced(host_id endpoint) const;
|
|
|
|
// Is any node being replaced by another node
|
|
bool is_any_node_being_replaced() const;
|
|
|
|
void add_replacing_endpoint(host_id existing_node, host_id replacing_node);
|
|
|
|
void del_replacing_endpoint(host_id existing_node);
|
|
public:
|
|
|
|
/**
|
|
* Create a full copy of token_metadata_impl using asynchronous continuations.
|
|
* The caller must ensure that the cloned object will not change if
|
|
* the function yields.
|
|
*/
|
|
future<std::unique_ptr<token_metadata_impl>> clone_async() const noexcept;
|
|
|
|
/**
|
|
* Create a copy of TokenMetadata with only tokenToEndpointMap. That is, pending ranges,
|
|
* bootstrap tokens and leaving endpoints are not included in the copy.
|
|
* The caller must ensure that the cloned object will not change if
|
|
* the function yields.
|
|
*/
|
|
future<std::unique_ptr<token_metadata_impl>> clone_only_token_map(bool clone_sorted_tokens = true) const noexcept;
|
|
|
|
/**
|
|
* Create a copy of TokenMetadata with tokenToEndpointMap reflecting situation after all
|
|
* current leave operations have finished.
|
|
*
|
|
* @return new token metadata
|
|
*/
|
|
future<std::unique_ptr<token_metadata_impl>> clone_after_all_left() const noexcept {
|
|
return clone_only_token_map(false).then([this] (std::unique_ptr<token_metadata_impl> all_left_metadata) {
|
|
for (auto endpoint : _leaving_endpoints) {
|
|
all_left_metadata->remove_endpoint(endpoint);
|
|
}
|
|
all_left_metadata->sort_tokens();
|
|
|
|
return all_left_metadata;
|
|
});
|
|
}
|
|
|
|
/**
|
|
* Destroy the token_metadata members using continuations
|
|
* to prevent reactor stalls.
|
|
*/
|
|
future<> clear_gently() noexcept;
|
|
|
|
public:
|
|
dht::token_range_vector get_primary_ranges_for(std::unordered_set<token> tokens) const;
|
|
|
|
dht::token_range_vector get_primary_ranges_for(token right) const;
|
|
static boost::icl::interval<token>::interval_type range_to_interval(wrapping_interval<dht::token> r);
|
|
static wrapping_interval<dht::token> interval_to_range(boost::icl::interval<token>::interval_type i);
|
|
|
|
public:
|
|
future<> update_topology_change_info(dc_rack_fn& get_dc_rack);
|
|
const std::optional<topology_change_info>& get_topology_change_info() const {
|
|
return _topology_change_info;
|
|
}
|
|
public:
|
|
|
|
token get_predecessor(token t) const;
|
|
|
|
// Returns nodes that are officially part of the ring. It does not include
|
|
// node that is still joining the cluster, e.g., a node that is still
|
|
// streaming data before it finishes the bootstrap process and turns into
|
|
// NORMAL status.
|
|
const std::unordered_set<host_id>& get_all_endpoints() const noexcept {
|
|
return _normal_token_owners;
|
|
}
|
|
|
|
/* Returns the number of different endpoints that own tokens in the ring.
|
|
* Bootstrapping tokens are not taken into account. */
|
|
size_t count_normal_token_owners() const;
|
|
private:
|
|
future<> update_normal_token_owners();
|
|
public:
|
|
void set_read_new(token_metadata::read_new_t read_new) {
|
|
_read_new = read_new;
|
|
}
|
|
|
|
public:
|
|
long get_ring_version() const {
|
|
return _ring_version;
|
|
}
|
|
|
|
void invalidate_cached_rings() {
|
|
_ring_version = ++_static_ring_version;
|
|
tlogger.debug("ring_version={}", _ring_version);
|
|
}
|
|
|
|
token_metadata::version_t get_version() const {
|
|
return _version;
|
|
}
|
|
void set_version(token_metadata::version_t version) {
|
|
if (version <= 0) {
|
|
on_internal_error(tlogger,
|
|
format("token_metadata_impl::set_version: invalid new version {}", version));
|
|
}
|
|
if (version < _version) {
|
|
on_internal_error(tlogger,
|
|
format("token_metadata_impl::set_version: new version can't be smaller than the previous one, "
|
|
"new version {}, previous version {}", version, _version));
|
|
}
|
|
_version = version;
|
|
}
|
|
void set_version_tracker(token_metadata::version_tracker_t tracker) {
|
|
_version_tracker = std::move(tracker);
|
|
}
|
|
|
|
friend class token_metadata;
|
|
};
|
|
|
|
thread_local long token_metadata_impl::_static_ring_version;
|
|
|
|
token_metadata::tokens_iterator::tokens_iterator(const token& start, const token_metadata_impl* token_metadata)
|
|
: _token_metadata(token_metadata) {
|
|
_cur_it = _token_metadata->sorted_tokens().begin() + _token_metadata->first_token_index(start);
|
|
_remaining = _token_metadata->sorted_tokens().size();
|
|
}
|
|
|
|
bool token_metadata::tokens_iterator::operator==(const tokens_iterator& it) const {
|
|
return _remaining == it._remaining;
|
|
}
|
|
|
|
const token& token_metadata::tokens_iterator::operator*() const {
|
|
return *_cur_it;
|
|
}
|
|
|
|
token_metadata::tokens_iterator& token_metadata::tokens_iterator::operator++() {
|
|
++_cur_it;
|
|
if (_cur_it == _token_metadata->sorted_tokens().end()) {
|
|
_cur_it = _token_metadata->sorted_tokens().begin();
|
|
}
|
|
--_remaining;
|
|
return *this;
|
|
}
|
|
|
|
// Returns the host id recorded in the topology configuration as this node's own.
host_id token_metadata::get_my_id() const {
    const auto& topo_cfg = get_topology().get_config();
    return topo_cfg.this_host_id;
}
|
|
|
|
inline
|
|
boost::iterator_range<token_metadata::tokens_iterator>
|
|
token_metadata_impl::ring_range(const token& start) const {
|
|
auto begin = token_metadata::tokens_iterator(start, this);
|
|
auto end = token_metadata::tokens_iterator();
|
|
return boost::make_iterator_range(begin, end);
|
|
}
|
|
|
|
// Full clone: starts from clone_only_token_map() and then adds the bootstrap
// tokens, leaving/replacing endpoints, ring version and version.
future<std::unique_ptr<token_metadata_impl>> token_metadata_impl::clone_async() const noexcept {
    auto copy = co_await clone_only_token_map();

    // Copy the bootstrap tokens entry by entry, offering to yield after each.
    auto& pending = copy->_bootstrap_tokens;
    pending.reserve(_bootstrap_tokens.size());
    for (const auto& entry : _bootstrap_tokens) {
        pending.emplace(entry);
        co_await coroutine::maybe_yield();
    }

    copy->_leaving_endpoints = _leaving_endpoints;
    copy->_replacing_endpoints = _replacing_endpoints;
    copy->_ring_version = _ring_version;
    copy->_version = _version;
    co_return copy;
}
|
|
|
|
// Partial clone: copies the token map, normal token owners, topology,
// tablets and read_new flag; the sorted tokens are copied only when
// `clone_sorted_tokens` is set.
future<std::unique_ptr<token_metadata_impl>> token_metadata_impl::clone_only_token_map(bool clone_sorted_tokens) const noexcept {
    auto copy = std::make_unique<token_metadata_impl>(shallow_copy{}, *this);

    // Copy the ownership map entry by entry, offering to yield after each.
    auto& ownership = copy->_token_to_endpoint_map;
    ownership.reserve(_token_to_endpoint_map.size());
    for (const auto& entry : _token_to_endpoint_map) {
        ownership.emplace(entry);
        co_await coroutine::maybe_yield();
    }

    copy->_normal_token_owners = _normal_token_owners;
    copy->_topology = co_await _topology.clone_gently();
    if (clone_sorted_tokens) {
        copy->_sorted_tokens = _sorted_tokens;
        co_await coroutine::maybe_yield();
    }
    copy->_tablets = _tablets;
    copy->_read_new = _read_new;
    co_return copy;
}
|
|
|
|
// Clears the containers one after another through the clear_gently helpers,
// then the topology and finally the tablet metadata.
future<> token_metadata_impl::clear_gently() noexcept {
    // Token ownership
    co_await utils::clear_gently(_token_to_endpoint_map);
    co_await utils::clear_gently(_normal_token_owners);

    // Pending topology operations
    co_await utils::clear_gently(_bootstrap_tokens);
    co_await utils::clear_gently(_leaving_endpoints);
    co_await utils::clear_gently(_replacing_endpoints);

    co_await utils::clear_gently(_sorted_tokens);
    co_await _topology.clear_gently();
    co_await _tablets.clear_gently();
}
|
|
|
|
void token_metadata_impl::sort_tokens() {
|
|
std::vector<token> sorted;
|
|
sorted.reserve(_token_to_endpoint_map.size());
|
|
|
|
for (auto&& i : _token_to_endpoint_map) {
|
|
sorted.push_back(i.first);
|
|
}
|
|
|
|
std::sort(sorted.begin(), sorted.end());
|
|
|
|
_sorted_tokens = std::move(sorted);
|
|
}
|
|
|
|
const tablet_metadata& token_metadata::tablets() const {
|
|
return _impl->tablets();
|
|
}
|
|
|
|
tablet_metadata& token_metadata::tablets() {
|
|
return _impl->tablets();
|
|
}
|
|
|
|
void token_metadata::set_tablets(tablet_metadata tm) {
|
|
_impl->set_tablets(std::move(tm));
|
|
}
|
|
|
|
const std::vector<token>& token_metadata_impl::sorted_tokens() const {
|
|
return _sorted_tokens;
|
|
}
|
|
|
|
std::vector<token> token_metadata_impl::get_tokens(const host_id& addr) const {
|
|
std::vector<token> res;
|
|
for (auto&& i : _token_to_endpoint_map) {
|
|
if (i.second == addr) {
|
|
res.push_back(i.first);
|
|
}
|
|
}
|
|
std::sort(res.begin(), res.end());
|
|
return res;
|
|
}
|
|
|
|
// Makes `tokens` the complete set of normal tokens owned by `endpoint`.
//
// An empty `tokens` set is a no-op. `endpoint` must already be a member of
// the topology, otherwise an internal error is raised.
//
// The function may yield between map entries; _sorted_tokens is rebuilt at
// the end whenever the key set of _token_to_endpoint_map changed, i.e. when
// a token was inserted OR erased.
future<> token_metadata_impl::update_normal_tokens(std::unordered_set<token> tokens, host_id endpoint) {
    if (tokens.empty()) {
        co_return;
    }

    if (!_topology.has_node(endpoint)) {
        on_internal_error(tlogger, format("token_metadata_impl: {} must be a member of topology to update normal tokens", endpoint));
    }

    bool should_sort_tokens = false;

    // Phase 1: erase all tokens previously owned by the endpoint.
    for (auto it = _token_to_endpoint_map.begin(), ite = _token_to_endpoint_map.end(); it != ite;) {
        co_await coroutine::maybe_yield();
        if (it->second == endpoint) {
            auto tokit = tokens.find(it->first);
            if (tokit == tokens.end()) {
                // token no longer owned by endpoint.
                // The token leaves the map, so _sorted_tokens has to be
                // rebuilt even if no new token gets inserted below;
                // otherwise the ring would keep a token that has no owner.
                it = _token_to_endpoint_map.erase(it);
                should_sort_tokens = true;
                continue;
            }
            // token ownership did not change,
            // no further update needed for it.
            tokens.erase(tokit);
        }
        ++it;
    }

    // Phase 2:
    // a. ...
    // b. update pending _bootstrap_tokens and _leaving_endpoints
    // c. update _token_to_endpoint_map with the new endpoint->token mappings
    //    - set `should_sort_tokens` if new tokens were added
    remove_by_value(_bootstrap_tokens, endpoint);
    _leaving_endpoints.erase(endpoint);
    invalidate_cached_rings();
    for (const token& t : tokens) {
        co_await coroutine::maybe_yield();
        auto prev = _token_to_endpoint_map.insert(std::pair<token, host_id>(t, endpoint));
        should_sort_tokens |= prev.second; // new token inserted -> sort
        if (prev.first->second != endpoint) {
            // The token already existed under a different owner: take it over.
            tlogger.debug("Token {} changing ownership from {} to {}", t, prev.first->second, endpoint);
            prev.first->second = endpoint;
        }
    }

    co_await update_normal_token_owners();

    // Tokens were added to or removed from _token_to_endpoint_map
    // so re-sort all tokens.
    if (should_sort_tokens) {
        sort_tokens();
    }
    co_return;
}
|
|
|
|
size_t token_metadata_impl::first_token_index(const token& start) const {
|
|
if (_sorted_tokens.empty()) {
|
|
auto msg = format("sorted_tokens is empty in first_token_index!");
|
|
tlogger.error("{}", msg);
|
|
throw std::runtime_error(msg);
|
|
}
|
|
auto it = std::lower_bound(_sorted_tokens.begin(), _sorted_tokens.end(), start);
|
|
if (it == _sorted_tokens.end()) {
|
|
return 0;
|
|
} else {
|
|
return std::distance(_sorted_tokens.begin(), it);
|
|
}
|
|
}
|
|
|
|
// Returns the sorted token found at first_token_index(start).
const token& token_metadata_impl::first_token(const token& start) const {
    const size_t idx = first_token_index(start);
    return _sorted_tokens[idx];
}
|
|
|
|
std::optional<host_id> token_metadata_impl::get_endpoint(const token& token) const {
|
|
auto it = _token_to_endpoint_map.find(token);
|
|
if (it == _token_to_endpoint_map.end()) {
|
|
return std::nullopt;
|
|
} else {
|
|
return it->second;
|
|
}
|
|
}
|
|
|
|
void token_metadata_impl::debug_show() const {
|
|
auto reporter = std::make_shared<timer<lowres_clock>>();
|
|
reporter->set_callback ([reporter, this] {
|
|
fmt::print("Endpoint -> Token\n");
|
|
for (auto x : _token_to_endpoint_map) {
|
|
fmt::print("inet_address={}, token={}\n", x.second, x.first);
|
|
}
|
|
fmt::print("Sorted Token\n");
|
|
for (auto x : _sorted_tokens) {
|
|
fmt::print("token={}\n", x);
|
|
}
|
|
});
|
|
reporter->arm_periodic(std::chrono::seconds(1));
|
|
}
|
|
|
|
// Records the host id <-> endpoint association by adding or updating the
// corresponding node in the topology.
void token_metadata_impl::update_host_id(const host_id& host_id, inet_address endpoint) {
    _topology.add_or_update_endpoint(host_id, endpoint);
}
|
|
|
|
host_id token_metadata_impl::get_host_id(inet_address endpoint) const {
|
|
if (const auto* node = _topology.find_node(endpoint)) [[likely]] {
|
|
return node->host_id();
|
|
} else {
|
|
on_internal_error(tlogger, format("host_id for endpoint {} is not found", endpoint));
|
|
}
|
|
}
|
|
|
|
std::optional<host_id> token_metadata_impl::get_host_id_if_known(inet_address endpoint) const {
|
|
if (const auto* node = _topology.find_node(endpoint)) [[likely]] {
|
|
return node->host_id();
|
|
} else {
|
|
return std::nullopt;
|
|
}
|
|
}
|
|
|
|
std::optional<inet_address> token_metadata_impl::get_endpoint_for_host_id_if_known(host_id host_id) const {
|
|
if (const auto* node = _topology.find_node(host_id)) [[likely]] {
|
|
return node->endpoint();
|
|
} else {
|
|
return std::nullopt;
|
|
}
|
|
}
|
|
|
|
inet_address token_metadata_impl::get_endpoint_for_host_id(host_id host_id) const {
|
|
if (const auto* node = _topology.find_node(host_id)) [[likely]] {
|
|
return node->endpoint();
|
|
} else {
|
|
on_internal_error(tlogger, format("endpoint for host_id {} is not found", host_id));
|
|
}
|
|
}
|
|
|
|
std::unordered_map<inet_address, host_id> token_metadata_impl::get_endpoint_to_host_id_map_for_reading() const {
|
|
const auto& nodes = _topology.get_nodes_by_endpoint();
|
|
std::unordered_map<inet_address, host_id> map;
|
|
map.reserve(nodes.size());
|
|
for (const auto& [endpoint, node] : nodes) {
|
|
// Restrict to token-owners
|
|
if (!(node->is_normal() || node->is_leaving())) {
|
|
continue;
|
|
}
|
|
if (const auto& host_id = node->host_id()) {
|
|
map[endpoint] = host_id;
|
|
} else {
|
|
tlogger.info("get_endpoint_to_host_id_map_for_reading: endpoint {} has null host_id: state={}", endpoint, node->get_state());
|
|
}
|
|
}
|
|
return map;
|
|
}
|
|
|
|
// True when `endpoint` is a member of _normal_token_owners.
bool token_metadata_impl::is_normal_token_owner(host_id endpoint) const {
    return _normal_token_owners.find(endpoint) != _normal_token_owners.end();
}
|
|
|
|
// Single-token convenience wrapper around add_bootstrap_tokens().
void token_metadata_impl::add_bootstrap_token(token t, host_id endpoint) {
    add_bootstrap_tokens(std::unordered_set<token>{t}, endpoint);
}
|
|
|
|
// Overload taking a ring position: only the position's token is used.
boost::iterator_range<token_metadata::tokens_iterator>
token_metadata_impl::ring_range(const dht::ring_position_view start) const {
    const auto& start_token = start.token();
    return ring_range(start_token);
}
|
|
|
|
void token_metadata_impl::add_bootstrap_tokens(std::unordered_set<token> tokens, host_id endpoint) {
|
|
for (auto t : tokens) {
|
|
auto old_endpoint = _bootstrap_tokens.find(t);
|
|
if (old_endpoint != _bootstrap_tokens.end() && (*old_endpoint).second != endpoint) {
|
|
auto msg = format("Bootstrap Token collision between {} and {} (token {}", (*old_endpoint).second, endpoint, t);
|
|
throw std::runtime_error(msg);
|
|
}
|
|
|
|
auto old_endpoint2 = _token_to_endpoint_map.find(t);
|
|
if (old_endpoint2 != _token_to_endpoint_map.end() && (*old_endpoint2).second != endpoint) {
|
|
auto msg = format("Bootstrap Token collision between {} and {} (token {}", (*old_endpoint2).second, endpoint, t);
|
|
throw std::runtime_error(msg);
|
|
}
|
|
}
|
|
|
|
std::erase_if(_bootstrap_tokens, [endpoint] (const std::pair<token, host_id>& n) { return n.second == endpoint; });
|
|
|
|
for (auto t : tokens) {
|
|
_bootstrap_tokens[t] = endpoint;
|
|
}
|
|
}
|
|
|
|
void token_metadata_impl::remove_bootstrap_tokens(std::unordered_set<token> tokens) {
|
|
if (tokens.empty()) {
|
|
tlogger.warn("tokens is empty in remove_bootstrap_tokens!");
|
|
return;
|
|
}
|
|
for (auto t : tokens) {
|
|
_bootstrap_tokens.erase(t);
|
|
}
|
|
}
|
|
|
|
bool token_metadata_impl::is_leaving(host_id endpoint) const {
|
|
return _leaving_endpoints.contains(endpoint);
|
|
}
|
|
|
|
bool token_metadata_impl::is_being_replaced(host_id endpoint) const {
|
|
return _replacing_endpoints.contains(endpoint);
|
|
}
|
|
|
|
bool token_metadata_impl::is_any_node_being_replaced() const {
|
|
return !_replacing_endpoints.empty();
|
|
}
|
|
|
|
// Forgets `endpoint` everywhere: its bootstrap and normal tokens, its
// membership in the owner/leaving sets, its topology entry and any replace
// operation keyed by it. _sorted_tokens is not touched here.
void token_metadata_impl::remove_endpoint(host_id endpoint) {
    // Token ownership, pending first and then established.
    remove_by_value(_bootstrap_tokens, endpoint);
    remove_by_value(_token_to_endpoint_map, endpoint);
    _normal_token_owners.erase(endpoint);

    _topology.remove_endpoint(endpoint);

    // Pending topology operations that refer to the node.
    _leaving_endpoints.erase(endpoint);
    del_replacing_endpoint(endpoint);

    invalidate_cached_rings();
}
|
|
|
|
// Returns the token that precedes `t` in the sorted tokens, wrapping from
// the first token to the last one.
// Throws std::runtime_error when `t` is not one of the sorted tokens.
token token_metadata_impl::get_predecessor(token t) const {
    const auto& ring = sorted_tokens();
    const auto pos = std::lower_bound(ring.begin(), ring.end(), t);
    if (pos == ring.end() || *pos != t) {
        auto msg = format("token error in get_predecessor!");
        tlogger.error("{}", msg);
        throw std::runtime_error(msg);
    }
    // The predecessor of the first element is the last element.
    return pos == ring.begin() ? ring.back() : *(pos - 1);
}
|
|
|
|
// For every token in `tokens`, emits the range (predecessor, token] after
// unwrapping it, so a range that wraps around contributes more than one entry.
dht::token_range_vector token_metadata_impl::get_primary_ranges_for(std::unordered_set<token> tokens) const {
    dht::token_range_vector result;
    result.reserve(tokens.size() + 1); // one of the ranges will wrap
    const auto append = [&] (auto&& rng) { result.push_back(std::move(rng)); };
    for (const auto& right : tokens) {
        const auto left = get_predecessor(right);
        ::compat::unwrap_into(
                wrapping_interval<token>(interval_bound<token>(left, false), interval_bound<token>(right)),
                dht::token_comparator(),
                append);
    }
    return result;
}
|
|
|
|
// Single-token convenience overload.
dht::token_range_vector token_metadata_impl::get_primary_ranges_for(token right) const {
    std::unordered_set<token> single{right};
    return get_primary_ranges_for(std::move(single));
}
|
|
|
|
// Converts a wrapping_interval into a boost::icl interval.
// A missing bound becomes an exclusive bound at the minimum (start) or
// maximum (end) token.
boost::icl::interval<token>::interval_type
token_metadata_impl::range_to_interval(wrapping_interval<dht::token> r) {
    using icl_interval = boost::icl::interval<token>;

    token lower = dht::minimum_token();
    bool lower_inclusive = false;
    if (r.start()) {
        lower = r.start()->value();
        lower_inclusive = r.start()->is_inclusive();
    }

    token upper = dht::maximum_token();
    bool upper_inclusive = false;
    if (r.end()) {
        upper = r.end()->value();
        upper_inclusive = r.end()->is_inclusive();
    }

    // Choose the factory that matches the inclusiveness of both bounds.
    if (lower_inclusive) {
        if (upper_inclusive) {
            return icl_interval::closed(std::move(lower), std::move(upper));
        }
        return icl_interval::right_open(std::move(lower), std::move(upper));
    }
    if (upper_inclusive) {
        return icl_interval::left_open(std::move(lower), std::move(upper));
    }
    return icl_interval::open(std::move(lower), std::move(upper));
}
|
|
|
|
// Converts a boost::icl interval back into a wrapping_interval, translating
// the interval's bound kind into per-bound inclusiveness flags.
// Throws std::runtime_error for a bound kind that is none of the four known ones.
wrapping_interval<dht::token>
token_metadata_impl::interval_to_range(boost::icl::interval<token>::interval_type i) {
    using icl_bounds = boost::icl::interval_bounds;

    const auto bits = i.bounds().bits();
    bool start_inclusive = false;
    bool end_inclusive = false;
    if (bits == icl_bounds::static_closed) {
        start_inclusive = true;
        end_inclusive = true;
    } else if (bits == icl_bounds::static_right_open) {
        start_inclusive = true;
    } else if (bits == icl_bounds::static_left_open) {
        end_inclusive = true;
    } else if (bits != icl_bounds::static_open) {
        throw std::runtime_error("Invalid boost::icl::interval<token> bounds");
    }
    return wrapping_interval<dht::token>({{i.lower(), start_inclusive}}, {{i.upper(), end_inclusive}});
}
|
|
|
|
// Recomputes _topology_change_info from the pending bootstrap, leaving and
// replacing operations.
//
// With nothing pending, the stored info is cleared and reset. Otherwise a
// target token_metadata is built that has all pending operations applied,
// together with the sorted union of the current and bootstrap tokens.
// `get_dc_rack` supplies the dc/rack of each host that becomes normal in the
// target.
future<> token_metadata_impl::update_topology_change_info(dc_rack_fn& get_dc_rack) {
    const bool nothing_pending = _bootstrap_tokens.empty() && _leaving_endpoints.empty() && _replacing_endpoints.empty();
    if (nothing_pending) {
        co_await utils::clear_gently(_topology_change_info);
        _topology_change_info.reset();
        co_return;
    }

    // target_token_metadata incorporates all the changes from leaving, bootstrapping and replacing
    auto target_token_metadata = co_await clone_only_token_map(false);
    {
        // construct new_normal_tokens based on _bootstrap_tokens and _replacing_endpoints
        std::unordered_map<host_id, std::unordered_set<token>> new_normal_tokens;
        if (!_replacing_endpoints.empty()) {
            // Tokens of a node being replaced go to the node replacing it.
            for (const auto& [tok, owner] : _token_to_endpoint_map) {
                if (const auto it = _replacing_endpoints.find(owner); it != _replacing_endpoints.end()) {
                    new_normal_tokens[it->second].insert(tok);
                }
            }
            for (const auto& [replace_from, replace_to] : _replacing_endpoints) {
                target_token_metadata->remove_endpoint(replace_from);
            }
        }
        for (const auto& [tok, owner] : _bootstrap_tokens) {
            new_normal_tokens[owner].insert(tok);
        }
        // apply new_normal_tokens
        for (auto& [endpoint, tokens] : new_normal_tokens) {
            target_token_metadata->update_topology(endpoint, get_dc_rack(endpoint), node::state::normal);
            co_await target_token_metadata->update_normal_tokens(std::move(tokens), endpoint);
        }
        // apply leaving endpoints
        for (const auto& endpoint : _leaving_endpoints) {
            target_token_metadata->remove_endpoint(endpoint);
        }
        target_token_metadata->sort_tokens();
    }

    // merge tokens from token_to_endpoint and bootstrap_tokens,
    // preserving tokens of leaving endpoints
    std::vector<dht::token> all_tokens;
    all_tokens.reserve(sorted_tokens().size() + get_bootstrap_tokens().size());
    all_tokens.insert(all_tokens.end(), sorted_tokens().begin(), sorted_tokens().end());
    for (const auto& entry : get_bootstrap_tokens()) {
        all_tokens.push_back(entry.first);
    }
    std::sort(all_tokens.begin(), all_tokens.end());

    // Install the new info first, then clear the previous value gently.
    auto previous = std::move(_topology_change_info);
    _topology_change_info.emplace(make_lw_shared<token_metadata>(std::move(target_token_metadata)),
            std::move(all_tokens),
            _read_new);
    co_await utils::clear_gently(previous);
}
|
|
|
|
size_t token_metadata_impl::count_normal_token_owners() const {
|
|
return _normal_token_owners.size();
|
|
}
|
|
|
|
// Recomputes _normal_token_owners as the set of distinct owners found in
// _token_to_endpoint_map, offering to yield after each entry. The member is
// replaced only after the whole map has been scanned.
future<> token_metadata_impl::update_normal_token_owners() {
    std::unordered_set<host_id> owners;
    for (const auto& entry : _token_to_endpoint_map) {
        owners.insert(entry.second);
        co_await coroutine::maybe_yield();
    }
    _normal_token_owners = std::move(owners);
}
|
|
|
|
void token_metadata_impl::add_leaving_endpoint(host_id endpoint) {
|
|
_leaving_endpoints.emplace(endpoint);
|
|
}
|
|
|
|
void token_metadata_impl::del_leaving_endpoint(host_id endpoint) {
|
|
_leaving_endpoints.erase(endpoint);
|
|
}
|
|
|
|
// Registers `replacing_node` as the replacement of `existing_node`,
// overwriting any previous registration for `existing_node`.
// A node cannot replace itself; that raises an internal error.
void token_metadata_impl::add_replacing_endpoint(host_id existing_node, host_id replacing_node) {
    if (existing_node == replacing_node) {
        on_internal_error(tlogger, format("Can't replace node {} with itself", existing_node));
    }
    tlogger.info("Added node {} as pending replacing endpoint which replaces existing node {}",
            replacing_node, existing_node);
    _replacing_endpoints.insert_or_assign(existing_node, replacing_node);
}
|
|
|
|
// Drops the replace operation registered for `existing_node`, logging the
// removal. A no-op when none is registered.
void token_metadata_impl::del_replacing_endpoint(host_id existing_node) {
    const auto it = _replacing_endpoints.find(existing_node);
    if (it == _replacing_endpoints.end()) {
        return;
    }
    tlogger.info("Removed node {} as pending replacing endpoint which replaces existing node {}",
            it->second, existing_node);
    _replacing_endpoints.erase(it);
}
|
|
|
|
topology_change_info::topology_change_info(lw_shared_ptr<token_metadata> target_token_metadata_,
|
|
std::vector<dht::token> all_tokens_,
|
|
token_metadata::read_new_t read_new_)
|
|
: target_token_metadata(std::move(target_token_metadata_))
|
|
, all_tokens(std::move(all_tokens_))
|
|
, read_new(read_new_)
|
|
{
|
|
}
|
|
|
|
future<> topology_change_info::clear_gently() {
|
|
co_await utils::clear_gently(target_token_metadata);
|
|
co_await utils::clear_gently(all_tokens);
|
|
}
|
|
|
|
// Wraps an already-built implementation object.
token_metadata::token_metadata(std::unique_ptr<token_metadata_impl> impl)
    : _impl(std::move(impl))
{
}

// Creates a fresh implementation from `cfg`. `cfg` is a by-value parameter,
// so it is moved into the implementation rather than copied again.
token_metadata::token_metadata(config cfg)
    : _impl(std::make_unique<token_metadata_impl>(std::move(cfg)))
{
}

// The destructor and the move operations are defaulted here, in the file
// that defines token_metadata_impl.
token_metadata::~token_metadata() = default;

token_metadata::token_metadata(token_metadata&&) noexcept = default;

token_metadata& token_metadata::operator=(token_metadata&&) noexcept = default;
|
|
|
|
const std::vector<token>&
|
|
token_metadata::sorted_tokens() const {
|
|
return _impl->sorted_tokens();
|
|
}
|
|
|
|
future<>
|
|
token_metadata::update_normal_tokens(std::unordered_set<token> tokens, host_id endpoint) {
|
|
return _impl->update_normal_tokens(std::move(tokens), endpoint);
|
|
}
|
|
|
|
const token&
|
|
token_metadata::first_token(const token& start) const {
|
|
return _impl->first_token(start);
|
|
}
|
|
|
|
size_t
|
|
token_metadata::first_token_index(const token& start) const {
|
|
return _impl->first_token_index(start);
|
|
}
|
|
|
|
std::optional<host_id>
|
|
token_metadata::get_endpoint(const token& token) const {
|
|
return _impl->get_endpoint(token);
|
|
}
|
|
|
|
std::vector<token>
|
|
token_metadata::get_tokens(const host_id& addr) const {
|
|
return _impl->get_tokens(addr);
|
|
}
|
|
|
|
const std::unordered_map<token, host_id>&
|
|
token_metadata::get_token_to_endpoint() const {
|
|
return _impl->get_token_to_endpoint();
|
|
}
|
|
|
|
const std::unordered_set<host_id>&
|
|
token_metadata::get_leaving_endpoints() const {
|
|
return _impl->get_leaving_endpoints();
|
|
}
|
|
|
|
const std::unordered_map<token, host_id>&
|
|
token_metadata::get_bootstrap_tokens() const {
|
|
return _impl->get_bootstrap_tokens();
|
|
}
|
|
|
|
void
|
|
token_metadata::update_topology(host_id ep, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> shard_count) {
|
|
_impl->update_topology(ep, std::move(opt_dr), std::move(opt_st), std::move(shard_count));
|
|
}
|
|
|
|
// Token overload: returns the result of token_metadata_impl::ring_range(start).
boost::iterator_range<token_metadata::tokens_iterator> token_metadata::ring_range(const token& start) const {
    return _impl->ring_range(start);
}
|
|
|
|
// Ring-position overload: returns the result of
// token_metadata_impl::ring_range(start).
boost::iterator_range<token_metadata::tokens_iterator> token_metadata::ring_range(dht::ring_position_view start) const {
    return _impl->ring_range(start);
}
|
|
|
|
class token_metadata_ring_splitter : public locator::token_range_splitter {
|
|
token_metadata_ptr _tmptr;
|
|
boost::iterator_range<token_metadata::tokens_iterator> _range;
|
|
public:
|
|
token_metadata_ring_splitter(token_metadata_ptr tmptr)
|
|
: _tmptr(std::move(tmptr))
|
|
, _range(_tmptr->sorted_tokens().empty() // ring_range() throws if the ring is empty
|
|
? boost::make_iterator_range(token_metadata::tokens_iterator(), token_metadata::tokens_iterator())
|
|
: _tmptr->ring_range(dht::minimum_token()))
|
|
{ }
|
|
|
|
void reset(dht::ring_position_view pos) override {
|
|
_range = _tmptr->ring_range(pos);
|
|
}
|
|
|
|
std::optional<dht::token> next_token() override {
|
|
if (_range.empty()) {
|
|
return std::nullopt;
|
|
}
|
|
auto t = *_range.begin();
|
|
_range.drop_front();
|
|
return t;
|
|
}
|
|
};
|
|
|
|
// Builds a token_metadata_ring_splitter over `tmptr` and returns it through
// the token_range_splitter base interface.
std::unique_ptr<locator::token_range_splitter> make_splitter(token_metadata_ptr tmptr) {
    auto splitter = std::make_unique<token_metadata_ring_splitter>(std::move(tmptr));
    return splitter;
}
|
|
|
|
// Mutable access: returns the reference produced by
// token_metadata_impl::get_topology().
topology& token_metadata::get_topology() {
    topology& topo = _impl->get_topology();
    return topo;
}
|
|
|
|
// Read-only access: returns the reference produced by
// token_metadata_impl::get_topology().
const topology& token_metadata::get_topology() const {
    const topology& topo = _impl->get_topology();
    return topo;
}
|
|
|
|
void
|
|
token_metadata::debug_show() const {
|
|
_impl->debug_show();
|
|
}
|
|
|
|
void
|
|
token_metadata::update_host_id(const host_id& host_id, inet_address endpoint) {
|
|
_impl->update_host_id(host_id, endpoint);
|
|
}
|
|
|
|
// Returns the result of token_metadata_impl::get_host_id(endpoint).
host_id token_metadata::get_host_id(inet_address endpoint) const {
    return _impl->get_host_id(endpoint);
}
|
|
|
|
std::optional<host_id>
|
|
token_metadata::get_host_id_if_known(inet_address endpoint) const {
|
|
return _impl->get_host_id_if_known(endpoint);
|
|
}
|
|
|
|
std::optional<token_metadata::inet_address>
|
|
token_metadata::get_endpoint_for_host_id_if_known(host_id host_id) const {
|
|
return _impl->get_endpoint_for_host_id_if_known(host_id);
|
|
}
|
|
|
|
// Returns the result of token_metadata_impl::get_endpoint_for_host_id(host_id).
token_metadata::inet_address token_metadata::get_endpoint_for_host_id(host_id host_id) const {
    return _impl->get_endpoint_for_host_id(host_id);
}
|
|
|
|
// Constructs a host_id_or_endpoint from the given string, resolves it
// against this token_metadata and returns the resolved object.
host_id_or_endpoint token_metadata::parse_host_id_and_endpoint(const sstring& host_id_string) const {
    host_id_or_endpoint parsed(host_id_string);
    parsed.resolve(*this);
    return parsed;
}
|
|
|
|
std::unordered_map<inet_address, host_id>
|
|
token_metadata::get_endpoint_to_host_id_map_for_reading() const {
|
|
return _impl->get_endpoint_to_host_id_map_for_reading();
|
|
}
|
|
|
|
void
|
|
token_metadata::add_bootstrap_token(token t, host_id endpoint) {
|
|
_impl->add_bootstrap_token(t, endpoint);
|
|
}
|
|
|
|
void
|
|
token_metadata::add_bootstrap_tokens(std::unordered_set<token> tokens, host_id endpoint) {
|
|
_impl->add_bootstrap_tokens(std::move(tokens), endpoint);
|
|
}
|
|
|
|
void
|
|
token_metadata::remove_bootstrap_tokens(std::unordered_set<token> tokens) {
|
|
_impl->remove_bootstrap_tokens(std::move(tokens));
|
|
}
|
|
|
|
void
|
|
token_metadata::add_leaving_endpoint(host_id endpoint) {
|
|
_impl->add_leaving_endpoint(endpoint);
|
|
}
|
|
|
|
void
|
|
token_metadata::del_leaving_endpoint(host_id endpoint) {
|
|
_impl->del_leaving_endpoint(endpoint);
|
|
}
|
|
|
|
void
|
|
token_metadata::remove_endpoint(host_id endpoint) {
|
|
_impl->remove_endpoint(endpoint);
|
|
_impl->sort_tokens();
|
|
}
|
|
|
|
bool
|
|
token_metadata::is_normal_token_owner(host_id endpoint) const {
|
|
return _impl->is_normal_token_owner(endpoint);
|
|
}
|
|
|
|
bool
|
|
token_metadata::is_leaving(host_id endpoint) const {
|
|
return _impl->is_leaving(endpoint);
|
|
}
|
|
|
|
bool
|
|
token_metadata::is_being_replaced(host_id endpoint) const {
|
|
return _impl->is_being_replaced(endpoint);
|
|
}
|
|
|
|
bool
|
|
token_metadata::is_any_node_being_replaced() const {
|
|
return _impl->is_any_node_being_replaced();
|
|
}
|
|
|
|
// Passes the (existing node, replacing node) pair to
// token_metadata_impl::add_replacing_endpoint().
void
token_metadata::add_replacing_endpoint(host_id existing_node, host_id replacing_node) {
    _impl->add_replacing_endpoint(existing_node, replacing_node);
}
|
|
|
|
// Calls token_metadata_impl::del_replacing_endpoint(existing_node).
void
token_metadata::del_replacing_endpoint(host_id existing_node) {
    _impl->del_replacing_endpoint(existing_node);
}
|
|
|
|
// Awaits token_metadata_impl::clone_async() and wraps the result in a new
// token_metadata.
future<token_metadata> token_metadata::clone_async() const noexcept {
    auto cloned_impl = co_await _impl->clone_async();
    co_return token_metadata(std::move(cloned_impl));
}
|
|
|
|
// Awaits token_metadata_impl::clone_only_token_map() and wraps the result
// in a new token_metadata.
future<token_metadata> token_metadata::clone_only_token_map() const noexcept {
    auto cloned_impl = co_await _impl->clone_only_token_map();
    co_return token_metadata(std::move(cloned_impl));
}
|
|
|
|
// Awaits token_metadata_impl::clone_after_all_left() and wraps the result
// in a new token_metadata.
future<token_metadata> token_metadata::clone_after_all_left() const noexcept {
    auto cloned_impl = co_await _impl->clone_after_all_left();
    co_return token_metadata(std::move(cloned_impl));
}
|
|
|
|
// Returns the future produced by token_metadata_impl::clear_gently().
future<>
token_metadata::clear_gently() noexcept {
    return _impl->clear_gently();
}
|
|
|
|
// Token-set overload: passes `tokens` (moved) to
// token_metadata_impl::get_primary_ranges_for() and returns its result.
dht::token_range_vector token_metadata::get_primary_ranges_for(std::unordered_set<token> tokens) const {
    return _impl->get_primary_ranges_for(std::move(tokens));
}
|
|
|
|
// Single-token overload: returns the result of
// token_metadata_impl::get_primary_ranges_for(right).
dht::token_range_vector token_metadata::get_primary_ranges_for(token right) const {
    return _impl->get_primary_ranges_for(right);
}
|
|
|
|
// Forwards to the static token_metadata_impl::range_to_interval(), moving
// `r` into the call.
boost::icl::interval<token>::interval_type token_metadata::range_to_interval(wrapping_interval<dht::token> r) {
    return token_metadata_impl::range_to_interval(std::move(r));
}
|
|
|
|
// Forwards to the static token_metadata_impl::interval_to_range(), moving
// `i` into the call.
wrapping_interval<dht::token> token_metadata::interval_to_range(boost::icl::interval<token>::interval_type i) {
    return token_metadata_impl::interval_to_range(std::move(i));
}
|
|
|
|
// Passes `get_dc_rack` by reference to
// token_metadata_impl::update_topology_change_info() and returns its future.
future<> token_metadata::update_topology_change_info(dc_rack_fn& get_dc_rack) {
    return _impl->update_topology_change_info(get_dc_rack);
}
|
|
|
|
const std::optional<topology_change_info>&
|
|
token_metadata::get_topology_change_info() const {
|
|
return _impl->get_topology_change_info();
|
|
}
|
|
|
|
// Returns the result of token_metadata_impl::get_predecessor(t).
token token_metadata::get_predecessor(token t) const {
    return _impl->get_predecessor(t);
}
|
|
|
|
const std::unordered_set<host_id>&
|
|
token_metadata::get_all_endpoints() const {
|
|
return _impl->get_all_endpoints();
|
|
}
|
|
|
|
std::unordered_set<gms::inet_address> token_metadata::get_all_ips() const {
|
|
const auto& host_ids = _impl->get_all_endpoints();
|
|
std::unordered_set<gms::inet_address> result;
|
|
result.reserve(host_ids.size());
|
|
for (const auto& id: host_ids) {
|
|
result.insert(_impl->get_endpoint_for_host_id(id));
|
|
}
|
|
return result;
|
|
}
|
|
|
|
size_t
|
|
token_metadata::count_normal_token_owners() const {
|
|
return _impl->count_normal_token_owners();
|
|
}
|
|
|
|
void
|
|
token_metadata::set_read_new(read_new_t read_new) {
|
|
_impl->set_read_new(read_new);
|
|
}
|
|
|
|
long
|
|
token_metadata::get_ring_version() const {
|
|
return _impl->get_ring_version();
|
|
}
|
|
|
|
void
|
|
token_metadata::invalidate_cached_rings() {
|
|
_impl->invalidate_cached_rings();
|
|
}
|
|
|
|
auto
|
|
token_metadata::get_version() const -> version_t {
|
|
return _impl->get_version();
|
|
}
|
|
void
|
|
token_metadata::set_version(version_t version) {
|
|
_impl->set_version(version);
|
|
}
|
|
void
|
|
token_metadata::set_version_tracker(version_tracker_t tracker) {
|
|
_impl->set_version_tracker(std::move(tracker));
|
|
}
|
|
|
|
// If the tracker was marked expired and is being released more than
// _log_threshold after that point, logs a warning with the version, the
// time elapsed since expiry (in seconds) and the current backtrace.
version_tracker::~version_tracker() {
    if (!_expired_at) {
        // Never marked expired: nothing to report.
        return;
    }

    const auto released_at = std::chrono::steady_clock::now();
    const bool past_threshold = *_expired_at + _log_threshold < released_at;
    if (!past_threshold) {
        return;
    }

    using float_seconds = std::chrono::duration<float>;
    auto held_for = std::chrono::duration_cast<float_seconds>(released_at - *_expired_at);
    tlogger.warn("topology version {} held for {:.3f} [s] past expiry, released at: {}", _version, held_for.count(),
        seastar::current_backtrace());
}
|
|
|
|
// Creates a version_tracker for `version` from a fresh
// _versions_barrier.start() operation, pushes it to the front of _trackers
// and returns it.
version_tracker
shared_token_metadata::new_tracker(token_metadata::version_t version) {
    version_tracker vt(_versions_barrier.start(), version);
    _trackers.push_front(vt);
    return vt;
}
|
|
|
|
// Installs `tmptr` as the current shared token_metadata.
//
// Reports an internal error when the ring_version does not strictly increase
// or when the version decreases. When the version increases, advances
// _versions_barrier and stores the resulting future in _stale_versions_in_use.
// After the swap the new metadata gets a fresh version tracker, and every
// tracker whose version differs from the new one is marked expired with
// _stall_detector_threshold.
void shared_token_metadata::set(mutable_token_metadata_ptr tmptr) noexcept {
    if (_shared->get_ring_version() >= tmptr->get_ring_version()) {
        on_internal_error(tlogger, format("shared_token_metadata: must not set non-increasing ring_version: {} -> {}", _shared->get_ring_version(), tmptr->get_ring_version()));
    }

    const auto previous_version = _shared->get_version();
    const auto incoming_version = tmptr->get_version();
    if (previous_version > incoming_version) {
        on_internal_error(tlogger, format("shared_token_metadata: must not set decreasing version: {} -> {}", _shared->get_version(), tmptr->get_version()));
    } else if (previous_version < incoming_version) {
        _stale_versions_in_use = _versions_barrier.advance_and_await();
    }

    _shared = std::move(tmptr);
    _shared->set_version_tracker(new_tracker(_shared->get_version()));

    for (auto&& tracker : _trackers) {
        const bool is_current = !(tracker.version() != _shared->get_version());
        if (is_current) {
            continue;
        }
        tracker.mark_expired(_stall_detector_threshold);
    }

    tlogger.debug("new token_metadata is set, version {}", _shared->get_version());
}
|
|
|
|
// Sets _fence_version to `version`.
//
// Reports an internal error if `version` is greater than the version of the
// current shared token_metadata, and throws std::runtime_error if `version`
// is lower than the fence version already in place.
void shared_token_metadata::update_fence_version(token_metadata::version_t version) {
    const auto current_version = _shared->get_version();
    if (version > current_version) {
        // The token_metadata::version under no circumstance can go backwards.
        // Even in case of topology change coordinator moving to another node
        // this condition must hold, that is why we treat its violation
        // as an internal error.
        on_internal_error(tlogger,
            format("shared_token_metadata: invalid new fence version, can't be greater than the current version, "
                   "current version {}, new fence version {}", current_version, version));
    }

    const bool goes_backwards = version < _fence_version;
    if (goes_backwards) {
        // If topology change coordinator moved to another node,
        // it can get ahead and increment the fence version
        // while we are handling raft_topology_cmd::command::fence,
        // so we just throw an error in this case.
        throw std::runtime_error(
            format("shared_token_metadata: can't set decreasing fence version: {} -> {}",
                _fence_version, version));
    }

    _fence_version = version;
    tlogger.debug("new fence_version is set, version {}", _fence_version);
}
|
|
|
|
// Takes the lock, clones the current shared token_metadata, applies `func`
// to the clone and installs the clone with set(). The lock is held until
// the coroutine finishes.
future<> shared_token_metadata::mutate_token_metadata(seastar::noncopyable_function<future<> (token_metadata&)> func) {
    auto lock_holder = co_await get_lock();

    auto working_copy = co_await _shared->clone_async();
    // bump the token_metadata ring_version
    // to invalidate cached token/replication mappings
    // when the modified token_metadata is committed.
    working_copy.invalidate_cached_rings();

    co_await func(working_copy);

    auto updated = make_token_metadata_ptr(std::move(working_copy));
    set(std::move(updated));
}
|
|
|
|
// Applies `func` to a clone of the local shard's token_metadata and then
// installs the result on every shard.
//
// Asserts that it runs on shard 0. The local lock is held for the whole
// operation. The mutated copy is cloned for each other shard first, and
// set() is invoked on the shards only after all clones have been made.
future<> shared_token_metadata::mutate_on_all_shards(sharded<shared_token_metadata>& stm, seastar::noncopyable_function<future<> (token_metadata&)> func) {
    const auto base_shard = this_shard_id();
    assert(base_shard == 0);
    auto lock_holder = co_await stm.local().get_lock();

    // One slot per shard, filled in below and consumed by set().
    std::vector<mutable_token_metadata_ptr> per_shard(smp::count);

    auto base_copy = make_token_metadata_ptr(co_await stm.local().get()->clone_async());
    auto& mutated = *base_copy;
    // bump the token_metadata ring_version
    // to invalidate cached token/replication mappings
    // when the modified token_metadata is committed.
    mutated.invalidate_cached_rings();
    co_await func(mutated);

    // Apply the mutated token_metadata only after successfully cloning it on all shards.
    per_shard[base_shard] = base_copy;
    co_await smp::invoke_on_others(base_shard, [&] () -> future<> {
        per_shard[this_shard_id()] = make_token_metadata_ptr(co_await mutated.clone_async());
    });

    co_await stm.invoke_on_all([&] (shared_token_metadata& local_stm) {
        local_stm.set(std::move(per_shard[this_shard_id()]));
    });
}
|
|
|
|
// Parses `s` according to `restrict`:
//  - param_type::host_id:     `s` is parsed as a UUID and stored in `id`.
//  - param_type::endpoint:    `s` is parsed as an inet address and stored in
//                             `endpoint`.
//  - param_type::auto_detect: the UUID form is tried first; if that fails,
//                             `s` is parsed as an inet address.
//
// Throws std::invalid_argument when `s` cannot be parsed in the requested
// form.
host_id_or_endpoint::host_id_or_endpoint(const sstring& s, param_type restrict) {
    switch (restrict) {
    case param_type::host_id:
        try {
            id = host_id(utils::UUID(s));
        } catch (const marshal_exception& e) {
            throw std::invalid_argument(format("Invalid host_id {}: {}", s, e.what()));
        }
        break;
    case param_type::endpoint:
        try {
            endpoint = gms::inet_address(s);
        } catch (const std::invalid_argument& e) {
            throw std::invalid_argument(format("Invalid inet_address {}: {}", s, e.what()));
        }
        break;
    case param_type::auto_detect:
        try {
            id = host_id(utils::UUID(s));
        } catch (const marshal_exception&) {
            // Not a UUID: fall back to the inet address form. The error
            // reported on failure does not include either parser's message.
            try {
                endpoint = gms::inet_address(s);
            } catch (const std::invalid_argument&) {
                throw std::invalid_argument(format("Invalid host_id or inet_address {}", s));
            }
        }
    }
}
|
|
|
|
void host_id_or_endpoint::resolve(const token_metadata& tm) {
|
|
if (id) {
|
|
auto endpoint_opt = tm.get_endpoint_for_host_id_if_known(id);
|
|
if (!endpoint_opt) {
|
|
throw std::runtime_error(format("Host ID {} not found in the cluster", id));
|
|
}
|
|
endpoint = *endpoint_opt;
|
|
} else {
|
|
auto opt_id = tm.get_host_id_if_known(endpoint);
|
|
if (!opt_id) {
|
|
throw std::runtime_error(format("Host inet address {} not found in the cluster", endpoint));
|
|
}
|
|
id = *opt_id;
|
|
}
|
|
}
|
|
|
|
} // namespace locator
|