Compare commits
20 Commits
master
...
debug_form
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8c293dfe36 | ||
|
|
97fb93a5b4 | ||
|
|
357b94ac60 | ||
|
|
037f291d48 | ||
|
|
b246a70404 | ||
|
|
a071d54160 | ||
|
|
62d369c113 | ||
|
|
bce09e055c | ||
|
|
37004ac4dc | ||
|
|
6f619908ce | ||
|
|
0fe552b074 | ||
|
|
14482e2e07 | ||
|
|
4bda6dded5 | ||
|
|
0be4fac5e5 | ||
|
|
8818a0347e | ||
|
|
489bdb2f93 | ||
|
|
9efd94199d | ||
|
|
10085ddb25 | ||
|
|
8635a2aa6f | ||
|
|
c7bb7b34c0 |
@@ -53,8 +53,12 @@ std::vector<sstring> get_attr_values(LDAP* ld, LDAPMessage* res, const char* att
|
|||||||
mylog.debug("Analyzing search results");
|
mylog.debug("Analyzing search results");
|
||||||
for (auto e = ldap_first_entry(ld, res); e; e = ldap_next_entry(ld, e)) {
|
for (auto e = ldap_first_entry(ld, res); e; e = ldap_next_entry(ld, e)) {
|
||||||
struct deleter {
|
struct deleter {
|
||||||
void operator()(berval** p) { ldap_value_free_len(p); }
|
void operator()(berval** p) {
|
||||||
void operator()(char* p) { ldap_memfree(p); }
|
ldap_value_free_len(p);
|
||||||
|
}
|
||||||
|
void operator()(char* p) {
|
||||||
|
ldap_memfree(p);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
const std::unique_ptr<char, deleter> dname(ldap_get_dn(ld, e));
|
const std::unique_ptr<char, deleter> dname(ldap_get_dn(ld, e));
|
||||||
mylog.debug("Analyzing entry {}", dname.get());
|
mylog.debug("Analyzing entry {}", dname.get());
|
||||||
@@ -75,12 +79,14 @@ std::vector<sstring> get_attr_values(LDAP* ld, LDAPMessage* res, const char* att
|
|||||||
|
|
||||||
namespace auth {
|
namespace auth {
|
||||||
|
|
||||||
ldap_role_manager::ldap_role_manager(
|
ldap_role_manager::ldap_role_manager(std::string_view query_template, std::string_view target_attr, std::string_view bind_name, std::string_view bind_password,
|
||||||
std::string_view query_template, std::string_view target_attr, std::string_view bind_name, std::string_view bind_password,
|
uint32_t permissions_update_interval_in_ms, utils::observer<uint32_t> permissions_update_interval_in_ms_observer, cql3::query_processor& qp,
|
||||||
uint32_t permissions_update_interval_in_ms,
|
::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
||||||
utils::observer<uint32_t> permissions_update_interval_in_ms_observer,
|
: _std_mgr(qp, rg0c, mm, cache)
|
||||||
cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
, _group0_client(rg0c)
|
||||||
: _std_mgr(qp, rg0c, mm, cache), _group0_client(rg0c), _query_template(query_template), _target_attr(target_attr), _bind_name(bind_name)
|
, _query_template(query_template)
|
||||||
|
, _target_attr(target_attr)
|
||||||
|
, _bind_name(bind_name)
|
||||||
, _bind_password(bind_password)
|
, _bind_password(bind_password)
|
||||||
, _permissions_update_interval_in_ms(permissions_update_interval_in_ms)
|
, _permissions_update_interval_in_ms(permissions_update_interval_in_ms)
|
||||||
, _permissions_update_interval_in_ms_observer(std::move(permissions_update_interval_in_ms_observer))
|
, _permissions_update_interval_in_ms_observer(std::move(permissions_update_interval_in_ms_observer))
|
||||||
@@ -90,17 +96,12 @@ ldap_role_manager::ldap_role_manager(
|
|||||||
}
|
}
|
||||||
|
|
||||||
ldap_role_manager::ldap_role_manager(cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
ldap_role_manager::ldap_role_manager(cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
||||||
: ldap_role_manager(
|
: ldap_role_manager(qp.db().get_config().ldap_url_template(), qp.db().get_config().ldap_attr_role(), qp.db().get_config().ldap_bind_dn(),
|
||||||
qp.db().get_config().ldap_url_template(),
|
qp.db().get_config().ldap_bind_passwd(), qp.db().get_config().permissions_update_interval_in_ms(),
|
||||||
qp.db().get_config().ldap_attr_role(),
|
qp.db().get_config().permissions_update_interval_in_ms.observe([this](const uint32_t& v) {
|
||||||
qp.db().get_config().ldap_bind_dn(),
|
_permissions_update_interval_in_ms = v;
|
||||||
qp.db().get_config().ldap_bind_passwd(),
|
}),
|
||||||
qp.db().get_config().permissions_update_interval_in_ms(),
|
qp, rg0c, mm, cache) {
|
||||||
qp.db().get_config().permissions_update_interval_in_ms.observe([this] (const uint32_t& v) { _permissions_update_interval_in_ms = v; }),
|
|
||||||
qp,
|
|
||||||
rg0c,
|
|
||||||
mm,
|
|
||||||
cache) {
|
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string_view ldap_role_manager::qualified_java_name() const noexcept {
|
std::string_view ldap_role_manager::qualified_java_name() const noexcept {
|
||||||
@@ -113,8 +114,7 @@ const resource_set& ldap_role_manager::protected_resources() const {
|
|||||||
|
|
||||||
future<> ldap_role_manager::start() {
|
future<> ldap_role_manager::start() {
|
||||||
if (!parse_url(get_url("dummy-user"))) { // Just need host and port -- any user should do.
|
if (!parse_url(get_url("dummy-user"))) { // Just need host and port -- any user should do.
|
||||||
return make_exception_future(
|
return make_exception_future(std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
|
||||||
std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
|
|
||||||
}
|
}
|
||||||
_cache_pruner = futurize_invoke([this]() -> future<> {
|
_cache_pruner = futurize_invoke([this]() -> future<> {
|
||||||
while (true) {
|
while (true) {
|
||||||
@@ -188,9 +188,11 @@ future<conn_ptr> ldap_role_manager::reconnect() {
|
|||||||
|
|
||||||
future<> ldap_role_manager::stop() {
|
future<> ldap_role_manager::stop() {
|
||||||
_as.request_abort();
|
_as.request_abort();
|
||||||
return std::move(_cache_pruner).then([this] {
|
return std::move(_cache_pruner)
|
||||||
|
.then([this] {
|
||||||
return _std_mgr.stop();
|
return _std_mgr.stop();
|
||||||
}).then([this] {
|
})
|
||||||
|
.then([this] {
|
||||||
return _connection_factory.stop();
|
return _connection_factory.stop();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -221,8 +223,8 @@ future<role_set> ldap_role_manager::query_granted(std::string_view grantee_name,
|
|||||||
if (!desc) {
|
if (!desc) {
|
||||||
return make_exception_future<role_set>(std::runtime_error(format("Error parsing URL {}", url)));
|
return make_exception_future<role_set>(std::runtime_error(format("Error parsing URL {}", url)));
|
||||||
}
|
}
|
||||||
return _connection_factory.with_connection([this, desc = std::move(desc), grantee_name_ = sstring(grantee_name)]
|
return _connection_factory.with_connection(
|
||||||
(ldap_connection& conn) -> future<role_set> {
|
[this, desc = std::move(desc), grantee_name_ = sstring(grantee_name)](ldap_connection& conn) -> future<role_set> {
|
||||||
sstring grantee_name = std::move(grantee_name_);
|
sstring grantee_name = std::move(grantee_name_);
|
||||||
ldap_msg_ptr res = co_await conn.search(desc->lud_dn, desc->lud_scope, desc->lud_filter, desc->lud_attrs,
|
ldap_msg_ptr res = co_await conn.search(desc->lud_dn, desc->lud_scope, desc->lud_filter, desc->lud_attrs,
|
||||||
/*attrsonly=*/0, /*serverctrls=*/nullptr, /*clientctrls=*/nullptr,
|
/*attrsonly=*/0, /*serverctrls=*/nullptr, /*clientctrls=*/nullptr,
|
||||||
@@ -251,8 +253,7 @@ future<role_set> ldap_role_manager::query_granted(std::string_view grantee_name,
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<role_to_directly_granted_map>
|
future<role_to_directly_granted_map> ldap_role_manager::query_all_directly_granted(::service::query_state& qs) {
|
||||||
ldap_role_manager::query_all_directly_granted(::service::query_state& qs) {
|
|
||||||
role_to_directly_granted_map result;
|
role_to_directly_granted_map result;
|
||||||
auto roles = co_await query_all(qs);
|
auto roles = co_await query_all(qs);
|
||||||
for (auto& role : roles) {
|
for (auto& role : roles) {
|
||||||
@@ -283,8 +284,8 @@ future<> ldap_role_manager::create_role(std::string_view role_name) {
|
|||||||
} catch (const role_already_exists&) {
|
} catch (const role_already_exists&) {
|
||||||
// ok
|
// ok
|
||||||
} catch (const ::service::group0_concurrent_modification& ex) {
|
} catch (const ::service::group0_concurrent_modification& ex) {
|
||||||
mylog.warn("Failed to auto-create role \"{}\" due to guard conflict.{}.",
|
mylog.warn("Failed to auto-create role \"{}\" due to guard conflict.{}.", role_name,
|
||||||
role_name, retries ? " Retrying" : " Number of retries exceeded, giving up");
|
retries ? " Retrying" : " Number of retries exceeded, giving up");
|
||||||
if (retries--) {
|
if (retries--) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -329,8 +330,7 @@ future<bool> ldap_role_manager::can_login(std::string_view role_name) {
|
|||||||
return _std_mgr.can_login(role_name);
|
return _std_mgr.can_login(role_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<std::optional<sstring>> ldap_role_manager::get_attribute(
|
future<std::optional<sstring>> ldap_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
|
||||||
std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
|
|
||||||
return _std_mgr.get_attribute(role_name, attribute_name, qs);
|
return _std_mgr.get_attribute(role_name, attribute_name, qs);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
151
cdc/split.cc
151
cdc/split.cc
@@ -211,7 +211,9 @@ private:
|
|||||||
|
|
||||||
public:
|
public:
|
||||||
extract_collection_visitor(column_id id, std::map<change_key_t, row_update>& updates)
|
extract_collection_visitor(column_id id, std::map<change_key_t, row_update>& updates)
|
||||||
: _id(id), _updates(updates) {}
|
: _id(id)
|
||||||
|
, _updates(updates) {
|
||||||
|
}
|
||||||
|
|
||||||
void collection_tombstone(const tombstone& t) {
|
void collection_tombstone(const tombstone& t) {
|
||||||
auto& entry = get_or_append_entry(t.timestamp + 1, gc_clock::duration(0));
|
auto& entry = get_or_append_entry(t.timestamp + 1, gc_clock::duration(0));
|
||||||
@@ -226,7 +228,9 @@ public:
|
|||||||
cell(key, c);
|
cell(key, c);
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr bool finished() const { return false; }
|
constexpr bool finished() const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Visits all cells and tombstones in a row, putting the encountered changes into buckets
|
/* Visits all cells and tombstones in a row, putting the encountered changes into buckets
|
||||||
@@ -254,7 +258,9 @@ struct extract_row_visitor {
|
|||||||
data_type _value_type;
|
data_type _value_type;
|
||||||
|
|
||||||
collection_visitor(column_id id, std::map<change_key_t, row_update>& updates, const collection_type_impl& ctype)
|
collection_visitor(column_id id, std::map<change_key_t, row_update>& updates, const collection_type_impl& ctype)
|
||||||
: extract_collection_visitor<collection_visitor>(id, updates), _value_type(ctype.value_comparator()) {}
|
: extract_collection_visitor<collection_visitor>(id, updates)
|
||||||
|
, _value_type(ctype.value_comparator()) {
|
||||||
|
}
|
||||||
|
|
||||||
data_type get_value_type(bytes_view) {
|
data_type get_value_type(bytes_view) {
|
||||||
return _value_type;
|
return _value_type;
|
||||||
@@ -268,7 +274,9 @@ struct extract_row_visitor {
|
|||||||
const user_type_impl& _utype;
|
const user_type_impl& _utype;
|
||||||
|
|
||||||
udt_visitor(column_id id, std::map<change_key_t, row_update>& updates, const user_type_impl& utype)
|
udt_visitor(column_id id, std::map<change_key_t, row_update>& updates, const user_type_impl& utype)
|
||||||
: extract_collection_visitor<udt_visitor>(id, updates), _utype(utype) {}
|
: extract_collection_visitor<udt_visitor>(id, updates)
|
||||||
|
, _utype(utype) {
|
||||||
|
}
|
||||||
|
|
||||||
data_type get_value_type(bytes_view key) {
|
data_type get_value_type(bytes_view key) {
|
||||||
return _utype.type(deserialize_field_index(key));
|
return _utype.type(deserialize_field_index(key));
|
||||||
@@ -279,11 +287,12 @@ struct extract_row_visitor {
|
|||||||
},
|
},
|
||||||
[&](const abstract_type& o) {
|
[&](const abstract_type& o) {
|
||||||
throw std::runtime_error(format("extract_changes: unknown collection type:", o.name()));
|
throw std::runtime_error(format("extract_changes: unknown collection type:", o.name()));
|
||||||
}
|
}));
|
||||||
));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr bool finished() const { return false; }
|
constexpr bool finished() const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
struct extract_changes_visitor {
|
struct extract_changes_visitor {
|
||||||
@@ -294,11 +303,7 @@ struct extract_changes_visitor {
|
|||||||
visit_row_cells(v);
|
visit_row_cells(v);
|
||||||
|
|
||||||
for (auto& [ts_ttl, row_update] : v._updates) {
|
for (auto& [ts_ttl, row_update] : v._updates) {
|
||||||
_result[ts_ttl.first].static_updates.push_back({
|
_result[ts_ttl.first].static_updates.push_back({ts_ttl.second, std::move(row_update.atomic_entries), std::move(row_update.nonatomic_entries)});
|
||||||
ts_ttl.second,
|
|
||||||
std::move(row_update.atomic_entries),
|
|
||||||
std::move(row_update.nonatomic_entries)
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -326,13 +331,7 @@ struct extract_changes_visitor {
|
|||||||
auto [ts, ttl] = ts_ttl;
|
auto [ts, ttl] = ts_ttl;
|
||||||
|
|
||||||
if (v._marker && ts == v._marker_ts && ttl == v._marker_ttl) {
|
if (v._marker && ts == v._marker_ts && ttl == v._marker_ttl) {
|
||||||
_result[ts].clustered_inserts.push_back({
|
_result[ts].clustered_inserts.push_back({ttl, ckey, *v._marker, std::move(row_update.atomic_entries), {}});
|
||||||
ttl,
|
|
||||||
ckey,
|
|
||||||
*v._marker,
|
|
||||||
std::move(row_update.atomic_entries),
|
|
||||||
{}
|
|
||||||
});
|
|
||||||
|
|
||||||
auto& cr_insert = _result[ts].clustered_inserts.back();
|
auto& cr_insert = _result[ts].clustered_inserts.back();
|
||||||
bool clustered_update_exists = false;
|
bool clustered_update_exists = false;
|
||||||
@@ -362,12 +361,7 @@ struct extract_changes_visitor {
|
|||||||
cr_insert.nonatomic_entries.push_back(std::move(nonatomic_up));
|
cr_insert.nonatomic_entries.push_back(std::move(nonatomic_up));
|
||||||
} else {
|
} else {
|
||||||
if (!clustered_update_exists) {
|
if (!clustered_update_exists) {
|
||||||
_result[ts].clustered_updates.push_back({
|
_result[ts].clustered_updates.push_back({ttl, ckey, {}, {}});
|
||||||
ttl,
|
|
||||||
ckey,
|
|
||||||
{},
|
|
||||||
{}
|
|
||||||
});
|
|
||||||
|
|
||||||
// Multiple iterations of this `for` loop (for different collection columns)
|
// Multiple iterations of this `for` loop (for different collection columns)
|
||||||
// might want to put their `nonatomic_up`s into an UPDATE change;
|
// might want to put their `nonatomic_up`s into an UPDATE change;
|
||||||
@@ -390,12 +384,7 @@ struct extract_changes_visitor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
_result[ts].clustered_updates.push_back({
|
_result[ts].clustered_updates.push_back({ttl, ckey, std::move(row_update.atomic_entries), std::move(row_update.nonatomic_entries)});
|
||||||
ttl,
|
|
||||||
ckey,
|
|
||||||
std::move(row_update.atomic_entries),
|
|
||||||
std::move(row_update.nonatomic_entries)
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -412,7 +401,9 @@ struct extract_changes_visitor {
|
|||||||
_result[t.timestamp].partition_deletions = partition_deletion{t};
|
_result[t.timestamp].partition_deletions = partition_deletion{t};
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr bool finished() const { return false; }
|
constexpr bool finished() const {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
set_of_changes extract_changes(const mutation& m) {
|
set_of_changes extract_changes(const mutation& m) {
|
||||||
@@ -426,13 +417,23 @@ namespace cdc {
|
|||||||
struct find_timestamp_visitor {
|
struct find_timestamp_visitor {
|
||||||
api::timestamp_type _ts = api::missing_timestamp;
|
api::timestamp_type _ts = api::missing_timestamp;
|
||||||
|
|
||||||
bool finished() const { return _ts != api::missing_timestamp; }
|
bool finished() const {
|
||||||
|
return _ts != api::missing_timestamp;
|
||||||
|
}
|
||||||
|
|
||||||
void visit(api::timestamp_type ts) { _ts = ts; }
|
void visit(api::timestamp_type ts) {
|
||||||
void visit(const atomic_cell_view& cell) { visit(cell.timestamp()); }
|
_ts = ts;
|
||||||
|
}
|
||||||
|
void visit(const atomic_cell_view& cell) {
|
||||||
|
visit(cell.timestamp());
|
||||||
|
}
|
||||||
|
|
||||||
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||||
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
visit(cell);
|
||||||
|
}
|
||||||
|
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||||
|
visit(cell);
|
||||||
|
}
|
||||||
void collection_tombstone(const tombstone& t) {
|
void collection_tombstone(const tombstone& t) {
|
||||||
// A collection tombstone with timestamp T can be created with:
|
// A collection tombstone with timestamp T can be created with:
|
||||||
// UPDATE ks.t USING TIMESTAMP T + 1 SET X = null WHERE ...
|
// UPDATE ks.t USING TIMESTAMP T + 1 SET X = null WHERE ...
|
||||||
@@ -441,15 +442,33 @@ struct find_timestamp_visitor {
|
|||||||
// with cdc$time using timestamp T + 1 instead of T.
|
// with cdc$time using timestamp T + 1 instead of T.
|
||||||
visit(t.timestamp + 1);
|
visit(t.timestamp + 1);
|
||||||
}
|
}
|
||||||
void live_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
|
void live_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||||
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
|
visit(cell);
|
||||||
void collection_column(const column_definition&, auto&& visit_collection) { visit_collection(*this); }
|
}
|
||||||
void marker(const row_marker& rm) { visit(rm.timestamp()); }
|
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||||
void static_row_cells(auto&& visit_row_cells) { visit_row_cells(*this); }
|
visit(cell);
|
||||||
void clustered_row_cells(const clustering_key&, auto&& visit_row_cells) { visit_row_cells(*this); }
|
}
|
||||||
void clustered_row_delete(const clustering_key&, const tombstone& t) { visit(t.timestamp); }
|
void collection_column(const column_definition&, auto&& visit_collection) {
|
||||||
void range_delete(const range_tombstone& t) { visit(t.tomb.timestamp); }
|
visit_collection(*this);
|
||||||
void partition_delete(const tombstone& t) { visit(t.timestamp); }
|
}
|
||||||
|
void marker(const row_marker& rm) {
|
||||||
|
visit(rm.timestamp());
|
||||||
|
}
|
||||||
|
void static_row_cells(auto&& visit_row_cells) {
|
||||||
|
visit_row_cells(*this);
|
||||||
|
}
|
||||||
|
void clustered_row_cells(const clustering_key&, auto&& visit_row_cells) {
|
||||||
|
visit_row_cells(*this);
|
||||||
|
}
|
||||||
|
void clustered_row_delete(const clustering_key&, const tombstone& t) {
|
||||||
|
visit(t.timestamp);
|
||||||
|
}
|
||||||
|
void range_delete(const range_tombstone& t) {
|
||||||
|
visit(t.tomb.timestamp);
|
||||||
|
}
|
||||||
|
void partition_delete(const tombstone& t) {
|
||||||
|
visit(t.timestamp);
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Find some timestamp inside the given mutation.
|
/* Find some timestamp inside the given mutation.
|
||||||
@@ -505,8 +524,12 @@ struct should_split_visitor {
|
|||||||
|
|
||||||
virtual ~should_split_visitor() = default;
|
virtual ~should_split_visitor() = default;
|
||||||
|
|
||||||
inline bool finished() const { return _result; }
|
inline bool finished() const {
|
||||||
inline void stop() { _result = true; }
|
return _result;
|
||||||
|
}
|
||||||
|
inline void stop() {
|
||||||
|
_result = true;
|
||||||
|
}
|
||||||
|
|
||||||
void visit(api::timestamp_type ts, gc_clock::duration ttl = gc_clock::duration(0)) {
|
void visit(api::timestamp_type ts, gc_clock::duration ttl = gc_clock::duration(0)) {
|
||||||
if (_ts != api::missing_timestamp && _ts != ts) {
|
if (_ts != api::missing_timestamp && _ts != ts) {
|
||||||
@@ -520,12 +543,20 @@ struct should_split_visitor {
|
|||||||
_ttl = {ttl};
|
_ttl = {ttl};
|
||||||
}
|
}
|
||||||
|
|
||||||
void visit(const atomic_cell_view& cell) { visit(cell.timestamp(), get_ttl(cell)); }
|
void visit(const atomic_cell_view& cell) {
|
||||||
|
visit(cell.timestamp(), get_ttl(cell));
|
||||||
|
}
|
||||||
|
|
||||||
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||||
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
visit(cell);
|
||||||
|
}
|
||||||
|
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||||
|
visit(cell);
|
||||||
|
}
|
||||||
|
|
||||||
void collection_tombstone(const tombstone& t) { visit(t.timestamp + 1); }
|
void collection_tombstone(const tombstone& t) {
|
||||||
|
visit(t.timestamp + 1);
|
||||||
|
}
|
||||||
|
|
||||||
virtual void live_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
virtual void live_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||||
if (_had_row_marker) {
|
if (_had_row_marker) {
|
||||||
@@ -534,8 +565,12 @@ struct should_split_visitor {
|
|||||||
}
|
}
|
||||||
visit(cell);
|
visit(cell);
|
||||||
}
|
}
|
||||||
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
|
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||||
void collection_column(const column_definition&, auto&& visit_collection) { visit_collection(*this); }
|
visit(cell);
|
||||||
|
}
|
||||||
|
void collection_column(const column_definition&, auto&& visit_collection) {
|
||||||
|
visit_collection(*this);
|
||||||
|
}
|
||||||
|
|
||||||
virtual void marker(const row_marker& rm) {
|
virtual void marker(const row_marker& rm) {
|
||||||
_had_row_marker = true;
|
_had_row_marker = true;
|
||||||
@@ -711,8 +746,8 @@ bool should_skip(batch& changes, const mutation& base_mutation, change_processor
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
void process_changes_with_splitting(const mutation& base_mutation, change_processor& processor,
|
void process_changes_with_splitting(
|
||||||
bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
const mutation& base_mutation, change_processor& processor, bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||||
const auto base_schema = base_mutation.schema();
|
const auto base_schema = base_mutation.schema();
|
||||||
auto changes = extract_changes(base_mutation);
|
auto changes = extract_changes(base_mutation);
|
||||||
auto pk = base_mutation.key();
|
auto pk = base_mutation.key();
|
||||||
@@ -824,8 +859,8 @@ void process_changes_with_splitting(const mutation& base_mutation, change_proces
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void process_changes_without_splitting(const mutation& base_mutation, change_processor& processor,
|
void process_changes_without_splitting(
|
||||||
bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
const mutation& base_mutation, change_processor& processor, bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||||
if (alternator_strict_compatibility) {
|
if (alternator_strict_compatibility) {
|
||||||
auto changes = extract_changes(base_mutation);
|
auto changes = extract_changes(base_mutation);
|
||||||
if (should_skip(changes.begin()->second, base_mutation, processor)) {
|
if (should_skip(changes.begin()->second, base_mutation, processor)) {
|
||||||
|
|||||||
@@ -946,7 +946,7 @@ sstables::shared_sstable sstables_task_executor::consume_sstable() {
|
|||||||
auto sst = _sstables.back();
|
auto sst = _sstables.back();
|
||||||
_sstables.pop_back();
|
_sstables.pop_back();
|
||||||
--_cm._stats.pending_tasks; // from this point on, switch_state(pending|active) works the same way as any other task
|
--_cm._stats.pending_tasks; // from this point on, switch_state(pending|active) works the same way as any other task
|
||||||
cmlog.debug("{}", format("consumed {}", sst->get_filename()));
|
cmlog.debug("consumed {}", sst->get_filename());
|
||||||
return sst;
|
return sst;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1208,7 +1208,6 @@ future<> compaction_manager::await_tasks(std::vector<shared_ptr<compaction_task_
|
|||||||
|
|
||||||
std::vector<shared_ptr<compaction_task_executor>>
|
std::vector<shared_ptr<compaction_task_executor>>
|
||||||
compaction_manager::do_stop_ongoing_compactions(sstring reason, std::function<bool(const compaction_group_view*)> filter, std::optional<compaction_type> type_opt) noexcept {
|
compaction_manager::do_stop_ongoing_compactions(sstring reason, std::function<bool(const compaction_group_view*)> filter, std::optional<compaction_type> type_opt) noexcept {
|
||||||
auto ongoing_compactions = get_compactions(filter).size();
|
|
||||||
auto tasks = _tasks
|
auto tasks = _tasks
|
||||||
| std::views::filter([&filter, type_opt] (const auto& task) {
|
| std::views::filter([&filter, type_opt] (const auto& task) {
|
||||||
return filter(task.compacting_table()) && (!type_opt || task.compaction_type() == *type_opt);
|
return filter(task.compacting_table()) && (!type_opt || task.compaction_type() == *type_opt);
|
||||||
@@ -1217,6 +1216,7 @@ compaction_manager::do_stop_ongoing_compactions(sstring reason, std::function<bo
|
|||||||
| std::ranges::to<std::vector<shared_ptr<compaction_task_executor>>>();
|
| std::ranges::to<std::vector<shared_ptr<compaction_task_executor>>>();
|
||||||
logging::log_level level = tasks.empty() ? log_level::debug : log_level::info;
|
logging::log_level level = tasks.empty() ? log_level::debug : log_level::info;
|
||||||
if (cmlog.is_enabled(level)) {
|
if (cmlog.is_enabled(level)) {
|
||||||
|
auto ongoing_compactions = get_compactions(filter).size();
|
||||||
std::string scope = "";
|
std::string scope = "";
|
||||||
if (!tasks.empty()) {
|
if (!tasks.empty()) {
|
||||||
const compaction_group_view* t = tasks.front()->compacting_table();
|
const compaction_group_view* t = tasks.front()->compacting_table();
|
||||||
@@ -1426,11 +1426,17 @@ protected:
|
|||||||
compaction_strategy cs = t.get_compaction_strategy();
|
compaction_strategy cs = t.get_compaction_strategy();
|
||||||
compaction_descriptor descriptor = co_await cs.get_sstables_for_compaction(t, _cm.get_strategy_control());
|
compaction_descriptor descriptor = co_await cs.get_sstables_for_compaction(t, _cm.get_strategy_control());
|
||||||
int weight = calculate_weight(descriptor);
|
int weight = calculate_weight(descriptor);
|
||||||
|
bool debug_enabled = cmlog.is_enabled(log_level::debug);
|
||||||
|
if (debug_enabled) {
|
||||||
cmlog.debug("Started minor compaction sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
cmlog.debug("Started minor compaction sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
||||||
descriptor.sstables, compacting_table()->get_sstables_repaired_at(),
|
descriptor.sstables, compacting_table()->get_sstables_repaired_at(),
|
||||||
compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
||||||
|
}
|
||||||
|
|
||||||
auto old_sstables = ::format("{}", descriptor.sstables);
|
sstring old_sstables;
|
||||||
|
if (debug_enabled) {
|
||||||
|
old_sstables = ::format("{}", descriptor.sstables);
|
||||||
|
}
|
||||||
|
|
||||||
if (descriptor.sstables.empty() || !can_proceed() || t.is_auto_compaction_disabled_by_user()) {
|
if (descriptor.sstables.empty() || !can_proceed() || t.is_auto_compaction_disabled_by_user()) {
|
||||||
cmlog.debug("{}: sstables={} can_proceed={} auto_compaction={}", *this, descriptor.sstables.size(), can_proceed(), t.is_auto_compaction_disabled_by_user());
|
cmlog.debug("{}: sstables={} can_proceed={} auto_compaction={}", *this, descriptor.sstables.size(), can_proceed(), t.is_auto_compaction_disabled_by_user());
|
||||||
@@ -1460,8 +1466,10 @@ protected:
|
|||||||
try {
|
try {
|
||||||
bool should_update_history = this->should_update_history(descriptor.options.type());
|
bool should_update_history = this->should_update_history(descriptor.options.type());
|
||||||
compaction_result res = co_await compact_sstables(std::move(descriptor), _compaction_data, on_replace);
|
compaction_result res = co_await compact_sstables(std::move(descriptor), _compaction_data, on_replace);
|
||||||
|
if (debug_enabled) {
|
||||||
cmlog.debug("Finished minor compaction old_sstables={} new_sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
cmlog.debug("Finished minor compaction old_sstables={} new_sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
||||||
old_sstables, res.new_sstables, compacting_table()->get_sstables_repaired_at(), compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
old_sstables, res.new_sstables, compacting_table()->get_sstables_repaired_at(), compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
||||||
|
}
|
||||||
finish_compaction();
|
finish_compaction();
|
||||||
if (should_update_history) {
|
if (should_update_history) {
|
||||||
// update_history can take a long time compared to
|
// update_history can take a long time compared to
|
||||||
|
|||||||
@@ -33,8 +33,10 @@ future<compaction_descriptor> leveled_compaction_strategy::get_sstables_for_comp
|
|||||||
auto candidate = manifest.get_compaction_candidates(*state->last_compacted_keys, state->compaction_counter);
|
auto candidate = manifest.get_compaction_candidates(*state->last_compacted_keys, state->compaction_counter);
|
||||||
|
|
||||||
if (!candidate.sstables.empty()) {
|
if (!candidate.sstables.empty()) {
|
||||||
|
if (leveled_manifest::logger.is_enabled(logging::log_level::debug)) {
|
||||||
auto main_set = co_await table_s.main_sstable_set();
|
auto main_set = co_await table_s.main_sstable_set();
|
||||||
leveled_manifest::logger.debug("leveled: Compacting {} out of {} sstables", candidate.sstables.size(), main_set->size());
|
leveled_manifest::logger.debug("leveled: Compacting {} out of {} sstables", candidate.sstables.size(), main_set->size());
|
||||||
|
}
|
||||||
co_return candidate;
|
co_return candidate;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -15,6 +15,7 @@
|
|||||||
#include "compaction_strategy_state.hh"
|
#include "compaction_strategy_state.hh"
|
||||||
#include "utils/error_injection.hh"
|
#include "utils/error_injection.hh"
|
||||||
|
|
||||||
|
#include <seastar/util/lazy.hh>
|
||||||
#include <ranges>
|
#include <ranges>
|
||||||
|
|
||||||
namespace compaction {
|
namespace compaction {
|
||||||
@@ -28,10 +29,10 @@ time_window_compaction_strategy_state_ptr time_window_compaction_strategy::get_s
|
|||||||
}
|
}
|
||||||
|
|
||||||
const std::unordered_map<sstring, std::chrono::seconds> time_window_compaction_strategy_options::valid_window_units = {
|
const std::unordered_map<sstring, std::chrono::seconds> time_window_compaction_strategy_options::valid_window_units = {
|
||||||
{ "MINUTES", 60s }, { "HOURS", 3600s }, { "DAYS", 86400s }
|
{"MINUTES", 60s}, {"HOURS", 3600s}, {"DAYS", 86400s}};
|
||||||
};
|
|
||||||
|
|
||||||
const std::unordered_map<sstring, time_window_compaction_strategy_options::timestamp_resolutions> time_window_compaction_strategy_options::valid_timestamp_resolutions = {
|
const std::unordered_map<sstring, time_window_compaction_strategy_options::timestamp_resolutions>
|
||||||
|
time_window_compaction_strategy_options::valid_timestamp_resolutions = {
|
||||||
{"MICROSECONDS", timestamp_resolutions::microsecond},
|
{"MICROSECONDS", timestamp_resolutions::microsecond},
|
||||||
{"MILLISECONDS", timestamp_resolutions::millisecond},
|
{"MILLISECONDS", timestamp_resolutions::millisecond},
|
||||||
};
|
};
|
||||||
@@ -43,7 +44,8 @@ static std::chrono::seconds validate_compaction_window_unit(const std::map<sstri
|
|||||||
if (tmp_value) {
|
if (tmp_value) {
|
||||||
auto valid_window_units_it = time_window_compaction_strategy_options::valid_window_units.find(tmp_value.value());
|
auto valid_window_units_it = time_window_compaction_strategy_options::valid_window_units.find(tmp_value.value());
|
||||||
if (valid_window_units_it == time_window_compaction_strategy_options::valid_window_units.end()) {
|
if (valid_window_units_it == time_window_compaction_strategy_options::valid_window_units.end()) {
|
||||||
throw exceptions::configuration_exception(fmt::format("Invalid window unit {} for {}", tmp_value.value(), time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY));
|
throw exceptions::configuration_exception(
|
||||||
|
fmt::format("Invalid window unit {} for {}", tmp_value.value(), time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY));
|
||||||
}
|
}
|
||||||
window_unit = valid_window_units_it->second;
|
window_unit = valid_window_units_it->second;
|
||||||
}
|
}
|
||||||
@@ -59,10 +61,12 @@ static std::chrono::seconds validate_compaction_window_unit(const std::map<sstri
|
|||||||
|
|
||||||
static int validate_compaction_window_size(const std::map<sstring, sstring>& options) {
|
static int validate_compaction_window_size(const std::map<sstring, sstring>& options) {
|
||||||
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY);
|
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY);
|
||||||
int window_size = cql3::statements::property_definitions::to_long(time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, tmp_value, time_window_compaction_strategy_options::DEFAULT_COMPACTION_WINDOW_SIZE);
|
int window_size = cql3::statements::property_definitions::to_long(time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, tmp_value,
|
||||||
|
time_window_compaction_strategy_options::DEFAULT_COMPACTION_WINDOW_SIZE);
|
||||||
|
|
||||||
if (window_size <= 0) {
|
if (window_size <= 0) {
|
||||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be greater than 1", time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, window_size));
|
throw exceptions::configuration_exception(
|
||||||
|
fmt::format("{} value ({}) must be greater than 1", time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, window_size));
|
||||||
}
|
}
|
||||||
|
|
||||||
return window_size;
|
return window_size;
|
||||||
@@ -82,26 +86,30 @@ static db_clock::duration validate_expired_sstable_check_frequency_seconds(const
|
|||||||
try {
|
try {
|
||||||
expired_sstable_check_frequency = std::chrono::seconds(std::stol(tmp_value.value()));
|
expired_sstable_check_frequency = std::chrono::seconds(std::stol(tmp_value.value()));
|
||||||
} catch (const std::exception& e) {
|
} catch (const std::exception& e) {
|
||||||
throw exceptions::syntax_exception(fmt::format("Invalid long value {} for {}", tmp_value.value(), time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY));
|
throw exceptions::syntax_exception(fmt::format(
|
||||||
|
"Invalid long value {} for {}", tmp_value.value(), time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return expired_sstable_check_frequency;
|
return expired_sstable_check_frequency;
|
||||||
}
|
}
|
||||||
|
|
||||||
static db_clock::duration validate_expired_sstable_check_frequency_seconds(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
static db_clock::duration validate_expired_sstable_check_frequency_seconds(
|
||||||
|
const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||||
db_clock::duration expired_sstable_check_frequency = validate_expired_sstable_check_frequency_seconds(options);
|
db_clock::duration expired_sstable_check_frequency = validate_expired_sstable_check_frequency_seconds(options);
|
||||||
unchecked_options.erase(time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY);
|
unchecked_options.erase(time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY);
|
||||||
return expired_sstable_check_frequency;
|
return expired_sstable_check_frequency;
|
||||||
}
|
}
|
||||||
|
|
||||||
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options) {
|
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options) {
|
||||||
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = time_window_compaction_strategy_options::timestamp_resolutions::microsecond;
|
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution =
|
||||||
|
time_window_compaction_strategy_options::timestamp_resolutions::microsecond;
|
||||||
|
|
||||||
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
|
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
|
||||||
if (tmp_value) {
|
if (tmp_value) {
|
||||||
if (!time_window_compaction_strategy_options::valid_timestamp_resolutions.contains(tmp_value.value())) {
|
if (!time_window_compaction_strategy_options::valid_timestamp_resolutions.contains(tmp_value.value())) {
|
||||||
throw exceptions::configuration_exception(fmt::format("Invalid timestamp resolution {} for {}", tmp_value.value(), time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY));
|
throw exceptions::configuration_exception(fmt::format(
|
||||||
|
"Invalid timestamp resolution {} for {}", tmp_value.value(), time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY));
|
||||||
} else {
|
} else {
|
||||||
timestamp_resolution = time_window_compaction_strategy_options::valid_timestamp_resolutions.at(tmp_value.value());
|
timestamp_resolution = time_window_compaction_strategy_options::valid_timestamp_resolutions.at(tmp_value.value());
|
||||||
}
|
}
|
||||||
@@ -110,7 +118,8 @@ static time_window_compaction_strategy_options::timestamp_resolutions validate_t
|
|||||||
return timestamp_resolution;
|
return timestamp_resolution;
|
||||||
}
|
}
|
||||||
|
|
||||||
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(
|
||||||
|
const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||||
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = validate_timestamp_resolution(options);
|
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = validate_timestamp_resolution(options);
|
||||||
unchecked_options.erase(time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
|
unchecked_options.erase(time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
|
||||||
return timestamp_resolution;
|
return timestamp_resolution;
|
||||||
@@ -162,7 +171,9 @@ class classify_by_timestamp {
|
|||||||
std::vector<int64_t> _known_windows;
|
std::vector<int64_t> _known_windows;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
explicit classify_by_timestamp(time_window_compaction_strategy_options options) : _options(std::move(options)) { }
|
explicit classify_by_timestamp(time_window_compaction_strategy_options options)
|
||||||
|
: _options(std::move(options)) {
|
||||||
|
}
|
||||||
int64_t operator()(api::timestamp_type ts) {
|
int64_t operator()(api::timestamp_type ts) {
|
||||||
const auto window = time_window_compaction_strategy::get_window_for(_options, ts);
|
const auto window = time_window_compaction_strategy::get_window_for(_options, ts);
|
||||||
if (const auto it = std::ranges::find(_known_windows, window); it != _known_windows.end()) {
|
if (const auto it = std::ranges::find(_known_windows, window); it != _known_windows.end()) {
|
||||||
@@ -210,21 +221,19 @@ uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutati
|
|||||||
return partition_estimate / std::max(1UL, uint64_t(estimated_window_count));
|
return partition_estimate / std::max(1UL, uint64_t(estimated_window_count));
|
||||||
}
|
}
|
||||||
|
|
||||||
mutation_reader_consumer time_window_compaction_strategy::make_interposer_consumer(const mutation_source_metadata& ms_meta, mutation_reader_consumer end_consumer) const {
|
mutation_reader_consumer time_window_compaction_strategy::make_interposer_consumer(
|
||||||
if (ms_meta.min_timestamp && ms_meta.max_timestamp
|
const mutation_source_metadata& ms_meta, mutation_reader_consumer end_consumer) const {
|
||||||
&& get_window_for(_options, *ms_meta.min_timestamp) == get_window_for(_options, *ms_meta.max_timestamp)) {
|
if (ms_meta.min_timestamp && ms_meta.max_timestamp &&
|
||||||
|
get_window_for(_options, *ms_meta.min_timestamp) == get_window_for(_options, *ms_meta.max_timestamp)) {
|
||||||
return end_consumer;
|
return end_consumer;
|
||||||
}
|
}
|
||||||
return [options = _options, end_consumer = std::move(end_consumer)](mutation_reader rd) mutable -> future<> {
|
return [options = _options, end_consumer = std::move(end_consumer)](mutation_reader rd) mutable -> future<> {
|
||||||
return mutation_writer::segregate_by_timestamp(
|
return mutation_writer::segregate_by_timestamp(std::move(rd), classify_by_timestamp(std::move(options)), end_consumer);
|
||||||
std::move(rd),
|
|
||||||
classify_by_timestamp(std::move(options)),
|
|
||||||
end_consumer);
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
compaction_descriptor
|
compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||||
time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||||
auto mode = cfg.mode;
|
auto mode = cfg.mode;
|
||||||
std::vector<sstables::shared_sstable> single_window;
|
std::vector<sstables::shared_sstable> single_window;
|
||||||
std::vector<sstables::shared_sstable> multi_window;
|
std::vector<sstables::shared_sstable> multi_window;
|
||||||
@@ -258,10 +267,14 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_
|
|||||||
return sstable_set_overlapping_count(schema, ssts) <= tolerance;
|
return sstable_set_overlapping_count(schema, ssts) <= tolerance;
|
||||||
};
|
};
|
||||||
|
|
||||||
clogger.debug("time_window_compaction_strategy::get_reshaping_job: offstrategy_threshold={} max_sstables={} multi_window={} disjoint={} single_window={} disjoint={}",
|
clogger.debug("time_window_compaction_strategy::get_reshaping_job: offstrategy_threshold={} max_sstables={} multi_window={} disjoint={} "
|
||||||
offstrategy_threshold, max_sstables,
|
"single_window={} disjoint={}",
|
||||||
multi_window.size(), !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0,
|
offstrategy_threshold, max_sstables, multi_window.size(), seastar::value_of([&] {
|
||||||
single_window.size(), !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0);
|
return !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0;
|
||||||
|
}),
|
||||||
|
single_window.size(), seastar::value_of([&] {
|
||||||
|
return !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0;
|
||||||
|
}));
|
||||||
|
|
||||||
auto get_job_size = [](const std::vector<sstables::shared_sstable>& ssts) {
|
auto get_job_size = [](const std::vector<sstables::shared_sstable>& ssts) {
|
||||||
return std::ranges::fold_left(ssts | std::views::transform(std::mem_fn(&sstables::sstable::bytes_on_disk)), uint64_t(0), std::plus{});
|
return std::ranges::fold_left(ssts | std::views::transform(std::mem_fn(&sstables::sstable::bytes_on_disk)), uint64_t(0), std::plus{});
|
||||||
@@ -273,8 +286,7 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_
|
|||||||
const size_t min_sstables = 2;
|
const size_t min_sstables = 2;
|
||||||
auto is_above_target_size = job_size > target_job_size;
|
auto is_above_target_size = job_size > target_job_size;
|
||||||
|
|
||||||
return (ssts.size() > max_sstables && !is_disjoint) ||
|
return (ssts.size() > max_sstables && !is_disjoint) || (ssts.size() > min_sstables && is_above_target_size);
|
||||||
(ssts.size() > min_sstables && is_above_target_size);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
auto maybe_trim_job = [&need_trimming](std::vector<sstables::shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
|
auto maybe_trim_job = [&need_trimming](std::vector<sstables::shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
|
||||||
@@ -334,8 +346,7 @@ time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_
|
|||||||
return compaction_descriptor();
|
return compaction_descriptor();
|
||||||
}
|
}
|
||||||
|
|
||||||
future<compaction_descriptor>
|
future<compaction_descriptor> time_window_compaction_strategy::get_sstables_for_compaction(compaction_group_view& table_s, strategy_control& control) {
|
||||||
time_window_compaction_strategy::get_sstables_for_compaction(compaction_group_view& table_s, strategy_control& control) {
|
|
||||||
auto state = get_state(table_s);
|
auto state = get_state(table_s);
|
||||||
auto compaction_time = gc_clock::now();
|
auto compaction_time = gc_clock::now();
|
||||||
auto candidates = co_await control.candidates(table_s);
|
auto candidates = co_await control.candidates(table_s);
|
||||||
@@ -369,10 +380,8 @@ time_window_compaction_strategy::get_sstables_for_compaction(compaction_group_vi
|
|||||||
co_return compaction_descriptor(std::move(compaction_candidates));
|
co_return compaction_descriptor(std::move(compaction_candidates));
|
||||||
}
|
}
|
||||||
|
|
||||||
time_window_compaction_strategy::bucket_compaction_mode
|
time_window_compaction_strategy::bucket_compaction_mode time_window_compaction_strategy::compaction_mode(
|
||||||
time_window_compaction_strategy::compaction_mode(const time_window_compaction_strategy_state& state,
|
const time_window_compaction_strategy_state& state, const bucket_t& bucket, timestamp_type bucket_key, timestamp_type now, size_t min_threshold) const {
|
||||||
const bucket_t& bucket, timestamp_type bucket_key,
|
|
||||||
timestamp_type now, size_t min_threshold) const {
|
|
||||||
// STCS will also be performed on older window buckets, to avoid a bad write and
|
// STCS will also be performed on older window buckets, to avoid a bad write and
|
||||||
// space amplification when something like read repair cause small updates to
|
// space amplification when something like read repair cause small updates to
|
||||||
// those past windows.
|
// those past windows.
|
||||||
@@ -385,8 +394,7 @@ time_window_compaction_strategy::compaction_mode(const time_window_compaction_st
|
|||||||
return bucket_compaction_mode::none;
|
return bucket_compaction_mode::none;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<sstables::shared_sstable>
|
std::vector<sstables::shared_sstable> time_window_compaction_strategy::get_next_non_expired_sstables(compaction_group_view& table_s, strategy_control& control,
|
||||||
time_window_compaction_strategy::get_next_non_expired_sstables(compaction_group_view& table_s, strategy_control& control,
|
|
||||||
std::vector<sstables::shared_sstable> non_expiring_sstables, gc_clock::time_point compaction_time, time_window_compaction_strategy_state& state) {
|
std::vector<sstables::shared_sstable> non_expiring_sstables, gc_clock::time_point compaction_time, time_window_compaction_strategy_state& state) {
|
||||||
auto most_interesting = get_compaction_candidates(table_s, control, non_expiring_sstables, state);
|
auto most_interesting = get_compaction_candidates(table_s, control, non_expiring_sstables, state);
|
||||||
|
|
||||||
@@ -412,8 +420,7 @@ time_window_compaction_strategy::get_next_non_expired_sstables(compaction_group_
|
|||||||
return {*it};
|
return {*it};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<sstables::shared_sstable>
|
std::vector<sstables::shared_sstable> time_window_compaction_strategy::get_compaction_candidates(compaction_group_view& table_s, strategy_control& control,
|
||||||
time_window_compaction_strategy::get_compaction_candidates(compaction_group_view& table_s, strategy_control& control,
|
|
||||||
std::vector<sstables::shared_sstable> candidate_sstables, time_window_compaction_strategy_state& state) {
|
std::vector<sstables::shared_sstable> candidate_sstables, time_window_compaction_strategy_state& state) {
|
||||||
auto [buckets, max_timestamp] = get_buckets(std::move(candidate_sstables), _options);
|
auto [buckets, max_timestamp] = get_buckets(std::move(candidate_sstables), _options);
|
||||||
// Update the highest window seen, if necessary
|
// Update the highest window seen, if necessary
|
||||||
@@ -423,8 +430,7 @@ time_window_compaction_strategy::get_compaction_candidates(compaction_group_view
|
|||||||
state.highest_window_seen, state);
|
state.highest_window_seen, state);
|
||||||
}
|
}
|
||||||
|
|
||||||
timestamp_type
|
timestamp_type time_window_compaction_strategy::get_window_lower_bound(std::chrono::seconds sstable_window_size, timestamp_type timestamp) {
|
||||||
time_window_compaction_strategy::get_window_lower_bound(std::chrono::seconds sstable_window_size, timestamp_type timestamp) {
|
|
||||||
using namespace std::chrono;
|
using namespace std::chrono;
|
||||||
// mask out window size from timestamp to get lower bound of its window
|
// mask out window size from timestamp to get lower bound of its window
|
||||||
auto num_windows = microseconds(timestamp) / sstable_window_size;
|
auto num_windows = microseconds(timestamp) / sstable_window_size;
|
||||||
@@ -432,8 +438,8 @@ time_window_compaction_strategy::get_window_lower_bound(std::chrono::seconds sst
|
|||||||
return duration_cast<microseconds>(num_windows * sstable_window_size).count();
|
return duration_cast<microseconds>(num_windows * sstable_window_size).count();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::map<timestamp_type, std::vector<sstables::shared_sstable>>, timestamp_type>
|
std::pair<std::map<timestamp_type, std::vector<sstables::shared_sstable>>, timestamp_type> time_window_compaction_strategy::get_buckets(
|
||||||
time_window_compaction_strategy::get_buckets(std::vector<sstables::shared_sstable> files, const time_window_compaction_strategy_options& options) {
|
std::vector<sstables::shared_sstable> files, const time_window_compaction_strategy_options& options) {
|
||||||
std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets;
|
std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets;
|
||||||
|
|
||||||
timestamp_type max_timestamp = 0;
|
timestamp_type max_timestamp = 0;
|
||||||
@@ -450,11 +456,13 @@ time_window_compaction_strategy::get_buckets(std::vector<sstables::shared_sstabl
|
|||||||
return std::make_pair(std::move(buckets), max_timestamp);
|
return std::make_pair(std::move(buckets), max_timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
} // namespace compaction
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct fmt::formatter<std::map<compaction::timestamp_type, std::vector<sstables::shared_sstable>>> {
|
struct fmt::formatter<std::map<compaction::timestamp_type, std::vector<sstables::shared_sstable>>> {
|
||||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
constexpr auto parse(format_parse_context& ctx) {
|
||||||
|
return ctx.begin();
|
||||||
|
}
|
||||||
auto format(const std::map<compaction::timestamp_type, std::vector<sstables::shared_sstable>>& buckets, fmt::format_context& ctx) const {
|
auto format(const std::map<compaction::timestamp_type, std::vector<sstables::shared_sstable>>& buckets, fmt::format_context& ctx) const {
|
||||||
auto out = fmt::format_to(ctx.out(), " buckets = {{\n");
|
auto out = fmt::format_to(ctx.out(), " buckets = {{\n");
|
||||||
for (auto& [timestamp, sstables] : buckets | std::views::reverse) {
|
for (auto& [timestamp, sstables] : buckets | std::views::reverse) {
|
||||||
@@ -466,9 +474,9 @@ struct fmt::formatter<std::map<compaction::timestamp_type, std::vector<sstables:
|
|||||||
|
|
||||||
namespace compaction {
|
namespace compaction {
|
||||||
|
|
||||||
std::vector<sstables::shared_sstable>
|
std::vector<sstables::shared_sstable> time_window_compaction_strategy::newest_bucket(compaction_group_view& table_s, strategy_control& control,
|
||||||
time_window_compaction_strategy::newest_bucket(compaction_group_view& table_s, strategy_control& control, std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets,
|
std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets, int min_threshold, int max_threshold, timestamp_type now,
|
||||||
int min_threshold, int max_threshold, timestamp_type now, time_window_compaction_strategy_state& state) {
|
time_window_compaction_strategy_state& state) {
|
||||||
clogger.debug("time_window_compaction_strategy::newest_bucket:\n now {}\n{}", now, buckets);
|
clogger.debug("time_window_compaction_strategy::newest_bucket:\n now {}\n{}", now, buckets);
|
||||||
|
|
||||||
for (auto&& [key, bucket] : buckets | std::views::reverse) {
|
for (auto&& [key, bucket] : buckets | std::views::reverse) {
|
||||||
@@ -509,8 +517,7 @@ time_window_compaction_strategy::newest_bucket(compaction_group_view& table_s, s
|
|||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<sstables::shared_sstable>
|
std::vector<sstables::shared_sstable> time_window_compaction_strategy::trim_to_threshold(std::vector<sstables::shared_sstable> bucket, int max_threshold) {
|
||||||
time_window_compaction_strategy::trim_to_threshold(std::vector<sstables::shared_sstable> bucket, int max_threshold) {
|
|
||||||
auto n = std::min(bucket.size(), size_t(max_threshold));
|
auto n = std::min(bucket.size(), size_t(max_threshold));
|
||||||
// Trim the largest sstables off the end to meet the maxThreshold
|
// Trim the largest sstables off the end to meet the maxThreshold
|
||||||
std::ranges::partial_sort(bucket, bucket.begin() + n, std::ranges::less(), std::mem_fn(&sstables::sstable::ondisk_data_size));
|
std::ranges::partial_sort(bucket, bucket.begin() + n, std::ranges::less(), std::mem_fn(&sstables::sstable::ondisk_data_size));
|
||||||
@@ -542,8 +549,8 @@ future<int64_t> time_window_compaction_strategy::estimated_pending_compactions(c
|
|||||||
co_return n;
|
co_return n;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<compaction_descriptor>
|
std::vector<compaction_descriptor> time_window_compaction_strategy::get_cleanup_compaction_jobs(
|
||||||
time_window_compaction_strategy::get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const {
|
compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const {
|
||||||
std::vector<compaction_descriptor> ret;
|
std::vector<compaction_descriptor> ret;
|
||||||
for (auto&& [_, sstables] : get_buckets(std::move(candidates), _options).first) {
|
for (auto&& [_, sstables] : get_buckets(std::move(candidates), _options).first) {
|
||||||
auto per_window_jobs = size_tiered_compaction_strategy(_stcs_options).get_cleanup_compaction_jobs(table_s, std::move(sstables));
|
auto per_window_jobs = size_tiered_compaction_strategy(_stcs_options).get_cleanup_compaction_jobs(table_s, std::move(sstables));
|
||||||
@@ -556,4 +563,4 @@ std::unique_ptr<sstables::sstable_set_impl> time_window_compaction_strategy::mak
|
|||||||
return std::make_unique<sstables::time_series_sstable_set>(ts.schema(), _options.enable_optimized_twcs_queries);
|
return std::make_unique<sstables::time_series_sstable_set>(ts.schema(), _options.enable_optimized_twcs_queries);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
} // namespace compaction
|
||||||
|
|||||||
@@ -48,13 +48,15 @@ const sstring query_processor::CQL_VERSION = "3.3.1";
|
|||||||
const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono::minutes(60);
|
const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono::minutes(60);
|
||||||
|
|
||||||
struct query_processor::remote {
|
struct query_processor::remote {
|
||||||
remote(service::migration_manager& mm, service::mapreduce_service& fwd,
|
remote(service::migration_manager& mm, service::mapreduce_service& fwd, service::storage_service& ss, service::raft_group0_client& group0_client,
|
||||||
service::storage_service& ss, service::raft_group0_client& group0_client,
|
|
||||||
service::strong_consistency::coordinator& _sc_coordinator)
|
service::strong_consistency::coordinator& _sc_coordinator)
|
||||||
: mm(mm), mapreducer(fwd), ss(ss), group0_client(group0_client)
|
: mm(mm)
|
||||||
|
, mapreducer(fwd)
|
||||||
|
, ss(ss)
|
||||||
|
, group0_client(group0_client)
|
||||||
, sc_coordinator(_sc_coordinator)
|
, sc_coordinator(_sc_coordinator)
|
||||||
, gate("query_processor::remote")
|
, gate("query_processor::remote") {
|
||||||
{}
|
}
|
||||||
|
|
||||||
service::migration_manager& mm;
|
service::migration_manager& mm;
|
||||||
service::mapreduce_service& mapreducer;
|
service::mapreduce_service& mapreducer;
|
||||||
@@ -77,7 +79,9 @@ static service::query_state query_state_for_internal_call() {
|
|||||||
return {service::client_state::for_internal_calls(), empty_service_permit()};
|
return {service::client_state::for_internal_calls(), empty_service_permit()};
|
||||||
}
|
}
|
||||||
|
|
||||||
query_processor::query_processor(service::storage_proxy& proxy, data_dictionary::database db, service::migration_notifier& mn, vector_search::vector_store_client& vsc, query_processor::memory_config mcfg, cql_config& cql_cfg, utils::loading_cache_config auth_prep_cache_cfg, lang::manager& langm)
|
query_processor::query_processor(service::storage_proxy& proxy, data_dictionary::database db, service::migration_notifier& mn,
|
||||||
|
vector_search::vector_store_client& vsc, query_processor::memory_config mcfg, cql_config& cql_cfg, utils::loading_cache_config auth_prep_cache_cfg,
|
||||||
|
lang::manager& langm)
|
||||||
: _migration_subscriber{std::make_unique<migration_subscriber>(this)}
|
: _migration_subscriber{std::make_unique<migration_subscriber>(this)}
|
||||||
, _proxy(proxy)
|
, _proxy(proxy)
|
||||||
, _db(db)
|
, _db(db)
|
||||||
@@ -87,14 +91,22 @@ query_processor::query_processor(service::storage_proxy& proxy, data_dictionary:
|
|||||||
, _cql_config(cql_cfg)
|
, _cql_config(cql_cfg)
|
||||||
, _prepared_cache(prep_cache_log, _mcfg.prepared_statment_cache_size)
|
, _prepared_cache(prep_cache_log, _mcfg.prepared_statment_cache_size)
|
||||||
, _authorized_prepared_cache(std::move(auth_prep_cache_cfg), authorized_prepared_statements_cache_log)
|
, _authorized_prepared_cache(std::move(auth_prep_cache_cfg), authorized_prepared_statements_cache_log)
|
||||||
, _auth_prepared_cache_cfg_cb([this] (uint32_t) { (void) _authorized_prepared_cache_config_action.trigger_later(); })
|
, _auth_prepared_cache_cfg_cb([this](uint32_t) {
|
||||||
, _authorized_prepared_cache_config_action([this] { update_authorized_prepared_cache_config(); return make_ready_future<>(); })
|
(void)_authorized_prepared_cache_config_action.trigger_later();
|
||||||
|
})
|
||||||
|
, _authorized_prepared_cache_config_action([this] {
|
||||||
|
update_authorized_prepared_cache_config();
|
||||||
|
return make_ready_future<>();
|
||||||
|
})
|
||||||
, _authorized_prepared_cache_update_interval_in_ms_observer(_db.get_config().permissions_update_interval_in_ms.observe(_auth_prepared_cache_cfg_cb))
|
, _authorized_prepared_cache_update_interval_in_ms_observer(_db.get_config().permissions_update_interval_in_ms.observe(_auth_prepared_cache_cfg_cb))
|
||||||
, _authorized_prepared_cache_validity_in_ms_observer(_db.get_config().permissions_validity_in_ms.observe(_auth_prepared_cache_cfg_cb))
|
, _authorized_prepared_cache_validity_in_ms_observer(_db.get_config().permissions_validity_in_ms.observe(_auth_prepared_cache_cfg_cb))
|
||||||
, _lang_manager(langm)
|
, _lang_manager(langm)
|
||||||
, _write_consistency_levels_warned_observer(_db.get_config().write_consistency_levels_warned.observe([this](const auto& v) { _write_consistency_levels_warned = to_consistency_level_set(v); }))
|
, _write_consistency_levels_warned_observer(_db.get_config().write_consistency_levels_warned.observe([this](const auto& v) {
|
||||||
, _write_consistency_levels_disallowed_observer(_db.get_config().write_consistency_levels_disallowed.observe([this](const auto& v) { _write_consistency_levels_disallowed = to_consistency_level_set(v); }))
|
_write_consistency_levels_warned = to_consistency_level_set(v);
|
||||||
{
|
}))
|
||||||
|
, _write_consistency_levels_disallowed_observer(_db.get_config().write_consistency_levels_disallowed.observe([this](const auto& v) {
|
||||||
|
_write_consistency_levels_disallowed = to_consistency_level_set(v);
|
||||||
|
})) {
|
||||||
_write_consistency_levels_warned = to_consistency_level_set(_db.get_config().write_consistency_levels_warned());
|
_write_consistency_levels_warned = to_consistency_level_set(_db.get_config().write_consistency_levels_warned());
|
||||||
_write_consistency_levels_disallowed = to_consistency_level_set(_db.get_config().write_consistency_levels_disallowed());
|
_write_consistency_levels_disallowed = to_consistency_level_set(_db.get_config().write_consistency_levels_disallowed());
|
||||||
namespace sm = seastar::metrics;
|
namespace sm = seastar::metrics;
|
||||||
@@ -110,17 +122,11 @@ query_processor::query_processor(service::storage_proxy& proxy, data_dictionary:
|
|||||||
const auto system_ks_label_instance = ks_label("system");
|
const auto system_ks_label_instance = ks_label("system");
|
||||||
|
|
||||||
std::vector<sm::metric_definition> qp_group;
|
std::vector<sm::metric_definition> qp_group;
|
||||||
qp_group.push_back(sm::make_counter(
|
qp_group.push_back(sm::make_counter("statements_prepared", _stats.prepare_invocations, sm::description("Counts the total number of parsed CQL requests.")));
|
||||||
"statements_prepared",
|
|
||||||
_stats.prepare_invocations,
|
|
||||||
sm::description("Counts the total number of parsed CQL requests.")));
|
|
||||||
for (auto cl = size_t(clevel::MIN_VALUE); cl <= size_t(clevel::MAX_VALUE); ++cl) {
|
for (auto cl = size_t(clevel::MIN_VALUE); cl <= size_t(clevel::MAX_VALUE); ++cl) {
|
||||||
qp_group.push_back(
|
qp_group.push_back(sm::make_counter(
|
||||||
sm::make_counter(
|
"queries", _stats.queries_by_cl[cl], sm::description("Counts queries by consistency level."), {cl_label(clevel(cl)), basic_level})
|
||||||
"queries",
|
.set_skip_when_empty());
|
||||||
_stats.queries_by_cl[cl],
|
|
||||||
sm::description("Counts queries by consistency level."),
|
|
||||||
{cl_label(clevel(cl)), basic_level}).set_skip_when_empty());
|
|
||||||
}
|
}
|
||||||
_metrics.add_group("query_processor", qp_group);
|
_metrics.add_group("query_processor", qp_group);
|
||||||
|
|
||||||
@@ -521,25 +527,19 @@ query_processor::query_processor(service::storage_proxy& proxy, data_dictionary:
|
|||||||
|
|
||||||
std::vector<sm::metric_definition> cql_cl_group;
|
std::vector<sm::metric_definition> cql_cl_group;
|
||||||
for (auto cl = size_t(clevel::MIN_VALUE); cl <= size_t(clevel::MAX_VALUE); ++cl) {
|
for (auto cl = size_t(clevel::MIN_VALUE); cl <= size_t(clevel::MAX_VALUE); ++cl) {
|
||||||
cql_cl_group.push_back(
|
cql_cl_group.push_back(sm::make_counter("writes_per_consistency_level", _cql_stats.writes_per_consistency_level[cl],
|
||||||
sm::make_counter(
|
sm::description("Counts the number of writes for each consistency level."), {cl_label(clevel(cl)), basic_level})
|
||||||
"writes_per_consistency_level",
|
.set_skip_when_empty());
|
||||||
_cql_stats.writes_per_consistency_level[cl],
|
|
||||||
sm::description("Counts the number of writes for each consistency level."),
|
|
||||||
{cl_label(clevel(cl)), basic_level}).set_skip_when_empty());
|
|
||||||
}
|
}
|
||||||
_metrics.add_group("cql", cql_cl_group);
|
_metrics.add_group("cql", cql_cl_group);
|
||||||
|
|
||||||
_metrics.add_group("cql", {
|
_metrics.add_group(
|
||||||
sm::make_counter(
|
"cql", {
|
||||||
"write_consistency_levels_disallowed_violations",
|
sm::make_counter("write_consistency_levels_disallowed_violations", _cql_stats.write_consistency_levels_disallowed_violations,
|
||||||
_cql_stats.write_consistency_levels_disallowed_violations,
|
|
||||||
sm::description("Counts the number of write_consistency_levels_disallowed guardrail violations, "
|
sm::description("Counts the number of write_consistency_levels_disallowed guardrail violations, "
|
||||||
"i.e. attempts to write with a forbidden consistency level."),
|
"i.e. attempts to write with a forbidden consistency level."),
|
||||||
{basic_level}),
|
{basic_level}),
|
||||||
sm::make_counter(
|
sm::make_counter("write_consistency_levels_warned_violations", _cql_stats.write_consistency_levels_warned_violations,
|
||||||
"write_consistency_levels_warned_violations",
|
|
||||||
_cql_stats.write_consistency_levels_warned_violations,
|
|
||||||
sm::description("Counts the number of write_consistency_levels_warned guardrail violations, "
|
sm::description("Counts the number of write_consistency_levels_warned guardrail violations, "
|
||||||
"i.e. attempts to write with a discouraged consistency level."),
|
"i.e. attempts to write with a discouraged consistency level."),
|
||||||
{basic_level}),
|
{basic_level}),
|
||||||
@@ -554,15 +554,13 @@ query_processor::~query_processor() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder>
|
std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder> query_processor::acquire_strongly_consistent_coordinator() {
|
||||||
query_processor::acquire_strongly_consistent_coordinator() {
|
|
||||||
auto [remote_, holder] = remote();
|
auto [remote_, holder] = remote();
|
||||||
return {remote_.get().sc_coordinator, std::move(holder)};
|
return {remote_.get().sc_coordinator, std::move(holder)};
|
||||||
}
|
}
|
||||||
|
|
||||||
void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer,
|
void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer, service::storage_service& ss,
|
||||||
service::storage_service& ss, service::raft_group0_client& group0_client,
|
service::raft_group0_client& group0_client, service::strong_consistency::coordinator& sc_coordinator) {
|
||||||
service::strong_consistency::coordinator& sc_coordinator) {
|
|
||||||
_remote = std::make_unique<struct remote>(mm, mapreducer, ss, group0_client, sc_coordinator);
|
_remote = std::make_unique<struct remote>(mm, mapreducer, ss, group0_client, sc_coordinator);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -582,7 +580,9 @@ future<> query_processor::stop() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<cql_transport::messages::result_message>> query_processor::execute_with_guard(
|
future<::shared_ptr<cql_transport::messages::result_message>> query_processor::execute_with_guard(
|
||||||
std::function<future<::shared_ptr<cql_transport::messages::result_message>>(service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>)> fn,
|
std::function<future<::shared_ptr<cql_transport::messages::result_message>>(
|
||||||
|
service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>)>
|
||||||
|
fn,
|
||||||
::shared_ptr<cql_statement> statement, service::query_state& query_state, const query_options& options) {
|
::shared_ptr<cql_statement> statement, service::query_state& query_state, const query_options& options) {
|
||||||
// execute all statements that need group0 guard on shard0
|
// execute all statements that need group0 guard on shard0
|
||||||
if (this_shard_id() != 0) {
|
if (this_shard_id() != 0) {
|
||||||
@@ -596,8 +596,8 @@ future<::shared_ptr<cql_transport::messages::result_message>> query_processor::e
|
|||||||
auto guard = co_await remote_.get().mm.start_group0_operation();
|
auto guard = co_await remote_.get().mm.start_group0_operation();
|
||||||
co_return co_await fn(query_state, statement, options, std::move(guard));
|
co_return co_await fn(query_state, statement, options, std::move(guard));
|
||||||
} catch (const service::group0_concurrent_modification& ex) {
|
} catch (const service::group0_concurrent_modification& ex) {
|
||||||
log.warn("Failed to execute statement \"{}\" due to guard conflict.{}.",
|
log.warn("Failed to execute statement \"{}\" due to guard conflict.{}.", statement->raw_cql_statement,
|
||||||
statement->raw_cql_statement, retries ? " Retrying" : " Number of retries exceeded, giving up");
|
retries ? " Retrying" : " Number of retries exceeded, giving up");
|
||||||
if (retries--) {
|
if (retries--) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -607,28 +607,29 @@ future<::shared_ptr<cql_transport::messages::result_message>> query_processor::e
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename... Args>
|
template <typename... Args>
|
||||||
future<::shared_ptr<result_message>>
|
future<::shared_ptr<result_message>> query_processor::execute_maybe_with_guard(service::query_state& query_state, ::shared_ptr<cql_statement> statement,
|
||||||
query_processor::execute_maybe_with_guard(service::query_state& query_state, ::shared_ptr<cql_statement> statement, const query_options& options,
|
const query_options& options,
|
||||||
future<::shared_ptr<result_message>>(query_processor::*fn)(service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>, Args...), Args... args) {
|
future<::shared_ptr<result_message>> (query_processor::*fn)(
|
||||||
|
service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>, Args...),
|
||||||
|
Args... args) {
|
||||||
if (!statement->needs_guard(*this, query_state)) {
|
if (!statement->needs_guard(*this, query_state)) {
|
||||||
return (this->*fn)(query_state, std::move(statement), options, std::nullopt, std::forward<Args>(args)...);
|
return (this->*fn)(query_state, std::move(statement), options, std::nullopt, std::forward<Args>(args)...);
|
||||||
}
|
}
|
||||||
static auto exec = [fn] (query_processor& qp, Args... args, service::query_state& query_state, ::shared_ptr<cql_statement> statement, const query_options& options, std::optional<service::group0_guard> guard) {
|
static auto exec = [fn](query_processor& qp, Args... args, service::query_state& query_state, ::shared_ptr<cql_statement> statement,
|
||||||
|
const query_options& options, std::optional<service::group0_guard> guard) {
|
||||||
return (qp.*fn)(query_state, std::move(statement), options, std::move(guard), std::forward<Args>(args)...);
|
return (qp.*fn)(query_state, std::move(statement), options, std::move(guard), std::forward<Args>(args)...);
|
||||||
};
|
};
|
||||||
return execute_with_guard(std::bind_front(exec, std::ref(*this), std::forward<Args>(args)...), std::move(statement), query_state, options);
|
return execute_with_guard(std::bind_front(exec, std::ref(*this), std::forward<Args>(args)...), std::move(statement), query_state, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<result_message>>
|
future<::shared_ptr<result_message>> query_processor::execute_direct_without_checking_exception_message(
|
||||||
query_processor::execute_direct_without_checking_exception_message(const std::string_view& query_string, service::query_state& query_state, dialect d, query_options& options) {
|
const std::string_view& query_string, service::query_state& query_state, dialect d, query_options& options) {
|
||||||
log.trace("execute_direct: \"{}\"", query_string);
|
log.trace("execute_direct: \"{}\"", query_string);
|
||||||
tracing::trace(query_state.get_trace_state(), "Parsing a statement");
|
tracing::trace(query_state.get_trace_state(), "Parsing a statement");
|
||||||
auto p = get_statement(query_string, query_state.get_client_state(), d);
|
auto p = get_statement(query_string, query_state.get_client_state(), d);
|
||||||
auto statement = p->statement;
|
auto statement = p->statement;
|
||||||
if (statement->get_bound_terms() != options.get_values_count()) {
|
if (statement->get_bound_terms() != options.get_values_count()) {
|
||||||
const auto msg = format("Invalid amount of bind variables: expected {:d} received {:d}",
|
const auto msg = format("Invalid amount of bind variables: expected {:d} received {:d}", statement->get_bound_terms(), options.get_values_count());
|
||||||
statement->get_bound_terms(),
|
|
||||||
options.get_values_count());
|
|
||||||
throw exceptions::invalid_request_exception(msg);
|
throw exceptions::invalid_request_exception(msg);
|
||||||
}
|
}
|
||||||
options.prepare(p->bound_names);
|
options.prepare(p->bound_names);
|
||||||
@@ -639,17 +640,13 @@ query_processor::execute_direct_without_checking_exception_message(const std::st
|
|||||||
metrics.regularStatementsExecuted.inc();
|
metrics.regularStatementsExecuted.inc();
|
||||||
#endif
|
#endif
|
||||||
auto user = query_state.get_client_state().user();
|
auto user = query_state.get_client_state().user();
|
||||||
tracing::trace(query_state.get_trace_state(), "Processing a statement for authenticated user: {}", user ? (user->name ? *user->name : "anonymous") : "no user authenticated");
|
tracing::trace(query_state.get_trace_state(), "Processing a statement for authenticated user: {}",
|
||||||
|
user ? (user->name ? *user->name : "anonymous") : "no user authenticated");
|
||||||
return execute_maybe_with_guard(query_state, std::move(statement), options, &query_processor::do_execute_direct, std::move(p->warnings));
|
return execute_maybe_with_guard(query_state, std::move(statement), options, &query_processor::do_execute_direct, std::move(p->warnings));
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<result_message>>
|
future<::shared_ptr<result_message>> query_processor::do_execute_direct(service::query_state& query_state, shared_ptr<cql_statement> statement,
|
||||||
query_processor::do_execute_direct(
|
const query_options& options, std::optional<service::group0_guard> guard, cql3::cql_warnings_vec warnings) {
|
||||||
service::query_state& query_state,
|
|
||||||
shared_ptr<cql_statement> statement,
|
|
||||||
const query_options& options,
|
|
||||||
std::optional<service::group0_guard> guard,
|
|
||||||
cql3::cql_warnings_vec warnings) {
|
|
||||||
auto access_future = co_await coroutine::as_future(statement->check_access(*this, query_state.get_client_state()));
|
auto access_future = co_await coroutine::as_future(statement->check_access(*this, query_state.get_client_state()));
|
||||||
if (access_future.failed()) {
|
if (access_future.failed()) {
|
||||||
co_await audit::inspect(statement, query_state, options, true);
|
co_await audit::inspect(statement, query_state, options, true);
|
||||||
@@ -674,26 +671,16 @@ query_processor::do_execute_direct(
|
|||||||
co_return std::move(m);
|
co_return std::move(m);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<result_message>>
|
future<::shared_ptr<result_message>> query_processor::execute_prepared_without_checking_exception_message(service::query_state& query_state,
|
||||||
query_processor::execute_prepared_without_checking_exception_message(
|
shared_ptr<cql_statement> statement, const query_options& options, statements::prepared_statement::checked_weak_ptr prepared,
|
||||||
service::query_state& query_state,
|
cql3::prepared_cache_key_type cache_key, bool needs_authorization) {
|
||||||
shared_ptr<cql_statement> statement,
|
return execute_maybe_with_guard(
|
||||||
const query_options& options,
|
query_state, std::move(statement), options, &query_processor::do_execute_prepared, std::move(prepared), std::move(cache_key), needs_authorization);
|
||||||
statements::prepared_statement::checked_weak_ptr prepared,
|
|
||||||
cql3::prepared_cache_key_type cache_key,
|
|
||||||
bool needs_authorization) {
|
|
||||||
return execute_maybe_with_guard(query_state, std::move(statement), options, &query_processor::do_execute_prepared, std::move(prepared), std::move(cache_key), needs_authorization);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<result_message>>
|
future<::shared_ptr<result_message>> query_processor::do_execute_prepared(service::query_state& query_state, shared_ptr<cql_statement> statement,
|
||||||
query_processor::do_execute_prepared(
|
const query_options& options, std::optional<service::group0_guard> guard, statements::prepared_statement::checked_weak_ptr prepared,
|
||||||
service::query_state& query_state,
|
cql3::prepared_cache_key_type cache_key, bool needs_authorization) {
|
||||||
shared_ptr<cql_statement> statement,
|
|
||||||
const query_options& options,
|
|
||||||
std::optional<service::group0_guard> guard,
|
|
||||||
statements::prepared_statement::checked_weak_ptr prepared,
|
|
||||||
cql3::prepared_cache_key_type cache_key,
|
|
||||||
bool needs_authorization) {
|
|
||||||
if (needs_authorization) {
|
if (needs_authorization) {
|
||||||
co_await statement->check_access(*this, query_state.get_client_state());
|
co_await statement->check_access(*this, query_state.get_client_state());
|
||||||
try {
|
try {
|
||||||
@@ -707,8 +694,8 @@ query_processor::do_execute_prepared(
|
|||||||
co_return co_await process_authorized_statement(std::move(statement), query_state, options, std::move(guard));
|
co_return co_await process_authorized_statement(std::move(statement), query_state, options, std::move(guard));
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<result_message>>
|
future<::shared_ptr<result_message>> query_processor::process_authorized_statement(const ::shared_ptr<cql_statement> statement,
|
||||||
query_processor::process_authorized_statement(const ::shared_ptr<cql_statement> statement, service::query_state& query_state, const query_options& options, std::optional<service::group0_guard> guard) {
|
service::query_state& query_state, const query_options& options, std::optional<service::group0_guard> guard) {
|
||||||
auto& client_state = query_state.get_client_state();
|
auto& client_state = query_state.get_client_state();
|
||||||
|
|
||||||
++_stats.queries_by_cl[size_t(options.get_consistency())];
|
++_stats.queries_by_cl[size_t(options.get_consistency())];
|
||||||
@@ -723,14 +710,14 @@ query_processor::process_authorized_statement(const ::shared_ptr<cql_statement>
|
|||||||
co_return ::make_shared<result_message::void_message>();
|
co_return ::make_shared<result_message::void_message>();
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
future<::shared_ptr<cql_transport::messages::result_message::prepared>> query_processor::prepare(
|
||||||
query_processor::prepare(sstring query_string, service::query_state& query_state, cql3::dialect d) {
|
sstring query_string, service::query_state& query_state, cql3::dialect d) {
|
||||||
auto& client_state = query_state.get_client_state();
|
auto& client_state = query_state.get_client_state();
|
||||||
return prepare(std::move(query_string), client_state, d);
|
return prepare(std::move(query_string), client_state, d);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
future<::shared_ptr<cql_transport::messages::result_message::prepared>> query_processor::prepare(
|
||||||
query_processor::prepare(sstring query_string, const service::client_state& client_state, cql3::dialect d) {
|
sstring query_string, const service::client_state& client_state, cql3::dialect d) {
|
||||||
try {
|
try {
|
||||||
auto key = compute_id(query_string, client_state.get_raw_keyspace(), d);
|
auto key = compute_id(query_string, client_state.get_raw_keyspace(), d);
|
||||||
auto prep_entry = co_await _prepared_cache.get_pinned(key, [this, &query_string, &client_state, d] {
|
auto prep_entry = co_await _prepared_cache.get_pinned(key, [this, &query_string, &client_state, d] {
|
||||||
@@ -739,17 +726,13 @@ query_processor::prepare(sstring query_string, const service::client_state& clie
|
|||||||
auto bound_terms = prepared->statement->get_bound_terms();
|
auto bound_terms = prepared->statement->get_bound_terms();
|
||||||
if (bound_terms > std::numeric_limits<uint16_t>::max()) {
|
if (bound_terms > std::numeric_limits<uint16_t>::max()) {
|
||||||
throw exceptions::invalid_request_exception(
|
throw exceptions::invalid_request_exception(
|
||||||
format("Too many markers(?). {:d} markers exceed the allowed maximum of {:d}",
|
format("Too many markers(?). {:d} markers exceed the allowed maximum of {:d}", bound_terms, std::numeric_limits<uint16_t>::max()));
|
||||||
bound_terms,
|
|
||||||
std::numeric_limits<uint16_t>::max()));
|
|
||||||
}
|
}
|
||||||
throwing_assert(bound_terms == prepared->bound_names.size());
|
throwing_assert(bound_terms == prepared->bound_names.size());
|
||||||
return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
|
return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
|
||||||
});
|
});
|
||||||
|
|
||||||
co_await utils::get_local_injector().inject(
|
co_await utils::get_local_injector().inject("query_processor_prepare_wait_after_cache_get", utils::wait_for_message(std::chrono::seconds(60)));
|
||||||
"query_processor_prepare_wait_after_cache_get",
|
|
||||||
utils::wait_for_message(std::chrono::seconds(60)));
|
|
||||||
|
|
||||||
auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_entry),
|
auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_entry),
|
||||||
client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK));
|
client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK));
|
||||||
@@ -765,15 +748,11 @@ static std::string hash_target(std::string_view query_string, std::string_view k
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
prepared_cache_key_type query_processor::compute_id(
|
prepared_cache_key_type query_processor::compute_id(std::string_view query_string, std::string_view keyspace, dialect d) {
|
||||||
std::string_view query_string,
|
|
||||||
std::string_view keyspace,
|
|
||||||
dialect d) {
|
|
||||||
return prepared_cache_key_type(md5_hasher::calculate(hash_target(query_string, keyspace)), d);
|
return prepared_cache_key_type(md5_hasher::calculate(hash_target(query_string, keyspace)), d);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<prepared_statement>
|
std::unique_ptr<prepared_statement> query_processor::get_statement(const std::string_view& query, const service::client_state& client_state, dialect d) {
|
||||||
query_processor::get_statement(const std::string_view& query, const service::client_state& client_state, dialect d) {
|
|
||||||
// Measuring allocation cost requires that no yield points exist
|
// Measuring allocation cost requires that no yield points exist
|
||||||
// between bytes_before and bytes_after. It needs fixing if this
|
// between bytes_before and bytes_after. It needs fixing if this
|
||||||
// function is ever futurized.
|
// function is ever futurized.
|
||||||
@@ -798,8 +777,7 @@ query_processor::get_statement(const std::string_view& query, const service::cli
|
|||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unique_ptr<raw::parsed_statement>
|
std::unique_ptr<raw::parsed_statement> query_processor::parse_statement(const std::string_view& query, dialect d) {
|
||||||
query_processor::parse_statement(const std::string_view& query, dialect d) {
|
|
||||||
try {
|
try {
|
||||||
{
|
{
|
||||||
const char* error_injection_key = "query_processor-parse_statement-test_failure";
|
const char* error_injection_key = "query_processor-parse_statement-test_failure";
|
||||||
@@ -824,8 +802,7 @@ query_processor::parse_statement(const std::string_view& query, dialect d) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::vector<std::unique_ptr<raw::parsed_statement>>
|
std::vector<std::unique_ptr<raw::parsed_statement>> query_processor::parse_statements(std::string_view queries, dialect d) {
|
||||||
query_processor::parse_statements(std::string_view queries, dialect d) {
|
|
||||||
try {
|
try {
|
||||||
auto statements = util::do_with_parser(queries, d, std::mem_fn(&cql3_parser::CqlParser::queries));
|
auto statements = util::do_with_parser(queries, d, std::mem_fn(&cql3_parser::CqlParser::queries));
|
||||||
if (statements.empty()) {
|
if (statements.empty()) {
|
||||||
@@ -854,15 +831,10 @@ std::pair<std::reference_wrapper<struct query_processor::remote>, gate::holder>
|
|||||||
on_internal_error(log, "attempted to perform distributed query when `query_processor::remote` is unavailable");
|
on_internal_error(log, "attempted to perform distributed query when `query_processor::remote` is unavailable");
|
||||||
}
|
}
|
||||||
|
|
||||||
query_options query_processor::make_internal_options(
|
query_options query_processor::make_internal_options(const statements::prepared_statement::checked_weak_ptr& p, const std::vector<data_value_or_unset>& values,
|
||||||
const statements::prepared_statement::checked_weak_ptr& p,
|
db::consistency_level cl, int32_t page_size, service::node_local_only node_local_only) const {
|
||||||
const std::vector<data_value_or_unset>& values,
|
|
||||||
db::consistency_level cl,
|
|
||||||
int32_t page_size,
|
|
||||||
service::node_local_only node_local_only) const {
|
|
||||||
if (p->bound_names.size() != values.size()) {
|
if (p->bound_names.size() != values.size()) {
|
||||||
throw std::invalid_argument(
|
throw std::invalid_argument(format("Invalid number of values. Expecting {:d} but got {:d}", p->bound_names.size(), values.size()));
|
||||||
format("Invalid number of values. Expecting {:d} but got {:d}", p->bound_names.size(), values.size()));
|
|
||||||
}
|
}
|
||||||
auto ni = p->bound_names.begin();
|
auto ni = p->bound_names.begin();
|
||||||
raw_value_vector_with_unset bound_values;
|
raw_value_vector_with_unset bound_values;
|
||||||
@@ -870,8 +842,7 @@ query_options query_processor::make_internal_options(
|
|||||||
bound_values.unset.resize(values.size());
|
bound_values.unset.resize(values.size());
|
||||||
for (auto& var : values) {
|
for (auto& var : values) {
|
||||||
auto& n = *ni;
|
auto& n = *ni;
|
||||||
std::visit(overloaded_functor {
|
std::visit(overloaded_functor{[&](const data_value& v) {
|
||||||
[&] (const data_value& v) {
|
|
||||||
if (v.type() == bytes_type) {
|
if (v.type() == bytes_type) {
|
||||||
bound_values.values.emplace_back(cql3::raw_value::make_value(value_cast<bytes>(v)));
|
bound_values.values.emplace_back(cql3::raw_value::make_value(value_cast<bytes>(v)));
|
||||||
} else if (v.is_null()) {
|
} else if (v.is_null()) {
|
||||||
@@ -879,23 +850,20 @@ query_options query_processor::make_internal_options(
|
|||||||
} else {
|
} else {
|
||||||
bound_values.values.emplace_back(cql3::raw_value::make_value(n->type->decompose(v)));
|
bound_values.values.emplace_back(cql3::raw_value::make_value(n->type->decompose(v)));
|
||||||
}
|
}
|
||||||
}, [&] (const unset_value&) {
|
},
|
||||||
|
[&](const unset_value&) {
|
||||||
bound_values.values.emplace_back(cql3::raw_value::make_null());
|
bound_values.values.emplace_back(cql3::raw_value::make_null());
|
||||||
bound_values.unset[std::distance(p->bound_names.begin(), ni)] = true;
|
bound_values.unset[std::distance(p->bound_names.begin(), ni)] = true;
|
||||||
}
|
}},
|
||||||
}, var);
|
var);
|
||||||
++ni;
|
++ni;
|
||||||
}
|
}
|
||||||
return query_options(
|
return query_options(cl, std::move(bound_values),
|
||||||
cl,
|
cql3::query_options::specific_options{.page_size = page_size,
|
||||||
std::move(bound_values),
|
|
||||||
cql3::query_options::specific_options {
|
|
||||||
.page_size = page_size,
|
|
||||||
.state = {},
|
.state = {},
|
||||||
.serial_consistency = db::consistency_level::SERIAL,
|
.serial_consistency = db::consistency_level::SERIAL,
|
||||||
.timestamp = api::missing_timestamp,
|
.timestamp = api::missing_timestamp,
|
||||||
.node_local_only = node_local_only
|
.node_local_only = node_local_only});
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string) {
|
statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string) {
|
||||||
@@ -917,11 +885,7 @@ struct internal_query_state {
|
|||||||
};
|
};
|
||||||
|
|
||||||
internal_query_state query_processor::create_paged_state(
|
internal_query_state query_processor::create_paged_state(
|
||||||
const sstring& query_string,
|
const sstring& query_string, db::consistency_level cl, const data_value_list& values, int32_t page_size, std::optional<service::query_state> qs) {
|
||||||
db::consistency_level cl,
|
|
||||||
const data_value_list& values,
|
|
||||||
int32_t page_size,
|
|
||||||
std::optional<service::query_state> qs) {
|
|
||||||
auto p = prepare_internal(query_string);
|
auto p = prepare_internal(query_string);
|
||||||
auto opts = make_internal_options(p, values, cl, page_size);
|
auto opts = make_internal_options(p, values, cl, page_size);
|
||||||
if (!qs) {
|
if (!qs) {
|
||||||
@@ -935,8 +899,7 @@ bool query_processor::has_more_results(cql3::internal_query_state& state) const
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<> query_processor::for_each_cql_result(
|
future<> query_processor::for_each_cql_result(
|
||||||
cql3::internal_query_state& state,
|
cql3::internal_query_state& state, noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)> f) {
|
||||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)> f) {
|
|
||||||
do {
|
do {
|
||||||
auto msg = co_await execute_paged_internal(state);
|
auto msg = co_await execute_paged_internal(state);
|
||||||
for (auto& row : *msg) {
|
for (auto& row : *msg) {
|
||||||
@@ -947,17 +910,18 @@ future<> query_processor::for_each_cql_result(
|
|||||||
} while (has_more_results(state));
|
} while (has_more_results(state));
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<untyped_result_set>>
|
future<::shared_ptr<untyped_result_set>> query_processor::execute_paged_internal(internal_query_state& state) {
|
||||||
query_processor::execute_paged_internal(internal_query_state& state) {
|
|
||||||
state.p->statement->validate(*this, service::client_state::for_internal_calls());
|
state.p->statement->validate(*this, service::client_state::for_internal_calls());
|
||||||
::shared_ptr<cql_transport::messages::result_message> msg =
|
::shared_ptr<cql_transport::messages::result_message> msg = co_await state.p->statement->execute(*this, *state.qs, *state.opts, std::nullopt);
|
||||||
co_await state.p->statement->execute(*this, *state.qs, *state.opts, std::nullopt);
|
|
||||||
|
|
||||||
class visitor : public result_message::visitor_base {
|
class visitor : public result_message::visitor_base {
|
||||||
internal_query_state& _state;
|
internal_query_state& _state;
|
||||||
query_processor& _qp;
|
query_processor& _qp;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
visitor(internal_query_state& state, query_processor& qp) : _state(state), _qp(qp) {
|
visitor(internal_query_state& state, query_processor& qp)
|
||||||
|
: _state(state)
|
||||||
|
, _qp(qp) {
|
||||||
}
|
}
|
||||||
virtual ~visitor() = default;
|
virtual ~visitor() = default;
|
||||||
void visit(const result_message::rows& rmrs) override {
|
void visit(const result_message::rows& rmrs) override {
|
||||||
@@ -986,23 +950,14 @@ query_processor::execute_paged_internal(internal_query_state& state) {
|
|||||||
co_return ::make_shared<untyped_result_set>(msg);
|
co_return ::make_shared<untyped_result_set>(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<untyped_result_set>>
|
future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
|
||||||
query_processor::execute_internal(
|
const sstring& query_string, db::consistency_level cl, const data_value_list& values, cache_internal cache) {
|
||||||
const sstring& query_string,
|
|
||||||
db::consistency_level cl,
|
|
||||||
const data_value_list& values,
|
|
||||||
cache_internal cache) {
|
|
||||||
auto qs = query_state_for_internal_call();
|
auto qs = query_state_for_internal_call();
|
||||||
co_return co_await execute_internal(query_string, cl, qs, values, cache);
|
co_return co_await execute_internal(query_string, cl, qs, values, cache);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<untyped_result_set>>
|
future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
|
||||||
query_processor::execute_internal(
|
const sstring& query_string, db::consistency_level cl, service::query_state& query_state, const data_value_list& values, cache_internal cache) {
|
||||||
const sstring& query_string,
|
|
||||||
db::consistency_level cl,
|
|
||||||
service::query_state& query_state,
|
|
||||||
const data_value_list& values,
|
|
||||||
cache_internal cache) {
|
|
||||||
|
|
||||||
if (log.is_enabled(logging::log_level::trace)) {
|
if (log.is_enabled(logging::log_level::trace)) {
|
||||||
log.trace("execute_internal: {}\"{}\" ({})", cache ? "(cached) " : "", query_string, fmt::join(values, ", "));
|
log.trace("execute_internal: {}\"{}\" ({})", cache ? "(cached) " : "", query_string, fmt::join(values, ", "));
|
||||||
@@ -1020,10 +975,7 @@ query_processor::execute_internal(
|
|||||||
}
|
}
|
||||||
|
|
||||||
future<utils::chunked_vector<mutation>> query_processor::get_mutations_internal(
|
future<utils::chunked_vector<mutation>> query_processor::get_mutations_internal(
|
||||||
const sstring query_string,
|
const sstring query_string, service::query_state& query_state, api::timestamp_type timestamp, std::vector<data_value_or_unset> values) {
|
||||||
service::query_state& query_state,
|
|
||||||
api::timestamp_type timestamp,
|
|
||||||
std::vector<data_value_or_unset> values) {
|
|
||||||
log.debug("get_mutations_internal: \"{}\" ({})", query_string, fmt::join(values, ", "));
|
log.debug("get_mutations_internal: \"{}\" ({})", query_string, fmt::join(values, ", "));
|
||||||
auto stmt = prepare_internal(query_string);
|
auto stmt = prepare_internal(query_string);
|
||||||
auto mod_stmt = dynamic_pointer_cast<cql3::statements::modification_statement>(stmt->statement);
|
auto mod_stmt = dynamic_pointer_cast<cql3::statements::modification_statement>(stmt->statement);
|
||||||
@@ -1041,12 +993,8 @@ future<utils::chunked_vector<mutation>> query_processor::get_mutations_internal(
|
|||||||
co_return co_await mod_stmt->get_mutations(*this, opts, timeout, true, timestamp, query_state, json_cache, std::move(keys));
|
co_return co_await mod_stmt->get_mutations(*this, opts, timeout, true, timestamp, query_state, json_cache, std::move(keys));
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<untyped_result_set>>
|
future<::shared_ptr<untyped_result_set>> query_processor::execute_with_params(
|
||||||
query_processor::execute_with_params(
|
statements::prepared_statement::checked_weak_ptr p, db::consistency_level cl, service::query_state& query_state, const data_value_list& values) {
|
||||||
statements::prepared_statement::checked_weak_ptr p,
|
|
||||||
db::consistency_level cl,
|
|
||||||
service::query_state& query_state,
|
|
||||||
const data_value_list& values) {
|
|
||||||
auto opts = make_internal_options(p, values, cl);
|
auto opts = make_internal_options(p, values, cl);
|
||||||
auto statement = p->statement;
|
auto statement = p->statement;
|
||||||
|
|
||||||
@@ -1054,21 +1002,15 @@ query_processor::execute_with_params(
|
|||||||
co_return ::make_shared<untyped_result_set>(msg);
|
co_return ::make_shared<untyped_result_set>(msg);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<result_message>>
|
future<::shared_ptr<result_message>> query_processor::do_execute_with_params(
|
||||||
query_processor::do_execute_with_params(
|
service::query_state& query_state, shared_ptr<cql_statement> statement, const query_options& options, std::optional<service::group0_guard> guard) {
|
||||||
service::query_state& query_state,
|
|
||||||
shared_ptr<cql_statement> statement,
|
|
||||||
const query_options& options, std::optional<service::group0_guard> guard) {
|
|
||||||
statement->validate(*this, service::client_state::for_internal_calls());
|
statement->validate(*this, service::client_state::for_internal_calls());
|
||||||
co_return co_await coroutine::try_future(statement->execute(*this, query_state, options, std::move(guard)));
|
co_return co_await coroutine::try_future(statement->execute(*this, query_state, options, std::move(guard)));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
future<::shared_ptr<cql_transport::messages::result_message>>
|
future<::shared_ptr<cql_transport::messages::result_message>> query_processor::execute_batch_without_checking_exception_message(
|
||||||
query_processor::execute_batch_without_checking_exception_message(
|
::shared_ptr<statements::batch_statement> batch, service::query_state& query_state, query_options& options,
|
||||||
::shared_ptr<statements::batch_statement> batch,
|
|
||||||
service::query_state& query_state,
|
|
||||||
query_options& options,
|
|
||||||
std::unordered_map<prepared_cache_key_type, authorized_prepared_statements_cache::value_type> pending_authorization_entries) {
|
std::unordered_map<prepared_cache_key_type, authorized_prepared_statements_cache::value_type> pending_authorization_entries) {
|
||||||
auto access_future = co_await coroutine::as_future(batch->check_access(*this, query_state.get_client_state()));
|
auto access_future = co_await coroutine::as_future(batch->check_access(*this, query_state.get_client_state()));
|
||||||
co_await coroutine::parallel_for_each(pending_authorization_entries, [this, &query_state](auto& e) -> future<> {
|
co_await coroutine::parallel_for_each(pending_authorization_entries, [this, &query_state](auto& e) -> future<> {
|
||||||
@@ -1096,20 +1038,18 @@ query_processor::execute_batch_without_checking_exception_message(
|
|||||||
co_return co_await batch->execute(*this, query_state, options, std::nullopt);
|
co_return co_await batch->execute(*this, query_state, options, std::nullopt);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<service::broadcast_tables::query_result>
|
future<service::broadcast_tables::query_result> query_processor::execute_broadcast_table_query(const service::broadcast_tables::query& query) {
|
||||||
query_processor::execute_broadcast_table_query(const service::broadcast_tables::query& query) {
|
|
||||||
auto [remote_, holder] = remote();
|
auto [remote_, holder] = remote();
|
||||||
co_return co_await service::broadcast_tables::execute(remote_.get().group0_client, query);
|
co_return co_await service::broadcast_tables::execute(remote_.get().group0_client, query);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<query::mapreduce_result>
|
future<query::mapreduce_result> query_processor::mapreduce(query::mapreduce_request req, tracing::trace_state_ptr tr_state) {
|
||||||
query_processor::mapreduce(query::mapreduce_request req, tracing::trace_state_ptr tr_state) {
|
|
||||||
auto [remote_, holder] = remote();
|
auto [remote_, holder] = remote();
|
||||||
co_return co_await remote_.get().mapreducer.dispatch(std::move(req), std::move(tr_state));
|
co_return co_await remote_.get().mapreducer.dispatch(std::move(req), std::move(tr_state));
|
||||||
}
|
}
|
||||||
|
|
||||||
future<::shared_ptr<messages::result_message>>
|
future<::shared_ptr<messages::result_message>> query_processor::execute_schema_statement(
|
||||||
query_processor::execute_schema_statement(const statements::schema_altering_statement& stmt, service::query_state& state, const query_options& options, service::group0_batch& mc) {
|
const statements::schema_altering_statement& stmt, service::query_state& state, const query_options& options, service::group0_batch& mc) {
|
||||||
if (this_shard_id() != 0) {
|
if (this_shard_id() != 0) {
|
||||||
on_internal_error(log, "DDL must be executed on shard 0");
|
on_internal_error(log, "DDL must be executed on shard 0");
|
||||||
}
|
}
|
||||||
@@ -1163,7 +1103,8 @@ future<> query_processor::announce_schema_statement(const statements::schema_alt
|
|||||||
co_await remote_.get().mm.announce(std::move(m), std::move(guard), description);
|
co_await remote_.get().mm.announce(std::move(m), std::move(guard), description);
|
||||||
}
|
}
|
||||||
|
|
||||||
query_processor::migration_subscriber::migration_subscriber(query_processor* qp) : _qp{qp} {
|
query_processor::migration_subscriber::migration_subscriber(query_processor* qp)
|
||||||
|
: _qp{qp} {
|
||||||
}
|
}
|
||||||
|
|
||||||
void query_processor::migration_subscriber::on_create_keyspace(const sstring& ks_name) {
|
void query_processor::migration_subscriber::on_create_keyspace(const sstring& ks_name) {
|
||||||
@@ -1189,10 +1130,7 @@ void query_processor::migration_subscriber::on_create_view(const sstring& ks_nam
|
|||||||
void query_processor::migration_subscriber::on_update_keyspace(const sstring& ks_name) {
|
void query_processor::migration_subscriber::on_update_keyspace(const sstring& ks_name) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void query_processor::migration_subscriber::on_update_column_family(
|
void query_processor::migration_subscriber::on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) {
|
||||||
const sstring& ks_name,
|
|
||||||
const sstring& cf_name,
|
|
||||||
bool columns_changed) {
|
|
||||||
// #1255: Ignoring columns_changed deliberately.
|
// #1255: Ignoring columns_changed deliberately.
|
||||||
log.info("Column definitions for {}.{} changed, invalidating related prepared statements", ks_name, cf_name);
|
log.info("Column definitions for {}.{} changed, invalidating related prepared statements", ks_name, cf_name);
|
||||||
remove_invalid_prepared_statements(ks_name, cf_name);
|
remove_invalid_prepared_statements(ks_name, cf_name);
|
||||||
@@ -1207,9 +1145,7 @@ void query_processor::migration_subscriber::on_update_function(const sstring& ks
|
|||||||
void query_processor::migration_subscriber::on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) {
|
void query_processor::migration_subscriber::on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void query_processor::migration_subscriber::on_update_view(
|
void query_processor::migration_subscriber::on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) {
|
||||||
const sstring& ks_name,
|
|
||||||
const sstring& view_name, bool columns_changed) {
|
|
||||||
// scylladb/scylladb#16392 - Materialized views are also tables so we need at least handle
|
// scylladb/scylladb#16392 - Materialized views are also tables so we need at least handle
|
||||||
// them as such when changed.
|
// them as such when changed.
|
||||||
on_update_column_family(ks_name, view_name, columns_changed);
|
on_update_column_family(ks_name, view_name, columns_changed);
|
||||||
@@ -1238,39 +1174,28 @@ void query_processor::migration_subscriber::on_drop_view(const sstring& ks_name,
|
|||||||
remove_invalid_prepared_statements(ks_name, view_name);
|
remove_invalid_prepared_statements(ks_name, view_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
void query_processor::migration_subscriber::remove_invalid_prepared_statements(
|
void query_processor::migration_subscriber::remove_invalid_prepared_statements(sstring ks_name, std::optional<sstring> cf_name) {
|
||||||
sstring ks_name,
|
|
||||||
std::optional<sstring> cf_name) {
|
|
||||||
_qp->_prepared_cache.remove_if([&](::shared_ptr<cql_statement> stmt) {
|
_qp->_prepared_cache.remove_if([&](::shared_ptr<cql_statement> stmt) {
|
||||||
return this->should_invalidate(ks_name, cf_name, stmt);
|
return this->should_invalidate(ks_name, cf_name, stmt);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool query_processor::migration_subscriber::should_invalidate(
|
bool query_processor::migration_subscriber::should_invalidate(sstring ks_name, std::optional<sstring> cf_name, ::shared_ptr<cql_statement> statement) {
|
||||||
sstring ks_name,
|
|
||||||
std::optional<sstring> cf_name,
|
|
||||||
::shared_ptr<cql_statement> statement) {
|
|
||||||
return statement->depends_on(ks_name, cf_name);
|
return statement->depends_on(ks_name, cf_name);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> query_processor::query_internal(
|
future<> query_processor::query_internal(const sstring& query_string, db::consistency_level cl, const data_value_list& values, int32_t page_size,
|
||||||
const sstring& query_string,
|
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f, std::optional<service::query_state> qs) {
|
||||||
db::consistency_level cl,
|
|
||||||
const data_value_list& values,
|
|
||||||
int32_t page_size,
|
|
||||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f,
|
|
||||||
std::optional<service::query_state> qs) {
|
|
||||||
auto query_state = create_paged_state(query_string, cl, values, page_size, std::move(qs));
|
auto query_state = create_paged_state(query_string, cl, values, page_size, std::move(qs));
|
||||||
co_return co_await for_each_cql_result(query_state, std::move(f));
|
co_return co_await for_each_cql_result(query_state, std::move(f));
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> query_processor::query_internal(
|
future<> query_processor::query_internal(const sstring& query_string, noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
|
||||||
const sstring& query_string,
|
|
||||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
|
|
||||||
return query_internal(query_string, db::consistency_level::ONE, {}, 1000, std::move(f));
|
return query_internal(query_string, db::consistency_level::ONE, {}, 1000, std::move(f));
|
||||||
}
|
}
|
||||||
|
|
||||||
shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_shard(unsigned shard, cql3::computed_function_values cached_fn_calls, bool track) {
|
shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_shard(
|
||||||
|
unsigned shard, cql3::computed_function_values cached_fn_calls, bool track) {
|
||||||
if (track) {
|
if (track) {
|
||||||
_proxy.get_stats().replica_cross_shard_ops++;
|
_proxy.get_stats().replica_cross_shard_ops++;
|
||||||
}
|
}
|
||||||
@@ -1278,7 +1203,8 @@ shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_s
|
|||||||
return ::make_shared<cql_transport::messages::result_message::bounce>(my_host_id, shard, std::move(cached_fn_calls));
|
return ::make_shared<cql_transport::messages::result_message::bounce>(my_host_id, shard, std::move(cached_fn_calls));
|
||||||
}
|
}
|
||||||
|
|
||||||
shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_node(locator::tablet_replica replica, cql3::computed_function_values cached_fn_calls, seastar::lowres_clock::time_point timeout, bool is_write) {
|
shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_node(
|
||||||
|
locator::tablet_replica replica, cql3::computed_function_values cached_fn_calls, seastar::lowres_clock::time_point timeout, bool is_write) {
|
||||||
get_cql_stats().forwarded_requests++;
|
get_cql_stats().forwarded_requests++;
|
||||||
return ::make_shared<cql_transport::messages::result_message::bounce>(replica.host, replica.shard, std::move(cached_fn_calls), timeout, is_write);
|
return ::make_shared<cql_transport::messages::result_message::bounce>(replica.host, replica.shard, std::move(cached_fn_calls), timeout, is_write);
|
||||||
}
|
}
|
||||||
@@ -1307,4 +1233,4 @@ void query_processor::reset_cache() {
|
|||||||
_authorized_prepared_cache.reset();
|
_authorized_prepared_cache.reset();
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
} // namespace cql3
|
||||||
|
|||||||
@@ -63,15 +63,14 @@ namespace db {
|
|||||||
|
|
||||||
namespace schema_tables {
|
namespace schema_tables {
|
||||||
|
|
||||||
static constexpr std::initializer_list<table_kind> all_table_kinds = {
|
static constexpr std::initializer_list<table_kind> all_table_kinds = {table_kind::table, table_kind::view};
|
||||||
table_kind::table,
|
|
||||||
table_kind::view
|
|
||||||
};
|
|
||||||
|
|
||||||
static schema_ptr get_table_holder(table_kind k) {
|
static schema_ptr get_table_holder(table_kind k) {
|
||||||
switch (k) {
|
switch (k) {
|
||||||
case table_kind::table: return tables();
|
case table_kind::table:
|
||||||
case table_kind::view: return views();
|
return tables();
|
||||||
|
case table_kind::view:
|
||||||
|
return views();
|
||||||
}
|
}
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
@@ -94,12 +93,15 @@ void table_selector::add(sstring name) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
} // namespace schema_tables
|
||||||
|
|
||||||
}
|
} // namespace db
|
||||||
|
|
||||||
template <> struct fmt::formatter<db::schema_tables::table_kind> {
|
template <>
|
||||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
struct fmt::formatter<db::schema_tables::table_kind> {
|
||||||
|
constexpr auto parse(format_parse_context& ctx) {
|
||||||
|
return ctx.begin();
|
||||||
|
}
|
||||||
auto format(db::schema_tables::table_kind k, fmt::format_context& ctx) const {
|
auto format(db::schema_tables::table_kind k, fmt::format_context& ctx) const {
|
||||||
switch (k) {
|
switch (k) {
|
||||||
using enum db::schema_tables::table_kind;
|
using enum db::schema_tables::table_kind;
|
||||||
@@ -125,11 +127,8 @@ static std::optional<table_id> table_id_from_mutations(const schema_mutations& s
|
|||||||
return table_id(table_row.get_nonnull<utils::UUID>("id"));
|
return table_id(table_row.get_nonnull<utils::UUID>("id"));
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static future<std::map<table_id, schema_mutations>> read_tables_for_keyspaces(sharded<service::storage_proxy>& proxy, const std::set<sstring>& keyspace_names,
|
||||||
future<std::map<table_id, schema_mutations>>
|
table_kind kind, const std::unordered_map<sstring, table_selector>& tables_per_keyspace) {
|
||||||
read_tables_for_keyspaces(sharded<service::storage_proxy>& proxy, const std::set<sstring>& keyspace_names, table_kind kind,
|
|
||||||
const std::unordered_map<sstring, table_selector>& tables_per_keyspace)
|
|
||||||
{
|
|
||||||
std::map<table_id, schema_mutations> result;
|
std::map<table_id, schema_mutations> result;
|
||||||
for (auto&& [keyspace_name, sel] : tables_per_keyspace) {
|
for (auto&& [keyspace_name, sel] : tables_per_keyspace) {
|
||||||
if (!sel.tables.contains(kind)) {
|
if (!sel.tables.contains(kind)) {
|
||||||
@@ -149,8 +148,7 @@ read_tables_for_keyspaces(sharded<service::storage_proxy>& proxy, const std::set
|
|||||||
|
|
||||||
// Extracts the names of tables affected by a schema mutation.
|
// Extracts the names of tables affected by a schema mutation.
|
||||||
// The mutation must target one of the tables in schema_tables_holding_schema_mutations().
|
// The mutation must target one of the tables in schema_tables_holding_schema_mutations().
|
||||||
static
|
static table_selector get_affected_tables(const sstring& keyspace_name, const mutation& m) {
|
||||||
table_selector get_affected_tables(const sstring& keyspace_name, const mutation& m) {
|
|
||||||
const schema& s = *m.schema();
|
const schema& s = *m.schema();
|
||||||
auto get_table_name = [&](const clustering_key& ck) {
|
auto get_table_name = [&](const clustering_key& ck) {
|
||||||
// The first component of the clustering key in each table listed in
|
// The first component of the clustering key in each table listed in
|
||||||
@@ -159,22 +157,21 @@ table_selector get_affected_tables(const sstring& keyspace_name, const mutation&
|
|||||||
};
|
};
|
||||||
table_selector result;
|
table_selector result;
|
||||||
if (m.partition().partition_tombstone()) {
|
if (m.partition().partition_tombstone()) {
|
||||||
slogger.trace("Mutation of {}.{} for keyspace {} contains a partition tombstone",
|
slogger.trace("Mutation of {}.{} for keyspace {} contains a partition tombstone", m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
||||||
m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
|
||||||
result.all_in_keyspace = true;
|
result.all_in_keyspace = true;
|
||||||
}
|
}
|
||||||
for (auto&& e : m.partition().row_tombstones()) {
|
for (auto&& e : m.partition().row_tombstones()) {
|
||||||
const range_tombstone& rt = e.tombstone();
|
const range_tombstone& rt = e.tombstone();
|
||||||
if (rt.start.size(s) == 0 || rt.end.size(s) == 0) {
|
if (rt.start.size(s) == 0 || rt.end.size(s) == 0) {
|
||||||
slogger.trace("Mutation of {}.{} for keyspace {} contains a multi-table range tombstone",
|
slogger.trace(
|
||||||
m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
"Mutation of {}.{} for keyspace {} contains a multi-table range tombstone", m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
||||||
result.all_in_keyspace = true;
|
result.all_in_keyspace = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
auto table_name = get_table_name(rt.start);
|
auto table_name = get_table_name(rt.start);
|
||||||
if (table_name != get_table_name(rt.end)) {
|
if (table_name != get_table_name(rt.end)) {
|
||||||
slogger.trace("Mutation of {}.{} for keyspace {} contains a multi-table range tombstone",
|
slogger.trace(
|
||||||
m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
"Mutation of {}.{} for keyspace {} contains a multi-table range tombstone", m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
||||||
result.all_in_keyspace = true;
|
result.all_in_keyspace = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@@ -183,15 +180,16 @@ table_selector get_affected_tables(const sstring& keyspace_name, const mutation&
|
|||||||
for (auto&& row : m.partition().clustered_rows()) {
|
for (auto&& row : m.partition().clustered_rows()) {
|
||||||
result.add(get_table_name(row.key()));
|
result.add(get_table_name(row.key()));
|
||||||
}
|
}
|
||||||
slogger.trace("Mutation of {}.{} for keyspace {} affects tables: {}, all_in_keyspace: {}",
|
slogger.trace("Mutation of {}.{} for keyspace {} affects tables: {}, all_in_keyspace: {}", m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name,
|
||||||
m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name, result.tables, result.all_in_keyspace);
|
result.tables, result.all_in_keyspace);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
future<schema_result>
|
future<schema_result> static read_schema_for_keyspaces(
|
||||||
static read_schema_for_keyspaces(sharded<service::storage_proxy>& proxy, const sstring& schema_table_name, const std::set<sstring>& keyspace_names)
|
sharded<service::storage_proxy>& proxy, const sstring& schema_table_name, const std::set<sstring>& keyspace_names) {
|
||||||
{
|
auto map = [&proxy, schema_table_name](const sstring& keyspace_name) {
|
||||||
auto map = [&proxy, schema_table_name] (const sstring& keyspace_name) { return read_schema_partition_for_keyspace(proxy, schema_table_name, keyspace_name); };
|
return read_schema_partition_for_keyspace(proxy, schema_table_name, keyspace_name);
|
||||||
|
};
|
||||||
auto insert = [](schema_result&& result, auto&& schema_entity) {
|
auto insert = [](schema_result&& result, auto&& schema_entity) {
|
||||||
if (!schema_entity.second->empty()) {
|
if (!schema_entity.second->empty()) {
|
||||||
result.insert(std::move(schema_entity));
|
result.insert(std::move(schema_entity));
|
||||||
@@ -202,8 +200,8 @@ static read_schema_for_keyspaces(sharded<service::storage_proxy>& proxy, const s
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Returns names of live table definitions of given keyspace
|
// Returns names of live table definitions of given keyspace
|
||||||
future<std::vector<sstring>>
|
future<std::vector<sstring>> static read_table_names_of_keyspace(
|
||||||
static read_table_names_of_keyspace(sharded<service::storage_proxy>& proxy, const sstring& keyspace_name, schema_ptr schema_table) {
|
sharded<service::storage_proxy>& proxy, const sstring& keyspace_name, schema_ptr schema_table) {
|
||||||
auto pkey = dht::decorate_key(*schema_table, partition_key::from_singular(*schema_table, keyspace_name));
|
auto pkey = dht::decorate_key(*schema_table, partition_key::from_singular(*schema_table, keyspace_name));
|
||||||
auto&& rs = co_await db::system_keyspace::query(proxy.local().get_db(), schema_table->ks_name(), schema_table->cf_name(), pkey);
|
auto&& rs = co_await db::system_keyspace::query(proxy.local().get_db(), schema_table->ks_name(), schema_table->cf_name(), pkey);
|
||||||
co_return rs->rows() | std::views::transform([schema_table](const query::result_set_row& row) {
|
co_return rs->rows() | std::views::transform([schema_table](const query::result_set_row& row) {
|
||||||
@@ -242,8 +240,7 @@ static void maybe_delete_schema_version(mutation& m) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> schema_applier::merge_keyspaces()
|
future<> schema_applier::merge_keyspaces() {
|
||||||
{
|
|
||||||
/*
|
/*
|
||||||
* - we don't care about entriesOnlyOnLeft() or entriesInCommon(), because only the changes are of interest to us
|
* - we don't care about entriesOnlyOnLeft() or entriesInCommon(), because only the changes are of interest to us
|
||||||
* - of all entriesOnlyOnRight(), we only care about ones that have live columns; it's possible to have a ColumnFamily
|
* - of all entriesOnlyOnRight(), we only care about ones that have live columns; it's possible to have a ColumnFamily
|
||||||
@@ -280,21 +277,16 @@ future<> schema_applier::merge_keyspaces()
|
|||||||
for (auto& name : created) {
|
for (auto& name : created) {
|
||||||
slogger.info("Creating keyspace {}", name);
|
slogger.info("Creating keyspace {}", name);
|
||||||
auto sk_after_v = _after.scylla_keyspaces.contains(name) ? _after.scylla_keyspaces.at(name) : nullptr;
|
auto sk_after_v = _after.scylla_keyspaces.contains(name) ? _after.scylla_keyspaces.at(name) : nullptr;
|
||||||
auto ksm = co_await create_keyspace_metadata(
|
auto ksm = co_await create_keyspace_metadata(schema_result_value_type{name, _after.keyspaces.at(name)}, sk_after_v);
|
||||||
schema_result_value_type{name, _after.keyspaces.at(name)}, sk_after_v);
|
|
||||||
_affected_keyspaces.created.push_back(
|
_affected_keyspaces.created.push_back(
|
||||||
co_await replica::database::prepare_create_keyspace_on_all_shards(
|
co_await replica::database::prepare_create_keyspace_on_all_shards(sharded_db, _proxy, *ksm, _pending_token_metadata));
|
||||||
sharded_db, _proxy, *ksm, _pending_token_metadata));
|
|
||||||
_affected_keyspaces.names.created.insert(name);
|
_affected_keyspaces.names.created.insert(name);
|
||||||
}
|
}
|
||||||
for (auto& name : altered) {
|
for (auto& name : altered) {
|
||||||
slogger.info("Altering keyspace {}", name);
|
slogger.info("Altering keyspace {}", name);
|
||||||
auto sk_after_v = _after.scylla_keyspaces.contains(name) ? _after.scylla_keyspaces.at(name) : nullptr;
|
auto sk_after_v = _after.scylla_keyspaces.contains(name) ? _after.scylla_keyspaces.at(name) : nullptr;
|
||||||
auto tmp_ksm = co_await create_keyspace_metadata(
|
auto tmp_ksm = co_await create_keyspace_metadata(schema_result_value_type{name, _after.keyspaces.at(name)}, sk_after_v);
|
||||||
schema_result_value_type{name, _after.keyspaces.at(name)}, sk_after_v);
|
_affected_keyspaces.altered.push_back(co_await replica::database::prepare_update_keyspace_on_all_shards(sharded_db, *tmp_ksm, _pending_token_metadata));
|
||||||
_affected_keyspaces.altered.push_back(
|
|
||||||
co_await replica::database::prepare_update_keyspace_on_all_shards(
|
|
||||||
sharded_db, *tmp_ksm, _pending_token_metadata));
|
|
||||||
_affected_keyspaces.names.altered.insert(name);
|
_affected_keyspaces.names.altered.insert(name);
|
||||||
}
|
}
|
||||||
for (auto& key : _affected_keyspaces.names.dropped) {
|
for (auto& key : _affected_keyspaces.names.dropped) {
|
||||||
@@ -391,8 +383,8 @@ struct aggregate_diff {
|
|||||||
std::vector<std::pair<const query::result_set_row*, const query::result_set_row*>> dropped;
|
std::vector<std::pair<const query::result_set_row*, const query::result_set_row*>> dropped;
|
||||||
};
|
};
|
||||||
|
|
||||||
static aggregate_diff diff_aggregates_rows(const schema_result& aggr_before, const schema_result& aggr_after,
|
static aggregate_diff diff_aggregates_rows(
|
||||||
const schema_result& scylla_aggr_before, const schema_result& scylla_aggr_after) {
|
const schema_result& aggr_before, const schema_result& aggr_after, const schema_result& scylla_aggr_before, const schema_result& scylla_aggr_after) {
|
||||||
using map = std::map<std::vector<bytes>, const query::result_set_row*>;
|
using map = std::map<std::vector<bytes>, const query::result_set_row*>;
|
||||||
auto aggr_diff = difference(aggr_before, aggr_after, indirect_equal_to<lw_shared_ptr<query::result_set>>());
|
auto aggr_diff = difference(aggr_before, aggr_after, indirect_equal_to<lw_shared_ptr<query::result_set>>());
|
||||||
|
|
||||||
@@ -436,15 +428,11 @@ static aggregate_diff diff_aggregates_rows(const schema_result& aggr_before, con
|
|||||||
|
|
||||||
for (const auto& k : diff.entries_only_on_left) {
|
for (const auto& k : diff.entries_only_on_left) {
|
||||||
auto entry = scylla_aggr_rows_before.find(k);
|
auto entry = scylla_aggr_rows_before.find(k);
|
||||||
dropped.push_back({
|
dropped.push_back({aggr_before_rows.find(k)->second, (entry != scylla_aggr_rows_before.end()) ? entry->second : nullptr});
|
||||||
aggr_before_rows.find(k)->second, (entry != scylla_aggr_rows_before.end()) ? entry->second : nullptr
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
for (const auto& k : diff.entries_only_on_right) {
|
for (const auto& k : diff.entries_only_on_right) {
|
||||||
auto entry = scylla_aggr_rows_after.find(k);
|
auto entry = scylla_aggr_rows_after.find(k);
|
||||||
created.push_back({
|
created.push_back({aggr_after_rows.find(k)->second, (entry != scylla_aggr_rows_after.end()) ? entry->second : nullptr});
|
||||||
aggr_after_rows.find(k)->second, (entry != scylla_aggr_rows_after.end()) ? entry->second : nullptr
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -452,8 +440,7 @@ static aggregate_diff diff_aggregates_rows(const schema_result& aggr_before, con
|
|||||||
}
|
}
|
||||||
|
|
||||||
// see the comments for merge_keyspaces()
|
// see the comments for merge_keyspaces()
|
||||||
future<> schema_applier::merge_types()
|
future<> schema_applier::merge_types() {
|
||||||
{
|
|
||||||
auto diff = diff_rows(_before.types, _after.types);
|
auto diff = diff_rows(_before.types, _after.types);
|
||||||
co_await _affected_user_types.start();
|
co_await _affected_user_types.start();
|
||||||
co_await _affected_user_types.invoke_on_all([&](affected_user_types_per_shard& af) mutable -> future<> {
|
co_await _affected_user_types.invoke_on_all([&](affected_user_types_per_shard& af) mutable -> future<> {
|
||||||
@@ -482,12 +469,8 @@ enum class schema_diff_side {
|
|||||||
right, // new, after
|
right, // new, after
|
||||||
};
|
};
|
||||||
|
|
||||||
static schema_diff_per_shard diff_table_or_view(sharded<service::storage_proxy>& proxy,
|
static schema_diff_per_shard diff_table_or_view(sharded<service::storage_proxy>& proxy, const std::map<table_id, schema_mutations>& before,
|
||||||
const std::map<table_id, schema_mutations>& before,
|
const std::map<table_id, schema_mutations>& after, bool reload, noncopyable_function<schema_ptr(schema_mutations sm, schema_diff_side)> create_schema) {
|
||||||
const std::map<table_id, schema_mutations>& after,
|
|
||||||
bool reload,
|
|
||||||
noncopyable_function<schema_ptr (schema_mutations sm, schema_diff_side)> create_schema)
|
|
||||||
{
|
|
||||||
schema_diff_per_shard d;
|
schema_diff_per_shard d;
|
||||||
auto diff = difference(before, after);
|
auto diff = difference(before, after);
|
||||||
for (auto&& key : diff.entries_only_on_left) {
|
for (auto&& key : diff.entries_only_on_left) {
|
||||||
@@ -524,7 +507,9 @@ static schema_diff_per_shard diff_table_or_view(sharded<service::storage_proxy>&
|
|||||||
constexpr size_t max_concurrent = 8;
|
constexpr size_t max_concurrent = 8;
|
||||||
|
|
||||||
|
|
||||||
in_progress_types_storage_per_shard::in_progress_types_storage_per_shard(replica::database& db, const affected_keyspaces& affected_keyspaces, const affected_user_types& affected_types) : _stored_user_types(db.as_user_types_storage()) {
|
in_progress_types_storage_per_shard::in_progress_types_storage_per_shard(
|
||||||
|
replica::database& db, const affected_keyspaces& affected_keyspaces, const affected_user_types& affected_types)
|
||||||
|
: _stored_user_types(db.as_user_types_storage()) {
|
||||||
// initialize metadata for new keyspaces
|
// initialize metadata for new keyspaces
|
||||||
for (auto& ks_per_shard : affected_keyspaces.created) {
|
for (auto& ks_per_shard : affected_keyspaces.created) {
|
||||||
auto metadata = ks_per_shard[this_shard_id()]->metadata();
|
auto metadata = ks_per_shard[this_shard_id()]->metadata();
|
||||||
@@ -570,7 +555,8 @@ std::shared_ptr<data_dictionary::user_types_storage> in_progress_types_storage_p
|
|||||||
return _stored_user_types;
|
return _stored_user_types;
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> in_progress_types_storage::init(sharded<replica::database>& sharded_db, const affected_keyspaces& affected_keyspaces, const affected_user_types& affected_types) {
|
future<> in_progress_types_storage::init(
|
||||||
|
sharded<replica::database>& sharded_db, const affected_keyspaces& affected_keyspaces, const affected_user_types& affected_types) {
|
||||||
co_await sharded_db.invoke_on_all([&](replica::database& db) {
|
co_await sharded_db.invoke_on_all([&](replica::database& db) {
|
||||||
shards[this_shard_id()] = make_foreign(seastar::make_shared<in_progress_types_storage_per_shard>(db, affected_keyspaces, affected_types));
|
shards[this_shard_id()] = make_foreign(seastar::make_shared<in_progress_types_storage_per_shard>(db, affected_keyspaces, affected_types));
|
||||||
});
|
});
|
||||||
@@ -585,8 +571,7 @@ in_progress_types_storage_per_shard& in_progress_types_storage::local() {
|
|||||||
// that when a base schema and a subset of its views are modified together (i.e.,
|
// that when a base schema and a subset of its views are modified together (i.e.,
|
||||||
// upon an alter table or alter type statement), then they are published together
|
// upon an alter table or alter type statement), then they are published together
|
||||||
// as well, without any deferring in-between.
|
// as well, without any deferring in-between.
|
||||||
future<> schema_applier::merge_tables_and_views()
|
future<> schema_applier::merge_tables_and_views() {
|
||||||
{
|
|
||||||
auto& user_types = _types_storage.local();
|
auto& user_types = _types_storage.local();
|
||||||
co_await _affected_tables_and_views.tables_and_views.start();
|
co_await _affected_tables_and_views.tables_and_views.start();
|
||||||
|
|
||||||
@@ -683,31 +668,26 @@ future<> schema_applier::merge_tables_and_views()
|
|||||||
frozen_schema_diff tables_frozen = co_await local_tables.freeze();
|
frozen_schema_diff tables_frozen = co_await local_tables.freeze();
|
||||||
frozen_schema_diff cdc_frozen = co_await local_cdc.freeze();
|
frozen_schema_diff cdc_frozen = co_await local_cdc.freeze();
|
||||||
frozen_schema_diff views_frozen = co_await local_views.freeze();
|
frozen_schema_diff views_frozen = co_await local_views.freeze();
|
||||||
co_await _affected_tables_and_views.tables_and_views.invoke_on_others([this, &tables_frozen, &cdc_frozen, &views_frozen] (affected_tables_and_views_per_shard& tables_and_views) -> future<> {
|
co_await _affected_tables_and_views.tables_and_views.invoke_on_others(
|
||||||
|
[this, &tables_frozen, &cdc_frozen, &views_frozen](affected_tables_and_views_per_shard& tables_and_views) -> future<> {
|
||||||
auto& db = _proxy.local().get_db().local();
|
auto& db = _proxy.local().get_db().local();
|
||||||
tables_and_views.tables = co_await schema_diff_per_shard::copy_from(
|
tables_and_views.tables = co_await schema_diff_per_shard::copy_from(db, _types_storage, tables_frozen);
|
||||||
db, _types_storage, tables_frozen);
|
tables_and_views.cdc = co_await schema_diff_per_shard::copy_from(db, _types_storage, cdc_frozen);
|
||||||
tables_and_views.cdc = co_await schema_diff_per_shard::copy_from(
|
tables_and_views.views = co_await schema_diff_per_shard::copy_from(db, _types_storage, views_frozen);
|
||||||
db, _types_storage, cdc_frozen);
|
|
||||||
tables_and_views.views = co_await schema_diff_per_shard::copy_from(
|
|
||||||
db, _types_storage, views_frozen);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
auto& db = _proxy.local().get_db();
|
auto& db = _proxy.local().get_db();
|
||||||
co_await max_concurrent_for_each(local_views.dropped, max_concurrent, [&db, this](schema_ptr& dt) -> future<> {
|
co_await max_concurrent_for_each(local_views.dropped, max_concurrent, [&db, this](schema_ptr& dt) -> future<> {
|
||||||
auto uuid = dt->id();
|
auto uuid = dt->id();
|
||||||
_affected_tables_and_views.table_shards.insert({uuid,
|
_affected_tables_and_views.table_shards.insert({uuid, co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
||||||
co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
|
||||||
});
|
});
|
||||||
co_await max_concurrent_for_each(local_tables.dropped, max_concurrent, [&db, this](schema_ptr& dt) -> future<> {
|
co_await max_concurrent_for_each(local_tables.dropped, max_concurrent, [&db, this](schema_ptr& dt) -> future<> {
|
||||||
auto uuid = dt->id();
|
auto uuid = dt->id();
|
||||||
_affected_tables_and_views.table_shards.insert({uuid,
|
_affected_tables_and_views.table_shards.insert({uuid, co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
||||||
co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
|
||||||
});
|
});
|
||||||
co_await max_concurrent_for_each(local_cdc.dropped, max_concurrent, [&db, this](schema_ptr& dt) -> future<> {
|
co_await max_concurrent_for_each(local_cdc.dropped, max_concurrent, [&db, this](schema_ptr& dt) -> future<> {
|
||||||
auto uuid = dt->id();
|
auto uuid = dt->id();
|
||||||
_affected_tables_and_views.table_shards.insert({uuid,
|
_affected_tables_and_views.table_shards.insert({uuid, co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
||||||
co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -767,24 +747,41 @@ static future<> notify_tables_and_views(service::migration_notifier& notifier, c
|
|||||||
const auto& views = diff.tables_and_views.local().views;
|
const auto& views = diff.tables_and_views.local().views;
|
||||||
|
|
||||||
// View drops are notified first, because a table can only be dropped if its views are already deleted
|
// View drops are notified first, because a table can only be dropped if its views are already deleted
|
||||||
co_await notify(views.dropped, [&] (auto&& dt) { return notifier.drop_view(view_ptr(dt)); });
|
co_await notify(views.dropped, [&](auto&& dt) {
|
||||||
co_await notify(tables.dropped, [&] (auto&& dt) { return notifier.drop_column_family(dt); });
|
return notifier.drop_view(view_ptr(dt));
|
||||||
co_await notify(cdc.dropped, [&] (auto&& dt) { return notifier.drop_column_family(dt); });
|
});
|
||||||
|
co_await notify(tables.dropped, [&](auto&& dt) {
|
||||||
|
return notifier.drop_column_family(dt);
|
||||||
|
});
|
||||||
|
co_await notify(cdc.dropped, [&](auto&& dt) {
|
||||||
|
return notifier.drop_column_family(dt);
|
||||||
|
});
|
||||||
// Table creations are notified first, in case a view is created right after the table
|
// Table creations are notified first, in case a view is created right after the table
|
||||||
co_await notify(tables.created, [&] (auto&& gs) { return notifier.create_column_family(gs); });
|
co_await notify(tables.created, [&](auto&& gs) {
|
||||||
co_await notify(cdc.created, [&] (auto&& gs) { return notifier.create_column_family(gs); });
|
return notifier.create_column_family(gs);
|
||||||
co_await notify(views.created, [&] (auto&& gs) { return notifier.create_view(view_ptr(gs)); });
|
});
|
||||||
|
co_await notify(cdc.created, [&](auto&& gs) {
|
||||||
|
return notifier.create_column_family(gs);
|
||||||
|
});
|
||||||
|
co_await notify(views.created, [&](auto&& gs) {
|
||||||
|
return notifier.create_view(view_ptr(gs));
|
||||||
|
});
|
||||||
// Table altering is notified first, in case new base columns appear
|
// Table altering is notified first, in case new base columns appear
|
||||||
co_await notify(tables.altered, [&] (auto&& altered) { return notifier.update_column_family(altered.new_schema, *it++); });
|
co_await notify(tables.altered, [&](auto&& altered) {
|
||||||
co_await notify(cdc.altered, [&] (auto&& altered) { return notifier.update_column_family(altered.new_schema, *it++); });
|
return notifier.update_column_family(altered.new_schema, *it++);
|
||||||
co_await notify(views.altered, [&] (auto&& altered) { return notifier.update_view(view_ptr(altered.new_schema), *it++); });
|
});
|
||||||
|
co_await notify(cdc.altered, [&](auto&& altered) {
|
||||||
|
return notifier.update_column_family(altered.new_schema, *it++);
|
||||||
|
});
|
||||||
|
co_await notify(views.altered, [&](auto&& altered) {
|
||||||
|
return notifier.update_view(view_ptr(altered.new_schema), *it++);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
static void drop_cached_func(replica::database& db, const query::result_set_row& row) {
|
static void drop_cached_func(replica::database& db, const query::result_set_row& row) {
|
||||||
auto language = row.get_nonnull<sstring>("language");
|
auto language = row.get_nonnull<sstring>("language");
|
||||||
if (language == "wasm") {
|
if (language == "wasm") {
|
||||||
cql3::functions::function_name name{
|
cql3::functions::function_name name{row.get_nonnull<sstring>("keyspace_name"), row.get_nonnull<sstring>("function_name")};
|
||||||
row.get_nonnull<sstring>("keyspace_name"), row.get_nonnull<sstring>("function_name")};
|
|
||||||
auto arg_types = read_arg_types(row, name.keyspace, db.user_types());
|
auto arg_types = read_arg_types(row, name.keyspace, db.user_types());
|
||||||
db.lang().remove(name, arg_types);
|
db.lang().remove(name, arg_types);
|
||||||
}
|
}
|
||||||
@@ -799,8 +796,7 @@ future<> schema_applier::merge_functions() {
|
|||||||
batch.add_function(co_await create_func(db, *val, _types_storage.local()));
|
batch.add_function(co_await create_func(db, *val, _types_storage.local()));
|
||||||
}
|
}
|
||||||
for (const auto& val : diff.dropped) {
|
for (const auto& val : diff.dropped) {
|
||||||
cql3::functions::function_name name{
|
cql3::functions::function_name name{val->get_nonnull<sstring>("keyspace_name"), val->get_nonnull<sstring>("function_name")};
|
||||||
val->get_nonnull<sstring>("keyspace_name"), val->get_nonnull<sstring>("function_name")};
|
|
||||||
auto commited_storage = _types_storage.local().committed_storage();
|
auto commited_storage = _types_storage.local().committed_storage();
|
||||||
auto arg_types = read_arg_types(*val, name.keyspace, *commited_storage);
|
auto arg_types = read_arg_types(*val, name.keyspace, *commited_storage);
|
||||||
// as we don't yield between dropping cache and committing batch
|
// as we don't yield between dropping cache and committing batch
|
||||||
@@ -824,8 +820,7 @@ future<> schema_applier::merge_aggregates() {
|
|||||||
batch.add_function(create_aggregate(db, *val.first, val.second, batch, _types_storage.local()));
|
batch.add_function(create_aggregate(db, *val.first, val.second, batch, _types_storage.local()));
|
||||||
}
|
}
|
||||||
for (const auto& val : diff.dropped) {
|
for (const auto& val : diff.dropped) {
|
||||||
cql3::functions::function_name name{
|
cql3::functions::function_name name{val.first->get_nonnull<sstring>("keyspace_name"), val.first->get_nonnull<sstring>("aggregate_name")};
|
||||||
val.first->get_nonnull<sstring>("keyspace_name"), val.first->get_nonnull<sstring>("aggregate_name")};
|
|
||||||
auto commited_storage = _types_storage.local().committed_storage();
|
auto commited_storage = _types_storage.local().committed_storage();
|
||||||
auto arg_types = read_arg_types(*val.first, name.keyspace, *commited_storage);
|
auto arg_types = read_arg_types(*val.first, name.keyspace, *commited_storage);
|
||||||
batch.remove_aggregate(name, arg_types);
|
batch.remove_aggregate(name, arg_types);
|
||||||
@@ -924,10 +919,11 @@ class pending_schema_getter : public service::schema_getter {
|
|||||||
private:
|
private:
|
||||||
schema_applier& _sa;
|
schema_applier& _sa;
|
||||||
sharded<replica::database>& _db;
|
sharded<replica::database>& _db;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
pending_schema_getter(schema_applier& sa) :
|
pending_schema_getter(schema_applier& sa)
|
||||||
_sa(sa), _db(sa._proxy.local().get_db()) {
|
: _sa(sa)
|
||||||
};
|
, _db(sa._proxy.local().get_db()) {};
|
||||||
|
|
||||||
virtual flat_hash_map<sstring, locator::replication_strategy_ptr> get_keyspaces_replication() const override {
|
virtual flat_hash_map<sstring, locator::replication_strategy_ptr> get_keyspaces_replication() const override {
|
||||||
flat_hash_map<sstring, locator::replication_strategy_ptr> out;
|
flat_hash_map<sstring, locator::replication_strategy_ptr> out;
|
||||||
@@ -989,8 +985,7 @@ future<> schema_applier::update_tablets() {
|
|||||||
if (_tablet_hint) {
|
if (_tablet_hint) {
|
||||||
slogger.info("Tablet metadata changed");
|
slogger.info("Tablet metadata changed");
|
||||||
pending_schema_getter getter{*this};
|
pending_schema_getter getter{*this};
|
||||||
_token_metadata_change = co_await _ss.local().prepare_token_metadata_change(
|
_token_metadata_change = co_await _ss.local().prepare_token_metadata_change(_pending_token_metadata.local(), getter);
|
||||||
_pending_token_metadata.local(), getter);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -999,8 +994,7 @@ future<> schema_applier::update_tablets() {
|
|||||||
future<> schema_applier::load_mutable_token_metadata() {
|
future<> schema_applier::load_mutable_token_metadata() {
|
||||||
locator::mutable_token_metadata_ptr current_token_metadata = co_await _ss.local().get_mutable_token_metadata_ptr();
|
locator::mutable_token_metadata_ptr current_token_metadata = co_await _ss.local().get_mutable_token_metadata_ptr();
|
||||||
if (_tablet_hint) {
|
if (_tablet_hint) {
|
||||||
auto new_token_metadata = co_await _ss.local().prepare_tablet_metadata(
|
auto new_token_metadata = co_await _ss.local().prepare_tablet_metadata(_tablet_hint, current_token_metadata);
|
||||||
_tablet_hint, current_token_metadata);
|
|
||||||
co_return co_await _pending_token_metadata.assign(new_token_metadata);
|
co_return co_await _pending_token_metadata.assign(new_token_metadata);
|
||||||
}
|
}
|
||||||
co_await _pending_token_metadata.assign(current_token_metadata);
|
co_await _pending_token_metadata.assign(current_token_metadata);
|
||||||
@@ -1115,8 +1109,7 @@ future<> schema_applier::commit() {
|
|||||||
// However, we can only acquire the (write) lock after preparing all
|
// However, we can only acquire the (write) lock after preparing all
|
||||||
// entities for the pending schema change that need to iterate over tables_metadata;
|
// entities for the pending schema change that need to iterate over tables_metadata;
|
||||||
// otherwise, such iteration would deadlock.
|
// otherwise, such iteration would deadlock.
|
||||||
_metadata_locks = std::make_unique<replica::tables_metadata_lock_on_all_shards>(
|
_metadata_locks = std::make_unique<replica::tables_metadata_lock_on_all_shards>(co_await replica::database::lock_tables_metadata(sharded_db));
|
||||||
co_await replica::database::lock_tables_metadata(sharded_db));
|
|
||||||
// Run func first on shard 0
|
// Run func first on shard 0
|
||||||
// to allow "seeding" of the effective_replication_map
|
// to allow "seeding" of the effective_replication_map
|
||||||
// with a new e_r_m instance.
|
// with a new e_r_m instance.
|
||||||
@@ -1154,8 +1147,7 @@ future<> schema_applier::finalize_tables_and_views() {
|
|||||||
|
|
||||||
if (_tablet_hint) {
|
if (_tablet_hint) {
|
||||||
auto& db = sharded_db.local();
|
auto& db = sharded_db.local();
|
||||||
co_await db.get_compaction_manager().get_shared_tombstone_gc_state().
|
co_await db.get_compaction_manager().get_shared_tombstone_gc_state().flush_pending_repair_time_update(db);
|
||||||
flush_pending_repair_time_update(db);
|
|
||||||
_ss.local().wake_up_topology_state_machine();
|
_ss.local().wake_up_topology_state_machine();
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1187,10 +1179,9 @@ future<> schema_applier::finalize_tables_and_views() {
|
|||||||
co_await max_concurrent_for_each(diff.tables_and_views.local().tables.created, max_concurrent, [this](const schema_ptr& gs) -> future<> {
|
co_await max_concurrent_for_each(diff.tables_and_views.local().tables.created, max_concurrent, [this](const schema_ptr& gs) -> future<> {
|
||||||
co_await store_column_mapping(_proxy, gs, false);
|
co_await store_column_mapping(_proxy, gs, false);
|
||||||
});
|
});
|
||||||
co_await max_concurrent_for_each(diff.tables_and_views.local().tables.altered, max_concurrent, [this] (const schema_diff_per_shard::altered_schema& altered) -> future<> {
|
co_await max_concurrent_for_each(
|
||||||
co_await when_all_succeed(
|
diff.tables_and_views.local().tables.altered, max_concurrent, [this](const schema_diff_per_shard::altered_schema& altered) -> future<> {
|
||||||
store_column_mapping(_proxy, altered.old_schema, true),
|
co_await when_all_succeed(store_column_mapping(_proxy, altered.old_schema, true), store_column_mapping(_proxy, altered.new_schema, false));
|
||||||
store_column_mapping(_proxy, altered.new_schema, false));
|
|
||||||
});
|
});
|
||||||
co_await max_concurrent_for_each(diff.tables_and_views.local().tables.dropped, max_concurrent, [this](const schema_ptr& s) -> future<> {
|
co_await max_concurrent_for_each(diff.tables_and_views.local().tables.dropped, max_concurrent, [this](const schema_ptr& s) -> future<> {
|
||||||
co_await drop_column_mapping(_sys_ks.local(), s->id(), s->version());
|
co_await drop_column_mapping(_sys_ks.local(), s->id(), s->version());
|
||||||
@@ -1260,8 +1251,8 @@ static future<> execute_do_merge_schema(sharded<service::storage_proxy>& proxy,
|
|||||||
co_await ap.post_commit();
|
co_await ap.post_commit();
|
||||||
}
|
}
|
||||||
|
|
||||||
static future<> do_merge_schema(sharded<service::storage_proxy>& proxy, sharded<service::storage_service>& ss, sharded<db::system_keyspace>& sys_ks, utils::chunked_vector<mutation> mutations, bool reload)
|
static future<> do_merge_schema(sharded<service::storage_proxy>& proxy, sharded<service::storage_service>& ss, sharded<db::system_keyspace>& sys_ks,
|
||||||
{
|
utils::chunked_vector<mutation> mutations, bool reload) {
|
||||||
slogger.trace("do_merge_schema: {}", mutations);
|
slogger.trace("do_merge_schema: {}", mutations);
|
||||||
schema_applier ap(proxy, ss, sys_ks, reload);
|
schema_applier ap(proxy, ss, sys_ks, reload);
|
||||||
co_await execute_do_merge_schema(proxy, ap, std::move(mutations)).finally([&ap]() {
|
co_await execute_do_merge_schema(proxy, ap, std::move(mutations)).finally([&ap]() {
|
||||||
@@ -1278,8 +1269,8 @@ static future<> do_merge_schema(sharded<service::storage_proxy>& proxy, sharded
|
|||||||
* @throws ConfigurationException If one of metadata attributes has invalid value
|
* @throws ConfigurationException If one of metadata attributes has invalid value
|
||||||
* @throws IOException If data was corrupted during transportation or failed to apply fs operations
|
* @throws IOException If data was corrupted during transportation or failed to apply fs operations
|
||||||
*/
|
*/
|
||||||
future<> merge_schema(sharded<db::system_keyspace>& sys_ks, sharded<service::storage_proxy>& proxy, sharded<service::storage_service>& ss, utils::chunked_vector<mutation> mutations, bool reload)
|
future<> merge_schema(sharded<db::system_keyspace>& sys_ks, sharded<service::storage_proxy>& proxy, sharded<service::storage_service>& ss,
|
||||||
{
|
utils::chunked_vector<mutation> mutations, bool reload) {
|
||||||
if (this_shard_id() != 0) {
|
if (this_shard_id() != 0) {
|
||||||
// mutations must be applied on the owning shard (0).
|
// mutations must be applied on the owning shard (0).
|
||||||
co_await smp::submit_to(0, coroutine::lambda([&, fmuts = freeze(mutations)]() mutable -> future<> {
|
co_await smp::submit_to(0, coroutine::lambda([&, fmuts = freeze(mutations)]() mutable -> future<> {
|
||||||
@@ -1294,6 +1285,6 @@ future<> merge_schema(sharded<db::system_keyspace>& sys_ks, sharded<service::sto
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
} // namespace schema_tables
|
||||||
|
|
||||||
}
|
} // namespace db
|
||||||
|
|||||||
@@ -29,8 +29,8 @@ static logging::logger blogger("boot_strapper");
|
|||||||
|
|
||||||
namespace dht {
|
namespace dht {
|
||||||
|
|
||||||
future<> boot_strapper::bootstrap(streaming::stream_reason reason, gms::gossiper& gossiper, service::frozen_topology_guard topo_guard,
|
future<> boot_strapper::bootstrap(
|
||||||
locator::host_id replace_address) {
|
streaming::stream_reason reason, gms::gossiper& gossiper, service::frozen_topology_guard topo_guard, locator::host_id replace_address) {
|
||||||
blogger.debug("Beginning bootstrap process: sorted_tokens={}", get_token_metadata().sorted_tokens());
|
blogger.debug("Beginning bootstrap process: sorted_tokens={}", get_token_metadata().sorted_tokens());
|
||||||
sstring description;
|
sstring description;
|
||||||
if (reason == streaming::stream_reason::bootstrap) {
|
if (reason == streaming::stream_reason::bootstrap) {
|
||||||
@@ -41,7 +41,8 @@ future<> boot_strapper::bootstrap(streaming::stream_reason reason, gms::gossiper
|
|||||||
throw std::runtime_error("Wrong stream_reason provided: it can only be replace or bootstrap");
|
throw std::runtime_error("Wrong stream_reason provided: it can only be replace or bootstrap");
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
auto streamer = make_lw_shared<range_streamer>(_db, _stream_manager, _token_metadata_ptr, _abort_source, _tokens, _address, _dr, description, reason, topo_guard);
|
auto streamer = make_lw_shared<range_streamer>(
|
||||||
|
_db, _stream_manager, _token_metadata_ptr, _abort_source, _tokens, _address, _dr, description, reason, topo_guard);
|
||||||
auto nodes_to_filter = gossiper.get_unreachable_members();
|
auto nodes_to_filter = gossiper.get_unreachable_members();
|
||||||
if (reason == streaming::stream_reason::replace) {
|
if (reason == streaming::stream_reason::replace) {
|
||||||
nodes_to_filter.insert(std::move(replace_address));
|
nodes_to_filter.insert(std::move(replace_address));
|
||||||
@@ -71,7 +72,8 @@ std::unordered_set<token> boot_strapper::get_random_bootstrap_tokens(const token
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (num_tokens == 1) {
|
if (num_tokens == 1) {
|
||||||
blogger.warn("Picking random token for a single vnode. You should probably add more vnodes; failing that, you should probably specify the token manually");
|
blogger.warn(
|
||||||
|
"Picking random token for a single vnode. You should probably add more vnodes; failing that, you should probably specify the token manually");
|
||||||
}
|
}
|
||||||
|
|
||||||
auto tokens = get_random_tokens(std::move(tmptr), num_tokens);
|
auto tokens = get_random_tokens(std::move(tmptr), num_tokens);
|
||||||
@@ -86,7 +88,8 @@ std::unordered_set<token> boot_strapper::get_bootstrap_tokens(token_metadata_ptr
|
|||||||
return get_bootstrap_tokens(std::move(tmptr), cfg.initial_token(), cfg.num_tokens(), check);
|
return get_bootstrap_tokens(std::move(tmptr), cfg.initial_token(), cfg.num_tokens(), check);
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metadata_ptr tmptr, sstring tokens_string, uint32_t num_tokens, check_token_endpoint check) {
|
std::unordered_set<token> boot_strapper::get_bootstrap_tokens(
|
||||||
|
const token_metadata_ptr tmptr, sstring tokens_string, uint32_t num_tokens, check_token_endpoint check) {
|
||||||
std::unordered_set<sstring> initial_tokens;
|
std::unordered_set<sstring> initial_tokens;
|
||||||
try {
|
try {
|
||||||
boost::split(initial_tokens, tokens_string, boost::is_any_of(sstring(", ")));
|
boost::split(initial_tokens, tokens_string, boost::is_any_of(sstring(", ")));
|
||||||
@@ -102,7 +105,8 @@ std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metada
|
|||||||
for (auto& token_string : initial_tokens) {
|
for (auto& token_string : initial_tokens) {
|
||||||
auto token = dht::token::from_sstring(token_string);
|
auto token = dht::token::from_sstring(token_string);
|
||||||
if (check && tmptr->get_endpoint(token)) {
|
if (check && tmptr->get_endpoint(token)) {
|
||||||
throw std::runtime_error(format("Bootstrapping to existing token {} is not allowed (decommission/removenode the old node first).", token_string));
|
throw std::runtime_error(
|
||||||
|
format("Bootstrapping to existing token {} is not allowed (decommission/removenode the old node first).", token_string));
|
||||||
}
|
}
|
||||||
tokens.insert(token);
|
tokens.insert(token);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,10 +26,9 @@ static logging::logger logger("range_streamer");
|
|||||||
|
|
||||||
using inet_address = gms::inet_address;
|
using inet_address = gms::inet_address;
|
||||||
|
|
||||||
std::unordered_map<locator::host_id, dht::token_range_vector>
|
std::unordered_map<locator::host_id, dht::token_range_vector> range_streamer::get_range_fetch_map(
|
||||||
range_streamer::get_range_fetch_map(const std::unordered_map<dht::token_range, std::vector<locator::host_id>>& ranges_with_sources,
|
const std::unordered_map<dht::token_range, std::vector<locator::host_id>>& ranges_with_sources,
|
||||||
const std::unordered_set<std::unique_ptr<i_source_filter>>& source_filters,
|
const std::unordered_set<std::unique_ptr<i_source_filter>>& source_filters, const sstring& keyspace) {
|
||||||
const sstring& keyspace) {
|
|
||||||
std::unordered_map<locator::host_id, dht::token_range_vector> range_fetch_map_map;
|
std::unordered_map<locator::host_id, dht::token_range_vector> range_fetch_map_map;
|
||||||
const auto& topo = _token_metadata_ptr->get_topology();
|
const auto& topo = _token_metadata_ptr->get_topology();
|
||||||
for (const auto& x : ranges_with_sources) {
|
for (const auto& x : ranges_with_sources) {
|
||||||
@@ -79,8 +78,8 @@ range_streamer::get_range_fetch_map(const std::unordered_map<dht::token_range, s
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Must be called from a seastar thread
|
// Must be called from a seastar thread
|
||||||
std::unordered_map<dht::token_range, std::vector<locator::host_id>>
|
std::unordered_map<dht::token_range, std::vector<locator::host_id>> range_streamer::get_all_ranges_with_sources_for(
|
||||||
range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, const locator::vnode_effective_replication_map* erm, dht::token_range_vector desired_ranges) {
|
const sstring& keyspace_name, const locator::vnode_effective_replication_map* erm, dht::token_range_vector desired_ranges) {
|
||||||
logger.debug("{} ks={}", __func__, keyspace_name);
|
logger.debug("{} ks={}", __func__, keyspace_name);
|
||||||
|
|
||||||
auto range_addresses = erm->get_range_host_ids().get();
|
auto range_addresses = erm->get_range_host_ids().get();
|
||||||
@@ -114,8 +113,8 @@ range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, co
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Must be called from a seastar thread
|
// Must be called from a seastar thread
|
||||||
std::unordered_map<dht::token_range, std::vector<locator::host_id>>
|
std::unordered_map<dht::token_range, std::vector<locator::host_id>> range_streamer::get_all_ranges_with_strict_sources_for(
|
||||||
range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_name, const locator::vnode_effective_replication_map* erm, dht::token_range_vector desired_ranges, gms::gossiper& gossiper) {
|
const sstring& keyspace_name, const locator::vnode_effective_replication_map* erm, dht::token_range_vector desired_ranges, gms::gossiper& gossiper) {
|
||||||
logger.debug("{} ks={}", __func__, keyspace_name);
|
logger.debug("{} ks={}", __func__, keyspace_name);
|
||||||
SCYLLA_ASSERT(_tokens.empty() == false);
|
SCYLLA_ASSERT(_tokens.empty() == false);
|
||||||
|
|
||||||
@@ -153,8 +152,9 @@ range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_n
|
|||||||
// Due to CASSANDRA-5953 we can have a higher RF then we have endpoints.
|
// Due to CASSANDRA-5953 we can have a higher RF then we have endpoints.
|
||||||
// So we need to be careful to only be strict when endpoints == RF
|
// So we need to be careful to only be strict when endpoints == RF
|
||||||
if (old_endpoints.size() == erm->get_replication_factor()) {
|
if (old_endpoints.size() == erm->get_replication_factor()) {
|
||||||
std::erase_if(old_endpoints,
|
std::erase_if(old_endpoints, [&new_endpoints](locator::host_id ep) {
|
||||||
[&new_endpoints] (locator::host_id ep) { return new_endpoints.contains(ep); });
|
return new_endpoints.contains(ep);
|
||||||
|
});
|
||||||
if (old_endpoints.size() != 1) {
|
if (old_endpoints.size() != 1) {
|
||||||
throw std::runtime_error(format("Expected 1 endpoint but found {:d}", old_endpoints.size()));
|
throw std::runtime_error(format("Expected 1 endpoint but found {:d}", old_endpoints.size()));
|
||||||
}
|
}
|
||||||
@@ -176,7 +176,9 @@ range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_n
|
|||||||
locator::host_id source_id = it->second.front();
|
locator::host_id source_id = it->second.front();
|
||||||
|
|
||||||
if (gossiper.is_enabled() && !gossiper.is_alive(source_id)) {
|
if (gossiper.is_enabled() && !gossiper.is_alive(source_id)) {
|
||||||
throw std::runtime_error(format("A node required to move the data consistently is down ({}). If you wish to move the data from a potentially inconsistent replica, restart the node with consistent_rangemovement=false", source_id));
|
throw std::runtime_error(format("A node required to move the data consistently is down ({}). If you wish to move the data from a potentially "
|
||||||
|
"inconsistent replica, restart the node with consistent_rangemovement=false",
|
||||||
|
source_id));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -188,12 +190,8 @@ bool range_streamer::use_strict_sources_for_ranges(const sstring& keyspace_name,
|
|||||||
auto nr_nodes_in_ring = get_token_metadata().get_normal_token_owners().size();
|
auto nr_nodes_in_ring = get_token_metadata().get_normal_token_owners().size();
|
||||||
bool everywhere_topology = erm.get_replication_strategy().get_type() == locator::replication_strategy_type::everywhere_topology;
|
bool everywhere_topology = erm.get_replication_strategy().get_type() == locator::replication_strategy_type::everywhere_topology;
|
||||||
// Use strict when number of nodes in the ring is equal or more than RF
|
// Use strict when number of nodes in the ring is equal or more than RF
|
||||||
auto strict = _db.local().get_config().consistent_rangemovement()
|
auto strict = _db.local().get_config().consistent_rangemovement() && !_tokens.empty() && !everywhere_topology && nr_nodes_in_ring >= rf;
|
||||||
&& !_tokens.empty()
|
logger.debug("use_strict_sources_for_ranges: ks={}, nr_nodes_in_ring={}, rf={}, strict={}", keyspace_name, nr_nodes_in_ring, rf, strict);
|
||||||
&& !everywhere_topology
|
|
||||||
&& nr_nodes_in_ring >= rf;
|
|
||||||
logger.debug("use_strict_sources_for_ranges: ks={}, nr_nodes_in_ring={}, rf={}, strict={}",
|
|
||||||
keyspace_name, nr_nodes_in_ring, rf, strict);
|
|
||||||
return strict;
|
return strict;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -214,7 +212,8 @@ void range_streamer::add_rx_ranges(const sstring& keyspace_name, std::unordered_
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO: This is the legacy range_streamer interface, it is add_rx_ranges which adds rx ranges.
|
// TODO: This is the legacy range_streamer interface, it is add_rx_ranges which adds rx ranges.
|
||||||
future<> range_streamer::add_ranges(const sstring& keyspace_name, locator::static_effective_replication_map_ptr erm, dht::token_range_vector ranges, gms::gossiper& gossiper, bool is_replacing) {
|
future<> range_streamer::add_ranges(const sstring& keyspace_name, locator::static_effective_replication_map_ptr erm, dht::token_range_vector ranges,
|
||||||
|
gms::gossiper& gossiper, bool is_replacing) {
|
||||||
return seastar::async([this, keyspace_name, ermp = std::move(erm), ranges = std::move(ranges), &gossiper, is_replacing]() mutable {
|
return seastar::async([this, keyspace_name, ermp = std::move(erm), ranges = std::move(ranges), &gossiper, is_replacing]() mutable {
|
||||||
if (_nr_tx_added) {
|
if (_nr_tx_added) {
|
||||||
throw std::runtime_error("Mixed sending and receiving is not supported");
|
throw std::runtime_error("Mixed sending and receiving is not supported");
|
||||||
@@ -232,7 +231,8 @@ future<> range_streamer::add_ranges(const sstring& keyspace_name, locator::stati
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unordered_map<locator::host_id, dht::token_range_vector> range_fetch_map = get_range_fetch_map(ranges_for_keyspace, _source_filters, keyspace_name);
|
std::unordered_map<locator::host_id, dht::token_range_vector> range_fetch_map =
|
||||||
|
get_range_fetch_map(ranges_for_keyspace, _source_filters, keyspace_name);
|
||||||
utils::clear_gently(ranges_for_keyspace).get();
|
utils::clear_gently(ranges_for_keyspace).get();
|
||||||
|
|
||||||
if (logger.is_enabled(logging::log_level::debug)) {
|
if (logger.is_enabled(logging::log_level::debug)) {
|
||||||
@@ -268,13 +268,13 @@ future<> range_streamer::stream_async() {
|
|||||||
unsigned nr_ranges_streamed = 0;
|
unsigned nr_ranges_streamed = 0;
|
||||||
size_t nr_ranges_total = range_vec.size();
|
size_t nr_ranges_total = range_vec.size();
|
||||||
auto do_streaming = [&](dht::token_range_vector&& ranges_to_stream) {
|
auto do_streaming = [&](dht::token_range_vector&& ranges_to_stream) {
|
||||||
auto sp = stream_plan(_stream_manager.local(), format("{}-{}-index-{:d}", description, keyspace, sp_index++),
|
auto sp = stream_plan(_stream_manager.local(), format("{}-{}-index-{:d}", description, keyspace, sp_index++), _reason, _topo_guard);
|
||||||
_reason, _topo_guard);
|
auto abort_listener = _abort_source.subscribe([&]() noexcept {
|
||||||
auto abort_listener = _abort_source.subscribe([&] () noexcept { sp.abort(); });
|
sp.abort();
|
||||||
|
});
|
||||||
_abort_source.check();
|
_abort_source.check();
|
||||||
logger.info("{} with {} for keyspace={}, streaming [{}, {}) out of {} ranges",
|
logger.info("{} with {} for keyspace={}, streaming [{}, {}) out of {} ranges", description, source, keyspace, nr_ranges_streamed,
|
||||||
description, source, keyspace,
|
nr_ranges_streamed + ranges_to_stream.size(), nr_ranges_total);
|
||||||
nr_ranges_streamed, nr_ranges_streamed + ranges_to_stream.size(), nr_ranges_total);
|
|
||||||
auto ranges_streamed = ranges_to_stream.size();
|
auto ranges_streamed = ranges_to_stream.size();
|
||||||
if (_nr_rx_added) {
|
if (_nr_rx_added) {
|
||||||
sp.request_ranges(source, keyspace, std::move(ranges_to_stream), _tables);
|
sp.request_ranges(source, keyspace, std::move(ranges_to_stream), _tables);
|
||||||
@@ -287,8 +287,8 @@ future<> range_streamer::stream_async() {
|
|||||||
_nr_ranges_remaining -= ranges_streamed;
|
_nr_ranges_remaining -= ranges_streamed;
|
||||||
float percentage = _nr_total_ranges == 0 ? 1 : (_nr_total_ranges - _nr_ranges_remaining) / (float)_nr_total_ranges;
|
float percentage = _nr_total_ranges == 0 ? 1 : (_nr_total_ranges - _nr_ranges_remaining) / (float)_nr_total_ranges;
|
||||||
_stream_manager.local().update_finished_percentage(_reason, percentage);
|
_stream_manager.local().update_finished_percentage(_reason, percentage);
|
||||||
logger.info("Finished {} out of {} ranges for {}, finished percentage={}",
|
logger.info("Finished {} out of {} ranges for {}, finished percentage={}", _nr_total_ranges - _nr_ranges_remaining, _nr_total_ranges,
|
||||||
_nr_total_ranges - _nr_ranges_remaining, _nr_total_ranges, _reason, percentage);
|
_reason, percentage);
|
||||||
};
|
};
|
||||||
dht::token_range_vector ranges_to_stream;
|
dht::token_range_vector ranges_to_stream;
|
||||||
try {
|
try {
|
||||||
@@ -344,4 +344,4 @@ size_t range_streamer::nr_ranges_to_stream() {
|
|||||||
return nr_ranges_remaining;
|
return nr_ranges_remaining;
|
||||||
}
|
}
|
||||||
|
|
||||||
} // dht
|
} // namespace dht
|
||||||
|
|||||||
281
gms/gossiper.cc
281
gms/gossiper.cc
@@ -85,8 +85,8 @@ const std::set<inet_address>& gossiper::get_seeds() const noexcept {
|
|||||||
return _gcfg.seeds;
|
return _gcfg.seeds;
|
||||||
}
|
}
|
||||||
|
|
||||||
gossiper::gossiper(abort_source& as, const locator::shared_token_metadata& stm, netw::messaging_service& ms, gossip_config gcfg, gossip_address_map& address_map,
|
gossiper::gossiper(abort_source& as, const locator::shared_token_metadata& stm, netw::messaging_service& ms, gossip_config gcfg,
|
||||||
service::topology_state_machine& tsm)
|
gossip_address_map& address_map, service::topology_state_machine& tsm)
|
||||||
: _topo_sm(tsm)
|
: _topo_sm(tsm)
|
||||||
, _abort_source(as)
|
, _abort_source(as)
|
||||||
, _shared_token_metadata(stm)
|
, _shared_token_metadata(stm)
|
||||||
@@ -98,12 +98,15 @@ gossiper::gossiper(abort_source& as, const locator::shared_token_metadata& stm,
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
_scheduled_gossip_task.set_callback(_gcfg.gossip_scheduling_group, [this] { run(); });
|
_scheduled_gossip_task.set_callback(_gcfg.gossip_scheduling_group, [this] {
|
||||||
|
run();
|
||||||
|
});
|
||||||
// Register this instance with JMX
|
// Register this instance with JMX
|
||||||
namespace sm = seastar::metrics;
|
namespace sm = seastar::metrics;
|
||||||
auto ep = my_host_id();
|
auto ep = my_host_id();
|
||||||
_metrics.add_group("gossip", {
|
_metrics.add_group("gossip", {
|
||||||
sm::make_counter("heart_beat",
|
sm::make_counter(
|
||||||
|
"heart_beat",
|
||||||
[ep, this] {
|
[ep, this] {
|
||||||
auto es = get_endpoint_state_ptr(ep);
|
auto es = get_endpoint_state_ptr(ep);
|
||||||
if (es) {
|
if (es) {
|
||||||
@@ -111,15 +114,20 @@ gossiper::gossiper(abort_source& as, const locator::shared_token_metadata& stm,
|
|||||||
} else {
|
} else {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}, sm::description("Heartbeat of the current Node."))(basic_level),
|
},
|
||||||
sm::make_gauge("live",
|
sm::description("Heartbeat of the current Node."))(basic_level),
|
||||||
|
sm::make_gauge(
|
||||||
|
"live",
|
||||||
[this] {
|
[this] {
|
||||||
return _live_endpoints.size();
|
return _live_endpoints.size();
|
||||||
}, sm::description("How many live nodes the current node sees"))(basic_level),
|
},
|
||||||
sm::make_gauge("unreachable",
|
sm::description("How many live nodes the current node sees"))(basic_level),
|
||||||
|
sm::make_gauge(
|
||||||
|
"unreachable",
|
||||||
[this] {
|
[this] {
|
||||||
return _unreachable_endpoints.size();
|
return _unreachable_endpoints.size();
|
||||||
}, sm::description("How many unreachable nodes the current node sees"))(basic_level),
|
},
|
||||||
|
sm::description("How many unreachable nodes the current node sees"))(basic_level),
|
||||||
});
|
});
|
||||||
|
|
||||||
// Add myself to the map on start
|
// Add myself to the map on start
|
||||||
@@ -171,11 +179,10 @@ void gossiper::do_sort(utils::chunked_vector<gossip_digest>& g_digest_list) cons
|
|||||||
// Depends on
|
// Depends on
|
||||||
// - no external dependency
|
// - no external dependency
|
||||||
future<> gossiper::handle_syn_msg(locator::host_id from, gossip_digest_syn syn_msg) {
|
future<> gossiper::handle_syn_msg(locator::host_id from, gossip_digest_syn syn_msg) {
|
||||||
logger.trace(
|
logger.trace("handle_syn_msg():from={},cluster_name:peer={},local={},group0_id:peer={},local={},"
|
||||||
"handle_syn_msg():from={},cluster_name:peer={},local={},group0_id:peer={},local={},"
|
|
||||||
"recovery_leader:peer={},local={},partitioner_name:peer={},local={}",
|
"recovery_leader:peer={},local={},partitioner_name:peer={},local={}",
|
||||||
from, syn_msg.cluster_id(), get_cluster_name(), syn_msg.group0_id(), get_group0_id(),
|
from, syn_msg.cluster_id(), get_cluster_name(), syn_msg.group0_id(), get_group0_id(), syn_msg.recovery_leader(), get_recovery_leader(),
|
||||||
syn_msg.recovery_leader(), get_recovery_leader(), syn_msg.partioner(), get_partitioner_name());
|
syn_msg.partioner(), get_partitioner_name());
|
||||||
if (!is_enabled()) {
|
if (!is_enabled()) {
|
||||||
co_return;
|
co_return;
|
||||||
}
|
}
|
||||||
@@ -190,8 +197,7 @@ future<> gossiper::handle_syn_msg(locator::host_id from, gossip_digest_syn syn_m
|
|||||||
// Throw away the message and signal that something is wrong.
|
// Throw away the message and signal that something is wrong.
|
||||||
bool both_nodes_in_recovery = syn_msg.recovery_leader() && get_recovery_leader();
|
bool both_nodes_in_recovery = syn_msg.recovery_leader() && get_recovery_leader();
|
||||||
if (both_nodes_in_recovery && syn_msg.recovery_leader() != get_recovery_leader()) {
|
if (both_nodes_in_recovery && syn_msg.recovery_leader() != get_recovery_leader()) {
|
||||||
logger.warn("Recovery leader mismatch from {} {} != {},",
|
logger.warn("Recovery leader mismatch from {} {} != {},", from, syn_msg.recovery_leader(), get_recovery_leader());
|
||||||
from, syn_msg.recovery_leader(), get_recovery_leader());
|
|
||||||
co_return;
|
co_return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -231,16 +237,14 @@ future<> gossiper::handle_syn_msg(locator::host_id from, gossip_digest_syn syn_m
|
|||||||
syn_msg_pending& p = _syn_handlers[from];
|
syn_msg_pending& p = _syn_handlers[from];
|
||||||
if (p.syn_msg) {
|
if (p.syn_msg) {
|
||||||
// Process pending gossip syn msg and send ack msg back
|
// Process pending gossip syn msg and send ack msg back
|
||||||
logger.debug("Handle queued gossip syn msg from node {}, syn_msg={}, pending={}",
|
logger.debug("Handle queued gossip syn msg from node {}, syn_msg={}, pending={}", from, p.syn_msg, p.pending);
|
||||||
from, p.syn_msg, p.pending);
|
|
||||||
syn_msg = std::move(p.syn_msg.value());
|
syn_msg = std::move(p.syn_msg.value());
|
||||||
p.syn_msg = {};
|
p.syn_msg = {};
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
// No more pending syn msg to process
|
// No more pending syn msg to process
|
||||||
p.pending = false;
|
p.pending = false;
|
||||||
logger.debug("No more queued gossip syn msg from node {}, syn_msg={}, pending={}",
|
logger.debug("No more queued gossip syn msg from node {}, syn_msg={}, pending={}", from, p.syn_msg, p.pending);
|
||||||
from, p.syn_msg, p.pending);
|
|
||||||
co_return;
|
co_return;
|
||||||
}
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
@@ -288,8 +292,7 @@ static bool should_count_as_msg_processing(const std::map<inet_address, endpoint
|
|||||||
|
|
||||||
if (is_critical_state(app_state)) {
|
if (is_critical_state(app_state)) {
|
||||||
count_as_msg_processing = true;
|
count_as_msg_processing = true;
|
||||||
logger.debug("node={}, app_state={}, count_as_msg_processing={}",
|
logger.debug("node={}, app_state={}, count_as_msg_processing={}", x.first, app_state, count_as_msg_processing);
|
||||||
x.first, app_state, count_as_msg_processing);
|
|
||||||
return count_as_msg_processing;
|
return count_as_msg_processing;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -352,16 +355,14 @@ future<> gossiper::handle_ack_msg(locator::host_id id, gossip_digest_ack ack_msg
|
|||||||
ack_msg_pending& p = _ack_handlers[from];
|
ack_msg_pending& p = _ack_handlers[from];
|
||||||
if (p.ack_msg_digest) {
|
if (p.ack_msg_digest) {
|
||||||
// Process pending gossip ack msg digests and send ack2 msg back
|
// Process pending gossip ack msg digests and send ack2 msg back
|
||||||
logger.debug("Handle queued gossip ack msg digests from node {}, ack_msg_digest={}, pending={}",
|
logger.debug("Handle queued gossip ack msg digests from node {}, ack_msg_digest={}, pending={}", from, p.ack_msg_digest, p.pending);
|
||||||
from, p.ack_msg_digest, p.pending);
|
|
||||||
ack_msg_digest = std::move(p.ack_msg_digest.value());
|
ack_msg_digest = std::move(p.ack_msg_digest.value());
|
||||||
p.ack_msg_digest = {};
|
p.ack_msg_digest = {};
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
// No more pending ack msg digests to process
|
// No more pending ack msg digests to process
|
||||||
p.pending = false;
|
p.pending = false;
|
||||||
logger.debug("No more queued gossip ack msg digests from node {}, ack_msg_digest={}, pending={}",
|
logger.debug("No more queued gossip ack msg digests from node {}, ack_msg_digest={}, pending={}", from, p.ack_msg_digest, p.pending);
|
||||||
from, p.ack_msg_digest, p.pending);
|
|
||||||
co_return;
|
co_return;
|
||||||
}
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
@@ -391,18 +392,22 @@ future<> gossiper::do_send_ack2_msg(locator::host_id from, utils::chunked_vector
|
|||||||
// current node sent an initial SYN. Comparing versions across
|
// current node sent an initial SYN. Comparing versions across
|
||||||
// different generations in get_state_for_version_bigger_than
|
// different generations in get_state_for_version_bigger_than
|
||||||
// could result in losing some app states with smaller versions.
|
// could result in losing some app states with smaller versions.
|
||||||
const auto version = es->get_heart_beat_state().get_generation() > g_digest.get_generation()
|
const auto version = es->get_heart_beat_state().get_generation() > g_digest.get_generation() ? version_type(0) : g_digest.get_max_version();
|
||||||
? version_type(0)
|
|
||||||
: g_digest.get_max_version();
|
|
||||||
auto local_ep_state_ptr = get_state_for_version_bigger_than(id, version);
|
auto local_ep_state_ptr = get_state_for_version_bigger_than(id, version);
|
||||||
if (local_ep_state_ptr) {
|
if (local_ep_state_ptr) {
|
||||||
delta_ep_state_map.emplace(addr, *local_ep_state_ptr);
|
delta_ep_state_map.emplace(addr, *local_ep_state_ptr);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
gms::gossip_digest_ack2 ack2_msg(std::move(delta_ep_state_map));
|
gms::gossip_digest_ack2 ack2_msg(std::move(delta_ep_state_map));
|
||||||
logger.debug("Calling do_send_ack2_msg to node {}, ack_msg_digest={}, ack2_msg={}", from, ack_msg_digest, ack2_msg);
|
sstring ack2_msg_str;
|
||||||
|
if (logger.is_enabled(logging::log_level::debug)) {
|
||||||
|
ack2_msg_str = fmt::format("{}", ack2_msg);
|
||||||
|
logger.debug("Calling do_send_ack2_msg to node {}, ack_msg_digest={}, ack2_msg={}", from, ack_msg_digest, ack2_msg_str);
|
||||||
|
}
|
||||||
co_await ser::gossip_rpc_verbs::send_gossip_digest_ack2(&_messaging, from, std::move(ack2_msg));
|
co_await ser::gossip_rpc_verbs::send_gossip_digest_ack2(&_messaging, from, std::move(ack2_msg));
|
||||||
logger.debug("finished do_send_ack2_msg to node {}, ack_msg_digest={}, ack2_msg={}", from, ack_msg_digest, ack2_msg);
|
if (logger.is_enabled(logging::log_level::debug)) {
|
||||||
|
logger.debug("finished do_send_ack2_msg to node {}, ack_msg_digest={}, ack2_msg={}", from, ack_msg_digest, ack2_msg_str);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Depends on
|
// Depends on
|
||||||
@@ -434,7 +439,8 @@ future<> gossiper::handle_ack2_msg(locator::host_id from, gossip_digest_ack2 msg
|
|||||||
co_await apply_state_locally(std::move(remote_ep_state_map));
|
co_await apply_state_locally(std::move(remote_ep_state_map));
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> gossiper::handle_echo_msg(locator::host_id from_hid, seastar::rpc::opt_time_point timeout, std::optional<int64_t> generation_number_opt, bool notify_up) {
|
future<> gossiper::handle_echo_msg(
|
||||||
|
locator::host_id from_hid, seastar::rpc::opt_time_point timeout, std::optional<int64_t> generation_number_opt, bool notify_up) {
|
||||||
bool respond = true;
|
bool respond = true;
|
||||||
if (!_advertise_to_nodes.empty()) {
|
if (!_advertise_to_nodes.empty()) {
|
||||||
auto it = _advertise_to_nodes.find(from_hid);
|
auto it = _advertise_to_nodes.find(from_hid);
|
||||||
@@ -444,11 +450,11 @@ future<> gossiper::handle_echo_msg(locator::host_id from_hid, seastar::rpc::opt_
|
|||||||
auto es = get_endpoint_state_ptr(from_hid);
|
auto es = get_endpoint_state_ptr(from_hid);
|
||||||
if (es) {
|
if (es) {
|
||||||
auto saved_generation_number = it->second;
|
auto saved_generation_number = it->second;
|
||||||
auto current_generation_number = generation_number_opt ?
|
auto current_generation_number =
|
||||||
generation_type(generation_number_opt.value()) : es->get_heart_beat_state().get_generation();
|
generation_number_opt ? generation_type(generation_number_opt.value()) : es->get_heart_beat_state().get_generation();
|
||||||
respond = saved_generation_number == current_generation_number;
|
respond = saved_generation_number == current_generation_number;
|
||||||
logger.debug("handle_echo_msg: from={}, saved_generation_number={}, current_generation_number={}",
|
logger.debug("handle_echo_msg: from={}, saved_generation_number={}, current_generation_number={}", from_hid, saved_generation_number,
|
||||||
from_hid, saved_generation_number, current_generation_number);
|
current_generation_number);
|
||||||
} else {
|
} else {
|
||||||
respond = false;
|
respond = false;
|
||||||
}
|
}
|
||||||
@@ -490,24 +496,22 @@ future<> gossiper::handle_shutdown_msg(locator::host_id from, std::optional<int6
|
|||||||
auto es = get_endpoint_state_ptr(from);
|
auto es = get_endpoint_state_ptr(from);
|
||||||
if (es) {
|
if (es) {
|
||||||
auto local_generation = es->get_heart_beat_state().get_generation();
|
auto local_generation = es->get_heart_beat_state().get_generation();
|
||||||
logger.info("Got shutdown message from {}, received_generation={}, local_generation={}",
|
logger.info("Got shutdown message from {}, received_generation={}, local_generation={}", from, generation_number_opt.value(), local_generation);
|
||||||
from, generation_number_opt.value(), local_generation);
|
|
||||||
if (local_generation.value() != generation_number_opt.value()) {
|
if (local_generation.value() != generation_number_opt.value()) {
|
||||||
logger.warn("Ignoring shutdown message from {} because generation number does not match, received_generation={}, local_generation={}",
|
logger.warn("Ignoring shutdown message from {} because generation number does not match, received_generation={}, local_generation={}", from,
|
||||||
from, generation_number_opt.value(), local_generation);
|
generation_number_opt.value(), local_generation);
|
||||||
co_return;
|
co_return;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
logger.warn("Ignoring shutdown message from {} because generation number does not match, received_generation={}, local_generation=not found",
|
logger.warn("Ignoring shutdown message from {} because generation number does not match, received_generation={}, local_generation=not found", from,
|
||||||
from, generation_number_opt.value());
|
generation_number_opt.value());
|
||||||
co_return;
|
co_return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
co_await mark_as_shutdown(from, permit.id());
|
co_await mark_as_shutdown(from, permit.id());
|
||||||
}
|
}
|
||||||
|
|
||||||
future<gossip_get_endpoint_states_response>
|
future<gossip_get_endpoint_states_response> gossiper::handle_get_endpoint_states_msg(gossip_get_endpoint_states_request request) {
|
||||||
gossiper::handle_get_endpoint_states_msg(gossip_get_endpoint_states_request request) {
|
|
||||||
std::unordered_map<gms::inet_address, gms::endpoint_state> map;
|
std::unordered_map<gms::inet_address, gms::endpoint_state> map;
|
||||||
const auto& application_states_wanted = request.application_states;
|
const auto& application_states_wanted = request.application_states;
|
||||||
for (const auto& [node, state] : _endpoint_state_map) {
|
for (const auto& [node, state] : _endpoint_state_map) {
|
||||||
@@ -558,17 +562,20 @@ void gossiper::init_messaging_service_handler() {
|
|||||||
return gossiper.handle_ack2_msg(from, std::move(msg));
|
return gossiper.handle_ack2_msg(from, std::move(msg));
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
ser::gossip_rpc_verbs::register_gossip_echo(&_messaging, [this] (const rpc::client_info& cinfo, seastar::rpc::opt_time_point timeout, rpc::optional<int64_t> generation_number_opt, rpc::optional<bool> notify_up_opt) {
|
ser::gossip_rpc_verbs::register_gossip_echo(&_messaging, [this](const rpc::client_info& cinfo, seastar::rpc::opt_time_point timeout,
|
||||||
|
rpc::optional<int64_t> generation_number_opt, rpc::optional<bool> notify_up_opt) {
|
||||||
auto from_hid = cinfo.retrieve_auxiliary<locator::host_id>("host_id");
|
auto from_hid = cinfo.retrieve_auxiliary<locator::host_id>("host_id");
|
||||||
return handle_echo_msg(from_hid, timeout, generation_number_opt, notify_up_opt.value_or(false));
|
return handle_echo_msg(from_hid, timeout, generation_number_opt, notify_up_opt.value_or(false));
|
||||||
});
|
});
|
||||||
ser::gossip_rpc_verbs::register_gossip_shutdown(&_messaging, [this] (const rpc::client_info& cinfo, inet_address from, rpc::optional<int64_t> generation_number_opt) {
|
ser::gossip_rpc_verbs::register_gossip_shutdown(
|
||||||
|
&_messaging, [this](const rpc::client_info& cinfo, inet_address from, rpc::optional<int64_t> generation_number_opt) {
|
||||||
auto from_hid = cinfo.retrieve_auxiliary<locator::host_id>("host_id");
|
auto from_hid = cinfo.retrieve_auxiliary<locator::host_id>("host_id");
|
||||||
return background_msg("GOSSIP_SHUTDOWN", [from_hid, generation_number_opt](gms::gossiper& gossiper) {
|
return background_msg("GOSSIP_SHUTDOWN", [from_hid, generation_number_opt](gms::gossiper& gossiper) {
|
||||||
return gossiper.handle_shutdown_msg(from_hid, generation_number_opt);
|
return gossiper.handle_shutdown_msg(from_hid, generation_number_opt);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
ser::gossip_rpc_verbs::register_gossip_get_endpoint_states(&_messaging, [this] (const rpc::client_info& cinfo, rpc::opt_time_point, gossip_get_endpoint_states_request request) {
|
ser::gossip_rpc_verbs::register_gossip_get_endpoint_states(
|
||||||
|
&_messaging, [this](const rpc::client_info& cinfo, rpc::opt_time_point, gossip_get_endpoint_states_request request) {
|
||||||
return container().invoke_on(0, [request = std::move(request)](gms::gossiper& gossiper) mutable {
|
return container().invoke_on(0, [request = std::move(request)](gms::gossiper& gossiper) mutable {
|
||||||
return gossiper.handle_get_endpoint_states_msg(std::move(request));
|
return gossiper.handle_get_endpoint_states_msg(std::move(request));
|
||||||
});
|
});
|
||||||
@@ -630,8 +637,8 @@ future<> gossiper::do_apply_state_locally(locator::host_id node, endpoint_state
|
|||||||
logger.trace("{} local generation {}, remote generation {}", node, local_generation, remote_generation);
|
logger.trace("{} local generation {}, remote generation {}", node, local_generation, remote_generation);
|
||||||
if (remote_generation > generation_type(get_generation_number().value() + MAX_GENERATION_DIFFERENCE)) {
|
if (remote_generation > generation_type(get_generation_number().value() + MAX_GENERATION_DIFFERENCE)) {
|
||||||
// assume some peer has corrupted memory and is broadcasting an unbelievable generation about another peer (or itself)
|
// assume some peer has corrupted memory and is broadcasting an unbelievable generation about another peer (or itself)
|
||||||
logger.warn("received an invalid gossip generation for peer {}; local generation = {}, received generation = {}",
|
logger.warn("received an invalid gossip generation for peer {}; local generation = {}, received generation = {}", node, local_generation,
|
||||||
node, local_generation, remote_generation);
|
remote_generation);
|
||||||
} else if (remote_generation > local_generation) {
|
} else if (remote_generation > local_generation) {
|
||||||
logger.trace("Updating heartbeat state generation to {} from {} for {} (notify={})", remote_generation, local_generation, node, !shadow_round);
|
logger.trace("Updating heartbeat state generation to {} from {} for {} (notify={})", remote_generation, local_generation, node, !shadow_round);
|
||||||
// major state change will handle the update by inserting the remote state directly
|
// major state change will handle the update by inserting the remote state directly
|
||||||
@@ -672,7 +679,9 @@ future<> gossiper::apply_state_locally(std::map<inet_address, endpoint_state> ma
|
|||||||
auto start = std::chrono::steady_clock::now();
|
auto start = std::chrono::steady_clock::now();
|
||||||
auto endpoints = map | std::views::keys | std::ranges::to<utils::chunked_vector<inet_address>>();
|
auto endpoints = map | std::views::keys | std::ranges::to<utils::chunked_vector<inet_address>>();
|
||||||
std::shuffle(endpoints.begin(), endpoints.end(), _random_engine);
|
std::shuffle(endpoints.begin(), endpoints.end(), _random_engine);
|
||||||
auto node_is_seed = [this] (gms::inet_address ip) { return is_seed(ip); };
|
auto node_is_seed = [this](gms::inet_address ip) {
|
||||||
|
return is_seed(ip);
|
||||||
|
};
|
||||||
boost::partition(endpoints, node_is_seed);
|
boost::partition(endpoints, node_is_seed);
|
||||||
logger.debug("apply_state_locally_endpoints={}", endpoints);
|
logger.debug("apply_state_locally_endpoints={}", endpoints);
|
||||||
|
|
||||||
@@ -700,8 +709,7 @@ future<> gossiper::apply_state_locally(std::map<inet_address, endpoint_state> ma
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
logger.debug("apply_state_locally() took {} ms", std::chrono::duration_cast<std::chrono::milliseconds>(
|
logger.debug("apply_state_locally() took {} ms", std::chrono::duration_cast<std::chrono::milliseconds>(std::chrono::steady_clock::now() - start).count());
|
||||||
std::chrono::steady_clock::now() - start).count());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> gossiper::force_remove_endpoint(locator::host_id id, permit_id pid) {
|
future<> gossiper::force_remove_endpoint(locator::host_id id, permit_id pid) {
|
||||||
@@ -779,8 +787,7 @@ gossiper::endpoint_permit::endpoint_permit(endpoint_locks_map::entry_ptr&& ptr,
|
|||||||
: _ptr(std::move(ptr))
|
: _ptr(std::move(ptr))
|
||||||
, _permit_id(_ptr->pid)
|
, _permit_id(_ptr->pid)
|
||||||
, _addr(std::move(addr))
|
, _addr(std::move(addr))
|
||||||
, _caller(std::move(caller))
|
, _caller(std::move(caller)) {
|
||||||
{
|
|
||||||
++_ptr->holders;
|
++_ptr->holders;
|
||||||
if (!_ptr->first_holder) {
|
if (!_ptr->first_holder) {
|
||||||
_ptr->first_holder = _caller;
|
_ptr->first_holder = _caller;
|
||||||
@@ -793,8 +800,8 @@ gossiper::endpoint_permit::endpoint_permit(endpoint_permit&& o) noexcept
|
|||||||
: _ptr(std::exchange(o._ptr, nullptr))
|
: _ptr(std::exchange(o._ptr, nullptr))
|
||||||
, _permit_id(std::exchange(o._permit_id, null_permit_id))
|
, _permit_id(std::exchange(o._permit_id, null_permit_id))
|
||||||
, _addr(std::exchange(o._addr, locator::host_id{}))
|
, _addr(std::exchange(o._addr, locator::host_id{}))
|
||||||
, _caller(std::move(o._caller))
|
, _caller(std::move(o._caller)) {
|
||||||
{}
|
}
|
||||||
|
|
||||||
gossiper::endpoint_permit::~endpoint_permit() {
|
gossiper::endpoint_permit::~endpoint_permit() {
|
||||||
release();
|
release();
|
||||||
@@ -818,18 +825,21 @@ bool gossiper::endpoint_permit::release() noexcept {
|
|||||||
|
|
||||||
gossiper::endpoint_lock_entry::endpoint_lock_entry() noexcept
|
gossiper::endpoint_lock_entry::endpoint_lock_entry() noexcept
|
||||||
: sem(1)
|
: sem(1)
|
||||||
, pid(permit_id::create_null_id())
|
, pid(permit_id::create_null_id()) {
|
||||||
{}
|
}
|
||||||
|
|
||||||
future<gossiper::endpoint_permit> gossiper::lock_endpoint(locator::host_id ep, permit_id pid, std::source_location l) {
|
future<gossiper::endpoint_permit> gossiper::lock_endpoint(locator::host_id ep, permit_id pid, std::source_location l) {
|
||||||
if (current_scheduling_group() != _gcfg.gossip_scheduling_group) {
|
if (current_scheduling_group() != _gcfg.gossip_scheduling_group) {
|
||||||
logger.warn("Incorrect scheduling group used for gossiper::lock_endpoint: {}, should be {}, backtrace {}", current_scheduling_group().name(), _gcfg.gossip_scheduling_group.name(), current_backtrace());
|
logger.warn("Incorrect scheduling group used for gossiper::lock_endpoint: {}, should be {}, backtrace {}", current_scheduling_group().name(),
|
||||||
|
_gcfg.gossip_scheduling_group.name(), current_backtrace());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (this_shard_id() != 0) {
|
if (this_shard_id() != 0) {
|
||||||
on_internal_error(logger, "lock_endpoint must be called on shard 0");
|
on_internal_error(logger, "lock_endpoint must be called on shard 0");
|
||||||
}
|
}
|
||||||
auto eptr = co_await _endpoint_locks.get_or_load(ep, [] (const locator::host_id& ep) { return endpoint_lock_entry(); });
|
auto eptr = co_await _endpoint_locks.get_or_load(ep, [](const locator::host_id& ep) {
|
||||||
|
return endpoint_lock_entry();
|
||||||
|
});
|
||||||
if (pid) {
|
if (pid) {
|
||||||
if (eptr->pid == pid) {
|
if (eptr->pid == pid) {
|
||||||
// Already locked with the same permit
|
// Already locked with the same permit
|
||||||
@@ -837,7 +847,8 @@ future<gossiper::endpoint_permit> gossiper::lock_endpoint(locator::host_id ep, p
|
|||||||
} else {
|
} else {
|
||||||
// permit_id mismatch means either that the endpoint lock was released,
|
// permit_id mismatch means either that the endpoint lock was released,
|
||||||
// or maybe we're passed a permit_id that was acquired for a different endpoint.
|
// or maybe we're passed a permit_id that was acquired for a different endpoint.
|
||||||
on_internal_error_noexcept(logger, fmt::format("{}: lock_endpoint {}: permit_id={}: endpoint_lock_entry has mismatching permit_id={}", l.function_name(), ep, pid, eptr->pid));
|
on_internal_error_noexcept(logger,
|
||||||
|
fmt::format("{}: lock_endpoint {}: permit_id={}: endpoint_lock_entry has mismatching permit_id={}", l.function_name(), ep, pid, eptr->pid));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pid = permit_id::create_random_id();
|
pid = permit_id::create_random_id();
|
||||||
@@ -869,15 +880,15 @@ future<gossiper::endpoint_permit> gossiper::lock_endpoint(locator::host_id ep, p
|
|||||||
}
|
}
|
||||||
return fmt_loc(*l);
|
return fmt_loc(*l);
|
||||||
};
|
};
|
||||||
logger.error(
|
logger.error("{}: waiting for endpoint lock (ep={}) took more than {}, signifying possible deadlock;"
|
||||||
"{}: waiting for endpoint lock (ep={}) took more than {}, signifying possible deadlock;"
|
|
||||||
" holders: {}, first holder: {}, last holder (might not be current): {}",
|
" holders: {}, first holder: {}, last holder (might not be current): {}",
|
||||||
fmt_loc(l), ep, duration, eptr->holders, fmt_loc_opt(eptr->first_holder), fmt_loc_opt(eptr->last_holder));
|
fmt_loc(l), ep, duration, eptr->holders, fmt_loc_opt(eptr->first_holder), fmt_loc_opt(eptr->last_holder));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
eptr->pid = pid;
|
eptr->pid = pid;
|
||||||
if (eptr->holders) {
|
if (eptr->holders) {
|
||||||
on_internal_error_noexcept(logger, fmt::format("{}: lock_endpoint {}: newly held endpoint_lock_entry has {} holders", l.function_name(), ep, eptr->holders));
|
on_internal_error_noexcept(
|
||||||
|
logger, fmt::format("{}: lock_endpoint {}: newly held endpoint_lock_entry has {} holders", l.function_name(), ep, eptr->holders));
|
||||||
}
|
}
|
||||||
_abort_source.check();
|
_abort_source.check();
|
||||||
co_return endpoint_permit(std::move(eptr), std::move(ep), std::move(l));
|
co_return endpoint_permit(std::move(eptr), std::move(ep), std::move(l));
|
||||||
@@ -911,7 +922,9 @@ future<std::set<inet_address>> gossiper::get_live_members_synchronized() {
|
|||||||
return container().invoke_on(0, [](gms::gossiper& g) -> future<std::set<inet_address>> {
|
return container().invoke_on(0, [](gms::gossiper& g) -> future<std::set<inet_address>> {
|
||||||
// Make sure the value we return is synchronized on all shards
|
// Make sure the value we return is synchronized on all shards
|
||||||
auto lock = co_await g.lock_endpoint_update_semaphore();
|
auto lock = co_await g.lock_endpoint_update_semaphore();
|
||||||
co_return g.get_live_members() | std::views::transform([&g] (auto id) { return g._address_map.get(id); }) | std::ranges::to<std::set>();
|
co_return g.get_live_members() | std::views::transform([&g](auto id) {
|
||||||
|
return g._address_map.get(id);
|
||||||
|
}) | std::ranges::to<std::set>();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -919,12 +932,15 @@ future<std::set<inet_address>> gossiper::get_unreachable_members_synchronized()
|
|||||||
return container().invoke_on(0, [](gms::gossiper& g) -> future<std::set<inet_address>> {
|
return container().invoke_on(0, [](gms::gossiper& g) -> future<std::set<inet_address>> {
|
||||||
// Make sure the value we return is synchronized on all shards
|
// Make sure the value we return is synchronized on all shards
|
||||||
auto lock = co_await g.lock_endpoint_update_semaphore();
|
auto lock = co_await g.lock_endpoint_update_semaphore();
|
||||||
co_return g.get_unreachable_members() | std::views::transform([&g] (auto id) { return g._address_map.get(id); }) | std::ranges::to<std::set>();
|
co_return g.get_unreachable_members() | std::views::transform([&g](auto id) {
|
||||||
|
return g._address_map.get(id);
|
||||||
|
}) | std::ranges::to<std::set>();
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> gossiper::send_echo(locator::host_id host_id, std::chrono::milliseconds timeout_ms, int64_t generation_number, bool notify_up) {
|
future<> gossiper::send_echo(locator::host_id host_id, std::chrono::milliseconds timeout_ms, int64_t generation_number, bool notify_up) {
|
||||||
return ser::gossip_rpc_verbs::send_gossip_echo(&_messaging, host_id, netw::messaging_service::clock_type::now() + timeout_ms, _abort_source, generation_number, notify_up);
|
return ser::gossip_rpc_verbs::send_gossip_echo(
|
||||||
|
&_messaging, host_id, netw::messaging_service::clock_type::now() + timeout_ms, _abort_source, generation_number, notify_up);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> gossiper::failure_detector_loop_sleep(std::chrono::seconds duration) {
|
future<> gossiper::failure_detector_loop_sleep(std::chrono::seconds duration) {
|
||||||
@@ -1008,15 +1024,14 @@ future<> gossiper::failure_detector_loop() {
|
|||||||
co_await coroutine::parallel_for_each(std::views::iota(0u, nodes.size()), [this, generation_number, live_endpoints_version, &nodes](size_t idx) {
|
co_await coroutine::parallel_for_each(std::views::iota(0u, nodes.size()), [this, generation_number, live_endpoints_version, &nodes](size_t idx) {
|
||||||
const auto& node = nodes[idx];
|
const auto& node = nodes[idx];
|
||||||
auto shard = idx % smp::count;
|
auto shard = idx % smp::count;
|
||||||
logger.debug("failure_detector_loop: Started new round for node={} on shard={}, live_nodes={}, live_endpoints_version={}",
|
logger.debug("failure_detector_loop: Started new round for node={} on shard={}, live_nodes={}, live_endpoints_version={}", node, shard, nodes,
|
||||||
node, shard, nodes, live_endpoints_version);
|
live_endpoints_version);
|
||||||
return container().invoke_on(shard, [node, generation_number, live_endpoints_version](gms::gossiper& g) {
|
return container().invoke_on(shard, [node, generation_number, live_endpoints_version](gms::gossiper& g) {
|
||||||
return g.failure_detector_loop_for_node(node, generation_number, live_endpoints_version);
|
return g.failure_detector_loop_for_node(node, generation_number, live_endpoints_version);
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
logger.warn("failure_detector_loop: Got error in the loop, live_nodes={}: {}",
|
logger.warn("failure_detector_loop: Got error in the loop, live_nodes={}: {}", _live_endpoints, std::current_exception());
|
||||||
_live_endpoints, std::current_exception());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
logger.info("failure_detector_loop: Finished main loop");
|
logger.info("failure_detector_loop: Finished main loop");
|
||||||
@@ -1054,8 +1069,8 @@ future<> gossiper::replicate_live_endpoints_on_change(foreign_ptr<std::unique_pt
|
|||||||
// Apply copies on each other shard
|
// Apply copies on each other shard
|
||||||
co_await container().invoke_on_all([&](gossiper& local_gossiper) noexcept {
|
co_await container().invoke_on_all([&](gossiper& local_gossiper) noexcept {
|
||||||
if (local_gossiper._live_endpoints_version >= new_version) {
|
if (local_gossiper._live_endpoints_version >= new_version) {
|
||||||
on_fatal_internal_error(logger, fmt::format("shard already has unexpected live_endpoints_version {} > {}",
|
on_fatal_internal_error(
|
||||||
local_gossiper._live_endpoints_version, new_version));
|
logger, fmt::format("shard already has unexpected live_endpoints_version {} > {}", local_gossiper._live_endpoints_version, new_version));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto data = per_shard_data[this_shard_id()].release();
|
auto data = per_shard_data[this_shard_id()].release();
|
||||||
@@ -1089,8 +1104,7 @@ void gossiper::run() {
|
|||||||
utils::chunked_vector<gossip_digest> g_digests = make_random_gossip_digest();
|
utils::chunked_vector<gossip_digest> g_digests = make_random_gossip_digest();
|
||||||
|
|
||||||
if (g_digests.size() > 0) {
|
if (g_digests.size() > 0) {
|
||||||
gossip_digest_syn message(
|
gossip_digest_syn message(get_cluster_name(), get_partitioner_name(), g_digests, get_group0_id(), get_recovery_leader());
|
||||||
get_cluster_name(), get_partitioner_name(), g_digests, get_group0_id(), get_recovery_leader());
|
|
||||||
|
|
||||||
if (_endpoints_to_talk_with.empty() && !_live_endpoints.empty()) {
|
if (_endpoints_to_talk_with.empty() && !_live_endpoints.empty()) {
|
||||||
auto live_endpoints = _live_endpoints | std::ranges::to<std::vector>();
|
auto live_endpoints = _live_endpoints | std::ranges::to<std::vector>();
|
||||||
@@ -1102,8 +1116,8 @@ void gossiper::run() {
|
|||||||
constexpr size_t nr_rounds = 10;
|
constexpr size_t nr_rounds = 10;
|
||||||
size_t nodes_per_round = (live_endpoints.size() + nr_rounds - 1) / nr_rounds;
|
size_t nodes_per_round = (live_endpoints.size() + nr_rounds - 1) / nr_rounds;
|
||||||
_endpoints_to_talk_with = live_endpoints | std::views::chunk(nodes_per_round) | std::ranges::to<std::list<std::vector<locator::host_id>>>();
|
_endpoints_to_talk_with = live_endpoints | std::views::chunk(nodes_per_round) | std::ranges::to<std::list<std::vector<locator::host_id>>>();
|
||||||
logger.debug("Set live nodes to talk: endpoint_state_map={}, all_live_nodes={}, endpoints_to_talk_with={}",
|
logger.debug("Set live nodes to talk: endpoint_state_map={}, all_live_nodes={}, endpoints_to_talk_with={}", _endpoint_state_map.size(),
|
||||||
_endpoint_state_map.size(), live_endpoints, _endpoints_to_talk_with);
|
live_endpoints, _endpoints_to_talk_with);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!_endpoints_to_talk_with.empty()) {
|
if (!_endpoints_to_talk_with.empty()) {
|
||||||
@@ -1375,8 +1389,7 @@ future<> gossiper::do_gossip_to_unreachable_member(gossip_digest_syn message) {
|
|||||||
addrs.insert(x.first);
|
addrs.insert(x.first);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
logger.trace("do_gossip_to_unreachable_member: live_endpoint nr={} unreachable_endpoints nr={}",
|
logger.trace("do_gossip_to_unreachable_member: live_endpoint nr={} unreachable_endpoints nr={}", live_endpoint_count, unreachable_endpoint_count);
|
||||||
live_endpoint_count, unreachable_endpoint_count);
|
|
||||||
return send_gossip(message, addrs);
|
return send_gossip(message, addrs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1471,14 +1484,18 @@ bool gossiper::is_cql_ready(const locator::host_id& endpoint) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
locator::host_id gossiper::get_host_id(inet_address endpoint) const {
|
locator::host_id gossiper::get_host_id(inet_address endpoint) const {
|
||||||
auto ids = _endpoint_state_map | std::views::values | std::views::filter([endpoint] (const auto& es) { return es->get_ip() == endpoint; });
|
auto ids = _endpoint_state_map | std::views::values | std::views::filter([endpoint](const auto& es) {
|
||||||
|
return es->get_ip() == endpoint;
|
||||||
|
});
|
||||||
|
|
||||||
if (std::ranges::distance(ids) == 0) {
|
if (std::ranges::distance(ids) == 0) {
|
||||||
throw std::runtime_error(format("Could not get host_id for endpoint {}: endpoint state not found", endpoint));
|
throw std::runtime_error(format("Could not get host_id for endpoint {}: endpoint state not found", endpoint));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find an entry with largest generation
|
// Find an entry with largest generation
|
||||||
const auto& es = std::ranges::max(ids, [](const auto& ep1, const auto& ep2) { return ep1->get_heart_beat_state().get_generation() < ep2->get_heart_beat_state().get_generation(); });
|
const auto& es = std::ranges::max(ids, [](const auto& ep1, const auto& ep2) {
|
||||||
|
return ep1->get_heart_beat_state().get_generation() < ep2->get_heart_beat_state().get_generation();
|
||||||
|
});
|
||||||
|
|
||||||
auto host_id = es->get_host_id();
|
auto host_id = es->get_host_id();
|
||||||
if (!host_id) {
|
if (!host_id) {
|
||||||
@@ -1491,7 +1508,8 @@ std::optional<locator::host_id> gossiper::try_get_host_id(inet_address endpoint)
|
|||||||
std::optional<locator::host_id> host_id;
|
std::optional<locator::host_id> host_id;
|
||||||
try {
|
try {
|
||||||
host_id = get_host_id(endpoint);
|
host_id = get_host_id(endpoint);
|
||||||
} catch (std::runtime_error&) {}
|
} catch (std::runtime_error&) {
|
||||||
|
}
|
||||||
return host_id;
|
return host_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1613,10 +1631,12 @@ void gossiper::mark_alive(endpoint_state_ptr node) {
|
|||||||
// Enter the _background_msg gate so stop() would wait on it
|
// Enter the _background_msg gate so stop() would wait on it
|
||||||
auto gh = _background_msg.hold();
|
auto gh = _background_msg.hold();
|
||||||
logger.debug("Sending a EchoMessage to {}/{}, with generation_number={}", id, addr, generation);
|
logger.debug("Sending a EchoMessage to {}/{}, with generation_number={}", id, addr, generation);
|
||||||
(void) send_echo(id, std::chrono::seconds(15), generation.value(), false).then([this, id] {
|
(void)send_echo(id, std::chrono::seconds(15), generation.value(), false)
|
||||||
|
.then([this, id] {
|
||||||
logger.trace("Got EchoMessage Reply");
|
logger.trace("Got EchoMessage Reply");
|
||||||
return real_mark_alive(id);
|
return real_mark_alive(id);
|
||||||
}).handle_exception([addr, gh = std::move(gh), unmark_pending = std::move(unmark_pending), id] (auto ep) {
|
})
|
||||||
|
.handle_exception([addr, gh = std::move(gh), unmark_pending = std::move(unmark_pending), id](auto ep) {
|
||||||
logger.warn("Fail to send EchoMessage to {}/{}: {}", id, addr, ep);
|
logger.warn("Fail to send EchoMessage to {}/{}: {}", id, addr, ep);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -1731,7 +1751,9 @@ future<> gossiper::handle_major_state_change(endpoint_state eps, permit_id pid,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool gossiper::is_dead_state(const endpoint_state& eps) const {
|
bool gossiper::is_dead_state(const endpoint_state& eps) const {
|
||||||
return std::ranges::any_of(DEAD_STATES, [state = get_gossip_status(eps)](const auto& deadstate) { return state == deadstate; });
|
return std::ranges::any_of(DEAD_STATES, [state = get_gossip_status(eps)](const auto& deadstate) {
|
||||||
|
return state == deadstate;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
bool gossiper::is_shutdown(const locator::host_id& endpoint) const {
|
bool gossiper::is_shutdown(const locator::host_id& endpoint) const {
|
||||||
@@ -1747,7 +1769,9 @@ bool gossiper::is_normal(const locator::host_id& endpoint) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool gossiper::is_silent_shutdown_state(const endpoint_state& ep_state) const {
|
bool gossiper::is_silent_shutdown_state(const endpoint_state& ep_state) const {
|
||||||
return std::ranges::any_of(SILENT_SHUTDOWN_STATES, [state = get_gossip_status(ep_state)](const auto& deadstate) { return state == deadstate; });
|
return std::ranges::any_of(SILENT_SHUTDOWN_STATES, [state = get_gossip_status(ep_state)](const auto& deadstate) {
|
||||||
|
return state == deadstate;
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> gossiper::apply_new_states(endpoint_state local_state, const endpoint_state& remote_state, permit_id pid, bool shadow_round) {
|
future<> gossiper::apply_new_states(endpoint_state local_state, const endpoint_state& remote_state, permit_id pid, bool shadow_round) {
|
||||||
@@ -1837,16 +1861,13 @@ future<> gossiper::do_on_dead_notifications(inet_address addr, endpoint_state_pt
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
void gossiper::request_all(gossip_digest& g_digest,
|
void gossiper::request_all(gossip_digest& g_digest, utils::chunked_vector<gossip_digest>& delta_gossip_digest_list, generation_type remote_generation) const {
|
||||||
utils::chunked_vector<gossip_digest>& delta_gossip_digest_list, generation_type remote_generation) const {
|
|
||||||
/* We are here since we have no data for this endpoint locally so request everything. */
|
/* We are here since we have no data for this endpoint locally so request everything. */
|
||||||
delta_gossip_digest_list.emplace_back(g_digest.get_endpoint(), remote_generation);
|
delta_gossip_digest_list.emplace_back(g_digest.get_endpoint(), remote_generation);
|
||||||
logger.trace("request_all for {}", g_digest.get_endpoint());
|
logger.trace("request_all for {}", g_digest.get_endpoint());
|
||||||
}
|
}
|
||||||
|
|
||||||
void gossiper::send_all(gossip_digest& g_digest,
|
void gossiper::send_all(gossip_digest& g_digest, std::map<inet_address, endpoint_state>& delta_ep_state_map, version_type max_remote_version) const {
|
||||||
std::map<inet_address, endpoint_state>& delta_ep_state_map,
|
|
||||||
version_type max_remote_version) const {
|
|
||||||
auto ep = g_digest.get_endpoint();
|
auto ep = g_digest.get_endpoint();
|
||||||
auto id = try_get_host_id(ep);
|
auto id = try_get_host_id(ep);
|
||||||
logger.trace("send_all(): ep={}/{}, version > {}", id, ep, max_remote_version);
|
logger.trace("send_all(): ep={}/{}, version > {}", id, ep, max_remote_version);
|
||||||
@@ -1858,8 +1879,7 @@ void gossiper::send_all(gossip_digest& g_digest,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void gossiper::examine_gossiper(utils::chunked_vector<gossip_digest>& g_digest_list,
|
void gossiper::examine_gossiper(utils::chunked_vector<gossip_digest>& g_digest_list, utils::chunked_vector<gossip_digest>& delta_gossip_digest_list,
|
||||||
utils::chunked_vector<gossip_digest>& delta_gossip_digest_list,
|
|
||||||
std::map<inet_address, endpoint_state>& delta_ep_state_map) const {
|
std::map<inet_address, endpoint_state>& delta_ep_state_map) const {
|
||||||
for (gossip_digest& g_digest : g_digest_list) {
|
for (gossip_digest& g_digest : g_digest_list) {
|
||||||
auto remote_generation = g_digest.get_generation();
|
auto remote_generation = g_digest.get_generation();
|
||||||
@@ -1878,8 +1898,8 @@ void gossiper::examine_gossiper(utils::chunked_vector<gossip_digest>& g_digest_l
|
|||||||
auto local_generation = ep_state_ptr.get_heart_beat_state().get_generation();
|
auto local_generation = ep_state_ptr.get_heart_beat_state().get_generation();
|
||||||
/* get the max version of all keys in the state associated with this endpoint */
|
/* get the max version of all keys in the state associated with this endpoint */
|
||||||
auto max_local_version = get_max_endpoint_state_version(ep_state_ptr);
|
auto max_local_version = get_max_endpoint_state_version(ep_state_ptr);
|
||||||
logger.trace("examine_gossiper(): ep={}, remote={}.{}, local={}.{}", ep,
|
logger.trace(
|
||||||
remote_generation, max_remote_version, local_generation, max_local_version);
|
"examine_gossiper(): ep={}, remote={}.{}, local={}.{}", ep, remote_generation, max_remote_version, local_generation, max_local_version);
|
||||||
if (remote_generation == local_generation && max_remote_version == max_local_version) {
|
if (remote_generation == local_generation && max_remote_version == max_local_version) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -1954,8 +1974,7 @@ future<> gossiper::start_gossiping(gms::generation_type generation_nbr, applicat
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
future<gossiper::generation_for_nodes>
|
future<gossiper::generation_for_nodes> gossiper::get_generation_for_nodes(std::unordered_set<locator::host_id> nodes) const {
|
||||||
gossiper::get_generation_for_nodes(std::unordered_set<locator::host_id> nodes) const {
|
|
||||||
generation_for_nodes ret;
|
generation_for_nodes ret;
|
||||||
for (const auto& node : nodes) {
|
for (const auto& node : nodes) {
|
||||||
auto es = get_endpoint_state_ptr(node);
|
auto es = get_endpoint_state_ptr(node);
|
||||||
@@ -1963,8 +1982,7 @@ gossiper::get_generation_for_nodes(std::unordered_set<locator::host_id> nodes) c
|
|||||||
auto current_generation_number = es->get_heart_beat_state().get_generation();
|
auto current_generation_number = es->get_heart_beat_state().get_generation();
|
||||||
ret.emplace(node, current_generation_number);
|
ret.emplace(node, current_generation_number);
|
||||||
} else {
|
} else {
|
||||||
return make_exception_future<generation_for_nodes>(
|
return make_exception_future<generation_for_nodes>(std::runtime_error(format("Can not find generation number for node={}", node)));
|
||||||
std::runtime_error(format("Can not find generation number for node={}", node)));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return make_ready_future<generation_for_nodes>(std::move(ret));
|
return make_ready_future<generation_for_nodes>(std::move(ret));
|
||||||
@@ -1979,13 +1997,8 @@ future<> gossiper::advertise_to_nodes(generation_for_nodes advertise_to_nodes) {
|
|||||||
future<> gossiper::do_shadow_round(std::unordered_set<gms::inet_address> nodes, mandatory is_mandatory) {
|
future<> gossiper::do_shadow_round(std::unordered_set<gms::inet_address> nodes, mandatory is_mandatory) {
|
||||||
co_await coroutine::switch_to(_gcfg.gossip_scheduling_group);
|
co_await coroutine::switch_to(_gcfg.gossip_scheduling_group);
|
||||||
nodes.erase(get_broadcast_address());
|
nodes.erase(get_broadcast_address());
|
||||||
gossip_get_endpoint_states_request request{{
|
gossip_get_endpoint_states_request request{{gms::application_state::STATUS, gms::application_state::HOST_ID, gms::application_state::DC,
|
||||||
gms::application_state::STATUS,
|
gms::application_state::RACK, gms::application_state::SUPPORTED_FEATURES, gms::application_state::SNITCH_NAME}};
|
||||||
gms::application_state::HOST_ID,
|
|
||||||
gms::application_state::DC,
|
|
||||||
gms::application_state::RACK,
|
|
||||||
gms::application_state::SUPPORTED_FEATURES,
|
|
||||||
gms::application_state::SNITCH_NAME}};
|
|
||||||
logger.info("Gossip shadow round started with nodes={}", nodes);
|
logger.info("Gossip shadow round started with nodes={}", nodes);
|
||||||
std::unordered_set<gms::inet_address> nodes_talked;
|
std::unordered_set<gms::inet_address> nodes_talked;
|
||||||
auto start_time = clk::now();
|
auto start_time = clk::now();
|
||||||
@@ -1996,13 +2009,16 @@ future<> gossiper::do_shadow_round(std::unordered_set<gms::inet_address> nodes,
|
|||||||
co_await coroutine::parallel_for_each(nodes, [this, &request, &responses, &nodes_talked, &nodes_down](gms::inet_address node) -> future<> {
|
co_await coroutine::parallel_for_each(nodes, [this, &request, &responses, &nodes_talked, &nodes_down](gms::inet_address node) -> future<> {
|
||||||
logger.debug("Sent get_endpoint_states request to {}, request={}", node, request.application_states);
|
logger.debug("Sent get_endpoint_states request to {}, request={}", node, request.application_states);
|
||||||
try {
|
try {
|
||||||
auto response = co_await ser::gossip_rpc_verbs::send_gossip_get_endpoint_states(&_messaging, msg_addr(node), netw::messaging_service::clock_type::now() + std::chrono::seconds(5), request);
|
auto response = co_await ser::gossip_rpc_verbs::send_gossip_get_endpoint_states(
|
||||||
|
&_messaging, msg_addr(node), netw::messaging_service::clock_type::now() + std::chrono::seconds(5), request);
|
||||||
|
|
||||||
logger.debug("Got get_endpoint_states response from {}, response={}", node, response.endpoint_state_map);
|
logger.debug("Got get_endpoint_states response from {}, response={}", node, response.endpoint_state_map);
|
||||||
responses.push_back(std::move(response));
|
responses.push_back(std::move(response));
|
||||||
nodes_talked.insert(node);
|
nodes_talked.insert(node);
|
||||||
|
|
||||||
utils::get_local_injector().inject("stop_during_gossip_shadow_round", [] { std::raise(SIGSTOP); });
|
utils::get_local_injector().inject("stop_during_gossip_shadow_round", [] {
|
||||||
|
std::raise(SIGSTOP);
|
||||||
|
});
|
||||||
} catch (seastar::rpc::unknown_verb_error&) {
|
} catch (seastar::rpc::unknown_verb_error&) {
|
||||||
auto err = format("Node {} does not support get_endpoint_states verb", node);
|
auto err = format("Node {} does not support get_endpoint_states verb", node);
|
||||||
logger.error("{}", err);
|
logger.error("{}", err);
|
||||||
@@ -2034,8 +2050,7 @@ future<> gossiper::do_shadow_round(std::unordered_set<gms::inet_address> nodes,
|
|||||||
throw std::runtime_error(fmt::format("Unable to gossip with any nodes={} (ShadowRound).", nodes));
|
throw std::runtime_error(fmt::format("Unable to gossip with any nodes={} (ShadowRound).", nodes));
|
||||||
}
|
}
|
||||||
sleep_abortable(std::chrono::seconds(1), _abort_source).get();
|
sleep_abortable(std::chrono::seconds(1), _abort_source).get();
|
||||||
logger.info("Connect nodes={} again ... ({} seconds passed)",
|
logger.info("Connect nodes={} again ... ({} seconds passed)", nodes, std::chrono::duration_cast<std::chrono::seconds>(clk::now() - start_time).count());
|
||||||
nodes, std::chrono::duration_cast<std::chrono::seconds>(clk::now() - start_time).count());
|
|
||||||
}
|
}
|
||||||
logger.info("Gossip shadow round finished with nodes_talked={}", nodes_talked);
|
logger.info("Gossip shadow round finished with nodes_talked={}", nodes_talked);
|
||||||
}
|
}
|
||||||
@@ -2080,7 +2095,8 @@ future<> gossiper::add_saved_endpoint(locator::host_id host_id, gms::loaded_endp
|
|||||||
auto es = get_endpoint_state_ptr(host_id);
|
auto es = get_endpoint_state_ptr(host_id);
|
||||||
if (es) {
|
if (es) {
|
||||||
if (es->get_heart_beat_state().get_generation()) {
|
if (es->get_heart_beat_state().get_generation()) {
|
||||||
auto msg = fmt::format("Attempted to add saved endpoint {} after endpoint_state was already established with gossip: {}, at {}", ep, es->get_heart_beat_state(), current_backtrace());
|
auto msg = fmt::format("Attempted to add saved endpoint {} after endpoint_state was already established with gossip: {}, at {}", ep,
|
||||||
|
es->get_heart_beat_state(), current_backtrace());
|
||||||
on_internal_error(logger, msg);
|
on_internal_error(logger, msg);
|
||||||
}
|
}
|
||||||
ep_state = *es;
|
ep_state = *es;
|
||||||
@@ -2131,8 +2147,7 @@ future<> gossiper::add_local_application_state(application_state_map states) {
|
|||||||
auto permit = co_await gossiper.lock_endpoint(ep_id, null_permit_id);
|
auto permit = co_await gossiper.lock_endpoint(ep_id, null_permit_id);
|
||||||
auto ep_state_before = gossiper.get_endpoint_state_ptr(ep_id);
|
auto ep_state_before = gossiper.get_endpoint_state_ptr(ep_id);
|
||||||
if (!ep_state_before) {
|
if (!ep_state_before) {
|
||||||
auto err = fmt::format("endpoint_state_map does not contain endpoint = {}, application_states = {}",
|
auto err = fmt::format("endpoint_state_map does not contain endpoint = {}, application_states = {}", ep_addr, states);
|
||||||
ep_addr, states);
|
|
||||||
co_await coroutine::return_exception(std::runtime_error(err));
|
co_await coroutine::return_exception(std::runtime_error(err));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2244,9 +2259,8 @@ bool gossiper::is_enabled() const {
|
|||||||
void gossiper::add_expire_time_for_endpoint(locator::host_id endpoint, clk::time_point expire_time) {
|
void gossiper::add_expire_time_for_endpoint(locator::host_id endpoint, clk::time_point expire_time) {
|
||||||
auto now_ = now();
|
auto now_ = now();
|
||||||
auto diff = std::chrono::duration_cast<std::chrono::seconds>(expire_time - now_).count();
|
auto diff = std::chrono::duration_cast<std::chrono::seconds>(expire_time - now_).count();
|
||||||
logger.info("Node {} will be removed from gossip at [{:%Y-%m-%d %T %z}]: (expire = {}, now = {}, diff = {} seconds)",
|
logger.info("Node {} will be removed from gossip at [{:%Y-%m-%d %T %z}]: (expire = {}, now = {}, diff = {} seconds)", endpoint,
|
||||||
endpoint, fmt::gmtime(clk::to_time_t(expire_time)), expire_time.time_since_epoch().count(),
|
fmt::gmtime(clk::to_time_t(expire_time)), expire_time.time_since_epoch().count(), now_.time_since_epoch().count(), diff);
|
||||||
now_.time_since_epoch().count(), diff);
|
|
||||||
_expire_time_endpoint_map[endpoint] = expire_time;
|
_expire_time_endpoint_map[endpoint] = expire_time;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2280,9 +2294,11 @@ future<> gossiper::wait_alive_helper(noncopyable_function<std::vector<locator::h
|
|||||||
for (const auto& node : nodes) {
|
for (const auto& node : nodes) {
|
||||||
auto es = get_endpoint_state_ptr(node);
|
auto es = get_endpoint_state_ptr(node);
|
||||||
if (es) {
|
if (es) {
|
||||||
size_t nr_alive = co_await container().map_reduce0([node = es->get_host_id()] (gossiper& g) -> size_t {
|
size_t nr_alive = co_await container().map_reduce0(
|
||||||
|
[node = es->get_host_id()](gossiper& g) -> size_t {
|
||||||
return g.is_alive(node) ? 1 : 0;
|
return g.is_alive(node) ? 1 : 0;
|
||||||
}, 0, std::plus<size_t>());
|
},
|
||||||
|
0, std::plus<size_t>());
|
||||||
logger.debug("Marked node={} as alive on {} out of {} shards", node, nr_alive, smp::count);
|
logger.debug("Marked node={} as alive on {} out of {} shards", node, nr_alive, smp::count);
|
||||||
if (nr_alive == smp::count) {
|
if (nr_alive == smp::count) {
|
||||||
live_nodes.push_back(node);
|
live_nodes.push_back(node);
|
||||||
@@ -2294,8 +2310,7 @@ future<> gossiper::wait_alive_helper(noncopyable_function<std::vector<locator::h
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (std::chrono::steady_clock::now() > timeout + start_time) {
|
if (std::chrono::steady_clock::now() > timeout + start_time) {
|
||||||
throw std::runtime_error(fmt::format("Failed to mark node as alive in {} ms, nodes={}, live_nodes={}",
|
throw std::runtime_error(fmt::format("Failed to mark node as alive in {} ms, nodes={}, live_nodes={}", timeout.count(), nodes, live_nodes));
|
||||||
timeout.count(), nodes, live_nodes));
|
|
||||||
}
|
}
|
||||||
co_await sleep_abortable(std::chrono::milliseconds(100), _abort_source);
|
co_await sleep_abortable(std::chrono::milliseconds(100), _abort_source);
|
||||||
}
|
}
|
||||||
@@ -2303,12 +2318,18 @@ future<> gossiper::wait_alive_helper(noncopyable_function<std::vector<locator::h
|
|||||||
|
|
||||||
// Needed for legacy (node_ops) mode only)
|
// Needed for legacy (node_ops) mode only)
|
||||||
future<> gossiper::wait_alive(std::vector<gms::inet_address> nodes, std::chrono::milliseconds timeout) {
|
future<> gossiper::wait_alive(std::vector<gms::inet_address> nodes, std::chrono::milliseconds timeout) {
|
||||||
auto ids = nodes | std::views::transform([this] (auto ip) { return get_host_id(ip); }) | std::ranges::to<std::vector>();
|
auto ids = nodes | std::views::transform([this](auto ip) {
|
||||||
|
return get_host_id(ip);
|
||||||
|
}) | std::ranges::to<std::vector>();
|
||||||
return wait_alive(std::move(ids), timeout);
|
return wait_alive(std::move(ids), timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> gossiper::wait_alive(std::vector<locator::host_id> nodes, std::chrono::milliseconds timeout) {
|
future<> gossiper::wait_alive(std::vector<locator::host_id> nodes, std::chrono::milliseconds timeout) {
|
||||||
return wait_alive_helper([nodes = std::move(nodes)] { return nodes; }, timeout);
|
return wait_alive_helper(
|
||||||
|
[nodes = std::move(nodes)] {
|
||||||
|
return nodes;
|
||||||
|
},
|
||||||
|
timeout);
|
||||||
}
|
}
|
||||||
|
|
||||||
future<> gossiper::wait_alive(noncopyable_function<std::vector<locator::host_id>()> get_nodes, std::chrono::milliseconds timeout) {
|
future<> gossiper::wait_alive(noncopyable_function<std::vector<locator::host_id>()> get_nodes, std::chrono::milliseconds timeout) {
|
||||||
@@ -2407,7 +2428,8 @@ std::set<sstring> gossiper::get_supported_features(locator::host_id endpoint) co
|
|||||||
return feature_service::to_feature_set(app_state->value());
|
return feature_service::to_feature_set(app_state->value());
|
||||||
}
|
}
|
||||||
|
|
||||||
std::set<sstring> gossiper::get_supported_features(const std::unordered_map<locator::host_id, sstring>& loaded_peer_features, ignore_features_of_local_node ignore_local_node) const {
|
std::set<sstring> gossiper::get_supported_features(
|
||||||
|
const std::unordered_map<locator::host_id, sstring>& loaded_peer_features, ignore_features_of_local_node ignore_local_node) const {
|
||||||
std::unordered_map<locator::host_id, std::set<sstring>> features_map;
|
std::unordered_map<locator::host_id, std::set<sstring>> features_map;
|
||||||
std::set<sstring> common_features;
|
std::set<sstring> common_features;
|
||||||
|
|
||||||
@@ -2430,7 +2452,8 @@ std::set<sstring> gossiper::get_supported_features(const std::unordered_map<loca
|
|||||||
if (features.empty()) {
|
if (features.empty()) {
|
||||||
auto it = loaded_peer_features.find(host_id);
|
auto it = loaded_peer_features.find(host_id);
|
||||||
if (it != loaded_peer_features.end()) {
|
if (it != loaded_peer_features.end()) {
|
||||||
logger.info("Node {} does not contain SUPPORTED_FEATURES in gossip, using features saved in system table, features={}", host_id, feature_service::to_feature_set(it->second));
|
logger.info("Node {} does not contain SUPPORTED_FEATURES in gossip, using features saved in system table, features={}", host_id,
|
||||||
|
feature_service::to_feature_set(it->second));
|
||||||
} else {
|
} else {
|
||||||
logger.warn("Node {} does not contain SUPPORTED_FEATURES in gossip or system table", host_id);
|
logger.warn("Node {} does not contain SUPPORTED_FEATURES in gossip or system table", host_id);
|
||||||
}
|
}
|
||||||
@@ -2451,9 +2474,7 @@ std::set<sstring> gossiper::get_supported_features(const std::unordered_map<loca
|
|||||||
for (auto& x : features_map) {
|
for (auto& x : features_map) {
|
||||||
auto& features = x.second;
|
auto& features = x.second;
|
||||||
std::set<sstring> result;
|
std::set<sstring> result;
|
||||||
std::set_intersection(features.begin(), features.end(),
|
std::set_intersection(features.begin(), features.end(), common_features.begin(), common_features.end(), std::inserter(result, result.end()));
|
||||||
common_features.begin(), common_features.end(),
|
|
||||||
std::inserter(result, result.end()));
|
|
||||||
common_features = std::move(result);
|
common_features = std::move(result);
|
||||||
}
|
}
|
||||||
common_features.erase("");
|
common_features.erase("");
|
||||||
@@ -2468,7 +2489,8 @@ void gossiper::check_snitch_name_matches(sstring local_snitch_name) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (remote_snitch_name->value() != local_snitch_name) {
|
if (remote_snitch_name->value() != local_snitch_name) {
|
||||||
throw std::runtime_error(format("Snitch check failed. This node cannot join the cluster because it uses {} and not {}", local_snitch_name, remote_snitch_name->value()));
|
throw std::runtime_error(format(
|
||||||
|
"Snitch check failed. This node cannot join the cluster because it uses {} and not {}", local_snitch_name, remote_snitch_name->value()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -2509,7 +2531,6 @@ locator::token_metadata_ptr gossiper::get_token_metadata_ptr() const noexcept {
|
|||||||
} // namespace gms
|
} // namespace gms
|
||||||
|
|
||||||
auto fmt::formatter<gms::loaded_endpoint_state>::format(const gms::loaded_endpoint_state& st, fmt::format_context& ctx) const -> decltype(ctx.out()) {
|
auto fmt::formatter<gms::loaded_endpoint_state>::format(const gms::loaded_endpoint_state& st, fmt::format_context& ctx) const -> decltype(ctx.out()) {
|
||||||
return fmt::format_to(ctx.out(), "{{ endpoint={} dc={} rack={} }}", st.endpoint,
|
return fmt::format_to(
|
||||||
st.opt_dc_rack ? st.opt_dc_rack->dc : "",
|
ctx.out(), "{{ endpoint={} dc={} rack={} }}", st.endpoint, st.opt_dc_rack ? st.opt_dc_rack->dc : "", st.opt_dc_rack ? st.opt_dc_rack->rack : "");
|
||||||
st.opt_dc_rack ? st.opt_dc_rack->rack : "");
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,15 +33,14 @@ size_t hash<locator::endpoint_dc_rack>::operator()(const locator::endpoint_dc_ra
|
|||||||
return utils::tuple_hash()(std::tie(v.dc, v.rack));
|
return utils::tuple_hash()(std::tie(v.dc, v.rack));
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
} // namespace std
|
||||||
|
|
||||||
namespace locator {
|
namespace locator {
|
||||||
|
|
||||||
static logging::logger logger("network_topology_strategy");
|
static logging::logger logger("network_topology_strategy");
|
||||||
|
|
||||||
network_topology_strategy::network_topology_strategy(replication_strategy_params params, const topology* topo) :
|
network_topology_strategy::network_topology_strategy(replication_strategy_params params, const topology* topo)
|
||||||
abstract_replication_strategy(params,
|
: abstract_replication_strategy(params, replication_strategy_type::network_topology) {
|
||||||
replication_strategy_type::network_topology) {
|
|
||||||
auto opts = _config_options;
|
auto opts = _config_options;
|
||||||
|
|
||||||
logger.debug("options={}", opts);
|
logger.debug("options={}", opts);
|
||||||
@@ -65,8 +64,7 @@ network_topology_strategy::network_topology_strategy(replication_strategy_params
|
|||||||
if (boost::equals(key, "replication_factor")) {
|
if (boost::equals(key, "replication_factor")) {
|
||||||
on_internal_error(rslogger, "replication_factor should have been replaced with a DC:RF mapping by now");
|
on_internal_error(rslogger, "replication_factor should have been replaced with a DC:RF mapping by now");
|
||||||
} else {
|
} else {
|
||||||
throw exceptions::configuration_exception(format(
|
throw exceptions::configuration_exception(format("'{}' is not a valid option, did you mean (lowercase) 'replication_factor'?", key));
|
||||||
"'{}' is not a valid option, did you mean (lowercase) 'replication_factor'?", key));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -109,8 +107,8 @@ class natural_endpoints_tracker {
|
|||||||
, _rf_left(std::min(rf, node_count))
|
, _rf_left(std::min(rf, node_count))
|
||||||
// If there aren't enough racks in this DC to fill the RF, we'll still use at least one node from each rack,
|
// If there aren't enough racks in this DC to fill the RF, we'll still use at least one node from each rack,
|
||||||
// and the difference is to be filled by the first encountered nodes.
|
// and the difference is to be filled by the first encountered nodes.
|
||||||
, _acceptable_rack_repeats(rf - rack_count)
|
, _acceptable_rack_repeats(rf - rack_count) {
|
||||||
{}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Attempts to add an endpoint to the replicas for this datacenter, adding to the endpoints set if successful.
|
* Attempts to add an endpoint to the replicas for this datacenter, adding to the endpoints set if successful.
|
||||||
@@ -201,8 +199,7 @@ public:
|
|||||||
, _tp(_tm.get_topology())
|
, _tp(_tm.get_topology())
|
||||||
, _dc_rep_factor(dc_rep_factor)
|
, _dc_rep_factor(dc_rep_factor)
|
||||||
, _token_owners(_tm.get_datacenter_token_owners())
|
, _token_owners(_tm.get_datacenter_token_owners())
|
||||||
, _racks(_tm.get_datacenter_racks_token_owners())
|
, _racks(_tm.get_datacenter_racks_token_owners()) {
|
||||||
{
|
|
||||||
// not aware of any cluster members
|
// not aware of any cluster members
|
||||||
SCYLLA_ASSERT(!_token_owners.empty() && !_racks.empty());
|
SCYLLA_ASSERT(!_token_owners.empty() && !_racks.empty());
|
||||||
|
|
||||||
@@ -251,16 +248,14 @@ public:
|
|||||||
for (const auto& [dc, rf_data] : dc_rf) {
|
for (const auto& [dc, rf_data] : dc_rf) {
|
||||||
auto rf = rf_data.count();
|
auto rf = rf_data.count();
|
||||||
if (rf > endpoints_in(dc)) {
|
if (rf > endpoints_in(dc)) {
|
||||||
throw exceptions::configuration_exception(seastar::format(
|
throw exceptions::configuration_exception(
|
||||||
"Datacenter {} doesn't have enough token-owning nodes for replication_factor={}", dc, rf));
|
seastar::format("Datacenter {} doesn't have enough token-owning nodes for replication_factor={}", dc, rf));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
future<host_id_set>
|
future<host_id_set> network_topology_strategy::calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const {
|
||||||
network_topology_strategy::calculate_natural_endpoints(
|
|
||||||
const token& search_token, const token_metadata& tm) const {
|
|
||||||
|
|
||||||
natural_endpoints_tracker tracker(tm, _dc_rep_factor);
|
natural_endpoints_tracker tracker(tm, _dc_rep_factor);
|
||||||
|
|
||||||
@@ -285,12 +280,14 @@ void network_topology_strategy::validate_options(const gms::feature_service& fs,
|
|||||||
for (auto& c : _config_options) {
|
for (auto& c : _config_options) {
|
||||||
if (c.first == sstring("replication_factor")) {
|
if (c.first == sstring("replication_factor")) {
|
||||||
on_internal_error(rslogger, fmt::format("'replication_factor' tag should be unrolled into a list of DC:RF by now."
|
on_internal_error(rslogger, fmt::format("'replication_factor' tag should be unrolled into a list of DC:RF by now."
|
||||||
"_config_options:{}", _config_options));
|
"_config_options:{}",
|
||||||
|
_config_options));
|
||||||
}
|
}
|
||||||
auto dc = dcs.find(c.first);
|
auto dc = dcs.find(c.first);
|
||||||
if (dc == dcs.end()) {
|
if (dc == dcs.end()) {
|
||||||
throw exceptions::configuration_exception(format("Unrecognized strategy option {{{}}} "
|
throw exceptions::configuration_exception(format("Unrecognized strategy option {{{}}} "
|
||||||
"passed to NetworkTopologyStrategy", this->to_qualified_class_name(c.first)));
|
"passed to NetworkTopologyStrategy",
|
||||||
|
this->to_qualified_class_name(c.first)));
|
||||||
}
|
}
|
||||||
auto racks = dc->second | std::views::keys | std::ranges::to<std::unordered_set<sstring>>();
|
auto racks = dc->second | std::views::keys | std::ranges::to<std::unordered_set<sstring>>();
|
||||||
auto rf = parse_replication_factor(c.second);
|
auto rf = parse_replication_factor(c.second);
|
||||||
@@ -311,8 +308,8 @@ future<tablet_map> network_topology_strategy::allocate_tablets_for_new_table(sch
|
|||||||
rslogger.info("Rounding up tablet count from {} to {} for table {}.{}", tablet_count, aligned_tablet_count, s->ks_name(), s->cf_name());
|
rslogger.info("Rounding up tablet count from {} to {} for table {}.{}", tablet_count, aligned_tablet_count, s->ks_name(), s->cf_name());
|
||||||
tablet_count = aligned_tablet_count;
|
tablet_count = aligned_tablet_count;
|
||||||
}
|
}
|
||||||
co_return co_await reallocate_tablets(std::move(s), std::move(tm),
|
co_return co_await reallocate_tablets(
|
||||||
tablet_map(tablet_count, get_consistency() != data_dictionary::consistency_config_option::eventual));
|
std::move(s), std::move(tm), tablet_map(tablet_count, get_consistency() != data_dictionary::consistency_config_option::eventual));
|
||||||
}
|
}
|
||||||
|
|
||||||
future<tablet_map> network_topology_strategy::reallocate_tablets(schema_ptr s, token_metadata_ptr tm, tablet_map tablets) const {
|
future<tablet_map> network_topology_strategy::reallocate_tablets(schema_ptr s, token_metadata_ptr tm, tablet_map tablets) const {
|
||||||
@@ -321,16 +318,15 @@ future<tablet_map> network_topology_strategy::reallocate_tablets(schema_ptr s, t
|
|||||||
co_await load.populate_with_normalized_load();
|
co_await load.populate_with_normalized_load();
|
||||||
co_await load.populate(std::nullopt, s->id());
|
co_await load.populate(std::nullopt, s->id());
|
||||||
|
|
||||||
tablet_logger.debug("Allocating tablets for {}.{} ({}): dc_rep_factor={} tablet_count={}", s->ks_name(), s->cf_name(), s->id(), _dc_rep_factor, tablets.tablet_count());
|
tablet_logger.debug(
|
||||||
|
"Allocating tablets for {}.{} ({}): dc_rep_factor={} tablet_count={}", s->ks_name(), s->cf_name(), s->id(), _dc_rep_factor, tablets.tablet_count());
|
||||||
|
|
||||||
for (tablet_id tb : tablets.tablet_ids()) {
|
for (tablet_id tb : tablets.tablet_ids()) {
|
||||||
auto tinfo = tablets.get_tablet_info(tb);
|
auto tinfo = tablets.get_tablet_info(tb);
|
||||||
tinfo.replicas = co_await reallocate_tablets(s, tm, load, tablets, tb);
|
tinfo.replicas = co_await reallocate_tablets(s, tm, load, tablets, tb);
|
||||||
if (tablets.has_raft_info()) {
|
if (tablets.has_raft_info()) {
|
||||||
if (!tablets.get_tablet_raft_info(tb).group_id) {
|
if (!tablets.get_tablet_raft_info(tb).group_id) {
|
||||||
tablets.set_tablet_raft_info(tb, tablet_raft_info {
|
tablets.set_tablet_raft_info(tb, tablet_raft_info{.group_id = raft::group_id{utils::UUID_gen::get_time_UUID()}});
|
||||||
.group_id = raft::group_id{utils::UUID_gen::get_time_UUID()}
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
tablets.set_tablet(tb, std::move(tinfo));
|
tablets.set_tablet(tb, std::move(tinfo));
|
||||||
@@ -340,7 +336,8 @@ future<tablet_map> network_topology_strategy::reallocate_tablets(schema_ptr s, t
|
|||||||
co_return tablets;
|
co_return tablets;
|
||||||
}
|
}
|
||||||
|
|
||||||
future<tablet_replica_set> network_topology_strategy::reallocate_tablets(schema_ptr s, token_metadata_ptr tm, load_sketch& load, const tablet_map& cur_tablets, tablet_id tb) const {
|
future<tablet_replica_set> network_topology_strategy::reallocate_tablets(
|
||||||
|
schema_ptr s, token_metadata_ptr tm, load_sketch& load, const tablet_map& cur_tablets, tablet_id tb) const {
|
||||||
tablet_replica_set replicas;
|
tablet_replica_set replicas;
|
||||||
// Current number of replicas per dc
|
// Current number of replicas per dc
|
||||||
std::unordered_map<sstring, size_t> nodes_per_dc;
|
std::unordered_map<sstring, size_t> nodes_per_dc;
|
||||||
@@ -364,8 +361,8 @@ future<tablet_replica_set> network_topology_strategy::reallocate_tablets(schema_
|
|||||||
if (new_rf && new_rf->is_rack_based()) {
|
if (new_rf && new_rf->is_rack_based()) {
|
||||||
auto diff = diff_racks(old_racks_per_dc[dc], new_rf->get_rack_list());
|
auto diff = diff_racks(old_racks_per_dc[dc], new_rf->get_rack_list());
|
||||||
|
|
||||||
tablet_logger.debug("reallocate_tablets {}.{} tablet_id={} dc={} old_racks={} add_racks={} del_racks={}",
|
tablet_logger.debug("reallocate_tablets {}.{} tablet_id={} dc={} old_racks={} add_racks={} del_racks={}", s->ks_name(), s->cf_name(), tb, dc,
|
||||||
s->ks_name(), s->cf_name(), tb, dc, old_racks_per_dc[dc], diff.added, diff.removed);
|
old_racks_per_dc[dc], diff.added, diff.removed);
|
||||||
|
|
||||||
if (!diff) {
|
if (!diff) {
|
||||||
continue;
|
continue;
|
||||||
@@ -395,13 +392,8 @@ future<tablet_replica_set> network_topology_strategy::reallocate_tablets(schema_
|
|||||||
co_return replicas;
|
co_return replicas;
|
||||||
}
|
}
|
||||||
|
|
||||||
tablet_replica_set network_topology_strategy::drop_tablets_in_racks(schema_ptr s,
|
tablet_replica_set network_topology_strategy::drop_tablets_in_racks(schema_ptr s, token_metadata_ptr tm, load_sketch& load, tablet_id tb,
|
||||||
token_metadata_ptr tm,
|
const tablet_replica_set& cur_replicas, const sstring& dc, const rack_list& racks_to_drop) const {
|
||||||
load_sketch& load,
|
|
||||||
tablet_id tb,
|
|
||||||
const tablet_replica_set& cur_replicas,
|
|
||||||
const sstring& dc,
|
|
||||||
const rack_list& racks_to_drop) const {
|
|
||||||
auto& topo = tm->get_topology();
|
auto& topo = tm->get_topology();
|
||||||
tablet_replica_set filtered;
|
tablet_replica_set filtered;
|
||||||
auto is_rack_to_drop = [&racks_to_drop](const sstring& rack) {
|
auto is_rack_to_drop = [&racks_to_drop](const sstring& rack) {
|
||||||
@@ -410,8 +402,8 @@ tablet_replica_set network_topology_strategy::drop_tablets_in_racks(schema_ptr s
|
|||||||
for (const auto& tr : cur_replicas) {
|
for (const auto& tr : cur_replicas) {
|
||||||
auto& node = topo.get_node(tr.host);
|
auto& node = topo.get_node(tr.host);
|
||||||
if (node.dc_rack().dc == dc && is_rack_to_drop(node.dc_rack().rack)) {
|
if (node.dc_rack().dc == dc && is_rack_to_drop(node.dc_rack().rack)) {
|
||||||
tablet_logger.debug("drop_tablets_in_rack {}.{} tablet_id={} dc={} rack={} removing replica: {}",
|
tablet_logger.debug("drop_tablets_in_rack {}.{} tablet_id={} dc={} rack={} removing replica: {}", s->ks_name(), s->cf_name(), tb, node.dc_rack().dc,
|
||||||
s->ks_name(), s->cf_name(), tb, node.dc_rack().dc, node.dc_rack().rack, tr);
|
node.dc_rack().rack, tr);
|
||||||
load.unload(tr.host, tr.shard, 1, service::default_target_tablet_size);
|
load.unload(tr.host, tr.shard, 1, service::default_target_tablet_size);
|
||||||
} else {
|
} else {
|
||||||
filtered.emplace_back(tr);
|
filtered.emplace_back(tr);
|
||||||
@@ -420,13 +412,8 @@ tablet_replica_set network_topology_strategy::drop_tablets_in_racks(schema_ptr s
|
|||||||
return filtered;
|
return filtered;
|
||||||
}
|
}
|
||||||
|
|
||||||
tablet_replica_set network_topology_strategy::add_tablets_in_racks(schema_ptr s,
|
tablet_replica_set network_topology_strategy::add_tablets_in_racks(schema_ptr s, token_metadata_ptr tm, load_sketch& load, tablet_id tb,
|
||||||
token_metadata_ptr tm,
|
const tablet_replica_set& cur_replicas, const sstring& dc, const rack_list& racks_to_add) const {
|
||||||
load_sketch& load,
|
|
||||||
tablet_id tb,
|
|
||||||
const tablet_replica_set& cur_replicas,
|
|
||||||
const sstring& dc,
|
|
||||||
const rack_list& racks_to_add) const {
|
|
||||||
auto nodes = tm->get_datacenter_racks_token_owners_nodes();
|
auto nodes = tm->get_datacenter_racks_token_owners_nodes();
|
||||||
auto& dc_nodes = nodes.at(dc);
|
auto& dc_nodes = nodes.at(dc);
|
||||||
auto new_replicas = cur_replicas;
|
auto new_replicas = cur_replicas;
|
||||||
@@ -450,29 +437,26 @@ tablet_replica_set network_topology_strategy::add_tablets_in_racks(schema_ptr s,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (!min_node) {
|
if (!min_node) {
|
||||||
throw std::runtime_error(
|
throw std::runtime_error(fmt::format("No candidate node in rack {}.{} to allocate tablet replica", dc, rack));
|
||||||
fmt::format("No candidate node in rack {}.{} to allocate tablet replica", dc, rack));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
auto new_replica = tablet_replica{min_node, load.next_shard(min_node, 1, service::default_target_tablet_size)};
|
auto new_replica = tablet_replica{min_node, load.next_shard(min_node, 1, service::default_target_tablet_size)};
|
||||||
new_replicas.push_back(new_replica);
|
new_replicas.push_back(new_replica);
|
||||||
|
|
||||||
tablet_logger.trace("add_tablet_in_rack {}.{} tablet_id={} dc={} rack={} load={} new_replica={}",
|
tablet_logger.trace("add_tablet_in_rack {}.{} tablet_id={} dc={} rack={} load={} new_replica={}", s->ks_name(), s->cf_name(), tb.id, dc, rack, min_load,
|
||||||
s->ks_name(), s->cf_name(), tb.id, dc, rack, min_load, new_replica);
|
new_replica);
|
||||||
}
|
}
|
||||||
return new_replicas;
|
return new_replicas;
|
||||||
}
|
}
|
||||||
|
|
||||||
future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_ptr s, token_metadata_ptr tm, load_sketch& load, tablet_id tb,
|
future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_ptr s, token_metadata_ptr tm, load_sketch& load, tablet_id tb,
|
||||||
std::map<sstring, std::unordered_set<locator::host_id>>& replicas_per_rack,
|
std::map<sstring, std::unordered_set<locator::host_id>>& replicas_per_rack, const tablet_replica_set& cur_replicas, sstring dc, size_t dc_node_count,
|
||||||
const tablet_replica_set& cur_replicas,
|
size_t dc_rf) const {
|
||||||
sstring dc, size_t dc_node_count, size_t dc_rf) const {
|
|
||||||
static thread_local std::default_random_engine rnd_engine{std::random_device{}()};
|
static thread_local std::default_random_engine rnd_engine{std::random_device{}()};
|
||||||
|
|
||||||
auto replicas = cur_replicas;
|
auto replicas = cur_replicas;
|
||||||
// all_dc_racks is ordered lexicographically on purpose
|
// all_dc_racks is ordered lexicographically on purpose
|
||||||
auto all_dc_racks = tm->get_datacenter_racks_token_owners_nodes().at(dc)
|
auto all_dc_racks = tm->get_datacenter_racks_token_owners_nodes().at(dc) | std::ranges::to<std::map>();
|
||||||
| std::ranges::to<std::map>();
|
|
||||||
|
|
||||||
// Track all nodes with no replicas on them for this tablet, per rack.
|
// Track all nodes with no replicas on them for this tablet, per rack.
|
||||||
struct node_load {
|
struct node_load {
|
||||||
@@ -545,8 +529,7 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
|||||||
append_candidate_racks(existing_racks);
|
append_candidate_racks(existing_racks);
|
||||||
|
|
||||||
if (candidate_racks.empty()) {
|
if (candidate_racks.empty()) {
|
||||||
on_internal_error(tablet_logger,
|
on_internal_error(tablet_logger, seastar::format("allocate_replica {}.{}: no candidate racks found for dc={} allocated={} rf={}: existing={}",
|
||||||
seastar::format("allocate_replica {}.{}: no candidate racks found for dc={} allocated={} rf={}: existing={}",
|
|
||||||
s->ks_name(), s->cf_name(), dc, dc_node_count, dc_rf, replicas_per_rack));
|
s->ks_name(), s->cf_name(), dc, dc_node_count, dc_rf, replicas_per_rack));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -556,8 +539,8 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
|||||||
const auto& rack = candidate->rack;
|
const auto& rack = candidate->rack;
|
||||||
auto& nodes = candidate->nodes;
|
auto& nodes = candidate->nodes;
|
||||||
if (nodes.empty()) {
|
if (nodes.empty()) {
|
||||||
on_internal_error(tablet_logger,
|
on_internal_error(tablet_logger, seastar::format("allocate_replica {}.{} tablet_id={}: candidates vector for rack={} is empty for allocating "
|
||||||
seastar::format("allocate_replica {}.{} tablet_id={}: candidates vector for rack={} is empty for allocating tablet replicas in dc={} allocated={} rf={}",
|
"tablet replicas in dc={} allocated={} rf={}",
|
||||||
s->ks_name(), s->cf_name(), tb.id, rack, dc, dc_node_count, dc_rf));
|
s->ks_name(), s->cf_name(), tb.id, rack, dc, dc_node_count, dc_rf));
|
||||||
}
|
}
|
||||||
auto host_id = nodes.back().host;
|
auto host_id = nodes.back().host;
|
||||||
@@ -566,13 +549,13 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
|||||||
auto inserted = replicas_per_rack[node.dc_rack().rack].insert(host_id).second;
|
auto inserted = replicas_per_rack[node.dc_rack().rack].insert(host_id).second;
|
||||||
// Sanity check that a node is not used more than once
|
// Sanity check that a node is not used more than once
|
||||||
if (!inserted) {
|
if (!inserted) {
|
||||||
on_internal_error(tablet_logger,
|
on_internal_error(tablet_logger, seastar::format("allocate_replica {}.{} tablet_id={}: allocated replica={} node already used when allocating "
|
||||||
seastar::format("allocate_replica {}.{} tablet_id={}: allocated replica={} node already used when allocating tablet replicas in dc={} allocated={} rf={}: replicas={}",
|
"tablet replicas in dc={} allocated={} rf={}: replicas={}",
|
||||||
s->ks_name(), s->cf_name(), tb.id, replica, dc, dc_node_count, dc_rf, replicas));
|
s->ks_name(), s->cf_name(), tb.id, replica, dc, dc_node_count, dc_rf, replicas));
|
||||||
}
|
}
|
||||||
nodes.pop_back();
|
nodes.pop_back();
|
||||||
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: allocated tablet replica={} dc={} rack={}: nodes remaining in rack={}",
|
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: allocated tablet replica={} dc={} rack={}: nodes remaining in rack={}", s->ks_name(),
|
||||||
s->ks_name(), s->cf_name(), tb.id, replica, node.dc_rack().dc, node.dc_rack().rack, nodes.size());
|
s->cf_name(), tb.id, replica, node.dc_rack().dc, node.dc_rack().rack, nodes.size());
|
||||||
if (nodes.empty()) {
|
if (nodes.empty()) {
|
||||||
candidate = candidate_racks.erase(candidate);
|
candidate = candidate_racks.erase(candidate);
|
||||||
} else {
|
} else {
|
||||||
@@ -583,7 +566,8 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
|||||||
}
|
}
|
||||||
if (tablet_logger.is_enabled(log_level::trace)) {
|
if (tablet_logger.is_enabled(log_level::trace)) {
|
||||||
if (candidate != candidate_racks.end()) {
|
if (candidate != candidate_racks.end()) {
|
||||||
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: next rack={} nodes={}", s->ks_name(), s->cf_name(), tb.id, candidate->rack, candidate->nodes.size());
|
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: next rack={} nodes={}", s->ks_name(), s->cf_name(), tb.id, candidate->rack,
|
||||||
|
candidate->nodes.size());
|
||||||
} else {
|
} else {
|
||||||
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: no candidate racks left", s->ks_name(), s->cf_name(), tb.id);
|
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: no candidate racks left", s->ks_name(), s->cf_name(), tb.id);
|
||||||
}
|
}
|
||||||
@@ -591,14 +575,14 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
|||||||
return replica;
|
return replica;
|
||||||
};
|
};
|
||||||
|
|
||||||
tablet_logger.debug("allocate_replica {}.{} tablet_id={}: allocating tablet replicas in dc={} allocated={} rf={}",
|
tablet_logger.debug("allocate_replica {}.{} tablet_id={}: allocating tablet replicas in dc={} allocated={} rf={}", s->ks_name(), s->cf_name(), tb.id, dc,
|
||||||
s->ks_name(), s->cf_name(), tb.id, dc, dc_node_count, dc_rf);
|
dc_node_count, dc_rf);
|
||||||
|
|
||||||
for (size_t remaining = dc_rf - dc_node_count; remaining; --remaining) {
|
for (size_t remaining = dc_rf - dc_node_count; remaining; --remaining) {
|
||||||
co_await coroutine::maybe_yield();
|
co_await coroutine::maybe_yield();
|
||||||
if (candidate_rack == candidate_racks.end()) {
|
if (candidate_rack == candidate_racks.end()) {
|
||||||
on_internal_error(tablet_logger,
|
on_internal_error(tablet_logger, format("allocate_replica {}.{} tablet_id={}: ran out of candidates for allocating tablet replicas in dc={} "
|
||||||
format("allocate_replica {}.{} tablet_id={}: ran out of candidates for allocating tablet replicas in dc={} allocated={} rf={}: remaining={}",
|
"allocated={} rf={}: remaining={}",
|
||||||
s->ks_name(), s->cf_name(), tb.id, dc, dc_node_count, dc_rf, remaining));
|
s->ks_name(), s->cf_name(), tb.id, dc, dc_node_count, dc_rf, remaining));
|
||||||
}
|
}
|
||||||
replicas.emplace_back(allocate_replica(candidate_rack));
|
replicas.emplace_back(allocate_replica(candidate_rack));
|
||||||
@@ -608,9 +592,9 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
|||||||
}
|
}
|
||||||
|
|
||||||
tablet_replica_set network_topology_strategy::drop_tablets_in_dc(schema_ptr s, const locator::topology& topo, load_sketch& load, tablet_id tb,
|
tablet_replica_set network_topology_strategy::drop_tablets_in_dc(schema_ptr s, const locator::topology& topo, load_sketch& load, tablet_id tb,
|
||||||
const tablet_replica_set& cur_replicas,
|
const tablet_replica_set& cur_replicas, sstring dc, size_t dc_node_count, size_t dc_rf) const {
|
||||||
sstring dc, size_t dc_node_count, size_t dc_rf) const {
|
tablet_logger.debug("drop_tablets_in_dc {}.{} tablet_id={}: deallocating tablet replicas in dc={} allocated={} rf={}", s->ks_name(), s->cf_name(), tb.id,
|
||||||
tablet_logger.debug("drop_tablets_in_dc {}.{} tablet_id={}: deallocating tablet replicas in dc={} allocated={} rf={}", s->ks_name(), s->cf_name(), tb.id, dc, dc_node_count, dc_rf);
|
dc, dc_node_count, dc_rf);
|
||||||
|
|
||||||
// Leave dc_rf replicas in dc, effectively deallocating in reverse order,
|
// Leave dc_rf replicas in dc, effectively deallocating in reverse order,
|
||||||
// to maintain replica pairing between the base table and its materialized views.
|
// to maintain replica pairing between the base table and its materialized views.
|
||||||
@@ -629,8 +613,7 @@ tablet_replica_set network_topology_strategy::drop_tablets_in_dc(schema_ptr s, c
|
|||||||
return filtered;
|
return filtered;
|
||||||
}
|
}
|
||||||
|
|
||||||
sstring network_topology_strategy::sanity_check_read_replicas(const effective_replication_map& erm,
|
sstring network_topology_strategy::sanity_check_read_replicas(const effective_replication_map& erm, const host_id_vector_replica_set& read_replicas) const {
|
||||||
const host_id_vector_replica_set& read_replicas) const {
|
|
||||||
const auto& topology = erm.get_topology();
|
const auto& topology = erm.get_topology();
|
||||||
|
|
||||||
struct rf_node_count {
|
struct rf_node_count {
|
||||||
@@ -663,4 +646,4 @@ sstring network_topology_strategy::sanity_check_read_replicas(const effective_re
|
|||||||
using registry = class_registrator<abstract_replication_strategy, network_topology_strategy, replication_strategy_params, const topology*>;
|
using registry = class_registrator<abstract_replication_strategy, network_topology_strategy, replication_strategy_params, const topology*>;
|
||||||
static registry registrator("org.apache.cassandra.locator.NetworkTopologyStrategy");
|
static registry registrator("org.apache.cassandra.locator.NetworkTopologyStrategy");
|
||||||
static registry registrator_short_name("NetworkTopologyStrategy");
|
static registry registrator_short_name("NetworkTopologyStrategy");
|
||||||
}
|
} // namespace locator
|
||||||
|
|||||||
@@ -43,8 +43,7 @@ std::optional<std::pair<tablet_id, tablet_id>> tablet_map::sibling_tablets(table
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static
|
static write_replica_set_selector get_selector_for_writes(tablet_transition_stage stage) {
|
||||||
write_replica_set_selector get_selector_for_writes(tablet_transition_stage stage) {
|
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case tablet_transition_stage::allow_write_both_read_old:
|
case tablet_transition_stage::allow_write_both_read_old:
|
||||||
return write_replica_set_selector::previous;
|
return write_replica_set_selector::previous;
|
||||||
@@ -76,8 +75,7 @@ write_replica_set_selector get_selector_for_writes(tablet_transition_stage stage
|
|||||||
on_internal_error(tablet_logger, format("Invalid tablet transition stage: {}", static_cast<int>(stage)));
|
on_internal_error(tablet_logger, format("Invalid tablet transition stage: {}", static_cast<int>(stage)));
|
||||||
}
|
}
|
||||||
|
|
||||||
static
|
static read_replica_set_selector get_selector_for_reads(tablet_transition_stage stage) {
|
||||||
read_replica_set_selector get_selector_for_reads(tablet_transition_stage stage) {
|
|
||||||
switch (stage) {
|
switch (stage) {
|
||||||
case tablet_transition_stage::allow_write_both_read_old:
|
case tablet_transition_stage::allow_write_both_read_old:
|
||||||
return read_replica_set_selector::previous;
|
return read_replica_set_selector::previous;
|
||||||
@@ -109,19 +107,16 @@ read_replica_set_selector get_selector_for_reads(tablet_transition_stage stage)
|
|||||||
on_internal_error(tablet_logger, format("Invalid tablet transition stage: {}", static_cast<int>(stage)));
|
on_internal_error(tablet_logger, format("Invalid tablet transition stage: {}", static_cast<int>(stage)));
|
||||||
}
|
}
|
||||||
|
|
||||||
tablet_transition_info::tablet_transition_info(tablet_transition_stage stage,
|
tablet_transition_info::tablet_transition_info(tablet_transition_stage stage, tablet_transition_kind transition, tablet_replica_set next,
|
||||||
tablet_transition_kind transition,
|
std::optional<tablet_replica> pending_replica, service::session_id session_id)
|
||||||
tablet_replica_set next,
|
|
||||||
std::optional<tablet_replica> pending_replica,
|
|
||||||
service::session_id session_id)
|
|
||||||
: stage(stage)
|
: stage(stage)
|
||||||
, transition(transition)
|
, transition(transition)
|
||||||
, next(std::move(next))
|
, next(std::move(next))
|
||||||
, pending_replica(std::move(pending_replica))
|
, pending_replica(std::move(pending_replica))
|
||||||
, session_id(session_id)
|
, session_id(session_id)
|
||||||
, writes(get_selector_for_writes(stage))
|
, writes(get_selector_for_writes(stage))
|
||||||
, reads(get_selector_for_reads(stage))
|
, reads(get_selector_for_reads(stage)) {
|
||||||
{ }
|
}
|
||||||
|
|
||||||
tablet_migration_streaming_info get_migration_streaming_info(const locator::topology& topo, const tablet_info& tinfo, const tablet_migration_info& trinfo) {
|
tablet_migration_streaming_info get_migration_streaming_info(const locator::topology& topo, const tablet_info& tinfo, const tablet_migration_info& trinfo) {
|
||||||
return get_migration_streaming_info(topo, tinfo, migration_to_transition_info(tinfo, trinfo));
|
return get_migration_streaming_info(topo, tinfo, migration_to_transition_info(tinfo, trinfo));
|
||||||
@@ -188,17 +183,18 @@ bool tablet_has_excluded_node(const locator::topology& topo, const tablet_info&
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
tablet_info::tablet_info(tablet_replica_set replicas, db_clock::time_point repair_time, tablet_task_info repair_task_info, tablet_task_info migration_task_info, int64_t sstables_repaired_at)
|
tablet_info::tablet_info(tablet_replica_set replicas, db_clock::time_point repair_time, tablet_task_info repair_task_info, tablet_task_info migration_task_info,
|
||||||
|
int64_t sstables_repaired_at)
|
||||||
: replicas(std::move(replicas))
|
: replicas(std::move(replicas))
|
||||||
, repair_time(repair_time)
|
, repair_time(repair_time)
|
||||||
, repair_task_info(std::move(repair_task_info))
|
, repair_task_info(std::move(repair_task_info))
|
||||||
, migration_task_info(std::move(migration_task_info))
|
, migration_task_info(std::move(migration_task_info))
|
||||||
, sstables_repaired_at(sstables_repaired_at)
|
, sstables_repaired_at(sstables_repaired_at) {
|
||||||
{}
|
}
|
||||||
|
|
||||||
tablet_info::tablet_info(tablet_replica_set replicas)
|
tablet_info::tablet_info(tablet_replica_set replicas)
|
||||||
: tablet_info(std::move(replicas), db_clock::time_point{}, tablet_task_info{}, tablet_task_info{}, int64_t(0))
|
: tablet_info(std::move(replicas), db_clock::time_point{}, tablet_task_info{}, tablet_task_info{}, int64_t(0)) {
|
||||||
{}
|
}
|
||||||
|
|
||||||
std::optional<tablet_info> merge_tablet_info(tablet_info a, tablet_info b) {
|
std::optional<tablet_info> merge_tablet_info(tablet_info a, tablet_info b) {
|
||||||
auto repair_task_info = tablet_task_info::merge_repair_tasks(a.repair_task_info, b.repair_task_info);
|
auto repair_task_info = tablet_task_info::merge_repair_tasks(a.repair_task_info, b.repair_task_info);
|
||||||
@@ -247,7 +243,8 @@ tablet_replica_set get_new_replicas(const tablet_info& tinfo, const tablet_migra
|
|||||||
return replace_replica(tinfo.replicas, mig.src, mig.dst);
|
return replace_replica(tinfo.replicas, mig.src, mig.dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
tablet_replica_set get_primary_replicas(const locator::tablet_map& tablet_map, tablet_id tid, const locator::topology& topo, std::function<bool(const tablet_replica&)> filter) {
|
tablet_replica_set get_primary_replicas(
|
||||||
|
const locator::tablet_map& tablet_map, tablet_id tid, const locator::topology& topo, std::function<bool(const tablet_replica&)> filter) {
|
||||||
const auto& info = tablet_map.get_tablet_info(tid);
|
const auto& info = tablet_map.get_tablet_info(tid);
|
||||||
const auto* transition = tablet_map.get_tablet_transition_info(tid);
|
const auto* transition = tablet_map.get_tablet_transition_info(tid);
|
||||||
|
|
||||||
@@ -291,17 +288,11 @@ tablet_replica_set get_primary_replicas(const locator::tablet_map& tablet_map, t
|
|||||||
}
|
}
|
||||||
|
|
||||||
tablet_transition_info migration_to_transition_info(const tablet_info& ti, const tablet_migration_info& mig) {
|
tablet_transition_info migration_to_transition_info(const tablet_info& ti, const tablet_migration_info& mig) {
|
||||||
return tablet_transition_info {
|
return tablet_transition_info{tablet_transition_stage::allow_write_both_read_old, mig.kind, get_new_replicas(ti, mig), mig.dst};
|
||||||
tablet_transition_stage::allow_write_both_read_old,
|
|
||||||
mig.kind,
|
|
||||||
get_new_replicas(ti, mig),
|
|
||||||
mig.dst
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
|
|
||||||
no_such_tablet_map::no_such_tablet_map(const table_id& id)
|
no_such_tablet_map::no_such_tablet_map(const table_id& id)
|
||||||
: runtime_error{fmt::format("Tablet map not found for table {}", id)}
|
: runtime_error{fmt::format("Tablet map not found for table {}", id)} {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const tablet_map& tablet_metadata::get_tablet_map(table_id id) const {
|
const tablet_map& tablet_metadata::get_tablet_map(table_id id) const {
|
||||||
@@ -393,7 +384,8 @@ future<> tablet_metadata::set_colocated_table(table_id id, table_id base_id) {
|
|||||||
// This shouldn't be used normally except for unit tests.
|
// This shouldn't be used normally except for unit tests.
|
||||||
tablet_logger.warn("Changing base table {} to be a co-located table of another base table {}. This should be used only in tests.", id, base_id);
|
tablet_logger.warn("Changing base table {} to be a co-located table of another base table {}. This should be used only in tests.", id, base_id);
|
||||||
if (it->second.size() > 1) {
|
if (it->second.size() > 1) {
|
||||||
on_internal_error(tablet_logger, format("Table {} is already a base table for {} and cannot be set as a co-located table of another base table.", id, it->second));
|
on_internal_error(tablet_logger,
|
||||||
|
format("Table {} is already a base table for {} and cannot be set as a co-located table of another base table.", id, it->second));
|
||||||
}
|
}
|
||||||
_table_groups.erase(it);
|
_table_groups.erase(it);
|
||||||
}
|
}
|
||||||
@@ -486,8 +478,7 @@ tablet_map::tablet_map(size_t tablet_count, bool with_raft_info)
|
|||||||
}
|
}
|
||||||
|
|
||||||
tablet_map tablet_map::clone() const {
|
tablet_map tablet_map::clone() const {
|
||||||
return tablet_map(_tablets, _log2_tablets, _transitions, _resize_decision, _resize_task_info,
|
return tablet_map(_tablets, _log2_tablets, _transitions, _resize_decision, _resize_task_info, _repair_scheduler_config, _raft_info);
|
||||||
_repair_scheduler_config, _raft_info);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
future<tablet_map> tablet_map::clone_gently() const {
|
future<tablet_map> tablet_map::clone_gently() const {
|
||||||
@@ -512,8 +503,8 @@ future<tablet_map> tablet_map::clone_gently() const {
|
|||||||
co_await coroutine::maybe_yield();
|
co_await coroutine::maybe_yield();
|
||||||
}
|
}
|
||||||
|
|
||||||
co_return tablet_map(std::move(tablets), _log2_tablets, std::move(transitions), _resize_decision,
|
co_return tablet_map(
|
||||||
_resize_task_info, _repair_scheduler_config, std::move(raft_info));
|
std::move(tablets), _log2_tablets, std::move(transitions), _resize_decision, _resize_task_info, _repair_scheduler_config, std::move(raft_info));
|
||||||
}
|
}
|
||||||
|
|
||||||
void tablet_map::check_tablet_id(tablet_id id) const {
|
void tablet_map::check_tablet_id(tablet_id id) const {
|
||||||
@@ -593,7 +584,8 @@ auto tablet_replica_comparator(const locator::topology& topo) {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<tablet_replica> maybe_get_primary_replica(tablet_id id, const tablet_replica_set& replica_set, const locator::topology& topo, std::function<bool(const tablet_replica&)> filter) {
|
std::optional<tablet_replica> maybe_get_primary_replica(
|
||||||
|
tablet_id id, const tablet_replica_set& replica_set, const locator::topology& topo, std::function<bool(const tablet_replica&)> filter) {
|
||||||
tablet_replica_set replica_set_copy = replica_set;
|
tablet_replica_set replica_set_copy = replica_set;
|
||||||
std::ranges::sort(replica_set_copy, tablet_replica_comparator(topo));
|
std::ranges::sort(replica_set_copy, tablet_replica_comparator(topo));
|
||||||
const auto replicas = replica_set_copy | std::views::filter(std::move(filter)) | std::ranges::to<tablet_replica_set>();
|
const auto replicas = replica_set_copy | std::views::filter(std::move(filter)) | std::ranges::to<tablet_replica_set>();
|
||||||
@@ -618,7 +610,9 @@ std::optional<tablet_replica> maybe_get_primary_replica(tablet_id id, const tabl
|
|||||||
|
|
||||||
tablet_replica tablet_map::get_primary_replica(tablet_id id, const locator::topology& topo) const {
|
tablet_replica tablet_map::get_primary_replica(tablet_id id, const locator::topology& topo) const {
|
||||||
const auto& replicas = get_tablet_info(id).replicas;
|
const auto& replicas = get_tablet_info(id).replicas;
|
||||||
return maybe_get_primary_replica(id, replicas, topo, [&] (const auto& _) { return true; }).value();
|
return maybe_get_primary_replica(id, replicas, topo, [&](const auto& _) {
|
||||||
|
return true;
|
||||||
|
}).value();
|
||||||
}
|
}
|
||||||
|
|
||||||
tablet_replica tablet_map::get_secondary_replica(tablet_id id, const locator::topology& topo) const {
|
tablet_replica tablet_map::get_secondary_replica(tablet_id id, const locator::topology& topo) const {
|
||||||
@@ -743,9 +737,7 @@ const tablet_raft_info& tablet_map::get_tablet_raft_info(tablet_id id) const {
|
|||||||
void tablet_map::set_tablet_raft_info(tablet_id id, tablet_raft_info raft_info) {
|
void tablet_map::set_tablet_raft_info(tablet_id id, tablet_raft_info raft_info) {
|
||||||
check_tablet_id(id);
|
check_tablet_id(id);
|
||||||
if (_raft_info.empty()) {
|
if (_raft_info.empty()) {
|
||||||
on_internal_error(tablet_logger,
|
on_internal_error(tablet_logger, format("Tablet map has no raft info, tablet_id {}, group_id {}", id, raft_info.group_id));
|
||||||
format("Tablet map has no raft info, tablet_id {}, group_id {}",
|
|
||||||
id, raft_info.group_id));
|
|
||||||
}
|
}
|
||||||
_raft_info[size_t(id)] = std::move(raft_info);
|
_raft_info[size_t(id)] = std::move(raft_info);
|
||||||
}
|
}
|
||||||
@@ -988,7 +980,8 @@ std::optional<uint64_t> load_stats::get_tablet_size(host_id host, const range_ba
|
|||||||
return std::nullopt;
|
return std::nullopt;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<uint64_t> load_stats::get_tablet_size_in_transition(host_id host, const range_based_tablet_id& rb_tid, const tablet_info& ti, const tablet_transition_info* trinfo) const {
|
std::optional<uint64_t> load_stats::get_tablet_size_in_transition(
|
||||||
|
host_id host, const range_based_tablet_id& rb_tid, const tablet_info& ti, const tablet_transition_info* trinfo) const {
|
||||||
std::optional<uint64_t> tablet_size_opt;
|
std::optional<uint64_t> tablet_size_opt;
|
||||||
tablet_size_opt = get_tablet_size(host, rb_tid);
|
tablet_size_opt = get_tablet_size(host, rb_tid);
|
||||||
if (tablet_size_opt) {
|
if (tablet_size_opt) {
|
||||||
@@ -1038,7 +1031,8 @@ std::optional<uint64_t> load_stats::get_tablet_size_in_transition(host_id host,
|
|||||||
return tablet_size_opt;
|
return tablet_size_opt;
|
||||||
}
|
}
|
||||||
|
|
||||||
lw_shared_ptr<load_stats> load_stats::reconcile_tablets_resize(const std::unordered_set<table_id>& tables, const token_metadata& old_tm, const token_metadata& new_tm) const {
|
lw_shared_ptr<load_stats> load_stats::reconcile_tablets_resize(
|
||||||
|
const std::unordered_set<table_id>& tables, const token_metadata& old_tm, const token_metadata& new_tm) const {
|
||||||
lw_shared_ptr<load_stats> reconciled_stats{make_lw_shared<load_stats>(*this)};
|
lw_shared_ptr<load_stats> reconciled_stats{make_lw_shared<load_stats>(*this)};
|
||||||
load_stats& new_stats = *reconciled_stats;
|
load_stats& new_stats = *reconciled_stats;
|
||||||
|
|
||||||
@@ -1065,10 +1059,12 @@ lw_shared_ptr<load_stats> load_stats::reconcile_tablets_resize(const std::unorde
|
|||||||
auto tablet_size_opt2 = new_stats.get_tablet_size(replica.host, rb_tid2);
|
auto tablet_size_opt2 = new_stats.get_tablet_size(replica.host, rb_tid2);
|
||||||
if (!tablet_size_opt1 || !tablet_size_opt2) {
|
if (!tablet_size_opt1 || !tablet_size_opt2) {
|
||||||
if (!tablet_size_opt1) {
|
if (!tablet_size_opt1) {
|
||||||
tablet_logger.debug("Unable to find tablet size in stats for table resize reconcile for tablet {} on host {}", rb_tid1, replica.host);
|
tablet_logger.debug(
|
||||||
|
"Unable to find tablet size in stats for table resize reconcile for tablet {} on host {}", rb_tid1, replica.host);
|
||||||
}
|
}
|
||||||
if (!tablet_size_opt2) {
|
if (!tablet_size_opt2) {
|
||||||
tablet_logger.debug("Unable to find tablet size in stats for table resize reconcile for tablet {} on host {}", rb_tid2, replica.host);
|
tablet_logger.debug(
|
||||||
|
"Unable to find tablet size in stats for table resize reconcile for tablet {} on host {}", rb_tid2, replica.host);
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@@ -1106,7 +1102,8 @@ lw_shared_ptr<load_stats> load_stats::reconcile_tablets_resize(const std::unorde
|
|||||||
return reconciled_stats;
|
return reconciled_stats;
|
||||||
}
|
}
|
||||||
|
|
||||||
lw_shared_ptr<load_stats> load_stats::migrate_tablet_size(locator::host_id leaving, locator::host_id pending, locator::global_tablet_id gid, const dht::token_range trange) const {
|
lw_shared_ptr<load_stats> load_stats::migrate_tablet_size(
|
||||||
|
locator::host_id leaving, locator::host_id pending, locator::global_tablet_id gid, const dht::token_range trange) const {
|
||||||
|
|
||||||
lw_shared_ptr<load_stats> result;
|
lw_shared_ptr<load_stats> result;
|
||||||
|
|
||||||
@@ -1131,13 +1128,14 @@ lw_shared_ptr<load_stats> load_stats::migrate_tablet_size(locator::host_id leavi
|
|||||||
tablet_range_splitter::tablet_range_splitter(schema_ptr schema, const tablet_map& tablets, host_id host, const dht::partition_range_vector& ranges)
|
tablet_range_splitter::tablet_range_splitter(schema_ptr schema, const tablet_map& tablets, host_id host, const dht::partition_range_vector& ranges)
|
||||||
: _schema(std::move(schema))
|
: _schema(std::move(schema))
|
||||||
, _ranges(ranges)
|
, _ranges(ranges)
|
||||||
, _ranges_it(_ranges.begin())
|
, _ranges_it(_ranges.begin()) {
|
||||||
{
|
|
||||||
// Filter all tablets and save only those that have a replica on the specified host.
|
// Filter all tablets and save only those that have a replica on the specified host.
|
||||||
for (auto tid = std::optional(tablets.first_tablet()); tid; tid = tablets.next_tablet(*tid)) {
|
for (auto tid = std::optional(tablets.first_tablet()); tid; tid = tablets.next_tablet(*tid)) {
|
||||||
const auto& tablet_info = tablets.get_tablet_info(*tid);
|
const auto& tablet_info = tablets.get_tablet_info(*tid);
|
||||||
|
|
||||||
auto replica_it = std::ranges::find_if(tablet_info.replicas, [&] (auto&& r) { return r.host == host; });
|
auto replica_it = std::ranges::find_if(tablet_info.replicas, [&](auto&& r) {
|
||||||
|
return r.host == host;
|
||||||
|
});
|
||||||
if (replica_it == tablet_info.replicas.end()) {
|
if (replica_it == tablet_info.replicas.end()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@@ -1235,6 +1233,7 @@ class tablet_effective_replication_map : public effective_replication_map {
|
|||||||
table_id _table;
|
table_id _table;
|
||||||
tablet_sharder _sharder;
|
tablet_sharder _sharder;
|
||||||
mutable const tablet_map* _tmap = nullptr;
|
mutable const tablet_map* _tmap = nullptr;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
host_id_vector_replica_set to_host_set(const tablet_replica_set& replicas) const {
|
host_id_vector_replica_set to_host_set(const tablet_replica_set& replicas) const {
|
||||||
host_id_vector_replica_set result;
|
host_id_vector_replica_set result;
|
||||||
@@ -1289,8 +1288,7 @@ private:
|
|||||||
if (!info->pending_replica) {
|
if (!info->pending_replica) {
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
tablet_logger.trace("get_pending_endpoints({}): table={}, tablet={}, replica={}",
|
tablet_logger.trace("get_pending_endpoints({}): table={}, tablet={}, replica={}", search_token, _table, tablet, *info->pending_replica);
|
||||||
search_token, _table, tablet, *info->pending_replica);
|
|
||||||
return {info->pending_replica->host};
|
return {info->pending_replica->host};
|
||||||
}
|
}
|
||||||
case write_replica_set_selector::next:
|
case write_replica_set_selector::next:
|
||||||
@@ -1322,14 +1320,11 @@ private:
|
|||||||
|
|
||||||
|
|
||||||
public:
|
public:
|
||||||
tablet_effective_replication_map(table_id table,
|
tablet_effective_replication_map(table_id table, replication_strategy_ptr rs, token_metadata_ptr tmptr, size_t replication_factor)
|
||||||
replication_strategy_ptr rs,
|
|
||||||
token_metadata_ptr tmptr,
|
|
||||||
size_t replication_factor)
|
|
||||||
: effective_replication_map(std::move(rs), std::move(tmptr), replication_factor)
|
: effective_replication_map(std::move(rs), std::move(tmptr), replication_factor)
|
||||||
, _table(table)
|
, _table(table)
|
||||||
, _sharder(*_tmptr, table)
|
, _sharder(*_tmptr, table) {
|
||||||
{ }
|
}
|
||||||
|
|
||||||
virtual ~tablet_effective_replication_map() = default;
|
virtual ~tablet_effective_replication_map() = default;
|
||||||
|
|
||||||
@@ -1416,11 +1411,12 @@ public:
|
|||||||
token_metadata_ptr _tmptr; // To keep the tablet map alive.
|
token_metadata_ptr _tmptr; // To keep the tablet map alive.
|
||||||
const tablet_map& _tmap;
|
const tablet_map& _tmap;
|
||||||
std::optional<tablet_id> _next;
|
std::optional<tablet_id> _next;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
splitter(token_metadata_ptr tmptr, const tablet_map& tmap)
|
splitter(token_metadata_ptr tmptr, const tablet_map& tmap)
|
||||||
: _tmptr(std::move(tmptr))
|
: _tmptr(std::move(tmptr))
|
||||||
, _tmap(tmap)
|
, _tmap(tmap) {
|
||||||
{ }
|
}
|
||||||
|
|
||||||
void reset(dht::ring_position_view pos) override {
|
void reset(dht::ring_position_view pos) override {
|
||||||
_next = _tmap.get_tablet_id(pos.token());
|
_next = _tmap.get_tablet_id(pos.token());
|
||||||
@@ -1447,17 +1443,15 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
void tablet_aware_replication_strategy::validate_tablet_options(const abstract_replication_strategy& ars,
|
void tablet_aware_replication_strategy::validate_tablet_options(
|
||||||
const gms::feature_service& fs,
|
const abstract_replication_strategy& ars, const gms::feature_service& fs, const replication_strategy_config_options& opts) const {
|
||||||
const replication_strategy_config_options& opts) const {
|
|
||||||
if (ars._uses_tablets && !fs.tablets) {
|
if (ars._uses_tablets && !fs.tablets) {
|
||||||
throw exceptions::configuration_exception("Tablet replication is not enabled");
|
throw exceptions::configuration_exception("Tablet replication is not enabled");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void tablet_aware_replication_strategy::process_tablet_options(abstract_replication_strategy& ars,
|
void tablet_aware_replication_strategy::process_tablet_options(
|
||||||
replication_strategy_config_options& opts,
|
abstract_replication_strategy& ars, replication_strategy_config_options& opts, replication_strategy_params params) {
|
||||||
replication_strategy_params params) {
|
|
||||||
if (ars._uses_tablets) {
|
if (ars._uses_tablets) {
|
||||||
_initial_tablets = params.initial_tablets.value_or(0);
|
_initial_tablets = params.initial_tablets.value_or(0);
|
||||||
_consistency = params.consistency.value_or(data_dictionary::consistency_config_option::eventual);
|
_consistency = params.consistency.value_or(data_dictionary::consistency_config_option::eventual);
|
||||||
@@ -1477,15 +1471,9 @@ void tablet_metadata_guard::check() noexcept {
|
|||||||
auto* trinfo = tmap.get_tablet_transition_info(_tablet.tablet);
|
auto* trinfo = tmap.get_tablet_transition_info(_tablet.tablet);
|
||||||
tablet_logger.debug("tablet_metadata_guard::check: table {}.{}, tablet {}, "
|
tablet_logger.debug("tablet_metadata_guard::check: table {}.{}, tablet {}, "
|
||||||
"old erm version {}, new erm version {}, old tablet map {}, new tablet map {}",
|
"old erm version {}, new erm version {}, old tablet map {}, new tablet map {}",
|
||||||
_table->schema()->ks_name(), _table->schema()->cf_name(),
|
_table->schema()->ks_name(), _table->schema()->cf_name(), _tablet, _erm.get()->get_token_metadata().get_version(),
|
||||||
_tablet,
|
erm.get()->get_token_metadata().get_version(), old_tmap, tmap);
|
||||||
_erm.get()->get_token_metadata().get_version(),
|
if (bool(_stage) != bool(trinfo) || (_stage && _stage != trinfo->stage) || old_tmap.tablet_count() != tmap.tablet_count()) {
|
||||||
erm.get()->get_token_metadata().get_version(),
|
|
||||||
old_tmap,
|
|
||||||
tmap);
|
|
||||||
if (bool(_stage) != bool(trinfo) || (_stage && _stage != trinfo->stage) ||
|
|
||||||
old_tmap.tablet_count() != tmap.tablet_count())
|
|
||||||
{
|
|
||||||
tablet_logger.debug("tablet_metadata_guard::check: retain the erm and abort the guard");
|
tablet_logger.debug("tablet_metadata_guard::check: retain the erm and abort the guard");
|
||||||
_abort_source.request_abort();
|
_abort_source.request_abort();
|
||||||
} else {
|
} else {
|
||||||
@@ -1498,8 +1486,7 @@ void tablet_metadata_guard::check() noexcept {
|
|||||||
tablet_metadata_guard::tablet_metadata_guard(replica::table& table, global_tablet_id tablet)
|
tablet_metadata_guard::tablet_metadata_guard(replica::table& table, global_tablet_id tablet)
|
||||||
: _table(table.shared_from_this())
|
: _table(table.shared_from_this())
|
||||||
, _tablet(tablet)
|
, _tablet(tablet)
|
||||||
, _erm(table.get_effective_replication_map())
|
, _erm(table.get_effective_replication_map()) {
|
||||||
{
|
|
||||||
subscribe();
|
subscribe();
|
||||||
if (auto* trinfo = get_tablet_map().get_tablet_transition_info(tablet.tablet)) {
|
if (auto* trinfo = get_tablet_map().get_tablet_transition_info(tablet.tablet)) {
|
||||||
_stage = trinfo->stage;
|
_stage = trinfo->stage;
|
||||||
@@ -1522,12 +1509,8 @@ token_metadata_guard::token_metadata_guard(replica::table& table, dht::token tok
|
|||||||
}
|
}
|
||||||
const auto table_id = table.schema()->id();
|
const auto table_id = table.schema()->id();
|
||||||
const auto& tablet_map = erm->get_token_metadata().tablets().get_tablet_map(table_id);
|
const auto& tablet_map = erm->get_token_metadata().tablets().get_tablet_map(table_id);
|
||||||
return make_lw_shared<tablet_metadata_guard>(table, global_tablet_id {
|
return make_lw_shared<tablet_metadata_guard>(table, global_tablet_id{.table = table_id, .tablet = tablet_map.get_tablet_id(token)});
|
||||||
.table = table_id,
|
})) {
|
||||||
.tablet = tablet_map.get_tablet_id(token)
|
|
||||||
});
|
|
||||||
}))
|
|
||||||
{
|
|
||||||
}
|
}
|
||||||
|
|
||||||
const effective_replication_map_ptr& token_metadata_guard::get_erm() const {
|
const effective_replication_map_ptr& token_metadata_guard::get_erm() const {
|
||||||
@@ -1537,8 +1520,7 @@ const effective_replication_map_ptr& token_metadata_guard::get_erm() const {
|
|||||||
|
|
||||||
static void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars,
|
static void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars,
|
||||||
const std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<host_id>>>& dc_rack_map,
|
const std::unordered_map<sstring, std::unordered_map<sstring, std::unordered_set<host_id>>>& dc_rack_map,
|
||||||
std::function<bool(host_id)> is_normal_token_owner,
|
std::function<bool(host_id)> is_normal_token_owner, std::function<bool(host_id)> is_transitioning_token_owner) {
|
||||||
std::function<bool(host_id)> is_transitioning_token_owner) {
|
|
||||||
tablet_logger.debug("[assert_rf_rack_valid_keyspace]: Starting verifying that keyspace '{}' is RF-rack-valid", ks);
|
tablet_logger.debug("[assert_rf_rack_valid_keyspace]: Starting verifying that keyspace '{}' is RF-rack-valid", ks);
|
||||||
|
|
||||||
// Any keyspace that does NOT use tablets is RF-rack-valid.
|
// Any keyspace that does NOT use tablets is RF-rack-valid.
|
||||||
@@ -1553,9 +1535,9 @@ static void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metad
|
|||||||
|
|
||||||
for (const auto& dc : nts.get_datacenters()) {
|
for (const auto& dc : nts.get_datacenters()) {
|
||||||
if (!dc_rack_map.contains(dc)) {
|
if (!dc_rack_map.contains(dc)) {
|
||||||
on_internal_error(tablet_logger, seastar::format(
|
on_internal_error(tablet_logger, seastar::format("Precondition violated: DC '{}' is part of the passed replication strategy, but it is not "
|
||||||
"Precondition violated: DC '{}' is part of the passed replication strategy, but it is not "
|
"known by the passed locator::token_metadata_ptr.",
|
||||||
"known by the passed locator::token_metadata_ptr.", dc));
|
dc));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1567,8 +1549,7 @@ static void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metad
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto required_racks = rf_data->is_rack_based() ?
|
auto required_racks = rf_data->is_rack_based() ? std::flat_set<sstring>(std::from_range, rf_data->get_rack_list()) : std::flat_set<sstring>{};
|
||||||
std::flat_set<sstring>(std::from_range, rf_data->get_rack_list()) : std::flat_set<sstring>{};
|
|
||||||
|
|
||||||
size_t normal_rack_count = 0;
|
size_t normal_rack_count = 0;
|
||||||
size_t transitioning_rack_count = 0;
|
size_t transitioning_rack_count = 0;
|
||||||
@@ -1588,12 +1569,9 @@ static void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metad
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (required_racks.size() > 0) {
|
if (required_racks.size() > 0) {
|
||||||
const auto rack_list = required_racks
|
const auto rack_list = required_racks | std::views::join_with(std::string_view(", ")) | std::ranges::to<std::string>();
|
||||||
| std::views::join_with(std::string_view(", "))
|
|
||||||
| std::ranges::to<std::string>();
|
|
||||||
|
|
||||||
throw std::invalid_argument(std::format(
|
throw std::invalid_argument(std::format("The keyspace '{}' is required to be RF-rack-valid. "
|
||||||
"The keyspace '{}' is required to be RF-rack-valid. "
|
|
||||||
"That condition is violated for DC '{}': the following racks are "
|
"That condition is violated for DC '{}': the following racks are "
|
||||||
"required by the replication strategy, but they don't exist in the topology: {}.",
|
"required by the replication strategy, but they don't exist in the topology: {}.",
|
||||||
ks, std::string_view(dc), rack_list));
|
ks, std::string_view(dc), rack_list));
|
||||||
@@ -1615,8 +1593,7 @@ static void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metad
|
|||||||
const bool invalid_arbiter_dc = normal_rack_count == 0 && rf > 0;
|
const bool invalid_arbiter_dc = normal_rack_count == 0 && rf > 0;
|
||||||
|
|
||||||
if (!valid_rf || invalid_arbiter_dc) {
|
if (!valid_rf || invalid_arbiter_dc) {
|
||||||
throw std::invalid_argument(std::format(
|
throw std::invalid_argument(std::format("The keyspace '{}' is required to be RF-rack-valid. "
|
||||||
"The keyspace '{}' is required to be RF-rack-valid. "
|
|
||||||
"That condition is violated for DC '{}': RF={} vs. rack count={}.",
|
"That condition is violated for DC '{}': RF={} vs. rack count={}.",
|
||||||
ks, std::string_view(dc), rf, normal_rack_count));
|
ks, std::string_view(dc), rf, normal_rack_count));
|
||||||
}
|
}
|
||||||
@@ -1632,23 +1609,22 @@ static bool is_normal_token_owner(const token_metadata& tm, locator::host_id hos
|
|||||||
|
|
||||||
static bool is_transitioning_token_owner(const token_metadata& tm, locator::host_id host) {
|
static bool is_transitioning_token_owner(const token_metadata& tm, locator::host_id host) {
|
||||||
auto& node = tm.get_topology().get_node(host);
|
auto& node = tm.get_topology().get_node(host);
|
||||||
return tm.get_topology().get_node(host).is_bootstrapping() ||
|
return tm.get_topology().get_node(host).is_bootstrapping() || (tm.is_normal_token_owner(host) && (tm.is_leaving(host) || node.is_draining()));
|
||||||
(tm.is_normal_token_owner(host) && (tm.is_leaving(host) || node.is_draining()));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars) {
|
void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars) {
|
||||||
assert_rf_rack_valid_keyspace(ks, tmptr, ars,
|
assert_rf_rack_valid_keyspace(
|
||||||
tmptr->get_topology().get_datacenter_racks(),
|
ks, tmptr, ars, tmptr->get_topology().get_datacenter_racks(),
|
||||||
[&tmptr](host_id host) {
|
[&tmptr](host_id host) {
|
||||||
return is_normal_token_owner(*tmptr, host);
|
return is_normal_token_owner(*tmptr, host);
|
||||||
},
|
},
|
||||||
[&tmptr](host_id host) {
|
[&tmptr](host_id host) {
|
||||||
return is_transitioning_token_owner(*tmptr, host);
|
return is_transitioning_token_owner(*tmptr, host);
|
||||||
}
|
});
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars, rf_rack_topology_operation op) {
|
void assert_rf_rack_valid_keyspace(
|
||||||
|
std::string_view ks, const token_metadata_ptr tmptr, const abstract_replication_strategy& ars, rf_rack_topology_operation op) {
|
||||||
auto dc_rack_map = tmptr->get_topology().get_datacenter_racks();
|
auto dc_rack_map = tmptr->get_topology().get_datacenter_racks();
|
||||||
|
|
||||||
switch (op.tag) {
|
switch (op.tag) {
|
||||||
@@ -1674,8 +1650,8 @@ void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
assert_rf_rack_valid_keyspace(ks, tmptr, ars,
|
assert_rf_rack_valid_keyspace(
|
||||||
dc_rack_map,
|
ks, tmptr, ars, dc_rack_map,
|
||||||
[&tmptr, &op](host_id host) {
|
[&tmptr, &op](host_id host) {
|
||||||
if (op.tag == rf_rack_topology_operation::type::add && host == op.node_id) {
|
if (op.tag == rf_rack_topology_operation::type::add && host == op.node_id) {
|
||||||
return true;
|
return true;
|
||||||
@@ -1687,8 +1663,7 @@ void assert_rf_rack_valid_keyspace(std::string_view ks, const token_metadata_ptr
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return is_transitioning_token_owner(*tmptr, host);
|
return is_transitioning_token_owner(*tmptr, host);
|
||||||
}
|
});
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rack_list get_allowed_racks(const locator::token_metadata& tm, const sstring& dc) {
|
rack_list get_allowed_racks(const locator::token_metadata& tm, const sstring& dc) {
|
||||||
@@ -1704,17 +1679,16 @@ rack_list get_allowed_racks(const locator::token_metadata& tm, const sstring& dc
|
|||||||
const auto& all_dcs = tm.get_datacenter_racks_token_owners();
|
const auto& all_dcs = tm.get_datacenter_racks_token_owners();
|
||||||
auto it = all_dcs.find(dc);
|
auto it = all_dcs.find(dc);
|
||||||
if (it != all_dcs.end()) {
|
if (it != all_dcs.end()) {
|
||||||
return it->second | std::views::keys
|
return it->second | std::views::keys | std::views::filter([&](const sstring& rack) {
|
||||||
| std::views::filter([&] (const sstring& rack) { return normal_nodes(rack) > 0; })
|
return normal_nodes(rack) > 0;
|
||||||
| std::ranges::to<std::vector<sstring>>();
|
}) | std::ranges::to<std::vector<sstring>>();
|
||||||
}
|
}
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
} // namespace locator
|
||||||
|
|
||||||
auto fmt::formatter<locator::resize_decision_way>::format(const locator::resize_decision_way& way, fmt::format_context& ctx) const
|
auto fmt::formatter<locator::resize_decision_way>::format(const locator::resize_decision_way& way, fmt::format_context& ctx) const -> decltype(ctx.out()) {
|
||||||
-> decltype(ctx.out()) {
|
|
||||||
static const std::array<sstring, 3> index_to_string = {
|
static const std::array<sstring, 3> index_to_string = {
|
||||||
"none",
|
"none",
|
||||||
"split",
|
"split",
|
||||||
@@ -1724,8 +1698,7 @@ auto fmt::formatter<locator::resize_decision_way>::format(const locator::resize_
|
|||||||
return fmt::format_to(ctx.out(), "{}", index_to_string[way.index()]);
|
return fmt::format_to(ctx.out(), "{}", index_to_string[way.index()]);
|
||||||
}
|
}
|
||||||
|
|
||||||
auto fmt::formatter<locator::global_tablet_id>::format(const locator::global_tablet_id& id, fmt::format_context& ctx) const
|
auto fmt::formatter<locator::global_tablet_id>::format(const locator::global_tablet_id& id, fmt::format_context& ctx) const -> decltype(ctx.out()) {
|
||||||
-> decltype(ctx.out()) {
|
|
||||||
return fmt::format_to(ctx.out(), "{}:{}", id.table, id.tablet);
|
return fmt::format_to(ctx.out(), "{}:{}", id.table, id.tablet);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1739,8 +1712,7 @@ auto fmt::formatter<locator::tablet_transition_kind>::format(const locator::tabl
|
|||||||
return fmt::format_to(ctx.out(), "{}", locator::tablet_transition_kind_to_string(kind));
|
return fmt::format_to(ctx.out(), "{}", locator::tablet_transition_kind_to_string(kind));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto fmt::formatter<locator::tablet_task_type>::format(const locator::tablet_task_type& kind, fmt::format_context& ctx) const
|
auto fmt::formatter<locator::tablet_task_type>::format(const locator::tablet_task_type& kind, fmt::format_context& ctx) const -> decltype(ctx.out()) {
|
||||||
-> decltype(ctx.out()) {
|
|
||||||
return fmt::format_to(ctx.out(), "{}", locator::tablet_task_type_to_string(kind));
|
return fmt::format_to(ctx.out(), "{}", locator::tablet_task_type_to_string(kind));
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1749,8 +1721,7 @@ auto fmt::formatter<locator::tablet_repair_incremental_mode>::format(const locat
|
|||||||
return fmt::format_to(ctx.out(), "{}", locator::tablet_repair_incremental_mode_to_string(mode));
|
return fmt::format_to(ctx.out(), "{}", locator::tablet_repair_incremental_mode_to_string(mode));
|
||||||
}
|
}
|
||||||
|
|
||||||
auto fmt::formatter<locator::tablet_map>::format(const locator::tablet_map& r, fmt::format_context& ctx) const
|
auto fmt::formatter<locator::tablet_map>::format(const locator::tablet_map& r, fmt::format_context& ctx) const -> decltype(ctx.out()) {
|
||||||
-> decltype(ctx.out()) {
|
|
||||||
auto out = ctx.out();
|
auto out = ctx.out();
|
||||||
if (r.tablet_count() == 0) {
|
if (r.tablet_count() == 0) {
|
||||||
return fmt::format_to(out, "{{}}");
|
return fmt::format_to(out, "{{}}");
|
||||||
@@ -1781,8 +1752,7 @@ auto fmt::formatter<locator::tablet_map>::format(const locator::tablet_map& r, f
|
|||||||
return fmt::format_to(out, "}}");
|
return fmt::format_to(out, "}}");
|
||||||
}
|
}
|
||||||
|
|
||||||
auto fmt::formatter<locator::tablet_metadata>::format(const locator::tablet_metadata& tm, fmt::format_context& ctx) const
|
auto fmt::formatter<locator::tablet_metadata>::format(const locator::tablet_metadata& tm, fmt::format_context& ctx) const -> decltype(ctx.out()) {
|
||||||
-> decltype(ctx.out()) {
|
|
||||||
auto out = ctx.out();
|
auto out = ctx.out();
|
||||||
out = fmt::format_to(out, "{{");
|
out = fmt::format_to(out, "{{");
|
||||||
bool first = true;
|
bool first = true;
|
||||||
@@ -1820,8 +1790,7 @@ auto fmt::formatter<locator::repair_scheduler_config>::format(const locator::rep
|
|||||||
return fmt::format_to(ctx.out(), "{}", rjson::print(rjson::from_string_map(ret)));
|
return fmt::format_to(ctx.out(), "{}", rjson::print(rjson::from_string_map(ret)));
|
||||||
};
|
};
|
||||||
|
|
||||||
auto fmt::formatter<locator::tablet_task_info>::format(const locator::tablet_task_info& info, fmt::format_context& ctx) const
|
auto fmt::formatter<locator::tablet_task_info>::format(const locator::tablet_task_info& info, fmt::format_context& ctx) const -> decltype(ctx.out()) {
|
||||||
-> decltype(ctx.out()) {
|
|
||||||
std::map<sstring, sstring> ret{
|
std::map<sstring, sstring> ret{
|
||||||
{"request_type", fmt::to_string(info.request_type)},
|
{"request_type", fmt::to_string(info.request_type)},
|
||||||
{"tablet_task_id", fmt::to_string(info.tablet_task_id)},
|
{"tablet_task_id", fmt::to_string(info.tablet_task_id)},
|
||||||
@@ -1854,29 +1823,31 @@ bool locator::tablet_task_info::selected_by_filters(const tablet_replica& replic
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
locator::tablet_task_info locator::tablet_task_info::make_auto_repair_request(std::unordered_set<locator::host_id> hosts_filter, std::unordered_set<sstring> dcs_filter, tablet_repair_incremental_mode mode) {
|
locator::tablet_task_info locator::tablet_task_info::make_auto_repair_request(
|
||||||
|
std::unordered_set<locator::host_id> hosts_filter, std::unordered_set<sstring> dcs_filter, tablet_repair_incremental_mode mode) {
|
||||||
long sched_nr = 0;
|
long sched_nr = 0;
|
||||||
auto tablet_task_id = locator::tablet_task_id(utils::UUID_gen::get_time_UUID());
|
auto tablet_task_id = locator::tablet_task_id(utils::UUID_gen::get_time_UUID());
|
||||||
return locator::tablet_task_info{locator::tablet_task_type::auto_repair, tablet_task_id, db_clock::now(), sched_nr, db_clock::time_point(), hosts_filter, dcs_filter, mode};
|
return locator::tablet_task_info{
|
||||||
|
locator::tablet_task_type::auto_repair, tablet_task_id, db_clock::now(), sched_nr, db_clock::time_point(), hosts_filter, dcs_filter, mode};
|
||||||
}
|
}
|
||||||
|
|
||||||
locator::tablet_task_info locator::tablet_task_info::make_user_repair_request(std::unordered_set<locator::host_id> hosts_filter, std::unordered_set<sstring> dcs_filter, tablet_repair_incremental_mode mode) {
|
locator::tablet_task_info locator::tablet_task_info::make_user_repair_request(
|
||||||
|
std::unordered_set<locator::host_id> hosts_filter, std::unordered_set<sstring> dcs_filter, tablet_repair_incremental_mode mode) {
|
||||||
long sched_nr = 0;
|
long sched_nr = 0;
|
||||||
auto tablet_task_id = locator::tablet_task_id(utils::UUID_gen::get_time_UUID());
|
auto tablet_task_id = locator::tablet_task_id(utils::UUID_gen::get_time_UUID());
|
||||||
return locator::tablet_task_info{locator::tablet_task_type::user_repair, tablet_task_id, db_clock::now(), sched_nr, db_clock::time_point(), hosts_filter, dcs_filter, mode};
|
return locator::tablet_task_info{
|
||||||
|
locator::tablet_task_type::user_repair, tablet_task_id, db_clock::now(), sched_nr, db_clock::time_point(), hosts_filter, dcs_filter, mode};
|
||||||
}
|
}
|
||||||
|
|
||||||
std::optional<locator::tablet_task_info> locator::tablet_task_info::merge_repair_tasks(const locator::tablet_task_info& t1, const locator::tablet_task_info& t2) {
|
std::optional<locator::tablet_task_info> locator::tablet_task_info::merge_repair_tasks(
|
||||||
|
const locator::tablet_task_info& t1, const locator::tablet_task_info& t2) {
|
||||||
if (t1.is_valid() && t2.is_valid()) {
|
if (t1.is_valid() && t2.is_valid()) {
|
||||||
// In most cases, all tablets are requested to be repaired by a single
|
// In most cases, all tablets are requested to be repaired by a single
|
||||||
// API request, so they share the same task_id, request_type and other
|
// API request, so they share the same task_id, request_type and other
|
||||||
// parameters. If both tablets have a valid repair_task_info, we could
|
// parameters. If both tablets have a valid repair_task_info, we could
|
||||||
// merge them most of the time.
|
// merge them most of the time.
|
||||||
if (t1.request_type == t2.request_type &&
|
if (t1.request_type == t2.request_type && t1.tablet_task_id == t2.tablet_task_id && t1.repair_incremental_mode == t2.repair_incremental_mode &&
|
||||||
t1.tablet_task_id == t2.tablet_task_id &&
|
t1.repair_dcs_filter == t2.repair_dcs_filter && t1.repair_hosts_filter == t2.repair_hosts_filter) {
|
||||||
t1.repair_incremental_mode == t2.repair_incremental_mode &&
|
|
||||||
t1.repair_dcs_filter == t2.repair_dcs_filter &&
|
|
||||||
t1.repair_hosts_filter == t2.repair_hosts_filter) {
|
|
||||||
// Allow repair_task_info merge, use combination of t1 and t2;
|
// Allow repair_task_info merge, use combination of t1 and t2;
|
||||||
tablet_task_info t = t1;
|
tablet_task_info t = t1;
|
||||||
t.request_time = std::min(t1.request_time, t2.request_time);
|
t.request_time = std::min(t1.request_time, t2.request_time);
|
||||||
|
|||||||
@@ -26,12 +26,16 @@
|
|||||||
|
|
||||||
struct node_printer {
|
struct node_printer {
|
||||||
const locator::node* v;
|
const locator::node* v;
|
||||||
node_printer(const locator::node* n) noexcept : v(n) {}
|
node_printer(const locator::node* n) noexcept
|
||||||
|
: v(n) {
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct fmt::formatter<node_printer> {
|
struct fmt::formatter<node_printer> {
|
||||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
constexpr auto parse(format_parse_context& ctx) {
|
||||||
|
return ctx.begin();
|
||||||
|
}
|
||||||
auto format(const node_printer& np, fmt::format_context& ctx) const {
|
auto format(const node_printer& np, fmt::format_context& ctx) const {
|
||||||
const locator::node* node = np.v;
|
const locator::node* node = np.v;
|
||||||
auto out = fmt::format_to(ctx.out(), "node={}", fmt::ptr(node));
|
auto out = fmt::format_to(ctx.out(), "node={}", fmt::ptr(node));
|
||||||
@@ -43,7 +47,9 @@ struct fmt::formatter<node_printer> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
static auto lazy_backtrace() {
|
static auto lazy_backtrace() {
|
||||||
return seastar::value_of([] { return current_backtrace(); });
|
return seastar::value_of([] {
|
||||||
|
return current_backtrace();
|
||||||
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace locator {
|
namespace locator {
|
||||||
@@ -55,7 +61,8 @@ thread_local const endpoint_dc_rack endpoint_dc_rack::default_location = {
|
|||||||
.rack = locator::production_snitch_base::default_rack,
|
.rack = locator::production_snitch_base::default_rack,
|
||||||
};
|
};
|
||||||
|
|
||||||
node::node(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, bool excluded, this_node is_this_node, node::idx_type idx, bool draining)
|
node::node(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, bool excluded,
|
||||||
|
this_node is_this_node, node::idx_type idx, bool draining)
|
||||||
: _topology(topology)
|
: _topology(topology)
|
||||||
, _host_id(id)
|
, _host_id(id)
|
||||||
, _dc_rack(std::move(dc_rack))
|
, _dc_rack(std::move(dc_rack))
|
||||||
@@ -64,10 +71,11 @@ node::node(const locator::topology* topology, locator::host_id id, endpoint_dc_r
|
|||||||
, _excluded(excluded)
|
, _excluded(excluded)
|
||||||
, _draining(draining)
|
, _draining(draining)
|
||||||
, _is_this_node(is_this_node)
|
, _is_this_node(is_this_node)
|
||||||
, _idx(idx)
|
, _idx(idx) {
|
||||||
{}
|
}
|
||||||
|
|
||||||
node_holder node::make(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, bool excluded, node::this_node is_this_node, node::idx_type idx, bool draining) {
|
node_holder node::make(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, bool excluded,
|
||||||
|
node::this_node is_this_node, node::idx_type idx, bool draining) {
|
||||||
return std::make_unique<node>(topology, std::move(id), std::move(dc_rack), std::move(state), shard_count, excluded, is_this_node, idx, draining);
|
return std::make_unique<node>(topology, std::move(id), std::move(dc_rack), std::move(state), shard_count, excluded, is_this_node, idx, draining);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -77,14 +85,22 @@ node_holder node::clone() const {
|
|||||||
|
|
||||||
std::string node::to_string(node::state s) {
|
std::string node::to_string(node::state s) {
|
||||||
switch (s) {
|
switch (s) {
|
||||||
case state::none: return "none";
|
case state::none:
|
||||||
case state::bootstrapping: return "bootstrapping";
|
return "none";
|
||||||
case state::replacing: return "replacing";
|
case state::bootstrapping:
|
||||||
case state::normal: return "normal";
|
return "bootstrapping";
|
||||||
case state::being_decommissioned: return "being_decommissioned";
|
case state::replacing:
|
||||||
case state::being_removed: return "being_removed";
|
return "replacing";
|
||||||
case state::being_replaced: return "being_replaced";
|
case state::normal:
|
||||||
case state::left: return "left";
|
return "normal";
|
||||||
|
case state::being_decommissioned:
|
||||||
|
return "being_decommissioned";
|
||||||
|
case state::being_removed:
|
||||||
|
return "being_removed";
|
||||||
|
case state::being_replaced:
|
||||||
|
return "being_replaced";
|
||||||
|
case state::left:
|
||||||
|
return "left";
|
||||||
}
|
}
|
||||||
__builtin_unreachable();
|
__builtin_unreachable();
|
||||||
}
|
}
|
||||||
@@ -103,8 +119,7 @@ future<> topology::clear_gently() noexcept {
|
|||||||
topology::topology(shallow_copy, config cfg)
|
topology::topology(shallow_copy, config cfg)
|
||||||
: _shard(this_shard_id())
|
: _shard(this_shard_id())
|
||||||
, _cfg(cfg)
|
, _cfg(cfg)
|
||||||
, _sort_by_proximity(true)
|
, _sort_by_proximity(true) {
|
||||||
{
|
|
||||||
// constructor for shallow copying of token_metadata_impl
|
// constructor for shallow copying of token_metadata_impl
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -112,10 +127,9 @@ topology::topology(config cfg)
|
|||||||
: _shard(this_shard_id())
|
: _shard(this_shard_id())
|
||||||
, _cfg(cfg)
|
, _cfg(cfg)
|
||||||
, _sort_by_proximity(!cfg.disable_proximity_sorting)
|
, _sort_by_proximity(!cfg.disable_proximity_sorting)
|
||||||
, _random_engine(std::random_device{}())
|
, _random_engine(std::random_device{}()) {
|
||||||
{
|
tlogger.trace("topology[{}]: constructing using config: endpoint={} id={} dc={} rack={}", fmt::ptr(this), cfg.this_endpoint, cfg.this_host_id,
|
||||||
tlogger.trace("topology[{}]: constructing using config: endpoint={} id={} dc={} rack={}", fmt::ptr(this),
|
cfg.local_dc_rack.dc, cfg.local_dc_rack.rack);
|
||||||
cfg.this_endpoint, cfg.this_host_id, cfg.local_dc_rack.dc, cfg.local_dc_rack.rack);
|
|
||||||
add_node(cfg.this_host_id, cfg.local_dc_rack, node::state::none);
|
add_node(cfg.this_host_id, cfg.local_dc_rack, node::state::none);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -131,8 +145,7 @@ topology::topology(topology&& o) noexcept
|
|||||||
, _dc_racks(std::move(o._dc_racks))
|
, _dc_racks(std::move(o._dc_racks))
|
||||||
, _sort_by_proximity(o._sort_by_proximity)
|
, _sort_by_proximity(o._sort_by_proximity)
|
||||||
, _datacenters(std::move(o._datacenters))
|
, _datacenters(std::move(o._datacenters))
|
||||||
, _random_engine(std::move(o._random_engine))
|
, _random_engine(std::move(o._random_engine)) {
|
||||||
{
|
|
||||||
SCYLLA_ASSERT(_shard == this_shard_id());
|
SCYLLA_ASSERT(_shard == this_shard_id());
|
||||||
tlogger.trace("topology[{}]: move from [{}]", fmt::ptr(this), fmt::ptr(&o));
|
tlogger.trace("topology[{}]: move from [{}]", fmt::ptr(this), fmt::ptr(&o));
|
||||||
|
|
||||||
@@ -153,7 +166,8 @@ topology& topology::operator=(topology&& o) noexcept {
|
|||||||
|
|
||||||
void topology::set_host_id_cfg(host_id this_host_id) {
|
void topology::set_host_id_cfg(host_id this_host_id) {
|
||||||
if (_cfg.this_host_id) {
|
if (_cfg.this_host_id) {
|
||||||
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg can be caller only once current id {} new id {}", fmt::ptr(this), _cfg.this_host_id, this_host_id));
|
on_internal_error(tlogger,
|
||||||
|
fmt::format("topology[{}] set_host_id_cfg can be caller only once current id {} new id {}", fmt::ptr(this), _cfg.this_host_id, this_host_id));
|
||||||
}
|
}
|
||||||
if (_nodes.size() != 1) {
|
if (_nodes.size() != 1) {
|
||||||
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg called while nodes size is greater than 1", fmt::ptr(this)));
|
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg called while nodes size is greater than 1", fmt::ptr(this)));
|
||||||
@@ -162,7 +176,8 @@ void topology::set_host_id_cfg(host_id this_host_id) {
|
|||||||
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg called while _this_nodes is null", fmt::ptr(this)));
|
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg called while _this_nodes is null", fmt::ptr(this)));
|
||||||
}
|
}
|
||||||
if (_this_node->host_id()) {
|
if (_this_node->host_id()) {
|
||||||
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg called while _this_nodes has non null id {}", fmt::ptr(this), _this_node->host_id()));
|
on_internal_error(
|
||||||
|
tlogger, fmt::format("topology[{}] set_host_id_cfg called while _this_nodes has non null id {}", fmt::ptr(this), _this_node->host_id()));
|
||||||
}
|
}
|
||||||
|
|
||||||
remove_node(*_this_node);
|
remove_node(*_this_node);
|
||||||
@@ -203,7 +218,8 @@ const node& topology::add_node(node_holder nptr) {
|
|||||||
|
|
||||||
if (nptr->topology() != this) {
|
if (nptr->topology() != this) {
|
||||||
if (nptr->topology()) {
|
if (nptr->topology()) {
|
||||||
on_fatal_internal_error(tlogger, seastar::format("topology[{}]: {} belongs to different topology={}", fmt::ptr(this), node_printer(node), fmt::ptr(node->topology())));
|
on_fatal_internal_error(tlogger,
|
||||||
|
seastar::format("topology[{}]: {} belongs to different topology={}", fmt::ptr(this), node_printer(node), fmt::ptr(node->topology())));
|
||||||
}
|
}
|
||||||
nptr->set_topology(this);
|
nptr->set_topology(this);
|
||||||
}
|
}
|
||||||
@@ -219,7 +235,8 @@ const node& topology::add_node(node_holder nptr) {
|
|||||||
try {
|
try {
|
||||||
if (is_configured_this_node(*node)) {
|
if (is_configured_this_node(*node)) {
|
||||||
if (_this_node) {
|
if (_this_node) {
|
||||||
on_internal_error(tlogger, seastar::format("topology[{}]: {}: local node already mapped to {}", fmt::ptr(this), node_printer(node), node_printer(this_node())));
|
on_internal_error(tlogger,
|
||||||
|
seastar::format("topology[{}]: {}: local node already mapped to {}", fmt::ptr(this), node_printer(node), node_printer(this_node())));
|
||||||
}
|
}
|
||||||
locator::node& n = *_nodes.back();
|
locator::node& n = *_nodes.back();
|
||||||
n._is_this_node = node::this_node::yes;
|
n._is_this_node = node::this_node::yes;
|
||||||
@@ -238,13 +255,24 @@ const node& topology::add_node(node_holder nptr) {
|
|||||||
return *node;
|
return *node;
|
||||||
}
|
}
|
||||||
|
|
||||||
void topology::update_node(node& node, std::optional<host_id> opt_id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> opt_shard_count) {
|
void topology::update_node(node& node, std::optional<host_id> opt_id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st,
|
||||||
|
std::optional<shard_id> opt_shard_count) {
|
||||||
tlogger.debug("topology[{}]: update_node: {}: to: host_id={} dc={} rack={} state={} shard_count={}, at {}", fmt::ptr(this), node_printer(&node),
|
tlogger.debug("topology[{}]: update_node: {}: to: host_id={} dc={} rack={} state={} shard_count={}, at {}", fmt::ptr(this), node_printer(&node),
|
||||||
opt_id ? format("{}", *opt_id) : "unchanged",
|
seastar::value_of([&] {
|
||||||
opt_dr ? format("{}", opt_dr->dc) : "unchanged",
|
return opt_id ? format("{}", *opt_id) : "unchanged";
|
||||||
opt_dr ? format("{}", opt_dr->rack) : "unchanged",
|
}),
|
||||||
opt_st ? format("{}", *opt_st) : "unchanged",
|
seastar::value_of([&] {
|
||||||
opt_shard_count ? format("{}", *opt_shard_count) : "unchanged",
|
return opt_dr ? format("{}", opt_dr->dc) : "unchanged";
|
||||||
|
}),
|
||||||
|
seastar::value_of([&] {
|
||||||
|
return opt_dr ? format("{}", opt_dr->rack) : "unchanged";
|
||||||
|
}),
|
||||||
|
seastar::value_of([&] {
|
||||||
|
return opt_st ? format("{}", *opt_st) : "unchanged";
|
||||||
|
}),
|
||||||
|
seastar::value_of([&] {
|
||||||
|
return opt_shard_count ? format("{}", *opt_shard_count) : "unchanged";
|
||||||
|
}),
|
||||||
lazy_backtrace());
|
lazy_backtrace());
|
||||||
|
|
||||||
bool changed = false;
|
bool changed = false;
|
||||||
@@ -257,7 +285,8 @@ void topology::update_node(node& node, std::optional<host_id> opt_id, std::optio
|
|||||||
on_internal_error(tlogger, seastar::format("This node host_id is already set: {}: new host_id={}", node_printer(&node), *opt_id));
|
on_internal_error(tlogger, seastar::format("This node host_id is already set: {}: new host_id={}", node_printer(&node), *opt_id));
|
||||||
}
|
}
|
||||||
if (_nodes_by_host_id.contains(*opt_id)) {
|
if (_nodes_by_host_id.contains(*opt_id)) {
|
||||||
on_internal_error(tlogger, seastar::format("Cannot update node host_id: {}: new host_id already exists: {}", node_printer(&node), node_printer(find_node(*opt_id))));
|
on_internal_error(tlogger, seastar::format("Cannot update node host_id: {}: new host_id already exists: {}", node_printer(&node),
|
||||||
|
node_printer(find_node(*opt_id))));
|
||||||
}
|
}
|
||||||
changed = true;
|
changed = true;
|
||||||
} else {
|
} else {
|
||||||
@@ -442,10 +471,10 @@ const node* topology::find_node(node::idx_type idx) const noexcept {
|
|||||||
return _nodes.at(idx).get();
|
return _nodes.at(idx).get();
|
||||||
}
|
}
|
||||||
|
|
||||||
const node& topology::add_or_update_endpoint(host_id id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> shard_count)
|
const node& topology::add_or_update_endpoint(
|
||||||
{
|
host_id id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> shard_count) {
|
||||||
tlogger.trace("topology[{}]: add_or_update_endpoint: host_id={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this),
|
tlogger.trace("topology[{}]: add_or_update_endpoint: host_id={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this), id,
|
||||||
id, opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count,
|
opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count,
|
||||||
lazy_backtrace());
|
lazy_backtrace());
|
||||||
|
|
||||||
auto* n = find_node(id);
|
auto* n = find_node(id);
|
||||||
@@ -454,14 +483,10 @@ const node& topology::add_or_update_endpoint(host_id id, std::optional<endpoint_
|
|||||||
return *n;
|
return *n;
|
||||||
}
|
}
|
||||||
|
|
||||||
return add_node(id,
|
return add_node(id, opt_dr.value_or(endpoint_dc_rack::default_location), opt_st.value_or(node::state::none), shard_count.value_or(0));
|
||||||
opt_dr.value_or(endpoint_dc_rack::default_location),
|
|
||||||
opt_st.value_or(node::state::none),
|
|
||||||
shard_count.value_or(0));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool topology::remove_endpoint(locator::host_id host_id)
|
bool topology::remove_endpoint(locator::host_id host_id) {
|
||||||
{
|
|
||||||
auto node = find_node(host_id);
|
auto node = find_node(host_id);
|
||||||
tlogger.debug("topology[{}]: remove_endpoint: host_id={}: {}", fmt::ptr(this), host_id, node_printer(node));
|
tlogger.debug("topology[{}]: remove_endpoint: host_id={}: {}", fmt::ptr(this), host_id, node_printer(node));
|
||||||
// Do not allow removing yourself from the topology
|
// Do not allow removing yourself from the topology
|
||||||
@@ -564,11 +589,12 @@ std::unordered_set<locator::host_id> topology::get_all_host_ids() const {
|
|||||||
return ids;
|
return ids;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::unordered_map<sstring, std::unordered_set<host_id>>
|
std::unordered_map<sstring, std::unordered_set<host_id>> topology::get_datacenter_host_ids() const {
|
||||||
topology::get_datacenter_host_ids() const {
|
|
||||||
std::unordered_map<sstring, std::unordered_set<host_id>> ret;
|
std::unordered_map<sstring, std::unordered_set<host_id>> ret;
|
||||||
for (auto& [dc, nodes] : _dc_nodes) {
|
for (auto& [dc, nodes] : _dc_nodes) {
|
||||||
ret[dc] = nodes | std::views::transform([] (const node& n) { return n.host_id(); }) | std::ranges::to<std::unordered_set>();
|
ret[dc] = nodes | std::views::transform([](const node& n) {
|
||||||
|
return n.host_id();
|
||||||
|
}) | std::ranges::to<std::unordered_set>();
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|||||||
792
repair/repair.cc
792
repair/repair.cc
File diff suppressed because it is too large
Load Diff
@@ -3253,10 +3253,13 @@ private:
|
|||||||
// sequentially because the rows from repair follower 1 to
|
// sequentially because the rows from repair follower 1 to
|
||||||
// repair master might reduce the amount of missing data
|
// repair master might reduce the amount of missing data
|
||||||
// between repair master and repair follower 2.
|
// between repair master and repair follower 2.
|
||||||
repair_hash_set set_diff = get_set_diff(master.peer_row_hash_sets(node_idx), master.working_row_hashes().get());
|
auto working_hashes = master.working_row_hashes().get();
|
||||||
|
repair_hash_set set_diff = get_set_diff(master.peer_row_hash_sets(node_idx), working_hashes);
|
||||||
// Request missing sets from peer node
|
// Request missing sets from peer node
|
||||||
|
if (rlogger.is_enabled(logging::log_level::debug)) {
|
||||||
rlogger.debug("Before get_row_diff to node {}, local={}, peer={}, set_diff={}",
|
rlogger.debug("Before get_row_diff to node {}, local={}, peer={}, set_diff={}",
|
||||||
node, master.working_row_hashes().get().size(), master.peer_row_hash_sets(node_idx).size(), set_diff.size());
|
node, working_hashes.size(), master.peer_row_hash_sets(node_idx).size(), set_diff.size());
|
||||||
|
}
|
||||||
// If we need to pull all rows from the peer. We can avoid
|
// If we need to pull all rows from the peer. We can avoid
|
||||||
// sending the row hashes on wire by setting needs_all_rows flag.
|
// sending the row hashes on wire by setting needs_all_rows flag.
|
||||||
auto needs_all_rows = repair_meta::needs_all_rows_t(set_diff.size() == master.peer_row_hash_sets(node_idx).size());
|
auto needs_all_rows = repair_meta::needs_all_rows_t(set_diff.size() == master.peer_row_hash_sets(node_idx).size());
|
||||||
@@ -3269,7 +3272,9 @@ private:
|
|||||||
master.get_row_diff(std::move(set_diff), needs_all_rows, node, node_idx, dst_cpu_id);
|
master.get_row_diff(std::move(set_diff), needs_all_rows, node, node_idx, dst_cpu_id);
|
||||||
ns.state = repair_state::get_row_diff_finished;
|
ns.state = repair_state::get_row_diff_finished;
|
||||||
}
|
}
|
||||||
|
if (rlogger.is_enabled(logging::log_level::debug)) {
|
||||||
rlogger.debug("After get_row_diff node {}, hash_sets={}", master.myhostid(), master.working_row_hashes().get().size());
|
rlogger.debug("After get_row_diff node {}, hash_sets={}", master.myhostid(), master.working_row_hashes().get().size());
|
||||||
|
}
|
||||||
} catch (...) {
|
} catch (...) {
|
||||||
rlogger.warn("repair[{}]: get_row_diff: got error from node={}, keyspace={}, table={}, range={}, error={}",
|
rlogger.warn("repair[{}]: get_row_diff: got error from node={}, keyspace={}, table={}, range={}, error={}",
|
||||||
_shard_task.global_repair_id.uuid(), node, _shard_task.get_keyspace(), _cf_name, _range, std::current_exception());
|
_shard_task.global_repair_id.uuid(), node, _shard_task.get_keyspace(), _cf_name, _range, std::current_exception());
|
||||||
|
|||||||
File diff suppressed because it is too large
Load Diff
@@ -22,12 +22,12 @@ static logging::logger slogger("schema_registry");
|
|||||||
static thread_local schema_registry registry;
|
static thread_local schema_registry registry;
|
||||||
|
|
||||||
schema_version_not_found::schema_version_not_found(table_schema_version v)
|
schema_version_not_found::schema_version_not_found(table_schema_version v)
|
||||||
: std::runtime_error{format("Schema version {} not found", v)}
|
: std::runtime_error{format("Schema version {} not found", v)} {
|
||||||
{ }
|
}
|
||||||
|
|
||||||
schema_version_loading_failed::schema_version_loading_failed(table_schema_version v)
|
schema_version_loading_failed::schema_version_loading_failed(table_schema_version v)
|
||||||
: std::runtime_error{format("Failed to load schema version {}", v)}
|
: std::runtime_error{format("Failed to load schema version {}", v)} {
|
||||||
{ }
|
}
|
||||||
|
|
||||||
schema_registry_entry::~schema_registry_entry() {
|
schema_registry_entry::~schema_registry_entry() {
|
||||||
if (_schema) {
|
if (_schema) {
|
||||||
@@ -39,8 +39,7 @@ schema_registry_entry::schema_registry_entry(table_schema_version v, schema_regi
|
|||||||
: _state(state::INITIAL)
|
: _state(state::INITIAL)
|
||||||
, _version(v)
|
, _version(v)
|
||||||
, _registry(r)
|
, _registry(r)
|
||||||
, _sync_state(sync_state::NOT_SYNCED)
|
, _sync_state(sync_state::NOT_SYNCED) {
|
||||||
{
|
|
||||||
_erase_timer.set_callback([this] {
|
_erase_timer.set_callback([this] {
|
||||||
slogger.debug("Dropping {}", _version);
|
slogger.debug("Dropping {}", _version);
|
||||||
SCYLLA_ASSERT(!_schema);
|
SCYLLA_ASSERT(!_schema);
|
||||||
@@ -71,8 +70,8 @@ void schema_registry::attach_table(schema_registry_entry& e) noexcept {
|
|||||||
e.set_table(table.weak_from_this());
|
e.set_table(table.weak_from_this());
|
||||||
} catch (const replica::no_such_column_family&) {
|
} catch (const replica::no_such_column_family&) {
|
||||||
if (slogger.is_enabled(seastar::log_level::debug)) {
|
if (slogger.is_enabled(seastar::log_level::debug)) {
|
||||||
slogger.debug("No table for schema version {} of {}.{}: {}", e._version,
|
slogger.debug("No table for schema version {} of {}.{}: {}", e._version, e.get_schema()->ks_name(), e.get_schema()->cf_name(),
|
||||||
e.get_schema()->ks_name(), e.get_schema()->cf_name(), seastar::current_backtrace());
|
seastar::current_backtrace());
|
||||||
}
|
}
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
@@ -294,8 +293,8 @@ schema_registry& local_schema_registry() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
global_schema_ptr::global_schema_ptr(const global_schema_ptr& o)
|
global_schema_ptr::global_schema_ptr(const global_schema_ptr& o)
|
||||||
: global_schema_ptr(o.get())
|
: global_schema_ptr(o.get()) {
|
||||||
{ }
|
}
|
||||||
|
|
||||||
global_schema_ptr::global_schema_ptr(global_schema_ptr&& o) noexcept {
|
global_schema_ptr::global_schema_ptr(global_schema_ptr&& o) noexcept {
|
||||||
auto current = this_shard_id();
|
auto current = this_shard_id();
|
||||||
|
|||||||
@@ -65,8 +65,7 @@ struct send_info {
|
|||||||
mutation_fragment_v1_stream reader;
|
mutation_fragment_v1_stream reader;
|
||||||
noncopyable_function<void(size_t)> update;
|
noncopyable_function<void(size_t)> update;
|
||||||
send_info(netw::messaging_service& ms_, streaming::plan_id plan_id_, lw_shared_ptr<replica::table> tbl_, reader_permit permit_,
|
send_info(netw::messaging_service& ms_, streaming::plan_id plan_id_, lw_shared_ptr<replica::table> tbl_, reader_permit permit_,
|
||||||
dht::token_range_vector ranges_, locator::host_id id_,
|
dht::token_range_vector ranges_, locator::host_id id_, uint32_t dst_cpu_id_, stream_reason reason_, service::frozen_topology_guard topo_guard_,
|
||||||
uint32_t dst_cpu_id_, stream_reason reason_, service::frozen_topology_guard topo_guard_,
|
|
||||||
noncopyable_function<void(size_t)> update_fn)
|
noncopyable_function<void(size_t)> update_fn)
|
||||||
: ms(ms_)
|
: ms(ms_)
|
||||||
, plan_id(plan_id_)
|
, plan_id(plan_id_)
|
||||||
@@ -79,12 +78,13 @@ struct send_info {
|
|||||||
, ranges(std::move(ranges_))
|
, ranges(std::move(ranges_))
|
||||||
, prs(dht::to_partition_ranges(ranges))
|
, prs(dht::to_partition_ranges(ranges))
|
||||||
, reader(cf->make_streaming_reader(cf->schema(), std::move(permit_), prs, gc_clock::now()))
|
, reader(cf->make_streaming_reader(cf->schema(), std::move(permit_), prs, gc_clock::now()))
|
||||||
, update(std::move(update_fn))
|
, update(std::move(update_fn)) {
|
||||||
{
|
|
||||||
}
|
}
|
||||||
future<bool> has_relevant_range_on_this_shard() {
|
future<bool> has_relevant_range_on_this_shard() {
|
||||||
return do_with(false, ranges.begin(), [this](bool& found_relevant_range, dht::token_range_vector::iterator& ranges_it) {
|
return do_with(false, ranges.begin(), [this](bool& found_relevant_range, dht::token_range_vector::iterator& ranges_it) {
|
||||||
auto stop_cond = [this, &found_relevant_range, &ranges_it] { return ranges_it == ranges.end() || found_relevant_range; };
|
auto stop_cond = [this, &found_relevant_range, &ranges_it] {
|
||||||
|
return ranges_it == ranges.end() || found_relevant_range;
|
||||||
|
};
|
||||||
return do_until(std::move(stop_cond), [this, &found_relevant_range, &ranges_it] {
|
return do_until(std::move(stop_cond), [this, &found_relevant_range, &ranges_it] {
|
||||||
dht::token_range range = *ranges_it++;
|
dht::token_range range = *ranges_it++;
|
||||||
if (!found_relevant_range) {
|
if (!found_relevant_range) {
|
||||||
@@ -116,19 +116,24 @@ future<> send_mutation_fragments(lw_shared_ptr<send_info> si) {
|
|||||||
return si->reader.has_more_fragments().then([si](bool there_is_more) {
|
return si->reader.has_more_fragments().then([si](bool there_is_more) {
|
||||||
if (!there_is_more) {
|
if (!there_is_more) {
|
||||||
// The reader contains no data
|
// The reader contains no data
|
||||||
sslog.info("[Stream #{}] Skip sending ks={}, cf={}, reader contains no data, with new rpc streaming",
|
sslog.info("[Stream #{}] Skip sending ks={}, cf={}, reader contains no data, with new rpc streaming", si->plan_id, si->cf->schema()->ks_name(),
|
||||||
si->plan_id, si->cf->schema()->ks_name(), si->cf->schema()->cf_name());
|
si->cf->schema()->cf_name());
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
return si->estimate_partitions().then([si](size_t estimated_partitions) {
|
return si->estimate_partitions().then([si](size_t estimated_partitions) {
|
||||||
sslog.info("[Stream #{}] Start sending ks={}, cf={}, estimated_partitions={}, with new rpc streaming", si->plan_id, si->cf->schema()->ks_name(), si->cf->schema()->cf_name(), estimated_partitions);
|
sslog.info("[Stream #{}] Start sending ks={}, cf={}, estimated_partitions={}, with new rpc streaming", si->plan_id, si->cf->schema()->ks_name(),
|
||||||
return si->ms.make_sink_and_source_for_stream_mutation_fragments(si->reader.schema()->version(), si->plan_id, si->cf_id, estimated_partitions, si->reason, si->topo_guard, si->id).then_unpack([si] (rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd> sink, rpc::source<int32_t> source) mutable {
|
si->cf->schema()->cf_name(), estimated_partitions);
|
||||||
|
return si->ms
|
||||||
|
.make_sink_and_source_for_stream_mutation_fragments(
|
||||||
|
si->reader.schema()->version(), si->plan_id, si->cf_id, estimated_partitions, si->reason, si->topo_guard, si->id)
|
||||||
|
.then_unpack([si](rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd> sink, rpc::source<int32_t> source) mutable {
|
||||||
auto got_error_from_peer = make_lw_shared<bool>(false);
|
auto got_error_from_peer = make_lw_shared<bool>(false);
|
||||||
auto table_is_dropped = make_lw_shared<bool>(false);
|
auto table_is_dropped = make_lw_shared<bool>(false);
|
||||||
|
|
||||||
auto source_op = [source, got_error_from_peer, table_is_dropped, si]() mutable -> future<> {
|
auto source_op = [source, got_error_from_peer, table_is_dropped, si]() mutable -> future<> {
|
||||||
return repeat([source, got_error_from_peer, table_is_dropped, si]() mutable {
|
return repeat([source, got_error_from_peer, table_is_dropped, si]() mutable {
|
||||||
return source().then([source, got_error_from_peer, table_is_dropped, si] (std::optional<std::tuple<int32_t>> status_opt) mutable {
|
return source().then([source, got_error_from_peer, table_is_dropped, si](
|
||||||
|
std::optional<std::tuple<int32_t>> status_opt) mutable {
|
||||||
if (status_opt) {
|
if (status_opt) {
|
||||||
auto status = std::get<0>(*status_opt);
|
auto status = std::get<0>(*status_opt);
|
||||||
if (status == -1) {
|
if (status == -1) {
|
||||||
@@ -152,37 +157,49 @@ future<> send_mutation_fragments(lw_shared_ptr<send_info> si) {
|
|||||||
|
|
||||||
auto sink_op = [sink, si, got_error_from_peer]() mutable -> future<> {
|
auto sink_op = [sink, si, got_error_from_peer]() mutable -> future<> {
|
||||||
mutation_fragment_stream_validator validator(*(si->reader.schema()));
|
mutation_fragment_stream_validator validator(*(si->reader.schema()));
|
||||||
return do_with(std::move(sink), std::move(validator), [si, got_error_from_peer] (rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd>& sink, mutation_fragment_stream_validator& validator) {
|
return do_with(std::move(sink), std::move(validator),
|
||||||
|
[si, got_error_from_peer](rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd>& sink,
|
||||||
|
mutation_fragment_stream_validator& validator) {
|
||||||
return repeat([&sink, &validator, si, got_error_from_peer]() mutable {
|
return repeat([&sink, &validator, si, got_error_from_peer]() mutable {
|
||||||
return si->reader().then([&sink, &validator, si, s = si->reader.schema(), got_error_from_peer] (mutation_fragment_opt mf) mutable {
|
return si->reader().then(
|
||||||
|
[&sink, &validator, si, s = si->reader.schema(), got_error_from_peer](mutation_fragment_opt mf) mutable {
|
||||||
if (*got_error_from_peer) {
|
if (*got_error_from_peer) {
|
||||||
return make_exception_future<stop_iteration>(std::runtime_error("Got status error code from peer"));
|
return make_exception_future<stop_iteration>(std::runtime_error("Got status error code from peer"));
|
||||||
}
|
}
|
||||||
if (mf) {
|
if (mf) {
|
||||||
if (!validator(mf->mutation_fragment_kind())) {
|
if (!validator(mf->mutation_fragment_kind())) {
|
||||||
return make_exception_future<stop_iteration>(std::runtime_error(format("Stream reader mutation_fragment validator failed, previous={}, current={}",
|
return make_exception_future<stop_iteration>(std::runtime_error(
|
||||||
|
format("Stream reader mutation_fragment validator failed, previous={}, current={}",
|
||||||
validator.previous_mutation_fragment_kind(), mf->mutation_fragment_kind())));
|
validator.previous_mutation_fragment_kind(), mf->mutation_fragment_kind())));
|
||||||
}
|
}
|
||||||
frozen_mutation_fragment fmf = freeze(*s, *mf);
|
frozen_mutation_fragment fmf = freeze(*s, *mf);
|
||||||
auto size = fmf.representation().size();
|
auto size = fmf.representation().size();
|
||||||
si->update(size);
|
si->update(size);
|
||||||
return sink(fmf, stream_mutation_fragments_cmd::mutation_fragment_data).then([] { return stop_iteration::no; });
|
return sink(fmf, stream_mutation_fragments_cmd::mutation_fragment_data).then([] {
|
||||||
|
return stop_iteration::no;
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
if (!validator.on_end_of_stream()) {
|
if (!validator.on_end_of_stream()) {
|
||||||
return make_exception_future<stop_iteration>(std::runtime_error(format("Stream reader mutation_fragment validator failed on end_of_stream, previous={}, current=end_of_stream",
|
return make_exception_future<stop_iteration>(
|
||||||
|
std::runtime_error(format("Stream reader mutation_fragment validator failed on "
|
||||||
|
"end_of_stream, previous={}, current=end_of_stream",
|
||||||
validator.previous_mutation_fragment_kind())));
|
validator.previous_mutation_fragment_kind())));
|
||||||
}
|
}
|
||||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}).then([&sink] () mutable {
|
})
|
||||||
|
.then([&sink]() mutable {
|
||||||
return sink(frozen_mutation_fragment(bytes_ostream()), stream_mutation_fragments_cmd::end_of_stream);
|
return sink(frozen_mutation_fragment(bytes_ostream()), stream_mutation_fragments_cmd::end_of_stream);
|
||||||
}).handle_exception([&sink] (std::exception_ptr ep) mutable {
|
})
|
||||||
|
.handle_exception([&sink](std::exception_ptr ep) mutable {
|
||||||
// Notify the receiver the sender has failed
|
// Notify the receiver the sender has failed
|
||||||
return sink(frozen_mutation_fragment(bytes_ostream()), stream_mutation_fragments_cmd::error).then([ep = std::move(ep)] () mutable {
|
return sink(frozen_mutation_fragment(bytes_ostream()), stream_mutation_fragments_cmd::error)
|
||||||
|
.then([ep = std::move(ep)]() mutable {
|
||||||
return make_exception_future<>(std::move(ep));
|
return make_exception_future<>(std::move(ep));
|
||||||
});
|
});
|
||||||
}).finally([&sink] () mutable {
|
})
|
||||||
|
.finally([&sink]() mutable {
|
||||||
return sink.close();
|
return sink.close();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
@@ -191,9 +208,11 @@ future<> send_mutation_fragments(lw_shared_ptr<send_info> si) {
|
|||||||
return when_all_succeed(std::move(source_op), std::move(sink_op)).then_unpack([got_error_from_peer, table_is_dropped, si] {
|
return when_all_succeed(std::move(source_op), std::move(sink_op)).then_unpack([got_error_from_peer, table_is_dropped, si] {
|
||||||
if (*got_error_from_peer) {
|
if (*got_error_from_peer) {
|
||||||
if (*table_is_dropped) {
|
if (*table_is_dropped) {
|
||||||
sslog.info("[Stream #{}] Skipped streaming the dropped table {}.{}", si->plan_id, si->cf->schema()->ks_name(), si->cf->schema()->cf_name());
|
sslog.info("[Stream #{}] Skipped streaming the dropped table {}.{}", si->plan_id, si->cf->schema()->ks_name(),
|
||||||
|
si->cf->schema()->cf_name());
|
||||||
} else {
|
} else {
|
||||||
throw std::runtime_error(format("Peer failed to process mutation_fragment peer={}, plan_id={}, cf_id={}", si->id, si->plan_id, si->cf_id));
|
throw std::runtime_error(
|
||||||
|
format("Peer failed to process mutation_fragment peer={}, plan_id={}, cf_id={}", si->id, si->plan_id, si->cf_id));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -213,34 +232,43 @@ future<> stream_transfer_task::execute() {
|
|||||||
sort_and_merge_ranges();
|
sort_and_merge_ranges();
|
||||||
auto reason = session->get_reason();
|
auto reason = session->get_reason();
|
||||||
auto topo_guard = session->topo_guard();
|
auto topo_guard = session->topo_guard();
|
||||||
return sm.container().invoke_on_all([plan_id, cf_id, id, dst_cpu_id, ranges=this->_ranges, reason, topo_guard] (stream_manager& sm) mutable {
|
return sm.container()
|
||||||
|
.invoke_on_all([plan_id, cf_id, id, dst_cpu_id, ranges = this->_ranges, reason, topo_guard](stream_manager& sm) mutable {
|
||||||
auto tbl = sm.db().find_column_family(cf_id).shared_from_this();
|
auto tbl = sm.db().find_column_family(cf_id).shared_from_this();
|
||||||
return sm.db().obtain_reader_permit(*tbl, "stream-transfer-task", db::no_timeout, {}).then([&sm, tbl, plan_id, cf_id, id, dst_cpu_id, ranges=std::move(ranges), reason, topo_guard] (reader_permit permit) mutable {
|
return sm.db()
|
||||||
auto si = make_lw_shared<send_info>(sm.ms(), plan_id, tbl, std::move(permit), std::move(ranges), id, dst_cpu_id, reason, topo_guard, [&sm, plan_id, id] (size_t sz) {
|
.obtain_reader_permit(*tbl, "stream-transfer-task", db::no_timeout, {})
|
||||||
|
.then([&sm, tbl, plan_id, cf_id, id, dst_cpu_id, ranges = std::move(ranges), reason, topo_guard](reader_permit permit) mutable {
|
||||||
|
auto si = make_lw_shared<send_info>(sm.ms(), plan_id, tbl, std::move(permit), std::move(ranges), id, dst_cpu_id, reason,
|
||||||
|
topo_guard, [&sm, plan_id, id](size_t sz) {
|
||||||
sm.update_progress(plan_id, id, streaming::progress_info::direction::OUT, sz);
|
sm.update_progress(plan_id, id, streaming::progress_info::direction::OUT, sz);
|
||||||
});
|
});
|
||||||
return si->has_relevant_range_on_this_shard().then([si, plan_id, cf_id] (bool has_relevant_range_on_this_shard) {
|
return si->has_relevant_range_on_this_shard()
|
||||||
|
.then([si, plan_id, cf_id](bool has_relevant_range_on_this_shard) {
|
||||||
if (!has_relevant_range_on_this_shard) {
|
if (!has_relevant_range_on_this_shard) {
|
||||||
sslog.debug("[Stream #{}] stream_transfer_task: cf_id={}: ignore ranges on shard={}",
|
sslog.debug("[Stream #{}] stream_transfer_task: cf_id={}: ignore ranges on shard={}", plan_id, cf_id,
|
||||||
plan_id, cf_id, this_shard_id());
|
this_shard_id());
|
||||||
return make_ready_future<>();
|
return make_ready_future<>();
|
||||||
}
|
}
|
||||||
return send_mutation_fragments(std::move(si));
|
return send_mutation_fragments(std::move(si));
|
||||||
}).finally([si] {
|
})
|
||||||
|
.finally([si] {
|
||||||
return si->reader.close();
|
return si->reader.close();
|
||||||
});
|
});
|
||||||
});
|
});
|
||||||
}).then([this, plan_id, cf_id, id, &sm] {
|
})
|
||||||
|
.then([this, plan_id, cf_id, id, &sm] {
|
||||||
sslog.debug("[Stream #{}] SEND STREAM_MUTATION_DONE to {}, cf_id={}", plan_id, id, cf_id);
|
sslog.debug("[Stream #{}] SEND STREAM_MUTATION_DONE to {}, cf_id={}", plan_id, id, cf_id);
|
||||||
return ser::streaming_rpc_verbs::send_stream_mutation_done(&sm.ms(), id, plan_id, _ranges,
|
return ser::streaming_rpc_verbs::send_stream_mutation_done(&sm.ms(), id, plan_id, _ranges, cf_id, session->dst_cpu_id)
|
||||||
cf_id, session->dst_cpu_id).handle_exception([plan_id, id] (auto ep) {
|
.handle_exception([plan_id, id](auto ep) {
|
||||||
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send STREAM_MUTATION_DONE to {}: {}", plan_id, id, ep);
|
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send STREAM_MUTATION_DONE to {}: {}", plan_id, id, ep);
|
||||||
std::rethrow_exception(ep);
|
std::rethrow_exception(ep);
|
||||||
});
|
});
|
||||||
}).then([this, id, plan_id] {
|
})
|
||||||
|
.then([this, id, plan_id] {
|
||||||
_mutation_done_sent = true;
|
_mutation_done_sent = true;
|
||||||
sslog.debug("[Stream #{}] GOT STREAM_MUTATION_DONE Reply from {}", plan_id, id);
|
sslog.debug("[Stream #{}] GOT STREAM_MUTATION_DONE Reply from {}", plan_id, id);
|
||||||
}).handle_exception([plan_id, id, &sm] (std::exception_ptr ep) {
|
})
|
||||||
|
.handle_exception([plan_id, id, &sm](std::exception_ptr ep) {
|
||||||
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send to {}: {}", plan_id, id, ep);
|
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send to {}: {}", plan_id, id, ep);
|
||||||
utils::get_local_injector().inject("stream_mutation_fragments_table_dropped", [&sm]() {
|
utils::get_local_injector().inject("stream_mutation_fragments_table_dropped", [&sm]() {
|
||||||
sm.db().find_column_family(table_id::create_null_id());
|
sm.db().find_column_family(table_id::create_null_id());
|
||||||
|
|||||||
@@ -350,7 +350,6 @@ utils::gcp::storage::client::impl::send_with_retry(const std::string& path, cons
|
|||||||
co_await authorize(req, scope);
|
co_await authorize(req, scope);
|
||||||
}
|
}
|
||||||
auto content = co_await util::read_entire_stream_contiguous(_in);
|
auto content = co_await util::read_entire_stream_contiguous(_in);
|
||||||
auto error_msg = get_gcp_error_message(std::string_view(content));
|
|
||||||
gcp_storage.debug("Got unexpected response status: {}, content: {}", rep._status, content);
|
gcp_storage.debug("Got unexpected response status: {}, content: {}", rep._status, content);
|
||||||
co_await coroutine::return_exception_ptr(std::make_exception_ptr(httpd::unexpected_status_error(rep._status)));
|
co_await coroutine::return_exception_ptr(std::make_exception_ptr(httpd::unexpected_status_error(rep._status)));
|
||||||
}
|
}
|
||||||
@@ -629,7 +628,7 @@ future<> utils::gcp::storage::client::object_data_sink::remove_upload() {
|
|||||||
co_return;
|
co_return;
|
||||||
}
|
}
|
||||||
|
|
||||||
gcp_storage.debug("Removing incomplete upload {}:{} ()", _bucket, _object_name, _session_path);
|
gcp_storage.debug("Removing incomplete upload {}:{} ({})", _bucket, _object_name, _session_path);
|
||||||
|
|
||||||
auto res = co_await _impl->send_with_retry(_session_path
|
auto res = co_await _impl->send_with_retry(_session_path
|
||||||
, GCP_OBJECT_SCOPE_READ_WRITE
|
, GCP_OBJECT_SCOPE_READ_WRITE
|
||||||
|
|||||||
@@ -1583,7 +1583,7 @@ void reclaim_timer::report() const noexcept {
|
|||||||
if (_memory_released > 0) {
|
if (_memory_released > 0) {
|
||||||
auto bytes_per_second =
|
auto bytes_per_second =
|
||||||
static_cast<float>(_memory_released) / std::chrono::duration_cast<std::chrono::duration<float>>(_duration).count();
|
static_cast<float>(_memory_released) / std::chrono::duration_cast<std::chrono::duration<float>>(_duration).count();
|
||||||
timing_logger.log(info_level, "- reclamation rate = {} MiB/s", format("{:.3f}", bytes_per_second / MiB));
|
timing_logger.log(info_level, "- reclamation rate = {:.3f} MiB/s", bytes_per_second / MiB);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_debug_enabled) {
|
if (_debug_enabled) {
|
||||||
|
|||||||
Reference in New Issue
Block a user