Compare commits
23 Commits
debug_form
...
master
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c575bbf1e8 | ||
|
|
7fdd650009 | ||
|
|
552a2d0995 | ||
|
|
73de865ca3 | ||
|
|
f988ec18cb | ||
|
|
cd1679934c | ||
|
|
d52fbf7ada | ||
|
|
141aa2d696 | ||
|
|
c670183be8 | ||
|
|
e639dcda0b | ||
|
|
503a6e2d7e | ||
|
|
0f02c0d6fa | ||
|
|
4fead4baae | ||
|
|
ffd58ca1f0 | ||
|
|
f6fd3bbea0 | ||
|
|
148217bed6 | ||
|
|
2b472fe7fd | ||
|
|
ae12c712ce | ||
|
|
dd446aa442 | ||
|
|
dea79b09a9 | ||
|
|
3d04fd1d13 | ||
|
|
f5438e0587 | ||
|
|
f6ab576ed9 |
@@ -32,7 +32,7 @@ namespace {
|
||||
logger mylog{"ldap_role_manager"}; // `log` is taken by math.
|
||||
|
||||
struct url_desc_deleter {
|
||||
void operator()(LDAPURLDesc* p) {
|
||||
void operator()(LDAPURLDesc *p) {
|
||||
ldap_free_urldesc(p);
|
||||
}
|
||||
};
|
||||
@@ -40,7 +40,7 @@ struct url_desc_deleter {
|
||||
using url_desc_ptr = std::unique_ptr<LDAPURLDesc, url_desc_deleter>;
|
||||
|
||||
url_desc_ptr parse_url(std::string_view url) {
|
||||
LDAPURLDesc* desc = nullptr;
|
||||
LDAPURLDesc *desc = nullptr;
|
||||
if (ldap_url_parse(url.data(), &desc)) {
|
||||
mylog.error("error in ldap_url_parse({})", url);
|
||||
}
|
||||
@@ -53,12 +53,8 @@ std::vector<sstring> get_attr_values(LDAP* ld, LDAPMessage* res, const char* att
|
||||
mylog.debug("Analyzing search results");
|
||||
for (auto e = ldap_first_entry(ld, res); e; e = ldap_next_entry(ld, e)) {
|
||||
struct deleter {
|
||||
void operator()(berval** p) {
|
||||
ldap_value_free_len(p);
|
||||
}
|
||||
void operator()(char* p) {
|
||||
ldap_memfree(p);
|
||||
}
|
||||
void operator()(berval** p) { ldap_value_free_len(p); }
|
||||
void operator()(char* p) { ldap_memfree(p); }
|
||||
};
|
||||
const std::unique_ptr<char, deleter> dname(ldap_get_dn(ld, e));
|
||||
mylog.debug("Analyzing entry {}", dname.get());
|
||||
@@ -79,29 +75,32 @@ std::vector<sstring> get_attr_values(LDAP* ld, LDAPMessage* res, const char* att
|
||||
|
||||
namespace auth {
|
||||
|
||||
ldap_role_manager::ldap_role_manager(std::string_view query_template, std::string_view target_attr, std::string_view bind_name, std::string_view bind_password,
|
||||
uint32_t permissions_update_interval_in_ms, utils::observer<uint32_t> permissions_update_interval_in_ms_observer, cql3::query_processor& qp,
|
||||
::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
||||
: _std_mgr(qp, rg0c, mm, cache)
|
||||
, _group0_client(rg0c)
|
||||
, _query_template(query_template)
|
||||
, _target_attr(target_attr)
|
||||
, _bind_name(bind_name)
|
||||
, _bind_password(bind_password)
|
||||
, _permissions_update_interval_in_ms(permissions_update_interval_in_ms)
|
||||
, _permissions_update_interval_in_ms_observer(std::move(permissions_update_interval_in_ms_observer))
|
||||
, _connection_factory(bind(std::mem_fn(&ldap_role_manager::reconnect), std::ref(*this)))
|
||||
, _cache(cache)
|
||||
, _cache_pruner(make_ready_future<>()) {
|
||||
ldap_role_manager::ldap_role_manager(
|
||||
std::string_view query_template, std::string_view target_attr, std::string_view bind_name, std::string_view bind_password,
|
||||
uint32_t permissions_update_interval_in_ms,
|
||||
utils::observer<uint32_t> permissions_update_interval_in_ms_observer,
|
||||
cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
||||
: _std_mgr(qp, rg0c, mm, cache), _group0_client(rg0c), _query_template(query_template), _target_attr(target_attr), _bind_name(bind_name)
|
||||
, _bind_password(bind_password)
|
||||
, _permissions_update_interval_in_ms(permissions_update_interval_in_ms)
|
||||
, _permissions_update_interval_in_ms_observer(std::move(permissions_update_interval_in_ms_observer))
|
||||
, _connection_factory(bind(std::mem_fn(&ldap_role_manager::reconnect), std::ref(*this)))
|
||||
, _cache(cache)
|
||||
, _cache_pruner(make_ready_future<>()) {
|
||||
}
|
||||
|
||||
ldap_role_manager::ldap_role_manager(cql3::query_processor& qp, ::service::raft_group0_client& rg0c, ::service::migration_manager& mm, cache& cache)
|
||||
: ldap_role_manager(qp.db().get_config().ldap_url_template(), qp.db().get_config().ldap_attr_role(), qp.db().get_config().ldap_bind_dn(),
|
||||
qp.db().get_config().ldap_bind_passwd(), qp.db().get_config().permissions_update_interval_in_ms(),
|
||||
qp.db().get_config().permissions_update_interval_in_ms.observe([this](const uint32_t& v) {
|
||||
_permissions_update_interval_in_ms = v;
|
||||
}),
|
||||
qp, rg0c, mm, cache) {
|
||||
: ldap_role_manager(
|
||||
qp.db().get_config().ldap_url_template(),
|
||||
qp.db().get_config().ldap_attr_role(),
|
||||
qp.db().get_config().ldap_bind_dn(),
|
||||
qp.db().get_config().ldap_bind_passwd(),
|
||||
qp.db().get_config().permissions_update_interval_in_ms(),
|
||||
qp.db().get_config().permissions_update_interval_in_ms.observe([this] (const uint32_t& v) { _permissions_update_interval_in_ms = v; }),
|
||||
qp,
|
||||
rg0c,
|
||||
mm,
|
||||
cache) {
|
||||
}
|
||||
|
||||
std::string_view ldap_role_manager::qualified_java_name() const noexcept {
|
||||
@@ -114,16 +113,17 @@ const resource_set& ldap_role_manager::protected_resources() const {
|
||||
|
||||
future<> ldap_role_manager::start() {
|
||||
if (!parse_url(get_url("dummy-user"))) { // Just need host and port -- any user should do.
|
||||
return make_exception_future(std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
|
||||
return make_exception_future(
|
||||
std::runtime_error(fmt::format("error getting LDAP server address from template {}", _query_template)));
|
||||
}
|
||||
_cache_pruner = futurize_invoke([this]() -> future<> {
|
||||
_cache_pruner = futurize_invoke([this] () -> future<> {
|
||||
while (true) {
|
||||
try {
|
||||
co_await seastar::sleep_abortable(std::chrono::milliseconds(_permissions_update_interval_in_ms), _as);
|
||||
} catch (const seastar::sleep_aborted&) {
|
||||
co_return; // ignore
|
||||
}
|
||||
co_await _cache.container().invoke_on_all([](cache& c) -> future<> {
|
||||
co_await _cache.container().invoke_on_all([] (cache& c) -> future<> {
|
||||
try {
|
||||
co_await c.reload_all_permissions();
|
||||
} catch (...) {
|
||||
@@ -165,7 +165,7 @@ future<conn_ptr> ldap_role_manager::connect() {
|
||||
future<conn_ptr> ldap_role_manager::reconnect() {
|
||||
unsigned retries_left = 5;
|
||||
using namespace std::literals::chrono_literals;
|
||||
conn_ptr conn = co_await exponential_backoff_retry::do_until_value(1s, 32s, _as, [this, &retries_left]() -> future<std::optional<conn_ptr>> {
|
||||
conn_ptr conn = co_await exponential_backoff_retry::do_until_value(1s, 32s, _as, [this, &retries_left] () -> future<std::optional<conn_ptr>> {
|
||||
if (!retries_left) {
|
||||
co_return conn_ptr{};
|
||||
}
|
||||
@@ -188,13 +188,11 @@ future<conn_ptr> ldap_role_manager::reconnect() {
|
||||
|
||||
future<> ldap_role_manager::stop() {
|
||||
_as.request_abort();
|
||||
return std::move(_cache_pruner)
|
||||
.then([this] {
|
||||
return _std_mgr.stop();
|
||||
})
|
||||
.then([this] {
|
||||
return _connection_factory.stop();
|
||||
});
|
||||
return std::move(_cache_pruner).then([this] {
|
||||
return _std_mgr.stop();
|
||||
}).then([this] {
|
||||
return _connection_factory.stop();
|
||||
});
|
||||
}
|
||||
|
||||
future<> ldap_role_manager::create(std::string_view name, const role_config& config, ::service::group0_batch& mc) {
|
||||
@@ -223,42 +221,43 @@ future<role_set> ldap_role_manager::query_granted(std::string_view grantee_name,
|
||||
if (!desc) {
|
||||
return make_exception_future<role_set>(std::runtime_error(format("Error parsing URL {}", url)));
|
||||
}
|
||||
return _connection_factory.with_connection(
|
||||
[this, desc = std::move(desc), grantee_name_ = sstring(grantee_name)](ldap_connection& conn) -> future<role_set> {
|
||||
sstring grantee_name = std::move(grantee_name_);
|
||||
ldap_msg_ptr res = co_await conn.search(desc->lud_dn, desc->lud_scope, desc->lud_filter, desc->lud_attrs,
|
||||
/*attrsonly=*/0, /*serverctrls=*/nullptr, /*clientctrls=*/nullptr,
|
||||
/*timeout=*/nullptr, /*sizelimit=*/0);
|
||||
mylog.trace("query_granted: got search results");
|
||||
const auto mtype = ldap_msgtype(res.get());
|
||||
if (mtype != LDAP_RES_SEARCH_ENTRY && mtype != LDAP_RES_SEARCH_RESULT && mtype != LDAP_RES_SEARCH_REFERENCE) {
|
||||
mylog.error("ldap search yielded result {} of type {}", static_cast<const void*>(res.get()), mtype);
|
||||
co_return coroutine::exception(std::make_exception_ptr(std::runtime_error("ldap_role_manager: search result has wrong type")));
|
||||
return _connection_factory.with_connection([this, desc = std::move(desc), grantee_name_ = sstring(grantee_name)]
|
||||
(ldap_connection& conn) -> future<role_set> {
|
||||
sstring grantee_name = std::move(grantee_name_);
|
||||
ldap_msg_ptr res = co_await conn.search(desc->lud_dn, desc->lud_scope, desc->lud_filter, desc->lud_attrs,
|
||||
/*attrsonly=*/0, /*serverctrls=*/nullptr, /*clientctrls=*/nullptr,
|
||||
/*timeout=*/nullptr, /*sizelimit=*/0);
|
||||
mylog.trace("query_granted: got search results");
|
||||
const auto mtype = ldap_msgtype(res.get());
|
||||
if (mtype != LDAP_RES_SEARCH_ENTRY && mtype != LDAP_RES_SEARCH_RESULT && mtype != LDAP_RES_SEARCH_REFERENCE) {
|
||||
mylog.error("ldap search yielded result {} of type {}", static_cast<const void*>(res.get()), mtype);
|
||||
co_return coroutine::exception(std::make_exception_ptr(std::runtime_error("ldap_role_manager: search result has wrong type")));
|
||||
}
|
||||
std::vector<sstring> values = get_attr_values(conn.get_ldap(), res.get(), _target_attr.c_str());
|
||||
auth::role_set valid_roles{grantee_name};
|
||||
|
||||
// Each value is a role to be granted.
|
||||
co_await parallel_for_each(values, [this, &valid_roles] (const sstring& ldap_role) {
|
||||
return _std_mgr.exists(ldap_role).then([&valid_roles, &ldap_role] (bool exists) {
|
||||
if (exists) {
|
||||
valid_roles.insert(ldap_role);
|
||||
} else {
|
||||
mylog.error("unrecognized role received from LDAP: {}", ldap_role);
|
||||
}
|
||||
std::vector<sstring> values = get_attr_values(conn.get_ldap(), res.get(), _target_attr.c_str());
|
||||
auth::role_set valid_roles{grantee_name};
|
||||
|
||||
// Each value is a role to be granted.
|
||||
co_await parallel_for_each(values, [this, &valid_roles](const sstring& ldap_role) {
|
||||
return _std_mgr.exists(ldap_role).then([&valid_roles, &ldap_role](bool exists) {
|
||||
if (exists) {
|
||||
valid_roles.insert(ldap_role);
|
||||
} else {
|
||||
mylog.error("unrecognized role received from LDAP: {}", ldap_role);
|
||||
}
|
||||
});
|
||||
});
|
||||
|
||||
co_return std::move(valid_roles);
|
||||
});
|
||||
});
|
||||
|
||||
co_return std::move(valid_roles);
|
||||
});
|
||||
}
|
||||
|
||||
future<role_to_directly_granted_map> ldap_role_manager::query_all_directly_granted(::service::query_state& qs) {
|
||||
future<role_to_directly_granted_map>
|
||||
ldap_role_manager::query_all_directly_granted(::service::query_state& qs) {
|
||||
role_to_directly_granted_map result;
|
||||
auto roles = co_await query_all(qs);
|
||||
for (auto& role : roles) {
|
||||
for (auto& role: roles) {
|
||||
auto granted_set = co_await query_granted(role, recursive_role_query::no);
|
||||
for (auto& granted : granted_set) {
|
||||
for (auto& granted: granted_set) {
|
||||
if (granted != role) {
|
||||
result.insert({role, granted});
|
||||
}
|
||||
@@ -272,7 +271,7 @@ future<role_set> ldap_role_manager::query_all(::service::query_state& qs) {
|
||||
}
|
||||
|
||||
future<> ldap_role_manager::create_role(std::string_view role_name) {
|
||||
return smp::submit_to(0, [this, role_name]() -> future<> {
|
||||
return smp::submit_to(0, [this, role_name] () -> future<> {
|
||||
int retries = 10;
|
||||
while (true) {
|
||||
auto guard = co_await _group0_client.start_operation(_as, ::service::raft_timeout{});
|
||||
@@ -284,8 +283,8 @@ future<> ldap_role_manager::create_role(std::string_view role_name) {
|
||||
} catch (const role_already_exists&) {
|
||||
// ok
|
||||
} catch (const ::service::group0_concurrent_modification& ex) {
|
||||
mylog.warn("Failed to auto-create role \"{}\" due to guard conflict.{}.", role_name,
|
||||
retries ? " Retrying" : " Number of retries exceeded, giving up");
|
||||
mylog.warn("Failed to auto-create role \"{}\" due to guard conflict.{}.",
|
||||
role_name, retries ? " Retrying" : " Number of retries exceeded, giving up");
|
||||
if (retries--) {
|
||||
continue;
|
||||
}
|
||||
@@ -330,7 +329,8 @@ future<bool> ldap_role_manager::can_login(std::string_view role_name) {
|
||||
return _std_mgr.can_login(role_name);
|
||||
}
|
||||
|
||||
future<std::optional<sstring>> ldap_role_manager::get_attribute(std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
|
||||
future<std::optional<sstring>> ldap_role_manager::get_attribute(
|
||||
std::string_view role_name, std::string_view attribute_name, ::service::query_state& qs) {
|
||||
return _std_mgr.get_attribute(role_name, attribute_name, qs);
|
||||
}
|
||||
|
||||
|
||||
219
cdc/split.cc
219
cdc/split.cc
@@ -76,14 +76,14 @@ struct partition_deletion {
|
||||
|
||||
using clustered_column_set = std::map<clustering_key, cdc::one_kind_column_set, clustering_key::less_compare>;
|
||||
|
||||
template <typename Container>
|
||||
template<typename Container>
|
||||
concept EntryContainer = requires(Container& container) {
|
||||
// Parenthesized due to https://bugs.llvm.org/show_bug.cgi?id=45088
|
||||
{ (container.atomic_entries) } -> std::same_as<std::vector<atomic_column_update>&>;
|
||||
{ (container.nonatomic_entries) } -> std::same_as<std::vector<nonatomic_column_update>&>;
|
||||
};
|
||||
|
||||
template <EntryContainer Container>
|
||||
template<EntryContainer Container>
|
||||
static void add_columns_affected_by_entries(cdc::one_kind_column_set& cset, const Container& cont) {
|
||||
for (const auto& entry : cont.atomic_entries) {
|
||||
cset.set(entry.id);
|
||||
@@ -134,7 +134,7 @@ struct batch {
|
||||
ret.emplace(clustering_key::make_empty(), all_columns);
|
||||
}
|
||||
|
||||
auto process_change_type = [&](const auto& changes) {
|
||||
auto process_change_type = [&] (const auto& changes) {
|
||||
for (const auto& change : changes) {
|
||||
auto& cset = ret[change.key];
|
||||
cset.resize(s.regular_columns_count());
|
||||
@@ -211,9 +211,7 @@ private:
|
||||
|
||||
public:
|
||||
extract_collection_visitor(column_id id, std::map<change_key_t, row_update>& updates)
|
||||
: _id(id)
|
||||
, _updates(updates) {
|
||||
}
|
||||
: _id(id), _updates(updates) {}
|
||||
|
||||
void collection_tombstone(const tombstone& t) {
|
||||
auto& entry = get_or_append_entry(t.timestamp + 1, gc_clock::duration(0));
|
||||
@@ -228,9 +226,7 @@ public:
|
||||
cell(key, c);
|
||||
}
|
||||
|
||||
constexpr bool finished() const {
|
||||
return false;
|
||||
}
|
||||
constexpr bool finished() const { return false; }
|
||||
};
|
||||
|
||||
/* Visits all cells and tombstones in a row, putting the encountered changes into buckets
|
||||
@@ -253,46 +249,41 @@ struct extract_row_visitor {
|
||||
|
||||
void collection_column(const column_definition& cdef, auto&& visit_collection) {
|
||||
visit(*cdef.type, make_visitor(
|
||||
[&](const collection_type_impl& ctype) {
|
||||
struct collection_visitor : public extract_collection_visitor<collection_visitor> {
|
||||
data_type _value_type;
|
||||
[&] (const collection_type_impl& ctype) {
|
||||
struct collection_visitor : public extract_collection_visitor<collection_visitor> {
|
||||
data_type _value_type;
|
||||
|
||||
collection_visitor(column_id id, std::map<change_key_t, row_update>& updates, const collection_type_impl& ctype)
|
||||
: extract_collection_visitor<collection_visitor>(id, updates)
|
||||
, _value_type(ctype.value_comparator()) {
|
||||
}
|
||||
collection_visitor(column_id id, std::map<change_key_t, row_update>& updates, const collection_type_impl& ctype)
|
||||
: extract_collection_visitor<collection_visitor>(id, updates), _value_type(ctype.value_comparator()) {}
|
||||
|
||||
data_type get_value_type(bytes_view) {
|
||||
return _value_type;
|
||||
}
|
||||
} v(cdef.id, _updates, ctype);
|
||||
data_type get_value_type(bytes_view) {
|
||||
return _value_type;
|
||||
}
|
||||
} v(cdef.id, _updates, ctype);
|
||||
|
||||
visit_collection(v);
|
||||
},
|
||||
[&](const user_type_impl& utype) {
|
||||
struct udt_visitor : public extract_collection_visitor<udt_visitor> {
|
||||
const user_type_impl& _utype;
|
||||
visit_collection(v);
|
||||
},
|
||||
[&] (const user_type_impl& utype) {
|
||||
struct udt_visitor : public extract_collection_visitor<udt_visitor> {
|
||||
const user_type_impl& _utype;
|
||||
|
||||
udt_visitor(column_id id, std::map<change_key_t, row_update>& updates, const user_type_impl& utype)
|
||||
: extract_collection_visitor<udt_visitor>(id, updates)
|
||||
, _utype(utype) {
|
||||
}
|
||||
udt_visitor(column_id id, std::map<change_key_t, row_update>& updates, const user_type_impl& utype)
|
||||
: extract_collection_visitor<udt_visitor>(id, updates), _utype(utype) {}
|
||||
|
||||
data_type get_value_type(bytes_view key) {
|
||||
return _utype.type(deserialize_field_index(key));
|
||||
}
|
||||
} v(cdef.id, _updates, utype);
|
||||
data_type get_value_type(bytes_view key) {
|
||||
return _utype.type(deserialize_field_index(key));
|
||||
}
|
||||
} v(cdef.id, _updates, utype);
|
||||
|
||||
visit_collection(v);
|
||||
},
|
||||
[&](const abstract_type& o) {
|
||||
throw std::runtime_error(format("extract_changes: unknown collection type:", o.name()));
|
||||
}));
|
||||
visit_collection(v);
|
||||
},
|
||||
[&] (const abstract_type& o) {
|
||||
throw std::runtime_error(format("extract_changes: unknown collection type:", o.name()));
|
||||
}
|
||||
));
|
||||
}
|
||||
|
||||
constexpr bool finished() const {
|
||||
return false;
|
||||
}
|
||||
constexpr bool finished() const { return false; }
|
||||
};
|
||||
|
||||
struct extract_changes_visitor {
|
||||
@@ -302,8 +293,12 @@ struct extract_changes_visitor {
|
||||
extract_row_visitor v;
|
||||
visit_row_cells(v);
|
||||
|
||||
for (auto& [ts_ttl, row_update] : v._updates) {
|
||||
_result[ts_ttl.first].static_updates.push_back({ts_ttl.second, std::move(row_update.atomic_entries), std::move(row_update.nonatomic_entries)});
|
||||
for (auto& [ts_ttl, row_update]: v._updates) {
|
||||
_result[ts_ttl.first].static_updates.push_back({
|
||||
ts_ttl.second,
|
||||
std::move(row_update.atomic_entries),
|
||||
std::move(row_update.nonatomic_entries)
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -324,18 +319,24 @@ struct extract_changes_visitor {
|
||||
} v;
|
||||
visit_row_cells(v);
|
||||
|
||||
for (auto& [ts_ttl, row_update] : v._updates) {
|
||||
for (auto& [ts_ttl, row_update]: v._updates) {
|
||||
// It is important that changes in the resulting `set_of_changes` are listed
|
||||
// in increasing TTL order. The reason is explained in a comment in cdc/log.cc,
|
||||
// search for "#6070".
|
||||
auto [ts, ttl] = ts_ttl;
|
||||
|
||||
if (v._marker && ts == v._marker_ts && ttl == v._marker_ttl) {
|
||||
_result[ts].clustered_inserts.push_back({ttl, ckey, *v._marker, std::move(row_update.atomic_entries), {}});
|
||||
_result[ts].clustered_inserts.push_back({
|
||||
ttl,
|
||||
ckey,
|
||||
*v._marker,
|
||||
std::move(row_update.atomic_entries),
|
||||
{}
|
||||
});
|
||||
|
||||
auto& cr_insert = _result[ts].clustered_inserts.back();
|
||||
bool clustered_update_exists = false;
|
||||
for (auto& nonatomic_up : row_update.nonatomic_entries) {
|
||||
for (auto& nonatomic_up: row_update.nonatomic_entries) {
|
||||
// Updating a collection column with an INSERT statement implies inserting a tombstone.
|
||||
//
|
||||
// For example, suppose that we have:
|
||||
@@ -361,7 +362,12 @@ struct extract_changes_visitor {
|
||||
cr_insert.nonatomic_entries.push_back(std::move(nonatomic_up));
|
||||
} else {
|
||||
if (!clustered_update_exists) {
|
||||
_result[ts].clustered_updates.push_back({ttl, ckey, {}, {}});
|
||||
_result[ts].clustered_updates.push_back({
|
||||
ttl,
|
||||
ckey,
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
// Multiple iterations of this `for` loop (for different collection columns)
|
||||
// might want to put their `nonatomic_up`s into an UPDATE change;
|
||||
@@ -384,7 +390,12 @@ struct extract_changes_visitor {
|
||||
}
|
||||
}
|
||||
} else {
|
||||
_result[ts].clustered_updates.push_back({ttl, ckey, std::move(row_update.atomic_entries), std::move(row_update.nonatomic_entries)});
|
||||
_result[ts].clustered_updates.push_back({
|
||||
ttl,
|
||||
ckey,
|
||||
std::move(row_update.atomic_entries),
|
||||
std::move(row_update.nonatomic_entries)
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -401,9 +412,7 @@ struct extract_changes_visitor {
|
||||
_result[t.timestamp].partition_deletions = partition_deletion{t};
|
||||
}
|
||||
|
||||
constexpr bool finished() const {
|
||||
return false;
|
||||
}
|
||||
constexpr bool finished() const { return false; }
|
||||
};
|
||||
|
||||
set_of_changes extract_changes(const mutation& m) {
|
||||
@@ -417,23 +426,13 @@ namespace cdc {
|
||||
struct find_timestamp_visitor {
|
||||
api::timestamp_type _ts = api::missing_timestamp;
|
||||
|
||||
bool finished() const {
|
||||
return _ts != api::missing_timestamp;
|
||||
}
|
||||
bool finished() const { return _ts != api::missing_timestamp; }
|
||||
|
||||
void visit(api::timestamp_type ts) {
|
||||
_ts = ts;
|
||||
}
|
||||
void visit(const atomic_cell_view& cell) {
|
||||
visit(cell.timestamp());
|
||||
}
|
||||
void visit(api::timestamp_type ts) { _ts = ts; }
|
||||
void visit(const atomic_cell_view& cell) { visit(cell.timestamp()); }
|
||||
|
||||
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
||||
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
||||
void collection_tombstone(const tombstone& t) {
|
||||
// A collection tombstone with timestamp T can be created with:
|
||||
// UPDATE ks.t USING TIMESTAMP T + 1 SET X = null WHERE ...
|
||||
@@ -442,33 +441,15 @@ struct find_timestamp_visitor {
|
||||
// with cdc$time using timestamp T + 1 instead of T.
|
||||
visit(t.timestamp + 1);
|
||||
}
|
||||
void live_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void collection_column(const column_definition&, auto&& visit_collection) {
|
||||
visit_collection(*this);
|
||||
}
|
||||
void marker(const row_marker& rm) {
|
||||
visit(rm.timestamp());
|
||||
}
|
||||
void static_row_cells(auto&& visit_row_cells) {
|
||||
visit_row_cells(*this);
|
||||
}
|
||||
void clustered_row_cells(const clustering_key&, auto&& visit_row_cells) {
|
||||
visit_row_cells(*this);
|
||||
}
|
||||
void clustered_row_delete(const clustering_key&, const tombstone& t) {
|
||||
visit(t.timestamp);
|
||||
}
|
||||
void range_delete(const range_tombstone& t) {
|
||||
visit(t.tomb.timestamp);
|
||||
}
|
||||
void partition_delete(const tombstone& t) {
|
||||
visit(t.timestamp);
|
||||
}
|
||||
void live_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
|
||||
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
|
||||
void collection_column(const column_definition&, auto&& visit_collection) { visit_collection(*this); }
|
||||
void marker(const row_marker& rm) { visit(rm.timestamp()); }
|
||||
void static_row_cells(auto&& visit_row_cells) { visit_row_cells(*this); }
|
||||
void clustered_row_cells(const clustering_key&, auto&& visit_row_cells) { visit_row_cells(*this); }
|
||||
void clustered_row_delete(const clustering_key&, const tombstone& t) { visit(t.timestamp); }
|
||||
void range_delete(const range_tombstone& t) { visit(t.tomb.timestamp); }
|
||||
void partition_delete(const tombstone& t) { visit(t.timestamp); }
|
||||
};
|
||||
|
||||
/* Find some timestamp inside the given mutation.
|
||||
@@ -524,12 +505,8 @@ struct should_split_visitor {
|
||||
|
||||
virtual ~should_split_visitor() = default;
|
||||
|
||||
inline bool finished() const {
|
||||
return _result;
|
||||
}
|
||||
inline void stop() {
|
||||
_result = true;
|
||||
}
|
||||
inline bool finished() const { return _result; }
|
||||
inline void stop() { _result = true; }
|
||||
|
||||
void visit(api::timestamp_type ts, gc_clock::duration ttl = gc_clock::duration(0)) {
|
||||
if (_ts != api::missing_timestamp && _ts != ts) {
|
||||
@@ -540,23 +517,15 @@ struct should_split_visitor {
|
||||
if (_ttl && *_ttl != ttl) {
|
||||
return stop();
|
||||
}
|
||||
_ttl = {ttl};
|
||||
_ttl = { ttl };
|
||||
}
|
||||
|
||||
void visit(const atomic_cell_view& cell) {
|
||||
visit(cell.timestamp(), get_ttl(cell));
|
||||
}
|
||||
void visit(const atomic_cell_view& cell) { visit(cell.timestamp(), get_ttl(cell)); }
|
||||
|
||||
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void live_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
||||
void dead_atomic_cell(const column_definition&, const atomic_cell_view& cell) { visit(cell); }
|
||||
|
||||
void collection_tombstone(const tombstone& t) {
|
||||
visit(t.timestamp + 1);
|
||||
}
|
||||
void collection_tombstone(const tombstone& t) { visit(t.timestamp + 1); }
|
||||
|
||||
virtual void live_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||
if (_had_row_marker) {
|
||||
@@ -565,12 +534,8 @@ struct should_split_visitor {
|
||||
}
|
||||
visit(cell);
|
||||
}
|
||||
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) {
|
||||
visit(cell);
|
||||
}
|
||||
void collection_column(const column_definition&, auto&& visit_collection) {
|
||||
visit_collection(*this);
|
||||
}
|
||||
void dead_collection_cell(bytes_view, const atomic_cell_view& cell) { visit(cell); }
|
||||
void collection_column(const column_definition&, auto&& visit_collection) { visit_collection(*this); }
|
||||
|
||||
virtual void marker(const row_marker& rm) {
|
||||
_had_row_marker = true;
|
||||
@@ -641,8 +606,8 @@ bool should_split(const mutation& m, const per_request_options& options) {
|
||||
cdc::inspect_mutation(m, v);
|
||||
|
||||
return v._result
|
||||
// A mutation with no timestamp will be split into 0 mutations:
|
||||
|| v._ts == api::missing_timestamp;
|
||||
// A mutation with no timestamp will be split into 0 mutations:
|
||||
|| v._ts == api::missing_timestamp;
|
||||
}
|
||||
|
||||
// Returns true if the row state and the atomic and nonatomic entries represent
|
||||
@@ -677,7 +642,7 @@ static bool entries_match_row_state(const schema_ptr& base_schema, const cell_ma
|
||||
if (current_values.size() != update.cells.size()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
std::unordered_map<sstring_view, bytes> current_values_map;
|
||||
for (const auto& entry : current_values) {
|
||||
const auto attr_name = std::string_view(value_cast<sstring>(entry.first));
|
||||
@@ -746,8 +711,8 @@ bool should_skip(batch& changes, const mutation& base_mutation, change_processor
|
||||
return true;
|
||||
}
|
||||
|
||||
void process_changes_with_splitting(
|
||||
const mutation& base_mutation, change_processor& processor, bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||
void process_changes_with_splitting(const mutation& base_mutation, change_processor& processor,
|
||||
bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||
const auto base_schema = base_mutation.schema();
|
||||
auto changes = extract_changes(base_mutation);
|
||||
auto pk = base_mutation.key();
|
||||
@@ -859,8 +824,8 @@ void process_changes_with_splitting(
|
||||
}
|
||||
}
|
||||
|
||||
void process_changes_without_splitting(
|
||||
const mutation& base_mutation, change_processor& processor, bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||
void process_changes_without_splitting(const mutation& base_mutation, change_processor& processor,
|
||||
bool enable_preimage, bool enable_postimage, bool alternator_strict_compatibility) {
|
||||
if (alternator_strict_compatibility) {
|
||||
auto changes = extract_changes(base_mutation);
|
||||
if (should_skip(changes.begin()->second, base_mutation, processor)) {
|
||||
@@ -877,7 +842,7 @@ void process_changes_without_splitting(
|
||||
|
||||
one_kind_column_set columns{base_schema->static_columns_count()};
|
||||
if (!p.static_row().empty()) {
|
||||
p.static_row().get().for_each_cell([&](column_id id, const atomic_cell_or_collection& cell) {
|
||||
p.static_row().get().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
columns.set(id);
|
||||
});
|
||||
processor.produce_preimage(nullptr, columns);
|
||||
@@ -890,7 +855,7 @@ void process_changes_without_splitting(
|
||||
// Row deleted - include all columns in preimage
|
||||
columns.set(0, base_schema->regular_columns_count(), true);
|
||||
} else {
|
||||
cr.row().cells().for_each_cell([&](column_id id, const atomic_cell_or_collection& cell) {
|
||||
cr.row().cells().for_each_cell([&] (column_id id, const atomic_cell_or_collection& cell) {
|
||||
columns.set(id);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -946,7 +946,7 @@ sstables::shared_sstable sstables_task_executor::consume_sstable() {
|
||||
auto sst = _sstables.back();
|
||||
_sstables.pop_back();
|
||||
--_cm._stats.pending_tasks; // from this point on, switch_state(pending|active) works the same way as any other task
|
||||
cmlog.debug("consumed {}", sst->get_filename());
|
||||
cmlog.debug("{}", format("consumed {}", sst->get_filename()));
|
||||
return sst;
|
||||
}
|
||||
|
||||
@@ -1208,6 +1208,7 @@ future<> compaction_manager::await_tasks(std::vector<shared_ptr<compaction_task_
|
||||
|
||||
std::vector<shared_ptr<compaction_task_executor>>
|
||||
compaction_manager::do_stop_ongoing_compactions(sstring reason, std::function<bool(const compaction_group_view*)> filter, std::optional<compaction_type> type_opt) noexcept {
|
||||
auto ongoing_compactions = get_compactions(filter).size();
|
||||
auto tasks = _tasks
|
||||
| std::views::filter([&filter, type_opt] (const auto& task) {
|
||||
return filter(task.compacting_table()) && (!type_opt || task.compaction_type() == *type_opt);
|
||||
@@ -1216,7 +1217,6 @@ compaction_manager::do_stop_ongoing_compactions(sstring reason, std::function<bo
|
||||
| std::ranges::to<std::vector<shared_ptr<compaction_task_executor>>>();
|
||||
logging::log_level level = tasks.empty() ? log_level::debug : log_level::info;
|
||||
if (cmlog.is_enabled(level)) {
|
||||
auto ongoing_compactions = get_compactions(filter).size();
|
||||
std::string scope = "";
|
||||
if (!tasks.empty()) {
|
||||
const compaction_group_view* t = tasks.front()->compacting_table();
|
||||
@@ -1426,17 +1426,11 @@ protected:
|
||||
compaction_strategy cs = t.get_compaction_strategy();
|
||||
compaction_descriptor descriptor = co_await cs.get_sstables_for_compaction(t, _cm.get_strategy_control());
|
||||
int weight = calculate_weight(descriptor);
|
||||
bool debug_enabled = cmlog.is_enabled(log_level::debug);
|
||||
if (debug_enabled) {
|
||||
cmlog.debug("Started minor compaction sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
||||
descriptor.sstables, compacting_table()->get_sstables_repaired_at(),
|
||||
compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
||||
}
|
||||
cmlog.debug("Started minor compaction sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
||||
descriptor.sstables, compacting_table()->get_sstables_repaired_at(),
|
||||
compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
||||
|
||||
sstring old_sstables;
|
||||
if (debug_enabled) {
|
||||
old_sstables = ::format("{}", descriptor.sstables);
|
||||
}
|
||||
auto old_sstables = ::format("{}", descriptor.sstables);
|
||||
|
||||
if (descriptor.sstables.empty() || !can_proceed() || t.is_auto_compaction_disabled_by_user()) {
|
||||
cmlog.debug("{}: sstables={} can_proceed={} auto_compaction={}", *this, descriptor.sstables.size(), can_proceed(), t.is_auto_compaction_disabled_by_user());
|
||||
@@ -1466,10 +1460,8 @@ protected:
|
||||
try {
|
||||
bool should_update_history = this->should_update_history(descriptor.options.type());
|
||||
compaction_result res = co_await compact_sstables(std::move(descriptor), _compaction_data, on_replace);
|
||||
if (debug_enabled) {
|
||||
cmlog.debug("Finished minor compaction old_sstables={} new_sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
||||
old_sstables, res.new_sstables, compacting_table()->get_sstables_repaired_at(), compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
||||
}
|
||||
cmlog.debug("Finished minor compaction old_sstables={} new_sstables={} sstables_reapired_at={} range={} uuid={} compaction_uuid={}",
|
||||
old_sstables, res.new_sstables, compacting_table()->get_sstables_repaired_at(), compacting_table()->token_range(), uuid, _compaction_data.compaction_uuid);
|
||||
finish_compaction();
|
||||
if (should_update_history) {
|
||||
// update_history can take a long time compared to
|
||||
|
||||
@@ -33,10 +33,8 @@ future<compaction_descriptor> leveled_compaction_strategy::get_sstables_for_comp
|
||||
auto candidate = manifest.get_compaction_candidates(*state->last_compacted_keys, state->compaction_counter);
|
||||
|
||||
if (!candidate.sstables.empty()) {
|
||||
if (leveled_manifest::logger.is_enabled(logging::log_level::debug)) {
|
||||
auto main_set = co_await table_s.main_sstable_set();
|
||||
leveled_manifest::logger.debug("leveled: Compacting {} out of {} sstables", candidate.sstables.size(), main_set->size());
|
||||
}
|
||||
auto main_set = co_await table_s.main_sstable_set();
|
||||
leveled_manifest::logger.debug("leveled: Compacting {} out of {} sstables", candidate.sstables.size(), main_set->size());
|
||||
co_return candidate;
|
||||
}
|
||||
|
||||
|
||||
@@ -15,7 +15,6 @@
|
||||
#include "compaction_strategy_state.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
|
||||
#include <seastar/util/lazy.hh>
|
||||
#include <ranges>
|
||||
|
||||
namespace compaction {
|
||||
@@ -29,12 +28,12 @@ time_window_compaction_strategy_state_ptr time_window_compaction_strategy::get_s
|
||||
}
|
||||
|
||||
const std::unordered_map<sstring, std::chrono::seconds> time_window_compaction_strategy_options::valid_window_units = {
|
||||
{"MINUTES", 60s}, {"HOURS", 3600s}, {"DAYS", 86400s}};
|
||||
{ "MINUTES", 60s }, { "HOURS", 3600s }, { "DAYS", 86400s }
|
||||
};
|
||||
|
||||
const std::unordered_map<sstring, time_window_compaction_strategy_options::timestamp_resolutions>
|
||||
time_window_compaction_strategy_options::valid_timestamp_resolutions = {
|
||||
{"MICROSECONDS", timestamp_resolutions::microsecond},
|
||||
{"MILLISECONDS", timestamp_resolutions::millisecond},
|
||||
const std::unordered_map<sstring, time_window_compaction_strategy_options::timestamp_resolutions> time_window_compaction_strategy_options::valid_timestamp_resolutions = {
|
||||
{ "MICROSECONDS", timestamp_resolutions::microsecond },
|
||||
{ "MILLISECONDS", timestamp_resolutions::millisecond },
|
||||
};
|
||||
|
||||
static std::chrono::seconds validate_compaction_window_unit(const std::map<sstring, sstring>& options) {
|
||||
@@ -44,8 +43,7 @@ static std::chrono::seconds validate_compaction_window_unit(const std::map<sstri
|
||||
if (tmp_value) {
|
||||
auto valid_window_units_it = time_window_compaction_strategy_options::valid_window_units.find(tmp_value.value());
|
||||
if (valid_window_units_it == time_window_compaction_strategy_options::valid_window_units.end()) {
|
||||
throw exceptions::configuration_exception(
|
||||
fmt::format("Invalid window unit {} for {}", tmp_value.value(), time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY));
|
||||
throw exceptions::configuration_exception(fmt::format("Invalid window unit {} for {}", tmp_value.value(), time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY));
|
||||
}
|
||||
window_unit = valid_window_units_it->second;
|
||||
}
|
||||
@@ -61,12 +59,10 @@ static std::chrono::seconds validate_compaction_window_unit(const std::map<sstri
|
||||
|
||||
static int validate_compaction_window_size(const std::map<sstring, sstring>& options) {
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY);
|
||||
int window_size = cql3::statements::property_definitions::to_long(time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, tmp_value,
|
||||
time_window_compaction_strategy_options::DEFAULT_COMPACTION_WINDOW_SIZE);
|
||||
int window_size = cql3::statements::property_definitions::to_long(time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, tmp_value, time_window_compaction_strategy_options::DEFAULT_COMPACTION_WINDOW_SIZE);
|
||||
|
||||
if (window_size <= 0) {
|
||||
throw exceptions::configuration_exception(
|
||||
fmt::format("{} value ({}) must be greater than 1", time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, window_size));
|
||||
throw exceptions::configuration_exception(fmt::format("{} value ({}) must be greater than 1", time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, window_size));
|
||||
}
|
||||
|
||||
return window_size;
|
||||
@@ -86,30 +82,26 @@ static db_clock::duration validate_expired_sstable_check_frequency_seconds(const
|
||||
try {
|
||||
expired_sstable_check_frequency = std::chrono::seconds(std::stol(tmp_value.value()));
|
||||
} catch (const std::exception& e) {
|
||||
throw exceptions::syntax_exception(fmt::format(
|
||||
"Invalid long value {} for {}", tmp_value.value(), time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY));
|
||||
throw exceptions::syntax_exception(fmt::format("Invalid long value {} for {}", tmp_value.value(), time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY));
|
||||
}
|
||||
}
|
||||
|
||||
return expired_sstable_check_frequency;
|
||||
}
|
||||
|
||||
static db_clock::duration validate_expired_sstable_check_frequency_seconds(
|
||||
const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
static db_clock::duration validate_expired_sstable_check_frequency_seconds(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
db_clock::duration expired_sstable_check_frequency = validate_expired_sstable_check_frequency_seconds(options);
|
||||
unchecked_options.erase(time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY);
|
||||
return expired_sstable_check_frequency;
|
||||
}
|
||||
|
||||
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options) {
|
||||
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution =
|
||||
time_window_compaction_strategy_options::timestamp_resolutions::microsecond;
|
||||
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = time_window_compaction_strategy_options::timestamp_resolutions::microsecond;
|
||||
|
||||
auto tmp_value = compaction_strategy_impl::get_value(options, time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
|
||||
if (tmp_value) {
|
||||
if (!time_window_compaction_strategy_options::valid_timestamp_resolutions.contains(tmp_value.value())) {
|
||||
throw exceptions::configuration_exception(fmt::format(
|
||||
"Invalid timestamp resolution {} for {}", tmp_value.value(), time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY));
|
||||
throw exceptions::configuration_exception(fmt::format("Invalid timestamp resolution {} for {}", tmp_value.value(), time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY));
|
||||
} else {
|
||||
timestamp_resolution = time_window_compaction_strategy_options::valid_timestamp_resolutions.at(tmp_value.value());
|
||||
}
|
||||
@@ -118,8 +110,7 @@ static time_window_compaction_strategy_options::timestamp_resolutions validate_t
|
||||
return timestamp_resolution;
|
||||
}
|
||||
|
||||
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(
|
||||
const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
static time_window_compaction_strategy_options::timestamp_resolutions validate_timestamp_resolution(const std::map<sstring, sstring>& options, std::map<sstring, sstring>& unchecked_options) {
|
||||
time_window_compaction_strategy_options::timestamp_resolutions timestamp_resolution = validate_timestamp_resolution(options);
|
||||
unchecked_options.erase(time_window_compaction_strategy_options::TIMESTAMP_RESOLUTION_KEY);
|
||||
return timestamp_resolution;
|
||||
@@ -154,7 +145,7 @@ void time_window_compaction_strategy_options::validate(const std::map<sstring, s
|
||||
compaction_strategy_impl::validate_min_max_threshold(options, unchecked_options);
|
||||
|
||||
auto it = options.find("enable_optimized_twcs_queries");
|
||||
if (it != options.end() && it->second != "true" && it->second != "false") {
|
||||
if (it != options.end() && it->second != "true" && it->second != "false") {
|
||||
throw exceptions::configuration_exception(fmt::format("enable_optimized_twcs_queries value ({}) must be \"true\" or \"false\"", it->second));
|
||||
}
|
||||
unchecked_options.erase("enable_optimized_twcs_queries");
|
||||
@@ -171,9 +162,7 @@ class classify_by_timestamp {
|
||||
std::vector<int64_t> _known_windows;
|
||||
|
||||
public:
|
||||
explicit classify_by_timestamp(time_window_compaction_strategy_options options)
|
||||
: _options(std::move(options)) {
|
||||
}
|
||||
explicit classify_by_timestamp(time_window_compaction_strategy_options options) : _options(std::move(options)) { }
|
||||
int64_t operator()(api::timestamp_type ts) {
|
||||
const auto window = time_window_compaction_strategy::get_window_for(_options, ts);
|
||||
if (const auto it = std::ranges::find(_known_windows, window); it != _known_windows.end()) {
|
||||
@@ -201,7 +190,7 @@ uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutati
|
||||
auto estimated_window_count = max_data_segregation_window_count;
|
||||
auto default_ttl = std::chrono::duration_cast<std::chrono::microseconds>(s->default_time_to_live());
|
||||
bool min_and_max_ts_available = ms_meta.min_timestamp && ms_meta.max_timestamp;
|
||||
auto estimate_window_count = [this](timestamp_type min_window, timestamp_type max_window) {
|
||||
auto estimate_window_count = [this] (timestamp_type min_window, timestamp_type max_window) {
|
||||
const auto window_size = get_window_size(_options);
|
||||
return (max_window + (window_size - 1) - min_window) / window_size;
|
||||
};
|
||||
@@ -221,19 +210,21 @@ uint64_t time_window_compaction_strategy::adjust_partition_estimate(const mutati
|
||||
return partition_estimate / std::max(1UL, uint64_t(estimated_window_count));
|
||||
}
|
||||
|
||||
mutation_reader_consumer time_window_compaction_strategy::make_interposer_consumer(
|
||||
const mutation_source_metadata& ms_meta, mutation_reader_consumer end_consumer) const {
|
||||
if (ms_meta.min_timestamp && ms_meta.max_timestamp &&
|
||||
get_window_for(_options, *ms_meta.min_timestamp) == get_window_for(_options, *ms_meta.max_timestamp)) {
|
||||
mutation_reader_consumer time_window_compaction_strategy::make_interposer_consumer(const mutation_source_metadata& ms_meta, mutation_reader_consumer end_consumer) const {
|
||||
if (ms_meta.min_timestamp && ms_meta.max_timestamp
|
||||
&& get_window_for(_options, *ms_meta.min_timestamp) == get_window_for(_options, *ms_meta.max_timestamp)) {
|
||||
return end_consumer;
|
||||
}
|
||||
return [options = _options, end_consumer = std::move(end_consumer)](mutation_reader rd) mutable -> future<> {
|
||||
return mutation_writer::segregate_by_timestamp(std::move(rd), classify_by_timestamp(std::move(options)), end_consumer);
|
||||
return [options = _options, end_consumer = std::move(end_consumer)] (mutation_reader rd) mutable -> future<> {
|
||||
return mutation_writer::segregate_by_timestamp(
|
||||
std::move(rd),
|
||||
classify_by_timestamp(std::move(options)),
|
||||
end_consumer);
|
||||
};
|
||||
}
|
||||
|
||||
compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||
std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||
compaction_descriptor
|
||||
time_window_compaction_strategy::get_reshaping_job(std::vector<sstables::shared_sstable> input, schema_ptr schema, reshape_config cfg) const {
|
||||
auto mode = cfg.mode;
|
||||
std::vector<sstables::shared_sstable> single_window;
|
||||
std::vector<sstables::shared_sstable> multi_window;
|
||||
@@ -248,7 +239,7 @@ compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||
|
||||
// Sort input sstables by first_key order
|
||||
// to allow efficient reshaping of disjoint sstables.
|
||||
std::sort(input.begin(), input.end(), [&schema](const sstables::shared_sstable& a, const sstables::shared_sstable& b) {
|
||||
std::sort(input.begin(), input.end(), [&schema] (const sstables::shared_sstable& a, const sstables::shared_sstable& b) {
|
||||
return dht::ring_position(a->get_first_decorated_key()).less_compare(*schema, dht::ring_position(b->get_first_decorated_key()));
|
||||
});
|
||||
|
||||
@@ -262,34 +253,31 @@ compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||
}
|
||||
}
|
||||
|
||||
auto is_disjoint = [&schema, mode, max_sstables](const std::vector<sstables::shared_sstable>& ssts) {
|
||||
auto is_disjoint = [&schema, mode, max_sstables] (const std::vector<sstables::shared_sstable>& ssts) {
|
||||
size_t tolerance = (mode == reshape_mode::relaxed) ? max_sstables : 0;
|
||||
return sstable_set_overlapping_count(schema, ssts) <= tolerance;
|
||||
};
|
||||
|
||||
clogger.debug("time_window_compaction_strategy::get_reshaping_job: offstrategy_threshold={} max_sstables={} multi_window={} disjoint={} "
|
||||
"single_window={} disjoint={}",
|
||||
offstrategy_threshold, max_sstables, multi_window.size(), seastar::value_of([&] {
|
||||
return !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0;
|
||||
}),
|
||||
single_window.size(), seastar::value_of([&] {
|
||||
return !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0;
|
||||
}));
|
||||
clogger.debug("time_window_compaction_strategy::get_reshaping_job: offstrategy_threshold={} max_sstables={} multi_window={} disjoint={} single_window={} disjoint={}",
|
||||
offstrategy_threshold, max_sstables,
|
||||
multi_window.size(), !multi_window.empty() && sstable_set_overlapping_count(schema, multi_window) == 0,
|
||||
single_window.size(), !single_window.empty() && sstable_set_overlapping_count(schema, single_window) == 0);
|
||||
|
||||
auto get_job_size = [](const std::vector<sstables::shared_sstable>& ssts) {
|
||||
auto get_job_size = [] (const std::vector<sstables::shared_sstable>& ssts) {
|
||||
return std::ranges::fold_left(ssts | std::views::transform(std::mem_fn(&sstables::sstable::bytes_on_disk)), uint64_t(0), std::plus{});
|
||||
};
|
||||
|
||||
// Targets a space overhead of 10%. All disjoint sstables can be compacted together as long as they won't
|
||||
// cause an overhead above target. Otherwise, the job targets a maximum of #max_threshold sstables.
|
||||
auto need_trimming = [&](const std::vector<sstables::shared_sstable>& ssts, const uint64_t job_size, bool is_disjoint) {
|
||||
auto need_trimming = [&] (const std::vector<sstables::shared_sstable>& ssts, const uint64_t job_size, bool is_disjoint) {
|
||||
const size_t min_sstables = 2;
|
||||
auto is_above_target_size = job_size > target_job_size;
|
||||
|
||||
return (ssts.size() > max_sstables && !is_disjoint) || (ssts.size() > min_sstables && is_above_target_size);
|
||||
return (ssts.size() > max_sstables && !is_disjoint) ||
|
||||
(ssts.size() > min_sstables && is_above_target_size);
|
||||
};
|
||||
|
||||
auto maybe_trim_job = [&need_trimming](std::vector<sstables::shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
|
||||
auto maybe_trim_job = [&need_trimming] (std::vector<sstables::shared_sstable>& ssts, uint64_t job_size, bool is_disjoint) {
|
||||
while (need_trimming(ssts, job_size, is_disjoint)) {
|
||||
auto sst = ssts.back();
|
||||
ssts.pop_back();
|
||||
@@ -306,7 +294,7 @@ compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||
// For example, if there are N sstables spanning window W, where N <= 32, then we can produce all data for W
|
||||
// in a single compaction round, removing the need to later compact W to reduce its number of files.
|
||||
auto sort_size = std::min(max_sstables, multi_window.size());
|
||||
std::ranges::partial_sort(multi_window, multi_window.begin() + sort_size, std::ranges::less(), [](const sstables::shared_sstable& a) {
|
||||
std::ranges::partial_sort(multi_window, multi_window.begin() + sort_size, std::ranges::less(), [] (const sstables::shared_sstable &a) {
|
||||
return a->get_stats_metadata().max_timestamp;
|
||||
});
|
||||
maybe_trim_job(multi_window, job_size, disjoint);
|
||||
@@ -346,7 +334,8 @@ compaction_descriptor time_window_compaction_strategy::get_reshaping_job(
|
||||
return compaction_descriptor();
|
||||
}
|
||||
|
||||
future<compaction_descriptor> time_window_compaction_strategy::get_sstables_for_compaction(compaction_group_view& table_s, strategy_control& control) {
|
||||
future<compaction_descriptor>
|
||||
time_window_compaction_strategy::get_sstables_for_compaction(compaction_group_view& table_s, strategy_control& control) {
|
||||
auto state = get_state(table_s);
|
||||
auto compaction_time = gc_clock::now();
|
||||
auto candidates = co_await control.candidates(table_s);
|
||||
@@ -380,8 +369,10 @@ future<compaction_descriptor> time_window_compaction_strategy::get_sstables_for_
|
||||
co_return compaction_descriptor(std::move(compaction_candidates));
|
||||
}
|
||||
|
||||
time_window_compaction_strategy::bucket_compaction_mode time_window_compaction_strategy::compaction_mode(
|
||||
const time_window_compaction_strategy_state& state, const bucket_t& bucket, timestamp_type bucket_key, timestamp_type now, size_t min_threshold) const {
|
||||
time_window_compaction_strategy::bucket_compaction_mode
|
||||
time_window_compaction_strategy::compaction_mode(const time_window_compaction_strategy_state& state,
|
||||
const bucket_t& bucket, timestamp_type bucket_key,
|
||||
timestamp_type now, size_t min_threshold) const {
|
||||
// STCS will also be performed on older window buckets, to avoid a bad write and
|
||||
// space amplification when something like read repair cause small updates to
|
||||
// those past windows.
|
||||
@@ -394,7 +385,8 @@ time_window_compaction_strategy::bucket_compaction_mode time_window_compaction_s
|
||||
return bucket_compaction_mode::none;
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> time_window_compaction_strategy::get_next_non_expired_sstables(compaction_group_view& table_s, strategy_control& control,
|
||||
std::vector<sstables::shared_sstable>
|
||||
time_window_compaction_strategy::get_next_non_expired_sstables(compaction_group_view& table_s, strategy_control& control,
|
||||
std::vector<sstables::shared_sstable> non_expiring_sstables, gc_clock::time_point compaction_time, time_window_compaction_strategy_state& state) {
|
||||
auto most_interesting = get_compaction_candidates(table_s, control, non_expiring_sstables, state);
|
||||
|
||||
@@ -408,29 +400,31 @@ std::vector<sstables::shared_sstable> time_window_compaction_strategy::get_next_
|
||||
|
||||
// if there is no sstable to compact in standard way, try compacting single sstable whose droppable tombstone
|
||||
// ratio is greater than threshold.
|
||||
std::erase_if(non_expiring_sstables, [this, compaction_time, &table_s](const sstables::shared_sstable& sst) -> bool {
|
||||
std::erase_if(non_expiring_sstables, [this, compaction_time, &table_s] (const sstables::shared_sstable& sst) -> bool {
|
||||
return !worth_dropping_tombstones(sst, compaction_time, table_s);
|
||||
});
|
||||
if (non_expiring_sstables.empty()) {
|
||||
return {};
|
||||
}
|
||||
auto it = std::ranges::min_element(non_expiring_sstables, [](auto& i, auto& j) {
|
||||
auto it = std::ranges::min_element(non_expiring_sstables, [] (auto& i, auto& j) {
|
||||
return i->get_stats_metadata().min_timestamp < j->get_stats_metadata().min_timestamp;
|
||||
});
|
||||
return {*it};
|
||||
return { *it };
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> time_window_compaction_strategy::get_compaction_candidates(compaction_group_view& table_s, strategy_control& control,
|
||||
std::vector<sstables::shared_sstable> candidate_sstables, time_window_compaction_strategy_state& state) {
|
||||
std::vector<sstables::shared_sstable>
|
||||
time_window_compaction_strategy::get_compaction_candidates(compaction_group_view& table_s, strategy_control& control,
|
||||
std::vector<sstables::shared_sstable> candidate_sstables, time_window_compaction_strategy_state& state) {
|
||||
auto [buckets, max_timestamp] = get_buckets(std::move(candidate_sstables), _options);
|
||||
// Update the highest window seen, if necessary
|
||||
state.highest_window_seen = std::max(state.highest_window_seen, max_timestamp);
|
||||
|
||||
return newest_bucket(table_s, control, std::move(buckets), table_s.min_compaction_threshold(), table_s.schema()->max_compaction_threshold(),
|
||||
state.highest_window_seen, state);
|
||||
state.highest_window_seen, state);
|
||||
}
|
||||
|
||||
timestamp_type time_window_compaction_strategy::get_window_lower_bound(std::chrono::seconds sstable_window_size, timestamp_type timestamp) {
|
||||
timestamp_type
|
||||
time_window_compaction_strategy::get_window_lower_bound(std::chrono::seconds sstable_window_size, timestamp_type timestamp) {
|
||||
using namespace std::chrono;
|
||||
// mask out window size from timestamp to get lower bound of its window
|
||||
auto num_windows = microseconds(timestamp) / sstable_window_size;
|
||||
@@ -438,8 +432,8 @@ timestamp_type time_window_compaction_strategy::get_window_lower_bound(std::chro
|
||||
return duration_cast<microseconds>(num_windows * sstable_window_size).count();
|
||||
}
|
||||
|
||||
std::pair<std::map<timestamp_type, std::vector<sstables::shared_sstable>>, timestamp_type> time_window_compaction_strategy::get_buckets(
|
||||
std::vector<sstables::shared_sstable> files, const time_window_compaction_strategy_options& options) {
|
||||
std::pair<std::map<timestamp_type, std::vector<sstables::shared_sstable>>, timestamp_type>
|
||||
time_window_compaction_strategy::get_buckets(std::vector<sstables::shared_sstable> files, const time_window_compaction_strategy_options& options) {
|
||||
std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets;
|
||||
|
||||
timestamp_type max_timestamp = 0;
|
||||
@@ -456,13 +450,11 @@ std::pair<std::map<timestamp_type, std::vector<sstables::shared_sstable>>, times
|
||||
return std::make_pair(std::move(buckets), max_timestamp);
|
||||
}
|
||||
|
||||
} // namespace compaction
|
||||
}
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<std::map<compaction::timestamp_type, std::vector<sstables::shared_sstable>>> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
||||
auto format(const std::map<compaction::timestamp_type, std::vector<sstables::shared_sstable>>& buckets, fmt::format_context& ctx) const {
|
||||
auto out = fmt::format_to(ctx.out(), " buckets = {{\n");
|
||||
for (auto& [timestamp, sstables] : buckets | std::views::reverse) {
|
||||
@@ -474,9 +466,9 @@ struct fmt::formatter<std::map<compaction::timestamp_type, std::vector<sstables:
|
||||
|
||||
namespace compaction {
|
||||
|
||||
std::vector<sstables::shared_sstable> time_window_compaction_strategy::newest_bucket(compaction_group_view& table_s, strategy_control& control,
|
||||
std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets, int min_threshold, int max_threshold, timestamp_type now,
|
||||
time_window_compaction_strategy_state& state) {
|
||||
std::vector<sstables::shared_sstable>
|
||||
time_window_compaction_strategy::newest_bucket(compaction_group_view& table_s, strategy_control& control, std::map<timestamp_type, std::vector<sstables::shared_sstable>> buckets,
|
||||
int min_threshold, int max_threshold, timestamp_type now, time_window_compaction_strategy_state& state) {
|
||||
clogger.debug("time_window_compaction_strategy::newest_bucket:\n now {}\n{}", now, buckets);
|
||||
|
||||
for (auto&& [key, bucket] : buckets | std::views::reverse) {
|
||||
@@ -517,7 +509,8 @@ std::vector<sstables::shared_sstable> time_window_compaction_strategy::newest_bu
|
||||
return {};
|
||||
}
|
||||
|
||||
std::vector<sstables::shared_sstable> time_window_compaction_strategy::trim_to_threshold(std::vector<sstables::shared_sstable> bucket, int max_threshold) {
|
||||
std::vector<sstables::shared_sstable>
|
||||
time_window_compaction_strategy::trim_to_threshold(std::vector<sstables::shared_sstable> bucket, int max_threshold) {
|
||||
auto n = std::min(bucket.size(), size_t(max_threshold));
|
||||
// Trim the largest sstables off the end to meet the maxThreshold
|
||||
std::ranges::partial_sort(bucket, bucket.begin() + n, std::ranges::less(), std::mem_fn(&sstables::sstable::ondisk_data_size));
|
||||
@@ -549,8 +542,8 @@ future<int64_t> time_window_compaction_strategy::estimated_pending_compactions(c
|
||||
co_return n;
|
||||
}
|
||||
|
||||
std::vector<compaction_descriptor> time_window_compaction_strategy::get_cleanup_compaction_jobs(
|
||||
compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const {
|
||||
std::vector<compaction_descriptor>
|
||||
time_window_compaction_strategy::get_cleanup_compaction_jobs(compaction_group_view& table_s, std::vector<sstables::shared_sstable> candidates) const {
|
||||
std::vector<compaction_descriptor> ret;
|
||||
for (auto&& [_, sstables] : get_buckets(std::move(candidates), _options).first) {
|
||||
auto per_window_jobs = size_tiered_compaction_strategy(_stcs_options).get_cleanup_compaction_jobs(table_s, std::move(sstables));
|
||||
@@ -563,4 +556,4 @@ std::unique_ptr<sstables::sstable_set_impl> time_window_compaction_strategy::mak
|
||||
return std::make_unique<sstables::time_series_sstable_set>(ts.schema(), _options.enable_optimized_twcs_queries);
|
||||
}
|
||||
|
||||
} // namespace compaction
|
||||
}
|
||||
|
||||
@@ -48,15 +48,13 @@ const sstring query_processor::CQL_VERSION = "3.3.1";
|
||||
const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono::minutes(60);
|
||||
|
||||
struct query_processor::remote {
|
||||
remote(service::migration_manager& mm, service::mapreduce_service& fwd, service::storage_service& ss, service::raft_group0_client& group0_client,
|
||||
service::strong_consistency::coordinator& _sc_coordinator)
|
||||
: mm(mm)
|
||||
, mapreducer(fwd)
|
||||
, ss(ss)
|
||||
, group0_client(group0_client)
|
||||
, sc_coordinator(_sc_coordinator)
|
||||
, gate("query_processor::remote") {
|
||||
}
|
||||
remote(service::migration_manager& mm, service::mapreduce_service& fwd,
|
||||
service::storage_service& ss, service::raft_group0_client& group0_client,
|
||||
service::strong_consistency::coordinator& _sc_coordinator)
|
||||
: mm(mm), mapreducer(fwd), ss(ss), group0_client(group0_client)
|
||||
, sc_coordinator(_sc_coordinator)
|
||||
, gate("query_processor::remote")
|
||||
{}
|
||||
|
||||
service::migration_manager& mm;
|
||||
service::mapreduce_service& mapreducer;
|
||||
@@ -79,34 +77,24 @@ static service::query_state query_state_for_internal_call() {
|
||||
return {service::client_state::for_internal_calls(), empty_service_permit()};
|
||||
}
|
||||
|
||||
query_processor::query_processor(service::storage_proxy& proxy, data_dictionary::database db, service::migration_notifier& mn,
|
||||
vector_search::vector_store_client& vsc, query_processor::memory_config mcfg, cql_config& cql_cfg, utils::loading_cache_config auth_prep_cache_cfg,
|
||||
lang::manager& langm)
|
||||
: _migration_subscriber{std::make_unique<migration_subscriber>(this)}
|
||||
, _proxy(proxy)
|
||||
, _db(db)
|
||||
, _mnotifier(mn)
|
||||
, _vector_store_client(vsc)
|
||||
, _mcfg(mcfg)
|
||||
, _cql_config(cql_cfg)
|
||||
, _prepared_cache(prep_cache_log, _mcfg.prepared_statment_cache_size)
|
||||
, _authorized_prepared_cache(std::move(auth_prep_cache_cfg), authorized_prepared_statements_cache_log)
|
||||
, _auth_prepared_cache_cfg_cb([this](uint32_t) {
|
||||
(void)_authorized_prepared_cache_config_action.trigger_later();
|
||||
})
|
||||
, _authorized_prepared_cache_config_action([this] {
|
||||
update_authorized_prepared_cache_config();
|
||||
return make_ready_future<>();
|
||||
})
|
||||
, _authorized_prepared_cache_update_interval_in_ms_observer(_db.get_config().permissions_update_interval_in_ms.observe(_auth_prepared_cache_cfg_cb))
|
||||
, _authorized_prepared_cache_validity_in_ms_observer(_db.get_config().permissions_validity_in_ms.observe(_auth_prepared_cache_cfg_cb))
|
||||
, _lang_manager(langm)
|
||||
, _write_consistency_levels_warned_observer(_db.get_config().write_consistency_levels_warned.observe([this](const auto& v) {
|
||||
_write_consistency_levels_warned = to_consistency_level_set(v);
|
||||
}))
|
||||
, _write_consistency_levels_disallowed_observer(_db.get_config().write_consistency_levels_disallowed.observe([this](const auto& v) {
|
||||
_write_consistency_levels_disallowed = to_consistency_level_set(v);
|
||||
})) {
|
||||
query_processor::query_processor(service::storage_proxy& proxy, data_dictionary::database db, service::migration_notifier& mn, vector_search::vector_store_client& vsc, query_processor::memory_config mcfg, cql_config& cql_cfg, utils::loading_cache_config auth_prep_cache_cfg, lang::manager& langm)
|
||||
: _migration_subscriber{std::make_unique<migration_subscriber>(this)}
|
||||
, _proxy(proxy)
|
||||
, _db(db)
|
||||
, _mnotifier(mn)
|
||||
, _vector_store_client(vsc)
|
||||
, _mcfg(mcfg)
|
||||
, _cql_config(cql_cfg)
|
||||
, _prepared_cache(prep_cache_log, _mcfg.prepared_statment_cache_size)
|
||||
, _authorized_prepared_cache(std::move(auth_prep_cache_cfg), authorized_prepared_statements_cache_log)
|
||||
, _auth_prepared_cache_cfg_cb([this] (uint32_t) { (void) _authorized_prepared_cache_config_action.trigger_later(); })
|
||||
, _authorized_prepared_cache_config_action([this] { update_authorized_prepared_cache_config(); return make_ready_future<>(); })
|
||||
, _authorized_prepared_cache_update_interval_in_ms_observer(_db.get_config().permissions_update_interval_in_ms.observe(_auth_prepared_cache_cfg_cb))
|
||||
, _authorized_prepared_cache_validity_in_ms_observer(_db.get_config().permissions_validity_in_ms.observe(_auth_prepared_cache_cfg_cb))
|
||||
, _lang_manager(langm)
|
||||
, _write_consistency_levels_warned_observer(_db.get_config().write_consistency_levels_warned.observe([this](const auto& v) { _write_consistency_levels_warned = to_consistency_level_set(v); }))
|
||||
, _write_consistency_levels_disallowed_observer(_db.get_config().write_consistency_levels_disallowed.observe([this](const auto& v) { _write_consistency_levels_disallowed = to_consistency_level_set(v); }))
|
||||
{
|
||||
_write_consistency_levels_warned = to_consistency_level_set(_db.get_config().write_consistency_levels_warned());
|
||||
_write_consistency_levels_disallowed = to_consistency_level_set(_db.get_config().write_consistency_levels_disallowed());
|
||||
namespace sm = seastar::metrics;
|
||||
@@ -114,7 +102,7 @@ query_processor::query_processor(service::storage_proxy& proxy, data_dictionary:
|
||||
using clevel = db::consistency_level;
|
||||
sm::label cl_label("consistency_level");
|
||||
|
||||
sm::label who_label("who"); // Who queried system tables
|
||||
sm::label who_label("who"); // Who queried system tables
|
||||
const auto user_who_label_instance = who_label("user");
|
||||
const auto internal_who_label_instance = who_label("internal");
|
||||
|
||||
@@ -122,11 +110,17 @@ query_processor::query_processor(service::storage_proxy& proxy, data_dictionary:
|
||||
const auto system_ks_label_instance = ks_label("system");
|
||||
|
||||
std::vector<sm::metric_definition> qp_group;
|
||||
qp_group.push_back(sm::make_counter("statements_prepared", _stats.prepare_invocations, sm::description("Counts the total number of parsed CQL requests.")));
|
||||
qp_group.push_back(sm::make_counter(
|
||||
"statements_prepared",
|
||||
_stats.prepare_invocations,
|
||||
sm::description("Counts the total number of parsed CQL requests.")));
|
||||
for (auto cl = size_t(clevel::MIN_VALUE); cl <= size_t(clevel::MAX_VALUE); ++cl) {
|
||||
qp_group.push_back(sm::make_counter(
|
||||
"queries", _stats.queries_by_cl[cl], sm::description("Counts queries by consistency level."), {cl_label(clevel(cl)), basic_level})
|
||||
.set_skip_when_empty());
|
||||
qp_group.push_back(
|
||||
sm::make_counter(
|
||||
"queries",
|
||||
_stats.queries_by_cl[cl],
|
||||
sm::description("Counts queries by consistency level."),
|
||||
{cl_label(clevel(cl)), basic_level}).set_skip_when_empty());
|
||||
}
|
||||
_metrics.add_group("query_processor", qp_group);
|
||||
|
||||
@@ -527,23 +521,29 @@ query_processor::query_processor(service::storage_proxy& proxy, data_dictionary:
|
||||
|
||||
std::vector<sm::metric_definition> cql_cl_group;
|
||||
for (auto cl = size_t(clevel::MIN_VALUE); cl <= size_t(clevel::MAX_VALUE); ++cl) {
|
||||
cql_cl_group.push_back(sm::make_counter("writes_per_consistency_level", _cql_stats.writes_per_consistency_level[cl],
|
||||
sm::description("Counts the number of writes for each consistency level."), {cl_label(clevel(cl)), basic_level})
|
||||
.set_skip_when_empty());
|
||||
cql_cl_group.push_back(
|
||||
sm::make_counter(
|
||||
"writes_per_consistency_level",
|
||||
_cql_stats.writes_per_consistency_level[cl],
|
||||
sm::description("Counts the number of writes for each consistency level."),
|
||||
{cl_label(clevel(cl)), basic_level}).set_skip_when_empty());
|
||||
}
|
||||
_metrics.add_group("cql", cql_cl_group);
|
||||
|
||||
_metrics.add_group(
|
||||
"cql", {
|
||||
sm::make_counter("write_consistency_levels_disallowed_violations", _cql_stats.write_consistency_levels_disallowed_violations,
|
||||
sm::description("Counts the number of write_consistency_levels_disallowed guardrail violations, "
|
||||
"i.e. attempts to write with a forbidden consistency level."),
|
||||
{basic_level}),
|
||||
sm::make_counter("write_consistency_levels_warned_violations", _cql_stats.write_consistency_levels_warned_violations,
|
||||
sm::description("Counts the number of write_consistency_levels_warned guardrail violations, "
|
||||
"i.e. attempts to write with a discouraged consistency level."),
|
||||
{basic_level}),
|
||||
});
|
||||
_metrics.add_group("cql", {
|
||||
sm::make_counter(
|
||||
"write_consistency_levels_disallowed_violations",
|
||||
_cql_stats.write_consistency_levels_disallowed_violations,
|
||||
sm::description("Counts the number of write_consistency_levels_disallowed guardrail violations, "
|
||||
"i.e. attempts to write with a forbidden consistency level."),
|
||||
{basic_level}),
|
||||
sm::make_counter(
|
||||
"write_consistency_levels_warned_violations",
|
||||
_cql_stats.write_consistency_levels_warned_violations,
|
||||
sm::description("Counts the number of write_consistency_levels_warned guardrail violations, "
|
||||
"i.e. attempts to write with a discouraged consistency level."),
|
||||
{basic_level}),
|
||||
});
|
||||
|
||||
_mnotifier.register_listener(_migration_subscriber.get());
|
||||
}
|
||||
@@ -554,13 +554,15 @@ query_processor::~query_processor() {
|
||||
}
|
||||
}
|
||||
|
||||
std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder> query_processor::acquire_strongly_consistent_coordinator() {
|
||||
std::pair<std::reference_wrapper<service::strong_consistency::coordinator>, gate::holder>
|
||||
query_processor::acquire_strongly_consistent_coordinator() {
|
||||
auto [remote_, holder] = remote();
|
||||
return {remote_.get().sc_coordinator, std::move(holder)};
|
||||
}
|
||||
|
||||
void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer, service::storage_service& ss,
|
||||
service::raft_group0_client& group0_client, service::strong_consistency::coordinator& sc_coordinator) {
|
||||
void query_processor::start_remote(service::migration_manager& mm, service::mapreduce_service& mapreducer,
|
||||
service::storage_service& ss, service::raft_group0_client& group0_client,
|
||||
service::strong_consistency::coordinator& sc_coordinator) {
|
||||
_remote = std::make_unique<struct remote>(mm, mapreducer, ss, group0_client, sc_coordinator);
|
||||
}
|
||||
|
||||
@@ -580,9 +582,7 @@ future<> query_processor::stop() {
|
||||
}
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message>> query_processor::execute_with_guard(
|
||||
std::function<future<::shared_ptr<cql_transport::messages::result_message>>(
|
||||
service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>)>
|
||||
fn,
|
||||
std::function<future<::shared_ptr<cql_transport::messages::result_message>>(service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>)> fn,
|
||||
::shared_ptr<cql_statement> statement, service::query_state& query_state, const query_options& options) {
|
||||
// execute all statements that need group0 guard on shard0
|
||||
if (this_shard_id() != 0) {
|
||||
@@ -591,13 +591,13 @@ future<::shared_ptr<cql_transport::messages::result_message>> query_processor::e
|
||||
|
||||
auto [remote_, holder] = remote();
|
||||
size_t retries = remote_.get().mm.get_concurrent_ddl_retries();
|
||||
while (true) {
|
||||
while (true) {
|
||||
try {
|
||||
auto guard = co_await remote_.get().mm.start_group0_operation();
|
||||
co_return co_await fn(query_state, statement, options, std::move(guard));
|
||||
} catch (const service::group0_concurrent_modification& ex) {
|
||||
log.warn("Failed to execute statement \"{}\" due to guard conflict.{}.", statement->raw_cql_statement,
|
||||
retries ? " Retrying" : " Number of retries exceeded, giving up");
|
||||
log.warn("Failed to execute statement \"{}\" due to guard conflict.{}.",
|
||||
statement->raw_cql_statement, retries ? " Retrying" : " Number of retries exceeded, giving up");
|
||||
if (retries--) {
|
||||
continue;
|
||||
}
|
||||
@@ -606,30 +606,29 @@ future<::shared_ptr<cql_transport::messages::result_message>> query_processor::e
|
||||
}
|
||||
}
|
||||
|
||||
template <typename... Args>
|
||||
future<::shared_ptr<result_message>> query_processor::execute_maybe_with_guard(service::query_state& query_state, ::shared_ptr<cql_statement> statement,
|
||||
const query_options& options,
|
||||
future<::shared_ptr<result_message>> (query_processor::*fn)(
|
||||
service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>, Args...),
|
||||
Args... args) {
|
||||
template<typename... Args>
|
||||
future<::shared_ptr<result_message>>
|
||||
query_processor::execute_maybe_with_guard(service::query_state& query_state, ::shared_ptr<cql_statement> statement, const query_options& options,
|
||||
future<::shared_ptr<result_message>>(query_processor::*fn)(service::query_state&, ::shared_ptr<cql_statement>, const query_options&, std::optional<service::group0_guard>, Args...), Args... args) {
|
||||
if (!statement->needs_guard(*this, query_state)) {
|
||||
return (this->*fn)(query_state, std::move(statement), options, std::nullopt, std::forward<Args>(args)...);
|
||||
}
|
||||
static auto exec = [fn](query_processor& qp, Args... args, service::query_state& query_state, ::shared_ptr<cql_statement> statement,
|
||||
const query_options& options, std::optional<service::group0_guard> guard) {
|
||||
static auto exec = [fn] (query_processor& qp, Args... args, service::query_state& query_state, ::shared_ptr<cql_statement> statement, const query_options& options, std::optional<service::group0_guard> guard) {
|
||||
return (qp.*fn)(query_state, std::move(statement), options, std::move(guard), std::forward<Args>(args)...);
|
||||
};
|
||||
return execute_with_guard(std::bind_front(exec, std::ref(*this), std::forward<Args>(args)...), std::move(statement), query_state, options);
|
||||
}
|
||||
|
||||
future<::shared_ptr<result_message>> query_processor::execute_direct_without_checking_exception_message(
|
||||
const std::string_view& query_string, service::query_state& query_state, dialect d, query_options& options) {
|
||||
future<::shared_ptr<result_message>>
|
||||
query_processor::execute_direct_without_checking_exception_message(const std::string_view& query_string, service::query_state& query_state, dialect d, query_options& options) {
|
||||
log.trace("execute_direct: \"{}\"", query_string);
|
||||
tracing::trace(query_state.get_trace_state(), "Parsing a statement");
|
||||
auto p = get_statement(query_string, query_state.get_client_state(), d);
|
||||
auto statement = p->statement;
|
||||
if (statement->get_bound_terms() != options.get_values_count()) {
|
||||
const auto msg = format("Invalid amount of bind variables: expected {:d} received {:d}", statement->get_bound_terms(), options.get_values_count());
|
||||
const auto msg = format("Invalid amount of bind variables: expected {:d} received {:d}",
|
||||
statement->get_bound_terms(),
|
||||
options.get_values_count());
|
||||
throw exceptions::invalid_request_exception(msg);
|
||||
}
|
||||
options.prepare(p->bound_names);
|
||||
@@ -640,13 +639,17 @@ future<::shared_ptr<result_message>> query_processor::execute_direct_without_che
|
||||
metrics.regularStatementsExecuted.inc();
|
||||
#endif
|
||||
auto user = query_state.get_client_state().user();
|
||||
tracing::trace(query_state.get_trace_state(), "Processing a statement for authenticated user: {}",
|
||||
user ? (user->name ? *user->name : "anonymous") : "no user authenticated");
|
||||
tracing::trace(query_state.get_trace_state(), "Processing a statement for authenticated user: {}", user ? (user->name ? *user->name : "anonymous") : "no user authenticated");
|
||||
return execute_maybe_with_guard(query_state, std::move(statement), options, &query_processor::do_execute_direct, std::move(p->warnings));
|
||||
}
|
||||
|
||||
future<::shared_ptr<result_message>> query_processor::do_execute_direct(service::query_state& query_state, shared_ptr<cql_statement> statement,
|
||||
const query_options& options, std::optional<service::group0_guard> guard, cql3::cql_warnings_vec warnings) {
|
||||
future<::shared_ptr<result_message>>
|
||||
query_processor::do_execute_direct(
|
||||
service::query_state& query_state,
|
||||
shared_ptr<cql_statement> statement,
|
||||
const query_options& options,
|
||||
std::optional<service::group0_guard> guard,
|
||||
cql3::cql_warnings_vec warnings) {
|
||||
auto access_future = co_await coroutine::as_future(statement->check_access(*this, query_state.get_client_state()));
|
||||
if (access_future.failed()) {
|
||||
co_await audit::inspect(statement, query_state, options, true);
|
||||
@@ -671,16 +674,26 @@ future<::shared_ptr<result_message>> query_processor::do_execute_direct(service:
|
||||
co_return std::move(m);
|
||||
}
|
||||
|
||||
future<::shared_ptr<result_message>> query_processor::execute_prepared_without_checking_exception_message(service::query_state& query_state,
|
||||
shared_ptr<cql_statement> statement, const query_options& options, statements::prepared_statement::checked_weak_ptr prepared,
|
||||
cql3::prepared_cache_key_type cache_key, bool needs_authorization) {
|
||||
return execute_maybe_with_guard(
|
||||
query_state, std::move(statement), options, &query_processor::do_execute_prepared, std::move(prepared), std::move(cache_key), needs_authorization);
|
||||
future<::shared_ptr<result_message>>
|
||||
query_processor::execute_prepared_without_checking_exception_message(
|
||||
service::query_state& query_state,
|
||||
shared_ptr<cql_statement> statement,
|
||||
const query_options& options,
|
||||
statements::prepared_statement::checked_weak_ptr prepared,
|
||||
cql3::prepared_cache_key_type cache_key,
|
||||
bool needs_authorization) {
|
||||
return execute_maybe_with_guard(query_state, std::move(statement), options, &query_processor::do_execute_prepared, std::move(prepared), std::move(cache_key), needs_authorization);
|
||||
}
|
||||
|
||||
future<::shared_ptr<result_message>> query_processor::do_execute_prepared(service::query_state& query_state, shared_ptr<cql_statement> statement,
|
||||
const query_options& options, std::optional<service::group0_guard> guard, statements::prepared_statement::checked_weak_ptr prepared,
|
||||
cql3::prepared_cache_key_type cache_key, bool needs_authorization) {
|
||||
future<::shared_ptr<result_message>>
|
||||
query_processor::do_execute_prepared(
|
||||
service::query_state& query_state,
|
||||
shared_ptr<cql_statement> statement,
|
||||
const query_options& options,
|
||||
std::optional<service::group0_guard> guard,
|
||||
statements::prepared_statement::checked_weak_ptr prepared,
|
||||
cql3::prepared_cache_key_type cache_key,
|
||||
bool needs_authorization) {
|
||||
if (needs_authorization) {
|
||||
co_await statement->check_access(*this, query_state.get_client_state());
|
||||
try {
|
||||
@@ -694,8 +707,8 @@ future<::shared_ptr<result_message>> query_processor::do_execute_prepared(servic
|
||||
co_return co_await process_authorized_statement(std::move(statement), query_state, options, std::move(guard));
|
||||
}
|
||||
|
||||
future<::shared_ptr<result_message>> query_processor::process_authorized_statement(const ::shared_ptr<cql_statement> statement,
|
||||
service::query_state& query_state, const query_options& options, std::optional<service::group0_guard> guard) {
|
||||
future<::shared_ptr<result_message>>
|
||||
query_processor::process_authorized_statement(const ::shared_ptr<cql_statement> statement, service::query_state& query_state, const query_options& options, std::optional<service::group0_guard> guard) {
|
||||
auto& client_state = query_state.get_client_state();
|
||||
|
||||
++_stats.queries_by_cl[size_t(options.get_consistency())];
|
||||
@@ -705,39 +718,43 @@ future<::shared_ptr<result_message>> query_processor::process_authorized_stateme
|
||||
auto msg = co_await statement->execute_without_checking_exception_message(*this, query_state, options, std::move(guard));
|
||||
|
||||
if (msg) {
|
||||
co_return std::move(msg);
|
||||
co_return std::move(msg);
|
||||
}
|
||||
co_return ::make_shared<result_message::void_message>();
|
||||
}
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>> query_processor::prepare(
|
||||
sstring query_string, service::query_state& query_state, cql3::dialect d) {
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
||||
query_processor::prepare(sstring query_string, service::query_state& query_state, cql3::dialect d) {
|
||||
auto& client_state = query_state.get_client_state();
|
||||
return prepare(std::move(query_string), client_state, d);
|
||||
}
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>> query_processor::prepare(
|
||||
sstring query_string, const service::client_state& client_state, cql3::dialect d) {
|
||||
future<::shared_ptr<cql_transport::messages::result_message::prepared>>
|
||||
query_processor::prepare(sstring query_string, const service::client_state& client_state, cql3::dialect d) {
|
||||
try {
|
||||
auto key = compute_id(query_string, client_state.get_raw_keyspace(), d);
|
||||
auto prep_entry = co_await _prepared_cache.get_pinned(key, [this, &query_string, &client_state, d] {
|
||||
auto prepared = get_statement(query_string, client_state, d);
|
||||
prepared->calculate_metadata_id();
|
||||
auto bound_terms = prepared->statement->get_bound_terms();
|
||||
if (bound_terms > std::numeric_limits<uint16_t>::max()) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
format("Too many markers(?). {:d} markers exceed the allowed maximum of {:d}", bound_terms, std::numeric_limits<uint16_t>::max()));
|
||||
}
|
||||
throwing_assert(bound_terms == prepared->bound_names.size());
|
||||
return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
|
||||
});
|
||||
|
||||
co_await utils::get_local_injector().inject("query_processor_prepare_wait_after_cache_get", utils::wait_for_message(std::chrono::seconds(60)));
|
||||
auto prepared = get_statement(query_string, client_state, d);
|
||||
prepared->calculate_metadata_id();
|
||||
auto bound_terms = prepared->statement->get_bound_terms();
|
||||
if (bound_terms > std::numeric_limits<uint16_t>::max()) {
|
||||
throw exceptions::invalid_request_exception(
|
||||
format("Too many markers(?). {:d} markers exceed the allowed maximum of {:d}",
|
||||
bound_terms,
|
||||
std::numeric_limits<uint16_t>::max()));
|
||||
}
|
||||
throwing_assert(bound_terms == prepared->bound_names.size());
|
||||
return make_ready_future<std::unique_ptr<statements::prepared_statement>>(std::move(prepared));
|
||||
});
|
||||
|
||||
co_await utils::get_local_injector().inject(
|
||||
"query_processor_prepare_wait_after_cache_get",
|
||||
utils::wait_for_message(std::chrono::seconds(60)));
|
||||
|
||||
auto msg = ::make_shared<result_message::prepared::cql>(prepared_cache_key_type::cql_id(key), std::move(prep_entry),
|
||||
client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK));
|
||||
client_state.is_protocol_extension_set(cql_transport::cql_protocol_extension::LWT_ADD_METADATA_MARK));
|
||||
co_return std::move(msg);
|
||||
} catch (typename prepared_statements_cache::statement_is_too_big&) {
|
||||
} catch(typename prepared_statements_cache::statement_is_too_big&) {
|
||||
throw prepared_statement_is_too_big(query_string);
|
||||
}
|
||||
}
|
||||
@@ -748,11 +765,15 @@ static std::string hash_target(std::string_view query_string, std::string_view k
|
||||
return ret;
|
||||
}
|
||||
|
||||
prepared_cache_key_type query_processor::compute_id(std::string_view query_string, std::string_view keyspace, dialect d) {
|
||||
prepared_cache_key_type query_processor::compute_id(
|
||||
std::string_view query_string,
|
||||
std::string_view keyspace,
|
||||
dialect d) {
|
||||
return prepared_cache_key_type(md5_hasher::calculate(hash_target(query_string, keyspace)), d);
|
||||
}
|
||||
|
||||
std::unique_ptr<prepared_statement> query_processor::get_statement(const std::string_view& query, const service::client_state& client_state, dialect d) {
|
||||
std::unique_ptr<prepared_statement>
|
||||
query_processor::get_statement(const std::string_view& query, const service::client_state& client_state, dialect d) {
|
||||
// Measuring allocation cost requires that no yield points exist
|
||||
// between bytes_before and bytes_after. It needs fixing if this
|
||||
// function is ever futurized.
|
||||
@@ -777,7 +798,8 @@ std::unique_ptr<prepared_statement> query_processor::get_statement(const std::st
|
||||
return p;
|
||||
}
|
||||
|
||||
std::unique_ptr<raw::parsed_statement> query_processor::parse_statement(const std::string_view& query, dialect d) {
|
||||
std::unique_ptr<raw::parsed_statement>
|
||||
query_processor::parse_statement(const std::string_view& query, dialect d) {
|
||||
try {
|
||||
{
|
||||
const char* error_injection_key = "query_processor-parse_statement-test_failure";
|
||||
@@ -802,7 +824,8 @@ std::unique_ptr<raw::parsed_statement> query_processor::parse_statement(const st
|
||||
}
|
||||
}
|
||||
|
||||
std::vector<std::unique_ptr<raw::parsed_statement>> query_processor::parse_statements(std::string_view queries, dialect d) {
|
||||
std::vector<std::unique_ptr<raw::parsed_statement>>
|
||||
query_processor::parse_statements(std::string_view queries, dialect d) {
|
||||
try {
|
||||
auto statements = util::do_with_parser(queries, d, std::mem_fn(&cql3_parser::CqlParser::queries));
|
||||
if (statements.empty()) {
|
||||
@@ -831,10 +854,15 @@ std::pair<std::reference_wrapper<struct query_processor::remote>, gate::holder>
|
||||
on_internal_error(log, "attempted to perform distributed query when `query_processor::remote` is unavailable");
|
||||
}
|
||||
|
||||
query_options query_processor::make_internal_options(const statements::prepared_statement::checked_weak_ptr& p, const std::vector<data_value_or_unset>& values,
|
||||
db::consistency_level cl, int32_t page_size, service::node_local_only node_local_only) const {
|
||||
query_options query_processor::make_internal_options(
|
||||
const statements::prepared_statement::checked_weak_ptr& p,
|
||||
const std::vector<data_value_or_unset>& values,
|
||||
db::consistency_level cl,
|
||||
int32_t page_size,
|
||||
service::node_local_only node_local_only) const {
|
||||
if (p->bound_names.size() != values.size()) {
|
||||
throw std::invalid_argument(format("Invalid number of values. Expecting {:d} but got {:d}", p->bound_names.size(), values.size()));
|
||||
throw std::invalid_argument(
|
||||
format("Invalid number of values. Expecting {:d} but got {:d}", p->bound_names.size(), values.size()));
|
||||
}
|
||||
auto ni = p->bound_names.begin();
|
||||
raw_value_vector_with_unset bound_values;
|
||||
@@ -842,28 +870,32 @@ query_options query_processor::make_internal_options(const statements::prepared_
|
||||
bound_values.unset.resize(values.size());
|
||||
for (auto& var : values) {
|
||||
auto& n = *ni;
|
||||
std::visit(overloaded_functor{[&](const data_value& v) {
|
||||
if (v.type() == bytes_type) {
|
||||
bound_values.values.emplace_back(cql3::raw_value::make_value(value_cast<bytes>(v)));
|
||||
} else if (v.is_null()) {
|
||||
bound_values.values.emplace_back(cql3::raw_value::make_null());
|
||||
} else {
|
||||
bound_values.values.emplace_back(cql3::raw_value::make_value(n->type->decompose(v)));
|
||||
}
|
||||
},
|
||||
[&](const unset_value&) {
|
||||
bound_values.values.emplace_back(cql3::raw_value::make_null());
|
||||
bound_values.unset[std::distance(p->bound_names.begin(), ni)] = true;
|
||||
}},
|
||||
var);
|
||||
std::visit(overloaded_functor {
|
||||
[&] (const data_value& v) {
|
||||
if (v.type() == bytes_type) {
|
||||
bound_values.values.emplace_back(cql3::raw_value::make_value(value_cast<bytes>(v)));
|
||||
} else if (v.is_null()) {
|
||||
bound_values.values.emplace_back(cql3::raw_value::make_null());
|
||||
} else {
|
||||
bound_values.values.emplace_back(cql3::raw_value::make_value(n->type->decompose(v)));
|
||||
}
|
||||
}, [&] (const unset_value&) {
|
||||
bound_values.values.emplace_back(cql3::raw_value::make_null());
|
||||
bound_values.unset[std::distance(p->bound_names.begin(), ni)] = true;
|
||||
}
|
||||
}, var);
|
||||
++ni;
|
||||
}
|
||||
return query_options(cl, std::move(bound_values),
|
||||
cql3::query_options::specific_options{.page_size = page_size,
|
||||
.state = {},
|
||||
.serial_consistency = db::consistency_level::SERIAL,
|
||||
.timestamp = api::missing_timestamp,
|
||||
.node_local_only = node_local_only});
|
||||
return query_options(
|
||||
cl,
|
||||
std::move(bound_values),
|
||||
cql3::query_options::specific_options {
|
||||
.page_size = page_size,
|
||||
.state = {},
|
||||
.serial_consistency = db::consistency_level::SERIAL,
|
||||
.timestamp = api::missing_timestamp,
|
||||
.node_local_only = node_local_only
|
||||
});
|
||||
}
|
||||
|
||||
statements::prepared_statement::checked_weak_ptr query_processor::prepare_internal(const sstring& query_string) {
|
||||
@@ -885,7 +917,11 @@ struct internal_query_state {
|
||||
};
|
||||
|
||||
internal_query_state query_processor::create_paged_state(
|
||||
const sstring& query_string, db::consistency_level cl, const data_value_list& values, int32_t page_size, std::optional<service::query_state> qs) {
|
||||
const sstring& query_string,
|
||||
db::consistency_level cl,
|
||||
const data_value_list& values,
|
||||
int32_t page_size,
|
||||
std::optional<service::query_state> qs) {
|
||||
auto p = prepare_internal(query_string);
|
||||
auto opts = make_internal_options(p, values, cl, page_size);
|
||||
if (!qs) {
|
||||
@@ -899,7 +935,8 @@ bool query_processor::has_more_results(cql3::internal_query_state& state) const
|
||||
}
|
||||
|
||||
future<> query_processor::for_each_cql_result(
|
||||
cql3::internal_query_state& state, noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)> f) {
|
||||
cql3::internal_query_state& state,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set::row&)> f) {
|
||||
do {
|
||||
auto msg = co_await execute_paged_internal(state);
|
||||
for (auto& row : *msg) {
|
||||
@@ -910,18 +947,17 @@ future<> query_processor::for_each_cql_result(
|
||||
} while (has_more_results(state));
|
||||
}
|
||||
|
||||
future<::shared_ptr<untyped_result_set>> query_processor::execute_paged_internal(internal_query_state& state) {
|
||||
future<::shared_ptr<untyped_result_set>>
|
||||
query_processor::execute_paged_internal(internal_query_state& state) {
|
||||
state.p->statement->validate(*this, service::client_state::for_internal_calls());
|
||||
::shared_ptr<cql_transport::messages::result_message> msg = co_await state.p->statement->execute(*this, *state.qs, *state.opts, std::nullopt);
|
||||
::shared_ptr<cql_transport::messages::result_message> msg =
|
||||
co_await state.p->statement->execute(*this, *state.qs, *state.opts, std::nullopt);
|
||||
|
||||
class visitor : public result_message::visitor_base {
|
||||
internal_query_state& _state;
|
||||
query_processor& _qp;
|
||||
|
||||
public:
|
||||
visitor(internal_query_state& state, query_processor& qp)
|
||||
: _state(state)
|
||||
, _qp(qp) {
|
||||
visitor(internal_query_state& state, query_processor& qp) : _state(state), _qp(qp) {
|
||||
}
|
||||
virtual ~visitor() = default;
|
||||
void visit(const result_message::rows& rmrs) override {
|
||||
@@ -950,14 +986,23 @@ future<::shared_ptr<untyped_result_set>> query_processor::execute_paged_internal
|
||||
co_return ::make_shared<untyped_result_set>(msg);
|
||||
}
|
||||
|
||||
future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
|
||||
const sstring& query_string, db::consistency_level cl, const data_value_list& values, cache_internal cache) {
|
||||
future<::shared_ptr<untyped_result_set>>
|
||||
query_processor::execute_internal(
|
||||
const sstring& query_string,
|
||||
db::consistency_level cl,
|
||||
const data_value_list& values,
|
||||
cache_internal cache) {
|
||||
auto qs = query_state_for_internal_call();
|
||||
co_return co_await execute_internal(query_string, cl, qs, values, cache);
|
||||
}
|
||||
|
||||
future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
|
||||
const sstring& query_string, db::consistency_level cl, service::query_state& query_state, const data_value_list& values, cache_internal cache) {
|
||||
future<::shared_ptr<untyped_result_set>>
|
||||
query_processor::execute_internal(
|
||||
const sstring& query_string,
|
||||
db::consistency_level cl,
|
||||
service::query_state& query_state,
|
||||
const data_value_list& values,
|
||||
cache_internal cache) {
|
||||
|
||||
if (log.is_enabled(logging::log_level::trace)) {
|
||||
log.trace("execute_internal: {}\"{}\" ({})", cache ? "(cached) " : "", query_string, fmt::join(values, ", "));
|
||||
@@ -975,7 +1020,10 @@ future<::shared_ptr<untyped_result_set>> query_processor::execute_internal(
|
||||
}
|
||||
|
||||
future<utils::chunked_vector<mutation>> query_processor::get_mutations_internal(
|
||||
const sstring query_string, service::query_state& query_state, api::timestamp_type timestamp, std::vector<data_value_or_unset> values) {
|
||||
const sstring query_string,
|
||||
service::query_state& query_state,
|
||||
api::timestamp_type timestamp,
|
||||
std::vector<data_value_or_unset> values) {
|
||||
log.debug("get_mutations_internal: \"{}\" ({})", query_string, fmt::join(values, ", "));
|
||||
auto stmt = prepare_internal(query_string);
|
||||
auto mod_stmt = dynamic_pointer_cast<cql3::statements::modification_statement>(stmt->statement);
|
||||
@@ -993,8 +1041,12 @@ future<utils::chunked_vector<mutation>> query_processor::get_mutations_internal(
|
||||
co_return co_await mod_stmt->get_mutations(*this, opts, timeout, true, timestamp, query_state, json_cache, std::move(keys));
|
||||
}
|
||||
|
||||
future<::shared_ptr<untyped_result_set>> query_processor::execute_with_params(
|
||||
statements::prepared_statement::checked_weak_ptr p, db::consistency_level cl, service::query_state& query_state, const data_value_list& values) {
|
||||
future<::shared_ptr<untyped_result_set>>
|
||||
query_processor::execute_with_params(
|
||||
statements::prepared_statement::checked_weak_ptr p,
|
||||
db::consistency_level cl,
|
||||
service::query_state& query_state,
|
||||
const data_value_list& values) {
|
||||
auto opts = make_internal_options(p, values, cl);
|
||||
auto statement = p->statement;
|
||||
|
||||
@@ -1002,24 +1054,30 @@ future<::shared_ptr<untyped_result_set>> query_processor::execute_with_params(
|
||||
co_return ::make_shared<untyped_result_set>(msg);
|
||||
}
|
||||
|
||||
future<::shared_ptr<result_message>> query_processor::do_execute_with_params(
|
||||
service::query_state& query_state, shared_ptr<cql_statement> statement, const query_options& options, std::optional<service::group0_guard> guard) {
|
||||
future<::shared_ptr<result_message>>
|
||||
query_processor::do_execute_with_params(
|
||||
service::query_state& query_state,
|
||||
shared_ptr<cql_statement> statement,
|
||||
const query_options& options, std::optional<service::group0_guard> guard) {
|
||||
statement->validate(*this, service::client_state::for_internal_calls());
|
||||
co_return co_await coroutine::try_future(statement->execute(*this, query_state, options, std::move(guard)));
|
||||
}
|
||||
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message>> query_processor::execute_batch_without_checking_exception_message(
|
||||
::shared_ptr<statements::batch_statement> batch, service::query_state& query_state, query_options& options,
|
||||
future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
query_processor::execute_batch_without_checking_exception_message(
|
||||
::shared_ptr<statements::batch_statement> batch,
|
||||
service::query_state& query_state,
|
||||
query_options& options,
|
||||
std::unordered_map<prepared_cache_key_type, authorized_prepared_statements_cache::value_type> pending_authorization_entries) {
|
||||
auto access_future = co_await coroutine::as_future(batch->check_access(*this, query_state.get_client_state()));
|
||||
co_await coroutine::parallel_for_each(pending_authorization_entries, [this, &query_state](auto& e) -> future<> {
|
||||
try {
|
||||
co_await _authorized_prepared_cache.insert(*query_state.get_client_state().user(), e.first, std::move(e.second));
|
||||
} catch (...) {
|
||||
log.error("failed to cache the entry: {}", std::current_exception());
|
||||
}
|
||||
});
|
||||
co_await coroutine::parallel_for_each(pending_authorization_entries, [this, &query_state] (auto& e) -> future<> {
|
||||
try {
|
||||
co_await _authorized_prepared_cache.insert(*query_state.get_client_state().user(), e.first, std::move(e.second));
|
||||
} catch (...) {
|
||||
log.error("failed to cache the entry: {}", std::current_exception());
|
||||
}
|
||||
});
|
||||
bool failed = access_future.failed();
|
||||
co_await audit::inspect(batch, query_state, options, failed);
|
||||
if (access_future.failed()) {
|
||||
@@ -1028,28 +1086,30 @@ future<::shared_ptr<cql_transport::messages::result_message>> query_processor::e
|
||||
batch->validate();
|
||||
batch->validate(*this, query_state.get_client_state());
|
||||
_stats.queries_by_cl[size_t(options.get_consistency())] += batch->get_statements().size();
|
||||
if (log.is_enabled(logging::log_level::trace)) {
|
||||
if (log.is_enabled(logging::log_level::trace)) {
|
||||
std::ostringstream oss;
|
||||
for (const auto& s : batch->get_statements()) {
|
||||
oss << std::endl << s.statement->raw_cql_statement;
|
||||
for (const auto& s: batch->get_statements()) {
|
||||
oss << std::endl << s.statement->raw_cql_statement;
|
||||
}
|
||||
log.trace("execute_batch({}): {}", batch->get_statements().size(), oss.str());
|
||||
}
|
||||
co_return co_await batch->execute(*this, query_state, options, std::nullopt);
|
||||
}
|
||||
|
||||
future<service::broadcast_tables::query_result> query_processor::execute_broadcast_table_query(const service::broadcast_tables::query& query) {
|
||||
future<service::broadcast_tables::query_result>
|
||||
query_processor::execute_broadcast_table_query(const service::broadcast_tables::query& query) {
|
||||
auto [remote_, holder] = remote();
|
||||
co_return co_await service::broadcast_tables::execute(remote_.get().group0_client, query);
|
||||
}
|
||||
|
||||
future<query::mapreduce_result> query_processor::mapreduce(query::mapreduce_request req, tracing::trace_state_ptr tr_state) {
|
||||
future<query::mapreduce_result>
|
||||
query_processor::mapreduce(query::mapreduce_request req, tracing::trace_state_ptr tr_state) {
|
||||
auto [remote_, holder] = remote();
|
||||
co_return co_await remote_.get().mapreducer.dispatch(std::move(req), std::move(tr_state));
|
||||
}
|
||||
|
||||
future<::shared_ptr<messages::result_message>> query_processor::execute_schema_statement(
|
||||
const statements::schema_altering_statement& stmt, service::query_state& state, const query_options& options, service::group0_batch& mc) {
|
||||
future<::shared_ptr<messages::result_message>>
|
||||
query_processor::execute_schema_statement(const statements::schema_altering_statement& stmt, service::query_state& state, const query_options& options, service::group0_batch& mc) {
|
||||
if (this_shard_id() != 0) {
|
||||
on_internal_error(log, "DDL must be executed on shard 0");
|
||||
}
|
||||
@@ -1103,8 +1163,7 @@ future<> query_processor::announce_schema_statement(const statements::schema_alt
|
||||
co_await remote_.get().mm.announce(std::move(m), std::move(guard), description);
|
||||
}
|
||||
|
||||
query_processor::migration_subscriber::migration_subscriber(query_processor* qp)
|
||||
: _qp{qp} {
|
||||
query_processor::migration_subscriber::migration_subscriber(query_processor* qp) : _qp{qp} {
|
||||
}
|
||||
|
||||
void query_processor::migration_subscriber::on_create_keyspace(const sstring& ks_name) {
|
||||
@@ -1130,7 +1189,10 @@ void query_processor::migration_subscriber::on_create_view(const sstring& ks_nam
|
||||
void query_processor::migration_subscriber::on_update_keyspace(const sstring& ks_name) {
|
||||
}
|
||||
|
||||
void query_processor::migration_subscriber::on_update_column_family(const sstring& ks_name, const sstring& cf_name, bool columns_changed) {
|
||||
void query_processor::migration_subscriber::on_update_column_family(
|
||||
const sstring& ks_name,
|
||||
const sstring& cf_name,
|
||||
bool columns_changed) {
|
||||
// #1255: Ignoring columns_changed deliberately.
|
||||
log.info("Column definitions for {}.{} changed, invalidating related prepared statements", ks_name, cf_name);
|
||||
remove_invalid_prepared_statements(ks_name, cf_name);
|
||||
@@ -1145,7 +1207,9 @@ void query_processor::migration_subscriber::on_update_function(const sstring& ks
|
||||
void query_processor::migration_subscriber::on_update_aggregate(const sstring& ks_name, const sstring& aggregate_name) {
|
||||
}
|
||||
|
||||
void query_processor::migration_subscriber::on_update_view(const sstring& ks_name, const sstring& view_name, bool columns_changed) {
|
||||
void query_processor::migration_subscriber::on_update_view(
|
||||
const sstring& ks_name,
|
||||
const sstring& view_name, bool columns_changed) {
|
||||
// scylladb/scylladb#16392 - Materialized views are also tables so we need at least handle
|
||||
// them as such when changed.
|
||||
on_update_column_family(ks_name, view_name, columns_changed);
|
||||
@@ -1174,28 +1238,39 @@ void query_processor::migration_subscriber::on_drop_view(const sstring& ks_name,
|
||||
remove_invalid_prepared_statements(ks_name, view_name);
|
||||
}
|
||||
|
||||
void query_processor::migration_subscriber::remove_invalid_prepared_statements(sstring ks_name, std::optional<sstring> cf_name) {
|
||||
_qp->_prepared_cache.remove_if([&](::shared_ptr<cql_statement> stmt) {
|
||||
void query_processor::migration_subscriber::remove_invalid_prepared_statements(
|
||||
sstring ks_name,
|
||||
std::optional<sstring> cf_name) {
|
||||
_qp->_prepared_cache.remove_if([&] (::shared_ptr<cql_statement> stmt) {
|
||||
return this->should_invalidate(ks_name, cf_name, stmt);
|
||||
});
|
||||
}
|
||||
|
||||
bool query_processor::migration_subscriber::should_invalidate(sstring ks_name, std::optional<sstring> cf_name, ::shared_ptr<cql_statement> statement) {
|
||||
bool query_processor::migration_subscriber::should_invalidate(
|
||||
sstring ks_name,
|
||||
std::optional<sstring> cf_name,
|
||||
::shared_ptr<cql_statement> statement) {
|
||||
return statement->depends_on(ks_name, cf_name);
|
||||
}
|
||||
|
||||
future<> query_processor::query_internal(const sstring& query_string, db::consistency_level cl, const data_value_list& values, int32_t page_size,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f, std::optional<service::query_state> qs) {
|
||||
future<> query_processor::query_internal(
|
||||
const sstring& query_string,
|
||||
db::consistency_level cl,
|
||||
const data_value_list& values,
|
||||
int32_t page_size,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f,
|
||||
std::optional<service::query_state> qs) {
|
||||
auto query_state = create_paged_state(query_string, cl, values, page_size, std::move(qs));
|
||||
co_return co_await for_each_cql_result(query_state, std::move(f));
|
||||
}
|
||||
|
||||
future<> query_processor::query_internal(const sstring& query_string, noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
|
||||
future<> query_processor::query_internal(
|
||||
const sstring& query_string,
|
||||
noncopyable_function<future<stop_iteration>(const cql3::untyped_result_set_row&)> f) {
|
||||
return query_internal(query_string, db::consistency_level::ONE, {}, 1000, std::move(f));
|
||||
}
|
||||
|
||||
shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_shard(
|
||||
unsigned shard, cql3::computed_function_values cached_fn_calls, bool track) {
|
||||
shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_shard(unsigned shard, cql3::computed_function_values cached_fn_calls, bool track) {
|
||||
if (track) {
|
||||
_proxy.get_stats().replica_cross_shard_ops++;
|
||||
}
|
||||
@@ -1203,8 +1278,7 @@ shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_s
|
||||
return ::make_shared<cql_transport::messages::result_message::bounce>(my_host_id, shard, std::move(cached_fn_calls));
|
||||
}
|
||||
|
||||
shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_node(
|
||||
locator::tablet_replica replica, cql3::computed_function_values cached_fn_calls, seastar::lowres_clock::time_point timeout, bool is_write) {
|
||||
shared_ptr<cql_transport::messages::result_message> query_processor::bounce_to_node(locator::tablet_replica replica, cql3::computed_function_values cached_fn_calls, seastar::lowres_clock::time_point timeout, bool is_write) {
|
||||
get_cql_stats().forwarded_requests++;
|
||||
return ::make_shared<cql_transport::messages::result_message::bounce>(replica.host, replica.shard, std::move(cached_fn_calls), timeout, is_write);
|
||||
}
|
||||
@@ -1221,7 +1295,7 @@ void query_processor::update_authorized_prepared_cache_config() {
|
||||
utils::loading_cache_config cfg;
|
||||
cfg.max_size = _mcfg.authorized_prepared_cache_size;
|
||||
cfg.expiry = std::min(std::chrono::milliseconds(_db.get_config().permissions_validity_in_ms()),
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(prepared_statements_cache::entry_expiry));
|
||||
std::chrono::duration_cast<std::chrono::milliseconds>(prepared_statements_cache::entry_expiry));
|
||||
cfg.refresh = std::chrono::milliseconds(_db.get_config().permissions_update_interval_in_ms());
|
||||
|
||||
if (!_authorized_prepared_cache.update_config(std::move(cfg))) {
|
||||
@@ -1233,4 +1307,4 @@ void query_processor::reset_cache() {
|
||||
_authorized_prepared_cache.reset();
|
||||
}
|
||||
|
||||
} // namespace cql3
|
||||
}
|
||||
|
||||
@@ -63,14 +63,15 @@ namespace db {
|
||||
|
||||
namespace schema_tables {
|
||||
|
||||
static constexpr std::initializer_list<table_kind> all_table_kinds = {table_kind::table, table_kind::view};
|
||||
static constexpr std::initializer_list<table_kind> all_table_kinds = {
|
||||
table_kind::table,
|
||||
table_kind::view
|
||||
};
|
||||
|
||||
static schema_ptr get_table_holder(table_kind k) {
|
||||
switch (k) {
|
||||
case table_kind::table:
|
||||
return tables();
|
||||
case table_kind::view:
|
||||
return views();
|
||||
case table_kind::table: return tables();
|
||||
case table_kind::view: return views();
|
||||
}
|
||||
abort();
|
||||
}
|
||||
@@ -93,18 +94,15 @@ void table_selector::add(sstring name) {
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace schema_tables
|
||||
}
|
||||
|
||||
} // namespace db
|
||||
}
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<db::schema_tables::table_kind> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
template <> struct fmt::formatter<db::schema_tables::table_kind> {
|
||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
||||
auto format(db::schema_tables::table_kind k, fmt::format_context& ctx) const {
|
||||
switch (k) {
|
||||
using enum db::schema_tables::table_kind;
|
||||
using enum db::schema_tables::table_kind;
|
||||
case table:
|
||||
return fmt::format_to(ctx.out(), "table");
|
||||
case view:
|
||||
@@ -127,8 +125,11 @@ static std::optional<table_id> table_id_from_mutations(const schema_mutations& s
|
||||
return table_id(table_row.get_nonnull<utils::UUID>("id"));
|
||||
}
|
||||
|
||||
static future<std::map<table_id, schema_mutations>> read_tables_for_keyspaces(sharded<service::storage_proxy>& proxy, const std::set<sstring>& keyspace_names,
|
||||
table_kind kind, const std::unordered_map<sstring, table_selector>& tables_per_keyspace) {
|
||||
static
|
||||
future<std::map<table_id, schema_mutations>>
|
||||
read_tables_for_keyspaces(sharded<service::storage_proxy>& proxy, const std::set<sstring>& keyspace_names, table_kind kind,
|
||||
const std::unordered_map<sstring, table_selector>& tables_per_keyspace)
|
||||
{
|
||||
std::map<table_id, schema_mutations> result;
|
||||
for (auto&& [keyspace_name, sel] : tables_per_keyspace) {
|
||||
if (!sel.tables.contains(kind)) {
|
||||
@@ -148,30 +149,32 @@ static future<std::map<table_id, schema_mutations>> read_tables_for_keyspaces(sh
|
||||
|
||||
// Extracts the names of tables affected by a schema mutation.
|
||||
// The mutation must target one of the tables in schema_tables_holding_schema_mutations().
|
||||
static table_selector get_affected_tables(const sstring& keyspace_name, const mutation& m) {
|
||||
static
|
||||
table_selector get_affected_tables(const sstring& keyspace_name, const mutation& m) {
|
||||
const schema& s = *m.schema();
|
||||
auto get_table_name = [&](const clustering_key& ck) {
|
||||
auto get_table_name = [&] (const clustering_key& ck) {
|
||||
// The first component of the clustering key in each table listed in
|
||||
// schema_tables_holding_schema_mutations contains the table name.
|
||||
return value_cast<sstring>(utf8_type->deserialize(ck.get_component(s, 0)));
|
||||
};
|
||||
table_selector result;
|
||||
if (m.partition().partition_tombstone()) {
|
||||
slogger.trace("Mutation of {}.{} for keyspace {} contains a partition tombstone", m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
||||
slogger.trace("Mutation of {}.{} for keyspace {} contains a partition tombstone",
|
||||
m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
||||
result.all_in_keyspace = true;
|
||||
}
|
||||
for (auto&& e : m.partition().row_tombstones()) {
|
||||
const range_tombstone& rt = e.tombstone();
|
||||
if (rt.start.size(s) == 0 || rt.end.size(s) == 0) {
|
||||
slogger.trace(
|
||||
"Mutation of {}.{} for keyspace {} contains a multi-table range tombstone", m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
||||
slogger.trace("Mutation of {}.{} for keyspace {} contains a multi-table range tombstone",
|
||||
m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
||||
result.all_in_keyspace = true;
|
||||
break;
|
||||
}
|
||||
auto table_name = get_table_name(rt.start);
|
||||
if (table_name != get_table_name(rt.end)) {
|
||||
slogger.trace(
|
||||
"Mutation of {}.{} for keyspace {} contains a multi-table range tombstone", m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
||||
slogger.trace("Mutation of {}.{} for keyspace {} contains a multi-table range tombstone",
|
||||
m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name);
|
||||
result.all_in_keyspace = true;
|
||||
break;
|
||||
}
|
||||
@@ -180,17 +183,16 @@ static table_selector get_affected_tables(const sstring& keyspace_name, const mu
|
||||
for (auto&& row : m.partition().clustered_rows()) {
|
||||
result.add(get_table_name(row.key()));
|
||||
}
|
||||
slogger.trace("Mutation of {}.{} for keyspace {} affects tables: {}, all_in_keyspace: {}", m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name,
|
||||
result.tables, result.all_in_keyspace);
|
||||
slogger.trace("Mutation of {}.{} for keyspace {} affects tables: {}, all_in_keyspace: {}",
|
||||
m.schema()->ks_name(), m.schema()->cf_name(), keyspace_name, result.tables, result.all_in_keyspace);
|
||||
return result;
|
||||
}
|
||||
|
||||
future<schema_result> static read_schema_for_keyspaces(
|
||||
sharded<service::storage_proxy>& proxy, const sstring& schema_table_name, const std::set<sstring>& keyspace_names) {
|
||||
auto map = [&proxy, schema_table_name](const sstring& keyspace_name) {
|
||||
return read_schema_partition_for_keyspace(proxy, schema_table_name, keyspace_name);
|
||||
};
|
||||
auto insert = [](schema_result&& result, auto&& schema_entity) {
|
||||
future<schema_result>
|
||||
static read_schema_for_keyspaces(sharded<service::storage_proxy>& proxy, const sstring& schema_table_name, const std::set<sstring>& keyspace_names)
|
||||
{
|
||||
auto map = [&proxy, schema_table_name] (const sstring& keyspace_name) { return read_schema_partition_for_keyspace(proxy, schema_table_name, keyspace_name); };
|
||||
auto insert = [] (schema_result&& result, auto&& schema_entity) {
|
||||
if (!schema_entity.second->empty()) {
|
||||
result.insert(std::move(schema_entity));
|
||||
}
|
||||
@@ -200,11 +202,11 @@ future<schema_result> static read_schema_for_keyspaces(
|
||||
}
|
||||
|
||||
// Returns names of live table definitions of given keyspace
|
||||
future<std::vector<sstring>> static read_table_names_of_keyspace(
|
||||
sharded<service::storage_proxy>& proxy, const sstring& keyspace_name, schema_ptr schema_table) {
|
||||
future<std::vector<sstring>>
|
||||
static read_table_names_of_keyspace(sharded<service::storage_proxy>& proxy, const sstring& keyspace_name, schema_ptr schema_table) {
|
||||
auto pkey = dht::decorate_key(*schema_table, partition_key::from_singular(*schema_table, keyspace_name));
|
||||
auto&& rs = co_await db::system_keyspace::query(proxy.local().get_db(), schema_table->ks_name(), schema_table->cf_name(), pkey);
|
||||
co_return rs->rows() | std::views::transform([schema_table](const query::result_set_row& row) {
|
||||
co_return rs->rows() | std::views::transform([schema_table] (const query::result_set_row& row) {
|
||||
const sstring name = schema_table->clustering_key_columns().begin()->name_as_text();
|
||||
return row.get_nonnull<sstring>(name);
|
||||
}) | std::ranges::to<std::vector>();
|
||||
@@ -240,7 +242,8 @@ static void maybe_delete_schema_version(mutation& m) {
|
||||
}
|
||||
}
|
||||
|
||||
future<> schema_applier::merge_keyspaces() {
|
||||
future<> schema_applier::merge_keyspaces()
|
||||
{
|
||||
/*
|
||||
* - we don't care about entriesOnlyOnLeft() or entriesInCommon(), because only the changes are of interest to us
|
||||
* - of all entriesOnlyOnRight(), we only care about ones that have live columns; it's possible to have a ColumnFamily
|
||||
@@ -277,16 +280,21 @@ future<> schema_applier::merge_keyspaces() {
|
||||
for (auto& name : created) {
|
||||
slogger.info("Creating keyspace {}", name);
|
||||
auto sk_after_v = _after.scylla_keyspaces.contains(name) ? _after.scylla_keyspaces.at(name) : nullptr;
|
||||
auto ksm = co_await create_keyspace_metadata(schema_result_value_type{name, _after.keyspaces.at(name)}, sk_after_v);
|
||||
auto ksm = co_await create_keyspace_metadata(
|
||||
schema_result_value_type{name, _after.keyspaces.at(name)}, sk_after_v);
|
||||
_affected_keyspaces.created.push_back(
|
||||
co_await replica::database::prepare_create_keyspace_on_all_shards(sharded_db, _proxy, *ksm, _pending_token_metadata));
|
||||
co_await replica::database::prepare_create_keyspace_on_all_shards(
|
||||
sharded_db, _proxy, *ksm, _pending_token_metadata));
|
||||
_affected_keyspaces.names.created.insert(name);
|
||||
}
|
||||
for (auto& name : altered) {
|
||||
slogger.info("Altering keyspace {}", name);
|
||||
auto sk_after_v = _after.scylla_keyspaces.contains(name) ? _after.scylla_keyspaces.at(name) : nullptr;
|
||||
auto tmp_ksm = co_await create_keyspace_metadata(schema_result_value_type{name, _after.keyspaces.at(name)}, sk_after_v);
|
||||
_affected_keyspaces.altered.push_back(co_await replica::database::prepare_update_keyspace_on_all_shards(sharded_db, *tmp_ksm, _pending_token_metadata));
|
||||
auto tmp_ksm = co_await create_keyspace_metadata(
|
||||
schema_result_value_type{name, _after.keyspaces.at(name)}, sk_after_v);
|
||||
_affected_keyspaces.altered.push_back(
|
||||
co_await replica::database::prepare_update_keyspace_on_all_shards(
|
||||
sharded_db, *tmp_ksm, _pending_token_metadata));
|
||||
_affected_keyspaces.names.altered.insert(name);
|
||||
}
|
||||
for (auto& key : _affected_keyspaces.names.dropped) {
|
||||
@@ -319,7 +327,7 @@ static std::vector<column_definition> get_primary_key_definition(const schema_pt
|
||||
static std::vector<bytes> get_primary_key(const std::vector<column_definition>& primary_key, const query::result_set_row* row) {
|
||||
std::vector<bytes> key;
|
||||
for (const auto& column : primary_key) {
|
||||
const data_value* val = row->get_data_value(column.name_as_text());
|
||||
const data_value *val = row->get_data_value(column.name_as_text());
|
||||
key.push_back(val->serialize_nonnull());
|
||||
}
|
||||
return key;
|
||||
@@ -330,7 +338,7 @@ static std::map<std::vector<bytes>, const query::result_set_row*> build_row_map(
|
||||
const std::vector<query::result_set_row>& rows = result.rows();
|
||||
auto primary_key = get_primary_key_definition(result.schema());
|
||||
std::map<std::vector<bytes>, const query::result_set_row*> ret;
|
||||
for (const auto& row : rows) {
|
||||
for (const auto& row: rows) {
|
||||
auto key = get_primary_key(primary_key, &row);
|
||||
ret.insert(std::pair(std::move(key), &row));
|
||||
}
|
||||
@@ -383,8 +391,8 @@ struct aggregate_diff {
|
||||
std::vector<std::pair<const query::result_set_row*, const query::result_set_row*>> dropped;
|
||||
};
|
||||
|
||||
static aggregate_diff diff_aggregates_rows(
|
||||
const schema_result& aggr_before, const schema_result& aggr_after, const schema_result& scylla_aggr_before, const schema_result& scylla_aggr_after) {
|
||||
static aggregate_diff diff_aggregates_rows(const schema_result& aggr_before, const schema_result& aggr_after,
|
||||
const schema_result& scylla_aggr_before, const schema_result& scylla_aggr_after) {
|
||||
using map = std::map<std::vector<bytes>, const query::result_set_row*>;
|
||||
auto aggr_diff = difference(aggr_before, aggr_after, indirect_equal_to<lw_shared_ptr<query::result_set>>());
|
||||
|
||||
@@ -428,11 +436,15 @@ static aggregate_diff diff_aggregates_rows(
|
||||
|
||||
for (const auto& k : diff.entries_only_on_left) {
|
||||
auto entry = scylla_aggr_rows_before.find(k);
|
||||
dropped.push_back({aggr_before_rows.find(k)->second, (entry != scylla_aggr_rows_before.end()) ? entry->second : nullptr});
|
||||
dropped.push_back({
|
||||
aggr_before_rows.find(k)->second, (entry != scylla_aggr_rows_before.end()) ? entry->second : nullptr
|
||||
});
|
||||
}
|
||||
for (const auto& k : diff.entries_only_on_right) {
|
||||
auto entry = scylla_aggr_rows_after.find(k);
|
||||
created.push_back({aggr_after_rows.find(k)->second, (entry != scylla_aggr_rows_after.end()) ? entry->second : nullptr});
|
||||
created.push_back({
|
||||
aggr_after_rows.find(k)->second, (entry != scylla_aggr_rows_after.end()) ? entry->second : nullptr
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
@@ -440,10 +452,11 @@ static aggregate_diff diff_aggregates_rows(
|
||||
}
|
||||
|
||||
// see the comments for merge_keyspaces()
|
||||
future<> schema_applier::merge_types() {
|
||||
future<> schema_applier::merge_types()
|
||||
{
|
||||
auto diff = diff_rows(_before.types, _after.types);
|
||||
co_await _affected_user_types.start();
|
||||
co_await _affected_user_types.invoke_on_all([&](affected_user_types_per_shard& af) mutable -> future<> {
|
||||
co_await _affected_user_types.invoke_on_all([&] (affected_user_types_per_shard& af) mutable -> future<> {
|
||||
auto& db = _proxy.local().get_db().local();
|
||||
|
||||
std::map<sstring, std::reference_wrapper<replica::keyspace>> new_keyspaces_per_shard;
|
||||
@@ -465,12 +478,16 @@ future<> schema_applier::merge_types() {
|
||||
// version of view to "before" version of base table and "after" to "after"
|
||||
// respectively.
|
||||
enum class schema_diff_side {
|
||||
left, // old, before
|
||||
left, // old, before
|
||||
right, // new, after
|
||||
};
|
||||
|
||||
static schema_diff_per_shard diff_table_or_view(sharded<service::storage_proxy>& proxy, const std::map<table_id, schema_mutations>& before,
|
||||
const std::map<table_id, schema_mutations>& after, bool reload, noncopyable_function<schema_ptr(schema_mutations sm, schema_diff_side)> create_schema) {
|
||||
static schema_diff_per_shard diff_table_or_view(sharded<service::storage_proxy>& proxy,
|
||||
const std::map<table_id, schema_mutations>& before,
|
||||
const std::map<table_id, schema_mutations>& after,
|
||||
bool reload,
|
||||
noncopyable_function<schema_ptr (schema_mutations sm, schema_diff_side)> create_schema)
|
||||
{
|
||||
schema_diff_per_shard d;
|
||||
auto diff = difference(before, after);
|
||||
for (auto&& key : diff.entries_only_on_left) {
|
||||
@@ -490,10 +507,10 @@ static schema_diff_per_shard diff_table_or_view(sharded<service::storage_proxy>&
|
||||
d.altered.emplace_back(schema_diff_per_shard::altered_schema{s_before, s});
|
||||
}
|
||||
if (reload) {
|
||||
for (auto&& key : diff.entries_in_common) {
|
||||
for (auto&& key: diff.entries_in_common) {
|
||||
auto s = create_schema(std::move(after.at(key)), schema_diff_side::right);
|
||||
slogger.info("Reloading {}.{} id={} version={}", s->ks_name(), s->cf_name(), s->id(), s->version());
|
||||
d.altered.emplace_back(schema_diff_per_shard::altered_schema{s, s});
|
||||
d.altered.emplace_back(schema_diff_per_shard::altered_schema {s, s});
|
||||
}
|
||||
}
|
||||
return d;
|
||||
@@ -507,9 +524,7 @@ static schema_diff_per_shard diff_table_or_view(sharded<service::storage_proxy>&
|
||||
constexpr size_t max_concurrent = 8;
|
||||
|
||||
|
||||
in_progress_types_storage_per_shard::in_progress_types_storage_per_shard(
|
||||
replica::database& db, const affected_keyspaces& affected_keyspaces, const affected_user_types& affected_types)
|
||||
: _stored_user_types(db.as_user_types_storage()) {
|
||||
in_progress_types_storage_per_shard::in_progress_types_storage_per_shard(replica::database& db, const affected_keyspaces& affected_keyspaces, const affected_user_types& affected_types) : _stored_user_types(db.as_user_types_storage()) {
|
||||
// initialize metadata for new keyspaces
|
||||
for (auto& ks_per_shard : affected_keyspaces.created) {
|
||||
auto metadata = ks_per_shard[this_shard_id()]->metadata();
|
||||
@@ -537,7 +552,7 @@ in_progress_types_storage_per_shard::in_progress_types_storage_per_shard(
|
||||
auto& ks_name = type->_keyspace;
|
||||
_in_progress_types[ks_name].remove_type(type);
|
||||
}
|
||||
for (const auto& ks_name : affected_keyspaces.names.dropped) {
|
||||
for (const auto &ks_name : affected_keyspaces.names.dropped) {
|
||||
// can't reference a type when it's keyspace is being dropped
|
||||
_in_progress_types[ks_name] = data_dictionary::user_types_metadata();
|
||||
}
|
||||
@@ -555,9 +570,8 @@ std::shared_ptr<data_dictionary::user_types_storage> in_progress_types_storage_p
|
||||
return _stored_user_types;
|
||||
}
|
||||
|
||||
future<> in_progress_types_storage::init(
|
||||
sharded<replica::database>& sharded_db, const affected_keyspaces& affected_keyspaces, const affected_user_types& affected_types) {
|
||||
co_await sharded_db.invoke_on_all([&](replica::database& db) {
|
||||
future<> in_progress_types_storage::init(sharded<replica::database>& sharded_db, const affected_keyspaces& affected_keyspaces, const affected_user_types& affected_types) {
|
||||
co_await sharded_db.invoke_on_all([&] (replica::database& db) {
|
||||
shards[this_shard_id()] = make_foreign(seastar::make_shared<in_progress_types_storage_per_shard>(db, affected_keyspaces, affected_types));
|
||||
});
|
||||
}
|
||||
@@ -571,7 +585,8 @@ in_progress_types_storage_per_shard& in_progress_types_storage::local() {
|
||||
// that when a base schema and a subset of its views are modified together (i.e.,
|
||||
// upon an alter table or alter type statement), then they are published together
|
||||
// as well, without any deferring in-between.
|
||||
future<> schema_applier::merge_tables_and_views() {
|
||||
future<> schema_applier::merge_tables_and_views()
|
||||
{
|
||||
auto& user_types = _types_storage.local();
|
||||
co_await _affected_tables_and_views.tables_and_views.start();
|
||||
|
||||
@@ -582,10 +597,10 @@ future<> schema_applier::merge_tables_and_views() {
|
||||
|
||||
// Create CDC tables before non-CDC base tables, because we want the base tables with CDC enabled
|
||||
// to point to their CDC tables.
|
||||
local_cdc = diff_table_or_view(_proxy, _before.cdc, _after.cdc, _reload, [&](schema_mutations sm, schema_diff_side) {
|
||||
local_cdc = diff_table_or_view(_proxy, _before.cdc, _after.cdc, _reload, [&] (schema_mutations sm, schema_diff_side) {
|
||||
return create_table_from_mutations(_proxy, std::move(sm), user_types, nullptr);
|
||||
});
|
||||
local_tables = diff_table_or_view(_proxy, _before.tables, _after.tables, _reload, [&](schema_mutations sm, schema_diff_side side) {
|
||||
local_tables = diff_table_or_view(_proxy, _before.tables, _after.tables, _reload, [&] (schema_mutations sm, schema_diff_side side) {
|
||||
// If the table has CDC enabled, find the CDC schema version and set it in the table schema.
|
||||
// If the table is created or altered with CDC enabled, then the CDC
|
||||
// table is also created or altered in the same operation, so we can
|
||||
@@ -621,7 +636,7 @@ future<> schema_applier::merge_tables_and_views() {
|
||||
|
||||
return create_table_from_mutations(_proxy, std::move(sm), user_types, cdc_schema);
|
||||
});
|
||||
local_views = diff_table_or_view(_proxy, _before.views, _after.views, _reload, [&](schema_mutations sm, schema_diff_side side) {
|
||||
local_views = diff_table_or_view(_proxy, _before.views, _after.views, _reload, [&] (schema_mutations sm, schema_diff_side side) {
|
||||
// The view schema mutation should be created with reference to the base table schema because we definitely know it by now.
|
||||
// If we don't do it we are leaving a window where write commands to this schema are illegal.
|
||||
// There are 3 possibilities:
|
||||
@@ -668,26 +683,31 @@ future<> schema_applier::merge_tables_and_views() {
|
||||
frozen_schema_diff tables_frozen = co_await local_tables.freeze();
|
||||
frozen_schema_diff cdc_frozen = co_await local_cdc.freeze();
|
||||
frozen_schema_diff views_frozen = co_await local_views.freeze();
|
||||
co_await _affected_tables_and_views.tables_and_views.invoke_on_others(
|
||||
[this, &tables_frozen, &cdc_frozen, &views_frozen](affected_tables_and_views_per_shard& tables_and_views) -> future<> {
|
||||
auto& db = _proxy.local().get_db().local();
|
||||
tables_and_views.tables = co_await schema_diff_per_shard::copy_from(db, _types_storage, tables_frozen);
|
||||
tables_and_views.cdc = co_await schema_diff_per_shard::copy_from(db, _types_storage, cdc_frozen);
|
||||
tables_and_views.views = co_await schema_diff_per_shard::copy_from(db, _types_storage, views_frozen);
|
||||
});
|
||||
co_await _affected_tables_and_views.tables_and_views.invoke_on_others([this, &tables_frozen, &cdc_frozen, &views_frozen] (affected_tables_and_views_per_shard& tables_and_views) -> future<> {
|
||||
auto& db = _proxy.local().get_db().local();
|
||||
tables_and_views.tables = co_await schema_diff_per_shard::copy_from(
|
||||
db, _types_storage, tables_frozen);
|
||||
tables_and_views.cdc = co_await schema_diff_per_shard::copy_from(
|
||||
db, _types_storage, cdc_frozen);
|
||||
tables_and_views.views = co_await schema_diff_per_shard::copy_from(
|
||||
db, _types_storage, views_frozen);
|
||||
});
|
||||
|
||||
auto& db = _proxy.local().get_db();
|
||||
co_await max_concurrent_for_each(local_views.dropped, max_concurrent, [&db, this](schema_ptr& dt) -> future<> {
|
||||
co_await max_concurrent_for_each(local_views.dropped, max_concurrent, [&db, this] (schema_ptr& dt) -> future<> {
|
||||
auto uuid = dt->id();
|
||||
_affected_tables_and_views.table_shards.insert({uuid, co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
||||
_affected_tables_and_views.table_shards.insert({uuid,
|
||||
co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
||||
});
|
||||
co_await max_concurrent_for_each(local_tables.dropped, max_concurrent, [&db, this](schema_ptr& dt) -> future<> {
|
||||
co_await max_concurrent_for_each(local_tables.dropped, max_concurrent, [&db, this] (schema_ptr& dt) -> future<> {
|
||||
auto uuid = dt->id();
|
||||
_affected_tables_and_views.table_shards.insert({uuid, co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
||||
_affected_tables_and_views.table_shards.insert({uuid,
|
||||
co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
||||
});
|
||||
co_await max_concurrent_for_each(local_cdc.dropped, max_concurrent, [&db, this](schema_ptr& dt) -> future<> {
|
||||
co_await max_concurrent_for_each(local_cdc.dropped, max_concurrent, [&db, this] (schema_ptr& dt) -> future<> {
|
||||
auto uuid = dt->id();
|
||||
_affected_tables_and_views.table_shards.insert({uuid, co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
||||
_affected_tables_and_views.table_shards.insert({uuid,
|
||||
co_await replica::database::prepare_drop_table_on_all_shards(db, uuid)});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -699,8 +719,8 @@ future<frozen_schema_diff> schema_diff_per_shard::freeze() const {
|
||||
}
|
||||
for (const auto& a : altered) {
|
||||
result.altered.push_back(frozen_schema_diff::altered_schema{
|
||||
.old_schema = extended_frozen_schema(a.old_schema),
|
||||
.new_schema = extended_frozen_schema(a.new_schema),
|
||||
.old_schema = extended_frozen_schema(a.old_schema),
|
||||
.new_schema = extended_frozen_schema(a.new_schema),
|
||||
});
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
@@ -723,8 +743,8 @@ future<schema_diff_per_shard> schema_diff_per_shard::copy_from(replica::database
|
||||
}
|
||||
for (const auto& a : oth.altered) {
|
||||
result.altered.push_back(schema_diff_per_shard::altered_schema{
|
||||
.old_schema = a.old_schema.unfreeze(commited_ctxt),
|
||||
.new_schema = a.new_schema.unfreeze(ctxt),
|
||||
.old_schema = a.old_schema.unfreeze(commited_ctxt),
|
||||
.new_schema = a.new_schema.unfreeze(ctxt),
|
||||
});
|
||||
co_await coroutine::maybe_yield();
|
||||
}
|
||||
@@ -738,7 +758,7 @@ future<schema_diff_per_shard> schema_diff_per_shard::copy_from(replica::database
|
||||
|
||||
static future<> notify_tables_and_views(service::migration_notifier& notifier, const affected_tables_and_views& diff) {
|
||||
auto it = diff.tables_and_views.local().columns_changed.cbegin();
|
||||
auto notify = [&](auto& r, auto&& f) -> future<> {
|
||||
auto notify = [&] (auto& r, auto&& f) -> future<> {
|
||||
co_await max_concurrent_for_each(r, max_concurrent, std::move(f));
|
||||
};
|
||||
|
||||
@@ -747,41 +767,24 @@ static future<> notify_tables_and_views(service::migration_notifier& notifier, c
|
||||
const auto& views = diff.tables_and_views.local().views;
|
||||
|
||||
// View drops are notified first, because a table can only be dropped if its views are already deleted
|
||||
co_await notify(views.dropped, [&](auto&& dt) {
|
||||
return notifier.drop_view(view_ptr(dt));
|
||||
});
|
||||
co_await notify(tables.dropped, [&](auto&& dt) {
|
||||
return notifier.drop_column_family(dt);
|
||||
});
|
||||
co_await notify(cdc.dropped, [&](auto&& dt) {
|
||||
return notifier.drop_column_family(dt);
|
||||
});
|
||||
co_await notify(views.dropped, [&] (auto&& dt) { return notifier.drop_view(view_ptr(dt)); });
|
||||
co_await notify(tables.dropped, [&] (auto&& dt) { return notifier.drop_column_family(dt); });
|
||||
co_await notify(cdc.dropped, [&] (auto&& dt) { return notifier.drop_column_family(dt); });
|
||||
// Table creations are notified first, in case a view is created right after the table
|
||||
co_await notify(tables.created, [&](auto&& gs) {
|
||||
return notifier.create_column_family(gs);
|
||||
});
|
||||
co_await notify(cdc.created, [&](auto&& gs) {
|
||||
return notifier.create_column_family(gs);
|
||||
});
|
||||
co_await notify(views.created, [&](auto&& gs) {
|
||||
return notifier.create_view(view_ptr(gs));
|
||||
});
|
||||
co_await notify(tables.created, [&] (auto&& gs) { return notifier.create_column_family(gs); });
|
||||
co_await notify(cdc.created, [&] (auto&& gs) { return notifier.create_column_family(gs); });
|
||||
co_await notify(views.created, [&] (auto&& gs) { return notifier.create_view(view_ptr(gs)); });
|
||||
// Table altering is notified first, in case new base columns appear
|
||||
co_await notify(tables.altered, [&](auto&& altered) {
|
||||
return notifier.update_column_family(altered.new_schema, *it++);
|
||||
});
|
||||
co_await notify(cdc.altered, [&](auto&& altered) {
|
||||
return notifier.update_column_family(altered.new_schema, *it++);
|
||||
});
|
||||
co_await notify(views.altered, [&](auto&& altered) {
|
||||
return notifier.update_view(view_ptr(altered.new_schema), *it++);
|
||||
});
|
||||
co_await notify(tables.altered, [&] (auto&& altered) { return notifier.update_column_family(altered.new_schema, *it++); });
|
||||
co_await notify(cdc.altered, [&] (auto&& altered) { return notifier.update_column_family(altered.new_schema, *it++); });
|
||||
co_await notify(views.altered, [&] (auto&& altered) { return notifier.update_view(view_ptr(altered.new_schema), *it++); });
|
||||
}
|
||||
|
||||
static void drop_cached_func(replica::database& db, const query::result_set_row& row) {
|
||||
auto language = row.get_nonnull<sstring>("language");
|
||||
if (language == "wasm") {
|
||||
cql3::functions::function_name name{row.get_nonnull<sstring>("keyspace_name"), row.get_nonnull<sstring>("function_name")};
|
||||
cql3::functions::function_name name{
|
||||
row.get_nonnull<sstring>("keyspace_name"), row.get_nonnull<sstring>("function_name")};
|
||||
auto arg_types = read_arg_types(row, name.keyspace, db.user_types());
|
||||
db.lang().remove(name, arg_types);
|
||||
}
|
||||
@@ -790,13 +793,14 @@ static void drop_cached_func(replica::database& db, const query::result_set_row&
|
||||
future<> schema_applier::merge_functions() {
|
||||
auto diff = diff_rows(_before.functions, _after.functions);
|
||||
co_await _functions_batch.start();
|
||||
co_await _functions_batch.invoke_on_all(coroutine::lambda([&](cql3::functions::change_batch& batch) -> future<> {
|
||||
co_await _functions_batch.invoke_on_all(coroutine::lambda([&] (cql3::functions::change_batch& batch) -> future<> {
|
||||
auto& db = _proxy.local().get_db().local();
|
||||
for (const auto& val : diff.created) {
|
||||
batch.add_function(co_await create_func(db, *val, _types_storage.local()));
|
||||
}
|
||||
for (const auto& val : diff.dropped) {
|
||||
cql3::functions::function_name name{val->get_nonnull<sstring>("keyspace_name"), val->get_nonnull<sstring>("function_name")};
|
||||
cql3::functions::function_name name{
|
||||
val->get_nonnull<sstring>("keyspace_name"), val->get_nonnull<sstring>("function_name")};
|
||||
auto commited_storage = _types_storage.local().committed_storage();
|
||||
auto arg_types = read_arg_types(*val, name.keyspace, *commited_storage);
|
||||
// as we don't yield between dropping cache and committing batch
|
||||
@@ -814,13 +818,14 @@ future<> schema_applier::merge_functions() {
|
||||
future<> schema_applier::merge_aggregates() {
|
||||
auto diff = diff_aggregates_rows(_before.aggregates, _after.aggregates, _before.scylla_aggregates, _after.scylla_aggregates);
|
||||
|
||||
co_await _functions_batch.invoke_on_all([&](cql3::functions::change_batch& batch) -> future<> {
|
||||
co_await _functions_batch.invoke_on_all([&] (cql3::functions::change_batch& batch)-> future<> {
|
||||
auto& db = _proxy.local().get_db().local();
|
||||
for (const auto& val : diff.created) {
|
||||
batch.add_function(create_aggregate(db, *val.first, val.second, batch, _types_storage.local()));
|
||||
}
|
||||
for (const auto& val : diff.dropped) {
|
||||
cql3::functions::function_name name{val.first->get_nonnull<sstring>("keyspace_name"), val.first->get_nonnull<sstring>("aggregate_name")};
|
||||
cql3::functions::function_name name{
|
||||
val.first->get_nonnull<sstring>("keyspace_name"), val.first->get_nonnull<sstring>("aggregate_name")};
|
||||
auto commited_storage = _types_storage.local().committed_storage();
|
||||
auto arg_types = read_arg_types(*val.first, name.keyspace, *commited_storage);
|
||||
batch.remove_aggregate(name, arg_types);
|
||||
@@ -855,15 +860,15 @@ future<schema_persisted_state> schema_applier::get_schema_persisted_state() {
|
||||
auto [tables, cdc] = extract_cdc(std::move(tables_and_cdc));
|
||||
|
||||
schema_persisted_state v{
|
||||
.keyspaces = co_await read_schema_for_keyspaces(_proxy, KEYSPACES, _keyspaces),
|
||||
.scylla_keyspaces = co_await read_schema_for_keyspaces(_proxy, SCYLLA_KEYSPACES, _keyspaces),
|
||||
.tables = std::move(tables),
|
||||
.types = co_await read_schema_for_keyspaces(_proxy, TYPES, _keyspaces),
|
||||
.views = co_await read_tables_for_keyspaces(_proxy, _keyspaces, table_kind::view, _affected_tables),
|
||||
.cdc = std::move(cdc),
|
||||
.functions = co_await read_schema_for_keyspaces(_proxy, FUNCTIONS, _keyspaces),
|
||||
.aggregates = co_await read_schema_for_keyspaces(_proxy, AGGREGATES, _keyspaces),
|
||||
.scylla_aggregates = co_await read_schema_for_keyspaces(_proxy, SCYLLA_AGGREGATES, _keyspaces),
|
||||
.keyspaces = co_await read_schema_for_keyspaces(_proxy, KEYSPACES, _keyspaces),
|
||||
.scylla_keyspaces = co_await read_schema_for_keyspaces(_proxy, SCYLLA_KEYSPACES, _keyspaces),
|
||||
.tables = std::move(tables),
|
||||
.types = co_await read_schema_for_keyspaces(_proxy, TYPES, _keyspaces),
|
||||
.views = co_await read_tables_for_keyspaces(_proxy, _keyspaces, table_kind::view, _affected_tables),
|
||||
.cdc = std::move(cdc),
|
||||
.functions = co_await read_schema_for_keyspaces(_proxy, FUNCTIONS, _keyspaces),
|
||||
.aggregates = co_await read_schema_for_keyspaces(_proxy, AGGREGATES, _keyspaces),
|
||||
.scylla_aggregates = co_await read_schema_for_keyspaces(_proxy, SCYLLA_AGGREGATES, _keyspaces),
|
||||
};
|
||||
co_return v;
|
||||
}
|
||||
@@ -919,11 +924,10 @@ class pending_schema_getter : public service::schema_getter {
|
||||
private:
|
||||
schema_applier& _sa;
|
||||
sharded<replica::database>& _db;
|
||||
|
||||
public:
|
||||
pending_schema_getter(schema_applier& sa)
|
||||
: _sa(sa)
|
||||
, _db(sa._proxy.local().get_db()) {};
|
||||
pending_schema_getter(schema_applier& sa) :
|
||||
_sa(sa), _db(sa._proxy.local().get_db()) {
|
||||
};
|
||||
|
||||
virtual flat_hash_map<sstring, locator::replication_strategy_ptr> get_keyspaces_replication() const override {
|
||||
flat_hash_map<sstring, locator::replication_strategy_ptr> out;
|
||||
@@ -985,7 +989,8 @@ future<> schema_applier::update_tablets() {
|
||||
if (_tablet_hint) {
|
||||
slogger.info("Tablet metadata changed");
|
||||
pending_schema_getter getter{*this};
|
||||
_token_metadata_change = co_await _ss.local().prepare_token_metadata_change(_pending_token_metadata.local(), getter);
|
||||
_token_metadata_change = co_await _ss.local().prepare_token_metadata_change(
|
||||
_pending_token_metadata.local(), getter);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -994,7 +999,8 @@ future<> schema_applier::update_tablets() {
|
||||
future<> schema_applier::load_mutable_token_metadata() {
|
||||
locator::mutable_token_metadata_ptr current_token_metadata = co_await _ss.local().get_mutable_token_metadata_ptr();
|
||||
if (_tablet_hint) {
|
||||
auto new_token_metadata = co_await _ss.local().prepare_tablet_metadata(_tablet_hint, current_token_metadata);
|
||||
auto new_token_metadata = co_await _ss.local().prepare_tablet_metadata(
|
||||
_tablet_hint, current_token_metadata);
|
||||
co_return co_await _pending_token_metadata.assign(new_token_metadata);
|
||||
}
|
||||
co_await _pending_token_metadata.assign(current_token_metadata);
|
||||
@@ -1109,13 +1115,14 @@ future<> schema_applier::commit() {
|
||||
// However, we can only acquire the (write) lock after preparing all
|
||||
// entities for the pending schema change that need to iterate over tables_metadata;
|
||||
// otherwise, such iteration would deadlock.
|
||||
_metadata_locks = std::make_unique<replica::tables_metadata_lock_on_all_shards>(co_await replica::database::lock_tables_metadata(sharded_db));
|
||||
_metadata_locks = std::make_unique<replica::tables_metadata_lock_on_all_shards>(
|
||||
co_await replica::database::lock_tables_metadata(sharded_db));
|
||||
// Run func first on shard 0
|
||||
// to allow "seeding" of the effective_replication_map
|
||||
// with a new e_r_m instance.
|
||||
SCYLLA_ASSERT(this_shard_id() == 0);
|
||||
commit_on_shard(sharded_db.local());
|
||||
co_await sharded_db.invoke_on_others([this](replica::database& db) {
|
||||
co_await sharded_db.invoke_on_others([this] (replica::database& db) {
|
||||
commit_on_shard(db);
|
||||
});
|
||||
// unlock as some functions in post_commit() may read data under those locks
|
||||
@@ -1147,11 +1154,12 @@ future<> schema_applier::finalize_tables_and_views() {
|
||||
|
||||
if (_tablet_hint) {
|
||||
auto& db = sharded_db.local();
|
||||
co_await db.get_compaction_manager().get_shared_tombstone_gc_state().flush_pending_repair_time_update(db);
|
||||
co_await db.get_compaction_manager().get_shared_tombstone_gc_state().
|
||||
flush_pending_repair_time_update(db);
|
||||
_ss.local().wake_up_topology_state_machine();
|
||||
}
|
||||
|
||||
co_await sharded_db.invoke_on_all([&diff](replica::database& db) -> future<> {
|
||||
co_await sharded_db.invoke_on_all([&diff] (replica::database& db) -> future<> {
|
||||
const auto& tables = diff.tables_and_views.local().tables;
|
||||
const auto& cdc = diff.tables_and_views.local().cdc;
|
||||
const auto& views = diff.tables_and_views.local().views;
|
||||
@@ -1176,14 +1184,15 @@ future<> schema_applier::finalize_tables_and_views() {
|
||||
//
|
||||
// Drop column mapping entries for dropped tables since these will not be TTLed automatically
|
||||
// and will stay there forever if we don't clean them up manually
|
||||
co_await max_concurrent_for_each(diff.tables_and_views.local().tables.created, max_concurrent, [this](const schema_ptr& gs) -> future<> {
|
||||
co_await max_concurrent_for_each(diff.tables_and_views.local().tables.created, max_concurrent, [this] (const schema_ptr& gs) -> future<> {
|
||||
co_await store_column_mapping(_proxy, gs, false);
|
||||
});
|
||||
co_await max_concurrent_for_each(
|
||||
diff.tables_and_views.local().tables.altered, max_concurrent, [this](const schema_diff_per_shard::altered_schema& altered) -> future<> {
|
||||
co_await when_all_succeed(store_column_mapping(_proxy, altered.old_schema, true), store_column_mapping(_proxy, altered.new_schema, false));
|
||||
});
|
||||
co_await max_concurrent_for_each(diff.tables_and_views.local().tables.dropped, max_concurrent, [this](const schema_ptr& s) -> future<> {
|
||||
co_await max_concurrent_for_each(diff.tables_and_views.local().tables.altered, max_concurrent, [this] (const schema_diff_per_shard::altered_schema& altered) -> future<> {
|
||||
co_await when_all_succeed(
|
||||
store_column_mapping(_proxy, altered.old_schema, true),
|
||||
store_column_mapping(_proxy, altered.new_schema, false));
|
||||
});
|
||||
co_await max_concurrent_for_each(diff.tables_and_views.local().tables.dropped, max_concurrent, [this] (const schema_ptr& s) -> future<> {
|
||||
co_await drop_column_mapping(_sys_ks.local(), s->id(), s->version());
|
||||
});
|
||||
}
|
||||
@@ -1191,7 +1200,7 @@ future<> schema_applier::finalize_tables_and_views() {
|
||||
future<> schema_applier::post_commit() {
|
||||
co_await finalize_tables_and_views();
|
||||
auto& sharded_db = _proxy.local().get_db();
|
||||
co_await sharded_db.invoke_on_all([&](replica::database& db) -> future<> {
|
||||
co_await sharded_db.invoke_on_all([&] (replica::database& db) -> future<> {
|
||||
auto& notifier = db.get_notifier();
|
||||
// notify about keyspaces
|
||||
for (const auto& name : _affected_keyspaces.names.created) {
|
||||
@@ -1251,8 +1260,8 @@ static future<> execute_do_merge_schema(sharded<service::storage_proxy>& proxy,
|
||||
co_await ap.post_commit();
|
||||
}
|
||||
|
||||
static future<> do_merge_schema(sharded<service::storage_proxy>& proxy, sharded<service::storage_service>& ss, sharded<db::system_keyspace>& sys_ks,
|
||||
utils::chunked_vector<mutation> mutations, bool reload) {
|
||||
static future<> do_merge_schema(sharded<service::storage_proxy>& proxy, sharded<service::storage_service>& ss, sharded<db::system_keyspace>& sys_ks, utils::chunked_vector<mutation> mutations, bool reload)
|
||||
{
|
||||
slogger.trace("do_merge_schema: {}", mutations);
|
||||
schema_applier ap(proxy, ss, sys_ks, reload);
|
||||
co_await execute_do_merge_schema(proxy, ap, std::move(mutations)).finally([&ap]() {
|
||||
@@ -1269,22 +1278,22 @@ static future<> do_merge_schema(sharded<service::storage_proxy>& proxy, sharded<
|
||||
* @throws ConfigurationException If one of metadata attributes has invalid value
|
||||
* @throws IOException If data was corrupted during transportation or failed to apply fs operations
|
||||
*/
|
||||
future<> merge_schema(sharded<db::system_keyspace>& sys_ks, sharded<service::storage_proxy>& proxy, sharded<service::storage_service>& ss,
|
||||
utils::chunked_vector<mutation> mutations, bool reload) {
|
||||
future<> merge_schema(sharded<db::system_keyspace>& sys_ks, sharded<service::storage_proxy>& proxy, sharded<service::storage_service>& ss, utils::chunked_vector<mutation> mutations, bool reload)
|
||||
{
|
||||
if (this_shard_id() != 0) {
|
||||
// mutations must be applied on the owning shard (0).
|
||||
co_await smp::submit_to(0, coroutine::lambda([&, fmuts = freeze(mutations)]() mutable -> future<> {
|
||||
co_await smp::submit_to(0, coroutine::lambda([&, fmuts = freeze(mutations)] () mutable -> future<> {
|
||||
co_await merge_schema(sys_ks, proxy, ss, co_await unfreeze_gently(fmuts), reload);
|
||||
}));
|
||||
co_return;
|
||||
}
|
||||
co_await with_merge_lock([&]() mutable -> future<> {
|
||||
co_await with_merge_lock([&] () mutable -> future<> {
|
||||
co_await do_merge_schema(proxy, ss, sys_ks, std::move(mutations), reload);
|
||||
auto version = co_await get_group0_schema_version(sys_ks.local());
|
||||
co_await update_schema_version_and_announce(sys_ks, proxy, version);
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace schema_tables
|
||||
}
|
||||
|
||||
} // namespace db
|
||||
}
|
||||
|
||||
@@ -29,8 +29,8 @@ static logging::logger blogger("boot_strapper");
|
||||
|
||||
namespace dht {
|
||||
|
||||
future<> boot_strapper::bootstrap(
|
||||
streaming::stream_reason reason, gms::gossiper& gossiper, service::frozen_topology_guard topo_guard, locator::host_id replace_address) {
|
||||
future<> boot_strapper::bootstrap(streaming::stream_reason reason, gms::gossiper& gossiper, service::frozen_topology_guard topo_guard,
|
||||
locator::host_id replace_address) {
|
||||
blogger.debug("Beginning bootstrap process: sorted_tokens={}", get_token_metadata().sorted_tokens());
|
||||
sstring description;
|
||||
if (reason == streaming::stream_reason::bootstrap) {
|
||||
@@ -41,8 +41,7 @@ future<> boot_strapper::bootstrap(
|
||||
throw std::runtime_error("Wrong stream_reason provided: it can only be replace or bootstrap");
|
||||
}
|
||||
try {
|
||||
auto streamer = make_lw_shared<range_streamer>(
|
||||
_db, _stream_manager, _token_metadata_ptr, _abort_source, _tokens, _address, _dr, description, reason, topo_guard);
|
||||
auto streamer = make_lw_shared<range_streamer>(_db, _stream_manager, _token_metadata_ptr, _abort_source, _tokens, _address, _dr, description, reason, topo_guard);
|
||||
auto nodes_to_filter = gossiper.get_unreachable_members();
|
||||
if (reason == streaming::stream_reason::replace) {
|
||||
nodes_to_filter.insert(std::move(replace_address));
|
||||
@@ -72,8 +71,7 @@ std::unordered_set<token> boot_strapper::get_random_bootstrap_tokens(const token
|
||||
}
|
||||
|
||||
if (num_tokens == 1) {
|
||||
blogger.warn(
|
||||
"Picking random token for a single vnode. You should probably add more vnodes; failing that, you should probably specify the token manually");
|
||||
blogger.warn("Picking random token for a single vnode. You should probably add more vnodes; failing that, you should probably specify the token manually");
|
||||
}
|
||||
|
||||
auto tokens = get_random_tokens(std::move(tmptr), num_tokens);
|
||||
@@ -88,8 +86,7 @@ std::unordered_set<token> boot_strapper::get_bootstrap_tokens(token_metadata_ptr
|
||||
return get_bootstrap_tokens(std::move(tmptr), cfg.initial_token(), cfg.num_tokens(), check);
|
||||
}
|
||||
|
||||
std::unordered_set<token> boot_strapper::get_bootstrap_tokens(
|
||||
const token_metadata_ptr tmptr, sstring tokens_string, uint32_t num_tokens, check_token_endpoint check) {
|
||||
std::unordered_set<token> boot_strapper::get_bootstrap_tokens(const token_metadata_ptr tmptr, sstring tokens_string, uint32_t num_tokens, check_token_endpoint check) {
|
||||
std::unordered_set<sstring> initial_tokens;
|
||||
try {
|
||||
boost::split(initial_tokens, tokens_string, boost::is_any_of(sstring(", ")));
|
||||
@@ -105,8 +102,7 @@ std::unordered_set<token> boot_strapper::get_bootstrap_tokens(
|
||||
for (auto& token_string : initial_tokens) {
|
||||
auto token = dht::token::from_sstring(token_string);
|
||||
if (check && tmptr->get_endpoint(token)) {
|
||||
throw std::runtime_error(
|
||||
format("Bootstrapping to existing token {} is not allowed (decommission/removenode the old node first).", token_string));
|
||||
throw std::runtime_error(format("Bootstrapping to existing token {} is not allowed (decommission/removenode the old node first).", token_string));
|
||||
}
|
||||
tokens.insert(token);
|
||||
}
|
||||
|
||||
@@ -26,9 +26,10 @@ static logging::logger logger("range_streamer");
|
||||
|
||||
using inet_address = gms::inet_address;
|
||||
|
||||
std::unordered_map<locator::host_id, dht::token_range_vector> range_streamer::get_range_fetch_map(
|
||||
const std::unordered_map<dht::token_range, std::vector<locator::host_id>>& ranges_with_sources,
|
||||
const std::unordered_set<std::unique_ptr<i_source_filter>>& source_filters, const sstring& keyspace) {
|
||||
std::unordered_map<locator::host_id, dht::token_range_vector>
|
||||
range_streamer::get_range_fetch_map(const std::unordered_map<dht::token_range, std::vector<locator::host_id>>& ranges_with_sources,
|
||||
const std::unordered_set<std::unique_ptr<i_source_filter>>& source_filters,
|
||||
const sstring& keyspace) {
|
||||
std::unordered_map<locator::host_id, dht::token_range_vector> range_fetch_map_map;
|
||||
const auto& topo = _token_metadata_ptr->get_topology();
|
||||
for (const auto& x : ranges_with_sources) {
|
||||
@@ -78,8 +79,8 @@ std::unordered_map<locator::host_id, dht::token_range_vector> range_streamer::ge
|
||||
}
|
||||
|
||||
// Must be called from a seastar thread
|
||||
std::unordered_map<dht::token_range, std::vector<locator::host_id>> range_streamer::get_all_ranges_with_sources_for(
|
||||
const sstring& keyspace_name, const locator::vnode_effective_replication_map* erm, dht::token_range_vector desired_ranges) {
|
||||
std::unordered_map<dht::token_range, std::vector<locator::host_id>>
|
||||
range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, const locator::vnode_effective_replication_map* erm, dht::token_range_vector desired_ranges) {
|
||||
logger.debug("{} ks={}", __func__, keyspace_name);
|
||||
|
||||
auto range_addresses = erm->get_range_host_ids().get();
|
||||
@@ -113,24 +114,24 @@ std::unordered_map<dht::token_range, std::vector<locator::host_id>> range_stream
|
||||
}
|
||||
|
||||
// Must be called from a seastar thread
|
||||
std::unordered_map<dht::token_range, std::vector<locator::host_id>> range_streamer::get_all_ranges_with_strict_sources_for(
|
||||
const sstring& keyspace_name, const locator::vnode_effective_replication_map* erm, dht::token_range_vector desired_ranges, gms::gossiper& gossiper) {
|
||||
std::unordered_map<dht::token_range, std::vector<locator::host_id>>
|
||||
range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_name, const locator::vnode_effective_replication_map* erm, dht::token_range_vector desired_ranges, gms::gossiper& gossiper) {
|
||||
logger.debug("{} ks={}", __func__, keyspace_name);
|
||||
SCYLLA_ASSERT(_tokens.empty() == false);
|
||||
SCYLLA_ASSERT (_tokens.empty() == false);
|
||||
|
||||
auto& strat = erm->get_replication_strategy();
|
||||
|
||||
// Active ranges
|
||||
//Active ranges
|
||||
auto metadata_clone = get_token_metadata().clone_only_token_map().get();
|
||||
auto range_addresses = strat.get_range_host_ids(metadata_clone).get();
|
||||
|
||||
// Pending ranges
|
||||
//Pending ranges
|
||||
metadata_clone.update_topology(_address, _dr);
|
||||
metadata_clone.update_normal_tokens(_tokens, _address).get();
|
||||
auto pending_range_addresses = strat.get_range_host_ids(metadata_clone).get();
|
||||
auto pending_range_addresses = strat.get_range_host_ids(metadata_clone).get();
|
||||
metadata_clone.clear_gently().get();
|
||||
|
||||
// Collects the source that will have its range moved to the new node
|
||||
//Collects the source that will have its range moved to the new node
|
||||
std::unordered_map<dht::token_range, std::vector<locator::host_id>> range_sources;
|
||||
|
||||
logger.debug("keyspace={}, desired_ranges.size={}, range_addresses.size={}", keyspace_name, desired_ranges.size(), range_addresses.size());
|
||||
@@ -149,12 +150,11 @@ std::unordered_map<dht::token_range, std::vector<locator::host_id>> range_stream
|
||||
}
|
||||
|
||||
std::unordered_set<locator::host_id> new_endpoints(it->second.begin(), it->second.end());
|
||||
// Due to CASSANDRA-5953 we can have a higher RF then we have endpoints.
|
||||
// So we need to be careful to only be strict when endpoints == RF
|
||||
//Due to CASSANDRA-5953 we can have a higher RF then we have endpoints.
|
||||
//So we need to be careful to only be strict when endpoints == RF
|
||||
if (old_endpoints.size() == erm->get_replication_factor()) {
|
||||
std::erase_if(old_endpoints, [&new_endpoints](locator::host_id ep) {
|
||||
return new_endpoints.contains(ep);
|
||||
});
|
||||
std::erase_if(old_endpoints,
|
||||
[&new_endpoints] (locator::host_id ep) { return new_endpoints.contains(ep); });
|
||||
if (old_endpoints.size() != 1) {
|
||||
throw std::runtime_error(format("Expected 1 endpoint but found {:d}", old_endpoints.size()));
|
||||
}
|
||||
@@ -163,7 +163,7 @@ std::unordered_map<dht::token_range, std::vector<locator::host_id>> range_stream
|
||||
}
|
||||
}
|
||||
|
||||
// Validate
|
||||
//Validate
|
||||
auto it = range_sources.find(desired_range);
|
||||
if (it == range_sources.end()) {
|
||||
throw std::runtime_error(format("No sources found for {}", desired_range));
|
||||
@@ -176,9 +176,7 @@ std::unordered_map<dht::token_range, std::vector<locator::host_id>> range_stream
|
||||
locator::host_id source_id = it->second.front();
|
||||
|
||||
if (gossiper.is_enabled() && !gossiper.is_alive(source_id)) {
|
||||
throw std::runtime_error(format("A node required to move the data consistently is down ({}). If you wish to move the data from a potentially "
|
||||
"inconsistent replica, restart the node with consistent_rangemovement=false",
|
||||
source_id));
|
||||
throw std::runtime_error(format("A node required to move the data consistently is down ({}). If you wish to move the data from a potentially inconsistent replica, restart the node with consistent_rangemovement=false", source_id));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -190,8 +188,12 @@ bool range_streamer::use_strict_sources_for_ranges(const sstring& keyspace_name,
|
||||
auto nr_nodes_in_ring = get_token_metadata().get_normal_token_owners().size();
|
||||
bool everywhere_topology = erm.get_replication_strategy().get_type() == locator::replication_strategy_type::everywhere_topology;
|
||||
// Use strict when number of nodes in the ring is equal or more than RF
|
||||
auto strict = _db.local().get_config().consistent_rangemovement() && !_tokens.empty() && !everywhere_topology && nr_nodes_in_ring >= rf;
|
||||
logger.debug("use_strict_sources_for_ranges: ks={}, nr_nodes_in_ring={}, rf={}, strict={}", keyspace_name, nr_nodes_in_ring, rf, strict);
|
||||
auto strict = _db.local().get_config().consistent_rangemovement()
|
||||
&& !_tokens.empty()
|
||||
&& !everywhere_topology
|
||||
&& nr_nodes_in_ring >= rf;
|
||||
logger.debug("use_strict_sources_for_ranges: ks={}, nr_nodes_in_ring={}, rf={}, strict={}",
|
||||
keyspace_name, nr_nodes_in_ring, rf, strict);
|
||||
return strict;
|
||||
}
|
||||
|
||||
@@ -212,36 +214,34 @@ void range_streamer::add_rx_ranges(const sstring& keyspace_name, std::unordered_
|
||||
}
|
||||
|
||||
// TODO: This is the legacy range_streamer interface, it is add_rx_ranges which adds rx ranges.
|
||||
future<> range_streamer::add_ranges(const sstring& keyspace_name, locator::static_effective_replication_map_ptr erm, dht::token_range_vector ranges,
|
||||
gms::gossiper& gossiper, bool is_replacing) {
|
||||
return seastar::async([this, keyspace_name, ermp = std::move(erm), ranges = std::move(ranges), &gossiper, is_replacing]() mutable {
|
||||
if (_nr_tx_added) {
|
||||
throw std::runtime_error("Mixed sending and receiving is not supported");
|
||||
}
|
||||
_nr_rx_added++;
|
||||
auto erm = ermp->maybe_as_vnode_effective_replication_map();
|
||||
SCYLLA_ASSERT(erm != nullptr);
|
||||
auto ranges_for_keyspace = !is_replacing && use_strict_sources_for_ranges(keyspace_name, *erm)
|
||||
? get_all_ranges_with_strict_sources_for(keyspace_name, erm, std::move(ranges), gossiper)
|
||||
: get_all_ranges_with_sources_for(keyspace_name, erm, std::move(ranges));
|
||||
future<> range_streamer::add_ranges(const sstring& keyspace_name, locator::static_effective_replication_map_ptr erm, dht::token_range_vector ranges, gms::gossiper& gossiper, bool is_replacing) {
|
||||
return seastar::async([this, keyspace_name, ermp = std::move(erm), ranges= std::move(ranges), &gossiper, is_replacing] () mutable {
|
||||
if (_nr_tx_added) {
|
||||
throw std::runtime_error("Mixed sending and receiving is not supported");
|
||||
}
|
||||
_nr_rx_added++;
|
||||
auto erm = ermp->maybe_as_vnode_effective_replication_map();
|
||||
SCYLLA_ASSERT(erm != nullptr);
|
||||
auto ranges_for_keyspace = !is_replacing && use_strict_sources_for_ranges(keyspace_name, *erm)
|
||||
? get_all_ranges_with_strict_sources_for(keyspace_name, erm, std::move(ranges), gossiper)
|
||||
: get_all_ranges_with_sources_for(keyspace_name, erm, std::move(ranges));
|
||||
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
for (auto& x : ranges_for_keyspace) {
|
||||
logger.debug("{} : keyspace {} range {} exists on {}", _description, keyspace_name, x.first, x.second);
|
||||
}
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
for (auto& x : ranges_for_keyspace) {
|
||||
logger.debug("{} : keyspace {} range {} exists on {}", _description, keyspace_name, x.first, x.second);
|
||||
}
|
||||
}
|
||||
|
||||
std::unordered_map<locator::host_id, dht::token_range_vector> range_fetch_map =
|
||||
get_range_fetch_map(ranges_for_keyspace, _source_filters, keyspace_name);
|
||||
utils::clear_gently(ranges_for_keyspace).get();
|
||||
std::unordered_map<locator::host_id, dht::token_range_vector> range_fetch_map = get_range_fetch_map(ranges_for_keyspace, _source_filters, keyspace_name);
|
||||
utils::clear_gently(ranges_for_keyspace).get();
|
||||
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
for (auto& x : range_fetch_map) {
|
||||
logger.debug("{} : keyspace={}, ranges={} from source={}, range_size={}", _description, keyspace_name, x.second, x.first, x.second.size());
|
||||
}
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
for (auto& x : range_fetch_map) {
|
||||
logger.debug("{} : keyspace={}, ranges={} from source={}, range_size={}", _description, keyspace_name, x.second, x.first, x.second.size());
|
||||
}
|
||||
_to_stream.emplace(keyspace_name, std::move(range_fetch_map));
|
||||
});
|
||||
}
|
||||
_to_stream.emplace(keyspace_name, std::move(range_fetch_map));
|
||||
});
|
||||
}
|
||||
|
||||
future<> range_streamer::stream_async() {
|
||||
@@ -250,73 +250,73 @@ future<> range_streamer::stream_async() {
|
||||
_token_metadata_ptr = nullptr;
|
||||
logger.info("{} starts, nr_ranges_remaining={}", _description, _nr_ranges_remaining);
|
||||
auto start = lowres_clock::now();
|
||||
return do_for_each(_to_stream, [this, description = _description](auto& stream) {
|
||||
return do_for_each(_to_stream, [this, description = _description] (auto& stream) {
|
||||
const auto& keyspace = stream.first;
|
||||
auto& ip_range_vec = stream.second;
|
||||
auto ips = ip_range_vec | std::views::keys | std::ranges::to<std::list>();
|
||||
// Fetch from or send to peer node in parallel
|
||||
logger.info("{} with {} for keyspace={} started, nodes_to_stream={}", description, ips, keyspace, ip_range_vec.size());
|
||||
return parallel_for_each(ip_range_vec, [this, description, keyspace](auto& ip_range) {
|
||||
auto& source = ip_range.first;
|
||||
auto& range_vec = ip_range.second;
|
||||
return seastar::with_semaphore(_limiter, 1, [this, description, keyspace, source, &range_vec]() mutable {
|
||||
return seastar::async([this, description, keyspace, source, &range_vec]() mutable {
|
||||
// TODO: It is better to use fiber instead of thread here because
|
||||
// creating a thread per peer can be some memory in a large cluster.
|
||||
auto start_time = lowres_clock::now();
|
||||
unsigned sp_index = 0;
|
||||
unsigned nr_ranges_streamed = 0;
|
||||
size_t nr_ranges_total = range_vec.size();
|
||||
auto do_streaming = [&](dht::token_range_vector&& ranges_to_stream) {
|
||||
auto sp = stream_plan(_stream_manager.local(), format("{}-{}-index-{:d}", description, keyspace, sp_index++), _reason, _topo_guard);
|
||||
auto abort_listener = _abort_source.subscribe([&]() noexcept {
|
||||
sp.abort();
|
||||
});
|
||||
_abort_source.check();
|
||||
logger.info("{} with {} for keyspace={}, streaming [{}, {}) out of {} ranges", description, source, keyspace, nr_ranges_streamed,
|
||||
nr_ranges_streamed + ranges_to_stream.size(), nr_ranges_total);
|
||||
auto ranges_streamed = ranges_to_stream.size();
|
||||
if (_nr_rx_added) {
|
||||
sp.request_ranges(source, keyspace, std::move(ranges_to_stream), _tables);
|
||||
} else if (_nr_tx_added) {
|
||||
sp.transfer_ranges(source, keyspace, std::move(ranges_to_stream), _tables);
|
||||
}
|
||||
sp.execute().discard_result().get();
|
||||
// Update finished percentage
|
||||
nr_ranges_streamed += ranges_streamed;
|
||||
_nr_ranges_remaining -= ranges_streamed;
|
||||
float percentage = _nr_total_ranges == 0 ? 1 : (_nr_total_ranges - _nr_ranges_remaining) / (float)_nr_total_ranges;
|
||||
_stream_manager.local().update_finished_percentage(_reason, percentage);
|
||||
logger.info("Finished {} out of {} ranges for {}, finished percentage={}", _nr_total_ranges - _nr_ranges_remaining, _nr_total_ranges,
|
||||
_reason, percentage);
|
||||
};
|
||||
dht::token_range_vector ranges_to_stream;
|
||||
try {
|
||||
for (auto it = range_vec.begin(); it < range_vec.end();) {
|
||||
ranges_to_stream.push_back(*it);
|
||||
++it;
|
||||
auto fraction = _db.local().get_config().stream_plan_ranges_fraction();
|
||||
size_t nr_ranges_per_stream_plan = nr_ranges_total * fraction;
|
||||
if (ranges_to_stream.size() < nr_ranges_per_stream_plan) {
|
||||
continue;
|
||||
} else {
|
||||
do_streaming(std::exchange(ranges_to_stream, {}));
|
||||
it = range_vec.erase(range_vec.begin(), it);
|
||||
}
|
||||
}
|
||||
if (ranges_to_stream.size() > 0) {
|
||||
do_streaming(std::exchange(ranges_to_stream, {}));
|
||||
range_vec.clear();
|
||||
}
|
||||
} catch (...) {
|
||||
auto t = std::chrono::duration_cast<std::chrono::duration<float>>(lowres_clock::now() - start_time).count();
|
||||
logger.warn("{} with {} for keyspace={} failed, took {} seconds: {}", description, source, keyspace, t, std::current_exception());
|
||||
throw;
|
||||
return parallel_for_each(ip_range_vec, [this, description, keyspace] (auto& ip_range) {
|
||||
auto& source = ip_range.first;
|
||||
auto& range_vec = ip_range.second;
|
||||
return seastar::with_semaphore(_limiter, 1, [this, description, keyspace, source, &range_vec] () mutable {
|
||||
return seastar::async([this, description, keyspace, source, &range_vec] () mutable {
|
||||
// TODO: It is better to use fiber instead of thread here because
|
||||
// creating a thread per peer can be some memory in a large cluster.
|
||||
auto start_time = lowres_clock::now();
|
||||
unsigned sp_index = 0;
|
||||
unsigned nr_ranges_streamed = 0;
|
||||
size_t nr_ranges_total = range_vec.size();
|
||||
auto do_streaming = [&] (dht::token_range_vector&& ranges_to_stream) {
|
||||
auto sp = stream_plan(_stream_manager.local(), format("{}-{}-index-{:d}", description, keyspace, sp_index++),
|
||||
_reason, _topo_guard);
|
||||
auto abort_listener = _abort_source.subscribe([&] () noexcept { sp.abort(); });
|
||||
_abort_source.check();
|
||||
logger.info("{} with {} for keyspace={}, streaming [{}, {}) out of {} ranges",
|
||||
description, source, keyspace,
|
||||
nr_ranges_streamed, nr_ranges_streamed + ranges_to_stream.size(), nr_ranges_total);
|
||||
auto ranges_streamed = ranges_to_stream.size();
|
||||
if (_nr_rx_added) {
|
||||
sp.request_ranges(source, keyspace, std::move(ranges_to_stream), _tables);
|
||||
} else if (_nr_tx_added) {
|
||||
sp.transfer_ranges(source, keyspace, std::move(ranges_to_stream), _tables);
|
||||
}
|
||||
sp.execute().discard_result().get();
|
||||
// Update finished percentage
|
||||
nr_ranges_streamed += ranges_streamed;
|
||||
_nr_ranges_remaining -= ranges_streamed;
|
||||
float percentage = _nr_total_ranges == 0 ? 1 : (_nr_total_ranges - _nr_ranges_remaining) / (float)_nr_total_ranges;
|
||||
_stream_manager.local().update_finished_percentage(_reason, percentage);
|
||||
logger.info("Finished {} out of {} ranges for {}, finished percentage={}",
|
||||
_nr_total_ranges - _nr_ranges_remaining, _nr_total_ranges, _reason, percentage);
|
||||
};
|
||||
dht::token_range_vector ranges_to_stream;
|
||||
try {
|
||||
for (auto it = range_vec.begin(); it < range_vec.end();) {
|
||||
ranges_to_stream.push_back(*it);
|
||||
++it;
|
||||
auto fraction = _db.local().get_config().stream_plan_ranges_fraction();
|
||||
size_t nr_ranges_per_stream_plan = nr_ranges_total * fraction;
|
||||
if (ranges_to_stream.size() < nr_ranges_per_stream_plan) {
|
||||
continue;
|
||||
} else {
|
||||
do_streaming(std::exchange(ranges_to_stream, {}));
|
||||
it = range_vec.erase(range_vec.begin(), it);
|
||||
}
|
||||
}
|
||||
if (ranges_to_stream.size() > 0) {
|
||||
do_streaming(std::exchange(ranges_to_stream, {}));
|
||||
range_vec.clear();
|
||||
}
|
||||
} catch (...) {
|
||||
auto t = std::chrono::duration_cast<std::chrono::duration<float>>(lowres_clock::now() - start_time).count();
|
||||
logger.info("{} with {} for keyspace={} succeeded, took {} seconds", description, source, keyspace, t);
|
||||
});
|
||||
});
|
||||
logger.warn("{} with {} for keyspace={} failed, took {} seconds: {}", description, source, keyspace, t, std::current_exception());
|
||||
throw;
|
||||
}
|
||||
auto t = std::chrono::duration_cast<std::chrono::duration<float>>(lowres_clock::now() - start_time).count();
|
||||
logger.info("{} with {} for keyspace={} succeeded, took {} seconds", description, source, keyspace, t);
|
||||
});
|
||||
});
|
||||
});
|
||||
}).finally([this, start] {
|
||||
auto t = std::chrono::duration_cast<std::chrono::seconds>(lowres_clock::now() - start).count();
|
||||
@@ -344,4 +344,4 @@ size_t range_streamer::nr_ranges_to_stream() {
|
||||
return nr_ranges_remaining;
|
||||
}
|
||||
|
||||
} // namespace dht
|
||||
} // dht
|
||||
|
||||
591
gms/gossiper.cc
591
gms/gossiper.cc
File diff suppressed because it is too large
Load Diff
@@ -42,7 +42,14 @@ void everywhere_replication_strategy::validate_options(const gms::feature_servic
|
||||
|
||||
sstring everywhere_replication_strategy::sanity_check_read_replicas(const effective_replication_map& erm, const host_id_vector_replica_set& read_replicas) const {
|
||||
const auto replication_factor = erm.get_replication_factor();
|
||||
if (read_replicas.size() > replication_factor) {
|
||||
if (const auto& topo_info = erm.get_token_metadata().get_topology_change_info(); topo_info && topo_info->read_new) {
|
||||
if (read_replicas.size() > replication_factor + 1) {
|
||||
return seastar::format(
|
||||
"everywhere_replication_strategy: the number of replicas for everywhere_replication_strategy is {}, "
|
||||
"cannot be higher than replication factor {} + 1 during the 'read from new replicas' stage of a topology change",
|
||||
read_replicas.size(), replication_factor);
|
||||
}
|
||||
} else if (read_replicas.size() > replication_factor) {
|
||||
return seastar::format("everywhere_replication_strategy: the number of replicas for everywhere_replication_strategy is {}, cannot be higher than replication factor {}", read_replicas.size(), replication_factor);
|
||||
}
|
||||
return {};
|
||||
|
||||
@@ -33,14 +33,15 @@ size_t hash<locator::endpoint_dc_rack>::operator()(const locator::endpoint_dc_ra
|
||||
return utils::tuple_hash()(std::tie(v.dc, v.rack));
|
||||
}
|
||||
|
||||
} // namespace std
|
||||
}
|
||||
|
||||
namespace locator {
|
||||
|
||||
static logging::logger logger("network_topology_strategy");
|
||||
|
||||
network_topology_strategy::network_topology_strategy(replication_strategy_params params, const topology* topo)
|
||||
: abstract_replication_strategy(params, replication_strategy_type::network_topology) {
|
||||
network_topology_strategy::network_topology_strategy(replication_strategy_params params, const topology* topo) :
|
||||
abstract_replication_strategy(params,
|
||||
replication_strategy_type::network_topology) {
|
||||
auto opts = _config_options;
|
||||
|
||||
logger.debug("options={}", opts);
|
||||
@@ -64,7 +65,8 @@ network_topology_strategy::network_topology_strategy(replication_strategy_params
|
||||
if (boost::equals(key, "replication_factor")) {
|
||||
on_internal_error(rslogger, "replication_factor should have been replaced with a DC:RF mapping by now");
|
||||
} else {
|
||||
throw exceptions::configuration_exception(format("'{}' is not a valid option, did you mean (lowercase) 'replication_factor'?", key));
|
||||
throw exceptions::configuration_exception(format(
|
||||
"'{}' is not a valid option, did you mean (lowercase) 'replication_factor'?", key));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -107,8 +109,8 @@ class natural_endpoints_tracker {
|
||||
, _rf_left(std::min(rf, node_count))
|
||||
// If there aren't enough racks in this DC to fill the RF, we'll still use at least one node from each rack,
|
||||
// and the difference is to be filled by the first encountered nodes.
|
||||
, _acceptable_rack_repeats(rf - rack_count) {
|
||||
}
|
||||
, _acceptable_rack_repeats(rf - rack_count)
|
||||
{}
|
||||
|
||||
/**
|
||||
* Attempts to add an endpoint to the replicas for this datacenter, adding to the endpoints set if successful.
|
||||
@@ -199,7 +201,8 @@ public:
|
||||
, _tp(_tm.get_topology())
|
||||
, _dc_rep_factor(dc_rep_factor)
|
||||
, _token_owners(_tm.get_datacenter_token_owners())
|
||||
, _racks(_tm.get_datacenter_racks_token_owners()) {
|
||||
, _racks(_tm.get_datacenter_racks_token_owners())
|
||||
{
|
||||
// not aware of any cluster members
|
||||
SCYLLA_ASSERT(!_token_owners.empty() && !_racks.empty());
|
||||
|
||||
@@ -248,14 +251,16 @@ public:
|
||||
for (const auto& [dc, rf_data] : dc_rf) {
|
||||
auto rf = rf_data.count();
|
||||
if (rf > endpoints_in(dc)) {
|
||||
throw exceptions::configuration_exception(
|
||||
seastar::format("Datacenter {} doesn't have enough token-owning nodes for replication_factor={}", dc, rf));
|
||||
throw exceptions::configuration_exception(seastar::format(
|
||||
"Datacenter {} doesn't have enough token-owning nodes for replication_factor={}", dc, rf));
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
future<host_id_set> network_topology_strategy::calculate_natural_endpoints(const token& search_token, const token_metadata& tm) const {
|
||||
future<host_id_set>
|
||||
network_topology_strategy::calculate_natural_endpoints(
|
||||
const token& search_token, const token_metadata& tm) const {
|
||||
|
||||
natural_endpoints_tracker tracker(tm, _dc_rep_factor);
|
||||
|
||||
@@ -280,14 +285,12 @@ void network_topology_strategy::validate_options(const gms::feature_service& fs,
|
||||
for (auto& c : _config_options) {
|
||||
if (c.first == sstring("replication_factor")) {
|
||||
on_internal_error(rslogger, fmt::format("'replication_factor' tag should be unrolled into a list of DC:RF by now."
|
||||
"_config_options:{}",
|
||||
_config_options));
|
||||
"_config_options:{}", _config_options));
|
||||
}
|
||||
auto dc = dcs.find(c.first);
|
||||
if (dc == dcs.end()) {
|
||||
throw exceptions::configuration_exception(format("Unrecognized strategy option {{{}}} "
|
||||
"passed to NetworkTopologyStrategy",
|
||||
this->to_qualified_class_name(c.first)));
|
||||
"passed to NetworkTopologyStrategy", this->to_qualified_class_name(c.first)));
|
||||
}
|
||||
auto racks = dc->second | std::views::keys | std::ranges::to<std::unordered_set<sstring>>();
|
||||
auto rf = parse_replication_factor(c.second);
|
||||
@@ -308,8 +311,8 @@ future<tablet_map> network_topology_strategy::allocate_tablets_for_new_table(sch
|
||||
rslogger.info("Rounding up tablet count from {} to {} for table {}.{}", tablet_count, aligned_tablet_count, s->ks_name(), s->cf_name());
|
||||
tablet_count = aligned_tablet_count;
|
||||
}
|
||||
co_return co_await reallocate_tablets(
|
||||
std::move(s), std::move(tm), tablet_map(tablet_count, get_consistency() != data_dictionary::consistency_config_option::eventual));
|
||||
co_return co_await reallocate_tablets(std::move(s), std::move(tm),
|
||||
tablet_map(tablet_count, get_consistency() != data_dictionary::consistency_config_option::eventual));
|
||||
}
|
||||
|
||||
future<tablet_map> network_topology_strategy::reallocate_tablets(schema_ptr s, token_metadata_ptr tm, tablet_map tablets) const {
|
||||
@@ -318,15 +321,16 @@ future<tablet_map> network_topology_strategy::reallocate_tablets(schema_ptr s, t
|
||||
co_await load.populate_with_normalized_load();
|
||||
co_await load.populate(std::nullopt, s->id());
|
||||
|
||||
tablet_logger.debug(
|
||||
"Allocating tablets for {}.{} ({}): dc_rep_factor={} tablet_count={}", s->ks_name(), s->cf_name(), s->id(), _dc_rep_factor, tablets.tablet_count());
|
||||
tablet_logger.debug("Allocating tablets for {}.{} ({}): dc_rep_factor={} tablet_count={}", s->ks_name(), s->cf_name(), s->id(), _dc_rep_factor, tablets.tablet_count());
|
||||
|
||||
for (tablet_id tb : tablets.tablet_ids()) {
|
||||
auto tinfo = tablets.get_tablet_info(tb);
|
||||
tinfo.replicas = co_await reallocate_tablets(s, tm, load, tablets, tb);
|
||||
if (tablets.has_raft_info()) {
|
||||
if (!tablets.get_tablet_raft_info(tb).group_id) {
|
||||
tablets.set_tablet_raft_info(tb, tablet_raft_info{.group_id = raft::group_id{utils::UUID_gen::get_time_UUID()}});
|
||||
tablets.set_tablet_raft_info(tb, tablet_raft_info {
|
||||
.group_id = raft::group_id{utils::UUID_gen::get_time_UUID()}
|
||||
});
|
||||
}
|
||||
}
|
||||
tablets.set_tablet(tb, std::move(tinfo));
|
||||
@@ -336,8 +340,7 @@ future<tablet_map> network_topology_strategy::reallocate_tablets(schema_ptr s, t
|
||||
co_return tablets;
|
||||
}
|
||||
|
||||
future<tablet_replica_set> network_topology_strategy::reallocate_tablets(
|
||||
schema_ptr s, token_metadata_ptr tm, load_sketch& load, const tablet_map& cur_tablets, tablet_id tb) const {
|
||||
future<tablet_replica_set> network_topology_strategy::reallocate_tablets(schema_ptr s, token_metadata_ptr tm, load_sketch& load, const tablet_map& cur_tablets, tablet_id tb) const {
|
||||
tablet_replica_set replicas;
|
||||
// Current number of replicas per dc
|
||||
std::unordered_map<sstring, size_t> nodes_per_dc;
|
||||
@@ -361,8 +364,8 @@ future<tablet_replica_set> network_topology_strategy::reallocate_tablets(
|
||||
if (new_rf && new_rf->is_rack_based()) {
|
||||
auto diff = diff_racks(old_racks_per_dc[dc], new_rf->get_rack_list());
|
||||
|
||||
tablet_logger.debug("reallocate_tablets {}.{} tablet_id={} dc={} old_racks={} add_racks={} del_racks={}", s->ks_name(), s->cf_name(), tb, dc,
|
||||
old_racks_per_dc[dc], diff.added, diff.removed);
|
||||
tablet_logger.debug("reallocate_tablets {}.{} tablet_id={} dc={} old_racks={} add_racks={} del_racks={}",
|
||||
s->ks_name(), s->cf_name(), tb, dc, old_racks_per_dc[dc], diff.added, diff.removed);
|
||||
|
||||
if (!diff) {
|
||||
continue;
|
||||
@@ -392,18 +395,23 @@ future<tablet_replica_set> network_topology_strategy::reallocate_tablets(
|
||||
co_return replicas;
|
||||
}
|
||||
|
||||
tablet_replica_set network_topology_strategy::drop_tablets_in_racks(schema_ptr s, token_metadata_ptr tm, load_sketch& load, tablet_id tb,
|
||||
const tablet_replica_set& cur_replicas, const sstring& dc, const rack_list& racks_to_drop) const {
|
||||
tablet_replica_set network_topology_strategy::drop_tablets_in_racks(schema_ptr s,
|
||||
token_metadata_ptr tm,
|
||||
load_sketch& load,
|
||||
tablet_id tb,
|
||||
const tablet_replica_set& cur_replicas,
|
||||
const sstring& dc,
|
||||
const rack_list& racks_to_drop) const {
|
||||
auto& topo = tm->get_topology();
|
||||
tablet_replica_set filtered;
|
||||
auto is_rack_to_drop = [&racks_to_drop](const sstring& rack) {
|
||||
auto is_rack_to_drop = [&racks_to_drop] (const sstring& rack) {
|
||||
return std::ranges::contains(racks_to_drop, rack);
|
||||
};
|
||||
for (const auto& tr : cur_replicas) {
|
||||
auto& node = topo.get_node(tr.host);
|
||||
if (node.dc_rack().dc == dc && is_rack_to_drop(node.dc_rack().rack)) {
|
||||
tablet_logger.debug("drop_tablets_in_rack {}.{} tablet_id={} dc={} rack={} removing replica: {}", s->ks_name(), s->cf_name(), tb, node.dc_rack().dc,
|
||||
node.dc_rack().rack, tr);
|
||||
tablet_logger.debug("drop_tablets_in_rack {}.{} tablet_id={} dc={} rack={} removing replica: {}",
|
||||
s->ks_name(), s->cf_name(), tb, node.dc_rack().dc, node.dc_rack().rack, tr);
|
||||
load.unload(tr.host, tr.shard, 1, service::default_target_tablet_size);
|
||||
} else {
|
||||
filtered.emplace_back(tr);
|
||||
@@ -412,17 +420,22 @@ tablet_replica_set network_topology_strategy::drop_tablets_in_racks(schema_ptr s
|
||||
return filtered;
|
||||
}
|
||||
|
||||
tablet_replica_set network_topology_strategy::add_tablets_in_racks(schema_ptr s, token_metadata_ptr tm, load_sketch& load, tablet_id tb,
|
||||
const tablet_replica_set& cur_replicas, const sstring& dc, const rack_list& racks_to_add) const {
|
||||
tablet_replica_set network_topology_strategy::add_tablets_in_racks(schema_ptr s,
|
||||
token_metadata_ptr tm,
|
||||
load_sketch& load,
|
||||
tablet_id tb,
|
||||
const tablet_replica_set& cur_replicas,
|
||||
const sstring& dc,
|
||||
const rack_list& racks_to_add) const {
|
||||
auto nodes = tm->get_datacenter_racks_token_owners_nodes();
|
||||
auto& dc_nodes = nodes.at(dc);
|
||||
auto new_replicas = cur_replicas;
|
||||
|
||||
for (auto&& rack : racks_to_add) {
|
||||
for (auto&& rack: racks_to_add) {
|
||||
host_id min_node;
|
||||
double min_load = std::numeric_limits<double>::max();
|
||||
|
||||
for (auto&& node : dc_nodes.at(rack)) {
|
||||
for (auto&& node: dc_nodes.at(rack)) {
|
||||
if (!node.get().is_normal()) {
|
||||
continue;
|
||||
}
|
||||
@@ -437,26 +450,29 @@ tablet_replica_set network_topology_strategy::add_tablets_in_racks(schema_ptr s,
|
||||
}
|
||||
|
||||
if (!min_node) {
|
||||
throw std::runtime_error(fmt::format("No candidate node in rack {}.{} to allocate tablet replica", dc, rack));
|
||||
throw std::runtime_error(
|
||||
fmt::format("No candidate node in rack {}.{} to allocate tablet replica", dc, rack));
|
||||
}
|
||||
|
||||
auto new_replica = tablet_replica{min_node, load.next_shard(min_node, 1, service::default_target_tablet_size)};
|
||||
new_replicas.push_back(new_replica);
|
||||
|
||||
tablet_logger.trace("add_tablet_in_rack {}.{} tablet_id={} dc={} rack={} load={} new_replica={}", s->ks_name(), s->cf_name(), tb.id, dc, rack, min_load,
|
||||
new_replica);
|
||||
tablet_logger.trace("add_tablet_in_rack {}.{} tablet_id={} dc={} rack={} load={} new_replica={}",
|
||||
s->ks_name(), s->cf_name(), tb.id, dc, rack, min_load, new_replica);
|
||||
}
|
||||
return new_replicas;
|
||||
}
|
||||
|
||||
future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_ptr s, token_metadata_ptr tm, load_sketch& load, tablet_id tb,
|
||||
std::map<sstring, std::unordered_set<locator::host_id>>& replicas_per_rack, const tablet_replica_set& cur_replicas, sstring dc, size_t dc_node_count,
|
||||
size_t dc_rf) const {
|
||||
std::map<sstring, std::unordered_set<locator::host_id>>& replicas_per_rack,
|
||||
const tablet_replica_set& cur_replicas,
|
||||
sstring dc, size_t dc_node_count, size_t dc_rf) const {
|
||||
static thread_local std::default_random_engine rnd_engine{std::random_device{}()};
|
||||
|
||||
auto replicas = cur_replicas;
|
||||
// all_dc_racks is ordered lexicographically on purpose
|
||||
auto all_dc_racks = tm->get_datacenter_racks_token_owners_nodes().at(dc) | std::ranges::to<std::map>();
|
||||
auto all_dc_racks = tm->get_datacenter_racks_token_owners_nodes().at(dc)
|
||||
| std::ranges::to<std::map>();
|
||||
|
||||
// Track all nodes with no replicas on them for this tablet, per rack.
|
||||
struct node_load {
|
||||
@@ -465,7 +481,7 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
||||
};
|
||||
// for sorting in descending load order
|
||||
// (in terms of load)
|
||||
auto node_load_cmp = [](const node_load& a, const node_load& b) {
|
||||
auto node_load_cmp = [] (const node_load& a, const node_load& b) {
|
||||
return a.load > b.load;
|
||||
};
|
||||
|
||||
@@ -517,7 +533,7 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
||||
|
||||
// ensure fairness across racks (in particular if rf < number_of_racks)
|
||||
// by rotating the racks order
|
||||
auto append_candidate_racks = [&](candidates_list& racks) {
|
||||
auto append_candidate_racks = [&] (candidates_list& racks) {
|
||||
if (auto size = racks.size()) {
|
||||
auto it = racks.begin() + tb.id % size;
|
||||
std::move(it, racks.end(), std::back_inserter(candidate_racks));
|
||||
@@ -529,19 +545,20 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
||||
append_candidate_racks(existing_racks);
|
||||
|
||||
if (candidate_racks.empty()) {
|
||||
on_internal_error(tablet_logger, seastar::format("allocate_replica {}.{}: no candidate racks found for dc={} allocated={} rf={}: existing={}",
|
||||
s->ks_name(), s->cf_name(), dc, dc_node_count, dc_rf, replicas_per_rack));
|
||||
on_internal_error(tablet_logger,
|
||||
seastar::format("allocate_replica {}.{}: no candidate racks found for dc={} allocated={} rf={}: existing={}",
|
||||
s->ks_name(), s->cf_name(), dc, dc_node_count, dc_rf, replicas_per_rack));
|
||||
}
|
||||
|
||||
auto candidate_rack = candidate_racks.begin();
|
||||
|
||||
auto allocate_replica = [&](candidates_list::iterator& candidate) {
|
||||
auto allocate_replica = [&] (candidates_list::iterator& candidate) {
|
||||
const auto& rack = candidate->rack;
|
||||
auto& nodes = candidate->nodes;
|
||||
if (nodes.empty()) {
|
||||
on_internal_error(tablet_logger, seastar::format("allocate_replica {}.{} tablet_id={}: candidates vector for rack={} is empty for allocating "
|
||||
"tablet replicas in dc={} allocated={} rf={}",
|
||||
s->ks_name(), s->cf_name(), tb.id, rack, dc, dc_node_count, dc_rf));
|
||||
on_internal_error(tablet_logger,
|
||||
seastar::format("allocate_replica {}.{} tablet_id={}: candidates vector for rack={} is empty for allocating tablet replicas in dc={} allocated={} rf={}",
|
||||
s->ks_name(), s->cf_name(), tb.id, rack, dc, dc_node_count, dc_rf));
|
||||
}
|
||||
auto host_id = nodes.back().host;
|
||||
auto replica = tablet_replica{host_id, load.next_shard(host_id, 1, service::default_target_tablet_size)};
|
||||
@@ -549,13 +566,13 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
||||
auto inserted = replicas_per_rack[node.dc_rack().rack].insert(host_id).second;
|
||||
// Sanity check that a node is not used more than once
|
||||
if (!inserted) {
|
||||
on_internal_error(tablet_logger, seastar::format("allocate_replica {}.{} tablet_id={}: allocated replica={} node already used when allocating "
|
||||
"tablet replicas in dc={} allocated={} rf={}: replicas={}",
|
||||
s->ks_name(), s->cf_name(), tb.id, replica, dc, dc_node_count, dc_rf, replicas));
|
||||
on_internal_error(tablet_logger,
|
||||
seastar::format("allocate_replica {}.{} tablet_id={}: allocated replica={} node already used when allocating tablet replicas in dc={} allocated={} rf={}: replicas={}",
|
||||
s->ks_name(), s->cf_name(), tb.id, replica, dc, dc_node_count, dc_rf, replicas));
|
||||
}
|
||||
nodes.pop_back();
|
||||
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: allocated tablet replica={} dc={} rack={}: nodes remaining in rack={}", s->ks_name(),
|
||||
s->cf_name(), tb.id, replica, node.dc_rack().dc, node.dc_rack().rack, nodes.size());
|
||||
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: allocated tablet replica={} dc={} rack={}: nodes remaining in rack={}",
|
||||
s->ks_name(), s->cf_name(), tb.id, replica, node.dc_rack().dc, node.dc_rack().rack, nodes.size());
|
||||
if (nodes.empty()) {
|
||||
candidate = candidate_racks.erase(candidate);
|
||||
} else {
|
||||
@@ -566,8 +583,7 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
||||
}
|
||||
if (tablet_logger.is_enabled(log_level::trace)) {
|
||||
if (candidate != candidate_racks.end()) {
|
||||
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: next rack={} nodes={}", s->ks_name(), s->cf_name(), tb.id, candidate->rack,
|
||||
candidate->nodes.size());
|
||||
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: next rack={} nodes={}", s->ks_name(), s->cf_name(), tb.id, candidate->rack, candidate->nodes.size());
|
||||
} else {
|
||||
tablet_logger.trace("allocate_replica {}.{} tablet_id={}: no candidate racks left", s->ks_name(), s->cf_name(), tb.id);
|
||||
}
|
||||
@@ -575,15 +591,15 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
||||
return replica;
|
||||
};
|
||||
|
||||
tablet_logger.debug("allocate_replica {}.{} tablet_id={}: allocating tablet replicas in dc={} allocated={} rf={}", s->ks_name(), s->cf_name(), tb.id, dc,
|
||||
dc_node_count, dc_rf);
|
||||
tablet_logger.debug("allocate_replica {}.{} tablet_id={}: allocating tablet replicas in dc={} allocated={} rf={}",
|
||||
s->ks_name(), s->cf_name(), tb.id, dc, dc_node_count, dc_rf);
|
||||
|
||||
for (size_t remaining = dc_rf - dc_node_count; remaining; --remaining) {
|
||||
co_await coroutine::maybe_yield();
|
||||
if (candidate_rack == candidate_racks.end()) {
|
||||
on_internal_error(tablet_logger, format("allocate_replica {}.{} tablet_id={}: ran out of candidates for allocating tablet replicas in dc={} "
|
||||
"allocated={} rf={}: remaining={}",
|
||||
s->ks_name(), s->cf_name(), tb.id, dc, dc_node_count, dc_rf, remaining));
|
||||
on_internal_error(tablet_logger,
|
||||
format("allocate_replica {}.{} tablet_id={}: ran out of candidates for allocating tablet replicas in dc={} allocated={} rf={}: remaining={}",
|
||||
s->ks_name(), s->cf_name(), tb.id, dc, dc_node_count, dc_rf, remaining));
|
||||
}
|
||||
replicas.emplace_back(allocate_replica(candidate_rack));
|
||||
}
|
||||
@@ -592,9 +608,9 @@ future<tablet_replica_set> network_topology_strategy::add_tablets_in_dc(schema_p
|
||||
}
|
||||
|
||||
tablet_replica_set network_topology_strategy::drop_tablets_in_dc(schema_ptr s, const locator::topology& topo, load_sketch& load, tablet_id tb,
|
||||
const tablet_replica_set& cur_replicas, sstring dc, size_t dc_node_count, size_t dc_rf) const {
|
||||
tablet_logger.debug("drop_tablets_in_dc {}.{} tablet_id={}: deallocating tablet replicas in dc={} allocated={} rf={}", s->ks_name(), s->cf_name(), tb.id,
|
||||
dc, dc_node_count, dc_rf);
|
||||
const tablet_replica_set& cur_replicas,
|
||||
sstring dc, size_t dc_node_count, size_t dc_rf) const {
|
||||
tablet_logger.debug("drop_tablets_in_dc {}.{} tablet_id={}: deallocating tablet replicas in dc={} allocated={} rf={}", s->ks_name(), s->cf_name(), tb.id, dc, dc_node_count, dc_rf);
|
||||
|
||||
// Leave dc_rf replicas in dc, effectively deallocating in reverse order,
|
||||
// to maintain replica pairing between the base table and its materialized views.
|
||||
@@ -613,7 +629,8 @@ tablet_replica_set network_topology_strategy::drop_tablets_in_dc(schema_ptr s, c
|
||||
return filtered;
|
||||
}
|
||||
|
||||
sstring network_topology_strategy::sanity_check_read_replicas(const effective_replication_map& erm, const host_id_vector_replica_set& read_replicas) const {
|
||||
sstring network_topology_strategy::sanity_check_read_replicas(const effective_replication_map& erm,
|
||||
const host_id_vector_replica_set& read_replicas) const {
|
||||
const auto& topology = erm.get_topology();
|
||||
|
||||
struct rf_node_count {
|
||||
@@ -646,4 +663,4 @@ sstring network_topology_strategy::sanity_check_read_replicas(const effective_re
|
||||
using registry = class_registrator<abstract_replication_strategy, network_topology_strategy, replication_strategy_params, const topology*>;
|
||||
static registry registrator("org.apache.cassandra.locator.NetworkTopologyStrategy");
|
||||
static registry registrator_short_name("NetworkTopologyStrategy");
|
||||
} // namespace locator
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -26,16 +26,12 @@
|
||||
|
||||
struct node_printer {
|
||||
const locator::node* v;
|
||||
node_printer(const locator::node* n) noexcept
|
||||
: v(n) {
|
||||
}
|
||||
node_printer(const locator::node* n) noexcept : v(n) {}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct fmt::formatter<node_printer> {
|
||||
constexpr auto parse(format_parse_context& ctx) {
|
||||
return ctx.begin();
|
||||
}
|
||||
constexpr auto parse(format_parse_context& ctx) { return ctx.begin(); }
|
||||
auto format(const node_printer& np, fmt::format_context& ctx) const {
|
||||
const locator::node* node = np.v;
|
||||
auto out = fmt::format_to(ctx.out(), "node={}", fmt::ptr(node));
|
||||
@@ -47,9 +43,7 @@ struct fmt::formatter<node_printer> {
|
||||
};
|
||||
|
||||
static auto lazy_backtrace() {
|
||||
return seastar::value_of([] {
|
||||
return current_backtrace();
|
||||
});
|
||||
return seastar::value_of([] { return current_backtrace(); });
|
||||
}
|
||||
|
||||
namespace locator {
|
||||
@@ -57,12 +51,11 @@ namespace locator {
|
||||
static logging::logger tlogger("topology");
|
||||
|
||||
thread_local const endpoint_dc_rack endpoint_dc_rack::default_location = {
|
||||
.dc = locator::production_snitch_base::default_dc,
|
||||
.rack = locator::production_snitch_base::default_rack,
|
||||
.dc = locator::production_snitch_base::default_dc,
|
||||
.rack = locator::production_snitch_base::default_rack,
|
||||
};
|
||||
|
||||
node::node(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, bool excluded,
|
||||
this_node is_this_node, node::idx_type idx, bool draining)
|
||||
node::node(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, bool excluded, this_node is_this_node, node::idx_type idx, bool draining)
|
||||
: _topology(topology)
|
||||
, _host_id(id)
|
||||
, _dc_rack(std::move(dc_rack))
|
||||
@@ -71,11 +64,10 @@ node::node(const locator::topology* topology, locator::host_id id, endpoint_dc_r
|
||||
, _excluded(excluded)
|
||||
, _draining(draining)
|
||||
, _is_this_node(is_this_node)
|
||||
, _idx(idx) {
|
||||
}
|
||||
, _idx(idx)
|
||||
{}
|
||||
|
||||
node_holder node::make(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, bool excluded,
|
||||
node::this_node is_this_node, node::idx_type idx, bool draining) {
|
||||
node_holder node::make(const locator::topology* topology, locator::host_id id, endpoint_dc_rack dc_rack, state state, shard_id shard_count, bool excluded, node::this_node is_this_node, node::idx_type idx, bool draining) {
|
||||
return std::make_unique<node>(topology, std::move(id), std::move(dc_rack), std::move(state), shard_count, excluded, is_this_node, idx, draining);
|
||||
}
|
||||
|
||||
@@ -85,22 +77,14 @@ node_holder node::clone() const {
|
||||
|
||||
std::string node::to_string(node::state s) {
|
||||
switch (s) {
|
||||
case state::none:
|
||||
return "none";
|
||||
case state::bootstrapping:
|
||||
return "bootstrapping";
|
||||
case state::replacing:
|
||||
return "replacing";
|
||||
case state::normal:
|
||||
return "normal";
|
||||
case state::being_decommissioned:
|
||||
return "being_decommissioned";
|
||||
case state::being_removed:
|
||||
return "being_removed";
|
||||
case state::being_replaced:
|
||||
return "being_replaced";
|
||||
case state::left:
|
||||
return "left";
|
||||
case state::none: return "none";
|
||||
case state::bootstrapping: return "bootstrapping";
|
||||
case state::replacing: return "replacing";
|
||||
case state::normal: return "normal";
|
||||
case state::being_decommissioned: return "being_decommissioned";
|
||||
case state::being_removed: return "being_removed";
|
||||
case state::being_replaced: return "being_replaced";
|
||||
case state::left: return "left";
|
||||
}
|
||||
__builtin_unreachable();
|
||||
}
|
||||
@@ -117,19 +101,21 @@ future<> topology::clear_gently() noexcept {
|
||||
}
|
||||
|
||||
topology::topology(shallow_copy, config cfg)
|
||||
: _shard(this_shard_id())
|
||||
, _cfg(cfg)
|
||||
, _sort_by_proximity(true) {
|
||||
: _shard(this_shard_id())
|
||||
, _cfg(cfg)
|
||||
, _sort_by_proximity(true)
|
||||
{
|
||||
// constructor for shallow copying of token_metadata_impl
|
||||
}
|
||||
|
||||
topology::topology(config cfg)
|
||||
: _shard(this_shard_id())
|
||||
, _cfg(cfg)
|
||||
, _sort_by_proximity(!cfg.disable_proximity_sorting)
|
||||
, _random_engine(std::random_device{}()) {
|
||||
tlogger.trace("topology[{}]: constructing using config: endpoint={} id={} dc={} rack={}", fmt::ptr(this), cfg.this_endpoint, cfg.this_host_id,
|
||||
cfg.local_dc_rack.dc, cfg.local_dc_rack.rack);
|
||||
: _shard(this_shard_id())
|
||||
, _cfg(cfg)
|
||||
, _sort_by_proximity(!cfg.disable_proximity_sorting)
|
||||
, _random_engine(std::random_device{}())
|
||||
{
|
||||
tlogger.trace("topology[{}]: constructing using config: endpoint={} id={} dc={} rack={}", fmt::ptr(this),
|
||||
cfg.this_endpoint, cfg.this_host_id, cfg.local_dc_rack.dc, cfg.local_dc_rack.rack);
|
||||
add_node(cfg.this_host_id, cfg.local_dc_rack, node::state::none);
|
||||
}
|
||||
|
||||
@@ -145,7 +131,8 @@ topology::topology(topology&& o) noexcept
|
||||
, _dc_racks(std::move(o._dc_racks))
|
||||
, _sort_by_proximity(o._sort_by_proximity)
|
||||
, _datacenters(std::move(o._datacenters))
|
||||
, _random_engine(std::move(o._random_engine)) {
|
||||
, _random_engine(std::move(o._random_engine))
|
||||
{
|
||||
SCYLLA_ASSERT(_shard == this_shard_id());
|
||||
tlogger.trace("topology[{}]: move from [{}]", fmt::ptr(this), fmt::ptr(&o));
|
||||
|
||||
@@ -166,18 +153,16 @@ topology& topology::operator=(topology&& o) noexcept {
|
||||
|
||||
void topology::set_host_id_cfg(host_id this_host_id) {
|
||||
if (_cfg.this_host_id) {
|
||||
on_internal_error(tlogger,
|
||||
fmt::format("topology[{}] set_host_id_cfg can be caller only once current id {} new id {}", fmt::ptr(this), _cfg.this_host_id, this_host_id));
|
||||
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg can be caller only once current id {} new id {}", fmt::ptr(this), _cfg.this_host_id, this_host_id));
|
||||
}
|
||||
if (_nodes.size() != 1) {
|
||||
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg called while nodes size is greater than 1", fmt::ptr(this)));
|
||||
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg called while nodes size is greater than 1", fmt::ptr(this)));
|
||||
}
|
||||
if (!_this_node) {
|
||||
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg called while _this_nodes is null", fmt::ptr(this)));
|
||||
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg called while _this_nodes is null", fmt::ptr(this)));
|
||||
}
|
||||
if (_this_node->host_id()) {
|
||||
on_internal_error(
|
||||
tlogger, fmt::format("topology[{}] set_host_id_cfg called while _this_nodes has non null id {}", fmt::ptr(this), _this_node->host_id()));
|
||||
on_internal_error(tlogger, fmt::format("topology[{}] set_host_id_cfg called while _this_nodes has non null id {}", fmt::ptr(this), _this_node->host_id()));
|
||||
}
|
||||
|
||||
remove_node(*_this_node);
|
||||
@@ -218,8 +203,7 @@ const node& topology::add_node(node_holder nptr) {
|
||||
|
||||
if (nptr->topology() != this) {
|
||||
if (nptr->topology()) {
|
||||
on_fatal_internal_error(tlogger,
|
||||
seastar::format("topology[{}]: {} belongs to different topology={}", fmt::ptr(this), node_printer(node), fmt::ptr(node->topology())));
|
||||
on_fatal_internal_error(tlogger, seastar::format("topology[{}]: {} belongs to different topology={}", fmt::ptr(this), node_printer(node), fmt::ptr(node->topology())));
|
||||
}
|
||||
nptr->set_topology(this);
|
||||
}
|
||||
@@ -235,8 +219,7 @@ const node& topology::add_node(node_holder nptr) {
|
||||
try {
|
||||
if (is_configured_this_node(*node)) {
|
||||
if (_this_node) {
|
||||
on_internal_error(tlogger,
|
||||
seastar::format("topology[{}]: {}: local node already mapped to {}", fmt::ptr(this), node_printer(node), node_printer(this_node())));
|
||||
on_internal_error(tlogger, seastar::format("topology[{}]: {}: local node already mapped to {}", fmt::ptr(this), node_printer(node), node_printer(this_node())));
|
||||
}
|
||||
locator::node& n = *_nodes.back();
|
||||
n._is_this_node = node::this_node::yes;
|
||||
@@ -255,25 +238,14 @@ const node& topology::add_node(node_holder nptr) {
|
||||
return *node;
|
||||
}
|
||||
|
||||
void topology::update_node(node& node, std::optional<host_id> opt_id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st,
|
||||
std::optional<shard_id> opt_shard_count) {
|
||||
void topology::update_node(node& node, std::optional<host_id> opt_id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> opt_shard_count) {
|
||||
tlogger.debug("topology[{}]: update_node: {}: to: host_id={} dc={} rack={} state={} shard_count={}, at {}", fmt::ptr(this), node_printer(&node),
|
||||
seastar::value_of([&] {
|
||||
return opt_id ? format("{}", *opt_id) : "unchanged";
|
||||
}),
|
||||
seastar::value_of([&] {
|
||||
return opt_dr ? format("{}", opt_dr->dc) : "unchanged";
|
||||
}),
|
||||
seastar::value_of([&] {
|
||||
return opt_dr ? format("{}", opt_dr->rack) : "unchanged";
|
||||
}),
|
||||
seastar::value_of([&] {
|
||||
return opt_st ? format("{}", *opt_st) : "unchanged";
|
||||
}),
|
||||
seastar::value_of([&] {
|
||||
return opt_shard_count ? format("{}", *opt_shard_count) : "unchanged";
|
||||
}),
|
||||
lazy_backtrace());
|
||||
opt_id ? format("{}", *opt_id) : "unchanged",
|
||||
opt_dr ? format("{}", opt_dr->dc) : "unchanged",
|
||||
opt_dr ? format("{}", opt_dr->rack) : "unchanged",
|
||||
opt_st ? format("{}", *opt_st) : "unchanged",
|
||||
opt_shard_count ? format("{}", *opt_shard_count) : "unchanged",
|
||||
lazy_backtrace());
|
||||
|
||||
bool changed = false;
|
||||
if (opt_id) {
|
||||
@@ -285,8 +257,7 @@ void topology::update_node(node& node, std::optional<host_id> opt_id, std::optio
|
||||
on_internal_error(tlogger, seastar::format("This node host_id is already set: {}: new host_id={}", node_printer(&node), *opt_id));
|
||||
}
|
||||
if (_nodes_by_host_id.contains(*opt_id)) {
|
||||
on_internal_error(tlogger, seastar::format("Cannot update node host_id: {}: new host_id already exists: {}", node_printer(&node),
|
||||
node_printer(find_node(*opt_id))));
|
||||
on_internal_error(tlogger, seastar::format("Cannot update node host_id: {}: new host_id already exists: {}", node_printer(&node), node_printer(find_node(*opt_id))));
|
||||
}
|
||||
changed = true;
|
||||
} else {
|
||||
@@ -471,11 +442,11 @@ const node* topology::find_node(node::idx_type idx) const noexcept {
|
||||
return _nodes.at(idx).get();
|
||||
}
|
||||
|
||||
const node& topology::add_or_update_endpoint(
|
||||
host_id id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> shard_count) {
|
||||
tlogger.trace("topology[{}]: add_or_update_endpoint: host_id={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this), id,
|
||||
opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count,
|
||||
lazy_backtrace());
|
||||
const node& topology::add_or_update_endpoint(host_id id, std::optional<endpoint_dc_rack> opt_dr, std::optional<node::state> opt_st, std::optional<shard_id> shard_count)
|
||||
{
|
||||
tlogger.trace("topology[{}]: add_or_update_endpoint: host_id={} dc={} rack={} state={} shards={}, at {}", fmt::ptr(this),
|
||||
id, opt_dr.value_or(endpoint_dc_rack{}).dc, opt_dr.value_or(endpoint_dc_rack{}).rack, opt_st.value_or(node::state::none), shard_count,
|
||||
lazy_backtrace());
|
||||
|
||||
auto* n = find_node(id);
|
||||
if (n) {
|
||||
@@ -483,10 +454,14 @@ const node& topology::add_or_update_endpoint(
|
||||
return *n;
|
||||
}
|
||||
|
||||
return add_node(id, opt_dr.value_or(endpoint_dc_rack::default_location), opt_st.value_or(node::state::none), shard_count.value_or(0));
|
||||
return add_node(id,
|
||||
opt_dr.value_or(endpoint_dc_rack::default_location),
|
||||
opt_st.value_or(node::state::none),
|
||||
shard_count.value_or(0));
|
||||
}
|
||||
|
||||
bool topology::remove_endpoint(locator::host_id host_id) {
|
||||
bool topology::remove_endpoint(locator::host_id host_id)
|
||||
{
|
||||
auto node = find_node(host_id);
|
||||
tlogger.debug("topology[{}]: remove_endpoint: host_id={}: {}", fmt::ptr(this), host_id, node_printer(node));
|
||||
// Do not allow removing yourself from the topology
|
||||
@@ -527,7 +502,7 @@ void topology::do_sort_by_proximity(locator::host_id address, host_id_vector_rep
|
||||
locator::host_id id;
|
||||
int distance;
|
||||
};
|
||||
auto host_infos = addresses | std::views::transform([&](locator::host_id id) {
|
||||
auto host_infos = addresses | std::views::transform([&] (locator::host_id id) {
|
||||
const auto& loc1 = get_location(id);
|
||||
return info{id, distance(address, loc, id, loc1)};
|
||||
}) | std::ranges::to<utils::small_vector<info, host_id_vector_replica_set::internal_capacity()>>();
|
||||
@@ -589,12 +564,11 @@ std::unordered_set<locator::host_id> topology::get_all_host_ids() const {
|
||||
return ids;
|
||||
}
|
||||
|
||||
std::unordered_map<sstring, std::unordered_set<host_id>> topology::get_datacenter_host_ids() const {
|
||||
std::unordered_map<sstring, std::unordered_set<host_id>>
|
||||
topology::get_datacenter_host_ids() const {
|
||||
std::unordered_map<sstring, std::unordered_set<host_id>> ret;
|
||||
for (auto& [dc, nodes] : _dc_nodes) {
|
||||
ret[dc] = nodes | std::views::transform([](const node& n) {
|
||||
return n.host_id();
|
||||
}) | std::ranges::to<std::unordered_set>();
|
||||
ret[dc] = nodes | std::views::transform([] (const node& n) { return n.host_id(); }) | std::ranges::to<std::unordered_set>();
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
version https://git-lfs.github.com/spec/v1
|
||||
oid sha256:34a0955d2c5a88e18ddab0f1df085e10a17e14129c3e21de91e4f27ef949b6c4
|
||||
size 6502668
|
||||
oid sha256:d424ce6cc7f65338c34dd35881d23f5ad3425651d66e47dc2c3a20dc798848d4
|
||||
size 6598648
|
||||
|
||||
1740
repair/repair.cc
1740
repair/repair.cc
File diff suppressed because it is too large
Load Diff
@@ -3253,13 +3253,10 @@ private:
|
||||
// sequentially because the rows from repair follower 1 to
|
||||
// repair master might reduce the amount of missing data
|
||||
// between repair master and repair follower 2.
|
||||
auto working_hashes = master.working_row_hashes().get();
|
||||
repair_hash_set set_diff = get_set_diff(master.peer_row_hash_sets(node_idx), working_hashes);
|
||||
repair_hash_set set_diff = get_set_diff(master.peer_row_hash_sets(node_idx), master.working_row_hashes().get());
|
||||
// Request missing sets from peer node
|
||||
if (rlogger.is_enabled(logging::log_level::debug)) {
|
||||
rlogger.debug("Before get_row_diff to node {}, local={}, peer={}, set_diff={}",
|
||||
node, working_hashes.size(), master.peer_row_hash_sets(node_idx).size(), set_diff.size());
|
||||
}
|
||||
rlogger.debug("Before get_row_diff to node {}, local={}, peer={}, set_diff={}",
|
||||
node, master.working_row_hashes().get().size(), master.peer_row_hash_sets(node_idx).size(), set_diff.size());
|
||||
// If we need to pull all rows from the peer. We can avoid
|
||||
// sending the row hashes on wire by setting needs_all_rows flag.
|
||||
auto needs_all_rows = repair_meta::needs_all_rows_t(set_diff.size() == master.peer_row_hash_sets(node_idx).size());
|
||||
@@ -3272,9 +3269,7 @@ private:
|
||||
master.get_row_diff(std::move(set_diff), needs_all_rows, node, node_idx, dst_cpu_id);
|
||||
ns.state = repair_state::get_row_diff_finished;
|
||||
}
|
||||
if (rlogger.is_enabled(logging::log_level::debug)) {
|
||||
rlogger.debug("After get_row_diff node {}, hash_sets={}", master.myhostid(), master.working_row_hashes().get().size());
|
||||
}
|
||||
rlogger.debug("After get_row_diff node {}, hash_sets={}", master.myhostid(), master.working_row_hashes().get().size());
|
||||
} catch (...) {
|
||||
rlogger.warn("repair[{}]: get_row_diff: got error from node={}, keyspace={}, table={}, range={}, error={}",
|
||||
_shard_task.global_repair_id.uuid(), node, _shard_task.get_keyspace(), _cf_name, _range, std::current_exception());
|
||||
|
||||
1493
replica/database.cc
1493
replica/database.cc
File diff suppressed because it is too large
Load Diff
@@ -87,6 +87,11 @@ target_include_directories(wasmtime_bindings
|
||||
target_link_libraries(wasmtime_bindings
|
||||
INTERFACE Rust::rust_combined)
|
||||
if (Scylla_USE_PRECOMPILED_HEADER_USE)
|
||||
# The PCH from scylla-precompiled-header is compiled with Seastar's compile
|
||||
# flags, including sanitizer flags in Debug/Sanitize modes. Any target reusing
|
||||
# this PCH must have matching compile options, otherwise the compiler rejects
|
||||
# the PCH due to flag mismatch (e.g., -fsanitize=address).
|
||||
target_link_libraries(wasmtime_bindings PRIVATE Seastar::seastar)
|
||||
target_precompile_headers(wasmtime_bindings REUSE_FROM scylla-precompiled-header)
|
||||
endif()
|
||||
|
||||
@@ -108,5 +113,6 @@ target_include_directories(inc
|
||||
target_link_libraries(inc
|
||||
INTERFACE Rust::rust_combined)
|
||||
if (Scylla_USE_PRECOMPILED_HEADER_USE)
|
||||
target_link_libraries(inc PRIVATE Seastar::seastar)
|
||||
target_precompile_headers(inc REUSE_FROM scylla-precompiled-header)
|
||||
endif()
|
||||
|
||||
@@ -22,12 +22,12 @@ static logging::logger slogger("schema_registry");
|
||||
static thread_local schema_registry registry;
|
||||
|
||||
schema_version_not_found::schema_version_not_found(table_schema_version v)
|
||||
: std::runtime_error{format("Schema version {} not found", v)} {
|
||||
}
|
||||
: std::runtime_error{format("Schema version {} not found", v)}
|
||||
{ }
|
||||
|
||||
schema_version_loading_failed::schema_version_loading_failed(table_schema_version v)
|
||||
: std::runtime_error{format("Failed to load schema version {}", v)} {
|
||||
}
|
||||
: std::runtime_error{format("Failed to load schema version {}", v)}
|
||||
{ }
|
||||
|
||||
schema_registry_entry::~schema_registry_entry() {
|
||||
if (_schema) {
|
||||
@@ -39,7 +39,8 @@ schema_registry_entry::schema_registry_entry(table_schema_version v, schema_regi
|
||||
: _state(state::INITIAL)
|
||||
, _version(v)
|
||||
, _registry(r)
|
||||
, _sync_state(sync_state::NOT_SYNCED) {
|
||||
, _sync_state(sync_state::NOT_SYNCED)
|
||||
{
|
||||
_erase_timer.set_callback([this] {
|
||||
slogger.debug("Dropping {}", _version);
|
||||
SCYLLA_ASSERT(!_schema);
|
||||
@@ -70,8 +71,8 @@ void schema_registry::attach_table(schema_registry_entry& e) noexcept {
|
||||
e.set_table(table.weak_from_this());
|
||||
} catch (const replica::no_such_column_family&) {
|
||||
if (slogger.is_enabled(seastar::log_level::debug)) {
|
||||
slogger.debug("No table for schema version {} of {}.{}: {}", e._version, e.get_schema()->ks_name(), e.get_schema()->cf_name(),
|
||||
seastar::current_backtrace());
|
||||
slogger.debug("No table for schema version {} of {}.{}: {}", e._version,
|
||||
e.get_schema()->ks_name(), e.get_schema()->cf_name(), seastar::current_backtrace());
|
||||
}
|
||||
// ignore
|
||||
}
|
||||
@@ -220,7 +221,7 @@ future<schema_ptr> schema_registry_entry::start_loading(async_schema_loader load
|
||||
_state = state::LOADING;
|
||||
slogger.trace("Loading {}", _version);
|
||||
// Move to background.
|
||||
(void)f.then_wrapped([self = shared_from_this(), this](future<extended_frozen_schema>&& f) {
|
||||
(void)f.then_wrapped([self = shared_from_this(), this] (future<extended_frozen_schema>&& f) {
|
||||
_loader = {};
|
||||
if (_state != state::LOADING) {
|
||||
slogger.trace("Loading of {} aborted", _version);
|
||||
@@ -293,8 +294,8 @@ schema_registry& local_schema_registry() {
|
||||
}
|
||||
|
||||
global_schema_ptr::global_schema_ptr(const global_schema_ptr& o)
|
||||
: global_schema_ptr(o.get()) {
|
||||
}
|
||||
: global_schema_ptr(o.get())
|
||||
{ }
|
||||
|
||||
global_schema_ptr::global_schema_ptr(global_schema_ptr&& o) noexcept {
|
||||
auto current = this_shard_id();
|
||||
@@ -331,15 +332,15 @@ schema_ptr global_schema_ptr::get() const {
|
||||
}
|
||||
|
||||
global_schema_ptr::global_schema_ptr(const schema_ptr& ptr)
|
||||
: _cpu_of_origin(this_shard_id()) {
|
||||
: _cpu_of_origin(this_shard_id()) {
|
||||
// _ptr must always have an associated registry entry,
|
||||
// if ptr doesn't, we need to load it into the registry.
|
||||
auto ensure_registry_entry = [](const schema_ptr& s) {
|
||||
auto ensure_registry_entry = [] (const schema_ptr& s) {
|
||||
schema_registry_entry* e = s->registry_entry();
|
||||
if (e) {
|
||||
return s;
|
||||
} else {
|
||||
return local_schema_registry().get_or_load(s->version(), [&s](table_schema_version) -> extended_frozen_schema {
|
||||
return local_schema_registry().get_or_load(s->version(), [&s] (table_schema_version) -> extended_frozen_schema {
|
||||
return extended_frozen_schema(s);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -65,8 +65,9 @@ struct send_info {
|
||||
mutation_fragment_v1_stream reader;
|
||||
noncopyable_function<void(size_t)> update;
|
||||
send_info(netw::messaging_service& ms_, streaming::plan_id plan_id_, lw_shared_ptr<replica::table> tbl_, reader_permit permit_,
|
||||
dht::token_range_vector ranges_, locator::host_id id_, uint32_t dst_cpu_id_, stream_reason reason_, service::frozen_topology_guard topo_guard_,
|
||||
noncopyable_function<void(size_t)> update_fn)
|
||||
dht::token_range_vector ranges_, locator::host_id id_,
|
||||
uint32_t dst_cpu_id_, stream_reason reason_, service::frozen_topology_guard topo_guard_,
|
||||
noncopyable_function<void(size_t)> update_fn)
|
||||
: ms(ms_)
|
||||
, plan_id(plan_id_)
|
||||
, cf_id(tbl_->schema()->id())
|
||||
@@ -78,13 +79,12 @@ struct send_info {
|
||||
, ranges(std::move(ranges_))
|
||||
, prs(dht::to_partition_ranges(ranges))
|
||||
, reader(cf->make_streaming_reader(cf->schema(), std::move(permit_), prs, gc_clock::now()))
|
||||
, update(std::move(update_fn)) {
|
||||
, update(std::move(update_fn))
|
||||
{
|
||||
}
|
||||
future<bool> has_relevant_range_on_this_shard() {
|
||||
return do_with(false, ranges.begin(), [this](bool& found_relevant_range, dht::token_range_vector::iterator& ranges_it) {
|
||||
auto stop_cond = [this, &found_relevant_range, &ranges_it] {
|
||||
return ranges_it == ranges.end() || found_relevant_range;
|
||||
};
|
||||
return do_with(false, ranges.begin(), [this] (bool& found_relevant_range, dht::token_range_vector::iterator& ranges_it) {
|
||||
auto stop_cond = [this, &found_relevant_range, &ranges_it] { return ranges_it == ranges.end() || found_relevant_range; };
|
||||
return do_until(std::move(stop_cond), [this, &found_relevant_range, &ranges_it] {
|
||||
dht::token_range range = *ranges_it++;
|
||||
if (!found_relevant_range) {
|
||||
@@ -113,112 +113,93 @@ struct send_info {
|
||||
};
|
||||
|
||||
future<> send_mutation_fragments(lw_shared_ptr<send_info> si) {
|
||||
return si->reader.has_more_fragments().then([si](bool there_is_more) {
|
||||
if (!there_is_more) {
|
||||
// The reader contains no data
|
||||
sslog.info("[Stream #{}] Skip sending ks={}, cf={}, reader contains no data, with new rpc streaming", si->plan_id, si->cf->schema()->ks_name(),
|
||||
si->cf->schema()->cf_name());
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return si->estimate_partitions().then([si](size_t estimated_partitions) {
|
||||
sslog.info("[Stream #{}] Start sending ks={}, cf={}, estimated_partitions={}, with new rpc streaming", si->plan_id, si->cf->schema()->ks_name(),
|
||||
si->cf->schema()->cf_name(), estimated_partitions);
|
||||
return si->ms
|
||||
.make_sink_and_source_for_stream_mutation_fragments(
|
||||
si->reader.schema()->version(), si->plan_id, si->cf_id, estimated_partitions, si->reason, si->topo_guard, si->id)
|
||||
.then_unpack([si](rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd> sink, rpc::source<int32_t> source) mutable {
|
||||
auto got_error_from_peer = make_lw_shared<bool>(false);
|
||||
auto table_is_dropped = make_lw_shared<bool>(false);
|
||||
return si->reader.has_more_fragments().then([si] (bool there_is_more) {
|
||||
if (!there_is_more) {
|
||||
// The reader contains no data
|
||||
sslog.info("[Stream #{}] Skip sending ks={}, cf={}, reader contains no data, with new rpc streaming",
|
||||
si->plan_id, si->cf->schema()->ks_name(), si->cf->schema()->cf_name());
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return si->estimate_partitions().then([si] (size_t estimated_partitions) {
|
||||
sslog.info("[Stream #{}] Start sending ks={}, cf={}, estimated_partitions={}, with new rpc streaming", si->plan_id, si->cf->schema()->ks_name(), si->cf->schema()->cf_name(), estimated_partitions);
|
||||
return si->ms.make_sink_and_source_for_stream_mutation_fragments(si->reader.schema()->version(), si->plan_id, si->cf_id, estimated_partitions, si->reason, si->topo_guard, si->id).then_unpack([si] (rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd> sink, rpc::source<int32_t> source) mutable {
|
||||
auto got_error_from_peer = make_lw_shared<bool>(false);
|
||||
auto table_is_dropped = make_lw_shared<bool>(false);
|
||||
|
||||
auto source_op = [source, got_error_from_peer, table_is_dropped, si]() mutable -> future<> {
|
||||
return repeat([source, got_error_from_peer, table_is_dropped, si]() mutable {
|
||||
return source().then([source, got_error_from_peer, table_is_dropped, si](
|
||||
std::optional<std::tuple<int32_t>> status_opt) mutable {
|
||||
if (status_opt) {
|
||||
auto status = std::get<0>(*status_opt);
|
||||
if (status == -1) {
|
||||
*got_error_from_peer = true;
|
||||
} else if (status == -2) {
|
||||
*got_error_from_peer = true;
|
||||
*table_is_dropped = true;
|
||||
}
|
||||
sslog.debug("Got status code from peer={}, plan_id={}, cf_id={}, status={}", si->id, si->plan_id, si->cf_id, status);
|
||||
// we've got an error from the other side, but we cannot just abandon rpc::source we
|
||||
// need to continue reading until EOS since this will signal that no more work
|
||||
// is left and rpc::source can be destroyed. The sender closes connection immediately
|
||||
// after sending the status, so EOS should arrive shortly.
|
||||
return stop_iteration::no;
|
||||
} else {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
});
|
||||
});
|
||||
}();
|
||||
auto source_op = [source, got_error_from_peer, table_is_dropped, si] () mutable -> future<> {
|
||||
return repeat([source, got_error_from_peer, table_is_dropped, si] () mutable {
|
||||
return source().then([source, got_error_from_peer, table_is_dropped, si] (std::optional<std::tuple<int32_t>> status_opt) mutable {
|
||||
if (status_opt) {
|
||||
auto status = std::get<0>(*status_opt);
|
||||
if (status == -1) {
|
||||
*got_error_from_peer = true;
|
||||
} else if (status == -2) {
|
||||
*got_error_from_peer = true;
|
||||
*table_is_dropped = true;
|
||||
}
|
||||
sslog.debug("Got status code from peer={}, plan_id={}, cf_id={}, status={}", si->id, si->plan_id, si->cf_id, status);
|
||||
// we've got an error from the other side, but we cannot just abandon rpc::source we
|
||||
// need to continue reading until EOS since this will signal that no more work
|
||||
// is left and rpc::source can be destroyed. The sender closes connection immediately
|
||||
// after sending the status, so EOS should arrive shortly.
|
||||
return stop_iteration::no;
|
||||
} else {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
});
|
||||
});
|
||||
}();
|
||||
|
||||
auto sink_op = [sink, si, got_error_from_peer]() mutable -> future<> {
|
||||
mutation_fragment_stream_validator validator(*(si->reader.schema()));
|
||||
return do_with(std::move(sink), std::move(validator),
|
||||
[si, got_error_from_peer](rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd>& sink,
|
||||
mutation_fragment_stream_validator& validator) {
|
||||
return repeat([&sink, &validator, si, got_error_from_peer]() mutable {
|
||||
return si->reader().then(
|
||||
[&sink, &validator, si, s = si->reader.schema(), got_error_from_peer](mutation_fragment_opt mf) mutable {
|
||||
if (*got_error_from_peer) {
|
||||
return make_exception_future<stop_iteration>(std::runtime_error("Got status error code from peer"));
|
||||
}
|
||||
if (mf) {
|
||||
if (!validator(mf->mutation_fragment_kind())) {
|
||||
return make_exception_future<stop_iteration>(std::runtime_error(
|
||||
format("Stream reader mutation_fragment validator failed, previous={}, current={}",
|
||||
validator.previous_mutation_fragment_kind(), mf->mutation_fragment_kind())));
|
||||
}
|
||||
frozen_mutation_fragment fmf = freeze(*s, *mf);
|
||||
auto size = fmf.representation().size();
|
||||
si->update(size);
|
||||
return sink(fmf, stream_mutation_fragments_cmd::mutation_fragment_data).then([] {
|
||||
return stop_iteration::no;
|
||||
});
|
||||
} else {
|
||||
if (!validator.on_end_of_stream()) {
|
||||
return make_exception_future<stop_iteration>(
|
||||
std::runtime_error(format("Stream reader mutation_fragment validator failed on "
|
||||
"end_of_stream, previous={}, current=end_of_stream",
|
||||
validator.previous_mutation_fragment_kind())));
|
||||
}
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
});
|
||||
})
|
||||
.then([&sink]() mutable {
|
||||
return sink(frozen_mutation_fragment(bytes_ostream()), stream_mutation_fragments_cmd::end_of_stream);
|
||||
})
|
||||
.handle_exception([&sink](std::exception_ptr ep) mutable {
|
||||
// Notify the receiver the sender has failed
|
||||
return sink(frozen_mutation_fragment(bytes_ostream()), stream_mutation_fragments_cmd::error)
|
||||
.then([ep = std::move(ep)]() mutable {
|
||||
return make_exception_future<>(std::move(ep));
|
||||
});
|
||||
})
|
||||
.finally([&sink]() mutable {
|
||||
return sink.close();
|
||||
});
|
||||
});
|
||||
}();
|
||||
|
||||
return when_all_succeed(std::move(source_op), std::move(sink_op)).then_unpack([got_error_from_peer, table_is_dropped, si] {
|
||||
if (*got_error_from_peer) {
|
||||
if (*table_is_dropped) {
|
||||
sslog.info("[Stream #{}] Skipped streaming the dropped table {}.{}", si->plan_id, si->cf->schema()->ks_name(),
|
||||
si->cf->schema()->cf_name());
|
||||
} else {
|
||||
throw std::runtime_error(
|
||||
format("Peer failed to process mutation_fragment peer={}, plan_id={}, cf_id={}", si->id, si->plan_id, si->cf_id));
|
||||
}
|
||||
auto sink_op = [sink, si, got_error_from_peer] () mutable -> future<> {
|
||||
mutation_fragment_stream_validator validator(*(si->reader.schema()));
|
||||
return do_with(std::move(sink), std::move(validator), [si, got_error_from_peer] (rpc::sink<frozen_mutation_fragment, stream_mutation_fragments_cmd>& sink, mutation_fragment_stream_validator& validator) {
|
||||
return repeat([&sink, &validator, si, got_error_from_peer] () mutable {
|
||||
return si->reader().then([&sink, &validator, si, s = si->reader.schema(), got_error_from_peer] (mutation_fragment_opt mf) mutable {
|
||||
if (*got_error_from_peer) {
|
||||
return make_exception_future<stop_iteration>(std::runtime_error("Got status error code from peer"));
|
||||
}
|
||||
if (mf) {
|
||||
if (!validator(mf->mutation_fragment_kind())) {
|
||||
return make_exception_future<stop_iteration>(std::runtime_error(format("Stream reader mutation_fragment validator failed, previous={}, current={}",
|
||||
validator.previous_mutation_fragment_kind(), mf->mutation_fragment_kind())));
|
||||
}
|
||||
});
|
||||
frozen_mutation_fragment fmf = freeze(*s, *mf);
|
||||
auto size = fmf.representation().size();
|
||||
si->update(size);
|
||||
return sink(fmf, stream_mutation_fragments_cmd::mutation_fragment_data).then([] { return stop_iteration::no; });
|
||||
} else {
|
||||
if (!validator.on_end_of_stream()) {
|
||||
return make_exception_future<stop_iteration>(std::runtime_error(format("Stream reader mutation_fragment validator failed on end_of_stream, previous={}, current=end_of_stream",
|
||||
validator.previous_mutation_fragment_kind())));
|
||||
}
|
||||
return make_ready_future<stop_iteration>(stop_iteration::yes);
|
||||
}
|
||||
});
|
||||
}).then([&sink] () mutable {
|
||||
return sink(frozen_mutation_fragment(bytes_ostream()), stream_mutation_fragments_cmd::end_of_stream);
|
||||
}).handle_exception([&sink] (std::exception_ptr ep) mutable {
|
||||
// Notify the receiver the sender has failed
|
||||
return sink(frozen_mutation_fragment(bytes_ostream()), stream_mutation_fragments_cmd::error).then([ep = std::move(ep)] () mutable {
|
||||
return make_exception_future<>(std::move(ep));
|
||||
});
|
||||
}).finally([&sink] () mutable {
|
||||
return sink.close();
|
||||
});
|
||||
});
|
||||
}();
|
||||
|
||||
return when_all_succeed(std::move(source_op), std::move(sink_op)).then_unpack([got_error_from_peer, table_is_dropped, si] {
|
||||
if (*got_error_from_peer) {
|
||||
if (*table_is_dropped) {
|
||||
sslog.info("[Stream #{}] Skipped streaming the dropped table {}.{}", si->plan_id, si->cf->schema()->ks_name(), si->cf->schema()->cf_name());
|
||||
} else {
|
||||
throw std::runtime_error(format("Peer failed to process mutation_fragment peer={}, plan_id={}, cf_id={}", si->id, si->plan_id, si->cf_id));
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
future<> stream_transfer_task::execute() {
|
||||
@@ -226,55 +207,46 @@ future<> stream_transfer_task::execute() {
|
||||
auto cf_id = this->cf_id;
|
||||
auto id = session->peer;
|
||||
auto& sm = session->manager();
|
||||
auto table_dropped = co_await streaming::with_table_drop_silenced(sm.db(), sm.mm(), cf_id, [this, &sm, cf_id, plan_id, id](const table_id&) {
|
||||
auto table_dropped = co_await streaming::with_table_drop_silenced(sm.db(), sm.mm(), cf_id, [this, &sm, cf_id, plan_id, id] (const table_id &) {
|
||||
auto dst_cpu_id = session->dst_cpu_id;
|
||||
sslog.debug("[Stream #{}] stream_transfer_task: cf_id={}", plan_id, cf_id);
|
||||
sort_and_merge_ranges();
|
||||
auto reason = session->get_reason();
|
||||
auto topo_guard = session->topo_guard();
|
||||
return sm.container()
|
||||
.invoke_on_all([plan_id, cf_id, id, dst_cpu_id, ranges = this->_ranges, reason, topo_guard](stream_manager& sm) mutable {
|
||||
auto tbl = sm.db().find_column_family(cf_id).shared_from_this();
|
||||
return sm.db()
|
||||
.obtain_reader_permit(*tbl, "stream-transfer-task", db::no_timeout, {})
|
||||
.then([&sm, tbl, plan_id, cf_id, id, dst_cpu_id, ranges = std::move(ranges), reason, topo_guard](reader_permit permit) mutable {
|
||||
auto si = make_lw_shared<send_info>(sm.ms(), plan_id, tbl, std::move(permit), std::move(ranges), id, dst_cpu_id, reason,
|
||||
topo_guard, [&sm, plan_id, id](size_t sz) {
|
||||
sm.update_progress(plan_id, id, streaming::progress_info::direction::OUT, sz);
|
||||
});
|
||||
return si->has_relevant_range_on_this_shard()
|
||||
.then([si, plan_id, cf_id](bool has_relevant_range_on_this_shard) {
|
||||
if (!has_relevant_range_on_this_shard) {
|
||||
sslog.debug("[Stream #{}] stream_transfer_task: cf_id={}: ignore ranges on shard={}", plan_id, cf_id,
|
||||
this_shard_id());
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return send_mutation_fragments(std::move(si));
|
||||
})
|
||||
.finally([si] {
|
||||
return si->reader.close();
|
||||
});
|
||||
});
|
||||
})
|
||||
.then([this, plan_id, cf_id, id, &sm] {
|
||||
sslog.debug("[Stream #{}] SEND STREAM_MUTATION_DONE to {}, cf_id={}", plan_id, id, cf_id);
|
||||
return ser::streaming_rpc_verbs::send_stream_mutation_done(&sm.ms(), id, plan_id, _ranges, cf_id, session->dst_cpu_id)
|
||||
.handle_exception([plan_id, id](auto ep) {
|
||||
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send STREAM_MUTATION_DONE to {}: {}", plan_id, id, ep);
|
||||
std::rethrow_exception(ep);
|
||||
});
|
||||
})
|
||||
.then([this, id, plan_id] {
|
||||
_mutation_done_sent = true;
|
||||
sslog.debug("[Stream #{}] GOT STREAM_MUTATION_DONE Reply from {}", plan_id, id);
|
||||
})
|
||||
.handle_exception([plan_id, id, &sm](std::exception_ptr ep) {
|
||||
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send to {}: {}", plan_id, id, ep);
|
||||
utils::get_local_injector().inject("stream_mutation_fragments_table_dropped", [&sm]() {
|
||||
sm.db().find_column_family(table_id::create_null_id());
|
||||
});
|
||||
std::rethrow_exception(ep);
|
||||
return sm.container().invoke_on_all([plan_id, cf_id, id, dst_cpu_id, ranges=this->_ranges, reason, topo_guard] (stream_manager& sm) mutable {
|
||||
auto tbl = sm.db().find_column_family(cf_id).shared_from_this();
|
||||
return sm.db().obtain_reader_permit(*tbl, "stream-transfer-task", db::no_timeout, {}).then([&sm, tbl, plan_id, cf_id, id, dst_cpu_id, ranges=std::move(ranges), reason, topo_guard] (reader_permit permit) mutable {
|
||||
auto si = make_lw_shared<send_info>(sm.ms(), plan_id, tbl, std::move(permit), std::move(ranges), id, dst_cpu_id, reason, topo_guard, [&sm, plan_id, id] (size_t sz) {
|
||||
sm.update_progress(plan_id, id, streaming::progress_info::direction::OUT, sz);
|
||||
});
|
||||
return si->has_relevant_range_on_this_shard().then([si, plan_id, cf_id] (bool has_relevant_range_on_this_shard) {
|
||||
if (!has_relevant_range_on_this_shard) {
|
||||
sslog.debug("[Stream #{}] stream_transfer_task: cf_id={}: ignore ranges on shard={}",
|
||||
plan_id, cf_id, this_shard_id());
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return send_mutation_fragments(std::move(si));
|
||||
}).finally([si] {
|
||||
return si->reader.close();
|
||||
});
|
||||
});
|
||||
}).then([this, plan_id, cf_id, id, &sm] {
|
||||
sslog.debug("[Stream #{}] SEND STREAM_MUTATION_DONE to {}, cf_id={}", plan_id, id, cf_id);
|
||||
return ser::streaming_rpc_verbs::send_stream_mutation_done(&sm.ms(), id, plan_id, _ranges,
|
||||
cf_id, session->dst_cpu_id).handle_exception([plan_id, id] (auto ep) {
|
||||
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send STREAM_MUTATION_DONE to {}: {}", plan_id, id, ep);
|
||||
std::rethrow_exception(ep);
|
||||
});
|
||||
}).then([this, id, plan_id] {
|
||||
_mutation_done_sent = true;
|
||||
sslog.debug("[Stream #{}] GOT STREAM_MUTATION_DONE Reply from {}", plan_id, id);
|
||||
}).handle_exception([plan_id, id, &sm] (std::exception_ptr ep) {
|
||||
sslog.warn("[Stream #{}] stream_transfer_task: Fail to send to {}: {}", plan_id, id, ep);
|
||||
utils::get_local_injector().inject("stream_mutation_fragments_table_dropped", [&sm] () {
|
||||
sm.db().find_column_family(table_id::create_null_id());
|
||||
});
|
||||
std::rethrow_exception(ep);
|
||||
});
|
||||
});
|
||||
// If the table is dropped during streaming, we can ignore the
|
||||
// errors and make the stream successful. This allows user to
|
||||
|
||||
2
test.py
2
test.py
@@ -181,7 +181,7 @@ def parse_cmd_line() -> argparse.Namespace:
|
||||
help="Run only tests for given build mode(s)")
|
||||
parser.add_argument('--repeat', action="store", default="1", type=int,
|
||||
help="number of times to repeat test execution")
|
||||
parser.add_argument('--timeout', action="store", default="24000", type=int,
|
||||
parser.add_argument('--timeout', action="store", default="3600", type=int,
|
||||
help="timeout value for single test execution")
|
||||
parser.add_argument('--session-timeout', action="store", default="24000", type=int,
|
||||
help="timeout value for test.py/pytest session execution")
|
||||
|
||||
@@ -511,8 +511,7 @@ class AuditBackendComposite(AuditBackend):
|
||||
return rows_dict
|
||||
|
||||
|
||||
@pytest.mark.single_node
|
||||
class TestCQLAudit(AuditTester):
|
||||
class CQLAuditTester(AuditTester):
|
||||
"""
|
||||
Make sure CQL statements are audited
|
||||
"""
|
||||
@@ -1763,7 +1762,7 @@ class TestCQLAudit(AuditTester):
|
||||
|
||||
async def test_audit_table_noauth(manager: ManagerClient):
|
||||
"""Table backend, no auth, single node — groups all tests that share this config."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
await t.test_using_non_existent_keyspace(AuditBackendTable)
|
||||
await t.test_audit_keyspace(AuditBackendTable)
|
||||
await t.test_audit_keyspace_extra_parameter(AuditBackendTable)
|
||||
@@ -1787,7 +1786,7 @@ async def test_audit_table_noauth(manager: ManagerClient):
|
||||
|
||||
async def test_audit_table_auth(manager: ManagerClient):
|
||||
"""Table backend, auth enabled, single node."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
await t.test_user_password_masking(AuditBackendTable)
|
||||
await t.test_negative_audit_records_auth()
|
||||
await t.test_negative_audit_records_admin()
|
||||
@@ -1803,7 +1802,7 @@ async def test_audit_table_auth(manager: ManagerClient):
|
||||
|
||||
async def test_audit_table_auth_multinode(manager: ManagerClient):
|
||||
"""Table backend, auth enabled, multi-node (rf=3)."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
await t.test_negative_audit_records_ddl()
|
||||
|
||||
|
||||
@@ -1811,49 +1810,49 @@ async def test_audit_table_auth_multinode(manager: ManagerClient):
|
||||
|
||||
async def test_audit_type_none_standalone(manager: ManagerClient):
|
||||
"""audit=None — verify no auditing occurs."""
|
||||
await TestCQLAudit(manager).test_audit_type_none()
|
||||
await CQLAuditTester(manager).test_audit_type_none()
|
||||
|
||||
|
||||
async def test_audit_type_invalid_standalone(manager: ManagerClient):
|
||||
"""audit=invalid — server should fail to start."""
|
||||
await TestCQLAudit(manager).test_audit_type_invalid()
|
||||
await CQLAuditTester(manager).test_audit_type_invalid()
|
||||
|
||||
|
||||
async def test_composite_audit_type_invalid_standalone(manager: ManagerClient):
|
||||
"""audit=table,syslog,invalid — server should fail to start."""
|
||||
await TestCQLAudit(manager).test_composite_audit_type_invalid()
|
||||
await CQLAuditTester(manager).test_composite_audit_type_invalid()
|
||||
|
||||
|
||||
async def test_audit_empty_settings_standalone(manager: ManagerClient):
|
||||
"""audit=none — verify no auditing occurs."""
|
||||
await TestCQLAudit(manager).test_audit_empty_settings()
|
||||
await CQLAuditTester(manager).test_audit_empty_settings()
|
||||
|
||||
|
||||
async def test_composite_audit_empty_settings_standalone(manager: ManagerClient):
|
||||
"""audit=table,syslog,none — verify no auditing occurs."""
|
||||
await TestCQLAudit(manager).test_composite_audit_empty_settings()
|
||||
await CQLAuditTester(manager).test_composite_audit_empty_settings()
|
||||
|
||||
|
||||
async def test_audit_categories_invalid_standalone(manager: ManagerClient):
|
||||
"""Invalid audit_categories — server should fail to start."""
|
||||
await TestCQLAudit(manager).test_audit_categories_invalid()
|
||||
await CQLAuditTester(manager).test_audit_categories_invalid()
|
||||
|
||||
|
||||
async def test_insert_failure_standalone(manager: ManagerClient):
|
||||
"""7-node topology, audit=table, no auth — standalone due to unique topology."""
|
||||
await TestCQLAudit(manager).test_insert_failure_doesnt_report_success()
|
||||
await CQLAuditTester(manager).test_insert_failure_doesnt_report_success()
|
||||
|
||||
|
||||
async def test_service_level_statements_standalone(manager: ManagerClient):
|
||||
"""audit=table, auth, cmdline=--smp 1 — standalone due to special cmdline."""
|
||||
await TestCQLAudit(manager).test_service_level_statements()
|
||||
await CQLAuditTester(manager).test_service_level_statements()
|
||||
|
||||
|
||||
# AuditBackendSyslog, no auth, rf=1
|
||||
|
||||
async def test_audit_syslog_noauth(manager: ManagerClient):
|
||||
"""Syslog backend, no auth, single node."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
Syslog = functools.partial(AuditBackendSyslog, socket_path=syslog_socket_path)
|
||||
await t.test_using_non_existent_keyspace(Syslog)
|
||||
await t.test_audit_keyspace(Syslog)
|
||||
@@ -1870,7 +1869,7 @@ async def test_audit_syslog_noauth(manager: ManagerClient):
|
||||
|
||||
async def test_audit_syslog_auth(manager: ManagerClient):
|
||||
"""Syslog backend, auth enabled, single node."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
Syslog = functools.partial(AuditBackendSyslog, socket_path=syslog_socket_path)
|
||||
await t.test_user_password_masking(Syslog)
|
||||
await t.test_role_password_masking(Syslog)
|
||||
@@ -1881,7 +1880,7 @@ async def test_audit_syslog_auth(manager: ManagerClient):
|
||||
|
||||
async def test_audit_composite_noauth(manager: ManagerClient):
|
||||
"""Composite backend (table+syslog), no auth, single node."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
Composite = functools.partial(AuditBackendComposite, socket_path=syslog_socket_path)
|
||||
await t.test_using_non_existent_keyspace(Composite)
|
||||
await t.test_audit_keyspace(Composite)
|
||||
@@ -1898,7 +1897,7 @@ async def test_audit_composite_noauth(manager: ManagerClient):
|
||||
|
||||
async def test_audit_composite_auth(manager: ManagerClient):
|
||||
"""Composite backend (table+syslog), auth enabled, single node."""
|
||||
t = TestCQLAudit(manager)
|
||||
t = CQLAuditTester(manager)
|
||||
Composite = functools.partial(AuditBackendComposite, socket_path=syslog_socket_path)
|
||||
await t.test_user_password_masking(Composite)
|
||||
await t.test_role_password_masking(Composite)
|
||||
@@ -1910,29 +1909,29 @@ _composite = functools.partial(AuditBackendComposite, socket_path=syslog_socket_
|
||||
|
||||
|
||||
@pytest.mark.parametrize("helper_class,config_changer", [
|
||||
pytest.param(AuditBackendTable, TestCQLAudit.AuditSighupConfigChanger, id="table-sighup"),
|
||||
pytest.param(AuditBackendTable, TestCQLAudit.AuditCqlConfigChanger, id="table-cql"),
|
||||
pytest.param(_syslog, TestCQLAudit.AuditSighupConfigChanger, id="syslog-sighup"),
|
||||
pytest.param(_syslog, TestCQLAudit.AuditCqlConfigChanger, id="syslog-cql"),
|
||||
pytest.param(_composite, TestCQLAudit.AuditSighupConfigChanger, id="composite-sighup"),
|
||||
pytest.param(_composite, TestCQLAudit.AuditCqlConfigChanger, id="composite-cql"),
|
||||
pytest.param(AuditBackendTable, CQLAuditTester.AuditSighupConfigChanger, id="table-sighup"),
|
||||
pytest.param(AuditBackendTable, CQLAuditTester.AuditCqlConfigChanger, id="table-cql"),
|
||||
pytest.param(_syslog, CQLAuditTester.AuditSighupConfigChanger, id="syslog-sighup"),
|
||||
pytest.param(_syslog, CQLAuditTester.AuditCqlConfigChanger, id="syslog-cql"),
|
||||
pytest.param(_composite, CQLAuditTester.AuditSighupConfigChanger, id="composite-sighup"),
|
||||
pytest.param(_composite, CQLAuditTester.AuditCqlConfigChanger, id="composite-cql"),
|
||||
])
|
||||
async def test_config_no_liveupdate(manager: ManagerClient, helper_class, config_changer):
|
||||
"""Non-live audit config params (audit, audit_unix_socket_path, audit_syslog_write_buffer_size) must be unmodifiable."""
|
||||
await TestCQLAudit(manager).test_config_no_liveupdate(helper_class, config_changer)
|
||||
await CQLAuditTester(manager).test_config_no_liveupdate(helper_class, config_changer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("helper_class,config_changer", [
|
||||
pytest.param(AuditBackendTable, TestCQLAudit.AuditSighupConfigChanger, id="table-sighup"),
|
||||
pytest.param(AuditBackendTable, TestCQLAudit.AuditCqlConfigChanger, id="table-cql"),
|
||||
pytest.param(_syslog, TestCQLAudit.AuditSighupConfigChanger, id="syslog-sighup"),
|
||||
pytest.param(_syslog, TestCQLAudit.AuditCqlConfigChanger, id="syslog-cql"),
|
||||
pytest.param(_composite, TestCQLAudit.AuditSighupConfigChanger, id="composite-sighup"),
|
||||
pytest.param(_composite, TestCQLAudit.AuditCqlConfigChanger, id="composite-cql"),
|
||||
pytest.param(AuditBackendTable, CQLAuditTester.AuditSighupConfigChanger, id="table-sighup"),
|
||||
pytest.param(AuditBackendTable, CQLAuditTester.AuditCqlConfigChanger, id="table-cql"),
|
||||
pytest.param(_syslog, CQLAuditTester.AuditSighupConfigChanger, id="syslog-sighup"),
|
||||
pytest.param(_syslog, CQLAuditTester.AuditCqlConfigChanger, id="syslog-cql"),
|
||||
pytest.param(_composite, CQLAuditTester.AuditSighupConfigChanger, id="composite-sighup"),
|
||||
pytest.param(_composite, CQLAuditTester.AuditCqlConfigChanger, id="composite-cql"),
|
||||
])
|
||||
async def test_config_liveupdate(manager: ManagerClient, helper_class, config_changer):
|
||||
"""Live-updatable audit config params (categories, keyspaces, tables) must be modifiable at runtime."""
|
||||
await TestCQLAudit(manager).test_config_liveupdate(helper_class, config_changer)
|
||||
await CQLAuditTester(manager).test_config_liveupdate(helper_class, config_changer)
|
||||
|
||||
|
||||
@pytest.mark.parametrize("helper_class", [
|
||||
@@ -1942,4 +1941,4 @@ async def test_config_liveupdate(manager: ManagerClient, helper_class, config_ch
|
||||
])
|
||||
async def test_parallel_syslog_audit(manager: ManagerClient, helper_class):
|
||||
"""Cluster must not fail when multiple queries are audited in parallel."""
|
||||
await TestCQLAudit(manager).test_parallel_syslog_audit(helper_class)
|
||||
await CQLAuditTester(manager).test_parallel_syslog_audit(helper_class)
|
||||
|
||||
@@ -17,9 +17,9 @@ from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.rest_client import ScyllaMetricsClient, TCPRESTClient, inject_error
|
||||
from test.pylib.tablets import get_tablet_replicas
|
||||
from test.pylib.scylla_cluster import ReplaceConfig
|
||||
from test.pylib.util import wait_for
|
||||
from test.pylib.util import gather_safely, wait_for
|
||||
|
||||
from test.cluster.util import get_topology_coordinator, find_server_by_host_id, new_test_keyspace
|
||||
from test.cluster.util import get_topology_coordinator, find_server_by_host_id, keyspace_has_tablets, new_test_keyspace, new_test_table
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -51,28 +51,42 @@ async def await_sync_point(client: TCPRESTClient, server_ip: IPAddress, sync_poi
|
||||
@pytest.mark.asyncio
|
||||
async def test_write_cl_any_to_dead_node_generates_hints(manager: ManagerClient):
|
||||
node_count = 2
|
||||
servers = await manager.servers_add(node_count)
|
||||
cmdline = ["--logger-log-level", "hints_manager=trace"]
|
||||
servers = await manager.servers_add(node_count, cmdline=cmdline)
|
||||
|
||||
async def wait_for_hints_written(min_hint_count: int, timeout: int):
|
||||
async def aux():
|
||||
hints_written = await get_hint_metrics(manager.metrics, servers[0].ip_addr, "written")
|
||||
if hints_written >= min_hint_count:
|
||||
return True
|
||||
return None
|
||||
assert await wait_for(aux, time.time() + timeout)
|
||||
|
||||
cql = manager.get_cql()
|
||||
async with new_test_keyspace(manager, "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}") as ks:
|
||||
table = f"{ks}.t"
|
||||
await cql.run_async(f"CREATE TABLE {table} (pk int primary key, v int)")
|
||||
uses_tablets = await keyspace_has_tablets(manager, ks)
|
||||
# If the keyspace uses tablets, let's explicitly require the table to use multiple tablets.
|
||||
# Otherwise, it could happen that all mutations would target servers[0] only, which would
|
||||
# ultimately lead to a test failure here. We rely on the assumption that mutations will be
|
||||
# distributed more or less uniformly!
|
||||
extra_opts = "WITH tablets = {'min_tablet_count': 16}" if uses_tablets else ""
|
||||
async with new_test_table(manager, ks, "pk int PRIMARY KEY, v int", extra_opts) as table:
|
||||
await manager.server_stop_gracefully(servers[1].server_id)
|
||||
|
||||
await manager.server_stop_gracefully(servers[1].server_id)
|
||||
hints_before = await get_hint_metrics(manager.metrics, servers[0].ip_addr, "written")
|
||||
|
||||
hints_before = await get_hint_metrics(manager.metrics, servers[0].ip_addr, "written")
|
||||
stmt = cql.prepare(f"INSERT INTO {table} (pk, v) VALUES (?, ?)")
|
||||
stmt.consistency_level = ConsistencyLevel.ANY
|
||||
|
||||
# Some of the inserts will be targeted to the dead node.
|
||||
# The coordinator doesn't have live targets to send the write to, but it should write a hint.
|
||||
for i in range(100):
|
||||
await cql.run_async(SimpleStatement(f"INSERT INTO {table} (pk, v) VALUES ({i}, {i+1})", consistency_level=ConsistencyLevel.ANY))
|
||||
# Some of the inserts will be targeted to the dead node.
|
||||
# The coordinator doesn't have live targets to send the write to, but it should write a hint.
|
||||
await gather_safely(*[cql.run_async(stmt, (i, i + 1)) for i in range(100)])
|
||||
|
||||
# Verify hints are written
|
||||
hints_after = await get_hint_metrics(manager.metrics, servers[0].ip_addr, "written")
|
||||
assert hints_after > hints_before
|
||||
# Verify hints are written
|
||||
await wait_for_hints_written(hints_before + 1, timeout=60)
|
||||
|
||||
# For dropping the keyspace
|
||||
await manager.server_start(servers[1].server_id)
|
||||
# For dropping the keyspace
|
||||
await manager.server_start(servers[1].server_id)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_limited_concurrency_of_writes(manager: ManagerClient):
|
||||
|
||||
@@ -151,7 +151,7 @@ async def trigger_tablet_merge(manager, servers, logs):
|
||||
await s1_log.wait_for('Detected tablet merge for table', from_mark=s1_mark)
|
||||
await inject_error_off(manager, "tablet_force_tablet_count_decrease", servers)
|
||||
|
||||
async def preapre_cluster_for_incremental_repair(manager, nr_keys = 100 , cmdline = []):
|
||||
async def prepare_cluster_for_incremental_repair(manager, nr_keys = 100 , cmdline = []):
|
||||
servers, cql, hosts, ks, table_id = await create_table_insert_data_for_repair(manager, nr_keys=nr_keys, cmdline=cmdline)
|
||||
repaired_keys = set(range(0, nr_keys))
|
||||
unrepaired_keys = set()
|
||||
@@ -164,7 +164,7 @@ async def preapre_cluster_for_incremental_repair(manager, nr_keys = 100 , cmdlin
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tablet_repair_sstable_skipped_read_metrics(manager: ManagerClient):
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, token = await preapre_cluster_for_incremental_repair(manager)
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, token = await prepare_cluster_for_incremental_repair(manager)
|
||||
|
||||
await insert_keys(cql, ks, 0, 100)
|
||||
|
||||
@@ -274,7 +274,7 @@ async def test_tablet_incremental_repair_error(manager: ManagerClient):
|
||||
|
||||
async def do_tablet_incremental_repair_and_ops(manager: ManagerClient, ops: str):
|
||||
nr_keys = 100
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys, cmdline=['--logger-log-level', 'compaction=debug'])
|
||||
token = -1
|
||||
|
||||
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental')
|
||||
@@ -335,7 +335,7 @@ async def test_tablet_incremental_repair_and_major(manager: ManagerClient):
|
||||
@pytest.mark.asyncio
|
||||
async def test_tablet_incremental_repair_and_minor(manager: ManagerClient):
|
||||
nr_keys = 100
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys)
|
||||
|
||||
# Disable autocompaction
|
||||
for server in servers:
|
||||
@@ -381,7 +381,7 @@ async def test_tablet_incremental_repair_and_minor(manager: ManagerClient):
|
||||
|
||||
async def do_test_tablet_incremental_repair_with_split_and_merge(manager, do_split, do_merge):
|
||||
nr_keys = 100
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys)
|
||||
|
||||
# First repair
|
||||
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
|
||||
@@ -442,7 +442,7 @@ async def test_tablet_incremental_repair_with_merge(manager: ManagerClient):
|
||||
async def test_tablet_incremental_repair_existing_and_repair_produced_sstable(manager: ManagerClient):
|
||||
nr_keys = 100
|
||||
cmdline = ["--hinted-handoff-enabled", "0"]
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys, cmdline)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys, cmdline)
|
||||
|
||||
await manager.server_stop_gracefully(servers[1].server_id)
|
||||
|
||||
@@ -466,7 +466,7 @@ async def test_tablet_incremental_repair_existing_and_repair_produced_sstable(ma
|
||||
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
||||
async def test_tablet_incremental_repair_merge_higher_repaired_at_number(manager):
|
||||
nr_keys = 100
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys)
|
||||
|
||||
# First repair
|
||||
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
|
||||
@@ -507,7 +507,7 @@ async def test_tablet_incremental_repair_merge_higher_repaired_at_number(manager
|
||||
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
||||
async def test_tablet_incremental_repair_merge_correct_repaired_at_number_after_merge(manager):
|
||||
nr_keys = 100
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys)
|
||||
|
||||
# First repair
|
||||
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
|
||||
@@ -541,7 +541,7 @@ async def do_test_tablet_incremental_repair_merge_error(manager, error):
|
||||
nr_keys = 100
|
||||
# Make sure no data commit log replay after force server stop
|
||||
cmdline = ['--enable-commitlog', '0']
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await preapre_cluster_for_incremental_repair(manager, nr_keys, cmdline)
|
||||
servers, cql, hosts, ks, table_id, logs, repaired_keys, unrepaired_keys, current_key, token = await prepare_cluster_for_incremental_repair(manager, nr_keys, cmdline)
|
||||
|
||||
# First repair
|
||||
await manager.api.tablet_repair(servers[0].ip_addr, ks, "test", token, incremental_mode='incremental') # sstables_repaired_at 1
|
||||
@@ -587,7 +587,7 @@ async def test_tablet_incremental_repair_merge_error_in_merge_completion_fiber(m
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_tablet_repair_with_incremental_option(manager: ManagerClient):
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, token = await preapre_cluster_for_incremental_repair(manager)
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, token = await prepare_cluster_for_incremental_repair(manager)
|
||||
token = -1
|
||||
|
||||
sstables_repaired_at = 0
|
||||
@@ -632,7 +632,7 @@ async def test_tablet_repair_with_incremental_option(manager: ManagerClient):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_incremental_repair_tablet_time_metrics(manager: ManagerClient):
|
||||
servers, _, _, ks, _, _, _, _, _, token = await preapre_cluster_for_incremental_repair(manager)
|
||||
servers, _, _, ks, _, _, _, _, _, token = await prepare_cluster_for_incremental_repair(manager)
|
||||
time1 = 0
|
||||
time2 = 0
|
||||
|
||||
@@ -820,7 +820,7 @@ async def test_repair_sigsegv_with_diff_shard_count(manager: ManagerClient, use_
|
||||
@pytest.mark.skip_mode(mode='release', reason='error injections are not supported in release mode')
|
||||
async def test_tablet_incremental_repair_table_drop_compaction_group_gone(manager: ManagerClient):
|
||||
cmdline = ['--logger-log-level', 'repair=debug']
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, _ = await preapre_cluster_for_incremental_repair(manager, cmdline=cmdline)
|
||||
servers, cql, hosts, ks, table_id, logs, _, _, _, _ = await prepare_cluster_for_incremental_repair(manager, cmdline=cmdline)
|
||||
|
||||
coord = await get_topology_coordinator(manager)
|
||||
coord_serv = await find_server_by_host_id(manager, servers, coord)
|
||||
|
||||
@@ -20,6 +20,7 @@ from cassandra.query import SimpleStatement
|
||||
from test.pylib.async_cql import _wrap_future
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.random_tables import RandomTables, TextType, Column
|
||||
from test.pylib.rest_client import read_barrier
|
||||
from test.pylib.util import unique_name
|
||||
from test.cluster.conftest import cluster_con
|
||||
|
||||
@@ -403,6 +404,7 @@ async def test_arbiter_dc_rf_rack_valid_keyspaces(manager: ManagerClient):
|
||||
for task in [*valid_keyspaces, *invalid_keyspaces]:
|
||||
_ = tg.create_task(task)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_startup_with_keyspaces_violating_rf_rack_valid_keyspaces(manager: ManagerClient):
|
||||
"""
|
||||
This test verifies that starting a Scylla node fails when there's an RF-rack-invalid keyspace.
|
||||
@@ -464,22 +466,50 @@ async def test_startup_with_keyspaces_violating_rf_rack_valid_keyspaces(manager:
|
||||
for rfs, tablets in valid_keyspaces:
|
||||
_ = tg.create_task(create_keyspace(rfs, tablets))
|
||||
|
||||
await manager.server_stop_gracefully(s1.server_id)
|
||||
await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "true")
|
||||
|
||||
# Precondition: s1 has rf_rack_valid_keyspaces set to false.
|
||||
# Postcondition: s1 still has rf_rack_valid_keyspaces set to false.
|
||||
async def try_fail(rfs: List[int], dc: str, rf: int, rack_count: int):
|
||||
running_servers = await manager.running_servers()
|
||||
should_start = s1.server_id not in [server.server_id for server in running_servers]
|
||||
if should_start:
|
||||
await manager.server_start(s1.server_id)
|
||||
|
||||
ks = await create_keyspace(rfs, True)
|
||||
# We need to wait for the new schema to propagate.
|
||||
# Otherwise, it's not clear when the mutation
|
||||
# corresponding to the created keyspace will
|
||||
# arrive at server 1.
|
||||
# It could happen only after the node performs
|
||||
# the check upon start-up, effectively leading
|
||||
# to a successful start-up, which we don't want.
|
||||
# For more context, see issue: SCYLLADB-1137.
|
||||
await read_barrier(manager.api, s1.ip_addr)
|
||||
|
||||
await manager.server_stop_gracefully(s1.server_id)
|
||||
await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "true")
|
||||
|
||||
err = f"The keyspace '{ks}' is required to be RF-rack-valid. " \
|
||||
f"That condition is violated for DC '{dc}': RF={rf} vs. rack count={rack_count}."
|
||||
_ = await manager.server_start(s1.server_id, expected_error=err)
|
||||
await manager.server_start(s1.server_id, expected_error=err)
|
||||
await cql.run_async(f"DROP KEYSPACE {ks}")
|
||||
|
||||
await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "false")
|
||||
|
||||
# Test RF-rack-invalid keyspaces.
|
||||
await try_fail([2, 0], "dc1", 2, 3)
|
||||
await try_fail([3, 2], "dc2", 2, 1)
|
||||
await try_fail([4, 1], "dc1", 4, 3)
|
||||
|
||||
_ = await manager.server_start(s1.server_id)
|
||||
# We need to perform a read barrier on the node to make
|
||||
# sure that it processes the last DROP KEYSPACE.
|
||||
# Otherwise, the node could think the RF-rack-invalid
|
||||
# keyspace still exists.
|
||||
await manager.server_start(s1.server_id)
|
||||
await read_barrier(manager.api, s1.ip_addr)
|
||||
await manager.server_stop_gracefully(s1.server_id)
|
||||
|
||||
await manager.server_update_config(s1.server_id, "rf_rack_valid_keyspaces", "true")
|
||||
await manager.server_start(s1.server_id)
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_startup_with_keyspaces_violating_rf_rack_valid_keyspaces_but_not_enforced(manager: ManagerClient):
|
||||
|
||||
@@ -23,10 +23,25 @@ from test.cluster.object_store.conftest import format_tuples
|
||||
from test.cluster.object_store.test_backup import topo, take_snapshot, do_test_streaming_scopes
|
||||
from test.cluster.util import new_test_keyspace
|
||||
from test.pylib.rest_client import read_barrier
|
||||
from test.pylib.util import unique_name
|
||||
from test.pylib.util import unique_name, wait_for
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
async def wait_for_upload_dir_empty(upload_dir, timeout=30):
|
||||
'''
|
||||
Wait until the upload directory is empty with a timeout.
|
||||
SSTable unlinking is asynchronous and in rare situations, it can happen
|
||||
that not all sstables are deleted from the upload dir immediately after refresh is done.
|
||||
'''
|
||||
deadline = time.time() + timeout
|
||||
async def check_empty():
|
||||
files = os.listdir(upload_dir)
|
||||
if not files:
|
||||
return True
|
||||
return None
|
||||
await wait_for(check_empty, deadline, period=0.5)
|
||||
|
||||
class SSTablesOnLocalStorage:
|
||||
def __init__(self):
|
||||
self.tmpdir = f'tmpbackup-{str(uuid.uuid4())}'
|
||||
@@ -153,7 +168,8 @@ async def test_refresh_deletes_uploaded_sstables(manager: ManagerClient):
|
||||
|
||||
for s in servers:
|
||||
cf_dir = dirs[s.server_id]["cf_dir"]
|
||||
files = os.listdir(os.path.join(cf_dir, 'upload'))
|
||||
assert files == [], f'Upload dir not empty on server {s.server_id}: {files}'
|
||||
upload_dir = os.path.join(cf_dir, 'upload')
|
||||
assert os.path.exists(upload_dir)
|
||||
await wait_for_upload_dir_empty(upload_dir)
|
||||
|
||||
shutil.rmtree(tmpbackup)
|
||||
|
||||
@@ -196,7 +196,7 @@ async def test_group0_tombstone_gc(manager: ManagerClient):
|
||||
tombstone_mark = datetime.now(timezone.utc)
|
||||
|
||||
# test #2: the tombstones are not cleaned up when one node is down
|
||||
with pytest.raises(AssertionError, match="Deadline exceeded"):
|
||||
with pytest.raises(AssertionError, match="timed out"):
|
||||
# waiting for shorter time (5s normally enough for a successful case, we expect the timeout here)
|
||||
await verify_tombstone_gc(tombstone_mark, timeout=5)
|
||||
|
||||
@@ -249,7 +249,7 @@ async def test_group0_tombstone_gc(manager: ManagerClient):
|
||||
await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
|
||||
|
||||
# test #4a: the tombstones are not cleaned up after both live nodes join the new group0
|
||||
with pytest.raises(AssertionError, match="Deadline exceeded"):
|
||||
with pytest.raises(AssertionError, match="timed out"):
|
||||
await verify_tombstone_gc(tombstone_mark, timeout=5)
|
||||
|
||||
await manager.remove_node(servers[0].server_id, down_server.server_id)
|
||||
|
||||
@@ -165,7 +165,7 @@ async def wait_for_cdc_generations_publishing(cql: Session, hosts: list[Host], d
|
||||
unpublished_generations = topo_res[0].unpublished_cdc_generations
|
||||
return unpublished_generations is None or len(unpublished_generations) == 0 or None
|
||||
|
||||
await wait_for(all_generations_published, deadline=deadline, period=1.0)
|
||||
await wait_for(all_generations_published, deadline=deadline)
|
||||
|
||||
|
||||
async def check_system_topology_and_cdc_generations_v3_consistency(manager: ManagerClient, live_hosts: list[Host], cqls: Optional[list[Session]] = None, ignored_hosts: list[Host] = []):
|
||||
@@ -470,6 +470,17 @@ async def new_materialized_view(manager: ManagerClient, table, select, pk, where
|
||||
await manager.get_cql().run_async(f"DROP MATERIALIZED VIEW {mv}")
|
||||
|
||||
|
||||
async def keyspace_has_tablets(manager: ManagerClient, keyspace: str) -> bool:
|
||||
"""
|
||||
Checks whether the given keyspace uses tablets.
|
||||
Adapted from its counterpart in the cqlpy test: cqlpy/util.py::keyspace_has_tablets.
|
||||
"""
|
||||
cql = manager.get_cql()
|
||||
rows_iter = await cql.run_async(f"SELECT * FROM system_schema.scylla_keyspaces WHERE keyspace_name='{keyspace}'")
|
||||
rows = list(rows_iter)
|
||||
return len(rows) > 0 and getattr(rows[0], "initial_tablets", None) is not None
|
||||
|
||||
|
||||
async def get_raft_log_size(cql, host) -> int:
|
||||
query = "select count(\"index\") from system.raft"
|
||||
return (await cql.run_async(query, host=host))[0][0]
|
||||
|
||||
@@ -271,10 +271,21 @@ future<std::tuple<tests::proc::process_fixture, int>> tests::proc::start_docker_
|
||||
// arbitrary timeout of 120s for the server to make some output. Very generous.
|
||||
// but since we (maybe) run docker, and might need to pull image, this can take
|
||||
// some time if we're unlucky.
|
||||
co_await with_timeout(std::chrono::steady_clock::now() + 120s, when_all(std::move(out_fut), std::move(err_fut)));
|
||||
} catch (in_use&) {
|
||||
retry = true;
|
||||
p = std::current_exception();
|
||||
auto [f1, f2] = co_await with_timeout(std::chrono::steady_clock::now() + 120s, when_all(std::move(out_fut), std::move(err_fut)));
|
||||
for (auto* f : {&f1, &f2}) {
|
||||
if (f->failed()) {
|
||||
try {
|
||||
f->get();
|
||||
} catch (in_use&) {
|
||||
retry = true;
|
||||
p = std::current_exception();
|
||||
} catch (...) {
|
||||
if (!p) {
|
||||
p = std::current_exception();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (...) {
|
||||
p = std::current_exception();
|
||||
}
|
||||
|
||||
@@ -56,15 +56,25 @@ def unique_name(unique_name_prefix = 'test_'):
|
||||
async def wait_for(
|
||||
pred: Callable[[], Awaitable[Optional[T]]],
|
||||
deadline: float,
|
||||
period: float = 1,
|
||||
period: float = 0.1,
|
||||
before_retry: Optional[Callable[[], Any]] = None,
|
||||
backoff_factor: float = 1,
|
||||
max_period: float = None) -> T:
|
||||
backoff_factor: float = 1.5,
|
||||
max_period: float = 1.0,
|
||||
label: Optional[str] = None) -> T:
|
||||
tag = label or getattr(pred, '__name__', 'unlabeled')
|
||||
start = time.time()
|
||||
retries = 0
|
||||
while True:
|
||||
assert(time.time() < deadline), "Deadline exceeded, failing test."
|
||||
elapsed = time.time() - start
|
||||
assert time.time() < deadline, \
|
||||
f"wait_for({tag}) timed out after {elapsed:.2f}s ({retries} retries)"
|
||||
res = await pred()
|
||||
if res is not None:
|
||||
if retries > 0:
|
||||
logger.debug(f"wait_for({tag}) completed "
|
||||
f"in {elapsed:.2f}s ({retries} retries)")
|
||||
return res
|
||||
retries += 1
|
||||
await asyncio.sleep(period)
|
||||
period *= backoff_factor
|
||||
if max_period is not None:
|
||||
@@ -273,14 +283,14 @@ async def wait_for_view_v1(cql: Session, name: str, node_count: int, timeout: in
|
||||
done = await cql.run_async(f"SELECT COUNT(*) FROM system_distributed.view_build_status WHERE status = 'SUCCESS' AND view_name = '{name}' ALLOW FILTERING")
|
||||
return done[0][0] == node_count or None
|
||||
deadline = time.time() + timeout
|
||||
await wait_for(view_is_built, deadline)
|
||||
await wait_for(view_is_built, deadline, label=f"view_v1_{name}")
|
||||
|
||||
async def wait_for_view(cql: Session, name: str, node_count: int, timeout: int = 120):
|
||||
async def view_is_built():
|
||||
done = await cql.run_async(f"SELECT COUNT(*) FROM system.view_build_status_v2 WHERE status = 'SUCCESS' AND view_name = '{name}' ALLOW FILTERING")
|
||||
return done[0][0] == node_count or None
|
||||
deadline = time.time() + timeout
|
||||
await wait_for(view_is_built, deadline)
|
||||
await wait_for(view_is_built, deadline, label=f"view_{name}")
|
||||
|
||||
|
||||
async def wait_for_first_completed(coros: list[Coroutine], timeout: int|None = None):
|
||||
|
||||
@@ -350,6 +350,7 @@ utils::gcp::storage::client::impl::send_with_retry(const std::string& path, cons
|
||||
co_await authorize(req, scope);
|
||||
}
|
||||
auto content = co_await util::read_entire_stream_contiguous(_in);
|
||||
auto error_msg = get_gcp_error_message(std::string_view(content));
|
||||
gcp_storage.debug("Got unexpected response status: {}, content: {}", rep._status, content);
|
||||
co_await coroutine::return_exception_ptr(std::make_exception_ptr(httpd::unexpected_status_error(rep._status)));
|
||||
}
|
||||
@@ -628,7 +629,7 @@ future<> utils::gcp::storage::client::object_data_sink::remove_upload() {
|
||||
co_return;
|
||||
}
|
||||
|
||||
gcp_storage.debug("Removing incomplete upload {}:{} ({})", _bucket, _object_name, _session_path);
|
||||
gcp_storage.debug("Removing incomplete upload {}:{} ()", _bucket, _object_name, _session_path);
|
||||
|
||||
auto res = co_await _impl->send_with_retry(_session_path
|
||||
, GCP_OBJECT_SCOPE_READ_WRITE
|
||||
|
||||
@@ -1583,7 +1583,7 @@ void reclaim_timer::report() const noexcept {
|
||||
if (_memory_released > 0) {
|
||||
auto bytes_per_second =
|
||||
static_cast<float>(_memory_released) / std::chrono::duration_cast<std::chrono::duration<float>>(_duration).count();
|
||||
timing_logger.log(info_level, "- reclamation rate = {:.3f} MiB/s", bytes_per_second / MiB);
|
||||
timing_logger.log(info_level, "- reclamation rate = {} MiB/s", format("{:.3f}", bytes_per_second / MiB));
|
||||
}
|
||||
|
||||
if (_debug_enabled) {
|
||||
|
||||
Reference in New Issue
Block a user