Compare commits
55 Commits
debug_form
...
scylla-6.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
cbf47319c1 | ||
|
|
64388bcf22 | ||
|
|
83dfe6bfd6 | ||
|
|
3c47ab9851 | ||
|
|
bef3777a5f | ||
|
|
b25dd2696f | ||
|
|
57d267a97e | ||
|
|
5b8523273b | ||
|
|
6497ed68ed | ||
|
|
39c1237e25 | ||
|
|
e04964ba17 | ||
|
|
fb5b9012e6 | ||
|
|
749197f0a4 | ||
|
|
1f4428153f | ||
|
|
544c424e89 | ||
|
|
73b59b244d | ||
|
|
5afa3028a3 | ||
|
|
885c7309ee | ||
|
|
adfad686b3 | ||
|
|
1a70db17a6 | ||
|
|
bd4b781dc8 | ||
|
|
51b8b04d97 | ||
|
|
242caa14fe | ||
|
|
cedb47d843 | ||
|
|
da816bf50c | ||
|
|
8bff078a89 | ||
|
|
68d12daa7b | ||
|
|
e1616a2970 | ||
|
|
62f5171a55 | ||
|
|
fd928601ad | ||
|
|
ae474f6897 | ||
|
|
099338b766 | ||
|
|
375610ace8 | ||
|
|
1b64e80393 | ||
|
|
fa330a6a4d | ||
|
|
68544d5bb3 | ||
|
|
bc711a169d | ||
|
|
0d0c037e1d | ||
|
|
4d616ccb8c | ||
|
|
25d3398b93 | ||
|
|
ed3ac1eea4 | ||
|
|
7229c820cf | ||
|
|
67878af591 | ||
|
|
2dbc555933 | ||
|
|
3b9c86dcf5 | ||
|
|
1dd522edc8 | ||
|
|
6d655e6766 | ||
|
|
54b9fdab03 | ||
|
|
13f8486cd7 | ||
|
|
747ffd8776 | ||
|
|
a87683c7be | ||
|
|
eff7b0d42d | ||
|
|
7dbcfe5a39 | ||
|
|
d078bafa00 | ||
|
|
1b4d5d02ef |
@@ -78,7 +78,7 @@ fi
|
||||
|
||||
# Default scylla product/version tags
|
||||
PRODUCT=scylla
|
||||
VERSION=5.5.0-dev
|
||||
VERSION=6.0.0-rc2
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -24,7 +24,6 @@
|
||||
#include "service/raft/group0_state_machine.hh"
|
||||
#include "timeout_config.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
|
||||
namespace auth {
|
||||
@@ -41,14 +40,14 @@ constinit const std::string_view AUTH_PACKAGE_NAME("org.apache.cassandra.auth.")
|
||||
static logging::logger auth_log("auth");
|
||||
|
||||
bool legacy_mode(cql3::query_processor& qp) {
|
||||
return qp.auth_version < db::system_auth_keyspace::version_t::v2;
|
||||
return qp.auth_version < db::system_keyspace::auth_version_t::v2;
|
||||
}
|
||||
|
||||
std::string_view get_auth_ks_name(cql3::query_processor& qp) {
|
||||
if (legacy_mode(qp)) {
|
||||
return meta::legacy::AUTH_KS;
|
||||
}
|
||||
return db::system_auth_keyspace::NAME;
|
||||
return db::system_keyspace::NAME;
|
||||
}
|
||||
|
||||
// Func must support being invoked more than once.
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
*/
|
||||
|
||||
#include "auth/default_authorizer.hh"
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
|
||||
extern "C" {
|
||||
#include <crypt.h>
|
||||
|
||||
@@ -28,7 +28,6 @@
|
||||
#include "db/config.hh"
|
||||
#include "db/consistency_level_type.hh"
|
||||
#include "db/functions/function_name.hh"
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "log.hh"
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include <seastar/core/future.hh>
|
||||
@@ -644,7 +643,7 @@ future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_
|
||||
}
|
||||
auto muts = co_await qp.get_mutations_internal(
|
||||
format("INSERT INTO {}.{} ({}) VALUES ({})",
|
||||
db::system_auth_keyspace::NAME,
|
||||
db::system_keyspace::NAME,
|
||||
cf_name,
|
||||
col_names_str,
|
||||
val_binders_str),
|
||||
@@ -659,7 +658,7 @@ future<> migrate_to_auth_v2(db::system_keyspace& sys_ks, ::service::raft_group0_
|
||||
}
|
||||
}
|
||||
co_yield co_await sys_ks.make_auth_version_mutation(ts,
|
||||
db::system_auth_keyspace::version_t::v2);
|
||||
db::system_keyspace::auth_version_t::v2);
|
||||
};
|
||||
co_await announce_mutations_with_batching(g0,
|
||||
start_operation_func,
|
||||
|
||||
@@ -1015,7 +1015,6 @@ scylla_core = (['message/messaging_service.cc',
|
||||
'cql3/result_set.cc',
|
||||
'cql3/prepare_context.cc',
|
||||
'db/consistency_level.cc',
|
||||
'db/system_auth_keyspace.cc',
|
||||
'db/system_keyspace.cc',
|
||||
'db/virtual_table.cc',
|
||||
'db/virtual_tables.cc',
|
||||
|
||||
@@ -14,9 +14,11 @@
|
||||
#include <seastar/coroutine/parallel_for_each.hh>
|
||||
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "service/topology_mutation.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/forward_service.hh"
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
#include "service/storage_service.hh"
|
||||
#include "cql3/CqlParser.hpp"
|
||||
#include "cql3/statements/batch_statement.hh"
|
||||
#include "cql3/statements/modification_statement.hh"
|
||||
@@ -42,16 +44,22 @@ const sstring query_processor::CQL_VERSION = "3.3.1";
|
||||
const std::chrono::minutes prepared_statements_cache::entry_expiry = std::chrono::minutes(60);
|
||||
|
||||
struct query_processor::remote {
|
||||
remote(service::migration_manager& mm, service::forward_service& fwd, service::raft_group0_client& group0_client)
|
||||
: mm(mm), forwarder(fwd), group0_client(group0_client) {}
|
||||
remote(service::migration_manager& mm, service::forward_service& fwd,
|
||||
service::storage_service& ss, service::raft_group0_client& group0_client)
|
||||
: mm(mm), forwarder(fwd), ss(ss), group0_client(group0_client) {}
|
||||
|
||||
service::migration_manager& mm;
|
||||
service::forward_service& forwarder;
|
||||
service::storage_service& ss;
|
||||
service::raft_group0_client& group0_client;
|
||||
|
||||
seastar::gate gate;
|
||||
};
|
||||
|
||||
bool query_processor::topology_global_queue_empty() {
|
||||
return remote().first.get().ss.topology_global_queue_empty();
|
||||
}
|
||||
|
||||
static service::query_state query_state_for_internal_call() {
|
||||
return {service::client_state::for_internal_calls(), empty_service_permit()};
|
||||
}
|
||||
@@ -498,8 +506,8 @@ query_processor::~query_processor() {
|
||||
}
|
||||
|
||||
void query_processor::start_remote(service::migration_manager& mm, service::forward_service& forwarder,
|
||||
service::raft_group0_client& group0_client) {
|
||||
_remote = std::make_unique<struct remote>(mm, forwarder, group0_client);
|
||||
service::storage_service& ss, service::raft_group0_client& group0_client) {
|
||||
_remote = std::make_unique<struct remote>(mm, forwarder, ss, group0_client);
|
||||
}
|
||||
|
||||
future<> query_processor::stop_remote() {
|
||||
@@ -1018,16 +1026,29 @@ query_processor::execute_schema_statement(const statements::schema_altering_stat
|
||||
|
||||
cql3::cql_warnings_vec warnings;
|
||||
|
||||
auto request_id = guard->new_group0_state_id();
|
||||
stmt.global_req_id = request_id;
|
||||
|
||||
auto [ret, m, cql_warnings] = co_await stmt.prepare_schema_mutations(*this, options, guard->write_timestamp());
|
||||
warnings = std::move(cql_warnings);
|
||||
|
||||
ce = std::move(ret);
|
||||
if (!m.empty()) {
|
||||
auto description = format("CQL DDL statement: \"{}\"", stmt.raw_cql_statement);
|
||||
co_await remote_.get().mm.announce(std::move(m), std::move(*guard), description);
|
||||
if (ce && ce->target == cql_transport::event::schema_change::target_type::TABLET_KEYSPACE) {
|
||||
co_await remote_.get().mm.announce<service::topology_change>(std::move(m), std::move(*guard), description);
|
||||
// TODO: eliminate timeout from alter ks statement on the cqlsh/driver side
|
||||
auto error = co_await remote_.get().ss.wait_for_topology_request_completion(request_id);
|
||||
co_await remote_.get().ss.wait_for_topology_not_busy();
|
||||
if (!error.empty()) {
|
||||
log.error("CQL statement \"{}\" with topology request_id \"{}\" failed with error: \"{}\"", stmt.raw_cql_statement, request_id, error);
|
||||
throw exceptions::request_execution_exception(exceptions::exception_code::INVALID, error);
|
||||
}
|
||||
} else {
|
||||
co_await remote_.get().mm.announce<service::schema_change>(std::move(m), std::move(*guard), description);
|
||||
}
|
||||
}
|
||||
|
||||
ce = std::move(ret);
|
||||
|
||||
// If an IF [NOT] EXISTS clause was used, this may not result in an actual schema change. To avoid doing
|
||||
// extra work in the drivers to handle schema changes, we return an empty message in this case. (CASSANDRA-7600)
|
||||
::shared_ptr<messages::result_message> result;
|
||||
|
||||
@@ -31,7 +31,6 @@
|
||||
#include "lang/wasm.hh"
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
#include "types/types.hh"
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
|
||||
|
||||
namespace service {
|
||||
@@ -151,7 +150,8 @@ public:
|
||||
|
||||
~query_processor();
|
||||
|
||||
void start_remote(service::migration_manager&, service::forward_service&, service::raft_group0_client&);
|
||||
void start_remote(service::migration_manager&, service::forward_service&,
|
||||
service::storage_service& ss, service::raft_group0_client&);
|
||||
future<> stop_remote();
|
||||
|
||||
data_dictionary::database db() {
|
||||
@@ -176,7 +176,7 @@ public:
|
||||
|
||||
wasm::manager& wasm() { return _wasm; }
|
||||
|
||||
db::system_auth_keyspace::version_t auth_version;
|
||||
db::system_keyspace::auth_version_t auth_version;
|
||||
|
||||
statements::prepared_statement::checked_weak_ptr get_prepared(const std::optional<auth::authenticated_user>& user, const prepared_cache_key_type& key) {
|
||||
if (user) {
|
||||
@@ -461,6 +461,8 @@ public:
|
||||
|
||||
void reset_cache();
|
||||
|
||||
bool topology_global_queue_empty();
|
||||
|
||||
private:
|
||||
// Keep the holder until you stop using the `remote` services.
|
||||
std::pair<std::reference_wrapper<remote>, gate::holder> remote();
|
||||
|
||||
@@ -8,11 +8,15 @@
|
||||
* SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0)
|
||||
*/
|
||||
|
||||
#include <boost/range/algorithm.hpp>
|
||||
#include <fmt/format.h>
|
||||
#include <seastar/core/coroutine.hh>
|
||||
#include <stdexcept>
|
||||
#include "alter_keyspace_statement.hh"
|
||||
#include "prepared_statement.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/storage_proxy.hh"
|
||||
#include "service/topology_mutation.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "data_dictionary/data_dictionary.hh"
|
||||
#include "data_dictionary/keyspace_metadata.hh"
|
||||
@@ -21,6 +25,8 @@
|
||||
#include "create_keyspace_statement.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
|
||||
static logging::logger mylogger("alter_keyspace");
|
||||
|
||||
bool is_system_keyspace(std::string_view keyspace);
|
||||
|
||||
cql3::statements::alter_keyspace_statement::alter_keyspace_statement(sstring name, ::shared_ptr<ks_prop_defs> attrs)
|
||||
@@ -36,6 +42,20 @@ future<> cql3::statements::alter_keyspace_statement::check_access(query_processo
|
||||
return state.has_keyspace_access(_name, auth::permission::ALTER);
|
||||
}
|
||||
|
||||
static bool validate_rf_difference(const std::string_view curr_rf, const std::string_view new_rf) {
|
||||
auto to_number = [] (const std::string_view rf) {
|
||||
int result;
|
||||
// We assume the passed string view represents a valid decimal number,
|
||||
// so we don't need the error code.
|
||||
(void) std::from_chars(rf.begin(), rf.end(), result);
|
||||
return result;
|
||||
};
|
||||
|
||||
// We want to ensure that each DC's RF is going to change by at most 1
|
||||
// because in that case the old and new quorums must overlap.
|
||||
return std::abs(to_number(curr_rf) - to_number(new_rf)) <= 1;
|
||||
}
|
||||
|
||||
void cql3::statements::alter_keyspace_statement::validate(query_processor& qp, const service::client_state& state) const {
|
||||
auto tmp = _name;
|
||||
std::transform(tmp.begin(), tmp.end(), tmp.begin(), ::tolower);
|
||||
@@ -61,6 +81,17 @@ void cql3::statements::alter_keyspace_statement::validate(query_processor& qp, c
|
||||
}
|
||||
|
||||
auto new_ks = _attrs->as_ks_metadata_update(ks.metadata(), *qp.proxy().get_token_metadata_ptr(), qp.proxy().features());
|
||||
|
||||
if (ks.get_replication_strategy().uses_tablets()) {
|
||||
const std::map<sstring, sstring>& current_rfs = ks.metadata()->strategy_options();
|
||||
for (const auto& [new_dc, new_rf] : _attrs->get_replication_options()) {
|
||||
auto it = current_rfs.find(new_dc);
|
||||
if (it != current_rfs.end() && !validate_rf_difference(it->second, new_rf)) {
|
||||
throw exceptions::invalid_request_exception("Cannot modify replication factor of any DC by more than 1 at a time.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
locator::replication_strategy_params params(new_ks->strategy_options(), new_ks->initial_tablets());
|
||||
auto new_rs = locator::abstract_replication_strategy::create_replication_strategy(new_ks->strategy_name(), params);
|
||||
if (new_rs->is_per_table() != ks.get_replication_strategy().is_per_table()) {
|
||||
@@ -83,20 +114,63 @@ void cql3::statements::alter_keyspace_statement::validate(query_processor& qp, c
|
||||
|
||||
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>>
|
||||
cql3::statements::alter_keyspace_statement::prepare_schema_mutations(query_processor& qp, const query_options&, api::timestamp_type ts) const {
|
||||
using namespace cql_transport;
|
||||
try {
|
||||
auto old_ksm = qp.db().find_keyspace(_name).metadata();
|
||||
event::schema_change::target_type target_type = event::schema_change::target_type::KEYSPACE;
|
||||
auto ks = qp.db().find_keyspace(_name);
|
||||
auto ks_md = ks.metadata();
|
||||
const auto& tm = *qp.proxy().get_token_metadata_ptr();
|
||||
const auto& feat = qp.proxy().features();
|
||||
auto ks_md_update = _attrs->as_ks_metadata_update(ks_md, tm, feat);
|
||||
std::vector<mutation> muts;
|
||||
std::vector<sstring> warnings;
|
||||
auto ks_options = _attrs->get_all_options_flattened(feat);
|
||||
|
||||
auto m = service::prepare_keyspace_update_announcement(qp.db().real_database(), _attrs->as_ks_metadata_update(old_ksm, tm, feat), ts);
|
||||
// we only want to run the tablets path if there are actually any tablets changes, not only schema changes
|
||||
if (ks.get_replication_strategy().uses_tablets() && !_attrs->get_replication_options().empty()) {
|
||||
if (!qp.topology_global_queue_empty()) {
|
||||
return make_exception_future<std::tuple<::shared_ptr<::cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>>(
|
||||
exceptions::invalid_request_exception("Another global topology request is ongoing, please retry."));
|
||||
}
|
||||
if (_attrs->get_replication_options().contains(ks_prop_defs::REPLICATION_FACTOR_KEY)) {
|
||||
return make_exception_future<std::tuple<::shared_ptr<::cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>>(
|
||||
exceptions::invalid_request_exception("'replication_factor' tag is not allowed when executing ALTER KEYSPACE with tablets, please list the DCs explicitly"));
|
||||
}
|
||||
qp.db().real_database().validate_keyspace_update(*ks_md_update);
|
||||
|
||||
service::topology_mutation_builder builder(ts);
|
||||
builder.set_global_topology_request(service::global_topology_request::keyspace_rf_change);
|
||||
builder.set_global_topology_request_id(this->global_req_id);
|
||||
builder.set_new_keyspace_rf_change_data(_name, ks_options);
|
||||
service::topology_change change{{builder.build()}};
|
||||
|
||||
auto topo_schema = qp.db().find_schema(db::system_keyspace::NAME, db::system_keyspace::TOPOLOGY);
|
||||
boost::transform(change.mutations, std::back_inserter(muts), [topo_schema] (const canonical_mutation& cm) {
|
||||
return cm.to_mutation(topo_schema);
|
||||
});
|
||||
|
||||
service::topology_request_tracking_mutation_builder rtbuilder{utils::UUID{this->global_req_id}};
|
||||
rtbuilder.set("done", false)
|
||||
.set("start_time", db_clock::now());
|
||||
service::topology_change req_change{{rtbuilder.build()}};
|
||||
|
||||
auto topo_req_schema = qp.db().find_schema(db::system_keyspace::NAME, db::system_keyspace::TOPOLOGY_REQUESTS);
|
||||
boost::transform(req_change.mutations, std::back_inserter(muts), [topo_req_schema] (const canonical_mutation& cm) {
|
||||
return cm.to_mutation(topo_req_schema);
|
||||
});
|
||||
|
||||
target_type = event::schema_change::target_type::TABLET_KEYSPACE;
|
||||
} else {
|
||||
auto schema_mutations = service::prepare_keyspace_update_announcement(qp.db().real_database(), ks_md_update, ts);
|
||||
muts.insert(muts.begin(), schema_mutations.begin(), schema_mutations.end());
|
||||
}
|
||||
|
||||
using namespace cql_transport;
|
||||
auto ret = ::make_shared<event::schema_change>(
|
||||
event::schema_change::change_type::UPDATED,
|
||||
event::schema_change::target_type::KEYSPACE,
|
||||
target_type,
|
||||
keyspace());
|
||||
|
||||
return make_ready_future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>>(std::make_tuple(std::move(ret), std::move(m), std::vector<sstring>()));
|
||||
return make_ready_future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>>(std::make_tuple(std::move(ret), std::move(muts), warnings));
|
||||
} catch (data_dictionary::no_such_keyspace& e) {
|
||||
return make_exception_future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>>(exceptions::invalid_request_exception("Unknown keyspace " + _name));
|
||||
}
|
||||
@@ -107,7 +181,6 @@ cql3::statements::alter_keyspace_statement::prepare(data_dictionary::database db
|
||||
return std::make_unique<prepared_statement>(make_shared<alter_keyspace_statement>(*this));
|
||||
}
|
||||
|
||||
static logging::logger mylogger("alter_keyspace");
|
||||
|
||||
future<::shared_ptr<cql_transport::messages::result_message>>
|
||||
cql3::statements::alter_keyspace_statement::execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const {
|
||||
|
||||
@@ -24,7 +24,6 @@ static std::map<sstring, sstring> prepare_options(
|
||||
const sstring& strategy_class,
|
||||
const locator::token_metadata& tm,
|
||||
std::map<sstring, sstring> options,
|
||||
std::optional<unsigned>& initial_tablets,
|
||||
const std::map<sstring, sstring>& old_options = {}) {
|
||||
options.erase(ks_prop_defs::REPLICATION_STRATEGY_CLASS_KEY);
|
||||
|
||||
@@ -72,6 +71,35 @@ static std::map<sstring, sstring> prepare_options(
|
||||
return options;
|
||||
}
|
||||
|
||||
ks_prop_defs::ks_prop_defs(std::map<sstring, sstring> options) {
|
||||
std::map<sstring, sstring> replication_opts, storage_opts, tablets_opts, durable_writes_opts;
|
||||
|
||||
auto read_property_into = [] (auto& map, const sstring& name, const sstring& value, const sstring& tag) {
|
||||
map[name.substr(sstring(tag).size() + 1)] = value;
|
||||
};
|
||||
|
||||
for (const auto& [name, value] : options) {
|
||||
if (name.starts_with(KW_DURABLE_WRITES)) {
|
||||
read_property_into(durable_writes_opts, name, value, KW_DURABLE_WRITES);
|
||||
} else if (name.starts_with(KW_REPLICATION)) {
|
||||
read_property_into(replication_opts, name, value, KW_REPLICATION);
|
||||
} else if (name.starts_with(KW_TABLETS)) {
|
||||
read_property_into(tablets_opts, name, value, KW_TABLETS);
|
||||
} else if (name.starts_with(KW_STORAGE)) {
|
||||
read_property_into(storage_opts, name, value, KW_STORAGE);
|
||||
}
|
||||
}
|
||||
|
||||
if (!replication_opts.empty())
|
||||
add_property(KW_REPLICATION, replication_opts);
|
||||
if (!storage_opts.empty())
|
||||
add_property(KW_STORAGE, storage_opts);
|
||||
if (!tablets_opts.empty())
|
||||
add_property(KW_TABLETS, tablets_opts);
|
||||
if (!durable_writes_opts.empty())
|
||||
add_property(KW_DURABLE_WRITES, durable_writes_opts.begin()->second);
|
||||
}
|
||||
|
||||
void ks_prop_defs::validate() {
|
||||
// Skip validation if the strategy class is already set as it means we've already
|
||||
// prepared (and redoing it would set strategyClass back to null, which we don't want)
|
||||
@@ -134,7 +162,7 @@ std::optional<unsigned> ks_prop_defs::get_initial_tablets(const sstring& strateg
|
||||
assert(!ret.has_value());
|
||||
return ret;
|
||||
} else {
|
||||
throw exceptions::configuration_exception(sstring("Tablets enabled value must be true or false; found ") + it->second);
|
||||
throw exceptions::configuration_exception(sstring("Tablets enabled value must be true or false; found: ") + enabled);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -159,10 +187,30 @@ std::optional<sstring> ks_prop_defs::get_replication_strategy_class() const {
|
||||
return _strategy_class;
|
||||
}
|
||||
|
||||
bool ks_prop_defs::get_durable_writes() const {
|
||||
return get_boolean(KW_DURABLE_WRITES, true);
|
||||
}
|
||||
|
||||
std::map<sstring, sstring> ks_prop_defs::get_all_options_flattened(const gms::feature_service& feat) const {
|
||||
std::map<sstring, sstring> all_options;
|
||||
|
||||
auto ingest_flattened_options = [&all_options](const std::map<sstring, sstring>& options, const sstring& prefix) {
|
||||
for (auto& option: options) {
|
||||
all_options[prefix + ":" + option.first] = option.second;
|
||||
}
|
||||
};
|
||||
ingest_flattened_options(get_replication_options(), KW_REPLICATION);
|
||||
ingest_flattened_options(get_storage_options().to_map(), KW_STORAGE);
|
||||
ingest_flattened_options(get_map(KW_TABLETS).value_or(std::map<sstring, sstring>{}), KW_TABLETS);
|
||||
ingest_flattened_options({{sstring(KW_DURABLE_WRITES), to_sstring(get_boolean(KW_DURABLE_WRITES, true))}}, KW_DURABLE_WRITES);
|
||||
|
||||
return all_options;
|
||||
}
|
||||
|
||||
lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata(sstring ks_name, const locator::token_metadata& tm, const gms::feature_service& feat) {
|
||||
auto sc = get_replication_strategy_class().value();
|
||||
std::optional<unsigned> initial_tablets = get_initial_tablets(sc, feat.tablets);
|
||||
auto options = prepare_options(sc, tm, get_replication_options(), initial_tablets);
|
||||
auto options = prepare_options(sc, tm, get_replication_options());
|
||||
return data_dictionary::keyspace_metadata::new_keyspace(ks_name, sc,
|
||||
std::move(options), initial_tablets, get_boolean(KW_DURABLE_WRITES, true), get_storage_options());
|
||||
}
|
||||
@@ -171,13 +219,14 @@ lw_shared_ptr<data_dictionary::keyspace_metadata> ks_prop_defs::as_ks_metadata_u
|
||||
std::map<sstring, sstring> options;
|
||||
const auto& old_options = old->strategy_options();
|
||||
auto sc = get_replication_strategy_class();
|
||||
std::optional<unsigned> initial_tablets;
|
||||
if (sc) {
|
||||
initial_tablets = get_initial_tablets(*sc, old->initial_tablets().has_value());
|
||||
options = prepare_options(*sc, tm, get_replication_options(), initial_tablets, old_options);
|
||||
options = prepare_options(*sc, tm, get_replication_options(), old_options);
|
||||
} else {
|
||||
sc = old->strategy_name();
|
||||
options = old_options;
|
||||
}
|
||||
std::optional<unsigned> initial_tablets = get_initial_tablets(*sc, old->initial_tablets().has_value());
|
||||
if (!initial_tablets) {
|
||||
initial_tablets = old->initial_tablets();
|
||||
}
|
||||
|
||||
|
||||
@@ -49,11 +49,16 @@ public:
|
||||
private:
|
||||
std::optional<sstring> _strategy_class;
|
||||
public:
|
||||
ks_prop_defs() = default;
|
||||
explicit ks_prop_defs(std::map<sstring, sstring> options);
|
||||
|
||||
void validate();
|
||||
std::map<sstring, sstring> get_replication_options() const;
|
||||
std::optional<sstring> get_replication_strategy_class() const;
|
||||
std::optional<unsigned> get_initial_tablets(const sstring& strategy_class, bool enabled_by_default) const;
|
||||
data_dictionary::storage_options get_storage_options() const;
|
||||
bool get_durable_writes() const;
|
||||
std::map<sstring, sstring> get_all_options_flattened(const gms::feature_service& feat) const;
|
||||
lw_shared_ptr<data_dictionary::keyspace_metadata> as_ks_metadata(sstring ks_name, const locator::token_metadata&, const gms::feature_service&);
|
||||
lw_shared_ptr<data_dictionary::keyspace_metadata> as_ks_metadata_update(lw_shared_ptr<data_dictionary::keyspace_metadata> old, const locator::token_metadata&, const gms::feature_service&);
|
||||
};
|
||||
|
||||
@@ -63,6 +63,7 @@ protected:
|
||||
|
||||
public:
|
||||
virtual future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, std::vector<mutation>, cql3::cql_warnings_vec>> prepare_schema_mutations(query_processor& qp, const query_options& options, api::timestamp_type) const = 0;
|
||||
mutable utils::UUID global_req_id;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
@@ -2,7 +2,6 @@ add_library(db STATIC)
|
||||
target_sources(db
|
||||
PRIVATE
|
||||
consistency_level.cc
|
||||
system_auth_keyspace.cc
|
||||
system_keyspace.cc
|
||||
virtual_table.cc
|
||||
virtual_tables.cc
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
#include "gms/feature_service.hh"
|
||||
#include "partition_slice_builder.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
#include "system_auth_keyspace.hh"
|
||||
#include "system_keyspace.hh"
|
||||
#include "query-result-set.hh"
|
||||
#include "query-result-writer.hh"
|
||||
@@ -235,7 +234,6 @@ future<> save_system_schema(cql3::query_processor& qp) {
|
||||
co_await save_system_schema_to_keyspace(qp, schema_tables::NAME);
|
||||
// #2514 - make sure "system" is written to system_schema.keyspaces.
|
||||
co_await save_system_schema_to_keyspace(qp, system_keyspace::NAME);
|
||||
co_await save_system_schema_to_keyspace(qp, system_auth_keyspace::NAME);
|
||||
}
|
||||
|
||||
namespace v3 {
|
||||
|
||||
@@ -1,141 +0,0 @@
|
||||
/*
|
||||
* Modified by ScyllaDB
|
||||
* Copyright (C) 2024-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0)
|
||||
*/
|
||||
|
||||
#include "system_auth_keyspace.hh"
|
||||
#include "system_keyspace.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
#include "schema/schema_builder.hh"
|
||||
#include "types/set.hh"
|
||||
|
||||
namespace db {
|
||||
|
||||
// all system auth tables use schema commitlog
|
||||
namespace {
|
||||
const auto set_use_schema_commitlog = schema_builder::register_static_configurator([](const sstring& ks_name, const sstring& cf_name, schema_static_props& props) {
|
||||
if (ks_name == system_auth_keyspace::NAME) {
|
||||
props.enable_schema_commitlog();
|
||||
}
|
||||
});
|
||||
} // anonymous namespace
|
||||
|
||||
namespace system_auth_keyspace {
|
||||
|
||||
// use the same gc setting as system_schema tables
|
||||
using days = std::chrono::duration<int, std::ratio<24 * 3600>>;
|
||||
// FIXME: in some cases time-based gc may cause data resurrection,
|
||||
// for more info see https://github.com/scylladb/scylladb/issues/15607
|
||||
static constexpr auto auth_gc_grace = std::chrono::duration_cast<std::chrono::seconds>(days(7)).count();
|
||||
|
||||
schema_ptr roles() {
|
||||
static thread_local auto schema = [] {
|
||||
schema_builder builder(generate_legacy_id(NAME, ROLES), NAME, ROLES,
|
||||
// partition key
|
||||
{{"role", utf8_type}},
|
||||
// clustering key
|
||||
{},
|
||||
// regular columns
|
||||
{
|
||||
{"can_login", boolean_type},
|
||||
{"is_superuser", boolean_type},
|
||||
{"member_of", set_type_impl::get_instance(utf8_type, true)},
|
||||
{"salted_hash", utf8_type}
|
||||
},
|
||||
// static columns
|
||||
{},
|
||||
// regular column name type
|
||||
utf8_type,
|
||||
// comment
|
||||
"roles for authentication and RBAC"
|
||||
);
|
||||
builder.set_gc_grace_seconds(auth_gc_grace);
|
||||
builder.with_version(system_keyspace::generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
return schema;
|
||||
}
|
||||
|
||||
schema_ptr role_members() {
|
||||
static thread_local auto schema = [] {
|
||||
schema_builder builder(generate_legacy_id(NAME, ROLE_MEMBERS), NAME, ROLE_MEMBERS,
|
||||
// partition key
|
||||
{{"role", utf8_type}},
|
||||
// clustering key
|
||||
{{"member", utf8_type}},
|
||||
// regular columns
|
||||
{},
|
||||
// static columns
|
||||
{},
|
||||
// regular column name type
|
||||
utf8_type,
|
||||
// comment
|
||||
"joins users and their granted roles in RBAC"
|
||||
);
|
||||
builder.set_gc_grace_seconds(auth_gc_grace);
|
||||
builder.with_version(system_keyspace::generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
return schema;
|
||||
}
|
||||
|
||||
schema_ptr role_attributes() {
|
||||
static thread_local auto schema = [] {
|
||||
schema_builder builder(generate_legacy_id(NAME, ROLE_ATTRIBUTES), NAME, ROLE_ATTRIBUTES,
|
||||
// partition key
|
||||
{{"role", utf8_type}},
|
||||
// clustering key
|
||||
{{"name", utf8_type}},
|
||||
// regular columns
|
||||
{
|
||||
{"value", utf8_type}
|
||||
},
|
||||
// static columns
|
||||
{},
|
||||
// regular column name type
|
||||
utf8_type,
|
||||
// comment
|
||||
"role permissions in RBAC"
|
||||
);
|
||||
builder.set_gc_grace_seconds(auth_gc_grace);
|
||||
builder.with_version(system_keyspace::generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
return schema;
|
||||
}
|
||||
|
||||
schema_ptr role_permissions() {
|
||||
static thread_local auto schema = [] {
|
||||
schema_builder builder(generate_legacy_id(NAME, ROLE_PERMISSIONS), NAME, ROLE_PERMISSIONS,
|
||||
// partition key
|
||||
{{"role", utf8_type}},
|
||||
// clustering key
|
||||
{{"resource", utf8_type}},
|
||||
// regular columns
|
||||
{
|
||||
{"permissions", set_type_impl::get_instance(utf8_type, true)}
|
||||
},
|
||||
// static columns
|
||||
{},
|
||||
// regular column name type
|
||||
utf8_type,
|
||||
// comment
|
||||
"role permissions for CassandraAuthorizer"
|
||||
);
|
||||
builder.set_gc_grace_seconds(auth_gc_grace);
|
||||
builder.with_version(system_keyspace::generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
return schema;
|
||||
}
|
||||
|
||||
std::vector<schema_ptr> all_tables() {
|
||||
return {roles(), role_members(), role_attributes(), role_permissions()};
|
||||
}
|
||||
|
||||
} // namespace system_auth_keyspace
|
||||
} // namespace db
|
||||
@@ -1,38 +0,0 @@
|
||||
/*
|
||||
* Modified by ScyllaDB
|
||||
* Copyright (C) 2024-present ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* SPDX-License-Identifier: (AGPL-3.0-or-later and Apache-2.0)
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include <vector>
|
||||
|
||||
namespace db {
|
||||
|
||||
namespace system_auth_keyspace {
|
||||
enum class version_t: int64_t {
|
||||
v1 = 1,
|
||||
v2 = 2,
|
||||
};
|
||||
static constexpr auto NAME = "system_auth_v2";
|
||||
// tables
|
||||
static constexpr auto ROLES = "roles";
|
||||
static constexpr auto ROLE_MEMBERS = "role_members";
|
||||
static constexpr auto ROLE_ATTRIBUTES = "role_attributes";
|
||||
static constexpr auto ROLE_PERMISSIONS = "role_permissions";
|
||||
|
||||
|
||||
schema_ptr roles();
|
||||
schema_ptr role_members();
|
||||
schema_ptr role_attributes();
|
||||
schema_ptr role_permissions();
|
||||
|
||||
std::vector<schema_ptr> all_tables();
|
||||
}; // namespace system_auth_keyspace
|
||||
|
||||
} // namespace db
|
||||
@@ -18,7 +18,6 @@
|
||||
#include <seastar/core/on_internal_error.hh>
|
||||
#include "system_keyspace.hh"
|
||||
#include "cql3/untyped_result_set.hh"
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "thrift/server.hh"
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "partition_slice_builder.hh"
|
||||
@@ -88,6 +87,10 @@ namespace {
|
||||
system_keyspace::SCYLLA_LOCAL,
|
||||
system_keyspace::COMMITLOG_CLEANUPS,
|
||||
system_keyspace::SERVICE_LEVELS_V2,
|
||||
system_keyspace::ROLES,
|
||||
system_keyspace::ROLE_MEMBERS,
|
||||
system_keyspace::ROLE_ATTRIBUTES,
|
||||
system_keyspace::ROLE_PERMISSIONS,
|
||||
system_keyspace::v3::CDC_LOCAL
|
||||
};
|
||||
if (ks_name == system_keyspace::NAME && tables.contains(cf_name)) {
|
||||
@@ -233,12 +236,15 @@ schema_ptr system_keyspace::topology() {
|
||||
.with_column("request_id", timeuuid_type)
|
||||
.with_column("ignore_nodes", set_type_impl::get_instance(uuid_type, true), column_kind::static_column)
|
||||
.with_column("new_cdc_generation_data_uuid", timeuuid_type, column_kind::static_column)
|
||||
.with_column("new_keyspace_rf_change_ks_name", utf8_type, column_kind::static_column)
|
||||
.with_column("new_keyspace_rf_change_data", map_type_impl::get_instance(utf8_type, utf8_type, false), column_kind::static_column)
|
||||
.with_column("version", long_type, column_kind::static_column)
|
||||
.with_column("fence_version", long_type, column_kind::static_column)
|
||||
.with_column("transition_state", utf8_type, column_kind::static_column)
|
||||
.with_column("committed_cdc_generations", set_type_impl::get_instance(cdc_generation_ts_id_type, true), column_kind::static_column)
|
||||
.with_column("unpublished_cdc_generations", set_type_impl::get_instance(cdc_generation_ts_id_type, true), column_kind::static_column)
|
||||
.with_column("global_topology_request", utf8_type, column_kind::static_column)
|
||||
.with_column("global_topology_request_id", timeuuid_type, column_kind::static_column)
|
||||
.with_column("enabled_features", set_type_impl::get_instance(utf8_type, true), column_kind::static_column)
|
||||
.with_column("session", uuid_type, column_kind::static_column)
|
||||
.with_column("tablet_balancing_enabled", boolean_type, column_kind::static_column)
|
||||
@@ -1139,6 +1145,103 @@ schema_ptr system_keyspace::service_levels_v2() {
|
||||
return schema;
|
||||
}
|
||||
|
||||
schema_ptr system_keyspace::roles() {
|
||||
static thread_local auto schema = [] {
|
||||
schema_builder builder(generate_legacy_id(NAME, ROLES), NAME, ROLES,
|
||||
// partition key
|
||||
{{"role", utf8_type}},
|
||||
// clustering key
|
||||
{},
|
||||
// regular columns
|
||||
{
|
||||
{"can_login", boolean_type},
|
||||
{"is_superuser", boolean_type},
|
||||
{"member_of", set_type_impl::get_instance(utf8_type, true)},
|
||||
{"salted_hash", utf8_type}
|
||||
},
|
||||
// static columns
|
||||
{},
|
||||
// regular column name type
|
||||
utf8_type,
|
||||
// comment
|
||||
"roles for authentication and RBAC"
|
||||
);
|
||||
builder.with_version(system_keyspace::generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
return schema;
|
||||
}
|
||||
|
||||
schema_ptr system_keyspace::role_members() {
|
||||
static thread_local auto schema = [] {
|
||||
schema_builder builder(generate_legacy_id(NAME, ROLE_MEMBERS), NAME, ROLE_MEMBERS,
|
||||
// partition key
|
||||
{{"role", utf8_type}},
|
||||
// clustering key
|
||||
{{"member", utf8_type}},
|
||||
// regular columns
|
||||
{},
|
||||
// static columns
|
||||
{},
|
||||
// regular column name type
|
||||
utf8_type,
|
||||
// comment
|
||||
"joins users and their granted roles in RBAC"
|
||||
);
|
||||
builder.with_version(system_keyspace::generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
return schema;
|
||||
}
|
||||
|
||||
schema_ptr system_keyspace::role_attributes() {
|
||||
static thread_local auto schema = [] {
|
||||
schema_builder builder(generate_legacy_id(NAME, ROLE_ATTRIBUTES), NAME, ROLE_ATTRIBUTES,
|
||||
// partition key
|
||||
{{"role", utf8_type}},
|
||||
// clustering key
|
||||
{{"name", utf8_type}},
|
||||
// regular columns
|
||||
{
|
||||
{"value", utf8_type}
|
||||
},
|
||||
// static columns
|
||||
{},
|
||||
// regular column name type
|
||||
utf8_type,
|
||||
// comment
|
||||
"role permissions in RBAC"
|
||||
);
|
||||
builder.with_version(system_keyspace::generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
return schema;
|
||||
}
|
||||
|
||||
schema_ptr system_keyspace::role_permissions() {
|
||||
static thread_local auto schema = [] {
|
||||
schema_builder builder(generate_legacy_id(NAME, ROLE_PERMISSIONS), NAME, ROLE_PERMISSIONS,
|
||||
// partition key
|
||||
{{"role", utf8_type}},
|
||||
// clustering key
|
||||
{{"resource", utf8_type}},
|
||||
// regular columns
|
||||
{
|
||||
{"permissions", set_type_impl::get_instance(utf8_type, true)}
|
||||
},
|
||||
// static columns
|
||||
{},
|
||||
// regular column name type
|
||||
utf8_type,
|
||||
// comment
|
||||
"role permissions for CassandraAuthorizer"
|
||||
);
|
||||
builder.with_version(system_keyspace::generate_schema_version(builder.uuid()));
|
||||
return builder.build();
|
||||
}();
|
||||
return schema;
|
||||
}
|
||||
|
||||
schema_ptr system_keyspace::legacy::hints() {
|
||||
static thread_local auto schema = [] {
|
||||
schema_builder builder(generate_legacy_id(NAME, HINTS), NAME, HINTS,
|
||||
@@ -2130,10 +2233,16 @@ future<> system_keyspace::set_bootstrap_state(bootstrap_state state) {
|
||||
});
|
||||
}
|
||||
|
||||
std::vector<schema_ptr> system_keyspace::auth_tables() {
|
||||
return {roles(), role_members(), role_attributes(), role_permissions()};
|
||||
}
|
||||
|
||||
std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
||||
std::vector<schema_ptr> r;
|
||||
auto schema_tables = db::schema_tables::all_tables(schema_features::full());
|
||||
std::copy(schema_tables.begin(), schema_tables.end(), std::back_inserter(r));
|
||||
auto auth_tables = system_keyspace::auth_tables();
|
||||
std::copy(auth_tables.begin(), auth_tables.end(), std::back_inserter(r));
|
||||
r.insert(r.end(), { built_indexes(), hints(), batchlog(), paxos(), local(),
|
||||
peers(), peer_events(), range_xfers(),
|
||||
compactions_in_progress(), compaction_history(),
|
||||
@@ -2149,9 +2258,6 @@ std::vector<schema_ptr> system_keyspace::all_tables(const db::config& cfg) {
|
||||
topology(), cdc_generations_v3(), topology_requests(), service_levels_v2(),
|
||||
});
|
||||
|
||||
auto auth_tables = db::system_auth_keyspace::all_tables();
|
||||
std::copy(auth_tables.begin(), auth_tables.end(), std::back_inserter(r));
|
||||
|
||||
if (cfg.check_experimental(db::experimental_features_t::feature::BROADCAST_TABLES)) {
|
||||
r.insert(r.end(), {broadcast_kv_store()});
|
||||
}
|
||||
@@ -2691,17 +2797,17 @@ future<std::optional<mutation>> system_keyspace::get_group0_schema_version() {
|
||||
|
||||
static constexpr auto AUTH_VERSION_KEY = "auth_version";
|
||||
|
||||
future<system_auth_keyspace::version_t> system_keyspace::get_auth_version() {
|
||||
future<system_keyspace::auth_version_t> system_keyspace::get_auth_version() {
|
||||
auto str_opt = co_await get_scylla_local_param(AUTH_VERSION_KEY);
|
||||
if (!str_opt) {
|
||||
co_return db::system_auth_keyspace::version_t::v1;
|
||||
co_return auth_version_t::v1;
|
||||
}
|
||||
auto& str = *str_opt;
|
||||
if (str == "" || str == "1") {
|
||||
co_return db::system_auth_keyspace::version_t::v1;
|
||||
co_return auth_version_t::v1;
|
||||
}
|
||||
if (str == "2") {
|
||||
co_return db::system_auth_keyspace::version_t::v2;
|
||||
co_return auth_version_t::v2;
|
||||
}
|
||||
on_internal_error(slogger, fmt::format("unexpected auth_version in scylla_local got {}", str));
|
||||
}
|
||||
@@ -2719,7 +2825,7 @@ static service::query_state& internal_system_query_state() {
|
||||
return qs;
|
||||
};
|
||||
|
||||
future<mutation> system_keyspace::make_auth_version_mutation(api::timestamp_type ts, db::system_auth_keyspace::version_t version) {
|
||||
future<mutation> system_keyspace::make_auth_version_mutation(api::timestamp_type ts, db::system_keyspace::auth_version_t version) {
|
||||
static sstring query = format("INSERT INTO {}.{} (key, value) VALUES (?, ?);", db::system_keyspace::NAME, db::system_keyspace::SCYLLA_LOCAL);
|
||||
auto muts = co_await _qp.get_mutations_internal(query, internal_system_query_state(), ts, {AUTH_VERSION_KEY, std::to_string(int64_t(version))});
|
||||
if (muts.size() != 1) {
|
||||
@@ -2967,6 +3073,11 @@ future<service::topology> system_keyspace::load_topology_state(const std::unorde
|
||||
ret.committed_cdc_generations = decode_cdc_generations_ids(deserialize_set_column(*topology(), some_row, "committed_cdc_generations"));
|
||||
}
|
||||
|
||||
if (some_row.has("new_keyspace_rf_change_data")) {
|
||||
ret.new_keyspace_rf_change_ks_name = some_row.get_as<sstring>("new_keyspace_rf_change_ks_name");
|
||||
ret.new_keyspace_rf_change_data = some_row.get_map<sstring,sstring>("new_keyspace_rf_change_data");
|
||||
}
|
||||
|
||||
if (!ret.committed_cdc_generations.empty()) {
|
||||
// Sanity check for CDC generation data consistency.
|
||||
auto gen_id = ret.committed_cdc_generations.back();
|
||||
@@ -2998,6 +3109,10 @@ future<service::topology> system_keyspace::load_topology_state(const std::unorde
|
||||
ret.global_request.emplace(req);
|
||||
}
|
||||
|
||||
if (some_row.has("global_topology_request_id")) {
|
||||
ret.global_request_id = some_row.get_as<utils::UUID>("global_topology_request_id");
|
||||
}
|
||||
|
||||
if (some_row.has("enabled_features")) {
|
||||
ret.enabled_features = decode_features(deserialize_set_column(*topology(), some_row, "enabled_features"));
|
||||
}
|
||||
|
||||
@@ -14,7 +14,6 @@
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "gms/gossiper.hh"
|
||||
#include "schema/schema_fwd.hh"
|
||||
#include "utils/UUID.hh"
|
||||
@@ -180,6 +179,12 @@ public:
|
||||
static constexpr auto TABLETS = "tablets";
|
||||
static constexpr auto SERVICE_LEVELS_V2 = "service_levels_v2";
|
||||
|
||||
// auth
|
||||
static constexpr auto ROLES = "roles";
|
||||
static constexpr auto ROLE_MEMBERS = "role_members";
|
||||
static constexpr auto ROLE_ATTRIBUTES = "role_attributes";
|
||||
static constexpr auto ROLE_PERMISSIONS = "role_permissions";
|
||||
|
||||
struct v3 {
|
||||
static constexpr auto BATCHES = "batches";
|
||||
static constexpr auto PAXOS = "paxos";
|
||||
@@ -267,6 +272,12 @@ public:
|
||||
static schema_ptr tablets();
|
||||
static schema_ptr service_levels_v2();
|
||||
|
||||
// auth
|
||||
static schema_ptr roles();
|
||||
static schema_ptr role_members();
|
||||
static schema_ptr role_attributes();
|
||||
static schema_ptr role_permissions();
|
||||
|
||||
static table_schema_version generate_schema_version(table_id table_id, uint16_t offset = 0);
|
||||
|
||||
future<> build_bootstrap_info();
|
||||
@@ -310,7 +321,9 @@ public:
|
||||
template <typename T>
|
||||
future<std::optional<T>> get_scylla_local_param_as(const sstring& key);
|
||||
|
||||
static std::vector<schema_ptr> auth_tables();
|
||||
static std::vector<schema_ptr> all_tables(const db::config& cfg);
|
||||
|
||||
future<> make(
|
||||
locator::effective_replication_map_factory&,
|
||||
replica::database&);
|
||||
@@ -577,11 +590,16 @@ public:
|
||||
// returns the corresponding mutation. Otherwise returns nullopt.
|
||||
future<std::optional<mutation>> get_group0_schema_version();
|
||||
|
||||
enum class auth_version_t: int64_t {
|
||||
v1 = 1,
|
||||
v2 = 2,
|
||||
};
|
||||
|
||||
// If the `auth_version` key in `system.scylla_local` is present (either live or tombstone),
|
||||
// returns the corresponding mutation. Otherwise returns nullopt.
|
||||
future<std::optional<mutation>> get_auth_version_mutation();
|
||||
future<mutation> make_auth_version_mutation(api::timestamp_type ts, db::system_auth_keyspace::version_t version);
|
||||
future<system_auth_keyspace::version_t> get_auth_version();
|
||||
future<mutation> make_auth_version_mutation(api::timestamp_type ts, auth_version_t version);
|
||||
future<auth_version_t> get_auth_version();
|
||||
|
||||
future<> sstables_registry_create_entry(sstring location, sstring status, sstables::sstable_state state, sstables::entry_descriptor desc);
|
||||
future<> sstables_registry_update_entry_status(sstring location, sstables::generation_type gen, sstring status);
|
||||
|
||||
@@ -1625,25 +1625,26 @@ get_view_natural_endpoint(
|
||||
}
|
||||
}
|
||||
|
||||
auto& view_topology = view_erm->get_token_metadata_ptr()->get_topology();
|
||||
for (auto&& view_endpoint : view_erm->get_replicas(view_token)) {
|
||||
if (use_legacy_self_pairing) {
|
||||
auto it = std::find(base_endpoints.begin(), base_endpoints.end(),
|
||||
view_endpoint);
|
||||
// If this base replica is also one of the view replicas, we use
|
||||
// ourselves as the view replica.
|
||||
if (view_endpoint == me) {
|
||||
if (view_endpoint == me && it != base_endpoints.end()) {
|
||||
return topology.my_address();
|
||||
}
|
||||
// We have to remove any endpoint which is shared between the base
|
||||
// and the view, as it will select itself and throw off the counts
|
||||
// otherwise.
|
||||
auto it = std::find(base_endpoints.begin(), base_endpoints.end(),
|
||||
view_endpoint);
|
||||
if (it != base_endpoints.end()) {
|
||||
base_endpoints.erase(it);
|
||||
} else if (!network_topology || topology.get_datacenter(view_endpoint) == my_datacenter) {
|
||||
} else if (!network_topology || view_topology.get_datacenter(view_endpoint) == my_datacenter) {
|
||||
view_endpoints.push_back(view_endpoint);
|
||||
}
|
||||
} else {
|
||||
if (!network_topology || topology.get_datacenter(view_endpoint) == my_datacenter) {
|
||||
if (!network_topology || view_topology.get_datacenter(view_endpoint) == my_datacenter) {
|
||||
view_endpoints.push_back(view_endpoint);
|
||||
}
|
||||
}
|
||||
@@ -1658,7 +1659,7 @@ get_view_natural_endpoint(
|
||||
return {};
|
||||
}
|
||||
auto replica = view_endpoints[base_it - base_endpoints.begin()];
|
||||
return topology.get_node(replica).endpoint();
|
||||
return view_topology.get_node(replica).endpoint();
|
||||
}
|
||||
|
||||
static future<> apply_to_remote_endpoints(service::storage_proxy& proxy, locator::effective_replication_map_ptr ermp,
|
||||
@@ -1715,6 +1716,7 @@ future<> view_update_generator::mutate_MV(
|
||||
{
|
||||
auto base_ermp = base->table().get_effective_replication_map();
|
||||
static constexpr size_t max_concurrent_updates = 128;
|
||||
co_await utils::get_local_injector().inject("delay_before_get_view_natural_endpoint", 8000ms);
|
||||
co_await max_concurrent_for_each(view_updates, max_concurrent_updates,
|
||||
[this, base_token, &stats, &cf_stats, tr_state, &pending_view_updates, allow_hints, wait_for_all, base_ermp] (frozen_mutation_and_schema mut) mutable -> future<> {
|
||||
auto view_token = dht::get_token(*mut.s, mut.fm.key());
|
||||
|
||||
@@ -85,7 +85,7 @@ redirects: setup
|
||||
# Preview commands
|
||||
.PHONY: preview
|
||||
preview: setup
|
||||
$(POETRY) run sphinx-autobuild -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml --host $(PREVIEW_HOST) --port 5500 --ignore *.csv --ignore *.yaml
|
||||
$(POETRY) run sphinx-autobuild -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml --host $(PREVIEW_HOST) --port 5500 --ignore *.csv --ignore *.json --ignore *.yaml
|
||||
|
||||
.PHONY: multiversionpreview
|
||||
multiversionpreview: multiversion
|
||||
|
||||
@@ -1,23 +1,19 @@
|
||||
import os
|
||||
import re
|
||||
import yaml
|
||||
from typing import Any, Dict, List
|
||||
|
||||
import jinja2
|
||||
|
||||
from sphinx import addnodes
|
||||
from sphinx.application import Sphinx
|
||||
from sphinx.directives import ObjectDescription
|
||||
from sphinx.util import logging, ws_re
|
||||
from sphinx.util.display import status_iterator
|
||||
from sphinx.util.docfields import Field
|
||||
from sphinx.util.docutils import switch_source_input, SphinxDirective
|
||||
from sphinx.util.nodes import make_id, nested_parse_with_titles
|
||||
from sphinx.jinja2glue import BuiltinTemplateLoader
|
||||
from docutils import nodes
|
||||
from docutils.parsers.rst import directives
|
||||
from docutils.statemachine import StringList
|
||||
|
||||
from utils import maybe_add_filters
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
class DBConfigParser:
|
||||
@@ -152,51 +148,6 @@ class DBConfigParser:
|
||||
return DBConfigParser.all_properties[name]
|
||||
|
||||
|
||||
def readable_desc(description: str) -> str:
|
||||
"""
|
||||
This function is deprecated and maintained only for backward compatibility
|
||||
with previous versions. Use ``readable_desc_rst``instead.
|
||||
"""
|
||||
return (
|
||||
description.replace("\\n", "")
|
||||
.replace('<', '<')
|
||||
.replace('>', '>')
|
||||
.replace("\n", "<br>")
|
||||
.replace("\\t", "- ")
|
||||
.replace('"', "")
|
||||
)
|
||||
|
||||
|
||||
def readable_desc_rst(description):
|
||||
indent = ' ' * 3
|
||||
lines = description.split('\n')
|
||||
cleaned_lines = []
|
||||
|
||||
for line in lines:
|
||||
|
||||
cleaned_line = line.replace('\\n', '\n')
|
||||
|
||||
if line.endswith('"'):
|
||||
cleaned_line = cleaned_line[:-1] + ' '
|
||||
|
||||
cleaned_line = cleaned_line.lstrip()
|
||||
cleaned_line = cleaned_line.replace('"', '')
|
||||
|
||||
if cleaned_line != '':
|
||||
cleaned_line = indent + cleaned_line
|
||||
cleaned_lines.append(cleaned_line)
|
||||
|
||||
return ''.join(cleaned_lines)
|
||||
|
||||
|
||||
def maybe_add_filters(builder):
|
||||
env = builder.templates.environment
|
||||
if 'readable_desc' not in env.filters:
|
||||
env.filters['readable_desc'] = readable_desc
|
||||
|
||||
if 'readable_desc_rst' not in env.filters:
|
||||
env.filters['readable_desc_rst'] = readable_desc_rst
|
||||
|
||||
|
||||
class ConfigOption(ObjectDescription):
|
||||
has_content = True
|
||||
|
||||
188
docs/_ext/scylladb_metrics.py
Normal file
188
docs/_ext/scylladb_metrics.py
Normal file
@@ -0,0 +1,188 @@
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
from sphinx import addnodes
|
||||
from sphinx.directives import ObjectDescription
|
||||
from sphinx.util.docfields import Field
|
||||
from sphinx.util.docutils import switch_source_input
|
||||
from sphinx.util.nodes import make_id
|
||||
from sphinx.util import logging, ws_re
|
||||
from docutils.parsers.rst import Directive, directives
|
||||
from docutils.statemachine import StringList
|
||||
from sphinxcontrib.datatemplates.directive import DataTemplateJSON
|
||||
from utils import maybe_add_filters
|
||||
|
||||
sys.path.insert(0, os.path.abspath("../../scripts"))
|
||||
import scripts.get_description as metrics
|
||||
|
||||
LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class MetricsProcessor:
|
||||
|
||||
MARKER = "::description"
|
||||
|
||||
def _create_output_directory(self, app, metrics_directory):
|
||||
output_directory = os.path.join(app.builder.srcdir, metrics_directory)
|
||||
os.makedirs(output_directory, exist_ok=True)
|
||||
return output_directory
|
||||
|
||||
def _process_single_file(self, file_path, destination_path, metrics_config_path):
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
if self.MARKER in content and not os.path.exists(destination_path):
|
||||
try:
|
||||
metrics_file = metrics.get_metrics_from_file(file_path, "scylla", metrics.get_metrics_information(metrics_config_path))
|
||||
with open(destination_path, 'w+', encoding='utf-8') as f:
|
||||
json.dump(metrics_file, f, indent=4)
|
||||
except SystemExit:
|
||||
LOGGER.info(f'Skipping file: {file_path}')
|
||||
except Exception as error:
|
||||
LOGGER.info(error)
|
||||
|
||||
def _process_metrics_files(self, repo_dir, output_directory, metrics_config_path):
|
||||
for root, _, files in os.walk(repo_dir):
|
||||
for file in files:
|
||||
if file.endswith(".cc"):
|
||||
file_path = os.path.join(root, file)
|
||||
file_name = os.path.splitext(file)[0] + ".json"
|
||||
destination_path = os.path.join(output_directory, file_name)
|
||||
self._process_single_file(file_path, destination_path, metrics_config_path)
|
||||
|
||||
def run(self, app, exception=None):
|
||||
repo_dir = os.path.abspath(os.path.join(app.srcdir, ".."))
|
||||
metrics_config_path = os.path.join(repo_dir, app.config.scylladb_metrics_config_path)
|
||||
output_directory = self._create_output_directory(app, app.config.scylladb_metrics_directory)
|
||||
|
||||
self._process_metrics_files(repo_dir, output_directory, metrics_config_path)
|
||||
|
||||
|
||||
class MetricsTemplateDirective(DataTemplateJSON):
|
||||
option_spec = DataTemplateJSON.option_spec.copy()
|
||||
option_spec["title"] = lambda x: x
|
||||
|
||||
def _make_context(self, data, config, env):
|
||||
context = super()._make_context(data, config, env)
|
||||
context["title"] = self.options.get("title")
|
||||
return context
|
||||
|
||||
def run(self):
|
||||
return super().run()
|
||||
|
||||
|
||||
class MetricsOption(ObjectDescription):
|
||||
has_content = True
|
||||
required_arguments = 1
|
||||
optional_arguments = 0
|
||||
final_argument_whitespace = False
|
||||
option_spec = {
|
||||
'type': directives.unchanged,
|
||||
'component': directives.unchanged,
|
||||
'key': directives.unchanged,
|
||||
'source': directives.unchanged,
|
||||
}
|
||||
|
||||
doc_field_types = [
|
||||
Field('type', label='Type', has_arg=False, names=('type',)),
|
||||
Field('component', label='Component', has_arg=False, names=('component',)),
|
||||
Field('key', label='Key', has_arg=False, names=('key',)),
|
||||
Field('source', label='Source', has_arg=False, names=('source',)),
|
||||
]
|
||||
|
||||
def handle_signature(self, sig: str, signode: addnodes.desc_signature):
|
||||
signode.clear()
|
||||
signode += addnodes.desc_name(sig, sig)
|
||||
return ws_re.sub(' ', sig)
|
||||
|
||||
@property
|
||||
def env(self):
|
||||
return self.state.document.settings.env
|
||||
|
||||
def _render(self, name, option_type, component, key, source):
|
||||
item = {'name': name, 'type': option_type, 'component': component, 'key': key, 'source': source }
|
||||
template = self.config.scylladb_metrics_option_template
|
||||
return self.env.app.builder.templates.render(template, item)
|
||||
|
||||
def transform_content(self, contentnode: addnodes.desc_content) -> None:
|
||||
name = self.arguments[0]
|
||||
option_type = self.options.get('type', '')
|
||||
component = self.options.get('component', '')
|
||||
key = self.options.get('key', '')
|
||||
source_file = self.options.get('source', '')
|
||||
_, lineno = self.get_source_info()
|
||||
source = f'scylladb_metrics:{lineno}:<{name}>'
|
||||
fields = StringList(self._render(name, option_type, component, key, source_file).splitlines(), source=source, parent_offset=lineno)
|
||||
with switch_source_input(self.state, fields):
|
||||
self.state.nested_parse(fields, 0, contentnode)
|
||||
|
||||
def add_target_and_index(self, name: str, sig: str, signode: addnodes.desc_signature) -> None:
|
||||
node_id = make_id(self.env, self.state.document, self.objtype, name)
|
||||
signode['ids'].append(node_id)
|
||||
self.state.document.note_explicit_target(signode)
|
||||
entry = f'{name}; metrics option'
|
||||
self.indexnode['entries'].append(('pair', entry, node_id, '', None))
|
||||
self.env.get_domain('std').note_object(self.objtype, name, node_id, location=signode)
|
||||
|
||||
class MetricsDirective(Directive):
|
||||
TEMPLATE = 'metrics.tmpl'
|
||||
required_arguments = 0
|
||||
optional_arguments = 1
|
||||
option_spec = {'template': directives.path}
|
||||
has_content = True
|
||||
|
||||
def _process_file(self, file, relative_path_from_current_rst):
|
||||
data_directive = MetricsTemplateDirective(
|
||||
name=self.name,
|
||||
arguments=[os.path.join(relative_path_from_current_rst, file)],
|
||||
options=self.options,
|
||||
content=self.content,
|
||||
lineno=self.lineno,
|
||||
content_offset=self.content_offset,
|
||||
block_text=self.block_text,
|
||||
state=self.state,
|
||||
state_machine=self.state_machine,
|
||||
)
|
||||
data_directive.options["template"] = self.options.get('template', self.TEMPLATE)
|
||||
data_directive.options["title"] = file.replace('_', ' ').replace('.json','').capitalize()
|
||||
return data_directive.run()
|
||||
|
||||
def _get_relative_path(self, output_directory, app, docname):
|
||||
current_rst_path = os.path.join(app.builder.srcdir, docname + ".rst")
|
||||
return os.path.relpath(output_directory, os.path.dirname(current_rst_path))
|
||||
|
||||
|
||||
def run(self):
|
||||
maybe_add_filters(self.state.document.settings.env.app.builder)
|
||||
app = self.state.document.settings.env.app
|
||||
docname = self.state.document.settings.env.docname
|
||||
metrics_directory = os.path.join(app.builder.srcdir, app.config.scylladb_metrics_directory)
|
||||
output = []
|
||||
try:
|
||||
relative_path_from_current_rst = self._get_relative_path(metrics_directory, app, docname)
|
||||
files = os.listdir(metrics_directory)
|
||||
for _, file in enumerate(files):
|
||||
output.extend(self._process_file(file, relative_path_from_current_rst))
|
||||
except Exception as error:
|
||||
LOGGER.info(error)
|
||||
return output
|
||||
|
||||
def setup(app):
|
||||
app.add_config_value("scylladb_metrics_directory", default="_data/metrics", rebuild="html")
|
||||
app.add_config_value("scylladb_metrics_config_path", default='scripts/metrics-config.yml', rebuild="html")
|
||||
app.add_config_value('scylladb_metrics_option_template', default='metrics_option.tmpl', rebuild='html', types=[str])
|
||||
app.connect("builder-inited", MetricsProcessor().run)
|
||||
app.add_object_type(
|
||||
'metrics_option',
|
||||
'metrics_option',
|
||||
objname='metrics option')
|
||||
app.add_directive_to_domain('std', 'metrics_option', MetricsOption, override=True)
|
||||
app.add_directive("metrics_option", MetricsOption)
|
||||
app.add_directive("scylladb_metrics", MetricsDirective)
|
||||
|
||||
|
||||
return {
|
||||
"version": "0.1",
|
||||
"parallel_read_safe": True,
|
||||
"parallel_write_safe": True,
|
||||
}
|
||||
|
||||
44
docs/_ext/utils.py
Normal file
44
docs/_ext/utils.py
Normal file
@@ -0,0 +1,44 @@
|
||||
def readable_desc(description: str) -> str:
|
||||
"""
|
||||
This function is deprecated and maintained only for backward compatibility
|
||||
with previous versions. Use ``readable_desc_rst``instead.
|
||||
"""
|
||||
return (
|
||||
description.replace("\\n", "")
|
||||
.replace('<', '<')
|
||||
.replace('>', '>')
|
||||
.replace("\n", "<br>")
|
||||
.replace("\\t", "- ")
|
||||
.replace('"', "")
|
||||
)
|
||||
|
||||
|
||||
def readable_desc_rst(description):
|
||||
indent = ' ' * 3
|
||||
lines = description.split('\n')
|
||||
cleaned_lines = []
|
||||
|
||||
for line in lines:
|
||||
|
||||
cleaned_line = line.replace('\\n', '\n')
|
||||
|
||||
if line.endswith('"'):
|
||||
cleaned_line = cleaned_line[:-1] + ' '
|
||||
|
||||
cleaned_line = cleaned_line.lstrip()
|
||||
cleaned_line = cleaned_line.replace('"', '')
|
||||
|
||||
if cleaned_line != '':
|
||||
cleaned_line = indent + cleaned_line
|
||||
cleaned_lines.append(cleaned_line)
|
||||
|
||||
return ''.join(cleaned_lines)
|
||||
|
||||
|
||||
def maybe_add_filters(builder):
|
||||
env = builder.templates.environment
|
||||
if 'readable_desc' not in env.filters:
|
||||
env.filters['readable_desc'] = readable_desc
|
||||
|
||||
if 'readable_desc_rst' not in env.filters:
|
||||
env.filters['readable_desc_rst'] = readable_desc_rst
|
||||
2
docs/_static/css/custom.css
vendored
2
docs/_static/css/custom.css
vendored
@@ -41,6 +41,6 @@ dl dt:hover > a.headerlink {
|
||||
visibility: visible;
|
||||
}
|
||||
|
||||
dl.confval {
|
||||
dl.confval, dl.metrics_option {
|
||||
border-bottom: 1px solid #cacaca;
|
||||
}
|
||||
|
||||
19
docs/_templates/metrics.tmpl
vendored
Normal file
19
docs/_templates/metrics.tmpl
vendored
Normal file
@@ -0,0 +1,19 @@
|
||||
.. -*- mode: rst -*-
|
||||
|
||||
{{title}}
|
||||
{{ '-' * title|length }}
|
||||
|
||||
{% if data %}
|
||||
{% for key, value in data.items() %}
|
||||
.. _metricsprop_{{ key }}:
|
||||
|
||||
.. metrics_option:: {{ key }}
|
||||
:type: {{value[0]}}
|
||||
:source: {{value[4]}}
|
||||
:component: {{value[2]}}
|
||||
:key: {{value[3]}}
|
||||
|
||||
{{value[1] | readable_desc_rst}}
|
||||
|
||||
{% endfor %}
|
||||
{% endif %}
|
||||
3
docs/_templates/metrics_option.tmpl
vendored
Normal file
3
docs/_templates/metrics_option.tmpl
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
{% if type %}* **Type:** ``{{ type }}``{% endif %}
|
||||
{% if component %}* **Component:** ``{{ component }}``{% endif %}
|
||||
{% if key %}* **Key:** ``{{ key }}``{% endif %}
|
||||
@@ -21,6 +21,9 @@
|
||||
# remove the Open Source vs. Enterprise Matrix from the Open Source docs
|
||||
|
||||
/stable/reference/versions-matrix-enterprise-oss.html: https://enterprise.docs.scylladb.com/stable/reference/versions-matrix-enterprise-oss.html
|
||||
# Remove the outdated Troubleshooting article
|
||||
|
||||
/stable/troubleshooting/error-messages/create-mv.html: /stable/troubleshooting/index.html
|
||||
|
||||
# Remove the Learn page (replaced with a link to a page in a different repo)
|
||||
|
||||
|
||||
@@ -117,9 +117,9 @@ request. Alternator can then validate the authenticity and authorization of
|
||||
each request using a known list of authorized key pairs.
|
||||
|
||||
In the current implementation, the user stores the list of allowed key pairs
|
||||
in the `system_auth_v2.roles` table: The access key ID is the `role` column, and
|
||||
in the `system.roles` table: The access key ID is the `role` column, and
|
||||
the secret key is the `salted_hash`, i.e., the secret key can be found by
|
||||
`SELECT salted_hash from system_auth_v2.roles WHERE role = ID;`.
|
||||
`SELECT salted_hash from system.roles WHERE role = ID;`.
|
||||
|
||||
<!--- REMOVE IN FUTURE VERSIONS - Remove the note below in version 6.1 -->
|
||||
|
||||
|
||||
BIN
docs/architecture/images/tablets-cluster.png
Normal file
BIN
docs/architecture/images/tablets-cluster.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 18 KiB |
BIN
docs/architecture/images/tablets-load-balancing.png
Normal file
BIN
docs/architecture/images/tablets-load-balancing.png
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 21 KiB |
@@ -4,6 +4,7 @@ ScyllaDB Architecture
|
||||
:titlesonly:
|
||||
:hidden:
|
||||
|
||||
Data Distribution with Tablets </architecture/tablets>
|
||||
ScyllaDB Ring Architecture <ringarchitecture/index/>
|
||||
ScyllaDB Fault Tolerance <architecture-fault-tolerance>
|
||||
Consistency Level Console Demo <console-CL-full-demo>
|
||||
@@ -13,6 +14,7 @@ ScyllaDB Architecture
|
||||
Raft Consensus Algorithm in ScyllaDB </architecture/raft>
|
||||
|
||||
|
||||
* :doc:`Data Distribution with Tablets </architecture/tablets/>` - Tablets in ScyllaDB
|
||||
* :doc:`ScyllaDB Ring Architecture </architecture/ringarchitecture/index/>` - High-Level view of ScyllaDB Ring Architecture
|
||||
* :doc:`ScyllaDB Fault Tolerance </architecture/architecture-fault-tolerance>` - Deep dive into ScyllaDB Fault Tolerance
|
||||
* :doc:`Consistency Level Console Demo </architecture/console-CL-full-demo>` - Console Demos of Consistency Level Settings
|
||||
|
||||
128
docs/architecture/tablets.rst
Normal file
128
docs/architecture/tablets.rst
Normal file
@@ -0,0 +1,128 @@
|
||||
=========================================
|
||||
Data Distribution with Tablets
|
||||
=========================================
|
||||
|
||||
A ScyllaDB cluster is a group of interconnected nodes. The data of the entire
|
||||
cluster has to be distributed as evenly as possible across those nodes.
|
||||
|
||||
ScyllaDB is designed to ensure a balanced distribution of data by storing data
|
||||
in tablets. When you add or remove nodes to scale your cluster, add or remove
|
||||
a datacenter, or replace a node, tablets are moved between the nodes to keep
|
||||
the same number on each node. In addition, tablets are balanced across shards
|
||||
in each node.
|
||||
|
||||
This article explains the concept of tablets and how they let you scale your
|
||||
cluster quickly and seamlessly.
|
||||
|
||||
Data Distribution
|
||||
-------------------
|
||||
|
||||
ScyllaDB distributes data by splitting tables into tablets. Each tablet has
|
||||
its replicas on different nodes, depending on the RF (replication factor). Each
|
||||
partition of a table is mapped to a single tablet in a deterministic way. When you
|
||||
query or update the data, ScyllaDB can quickly identify the tablet that stores
|
||||
the relevant partition.
|
||||
|
||||
The following example shows a 3-node cluster with a replication factor (RF) of
|
||||
3. The data is stored in a table (Table 1) with two rows. Both rows are mapped
|
||||
to one tablet (T1) with replicas on all three nodes.
|
||||
|
||||
.. image:: images/tablets-cluster.png
|
||||
|
||||
.. TODO - Add a section about tablet splitting when there are more triggers,
|
||||
like throughput. In 6.0, tablets only split when reaching a threshold size
|
||||
(the threshold is based on the average tablet data size).
|
||||
|
||||
Load Balancing
|
||||
==================
|
||||
|
||||
ScyllaDB autonomously moves tablets to balance the load. This process
|
||||
is managed by a load balancer mechanism and happens independently of
|
||||
the administrator. The tablet load balancer decides where to migrate
|
||||
the tablets, either within the same node to balance the shards or across
|
||||
the nodes to balance the global load in the cluster.
|
||||
|
||||
As a table grows, each tablet can split into two, creating a new tablet.
|
||||
The load balancer can migrate the split halves independently to different nodes
|
||||
or shards.
|
||||
|
||||
The load-balancing process takes place in the background and is performed
|
||||
without any service interruption.
|
||||
|
||||
Scaling Out
|
||||
=============
|
||||
|
||||
A tablet can be dynamically migrated to an existing node or a newly added
|
||||
empty node. Paired with consistent topology updates with Raft, tablets allow
|
||||
you to add multiple nodes simultaneously. After nodes are added to the cluster,
|
||||
existing nodes stream data to the new ones, and the system load eventually
|
||||
converges to an even distribution as the process completes.
|
||||
|
||||
With tablets enabled, manual cleanup is not required.
|
||||
Cleanup is performed automatically per tablet,
|
||||
making tablets-based streaming user-independent and safer.
|
||||
|
||||
In addition, tablet cleanup is lightweight and efficient, as it doesn't
|
||||
involve rewriting SStables on the existing nodes, which makes data ownership
|
||||
changes faster. This dramatically reduces
|
||||
the impact of cleanup on the performance of user queries.
|
||||
|
||||
The following diagrams show migrating tablets from heavily loaded nodes A and B
|
||||
to a new node.
|
||||
|
||||
.. image:: images/tablets-load-balancing.png
|
||||
|
||||
.. _tablets-enable-tablets:
|
||||
|
||||
Enabling Tablets
|
||||
-------------------
|
||||
|
||||
Tablets are enabled or disabled on the keyspace level. When you create a new
|
||||
keyspace, tablets are enabled by default.
|
||||
``NetworkTopologyStrategy``, recommended for all keyspaces,
|
||||
is *required* when creating a keyspace with tablets enabled.
|
||||
|
||||
You can create a keyspace with tablets
|
||||
disabled with the ``tablets = {'enabled': false}`` option:
|
||||
|
||||
.. code:: cql
|
||||
|
||||
CREATE KEYSPACE my_keyspace
|
||||
WITH replication = {
|
||||
'class': 'NetworkTopologyStrategy',
|
||||
'replication_factor': 3,
|
||||
} AND tablets = {
|
||||
'enabled': false
|
||||
};
|
||||
|
||||
|
||||
|
||||
.. warning::
|
||||
|
||||
You cannot ALTER a keyspace to enable or disable tablets.
|
||||
The only way to update the tablet support for a keyspace is to DROP it
|
||||
(losing the schema and data) and then recreate it after redefining
|
||||
the keyspace schema with ``tablets = { 'enabled': false }`` or
|
||||
``tablets = { 'enabled': true }``.
|
||||
|
||||
Limitations and Unsupported Features
|
||||
--------------------------------------
|
||||
|
||||
The following ScyllaDB features are not supported if a keyspace has tablets
|
||||
enabled:
|
||||
|
||||
* Counters
|
||||
* Change Data Capture (CDC)
|
||||
* Lightweight Transactions (LWT)
|
||||
* Alternator (as it uses LWT)
|
||||
|
||||
If you plan to use any of the above features, CREATE your keyspace
|
||||
:ref:`with tablets disabled <tablets-enable-tablets>`.
|
||||
|
||||
Resharding in keyspaces with tablets enabled has the following limitations:
|
||||
|
||||
* ScyllaDB does not support reducing the number of shards after node restart.
|
||||
* ScyllaDB does not reshard data on node restart. Tablet replicas remain
|
||||
allocated to the old shards on restart and are subject to background
|
||||
load-balancing to additional shards after restart completes and the node
|
||||
starts serving CQL.
|
||||
@@ -44,7 +44,8 @@ extensions = [
|
||||
"scylladb_gcp_images",
|
||||
"scylladb_include_flag",
|
||||
"scylladb_dynamic_substitutions",
|
||||
"scylladb_swagger"
|
||||
"scylladb_swagger",
|
||||
"scylladb_metrics"
|
||||
]
|
||||
|
||||
# The suffix(es) of source filenames.
|
||||
@@ -127,6 +128,10 @@ scylladb_swagger_origin_api = "../api"
|
||||
scylladb_swagger_template = "swagger.tmpl"
|
||||
scylladb_swagger_inc_template = "swagger_inc.tmpl"
|
||||
|
||||
# -- Options for scylladb_metrics
|
||||
scylladb_metrics_directory = "_data/opensource/metrics"
|
||||
|
||||
|
||||
# -- Options for HTML output
|
||||
|
||||
# The theme to use for pages.
|
||||
|
||||
@@ -107,12 +107,6 @@ For example:
|
||||
WITH replication = {'class': 'NetworkTopologyStrategy', 'DC1' : 1, 'DC2' : 3}
|
||||
AND durable_writes = true;
|
||||
|
||||
.. TODO Add a link to the description of minimum_keyspace_rf when the ScyllaDB options section is added to the docs.
|
||||
|
||||
You can configure the minimum acceptable replication factor using the ``minimum_keyspace_rf`` option.
|
||||
Attempting to create a keyspace with a replication factor lower than the value set with
|
||||
``minimum_keyspace_rf`` will return an error (the default value is 0).
|
||||
|
||||
The supported ``options`` are:
|
||||
|
||||
=================== ========== =========== ========= ===================================================================
|
||||
@@ -142,7 +136,12 @@ query latency. For a production ready strategy, see *NetworkTopologyStrategy* .
|
||||
========================= ====== ======= =============================================
|
||||
sub-option type since description
|
||||
========================= ====== ======= =============================================
|
||||
``'replication_factor'`` int all The number of replicas to store per range
|
||||
``'replication_factor'`` int all The number of replicas to store per range.
|
||||
|
||||
The replication factor should be equal to
|
||||
or lower than the number of nodes.
|
||||
Configuring a higher RF may prevent
|
||||
creating tables in that keyspace.
|
||||
========================= ====== ======= =============================================
|
||||
|
||||
.. note:: Using NetworkTopologyStrategy is recommended. Using SimpleStrategy will make it harder to add Data Center in the future.
|
||||
@@ -166,6 +165,11 @@ sub-option type description
|
||||
definitions or explicit datacenter settings.
|
||||
For example, to have three replicas per
|
||||
datacenter, supply this with a value of 3.
|
||||
|
||||
The replication factor configured for a DC
|
||||
should be equal to or lower than the number
|
||||
of nodes in that DC. Configuring a higher RF
|
||||
may prevent creating tables in that keyspace.
|
||||
===================================== ====== =============================================
|
||||
|
||||
Note that when ``ALTER`` ing keyspaces and supplying ``replication_factor``,
|
||||
@@ -289,6 +293,17 @@ For instance::
|
||||
|
||||
The supported options are the same as :ref:`creating a keyspace <create-keyspace-statement>`.
|
||||
|
||||
ALTER KEYSPACE with Tablets :label-caution:`Experimental`
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Modifying a keyspace with tablets enabled is possible and doesn't require any special CQL syntax. However, there are some limitations:
|
||||
|
||||
- The replication factor (RF) can be increased or decreased by at most 1 at a time. To reach the desired RF value, modify the RF repeatedly.
|
||||
- The ``ALTER`` statement rejects the ``replication_factor`` tag. List the DCs explicitly when altering a keyspace. See :ref:`NetworkTopologyStrategy <replication-strategy>`.
|
||||
- If there's any other ongoing global topology operation, executing the ``ALTER`` statement will fail (with an explicit and specific error) and needs to be repeated.
|
||||
- The ``ALTER`` statement may take longer than the regular query timeout, and even if it times out, it will continue to execute in the background.
|
||||
- The replication strategy cannot be modified, as keyspaces with tablets only support ``NetworkTopologyStrategy``.
|
||||
|
||||
.. _drop-keyspace-statement:
|
||||
|
||||
DROP KEYSPACE
|
||||
|
||||
@@ -341,7 +341,7 @@ The `--authenticator` command lines option allows to provide the authenticator c
|
||||
|
||||
#### `--authorizer AUTHORIZER`
|
||||
|
||||
The `--authorizer` command lines option allows to provide the authorizer class ScyllaDB will use. By default ScyllaDB uses the `AllowAllAuthorizer` which allows any action to any user. The second option is using the `CassandraAuthorizer` parameter, which stores permissions in `system_auth_v2.permissions` table.
|
||||
The `--authorizer` command-line option allows you to provide the authorizer class ScyllaDB will use. By default ScyllaDB uses the `AllowAllAuthorizer` which allows any action to any user. The second option is using the `CassandraAuthorizer` parameter, which stores permissions in the `system.permissions` table.
|
||||
|
||||
**Since: 2.3**
|
||||
|
||||
|
||||
@@ -6,7 +6,7 @@ There are two system tables that are used to facilitate the service level featur
|
||||
### Service Level Attachment Table
|
||||
|
||||
```
|
||||
CREATE TABLE system_auth_v2.role_attributes (
|
||||
CREATE TABLE system.role_attributes (
|
||||
role text,
|
||||
attribute_name text,
|
||||
attribute_value text,
|
||||
@@ -23,7 +23,7 @@ So for example in order to find out which `service_level` is attached to role `r
|
||||
one can run the following query:
|
||||
|
||||
```
|
||||
SELECT * FROM system_auth_v2.role_attributes WHERE role='r' and attribute_name='service_level'
|
||||
SELECT * FROM system.role_attributes WHERE role='r' and attribute_name='service_level'
|
||||
|
||||
```
|
||||
|
||||
@@ -157,4 +157,4 @@ The command displays a table with: option name, effective service level the valu
|
||||
----------------------+-------------------------+-------------
|
||||
workload_type | sl2 | batch
|
||||
timeout | sl1 | 2s
|
||||
```
|
||||
```
|
||||
|
||||
@@ -549,7 +549,10 @@ CREATE TABLE system.topology (
|
||||
committed_cdc_generations set<tuple<timestamp, timeuuid>> static,
|
||||
unpublished_cdc_generations set<tuple<timestamp, timeuuid>> static,
|
||||
global_topology_request text static,
|
||||
global_topology_request_id timeuuid static,
|
||||
new_cdc_generation_data_uuid timeuuid static,
|
||||
new_keyspace_rf_change_ks_name text static,
|
||||
new_keyspace_rf_change_data frozen<map<text, text>> static,
|
||||
PRIMARY KEY (key, host_id)
|
||||
)
|
||||
```
|
||||
@@ -575,8 +578,11 @@ There are also a few static columns for cluster-global properties:
|
||||
- `committed_cdc_generations` - the IDs of the committed CDC generations
|
||||
- `unpublished_cdc_generations` - the IDs of the committed yet unpublished CDC generations
|
||||
- `global_topology_request` - if set, contains one of the supported global topology requests
|
||||
- `global_topology_request_id` - if set, contains global topology request's id, which is a new group0's state id
|
||||
- `new_cdc_generation_data_uuid` - used in `commit_cdc_generation` state, the time UUID of the generation to be committed
|
||||
- `upgrade_state` - describes the progress of the upgrade to raft-based topology.
|
||||
- 'new_keyspace_rf_change_ks_name' - the name of the KS that is being the target of the scheduled ALTER KS statement
|
||||
- 'new_keyspace_rf_change_data' - the KS options to be used when executing the scheduled ALTER KS statement
|
||||
|
||||
# Join procedure
|
||||
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
You can `build ScyllaDB from source <https://github.com/scylladb/scylladb#build-prerequisites>`_ on other x86_64 or aarch64 platforms, without any guarantees.
|
||||
|
||||
+----------------------------+-------------+---------------+---------------+
|
||||
| Linux Distributions |Ubuntu | Debian | Rocky / |
|
||||
| | | | RHEL |
|
||||
+----------------------------+------+------+-------+-------+-------+-------+
|
||||
| ScyllaDB Version / Version |20.04 |22.04 | 10 | 11 | 8 | 9 |
|
||||
+============================+======+======+=======+=======+=======+=======+
|
||||
| 6.0 | |v| | |v| | |v| | |v| | |v| | |v| |
|
||||
+----------------------------+------+------+-------+-------+-------+-------+
|
||||
| 5.4 | |v| | |v| | |v| | |v| | |v| | |v| |
|
||||
+----------------------------+------+------+-------+-------+-------+-------+
|
||||
+----------------------------+--------------------+---------------+---------------+
|
||||
| Linux Distributions |Ubuntu | Debian | Rocky / |
|
||||
| | | | RHEL |
|
||||
+----------------------------+------+------+------+-------+-------+-------+-------+
|
||||
| ScyllaDB Version / Version |20.04 |22.04 |24.04 | 10 | 11 | 8 | 9 |
|
||||
+============================+======+======+======+=======+=======+=======+=======+
|
||||
| 6.0 | |v| | |v| | |v| | |v| | |v| | |v| | |v| |
|
||||
+----------------------------+------+------+------+-------+-------+-------+-------+
|
||||
| 5.4 | |v| | |v| | |x| | |v| | |v| | |v| | |v| |
|
||||
+----------------------------+------+------+------+-------+-------+-------+-------+
|
||||
|
||||
* The recommended OS for ScyllaDB Open Source is Ubuntu 22.04.
|
||||
* All releases are available as a Docker container and EC2 AMI, GCP, and Azure images.
|
||||
|
||||
@@ -3,16 +3,31 @@ nodetool decommission
|
||||
|
||||
**decommission** - Deactivate a selected node by streaming its data to the next node in the ring.
|
||||
|
||||
.. note::
|
||||
|
||||
   You cannot decommission a node if any existing node is down.
|
||||
|
||||
For example:
|
||||
|
||||
``nodetool decommission``
|
||||
|
||||
.. include:: /operating-scylla/_common/decommission_warning.rst
|
||||
|
||||
Use the ``nodetool netstats`` command to monitor the progress of the token reallocation.
|
||||
|
||||
.. note::
|
||||
|
||||
   You cannot decommission a node if any existing node is down.
|
||||
|
||||
See :doc:`Remove a Node from a ScyllaDB Cluster (Down Scale) </operating-scylla/procedures/cluster-management/remove-node>`
|
||||
for procedure details.
|
||||
|
||||
Before you run ``nodetool decommission``:
|
||||
|
||||
* Review current disk space utilization on existing nodes and make sure the amount
|
||||
of data streamed from the node being removed can fit into the disk space available
|
||||
on the remaining nodes. If there is not enough disk space on the remaining nodes,
|
||||
the removal of a node will fail. Add more storage to remaining nodes **before**
|
||||
starting the removal procedure.
|
||||
* Make sure that the number of nodes remaining in the DC after you decommission a node
|
||||
will be the same or higher than the Replication Factor configured for the keyspace
|
||||
in this DC. If the number of remaining nodes is lower than the RF, the decommission
|
||||
request may fail.
|
||||
In such a case, ALTER the keyspace to reduce the RF before running ``nodetool decommission``.
|
||||
|
||||
|
||||
.. include:: nodetool-index.rst
|
||||
|
||||
@@ -2,14 +2,28 @@ Nodetool describering
|
||||
=====================
|
||||
|
||||
**describering** - :code:`<keyspace>`- Shows the partition ranges of a given keyspace.
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
nodetool describering nba
|
||||
|
||||
Example output (for three node cluster on AWS):
|
||||
If :doc:`tablets </architecture/tablets>` are enabled for your keyspace, you
|
||||
need to additionally specify the table name. The command will display the ring
|
||||
of the table.
|
||||
|
||||
.. code:: shell
|
||||
|
||||
nodetool describering <keyspace> <table>
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
nodetool describering nba player_name
|
||||
|
||||
|
||||
Example output (for a three-node cluster on AWS with tablets disabled):
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
|
||||
@@ -21,9 +21,16 @@ is removed from the cluster or replaced.
|
||||
Prerequisites
|
||||
------------------------
|
||||
|
||||
Using ``removenode`` requires at least a quorum of nodes in a cluster to be available.
|
||||
If the quorum is lost, it must be restored before you change the cluster topology.
|
||||
See :doc:`Handling Node Failures </troubleshooting/handling-node-failures>` for details.
|
||||
* Using ``removenode`` requires at least a quorum of nodes in a cluster to be available.
|
||||
If the quorum is lost, it must be restored before you change the cluster topology.
|
||||
See :doc:`Handling Node Failures </troubleshooting/handling-node-failures>` for details.
|
||||
|
||||
* Make sure that the number of nodes remaining in the DC after you remove a node
|
||||
will be the same or higher than the Replication Factor configured for the keyspace
|
||||
in this DC. If the number of remaining nodes is lower than the RF, the removenode
|
||||
request may fail. In such a case, you should follow the procedure to
|
||||
:doc:`replace a dead node </operating-scylla/procedures/cluster-management/replace-dead-node>`
|
||||
instead of running ``nodetool removenode``.
|
||||
|
||||
Usage
|
||||
--------
|
||||
|
||||
@@ -17,8 +17,8 @@ limitations while applying the procedure:
|
||||
retry, or the node refuses to boot on subsequent attempts, consult the
|
||||
:doc:`Handling Membership Change Failures </operating-scylla/procedures/cluster-management/handling-membership-change-failures>`
|
||||
document.
|
||||
* The ``system_auth`` keyspace has not been upgraded to ``system_auth_v2``.
|
||||
* The ``system_auth`` keyspace has not been upgraded to ``system``.
|
||||
As a result, if ``authenticator`` is set to ``PasswordAuthenticator``, you must
|
||||
increase the replication factor of the ``system_auth`` keyspace. It is
|
||||
recommended to set ``system_auth`` replication factor to the number of nodes
|
||||
in each DC.
|
||||
in each DC.
|
||||
|
||||
@@ -156,7 +156,9 @@ Add New DC
|
||||
UN 54.160.174.243 109.54 KB 256 ? c7686ffd-7a5b-4124-858e-df2e61130aaa RACK1
|
||||
UN 54.235.9.159 109.75 KB 256 ? 39798227-9f6f-4868-8193-08570856c09a RACK1
|
||||
UN 54.146.228.25 128.33 KB 256 ? 7a4957a1-9590-4434-9746-9c8a6f796a0c RACK1
|
||||
|
||||
|
||||
.. TODO possibly provide additional information WRT how ALTER works with tablets
|
||||
|
||||
#. When all nodes are up and running ``ALTER`` the following Keyspaces in the new nodes:
|
||||
|
||||
* Keyspace created by the user (which needed to replicate to the new DC).
|
||||
|
||||
@@ -70,11 +70,46 @@ Step One: Determining Host IDs of Ghost Members
|
||||
If you cannot determine the ghost members' host ID using the suggestions above, use the method described below.
|
||||
|
||||
#. Make sure there are no ongoing membership changes.
|
||||
#. Execute the following CQL query on one of your nodes to obtain the host IDs of all token ring members:
|
||||
|
||||
#. Execute the following CQL query on one of your nodes to retrieve the Raft group 0 ID:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
select peer, host_id, up from system.cluster_status;
|
||||
select value from system.scylla_local where key = 'raft_group0_id'
|
||||
|
||||
For example:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
cqlsh> select value from system.scylla_local where key = 'raft_group0_id';
|
||||
|
||||
value
|
||||
--------------------------------------
|
||||
607fef80-c276-11ed-a6f6-3075f294cc65
|
||||
|
||||
#. Use the obtained Raft group 0 ID to query the set of all cluster members' host IDs (which includes the ghost members), by executing the following query:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
select server_id from system.raft_state where group_id = <group0_id>
|
||||
|
||||
replace ``<group0_id>`` with the group 0 ID that you obtained. For example:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
cqlsh> select server_id from system.raft_state where group_id = 607fef80-c276-11ed-a6f6-3075f294cc65;
|
||||
|
||||
server_id
|
||||
--------------------------------------
|
||||
26a9badc-6e96-4b86-a8df-5173e5ab47fe
|
||||
7991e7f5-692e-45a0-8ae5-438be5bc7c4f
|
||||
aff11c6d-fbe7-4395-b7ca-3912d7dba2c6
|
||||
|
||||
#. Execute the following CQL query to obtain the host IDs of all token ring members:
|
||||
|
||||
.. code-block:: cql
|
||||
|
||||
select host_id, up from system.cluster_status;
|
||||
|
||||
For example:
|
||||
|
||||
@@ -83,25 +118,28 @@ If you cannot determine the ghost members' host ID using the suggestions above,
|
||||
cqlsh> select peer, host_id, up from system.cluster_status;
|
||||
|
||||
peer | host_id | up
|
||||
-----------+--------------------------------------+-------
|
||||
127.0.0.3 | 42405b3b-487e-4759-8590-ddb9bdcebdc5 | False
|
||||
127.0.0.1 | 4e3ee715-528f-4dc9-b10f-7cf294655a9e | True
|
||||
127.0.0.2 | 225a80d0-633d-45d2-afeb-a5fa422c9bd5 | True
|
||||
-----------+--------------------------------------+-------
|
||||
127.0.0.3 | null | False
|
||||
127.0.0.1 | 26a9badc-6e96-4b86-a8df-5173e5ab47fe | True
|
||||
127.0.0.2 | 7991e7f5-692e-45a0-8ae5-438be5bc7c4f | True
|
||||
|
||||
The output of this query is similar to the output of ``nodetool status``.
|
||||
|
||||
We included the ``up`` column to see which nodes are down.
|
||||
We included the ``up`` column to see which nodes are down and the ``peer`` column to see their IP addresses.
|
||||
|
||||
In this example, one of the 3 nodes tried to decommission but crashed while it was leaving the token ring. The node is in a partially left state and will refuse to restart, but other nodes still consider it as a normal member. We'll have to use ``removenode`` to clean up after it.
|
||||
In this example, one of the nodes tried to decommission and crashed as soon as it left the token ring but before it left the Raft group. Its entry will show up in ``system.cluster_status`` queries with ``host_id = null``, like above, until the cluster is restarted.
|
||||
|
||||
#. A host ID belongs to a ghost member if it appears in the ``system.cluster_status`` query but does not correspond to any remaining node in your cluster.
|
||||
#. A host ID belongs to a ghost member if:
|
||||
|
||||
* It appears in the ``system.raft_state`` query but not in the ``system.cluster_status`` query,
|
||||
* Or it appears in the ``system.cluster_status`` query but does not correspond to any remaining node in your cluster.
|
||||
|
||||
In our example, the ghost member's host ID was ``aff11c6d-fbe7-4395-b7ca-3912d7dba2c6`` because it appeared in the ``system.raft_state`` query but not in the ``system.cluster_status`` query.
|
||||
|
||||
If you're unsure whether a given row in the ``system.cluster_status`` query corresponds to a node in your cluster, you can connect to each node in the cluster and execute ``select host_id from system.local`` (or search the node's logs) to obtain that node's host ID, collecting the host IDs of all nodes in your cluster. Then check if each host ID from the ``system.cluster_status`` query appears in your collected set; if not, it's a ghost member.
|
||||
|
||||
A good rule of thumb is to look at the members marked as down (``up = False`` in ``system.cluster_status``) - ghost members are eventually marked as down by the remaining members of the cluster. But remember that a real member might also be marked as down if it was shutdown or partitioned away from the rest of the cluster. If in doubt, connect to each node and collect their host IDs, as described in the previous paragraph.
|
||||
|
||||
In our example, the ghost member's host ID is ``42405b3b-487e-4759-8590-ddb9bdcebdc5`` because it is the only member marked as down and we can verify that the other two rows appearing in ``system.cluster_status`` belong to the remaining 2 nodes in the cluster.
|
||||
|
||||
In some cases, even after a failed topology change, there may be no ghost members left - for example, if a bootstrapping node crashed very early in the procedure or a decommissioning node crashed after it committed the membership change but before it finalized its own shutdown steps.
|
||||
|
||||
If any ghost members are present, proceed to the next step.
|
||||
|
||||
6
docs/reference/metrics.rst
Normal file
6
docs/reference/metrics.rst
Normal file
@@ -0,0 +1,6 @@
|
||||
==============
|
||||
Metrics (BETA)
|
||||
==============
|
||||
|
||||
.. scylladb_metrics::
|
||||
:template: metrics.tmpl
|
||||
@@ -1,95 +0,0 @@
|
||||
A Removed Node was not Removed Properly from the Seed Node List
|
||||
===============================================================
|
||||
|
||||
Phenomenon
|
||||
^^^^^^^^^
|
||||
|
||||
Failed to create :doc:`materialized view </cql/mv>` after node was removed from the cluster.
|
||||
|
||||
|
||||
Error message:
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
InvalidRequest: Error from server: code=2200 [Invalid query] message="Can't create materialized views until the whole cluster has been upgraded"
|
||||
|
||||
Problem
|
||||
^^^^^^^
|
||||
|
||||
A removed node was not removed properly from the seed node list.
|
||||
|
||||
Scylla Open Source 4.3 and later and Scylla Enterprise 2021.1 and later are seedless. See :doc:`Scylla Seed Nodes </kb/seed-nodes/>` for details.
|
||||
This problem may occur in an earlier version of Scylla.
|
||||
|
||||
How to Verify
|
||||
^^^^^^^^^^^^^
|
||||
|
||||
Scylla logs show the error message above.
|
||||
|
||||
To verify that the node wasn't removed properly, use the :doc:`nodetool gossipinfo </operating-scylla/nodetool-commands/gossipinfo>` command.
|
||||
|
||||
For example:
|
||||
|
||||
A three-node cluster, with one node (54.62.0.101) removed.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
nodetool gossipinfo
|
||||
|
||||
/54.62.0.99
|
||||
generation:1172279348
|
||||
heartbeat:7212
|
||||
LOAD:2.0293227179E10
|
||||
INTERNAL_IP:10.240.0.83
|
||||
DC:E1
|
||||
STATUS:NORMAL,-872190912874367364312
|
||||
HOST_ID:12fdcf43-4642-53b1-a987-c0e825e4e10a
|
||||
RPC_ADDRESS:10.240.0.83
|
||||
RACK:R1
|
||||
|
||||
/54.62.0.100
|
||||
generation:1657463198
|
||||
heartbeat:8135
|
||||
LOAD:2.0114638716E12
|
||||
INTERNAL_IP:10.240.0.93
|
||||
DC:E1
|
||||
STATUS:NORMAL,-258152127640110957173
|
||||
HOST_ID:99acbh55-1013-24a1-a987-s1w718c1e01b
|
||||
RPC_ADDRESS:10.240.0.93
|
||||
RACK:R1
|
||||
|
||||
/54.62.0.101
|
||||
generation:1657463198
|
||||
heartbeat:7022
|
||||
LOAD:2.5173672157E48
|
||||
INTERNAL_IP:10.240.0.103
|
||||
DC:E1
|
||||
STATUS:NORMAL,-365481201980413697284
|
||||
HOST_ID:99acbh55-1301-55a1-a628-s4w254c1e01b
|
||||
RPC_ADDRESS:10.240.0.103
|
||||
RACK:R1
|
||||
|
||||
We can see that node ``54.62.0.101`` is still part of the cluster and needs to be removed.
|
||||
|
||||
Solution
|
||||
^^^^^^^^
|
||||
|
||||
Remove the relevant node from the other nodes' seed lists (under scylla.yaml) and restart the nodes one by one.
|
||||
|
||||
For example:
|
||||
|
||||
Seed list before removing the node
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
- seeds: "10.240.0.83,10.240.0.93,10.240.0.103"
|
||||
|
||||
Seed list after removing the node
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
- seeds: "10.240.0.83,10.240.0.93"
|
||||
|
||||
Restart Scylla nodes
|
||||
|
||||
.. include:: /rst_include/scylla-commands-restart-index.rst
|
||||
@@ -6,7 +6,6 @@ Error Messages
|
||||
|
||||
kb-fs-not-qualified-aio
|
||||
address-already-in-use
|
||||
create-mv
|
||||
schema-mismatch
|
||||
invalid-ssl-prot-error
|
||||
|
||||
@@ -20,8 +19,6 @@ Error Messages
|
||||
|
||||
* :doc:`"Address already in use" messages </troubleshooting/error-messages/address-already-in-use/>`
|
||||
|
||||
* :doc:`"Error from server: code=2200 [Invalid query] message="Can't create materialized views until the whole cluster has been upgraded" </troubleshooting/error-messages/create-mv/>`
|
||||
|
||||
* :doc:`Schema Mismatch </troubleshooting/error-messages/schema-mismatch>`
|
||||
|
||||
* :doc:`Invalid SSL Protocol </troubleshooting/error-messages/invalid-ssl-prot-error>`
|
||||
|
||||
@@ -78,16 +78,10 @@ You can follow the manual recovery procedure when:
|
||||
**irrecoverable** nodes. If possible, restart your nodes, and use the manual
|
||||
recovery procedure as a last resort.
|
||||
|
||||
.. note::
|
||||
.. warning::
|
||||
|
||||
Before proceeding, make sure that the irrecoverable nodes are truly dead, and not,
|
||||
for example, temporarily partitioned away due to a network failure. If it is
|
||||
possible for the 'dead' nodes to come back to life, they might communicate and
|
||||
interfere with the recovery procedure and cause unpredictable problems.
|
||||
|
||||
If you have no means of ensuring that these irrecoverable nodes won't come back
|
||||
to life and communicate with the rest of the cluster, setup firewall rules or otherwise
|
||||
isolate your alive nodes to reject any communication attempts from these dead nodes.
|
||||
The manual recovery procedure is not supported :doc:`if tablets are enabled on any of your keyspaces </architecture/tablets/>`.
|
||||
In such a case, you need to :doc:`restore from backup </operating-scylla/procedures/backup-restore/restore>`.
|
||||
|
||||
During the manual recovery procedure you'll enter a special ``RECOVERY`` mode, remove
|
||||
all faulty nodes (using the standard :doc:`node removal procedure </operating-scylla/procedures/cluster-management/remove-node/>`),
|
||||
@@ -97,15 +91,26 @@ perform the Raft upgrade procedure again, initializing the Raft algorithm from s
|
||||
The manual recovery procedure is applicable both to clusters that were not running Raft
|
||||
in the past and then had Raft enabled, and to clusters that were bootstrapped using Raft.
|
||||
|
||||
.. note::
|
||||
**Prerequisites**
|
||||
|
||||
Entering ``RECOVERY`` mode requires a node restart. Restarting an additional node while
|
||||
some nodes are already dead may lead to unavailability of data queries (assuming that
|
||||
you haven't lost it already). For example, if you're using the standard RF=3,
|
||||
CL=QUORUM setup, and you're recovering from a stuck of upgrade procedure because one
|
||||
of your nodes is dead, restarting another node will cause temporary data query
|
||||
unavailability (until the node finishes restarting). Prepare your service for
|
||||
downtime before proceeding.
|
||||
* Before proceeding, make sure that the irrecoverable nodes are truly dead, and not,
|
||||
for example, temporarily partitioned away due to a network failure. If it is
|
||||
possible for the 'dead' nodes to come back to life, they might communicate and
|
||||
interfere with the recovery procedure and cause unpredictable problems.
|
||||
|
||||
If you have no means of ensuring that these irrecoverable nodes won't come back
|
||||
to life and communicate with the rest of the cluster, setup firewall rules or otherwise
|
||||
isolate your alive nodes to reject any communication attempts from these dead nodes.
|
||||
|
||||
* Prepare your service for downtime before proceeding.
|
||||
Entering ``RECOVERY`` mode requires a node restart. Restarting an additional node while
|
||||
some nodes are already dead may lead to unavailability of data queries (assuming that
|
||||
you haven't lost it already). For example, if you're using the standard RF=3,
|
||||
CL=QUORUM setup, and you're recovering from a stuck upgrade procedure because one
|
||||
of your nodes is dead, restarting another node will cause temporary data query
|
||||
unavailability (until the node finishes restarting).
|
||||
|
||||
**Procedure**
|
||||
|
||||
#. Perform the following query on **every alive node** in the cluster, using e.g. ``cqlsh``:
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@ Reset Authenticator Password
|
||||
============================
|
||||
|
||||
This procedure describes what to do when a user loses his password and can not reset it with a superuser role.
|
||||
The procedure requires cluster downtime and as a result, all of the ``system_auth_v2`` data is deleted.
|
||||
The procedure requires cluster downtime and as a result, all auth data is deleted.
|
||||
|
||||
.. scylladb_include_flag:: system-auth-name-info.rst
|
||||
|
||||
@@ -15,11 +15,11 @@ Procedure
|
||||
|
||||
sudo systemctl stop scylla-server
|
||||
|
||||
| 2. Remove your tables under ``/var/lib/scylla/data/system_auth_v2/``.
|
||||
| 2. Remove system tables starting with ``role`` prefix from ``/var/lib/scylla/data/system`` directory.
|
||||
|
||||
.. code-block:: shell
|
||||
|
||||
rm -rf /var/lib/scylla/data/ssystem_auth_v2/
|
||||
rm -rf /var/lib/scylla/data/system/role*
|
||||
|
||||
| 3. Start Scylla nodes.
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ Apply the following procedure **serially** on each node. Do not move to the next
|
||||
* Not to run administration functions, like repairs, refresh, rebuild or add or remove nodes. See `sctool <https://manager.docs.scylladb.com/stable/sctool/index.html>`_ for suspending ScyllaDB Manager (only available for ScyllaDB Enterprise) scheduled or running repairs.
|
||||
* Not to apply schema changes
|
||||
|
||||
.. note:: Before upgrading, make sure to use the latest `ScyllaDB Montioring <https://monitoring.docs.scylladb.com/>`_ stack.
|
||||
.. note:: Before upgrading, make sure to use the latest `ScyllaDB Monitoring <https://monitoring.docs.scylladb.com/>`_ stack.
|
||||
|
||||
Upgrade Steps
|
||||
=============
|
||||
@@ -182,4 +182,4 @@ Start the node
|
||||
|
||||
Validate
|
||||
--------
|
||||
Check the upgrade instructions above for validation. Once you are sure the node rollback is successful, move to the next node in the cluster.
|
||||
Check the upgrade instructions above for validation. Once you are sure the node rollback is successful, move to the next node in the cluster.
|
||||
|
||||
@@ -34,7 +34,7 @@ Apply the following procedure **serially** on each node. Do not move to the next
|
||||
* Not to run administration functions, like repairs, refresh, rebuild or add or remove nodes. See `sctool <https://manager.docs.scylladb.com/stable/sctool/index.html>`_ for suspending Scylla Manager (only available Scylla Enterprise) scheduled or running repairs.
|
||||
* Not to apply schema changes
|
||||
|
||||
.. note:: Before upgrading, make sure to use the latest `Scylla Montioring <https://monitoring.docs.scylladb.com/>`_ stack.
|
||||
.. note:: Before upgrading, make sure to use the latest `Scylla Monitoring <https://monitoring.docs.scylladb.com/>`_ stack.
|
||||
|
||||
Upgrade steps
|
||||
=============
|
||||
|
||||
@@ -32,7 +32,7 @@ Apply the following procedure **serially** on each node. Do not move to the next
|
||||
* Not to run administration functions, like repairs, refresh, rebuild or add or remove nodes. See `sctool <https://manager.docs.scylladb.com/stable/sctool/>`_ for suspending ScyllaDB Manager (only available for ScyllaDB Enterprise) scheduled or running repairs.
|
||||
* Not to apply schema changes
|
||||
|
||||
.. note:: Before upgrading, make sure to use the latest `ScyllaDB Montioring <https://monitoring.docs.scylladb.com/>`_ stack.
|
||||
.. note:: Before upgrading, make sure to use the latest `ScyllaDB Monitoring <https://monitoring.docs.scylladb.com/>`_ stack.
|
||||
|
||||
Upgrade Steps
|
||||
=============
|
||||
|
||||
@@ -27,12 +27,16 @@ struct topology_change {
|
||||
std::vector<canonical_mutation> mutations;
|
||||
};
|
||||
|
||||
struct mixed_change {
|
||||
std::vector<canonical_mutation> mutations;
|
||||
};
|
||||
|
||||
struct write_mutations {
|
||||
std::vector<canonical_mutation> mutations;
|
||||
};
|
||||
|
||||
struct group0_command {
|
||||
std::variant<service::schema_change, service::broadcast_table_query, service::topology_change, service::write_mutations> change;
|
||||
std::variant<service::schema_change, service::broadcast_table_query, service::topology_change, service::write_mutations, service::mixed_change> change;
|
||||
canonical_mutation history_append;
|
||||
|
||||
std::optional<utils::UUID> prev_state_id;
|
||||
|
||||
@@ -53,7 +53,7 @@ using can_yield = utils::can_yield;
|
||||
|
||||
using replication_strategy_config_options = std::map<sstring, sstring>;
|
||||
struct replication_strategy_params {
|
||||
const replication_strategy_config_options& options;
|
||||
const replication_strategy_config_options options;
|
||||
std::optional<unsigned> initial_tablets;
|
||||
explicit replication_strategy_params(const replication_strategy_config_options& o, std::optional<unsigned> it) noexcept : options(o), initial_tablets(it) {}
|
||||
};
|
||||
|
||||
15
main.cc
15
main.cc
@@ -1435,13 +1435,6 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
raft_gr.stop().get();
|
||||
});
|
||||
|
||||
supervisor::notify("initializing query processor remote part");
|
||||
// TODO: do this together with proxy.start_remote(...)
|
||||
qp.invoke_on_all(&cql3::query_processor::start_remote, std::ref(mm), std::ref(forward_service), std::ref(group0_client)).get();
|
||||
auto stop_qp_remote = defer_verbose_shutdown("query processor remote part", [&qp] {
|
||||
qp.invoke_on_all(&cql3::query_processor::stop_remote).get();
|
||||
});
|
||||
|
||||
supervisor::notify("initializing storage service");
|
||||
debug::the_storage_service = &ss;
|
||||
ss.start(std::ref(stop_signal.as_sharded_abort_source()),
|
||||
@@ -1455,6 +1448,14 @@ To start the scylla server proper, simply invoke as: scylla server (or just scyl
|
||||
ss.stop().get();
|
||||
});
|
||||
|
||||
supervisor::notify("initializing query processor remote part");
|
||||
// TODO: do this together with proxy.start_remote(...)
|
||||
qp.invoke_on_all(&cql3::query_processor::start_remote, std::ref(mm), std::ref(forward_service),
|
||||
std::ref(ss), std::ref(group0_client)).get();
|
||||
auto stop_qp_remote = defer_verbose_shutdown("query processor remote part", [&qp] {
|
||||
qp.invoke_on_all(&cql3::query_processor::stop_remote).get();
|
||||
});
|
||||
|
||||
api::set_server_storage_service(ctx, ss, group0_client).get();
|
||||
auto stop_ss_api = defer_verbose_shutdown("storage service API", [&ctx] {
|
||||
api::unset_server_storage_service(ctx).get();
|
||||
|
||||
@@ -3217,13 +3217,22 @@ future<> repair_service::start() {
|
||||
}
|
||||
|
||||
future<> repair_service::stop() {
|
||||
try {
|
||||
rlogger.debug("Stopping repair task module");
|
||||
co_await _repair_module->stop();
|
||||
rlogger.debug("Waiting on load_history_done");
|
||||
co_await std::move(_load_history_done);
|
||||
rlogger.debug("Uninitializing messaging service handlers");
|
||||
co_await uninit_ms_handlers();
|
||||
if (this_shard_id() == 0) {
|
||||
rlogger.debug("Unregistering gossiper helper");
|
||||
co_await _gossiper.local().unregister_(_gossip_helper);
|
||||
}
|
||||
_stopped = true;
|
||||
rlogger.info("Stopped repair_service");
|
||||
} catch (...) {
|
||||
on_fatal_internal_error(rlogger, format("Failed stopping repair_service: {}", std::current_exception()));
|
||||
}
|
||||
}
|
||||
|
||||
repair_service::~repair_service() {
|
||||
@@ -3266,6 +3275,7 @@ future<> repair_service::cleanup_history(tasks::task_id repair_id) {
|
||||
}
|
||||
|
||||
future<> repair_service::load_history() {
|
||||
try {
|
||||
co_await get_db().local().get_tables_metadata().parallel_for_each_table(coroutine::lambda([&] (table_id table_uuid, lw_shared_ptr<replica::table> table) -> future<> {
|
||||
auto shard = utils::uuid_xor_to_uint32(table_uuid.uuid()) % smp::count;
|
||||
if (shard != this_shard_id()) {
|
||||
@@ -3294,6 +3304,11 @@ future<> repair_service::load_history() {
|
||||
}
|
||||
});
|
||||
}));
|
||||
} catch (const abort_requested_exception&) {
|
||||
// Ignore
|
||||
} catch (...) {
|
||||
rlogger.warn("Failed to update repair history time: {}. Ignored", std::current_exception());
|
||||
}
|
||||
}
|
||||
|
||||
repair_meta_ptr repair_service::get_repair_meta(gms::inet_address from, uint32_t repair_meta_id) {
|
||||
|
||||
@@ -13,7 +13,6 @@
|
||||
#include "utils/lister.hh"
|
||||
#include "replica/database.hh"
|
||||
#include <seastar/core/future-util.hh>
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/system_keyspace_sstables_registry.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
@@ -812,7 +811,6 @@ future<> database::drop_keyspace_on_all_shards(sharded<database>& sharded_db, co
|
||||
static bool is_system_table(const schema& s) {
|
||||
auto& k = s.ks_name();
|
||||
return k == db::system_keyspace::NAME ||
|
||||
k == db::system_auth_keyspace::NAME ||
|
||||
k == db::system_distributed_keyspace::NAME ||
|
||||
k == db::system_distributed_keyspace::NAME_EVERYWHERE;
|
||||
}
|
||||
|
||||
@@ -17,7 +17,6 @@
|
||||
#include "replica/global_table_ptr.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/extensions.hh"
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
@@ -36,7 +35,7 @@
|
||||
extern logging::logger dblog;
|
||||
|
||||
static const std::unordered_set<std::string_view> system_keyspaces = {
|
||||
db::system_keyspace::NAME, db::system_auth_keyspace::NAME, db::schema_tables::NAME,
|
||||
db::system_keyspace::NAME, db::schema_tables::NAME,
|
||||
};
|
||||
|
||||
// Not super nice. Adding statefulness to the file.
|
||||
@@ -60,7 +59,6 @@ static const std::unordered_set<std::string_view> internal_keyspaces = {
|
||||
db::system_distributed_keyspace::NAME,
|
||||
db::system_distributed_keyspace::NAME_EVERYWHERE,
|
||||
db::system_keyspace::NAME,
|
||||
db::system_auth_keyspace::NAME,
|
||||
db::schema_tables::NAME,
|
||||
auth::meta::legacy::AUTH_KS,
|
||||
tracing::trace_keyspace_helper::KEYSPACE_NAME
|
||||
|
||||
@@ -666,6 +666,10 @@ private:
|
||||
storage_group* storage_group_for_id(size_t i) const {
|
||||
return storage_group_manager::storage_group_for_id(schema(), i);
|
||||
}
|
||||
|
||||
size_t tablet_id_for_token(dht::token t) const noexcept {
|
||||
return tablet_map().get_tablet_id(t).value();
|
||||
}
|
||||
public:
|
||||
tablet_storage_group_manager(table& t, const locator::effective_replication_map& erm)
|
||||
: _t(t)
|
||||
@@ -715,9 +719,6 @@ public:
|
||||
size_t log2_storage_groups() const override {
|
||||
return log2ceil(tablet_map().tablet_count());
|
||||
}
|
||||
size_t storage_group_id_for_token(dht::token t) const noexcept {
|
||||
return storage_group_of(t).first;
|
||||
}
|
||||
storage_group* storage_group_for_token(dht::token token) const noexcept override {
|
||||
return storage_group_for_id(storage_group_of(token).first);
|
||||
}
|
||||
@@ -924,8 +925,8 @@ utils::chunked_vector<compaction_group*> tablet_storage_group_manager::compactio
|
||||
utils::chunked_vector<compaction_group*> ret;
|
||||
auto cmp = dht::token_comparator();
|
||||
|
||||
size_t candidate_start = tr.start() ? storage_group_id_for_token(tr.start()->value()) : size_t(0);
|
||||
size_t candidate_end = tr.end() ? storage_group_id_for_token(tr.end()->value()) : (tablet_count() - 1);
|
||||
size_t candidate_start = tr.start() ? tablet_id_for_token(tr.start()->value()) : size_t(0);
|
||||
size_t candidate_end = tr.end() ? tablet_id_for_token(tr.end()->value()) : (tablet_count() - 1);
|
||||
|
||||
while (candidate_start <= candidate_end) {
|
||||
auto it = _storage_groups.find(candidate_start++);
|
||||
|
||||
@@ -1853,7 +1853,7 @@ class schema_ptr:
|
||||
return self.ptr[item]
|
||||
|
||||
def is_system(self):
|
||||
return self.ks_name in ["system", "system_schema", "system_distributed", "system_traces", "system_auth", "system_auth_v2", "audit"]
|
||||
return self.ks_name in ["system", "system_schema", "system_distributed", "system_traces", "system_auth", "audit"]
|
||||
|
||||
|
||||
class scylla_active_sstables(gdb.Command):
|
||||
|
||||
@@ -883,7 +883,7 @@ future<std::vector<mutation>> prepare_new_view_announcement(storage_proxy& sp, v
|
||||
}
|
||||
mlogger.info("Create new view: {}", view);
|
||||
return seastar::async([&db, keyspace = std::move(keyspace), &sp, view = std::move(view), ts] {
|
||||
auto mutations = db::schema_tables::make_create_view_mutations(keyspace, std::move(view), ts);
|
||||
auto mutations = db::schema_tables::make_create_view_mutations(keyspace, view, ts);
|
||||
// We don't have a separate on_before_create_view() listener to
|
||||
// call. But a view is also a column family, and we need to call
|
||||
// the on_before_create_column_family listener - notably, to
|
||||
@@ -954,18 +954,19 @@ future<> migration_manager::push_schema_mutation(const gms::inet_address& endpoi
|
||||
return _messaging.send_definitions_update(id, std::vector<frozen_mutation>{}, std::move(cm));
|
||||
}
|
||||
|
||||
template<typename mutation_type>
|
||||
future<> migration_manager::announce_with_raft(std::vector<mutation> schema, group0_guard guard, std::string_view description) {
|
||||
assert(this_shard_id() == 0);
|
||||
auto schema_features = _feat.cluster_schema_features();
|
||||
auto adjusted_schema = db::schema_tables::adjust_schema_for_schema_features(std::move(schema), schema_features);
|
||||
|
||||
auto group0_cmd = _group0_client.prepare_command(
|
||||
schema_change{
|
||||
.mutations{adjusted_schema.begin(), adjusted_schema.end()},
|
||||
mutation_type {
|
||||
.mutations{adjusted_schema.begin(), adjusted_schema.end()},
|
||||
},
|
||||
guard, std::move(description));
|
||||
|
||||
co_return co_await _group0_client.add_entry(std::move(group0_cmd), std::move(guard), &_as, raft_timeout{});
|
||||
return _group0_client.add_entry(std::move(group0_cmd), std::move(guard), &_as);
|
||||
}
|
||||
|
||||
future<> migration_manager::announce_without_raft(std::vector<mutation> schema, group0_guard guard) {
|
||||
@@ -1027,6 +1028,7 @@ static void add_committed_by_group0_flag(std::vector<mutation>& schema, const gr
|
||||
}
|
||||
|
||||
// Returns a future on the local application of the schema
|
||||
template<typename mutation_type>
|
||||
future<> migration_manager::announce(std::vector<mutation> schema, group0_guard guard, std::string_view description) {
|
||||
if (_feat.group0_schema_versioning) {
|
||||
schema.push_back(make_group0_schema_version_mutation(_storage_proxy.data_dictionary(), guard));
|
||||
@@ -1034,11 +1036,20 @@ future<> migration_manager::announce(std::vector<mutation> schema, group0_guard
|
||||
}
|
||||
|
||||
if (guard.with_raft()) {
|
||||
return announce_with_raft(std::move(schema), std::move(guard), std::move(description));
|
||||
return announce_with_raft<mutation_type>(std::move(schema), std::move(guard), std::move(description));
|
||||
} else {
|
||||
return announce_without_raft(std::move(schema), std::move(guard));
|
||||
}
|
||||
}
|
||||
template
|
||||
future<> migration_manager::announce_with_raft<schema_change>(std::vector<mutation> schema, group0_guard, std::string_view description);
|
||||
template
|
||||
future<> migration_manager::announce_with_raft<topology_change>(std::vector<mutation> schema, group0_guard, std::string_view description);
|
||||
|
||||
template
|
||||
future<> migration_manager::announce<schema_change>(std::vector<mutation> schema, group0_guard, std::string_view description);
|
||||
template
|
||||
future<> migration_manager::announce<topology_change>(std::vector<mutation> schema, group0_guard, std::string_view description);
|
||||
|
||||
future<group0_guard> migration_manager::start_group0_operation() {
|
||||
assert(this_shard_id() == 0);
|
||||
|
||||
@@ -136,6 +136,7 @@ public:
|
||||
|
||||
// Apply a group 0 change.
|
||||
// The future resolves after the change is applied locally.
|
||||
template<typename mutation_type = schema_change>
|
||||
future<> announce(std::vector<mutation> schema, group0_guard, std::string_view description);
|
||||
|
||||
void passive_announce(table_schema_version version);
|
||||
@@ -164,6 +165,7 @@ private:
|
||||
|
||||
future<> maybe_schedule_schema_pull(const table_schema_version& their_version, const gms::inet_address& endpoint);
|
||||
|
||||
template<typename mutation_type = schema_change>
|
||||
future<> announce_with_raft(std::vector<mutation> schema, group0_guard, std::string_view description);
|
||||
future<> announce_without_raft(std::vector<mutation> schema, group0_guard);
|
||||
|
||||
@@ -193,6 +195,17 @@ public:
|
||||
void set_concurrent_ddl_retries(size_t);
|
||||
};
|
||||
|
||||
extern template
|
||||
future<> migration_manager::announce_with_raft<schema_change>(std::vector<mutation> schema, group0_guard, std::string_view description);
|
||||
extern template
|
||||
future<> migration_manager::announce_with_raft<topology_change>(std::vector<mutation> schema, group0_guard, std::string_view description);
|
||||
|
||||
extern template
|
||||
future<> migration_manager::announce<schema_change>(std::vector<mutation> schema, group0_guard, std::string_view description);
|
||||
extern template
|
||||
future<> migration_manager::announce<topology_change>(std::vector<mutation> schema, group0_guard, std::string_view description);
|
||||
|
||||
|
||||
future<column_mapping> get_column_mapping(db::system_keyspace& sys_ks, table_id, table_schema_version v);
|
||||
|
||||
std::vector<mutation> prepare_keyspace_update_announcement(replica::database& db, lw_shared_ptr<keyspace_metadata> ksm, api::timestamp_type ts);
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include "cql3/query_processor.hh"
|
||||
#include "db/consistency_level_type.hh"
|
||||
#include <seastar/core/abort_source.hh>
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "service/raft/raft_group0_client.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "types/types.hh"
|
||||
@@ -55,7 +56,7 @@ future<> raft_service_level_distributed_data_accessor::do_raft_command(service::
|
||||
co_await _group0_client.add_entry(std::move(group0_cmd), std::move(guard), &as);
|
||||
}
|
||||
|
||||
future<> raft_service_level_distributed_data_accessor::set_service_level(sstring service_level_name, qos::service_level_options slo, std::optional<service::group0_guard> guard, abort_source& as) const {
|
||||
static void validate_state(const service::raft_group0_client& group0_client, const std::optional<service::group0_guard>& guard) {
|
||||
if (this_shard_id() != 0) {
|
||||
on_internal_error(logger, "raft_service_level_distributed_data_accessor: must be executed on shard 0");
|
||||
}
|
||||
@@ -63,6 +64,14 @@ future<> raft_service_level_distributed_data_accessor::set_service_level(sstring
|
||||
if (!guard) {
|
||||
on_internal_error(logger, "raft_service_level_distributed_data_accessor: guard must be present");
|
||||
}
|
||||
|
||||
if (group0_client.in_recovery()) {
|
||||
throw exceptions::invalid_request_exception("The cluster is in recovery mode. Changes to service levels are not allowed.");
|
||||
}
|
||||
}
|
||||
|
||||
future<> raft_service_level_distributed_data_accessor::set_service_level(sstring service_level_name, qos::service_level_options slo, std::optional<service::group0_guard> guard, abort_source& as) const {
|
||||
validate_state(_group0_client, guard);
|
||||
|
||||
static sstring insert_query = format("INSERT INTO {}.{} (service_level, timeout, workload_type) VALUES (?, ?, ?);", db::system_keyspace::NAME, db::system_keyspace::SERVICE_LEVELS_V2);
|
||||
data_value workload = slo.workload == qos::service_level_options::workload_type::unspecified
|
||||
@@ -81,13 +90,7 @@ future<> raft_service_level_distributed_data_accessor::drop_service_level(sstrin
|
||||
guard = co_await _group0_client.start_operation(&as);
|
||||
}
|
||||
|
||||
if (this_shard_id() != 0) {
|
||||
on_internal_error(logger, "raft_service_level_distributed_data_accessor: must be executed on shard 0");
|
||||
}
|
||||
|
||||
if (!guard) {
|
||||
on_internal_error(logger, "raft_service_level_distributed_data_accessor: guard must be present");
|
||||
}
|
||||
validate_state(_group0_client, guard);
|
||||
|
||||
static sstring delete_query = format("DELETE FROM {}.{} WHERE service_level= ?;", db::system_keyspace::NAME, db::system_keyspace::SERVICE_LEVELS_V2);
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@
|
||||
* SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
*/
|
||||
#include "service/raft/group0_state_machine.hh"
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "mutation/atomic_cell.hh"
|
||||
#include "cql3/selection/selection.hh"
|
||||
#include "dht/i_partitioner.hh"
|
||||
@@ -169,6 +168,11 @@ future<> group0_state_machine::merge_and_apply(group0_state_machine_merger& merg
|
||||
co_await write_mutations_to_database(_sp, cmd.creator_addr, std::move(chng.mutations));
|
||||
co_await _ss.topology_transition();
|
||||
},
|
||||
[&] (mixed_change& chng) -> future<> {
|
||||
co_await _mm.merge_schema_from(netw::messaging_service::msg_addr(std::move(cmd.creator_addr)), std::move(chng.mutations));
|
||||
co_await _ss.topology_transition();
|
||||
co_return;
|
||||
},
|
||||
[&] (write_mutations& muts) -> future<> {
|
||||
return write_mutations_to_database(_sp, cmd.creator_addr, std::move(muts.mutations));
|
||||
}
|
||||
@@ -274,7 +278,7 @@ future<> group0_state_machine::transfer_snapshot(raft::server_id from_id, raft::
|
||||
std::optional<service::raft_snapshot> raft_snp;
|
||||
|
||||
if (_topology_change_enabled) {
|
||||
auto auth_tables = db::system_auth_keyspace::all_tables();
|
||||
auto auth_tables = db::system_keyspace::auth_tables();
|
||||
std::vector<table_id> tables;
|
||||
tables.reserve(3);
|
||||
tables.push_back(db::system_keyspace::topology()->id());
|
||||
|
||||
@@ -46,6 +46,12 @@ struct topology_change {
|
||||
std::vector<canonical_mutation> mutations;
|
||||
};
|
||||
|
||||
// Allows executing combined topology & schema mutations under a single RAFT command.
|
||||
// The order of the mutations doesn't matter.
|
||||
struct mixed_change {
|
||||
std::vector<canonical_mutation> mutations;
|
||||
};
|
||||
|
||||
// This command is used to write data to tables other than topology or
|
||||
// schema tables and it doesn't update any in-memory data structures.
|
||||
struct write_mutations {
|
||||
@@ -53,7 +59,7 @@ struct write_mutations {
|
||||
};
|
||||
|
||||
struct group0_command {
|
||||
std::variant<schema_change, broadcast_table_query, topology_change, write_mutations> change;
|
||||
std::variant<schema_change, broadcast_table_query, topology_change, write_mutations, mixed_change> change;
|
||||
|
||||
// Mutation of group0 history table, appending a new state ID and optionally a description.
|
||||
canonical_mutation history_append;
|
||||
|
||||
@@ -78,6 +78,9 @@ std::vector<canonical_mutation>& group0_state_machine_merger::get_command_mutati
|
||||
[] (topology_change& chng) -> std::vector<canonical_mutation>& {
|
||||
return chng.mutations;
|
||||
},
|
||||
[] (mixed_change& chng) -> std::vector<canonical_mutation>& {
|
||||
return chng.mutations;
|
||||
},
|
||||
[] (write_mutations& muts) -> std::vector<canonical_mutation>& {
|
||||
return muts.mutations;
|
||||
}
|
||||
|
||||
@@ -297,7 +297,7 @@ future<group0_guard> raft_group0_client::start_operation(seastar::abort_source*
|
||||
}
|
||||
|
||||
template<typename Command>
|
||||
requires std::same_as<Command, schema_change> || std::same_as<Command, topology_change> || std::same_as<Command, write_mutations>
|
||||
requires std::same_as<Command, schema_change> || std::same_as<Command, topology_change> || std::same_as<Command, write_mutations> || std::same_as<Command, mixed_change>
|
||||
group0_command raft_group0_client::prepare_command(Command change, group0_guard& guard, std::string_view description) {
|
||||
group0_command group0_cmd {
|
||||
.change{std::move(change)},
|
||||
@@ -501,5 +501,6 @@ template group0_command raft_group0_client::prepare_command(topology_change chan
|
||||
template group0_command raft_group0_client::prepare_command(write_mutations change, group0_guard& guard, std::string_view description);
|
||||
template group0_command raft_group0_client::prepare_command(broadcast_table_query change, std::string_view description);
|
||||
template group0_command raft_group0_client::prepare_command(write_mutations change, std::string_view description);
|
||||
template group0_command raft_group0_client::prepare_command(mixed_change change, group0_guard& guard, std::string_view description);
|
||||
|
||||
}
|
||||
|
||||
@@ -137,7 +137,7 @@ public:
|
||||
requires std::same_as<Command, broadcast_table_query> || std::same_as<Command, write_mutations>
|
||||
group0_command prepare_command(Command change, std::string_view description);
|
||||
template<typename Command>
|
||||
requires std::same_as<Command, schema_change> || std::same_as<Command, topology_change> || std::same_as<Command, write_mutations>
|
||||
requires std::same_as<Command, schema_change> || std::same_as<Command, topology_change> || std::same_as<Command, write_mutations> || std::same_as<Command, mixed_change>
|
||||
group0_command prepare_command(Command change, group0_guard& guard, std::string_view description);
|
||||
// Checks maximum allowed serialized command size, server rejects bigger commands with command_is_too_big_error exception
|
||||
size_t max_command_size() const;
|
||||
|
||||
@@ -11,7 +11,6 @@
|
||||
|
||||
#include "storage_service.hh"
|
||||
#include "compaction/task_manager_module.hh"
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "gc_clock.hh"
|
||||
#include "raft/raft.hh"
|
||||
#include "service/qos/raft_service_level_distributed_data_accessor.hh"
|
||||
@@ -651,7 +650,7 @@ future<> storage_service::topology_state_load() {
|
||||
co_await _qp.container().invoke_on_all([] (cql3::query_processor& qp) {
|
||||
// auth-v2 gets enabled when consistent topology changes are enabled
|
||||
// (see topology::upgrade_state_type::done above) as we use the same migration procedure
|
||||
qp.auth_version = db::system_auth_keyspace::version_t::v2;
|
||||
qp.auth_version = db::system_keyspace::auth_version_t::v2;
|
||||
});
|
||||
|
||||
co_await _sl_controller.invoke_on_all([this] (qos::service_level_controller& sl_controller) {
|
||||
@@ -773,6 +772,7 @@ future<> storage_service::topology_state_load() {
|
||||
for (const auto& gen_id : _topology_state_machine._topology.committed_cdc_generations) {
|
||||
co_await _cdc_gens.local().handle_cdc_generation(gen_id);
|
||||
if (gen_id == _topology_state_machine._topology.committed_cdc_generations.back()) {
|
||||
co_await _sys_ks.local().update_cdc_generation_id(gen_id);
|
||||
rtlogger.debug("topology_state_load: the last committed CDC generation ID: {}", gen_id);
|
||||
}
|
||||
}
|
||||
@@ -1269,7 +1269,7 @@ future<> storage_service::raft_initialize_discovery_leader(const join_node_reque
|
||||
insert_join_request_mutations.emplace_back(std::move(sl_status_mutation));
|
||||
|
||||
insert_join_request_mutations.emplace_back(
|
||||
co_await _sys_ks.local().make_auth_version_mutation(guard.write_timestamp(), db::system_auth_keyspace::version_t::v2));
|
||||
co_await _sys_ks.local().make_auth_version_mutation(guard.write_timestamp(), db::system_keyspace::auth_version_t::v2));
|
||||
|
||||
topology_change change{std::move(insert_join_request_mutations)};
|
||||
group0_command g0_cmd = _group0->client().prepare_command(std::move(change), guard,
|
||||
@@ -3037,6 +3037,18 @@ future<> storage_service::replicate_to_all_cores(mutable_token_metadata_ptr tmpt
|
||||
for (auto it = table_erms.begin(); it != table_erms.end(); ) {
|
||||
auto& cf = db.find_column_family(it->first);
|
||||
co_await cf.update_effective_replication_map(std::move(it->second));
|
||||
co_await utils::get_local_injector().inject("delay_after_erm_update", [&cf, &ss] (auto& handler) -> future<> {
|
||||
auto& ss_ = ss;
|
||||
const auto ks_name = handler.get("ks_name");
|
||||
const auto cf_name = handler.get("cf_name");
|
||||
assert(ks_name);
|
||||
assert(cf_name);
|
||||
if (cf.schema()->ks_name() != *ks_name || cf.schema()->cf_name() != *cf_name) {
|
||||
co_return;
|
||||
}
|
||||
|
||||
co_await sleep_abortable(std::chrono::seconds{5}, ss_._abort_source);
|
||||
});
|
||||
if (cf.uses_tablets()) {
|
||||
register_tablet_split_candidate(it->first);
|
||||
}
|
||||
@@ -4504,6 +4516,15 @@ future<sstring> storage_service::wait_for_topology_request_completion(utils::UUI
|
||||
co_return sstring();
|
||||
}
|
||||
|
||||
future<> storage_service::wait_for_topology_not_busy() {
|
||||
auto guard = co_await _group0->client().start_operation(&_group0_as, raft_timeout{});
|
||||
while (_topology_state_machine._topology.is_busy()) {
|
||||
release_guard(std::move(guard));
|
||||
co_await _topology_state_machine.event.wait();
|
||||
guard = co_await _group0->client().start_operation(&_group0_as, raft_timeout{});
|
||||
}
|
||||
}
|
||||
|
||||
future<> storage_service::raft_rebuild(sstring source_dc) {
|
||||
auto& raft_server = _group0->group0_server();
|
||||
utils::UUID request_id;
|
||||
|
||||
@@ -818,6 +818,11 @@ private:
|
||||
// coordinator fiber
|
||||
future<> raft_state_monitor_fiber(raft::server&, sharded<db::system_distributed_keyspace>& sys_dist_ks);
|
||||
|
||||
public:
|
||||
bool topology_global_queue_empty() const {
|
||||
return !_topology_state_machine._topology.global_request.has_value();
|
||||
}
|
||||
private:
|
||||
// State machine that is responsible for topology change
|
||||
topology_state_machine _topology_state_machine;
|
||||
|
||||
@@ -895,6 +900,11 @@ public:
|
||||
// It is incompatible with the `join_cluster` method.
|
||||
future<> start_maintenance_mode();
|
||||
|
||||
// Waits for a topology request with a given ID to complete and return non empty error string
|
||||
// if request completes with an error
|
||||
future<sstring> wait_for_topology_request_completion(utils::UUID id);
|
||||
future<> wait_for_topology_not_busy();
|
||||
|
||||
private:
|
||||
future<std::vector<canonical_mutation>> get_system_mutations(schema_ptr schema);
|
||||
future<std::vector<canonical_mutation>> get_system_mutations(const sstring& ks_name, const sstring& cf_name);
|
||||
@@ -931,9 +941,6 @@ private:
|
||||
|
||||
future<> _sstable_cleanup_fiber = make_ready_future<>();
|
||||
future<> sstable_cleanup_fiber(raft::server& raft, sharded<service::storage_proxy>& proxy) noexcept;
|
||||
// Waits for a topology request with a given ID to complete and return non empty error string
|
||||
// if request completes with an error
|
||||
future<sstring> wait_for_topology_request_completion(utils::UUID id);
|
||||
|
||||
// We need to be able to abort all group0 operation during shutdown, so we need special abort source for that
|
||||
abort_source _group0_as;
|
||||
|
||||
@@ -53,6 +53,7 @@ struct load_balancer_cluster_stats {
|
||||
using dc_name = sstring;
|
||||
|
||||
class load_balancer_stats_manager {
|
||||
sstring group_name;
|
||||
std::unordered_map<dc_name, std::unique_ptr<load_balancer_dc_stats>> _dc_stats;
|
||||
std::unordered_map<host_id, std::unique_ptr<load_balancer_node_stats>> _node_stats;
|
||||
load_balancer_cluster_stats _cluster_stats;
|
||||
@@ -63,7 +64,7 @@ class load_balancer_stats_manager {
|
||||
void setup_metrics(const dc_name& dc, load_balancer_dc_stats& stats) {
|
||||
namespace sm = seastar::metrics;
|
||||
auto dc_lb = dc_label(dc);
|
||||
_metrics.add_group("load_balancer", {
|
||||
_metrics.add_group(group_name, {
|
||||
sm::make_counter("calls", sm::description("number of calls to the load balancer"),
|
||||
stats.calls)(dc_lb),
|
||||
sm::make_counter("migrations_produced", sm::description("number of migrations produced by the load balancer"),
|
||||
@@ -77,7 +78,7 @@ class load_balancer_stats_manager {
|
||||
namespace sm = seastar::metrics;
|
||||
auto dc_lb = dc_label(dc);
|
||||
auto node_lb = node_label(node);
|
||||
_metrics.add_group("load_balancer", {
|
||||
_metrics.add_group(group_name, {
|
||||
sm::make_gauge("load", sm::description("node load during last load balancing"),
|
||||
stats.load)(dc_lb)(node_lb)
|
||||
});
|
||||
@@ -86,7 +87,7 @@ class load_balancer_stats_manager {
|
||||
void setup_metrics(load_balancer_cluster_stats& stats) {
|
||||
namespace sm = seastar::metrics;
|
||||
// FIXME: we can probably improve it by making it per resize type (split, merge or none).
|
||||
_metrics.add_group("load_balancer", {
|
||||
_metrics.add_group(group_name, {
|
||||
sm::make_counter("resizes_emitted", sm::description("number of resizes produced by the load balancer"),
|
||||
stats.resizes_emitted),
|
||||
sm::make_counter("resizes_revoked", sm::description("number of resizes revoked by the load balancer"),
|
||||
@@ -96,7 +97,9 @@ class load_balancer_stats_manager {
|
||||
});
|
||||
}
|
||||
public:
|
||||
load_balancer_stats_manager() {
|
||||
load_balancer_stats_manager(sstring group_name):
|
||||
group_name(std::move(group_name))
|
||||
{
|
||||
setup_metrics(_cluster_stats);
|
||||
}
|
||||
|
||||
@@ -1323,7 +1326,8 @@ public:
|
||||
tablet_allocator_impl(tablet_allocator::config cfg, service::migration_notifier& mn, replica::database& db)
|
||||
: _config(std::move(cfg))
|
||||
, _migration_notifier(mn)
|
||||
, _db(db) {
|
||||
, _db(db)
|
||||
, _load_balancer_stats("load_balancer") {
|
||||
if (_config.initial_tablets_scale == 0) {
|
||||
throw std::runtime_error("Initial tablets scale must be positive");
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@
|
||||
|
||||
#include "auth/service.hh"
|
||||
#include "cdc/generation.hh"
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "cql3/statements/ks_prop_defs.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "dht/boot_strapper.hh"
|
||||
@@ -29,10 +29,12 @@
|
||||
#include "locator/token_metadata.hh"
|
||||
#include "locator/network_topology_strategy.hh"
|
||||
#include "message/messaging_service.hh"
|
||||
#include "mutation/async_utils.hh"
|
||||
#include "replica/database.hh"
|
||||
#include "replica/tablet_mutation_builder.hh"
|
||||
#include "replica/tablets.hh"
|
||||
#include "service/qos/service_level_controller.hh"
|
||||
#include "service/migration_manager.hh"
|
||||
#include "service/raft/join_node.hh"
|
||||
#include "service/raft/raft_address_map.hh"
|
||||
#include "service/raft/raft_group0.hh"
|
||||
@@ -41,6 +43,7 @@
|
||||
#include "service/topology_state_machine.hh"
|
||||
#include "topology_mutation.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "utils/stall_free.hh"
|
||||
#include "utils/to_string.hh"
|
||||
#include "service/endpoint_lifecycle_subscriber.hh"
|
||||
|
||||
@@ -756,6 +759,84 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
case global_topology_request::cleanup:
|
||||
co_await start_cleanup_on_dirty_nodes(std::move(guard), true);
|
||||
break;
|
||||
case global_topology_request::keyspace_rf_change: {
|
||||
rtlogger.info("keyspace_rf_change requested");
|
||||
while (true) {
|
||||
sstring ks_name = *_topo_sm._topology.new_keyspace_rf_change_ks_name;
|
||||
auto& ks = _db.find_keyspace(ks_name);
|
||||
auto tmptr = get_token_metadata_ptr();
|
||||
std::unordered_map<sstring, sstring> saved_ks_props = *_topo_sm._topology.new_keyspace_rf_change_data;
|
||||
cql3::statements::ks_prop_defs new_ks_props{std::map<sstring, sstring>{saved_ks_props.begin(), saved_ks_props.end()}};
|
||||
|
||||
auto repl_opts = new_ks_props.get_replication_options();
|
||||
repl_opts.erase(cql3::statements::ks_prop_defs::REPLICATION_STRATEGY_CLASS_KEY);
|
||||
utils::UUID req_uuid = *_topo_sm._topology.global_request_id;
|
||||
std::vector<canonical_mutation> updates;
|
||||
sstring error;
|
||||
size_t unimportant_init_tablet_count = 2; // must be a power of 2
|
||||
locator::tablet_map new_tablet_map{unimportant_init_tablet_count};
|
||||
|
||||
for (const auto& table : ks.metadata()->tables()) {
|
||||
try {
|
||||
locator::tablet_map old_tablets = tmptr->tablets().get_tablet_map(table->id());
|
||||
locator::replication_strategy_params params{repl_opts, old_tablets.tablet_count()};
|
||||
auto new_strategy = locator::abstract_replication_strategy::create_replication_strategy("NetworkTopologyStrategy", params);
|
||||
new_tablet_map = co_await new_strategy->maybe_as_tablet_aware()->reallocate_tablets(table, tmptr, old_tablets);
|
||||
} catch (const std::exception& e) {
|
||||
error = e.what();
|
||||
rtlogger.error("Couldn't process global_topology_request::keyspace_rf_change, error: {},"
|
||||
"desired new ks opts: {}", error, new_ks_props.get_replication_options());
|
||||
updates.clear(); // remove all tablets mutations ...
|
||||
break; // ... and only create mutations deleting the global req
|
||||
}
|
||||
|
||||
replica::tablet_mutation_builder tablet_mutation_builder(guard.write_timestamp(), table->id());
|
||||
co_await new_tablet_map.for_each_tablet([&](locator::tablet_id tablet_id, const locator::tablet_info& tablet_info) -> future<> {
|
||||
auto last_token = new_tablet_map.get_last_token(tablet_id);
|
||||
updates.emplace_back(co_await make_canonical_mutation_gently(
|
||||
replica::tablet_mutation_builder(guard.write_timestamp(), table->id())
|
||||
.set_new_replicas(last_token, tablet_info.replicas)
|
||||
.set_stage(last_token, locator::tablet_transition_stage::allow_write_both_read_old)
|
||||
.set_transition(last_token, locator::tablet_transition_kind::rebuild)
|
||||
.build()
|
||||
));
|
||||
co_await coroutine::maybe_yield();
|
||||
});
|
||||
}
|
||||
|
||||
updates.push_back(canonical_mutation(topology_mutation_builder(guard.write_timestamp())
|
||||
.set_transition_state(topology::transition_state::tablet_migration)
|
||||
.set_version(_topo_sm._topology.version + 1)
|
||||
.del_global_topology_request()
|
||||
.del_global_topology_request_id()
|
||||
.build()));
|
||||
updates.push_back(canonical_mutation(topology_request_tracking_mutation_builder(req_uuid)
|
||||
.done(error)
|
||||
.build()));
|
||||
if (error.empty()) {
|
||||
const sstring strategy_name = "NetworkTopologyStrategy";
|
||||
auto ks_md = keyspace_metadata::new_keyspace(ks_name, strategy_name, repl_opts,
|
||||
new_ks_props.get_initial_tablets(strategy_name, true),
|
||||
new_ks_props.get_durable_writes(), new_ks_props.get_storage_options());
|
||||
auto schema_muts = prepare_keyspace_update_announcement(_db, ks_md, guard.write_timestamp());
|
||||
for (auto& m: schema_muts) {
|
||||
updates.emplace_back(m);
|
||||
}
|
||||
}
|
||||
|
||||
sstring reason = format("ALTER tablets KEYSPACE called with options: {}", saved_ks_props);
|
||||
rtlogger.trace("do update {} reason {}", updates, reason);
|
||||
mixed_change change{std::move(updates)};
|
||||
group0_command g0_cmd = _group0.client().prepare_command(std::move(change), guard, reason);
|
||||
try {
|
||||
co_await _group0.client().add_entry(std::move(g0_cmd), std::move(guard), &_as);
|
||||
break;
|
||||
} catch (group0_concurrent_modification&) {
|
||||
rtlogger.info("handle_global_request(): concurrent modification, retrying");
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1379,7 +1460,19 @@ class topology_coordinator : public endpoint_lifecycle_subscriber {
|
||||
topology_mutation_builder builder(ts);
|
||||
topology_request_tracking_mutation_builder rtbuilder(_topo_sm._topology.find(id)->second.request_id);
|
||||
auto node_builder = builder.with_node(id).del("topology_request");
|
||||
rtbuilder.done(fmt::format("Canceled. Dead nodes: {}", dead_nodes));
|
||||
auto done_msg = fmt::format("Canceled. Dead nodes: {}", dead_nodes);
|
||||
rtbuilder.done(done_msg);
|
||||
if (_topo_sm._topology.global_request_id) {
|
||||
try {
|
||||
utils::UUID uuid = utils::UUID{*_topo_sm._topology.global_request_id};
|
||||
topology_request_tracking_mutation_builder rt_global_req_builder{uuid};
|
||||
rt_global_req_builder.done(done_msg)
|
||||
.set("end_time", db_clock::now());
|
||||
muts.emplace_back(rt_global_req_builder.build());
|
||||
} catch (...) {
|
||||
rtlogger.warn("failed to cancel topology global request: {}", std::current_exception());
|
||||
}
|
||||
}
|
||||
switch (req) {
|
||||
case topology_request::replace:
|
||||
[[fallthrough]];
|
||||
@@ -2505,7 +2598,7 @@ future<> topology_coordinator::build_coordinator_state(group0_guard guard) {
|
||||
co_await _group0.wait_for_all_nodes_to_finish_upgrade(_as);
|
||||
|
||||
auto auth_version = co_await _sys_ks.get_auth_version();
|
||||
if (auth_version < db::system_auth_keyspace::version_t::v2) {
|
||||
if (auth_version < db::system_keyspace::auth_version_t::v2) {
|
||||
rtlogger.info("migrating system_auth keyspace data");
|
||||
co_await auth::migrate_to_auth_v2(_sys_ks, _group0.client(),
|
||||
[this] (abort_source*) { return start_operation();}, _as);
|
||||
|
||||
@@ -11,6 +11,7 @@
|
||||
#include "types/tuple.hh"
|
||||
#include "types/types.hh"
|
||||
#include "types/set.hh"
|
||||
#include "types/map.hh"
|
||||
|
||||
namespace db {
|
||||
extern thread_local data_type cdc_generation_ts_id_type;
|
||||
@@ -213,6 +214,15 @@ topology_mutation_builder& topology_mutation_builder::set_committed_cdc_generati
|
||||
return apply_set("committed_cdc_generations", collection_apply_mode::overwrite, std::move(dv));
|
||||
}
|
||||
|
||||
topology_mutation_builder& topology_mutation_builder::set_new_keyspace_rf_change_data(
|
||||
const sstring& ks_name, const std::map<sstring, sstring>& rf_per_dc) {
|
||||
apply_atomic("new_keyspace_rf_change_ks_name", ks_name);
|
||||
apply_atomic("new_keyspace_rf_change_data",
|
||||
make_map_value(schema().get_column_definition("new_keyspace_rf_change_data")->type,
|
||||
map_type_impl::native_type(rf_per_dc.begin(), rf_per_dc.end())));
|
||||
return *this;
|
||||
}
|
||||
|
||||
topology_mutation_builder& topology_mutation_builder::set_unpublished_cdc_generations(const std::vector<cdc::generation_id_v2>& values) {
|
||||
auto dv = values | boost::adaptors::transformed([&] (const auto& v) {
|
||||
return make_tuple_value(db::cdc_generation_ts_id_type, tuple_type_impl::native_type({v.ts, timeuuid_native_type{v.id}}));
|
||||
@@ -224,6 +234,10 @@ topology_mutation_builder& topology_mutation_builder::set_global_topology_reques
|
||||
return apply_atomic("global_topology_request", ::format("{}", value));
|
||||
}
|
||||
|
||||
topology_mutation_builder& topology_mutation_builder::set_global_topology_request_id(const utils::UUID& value) {
|
||||
return apply_atomic("global_topology_request_id", value);
|
||||
}
|
||||
|
||||
topology_mutation_builder& topology_mutation_builder::set_upgrade_state(topology::upgrade_state_type value) {
|
||||
return apply_atomic("upgrade_state", ::format("{}", value));
|
||||
}
|
||||
@@ -251,6 +265,10 @@ topology_mutation_builder& topology_mutation_builder::del_global_topology_reques
|
||||
return del("global_topology_request");
|
||||
}
|
||||
|
||||
topology_mutation_builder& topology_mutation_builder::del_global_topology_request_id() {
|
||||
return del("global_topology_request_id");
|
||||
}
|
||||
|
||||
topology_node_mutation_builder& topology_mutation_builder::with_node(raft::server_id n) {
|
||||
_node_builder.emplace(*this, n);
|
||||
return *_node_builder;
|
||||
|
||||
@@ -114,8 +114,10 @@ public:
|
||||
topology_mutation_builder& set_tablet_balancing_enabled(bool);
|
||||
topology_mutation_builder& set_new_cdc_generation_data_uuid(const utils::UUID& value);
|
||||
topology_mutation_builder& set_committed_cdc_generations(const std::vector<cdc::generation_id_v2>& values);
|
||||
topology_mutation_builder& set_new_keyspace_rf_change_data(const sstring &ks_name, const std::map<sstring, sstring> &rf_per_dc);
|
||||
topology_mutation_builder& set_unpublished_cdc_generations(const std::vector<cdc::generation_id_v2>& values);
|
||||
topology_mutation_builder& set_global_topology_request(global_topology_request);
|
||||
topology_mutation_builder& set_global_topology_request_id(const utils::UUID&);
|
||||
topology_mutation_builder& set_upgrade_state(topology::upgrade_state_type);
|
||||
topology_mutation_builder& add_enabled_features(const std::set<sstring>& value);
|
||||
topology_mutation_builder& add_ignored_nodes(const std::unordered_set<raft::server_id>& value);
|
||||
@@ -124,6 +126,7 @@ public:
|
||||
topology_mutation_builder& del_transition_state();
|
||||
topology_mutation_builder& del_session();
|
||||
topology_mutation_builder& del_global_topology_request();
|
||||
topology_mutation_builder& del_global_topology_request_id();
|
||||
topology_node_mutation_builder& with_node(raft::server_id);
|
||||
canonical_mutation build() { return canonical_mutation{std::move(_m)}; }
|
||||
};
|
||||
|
||||
@@ -192,6 +192,7 @@ topology_request topology_request_from_string(const sstring& s) {
|
||||
static std::unordered_map<global_topology_request, sstring> global_topology_request_to_name_map = {
|
||||
{global_topology_request::new_cdc_generation, "new_cdc_generation"},
|
||||
{global_topology_request::cleanup, "cleanup"},
|
||||
{global_topology_request::keyspace_rf_change, "keyspace_rf_change"},
|
||||
};
|
||||
|
||||
global_topology_request global_topology_request_from_string(const sstring& s) {
|
||||
|
||||
@@ -72,6 +72,7 @@ using request_param = std::variant<join_param, rebuild_param, replace_param>;
|
||||
enum class global_topology_request: uint16_t {
|
||||
new_cdc_generation,
|
||||
cleanup,
|
||||
keyspace_rf_change,
|
||||
};
|
||||
|
||||
struct ring_slice {
|
||||
@@ -153,6 +154,9 @@ struct topology {
|
||||
// Pending global topology request (i.e. not related to any specific node).
|
||||
std::optional<global_topology_request> global_request;
|
||||
|
||||
// Pending global topology request's id, which is a new group0's state id
|
||||
std::optional<utils::UUID> global_request_id;
|
||||
|
||||
// The IDs of the committed CDC generations sorted by timestamps.
|
||||
// The obsolete generations may not be in this list as they are continually deleted.
|
||||
std::vector<cdc::generation_id_v2> committed_cdc_generations;
|
||||
@@ -162,6 +166,11 @@ struct topology {
|
||||
// It's used as the first column of the clustering key in CDC_GENERATIONS_V3 table.
|
||||
std::optional<utils::UUID> new_cdc_generation_data_uuid;
|
||||
|
||||
// The name of the KS that is being the target of the scheduled ALTER KS statement
|
||||
std::optional<sstring> new_keyspace_rf_change_ks_name;
|
||||
// The KS options to be used when executing the scheduled ALTER KS statement
|
||||
std::optional<std::unordered_map<sstring, sstring>> new_keyspace_rf_change_data;
|
||||
|
||||
// The IDs of the committed yet unpublished CDC generations sorted by timestamps.
|
||||
std::vector<cdc::generation_id_v2> unpublished_cdc_generations;
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
type: Python
|
||||
pool_size: 4
|
||||
prepare_cql: INSERT INTO system_auth_v2.roles (role, salted_hash) VALUES ('alternator', 'secret_pass')
|
||||
prepare_cql: INSERT INTO system.roles (role, salted_hash) VALUES ('alternator', 'secret_pass')
|
||||
run_first:
|
||||
- test_streams
|
||||
- test_scan
|
||||
|
||||
@@ -19,7 +19,7 @@ async def test_auth_raft_command_split(manager: ManagerClient) -> None:
|
||||
servers = await manager.servers_add(3)
|
||||
cql, hosts = await manager.get_ready_cql(servers)
|
||||
|
||||
initial_perms = await cql.run_async("SELECT * FROM system_auth_v2.role_permissions")
|
||||
initial_perms = await cql.run_async("SELECT * FROM system.role_permissions")
|
||||
|
||||
shared_role = "shared_role_" + unique_name()
|
||||
await cql.run_async(f"CREATE ROLE {shared_role}")
|
||||
@@ -43,11 +43,11 @@ async def test_auth_raft_command_split(manager: ManagerClient) -> None:
|
||||
await asyncio.gather(*(read_barrier(cql, host) for host in hosts))
|
||||
|
||||
# confirm that deleted shared_role is not attached to any other role
|
||||
assert await cql.run_async(f"SELECT * FROM system_auth_v2.role_permissions WHERE resource = 'role/{shared_role}' ALLOW FILTERING") == []
|
||||
assert await cql.run_async(f"SELECT * FROM system.role_permissions WHERE resource = 'role/{shared_role}' ALLOW FILTERING") == []
|
||||
|
||||
# cleanup
|
||||
for user in users:
|
||||
await cql.run_async(f"DROP ROLE IF EXISTS {user}")
|
||||
await asyncio.gather(*(read_barrier(cql, host) for host in hosts))
|
||||
current_perms = await cql.run_async("SELECT * FROM system_auth_v2.role_permissions")
|
||||
current_perms = await cql.run_async("SELECT * FROM system.role_permissions")
|
||||
assert initial_perms == current_perms
|
||||
|
||||
@@ -90,18 +90,18 @@ async def check_auth_v2_data_migration(manager: ManagerClient, hosts):
|
||||
data = auth_data()
|
||||
|
||||
roles = set()
|
||||
for row in await cql.run_async("SELECT * FROM system_auth_v2.roles"):
|
||||
for row in await cql.run_async("SELECT * FROM system.roles"):
|
||||
member_of = frozenset(row.member_of) if row.member_of else None
|
||||
roles.add((row.role, row.can_login, row.is_superuser, member_of, row.salted_hash))
|
||||
assert roles == set(data[0]["rows"])
|
||||
|
||||
role_members = set()
|
||||
for row in await cql.run_async("SELECT * FROM system_auth_v2.role_members"):
|
||||
for row in await cql.run_async("SELECT * FROM system.role_members"):
|
||||
role_members.add((row.role, row.member))
|
||||
assert role_members == set(data[1]["rows"])
|
||||
|
||||
role_attributes = set()
|
||||
for row in await cql.run_async("SELECT * FROM system_auth_v2.role_attributes"):
|
||||
for row in await cql.run_async("SELECT * FROM system.role_attributes"):
|
||||
role_attributes.add((row.role, row.name, row.value))
|
||||
assert role_attributes == set(data[2]["rows"])
|
||||
|
||||
@@ -121,7 +121,7 @@ async def check_auth_v2_works(manager: ManagerClient, hosts):
|
||||
await asyncio.gather(*(read_barrier(cql, host) for host in hosts))
|
||||
# see warmup_v1_static_values for background about checks below
|
||||
# check if it was added to a new table
|
||||
assert len(await cql.run_async("SELECT role FROM system_auth_v2.roles WHERE role = 'user_after_migration'")) == 1
|
||||
assert len(await cql.run_async("SELECT role FROM system.roles WHERE role = 'user_after_migration'")) == 1
|
||||
# check whether list roles statement sees it also via new table (on all nodes)
|
||||
await asyncio.gather(*(cql.run_async("LIST ROLES OF user_after_migration", host=host) for host in hosts))
|
||||
await cql.run_async("DROP ROLE user_after_migration")
|
||||
@@ -158,7 +158,7 @@ async def test_auth_v2_migration(request, manager: ManagerClient):
|
||||
logging.info("Waiting until upgrade finishes")
|
||||
await asyncio.gather(*(wait_until_topology_upgrade_finishes(manager, h.address, time.time() + 60) for h in hosts))
|
||||
|
||||
logging.info("Checking migrated data in system_auth_v2")
|
||||
logging.info("Checking migrated data in system")
|
||||
await check_auth_v2_data_migration(manager, hosts)
|
||||
|
||||
logging.info("Checking auth statements after migration")
|
||||
|
||||
@@ -15,6 +15,7 @@ from test.topology.util import trigger_snapshot, wait_until_topology_upgrade_fin
|
||||
from test.topology.conftest import skip_mode
|
||||
from cassandra import ConsistencyLevel
|
||||
from cassandra.query import SimpleStatement
|
||||
from cassandra.protocol import InvalidRequest
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
@@ -136,6 +137,14 @@ async def test_service_levels_work_during_recovery(manager: ManagerClient):
|
||||
assert sl_v1 not in [sl.service_level for sl in recovery_result]
|
||||
assert set([sl.service_level for sl in recovery_result]) == set(sls)
|
||||
|
||||
logging.info("Checking changes to service levels are forbidden during recovery")
|
||||
with pytest.raises(InvalidRequest, match="The cluster is in recovery mode. Changes to service levels are not allowed."):
|
||||
await cql.run_async(f"CREATE SERVICE LEVEL sl_{unique_name()}")
|
||||
with pytest.raises(InvalidRequest, match="The cluster is in recovery mode. Changes to service levels are not allowed."):
|
||||
await cql.run_async(f"ALTER SERVICE LEVEL {sls[0]} WITH timeout = 1h")
|
||||
with pytest.raises(InvalidRequest, match="The cluster is in recovery mode. Changes to service levels are not allowed."):
|
||||
await cql.run_async(f"DROP SERVICE LEVEL {sls[0]}")
|
||||
|
||||
logging.info("Restoring cluster to normal status")
|
||||
await asyncio.gather(*(delete_raft_topology_state(cql, h) for h in hosts))
|
||||
await asyncio.gather(*(delete_raft_data_and_upgrade_state(cql, h) for h in hosts))
|
||||
|
||||
@@ -178,19 +178,19 @@ void require_table_protected(cql_test_env& env, const char* table) {
|
||||
|
||||
SEASTAR_TEST_CASE(roles_table_is_protected) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& env) {
|
||||
require_table_protected(env, "system_auth_v2.roles");
|
||||
require_table_protected(env, "system.roles");
|
||||
}, auth_on());
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(role_members_table_is_protected) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& env) {
|
||||
require_table_protected(env, "system_auth_v2.role_members");
|
||||
require_table_protected(env, "system.role_members");
|
||||
}, auth_on());
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(role_permissions_table_is_protected) {
|
||||
return do_with_cql_env_thread([] (cql_test_env& env) {
|
||||
require_table_protected(env, "system_auth_v2.role_permissions");
|
||||
require_table_protected(env, "system.role_permissions");
|
||||
}, auth_on());
|
||||
}
|
||||
|
||||
|
||||
@@ -32,6 +32,7 @@
|
||||
#include <boost/algorithm/cxx11/iota.hpp>
|
||||
#include "test/lib/log.hh"
|
||||
#include "test/lib/cql_test_env.hh"
|
||||
#include "test/lib/key_utils.hh"
|
||||
#include "test/lib/random_utils.hh"
|
||||
#include "test/lib/test_utils.hh"
|
||||
#include <seastar/core/coroutine.hh>
|
||||
@@ -139,14 +140,6 @@ void endpoints_check(
|
||||
}
|
||||
}
|
||||
|
||||
auto d2t = [](double d) -> int64_t {
|
||||
// Double to unsigned long conversion will overflow if the
|
||||
// input is greater than numeric_limits<long>::max(), so divide by two and
|
||||
// multiply again later.
|
||||
auto scale = std::numeric_limits<unsigned long>::max();
|
||||
return static_cast<unsigned long>(d * static_cast<double>(scale >> 1)) << 1;
|
||||
};
|
||||
|
||||
/**
|
||||
* Check the get_natural_endpoints() output for tokens between every two
|
||||
* adjacent ring points.
|
||||
@@ -168,7 +161,7 @@ void full_ring_check(const std::vector<ring_point>& ring_points,
|
||||
|
||||
for (auto& rp : ring_points) {
|
||||
double cur_point1 = rp.point - 0.5;
|
||||
token t1(dht::token::kind::key, d2t(cur_point1 / ring_points.size()));
|
||||
token t1(dht::token::kind::key, tests::d2t(cur_point1 / ring_points.size()));
|
||||
auto endpoints1 = erm->get_natural_endpoints(t1);
|
||||
|
||||
endpoints_check(ars_ptr, tmptr, endpoints1, topo);
|
||||
@@ -181,7 +174,7 @@ void full_ring_check(const std::vector<ring_point>& ring_points,
|
||||
// identical to the one not taken from the cache.
|
||||
//
|
||||
double cur_point2 = rp.point - 0.2;
|
||||
token t2(dht::token::kind::key, d2t(cur_point2 / ring_points.size()));
|
||||
token t2(dht::token::kind::key, tests::d2t(cur_point2 / ring_points.size()));
|
||||
auto endpoints2 = erm->get_natural_endpoints(t2);
|
||||
|
||||
endpoints_check(ars_ptr, tmptr, endpoints2, topo);
|
||||
@@ -306,7 +299,7 @@ void simple_test() {
|
||||
auto& topo = tm.get_topology();
|
||||
for (const auto& [ring_point, endpoint, id] : ring_points) {
|
||||
std::unordered_set<token> tokens;
|
||||
tokens.insert({dht::token::kind::key, d2t(ring_point / ring_points.size())});
|
||||
tokens.insert({dht::token::kind::key, tests::d2t(ring_point / ring_points.size())});
|
||||
topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal);
|
||||
co_await tm.update_normal_tokens(std::move(tokens), id);
|
||||
}
|
||||
@@ -402,7 +395,7 @@ void heavy_origin_test() {
|
||||
ring_point rp = {token_point, address};
|
||||
|
||||
ring_points.emplace_back(rp);
|
||||
tokens[address].emplace(token{dht::token::kind::key, d2t(token_point / total_eps)});
|
||||
tokens[address].emplace(token{dht::token::kind::key, tests::d2t(token_point / total_eps)});
|
||||
|
||||
testlog.debug("adding node {} at {}", address, token_point);
|
||||
|
||||
@@ -483,7 +476,7 @@ SEASTAR_THREAD_TEST_CASE(NetworkTopologyStrategy_tablets_test) {
|
||||
auto& topo = tm.get_topology();
|
||||
for (const auto& [ring_point, endpoint, id] : ring_points) {
|
||||
std::unordered_set<token> tokens;
|
||||
tokens.insert({dht::token::kind::key, d2t(ring_point / ring_points.size())});
|
||||
tokens.insert({dht::token::kind::key, tests::d2t(ring_point / ring_points.size())});
|
||||
topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count);
|
||||
tm.update_host_id(id, endpoint);
|
||||
co_await tm.update_normal_tokens(std::move(tokens), id);
|
||||
@@ -574,7 +567,7 @@ static void test_random_balancing(sharded<snitch_ptr>& snitch, gms::inet_address
|
||||
auto& topo = tm.get_topology();
|
||||
for (const auto& [ring_point, endpoint, id] : ring_points) {
|
||||
std::unordered_set<token> tokens;
|
||||
tokens.insert({dht::token::kind::key, d2t(ring_point / ring_points.size())});
|
||||
tokens.insert({dht::token::kind::key, tests::d2t(ring_point / ring_points.size())});
|
||||
topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal, shard_count);
|
||||
tm.update_host_id(id, endpoint);
|
||||
co_await tm.update_normal_tokens(std::move(tokens), id);
|
||||
|
||||
@@ -15,19 +15,23 @@
|
||||
#include "test/lib/cql_test_env.hh"
|
||||
#include "test/lib/log.hh"
|
||||
#include "test/lib/simple_schema.hh"
|
||||
#include "test/lib/key_utils.hh"
|
||||
#include "test/lib/test_utils.hh"
|
||||
#include "db/config.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
#include "schema/schema_builder.hh"
|
||||
|
||||
#include "replica/tablets.hh"
|
||||
#include "replica/tablet_mutation_builder.hh"
|
||||
#include "locator/tablets.hh"
|
||||
#include "service/tablet_allocator.hh"
|
||||
#include "locator/tablet_replication_strategy.hh"
|
||||
#include "locator/tablet_sharder.hh"
|
||||
#include "locator/load_sketch.hh"
|
||||
#include "utils/UUID_gen.hh"
|
||||
#include "utils/error_injection.hh"
|
||||
#include "utils/to_string.hh"
|
||||
#include "service/topology_coordinator.hh"
|
||||
|
||||
using namespace locator;
|
||||
using namespace replica;
|
||||
@@ -2331,4 +2335,359 @@ SEASTAR_THREAD_TEST_CASE(test_tablet_range_splitter) {
|
||||
{bound{dks[0], true}, bound{dks[1], false}},
|
||||
{bound{dks[1], true}, bound{dks[2], false}},
|
||||
{bound{dks[2], true}, bound{dks[3], false}}});
|
||||
|
||||
}
|
||||
|
||||
static locator::endpoint_dc_rack make_endpoint_dc_rack(gms::inet_address endpoint) {
|
||||
// This resembles rack_inferring_snitch dc/rack generation which is
|
||||
// still in use by this test via token_metadata internals
|
||||
auto dc = std::to_string(uint8_t(endpoint.bytes()[1]));
|
||||
auto rack = std::to_string(uint8_t(endpoint.bytes()[2]));
|
||||
return locator::endpoint_dc_rack{dc, rack};
|
||||
}
|
||||
|
||||
struct calculate_tablet_replicas_for_new_rf_config
|
||||
{
|
||||
struct ring_point {
|
||||
double point;
|
||||
inet_address host;
|
||||
host_id id = host_id::create_random_id();
|
||||
};
|
||||
std::vector<ring_point> ring_points;
|
||||
std::map<sstring, sstring> options;
|
||||
std::map<sstring, sstring> new_dc_rep_factor;
|
||||
std::map<sstring, size_t> expected_rep_factor;
|
||||
};
|
||||
|
||||
static void execute_tablet_for_new_rf_test(calculate_tablet_replicas_for_new_rf_config const& test_config)
|
||||
{
|
||||
auto my_address = gms::inet_address("localhost");
|
||||
// Create the RackInferringSnitch
|
||||
snitch_config cfg;
|
||||
cfg.listen_address = my_address;
|
||||
cfg.broadcast_address = my_address;
|
||||
cfg.name = "RackInferringSnitch";
|
||||
sharded<snitch_ptr> snitch;
|
||||
snitch.start(cfg).get();
|
||||
auto stop_snitch = defer([&snitch] { snitch.stop().get(); });
|
||||
snitch.invoke_on_all(&snitch_ptr::start).get();
|
||||
|
||||
static constexpr size_t tablet_count = 8;
|
||||
|
||||
std::vector<unsigned> nodes_shard_count(test_config.ring_points.size(), 3);
|
||||
|
||||
locator::token_metadata::config tm_cfg;
|
||||
tm_cfg.topo_cfg.this_endpoint = test_config.ring_points[0].host;
|
||||
tm_cfg.topo_cfg.local_dc_rack = { snitch.local()->get_datacenter(), snitch.local()->get_rack() };
|
||||
tm_cfg.topo_cfg.this_host_id = test_config.ring_points[0].id;
|
||||
locator::shared_token_metadata stm([] () noexcept { return db::schema_tables::hold_merge_lock(); }, tm_cfg);
|
||||
|
||||
// Initialize the token_metadata
|
||||
stm.mutate_token_metadata([&] (token_metadata& tm) -> future<> {
|
||||
auto& topo = tm.get_topology();
|
||||
for (const auto& [ring_point, endpoint, id] : test_config.ring_points) {
|
||||
std::unordered_set<token> tokens;
|
||||
tokens.insert({dht::token::kind::key, tests::d2t(ring_point / test_config.ring_points.size())});
|
||||
topo.add_node(id, endpoint, make_endpoint_dc_rack(endpoint), locator::node::state::normal, 1);
|
||||
tm.update_host_id(id, endpoint);
|
||||
co_await tm.update_normal_tokens(std::move(tokens), id);
|
||||
}
|
||||
}).get();
|
||||
|
||||
locator::replication_strategy_params params(test_config.options, tablet_count);
|
||||
|
||||
auto ars_ptr = abstract_replication_strategy::create_replication_strategy(
|
||||
"NetworkTopologyStrategy", params);
|
||||
|
||||
auto tablet_aware_ptr = ars_ptr->maybe_as_tablet_aware();
|
||||
BOOST_REQUIRE(tablet_aware_ptr);
|
||||
|
||||
auto s = schema_builder("ks", "tb")
|
||||
.with_column("pk", utf8_type, column_kind::partition_key)
|
||||
.with_column("v", utf8_type)
|
||||
.build();
|
||||
|
||||
stm.mutate_token_metadata([&] (token_metadata& tm) {
|
||||
for (size_t i = 0; i < test_config.ring_points.size(); ++i) {
|
||||
auto& [ring_point, endpoint, id] = test_config.ring_points[i];
|
||||
tm.update_host_id(id, endpoint);
|
||||
tm.update_topology(id, make_endpoint_dc_rack(endpoint), std::nullopt, nodes_shard_count[i]);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}).get();
|
||||
|
||||
auto allocated_map = tablet_aware_ptr->allocate_tablets_for_new_table(s, stm.get(), 0).get();
|
||||
|
||||
BOOST_REQUIRE_EQUAL(allocated_map.tablet_count(), tablet_count);
|
||||
|
||||
auto host_id_to_dc = [&stm](const locator::host_id& ep) -> std::optional<sstring> {
|
||||
auto node = stm.get()->get_topology().find_node(ep);
|
||||
if (node == nullptr) {
|
||||
return std::nullopt;
|
||||
}
|
||||
return node->dc_rack().dc;
|
||||
};
|
||||
|
||||
stm.mutate_token_metadata([&] (token_metadata& tm) {
|
||||
tablet_metadata tab_meta;
|
||||
auto table = s->id();
|
||||
tab_meta.set_tablet_map(table, allocated_map);
|
||||
tm.set_tablets(std::move(tab_meta));
|
||||
return make_ready_future<>();
|
||||
}).get();
|
||||
|
||||
std::map<sstring, size_t> initial_rep_factor;
|
||||
for (auto const& [dc, shard_count] : test_config.options) {
|
||||
initial_rep_factor[dc] = std::stoul(shard_count);
|
||||
}
|
||||
|
||||
auto tablets = stm.get()->tablets().get_tablet_map(s->id());
|
||||
BOOST_REQUIRE_EQUAL(tablets.tablet_count(), tablet_count);
|
||||
for (auto tb : tablets.tablet_ids()) {
|
||||
const locator::tablet_info& ti = tablets.get_tablet_info(tb);
|
||||
|
||||
std::map<sstring, size_t> dc_replicas_count;
|
||||
for (const auto& r : ti.replicas) {
|
||||
auto dc = host_id_to_dc(r.host);
|
||||
if (dc) {
|
||||
dc_replicas_count[*dc]++;
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_REQUIRE_EQUAL(dc_replicas_count, initial_rep_factor);
|
||||
}
|
||||
|
||||
try {
|
||||
tablet_map old_tablets = stm.get()->tablets().get_tablet_map(s->id());
|
||||
locator::replication_strategy_params params{test_config.new_dc_rep_factor, old_tablets.tablet_count()};
|
||||
auto new_strategy = abstract_replication_strategy::create_replication_strategy("NetworkTopologyStrategy", params);
|
||||
auto tmap = new_strategy->maybe_as_tablet_aware()->reallocate_tablets(s, stm.get(), old_tablets).get();
|
||||
|
||||
auto const& ts = tmap.tablets();
|
||||
BOOST_REQUIRE_EQUAL(ts.size(), tablet_count);
|
||||
|
||||
for (auto tb : tmap.tablet_ids()) {
|
||||
const locator::tablet_info& ti = tmap.get_tablet_info(tb);
|
||||
|
||||
std::map<sstring, size_t> dc_replicas_count;
|
||||
for (const auto& r : ti.replicas) {
|
||||
auto dc = host_id_to_dc(r.host);
|
||||
if (dc) {
|
||||
dc_replicas_count[*dc]++;
|
||||
}
|
||||
}
|
||||
|
||||
BOOST_REQUIRE_EQUAL(dc_replicas_count, test_config.expected_rep_factor);
|
||||
}
|
||||
|
||||
} catch (exceptions::configuration_exception const& e) {
|
||||
thread_local boost::regex re("Datacenter [0-9]+ doesn't have enough nodes for replication_factor=[0-9]+");
|
||||
boost::cmatch what;
|
||||
if (!boost::regex_search(e.what(), what, re)) {
|
||||
BOOST_FAIL("Unexpected exception: " + std::string(e.what()));
|
||||
}
|
||||
} catch (std::exception const& e) {
|
||||
BOOST_FAIL("Unexpected exception: " + std::string(e.what()));
|
||||
} catch (...) {
|
||||
BOOST_FAIL("Unexpected exception");
|
||||
}
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_calculate_tablet_replicas_for_new_rf_upsize_one_dc) {
|
||||
calculate_tablet_replicas_for_new_rf_config config;
|
||||
config.ring_points = {
|
||||
{ 1.0, inet_address("192.100.10.1") },
|
||||
{ 4.0, inet_address("192.100.20.1") },
|
||||
{ 7.0, inet_address("192.100.30.1") },
|
||||
};
|
||||
config.options = {{"100", "2"}};
|
||||
config.new_dc_rep_factor = {{"100", "3"}};
|
||||
config.expected_rep_factor = {{"100", 3}};
|
||||
execute_tablet_for_new_rf_test(config);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_calculate_tablet_replicas_for_new_rf_downsize_one_dc) {
|
||||
calculate_tablet_replicas_for_new_rf_config config;
|
||||
config.ring_points = {
|
||||
{ 1.0, inet_address("192.100.10.1") },
|
||||
{ 4.0, inet_address("192.100.20.1") },
|
||||
{ 7.0, inet_address("192.100.30.1") },
|
||||
};
|
||||
config.options = {{"100", "3"}};
|
||||
config.new_dc_rep_factor = {{"100", "2"}};
|
||||
config.expected_rep_factor = {{"100", 2}};
|
||||
execute_tablet_for_new_rf_test(config);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_calculate_tablet_replicas_for_new_rf_no_change_one_dc) {
|
||||
calculate_tablet_replicas_for_new_rf_config config;
|
||||
config.ring_points = {
|
||||
{ 1.0, inet_address("192.100.10.1") },
|
||||
{ 4.0, inet_address("192.100.20.1") },
|
||||
{ 7.0, inet_address("192.100.30.1") },
|
||||
};
|
||||
config.options = {{"100", "3"}};
|
||||
config.new_dc_rep_factor = {{"100", "3"}};
|
||||
config.expected_rep_factor = {{"100", 3}};
|
||||
execute_tablet_for_new_rf_test(config);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_calculate_tablet_replicas_for_new_rf) {
|
||||
calculate_tablet_replicas_for_new_rf_config config;
|
||||
config.ring_points = {
|
||||
{ 1.0, inet_address("192.100.10.1") },
|
||||
{ 2.0, inet_address("192.101.10.1") },
|
||||
{ 3.0, inet_address("192.102.10.1") },
|
||||
{ 4.0, inet_address("192.100.20.1") },
|
||||
{ 5.0, inet_address("192.101.20.1") },
|
||||
{ 6.0, inet_address("192.102.20.1") },
|
||||
{ 7.0, inet_address("192.100.30.1") },
|
||||
{ 8.0, inet_address("192.101.30.1") },
|
||||
{ 9.0, inet_address("192.102.30.1") },
|
||||
{ 10.0, inet_address("192.101.40.1") },
|
||||
{ 11.0, inet_address("192.102.40.1") },
|
||||
{ 12.0, inet_address("192.102.40.2") }
|
||||
};
|
||||
config.options = {
|
||||
{"100", "3"},
|
||||
{"101", "2"},
|
||||
{"102", "3"}
|
||||
};
|
||||
config.new_dc_rep_factor = {
|
||||
{"100", "3"},
|
||||
{"101", "4"},
|
||||
{"102", "2"}
|
||||
};
|
||||
config.expected_rep_factor = {
|
||||
{"100", 3},
|
||||
{"101", 4},
|
||||
{"102", 2}
|
||||
};
|
||||
|
||||
execute_tablet_for_new_rf_test(config);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_calculate_tablet_replicas_for_new_rf_not_enough_nodes) {
|
||||
|
||||
calculate_tablet_replicas_for_new_rf_config config;
|
||||
config.ring_points = {
|
||||
{ 1.0, inet_address("192.100.10.1") },
|
||||
{ 4.0, inet_address("192.100.20.1") },
|
||||
{ 7.0, inet_address("192.100.30.1") },
|
||||
};
|
||||
config.options = {{"100", "3"}};
|
||||
config.new_dc_rep_factor = {{"100", "5"}};
|
||||
config.expected_rep_factor = {{"100", 3}};
|
||||
execute_tablet_for_new_rf_test(config);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_calculate_tablet_replicas_for_new_rf_one_dc) {
|
||||
calculate_tablet_replicas_for_new_rf_config config;
|
||||
config.ring_points = {
|
||||
{ 1.0, inet_address("192.100.10.1") },
|
||||
{ 4.0, inet_address("192.100.20.1") },
|
||||
{ 7.0, inet_address("192.100.30.1") },
|
||||
};
|
||||
config.options = {{"100", "2"}};
|
||||
config.new_dc_rep_factor = {{"100", "3"}};
|
||||
config.expected_rep_factor = {{"100", 3}};
|
||||
execute_tablet_for_new_rf_test(config);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_calculate_tablet_replicas_for_new_rf_one_dc_1_to_2) {
|
||||
calculate_tablet_replicas_for_new_rf_config config;
|
||||
config.ring_points = {
|
||||
{ 1.0, inet_address("192.100.10.1") },
|
||||
{ 4.0, inet_address("192.100.20.1") },
|
||||
};
|
||||
config.options = {{"100", "1"}};
|
||||
config.new_dc_rep_factor = {{"100", "2"}};
|
||||
config.expected_rep_factor = {{"100", 2}};
|
||||
execute_tablet_for_new_rf_test(config);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_calculate_tablet_replicas_for_new_rf_one_dc_not_enough_nodes) {
|
||||
|
||||
calculate_tablet_replicas_for_new_rf_config config;
|
||||
config.ring_points = {
|
||||
{ 1.0, inet_address("192.100.10.1") },
|
||||
{ 4.0, inet_address("192.100.10.2") },
|
||||
{ 7.0, inet_address("192.100.10.3") },
|
||||
};
|
||||
config.options = {{"100", "3"}};
|
||||
config.new_dc_rep_factor = {{"100", "5"}};
|
||||
config.expected_rep_factor = {{"100", 3}};
|
||||
execute_tablet_for_new_rf_test(config);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_calculate_tablet_replicas_for_new_rf_default_rf) {
|
||||
calculate_tablet_replicas_for_new_rf_config config;
|
||||
config.ring_points = {
|
||||
{ 1.0, inet_address("192.100.10.1") },
|
||||
{ 2.0, inet_address("192.101.10.1") },
|
||||
{ 3.0, inet_address("192.102.10.1") },
|
||||
{ 4.0, inet_address("192.100.20.1") },
|
||||
{ 5.0, inet_address("192.101.20.1") },
|
||||
{ 6.0, inet_address("192.102.20.1") },
|
||||
{ 7.0, inet_address("192.100.30.1") },
|
||||
{ 8.0, inet_address("192.101.30.1") },
|
||||
{ 9.0, inet_address("192.102.30.1") },
|
||||
{ 10.0, inet_address("192.100.40.1") },
|
||||
{ 11.0, inet_address("192.101.40.1") },
|
||||
{ 12.0, inet_address("192.102.40.1") },
|
||||
{ 13.0, inet_address("192.102.40.2") }
|
||||
};
|
||||
config.options = {
|
||||
{"100", "3"},
|
||||
{"101", "2"},
|
||||
{"102", "2"}
|
||||
};
|
||||
config.new_dc_rep_factor = {
|
||||
{"100", "4"},
|
||||
{"101", "3"},
|
||||
{"102", "3"},
|
||||
};
|
||||
config.expected_rep_factor = {
|
||||
{"100", 4},
|
||||
{"101", 3},
|
||||
{"102", 3},
|
||||
};
|
||||
|
||||
execute_tablet_for_new_rf_test(config);
|
||||
}
|
||||
|
||||
SEASTAR_THREAD_TEST_CASE(test_calculate_tablet_replicas_for_new_rf_default_rf_upsize_by_two) {
|
||||
calculate_tablet_replicas_for_new_rf_config config;
|
||||
config.ring_points = {
|
||||
{ 1.0, inet_address("192.100.10.1") },
|
||||
{ 2.0, inet_address("192.101.10.1") },
|
||||
{ 3.0, inet_address("192.102.10.1") },
|
||||
{ 4.0, inet_address("192.100.20.1") },
|
||||
{ 5.0, inet_address("192.101.20.1") },
|
||||
{ 6.0, inet_address("192.102.20.1") },
|
||||
{ 7.0, inet_address("192.100.30.1") },
|
||||
{ 8.0, inet_address("192.101.30.1") },
|
||||
{ 9.0, inet_address("192.102.30.1") },
|
||||
{ 10.0, inet_address("192.100.40.1") },
|
||||
{ 11.0, inet_address("192.101.40.1") },
|
||||
{ 12.0, inet_address("192.102.40.1") },
|
||||
{ 13.0, inet_address("192.102.40.2") }
|
||||
};
|
||||
config.options = {
|
||||
{"100", "3"},
|
||||
{"101", "2"},
|
||||
{"102", "1"}
|
||||
};
|
||||
config.new_dc_rep_factor = {
|
||||
{"100", "4"},
|
||||
{"101", "3"},
|
||||
{"102", "3"},
|
||||
};
|
||||
config.expected_rep_factor = {
|
||||
{"100", 4},
|
||||
{"101", 3},
|
||||
{"102", 3},
|
||||
};
|
||||
|
||||
execute_tablet_for_new_rf_test(config);
|
||||
}
|
||||
|
||||
@@ -87,6 +87,7 @@ SEASTAR_THREAD_TEST_CASE(test_response_request_reader) {
|
||||
res.serialize({sc::change_type::CREATED, sc::target_type::TYPE, "foo", "bar"}, version);
|
||||
res.serialize({sc::change_type::CREATED, sc::target_type::FUNCTION, "foo", "bar", "zed"}, version);
|
||||
res.serialize({sc::change_type::CREATED, sc::target_type::AGGREGATE, "foo", "bar", "zed"}, version);
|
||||
res.serialize({sc::change_type::CREATED, sc::target_type::TABLET_KEYSPACE, "foo"}, version);
|
||||
|
||||
auto msg = res.make_message(version, cql_transport::cql_compression::none).release();
|
||||
auto total_length = msg.len();
|
||||
|
||||
@@ -99,7 +99,19 @@ def test_drop_keyspace_nonexistent(cql):
|
||||
# Test trying to ALTER a keyspace.
|
||||
def test_alter_keyspace(cql, this_dc):
|
||||
with new_test_keyspace(cql, "WITH REPLICATION = { 'class' : 'NetworkTopologyStrategy', '" + this_dc + "' : 1 }") as keyspace:
|
||||
cql.execute(f"ALTER KEYSPACE {keyspace} WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', '{this_dc}' : 3 }} AND DURABLE_WRITES = false")
|
||||
cql.execute(f"ALTER KEYSPACE {keyspace} WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', '{this_dc}' : 2 }} AND DURABLE_WRITES = false")
|
||||
|
||||
# Test trying to ALTER RF of tablets-enabled KS by more than 1 at a time
|
||||
def test_alter_keyspace_rf_by_more_than_1(cql, this_dc):
|
||||
with new_test_keyspace(cql, "WITH REPLICATION = { 'class' : 'NetworkTopologyStrategy', '" + this_dc + "' : 1 }") as keyspace:
|
||||
with pytest.raises(InvalidRequest):
|
||||
cql.execute(f"ALTER KEYSPACE {keyspace} WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', '{this_dc}' : 3 }} AND DURABLE_WRITES = false")
|
||||
|
||||
# Test trying to ALTER a tablets-enabled KS by providing the 'replication_factor' tag
|
||||
def test_alter_keyspace_with_replication_factor_tag(cql):
|
||||
with new_test_keyspace(cql, "WITH REPLICATION = { 'class' : 'NetworkTopologyStrategy', 'replication_factor' : 1 }") as keyspace:
|
||||
with pytest.raises(InvalidRequest):
|
||||
cql.execute(f"ALTER KEYSPACE {keyspace} WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', 'replication_factor' : 2 }}")
|
||||
|
||||
# Test trying to ALTER a keyspace with invalid options.
|
||||
def test_alter_keyspace_invalid(cql, this_dc):
|
||||
|
||||
@@ -151,7 +151,7 @@ def test_udf_permissions_serialization(cql):
|
||||
for permission in permissions:
|
||||
cql.execute(f"GRANT {permission} ON {resource} TO {user}")
|
||||
|
||||
permissions = {row.resource: row.permissions for row in cql.execute(f"SELECT * FROM system_auth_v2.role_permissions")}
|
||||
permissions = {row.resource: row.permissions for row in cql.execute(f"SELECT * FROM system.role_permissions")}
|
||||
assert permissions['functions'] == set(['ALTER', 'AUTHORIZE', 'CREATE', 'DROP', 'EXECUTE'])
|
||||
assert permissions[f'functions/{keyspace}'] == set(['ALTER', 'AUTHORIZE', 'CREATE', 'DROP', 'EXECUTE'])
|
||||
assert permissions[f'functions/{keyspace}/{div_fun}[org.apache.cassandra.db.marshal.LongType^org.apache.cassandra.db.marshal.Int32Type]'] == set(['ALTER', 'AUTHORIZE', 'DROP', 'EXECUTE'])
|
||||
@@ -552,4 +552,4 @@ def test_native_functions_always_exeutable(cql):
|
||||
assert list(user_session.execute(f"SELECT count(*) FROM {table}")) == [(3,)]
|
||||
assert list(user_session.execute(f"SELECT max(a) FROM {table}")) == [(84,)]
|
||||
assert list(user_session.execute(f"SELECT min(a) FROM {table}")) == [(3,)]
|
||||
assert list(user_session.execute(f"SELECT sum(a) FROM {table}")) == [(102,)]
|
||||
assert list(user_session.execute(f"SELECT sum(a) FROM {table}")) == [(102,)]
|
||||
|
||||
@@ -92,14 +92,28 @@ def test_tablets_can_be_explicitly_disabled(cql, skip_without_tablets):
|
||||
assert len(list(res)) == 0, "tablets replication strategy turned on"
|
||||
|
||||
|
||||
def test_alter_changes_initial_tablets(cql, skip_without_tablets):
|
||||
ksdef = "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1} AND tablets = {'initial': 1};"
|
||||
def test_alter_changes_initial_tablets(cql, this_dc, skip_without_tablets):
|
||||
ksdef = f"WITH replication = {{'class': 'NetworkTopologyStrategy', '{this_dc}': 1}} AND tablets = {{'initial': 1}};"
|
||||
with new_test_keyspace(cql, ksdef) as keyspace:
|
||||
cql.execute(f"ALTER KEYSPACE {keyspace} WITH replication = {{'class': 'NetworkTopologyStrategy', 'replication_factor': 1}} AND tablets = {{'initial': 2}};")
|
||||
cql.execute(f"ALTER KEYSPACE {keyspace} WITH replication = {{'class': 'NetworkTopologyStrategy', '{this_dc}': 1}} AND tablets = {{'initial': 2}};")
|
||||
res = cql.execute(f"SELECT * FROM system_schema.scylla_keyspaces WHERE keyspace_name = '{keyspace}'").one()
|
||||
assert res.initial_tablets == 2
|
||||
|
||||
|
||||
def test_alter_changes_initial_tablets_short(cql, skip_without_tablets):
|
||||
ksdef = "WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1} AND tablets = {'initial': 1};"
|
||||
with new_test_keyspace(cql, ksdef) as keyspace:
|
||||
orig_rep = cql.execute(f"SELECT replication FROM system_schema.keyspaces WHERE keyspace_name = '{keyspace}'").one()
|
||||
|
||||
cql.execute(f"ALTER KEYSPACE {keyspace} WITH tablets = {{'initial': 2}};")
|
||||
res = cql.execute(f"SELECT * FROM system_schema.scylla_keyspaces WHERE keyspace_name = '{keyspace}'").one()
|
||||
assert res.initial_tablets == 2
|
||||
|
||||
# Test that replication parameters didn't change
|
||||
rep = cql.execute(f"SELECT replication FROM system_schema.keyspaces WHERE keyspace_name = '{keyspace}'").one()
|
||||
assert rep.replication == orig_rep.replication
|
||||
|
||||
|
||||
# Test that initial number of tablets is preserved in describe
|
||||
def test_describe_initial_tablets(cql, skip_without_tablets):
|
||||
ksdef = "WITH REPLICATION = { 'class' : 'NetworkTopologyStrategy', 'replication_factor' : '1' } " \
|
||||
@@ -219,3 +233,27 @@ def test_lwt_support_with_tablets(cql, test_keyspace, skip_without_tablets):
|
||||
cql.execute(f"DELETE FROM {table} WHERE key = 1 IF EXISTS")
|
||||
res = cql.execute(f"SELECT val FROM {table} WHERE key = 1").one()
|
||||
assert res.val == 0
|
||||
|
||||
|
||||
# We want to ensure that we can only change the RF of any DC by at most 1 at a time
|
||||
# if we use tablets. That provides us with the guarantee that the old and the new QUORUM
|
||||
# overlap by at least one node.
|
||||
def test_alter_tablet_keyspace(cql, this_dc):
|
||||
with new_test_keyspace(cql, f"WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', '{this_dc}' : 1 }} "
|
||||
f"AND TABLETS = {{ 'enabled': true, 'initial': 128 }}") as keyspace:
|
||||
def change_opt_rf(rf_opt, new_rf):
|
||||
cql.execute(f"ALTER KEYSPACE {keyspace} WITH REPLICATION = {{ 'class' : 'NetworkTopologyStrategy', '{rf_opt}' : {new_rf} }}")
|
||||
def change_dc_rf(new_rf):
|
||||
change_opt_rf(this_dc, new_rf)
|
||||
def change_default_rf(new_rf):
|
||||
change_opt_rf("replication_factor", new_rf)
|
||||
|
||||
change_dc_rf(2)
|
||||
change_dc_rf(3)
|
||||
|
||||
with pytest.raises(InvalidRequest):
|
||||
change_dc_rf(5)
|
||||
with pytest.raises(InvalidRequest):
|
||||
change_dc_rf(1)
|
||||
with pytest.raises(InvalidRequest):
|
||||
change_dc_rf(10)
|
||||
|
||||
@@ -747,13 +747,6 @@ private:
|
||||
_tablet_allocator.stop().get();
|
||||
});
|
||||
|
||||
_qp.invoke_on_all([this, &group0_client] (cql3::query_processor& qp) {
|
||||
qp.start_remote(_mm.local(), _forward_service.local(), group0_client);
|
||||
}).get();
|
||||
auto stop_qp_remote = defer([this] {
|
||||
_qp.invoke_on_all(&cql3::query_processor::stop_remote).get();
|
||||
});
|
||||
|
||||
service::raft_group0 group0_service{
|
||||
abort_sources.local(), _group0_registry.local(), _ms,
|
||||
_gossiper.local(), _feature_service.local(), _sys_ks.local(), group0_client};
|
||||
@@ -785,6 +778,13 @@ private:
|
||||
return db::initialize_virtual_tables(_db, _ss, _gossiper, _group0_registry, _sys_ks, *cfg);
|
||||
}).get();
|
||||
|
||||
_qp.invoke_on_all([this, &group0_client] (cql3::query_processor& qp) {
|
||||
qp.start_remote(_mm.local(), _forward_service.local(), _ss.local(), group0_client);
|
||||
}).get();
|
||||
auto stop_qp_remote = defer([this] {
|
||||
_qp.invoke_on_all(&cql3::query_processor::stop_remote).get();
|
||||
});
|
||||
|
||||
_cm.invoke_on_all([&](compaction_manager& cm) {
|
||||
auto cl = _db.local().commitlog();
|
||||
auto scl = _db.local().schema_commitlog();
|
||||
|
||||
@@ -106,4 +106,13 @@ clustering_key generate_clustering_key(schema_ptr s, bool allow_prefix, std::opt
|
||||
return std::move(keys.front());
|
||||
}
|
||||
|
||||
__attribute__((no_sanitize("undefined")))
|
||||
int64_t d2t(double d) {
|
||||
// Double to unsigned long conversion will overflow if the
|
||||
// input is greater than numeric_limits<long>::max(), so divide by two and
|
||||
// multiply again later.
|
||||
auto scale = std::numeric_limits<unsigned long>::max();
|
||||
return static_cast<unsigned long>(d * static_cast<double>(scale >> 1)) << 1;
|
||||
};
|
||||
|
||||
} // namespace tests
|
||||
|
||||
@@ -48,4 +48,7 @@ std::vector<clustering_key> generate_clustering_keys(size_t n, schema_ptr s, boo
|
||||
// Overload for a single key
|
||||
clustering_key generate_clustering_key(schema_ptr s, bool allow_prefix = false, std::optional<key_size> size = {});
|
||||
|
||||
// Double to unsigned long conversion
|
||||
int64_t d2t(double d);
|
||||
|
||||
} // namespace tests
|
||||
|
||||
@@ -265,6 +265,9 @@ async def start_writes_to_cdc_table(cql: Session, concurrency: int = 3):
|
||||
await cql.run_async(f"CREATE TABLE {ks_name}.tbl (pk int PRIMARY KEY, v int) WITH cdc = {{'enabled':true}}")
|
||||
|
||||
stmt = cql.prepare(f"INSERT INTO {ks_name}.tbl (pk, v) VALUES (?, 0)")
|
||||
# FIXME: this function is used by tests that use clusters with at least 3 nodes and restart nodes sequentially.
|
||||
# Therefore, RF=3 and CL=2 should work, but they don't. Some writes fail because CL=2 is not satisfied.
|
||||
# We should investigate why it happens and increase CL to 2 if possible.
|
||||
stmt.consistency_level = ConsistencyLevel.ONE
|
||||
|
||||
async def do_writes():
|
||||
@@ -287,6 +290,15 @@ async def start_writes_to_cdc_table(cql: Session, concurrency: int = 3):
|
||||
|
||||
tasks = [asyncio.create_task(do_writes()) for _ in range(concurrency)]
|
||||
|
||||
def restart(new_cql: Session):
|
||||
nonlocal cql
|
||||
nonlocal tasks
|
||||
logger.info("Restarting write workers")
|
||||
assert stop_event.is_set()
|
||||
stop_event.clear()
|
||||
cql = new_cql
|
||||
tasks = [asyncio.create_task(do_writes()) for _ in range(concurrency)]
|
||||
|
||||
async def verify():
|
||||
generations = await cql.run_async("SELECT * FROM system_distributed.cdc_streams_descriptions_v2")
|
||||
|
||||
@@ -298,13 +310,13 @@ async def start_writes_to_cdc_table(cql: Session, concurrency: int = 3):
|
||||
timestamp = stream_to_timestamp[log_entry.cdc_stream_id]
|
||||
assert timestamp <= datetime_from_uuid1(log_entry.cdc_time)
|
||||
|
||||
async def finish_and_verify():
|
||||
async def stop_and_verify():
|
||||
logger.info("Stopping write workers")
|
||||
stop_event.set()
|
||||
await asyncio.gather(*tasks)
|
||||
await verify()
|
||||
|
||||
return finish_and_verify
|
||||
return restart, stop_and_verify
|
||||
|
||||
def log_run_time(f):
|
||||
@functools.wraps(f)
|
||||
|
||||
79
test/topology_custom/test_mv_topology_change.py
Normal file
79
test/topology_custom/test_mv_topology_change.py
Normal file
@@ -0,0 +1,79 @@
|
||||
#
|
||||
# Copyright (C) 2024-present ScyllaDB
|
||||
#
|
||||
# SPDX-License-Identifier: AGPL-3.0-or-later
|
||||
#
|
||||
import asyncio
|
||||
import pytest
|
||||
import time
|
||||
import logging
|
||||
|
||||
from cassandra.cluster import ConnectionException, NoHostAvailable # type: ignore
|
||||
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.topology.conftest import skip_mode
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# This test reproduces issues #17786 and #18709
|
||||
# In the test, we create a keyspace with a table and a materialized view.
|
||||
# We then start writing to the table, causing the materialized view to be updated.
|
||||
# While the writes are in progress, we add then decommission a node in the cluster.
|
||||
# The test verifies that no node crashes as a result of the topology change combined
|
||||
# with the writes.
|
||||
@pytest.mark.asyncio
|
||||
@skip_mode('release', 'error injections are not supported in release mode')
|
||||
async def test_mv_topology_change(manager: ManagerClient):
|
||||
cfg = {'force_gossip_topology_changes': True, 'error_injections_at_startup': ['delay_before_get_view_natural_endpoint']}
|
||||
|
||||
servers = [await manager.server_add(config=cfg, timeout=60) for _ in range(3)]
|
||||
|
||||
cql = manager.get_cql()
|
||||
await cql.run_async("CREATE KEYSPACE ks WITH replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 3};")
|
||||
await cql.run_async("CREATE TABLE ks.t (pk int primary key, v int)")
|
||||
await cql.run_async("CREATE materialized view ks.t_view AS select pk, v from ks.t where v is not null primary key (v, pk)")
|
||||
|
||||
stop_event = asyncio.Event()
|
||||
concurrency = 10
|
||||
async def do_writes(start_it, repeat) -> int:
|
||||
iteration = start_it
|
||||
while not stop_event.is_set():
|
||||
start_time = time.time()
|
||||
try:
|
||||
await cql.run_async(f"insert into ks.t (pk, v) values ({iteration}, {iteration})")
|
||||
except NoHostAvailable as e:
|
||||
for _, err in e.errors.items():
|
||||
# ConnectionException can be raised when the node is shutting down.
|
||||
if not isinstance(err, ConnectionException):
|
||||
logger.error(f"Write started {time.time() - start_time}s ago failed: {e}")
|
||||
raise
|
||||
except Exception as e:
|
||||
logger.error(f"Write started {time.time() - start_time}s ago failed: {e}")
|
||||
raise
|
||||
iteration += concurrency
|
||||
if not repeat:
|
||||
break
|
||||
await asyncio.sleep(0.01)
|
||||
return iteration
|
||||
|
||||
|
||||
# to hit the issue #18709 it's enough to start one batch of writes, the effective
|
||||
# replication maps for base and view will change after the writes start but before they finish
|
||||
tasks = [asyncio.create_task(do_writes(i, repeat=False)) for i in range(concurrency)]
|
||||
|
||||
server = await manager.server_add()
|
||||
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
[await manager.api.disable_injection(s.ip_addr, "delay_before_get_view_natural_endpoint") for s in servers]
|
||||
[await manager.api.enable_injection(s.ip_addr, "delay_after_erm_update", False, parameters={'ks_name': 'ks', 'cf_name': 't'}) for s in servers]
|
||||
|
||||
# to hit the issue #17786 we need to run multiple batches of writes, so that some write is processed while the
|
||||
# effective replication maps for base and view are different
|
||||
tasks = [asyncio.create_task(do_writes(i, repeat=True)) for i in range(concurrency)]
|
||||
await manager.decommission_node(server.server_id)
|
||||
|
||||
stop_event.set()
|
||||
await asyncio.gather(*tasks)
|
||||
|
||||
@@ -8,11 +8,15 @@ from cassandra.query import SimpleStatement, ConsistencyLevel
|
||||
from test.pylib.manager_client import ManagerClient
|
||||
from test.pylib.rest_client import HTTPError
|
||||
from test.pylib.tablets import get_all_tablet_replicas
|
||||
from test.pylib.util import read_barrier
|
||||
from test.topology.util import wait_for_cql_and_get_hosts
|
||||
import time
|
||||
import pytest
|
||||
import logging
|
||||
import asyncio
|
||||
import re
|
||||
import requests
|
||||
import random
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -101,12 +105,13 @@ async def test_reshape_with_tablets(manager: ManagerClient):
|
||||
|
||||
|
||||
@pytest.mark.parametrize("direction", ["up", "down", "none"])
|
||||
@pytest.mark.xfail(reason="Scaling not implemented yet")
|
||||
@pytest.mark.asyncio
|
||||
async def test_tablet_rf_change(manager: ManagerClient, direction):
|
||||
cfg = {'enable_user_defined_functions': False,
|
||||
'experimental_features': ['tablets']}
|
||||
servers = await manager.servers_add(3, config=cfg)
|
||||
for s in servers:
|
||||
await manager.api.disable_tablet_balancing(s.ip_addr)
|
||||
|
||||
cql = manager.get_cql()
|
||||
res = await cql.run_async("SELECT data_center FROM system.local")
|
||||
@@ -143,6 +148,25 @@ async def test_tablet_rf_change(manager: ManagerClient, direction):
|
||||
logger.info(f"Checking {rf_to} re-allocated replicas")
|
||||
await check_allocated_replica(rf_to)
|
||||
|
||||
if direction != 'up':
|
||||
# Don't check fragments for up/none changes, scylla crashes when checking nodes
|
||||
# that (validly) miss the replica, see scylladb/scylladb#18786
|
||||
return
|
||||
|
||||
fragments = { pk: set() for pk in random.sample(range(128), 17) }
|
||||
for s in servers:
|
||||
host_id = await manager.get_host_id(s.server_id)
|
||||
host = await wait_for_cql_and_get_hosts(cql, [s], time.time() + 30)
|
||||
await read_barrier(manager.get_cql(), host[0]) # scylladb/scylladb#18199
|
||||
for k in fragments:
|
||||
res = await cql.run_async(f"SELECT partition_region FROM MUTATION_FRAGMENTS(test.test) WHERE pk={k}", host=host[0])
|
||||
for fragment in res:
|
||||
if fragment.partition_region == 0: # partition start
|
||||
fragments[k].add(host_id)
|
||||
logger.info("Checking fragments")
|
||||
for k in fragments:
|
||||
assert len(fragments[k]) == rf_to, f"Found mutations for {k} key on {fragments[k]} hosts, but expected only {rf_to} of them"
|
||||
|
||||
|
||||
# Reproducer for https://github.com/scylladb/scylladb/issues/18110
|
||||
# Check that an existing cached read, will be cleaned up when the tablet it reads
|
||||
|
||||
@@ -19,21 +19,23 @@ def get_expected_tombstone_gc_mode(rf, tablets):
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("rf", [1, 2])
|
||||
@pytest.mark.parametrize("tablets", ["true", "false"])
|
||||
@pytest.mark.parametrize("tablets", [True, False])
|
||||
async def test_default_tombstone_gc(manager, rf, tablets):
|
||||
servers = [await manager.server_add(), await manager.server_add()]
|
||||
cql = manager.cql
|
||||
async with new_test_keyspace(cql, f"with replication = {{ 'class': 'NetworkTopologyStrategy', 'replication_factor': {rf}}} and tablets = {{'initial': {rf}}}") as keyspace:
|
||||
tablets_enabled = "true" if tablets else "false"
|
||||
async with new_test_keyspace(cql, f"with replication = {{ 'class': 'NetworkTopologyStrategy', 'replication_factor': {rf}}} and tablets = {{ 'enabled': {tablets_enabled} }}") as keyspace:
|
||||
async with new_test_table(cql, keyspace, "p int primary key, x int") as table:
|
||||
check_tombstone_gc_mode(cql, table, get_expected_tombstone_gc_mode(rf, tablets))
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@pytest.mark.parametrize("rf", [1, 2])
|
||||
@pytest.mark.parametrize("tablets", ["true", "false"])
|
||||
@pytest.mark.parametrize("tablets", [True, False])
|
||||
async def test_default_tombstone_gc_does_not_override(manager, rf, tablets):
|
||||
servers = [await manager.server_add(), await manager.server_add()]
|
||||
cql = manager.cql
|
||||
async with new_test_keyspace(cql, f"with replication = {{ 'class': 'NetworkTopologyStrategy', 'replication_factor': {rf}}} and tablets = {{'initial': {rf}}}") as keyspace:
|
||||
tablets_enabled = "true" if tablets else "false"
|
||||
async with new_test_keyspace(cql, f"with replication = {{ 'class': 'NetworkTopologyStrategy', 'replication_factor': {rf}}} and tablets = {{ 'enabled': {tablets_enabled} }}") as keyspace:
|
||||
async with new_test_table(cql, keyspace, "p int primary key, x int", " with tombstone_gc = {'mode': 'disabled'}") as table:
|
||||
await cql.run_async(f"ALTER TABLE {table} add y int")
|
||||
check_tombstone_gc_mode(cql, table, "disabled")
|
||||
|
||||
@@ -101,7 +101,6 @@ async def start_writes(cql: Session, concurrency: int = 3):
|
||||
|
||||
async def do_writes(worker_id: int):
|
||||
write_count = 0
|
||||
last_error = None
|
||||
while not stop_event.is_set():
|
||||
start_time = time.time()
|
||||
try:
|
||||
@@ -109,10 +108,8 @@ async def start_writes(cql: Session, concurrency: int = 3):
|
||||
write_count += 1
|
||||
except Exception as e:
|
||||
logger.error(f"Write started {time.time() - start_time}s ago failed: {e}")
|
||||
last_error = e
|
||||
raise
|
||||
logger.info(f"Worker #{worker_id} did {write_count} successful writes")
|
||||
if last_error is not None:
|
||||
raise last_error
|
||||
|
||||
tasks = [asyncio.create_task(do_writes(worker_id)) for worker_id in range(concurrency)]
|
||||
|
||||
|
||||
@@ -14,41 +14,72 @@ from test.pylib.util import wait_for_cql_and_get_hosts
|
||||
from test.topology.util import reconnect_driver, enter_recovery_state, \
|
||||
delete_raft_data_and_upgrade_state, log_run_time, wait_until_upgrade_finishes as wait_until_schema_upgrade_finishes, \
|
||||
wait_until_topology_upgrade_finishes, delete_raft_topology_state, wait_for_cdc_generations_publishing, \
|
||||
check_system_topology_and_cdc_generations_v3_consistency
|
||||
check_system_topology_and_cdc_generations_v3_consistency, start_writes_to_cdc_table, wait_until_last_generation_is_in_use
|
||||
|
||||
|
||||
@pytest.mark.asyncio
|
||||
@log_run_time
|
||||
async def test_topology_recovery_basic(request, manager: ManagerClient):
|
||||
servers = await manager.servers_add(3)
|
||||
async def test_topology_recovery_basic(request, mode: str, manager: ManagerClient):
|
||||
# Increase ring delay to ensure nodes learn about CDC generations before they start operating.
|
||||
cfg = {'ring_delay_ms': 15000 if mode == 'debug' else 5000}
|
||||
|
||||
servers = await manager.servers_add(3, config=cfg)
|
||||
cql = manager.cql
|
||||
assert(cql)
|
||||
|
||||
logging.info("Waiting until driver connects to every server")
|
||||
hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
|
||||
|
||||
restart_writes, stop_writes_and_verify = await start_writes_to_cdc_table(cql)
|
||||
|
||||
logging.info(f"Restarting hosts {hosts} in recovery mode")
|
||||
await asyncio.gather(*(enter_recovery_state(cql, h) for h in hosts))
|
||||
|
||||
# If we restarted nodes before the last generation was in use, some writes
|
||||
# could fail. After restart, nodes load only the last generation. If it's
|
||||
# not active yet, writes with lower timestamps would fail.
|
||||
await wait_until_last_generation_is_in_use(cql)
|
||||
|
||||
logging.debug("Sleeping for 1 second to make sure there are writes to the CDC table in all 3 generations")
|
||||
await asyncio.sleep(1)
|
||||
|
||||
# Restart sequentially, as it tests how nodes operating in legacy mode
|
||||
# react to raft topology mode nodes and vice versa
|
||||
await manager.rolling_restart(servers)
|
||||
|
||||
await stop_writes_and_verify()
|
||||
|
||||
cql = await reconnect_driver(manager)
|
||||
|
||||
logging.info("Cluster restarted, waiting until driver reconnects to every server")
|
||||
hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
|
||||
logging.info(f"Driver reconnected, hosts: {hosts}")
|
||||
|
||||
restart_writes(cql)
|
||||
|
||||
logging.info(f"Deleting Raft data and upgrade state on {hosts}")
|
||||
await asyncio.gather(*(delete_raft_topology_state(cql, h) for h in hosts))
|
||||
await asyncio.gather(*(delete_raft_data_and_upgrade_state(cql, h) for h in hosts))
|
||||
|
||||
logging.info(f"Restarting hosts {hosts}")
|
||||
await manager.rolling_restart(servers)
|
||||
|
||||
# FIXME: We must reconnect the driver before performing CQL queries below, for example
|
||||
# in wait_until_schema_upgrade_finishes. Unfortunately, it forces us to stop writing to
|
||||
# a CDC table first. Reconnecting the driver would close the session used to send the
|
||||
# writes, and some writes could time out on the client.
|
||||
# Once https://github.com/scylladb/python-driver/issues/295 is fixed, we can remove
|
||||
# all calls to reconnect_driver, restart_writes and leave only the last call to
|
||||
# stop_writes_and_verify.
|
||||
await stop_writes_and_verify()
|
||||
|
||||
cql = await reconnect_driver(manager)
|
||||
|
||||
logging.info("Cluster restarted, waiting until driver reconnects to every server")
|
||||
hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
|
||||
|
||||
restart_writes(cql)
|
||||
|
||||
logging.info("Waiting until upgrade to raft schema finishes")
|
||||
await asyncio.gather(*(wait_until_schema_upgrade_finishes(cql, h, time.time() + 60) for h in hosts))
|
||||
|
||||
@@ -73,7 +104,7 @@ async def test_topology_recovery_basic(request, manager: ManagerClient):
|
||||
await check_system_topology_and_cdc_generations_v3_consistency(manager, hosts)
|
||||
|
||||
logging.info("Booting new node")
|
||||
servers += [await manager.server_add()]
|
||||
servers += [await manager.server_add(config=cfg)]
|
||||
hosts = await wait_for_cql_and_get_hosts(cql, servers, time.time() + 60)
|
||||
|
||||
logging.info("Waiting for the new CDC generation publishing")
|
||||
@@ -81,3 +112,11 @@ async def test_topology_recovery_basic(request, manager: ManagerClient):
|
||||
|
||||
logging.info("Checking consistency of data in system.topology and system.cdc_generations_v3")
|
||||
await check_system_topology_and_cdc_generations_v3_consistency(manager, hosts)
|
||||
|
||||
await wait_until_last_generation_is_in_use(cql)
|
||||
|
||||
logging.debug("Sleeping for 1 second to make sure there are writes to the CDC table in the last generation")
|
||||
await asyncio.sleep(1)
|
||||
|
||||
logging.info("Checking correctness of data in system_distributed.cdc_streams_descriptions_v2")
|
||||
await stop_writes_and_verify()
|
||||
|
||||
@@ -44,7 +44,7 @@ async def test_topology_upgrade_basic(request, mode: str, manager: ManagerClient
|
||||
status = await manager.api.raft_topology_upgrade_status(host.address)
|
||||
assert status == "not_upgraded"
|
||||
|
||||
finish_writes_and_verify = await start_writes_to_cdc_table(cql)
|
||||
_, stop_writes_and_verify = await start_writes_to_cdc_table(cql)
|
||||
|
||||
logging.info("Triggering upgrade to raft topology")
|
||||
await manager.api.upgrade_to_raft_topology(hosts[0].address)
|
||||
@@ -79,4 +79,4 @@ async def test_topology_upgrade_basic(request, mode: str, manager: ManagerClient
|
||||
await asyncio.sleep(1)
|
||||
|
||||
logging.info("Checking correctness of data in system_distributed.cdc_streams_descriptions_v2")
|
||||
await finish_writes_and_verify()
|
||||
await stop_writes_and_verify()
|
||||
|
||||
@@ -29,7 +29,6 @@
|
||||
#include "db/large_data_handler.hh"
|
||||
#include "db/system_distributed_keyspace.hh"
|
||||
#include "db/schema_tables.hh"
|
||||
#include "db/system_auth_keyspace.hh"
|
||||
#include "db/system_keyspace.hh"
|
||||
#include "partition_slice_builder.hh"
|
||||
#include "readers/combined.hh"
|
||||
@@ -685,7 +684,6 @@ schema_ptr load_system_schema(const db::config& cfg, std::string_view keyspace,
|
||||
{db::system_distributed_keyspace::NAME, db::system_distributed_keyspace::all_distributed_tables()},
|
||||
{db::system_distributed_keyspace::NAME_EVERYWHERE, db::system_distributed_keyspace::all_everywhere_tables()},
|
||||
};
|
||||
schemas[db::system_auth_keyspace::NAME] = db::system_auth_keyspace::all_tables();
|
||||
auto ks_it = schemas.find(keyspace);
|
||||
if (ks_it == schemas.end()) {
|
||||
throw std::invalid_argument(fmt::format("unknown system keyspace: {}", keyspace));
|
||||
|
||||
@@ -45,7 +45,6 @@ future<schema_ptr> load_one_schema_from_file(const db::config& dbcfg, std::files
|
||||
/// Note that only schemas from builtin system tables are supported, i.e.,
|
||||
/// from the following keyspaces:
|
||||
/// * system
|
||||
/// * system_auth_v2
|
||||
/// * system_schema
|
||||
/// * system_distributed
|
||||
/// * system_distributed_everywhere
|
||||
|
||||
@@ -57,6 +57,7 @@ event::schema_change::schema_change(change_type change, target_type target, sstr
|
||||
{
|
||||
switch (target) {
|
||||
case event::schema_change::target_type::KEYSPACE:
|
||||
case event::schema_change::target_type::TABLET_KEYSPACE:
|
||||
assert(this->arguments.empty());
|
||||
break;
|
||||
case event::schema_change::target_type::TYPE:
|
||||
|
||||
@@ -61,7 +61,7 @@ public:
|
||||
class event::schema_change : public event {
|
||||
public:
|
||||
enum class change_type { CREATED, UPDATED, DROPPED };
|
||||
enum class target_type { KEYSPACE, TABLE, TYPE, FUNCTION, AGGREGATE };
|
||||
enum class target_type { KEYSPACE, TABLE, TYPE, FUNCTION, AGGREGATE, TABLET_KEYSPACE };
|
||||
|
||||
const change_type change;
|
||||
const target_type target;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user