mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-21 00:50:35 +00:00
Currently, we use std::vector<*mutation> to keep
a list of mutations for processing.
This can lead to large allocations, e.g. when the vector
size is a function of the number of tables.
Use a chunked vector instead to prevent oversized allocations.
`perf-simple-query --smp 1` results obtained for fixed 400MHz frequency
and PGO disabled:
Before (read path):
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=read, query_single_key=no, counters=no}
Disabling auto compaction
Creating 10000 partitions...
89055.97 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39417 insns/op, 18003 cycles/op, 0 errors)
103372.72 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39380 insns/op, 17300 cycles/op, 0 errors)
98942.27 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39413 insns/op, 17336 cycles/op, 0 errors)
103752.93 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39407 insns/op, 17252 cycles/op, 0 errors)
102516.77 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39403 insns/op, 17288 cycles/op, 0 errors)
throughput:
mean= 99528.13 standard-deviation=6155.71
median= 102516.77 median-absolute-deviation=3844.59
maximum=103752.93 minimum=89055.97
instructions_per_op:
mean= 39403.99 standard-deviation=14.25
median= 39406.75 median-absolute-deviation=9.30
maximum=39416.63 minimum=39380.39
cpu_cycles_per_op:
mean= 17435.81 standard-deviation=318.24
median= 17300.40 median-absolute-deviation=147.59
maximum=18002.53 minimum=17251.75
```
After (read path):
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=read, query_single_key=no, counters=no}
Disabling auto compaction
Creating 10000 partitions...
59755.04 tps ( 66.2 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39466 insns/op, 22834 cycles/op, 0 errors)
71854.16 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39417 insns/op, 17883 cycles/op, 0 errors)
82149.45 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.2 tasks/op, 39411 insns/op, 17409 cycles/op, 0 errors)
49640.04 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.3 tasks/op, 39474 insns/op, 19975 cycles/op, 0 errors)
54963.22 tps ( 66.1 allocs/op, 0.0 logallocs/op, 14.3 tasks/op, 39474 insns/op, 18235 cycles/op, 0 errors)
throughput:
mean= 63672.38 standard-deviation=13195.12
median= 59755.04 median-absolute-deviation=8709.16
maximum=82149.45 minimum=49640.04
instructions_per_op:
mean= 39448.38 standard-deviation=31.60
median= 39466.17 median-absolute-deviation=25.75
maximum=39474.12 minimum=39411.42
cpu_cycles_per_op:
mean= 19267.01 standard-deviation=2217.03
median= 18234.80 median-absolute-deviation=1384.25
maximum=22834.26 minimum=17408.67
```
`perf-simple-query --smp 1 --write` results obtained for fixed 400MHz frequency
and PGO disabled:
Before (write path):
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=write, query_single_key=no, counters=no}
Disabling auto compaction
63736.96 tps ( 59.4 allocs/op, 16.4 logallocs/op, 14.3 tasks/op, 49667 insns/op, 19924 cycles/op, 0 errors)
64109.41 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 49992 insns/op, 20084 cycles/op, 0 errors)
56950.47 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50005 insns/op, 20501 cycles/op, 0 errors)
44858.42 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50014 insns/op, 21947 cycles/op, 0 errors)
28592.87 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50027 insns/op, 27659 cycles/op, 0 errors)
throughput:
mean= 51649.63 standard-deviation=15059.74
median= 56950.47 median-absolute-deviation=12087.33
maximum=64109.41 minimum=28592.87
instructions_per_op:
mean= 49941.18 standard-deviation=153.76
median= 50005.24 median-absolute-deviation=73.01
maximum=50027.07 minimum=49667.05
cpu_cycles_per_op:
mean= 22023.01 standard-deviation=3249.92
median= 20500.74 median-absolute-deviation=1938.76
maximum=27658.75 minimum=19924.32
```
After (write path):
```
enable-cache=1
Running test with config: {partitions=10000, concurrency=100, mode=write, query_single_key=no, counters=no}
Disabling auto compaction
53395.93 tps ( 59.4 allocs/op, 16.5 logallocs/op, 14.3 tasks/op, 50326 insns/op, 21252 cycles/op, 0 errors)
46527.83 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50704 insns/op, 21555 cycles/op, 0 errors)
55846.30 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50731 insns/op, 21060 cycles/op, 0 errors)
55669.30 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50735 insns/op, 21521 cycles/op, 0 errors)
52130.17 tps ( 59.3 allocs/op, 16.0 logallocs/op, 14.3 tasks/op, 50757 insns/op, 21334 cycles/op, 0 errors)
throughput:
mean= 52713.91 standard-deviation=3795.38
median= 53395.93 median-absolute-deviation=2955.40
maximum=55846.30 minimum=46527.83
instructions_per_op:
mean= 50650.57 standard-deviation=182.46
median= 50731.38 median-absolute-deviation=84.09
maximum=50756.62 minimum=50325.87
cpu_cycles_per_op:
mean= 21344.42 standard-deviation=202.86
median= 21334.00 median-absolute-deviation=176.37
maximum=21554.61 minimum=21060.24
```
Fixes #24815
Improvement for rare corner cases. No backport required
Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
Closes scylladb/scylladb#24919
313 lines
16 KiB
C++
313 lines
16 KiB
C++
/*
|
|
* Copyright 2015-present ScyllaDB
|
|
*
|
|
* Modified by ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: (LicenseRef-ScyllaDB-Source-Available-1.0 and Apache-2.0)
|
|
*/
|
|
|
|
#include <seastar/core/coroutine.hh>
|
|
#include "cql3/statements/create_keyspace_statement.hh"
|
|
#include "cql3/statements/ks_prop_defs.hh"
|
|
#include "exceptions/exceptions.hh"
|
|
#include "locator/tablets.hh"
|
|
#include "prepared_statement.hh"
|
|
#include "data_dictionary/data_dictionary.hh"
|
|
#include "data_dictionary/keyspace_metadata.hh"
|
|
#include "mutation/mutation.hh"
|
|
#include "service/migration_manager.hh"
|
|
#include "service/storage_proxy.hh"
|
|
#include "cql3/query_processor.hh"
|
|
#include "db/config.hh"
|
|
#include "gms/feature_service.hh"
|
|
|
|
#include <boost/regex.hpp>
|
|
#include <stdexcept>
|
|
|
|
// Forward declaration; the definition is not part of this file.
// validate() below uses it to reject statements that target a system keyspace.
bool is_system_keyspace(std::string_view keyspace);
|
|
|
|
namespace cql3 {
|
|
|
|
namespace statements {
|
|
|
|
// File-local logger; execute() uses it to log every warning attached to the result.
static logging::logger mylogger("create_keyspace");
|
|
|
|
// Builds a CREATE KEYSPACE statement.
//
// name          - name of the keyspace to create
// attrs         - keyspace property definitions (replication, storage, ...)
// if_not_exists - whether the statement carried an IF NOT EXISTS clause
create_keyspace_statement::create_keyspace_statement(const sstring& name, shared_ptr<ks_prop_defs> attrs, bool if_not_exists)
    : _name(name)
    , _attrs(attrs)
    , _if_not_exists(if_not_exists)
{ }
|
|
|
|
// Accessor for the name of the keyspace this statement creates.
const sstring& create_keyspace_statement::keyspace() const {
    return _name;
}
|
|
|
|
// Access check for CREATE KEYSPACE: asks the client state whether it holds
// the CREATE permission on all keyspaces and returns the resulting future.
future<> create_keyspace_statement::check_access(query_processor& qp, const service::client_state& state) const {
    const auto required_permission = auth::permission::CREATE;
    return state.has_all_keyspaces_access(required_permission);
}
|
|
|
|
// Validates the statement before it is executed.
//
// Throws exceptions::invalid_request_exception when:
//  - the (lower-cased) name designates a system keyspace,
//  - the name is not made only of word characters (\w+),
//  - the name is longer than schema::NAME_LENGTH,
//  - the storage options cannot be parsed (std::runtime_error is translated).
// Throws exceptions::configuration_exception when no replication strategy
// class was given. Anything thrown by _attrs->validate() propagates as is.
void create_keyspace_statement::validate(query_processor& qp, const service::client_state& state) const
{
    // Lower-cased copy of the keyspace name, used for all the name checks.
    // Each character goes through unsigned char first: handing tolower() a
    // negative value (a non-ASCII byte on platforms where char is signed)
    // is undefined behavior.
    std::string name;
    name.resize(_name.length());
    std::transform(_name.begin(), _name.end(), name.begin(), [] (unsigned char c) {
        return static_cast<char>(::tolower(c));
    });
    if (is_system_keyspace(name)) {
        throw exceptions::invalid_request_exception("system keyspace is not user-modifiable");
    }
    // keyspace name
    boost::regex name_regex("\\w+");
    if (!boost::regex_match(name, name_regex)) {
        throw exceptions::invalid_request_exception(format("\"{}\" is not a valid keyspace name", _name.c_str()));
    }
    if (name.length() > schema::NAME_LENGTH) {
        throw exceptions::invalid_request_exception(format("Keyspace names shouldn't be more than {:d} characters long (got \"{}\")", schema::NAME_LENGTH, _name.c_str()));
    }

    _attrs->validate();

    if (!_attrs->get_replication_strategy_class()) {
        throw exceptions::configuration_exception("Missing mandatory replication strategy class");
    }
    try {
        // Called only for its validation side effect; the result is discarded.
        _attrs->get_storage_options();
    } catch (const std::runtime_error& e) {
        // Re-throw as a type that can be reported back to the client.
        throw exceptions::invalid_request_exception(e.what());
    }
#if 0
    // The strategy is validated through KSMetaData.validate() in announceNewKeyspace below.
    // However, for backward compatibility with thrift, this doesn't validate unexpected options yet,
    // so doing proper validation here.
    AbstractReplicationStrategy.validateReplicationStrategy(name,
                                                            AbstractReplicationStrategy.getClass(attrs.getReplicationStrategyClass()),
                                                            StorageService.instance.getTokenMetadata(),
                                                            DatabaseDescriptor.getEndpointSnitch(),
                                                            attrs.getReplicationOptions());
#endif
}
|
|
|
|
// Prepares the schema change for creating the keyspace.
//
// Returns a tuple of (schema-change event, schema mutations, warnings).
// An already_exists_exception is swallowed when the statement was created with
// IF NOT EXISTS; otherwise it is returned as the coroutine's exception.
// A failed RF-rack validity check is reported as invalid_request_exception.
future<std::tuple<::shared_ptr<cql_transport::event::schema_change>, utils::chunked_vector<mutation>, cql3::cql_warnings_vec>> create_keyspace_statement::prepare_schema_mutations(query_processor& qp, const query_options&, api::timestamp_type ts) const {
    using namespace cql_transport;
    const auto token_metadata = qp.proxy().get_token_metadata_ptr();
    const auto& features = qp.proxy().features();
    const auto& config = qp.db().get_config();
    utils::chunked_vector<mutation> mutations;
    std::vector<sstring> warnings;

    try {
        auto ks_meta = _attrs->as_ks_metadata(_name, *token_metadata, features, config);
        mutations = service::prepare_new_keyspace_announcement(qp.db().real_database(), ks_meta, ts);

        // If the new keyspace uses tablets, as long as there are features
        // which aren't supported by tablets we want to warn the user that
        // they will not be usable on the new keyspace - and suggest how a
        // keyspace can be created without tablets (see rationale in #16807).
        // Once all feature will become supported with tablets, we should
        // remove this check.
        auto strategy = locator::abstract_replication_strategy::create_replication_strategy(
                ks_meta->strategy_name(),
                locator::replication_strategy_params(ks_meta->strategy_options(), ks_meta->initial_tablets()));
        const bool tablets_in_use = strategy->uses_tablets();
        if (tablets_in_use) {
            warnings.push_back(
                    "Tables in this keyspace will be replicated using Tablets "
                    "and will not support Materialized Views, Secondary Indexes, CDC, LWT and counters features. "
                    "To use Materialized Views, Secondary Indexes, CDC, LWT or counters, drop this keyspace and re-create it "
                    "without tablets by adding AND TABLETS = {'enabled': false} to the CREATE KEYSPACE statement.");
            // NOTE(review): value() presumably cannot fail here because a
            // tablets-based strategy implies initial_tablets is set - confirm.
            if (ks_meta->initial_tablets().value()) {
                warnings.push_back("Keyspace `initial` tablets option is deprecated. Use per-table tablet options instead.");
            }
        }

        // When `rf_rack_valid_keyspaces` is enabled, creating an RF-rack-invalid
        // keyspace is forbidden, so verify RF-rack validity here.
        // For more context, see: scylladb/scylladb#23071.
        if (config.rf_rack_valid_keyspaces()) {
            try {
                // We hold a group0_guard, so it's correct to check this here.
                // The topology or schema cannot change while we're performing this query.
                locator::assert_rf_rack_valid_keyspace(_name, token_metadata, *strategy);
            } catch (const std::exception& validation_error) {
                // The concrete exception type is not guaranteed, so wrap the
                // message in a type that can be passed to the user.
                throw exceptions::invalid_request_exception(validation_error.what());
            }
        }
    } catch (const exceptions::already_exists_exception&) {
        // Only tolerated when IF NOT EXISTS was specified.
        if (!_if_not_exists) {
            co_return coroutine::exception(std::current_exception());
        }
    }

    // If an IF NOT EXISTS clause was used and resource was already created
    // we shouldn't emit created event. However it interacts badly with
    // concurrent clients creating resources. The client seeing no create event
    // assumes resource already previously existed and proceeds with its logic
    // which may depend on that resource. But it may send requests to nodes which
    // are not yet aware of new schema or client's metadata may be outdated.
    // To force synchronization always emit the event (see
    // github.com/scylladb/scylladb/issues/16909).
    co_return std::make_tuple(created_event(), std::move(mutations), std::move(warnings));
}
|
|
|
|
std::unique_ptr<cql3::statements::prepared_statement>
|
|
cql3::statements::create_keyspace_statement::prepare(data_dictionary::database db, cql_stats& stats) {
|
|
return std::make_unique<prepared_statement>(audit_info(), make_shared<create_keyspace_statement>(*this));
|
|
}
|
|
|
|
// Grants the applicable permissions on the keyspace's data resource to the
// user of the given client state, recording the changes in `mc`.
// An auth::unsupported_authorization_operation is silently ignored.
future<> cql3::statements::create_keyspace_statement::grant_permissions_to_creator(const service::client_state& cs, service::group0_batch& mc) const {
    auto ks_resource = auth::make_data_resource(keyspace());
    try {
        co_await auth::grant_applicable_permissions(*cs.get_auth_service(), *cs.user(), ks_resource, mc);
    } catch (const auth::unsupported_authorization_operation&) {
        // Deliberately ignored: there is nothing to do in this case.
    }
}
|
|
|
|
// Check for replication strategy choices which are restricted by the
|
|
// configuration. This check can throw a configuration_exception immediately
|
|
// if the strategy is forbidden by the configuration, or return a warning
|
|
// string if the restriction was set to warning level.
|
|
// This function is only supposed to check for replication strategies
|
|
// restricted by the configuration. Checks for other types of strategy
|
|
// errors (such as unknown replication strategy name or unknown options
|
|
// to a known replication strategy) are done elsewhere.
|
|
std::vector<sstring> check_against_restricted_replication_strategies(
|
|
query_processor& qp,
|
|
const sstring& keyspace,
|
|
const ks_prop_defs& attrs,
|
|
cql_stats& stats)
|
|
{
|
|
if (!attrs.get_replication_strategy_class()) {
|
|
return {};
|
|
}
|
|
|
|
std::vector<sstring> warnings;
|
|
locator::replication_strategy_config_options opts;
|
|
locator::replication_strategy_params params(opts, std::nullopt);
|
|
auto replication_strategy = locator::abstract_replication_strategy::create_replication_strategy(
|
|
locator::abstract_replication_strategy::to_qualified_class_name(
|
|
*attrs.get_replication_strategy_class()), params)->get_type();
|
|
auto rs_warn_list = qp.db().get_config().replication_strategy_warn_list();
|
|
auto rs_fail_list = qp.db().get_config().replication_strategy_fail_list();
|
|
|
|
if (replication_strategy == locator::replication_strategy_type::simple) {
|
|
if (auto simple_strategy_restriction = qp.db().get_config().restrict_replication_simplestrategy();
|
|
simple_strategy_restriction == db::tri_mode_restriction_t::mode::TRUE) {
|
|
rs_fail_list.emplace_back(locator::replication_strategy_type::simple);
|
|
} else if (simple_strategy_restriction == db::tri_mode_restriction_t::mode::WARN) {
|
|
rs_warn_list.emplace_back(locator::replication_strategy_type::simple);
|
|
} else if (auto &topology = qp.proxy().get_token_metadata_ptr()->get_topology();
|
|
topology.get_datacenter_endpoints().size() > 1) {
|
|
// Scylla was configured to allow SimpleStrategy, but let's warn
|
|
// if it's used on a cluster which *already* has multiple DCs:
|
|
warnings.emplace_back("Using SimpleStrategy in a multi-datacenter environment is not recommended.");
|
|
}
|
|
}
|
|
|
|
if (auto present_on_fail_list = std::find(rs_fail_list.begin(), rs_fail_list.end(), replication_strategy); present_on_fail_list != rs_fail_list.end()) {
|
|
++stats.replication_strategy_fail_list_violations;
|
|
throw exceptions::configuration_exception(format(
|
|
"{} replication class is not recommended, and forbidden by the current configuration, "
|
|
"but was used for keyspace {}. You may override this restriction by modifying "
|
|
"replication_strategy_fail_list configuration option to not list {}.",
|
|
*attrs.get_replication_strategy_class(), keyspace, *attrs.get_replication_strategy_class()));
|
|
}
|
|
if (auto present_on_warn_list = std::find(rs_warn_list.begin(), rs_warn_list.end(), replication_strategy); present_on_warn_list != rs_warn_list.end()) {
|
|
++stats.replication_strategy_warn_list_violations;
|
|
warnings.push_back(format("{} replication class is not recommended, but was used for keyspace {}. "
|
|
"You may suppress this warning by delisting {} from replication_strategy_warn_list configuration option, "
|
|
"or make it into an error by listing this replication strategy on replication_strategy_fail_list.",
|
|
*attrs.get_replication_strategy_class(), keyspace, *attrs.get_replication_strategy_class()));
|
|
}
|
|
|
|
// The {minimum,maximum}_replication_factor_{warn,fail}_threshold configuration option can be used to forbid
|
|
// a smaller/greater replication factor. We assume that all numeric replication
|
|
// options except for initial_tablets are replication factors - this is true for both
|
|
// SimpleStrategy and NetworkTopologyStrategy
|
|
// A zero replication factor is not forbidden - it is the traditional
|
|
// way to avoid replication on some DC.
|
|
// We ignore errors (non-number, negative number, etc.) here,
|
|
// these are checked and reported elsewhere.
|
|
for (auto opt : attrs.get_replication_options()) {
|
|
try {
|
|
auto rf = std::stol(opt.second);
|
|
if (rf > 0) {
|
|
if (auto min_fail = qp.proxy().data_dictionary().get_config().minimum_replication_factor_fail_threshold();
|
|
min_fail >= 0 && rf < min_fail) {
|
|
++stats.minimum_replication_factor_fail_violations;
|
|
throw exceptions::configuration_exception(format(
|
|
"Replication Factor {}={} is forbidden by the current "
|
|
"configuration setting of minimum_replication_factor_fail_threshold={}. Please "
|
|
"increase replication factor, or lower minimum_replication_factor_fail_threshold "
|
|
"set in the configuration.", opt.first, rf,
|
|
qp.proxy().data_dictionary().get_config().minimum_replication_factor_fail_threshold()));
|
|
}
|
|
else if (auto max_fail = qp.proxy().data_dictionary().get_config().maximum_replication_factor_fail_threshold();
|
|
max_fail >= 0 && rf > max_fail) {
|
|
++stats.maximum_replication_factor_fail_violations;
|
|
throw exceptions::configuration_exception(format(
|
|
"Replication Factor {}={} is forbidden by the current "
|
|
"configuration setting of maximum_replication_factor_fail_threshold={}. Please "
|
|
"decrease replication factor, or increase maximum_replication_factor_fail_threshold "
|
|
"set in the configuration.", opt.first, rf,
|
|
qp.proxy().data_dictionary().get_config().maximum_replication_factor_fail_threshold()));
|
|
}
|
|
else if (auto min_warn = qp.proxy().data_dictionary().get_config().minimum_replication_factor_warn_threshold();
|
|
min_warn >= 0 && rf < min_warn)
|
|
{
|
|
++stats.minimum_replication_factor_warn_violations;
|
|
warnings.push_back(format("Using Replication Factor {}={} lower than the "
|
|
"minimum_replication_factor_warn_threshold={} is not recommended.", opt.first, rf,
|
|
qp.proxy().data_dictionary().get_config().minimum_replication_factor_warn_threshold()));
|
|
}
|
|
else if (auto max_warn = qp.proxy().data_dictionary().get_config().maximum_replication_factor_warn_threshold();
|
|
max_warn >= 0 && rf > max_warn)
|
|
{
|
|
++stats.maximum_replication_factor_warn_violations;
|
|
warnings.push_back(format("Using Replication Factor {}={} greater than the "
|
|
"maximum_replication_factor_warn_threshold={} is not recommended.", opt.first, rf,
|
|
qp.proxy().data_dictionary().get_config().maximum_replication_factor_warn_threshold()));
|
|
}
|
|
}
|
|
} catch (std::invalid_argument&) {
|
|
} catch (std::out_of_range& ) {
|
|
}
|
|
}
|
|
return warnings;
|
|
}
|
|
|
|
// Executes the statement.
//
// First runs the configuration-driven replication strategy checks (which may
// throw), then delegates to schema_altering_statement::execute(). Every
// warning produced by the checks is attached to the result message and logged.
future<::shared_ptr<messages::result_message>>
create_keyspace_statement::execute(query_processor& qp, service::query_state& state, const query_options& options, std::optional<service::group0_guard> guard) const {
    std::vector<sstring> restriction_warnings = check_against_restricted_replication_strategies(qp, keyspace(), *_attrs, qp.get_cql_stats());

    // Continuation that decorates the result with the collected warnings.
    auto attach_warnings = [restriction_warnings = std::move(restriction_warnings)] (::shared_ptr<messages::result_message> result) {
        for (const auto& text : restriction_warnings) {
            result->add_warning(text);
            mylogger.warn("{}", text);
        }
        return result;
    };

    return schema_altering_statement::execute(qp, state, options, std::move(guard)).then(std::move(attach_warnings));
}
|
|
|
|
// Validates the keyspace properties and then builds the keyspace metadata
// for this statement's keyspace from them.
lw_shared_ptr<data_dictionary::keyspace_metadata> create_keyspace_statement::get_keyspace_metadata(const locator::token_metadata& tm, const gms::feature_service& feat, const db::config& cfg) {
    // Validation runs first; if it throws, no metadata is built.
    _attrs->validate();
    auto ks_metadata = _attrs->as_ks_metadata(_name, tm, feat, cfg);
    return ks_metadata;
}
|
|
|
|
// Builds the schema-change event announcing that this keyspace was created.
::shared_ptr<schema_altering_statement::event_t> create_keyspace_statement::created_event() const {
    const auto change = event_t::change_type::CREATED;
    const auto target = event_t::target_type::KEYSPACE;
    return make_shared<event_t>(change, target, keyspace());
}
|
|
|
|
}
|
|
|
|
}
|