mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-19 16:15:07 +00:00
tablets: Introduce pow2_count per-table tablet option
By default the option is true, in which case the tablet count of the table is rounded up to a power of two. Setting it to false lifts this restriction, so the tablet count can be arbitrary. This will allow testing the logic that handles arbitrary tablet counts.
This commit is contained in:
@@ -197,7 +197,7 @@ void cf_prop_defs::validate(const data_dictionary::database db, sstring ks_name,
|
||||
if (!db.features().tablet_options) {
|
||||
throw exceptions::configuration_exception("tablet options cannot be used until all nodes in the cluster enable this feature");
|
||||
}
|
||||
db::tablet_options::validate(*tablet_options_map);
|
||||
db::tablet_options::validate(*tablet_options_map, db.features());
|
||||
}
|
||||
|
||||
if (has_property(KW_STORAGE_ENGINE)) {
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
|
||||
#include "exceptions/exceptions.hh"
|
||||
#include "db/tablet_options.hh"
|
||||
#include "gms/feature_service.hh"
|
||||
#include <seastar/core/bitops.hh>
|
||||
#include "utils/log.hh"
|
||||
|
||||
@@ -16,6 +17,17 @@ extern logging::logger dblog;
|
||||
|
||||
namespace db {
|
||||
|
||||
static
|
||||
bool parse_bool_option(const sstring& value) {
|
||||
if (strcasecmp(value.c_str(), "true") == 0 || strcasecmp(value.c_str(), "yes") == 0 || value == "1") {
|
||||
return true;
|
||||
}
|
||||
if (strcasecmp(value.c_str(), "false") == 0 || strcasecmp(value.c_str(), "no") == 0 || value == "0") {
|
||||
return false;
|
||||
}
|
||||
throw std::invalid_argument(format("Invalid boolean value: {}", value));
|
||||
}
|
||||
|
||||
tablet_options::tablet_options(const map_type& map) {
|
||||
for (auto& [key, value_str] : map) {
|
||||
switch (tablet_options::from_string(key)) {
|
||||
@@ -39,6 +51,9 @@ tablet_options::tablet_options(const map_type& map) {
|
||||
expected_data_size_in_gb.emplace(value);
|
||||
}
|
||||
break;
|
||||
case tablet_option_type::pow2_count:
|
||||
pow2_count = parse_bool_option(value_str);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -49,6 +64,7 @@ sstring tablet_options::to_string(tablet_option_type hint) {
|
||||
case tablet_option_type::max_tablet_count: return "max_tablet_count";
|
||||
case tablet_option_type::min_per_shard_tablet_count: return "min_per_shard_tablet_count";
|
||||
case tablet_option_type::expected_data_size_in_gb: return "expected_data_size_in_gb";
|
||||
case tablet_option_type::pow2_count: return "pow2_count";
|
||||
}
|
||||
}
|
||||
|
||||
@@ -61,6 +77,8 @@ tablet_option_type tablet_options::from_string(sstring hint_desc) {
|
||||
return tablet_option_type::min_per_shard_tablet_count;
|
||||
} else if (hint_desc == "expected_data_size_in_gb") {
|
||||
return tablet_option_type::expected_data_size_in_gb;
|
||||
} else if (hint_desc == "pow2_count") {
|
||||
return tablet_option_type::pow2_count;
|
||||
} else {
|
||||
throw exceptions::syntax_exception(fmt::format("Unknown tablet hint '{}'", hint_desc));
|
||||
}
|
||||
@@ -80,13 +98,17 @@ std::map<sstring, sstring> tablet_options::to_map() const {
|
||||
if (expected_data_size_in_gb) {
|
||||
res[to_string(tablet_option_type::expected_data_size_in_gb)] = fmt::to_string(*expected_data_size_in_gb);
|
||||
}
|
||||
if (pow2_count) {
|
||||
res[to_string(tablet_option_type::pow2_count)] = fmt::to_string(*pow2_count);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
void tablet_options::validate(const map_type& map) {
|
||||
void tablet_options::validate(const map_type& map, const gms::feature_service& features) {
|
||||
std::optional<ssize_t> min_tablets;
|
||||
std::optional<ssize_t> max_tablets;
|
||||
|
||||
bool pow2_count = features.arbitrary_tablet_boundaries ? default_pow2_count : true;
|
||||
|
||||
for (auto& [key, value_str] : map) {
|
||||
switch (tablet_options::from_string(key)) {
|
||||
case tablet_option_type::min_tablet_count:
|
||||
@@ -113,12 +135,23 @@ void tablet_options::validate(const map_type& map) {
|
||||
throw exceptions::configuration_exception(format("Invalid value '{}' for expected_data_size_in_gb", value));
|
||||
}
|
||||
break;
|
||||
case tablet_option_type::pow2_count:
|
||||
try {
|
||||
pow2_count = parse_bool_option(value_str);
|
||||
} catch (const std::invalid_argument& e) {
|
||||
throw exceptions::configuration_exception(format("Invalid value '{}' for pow2_count", value_str));
|
||||
}
|
||||
if (!pow2_count && !features.arbitrary_tablet_boundaries) {
|
||||
throw exceptions::configuration_exception(
|
||||
"pow2_count cannot be set to false until the arbitrary_tablet_boundaries feature is enabled");
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (min_tablets && max_tablets) {
|
||||
auto effective_min = 1u << log2ceil(static_cast<size_t>(*min_tablets));
|
||||
auto effective_max = 1u << log2floor(static_cast<size_t>(*max_tablets));
|
||||
auto effective_min = pow2_count ? 1u << log2ceil(static_cast<size_t>(*min_tablets)) : static_cast<size_t>(*min_tablets);
|
||||
auto effective_max = pow2_count ? 1u << log2floor(static_cast<size_t>(*max_tablets)) : static_cast<size_t>(*max_tablets);
|
||||
|
||||
if (effective_min > effective_max) {
|
||||
throw exceptions::configuration_exception(
|
||||
|
||||
@@ -13,6 +13,8 @@
|
||||
|
||||
using namespace seastar;
|
||||
|
||||
namespace gms { class feature_service; }
|
||||
|
||||
namespace db {
|
||||
|
||||
// Per-table tablet options
|
||||
@@ -21,28 +23,33 @@ enum class tablet_option_type {
|
||||
max_tablet_count,
|
||||
min_per_shard_tablet_count,
|
||||
expected_data_size_in_gb,
|
||||
pow2_count,
|
||||
};
|
||||
|
||||
// Per-table tablet options in parsed form.
// Every option is held in a std::optional, so an option that was not
// specified can be told apart from one that was given a value.
struct tablet_options {
|
||||
// System-wide default for pow2_count if the option is not set.
|
||||
static const bool default_pow2_count = true;
|
||||
|
||||
// Textual form of the options: option name -> option value, both as strings.
using map_type = std::map<sstring, sstring>;
|
||||
|
||||
std::optional<ssize_t> min_tablet_count;
|
||||
std::optional<ssize_t> max_tablet_count;
|
||||
std::optional<double> min_per_shard_tablet_count;
|
||||
std::optional<ssize_t> expected_data_size_in_gb;
|
||||
// When true, the tablet count of the table is rounded up to a power of two.
std::optional<bool> pow2_count;
|
||||
|
||||
tablet_options() = default;
|
||||
// Builds the options from their textual map form.
explicit tablet_options(const map_type& map);
|
||||
|
||||
// True when at least one option holds a value.
operator bool() const noexcept {
|
||||
return min_tablet_count || max_tablet_count || min_per_shard_tablet_count || expected_data_size_in_gb;
|
||||
return min_tablet_count || max_tablet_count || min_per_shard_tablet_count || expected_data_size_in_gb || pow2_count;
|
||||
}
|
||||
|
||||
// Converts the options back to their textual map form.
map_type to_map() const;
|
||||
|
||||
// Conversions between an option type and its name.
// from_string() throws exceptions::syntax_exception for an unknown name.
static sstring to_string(tablet_option_type hint);
|
||||
static tablet_option_type from_string(sstring hint_desc);
|
||||
// Checks a textual option map and throws exceptions::configuration_exception
// when a value is invalid.
static void validate(const map_type& map);
|
||||
// The features argument is consulted for arbitrary_tablet_boundaries, which
// must be enabled before pow2_count may be set to false.
static void validate(const map_type& map, const gms::feature_service& features);
|
||||
};
|
||||
|
||||
} // namespace db
|
||||
|
||||
@@ -108,6 +108,8 @@ The computed number of tablets a table will have is based on several parameters
|
||||
See :ref:`Per-table tablet options <cql-per-table-tablet-options>` for details.
|
||||
* Table-level option ``'max_tablet_count'``. This option sets the maximum number of tablets for the given table
|
||||
See :ref:`Per-table tablet options <cql-per-table-tablet-options>` for details.
|
||||
* Table-level option ``'pow2_count'``. This option, when set to true, forces the number of tablets for the given table to be a power of 2.
|
||||
See :ref:`Per-table tablet options <cql-per-table-tablet-options>` for details.
|
||||
* Config option ``'tablets_initial_scale_factor'``. This option sets the minimal number of tablets per shard
|
||||
per table globally. This option can be overridden by the table-level option: ``'min_per_shard_tablet_count'``.
|
||||
``'tablets_initial_scale_factor'`` is ignored if either the keyspace option ``'initial'`` or table-level
|
||||
@@ -126,8 +128,10 @@ will be used as the number of tablets for the given table.
|
||||
When both ``'min_tablet_count'`` and ``'max_tablet_count'`` are set together, ScyllaDB validates the
|
||||
combination by computing **effective** bounds:
|
||||
|
||||
* The **effective minimum** is the smallest power of 2 that is greater than or equal to ``min_tablet_count``.
|
||||
* The **effective maximum** is the largest power of 2 that is less than or equal to ``max_tablet_count``.
|
||||
* The **effective minimum** is the smallest power of 2 that is greater than or equal to ``min_tablet_count`` if ``pow2_count`` is true,
|
||||
or simply ``min_tablet_count`` otherwise.
|
||||
* The **effective maximum** is the largest power of 2 that is less than or equal to ``max_tablet_count`` if ``pow2_count`` is true,
|
||||
or simply ``max_tablet_count`` otherwise.
|
||||
|
||||
ScyllaDB validates that the effective minimum does not exceed the effective maximum. If it does,
|
||||
the ``CREATE TABLE`` statement will be rejected with an error. To avoid ambiguity, it is recommended
|
||||
|
||||
@@ -500,6 +500,7 @@ Creating a new table uses the ``CREATE TABLE`` statement:
|
||||
tablet_option: 'expected_data_size_in_gb' ':' <int>
|
||||
: | 'min_per_shard_tablet_count' ':' <float>
|
||||
: | 'min_tablet_count' ':' <int>
|
||||
: | 'pow2_count' ':' ( 'true' | 'false' )
|
||||
|
||||
For instance::
|
||||
|
||||
@@ -1138,6 +1139,8 @@ if its data size, or performance requirements are known in advance.
|
||||
This enables efficient file-based streaming during restore. Setting both
|
||||
``min_tablet_count`` and ``max_tablet_count`` to the same value fixes the
|
||||
tablet count for the table.
|
||||
``pow2_count`` "true" When set to ``true``, the tablet count of a table is always a power of 2. The
|
||||
count wanted due to all other factors is rounded up to the nearest power of 2.
|
||||
=============================== =============== ===================================================================================
|
||||
|
||||
When allocating tablets for a new table, ScyllaDB uses the maximum of the ``initial`` tablets configured for the keyspace
|
||||
|
||||
@@ -146,6 +146,15 @@ db::tablet_options combine_tablet_options(R&& opts) {
|
||||
combined_opts.max_tablet_count = std::min(*combined_opts.max_tablet_count, *opt.max_tablet_count);
|
||||
}
|
||||
}
|
||||
if (opt.pow2_count) {
|
||||
// We need some way to resolve conflicts.
|
||||
// pow2_count will be true if any of the options wants pow2_count, because
|
||||
// we want to treat pow2_count == true as a requirement (for backwards compatibility)
|
||||
// while pow2_count == false is only a preference, not a hard requirement.
|
||||
if (!combined_opts.pow2_count || *opt.pow2_count) {
|
||||
combined_opts.pow2_count = *opt.pow2_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (total_expected_data_size_in_gb_count) {
|
||||
@@ -1715,9 +1724,10 @@ public:
|
||||
size_t target_tablet_count; // Tablet count wanted by scheduler.
|
||||
sstring target_tablet_count_reason; // Winning rule for target_tablet_count value.
|
||||
std::optional<uint64_t> avg_tablet_size; // nullopt when stats not yet available.
|
||||
bool pow2_count; // Whether tablet count for the table should be a power of two.
|
||||
|
||||
// Final tablet count.
|
||||
// It's target_tablet_count aligned to power of 2 if arbitrary_tablet_boundaries feature is not enabled.
|
||||
// It's target_tablet_count aligned to power of 2 if pow2_count == true.
|
||||
size_t target_tablet_count_aligned;
|
||||
|
||||
resize_decision::way_type resize_decision; // Decision which should be emitted to achieve target_tablet_count_aligned.
|
||||
@@ -1867,6 +1877,9 @@ public:
|
||||
auto process_table = [&] (table_id table, const locator::table_group_set& tables, schema_ptr s, db::tablet_options tablet_options, const tablet_aware_replication_strategy* rs, size_t tablet_count) {
|
||||
table_sizing& table_plan = plan.tables[table];
|
||||
table_plan.current_tablet_count = tablet_count;
|
||||
table_plan.pow2_count = tablet_options.pow2_count.value_or(
|
||||
_db.features().arbitrary_tablet_boundaries ? db::tablet_options::default_pow2_count : true);
|
||||
|
||||
rs_by_table[table] = rs;
|
||||
|
||||
// for a group of co-located tablets of size g with average tablet size t, the migration unit
|
||||
@@ -1963,8 +1976,8 @@ public:
|
||||
table_plan.target_tablet_count = target_tablet_count.tablet_count;
|
||||
table_plan.target_tablet_count_reason = target_tablet_count.reason;
|
||||
|
||||
lblogger.debug("Table {} ({}.{}) target_tablet_count: {} ({})", table, s->ks_name(), s->cf_name(),
|
||||
table_plan.target_tablet_count, table_plan.target_tablet_count_reason);
|
||||
lblogger.debug("Table {} ({}.{}) target_tablet_count: {} ({}), pow2_count: {}, opt: {}", table, s->ks_name(), s->cf_name(),
|
||||
table_plan.target_tablet_count, table_plan.target_tablet_count_reason, table_plan.pow2_count, tablet_options.to_map());
|
||||
};
|
||||
|
||||
for (const auto& [table, tables] : _tm->tablets().all_table_groups()) {
|
||||
@@ -1973,7 +1986,6 @@ public:
|
||||
}
|
||||
const auto& tmap = _tm->tablets().get_tablet_map(table);
|
||||
auto [s, rs] = get_schema_and_rs(table);
|
||||
|
||||
auto tablet_options = combine_tablet_options(
|
||||
tables | std::views::transform([&] (table_id table) { return _db.get_tables_metadata().get_table_if_exists(table); })
|
||||
| std::views::filter([] (auto t) { return t != nullptr; })
|
||||
@@ -2095,7 +2107,11 @@ public:
|
||||
// table_plan.resize_decision
|
||||
|
||||
for (auto&& [table, table_plan] : plan.tables) {
|
||||
table_plan.target_tablet_count_aligned = 1u << log2ceil(table_plan.target_tablet_count);
|
||||
if (!table_plan.pow2_count) {
|
||||
table_plan.target_tablet_count_aligned = table_plan.target_tablet_count;
|
||||
} else {
|
||||
table_plan.target_tablet_count_aligned = 1u << log2ceil(table_plan.target_tablet_count);
|
||||
}
|
||||
|
||||
if (table_plan.target_tablet_count_aligned > table_plan.current_tablet_count) {
|
||||
table_plan.resize_decision = locator::resize_decision::split();
|
||||
|
||||
Reference in New Issue
Block a user