Revert "Revert "Merge 'treewide: add uuid_sstable_identifier_enabled support' from Kefu Chai""

This reverts commit 562087beff.

The regressions introduced by the reverted change have been fixed.
So let's revert this revert to resurrect the
uuid_sstable_identifier_enabled support.

Fixes #10459
This commit is contained in:
Kefu Chai
2023-06-21 10:59:40 +08:00
committed by Avi Kivity
parent e233f471b8
commit f014ccf369
19 changed files with 275 additions and 66 deletions

View File

@@ -461,6 +461,7 @@ scylla_tests = set([
'test/boost/snitch_reset_test',
'test/boost/sstable_3_x_test',
'test/boost/sstable_datafile_test',
'test/boost/sstable_generation_test',
'test/boost/sstable_mutation_test',
'test/boost/sstable_partition_index_cache_test',
'test/boost/schema_changes_test',

View File

@@ -871,6 +871,9 @@ db::config::config(std::shared_ptr<db::extensions> exts)
, enable_sstables_mc_format(this, "enable_sstables_mc_format", value_status::Unused, true, "Enable SSTables 'mc' format to be used as the default file format. Deprecated, please use \"sstable_format\" instead.")
, enable_sstables_md_format(this, "enable_sstables_md_format", value_status::Unused, true, "Enable SSTables 'md' format to be used as the default file format. Deprecated, please use \"sstable_format\" instead.")
, sstable_format(this, "sstable_format", value_status::Used, "me", "Default sstable file format", {"md", "me"})
, uuid_sstable_identifiers_enabled(this,
"uuid_sstable_identifiers_enabled", liveness::LiveUpdate, value_status::Used, true, "If set to true, each newly created sstable will have a UUID "
"based generation identifier, and such files are not readable by previous Scylla versions.")
, enable_dangerous_direct_import_of_cassandra_counters(this, "enable_dangerous_direct_import_of_cassandra_counters", value_status::Used, false, "Only turn this option on if you want to import tables from Cassandra containing counters, and you are SURE that no counters in that table were created in a version earlier than Cassandra 2.1."
" It is not enough to have ever since upgraded to newer versions of Cassandra. If you EVER used a version earlier than 2.1 in the cluster where these SSTables come from, DO NOT TURN ON THIS OPTION! You will corrupt your data. You have been warned.")
, enable_shard_aware_drivers(this, "enable_shard_aware_drivers", value_status::Used, true, "Enable native transport drivers to use connection-per-shard for better performance")

View File

@@ -356,6 +356,7 @@ public:
named_value<bool> enable_sstables_mc_format;
named_value<bool> enable_sstables_md_format;
named_value<sstring> sstable_format;
named_value<bool> uuid_sstable_identifiers_enabled;
named_value<bool> enable_dangerous_direct_import_of_cassandra_counters;
named_value<bool> enable_shard_aware_drivers;
named_value<bool> enable_ipv6_dns_lookup;

View File

@@ -3753,7 +3753,7 @@ future<> system_keyspace::sstables_registry_list(sstring location, sstable_regis
co_await _qp.query_internal(req, db::consistency_level::ONE, { location }, 1000, [ consumer = std::move(consumer) ] (const cql3::untyped_result_set::row& row) -> future<stop_iteration> {
auto uuid = row.get_as<utils::UUID>("uuid");
auto status = row.get_as<sstring>("status");
auto gen = sstables::generation_type::from_uuid(row.get_as<utils::UUID>("generation"));
auto gen = sstables::generation_type(row.get_as<utils::UUID>("generation"));
auto ver = sstables::version_from_string(row.get_as<sstring>("version"));
auto fmt = sstables::format_from_string(row.get_as<sstring>("format"));
sstables::entry_descriptor desc("", "", "", gen, ver, fmt, sstables::component_type::TOC);

View File

@@ -72,6 +72,9 @@ feature_config feature_config_from_db_config(const db::config& cfg, std::set<sst
if (!cfg.check_experimental(db::experimental_features_t::feature::TABLETS)) {
fcfg._disabled_features.insert("TABLETS"s);
}
if (!cfg.uuid_sstable_identifiers_enabled()) {
fcfg._disabled_features.insert("UUID_SSTABLE_IDENTIFIERS"s);
}
if (!utils::get_local_injector().enter("features_enable_test_feature")) {
fcfg._disabled_features.insert("TEST_ONLY_FEATURE"s);

View File

@@ -117,6 +117,7 @@ public:
gms::feature large_collection_detection { *this, "LARGE_COLLECTION_DETECTION"sv };
gms::feature secondary_indexes_on_static_columns { *this, "SECONDARY_INDEXES_ON_STATIC_COLUMNS"sv };
gms::feature tablets { *this, "TABLETS"sv };
gms::feature uuid_sstable_identifiers { *this, "UUID_SSTABLE_IDENTIFIERS"sv };
// A feature just for use in tests. It must not be advertised unless
// the "features_enable_test_feature" injection is enabled.

View File

@@ -570,7 +570,7 @@ private:
// update the sstable generation, making sure (in calculate_generation_for_new_table)
// that new sstables don't overwrite this one.
void update_sstables_known_generation(std::optional<sstables::generation_type> generation);
void update_sstables_known_generation(sstables::generation_type generation);
sstables::generation_type calculate_generation_for_new_table();
private:

View File

@@ -463,14 +463,16 @@ distributed_loader::process_upload_dir(distributed<replica::database>& db, distr
process_sstable_dir(directory, flags).get();
sharded<sstables::sstable_generation_generator> sharded_gen;
auto highest_generation = highest_generation_seen(directory).get0().value_or(
sstables::generation_type{0});
sharded_gen.start(highest_generation.as_int()).get();
auto highest_generation = highest_generation_seen(directory).get0();
sharded_gen.start(highest_generation ? highest_generation.as_int() : 0).get();
auto stop_generator = deferred_stop(sharded_gen);
auto make_sstable = [&] (shard_id shard) {
auto& sstm = global_table->get_sstables_manager();
auto generation = sharded_gen.invoke_on(shard, [] (auto& gen) { return gen(); }).get();
bool uuid_sstable_identifiers = db.local().features().uuid_sstable_identifiers;
auto generation = sharded_gen.invoke_on(shard, [uuid_sstable_identifiers] (auto& gen) {
return gen(sstables::uuid_identifiers{uuid_sstable_identifiers});
}).get();
return sstm.make_sstable(global_table->schema(), global_table->get_storage_options(),
upload.native(), generation, sstm.get_highest_supported_format(),
sstables::sstable_format_types::big, gc_clock::now(), &error_handler_gen_for_upload_dir);
@@ -551,7 +553,7 @@ class table_populator {
fs::path _base_path;
std::unordered_map<sstring, lw_shared_ptr<sharded<sstables::sstable_directory>>> _sstable_directories;
sstables::sstable_version_types _highest_version = sstables::oldest_writable_sstable_format;
std::optional<sstables::generation_type> _highest_generation;
sstables::generation_type _highest_generation;
sharded<locator::effective_replication_map_ptr> _erms;
public:
@@ -651,11 +653,7 @@ future<> table_populator::start_subdir(sstring subdir) {
auto generation = co_await highest_generation_seen(directory);
_highest_version = std::max(sst_version, _highest_version);
if (generation) {
_highest_generation = _highest_generation ?
std::max(*generation, *_highest_generation) :
*generation;
}
_highest_generation = std::max(generation, _highest_generation);
}
sstables::shared_sstable make_sstable(replica::table& table, fs::path dir, sstables::generation_type generation, sstables::sstable_version_types v) {

View File

@@ -41,6 +41,7 @@
#include "utils/fb_utilities.hh"
#include "mutation/mutation_source_metadata.hh"
#include "gms/gossiper.hh"
#include "gms/feature_service.hh"
#include "db/config.hh"
#include "db/commitlog/commitlog.hh"
#include "utils/lister.hh"
@@ -66,8 +67,8 @@ static seastar::metrics::label keyspace_label("ks");
using namespace std::chrono_literals;
void table::update_sstables_known_generation(std::optional<sstables::generation_type> generation) {
auto gen = generation.value_or(sstables::generation_type(0)).as_int();
void table::update_sstables_known_generation(sstables::generation_type generation) {
auto gen = generation ? generation.as_int() : 0;
if (_sstable_generation_generator) {
_sstable_generation_generator->update_known_generation(gen);
} else {
@@ -82,7 +83,8 @@ sstables::generation_type table::calculate_generation_for_new_table() {
// See https://github.com/scylladb/scylladb/issues/10459
// for uuid-based sstable generation
assert(_sstable_generation_generator);
auto ret = std::invoke(*_sstable_generation_generator);
auto ret = std::invoke(*_sstable_generation_generator,
uuid_identifiers{_sstables_manager.uuid_sstable_identifiers()});
tlogger.debug("{}.{} new sstable generation {}", schema()->ks_name(), schema()->cf_name(), ret);
return ret;
}

View File

@@ -15,9 +15,11 @@
#include <compare>
#include <limits>
#include <iostream>
#include <stdexcept>
#include <type_traits>
#include <boost/range/adaptors.hpp>
#include <seastar/core/on_internal_error.hh>
#include <boost/regex.hpp>
#include <seastar/core/smp.hh>
#include <seastar/core/sstring.hh>
#include "types/types.hh"
@@ -35,18 +37,24 @@ public:
private:
utils::UUID _value;
explicit constexpr generation_type(utils::UUID value) noexcept
: _value(value) {}
public:
generation_type() = delete;
// create an invalid sstable identifier
generation_type() = default;
// use zero as the timestamp to differentiate from the regular timeuuid,
// and use the least_sig_bits to encode the value of generation identifier.
explicit constexpr generation_type(int_t value) noexcept
: _value(utils::UUID_gen::create_time(std::chrono::milliseconds::zero()), value) {}
explicit constexpr generation_type(utils::UUID value) noexcept
: _value(value) {}
// Returns the UUID held by a uuid-based generation.
//
// A null value, or a value whose timestamp is zero (the encoding used for
// integer-based generations), is reported through on_internal_error()
// instead of being returned silently.
//
// NOTE(review): the function is declared noexcept; if on_internal_error()
// is configured to throw, the exception cannot propagate out of here --
// confirm this is intended.
constexpr utils::UUID as_uuid() const noexcept {
if (_value.is_null() || _value.timestamp() == 0) {
on_internal_error(sstlog, "int generation used as a UUID ");
}
return _value;
}
constexpr int_t as_int() const noexcept {
if (_value.timestamp() != 0) {
if (_value.is_null() || _value.timestamp() != 0) {
on_internal_error(sstlog, "UUID generation used as an int");
}
return _value.get_least_significant_bits();
@@ -57,9 +65,31 @@ public:
ec == std::errc() && ptr == s.data() + s.size()) {
return generation_type(int_value);
} else {
throw std::invalid_argument(fmt::format("invalid UUID: {}", s));
static const boost::regex pattern("([0-9a-z]{4})_([0-9a-z]{4})_([0-9a-z]{5})([0-9a-z]{13})");
boost::smatch match;
if (!boost::regex_match(s, match, pattern)) {
throw std::invalid_argument(fmt::format("invalid UUID: {}", s));
}
utils::UUID_gen::decimicroseconds timestamp = {};
auto decode_base36 = [](const std::string& s) {
std::size_t pos{};
auto n = std::stoull(s, &pos, 36);
if (pos != s.size()) {
throw std::invalid_argument(fmt::format("invalid part in UUID: {}", s));
}
return n;
};
timestamp += std::chrono::days{decode_base36(match[1])};
timestamp += std::chrono::seconds{decode_base36(match[2])};
timestamp += ::utils::UUID_gen::decimicroseconds{decode_base36(match[3])};
int64_t lsb = decode_base36(match[4]);
return generation_type{utils::UUID_gen::get_time_UUID_raw(timestamp, lsb)};
}
}
// Returns true if the generation holds a valid id.
//
// The result is simply the boolean value of the underlying UUID.
// NOTE(review): presumably a default-constructed (null) UUID converts to
// false, which is what makes a default-constructed generation_type the
// "invalid" identifier -- verify against utils::UUID.
explicit operator bool() const noexcept {
return bool(_value);
}
// convert to data_value
//
// this function is used when performing queries to SSTABLES_REGISTRY in
@@ -75,14 +105,27 @@ public:
explicit operator data_value() const noexcept {
return _value;
}
static generation_type from_uuid(utils::UUID value) {
// if the encoded value is an int64_t, the UUID's timestamp must be
// zero, and the least significant bits is used to encode the value
// of the int64_t.
assert(value.timestamp() == 0);
return generation_type(value);
// Tells whether this generation was built from a UUID rather than from an
// integer; the decision is made solely on the timestamp of the stored value.
constexpr bool is_uuid_based() const noexcept {
// if the value of generation_type is meant to be an int64_t, its timestamp
// must be zero, and the least significant bits are used to encode the
// value of the int64_t.
return _value.timestamp() != 0;
}
std::strong_ordering operator<=>(const generation_type& other) const noexcept = default;
// Orders two generation identifiers.
//
//  - two valid uuid-based identifiers are ordered by their UUID values;
//  - in every other combination each side is reduced to an integer rank:
//    the encoded integer for a valid integer-based identifier, and 0 for an
//    invalid or uuid-based one; the ranks are then compared.
std::strong_ordering operator<=>(const generation_type& other) const noexcept {
    const bool lhs_is_uuid = bool(*this) && is_uuid_based();
    const bool rhs_is_uuid = bool(other) && other.is_uuid_based();
    if (lhs_is_uuid && rhs_is_uuid) {
        return _value <=> other._value;
    }
    // only valid integer-based identifiers contribute a non-zero rank
    const auto int_rank = [] (const generation_type& gen) -> int_t {
        const bool is_int_based = bool(gen) && !gen.is_uuid_based();
        return is_int_based ? gen._value.get_least_significant_bits() : int_t{0};
    };
    return int_rank(*this) <=> int_rank(other);
}
bool operator==(const generation_type& other) const noexcept = default;
};
constexpr generation_type generation_from_value(generation_type::int_t value) {
@@ -103,6 +146,7 @@ Target generations_from_values(std::initializer_list<generation_type::int_t> val
}));
}
using uuid_identifiers = bool_class<struct uuid_identifiers_tag>;
class sstable_generation_generator {
// We still want to do our best to keep the generation numbers shard-friendly.
// Each destination shard will manage its own generation counter.
@@ -124,7 +168,10 @@ public:
_last_generation = generation;
}
}
sstables::generation_type operator()() {
generation_type operator()(uuid_identifiers use_uuid = uuid_identifiers::no) {
if (use_uuid) {
return generation_type(utils::UUID_gen::get_time_UUID());
}
// each shard has its own "namespace" so we increment the generation id
// by smp::count to avoid name conflicts between sstables
_last_generation += seastar::smp::count;
@@ -134,7 +181,14 @@ public:
/// way to determine that is overlapping its partition-ranges with the shard's
/// owned ranges.
static bool maybe_owned_by_this_shard(const sstables::generation_type& gen) {
return gen.as_int() % smp::count == seastar::this_shard_id();
assert(bool(gen));
int64_t hint = 0;
if (gen.is_uuid_based()) {
hint = std::hash<utils::UUID>{}(gen.as_uuid());
} else {
hint = gen.as_int();
}
return hint % smp::count == seastar::this_shard_id();
}
};
@@ -144,7 +198,11 @@ namespace std {
template <>
struct hash<sstables::generation_type> {
size_t operator()(const sstables::generation_type& generation) const noexcept {
return hash<sstables::generation_type::int_t>{}(generation.as_int());
if (generation.is_uuid_based()) {
return hash<utils::UUID>{}(generation.as_uuid());
} else {
return hash<int64_t>{}(generation.as_int());
}
}
};
@@ -164,6 +222,46 @@ template <>
struct fmt::formatter<sstables::generation_type> : fmt::formatter<std::string_view> {
template <typename FormatContext>
auto format(const sstables::generation_type& generation, FormatContext& ctx) const {
return fmt::format_to(ctx.out(), "{}", generation.as_int());
if (!generation) {
return fmt::format_to(ctx.out(), "-");
} else if (generation.is_uuid_based()) {
// format the uuid as 4 parts, each encoded as base36 chars, with "_"
// separating the first three; the fourth directly follows the third.
//
// This matches the way Cassandra formats UUIDBasedSSTableId, but we
// don't have to. We just don't want to use "-" as the delimiter in the UUID, as
// "-" is already used to split the different parts of an SSTable filename like
// "nb-1-big-Data.db".
const auto uuid = generation.as_uuid();
auto timestamp = ::utils::UUID_gen::decimicroseconds(uuid.timestamp());
char days_buf[4] = {};
auto days = std::chrono::duration_cast<std::chrono::days>(timestamp);
timestamp -= days;
char* days_end = std::to_chars(std::begin(days_buf), std::end(days_buf),
days.count(), 36).ptr;
char secs_buf[4] = {};
auto secs = std::chrono::duration_cast<std::chrono::seconds>(timestamp);
timestamp -= secs;
char* secs_end = std::to_chars(std::begin(secs_buf), std::end(secs_buf),
secs.count(), 36).ptr;
char decimicro_buf[5] = {};
char* decimicro_end = std::to_chars(std::begin(decimicro_buf), std::end(decimicro_buf),
timestamp.count(), 36).ptr;
char lsb_buf[13] = {};
char* lsb_end = std::to_chars(std::begin(lsb_buf), std::end(lsb_buf),
static_cast<uint64_t>(uuid.get_least_significant_bits()), 36).ptr;
return fmt::format_to(ctx.out(), "{:0>4}_{:0>4}_{:0>5}{:0>13}",
std::string_view(days_buf, days_end),
std::string_view(secs_buf, secs_end),
std::string_view(decimicro_buf, decimicro_end),
std::string_view(lsb_buf, lsb_end));
} else {
return fmt::format_to(ctx.out(), "{}", generation.as_int());
}
}
};

View File

@@ -188,7 +188,7 @@ sstring sstable_directory::sstable_filename(const sstables::entry_descriptor& de
return sstable::filename(_sstable_dir.native(), _schema->ks_name(), _schema->cf_name(), desc.version, desc.generation, desc.format, component_type::Data);
}
std::optional<generation_type>
generation_type
sstable_directory::highest_generation_seen() const {
return _max_generation_seen;
}
@@ -256,14 +256,11 @@ future<> sstable_directory::filesystem_components_lister::process(sstable_direct
_directory, _state->descriptors.size(), _state->generations_found.size());
if (!_state->generations_found.empty()) {
// FIXME: for now set _max_generation_seen is any generation were found
// With https://github.com/scylladb/scylladb/issues/10459,
// We should do that only if any _numeric_ generations were found
directory._max_generation_seen = boost::accumulate(_state->generations_found | boost::adaptors::map_keys, sstables::generation_type(0), [] (generation_type a, generation_type b) {
directory._max_generation_seen = boost::accumulate(_state->generations_found | boost::adaptors::map_keys, sstables::generation_type{}, [] (generation_type a, generation_type b) {
return std::max<generation_type>(a, b);
});
msg = format("{}, highest generation seen: {}", msg, *directory._max_generation_seen);
msg = format("{}, highest generation seen: {}", msg, directory._max_generation_seen);
} else {
msg = format("{}, no numeric generation was seen", msg);
}
@@ -626,20 +623,14 @@ future<> sstable_directory::filesystem_components_lister::handle_sstables_pendin
co_await when_all_succeed(futures.begin(), futures.end()).discard_result();
}
future<std::optional<sstables::generation_type>>
future<sstables::generation_type>
highest_generation_seen(sharded<sstables::sstable_directory>& directory) {
auto highest = co_await directory.map_reduce0(std::mem_fn(&sstables::sstable_directory::highest_generation_seen), sstables::generation_type(0), [] (std::optional<sstables::generation_type> a, std::optional<sstables::generation_type> b) {
if (a && b) {
return std::max(*a, *b);
} else if (a) {
return *a;
} else if (b) {
return *b;
} else {
return sstables::generation_type(0);
}
});
co_return highest.as_int() ? std::make_optional(highest) : std::nullopt;
co_return co_await directory.map_reduce0(
std::mem_fn(&sstables::sstable_directory::highest_generation_seen),
sstables::generation_type{},
[] (sstables::generation_type a, sstables::generation_type b) {
return std::max(a, b);
});
}
}

View File

@@ -138,7 +138,7 @@ private:
std::unique_ptr<components_lister> _lister;
const dht::sharder& _sharder;
std::optional<generation_type> _max_generation_seen;
generation_type _max_generation_seen;
sstables::sstable_version_types _max_version_seen = sstables::sstable_version_types::ka;
// SSTables that are unshared and belong to this shard. They are already stored as an
@@ -201,7 +201,7 @@ public:
future<> move_foreign_sstables(sharded<sstable_directory>& source_directory);
// returns what is the highest generation seen in this directory.
std::optional<generation_type> highest_generation_seen() const;
generation_type highest_generation_seen() const;
// returns what is the highest version seen in this directory.
sstables::sstable_version_types highest_version_seen() const;
@@ -272,6 +272,6 @@ public:
future<> garbage_collect();
};
future<std::optional<sstables::generation_type>> highest_generation_seen(sharded<sstables::sstable_directory>& directory);
future<sstables::generation_type> highest_generation_seen(sharded<sstables::sstable_directory>& directory);
}

View File

@@ -98,6 +98,10 @@ const locator::host_id& sstables_manager::get_local_host_id() const {
return _db_config.host_id;
}
// Reports the state of the `uuid_sstable_identifiers` feature as seen by
// this manager.
// NOTE(review): relies on the feature object converting to bool --
// presumably true once the feature is enabled; confirm against gms::feature.
bool sstables_manager::uuid_sstable_identifiers() const {
return _features.uuid_sstable_identifiers;
}
shared_sstable sstables_manager::make_sstable(schema_ptr schema,
const data_dictionary::storage_options& storage,
sstring dir,

View File

@@ -121,6 +121,7 @@ public:
}
virtual sstable_writer_config configure_writer(sstring origin) const;
bool uuid_sstable_identifiers() const;
const db::config& config() const { return _db_config; }
cache_tracker& get_cache_tracker() { return _cache_tracker; }

View File

@@ -258,6 +258,8 @@ add_scylla_test(sstable_3_x_test
KIND SEASTAR)
add_scylla_test(sstable_datafile_test
KIND SEASTAR)
add_scylla_test(sstable_generation_test
KIND BOOST)
add_scylla_test(sstable_mutation_test
KIND SEASTAR)
add_scylla_test(sstable_partition_index_cache_test

View File

@@ -393,7 +393,9 @@ SEASTAR_THREAD_TEST_CASE(test_distributed_loader_with_pending_delete) {
std::vector<sstables::generation_type> gen;
constexpr size_t num_gens = 9;
std::generate_n(std::back_inserter(gen), num_gens, [&] {
return gen_generator();
// this test assumes integer-based generation identifiers, so disable
// uuid_identifiers here
return gen_generator(sstables::uuid_identifiers::no);
});
// Regular log file with single entry

View File

@@ -382,7 +382,7 @@ SEASTAR_THREAD_TEST_CASE(sstable_directory_unshared_sstables_sanity_unmatched_ge
env.invoke_on(i, [dir = dir.path(), &sharded_gen] (sstables::test_env& env) -> future<> {
// intentionally generate the generation on a different shard
auto generation = co_await sharded_gen.invoke_on((this_shard_id() + 1) % smp::count, [] (auto& gen) {
return gen();
return gen(sstables::uuid_identifiers::no);
});
// this is why it is annoying for the internal functions in the test infrastructure to
// assume threaded execution
@@ -501,7 +501,7 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_correctly) {
for (unsigned nr = 0; nr < num_sstables; ++nr) {
auto generation = sharded_gen.invoke_on(nr % smp::count, [] (auto& gen) {
return gen();
return gen(sstables::uuid_identifiers::no);
}).get();
make_sstable_for_all_shards(e.db().local(), cf, upload_path.native(), generation);
}
@@ -512,12 +512,12 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_correctly) {
sharded<sstables::sstable_generation_generator> sharded_gen;
auto max_generation_seen = highest_generation_seen(sstdir).get0();
sharded_gen.start(max_generation_seen->as_int()).get();
sharded_gen.start(max_generation_seen.as_int()).get();
auto stop_generator = deferred_stop(sharded_gen);
auto make_sstable = [&e, upload_path, &sharded_gen] (shard_id shard) {
auto generation = sharded_gen.invoke_on(shard, [] (auto& gen) {
return gen();
return gen(sstables::uuid_identifiers::no);
}).get();
auto& cf = e.local_db().find_column_family("ks", "cf");
data_dictionary::storage_options local;
@@ -554,7 +554,7 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_distributes_well_eve
for (unsigned nr = 0; nr < num_sstables; ++nr) {
// always generate the generation on shard#0
auto generation = sharded_gen.invoke_on(0, [] (auto& gen) {
return gen();
return gen(sstables::uuid_identifiers::no);
}).get();
make_sstable_for_all_shards(e.db().local(), cf, upload_path.native(), generation);
}
@@ -565,12 +565,12 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_distributes_well_eve
sharded<sstables::sstable_generation_generator> sharded_gen;
auto max_generation_seen = highest_generation_seen(sstdir).get0();
sharded_gen.start(max_generation_seen->as_int()).get();
sharded_gen.start(max_generation_seen.as_int()).get();
auto stop_generator = deferred_stop(sharded_gen);
auto make_sstable = [&e, upload_path, &sharded_gen] (shard_id shard) {
auto generation = sharded_gen.invoke_on(shard, [] (auto& gen) {
return gen();
return gen(sstables::uuid_identifiers::no);
}).get();
auto& cf = e.local_db().find_column_family("ks", "cf");
data_dictionary::storage_options local;
@@ -606,7 +606,7 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_respect_max_threshol
for (unsigned nr = 0; nr < num_sstables; ++nr) {
auto generation = sharded_gen.invoke_on(nr % smp::count, [] (auto& gen) {
return gen();
return gen(sstables::uuid_identifiers::no);
}).get();
make_sstable_for_all_shards(e.db().local(), cf, upload_path.native(), generation);
}
@@ -617,12 +617,12 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_respect_max_threshol
sharded<sstables::sstable_generation_generator> sharded_gen;
auto max_generation_seen = highest_generation_seen(sstdir).get0();
sharded_gen.start(max_generation_seen->as_int()).get();
sharded_gen.start(max_generation_seen.as_int()).get();
auto stop_generator = deferred_stop(sharded_gen);
auto make_sstable = [&e, upload_path, &sharded_gen] (shard_id shard) {
auto generation = sharded_gen.invoke_on(shard, [] (auto& gen) {
return gen();
return gen(sstables::uuid_identifiers::no);
}).get();
auto& cf = e.local_db().find_column_family("ks", "cf");
data_dictionary::storage_options local;

View File

@@ -0,0 +1,102 @@
/*
* Copyright (C) 2023-present ScyllaDB
*/
/*
* SPDX-License-Identifier: AGPL-3.0-or-later
*/
#define BOOST_TEST_MODULE sstable-generation
#include <string>
#include <boost/test/unit_test.hpp>
#include "sstables/generation_type.hh"
using namespace std::literals;
namespace sstables {
// Printer used by Boost.Test when it needs to show a generation_type, e.g.
// in the message of a failed BOOST_CHECK_EQUAL. It is declared in the same
// namespace as generation_type so the framework can find it.
// The value is rendered with its fmt formatter.
std::ostream& boost_test_print_type(std::ostream& os, const generation_type& gen) {
fmt::print(os, "{}", gen);
return os;
}
}
// A well-formed uuid-based identifier must parse into the expected UUID and
// format back to exactly the text it was parsed from.
BOOST_AUTO_TEST_CASE(from_string_uuid_good) {
    // sample identifier published in
    // https://cassandra.apache.org/_/blog/Apache-Cassandra-4.1-New-SSTable-Identifiers.html
    const std::string text = "3fw2_0tj4_46w3k2cpidnirvjy7k";
    // the two 64-bit halves of the UUID the sample is expected to decode to
    const uint64_t expected_msb = 0x6636ac00da8411ec;
    const uint64_t expected_lsb = 0x9abaf56e1443def0;
    const utils::UUID expected_uuid(expected_msb, expected_lsb);

    const auto parsed = sstables::generation_type::from_string(text);
    // REQUIRE rather than CHECK: as_uuid() below is only meaningful for a
    // valid, uuid-based identifier
    BOOST_REQUIRE(bool(parsed));
    BOOST_REQUIRE(parsed.is_uuid_based());
    BOOST_CHECK_EQUAL(parsed.as_uuid(), expected_uuid);
    // formatting must round-trip
    BOOST_CHECK_EQUAL(text, fmt::to_string(parsed));
}
// A plain decimal string must parse into an integer-based identifier and
// format back to the same text.
BOOST_AUTO_TEST_CASE(from_string_int_good) {
    const char* const text = "42";
    const auto parsed = sstables::generation_type::from_string(text);
    // REQUIRE rather than CHECK: as_int() below is only meaningful for a
    // valid, integer-based identifier
    BOOST_REQUIRE(bool(parsed));
    BOOST_REQUIRE(!parsed.is_uuid_based());
    BOOST_CHECK_EQUAL(parsed.as_int(), 42);
    // formatting must round-trip
    BOOST_CHECK_EQUAL(text, fmt::to_string(parsed));
}
// A default-constructed identifier is the invalid one: it can still be
// formatted, and it tests as false.
BOOST_AUTO_TEST_CASE(invalid_identifier) {
    const sstables::generation_type unset{};
    BOOST_CHECK_NO_THROW(fmt::to_string(unset));
    BOOST_CHECK(!unset);
}
// Every malformed identifier must be rejected by from_string() with an
// exception derived from std::logic_error.
BOOST_AUTO_TEST_CASE(from_string_bad) {
    const auto malformed = {
        // characters outside of [0-9a-z] inside one of the parts
        "3fw _0tj4_46w3k2cpidnirvjy7k"s,
        "3fw2_0tj4_46w3k2cpidnirvjy7 "s,
        "3fw2_0tj__46w3k2cpidnirvjy7k"s,
        "3fw2_0tj$_46w3k2cpidnirvjy7k"s,
        // wrong overall length or trailing garbage
        "3fw2_0tj4_46w3k2cpidnirvjy7"s,
        "3fw2_0tj4_46w3k2cpidnirvjy7kkkk"s,
        "3fw2_0tj4"s,
        "3fw2_0tj4_46w3k2cpidnirvjy7k_and_more"s,
        // neither an integer nor a uuid-based identifier
        "bonjour"s,
        "0x42"s,
        ""s,
    };
    for (const auto& text : malformed) {
        BOOST_CHECK_THROW(sstables::generation_type::from_string(text), std::logic_error);
    }
}
// Exercises the ordering and equality of generation identifiers across the
// three kinds: invalid, integer-based and uuid-based.
BOOST_AUTO_TEST_CASE(compare) {
    // an integer-based identifier should always be greater than an invalid
    // one -- the invalid id is provided as the minimal identifier
    BOOST_CHECK_GT(sstables::generation_type(42), sstables::generation_type{});
    BOOST_CHECK_LT(sstables::generation_type{}, sstables::generation_type(42));

    const auto uuid_text = "3fw2_0tj4_46w3k2cpidnirvjy7k"s;
    const auto uuid_gen = sstables::generation_type::from_string(uuid_text);
    // a positive integer-based identifier should always be greater than a
    // uuid-based one
    // NOTE(review): presumably so that the highest integer-based generation
    // can still be found with std::max when both kinds are present -- confirm
    BOOST_CHECK_GT(sstables::generation_type(42), uuid_gen);
    BOOST_CHECK_GT(sstables::generation_type(1), uuid_gen);
    BOOST_CHECK_LT(uuid_gen, sstables::generation_type(1));

    // integer-based identifiers are ordered by their integer values
    BOOST_CHECK_GT(sstables::generation_type(42), sstables::generation_type(41));
    BOOST_CHECK_LT(sstables::generation_type(41), sstables::generation_type(42));
    BOOST_CHECK_EQUAL(sstables::generation_type(42), sstables::generation_type(42));

    // the ordering of uuid-based generations does not matter, but we should
    // be able to use them as keys in an associative container
    BOOST_CHECK_NE(sstables::generation_type::from_string("3fw2_0tj4_46w3k2cpidnirvjy7k"),
                   sstables::generation_type::from_string("3fw2_0tj4_46w3k2cpidnirvjy7z"));
    BOOST_CHECK_EQUAL(sstables::generation_type::from_string(uuid_text),
                      sstables::generation_type::from_string(uuid_text));

    // all invalid identifiers should be equal to each other, and different
    // from any valid one
    BOOST_CHECK_EQUAL(sstables::generation_type{}, sstables::generation_type{});
    BOOST_CHECK_NE(sstables::generation_type{},
                   sstables::generation_type::from_string(uuid_text));
}

View File

@@ -69,7 +69,7 @@ class test_env {
impl(const impl&) = delete;
sstables::generation_type new_generation() noexcept {
return gen();
return gen(sstables::uuid_identifiers::no);
}
};
std::unique_ptr<impl> _impl;