replica,sstable: introduce invalid generation id

the invalid sstable id is the NULL of an sstable identifier. with
this concept, it is a lot simpler to find/track the greatest
generation. the complexity is hidden in the generation_type, which
compares a) integer-based identifiers, b) uuid-based identifiers and
c) the invalid identifier in different ways.

so, in this change

* the default constructor of generation_type is
  now public.
* we don't check for empty generation anymore when loading
  SSTables or enumerating them.

Signed-off-by: Kefu Chai <kefu.chai@scylladb.com>
This commit is contained in:
Kefu Chai
2023-05-26 18:32:01 +08:00
parent 939fa087cc
commit 2d265e860d
8 changed files with 37 additions and 43 deletions

View File

@@ -571,7 +571,7 @@ private:
// update the sstable generation, making sure (in calculate_generation_for_new_table)
// that new sstables don't overwrite this one.
void update_sstables_known_generation(std::optional<sstables::generation_type> generation);
void update_sstables_known_generation(sstables::generation_type generation);
sstables::generation_type calculate_generation_for_new_table();
private:

View File

@@ -453,9 +453,8 @@ distributed_loader::process_upload_dir(distributed<replica::database>& db, distr
process_sstable_dir(directory, flags).get();
sharded<sstables::sstable_generation_generator> sharded_gen;
auto highest_generation = highest_generation_seen(directory).get0().value_or(
sstables::generation_type{0});
sharded_gen.start(highest_generation.as_int()).get();
auto highest_generation = highest_generation_seen(directory).get0();
sharded_gen.start(highest_generation ? highest_generation.as_int() : 0).get();
auto stop_generator = deferred_stop(sharded_gen);
auto make_sstable = [&] (shard_id shard) {
@@ -537,7 +536,7 @@ class table_populator {
fs::path _base_path;
std::unordered_map<sstring, lw_shared_ptr<sharded<sstables::sstable_directory>>> _sstable_directories;
sstables::sstable_version_types _highest_version = sstables::oldest_writable_sstable_format;
std::optional<sstables::generation_type> _highest_generation;
sstables::generation_type _highest_generation;
public:
table_populator(global_table_ptr ptr, distributed<replica::database>& db, sstring ks, sstring cf)
@@ -629,11 +628,7 @@ future<> table_populator::start_subdir(sstring subdir) {
auto generation = co_await highest_generation_seen(directory);
_highest_version = std::max(sst_version, _highest_version);
if (generation) {
_highest_generation = _highest_generation ?
std::max(*generation, *_highest_generation) :
*generation;
}
_highest_generation = std::max(generation, _highest_generation);
}
sstables::shared_sstable make_sstable(replica::table& table, fs::path dir, sstables::generation_type generation, sstables::sstable_version_types v) {

View File

@@ -67,8 +67,8 @@ static seastar::metrics::label keyspace_label("ks");
using namespace std::chrono_literals;
void table::update_sstables_known_generation(std::optional<sstables::generation_type> generation) {
auto gen = generation.value_or(sstables::generation_type(0)).as_int();
void table::update_sstables_known_generation(sstables::generation_type generation) {
auto gen = generation ? generation.as_int() : 0;
if (_sstable_generation_generator) {
_sstable_generation_generator->update_known_generation(gen);
} else {

View File

@@ -38,7 +38,8 @@ private:
utils::UUID _value;
public:
generation_type() = delete;
// create an invalid sstable identifier
generation_type() = default;
// use zero as the timestamp to differentiate from the regular timeuuid,
// and use the least_sig_bits to encode the value of generation identifier.
@@ -47,13 +48,13 @@ public:
explicit constexpr generation_type(utils::UUID value) noexcept
: _value(value) {}
constexpr utils::UUID as_uuid() const noexcept {
if (_value.timestamp() == 0) {
if (_value.is_null() || _value.timestamp() == 0) {
on_internal_error(sstlog, "int generation used as a UUID ");
}
return _value;
}
constexpr int_t as_int() const noexcept {
if (_value.timestamp() != 0) {
if (_value.is_null() || _value.timestamp() != 0) {
on_internal_error(sstlog, "UUID generation used as an int");
}
return _value.get_least_significant_bits();
@@ -221,7 +222,9 @@ template <>
struct fmt::formatter<sstables::generation_type> : fmt::formatter<std::string_view> {
template <typename FormatContext>
auto format(const sstables::generation_type& generation, FormatContext& ctx) const {
if (generation.is_uuid_based()) {
if (!generation) {
return fmt::format_to(ctx.out(), "-");
} else if (generation.is_uuid_based()) {
// format the uuid as 4 parts separated by "_". each of these parts is encoded
// as base36 chars.
//

View File

@@ -186,7 +186,7 @@ sstring sstable_directory::sstable_filename(const sstables::entry_descriptor& de
return sstable::filename(_sstable_dir.native(), _schema->ks_name(), _schema->cf_name(), desc.version, desc.generation, desc.format, component_type::Data);
}
std::optional<generation_type>
generation_type
sstable_directory::highest_generation_seen() const {
return _max_generation_seen;
}
@@ -254,14 +254,11 @@ future<> sstable_directory::filesystem_components_lister::process(sstable_direct
_directory, _state->descriptors.size(), _state->generations_found.size());
if (!_state->generations_found.empty()) {
// FIXME: for now set _max_generation_seen if any generations were found
// With https://github.com/scylladb/scylladb/issues/10459,
// We should do that only if any _numeric_ generations were found
directory._max_generation_seen = boost::accumulate(_state->generations_found | boost::adaptors::map_keys, sstables::generation_type(0), [] (generation_type a, generation_type b) {
directory._max_generation_seen = boost::accumulate(_state->generations_found | boost::adaptors::map_keys, sstables::generation_type{}, [] (generation_type a, generation_type b) {
return std::max<generation_type>(a, b);
});
msg = format("{}, highest generation seen: {}", msg, *directory._max_generation_seen);
msg = format("{}, highest generation seen: {}", msg, directory._max_generation_seen);
} else {
msg = format("{}, no numeric generation was seen", msg);
}
@@ -624,22 +621,14 @@ future<> sstable_directory::filesystem_components_lister::handle_sstables_pendin
co_await when_all_succeed(futures.begin(), futures.end()).discard_result();
}
future<std::optional<sstables::generation_type>>
future<sstables::generation_type>
highest_generation_seen(sharded<sstables::sstable_directory>& directory) {
// TODO: use an empty generation instead of an generation_type(0) and
// optional<generation_type> for finding the highest generation seen
auto highest = co_await directory.map_reduce0(std::mem_fn(&sstables::sstable_directory::highest_generation_seen), sstables::generation_type(0), [] (std::optional<sstables::generation_type> a, std::optional<sstables::generation_type> b) {
if (a && b) {
return std::max(*a, *b);
} else if (a) {
return *a;
} else if (b) {
return *b;
} else {
return sstables::generation_type(0);
}
});
co_return highest.as_int() ? std::make_optional(highest) : std::nullopt;
co_return co_await directory.map_reduce0(
std::mem_fn(&sstables::sstable_directory::highest_generation_seen),
sstables::generation_type{},
[] (sstables::generation_type a, sstables::generation_type b) {
return std::max(a, b);
});
}
}

View File

@@ -137,7 +137,7 @@ private:
io_error_handler_gen _error_handler_gen;
std::unique_ptr<components_lister> _lister;
std::optional<generation_type> _max_generation_seen;
generation_type _max_generation_seen;
sstables::sstable_version_types _max_version_seen = sstables::sstable_version_types::ka;
// SSTables that are unshared and belong to this shard. They are already stored as an
@@ -199,7 +199,7 @@ public:
future<> move_foreign_sstables(sharded<sstable_directory>& source_directory);
// returns what is the highest generation seen in this directory.
std::optional<generation_type> highest_generation_seen() const;
generation_type highest_generation_seen() const;
// returns what is the highest version seen in this directory.
sstables::sstable_version_types highest_version_seen() const;
@@ -270,6 +270,6 @@ public:
future<> garbage_collect();
};
future<std::optional<sstables::generation_type>> highest_generation_seen(sharded<sstables::sstable_directory>& directory);
future<sstables::generation_type> highest_generation_seen(sharded<sstables::sstable_directory>& directory);
}

View File

@@ -511,7 +511,7 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_correctly) {
sharded<sstables::sstable_generation_generator> sharded_gen;
auto max_generation_seen = highest_generation_seen(sstdir).get0();
sharded_gen.start(max_generation_seen->as_int()).get();
sharded_gen.start(max_generation_seen.as_int()).get();
auto stop_generator = deferred_stop(sharded_gen);
auto make_sstable = [&e, upload_path, &sharded_gen] (shard_id shard) {
@@ -564,7 +564,7 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_distributes_well_eve
sharded<sstables::sstable_generation_generator> sharded_gen;
auto max_generation_seen = highest_generation_seen(sstdir).get0();
sharded_gen.start(max_generation_seen->as_int()).get();
sharded_gen.start(max_generation_seen.as_int()).get();
auto stop_generator = deferred_stop(sharded_gen);
auto make_sstable = [&e, upload_path, &sharded_gen] (shard_id shard) {
@@ -616,7 +616,7 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_respect_max_threshol
sharded<sstables::sstable_generation_generator> sharded_gen;
auto max_generation_seen = highest_generation_seen(sstdir).get0();
sharded_gen.start(max_generation_seen->as_int()).get();
sharded_gen.start(max_generation_seen.as_int()).get();
auto stop_generator = deferred_stop(sharded_gen);
auto make_sstable = [&e, upload_path, &sharded_gen] (shard_id shard) {

View File

@@ -46,6 +46,13 @@ BOOST_AUTO_TEST_CASE(from_string_int_good) {
BOOST_CHECK_EQUAL(id, fmt::to_string(gen));
}
// A default-constructed generation_type is the invalid identifier: check that
// it can be formatted without throwing and that it evaluates to false.
BOOST_AUTO_TEST_CASE(invalid_identifier) {
const auto invalid_id = sstables::generation_type{};
BOOST_CHECK_NO_THROW(fmt::to_string(invalid_id));
BOOST_CHECK(!invalid_id);
}
BOOST_AUTO_TEST_CASE(from_string_bad) {
const auto bad_uuids = {
"3fw _0tj4_46w3k2cpidnirvjy7k"s,