From 2d265e860dfe958302f0ee8b88f2a27898aaa244 Mon Sep 17 00:00:00 2001 From: Kefu Chai Date: Fri, 26 May 2023 18:32:01 +0800 Subject: [PATCH] replica,sstable: introduce invalid generation id the invalid sstable id is the NULL of a sstable identifier. with this concept, it would be a lot simpler to find/track the greatest generation. the complexity is hidden in the generation_type, which compares the a) integer-based identifiers b) uuid-based identifiers c) invalid identitifer in different ways. so, in this change * the default constructor generation_type is now public. * we don't check for empty generation anymore when loading SSTables or enumerating them. Signed-off-by: Kefu Chai --- replica/database.hh | 2 +- replica/distributed_loader.cc | 13 ++++------- replica/table.cc | 4 ++-- sstables/generation_type.hh | 11 ++++++---- sstables/sstable_directory.cc | 31 +++++++++------------------ sstables/sstable_directory.hh | 6 +++--- test/boost/sstable_directory_test.cc | 6 +++--- test/boost/sstable_generation_test.cc | 7 ++++++ 8 files changed, 37 insertions(+), 43 deletions(-) diff --git a/replica/database.hh b/replica/database.hh index 450f7a2f76..f346a0cbd7 100644 --- a/replica/database.hh +++ b/replica/database.hh @@ -571,7 +571,7 @@ private: // update the sstable generation, making sure (in calculate_generation_for_new_table) // that new new sstables don't overwrite this one. - void update_sstables_known_generation(std::optional generation); + void update_sstables_known_generation(sstables::generation_type generation); sstables::generation_type calculate_generation_for_new_table(); private: diff --git a/replica/distributed_loader.cc b/replica/distributed_loader.cc index 964c53cd50..9e3bc6ddc1 100644 --- a/replica/distributed_loader.cc +++ b/replica/distributed_loader.cc @@ -453,9 +453,8 @@ distributed_loader::process_upload_dir(distributed& db, distr process_sstable_dir(directory, flags).get(); sharded sharded_gen; - auto highest_generation = highest_generation_seen(directory).get0().value_or( - sstables::generation_type{0}); - sharded_gen.start(highest_generation.as_int()).get(); + auto highest_generation = highest_generation_seen(directory).get0(); + sharded_gen.start(highest_generation ? highest_generation.as_int() : 0).get(); auto stop_generator = deferred_stop(sharded_gen); auto make_sstable = [&] (shard_id shard) { @@ -537,7 +536,7 @@ class table_populator { fs::path _base_path; std::unordered_map>> _sstable_directories; sstables::sstable_version_types _highest_version = sstables::oldest_writable_sstable_format; - std::optional _highest_generation; + sstables::generation_type _highest_generation; public: table_populator(global_table_ptr ptr, distributed& db, sstring ks, sstring cf) @@ -629,11 +628,7 @@ future<> table_populator::start_subdir(sstring subdir) { auto generation = co_await highest_generation_seen(directory); _highest_version = std::max(sst_version, _highest_version); - if (generation) { - _highest_generation = _highest_generation ? - std::max(*generation, *_highest_generation) : - *generation; - } + _highest_generation = std::max(generation, _highest_generation); } sstables::shared_sstable make_sstable(replica::table& table, fs::path dir, sstables::generation_type generation, sstables::sstable_version_types v) { diff --git a/replica/table.cc b/replica/table.cc index 483c6ada00..c9272658e4 100644 --- a/replica/table.cc +++ b/replica/table.cc @@ -67,8 +67,8 @@ static seastar::metrics::label keyspace_label("ks"); using namespace std::chrono_literals; -void table::update_sstables_known_generation(std::optional generation) { - auto gen = generation.value_or(sstables::generation_type(0)).as_int(); +void table::update_sstables_known_generation(sstables::generation_type generation) { + auto gen = generation ? generation.as_int() : 0; if (_sstable_generation_generator) { _sstable_generation_generator->update_known_generation(gen); } else { diff --git a/sstables/generation_type.hh b/sstables/generation_type.hh index dd2d25f1e4..073a36296b 100644 --- a/sstables/generation_type.hh +++ b/sstables/generation_type.hh @@ -38,7 +38,8 @@ private: utils::UUID _value; public: - generation_type() = delete; + // create an invalid sstable identifier + generation_type() = default; // use zero as the timestamp to differentiate from the regular timeuuid, // and use the least_sig_bits to encode the value of generation identifier. @@ -47,13 +48,13 @@ public: explicit constexpr generation_type(utils::UUID value) noexcept : _value(value) {} constexpr utils::UUID as_uuid() const noexcept { - if (_value.timestamp() == 0) { + if (_value.is_null() || _value.timestamp() == 0) { on_internal_error(sstlog, "int generation used as a UUID "); } return _value; } constexpr int_t as_int() const noexcept { - if (_value.timestamp() != 0) { + if (_value.is_null() || _value.timestamp() != 0) { on_internal_error(sstlog, "UUID generation used as an int"); } return _value.get_least_significant_bits(); @@ -221,7 +222,9 @@ template <> struct fmt::formatter : fmt::formatter { template auto format(const sstables::generation_type& generation, FormatContext& ctx) const { - if (generation.is_uuid_based()) { + if (!generation) { + return fmt::format_to(ctx.out(), "-"); + } else if (generation.is_uuid_based()) { // format the uuid with 4 parts splitted with "_". each these parts is encoded // as base36 chars. // diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index 60a644a902..4837e2d1c8 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -186,7 +186,7 @@ sstring sstable_directory::sstable_filename(const sstables::entry_descriptor& de return sstable::filename(_sstable_dir.native(), _schema->ks_name(), _schema->cf_name(), desc.version, desc.generation, desc.format, component_type::Data); } -std::optional +generation_type sstable_directory::highest_generation_seen() const { return _max_generation_seen; } @@ -254,14 +254,11 @@ future<> sstable_directory::filesystem_components_lister::process(sstable_direct _directory, _state->descriptors.size(), _state->generations_found.size()); if (!_state->generations_found.empty()) { - // FIXME: for now set _max_generation_seen is any generation were found - // With https://github.com/scylladb/scylladb/issues/10459, - // We should do that only if any _numeric_ generations were found - directory._max_generation_seen = boost::accumulate(_state->generations_found | boost::adaptors::map_keys, sstables::generation_type(0), [] (generation_type a, generation_type b) { + directory._max_generation_seen = boost::accumulate(_state->generations_found | boost::adaptors::map_keys, sstables::generation_type{}, [] (generation_type a, generation_type b) { return std::max(a, b); }); - msg = format("{}, highest generation seen: {}", msg, *directory._max_generation_seen); + msg = format("{}, highest generation seen: {}", msg, directory._max_generation_seen); } else { msg = format("{}, no numeric generation was seen", msg); } @@ -624,22 +621,14 @@ future<> sstable_directory::filesystem_components_lister::handle_sstables_pendin co_await when_all_succeed(futures.begin(), futures.end()).discard_result(); } -future> +future highest_generation_seen(sharded& directory) { - // TODO: use an empty generation instead of an generation_type(0) and - // optional for finding the highest generation seen - auto highest = co_await directory.map_reduce0(std::mem_fn(&sstables::sstable_directory::highest_generation_seen), sstables::generation_type(0), [] (std::optional a, std::optional b) { - if (a && b) { - return std::max(*a, *b); - } else if (a) { - return *a; - } else if (b) { - return *b; - } else { - return sstables::generation_type(0); - } - }); - co_return highest.as_int() ? std::make_optional(highest) : std::nullopt; + co_return co_await directory.map_reduce0( + std::mem_fn(&sstables::sstable_directory::highest_generation_seen), + sstables::generation_type{}, + [] (sstables::generation_type a, sstables::generation_type b) { + return std::max(a, b); + }); } } diff --git a/sstables/sstable_directory.hh b/sstables/sstable_directory.hh index afa5a068c6..57215590aa 100644 --- a/sstables/sstable_directory.hh +++ b/sstables/sstable_directory.hh @@ -137,7 +137,7 @@ private: io_error_handler_gen _error_handler_gen; std::unique_ptr _lister; - std::optional _max_generation_seen; + generation_type _max_generation_seen; sstables::sstable_version_types _max_version_seen = sstables::sstable_version_types::ka; // SSTables that are unshared and belong to this shard. They are already stored as an @@ -199,7 +199,7 @@ public: future<> move_foreign_sstables(sharded& source_directory); // returns what is the highest generation seen in this directory. - std::optional highest_generation_seen() const; + generation_type highest_generation_seen() const; // returns what is the highest version seen in this directory. sstables::sstable_version_types highest_version_seen() const; @@ -270,6 +270,6 @@ public: future<> garbage_collect(); }; -future> highest_generation_seen(sharded& directory); +future highest_generation_seen(sharded& directory); } diff --git a/test/boost/sstable_directory_test.cc b/test/boost/sstable_directory_test.cc index abdefd747a..4bc5e529f5 100644 --- a/test/boost/sstable_directory_test.cc +++ b/test/boost/sstable_directory_test.cc @@ -511,7 +511,7 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_correctly) { sharded sharded_gen; auto max_generation_seen = highest_generation_seen(sstdir).get0(); - sharded_gen.start(max_generation_seen->as_int()).get(); + sharded_gen.start(max_generation_seen.as_int()).get(); auto stop_generator = deferred_stop(sharded_gen); auto make_sstable = [&e, upload_path, &sharded_gen] (shard_id shard) { @@ -564,7 +564,7 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_distributes_well_eve sharded sharded_gen; auto max_generation_seen = highest_generation_seen(sstdir).get0(); - sharded_gen.start(max_generation_seen->as_int()).get(); + sharded_gen.start(max_generation_seen.as_int()).get(); auto stop_generator = deferred_stop(sharded_gen); auto make_sstable = [&e, upload_path, &sharded_gen] (shard_id shard) { @@ -616,7 +616,7 @@ SEASTAR_TEST_CASE(sstable_directory_shared_sstables_reshard_respect_max_threshol sharded sharded_gen; auto max_generation_seen = highest_generation_seen(sstdir).get0(); - sharded_gen.start(max_generation_seen->as_int()).get(); + sharded_gen.start(max_generation_seen.as_int()).get(); auto stop_generator = deferred_stop(sharded_gen); auto make_sstable = [&e, upload_path, &sharded_gen] (shard_id shard) { diff --git a/test/boost/sstable_generation_test.cc b/test/boost/sstable_generation_test.cc index add7fd1f9a..c40186f61c 100644 --- a/test/boost/sstable_generation_test.cc +++ b/test/boost/sstable_generation_test.cc @@ -46,6 +46,13 @@ BOOST_AUTO_TEST_CASE(from_string_int_good) { BOOST_CHECK_EQUAL(id, fmt::to_string(gen)); } +BOOST_AUTO_TEST_CASE(invalid_identifier) { + const auto invalid_id = sstables::generation_type{}; + BOOST_CHECK_NO_THROW(fmt::to_string(invalid_id)); + BOOST_CHECK(!invalid_id); +} + + BOOST_AUTO_TEST_CASE(from_string_bad) { const auto bad_uuids = { "3fw _0tj4_46w3k2cpidnirvjy7k"s,