Merge 'Cleanup sstables in resharding and other compaction types' from Benny Halevy

This series extends sstable cleanup to resharding and to other compaction types (offstrategy, major, and regular) in order to:
* clean up uploaded sstables (#11933)
* clean up staging sstables after they are moved back to the main directory and become eligible for compaction (#9559)

When perform_cleanup is called, all sstables are scanned, and those that require cleanup are marked as such, and are added for tracking to table_state::cleanup_sstable_set.  They are removed from that set once released by compaction.
Along with that set of sstables, we keep the owned_ranges_ptr used by cleanup in the table_state. This allows other compaction types (offstrategy, major, or regular) to clean up those sstables that are marked as requiring cleanup and that were skipped by cleanup compaction, either because they were in the maintenance set (requiring offstrategy compaction) or because they were in staging.

Resharding uses a more straightforward mechanism: the owned token ranges are passed in when resharding uploaded sstables and are used to detect sstables that require cleanup. The cleanup is then piggybacked on the resharding compaction.

Closes #12422

* github.com:scylladb/scylladb:
  table: discard_sstables: update_sstable_cleanup_state when deleting sstables
  compaction_manager: compact_sstables: retrieve owned ranges if required
  sstables: add a printer for shared_sstable
  compaction_manager: keep owned_ranges_ptr in compaction_state
  compaction_manager: perform_cleanup: keep sstables in compaction_state::sstables_requiring_cleanup
  compaction: refactor compaction_state out of compaction_manager
  compaction: refactor compaction_fwd.hh out of compaction_descriptor.hh
  compaction_manager: compacting_sstable_registration: keep a ref to the compaction_state
  compaction_manager: refactor get_candidates
  compaction_manager: get_candidates: mark as const
  table, compaction_manager: add requires_cleanup
  sstable_set: add for_each_sstable_until
  distributed_loader: reshard: update sstable cleanup state
  table, compaction_manager: add update_sstable_cleanup_state
  compaction_manager: needs_cleanup: delete unused schema param
  compaction_manager: perform_cleanup: disallow empty sorted_owened_ranges
  distributed_loader: reshard: consider sstables for cleanup
  distributed_loader: process_upload_dir: pass owned_ranges_ptr to reshard
  distributed_loader: reshard: add optional owned_ranges_ptr param
  distributed_loader: reshard: get a ref to table_state
  distributed_loader: reshard: capture creator by ref
  distributed_loader: reshard: reserve num_jobs buckets
  compaction: move owned ranges filtering to base class
  compaction: move owned_ranges into descriptor
This commit is contained in:
Botond Dénes
2023-04-11 14:52:29 +03:00
19 changed files with 432 additions and 183 deletions

View File

@@ -1998,7 +1998,7 @@ SEASTAR_TEST_CASE(sstable_cleanup_correctness_test) {
auto local_ranges = compaction::make_owned_ranges_ptr(db.get_keyspace_local_ranges(ks_name));
auto descriptor = sstables::compaction_descriptor({std::move(sst)}, default_priority_class(), compaction_descriptor::default_level,
compaction_descriptor::default_max_sstable_bytes, run_identifier, compaction_type_options::make_cleanup(std::move(local_ranges)));
compaction_descriptor::default_max_sstable_bytes, run_identifier, compaction_type_options::make_cleanup(), std::move(local_ranges));
auto ret = compact_sstables(std::move(descriptor), cf, sst_gen).get0();
BOOST_REQUIRE(ret.new_sstables.size() == 1);
@@ -3571,23 +3571,23 @@ SEASTAR_TEST_CASE(sstable_needs_cleanup_test) {
{
auto local_ranges = { token_range(0, 9) };
auto sst = sst_gen(keys[0], keys[9]);
BOOST_REQUIRE(!needs_cleanup(sst, local_ranges, s));
BOOST_REQUIRE(!needs_cleanup(sst, local_ranges));
}
{
auto local_ranges = { token_range(0, 1), token_range(3, 4), token_range(5, 6) };
auto sst = sst_gen(keys[0], keys[1]);
BOOST_REQUIRE(!needs_cleanup(sst, local_ranges, s));
BOOST_REQUIRE(!needs_cleanup(sst, local_ranges));
auto sst2 = sst_gen(keys[2], keys[2]);
BOOST_REQUIRE(needs_cleanup(sst2, local_ranges, s));
BOOST_REQUIRE(needs_cleanup(sst2, local_ranges));
auto sst3 = sst_gen(keys[0], keys[6]);
BOOST_REQUIRE(needs_cleanup(sst3, local_ranges, s));
BOOST_REQUIRE(needs_cleanup(sst3, local_ranges));
auto sst5 = sst_gen(keys[7], keys[7]);
BOOST_REQUIRE(needs_cleanup(sst5, local_ranges, s));
BOOST_REQUIRE(needs_cleanup(sst5, local_ranges));
}
});
}
@@ -4886,3 +4886,28 @@ SEASTAR_TEST_CASE(compaction_manager_stop_and_drain_race_test) {
testlog.info("stopping compaction manager");
co_await cm.stop();
}
// Checks that formatting a vector of shared_sstable produces text that
// contains the formatted generation of every sstable in that vector.
SEASTAR_TEST_CASE(test_print_shared_sstables_vector) {
    return test_env::do_with_async([] (test_env& env) {
        simple_schema ss;
        auto s = ss.schema();
        auto pks = ss.make_pkeys(2);
        auto sst_gen = env.make_sst_factory(s);

        // Build one sstable per partition key; each mutation receives a
        // single apply_insert() with a distinct clustering key.
        std::vector<sstables::shared_sstable> ssts;
        ssts.reserve(2);
        for (uint32_t idx = 0; idx < 2; ++idx) {
            auto mut = mutation(s, pks[idx]);
            mut.partition().apply_insert(*s, ss.make_ckey(idx), ss.new_timestamp());
            ssts.push_back(make_sstable_containing(sst_gen, {std::move(mut)}));
        }

        // The printed vector must mention every generation it holds.
        std::string msg = format("{}", ssts);
        for (const auto& sst : ssts) {
            auto gen_str = format("{}", sst->generation());
            BOOST_REQUIRE(msg.find(gen_str) != std::string::npos);
        }
    });
}