diff --git a/sstables/object_storage_client.cc b/sstables/object_storage_client.cc index 3740e12e21..298eb170a5 100644 --- a/sstables/object_storage_client.cc +++ b/sstables/object_storage_client.cc @@ -96,6 +96,9 @@ public: data_source make_download_source(object_name name, abort_source* as) override { return _client->make_chunked_download_source(name.str(), s3::full_range, as); } + future object_exists(object_name name, abort_source* as) override { + return _client->object_exists(name.str(), as); + } abstract_lister make_object_lister(std::string bucket, std::string prefix, lister::filter_type filter) override { return abstract_lister::make(_client, std::move(bucket), std::move(prefix), std::move(filter)); } @@ -170,6 +173,9 @@ public: data_source make_download_source(object_name name, abort_source* as) override { return _client->create_download_source(name.bucket(), name.object(), as); } + future object_exists(object_name name, abort_source* as) override { + return _client->object_exists(name.bucket(), name.object(), as); + } abstract_lister make_object_lister(std::string bucket, std::string prefix, lister::filter_type filter) override { class list_impl : public abstract_lister::impl { shared_ptr _client; diff --git a/sstables/object_storage_client.hh b/sstables/object_storage_client.hh index bb02f3aba1..548be09ea5 100644 --- a/sstables/object_storage_client.hh +++ b/sstables/object_storage_client.hh @@ -76,6 +76,7 @@ public: virtual data_sink make_data_upload_sink(object_name, std::optional max_parts_per_piece, abort_source* = nullptr) = 0; virtual data_sink make_upload_sink(object_name, abort_source* = nullptr) = 0; virtual data_source make_download_source(object_name, abort_source* = nullptr) = 0; + virtual future object_exists(object_name name, abort_source* as = nullptr) = 0; virtual abstract_lister make_object_lister(std::string bucket, std::string prefix, lister::filter_type) = 0; diff --git a/sstables/storage.cc b/sstables/storage.cc index 
cab7c45c42..c12908dcce 100644 --- a/sstables/storage.cc +++ b/sstables/storage.cc @@ -109,6 +109,9 @@ public: virtual future<> unlink_component(const sstable& sst, component_type) noexcept override; virtual sstring prefix() const override { return _dir.native(); } + future exists(const sstable& sst, component_type type) const override { + return file_exists(sst.get_filename(type).format()); + } }; future filesystem_storage::make_data_or_index_sink(sstable& sst, component_type type) { @@ -667,6 +670,10 @@ public: return std::visit([] (const auto& v) { return fmt::to_string(v); }, _location); } + future exists(const sstable& sst, component_type type) const override { + return _client->object_exists(make_object_name(sst, type), abort_source()); + } + future<> put_object(object_name name, ::memory_data_sink_buffers bufs) { return _client->put_object(std::move(name), std::move(bufs), abort_source()); } diff --git a/sstables/storage.hh b/sstables/storage.hh index 913192696c..0885b3ac02 100644 --- a/sstables/storage.hh +++ b/sstables/storage.hh @@ -124,6 +124,7 @@ public: virtual future<> unlink_component(const sstable& sst, component_type) noexcept = 0; virtual sstring prefix() const = 0; + virtual future exists(const sstable& sst, component_type type) const = 0; }; std::unique_ptr make_storage(sstables_manager& manager, const data_dictionary::storage_options& s_opts, sstable_state state); diff --git a/test/boost/sstable_compaction_test.cc b/test/boost/sstable_compaction_test.cc index 528e9e03cc..503c2a4e92 100644 --- a/test/boost/sstable_compaction_test.cc +++ b/test/boost/sstable_compaction_test.cc @@ -158,17 +158,18 @@ static void assert_table_sstable_count(table_for_tests& t, size_t expected_count } static void corrupt_sstable(sstables::shared_sstable sst, component_type type = component_type::Data) { - auto f = open_file_dma(sstables::test(sst).filename(type).native(), open_flags::wo).get(); + auto f = sstables::test(sst).open_file(type, {}, {}).get(); auto close_f 
= deferred_close(f); const auto wbuf_align = f.memory_dma_alignment(); - const auto wbuf_len = f.disk_write_dma_alignment(); + const auto wbuf_len = f.size().get(); auto wbuf = seastar::temporary_buffer::aligned(wbuf_align, wbuf_len); std::fill(wbuf.get_write(), wbuf.get_write() + wbuf_len, 0xba); - f.dma_write(0, wbuf.get(), wbuf_len).get(); + auto os = output_stream(sstables::test(sst).get_storage().make_component_sink(*sst, component_type::Data, open_flags::wo, {}).get()); + auto close_os = deferred_close(os); + os.write(std::move(wbuf)).get(); } -SEASTAR_TEST_CASE(compaction_manager_basic_test) { - return test_env::do_with_async([] (test_env& env) { +void compaction_manager_basic(test_env& env) { BOOST_REQUIRE(smp::count == 1); auto s = schema_builder(some_keyspace, some_column_family) .with_column("p1", utf8_type, column_kind::partition_key) @@ -215,106 +216,145 @@ SEASTAR_TEST_CASE(compaction_manager_basic_test) { // expect sstables of cf to be compacted. BOOST_CHECK_EQUAL(cf->sstables_count(), 1); - }); } -SEASTAR_TEST_CASE(compact) { - return sstables::test_env::do_with_async([] (sstables::test_env& env) { - BOOST_REQUIRE(smp::count == 1); - // The "compaction" sstable was created with the following schema: - // CREATE TABLE compaction ( - // name text, - // age int, - // height int, - // PRIMARY KEY (name) - //); - auto builder = schema_builder("tests", "compaction") - .with_column("name", utf8_type, column_kind::partition_key) - .with_column("age", int32_type) - .with_column("height", int32_type); - builder.set_comment("Example table for compaction"); - builder.set_gc_grace_seconds(std::numeric_limits::max()); - auto s = builder.build(); - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); +SEASTAR_TEST_CASE(compaction_manager_basic_test) { + return test_env::do_with_async([](test_env& env) { compaction_manager_basic(env); }); +} - auto sstables = open_sstables(env, s, "test/resource/sstables/compaction", {1,2,3}).get(); - 
std::vector new_sstables; - auto new_sstable = [&] { - auto sst = env.make_sstable(cf->schema()); - new_sstables.push_back(sst); - return sst; - }; - compact_sstables(env, compaction::compaction_descriptor(std::move(sstables)), cf, new_sstable).get(); - // Verify that the compacted sstable has the right content. We expect to see: - // name | age | height - // -------+-----+-------- - // jerry | 40 | 170 - // tom | 20 | 180 - // john | 20 | deleted - // nadav - deleted partition - BOOST_REQUIRE_EQUAL(new_sstables.size(), 1); - auto sst = env.reusable_sst(s, new_sstables[0]).get(); - auto reader = sstable_reader(sst, s, env.make_reader_permit()); - auto close_reader = deferred_close(reader); - auto verify_mutation = [&] (std::function verify) { - std::invoke(verify, read_mutation_from_mutation_reader(reader).get()); - }; - verify_mutation([&] (mutation_opt m) { - BOOST_REQUIRE(m); - BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("jerry"))))); - BOOST_REQUIRE(!m->partition().partition_tombstone()); - auto rows = m->partition().clustered_rows(); - BOOST_REQUIRE(rows.calculate_size() == 1); - auto &row = rows.begin()->row(); - BOOST_REQUIRE(!row.deleted_at()); - auto &cells = row.cells(); - auto& cdef1 = *s->get_column_definition("age"); - auto& cdef2 = *s->get_column_definition("height"); - BOOST_REQUIRE(cells.cell_at(cdef1.id).as_atomic_cell(cdef1).value() == managed_bytes({0,0,0,40})); - BOOST_REQUIRE(cells.cell_at(cdef2.id).as_atomic_cell(cdef2).value() == managed_bytes({0,0,0,(int8_t)170})); - }); - verify_mutation([&] (mutation_opt m) { - BOOST_REQUIRE(m); - BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("tom"))))); - BOOST_REQUIRE(!m->partition().partition_tombstone()); - auto rows = m->partition().clustered_rows(); - BOOST_REQUIRE(rows.calculate_size() == 1); - auto &row = rows.begin()->row(); - BOOST_REQUIRE(!row.deleted_at()); - auto &cells = row.cells(); - auto& cdef1 = 
*s->get_column_definition("age"); - auto& cdef2 = *s->get_column_definition("height"); - BOOST_REQUIRE(cells.cell_at(cdef1.id).as_atomic_cell(cdef1).value() == managed_bytes({0,0,0,20})); - BOOST_REQUIRE(cells.cell_at(cdef2.id).as_atomic_cell(cdef2).value() == managed_bytes({0,0,0,(int8_t)180})); - }); - verify_mutation([&] (mutation_opt m) { - BOOST_REQUIRE(m); - BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("john"))))); - BOOST_REQUIRE(!m->partition().partition_tombstone()); - auto rows = m->partition().clustered_rows(); - BOOST_REQUIRE(rows.calculate_size() == 1); - auto &row = rows.begin()->row(); - BOOST_REQUIRE(!row.deleted_at()); - auto &cells = row.cells(); - auto& cdef1 = *s->get_column_definition("age"); - auto& cdef2 = *s->get_column_definition("height"); - BOOST_REQUIRE(cells.cell_at(cdef1.id).as_atomic_cell(cdef1).value() == managed_bytes({0,0,0,20})); - BOOST_REQUIRE(cells.find_cell(cdef2.id) == nullptr); - }); - verify_mutation([&] (mutation_opt m) { - BOOST_REQUIRE(m); - BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("nadav"))))); - BOOST_REQUIRE(m->partition().partition_tombstone()); - auto rows = m->partition().clustered_rows(); - BOOST_REQUIRE(rows.calculate_size() == 0); - }); - verify_mutation([&] (mutation_opt m) { - BOOST_REQUIRE(!m); - }); +SEASTAR_TEST_CASE(compaction_manager_basic_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { compaction_manager_basic(env); }, + test_env_config{ + .storage = make_test_object_storage_options("S3"), + }); +} + +SEASTAR_FIXTURE_TEST_CASE(compaction_manager_basic_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { compaction_manager_basic(env); }, + test_env_config{ + .storage = make_test_object_storage_options("GS"), + }); +} + +void compact(test_env& env) { + 
BOOST_REQUIRE(smp::count == 1); + // The "compaction" sstable was created with the following schema: + // CREATE TABLE compaction ( + // name text, + // age int, + // height int, + // PRIMARY KEY (name) + //); + auto builder = schema_builder("tests", "compaction") + .with_column("name", utf8_type, column_kind::partition_key) + .with_column("age", int32_type) + .with_column("height", int32_type); + builder.set_comment("Example table for compaction"); + builder.set_gc_grace_seconds(std::numeric_limits::max()); + auto s = builder.build(); + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + + auto sstables = open_sstables(env, s, "test/resource/sstables/compaction", {1,2,3}).get(); + std::vector new_sstables; + auto new_sstable = [&] { + auto sst = env.make_sstable(cf->schema()); + new_sstables.push_back(sst); + return sst; + }; + compact_sstables(env, compaction::compaction_descriptor(std::move(sstables)), cf, new_sstable).get(); + // Verify that the compacted sstable has the right content. 
We expect to see: + // name | age | height + // -------+-----+-------- + // jerry | 40 | 170 + // tom | 20 | 180 + // john | 20 | deleted + // nadav - deleted partition + BOOST_REQUIRE_EQUAL(new_sstables.size(), 1); + auto sst = env.reusable_sst(s, new_sstables[0]).get(); + auto reader = sstable_reader(sst, s, env.make_reader_permit()); + auto close_reader = deferred_close(reader); + auto verify_mutation = [&] (std::function verify) { + std::invoke(verify, read_mutation_from_mutation_reader(reader).get()); + }; + verify_mutation([&] (mutation_opt m) { + BOOST_REQUIRE(m); + BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("jerry"))))); + BOOST_REQUIRE(!m->partition().partition_tombstone()); + auto rows = m->partition().clustered_rows(); + BOOST_REQUIRE(rows.calculate_size() == 1); + auto &row = rows.begin()->row(); + BOOST_REQUIRE(!row.deleted_at()); + auto &cells = row.cells(); + auto& cdef1 = *s->get_column_definition("age"); + auto& cdef2 = *s->get_column_definition("height"); + BOOST_REQUIRE(cells.cell_at(cdef1.id).as_atomic_cell(cdef1).value() == managed_bytes({0,0,0,40})); + BOOST_REQUIRE(cells.cell_at(cdef2.id).as_atomic_cell(cdef2).value() == managed_bytes({0,0,0,(int8_t)170})); }); + verify_mutation([&] (mutation_opt m) { + BOOST_REQUIRE(m); + BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("tom"))))); + BOOST_REQUIRE(!m->partition().partition_tombstone()); + auto rows = m->partition().clustered_rows(); + BOOST_REQUIRE(rows.calculate_size() == 1); + auto &row = rows.begin()->row(); + BOOST_REQUIRE(!row.deleted_at()); + auto &cells = row.cells(); + auto& cdef1 = *s->get_column_definition("age"); + auto& cdef2 = *s->get_column_definition("height"); + BOOST_REQUIRE(cells.cell_at(cdef1.id).as_atomic_cell(cdef1).value() == managed_bytes({0,0,0,20})); + BOOST_REQUIRE(cells.cell_at(cdef2.id).as_atomic_cell(cdef2).value() == managed_bytes({0,0,0,(int8_t)180})); + }); + verify_mutation([&] 
(mutation_opt m) { + BOOST_REQUIRE(m); + BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("john"))))); + BOOST_REQUIRE(!m->partition().partition_tombstone()); + auto rows = m->partition().clustered_rows(); + BOOST_REQUIRE(rows.calculate_size() == 1); + auto &row = rows.begin()->row(); + BOOST_REQUIRE(!row.deleted_at()); + auto &cells = row.cells(); + auto& cdef1 = *s->get_column_definition("age"); + auto& cdef2 = *s->get_column_definition("height"); + BOOST_REQUIRE(cells.cell_at(cdef1.id).as_atomic_cell(cdef1).value() == managed_bytes({0,0,0,20})); + BOOST_REQUIRE(cells.find_cell(cdef2.id) == nullptr); + }); + verify_mutation([&] (mutation_opt m) { + BOOST_REQUIRE(m); + BOOST_REQUIRE(m->key().equal(*s, partition_key::from_singular(*s, data_value(sstring("nadav"))))); + BOOST_REQUIRE(m->partition().partition_tombstone()); + auto rows = m->partition().clustered_rows(); + BOOST_REQUIRE(rows.calculate_size() == 0); + }); + verify_mutation([&] (mutation_opt m) { + BOOST_REQUIRE(!m); + }); +} - // verify that the compacted sstable look like +SEASTAR_TEST_CASE(compact_test) { + return sstables::test_env::do_with_async([](sstables::test_env& env) { compact(env); }); +} + +SEASTAR_TEST_CASE(compact_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + testlog.info("OBJECT STORAGE only works with uuid_sstable_identifier enabled, skipping test"); + return make_ready_future(); +#if 0 + return sstables::test_env::do_with_async([](sstables::test_env& env) { compact(env); }, + test_env_config{ + .storage = make_test_object_storage_options("S3"), + }); +#endif +} + +SEASTAR_FIXTURE_TEST_CASE(compact_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + testlog.info("OBJECT STORAGE only works with uuid_sstable_identifier enabled, skipping test"); + return make_ready_future(); +#if 0 + return sstables::test_env::do_with_async([](sstables::test_env& env) { compact(env); }, + 
test_env_config{ + .storage = make_test_object_storage_options("GS"), + }); +#endif } static std::vector get_candidates_for_leveled_strategy(replica::column_family& cf) { @@ -466,7 +506,7 @@ static future<> check_compacted_sstables(test_env& env, compact_sstables_result }); } -SEASTAR_TEST_CASE(compact_02) { +void compact_02(test_env& env) { // NOTE: generations 18 to 38 are used here. // This tests size-tiered compaction strategy by creating 4 sstables of @@ -477,38 +517,55 @@ SEASTAR_TEST_CASE(compact_02) { // By the way, automatic compaction isn't tested here, instead the // strategy algorithm that selects candidates for compaction. - return test_env::do_with_async([] (test_env& env) { - std::vector all_input_sstables; - std::vector compacted; + std::vector all_input_sstables; + std::vector compacted; - auto compact_and_verify = [&] (size_t count) mutable{ - // Compact `count` sstables into 1 using size-tiered strategy to select sstables. - // E.g.: generations 18, 19, 20 and 21 will be compacted into generation 22. - auto res = create_and_compact_sstables(env, count).get(); - std::copy(res.input_sstables.begin(), res.input_sstables.end(), std::back_inserter(all_input_sstables)); - compacted.emplace_back(res.output_sstables[0]); - // Check that generation 22 contains all keys of generations 18, 19, 20 and 21. - check_compacted_sstables(env, std::move(res)).get(); - }; - - static constexpr size_t num_rounds = 4; - static constexpr size_t sstables_in_round = 4; - for (unsigned i = 0; i < num_rounds; ++i) { - compact_and_verify(sstables_in_round); - } - - // In this step, we compact 4 compacted sstables. - auto res = compact_sstables(env, std::move(compacted)).get(); - res.input_sstables = std::move(all_input_sstables); - // Check that the compacted sstable contains all keys. + auto compact_and_verify = [&] (size_t count) mutable{ + // Compact `count` sstables into 1 using size-tiered strategy to select sstables. 
+ // E.g.: generations 18, 19, 20 and 21 will be compacted into generation 22. + auto res = create_and_compact_sstables(env, count).get(); + std::copy(res.input_sstables.begin(), res.input_sstables.end(), std::back_inserter(all_input_sstables)); + compacted.emplace_back(res.output_sstables[0]); + // Check that generation 22 contains all keys of generations 18, 19, 20 and 21. check_compacted_sstables(env, std::move(res)).get(); - }); + }; + + static constexpr size_t num_rounds = 4; + static constexpr size_t sstables_in_round = 4; + for (unsigned i = 0; i < num_rounds; ++i) { + compact_and_verify(sstables_in_round); + } + + // In this step, we compact 4 compacted sstables. + auto res = compact_sstables(env, std::move(compacted)).get(); + res.input_sstables = std::move(all_input_sstables); + // Check that the compacted sstable contains all keys. + check_compacted_sstables(env, std::move(res)).get(); +} + +SEASTAR_TEST_CASE(compact_02_test) { + return test_env::do_with_async([](test_env& env) { compact_02(env); }); +} + +SEASTAR_TEST_CASE(compact_02_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { compact_02(env); }, + test_env_config{ + .storage = make_test_object_storage_options("S3"), + }); +} + +SEASTAR_FIXTURE_TEST_CASE(compact_02_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { compact_02(env); }, + test_env_config{ + .storage = make_test_object_storage_options("GS"), + }); } template static void compact_corrupted_by_compression_mode(const std::string& tname, + test_env_config config, compress_sstable compress, - compaction::compaction_type_options&& options, + compaction::compaction_type_options options, const sstring& error_msg) { test_env::do_with_async([&] (test_env& env) { @@ -562,58 +619,145 @@ static void compact_corrupted_by_compression_mode(const std::string& tname, sst = 
make_sstable_containing(env.make_sstable(schema), muts); { - auto f = open_file_dma(sstables::test(sst).filename(component_type::Digest).native(), open_flags::rw).get(); + auto f = sstables::test(sst).open_file(component_type::Digest, {}, {}).get(); auto stream = make_file_input_stream(f); auto close_stream = deferred_close(stream); auto digest_str = util::read_entire_stream_contiguous(stream).get(); auto digest = boost::lexical_cast(digest_str); auto new_digest = to_sstring(digest + 1); // a random invalid digest - f.dma_write(0, new_digest.c_str(), new_digest.size()).get(); + auto os = output_stream(sstables::test(sst).get_storage().make_component_sink(*sst, component_type::Digest, open_flags::wo | open_flags::truncate, {}).get()); + auto close_os = deferred_close(os); + os.write(std::move(new_digest)).get(); } test_failing_compact(schema, {sst}, error_msg, "Digest mismatch"); - }).get(); + }, std::move(config)).get(); } template -static void compact_corrupted(const std::string& tname, compaction::compaction_type_options&& options, const sstring& error_msg) { +static void compact_corrupted(const std::string& tname, test_env_config config, compaction::compaction_type_options&& options, const sstring& error_msg) { for (const auto& compress : {compress_sstable::no, compress_sstable::yes}) { - compact_corrupted_by_compression_mode(tname, compress, std::move(options), error_msg); + compact_corrupted_by_compression_mode(tname, config, compress, options, error_msg); } } SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_regular) { - compact_corrupted(get_name(), + compact_corrupted(get_name(), {}, compaction::compaction_type_options::make_regular(), "Failed to read partition from SSTable"); } SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_scrub) { using scrub_mode = compaction::compaction_type_options::scrub::mode; - compact_corrupted(get_name(), + compact_corrupted(get_name(), {}, compaction::compaction_type_options::make_scrub(scrub_mode::segregate), 
"scrub compaction failed due to unrecoverable error: sstables::malformed_sstable_exception"); } SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_cleanup) { - compact_corrupted(get_name(), + compact_corrupted(get_name(), {}, compaction::compaction_type_options::make_cleanup(), "Failed to read partition from SSTable"); } SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_reshape) { - compact_corrupted(get_name(), + compact_corrupted(get_name(), {}, compaction::compaction_type_options::make_reshape(), "Failed to read partition from SSTable"); } SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_reshard) { - compact_corrupted(get_name(), + compact_corrupted(get_name(), {}, compaction::compaction_type_options::make_reshard(), "Failed to read partition from SSTable"); } SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_split) { auto classify_fn = [] (dht::token t) -> mutation_writer::token_group_id { return 1; }; - compact_corrupted(get_name(), + compact_corrupted(get_name(), {}, compaction::compaction_type_options::make_split(classify_fn), "Failed to read partition from SSTable"); } +SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_regular_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + compact_corrupted(get_name(), test_env_config{.storage = make_test_object_storage_options("S3")}, + compaction::compaction_type_options::make_regular(), + "Failed to read partition from SSTable"); +} +SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_scrub_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + using scrub_mode = compaction::compaction_type_options::scrub::mode; + compact_corrupted(get_name(), test_env_config{.storage = make_test_object_storage_options("S3")}, + compaction::compaction_type_options::make_scrub(scrub_mode::segregate), + "scrub compaction failed due to unrecoverable error: sstables::malformed_sstable_exception"); +} +SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_cleanup_s3, 
*boost::unit_test::precondition(tests::has_scylla_test_env)) { + compact_corrupted(get_name(), test_env_config{.storage = make_test_object_storage_options("S3")}, + compaction::compaction_type_options::make_cleanup(), + "Failed to read partition from SSTable"); +} +SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_reshape_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + compact_corrupted(get_name(), test_env_config{.storage = make_test_object_storage_options("S3")}, + compaction::compaction_type_options::make_reshape(), + "Failed to read partition from SSTable"); +} +SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_reshard_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + compact_corrupted(get_name(), test_env_config{.storage = make_test_object_storage_options("S3")}, + compaction::compaction_type_options::make_reshard(), + "Failed to read partition from SSTable"); +} +SEASTAR_THREAD_TEST_CASE(compact_with_corrupted_sstable_split_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + auto classify_fn = [] (dht::token t) -> mutation_writer::token_group_id { return 1; }; + compact_corrupted(get_name(), test_env_config{.storage = make_test_object_storage_options("S3")}, + compaction::compaction_type_options::make_split(classify_fn), + "Failed to read partition from SSTable"); +} + +SEASTAR_FIXTURE_TEST_CASE(compact_with_corrupted_sstable_regular_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return seastar::async([] { + compact_corrupted(testing::seastar_test::get_name(), + test_env_config{.storage = make_test_object_storage_options("GS")}, + compaction::compaction_type_options::make_regular(), + "Failed to read partition from SSTable"); + }); +} +SEASTAR_FIXTURE_TEST_CASE(compact_with_corrupted_sstable_scrub_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return seastar::async([] { + using scrub_mode = 
compaction::compaction_type_options::scrub::mode; + compact_corrupted( + testing::seastar_test::get_name(), + test_env_config{.storage = make_test_object_storage_options("GS")}, + compaction::compaction_type_options::make_scrub(scrub_mode::segregate), + "scrub compaction failed due to unrecoverable error: sstables::malformed_sstable_exception"); + }); +} +SEASTAR_FIXTURE_TEST_CASE(compact_with_corrupted_sstable_cleanup_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return seastar::async([] { + compact_corrupted(testing::seastar_test::get_name(), + test_env_config{.storage = make_test_object_storage_options("GS")}, + compaction::compaction_type_options::make_cleanup(), + "Failed to read partition from SSTable"); + }); +} +SEASTAR_FIXTURE_TEST_CASE(compact_with_corrupted_sstable_reshape_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return seastar::async([] { + compact_corrupted(testing::seastar_test::get_name(), + test_env_config{.storage = make_test_object_storage_options("GS")}, + compaction::compaction_type_options::make_reshape(), + "Failed to read partition from SSTable"); + }); +} +SEASTAR_FIXTURE_TEST_CASE(compact_with_corrupted_sstable_reshard_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return seastar::async([] { + compact_corrupted(testing::seastar_test::get_name(), + test_env_config{.storage = make_test_object_storage_options("GS")}, + compaction::compaction_type_options::make_reshard(), + "Failed to read partition from SSTable"); + }); +} +SEASTAR_FIXTURE_TEST_CASE(compact_with_corrupted_sstable_split_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return seastar::async([] { + auto classify_fn = [](dht::token t) -> mutation_writer::token_group_id { return 1; }; + compact_corrupted(testing::seastar_test::get_name(), + test_env_config{.storage = make_test_object_storage_options("GS")}, + 
compaction::compaction_type_options::make_split(classify_fn), + "Failed to read partition from SSTable"); + }); +} + // Leveled compaction strategy tests // NOTE: must run in a thread. @@ -658,10 +802,10 @@ static bool sstable_overlaps(const lw_shared_ptr& cf, ss return range1.overlaps(range2, dht::token_comparator()); } -SEASTAR_TEST_CASE(leveled_01) { - BOOST_REQUIRE_EQUAL(smp::count, 1); - return test_env::do_with_async([] (test_env& env) { - auto cf = env.make_table_for_tests(); +void leveled_01_fn(test_env& env) { + BOOST_REQUIRE_EQUAL(smp::count, 1); + auto schema = table_for_tests::make_default_schema(); + auto cf = env.make_table_for_tests(schema); auto stop_cf = deferred_stop(cf); const auto keys = tests::generate_partition_keys(50, cf.schema()); @@ -696,13 +840,24 @@ SEASTAR_TEST_CASE(leveled_01) { BOOST_REQUIRE(sst->get_sstable_level() == 0); } BOOST_REQUIRE(expected.empty()); - }); } -SEASTAR_TEST_CASE(leveled_02) { - BOOST_REQUIRE_EQUAL(smp::count, 1); - return test_env::do_with_async([] (test_env& env) { - auto cf = env.make_table_for_tests(); +SEASTAR_TEST_CASE(leveled_01) { + return test_env::do_with_async([](test_env& env) { leveled_01_fn(env); }); +} + +SEASTAR_TEST_CASE(leveled_01_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { leveled_01_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(leveled_01_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { leveled_01_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void leveled_02_fn(test_env& env) { + BOOST_REQUIRE_EQUAL(smp::count, 1); + auto schema = table_for_tests::make_default_schema(); + auto cf = env.make_table_for_tests(schema); auto stop_cf = deferred_stop(cf); const auto keys = tests::generate_partition_keys(50, cf.schema()); @@ -747,13 
+902,24 @@ SEASTAR_TEST_CASE(leveled_02) { BOOST_REQUIRE(sst->get_sstable_level() == 0); } BOOST_REQUIRE(expected.empty()); - }); } -SEASTAR_TEST_CASE(leveled_03) { - BOOST_REQUIRE_EQUAL(smp::count, 1); - return test_env::do_with_async([] (test_env& env) { - auto cf = env.make_table_for_tests(); +SEASTAR_TEST_CASE(leveled_02) { + return test_env::do_with_async([](test_env& env) { leveled_02_fn(env); }); +} + +SEASTAR_TEST_CASE(leveled_02_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { leveled_02_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(leveled_02_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { leveled_02_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void leveled_03_fn(test_env& env) { + BOOST_REQUIRE_EQUAL(smp::count, 1); + auto schema = table_for_tests::make_default_schema(); + auto cf = env.make_table_for_tests(schema); auto stop_cf = deferred_stop(cf); const auto keys = tests::generate_partition_keys(50, cf.schema()); @@ -799,13 +965,24 @@ SEASTAR_TEST_CASE(leveled_03) { BOOST_REQUIRE(expected.erase(sst)); } BOOST_REQUIRE(expected.empty()); - }); } -SEASTAR_TEST_CASE(leveled_04) { - BOOST_REQUIRE_EQUAL(smp::count, 1); - return test_env::do_with_async([] (test_env& env) { - auto cf = env.make_table_for_tests(); +SEASTAR_TEST_CASE(leveled_03) { + return test_env::do_with_async([](test_env& env) { leveled_03_fn(env); }); +} + +SEASTAR_TEST_CASE(leveled_03_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { leveled_03_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(leveled_03_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + 
return test_env::do_with_async([](test_env& env) { leveled_03_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void leveled_04_fn(test_env& env) { + BOOST_REQUIRE_EQUAL(smp::count, 1); + auto schema = table_for_tests::make_default_schema(); + auto cf = env.make_table_for_tests(schema); auto stop_cf = deferred_stop(cf); const auto keys = tests::generate_partition_keys(50, cf.schema()); @@ -863,28 +1040,48 @@ SEASTAR_TEST_CASE(leveled_04) { levels.erase(sst->get_sstable_level()); } BOOST_REQUIRE(levels.empty()); - }); +} + +SEASTAR_TEST_CASE(leveled_04) { + return test_env::do_with_async([](test_env& env) { leveled_04_fn(env); }); +} + +SEASTAR_TEST_CASE(leveled_04_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { leveled_04_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(leveled_04_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { leveled_04_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void leveled_05_fn(test_env& env) { + static constexpr size_t sstables_in_round = 2; + + // Check compaction code with leveled strategy. In this test, two sstables of level 0 will be created. + auto res = compact_sstables(env, {}, sstables_in_round, 1024*1024, compaction::compaction_strategy_type::leveled).get(); + BOOST_REQUIRE_EQUAL(res.input_sstables.size(), sstables_in_round); + + for (const auto& sst : res.output_sstables) { + BOOST_REQUIRE(sst->data_size() >= 1024*1024); + } } SEASTAR_TEST_CASE(leveled_05) { - // NOTE: Generations from 48 to 51 are used here. - return test_env::do_with_async([] (test_env& env) { - static constexpr size_t sstables_in_round = 2; - - // Check compaction code with leveled strategy. In this test, two sstables of level 0 will be created. 
- auto res = compact_sstables(env, {}, sstables_in_round, 1024*1024, compaction::compaction_strategy_type::leveled).get(); - BOOST_REQUIRE_EQUAL(res.input_sstables.size(), sstables_in_round); - - for (const auto& sst : res.output_sstables) { - BOOST_REQUIRE(sst->data_size() >= 1024*1024); - } - }); + return test_env::do_with_async([](test_env& env) { leveled_05_fn(env); }); } -SEASTAR_TEST_CASE(leveled_06) { +SEASTAR_TEST_CASE(leveled_05_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { leveled_05_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(leveled_05_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { leveled_05_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void leveled_06_fn(test_env& env) { // Test that we can compact a single L1 compaction into an empty L2. 
- return test_env::do_with_async([] (test_env& env) { - auto cf = env.make_table_for_tests(); + auto schema = table_for_tests::make_default_schema(); + auto cf = env.make_table_for_tests(schema); auto stop_cf = deferred_stop(cf); auto max_sstable_size_in_mb = 1; @@ -911,12 +1108,23 @@ SEASTAR_TEST_CASE(leveled_06) { auto& sst = (candidate.sstables)[0]; BOOST_REQUIRE(sst->get_sstable_level() == 1); BOOST_REQUIRE(sst == sst1); - }); } -SEASTAR_TEST_CASE(leveled_07) { - return test_env::do_with_async([] (test_env& env) { - auto cf = env.make_table_for_tests(); +SEASTAR_TEST_CASE(leveled_06) { + return test_env::do_with_async([](test_env& env) { leveled_06_fn(env); }); +} + +SEASTAR_TEST_CASE(leveled_06_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { leveled_06_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(leveled_06_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { leveled_06_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void leveled_07_fn(test_env& env) { + auto schema = table_for_tests::make_default_schema(); + auto cf = env.make_table_for_tests(schema); auto stop_cf = deferred_stop(cf); const auto key = tests::generate_partition_key(cf.schema()); @@ -935,12 +1143,23 @@ SEASTAR_TEST_CASE(leveled_07) { for (auto& sst : desc.sstables) { BOOST_REQUIRE(sst->get_stats_metadata().max_timestamp < compaction::leveled_manifest::MAX_COMPACTING_L0); } - }); } -SEASTAR_TEST_CASE(leveled_invariant_fix) { - return test_env::do_with_async([] (test_env& env) { - auto cf = env.make_table_for_tests(); +SEASTAR_TEST_CASE(leveled_07) { + return test_env::do_with_async([](test_env& env) { leveled_07_fn(env); }); +} + +SEASTAR_TEST_CASE(leveled_07_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return 
test_env::do_with_async([](test_env& env) { leveled_07_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(leveled_07_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { leveled_07_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void leveled_invariant_fix_fn(test_env& env) { + auto schema = table_for_tests::make_default_schema(); + auto cf = env.make_table_for_tests(schema); auto stop_cf = deferred_stop(cf); auto sstables_no = cf.schema()->max_compaction_threshold(); @@ -972,11 +1191,21 @@ SEASTAR_TEST_CASE(leveled_invariant_fix) { return expected.erase(sst); })); BOOST_REQUIRE(expected.empty()); - }); } -SEASTAR_TEST_CASE(leveled_stcs_on_L0) { - return test_env::do_with_async([] (test_env& env) { +SEASTAR_TEST_CASE(leveled_invariant_fix) { + return test_env::do_with_async([](test_env& env) { leveled_invariant_fix_fn(env); }); +} + +SEASTAR_TEST_CASE(leveled_invariant_fix_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { leveled_invariant_fix_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(leveled_invariant_fix_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { leveled_invariant_fix_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void leveled_stcs_on_L0_fn(test_env& env) { schema_builder builder(some_keyspace, some_column_family); builder.with_column("p1", utf8_type, column_kind::partition_key); builder.set_min_compaction_threshold(4); @@ -1023,12 +1252,25 @@ SEASTAR_TEST_CASE(leveled_stcs_on_L0) { BOOST_REQUIRE(candidate.level == 0); BOOST_REQUIRE(candidate.sstables.empty()); } - }); } 
-SEASTAR_TEST_CASE(overlapping_starved_sstables_test) { - return test_env::do_with_async([] (test_env& env) { - auto cf = env.make_table_for_tests(); +SEASTAR_TEST_CASE(leveled_stcs_on_L0) { + return test_env::do_with_async([](test_env& env) { leveled_stcs_on_L0_fn(env); }); +} + +SEASTAR_TEST_CASE(leveled_stcs_on_L0_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { leveled_stcs_on_L0_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(leveled_stcs_on_L0_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { leveled_stcs_on_L0_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void overlapping_starved_sstables_fn(test_env& env) { + auto schema = table_for_tests::make_default_schema(); + auto cf = env.make_table_for_tests(schema); auto stop_cf = deferred_stop(cf); const auto keys = tests::generate_partition_keys(5, cf.schema()); @@ -1055,12 +1297,25 @@ SEASTAR_TEST_CASE(overlapping_starved_sstables_test) { auto candidate = manifest.get_compaction_candidates(last_compacted_keys, compaction_counter); BOOST_REQUIRE(candidate.level == 2); BOOST_REQUIRE(candidate.sstables.size() == 3); - }); } -SEASTAR_TEST_CASE(check_overlapping) { - return test_env::do_with_async([] (test_env& env) { - auto cf = env.make_table_for_tests(); +SEASTAR_TEST_CASE(overlapping_starved_sstables_test) { + return test_env::do_with_async([](test_env& env) { overlapping_starved_sstables_fn(env); }); +} + +SEASTAR_TEST_CASE(overlapping_starved_sstables_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { overlapping_starved_sstables_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + 
+SEASTAR_FIXTURE_TEST_CASE(overlapping_starved_sstables_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { overlapping_starved_sstables_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void check_overlapping_fn(test_env& env) { + auto schema = table_for_tests::make_default_schema(); + auto cf = env.make_table_for_tests(schema); auto stop_cf = deferred_stop(cf); const auto keys = tests::generate_partition_keys(4, cf.schema()); @@ -1079,491 +1334,548 @@ SEASTAR_TEST_CASE(check_overlapping) { auto overlapping_sstables = compaction::leveled_manifest::overlapping(*cf.schema(), compacting, uncompacting); BOOST_REQUIRE(overlapping_sstables.size() == 1); BOOST_REQUIRE(overlapping_sstables.front() == sst4); - }); +} + +SEASTAR_TEST_CASE(check_overlapping) { + return test_env::do_with_async([](test_env& env) { check_overlapping_fn(env); }); +} + +SEASTAR_TEST_CASE(check_overlapping_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { check_overlapping_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(check_overlapping_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { check_overlapping_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +future<> tombstone_purge(test_env& env) { + BOOST_REQUIRE(smp::count == 1); + // In a column family with gc_grace_seconds set to 0, check that a tombstone + // is purged after compaction. 
+ auto builder = schema_builder("tests", "tombstone_purge") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + builder.set_gc_grace_seconds(0); + auto s = builder.build(); + + auto sst_gen = env.make_sst_factory(s); + + auto compact = [&, s] (std::vector all, std::vector to_compact) -> std::vector { + auto cf = env.make_table_for_tests(s); + auto stop_cf = deferred_stop(cf); + for (auto&& sst : all) { + column_family_test(cf).add_sstable(sst).get(); + } + return compact_sstables(env, compaction::compaction_descriptor(to_compact), cf, sst_gen).get().new_sstables; + }; + + auto next_timestamp = [] { + static thread_local api::timestamp_type next = 1; + return next++; + }; + + auto make_insert = [&] (partition_key key) { + mutation m(s, key); + auto timestamp = next_timestamp(); + testlog.info("make_insert: key={} timestamp={}", dht::decorate_key(*s, key), timestamp); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), timestamp); + return m; + }; + + auto make_expiring = [&] (partition_key key, int ttl) { + mutation m(s, key); + auto timestamp = next_timestamp(); + testlog.info("make_expliring: key={} ttl={} timestamp={}", dht::decorate_key(*s, key), ttl, timestamp); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), + timestamp, gc_clock::duration(ttl)); + return m; + }; + + auto make_delete = [&] (partition_key key, gc_clock::time_point deletion_time = gc_clock::now()) { + mutation m(s, key); + tombstone tomb(next_timestamp(), deletion_time); + testlog.info("make_delete: {}", tomb); + m.partition().apply(tomb); + return m; + }; + + auto assert_that_produces_dead_cell = [&] (auto& sst, partition_key& key) { + auto reader = make_lw_shared(sstable_reader(sst, s, env.make_reader_permit())); + read_mutation_from_mutation_reader(*reader).then([reader, s, &key] (mutation_opt m) { + BOOST_REQUIRE(m); + BOOST_REQUIRE(m->key().equal(*s, key)); + 
auto rows = m->partition().clustered_rows(); + BOOST_REQUIRE_EQUAL(rows.calculate_size(), 1); + auto& row = rows.begin()->row(); + auto& cells = row.cells(); + BOOST_REQUIRE_EQUAL(cells.size(), 1); + auto& cdef = *s->get_column_definition("value"); + BOOST_REQUIRE(!cells.cell_at(cdef.id).as_atomic_cell(cdef).is_live()); + return (*reader)(); + }).then([reader, s] (mutation_fragment_v2_opt m) { + BOOST_REQUIRE(!m); + }).finally([reader] { + return reader->close(); + }).get(); + }; + + auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")}); + auto beta = partition_key::from_exploded(*s, {to_bytes("beta")}); + + auto ttl = 10; + + { + auto mut1 = make_insert(alpha); + auto mut2 = make_insert(beta); + auto mut3 = make_delete(alpha); + + std::vector sstables = { + make_sstable_containing(sst_gen, {mut1, mut2}), + make_sstable_containing(sst_gen, {mut3}) + }; + + forward_jump_clocks(std::chrono::seconds(ttl)); + + auto result = compact(sstables, sstables); + BOOST_REQUIRE_EQUAL(1, result.size()); + + assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut2) + .produces_end_of_stream(); + } + + { + auto mut1 = make_insert(alpha); + auto mut2 = make_insert(alpha); + auto mut3 = make_delete(alpha); + + auto sst1 = make_sstable_containing(sst_gen, {mut1}); + auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); + + forward_jump_clocks(std::chrono::seconds(ttl)); + + auto result = compact({sst1, sst2}, {sst2}); + BOOST_REQUIRE_EQUAL(1, result.size()); + + assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut3) + .produces_end_of_stream(); + } + + { + auto mut1 = make_insert(alpha); + auto mut2 = make_delete(alpha); + auto mut3 = make_insert(beta); + auto mut4 = make_insert(alpha); + + auto sst1 = make_sstable_containing(sst_gen, {mut1, mut2, mut3}); + auto sst2 = make_sstable_containing(sst_gen, {mut4}); + + forward_jump_clocks(std::chrono::seconds(ttl)); + + auto result = compact({sst1, sst2}, 
{sst1}); + BOOST_REQUIRE_EQUAL(1, result.size()); + + assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut3) + .produces_end_of_stream(); + } + + { + auto mut1 = make_insert(alpha); + auto mut2 = make_delete(alpha); + auto mut3 = make_insert(beta); + auto mut4 = make_insert(beta); + + auto sst1 = make_sstable_containing(sst_gen, {mut1, mut2, mut3}); + auto sst2 = make_sstable_containing(sst_gen, {mut4}); + + forward_jump_clocks(std::chrono::seconds(ttl)); + + auto result = compact({sst1, sst2}, {sst1}); + BOOST_REQUIRE_EQUAL(1, result.size()); + + assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut3) + .produces_end_of_stream(); + } + + { + // check that expired cell will not be purged if it will resurrect overwritten data. + auto mut1 = make_insert(alpha); + auto mut2 = make_expiring(alpha, ttl); + + auto sst1 = make_sstable_containing(sst_gen, {mut1}); + auto sst2 = make_sstable_containing(sst_gen, {mut2}); + + forward_jump_clocks(std::chrono::seconds(ttl)); + + auto result = compact({sst1, sst2}, {sst2}); + BOOST_REQUIRE_EQUAL(1, result.size()); + assert_that_produces_dead_cell(result[0], alpha); + + result = compact({sst1, sst2}, {sst1, sst2}); + BOOST_REQUIRE_EQUAL(0, result.size()); + } + { + auto mut1 = make_insert(alpha); + auto mut2 = make_expiring(beta, ttl); + + auto sst1 = make_sstable_containing(sst_gen, {mut1}); + auto sst2 = make_sstable_containing(sst_gen, {mut2}); + + forward_jump_clocks(std::chrono::seconds(ttl)); + + auto result = compact({sst1, sst2}, {sst2}); + BOOST_REQUIRE_EQUAL(0, result.size()); + } + { + auto mut1 = make_insert(alpha); + auto mut2 = make_expiring(alpha, ttl); + auto mut3 = make_insert(beta); + + auto sst1 = make_sstable_containing(sst_gen, {mut1}); + auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); + + forward_jump_clocks(std::chrono::seconds(ttl)); + + auto result = compact({sst1, sst2}, {sst1, sst2}); + BOOST_REQUIRE_EQUAL(1, result.size()); + 
assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut3) + .produces_end_of_stream(); + } + { + // We use int32_t for representing a timestamp in seconds since the + // UNIX epoch. This timestamp "local_deletion_time" (ldt for short) + // notes the time at which a tombstone was created. It is used for + // purging the tombstone after gc_grace_seconds. + // + // If ldt is greater than INT32_MAX (2147483647), then it cannot be + // represented using a int32_t. probably more importantly, it + // represents a time point too far in the future -- after 19 Jan 2028. + // so the tombstone would practically live with us forever. so, the + // sstable writer just caps it to INT32_MAX - 1 when reading a + // tombstone with a TTL after this timepoint. and we also consider + // it as the indication of a problem and report it using the metrics + // named "scylla_sstables_capped_tombstone_deletion_time" which + // notes the total number of tombstones whose deletion_time breaches + // the limit. + // + // This test verifies that the metrics reflecting the number of + // tombstones with far-into-the-future ldts by inserting tombstones + // with a ldt greater than the date. 
+ auto deletion_time = gc_clock::from_time_t(sstables::max_deletion_time + 1); + auto sst1 = make_sstable_containing(sst_gen, + {make_insert(alpha), + make_delete(alpha, deletion_time)}, + validate::no); + auto result = compact({sst1}, {sst1}); + BOOST_CHECK_EQUAL(1, sstables_stats::get_shard_stats().capped_tombstone_deletion_time); + } + { + // Verify that old live data inhibit tombstone_gc of partition tombstone + auto mut1 = make_insert(alpha); + auto mut2 = make_delete(alpha); + auto mut3 = make_insert(beta); + + auto sst1 = make_sstable_containing(sst_gen, {mut1}); + auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); + + forward_jump_clocks(std::chrono::seconds(1)); + + auto result = compact({sst1, sst2}, {sst2}); + BOOST_REQUIRE_EQUAL(1, result.size()); + assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut3) + .produces(mut2) + .produces_end_of_stream(); + } + { + // Verify that old deleted data do not inhibit tombstone_gc of partition tombstone + auto mut1 = make_delete(alpha); + auto mut2 = make_delete(alpha); + auto mut3 = make_insert(beta); + + auto sst1 = make_sstable_containing(sst_gen, {mut1}); + auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); + + forward_jump_clocks(std::chrono::seconds(1)); + + auto result = compact({sst1, sst2}, {sst2}); + BOOST_REQUIRE_EQUAL(1, result.size()); + assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut3) + .produces_end_of_stream(); + } + { + // Verify that old live data inhibit tombstone_gc of expired cell + auto mut1 = make_insert(alpha); + auto mut2 = make_expiring(alpha, ttl); + + auto sst1 = make_sstable_containing(sst_gen, {mut1}); + auto sst2 = make_sstable_containing(sst_gen, {mut2}); + + forward_jump_clocks(std::chrono::seconds(ttl)); + + auto result = compact({sst1, sst2}, {sst2}); + BOOST_REQUIRE_EQUAL(1, result.size()); + assert_that_produces_dead_cell(result[0], alpha); + } + { + // Verify that old deleted data do not 
inhibit tombstone_gc of expired cell + auto mut1 = make_delete(alpha); + auto mut2 = make_expiring(alpha, ttl); + + auto sst1 = make_sstable_containing(sst_gen, {mut1}); + auto sst2 = make_sstable_containing(sst_gen, {mut2}); + + forward_jump_clocks(std::chrono::seconds(ttl)); + + auto result = compact({sst1, sst2}, {sst2}); + BOOST_REQUIRE_EQUAL(0, result.size()); + } + return make_ready_future(); } SEASTAR_TEST_CASE(tombstone_purge_test) { + return test_env::do_with_async([](test_env& env) { tombstone_purge(env).get(); }); +} + +SEASTAR_TEST_CASE(tombstone_purge_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { tombstone_purge(env).get(); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(tombstone_purge_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { tombstone_purge(env).get(); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +future<> mv_tombstone_purge(test_env& env) { BOOST_REQUIRE(smp::count == 1); - return test_env::do_with_async([] (test_env& env) { - // In a column family with gc_grace_seconds set to 0, check that a tombstone - // is purged after compaction. - auto builder = schema_builder("tests", "tombstone_purge") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - builder.set_gc_grace_seconds(0); - auto s = builder.build(); + // In a column family with gc_grace_seconds set to 0, check that a tombstone + // is purged after compaction. 
+ auto builder = schema_builder("tests", "tombstone_purge") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("ck", int32_type, column_kind::clustering_key) + .with_column("value", int32_type); + builder.set_gc_grace_seconds(0); + auto s = builder.build(); - auto sst_gen = env.make_sst_factory(s); + auto sst_gen = env.make_sst_factory(s); - auto compact = [&, s] (std::vector all, std::vector to_compact) -> std::vector { - auto cf = env.make_table_for_tests(s); - auto stop_cf = deferred_stop(cf); - for (auto&& sst : all) { - column_family_test(cf).add_sstable(sst).get(); - } - return compact_sstables(env, compaction::compaction_descriptor(to_compact), cf, sst_gen).get().new_sstables; - }; - - auto next_timestamp = [] { - static thread_local api::timestamp_type next = 1; - return next++; - }; - - auto make_insert = [&] (partition_key key) { - mutation m(s, key); - auto timestamp = next_timestamp(); - testlog.info("make_insert: key={} timestamp={}", dht::decorate_key(*s, key), timestamp); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), timestamp); - return m; - }; - - auto make_expiring = [&] (partition_key key, int ttl) { - mutation m(s, key); - auto timestamp = next_timestamp(); - testlog.info("make_expliring: key={} ttl={} timestamp={}", dht::decorate_key(*s, key), ttl, timestamp); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), - timestamp, gc_clock::duration(ttl)); - return m; - }; - - auto make_delete = [&] (partition_key key, gc_clock::time_point deletion_time = gc_clock::now()) { - mutation m(s, key); - tombstone tomb(next_timestamp(), deletion_time); - testlog.info("make_delete: {}", tomb); - m.partition().apply(tomb); - return m; - }; - - auto assert_that_produces_dead_cell = [&] (auto& sst, partition_key& key) { - auto reader = make_lw_shared(sstable_reader(sst, s, env.make_reader_permit())); - 
read_mutation_from_mutation_reader(*reader).then([reader, s, &key] (mutation_opt m) { - BOOST_REQUIRE(m); - BOOST_REQUIRE(m->key().equal(*s, key)); - auto rows = m->partition().clustered_rows(); - BOOST_REQUIRE_EQUAL(rows.calculate_size(), 1); - auto& row = rows.begin()->row(); - auto& cells = row.cells(); - BOOST_REQUIRE_EQUAL(cells.size(), 1); - auto& cdef = *s->get_column_definition("value"); - BOOST_REQUIRE(!cells.cell_at(cdef.id).as_atomic_cell(cdef).is_live()); - return (*reader)(); - }).then([reader, s] (mutation_fragment_v2_opt m) { - BOOST_REQUIRE(!m); - }).finally([reader] { - return reader->close(); - }).get(); - }; - - auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")}); - auto beta = partition_key::from_exploded(*s, {to_bytes("beta")}); - - auto ttl = 10; - - { - auto mut1 = make_insert(alpha); - auto mut2 = make_insert(beta); - auto mut3 = make_delete(alpha); - - std::vector sstables = { - make_sstable_containing(sst_gen, {mut1, mut2}), - make_sstable_containing(sst_gen, {mut3}) - }; - - forward_jump_clocks(std::chrono::seconds(ttl)); - - auto result = compact(sstables, sstables); - BOOST_REQUIRE_EQUAL(1, result.size()); - - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut2) - .produces_end_of_stream(); + auto compact = [&, s] (std::vector all, std::vector to_compact) -> std::vector { + auto cf = env.make_table_for_tests(s); + auto stop_cf = deferred_stop(cf); + for (auto&& sst : all) { + column_family_test(cf).add_sstable(sst).get(); } + return compact_sstables(env, compaction::compaction_descriptor(to_compact), cf, sst_gen).get().new_sstables; + }; - { - auto mut1 = make_insert(alpha); - auto mut2 = make_insert(alpha); - auto mut3 = make_delete(alpha); + auto next_timestamp = [] { + static thread_local api::timestamp_type next = 1; + return next++; + }; - auto sst1 = make_sstable_containing(sst_gen, {mut1}); - auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); - - 
forward_jump_clocks(std::chrono::seconds(ttl)); - - auto result = compact({sst1, sst2}, {sst2}); - BOOST_REQUIRE_EQUAL(1, result.size()); - - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut3) - .produces_end_of_stream(); + auto make_insert = [&] (partition_key key, int32_t ck = 0, int32_t value = 1, std::optional timestamp = std::nullopt, std::optional created_at = std::nullopt) { + mutation m(s, key); + if (!timestamp) { + created_at = timestamp = next_timestamp(); + } else if (!created_at) { + created_at = next_timestamp(); } + auto c_key = clustering_key::from_single_value(*s, int32_type->decompose(data_value(ck))); + testlog.info("make_insert: key={} ck={} timestamp={} created_at={}", dht::decorate_key(*s, key), ck, *timestamp, *created_at); + m.set_clustered_cell( + c_key, + bytes("value"), + data_value(value), + *timestamp); + m.partition().clustered_row(*s, c_key).apply(row_marker(*created_at)); + return m; + }; - { - auto mut1 = make_insert(alpha); - auto mut2 = make_delete(alpha); - auto mut3 = make_insert(beta); - auto mut4 = make_insert(alpha); - - auto sst1 = make_sstable_containing(sst_gen, {mut1, mut2, mut3}); - auto sst2 = make_sstable_containing(sst_gen, {mut4}); - - forward_jump_clocks(std::chrono::seconds(ttl)); - - auto result = compact({sst1, sst2}, {sst1}); - BOOST_REQUIRE_EQUAL(1, result.size()); - - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut3) - .produces_end_of_stream(); + auto make_delete_row = [&] (partition_key key, int32_t ck = 0, gc_clock::time_point deletion_time = gc_clock::now(), std::optional deleted_at = std::nullopt) { + mutation m(s, key); + if (!deleted_at) { + deleted_at = next_timestamp(); } + shadowable_tombstone shadowable(*deleted_at, deletion_time); + auto c_key = clustering_key::from_single_value(*s, int32_type->decompose(data_value(ck))); + testlog.info("make_delete_row: key={} ck={} shadowable_tombstone={}", dht::decorate_key(*s, key), ck, 
shadowable); + m.partition().clustered_row(*s, c_key).apply(shadowable); + return m; + }; - { - auto mut1 = make_insert(alpha); - auto mut2 = make_delete(alpha); - auto mut3 = make_insert(beta); - auto mut4 = make_insert(beta); + auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")}); - auto sst1 = make_sstable_containing(sst_gen, {mut1, mut2, mut3}); - auto sst2 = make_sstable_containing(sst_gen, {mut4}); + { + // Simulate materialized views update + // We expect mut2 to delete mut1, and be purged + // Since the shadowable tombstone in mut4 is ignored as it is dead + // and insert in mut5 has higher timestamp. + // This will leave only the insert in mut3 when compacting mut1-3 together. + // + // cql commands that reproduce the following mutation: + // create keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}; + // use ks; + // create table base_table (id text primary key, ck int, value int); + // create materialized view mv as select id, ck, value from base_table where id is not null and ck is not null primary key (id, ck); + // + // insert into base_table (id, ck, value) values ('alpha', 1, 1) using timestamp 1; + auto mut1 = make_insert(alpha, 1, 1, api::timestamp_type(1), api::timestamp_type(1)); + // insert into base_table (id, ck) values ('alpha', 2) using timestamp 2; + auto mut2 = make_delete_row(alpha, 1, gc_clock::now(), api::timestamp_type(1)); + auto mut3 = make_insert(alpha, 2, 1, api::timestamp_type(1), api::timestamp_type(2)); + // insert into base_table (id, ck) values ('alpha', 3) using timestamp 3; + auto mut4 = make_delete_row(alpha, 2, gc_clock::now(), api::timestamp_type(2)); + auto mut5 = make_insert(alpha, 3, 1, api::timestamp_type(1), api::timestamp_type(3)); - forward_jump_clocks(std::chrono::seconds(ttl)); + auto sst1 = make_sstable_containing(sst_gen, {mut1}); + auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); + auto sst3 = make_sstable_containing(sst_gen, {mut4, mut5}); - 
auto result = compact({sst1, sst2}, {sst1}); - BOOST_REQUIRE_EQUAL(1, result.size()); + forward_jump_clocks(std::chrono::seconds(1)); - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut3) - .produces_end_of_stream(); - } - - { - // check that expired cell will not be purged if it will resurrect overwritten data. - auto mut1 = make_insert(alpha); - auto mut2 = make_expiring(alpha, ttl); - - auto sst1 = make_sstable_containing(sst_gen, {mut1}); - auto sst2 = make_sstable_containing(sst_gen, {mut2}); - - forward_jump_clocks(std::chrono::seconds(ttl)); - - auto result = compact({sst1, sst2}, {sst2}); - BOOST_REQUIRE_EQUAL(1, result.size()); - assert_that_produces_dead_cell(result[0], alpha); - - result = compact({sst1, sst2}, {sst1, sst2}); - BOOST_REQUIRE_EQUAL(0, result.size()); - } - { - auto mut1 = make_insert(alpha); - auto mut2 = make_expiring(beta, ttl); - - auto sst1 = make_sstable_containing(sst_gen, {mut1}); - auto sst2 = make_sstable_containing(sst_gen, {mut2}); - - forward_jump_clocks(std::chrono::seconds(ttl)); - - auto result = compact({sst1, sst2}, {sst2}); - BOOST_REQUIRE_EQUAL(0, result.size()); - } - { - auto mut1 = make_insert(alpha); - auto mut2 = make_expiring(alpha, ttl); - auto mut3 = make_insert(beta); - - auto sst1 = make_sstable_containing(sst_gen, {mut1}); - auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); - - forward_jump_clocks(std::chrono::seconds(ttl)); - - auto result = compact({sst1, sst2}, {sst1, sst2}); - BOOST_REQUIRE_EQUAL(1, result.size()); - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut3) - .produces_end_of_stream(); - } - { - // We use int32_t for representing a timestamp in seconds since the - // UNIX epoch. This timestamp "local_deletion_time" (ldt for short) - // notes the time at which a tombstone was created. It is used for - // purging the tombstone after gc_grace_seconds. 
- // - // If ldt is greater than INT32_MAX (2147483647), then it cannot be - // represented using a int32_t. probably more importantly, it - // represents a time point too far in the future -- after 19 Jan 2028. - // so the tombstone would practically live with us forever. so, the - // sstable writer just caps it to INT32_MAX - 1 when reading a - // tombstone with a TTL after this timepoint. and we also consider - // it as the indication of a problem and report it using the metrics - // named "scylla_sstables_capped_tombstone_deletion_time" which - // notes the total number of tombstones whose deletion_time breaches - // the limit. - // - // This test verifies that the metrics reflecting the number of - // tombstones with far-into-the-future ldts by inserting tombstones - // with a ldt greater than the date. - auto deletion_time = gc_clock::from_time_t(sstables::max_deletion_time + 1); - auto sst1 = make_sstable_containing(sst_gen, - {make_insert(alpha), - make_delete(alpha, deletion_time)}, - validate::no); - auto result = compact({sst1}, {sst1}); - BOOST_CHECK_EQUAL(1, sstables_stats::get_shard_stats().capped_tombstone_deletion_time); - } - { - // Verify that old live data inhibit tombstone_gc of partition tombstone - auto mut1 = make_insert(alpha); - auto mut2 = make_delete(alpha); - auto mut3 = make_insert(beta); - - auto sst1 = make_sstable_containing(sst_gen, {mut1}); - auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); - - forward_jump_clocks(std::chrono::seconds(1)); - - auto result = compact({sst1, sst2}, {sst2}); - BOOST_REQUIRE_EQUAL(1, result.size()); - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut3) - .produces(mut2) - .produces_end_of_stream(); - } - { - // Verify that old deleted data do not inhibit tombstone_gc of partition tombstone - auto mut1 = make_delete(alpha); - auto mut2 = make_delete(alpha); - auto mut3 = make_insert(beta); - - auto sst1 = make_sstable_containing(sst_gen, {mut1}); - auto sst2 = 
make_sstable_containing(sst_gen, {mut2, mut3}); - - forward_jump_clocks(std::chrono::seconds(1)); - - auto result = compact({sst1, sst2}, {sst2}); - BOOST_REQUIRE_EQUAL(1, result.size()); - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut3) - .produces_end_of_stream(); - } - { - // Verify that old live data inhibit tombstone_gc of expired cell - auto mut1 = make_insert(alpha); - auto mut2 = make_expiring(alpha, ttl); - - auto sst1 = make_sstable_containing(sst_gen, {mut1}); - auto sst2 = make_sstable_containing(sst_gen, {mut2}); - - forward_jump_clocks(std::chrono::seconds(ttl)); - - auto result = compact({sst1, sst2}, {sst2}); - BOOST_REQUIRE_EQUAL(1, result.size()); - assert_that_produces_dead_cell(result[0], alpha); - } - { - // Verify that old deleted data do not inhibit tombstone_gc of expired cell - auto mut1 = make_delete(alpha); - auto mut2 = make_expiring(alpha, ttl); - - auto sst1 = make_sstable_containing(sst_gen, {mut1}); - auto sst2 = make_sstable_containing(sst_gen, {mut2}); - - forward_jump_clocks(std::chrono::seconds(ttl)); - - auto result = compact({sst1, sst2}, {sst2}); - BOOST_REQUIRE_EQUAL(0, result.size()); - } - }); + auto result = compact({sst1, sst2, sst3}, {sst1, sst2}); + BOOST_REQUIRE_EQUAL(1, result.size()); + assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut3) + .produces_end_of_stream(); + } + return make_ready_future(); } SEASTAR_TEST_CASE(mv_tombstone_purge_test) { - BOOST_REQUIRE(smp::count == 1); - return test_env::do_with_async([] (test_env& env) { - // In a column family with gc_grace_seconds set to 0, check that a tombstone - // is purged after compaction. 
- auto builder = schema_builder("tests", "tombstone_purge") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("ck", int32_type, column_kind::clustering_key) - .with_column("value", int32_type); - builder.set_gc_grace_seconds(0); - auto s = builder.build(); - - auto sst_gen = env.make_sst_factory(s); - - auto compact = [&, s] (std::vector all, std::vector to_compact) -> std::vector { - auto cf = env.make_table_for_tests(s); - auto stop_cf = deferred_stop(cf); - for (auto&& sst : all) { - column_family_test(cf).add_sstable(sst).get(); - } - return compact_sstables(env, compaction::compaction_descriptor(to_compact), cf, sst_gen).get().new_sstables; - }; - - auto next_timestamp = [] { - static thread_local api::timestamp_type next = 1; - return next++; - }; - - auto make_insert = [&] (partition_key key, int32_t ck = 0, int32_t value = 1, std::optional timestamp = std::nullopt, std::optional created_at = std::nullopt) { - mutation m(s, key); - if (!timestamp) { - created_at = timestamp = next_timestamp(); - } else if (!created_at) { - created_at = next_timestamp(); - } - auto c_key = clustering_key::from_single_value(*s, int32_type->decompose(data_value(ck))); - testlog.info("make_insert: key={} ck={} timestamp={} created_at={}", dht::decorate_key(*s, key), ck, *timestamp, *created_at); - m.set_clustered_cell( - c_key, - bytes("value"), - data_value(value), - *timestamp); - m.partition().clustered_row(*s, c_key).apply(row_marker(*created_at)); - return m; - }; - - auto make_delete_row = [&] (partition_key key, int32_t ck = 0, gc_clock::time_point deletion_time = gc_clock::now(), std::optional deleted_at = std::nullopt) { - mutation m(s, key); - if (!deleted_at) { - deleted_at = next_timestamp(); - } - shadowable_tombstone shadowable(*deleted_at, deletion_time); - auto c_key = clustering_key::from_single_value(*s, int32_type->decompose(data_value(ck))); - testlog.info("make_delete_row: key={} ck={} shadowable_tombstone={}", dht::decorate_key(*s, 
key), ck, shadowable); - m.partition().clustered_row(*s, c_key).apply(shadowable); - return m; - }; - - auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")}); - - { - // Simulate materialized views update - // We expect mut2 to delete mut1, and be purged - // Since the shadowable tombstone in mut4 is ignored as it is dead - // and insert in mut5 has higher timestamp. - // This will leave only the insert in mut3 when compacting mut1-3 together. - // - // cql commands that reproduce the following mutation: - // create keyspace ks with replication = {'class': 'NetworkTopologyStrategy', 'replication_factor': 1}; - // use ks; - // create table base_table (id text primary key, ck int, value int); - // create materialized view mv as select id, ck, value from base_table where id is not null and ck is not null primary key (id, ck); - // - // insert into base_table (id, ck, value) values ('alpha', 1, 1) using timestamp 1; - auto mut1 = make_insert(alpha, 1, 1, api::timestamp_type(1), api::timestamp_type(1)); - // insert into base_table (id, ck) values ('alpha', 2) using timestamp 2; - auto mut2 = make_delete_row(alpha, 1, gc_clock::now(), api::timestamp_type(1)); - auto mut3 = make_insert(alpha, 2, 1, api::timestamp_type(1), api::timestamp_type(2)); - // insert into base_table (id, ck) values ('alpha', 3) using timestamp 3; - auto mut4 = make_delete_row(alpha, 2, gc_clock::now(), api::timestamp_type(2)); - auto mut5 = make_insert(alpha, 3, 1, api::timestamp_type(1), api::timestamp_type(3)); - - auto sst1 = make_sstable_containing(sst_gen, {mut1}); - auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); - auto sst3 = make_sstable_containing(sst_gen, {mut4, mut5}); - - forward_jump_clocks(std::chrono::seconds(1)); - - auto result = compact({sst1, sst2, sst3}, {sst1, sst2}); - BOOST_REQUIRE_EQUAL(1, result.size()); - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut3) - .produces_end_of_stream(); - } - }); + return 
test_env::do_with_async([](test_env& env) { mv_tombstone_purge(env).get(); }); } -SEASTAR_TEST_CASE(sstable_rewrite) { +SEASTAR_TEST_CASE(mv_tombstone_purge_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { mv_tombstone_purge(env).get(); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(mv_tombstone_purge_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { mv_tombstone_purge(env).get(); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +future<> sstable_rewrite(test_env& env) { BOOST_REQUIRE(smp::count == 1); - return test_env::do_with_async([] (test_env& env) { - auto s = schema_builder(some_keyspace, some_column_family) - .with_column("p1", utf8_type, column_kind::partition_key) - .with_column("c1", utf8_type, column_kind::clustering_key) - .with_column("r1", utf8_type) - .build(); - auto sst_gen = env.make_sst_factory(s); + auto s = schema_builder(some_keyspace, some_column_family) + .with_column("p1", utf8_type, column_kind::partition_key) + .with_column("c1", utf8_type, column_kind::clustering_key) + .with_column("r1", utf8_type) + .build(); + auto sst_gen = env.make_sst_factory(s); - const column_definition& r1_col = *s->get_column_definition("r1"); + const column_definition& r1_col = *s->get_column_definition("r1"); - auto key_for_this_shard = tests::generate_partition_keys(1, s); - auto c_key = clustering_key::from_exploded(*s, {to_bytes("c1")}); - mutation mut(s, key_for_this_shard[0]); - mut.set_clustered_cell(c_key, r1_col, make_atomic_cell(utf8_type, bytes("a"))); + auto key_for_this_shard = tests::generate_partition_keys(1, s); + auto c_key = clustering_key::from_exploded(*s, {to_bytes("c1")}); + mutation mut(s, key_for_this_shard[0]); + mut.set_clustered_cell(c_key, r1_col, make_atomic_cell(utf8_type, 
bytes("a"))); - auto sstp = make_sstable_containing(sst_gen, {std::move(mut)}); - auto key = key_for_this_shard[0]; - std::vector new_tables; - auto creator = [&] { - auto sst = sst_gen(); - new_tables.emplace_back(sst); - return sst; - }; - auto cf = env.make_table_for_tests(s); - auto stop_cf = deferred_stop(cf); - std::vector sstables; - sstables.push_back(std::move(sstp)); + auto sstp = make_sstable_containing(sst_gen, {std::move(mut)}); + auto key = key_for_this_shard[0]; + std::vector new_tables; + auto creator = [&] { + auto sst = sst_gen(); + new_tables.emplace_back(sst); + return sst; + }; + auto cf = env.make_table_for_tests(s); + auto stop_cf = deferred_stop(cf); + std::vector sstables; + sstables.push_back(std::move(sstp)); - compact_sstables(env, compaction::compaction_descriptor(std::move(sstables)), cf, creator).get(); - BOOST_REQUIRE(new_tables.size() == 1); - auto newsst = new_tables[0]; - auto reader = sstable_reader(newsst, s, env.make_reader_permit()); - auto close_reader = deferred_close(reader); - auto m = reader().get(); - BOOST_REQUIRE(m); - BOOST_REQUIRE(m->is_partition_start()); - BOOST_REQUIRE(m->as_partition_start().key().equal(*s, key)); - reader.next_partition().get(); - m = reader().get(); - BOOST_REQUIRE(!m); - }); + compact_sstables(env, compaction::compaction_descriptor(std::move(sstables)), cf, creator).get(); + BOOST_REQUIRE(new_tables.size() == 1); + auto newsst = new_tables[0]; + auto reader = sstable_reader(newsst, s, env.make_reader_permit()); + auto close_reader = deferred_close(reader); + auto m = reader().get(); + BOOST_REQUIRE(m); + BOOST_REQUIRE(m->is_partition_start()); + BOOST_REQUIRE(m->as_partition_start().key().equal(*s, key)); + reader.next_partition().get(); + m = reader().get(); + BOOST_REQUIRE(!m); + return make_ready_future(); +} + +SEASTAR_TEST_CASE(sstable_rewrite_test) { + return test_env::do_with_async([](test_env& env) { sstable_rewrite(env).get(); }); +} + +SEASTAR_TEST_CASE(sstable_rewrite_s3_test, 
*boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { sstable_rewrite(env).get(); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(sstable_rewrite_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { sstable_rewrite(env).get(); }, test_env_config{.storage = make_test_object_storage_options("GS")}); } -SEASTAR_TEST_CASE(test_sstable_max_local_deletion_time_2) { +future<> sstable_max_local_deletion_time_2(test_env& env) { // Create sstable A with 5x column with TTL 100 and 1x column with TTL 1000 // Create sstable B with tombstone for column in sstable A with TTL 1000. // Compact them and expect that maximum deletion time is that of column with TTL 100. - return test_env::do_with_async([] (test_env& env) { - for (auto version : writable_sstable_versions) { - schema_builder builder(some_keyspace, some_column_family); - builder.with_column("p1", utf8_type, column_kind::partition_key); - builder.with_column("c1", utf8_type, column_kind::clustering_key); - builder.with_column("r1", utf8_type); - schema_ptr s = builder.build(schema_builder::compact_storage::no); - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - auto sst_gen = env.make_sst_factory(s, version); - auto mt = make_lw_shared(s); - auto now = gc_clock::now(); - int32_t last_expiry = 0; - auto add_row = [&now, &mt, &s, &last_expiry](mutation &m, bytes column_name, uint32_t ttl) { - auto c_key = clustering_key::from_exploded(*s, {column_name}); - last_expiry = (now + gc_clock::duration(ttl)).time_since_epoch().count(); - m.set_clustered_cell(c_key, *s->get_column_definition("r1"), - make_atomic_cell(utf8_type, bytes(""), ttl, last_expiry)); - mt->apply(std::move(m)); - }; + for (auto version : writable_sstable_versions) { + schema_builder builder(some_keyspace, 
some_column_family); + builder.with_column("p1", utf8_type, column_kind::partition_key); + builder.with_column("c1", utf8_type, column_kind::clustering_key); + builder.with_column("r1", utf8_type); + schema_ptr s = builder.build(schema_builder::compact_storage::no); + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + auto sst_gen = env.make_sst_factory(s, version); + auto mt = make_lw_shared(s); + auto now = gc_clock::now(); + int32_t last_expiry = 0; + auto add_row = [&now, &mt, &s, &last_expiry](mutation &m, bytes column_name, uint32_t ttl) { + auto c_key = clustering_key::from_exploded(*s, {column_name}); + last_expiry = (now + gc_clock::duration(ttl)).time_since_epoch().count(); + m.set_clustered_cell(c_key, *s->get_column_definition("r1"), + make_atomic_cell(utf8_type, bytes(""), ttl, last_expiry)); + mt->apply(std::move(m)); + }; - mutation m(s, partition_key::from_exploded(*s, {to_bytes("deletetest")})); - for (auto i = 0; i < 5; i++) { - add_row(m, to_bytes("deletecolumn" + to_sstring(i)), 100); - } - add_row(m, to_bytes("todelete"), 1000); - auto sst1 = make_sstable_containing(sst_gen, mt); - BOOST_REQUIRE(last_expiry == sst1->get_stats_metadata().max_local_deletion_time); + mutation m(s, partition_key::from_exploded(*s, {to_bytes("deletetest")})); + for (auto i = 0; i < 5; i++) { + add_row(m, to_bytes("deletecolumn" + to_sstring(i)), 100); + } + add_row(m, to_bytes("todelete"), 1000); + auto sst1 = make_sstable_containing(sst_gen, mt); + BOOST_REQUIRE(last_expiry == sst1->get_stats_metadata().max_local_deletion_time); - mt = make_lw_shared(s); - m = mutation(s, partition_key::from_exploded(*s, {to_bytes("deletetest")})); - tombstone tomb(api::new_timestamp(), now); - m.partition().apply_delete(*s, clustering_key::from_exploded(*s, {to_bytes("todelete")}), tomb); - mt->apply(std::move(m)); - auto sst2 = make_sstable_containing(sst_gen, mt); - BOOST_REQUIRE(now.time_since_epoch().count() == 
sst2->get_stats_metadata().max_local_deletion_time); + mt = make_lw_shared(s); + m = mutation(s, partition_key::from_exploded(*s, {to_bytes("deletetest")})); + tombstone tomb(api::new_timestamp(), now); + m.partition().apply_delete(*s, clustering_key::from_exploded(*s, {to_bytes("todelete")}), tomb); + mt->apply(std::move(m)); + auto sst2 = make_sstable_containing(sst_gen, mt); + BOOST_REQUIRE(now.time_since_epoch().count() == sst2->get_stats_metadata().max_local_deletion_time); - auto creator = sst_gen; - auto info = compact_sstables(env, compaction::compaction_descriptor({sst1, sst2}), cf, creator).get(); - BOOST_REQUIRE(info.new_sstables.size() == 1); - BOOST_REQUIRE(((now + gc_clock::duration(100)).time_since_epoch().count()) == - info.new_sstables.front()->get_stats_metadata().max_local_deletion_time); - } - }); + auto creator = sst_gen; + auto info = compact_sstables(env, compaction::compaction_descriptor({sst1, sst2}), cf, creator).get(); + BOOST_REQUIRE(info.new_sstables.size() == 1); + BOOST_REQUIRE(((now + gc_clock::duration(100)).time_since_epoch().count()) == + info.new_sstables.front()->get_stats_metadata().max_local_deletion_time); + } + return make_ready_future(); +} + +SEASTAR_TEST_CASE(test_sstable_max_local_deletion_time_2) { + return test_env::do_with_async([](test_env& env) { sstable_max_local_deletion_time_2(env).get(); }); +} + +SEASTAR_TEST_CASE(test_sstable_max_local_deletion_time_2_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { sstable_max_local_deletion_time_2(env).get(); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(test_sstable_max_local_deletion_time_2_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { sstable_max_local_deletion_time_2(env).get(); }, + test_env_config{.storage = 
make_test_object_storage_options("GS")}); } static stats_metadata build_stats(int64_t min_timestamp, int64_t max_timestamp, int32_t max_local_deletion_time) { @@ -1574,8 +1886,7 @@ static stats_metadata build_stats(int64_t min_timestamp, int64_t max_timestamp, return stats; } -SEASTAR_TEST_CASE(get_fully_expired_sstables_test) { - return test_env::do_with_async([] (test_env& env) { +void get_fully_expired_sstables_fn(test_env& env) { const auto keys = tests::generate_partition_keys(4, table_for_tests::make_default_schema()); const auto& min_key = keys.front(); const auto& max_key = keys.back(); @@ -1586,8 +1897,9 @@ SEASTAR_TEST_CASE(get_fully_expired_sstables_test) { auto t3 = gc_clock::from_time_t(20).time_since_epoch().count(); auto t4 = gc_clock::from_time_t(30).time_since_epoch().count(); + auto schema = table_for_tests::make_default_schema(); { - auto cf = env.make_table_for_tests(); + auto cf = env.make_table_for_tests(schema); auto close_cf = deferred_stop(cf); auto sst1 = add_sstable_for_overlapping_test(env, cf, min_key.key(), keys[1].key(), build_stats(t0, t1, t1)); @@ -1599,7 +1911,7 @@ SEASTAR_TEST_CASE(get_fully_expired_sstables_test) { } { - auto cf = env.make_table_for_tests(); + auto cf = env.make_table_for_tests(schema); auto close_cf = deferred_stop(cf); auto sst1 = add_sstable_for_overlapping_test(env, cf, min_key.key(), keys[1].key(), build_stats(t0, t1, t1)); @@ -1611,42 +1923,67 @@ SEASTAR_TEST_CASE(get_fully_expired_sstables_test) { auto expired_sst = *expired.begin(); BOOST_REQUIRE(expired_sst == sst1); } - }); +} + +SEASTAR_TEST_CASE(get_fully_expired_sstables_test) { + return test_env::do_with_async([](test_env& env) { get_fully_expired_sstables_fn(env); }); +} + +SEASTAR_TEST_CASE(get_fully_expired_sstables_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { get_fully_expired_sstables_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); 
+} + +SEASTAR_FIXTURE_TEST_CASE(get_fully_expired_sstables_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { get_fully_expired_sstables_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void compaction_with_fully_expired_table_fn(test_env& env) { + auto builder = schema_builder("la", "cf") + .with_column("pk", utf8_type, column_kind::partition_key) + .with_column("ck1", utf8_type, column_kind::clustering_key) + .with_column("r1", int32_type); + + builder.set_gc_grace_seconds(0); + auto s = builder.build(); + + auto key = partition_key::from_exploded(*s, {to_bytes("key1")}); + auto c_key = clustering_key_prefix::from_exploded(*s, {to_bytes("c1")}); + auto sst_gen = env.make_sst_factory(s); + + mutation m(s, key); + tombstone tomb(api::new_timestamp(), gc_clock::now() - std::chrono::seconds(3600)); + m.partition().apply_delete(*s, c_key, tomb); + auto sst = make_sstable_containing(sst_gen, {std::move(m)}); + + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + + auto ssts = std::vector{ sst }; + + auto expired = get_fully_expired_sstables(cf.as_compaction_group_view(), ssts, gc_clock::now()); + BOOST_REQUIRE(expired.size() == 1); + auto expired_sst = *expired.begin(); + BOOST_REQUIRE(expired_sst == sst); + + auto ret = compact_sstables(env, compaction::compaction_descriptor(ssts), cf, sst_gen).get(); + BOOST_REQUIRE(ret.new_sstables.empty()); + BOOST_REQUIRE(ret.stats.end_size == 0); } SEASTAR_TEST_CASE(compaction_with_fully_expired_table) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("la", "cf") - .with_column("pk", utf8_type, column_kind::partition_key) - .with_column("ck1", utf8_type, column_kind::clustering_key) - .with_column("r1", int32_type); + return test_env::do_with_async([](test_env& env) { compaction_with_fully_expired_table_fn(env); }); +} - 
builder.set_gc_grace_seconds(0); - auto s = builder.build(); +SEASTAR_TEST_CASE(compaction_with_fully_expired_table_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { compaction_with_fully_expired_table_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto key = partition_key::from_exploded(*s, {to_bytes("key1")}); - auto c_key = clustering_key_prefix::from_exploded(*s, {to_bytes("c1")}); - auto sst_gen = env.make_sst_factory(s); - - mutation m(s, key); - tombstone tomb(api::new_timestamp(), gc_clock::now() - std::chrono::seconds(3600)); - m.partition().apply_delete(*s, c_key, tomb); - auto sst = make_sstable_containing(sst_gen, {std::move(m)}); - - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - - auto ssts = std::vector{ sst }; - - auto expired = get_fully_expired_sstables(cf.as_compaction_group_view(), ssts, gc_clock::now()); - BOOST_REQUIRE(expired.size() == 1); - auto expired_sst = *expired.begin(); - BOOST_REQUIRE(expired_sst == sst); - - auto ret = compact_sstables(env, compaction::compaction_descriptor(ssts), cf, sst_gen).get(); - BOOST_REQUIRE(ret.new_sstables.empty()); - BOOST_REQUIRE(ret.stats.end_size == 0); - }); +SEASTAR_FIXTURE_TEST_CASE(compaction_with_fully_expired_table_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { compaction_with_fully_expired_table_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); } SEASTAR_TEST_CASE(time_window_strategy_time_window_tests) { @@ -1672,8 +2009,7 @@ SEASTAR_TEST_CASE(time_window_strategy_time_window_tests) { return make_ready_future<>(); } -SEASTAR_TEST_CASE(time_window_strategy_ts_resolution_check) { - return test_env::do_with_async([] (test_env& env) { +void time_window_strategy_ts_resolution_check_fn(test_env& env) { auto ts = 1451001601000L; 
// 2015-12-25 @ 00:00:01, in milliseconds auto ts_in_ms = std::chrono::milliseconds(ts); auto ts_in_us = std::chrono::duration_cast(ts_in_ms); @@ -1709,190 +2045,227 @@ SEASTAR_TEST_CASE(time_window_strategy_ts_resolution_check) { BOOST_REQUIRE(ret.second == expected); } - }); +} + +SEASTAR_TEST_CASE(time_window_strategy_ts_resolution_check) { + return test_env::do_with_async([](test_env& env) { time_window_strategy_ts_resolution_check_fn(env); }); +} + +SEASTAR_TEST_CASE(time_window_strategy_ts_resolution_check_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { time_window_strategy_ts_resolution_check_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(time_window_strategy_ts_resolution_check_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { time_window_strategy_ts_resolution_check_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void time_window_strategy_correctness_fn(test_env& env) { + using namespace std::chrono; + auto builder = schema_builder("tests", "time_window_strategy") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); + auto s = builder.build(); + + auto make_insert = [&] (partition_key key, api::timestamp_type t) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), t); + return m; + }; + + api::timestamp_type tstamp = api::timestamp_clock::now().time_since_epoch().count(); + api::timestamp_type tstamp2 = tstamp - duration_cast(seconds(2L * 3600L)).count(); + + std::vector sstables; + + // create 5 sstables + for (api::timestamp_type t = 0; t < 3; t++) { + auto key = partition_key::from_exploded(*s, 
{to_bytes("key" + to_sstring(t))}); + auto mut = make_insert(std::move(key), t); + sstables.push_back(make_sstable_containing(env.make_sstable(s), {std::move(mut)})); + } + // Decrement the timestamp to simulate a timestamp in the past hour + for (api::timestamp_type t = 3; t < 5; t++) { + // And add progressively more cells into each sstable + auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(t))}); + auto mut = make_insert(std::move(key), t); + sstables.push_back(make_sstable_containing(env.make_sstable(s), {std::move(mut)})); + } + + std::map options; + compaction::time_window_compaction_strategy twcs(options); + std::map> buckets; + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + auto control = make_strategy_control_for_test(false); + + // We'll put 3 sstables into the newest bucket + for (api::timestamp_type i = 0; i < 3; i++) { + auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), tstamp); + buckets[bound].push_back(sstables[i]); + } + + auto state = cf.as_compaction_group_view().get_compaction_strategy_state().get(); + auto now = api::timestamp_clock::now().time_since_epoch().count(); + auto new_bucket = twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, 4, 32, + compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), now), *state); + // incoming bucket should not be accepted when it has below the min threshold SSTables + BOOST_REQUIRE(new_bucket.empty()); + + now = api::timestamp_clock::now().time_since_epoch().count(); + new_bucket = twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, 2, 32, + compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), now), *state); + // incoming bucket should be accepted when it is larger than the min threshold SSTables + BOOST_REQUIRE(!new_bucket.empty()); + + // And 2 into the second bucket (1 hour back) + for 
(api::timestamp_type i = 3; i < 5; i++) { + auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), tstamp2); + buckets[bound].push_back(sstables[i]); + } + + // "an sstable with a single value should have equal min/max timestamps" + for (auto& sst : sstables) { + BOOST_REQUIRE(sst->get_stats_metadata().min_timestamp == sst->get_stats_metadata().max_timestamp); + } + + // Test trim + auto num_sstables = 40; + for (int r = 5; r < num_sstables; r++) { + auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(r))}); + utils::chunked_vector mutations; + for (int i = 0 ; i < r ; i++) { + mutations.push_back(make_insert(key, tstamp + r)); + } + sstables.push_back(make_sstable_containing(env.make_sstable(s), std::move(mutations))); + } + + // Reset the buckets, overfill it now + for (int i = 0 ; i < 40; i++) { + auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), + sstables[i]->get_stats_metadata().max_timestamp); + buckets[bound].push_back(sstables[i]); + } + + now = api::timestamp_clock::now().time_since_epoch().count(); + new_bucket = twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, 4, 32, + compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), now), *state); + // new bucket should be trimmed to max threshold of 32 + BOOST_REQUIRE(new_bucket.size() == size_t(32)); } SEASTAR_TEST_CASE(time_window_strategy_correctness_test) { - using namespace std::chrono; + return test_env::do_with_async([](test_env& env) { time_window_strategy_correctness_fn(env); }); +} - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "time_window_strategy") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); - auto s = builder.build(); 
+SEASTAR_TEST_CASE(time_window_strategy_correctness_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { time_window_strategy_correctness_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto make_insert = [&] (partition_key key, api::timestamp_type t) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), t); - return m; - }; - - api::timestamp_type tstamp = api::timestamp_clock::now().time_since_epoch().count(); - api::timestamp_type tstamp2 = tstamp - duration_cast(seconds(2L * 3600L)).count(); - - std::vector sstables; - - // create 5 sstables - for (api::timestamp_type t = 0; t < 3; t++) { - auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(t))}); - auto mut = make_insert(std::move(key), t); - sstables.push_back(make_sstable_containing(env.make_sstable(s), {std::move(mut)})); - } - // Decrement the timestamp to simulate a timestamp in the past hour - for (api::timestamp_type t = 3; t < 5; t++) { - // And add progressively more cells into each sstable - auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(t))}); - auto mut = make_insert(std::move(key), t); - sstables.push_back(make_sstable_containing(env.make_sstable(s), {std::move(mut)})); - } - - std::map options; - compaction::time_window_compaction_strategy twcs(options); - std::map> buckets; - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - auto control = make_strategy_control_for_test(false); - - // We'll put 3 sstables into the newest bucket - for (api::timestamp_type i = 0; i < 3; i++) { - auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), tstamp); - buckets[bound].push_back(sstables[i]); - } - - auto state = cf.as_compaction_group_view().get_compaction_strategy_state().get(); - auto now = 
api::timestamp_clock::now().time_since_epoch().count(); - auto new_bucket = twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, 4, 32, - compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), now), *state); - // incoming bucket should not be accepted when it has below the min threshold SSTables - BOOST_REQUIRE(new_bucket.empty()); - - now = api::timestamp_clock::now().time_since_epoch().count(); - new_bucket = twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, 2, 32, - compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), now), *state); - // incoming bucket should be accepted when it is larger than the min threshold SSTables - BOOST_REQUIRE(!new_bucket.empty()); - - // And 2 into the second bucket (1 hour back) - for (api::timestamp_type i = 3; i < 5; i++) { - auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), tstamp2); - buckets[bound].push_back(sstables[i]); - } - - // "an sstable with a single value should have equal min/max timestamps" - for (auto& sst : sstables) { - BOOST_REQUIRE(sst->get_stats_metadata().min_timestamp == sst->get_stats_metadata().max_timestamp); - } - - // Test trim - auto num_sstables = 40; - for (int r = 5; r < num_sstables; r++) { - auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(r))}); - utils::chunked_vector mutations; - for (int i = 0 ; i < r ; i++) { - mutations.push_back(make_insert(key, tstamp + r)); - } - sstables.push_back(make_sstable_containing(env.make_sstable(s), std::move(mutations))); - } - - // Reset the buckets, overfill it now - for (int i = 0 ; i < 40; i++) { - auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), - sstables[i]->get_stats_metadata().max_timestamp); - buckets[bound].push_back(sstables[i]); - } - - now = api::timestamp_clock::now().time_since_epoch().count(); - 
new_bucket = twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, 4, 32, - compaction::time_window_compaction_strategy::get_window_lower_bound(duration_cast(hours(1)), now), *state); - // new bucket should be trimmed to max threshold of 32 - BOOST_REQUIRE(new_bucket.size() == size_t(32)); - }); +SEASTAR_FIXTURE_TEST_CASE(time_window_strategy_correctness_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { time_window_strategy_correctness_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); } // Check that TWCS will only perform size-tiered on the current window and also // the past windows that were already previously compacted into a single SSTable. -SEASTAR_TEST_CASE(time_window_strategy_size_tiered_behavior_correctness) { +void time_window_strategy_size_tiered_behavior_correctness_fn(test_env& env) { using namespace std::chrono; + auto builder = schema_builder("tests", "time_window_strategy") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); + auto s = builder.build(); - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "time_window_strategy") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); - auto s = builder.build(); + auto sst_gen = env.make_sst_factory(s); - auto sst_gen = env.make_sst_factory(s); + auto make_insert = [&] (partition_key key, api::timestamp_type t) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), t); + return m; + }; - auto make_insert = [&] (partition_key key, api::timestamp_type t) { - mutation m(s, key); - 
m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), t); - return m; - }; + std::map options; + compaction::time_window_compaction_strategy twcs(options); + std::map> buckets; // windows + int min_threshold = 4; + int max_threshold = 32; + auto window_size = duration_cast(hours(1)); - std::map options; - compaction::time_window_compaction_strategy twcs(options); - std::map> buckets; // windows - int min_threshold = 4; - int max_threshold = 32; - auto window_size = duration_cast(hours(1)); + auto add_new_sstable_to_bucket = [&] (api::timestamp_type ts, api::timestamp_type window_ts) { + auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(ts))}); + auto mut = make_insert(std::move(key), ts); + auto sst = make_sstable_containing(sst_gen, {std::move(mut)}); + auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(window_size, window_ts); + buckets[bound].push_back(std::move(sst)); + }; - auto add_new_sstable_to_bucket = [&] (api::timestamp_type ts, api::timestamp_type window_ts) { - auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(ts))}); - auto mut = make_insert(std::move(key), ts); - auto sst = make_sstable_containing(sst_gen, {std::move(mut)}); - auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(window_size, window_ts); - buckets[bound].push_back(std::move(sst)); - }; + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + auto major_compact_bucket = [&] (api::timestamp_type window_ts) { + auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(window_size, window_ts); + auto ret = compact_sstables(env, compaction::compaction_descriptor(std::move(buckets[bound])), cf, sst_gen).get(); + BOOST_REQUIRE(ret.new_sstables.size() == 1); + buckets[bound] = std::move(ret.new_sstables); + }; - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - auto 
major_compact_bucket = [&] (api::timestamp_type window_ts) { - auto bound = compaction::time_window_compaction_strategy::get_window_lower_bound(window_size, window_ts); - auto ret = compact_sstables(env, compaction::compaction_descriptor(std::move(buckets[bound])), cf, sst_gen).get(); - BOOST_REQUIRE(ret.new_sstables.size() == 1); - buckets[bound] = std::move(ret.new_sstables); - }; + api::timestamp_type current_window_ts = api::timestamp_clock::now().time_since_epoch().count(); + api::timestamp_type past_window_ts = current_window_ts - duration_cast(seconds(2L * 3600L)).count(); - api::timestamp_type current_window_ts = api::timestamp_clock::now().time_since_epoch().count(); - api::timestamp_type past_window_ts = current_window_ts - duration_cast(seconds(2L * 3600L)).count(); + // create 1 sstable into past time window and let the strategy know about it + add_new_sstable_to_bucket(0, past_window_ts); - // create 1 sstable into past time window and let the strategy know about it - add_new_sstable_to_bucket(0, past_window_ts); + auto now = compaction::time_window_compaction_strategy::get_window_lower_bound(window_size, past_window_ts); + auto control = make_strategy_control_for_test(false); - auto now = compaction::time_window_compaction_strategy::get_window_lower_bound(window_size, past_window_ts); - auto control = make_strategy_control_for_test(false); + // past window cannot be compacted because it has a single SSTable + auto state = cf.as_compaction_group_view().get_compaction_strategy_state().get(); + BOOST_REQUIRE(twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, min_threshold, max_threshold, now, *state).size() == 0); - // past window cannot be compacted because it has a single SSTable - auto state = cf.as_compaction_group_view().get_compaction_strategy_state().get(); - BOOST_REQUIRE(twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, min_threshold, max_threshold, now, *state).size() == 0); + // create min_threshold-1 
sstables into current time window + for (api::timestamp_type t = 0; t < min_threshold - 1; t++) { + add_new_sstable_to_bucket(t, current_window_ts); + } + // add 1 sstable into past window. + add_new_sstable_to_bucket(1, past_window_ts); - // create min_threshold-1 sstables into current time window - for (api::timestamp_type t = 0; t < min_threshold - 1; t++) { - add_new_sstable_to_bucket(t, current_window_ts); - } - // add 1 sstable into past window. - add_new_sstable_to_bucket(1, past_window_ts); + now = compaction::time_window_compaction_strategy::get_window_lower_bound(window_size, current_window_ts); - now = compaction::time_window_compaction_strategy::get_window_lower_bound(window_size, current_window_ts); + // past window can now be compacted into a single SSTable because it was the previous current (active) window. + // current window cannot be compacted because it has less than min_threshold SSTables + BOOST_REQUIRE(twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, min_threshold, max_threshold, now, *state).size() == 2); - // past window can now be compacted into a single SSTable because it was the previous current (active) window. - // current window cannot be compacted because it has less than min_threshold SSTables - BOOST_REQUIRE(twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, min_threshold, max_threshold, now, *state).size() == 2); + major_compact_bucket(past_window_ts); - major_compact_bucket(past_window_ts); + // now past window cannot be compacted again, because it was already compacted into a single SSTable, now it switches to STCS mode. + BOOST_REQUIRE(twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, min_threshold, max_threshold, now, *state).size() == 0); - // now past window cannot be compacted again, because it was already compacted into a single SSTable, now it switches to STCS mode. 
- BOOST_REQUIRE(twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, min_threshold, max_threshold, now, *state).size() == 0); + // make past window contain more than min_threshold similar-sized SSTables, allowing it to be compacted again. + for (api::timestamp_type t = 1; t < min_threshold; t++) { + add_new_sstable_to_bucket(t, past_window_ts); + } - // make past window contain more than min_threshold similar-sized SSTables, allowing it to be compacted again. - for (api::timestamp_type t = 1; t < min_threshold; t++) { - add_new_sstable_to_bucket(t, past_window_ts); - } + // now past window can be compacted again because it switched to STCS mode and has more than min_threshold SSTables. + BOOST_REQUIRE(twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, min_threshold, max_threshold, now, *state).size() == size_t(min_threshold)); +} - // now past window can be compacted again because it switched to STCS mode and has more than min_threshold SSTables. - BOOST_REQUIRE(twcs.newest_bucket(cf.as_compaction_group_view(), *control, buckets, min_threshold, max_threshold, now, *state).size() == size_t(min_threshold)); - }); +SEASTAR_TEST_CASE(time_window_strategy_size_tiered_behavior_correctness) { + return test_env::do_with_async([](test_env& env) { time_window_strategy_size_tiered_behavior_correctness_fn(env); }); +} + +SEASTAR_TEST_CASE(time_window_strategy_size_tiered_behavior_correctness_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { time_window_strategy_size_tiered_behavior_correctness_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(time_window_strategy_size_tiered_behavior_correctness_gcs, + gcs_fixture, + *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { time_window_strategy_size_tiered_behavior_correctness_fn(env); }, + 
test_env_config{.storage = make_test_object_storage_options("GS")}); } static void check_min_max_column_names(const sstable_ptr& sst, std::vector min_components, std::vector max_components) { @@ -1908,54 +2281,67 @@ static void check_min_max_column_names(const sstable_ptr& sst, std::vector(s); - const column_definition &r1_col = *s->get_column_definition("r1"); +future<> min_max_clustering_key_2(test_env& env) { + for (const auto version : writable_sstable_versions) { + auto s = schema_builder("ks", "cf") + .with_column("pk", utf8_type, column_kind::partition_key) + .with_column("ck1", utf8_type, column_kind::clustering_key) + .with_column("r1", int32_type) + .build(); + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + auto sst_gen = env.make_sst_factory(s, version); + auto mt = make_lw_shared(s); + const column_definition &r1_col = *s->get_column_definition("r1"); - for (auto j = 0; j < 8; j++) { - auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(j))}); - mutation m(s, key); - for (auto i = 100; i < 150; i++) { - auto c_key = clustering_key::from_exploded(*s, {to_bytes(to_sstring(j) + "ck" + to_sstring(i))}); - m.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1))); - } - mt->apply(std::move(m)); - } - auto sst = make_sstable_containing(sst_gen, mt); - check_min_max_column_names(sst, {"0ck100"}, {"7ck149"}); - - mt = make_lw_shared(s); - auto key = partition_key::from_exploded(*s, {to_bytes("key9")}); + for (auto j = 0; j < 8; j++) { + auto key = partition_key::from_exploded(*s, {to_bytes("key" + to_sstring(j))}); mutation m(s, key); - for (auto i = 101; i < 299; i++) { - auto c_key = clustering_key::from_exploded(*s, {to_bytes(to_sstring(9) + "ck" + to_sstring(i))}); + for (auto i = 100; i < 150; i++) { + auto c_key = clustering_key::from_exploded(*s, {to_bytes(to_sstring(j) + "ck" + to_sstring(i))}); m.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, 
int32_type->decompose(1))); } mt->apply(std::move(m)); - auto sst2 = make_sstable_containing(sst_gen, mt); - check_min_max_column_names(sst2, {"9ck101"}, {"9ck298"}); - - auto creator = sst_gen; - auto info = compact_sstables(env, compaction::compaction_descriptor({sst, sst2}), cf, creator).get(); - BOOST_REQUIRE(info.new_sstables.size() == 1); - check_min_max_column_names(info.new_sstables.front(), {"0ck100"}, {"9ck298"}); } - }); + auto sst = make_sstable_containing(sst_gen, mt); + check_min_max_column_names(sst, {"0ck100"}, {"7ck149"}); + + mt = make_lw_shared(s); + auto key = partition_key::from_exploded(*s, {to_bytes("key9")}); + mutation m(s, key); + for (auto i = 101; i < 299; i++) { + auto c_key = clustering_key::from_exploded(*s, {to_bytes(to_sstring(9) + "ck" + to_sstring(i))}); + m.set_clustered_cell(c_key, r1_col, make_atomic_cell(int32_type, int32_type->decompose(1))); + } + mt->apply(std::move(m)); + auto sst2 = make_sstable_containing(sst_gen, mt); + check_min_max_column_names(sst2, {"9ck101"}, {"9ck298"}); + + auto creator = sst_gen; + auto info = compact_sstables(env, compaction::compaction_descriptor({sst, sst2}), cf, creator).get(); + BOOST_REQUIRE(info.new_sstables.size() == 1); + check_min_max_column_names(info.new_sstables.front(), {"0ck100"}, {"9ck298"}); + } + return make_ready_future(); } -SEASTAR_TEST_CASE(size_tiered_beyond_max_threshold_test) { - return test_env::do_with_async([] (test_env& env) { - auto cf = env.make_table_for_tests(); +SEASTAR_TEST_CASE(min_max_clustering_key_test_2_test) { + return test_env::do_with_async([](test_env& env) { min_max_clustering_key_2(env).get(); }, {}); +} + +SEASTAR_TEST_CASE(min_max_clustering_key_test_2_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { min_max_clustering_key_2(env).get(); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + 
+SEASTAR_FIXTURE_TEST_CASE(min_max_clustering_key_test_2_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { min_max_clustering_key_2(env).get(); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void size_tiered_beyond_max_threshold_fn(test_env& env) { + auto schema = table_for_tests::make_default_schema(); + auto cf = env.make_table_for_tests(schema); auto stop_cf = deferred_stop(cf); auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, cf.schema()->compaction_strategy_options()); @@ -1972,301 +2358,363 @@ SEASTAR_TEST_CASE(size_tiered_beyond_max_threshold_test) { } auto desc = get_sstables_for_compaction(cs, cf.as_compaction_group_view(), std::move(candidates)).get(); BOOST_REQUIRE(desc.sstables.size() == size_t(max_threshold)); - }); } -SEASTAR_TEST_CASE(sstable_expired_data_ratio) { - return test_env::do_with_async([] (test_env& env) { - auto make_schema = [&] (std::string_view cf, compaction::compaction_strategy_type cst) { - auto builder = schema_builder("tests", cf) - .with_column("p1", utf8_type, column_kind::partition_key) - .with_column("c1", utf8_type, column_kind::clustering_key) - .with_column("r1", utf8_type); - builder.set_compaction_strategy(cst); - return builder.build(); - }; +SEASTAR_TEST_CASE(size_tiered_beyond_max_threshold_test) { + return test_env::do_with_async([](test_env& env) { size_tiered_beyond_max_threshold_fn(env); }); +} - auto stcs_schema = make_schema("stcs", compaction::compaction_strategy_type::size_tiered); - auto stcs_table = env.make_table_for_tests(stcs_schema); - auto close_stcs_table = deferred_stop(stcs_table); - auto sst_gen = env.make_sst_factory(stcs_schema); +SEASTAR_TEST_CASE(size_tiered_beyond_max_threshold_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { 
size_tiered_beyond_max_threshold_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto mt = make_lw_shared(stcs_schema); +SEASTAR_FIXTURE_TEST_CASE(size_tiered_beyond_max_threshold_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { size_tiered_beyond_max_threshold_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - static constexpr float expired = 0.33; - // we want number of expired keys to be ~ 1.5*sstables::TOMBSTONE_HISTOGRAM_BIN_SIZE so as to - // test ability of histogram to return a good estimation after merging keys. - static int total_keys = std::ceil(sstables::TOMBSTONE_HISTOGRAM_BIN_SIZE/expired)*1.5; +void sstable_expired_data_ratio(test_env& env) { + auto make_schema = [&] (std::string_view cf, compaction::compaction_strategy_type cst) { + auto builder = schema_builder("tests", cf) + .with_column("p1", utf8_type, column_kind::partition_key) + .with_column("c1", utf8_type, column_kind::clustering_key) + .with_column("r1", utf8_type); + builder.set_compaction_strategy(cst); + return builder.build(); + }; - auto insert_key = [&stcs_schema, &mt] (bytes k, uint32_t ttl, uint32_t expiration_time) { - auto key = partition_key::from_exploded(*stcs_schema, {k}); - mutation m(stcs_schema, key); - auto c_key = clustering_key::from_exploded(*stcs_schema, {to_bytes("c1")}); - m.set_clustered_cell(c_key, *stcs_schema->get_column_definition("r1"), make_atomic_cell(utf8_type, bytes("a"), ttl, expiration_time)); - mt->apply(std::move(m)); - }; + auto stcs_schema = make_schema("stcs", compaction::compaction_strategy_type::size_tiered); + auto stcs_table = env.make_table_for_tests(stcs_schema); + auto close_stcs_table = deferred_stop(stcs_table); + auto sst_gen = env.make_sst_factory(stcs_schema); - auto expired_keys = total_keys*expired; - auto now = gc_clock::now(); - for (auto i = 0; i < expired_keys; 
i++) { - // generate expiration time at different time points or only a few entries would be created in histogram - auto expiration_time = (now - gc_clock::duration(DEFAULT_GC_GRACE_SECONDS*2+i)).time_since_epoch().count(); - insert_key(to_bytes("expired_key" + to_sstring(i)), 1, expiration_time); - } - auto remaining = total_keys-expired_keys; - auto expiration_time = (now + gc_clock::duration(3600)).time_since_epoch().count(); - for (auto i = 0; i < remaining; i++) { - insert_key(to_bytes("key" + to_sstring(i)), 3600, expiration_time); - } - auto sst = make_sstable_containing(sst_gen, mt); - const auto& stats = sst->get_stats_metadata(); - BOOST_REQUIRE(stats.estimated_tombstone_drop_time.bin.size() == sstables::TOMBSTONE_HISTOGRAM_BIN_SIZE); - auto uncompacted_size = sst->data_size(); - // Asserts that two keys are equal to within a positive delta + auto mt = make_lw_shared(stcs_schema); + + static constexpr float expired = 0.33; + // we want number of expired keys to be ~ 1.5*sstables::TOMBSTONE_HISTOGRAM_BIN_SIZE so as to + // test ability of histogram to return a good estimation after merging keys. 
+ static int total_keys = std::ceil(sstables::TOMBSTONE_HISTOGRAM_BIN_SIZE/expired)*1.5; + + auto insert_key = [&stcs_schema, &mt] (bytes k, uint32_t ttl, uint32_t expiration_time) { + auto key = partition_key::from_exploded(*stcs_schema, {k}); + mutation m(stcs_schema, key); + auto c_key = clustering_key::from_exploded(*stcs_schema, {to_bytes("c1")}); + m.set_clustered_cell(c_key, *stcs_schema->get_column_definition("r1"), make_atomic_cell(utf8_type, bytes("a"), ttl, expiration_time)); + mt->apply(std::move(m)); + }; + + auto expired_keys = total_keys*expired; + auto now = gc_clock::now(); + for (auto i = 0; i < expired_keys; i++) { + // generate expiration time at different time points or only a few entries would be created in histogram + auto expiration_time = (now - gc_clock::duration(DEFAULT_GC_GRACE_SECONDS*2+i)).time_since_epoch().count(); + insert_key(to_bytes("expired_key" + to_sstring(i)), 1, expiration_time); + } + auto remaining = total_keys-expired_keys; + auto expiration_time = (now + gc_clock::duration(3600)).time_since_epoch().count(); + for (auto i = 0; i < remaining; i++) { + insert_key(to_bytes("key" + to_sstring(i)), 3600, expiration_time); + } + auto sst = make_sstable_containing(sst_gen, mt); + const auto& stats = sst->get_stats_metadata(); + BOOST_REQUIRE(stats.estimated_tombstone_drop_time.bin.size() == sstables::TOMBSTONE_HISTOGRAM_BIN_SIZE); + auto uncompacted_size = sst->data_size(); + // Asserts that two keys are equal to within a positive delta tombstone_gc_state gc_state = tombstone_gc_state::for_tests(); - BOOST_REQUIRE(std::fabs(sst->estimate_droppable_tombstone_ratio(now, gc_state, stcs_schema) - expired) <= 0.1); - sstable_run run; - BOOST_REQUIRE(run.insert(sst)); - BOOST_REQUIRE(std::fabs(run.estimate_droppable_tombstone_ratio(now, gc_state, stcs_schema) - expired) <= 0.1); + BOOST_REQUIRE(std::fabs(sst->estimate_droppable_tombstone_ratio(now, gc_state, stcs_schema) - expired) <= 0.1); + sstable_run run; + 
BOOST_REQUIRE(run.insert(sst)); + BOOST_REQUIRE(std::fabs(run.estimate_droppable_tombstone_ratio(now, gc_state, stcs_schema) - expired) <= 0.1); - auto creator = sst_gen; - auto info = compact_sstables(env, compaction::compaction_descriptor({ sst }), stcs_table, creator).get(); - BOOST_REQUIRE(info.new_sstables.size() == 1); - BOOST_REQUIRE(info.new_sstables.front()->estimate_droppable_tombstone_ratio(now, gc_state, stcs_schema) == 0.0f); - BOOST_REQUIRE_CLOSE(info.new_sstables.front()->data_size(), uncompacted_size*(1-expired), 5); + auto creator = sst_gen; + auto info = compact_sstables(env, compaction::compaction_descriptor({ sst }), stcs_table, creator).get(); + BOOST_REQUIRE(info.new_sstables.size() == 1); + BOOST_REQUIRE(info.new_sstables.front()->estimate_droppable_tombstone_ratio(now, gc_state, stcs_schema) == 0.0f); + BOOST_REQUIRE_CLOSE(info.new_sstables.front()->data_size(), uncompacted_size*(1-expired), 5); + std::map options; + options.emplace("tombstone_threshold", "0.3f"); + + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, options); + // that's needed because sstable with expired data should be old enough. + sstables::test(sst).set_data_file_write_time(db_clock::time_point::min()); + auto descriptor = get_sstables_for_compaction(cs, stcs_table.as_compaction_group_view(), { sst }).get(); + BOOST_REQUIRE(descriptor.sstables.size() == 1); + BOOST_REQUIRE(descriptor.sstables.front() == sst); + + // Makes sure that get_sstables_for_compaction() is called with a compaction_group_view which will provide + // the correct LCS state. 
+ auto lcs_table = env.make_table_for_tests(make_schema("lcs", compaction::compaction_strategy_type::leveled)); + auto close_lcs_table = deferred_stop(lcs_table); + cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::leveled, options); + sst->set_sstable_level(1); + descriptor = get_sstables_for_compaction(cs, lcs_table.as_compaction_group_view(), { sst }).get(); + BOOST_REQUIRE(descriptor.sstables.size() == 1); + BOOST_REQUIRE(descriptor.sstables.front() == sst); + // make sure sstable picked for tombstone compaction removal won't be promoted or demoted. + BOOST_REQUIRE(descriptor.sstables.front()->get_sstable_level() == 1U); + + // check tombstone compaction is disabled by default for TWCS + auto twcs_table = env.make_table_for_tests(make_schema("twcs", compaction::compaction_strategy_type::time_window)); + auto close_twcs_table = deferred_stop(twcs_table); + cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, {}); + descriptor = get_sstables_for_compaction(cs, twcs_table.as_compaction_group_view(), { sst }).get(); + BOOST_REQUIRE(descriptor.sstables.size() == 0); + cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, options); + descriptor = get_sstables_for_compaction(cs, twcs_table.as_compaction_group_view(), { sst }).get(); + BOOST_REQUIRE(descriptor.sstables.size() == 1); + BOOST_REQUIRE(descriptor.sstables.front() == sst); + + // sstable with droppable ratio of 0.3 won't be included due to threshold + { std::map options; - options.emplace("tombstone_threshold", "0.3f"); - + options.emplace("tombstone_threshold", "0.5f"); auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, options); - // that's needed because sstable with expired data should be old enough. 
- sstables::test(sst).set_data_file_write_time(db_clock::time_point::min()); + auto descriptor = get_sstables_for_compaction(cs, stcs_table.as_compaction_group_view(), { sst }).get(); + BOOST_REQUIRE(descriptor.sstables.size() == 0); + } + // sstable which was recently created won't be included due to min interval + { + std::map options; + options.emplace("tombstone_compaction_interval", "3600"); + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, options); + sstables::test(sst).set_data_file_write_time(db_clock::now()); + auto descriptor = get_sstables_for_compaction(cs, stcs_table.as_compaction_group_view(), { sst }).get(); + BOOST_REQUIRE(descriptor.sstables.size() == 0); + } + // sstable which should not be included because of droppable ratio of 0.3, will actually be included + // because the droppable ratio check has been disabled with unchecked_tombstone_compaction set to true + { + std::map options; + options.emplace("tombstone_threshold", "0.5f"); + options.emplace("tombstone_compaction_interval", "3600"); + options.emplace("unchecked_tombstone_compaction", "true"); + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, options); + sstables::test(sst).set_data_file_write_time(db_clock::now() - std::chrono::seconds(7200)); auto descriptor = get_sstables_for_compaction(cs, stcs_table.as_compaction_group_view(), { sst }).get(); BOOST_REQUIRE(descriptor.sstables.size() == 1); - BOOST_REQUIRE(descriptor.sstables.front() == sst); + } +} - // Makes sure that get_sstables_for_compaction() is called with a compaction_group_view which will provide - // the correct LCS state. 
- auto lcs_table = env.make_table_for_tests(make_schema("lcs", compaction::compaction_strategy_type::leveled)); - auto close_lcs_table = deferred_stop(lcs_table); - cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::leveled, options); - sst->set_sstable_level(1); - descriptor = get_sstables_for_compaction(cs, lcs_table.as_compaction_group_view(), { sst }).get(); - BOOST_REQUIRE(descriptor.sstables.size() == 1); - BOOST_REQUIRE(descriptor.sstables.front() == sst); - // make sure sstable picked for tombstone compaction removal won't be promoted or demoted. - BOOST_REQUIRE(descriptor.sstables.front()->get_sstable_level() == 1U); +SEASTAR_TEST_CASE(sstable_expired_data_ratio_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { sstable_expired_data_ratio(env); }); +} - // check tombstone compaction is disabled by default for TWCS - auto twcs_table = env.make_table_for_tests(make_schema("twcs", compaction::compaction_strategy_type::time_window)); - auto close_twcs_table = deferred_stop(twcs_table); - cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, {}); - descriptor = get_sstables_for_compaction(cs, twcs_table.as_compaction_group_view(), { sst }).get(); - BOOST_REQUIRE(descriptor.sstables.size() == 0); - cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, options); - descriptor = get_sstables_for_compaction(cs, twcs_table.as_compaction_group_view(), { sst }).get(); - BOOST_REQUIRE(descriptor.sstables.size() == 1); - BOOST_REQUIRE(descriptor.sstables.front() == sst); +SEASTAR_TEST_CASE(sstable_expired_data_ratio_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { sstable_expired_data_ratio(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} - // sstable with droppable ratio of 0.3 won't be 
included due to threshold - { - std::map options; - options.emplace("tombstone_threshold", "0.5f"); - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, options); - auto descriptor = get_sstables_for_compaction(cs, stcs_table.as_compaction_group_view(), { sst }).get(); - BOOST_REQUIRE(descriptor.sstables.size() == 0); - } - // sstable which was recently created won't be included due to min interval - { - std::map options; - options.emplace("tombstone_compaction_interval", "3600"); - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, options); - sstables::test(sst).set_data_file_write_time(db_clock::now()); - auto descriptor = get_sstables_for_compaction(cs, stcs_table.as_compaction_group_view(), { sst }).get(); - BOOST_REQUIRE(descriptor.sstables.size() == 0); - } - // sstable which should not be included because of droppable ratio of 0.3, will actually be included - // because the droppable ratio check has been disabled with unchecked_tombstone_compaction set to true - { - std::map options; - options.emplace("tombstone_threshold", "0.5f"); - options.emplace("tombstone_compaction_interval", "3600"); - options.emplace("unchecked_tombstone_compaction", "true"); - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, options); - sstables::test(sst).set_data_file_write_time(db_clock::now() - std::chrono::seconds(7200)); - auto descriptor = get_sstables_for_compaction(cs, stcs_table.as_compaction_group_view(), { sst }).get(); - BOOST_REQUIRE(descriptor.sstables.size() == 1); - } - }); +SEASTAR_FIXTURE_TEST_CASE(sstable_expired_data_ratio_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { sstable_expired_data_ratio(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void 
compaction_correctness_with_partitioned_sstable_set_fn(test_env& env) { + auto builder = schema_builder("tests", "tombstone_purge") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + builder.set_gc_grace_seconds(0); + builder.set_compaction_strategy(compaction::compaction_strategy_type::leveled); + auto s = builder.build(); + + auto sst_gen = env.make_sst_factory(s); + + auto compact = [&, s] (std::vector all) -> std::vector { + // NEEDED for partitioned_sstable_set to actually have an effect + std::for_each(all.begin(), all.end(), [] (auto& sst) { sst->set_sstable_level(1); }); + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + return compact_sstables(env, compaction::compaction_descriptor(std::move(all), 0, 0 /*std::numeric_limits::max()*/), + cf, sst_gen).get().new_sstables; + }; + + auto make_insert = [&] (const dht::decorated_key& key) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 1 /* ts */); + return m; + }; + + const auto keys = tests::generate_partition_keys(4, s); + auto mut1 = make_insert(keys[0]); + auto mut2 = make_insert(keys[1]); + auto mut3 = make_insert(keys[2]); + auto mut4 = make_insert(keys[3]); + + { + std::vector sstables = { + make_sstable_containing(sst_gen, {mut1, mut2}), + make_sstable_containing(sst_gen, {mut3, mut4}) + }; + + auto result = compact(std::move(sstables)); + BOOST_REQUIRE_EQUAL(4, result.size()); + + assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut1) + .produces_end_of_stream(); + assert_that(sstable_reader(result[1], s, env.make_reader_permit())) + .produces(mut2) + .produces_end_of_stream(); + assert_that(sstable_reader(result[2], s, env.make_reader_permit())) + .produces(mut3) + .produces_end_of_stream(); + assert_that(sstable_reader(result[3], s, env.make_reader_permit())) + .produces(mut4) + .produces_end_of_stream(); + } + + { + // with 
partitioned_sstable_set having an interval with exclusive lower boundary, example: + // [mut1, mut2] + // (mut2, mut3] + std::vector sstables = { + make_sstable_containing(sst_gen, {mut1, mut2}), + make_sstable_containing(sst_gen, {mut2, mut3}), + make_sstable_containing(sst_gen, {mut3, mut4}) + }; + + auto result = compact(std::move(sstables)); + BOOST_REQUIRE_EQUAL(4, result.size()); + + assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut1) + .produces_end_of_stream(); + assert_that(sstable_reader(result[1], s, env.make_reader_permit())) + .produces(mut2) + .produces_end_of_stream(); + assert_that(sstable_reader(result[2], s, env.make_reader_permit())) + .produces(mut3) + .produces_end_of_stream(); + assert_that(sstable_reader(result[3], s, env.make_reader_permit())) + .produces(mut4) + .produces_end_of_stream(); + } + + { + // with gap between tables + std::vector sstables = { + make_sstable_containing(sst_gen, {mut1, mut2}), + make_sstable_containing(sst_gen, {mut4, mut4}) + }; + + auto result = compact(std::move(sstables)); + BOOST_REQUIRE_EQUAL(3, result.size()); + + assert_that(sstable_reader(result[0], s, env.make_reader_permit())) + .produces(mut1) + .produces_end_of_stream(); + assert_that(sstable_reader(result[1], s, env.make_reader_permit())) + .produces(mut2) + .produces_end_of_stream(); + assert_that(sstable_reader(result[2], s, env.make_reader_permit())) + .produces(mut4) + .produces_end_of_stream(); + } } SEASTAR_TEST_CASE(compaction_correctness_with_partitioned_sstable_set) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "tombstone_purge") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - builder.set_gc_grace_seconds(0); - builder.set_compaction_strategy(compaction::compaction_strategy_type::leveled); - auto s = builder.build(); + return test_env::do_with_async([](test_env& env) { 
compaction_correctness_with_partitioned_sstable_set_fn(env); }); +} - auto sst_gen = env.make_sst_factory(s); +SEASTAR_TEST_CASE(compaction_correctness_with_partitioned_sstable_set_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { compaction_correctness_with_partitioned_sstable_set_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto compact = [&, s] (std::vector all) -> std::vector { - // NEEDED for partitioned_sstable_set to actually have an effect - std::for_each(all.begin(), all.end(), [] (auto& sst) { sst->set_sstable_level(1); }); - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - return compact_sstables(env, compaction::compaction_descriptor(std::move(all), 0, 0 /*std::numeric_limits::max()*/), - cf, sst_gen).get().new_sstables; - }; +SEASTAR_FIXTURE_TEST_CASE(compaction_correctness_with_partitioned_sstable_set_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { compaction_correctness_with_partitioned_sstable_set_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - auto make_insert = [&] (const dht::decorated_key& key) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 1 /* ts */); - return m; - }; +void sstable_cleanup_correctness_fn(cql_test_env& cql_env, test_env& env) { + auto& db = cql_env.local_db(); + auto ks_name = "ks"; // single_node_cql_env::ks_name + auto s = schema_builder(ks_name, "correcness_test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type).build(); - const auto keys = tests::generate_partition_keys(4, s); - auto mut1 = make_insert(keys[0]); - auto mut2 = make_insert(keys[1]); - auto mut3 = make_insert(keys[2]); - auto mut4 = make_insert(keys[3]); + auto sst_gen = 
env.make_sst_factory(s); - { - std::vector sstables = { - make_sstable_containing(sst_gen, {mut1, mut2}), - make_sstable_containing(sst_gen, {mut3, mut4}) - }; + auto make_insert = [&] (dht::decorated_key key) { + mutation m(s, std::move(key)); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), api::timestamp_type(0)); + return m; + }; - auto result = compact(std::move(sstables)); - BOOST_REQUIRE_EQUAL(4, result.size()); + auto total_partitions = 10000U; + auto local_keys = tests::generate_partition_keys(total_partitions, s); + dht::decorated_key::less_comparator cmp(s); + std::sort(local_keys.begin(), local_keys.end(), cmp); + utils::chunked_vector mutations; + for (auto i = 0U; i < total_partitions; i++) { + mutations.push_back(make_insert(local_keys.at(i))); + } + auto sst = make_sstable_containing(sst_gen, mutations); + auto run_identifier = sst->run_identifier(); - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut1) - .produces_end_of_stream(); - assert_that(sstable_reader(result[1], s, env.make_reader_permit())) - .produces(mut2) - .produces_end_of_stream(); - assert_that(sstable_reader(result[2], s, env.make_reader_permit())) - .produces(mut3) - .produces_end_of_stream(); - assert_that(sstable_reader(result[3], s, env.make_reader_permit())) - .produces(mut4) - .produces_end_of_stream(); - } + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->start(); - { - // with partitioned_sstable_set having an interval with exclusive lower boundary, example: - // [mut1, mut2] - // (mut2, mut3] - std::vector sstables = { - make_sstable_containing(sst_gen, {mut1, mut2}), - make_sstable_containing(sst_gen, {mut2, mut3}), - make_sstable_containing(sst_gen, {mut3, mut4}) - }; + const auto& erm = db.find_keyspace(ks_name).get_static_effective_replication_map(); + auto local_ranges = compaction::make_owned_ranges_ptr(db.get_keyspace_local_ranges(erm).get()); + auto 
descriptor = compaction::compaction_descriptor({sst}, compaction::compaction_descriptor::default_level, + compaction::compaction_descriptor::default_max_sstable_bytes, run_identifier, compaction::compaction_type_options::make_cleanup(), std::move(local_ranges)); + auto ret = compact_sstables(env, std::move(descriptor), cf, sst_gen).get(); - auto result = compact(std::move(sstables)); - BOOST_REQUIRE_EQUAL(4, result.size()); + BOOST_REQUIRE(ret.new_sstables.size() == 1); + BOOST_REQUIRE(ret.new_sstables.front()->get_estimated_key_count() >= total_partitions); + BOOST_REQUIRE((ret.new_sstables.front()->get_estimated_key_count() - total_partitions) <= uint64_t(s->min_index_interval())); + BOOST_REQUIRE(ret.new_sstables.front()->run_identifier() == run_identifier); - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut1) - .produces_end_of_stream(); - assert_that(sstable_reader(result[1], s, env.make_reader_permit())) - .produces(mut2) - .produces_end_of_stream(); - assert_that(sstable_reader(result[2], s, env.make_reader_permit())) - .produces(mut3) - .produces_end_of_stream(); - assert_that(sstable_reader(result[3], s, env.make_reader_permit())) - .produces(mut4) - .produces_end_of_stream(); - } - - { - // with gap between tables - std::vector sstables = { - make_sstable_containing(sst_gen, {mut1, mut2}), - make_sstable_containing(sst_gen, {mut4, mut4}) - }; - - auto result = compact(std::move(sstables)); - BOOST_REQUIRE_EQUAL(3, result.size()); - - assert_that(sstable_reader(result[0], s, env.make_reader_permit())) - .produces(mut1) - .produces_end_of_stream(); - assert_that(sstable_reader(result[1], s, env.make_reader_permit())) - .produces(mut2) - .produces_end_of_stream(); - assert_that(sstable_reader(result[2], s, env.make_reader_permit())) - .produces(mut4) - .produces_end_of_stream(); - } - }); + dht::token_range_vector ranges; + ranges.push_back(dht::token_range::make_singular(local_keys.at(0).token())); + 
ranges.push_back(dht::token_range::make_singular(local_keys.at(10).token())); + ranges.push_back(dht::token_range::make_singular(local_keys.at(100).token())); + ranges.push_back(dht::token_range::make_singular(local_keys.at(900).token())); + local_ranges = compaction::make_owned_ranges_ptr(std::move(ranges)); + descriptor = compaction::compaction_descriptor({sst}, compaction::compaction_descriptor::default_level, + compaction::compaction_descriptor::default_max_sstable_bytes, run_identifier, + compaction::compaction_type_options::make_cleanup(), std::move(local_ranges)); + ret = compact_sstables(env, std::move(descriptor), cf, sst_gen).get(); + BOOST_REQUIRE(ret.new_sstables.size() == 1); + auto reader = ret.new_sstables[0]->as_mutation_source().make_mutation_reader(s, env.make_reader_permit(), query::full_partition_range, s->full_slice()); + assert_that(std::move(reader)) + .produces(local_keys[0]) + .produces(local_keys[10]) + .produces(local_keys[100]) + .produces(local_keys[900]) + .produces_end_of_stream(); } SEASTAR_TEST_CASE(sstable_cleanup_correctness_test) { - return do_with_cql_env([] (auto& e) { - return test_env::do_with_async([&db = e.local_db()] (test_env& env) { - auto ks_name = "ks"; // single_node_cql_env::ks_name - auto s = schema_builder(ks_name, "correcness_test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type).build(); + return do_with_cql_env([](auto& e) { return test_env::do_with_async([&e](test_env& env) { sstable_cleanup_correctness_fn(e, env); }); }); +} - auto sst_gen = env.make_sst_factory(s); - - auto make_insert = [&] (dht::decorated_key key) { - mutation m(s, std::move(key)); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), api::timestamp_type(0)); - return m; - }; - - auto total_partitions = 10000U; - auto local_keys = tests::generate_partition_keys(total_partitions, s); - dht::decorated_key::less_comparator cmp(s); - 
std::sort(local_keys.begin(), local_keys.end(), cmp); - utils::chunked_vector mutations; - for (auto i = 0U; i < total_partitions; i++) { - mutations.push_back(make_insert(local_keys.at(i))); - } - auto sst = make_sstable_containing(sst_gen, mutations); - auto run_identifier = sst->run_identifier(); - - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->start(); - - const auto& erm = db.find_keyspace(ks_name).get_static_effective_replication_map(); - auto local_ranges = compaction::make_owned_ranges_ptr(db.get_keyspace_local_ranges(erm).get()); - auto descriptor = compaction::compaction_descriptor({sst}, compaction::compaction_descriptor::default_level, - compaction::compaction_descriptor::default_max_sstable_bytes, run_identifier, compaction::compaction_type_options::make_cleanup(), std::move(local_ranges)); - auto ret = compact_sstables(env, std::move(descriptor), cf, sst_gen).get(); - - BOOST_REQUIRE(ret.new_sstables.size() == 1); - BOOST_REQUIRE(ret.new_sstables.front()->get_estimated_key_count() >= total_partitions); - BOOST_REQUIRE((ret.new_sstables.front()->get_estimated_key_count() - total_partitions) <= uint64_t(s->min_index_interval())); - BOOST_REQUIRE(ret.new_sstables.front()->run_identifier() == run_identifier); - - dht::token_range_vector ranges; - ranges.push_back(dht::token_range::make_singular(local_keys.at(0).token())); - ranges.push_back(dht::token_range::make_singular(local_keys.at(10).token())); - ranges.push_back(dht::token_range::make_singular(local_keys.at(100).token())); - ranges.push_back(dht::token_range::make_singular(local_keys.at(900).token())); - local_ranges = compaction::make_owned_ranges_ptr(std::move(ranges)); - descriptor = compaction::compaction_descriptor({sst}, compaction::compaction_descriptor::default_level, - compaction::compaction_descriptor::default_max_sstable_bytes, run_identifier, - compaction::compaction_type_options::make_cleanup(), std::move(local_ranges)); - ret = 
compact_sstables(env, std::move(descriptor), cf, sst_gen).get(); - BOOST_REQUIRE(ret.new_sstables.size() == 1); - auto reader = ret.new_sstables[0]->as_mutation_source().make_mutation_reader(s, env.make_reader_permit(), query::full_partition_range, s->full_slice()); - assert_that(std::move(reader)) - .produces(local_keys[0]) - .produces(local_keys[10]) - .produces(local_keys[100]) - .produces(local_keys[900]) - .produces_end_of_stream(); - }); +SEASTAR_TEST_CASE(sstable_cleanup_correctness_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + // TODO: When configuring object storage it is not possible to use cql_env and test_env together since they both initiate storage, which causes conflicts. + // Needs deeper investigation to figure out how to properly configure storage for both environments. + testlog.info("sstable_cleanup_correctness_s3_test is not supported for S3 storage yet, skipping test"); + return make_ready_future(); +#if 0 + return do_with_cql_env([](auto& e) { + return test_env::do_with_async([&e](test_env& env) { sstable_cleanup_correctness_fn(e, env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); }); +#endif +} + +SEASTAR_FIXTURE_TEST_CASE(sstable_cleanup_correctness_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + // TODO: When configuring object storage it is not possible to use cql_env and test_env together since they both initiate storage, which causes conflicts. + // Needs deeper investigation to figure out how to properly configure storage for both environments. 
+ testlog.info("sstable_cleanup_correctness_gcs_test is not supported for GCS storage yet, skipping test"); + return make_ready_future(); +#if 0 + return do_with_cql_env([](auto& e) { + return test_env::do_with_async([&e](test_env& env) { sstable_cleanup_correctness_fn(e, env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); + }); +#endif } future<> foreach_compaction_group_view_with_thread(table_for_tests& table, std::function action) { @@ -2535,13 +2983,16 @@ void scrub_validate_corrupted_digest(compress_sstable compress) { // This test is about corrupted data with valid per-chunk checksums. // This kind of corruption should be detected by the digest check. // Triggering this is not trivial, so we corrupt the Digest file instead. - auto f = open_file_dma(sstables::test(sst).filename(component_type::Digest).native(), open_flags::rw).get(); + auto f = sstables::test(sst).open_file(component_type::Digest, {}, {}).get(); auto stream = make_file_input_stream(f); auto close_stream = deferred_close(stream); auto digest_str = util::read_entire_stream_contiguous(stream).get(); auto digest = boost::lexical_cast(digest_str); auto new_digest = to_sstring(digest + 1); // a random invalid digest - f.dma_write(0, new_digest.c_str(), new_digest.size()).get(); + auto os = output_stream(sstables::test(sst).get_storage().make_component_sink(*sst, component_type::Digest, open_flags::wo, {}).get()); + auto close_os = deferred_close(os); + os.write(std::move(new_digest)).get(); + os.flush().get(); compaction::compaction_type_options::scrub opts = { .operation_mode = compaction::compaction_type_options::scrub::mode::validate, @@ -2841,141 +3292,151 @@ SEASTAR_THREAD_TEST_CASE(sstable_scrub_validate_mode_test_corrupted_file_digest_ } } -SEASTAR_TEST_CASE(sstable_validate_test) { - return test_env::do_with_async([] (test_env& env) { - for (const auto sst_version : {sstable_version_types::me, sstable_version_types::ms}) { - auto schema = schema_builder("ks", 
get_name()) - .with_column("pk", utf8_type, column_kind::partition_key) - .with_column("ck", int32_type, column_kind::clustering_key) - .with_column("s", int32_type, column_kind::static_column) - .with_column("v", int32_type).build(); - tests::reader_concurrency_semaphore_wrapper semaphore; - auto permit = semaphore.make_permit(); +void sstable_validate_fn(test_env& env) { + for (const auto sst_version : {sstable_version_types::me, sstable_version_types::ms}) { + auto schema = schema_builder("ks", testing::seastar_test::get_name()) + .with_column("pk", utf8_type, column_kind::partition_key) + .with_column("ck", int32_type, column_kind::clustering_key) + .with_column("s", int32_type, column_kind::static_column) + .with_column("v", int32_type).build(); + tests::reader_concurrency_semaphore_wrapper semaphore; + auto permit = semaphore.make_permit(); - std::deque frags; + std::deque frags; - abort_source abort; + abort_source abort; - const auto ts = api::timestamp_type{1}; - auto local_keys = tests::generate_partition_keys(5, schema); + const auto ts = api::timestamp_type{1}; + auto local_keys = tests::generate_partition_keys(5, schema); - auto make_partition_start = [&, schema] (unsigned pk) { - auto dkey = local_keys.at(pk); - return mutation_fragment_v2(*schema, permit, partition_start(std::move(dkey), {})); - }; + auto make_partition_start = [&, schema] (unsigned pk) { + auto dkey = local_keys.at(pk); + return mutation_fragment_v2(*schema, permit, partition_start(std::move(dkey), {})); + }; - auto make_partition_end = [&, schema] { - return mutation_fragment_v2(*schema, permit, partition_end()); - }; + auto make_partition_end = [&, schema] { + return mutation_fragment_v2(*schema, permit, partition_end()); + }; - auto make_static_row = [&, schema] { - auto r = row{}; - auto cdef = schema->static_column_at(0); - auto ac = atomic_cell::make_live(*cdef.type, ts, cdef.type->decompose(data_value(1))); - r.apply(cdef, atomic_cell_or_collection{std::move(ac)}); - return 
mutation_fragment_v2(*schema, permit, static_row(*schema, std::move(r))); - }; + auto make_static_row = [&, schema] { + auto r = row{}; + auto cdef = schema->static_column_at(0); + auto ac = atomic_cell::make_live(*cdef.type, ts, cdef.type->decompose(data_value(1))); + r.apply(cdef, atomic_cell_or_collection{std::move(ac)}); + return mutation_fragment_v2(*schema, permit, static_row(*schema, std::move(r))); + }; - auto make_clustering_row = [&, schema] (unsigned i) { - auto r = row{}; - auto cdef = schema->regular_column_at(0); - auto ac = atomic_cell::make_live(*cdef.type, ts, cdef.type->decompose(data_value(1))); - r.apply(cdef, atomic_cell_or_collection{std::move(ac)}); - return mutation_fragment_v2(*schema, permit, - clustering_row(clustering_key::from_single_value(*schema, int32_type->decompose(data_value(int(i)))), {}, {}, std::move(r))); - }; + auto make_clustering_row = [&, schema] (unsigned i) { + auto r = row{}; + auto cdef = schema->regular_column_at(0); + auto ac = atomic_cell::make_live(*cdef.type, ts, cdef.type->decompose(data_value(1))); + r.apply(cdef, atomic_cell_or_collection{std::move(ac)}); + return mutation_fragment_v2(*schema, permit, + clustering_row(clustering_key::from_single_value(*schema, int32_type->decompose(data_value(int(i)))), {}, {}, std::move(r))); + }; - auto make_sst = [&] (std::deque frags) { - auto rd = make_mutation_reader_from_fragments(schema, permit, std::move(frags)); - auto config = env.manager().configure_writer(); - config.validation_level = mutation_fragment_stream_validation_level::partition_region; // this test violates key order on purpose - return make_sstable_easy(env, std::move(rd), std::move(config), sst_version, local_keys.size()); - }; + auto make_sst = [&] (std::deque frags) { + auto rd = make_mutation_reader_from_fragments(schema, permit, std::move(frags)); + auto config = env.manager().configure_writer(); + config.validation_level = mutation_fragment_stream_validation_level::partition_region; // this test 
violates key order on purpose + return make_sstable_easy(env, std::move(rd), std::move(config), sst_version, local_keys.size()); + }; - auto info = make_lw_shared(); + auto info = make_lw_shared(); - struct error_handler { - uint64_t& count; - void operator()(sstring what) { - ++count; - testlog.trace("validation error: ", what); + struct error_handler { + uint64_t& count; + void operator()(sstring what) { + ++count; + testlog.trace("validation error: ", what); + } + }; + + BOOST_TEST_MESSAGE("valid"); + { + frags.emplace_back(make_partition_start(0)); + frags.emplace_back(make_static_row()); + frags.emplace_back(make_clustering_row(0)); + frags.emplace_back(make_clustering_row(1)); + frags.emplace_back(make_partition_end()); + frags.emplace_back(make_partition_start(2)); + frags.emplace_back(make_partition_end()); + + uint64_t count = 0; + auto sst = make_sst(std::move(frags)); + const auto errors = sst->validate(permit, abort, error_handler{count}).get(); + BOOST_REQUIRE_EQUAL(errors, 0); + BOOST_REQUIRE_EQUAL(errors, count); } - }; - BOOST_TEST_MESSAGE("valid"); - { - frags.emplace_back(make_partition_start(0)); - frags.emplace_back(make_static_row()); - frags.emplace_back(make_clustering_row(0)); - frags.emplace_back(make_clustering_row(1)); - frags.emplace_back(make_partition_end()); - frags.emplace_back(make_partition_start(2)); - frags.emplace_back(make_partition_end()); + // BTI index writers won't accept out-of-order keys. 
+ if (has_summary_and_index(sst_version)) { + BOOST_TEST_MESSAGE("out-of-order clustering row"); + frags.emplace_back(make_partition_start(0)); + frags.emplace_back(make_clustering_row(1)); + frags.emplace_back(make_clustering_row(0)); + frags.emplace_back(make_partition_end()); - uint64_t count = 0; - auto sst = make_sst(std::move(frags)); - const auto errors = sst->validate(permit, abort, error_handler{count}).get(); - BOOST_REQUIRE_EQUAL(errors, 0); - BOOST_REQUIRE_EQUAL(errors, count); - } + uint64_t count = 0; + auto sst = make_sst(std::move(frags)); + const auto errors = sst->validate(permit, abort, error_handler{count}).get(); + BOOST_REQUIRE_NE(errors, 0); + BOOST_REQUIRE_EQUAL(errors, count); + } - // BTI index writers won't accept out-of-order keys. - if (has_summary_and_index(sst_version)) { - BOOST_TEST_MESSAGE("out-of-order clustering row"); - frags.emplace_back(make_partition_start(0)); - frags.emplace_back(make_clustering_row(1)); - frags.emplace_back(make_clustering_row(0)); - frags.emplace_back(make_partition_end()); + // BTI index writers won't accept out-of-order keys. 
+ if (has_summary_and_index(sst_version)) { + BOOST_TEST_MESSAGE("out-of-order partition"); + frags.emplace_back(make_partition_start(0)); + frags.emplace_back(make_clustering_row(0)); + frags.emplace_back(make_partition_end()); + frags.emplace_back(make_partition_start(2)); + frags.emplace_back(make_clustering_row(0)); + frags.emplace_back(make_partition_end()); + frags.emplace_back(make_partition_start(1)); + frags.emplace_back(make_partition_end()); - uint64_t count = 0; - auto sst = make_sst(std::move(frags)); - const auto errors = sst->validate(permit, abort, error_handler{count}).get(); - BOOST_REQUIRE_NE(errors, 0); - BOOST_REQUIRE_EQUAL(errors, count); - } + uint64_t count = 0; + auto sst = make_sst(std::move(frags)); + const auto errors = sst->validate(permit, abort, error_handler{count}).get(); + BOOST_REQUIRE_NE(errors, 0); + BOOST_REQUIRE_EQUAL(errors, count); + } - // BTI index writers won't accept out-of-order keys. - if (has_summary_and_index(sst_version)) { - BOOST_TEST_MESSAGE("out-of-order partition"); - frags.emplace_back(make_partition_start(0)); - frags.emplace_back(make_clustering_row(0)); - frags.emplace_back(make_partition_end()); - frags.emplace_back(make_partition_start(2)); - frags.emplace_back(make_clustering_row(0)); - frags.emplace_back(make_partition_end()); - frags.emplace_back(make_partition_start(1)); - frags.emplace_back(make_partition_end()); + BOOST_TEST_MESSAGE("malformed_sstable_exception"); + { + frags.emplace_back(make_partition_start(0)); + frags.emplace_back(make_clustering_row(0)); + frags.emplace_back(make_partition_end()); - uint64_t count = 0; - auto sst = make_sst(std::move(frags)); - const auto errors = sst->validate(permit, abort, error_handler{count}).get(); - BOOST_REQUIRE_NE(errors, 0); - BOOST_REQUIRE_EQUAL(errors, count); - } + uint64_t count = 0; + auto sst = make_sst(std::move(frags)); - BOOST_TEST_MESSAGE("malformed_sstable_exception"); - { - frags.emplace_back(make_partition_start(0)); - 
frags.emplace_back(make_clustering_row(0)); - frags.emplace_back(make_partition_end()); + // Corrupt the data to cause an invalid checksum. + corrupt_sstable(sst); - uint64_t count = 0; - auto sst = make_sst(std::move(frags)); - - // Corrupt the data to cause an invalid checksum. - corrupt_sstable(sst); - - auto res = sstables::validate_checksums(sst, permit).get(); - BOOST_REQUIRE(res.status == validate_checksums_status::invalid); - BOOST_REQUIRE(res.has_digest); + auto res = sstables::validate_checksums(sst, permit).get(); + BOOST_REQUIRE(res.status == validate_checksums_status::invalid); + BOOST_REQUIRE(res.has_digest); - const auto errors = sst->validate(permit, abort, error_handler{count}).get(); - BOOST_REQUIRE_NE(errors, 0); - BOOST_REQUIRE_EQUAL(errors, count); - } - } - }); + const auto errors = sst->validate(permit, abort, error_handler{count}).get(); + BOOST_REQUIRE_NE(errors, 0); + BOOST_REQUIRE_EQUAL(errors, count); + } + } +} + +SEASTAR_TEST_CASE(sstable_validate_test) { + return test_env::do_with_async([](test_env& env) { sstable_validate_fn(env); }); +} +SEASTAR_TEST_CASE(sstable_validate_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { sstable_validate_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} +SEASTAR_FIXTURE_TEST_CASE(sstable_validate_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { sstable_validate_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); } SEASTAR_THREAD_TEST_CASE(sstable_scrub_abort_mode_test) { @@ -3474,481 +3935,576 @@ SEASTAR_THREAD_TEST_CASE(sstable_scrub_reader_test) { r.produces_end_of_stream(); } -SEASTAR_TEST_CASE(scrubbed_sstable_removal_test) { +void scrubbed_sstable_removal_fn(test_env& env) { // Test to verify that scrub removes the source sstable from the table upon completion 
// https://github.com/scylladb/scylladb/issues/20030 - return test_env::do_with_async([] (test_env& env) { - simple_schema ss; - auto s = ss.schema(); - auto pk = ss.make_pkey(); + simple_schema ss; + auto s = ss.schema(); + auto pk = ss.make_pkey(); - auto mut1 = mutation(s, pk); - mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp()); - auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}); + auto mut1 = mutation(s, pk); + mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp()); + auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}); - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); - // add the sstable to cf's maintenance set - cf->add_sstable_and_update_cache(sst, sstables::offstrategy::yes).get(); - auto& cf_ts = cf.as_compaction_group_view(); - auto maintenance_sst_set = cf_ts.maintenance_sstable_set().get(); - BOOST_REQUIRE_EQUAL(maintenance_sst_set->size(), 1); - BOOST_REQUIRE_EQUAL(*maintenance_sst_set->all()->begin(), sst); - // confirm main sstable_set is empty - BOOST_REQUIRE_EQUAL(cf_ts.main_sstable_set().get()->size(), 0); + // add the sstable to cf's maintenance set + cf->add_sstable_and_update_cache(sst, sstables::offstrategy::yes).get(); + auto& cf_ts = cf.as_compaction_group_view(); + auto maintenance_sst_set = cf_ts.maintenance_sstable_set().get(); + BOOST_REQUIRE_EQUAL(maintenance_sst_set->size(), 1); + BOOST_REQUIRE_EQUAL(*maintenance_sst_set->all()->begin(), sst); + // confirm main sstable_set is empty + BOOST_REQUIRE_EQUAL(cf_ts.main_sstable_set().get()->size(), 0); - // Perform scrub on the table - cf->get_compaction_manager().perform_sstable_scrub(cf_ts, {}, {}).get(); + // Perform scrub on the table + cf->get_compaction_manager().perform_sstable_scrub(cf_ts, {}, {}).get(); - // main set should have the resultant sst and the maintenance set should be empty now - 
BOOST_REQUIRE_EQUAL(cf_ts.main_sstable_set().get()->size(), 1); - BOOST_REQUIRE_EQUAL(cf_ts.maintenance_sstable_set().get()->size(), 0); + // main set should have the resultant sst and the maintenance set should be empty now + BOOST_REQUIRE_EQUAL(cf_ts.main_sstable_set().get()->size(), 1); + BOOST_REQUIRE_EQUAL(cf_ts.maintenance_sstable_set().get()->size(), 0); - // Now that there is an sstable in main set, perform scrub on the table - // again to verify that the result ends up again in main sstable_set - cf->get_compaction_manager().perform_sstable_scrub(cf_ts, {}, {}).get(); - BOOST_REQUIRE_EQUAL(cf_ts.main_sstable_set().get()->size(), 1); - BOOST_REQUIRE_EQUAL(cf_ts.maintenance_sstable_set().get()->size(), 0); - }); + // Now that there is an sstable in main set, perform scrub on the table + // again to verify that the result ends up again in main sstable_set + cf->get_compaction_manager().perform_sstable_scrub(cf_ts, {}, {}).get(); + BOOST_REQUIRE_EQUAL(cf_ts.main_sstable_set().get()->size(), 1); + BOOST_REQUIRE_EQUAL(cf_ts.maintenance_sstable_set().get()->size(), 0); +} + +SEASTAR_TEST_CASE(scrubbed_sstable_removal_test) { + return test_env::do_with_async([](test_env& env) { scrubbed_sstable_removal_fn(env); }); +} + +SEASTAR_TEST_CASE(scrubbed_sstable_removal_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { scrubbed_sstable_removal_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(scrubbed_sstable_removal_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { scrubbed_sstable_removal_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); } // Test to verify that `scrub --validate` is not affected by a concurrent regular compaction + +void compact_uncompressed_sstable_during_scrub_validate_fn(test_env& env) { + 
auto s = schema_builder("unlinked_sstable_scrub_test", "t1") + .with_column("pk", utf8_type, column_kind::partition_key) + .with_column("ck", utf8_type, column_kind::clustering_key) + .with_column("v", utf8_type) + .set_compressor_params(compression_parameters::no_compression()) + .build(); + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->disable_auto_compaction().get(); + + // Add 2 sstables to the column family + api::timestamp_type timestamp = api::min_timestamp; + for (int i = 0; i < 2; i++) { + auto mut = mutation(s, tests::generate_partition_key(s)); + mut.partition().apply_insert(*s, tests::generate_clustering_key(s), timestamp++); + auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut)}); + cf->add_sstable_and_update_cache(std::move(sst)).get(); + } + + // Start a scrub on the table; Use an injector to pause the scrub after it has collected the sstables to be scrubbed. + utils::get_local_injector().enable("sstable_validate/pause"); + compaction::compaction_type_options::scrub opts = {}; + opts.operation_mode = compaction::compaction_type_options::scrub::mode::validate; + auto scrub_task = cf->get_compaction_manager().perform_sstable_scrub(cf.as_compaction_group_view(), opts, {}); + + // When the scrub is paused, compact the two sstables in the table; this should not affect the scrub + cf->get_compaction_manager().perform_major_compaction(cf.as_compaction_group_view(), {}).get(); + + // Now resume the scrub and ensure it completes without error + utils::get_local_injector().receive_message("sstable_validate/pause"); + BOOST_REQUIRE_EQUAL(scrub_task.get().value().validation_errors, 0); + + // Test the reverse case : start a compaction and pause it, then start a scrub --validate + utils::get_local_injector().enable("major_compaction_wait"); + auto compaction_task = cf->get_compaction_manager().perform_major_compaction(cf.as_compaction_group_view(), {}); + // Perform scrub --validate while compaction is in 
progress + scrub_task = cf->get_compaction_manager().perform_sstable_scrub(cf.as_compaction_group_view(), opts, {}); + // Resume compaction and ensure that it doesn't interfere with the scrub + utils::get_local_injector().receive_message("major_compaction_wait"); + BOOST_REQUIRE_EQUAL(scrub_task.get().value().validation_errors, 0); + compaction_task.get(); +} + SEASTAR_TEST_CASE(compact_uncompressed_sstable_during_scrub_validate_test) { #ifndef SCYLLA_ENABLE_ERROR_INJECTION fmt::print("Skipping test as it depends on error injection. Please run in mode where it's enabled (debug,dev).\n"); return make_ready_future(); #endif - return test_env::do_with_async([] (test_env& env) { - auto s = schema_builder("unlinked_sstable_scrub_test", "t1") - .with_column("pk", utf8_type, column_kind::partition_key) - .with_column("ck", utf8_type, column_kind::clustering_key) - .with_column("v", utf8_type) - .set_compressor_params(compression_parameters::no_compression()) - .build(); - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->disable_auto_compaction().get(); + return test_env::do_with_async([](test_env& env) { compact_uncompressed_sstable_during_scrub_validate_fn(env); }); +} - // Add 2 sstables to the column family - api::timestamp_type timestamp = api::min_timestamp; - for (int i = 0; i < 2; i++) { - auto mut = mutation(s, tests::generate_partition_key(s)); - mut.partition().apply_insert(*s, tests::generate_clustering_key(s), timestamp++); - auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut)}); - cf->add_sstable_and_update_cache(std::move(sst)).get(); +SEASTAR_TEST_CASE(compact_uncompressed_sstable_during_scrub_validate_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { +#ifndef SCYLLA_ENABLE_ERROR_INJECTION + fmt::print("Skipping test as it depends on error injection. 
Please run in mode where it's enabled (debug,dev).\n"); + return make_ready_future(); +#endif + return test_env::do_with_async([](test_env& env) { compact_uncompressed_sstable_during_scrub_validate_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(compact_uncompressed_sstable_during_scrub_validate_test_gcs, + gcs_fixture, + *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { +#ifndef SCYLLA_ENABLE_ERROR_INJECTION + fmt::print("Skipping test as it depends on error injection. Please run in mode where it's enabled (debug,dev).\n"); + return make_ready_future(); +#endif + return test_env::do_with_async([](test_env& env) { compact_uncompressed_sstable_during_scrub_validate_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void sstable_run_based_compaction_fn(test_env& env) { + auto builder = schema_builder("tests", "sstable_run_based_compaction_test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + auto s = builder.build(); + + auto sst_gen = env.make_sst_factory(s); + + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->start(); + cf->set_compaction_strategy(compaction::compaction_strategy_type::size_tiered); + auto compact = [&, s] (std::vector all, auto replacer) -> std::vector { + return compact_sstables(env, compaction::compaction_descriptor(std::move(all), 1, 0), cf, sst_gen, replacer).get().new_sstables; + }; + auto make_insert = [&] (const dht::decorated_key& key) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 1 /* ts */); + return m; + }; + + const auto keys = tests::generate_partition_keys(16, s, local_shard_only::yes, tests::key_size{8, 8}); + std::unordered_set sstables; + std::vector> observers; + sstables::sstable_run_based_compaction_strategy_for_tests cs; + + auto do_replace = [&] 
(const std::vector& old_sstables, const std::vector& new_sstables) { + for (auto& old_sst : old_sstables) { + BOOST_REQUIRE(sstables.contains(old_sst)); + sstables.erase(old_sst); } + for (auto& new_sst : new_sstables) { + BOOST_REQUIRE(!sstables.contains(new_sst)); + sstables.insert(new_sst); + } + column_family_test(cf).rebuild_sstable_list(cf.as_compaction_group_view(), new_sstables, old_sstables).get(); + env.test_compaction_manager().propagate_replacement(cf.as_compaction_group_view(), old_sstables, new_sstables); + }; - // Start a scrub on the table; Use an injector to pause the scrub after it has collected the sstables to be scrubbed. - utils::get_local_injector().enable("sstable_validate/pause"); - compaction::compaction_type_options::scrub opts = {}; - opts.operation_mode = compaction::compaction_type_options::scrub::mode::validate; - auto scrub_task = cf->get_compaction_manager().perform_sstable_scrub(cf.as_compaction_group_view(), opts, {}); + auto do_incremental_replace = [&] (auto old_sstables, auto new_sstables, auto& expected_sst, auto& closed_sstables_tracker) { + // that's because each sstable will contain only 1 mutation. 
+ BOOST_REQUIRE_EQUAL(old_sstables.size(), 1); + BOOST_REQUIRE_EQUAL(new_sstables.size(), 1); + auto old_sstable = old_sstables.front(); + // check that sstable replacement follows token order + BOOST_REQUIRE_EQUAL(*expected_sst, old_sstable->generation()); + expected_sst++; + // check that previously released sstables were already closed + BOOST_REQUIRE_EQUAL(*closed_sstables_tracker, old_sstable->generation()); - // When the scrub is paused, compact the two sstables in the table; this should not affect the scrub - cf->get_compaction_manager().perform_major_compaction(cf.as_compaction_group_view(), {}).get(); + do_replace(old_sstables, new_sstables); - // Now resume the scrub and ensure it completes without error - utils::get_local_injector().receive_message("sstable_validate/pause"); - BOOST_REQUIRE_EQUAL(scrub_task.get().value().validation_errors, 0); + observers.push_back(old_sstable->add_on_closed_handler([&] (sstable& sst) { + testlog.info("Closing sstable of generation {}", sst.generation()); + closed_sstables_tracker++; + })); - // Test the reverse case : start a compaction and pause it, then start a scrub --validate - utils::get_local_injector().enable("major_compaction_wait"); - auto compaction_task = cf->get_compaction_manager().perform_major_compaction(cf.as_compaction_group_view(), {}); - // Perform scrub --validate while compaction is in progress - scrub_task = cf->get_compaction_manager().perform_sstable_scrub(cf.as_compaction_group_view(), opts, {}); - // Resume compaction and ensure that it doesn't interfere with the scrub - utils::get_local_injector().receive_message("major_compaction_wait"); - BOOST_REQUIRE_EQUAL(scrub_task.get().value().validation_errors, 0); - compaction_task.get(); - }); + testlog.info("Removing sstable of generation {}, refcnt: {}", old_sstable->generation(), old_sstable.use_count()); + }; + + auto do_compaction = [&] (size_t expected_input, size_t expected_output) mutable -> std::vector { + auto input_ssts = 
std::vector(sstables.begin(), sstables.end()); + auto desc = get_sstables_for_compaction(cs, cf.as_compaction_group_view(), std::move(input_ssts)).get(); + + // nothing to compact, move on. + if (desc.sstables.empty()) { + return {}; + } + std::unordered_set run_ids; + bool incremental_enabled = std::any_of(desc.sstables.begin(), desc.sstables.end(), [&run_ids] (shared_sstable& sst) { + return !run_ids.insert(sst->run_identifier()).second; + }); + + BOOST_REQUIRE_EQUAL(desc.sstables.size(), expected_input); + auto sstable_run = desc.sstables + | std::views::transform([] (auto& sst) { return sst->generation(); }) + | std::ranges::to>(); + auto expected_sst = sstable_run.begin(); + auto closed_sstables_tracker = sstable_run.begin(); + auto replacer = [&] (compaction::compaction_completion_desc desc) { + auto old_sstables = std::move(desc.old_sstables); + auto new_sstables = std::move(desc.new_sstables); + BOOST_REQUIRE(expected_sst != sstable_run.end()); + if (incremental_enabled) { + do_incremental_replace(std::move(old_sstables), std::move(new_sstables), expected_sst, closed_sstables_tracker); + } else { + do_replace(std::move(old_sstables), std::move(new_sstables)); + expected_sst = sstable_run.end(); + } + }; + + auto result = compact(std::move(desc.sstables), replacer); + BOOST_REQUIRE_EQUAL(expected_output, result.size()); + BOOST_REQUIRE(expected_sst == sstable_run.end()); + return result; + }; + + // Generate 4 sstable runs composed of 4 fragments each after 4 compactions. + // All fragments non-overlapping. 
+ for (auto i = 0U; i < keys.size(); i++) { + auto sst = make_sstable_containing(sst_gen, { make_insert(keys[i]) }); + sst->set_sstable_level(1); + BOOST_REQUIRE_EQUAL(sst->get_sstable_level(), 1); + column_family_test(cf).add_sstable(sst).get(); + sstables.insert(std::move(sst)); + do_compaction(4, 4); + } + BOOST_REQUIRE_EQUAL(sstables.size(), 16); + + // Generate 1 sstable run from 4 sstables runs of similar size + auto result = do_compaction(16, 16); + BOOST_REQUIRE_EQUAL(result.size(), 16); + for (auto i = 0U; i < keys.size(); i++) { + assert_that(sstable_reader(result[i], s, env.make_reader_permit())) + .produces(make_insert(keys[i])) + .produces_end_of_stream(); + } } SEASTAR_TEST_CASE(sstable_run_based_compaction_test) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "sstable_run_based_compaction_test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - auto s = builder.build(); + return test_env::do_with_async([](test_env& env) { sstable_run_based_compaction_fn(env); }); +} - auto sst_gen = env.make_sst_factory(s); +SEASTAR_TEST_CASE(sstable_run_based_compaction_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { sstable_run_based_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->start(); - cf->set_compaction_strategy(compaction::compaction_strategy_type::size_tiered); - auto compact = [&, s] (std::vector all, auto replacer) -> std::vector { - return compact_sstables(env, compaction::compaction_descriptor(std::move(all), 1, 0), cf, sst_gen, replacer).get().new_sstables; - }; +SEASTAR_FIXTURE_TEST_CASE(sstable_run_based_compaction_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return 
test_env::do_with_async([](test_env& env) { sstable_run_based_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void compaction_strategy_aware_major_compaction_fn(test_env& env) { + auto s = schema_builder("tests", "compaction_strategy_aware_major_compaction_test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type).build(); + + auto make_insert = [&] (partition_key key) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), api::timestamp_type(0)); + return m; + }; + + auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")}); + auto sst = make_sstable_containing(env.make_sstable(s), {make_insert(alpha)}); + sst->set_sstable_level(2); + auto sst2 = make_sstable_containing(env.make_sstable(s), {make_insert(alpha)}); + sst2->set_sstable_level(3); + auto candidates = std::vector({ sst, sst2 }); + + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + + { + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::leveled, cf.schema()->compaction_strategy_options()); + auto descriptor = cs.get_major_compaction_job(cf.as_compaction_group_view(), candidates); + BOOST_REQUIRE(descriptor.sstables.size() == candidates.size()); + BOOST_REQUIRE(uint32_t(descriptor.level) == compaction::leveled_compaction_strategy::ideal_level_for_input(candidates, 160*1024*1024)); + } + + { + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, cf.schema()->compaction_strategy_options()); + auto descriptor = cs.get_major_compaction_job(cf.as_compaction_group_view(), candidates); + BOOST_REQUIRE(descriptor.sstables.size() == candidates.size()); + BOOST_REQUIRE(descriptor.level == 0); + } +} + +SEASTAR_TEST_CASE(compaction_strategy_aware_major_compaction_test) { + return test_env::do_with_async([](test_env& env) { 
compaction_strategy_aware_major_compaction_fn(env); }); +} + +SEASTAR_TEST_CASE(compaction_strategy_aware_major_compaction_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { compaction_strategy_aware_major_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(compaction_strategy_aware_major_compaction_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { compaction_strategy_aware_major_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void backlog_tracker_correctness_after_changing_compaction_strategy_fn(test_env& env) { + auto builder = schema_builder("tests", "backlog_tracker_correctness_after_changing_compaction_strategy") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + auto s = builder.build(); + + auto sst_gen = env.make_sst_factory(s); + + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->set_compaction_strategy(compaction::compaction_strategy_type::leveled); + + { + const auto keys = tests::generate_partition_keys(4, s); auto make_insert = [&] (const dht::decorated_key& key) { mutation m(s, key); m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 1 /* ts */); return m; }; - - const auto keys = tests::generate_partition_keys(16, s, local_shard_only::yes, tests::key_size{8, 8}); - std::unordered_set sstables; - std::vector> observers; - sstables::sstable_run_based_compaction_strategy_for_tests cs; - - auto do_replace = [&] (const std::vector& old_sstables, const std::vector& new_sstables) { - for (auto& old_sst : old_sstables) { - BOOST_REQUIRE(sstables.contains(old_sst)); - sstables.erase(old_sst); - } - for (auto& new_sst : new_sstables) { - 
BOOST_REQUIRE(!sstables.contains(new_sst)); - sstables.insert(new_sst); - } - column_family_test(cf).rebuild_sstable_list(cf.as_compaction_group_view(), new_sstables, old_sstables).get(); - env.test_compaction_manager().propagate_replacement(cf.as_compaction_group_view(), old_sstables, new_sstables); + auto mut1 = make_insert(keys[0]); + auto mut2 = make_insert(keys[1]); + auto mut3 = make_insert(keys[2]); + auto mut4 = make_insert(keys[3]); + std::vector ssts = { + make_sstable_containing(sst_gen, {mut1, mut2}), + make_sstable_containing(sst_gen, {mut3, mut4}) }; - auto do_incremental_replace = [&] (auto old_sstables, auto new_sstables, auto& expected_sst, auto& closed_sstables_tracker) { - // that's because each sstable will contain only 1 mutation. - BOOST_REQUIRE_EQUAL(old_sstables.size(), 1); - BOOST_REQUIRE_EQUAL(new_sstables.size(), 1); - auto old_sstable = old_sstables.front(); - // check that sstable replacement follows token order - BOOST_REQUIRE_EQUAL(*expected_sst, old_sstable->generation()); - expected_sst++; - // check that previously released sstables were already closed - BOOST_REQUIRE_EQUAL(*closed_sstables_tracker, old_sstable->generation()); - - do_replace(old_sstables, new_sstables); - - observers.push_back(old_sstable->add_on_closed_handler([&] (sstable& sst) { - testlog.info("Closing sstable of generation {}", sst.generation()); - closed_sstables_tracker++; - })); - - testlog.info("Removing sstable of generation {}, refcnt: {}", old_sstable->generation(), old_sstable.use_count()); - }; - - auto do_compaction = [&] (size_t expected_input, size_t expected_output) mutable -> std::vector { - auto input_ssts = std::vector(sstables.begin(), sstables.end()); - auto desc = get_sstables_for_compaction(cs, cf.as_compaction_group_view(), std::move(input_ssts)).get(); - - // nothing to compact, move on. 
- if (desc.sstables.empty()) { - return {}; - } - std::unordered_set run_ids; - bool incremental_enabled = std::any_of(desc.sstables.begin(), desc.sstables.end(), [&run_ids] (shared_sstable& sst) { - return !run_ids.insert(sst->run_identifier()).second; - }); - - BOOST_REQUIRE_EQUAL(desc.sstables.size(), expected_input); - auto sstable_run = desc.sstables - | std::views::transform([] (auto& sst) { return sst->generation(); }) - | std::ranges::to>(); - auto expected_sst = sstable_run.begin(); - auto closed_sstables_tracker = sstable_run.begin(); - auto replacer = [&] (compaction::compaction_completion_desc desc) { - auto old_sstables = std::move(desc.old_sstables); - auto new_sstables = std::move(desc.new_sstables); - BOOST_REQUIRE(expected_sst != sstable_run.end()); - if (incremental_enabled) { - do_incremental_replace(std::move(old_sstables), std::move(new_sstables), expected_sst, closed_sstables_tracker); - } else { - do_replace(std::move(old_sstables), std::move(new_sstables)); - expected_sst = sstable_run.end(); - } - }; - - auto result = compact(std::move(desc.sstables), replacer); - BOOST_REQUIRE_EQUAL(expected_output, result.size()); - BOOST_REQUIRE(expected_sst == sstable_run.end()); - return result; - }; - - // Generate 4 sstable runs composed of 4 fragments each after 4 compactions. - // All fragments non-overlapping. 
- for (auto i = 0U; i < keys.size(); i++) { - auto sst = make_sstable_containing(sst_gen, { make_insert(keys[i]) }); - sst->set_sstable_level(1); - BOOST_REQUIRE_EQUAL(sst->get_sstable_level(), 1); - column_family_test(cf).add_sstable(sst).get(); - sstables.insert(std::move(sst)); - do_compaction(4, 4); - } - BOOST_REQUIRE_EQUAL(sstables.size(), 16); - - // Generate 1 sstable run from 4 sstables runs of similar size - auto result = do_compaction(16, 16); - BOOST_REQUIRE_EQUAL(result.size(), 16); - for (auto i = 0U; i < keys.size(); i++) { - assert_that(sstable_reader(result[i], s, env.make_reader_permit())) - .produces(make_insert(keys[i])) - .produces_end_of_stream(); - } - }); -} - -SEASTAR_TEST_CASE(compaction_strategy_aware_major_compaction_test) { - return test_env::do_with_async([] (test_env& env) { - auto s = schema_builder("tests", "compaction_strategy_aware_major_compaction_test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type).build(); - - auto make_insert = [&] (partition_key key) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), api::timestamp_type(0)); - return m; - }; - - auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")}); - auto sst = make_sstable_containing(env.make_sstable(s), {make_insert(alpha)}); - sst->set_sstable_level(2); - auto sst2 = make_sstable_containing(env.make_sstable(s), {make_insert(alpha)}); - sst2->set_sstable_level(3); - auto candidates = std::vector({ sst, sst2 }); - - auto cf = env.make_table_for_tests(); - auto close_cf = deferred_stop(cf); - - { - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::leveled, cf.schema()->compaction_strategy_options()); - auto descriptor = cs.get_major_compaction_job(cf.as_compaction_group_view(), candidates); - BOOST_REQUIRE(descriptor.sstables.size() == candidates.size()); - BOOST_REQUIRE(uint32_t(descriptor.level) == 
compaction::leveled_compaction_strategy::ideal_level_for_input(candidates, 160*1024*1024)); + for (auto& sst : ssts) { + cf.as_compaction_group_view().get_backlog_tracker().replace_sstables({}, {sst}); } - { - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, cf.schema()->compaction_strategy_options()); - auto descriptor = cs.get_major_compaction_job(cf.as_compaction_group_view(), candidates); - BOOST_REQUIRE(descriptor.sstables.size() == candidates.size()); - BOOST_REQUIRE(descriptor.level == 0); + // Start compaction, then stop tracking compaction, switch to TWCS, wait for compaction to finish and check for backlog. + // That's done to SCYLLA_ASSERT backlog will work for compaction that is finished and was stopped tracking. + + auto fut = compact_sstables(env, compaction::compaction_descriptor(ssts), cf, sst_gen); + + // set_compaction_strategy() itself is responsible for transferring charges from old to new backlog tracker. + cf->set_compaction_strategy(compaction::compaction_strategy_type::time_window); + for (auto& sst : ssts) { + cf.as_compaction_group_view().get_backlog_tracker().replace_sstables({}, {sst}); } - }); + + auto ret = fut.get(); + BOOST_REQUIRE(ret.new_sstables.size() == 1); + } + // triggers code that iterates through registered compactions. 
+ cf->get_compaction_manager().backlog(); + cf.as_compaction_group_view().get_backlog_tracker().backlog(); } SEASTAR_TEST_CASE(backlog_tracker_correctness_after_changing_compaction_strategy) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "backlog_tracker_correctness_after_changing_compaction_strategy") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - auto s = builder.build(); + return test_env::do_with_async([](test_env& env) { backlog_tracker_correctness_after_changing_compaction_strategy_fn(env); }); +} - auto sst_gen = env.make_sst_factory(s); +SEASTAR_TEST_CASE(backlog_tracker_correctness_after_changing_compaction_strategy_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { backlog_tracker_correctness_after_changing_compaction_strategy_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->set_compaction_strategy(compaction::compaction_strategy_type::leveled); +SEASTAR_FIXTURE_TEST_CASE(backlog_tracker_correctness_after_changing_compaction_strategy_gcs, + gcs_fixture, + *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { backlog_tracker_correctness_after_changing_compaction_strategy_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - { - const auto keys = tests::generate_partition_keys(4, s); - auto make_insert = [&] (const dht::decorated_key& key) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 1 /* ts */); - return m; - }; - auto mut1 = make_insert(keys[0]); - auto mut2 = make_insert(keys[1]); - auto mut3 = make_insert(keys[2]); - auto mut4 = make_insert(keys[3]); - std::vector ssts = { - 
make_sstable_containing(sst_gen, {mut1, mut2}), - make_sstable_containing(sst_gen, {mut3, mut4}) - }; +void partial_sstable_run_filtered_out_fn(test_env& env) { + BOOST_REQUIRE(smp::count == 1); + auto s = schema_builder("tests", "partial_sstable_run_filtered_out_test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type).build(); - for (auto& sst : ssts) { - cf.as_compaction_group_view().get_backlog_tracker().replace_sstables({}, {sst}); - } + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->start(); - // Start compaction, then stop tracking compaction, switch to TWCS, wait for compaction to finish and check for backlog. - // That's done to SCYLLA_ASSERT backlog will work for compaction that is finished and was stopped tracking. + sstables::run_id partial_sstable_run_identifier = sstables::run_id::create_random_id(); + mutation mut(s, partition_key::from_exploded(*s, {to_bytes("alpha")})); + mut.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 0); - auto fut = compact_sstables(env, compaction::compaction_descriptor(ssts), cf, sst_gen); + sstable_writer_config sst_cfg = env.manager().configure_writer(); + sst_cfg.run_identifier = partial_sstable_run_identifier; + auto partial_sstable_run_sst = make_sstable_easy(env, make_mutation_reader_from_mutations(s, env.make_reader_permit(), std::move(mut)), sst_cfg); - // set_compaction_strategy() itself is responsible for transferring charges from old to new backlog tracker. - cf->set_compaction_strategy(compaction::compaction_strategy_type::time_window); - for (auto& sst : ssts) { - cf.as_compaction_group_view().get_backlog_tracker().replace_sstables({}, {sst}); - } + column_family_test(cf).add_sstable(partial_sstable_run_sst).get(); - auto ret = fut.get(); - BOOST_REQUIRE(ret.new_sstables.size() == 1); - } - // triggers code that iterates through registered compactions. 
- cf->get_compaction_manager().backlog(); - cf.as_compaction_group_view().get_backlog_tracker().backlog(); - }); + auto generation_exists = [&cf] (sstables::generation_type generation) { + auto sstables = cf->get_sstables(); + auto entry = std::ranges::find_if(*sstables, [generation] (shared_sstable sst) { return generation == sst->generation(); }); + return entry != sstables->end(); + }; + + BOOST_REQUIRE(generation_exists(partial_sstable_run_sst->generation())); + + // register partial sstable run + run_compaction_task(env, partial_sstable_run_identifier, cf.as_compaction_group_view(), [&cf] (compaction::compaction_data&) { + return cf->compact_all_sstables(tasks::task_info{}); + }).get(); + + // make sure partial sstable run has none of its fragments compacted. + BOOST_REQUIRE(generation_exists(partial_sstable_run_sst->generation())); } SEASTAR_TEST_CASE(partial_sstable_run_filtered_out_test) { - BOOST_REQUIRE(smp::count == 1); - return test_env::do_with_async([] (test_env& env) { - auto s = schema_builder("tests", "partial_sstable_run_filtered_out_test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type).build(); + return test_env::do_with_async([](test_env& env) { partial_sstable_run_filtered_out_fn(env); }); +} - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->start(); +SEASTAR_TEST_CASE(partial_sstable_run_filtered_out_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { partial_sstable_run_filtered_out_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - sstables::run_id partial_sstable_run_identifier = sstables::run_id::create_random_id(); - mutation mut(s, partition_key::from_exploded(*s, {to_bytes("alpha")})); - mut.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 0); - - sstable_writer_config sst_cfg = 
env.manager().configure_writer(); - sst_cfg.run_identifier = partial_sstable_run_identifier; - auto partial_sstable_run_sst = make_sstable_easy(env, make_mutation_reader_from_mutations(s, env.make_reader_permit(), std::move(mut)), sst_cfg); - - column_family_test(cf).add_sstable(partial_sstable_run_sst).get(); - - auto generation_exists = [&cf] (sstables::generation_type generation) { - auto sstables = cf->get_sstables(); - auto entry = std::ranges::find_if(*sstables, [generation] (shared_sstable sst) { return generation == sst->generation(); }); - return entry != sstables->end(); - }; - - BOOST_REQUIRE(generation_exists(partial_sstable_run_sst->generation())); - - // register partial sstable run - run_compaction_task(env, partial_sstable_run_identifier, cf.as_compaction_group_view(), [&cf] (compaction::compaction_data&) { - return cf->compact_all_sstables(tasks::task_info{}); - }).get(); - - // make sure partial sstable run has none of its fragments compacted. - BOOST_REQUIRE(generation_exists(partial_sstable_run_sst->generation())); - }); +SEASTAR_FIXTURE_TEST_CASE(partial_sstable_run_filtered_out_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { partial_sstable_run_filtered_out_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); } // Make sure that a custom tombstone-gced-only writer will be fed with gc'able tombstone // from the regular compaction's input sstable. 
-SEASTAR_TEST_CASE(purged_tombstone_consumer_sstable_test) { +void purged_tombstone_consumer_sstable_fn(test_env& env) { BOOST_REQUIRE(smp::count == 1); - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "purged_tombstone_consumer_sstable_test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - builder.set_gc_grace_seconds(0); - auto s = builder.build(); + auto builder = schema_builder("tests", "purged_tombstone_consumer_sstable_test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + builder.set_gc_grace_seconds(0); + auto s = builder.build(); - class compacting_sstable_writer_test { - shared_sstable& _sst; - sstable_writer _writer; - public: - explicit compacting_sstable_writer_test(const schema_ptr& s, shared_sstable& sst, sstables_manager& manager) - : _sst(sst), - _writer(sst->get_writer(*s, 1, manager.configure_writer("test"), encoding_stats{})) {} + class compacting_sstable_writer_test { + shared_sstable& _sst; + sstable_writer _writer; + public: + explicit compacting_sstable_writer_test(const schema_ptr& s, shared_sstable& sst, sstables_manager& manager) + : _sst(sst), + _writer(sst->get_writer(*s, 1, manager.configure_writer("test"), encoding_stats{})) {} - void consume_new_partition(const dht::decorated_key& dk) { _writer.consume_new_partition(dk); } - void consume(tombstone t) { _writer.consume(t); } - stop_iteration consume(static_row&& sr, tombstone, bool) { return _writer.consume(std::move(sr)); } - stop_iteration consume(clustering_row&& cr, row_tombstone tomb, bool) { return _writer.consume(std::move(cr)); } - stop_iteration consume(range_tombstone_change&& rtc) { return _writer.consume(std::move(rtc)); } + void consume_new_partition(const dht::decorated_key& dk) { _writer.consume_new_partition(dk); } + void consume(tombstone t) { _writer.consume(t); } + stop_iteration consume(static_row&& sr, tombstone, bool) 
{ return _writer.consume(std::move(sr)); } + stop_iteration consume(clustering_row&& cr, row_tombstone tomb, bool) { return _writer.consume(std::move(cr)); } + stop_iteration consume(range_tombstone_change&& rtc) { return _writer.consume(std::move(rtc)); } - stop_iteration consume_end_of_partition() { return _writer.consume_end_of_partition(); } - void consume_end_of_stream() { _writer.consume_end_of_stream(); _sst->open_data().get(); } + stop_iteration consume_end_of_partition() { return _writer.consume_end_of_partition(); } + void consume_end_of_stream() { _writer.consume_end_of_stream(); _sst->open_data().get(); } + }; + + std::optional gc_before; + auto max_purgeable_ts = api::max_timestamp; + auto is_tombstone_purgeable = [&gc_before, max_purgeable_ts](const tombstone& t) { + bool can_gc = t.deletion_time < *gc_before; + return t && can_gc && t.timestamp < max_purgeable_ts; + }; + + auto compact = [&] (std::vector all) -> std::pair { + auto max_purgeable_func = [max_purgeable_ts] (const dht::decorated_key& dk, is_shadowable) -> max_purgeable { + return max_purgeable(max_purgeable_ts); }; - std::optional gc_before; - auto max_purgeable_ts = api::max_timestamp; - auto is_tombstone_purgeable = [&gc_before, max_purgeable_ts](const tombstone& t) { - bool can_gc = t.deletion_time < *gc_before; - return t && can_gc && t.timestamp < max_purgeable_ts; - }; + auto non_purged = env.make_sstable(s); + auto purged_only = env.make_sstable(s); - auto compact = [&] (std::vector all) -> std::pair { - auto max_purgeable_func = [max_purgeable_ts] (const dht::decorated_key& dk, is_shadowable) -> max_purgeable { - return max_purgeable(max_purgeable_ts); - }; + auto cr = compacting_sstable_writer_test(s, non_purged, env.manager()); + auto purged_cr = compacting_sstable_writer_test(s, purged_only, env.manager()); - auto non_purged = env.make_sstable(s); - auto purged_only = env.make_sstable(s); + auto gc_now = gc_clock::now(); + gc_before = gc_now - s->gc_grace_seconds(); - auto cr 
= compacting_sstable_writer_test(s, non_purged, env.manager()); - auto purged_cr = compacting_sstable_writer_test(s, purged_only, env.manager()); - - auto gc_now = gc_clock::now(); - gc_before = gc_now - s->gc_grace_seconds(); - - auto cfc = compact_for_compaction( + auto cfc = compact_for_compaction( *s, gc_now, max_purgeable_func, tombstone_gc_state::for_tests(), std::move(cr), std::move(purged_cr)); - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, s->compaction_strategy_options()); - auto compacting = make_lw_shared(env.make_sstable_set(cs, s)); - for (auto&& sst : all) { - compacting->insert(std::move(sst)); - } - auto r = compacting->make_range_sstable_reader(s, - env.make_reader_permit(), - query::full_partition_range, - s->full_slice(), - nullptr, - ::streamed_mutation::forwarding::no, - ::mutation_reader::forwarding::no); - - auto close_r = deferred_close(r); - r.consume_in_thread(std::move(cfc)); - - return {std::move(non_purged), std::move(purged_only)}; - }; - - auto next_timestamp = [] { - static thread_local api::timestamp_type next = 1; - return next++; - }; - - auto make_insert = [&] (partition_key key) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), next_timestamp()); - return m; - }; - - auto make_delete = [&] (partition_key key) -> std::pair { - mutation m(s, key); - tombstone tomb(next_timestamp(), gc_clock::now()); - m.partition().apply(tomb); - return {m, tomb}; - }; - - auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")}); - auto beta = partition_key::from_exploded(*s, {to_bytes("beta")}); - - auto ttl = 5; - - auto assert_that_produces_purged_tombstone = [&] (auto& sst, partition_key& key, tombstone tomb) { - auto reader = make_lw_shared(sstable_reader(sst, s, env.make_reader_permit())); - read_mutation_from_mutation_reader(*reader).then([reader, s, &key, is_tombstone_purgeable, &tomb] (mutation_opt m) { - 
BOOST_REQUIRE(m); - BOOST_REQUIRE(m->key().equal(*s, key)); - auto rows = m->partition().clustered_rows(); - BOOST_REQUIRE_EQUAL(rows.calculate_size(), 0); - BOOST_REQUIRE(is_tombstone_purgeable(m->partition().partition_tombstone())); - BOOST_REQUIRE(m->partition().partition_tombstone() == tomb); - return (*reader)(); - }).then([reader, s] (mutation_fragment_v2_opt m) { - BOOST_REQUIRE(!m); - }).finally([reader] { - return reader->close(); - }).get(); - }; - - // gc'ed tombstone for alpha will go to gc-only consumer, whereas live data goes to regular consumer. - { - auto mut1 = make_insert(alpha); - auto mut2 = make_insert(beta); - auto [mut3, mut3_tombstone] = make_delete(alpha); - - std::vector sstables = { - make_sstable_containing(env.make_sstable(s), {mut1, mut2}), - make_sstable_containing(env.make_sstable(s), {mut3}) - }; - - forward_jump_clocks(std::chrono::seconds(ttl)); - - auto [non_purged, purged_only] = compact(std::move(sstables)); - - assert_that(sstable_reader(non_purged, s, env.make_reader_permit())) - .produces(mut2) - .produces_end_of_stream(); - - assert_that_produces_purged_tombstone(purged_only, alpha, mut3_tombstone); + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, s->compaction_strategy_options()); + auto compacting = make_lw_shared(env.make_sstable_set(cs, s)); + for (auto&& sst : all) { + compacting->insert(std::move(sst)); } - }); + auto r = compacting->make_range_sstable_reader(s, + env.make_reader_permit(), + query::full_partition_range, + s->full_slice(), + nullptr, + ::streamed_mutation::forwarding::no, + ::mutation_reader::forwarding::no); + + auto close_r = deferred_close(r); + r.consume_in_thread(std::move(cfc)); + + return {std::move(non_purged), std::move(purged_only)}; + }; + + auto next_timestamp = [] { + static thread_local api::timestamp_type next = 1; + return next++; + }; + + auto make_insert = [&] (partition_key key) { + mutation m(s, key); + 
m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), next_timestamp()); + return m; + }; + + auto make_delete = [&] (partition_key key) -> std::pair { + mutation m(s, key); + tombstone tomb(next_timestamp(), gc_clock::now()); + m.partition().apply(tomb); + return {m, tomb}; + }; + + auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")}); + auto beta = partition_key::from_exploded(*s, {to_bytes("beta")}); + + auto ttl = 5; + + auto assert_that_produces_purged_tombstone = [&] (auto& sst, partition_key& key, tombstone tomb) { + auto reader = make_lw_shared(sstable_reader(sst, s, env.make_reader_permit())); + read_mutation_from_mutation_reader(*reader).then([reader, s, &key, is_tombstone_purgeable, &tomb] (mutation_opt m) { + BOOST_REQUIRE(m); + BOOST_REQUIRE(m->key().equal(*s, key)); + auto rows = m->partition().clustered_rows(); + BOOST_REQUIRE_EQUAL(rows.calculate_size(), 0); + BOOST_REQUIRE(is_tombstone_purgeable(m->partition().partition_tombstone())); + BOOST_REQUIRE(m->partition().partition_tombstone() == tomb); + return (*reader)(); + }).then([reader, s] (mutation_fragment_v2_opt m) { + BOOST_REQUIRE(!m); + }).finally([reader] { + return reader->close(); + }).get(); + }; + + // gc'ed tombstone for alpha will go to gc-only consumer, whereas live data goes to regular consumer. 
+ { + auto mut1 = make_insert(alpha); + auto mut2 = make_insert(beta); + auto [mut3, mut3_tombstone] = make_delete(alpha); + + std::vector sstables = { + make_sstable_containing(env.make_sstable(s), {mut1, mut2}), + make_sstable_containing(env.make_sstable(s), {mut3}) + }; + + forward_jump_clocks(std::chrono::seconds(ttl)); + + auto [non_purged, purged_only] = compact(std::move(sstables)); + + assert_that(sstable_reader(non_purged, s, env.make_reader_permit())) + .produces(mut2) + .produces_end_of_stream(); + + assert_that_produces_purged_tombstone(purged_only, alpha, mut3_tombstone); + } +} + +SEASTAR_TEST_CASE(purged_tombstone_consumer_sstable_test) { + return test_env::do_with_async([](test_env& env) { purged_tombstone_consumer_sstable_fn(env); }); +} + +SEASTAR_TEST_CASE(purged_tombstone_consumer_sstable_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { purged_tombstone_consumer_sstable_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(purged_tombstone_consumer_sstable_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { purged_tombstone_consumer_sstable_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); } /* Make sure data is not resurrected. @@ -3966,309 +4522,352 @@ SEASTAR_TEST_CASE(purged_tombstone_consumer_sstable_test) { if key A can be read from table, data was resurrected. */ -SEASTAR_TEST_CASE(incremental_compaction_data_resurrection_test) { - return test_env::do_with_async([] (test_env& env) { - // In a column family with gc_grace_seconds set to 0, check that a tombstone - // is purged after compaction. 
- auto builder = schema_builder("tests", "incremental_compaction_data_resurrection_test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - builder.set_gc_grace_seconds(0); - auto s = builder.build(); - auto sst_gen = env.make_sst_factory(s); +void incremental_compaction_data_resurrection_fn(test_env& env) { + // In a column family with gc_grace_seconds set to 0, check that a tombstone + // is purged after compaction. + auto builder = schema_builder("tests", "incremental_compaction_data_resurrection_test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + builder.set_gc_grace_seconds(0); + auto s = builder.build(); - auto next_timestamp = [] { - static thread_local api::timestamp_type next = 1; - return next++; - }; + auto sst_gen = env.make_sst_factory(s); - auto make_insert = [&] (const dht::decorated_key& key) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), next_timestamp()); - return m; - }; + auto next_timestamp = [] { + static thread_local api::timestamp_type next = 1; + return next++; + }; - auto deletion_time = gc_clock::now(); - auto make_delete = [&] (const dht::decorated_key& key) { - mutation m(s, key); - tombstone tomb(next_timestamp(), deletion_time); - m.partition().apply(tomb); - return m; - }; + auto make_insert = [&] (const dht::decorated_key& key) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), next_timestamp()); + return m; + }; - const auto keys = tests::generate_partition_keys(4, s); - const auto& alpha = keys[0]; - const auto& beta = keys[1]; - const auto& gamma = keys[2]; - const auto& zetta = keys[3]; + auto deletion_time = gc_clock::now(); + auto make_delete = [&] (const dht::decorated_key& key) { + mutation m(s, key); + tombstone tomb(next_timestamp(), deletion_time); + m.partition().apply(tomb); + return 
m; + }; - auto ttl = 5; + const auto keys = tests::generate_partition_keys(4, s); + const auto& alpha = keys[0]; + const auto& beta = keys[1]; + const auto& gamma = keys[2]; + const auto& zetta = keys[3]; - auto mut1 = make_insert(alpha); - auto mut2 = make_insert(beta); - auto mut3 = make_insert(gamma); - auto mut4 = make_insert(zetta); - auto mut1_deletion = make_delete(alpha); + auto ttl = 5; - auto non_expired_sst = make_sstable_containing(sst_gen, {mut1, mut2, mut3}); - auto non_expired_sst_2 = make_sstable_containing(sst_gen, {mut4}); - auto expired_sst = make_sstable_containing(sst_gen, {mut1_deletion}); + auto mut1 = make_insert(alpha); + auto mut2 = make_insert(beta); + auto mut3 = make_insert(gamma); + auto mut4 = make_insert(zetta); + auto mut1_deletion = make_delete(alpha); - std::vector sstables = { - non_expired_sst, - non_expired_sst_2, - expired_sst, - }; + auto non_expired_sst = make_sstable_containing(sst_gen, {mut1, mut2, mut3}); + auto non_expired_sst_2 = make_sstable_containing(sst_gen, {mut4}); + auto expired_sst = make_sstable_containing(sst_gen, {mut1_deletion}); - // make mut1_deletion gc'able. - forward_jump_clocks(std::chrono::seconds(ttl)); + std::vector sstables = { + non_expired_sst, + non_expired_sst_2, + expired_sst, + }; - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->start(); - cf->set_compaction_strategy(compaction::compaction_strategy_type::null); + // make mut1_deletion gc'able. 
+ forward_jump_clocks(std::chrono::seconds(ttl)); - // since we use compacting_reader expired tombstones shouldn't be read from sstables - // so we just check there are no live (resurrected for this test) rows in partition - auto is_partition_dead = [&s, &cf, &env] (const dht::decorated_key& key) { - replica::column_family::const_mutation_partition_ptr mp = cf->find_partition(s, env.make_reader_permit(), key).get(); - return mp && mp->live_row_count(*s, gc_clock::time_point::max()) == 0; - }; + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->start(); + cf->set_compaction_strategy(compaction::compaction_strategy_type::null); - cf->add_sstable_and_update_cache(non_expired_sst).get(); - BOOST_REQUIRE(!is_partition_dead(alpha)); - cf->add_sstable_and_update_cache(expired_sst).get(); - BOOST_REQUIRE(is_partition_dead(alpha)); + // since we use compacting_reader expired tombstones shouldn't be read from sstables + // so we just check there are no live (resurrected for this test) rows in partition + auto is_partition_dead = [&s, &cf, &env] (const dht::decorated_key& key) { + replica::column_family::const_mutation_partition_ptr mp = cf->find_partition(s, env.make_reader_permit(), key).get(); + return mp && mp->live_row_count(*s, gc_clock::time_point::max()) == 0; + }; - auto replacer = [&] (compaction::compaction_completion_desc desc) { - auto old_sstables = std::move(desc.old_sstables); - auto new_sstables = std::move(desc.new_sstables); - // expired_sst is exhausted, and new sstable is written with mut 2. - BOOST_REQUIRE_EQUAL(old_sstables.size(), 1); - BOOST_REQUIRE(old_sstables.front() == expired_sst); - BOOST_REQUIRE_EQUAL(new_sstables.size(), 2); - for (auto& new_sstable : new_sstables) { - if (new_sstable->get_max_local_deletion_time() == deletion_time) { // Skipping GC SSTable. 
- continue; - } - assert_that(sstable_reader(new_sstable, s, env.make_reader_permit())) - .produces(mut2) - .produces_end_of_stream(); + cf->add_sstable_and_update_cache(non_expired_sst).get(); + BOOST_REQUIRE(!is_partition_dead(alpha)); + cf->add_sstable_and_update_cache(expired_sst).get(); + BOOST_REQUIRE(is_partition_dead(alpha)); + + auto replacer = [&] (compaction::compaction_completion_desc desc) { + auto old_sstables = std::move(desc.old_sstables); + auto new_sstables = std::move(desc.new_sstables); + // expired_sst is exhausted, and new sstable is written with mut 2. + BOOST_REQUIRE_EQUAL(old_sstables.size(), 1); + BOOST_REQUIRE(old_sstables.front() == expired_sst); + BOOST_REQUIRE_EQUAL(new_sstables.size(), 2); + for (auto& new_sstable : new_sstables) { + if (new_sstable->get_max_local_deletion_time() == deletion_time) { // Skipping GC SSTable. + continue; } - column_family_test(cf).rebuild_sstable_list(cf.as_compaction_group_view(), new_sstables, old_sstables).get(); - // force compaction failure after sstable containing expired tombstone is removed from set. - throw std::runtime_error("forcing compaction failure on early replacement"); - }; - - // make ssts belong to same run for compaction to enable incremental approach. - // That needs to happen after fragments were inserted into sstable_set, as they'll placed into different runs due to detected overlapping. - auto run_id = sstables::run_id::create_random_id(); - sstables::test(non_expired_sst).set_run_identifier(run_id); - sstables::test(non_expired_sst_2).set_run_identifier(run_id); - - bool swallowed = false; - try { - // The goal is to have one sstable generated for each mutation to trigger the issue. - auto max_sstable_size = 0; - auto result = compact_sstables(env, compaction::compaction_descriptor(sstables, 0, max_sstable_size), cf, sst_gen, replacer).get().new_sstables; - BOOST_REQUIRE_EQUAL(2, result.size()); - } catch (...) 
{ - // swallow exception - swallowed = true; + assert_that(sstable_reader(new_sstable, s, env.make_reader_permit())) + .produces(mut2) + .produces_end_of_stream(); } - BOOST_REQUIRE(swallowed); - // check there's no data resurrection - BOOST_REQUIRE(is_partition_dead(alpha)); - }); + column_family_test(cf).rebuild_sstable_list(cf.as_compaction_group_view(), new_sstables, old_sstables).get(); + // force compaction failure after sstable containing expired tombstone is removed from set. + throw std::runtime_error("forcing compaction failure on early replacement"); + }; + + // make ssts belong to same run for compaction to enable incremental approach. + // That needs to happen after fragments were inserted into sstable_set, as they'll placed into different runs due to detected overlapping. + auto run_id = sstables::run_id::create_random_id(); + sstables::test(non_expired_sst).set_run_identifier(run_id); + sstables::test(non_expired_sst_2).set_run_identifier(run_id); + + bool swallowed = false; + try { + // The goal is to have one sstable generated for each mutation to trigger the issue. + auto max_sstable_size = 0; + auto result = compact_sstables(env, compaction::compaction_descriptor(sstables, 0, max_sstable_size), cf, sst_gen, replacer).get().new_sstables; + BOOST_REQUIRE_EQUAL(2, result.size()); + } catch (...) 
{ + // swallow exception + swallowed = true; + } + BOOST_REQUIRE(swallowed); + // check there's no data resurrection + BOOST_REQUIRE(is_partition_dead(alpha)); } -SEASTAR_TEST_CASE(twcs_major_compaction_test) { +SEASTAR_TEST_CASE(incremental_compaction_data_resurrection_test) { + return test_env::do_with_async([](test_env& env) { incremental_compaction_data_resurrection_fn(env); }); +} + +SEASTAR_TEST_CASE(incremental_compaction_data_resurrection_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { incremental_compaction_data_resurrection_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(incremental_compaction_data_resurrection_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { incremental_compaction_data_resurrection_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void twcs_major_compaction_fn(test_env& env) { // Tests that two mutations that were written a month apart are compacted // to two different SSTables, whereas two mutations that were written 1ms apart // are compacted to the same SSTable. - return test_env::do_with_async([] (test_env& env) { - // In a column family with gc_grace_seconds set to 0, check that a tombstone - // is purged after compaction. - auto builder = schema_builder("tests", "twcs_major") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", int32_type, column_kind::clustering_key) - .with_column("value", int32_type); - auto s = builder.build(); + // In a column family with gc_grace_seconds set to 0, check that a tombstone + // is purged after compaction. 
+ auto builder = schema_builder("tests", "twcs_major") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", int32_type, column_kind::clustering_key) + .with_column("value", int32_type); + auto s = builder.build(); - auto sst_gen = env.make_sst_factory(s); + auto sst_gen = env.make_sst_factory(s); - auto next_timestamp = [] (auto step) { - using namespace std::chrono; - return (api::timestamp_clock::now().time_since_epoch() - duration_cast(step)).count(); - }; + auto next_timestamp = [] (auto step) { + using namespace std::chrono; + return (api::timestamp_clock::now().time_since_epoch() - duration_cast(step)).count(); + }; - auto make_insert = [&] (api::timestamp_clock::duration step) { - static thread_local int32_t value = 1; + auto make_insert = [&] (api::timestamp_clock::duration step) { + static thread_local int32_t value = 1; - auto key = tests::generate_partition_key(s); + auto key = tests::generate_partition_key(s); - mutation m(s, key); - auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step)); - return m; - }; + mutation m(s, key); + auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step)); + return m; + }; - // Two mutations, one of them 30 days ago. Should be split when - // compacting - auto mut1 = make_insert(0ms); - auto mut2 = make_insert(720h); + // Two mutations, one of them 30 days ago. Should be split when + // compacting + auto mut1 = make_insert(0ms); + auto mut2 = make_insert(720h); - // Two mutations, close together. Should end up in the same SSTable - auto mut3 = make_insert(0ms); - auto mut4 = make_insert(1ms); + // Two mutations, close together. 
Should end up in the same SSTable + auto mut3 = make_insert(0ms); + auto mut4 = make_insert(1ms); - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->start(); - cf->set_compaction_strategy(compaction::compaction_strategy_type::time_window); + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->start(); + cf->set_compaction_strategy(compaction::compaction_strategy_type::time_window); - auto original_together = make_sstable_containing(sst_gen, {mut3, mut4}); + auto original_together = make_sstable_containing(sst_gen, {mut3, mut4}); - auto ret = compact_sstables(env, compaction::compaction_descriptor({original_together}), cf, sst_gen, replacer_fn_no_op()).get(); - BOOST_REQUIRE(ret.new_sstables.size() == 1); + auto ret = compact_sstables(env, compaction::compaction_descriptor({original_together}), cf, sst_gen, replacer_fn_no_op()).get(); + BOOST_REQUIRE(ret.new_sstables.size() == 1); - auto original_apart = make_sstable_containing(sst_gen, {mut1, mut2}); - ret = compact_sstables(env, compaction::compaction_descriptor({original_apart}), cf, sst_gen, replacer_fn_no_op()).get(); - BOOST_REQUIRE(ret.new_sstables.size() == 2); - }); + auto original_apart = make_sstable_containing(sst_gen, {mut1, mut2}); + ret = compact_sstables(env, compaction::compaction_descriptor({original_apart}), cf, sst_gen, replacer_fn_no_op()).get(); + BOOST_REQUIRE(ret.new_sstables.size() == 2); +} + +SEASTAR_TEST_CASE(twcs_major_compaction_test) { + return test_env::do_with_async([](test_env& env) { twcs_major_compaction_fn(env); }); +} + +SEASTAR_TEST_CASE(twcs_major_compaction_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { twcs_major_compaction_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(twcs_major_compaction_test_gcs, gcs_fixture, 
*tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { twcs_major_compaction_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void autocompaction_control_fn(test_env& env) { + auto s = schema_builder(some_keyspace, some_column_family) + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type) + .build(); + + auto cf = env.make_table_for_tests(s); + auto& cm = cf->get_compaction_manager(); + auto close_cf = deferred_stop(cf); + cf->set_compaction_strategy(compaction::compaction_strategy_type::size_tiered); + + // no compactions done yet + auto& ss = cm.get_stats(); + BOOST_REQUIRE(cm.get_stats().pending_tasks == 0 && cm.get_stats().active_tasks == 0 && ss.completed_tasks == 0); + // auto compaction is enabled by default + BOOST_REQUIRE(!cf->is_auto_compaction_disabled_by_user()); + // disable auto compaction by user + cf->disable_auto_compaction().get(); + // check it is disabled + BOOST_REQUIRE(cf->is_auto_compaction_disabled_by_user()); + + auto make_insert = [&] (const dht::decorated_key& key) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 1 /* ts */); + return m; + }; + auto min_threshold = cf->schema()->min_compaction_threshold(); + const auto keys = tests::generate_partition_keys(1, s); + for (auto i = 0; i < 2 * min_threshold; ++i) { + auto mut = make_insert(keys[0]); + auto sst = make_sstable_containing(env.make_sstable(s), {mut}); + cf->add_sstable_and_update_cache(sst).wait(); + } + + // check compaction manager does not receive background compaction submissions + cf->start(); + auto stop_cf = deferred_stop(*cf); + cf->trigger_compaction(); + cm.submit(cf.as_compaction_group_view()); + BOOST_REQUIRE(cm.get_stats().pending_tasks == 0 && cm.get_stats().active_tasks == 0 && ss.completed_tasks == 0); + // enable auto compaction + 
cf->enable_auto_compaction(); + // check enabled + BOOST_REQUIRE(!cf->is_auto_compaction_disabled_by_user()); + // trigger background compaction + cf->trigger_compaction(); + // wait until compaction finished + do_until([&ss] { return ss.completed_tasks > 0 && ss.pending_tasks == 0; }, [] { + return sleep(std::chrono::milliseconds(1)); + }).wait(); + // test no more running compactions + BOOST_REQUIRE(ss.active_tasks == 0); + // test compaction successfully finished + BOOST_REQUIRE(ss.errors == 0); + BOOST_REQUIRE(ss.completed_tasks == 1); } SEASTAR_TEST_CASE(autocompaction_control_test) { - return test_env::do_with_async([] (test_env& env) { - auto s = schema_builder(some_keyspace, some_column_family) - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type) - .build(); + return test_env::do_with_async([](test_env& env) { autocompaction_control_fn(env); }); +} - auto cf = env.make_table_for_tests(s); - auto& cm = cf->get_compaction_manager(); - auto close_cf = deferred_stop(cf); - cf->set_compaction_strategy(compaction::compaction_strategy_type::size_tiered); +SEASTAR_TEST_CASE(autocompaction_control_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { autocompaction_control_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} - // no compactions done yet - auto& ss = cm.get_stats(); - BOOST_REQUIRE(cm.get_stats().pending_tasks == 0 && cm.get_stats().active_tasks == 0 && ss.completed_tasks == 0); - // auto compaction is enabled by default - BOOST_REQUIRE(!cf->is_auto_compaction_disabled_by_user()); - // disable auto compaction by user - cf->disable_auto_compaction().get(); - // check it is disabled - BOOST_REQUIRE(cf->is_auto_compaction_disabled_by_user()); - - auto make_insert = [&] (const dht::decorated_key& key) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), 
data_value(int32_t(1)), 1 /* ts */); - return m; - }; - auto min_threshold = cf->schema()->min_compaction_threshold(); - const auto keys = tests::generate_partition_keys(1, s); - for (auto i = 0; i < 2 * min_threshold; ++i) { - auto mut = make_insert(keys[0]); - auto sst = make_sstable_containing(env.make_sstable(s), {mut}); - cf->add_sstable_and_update_cache(sst).wait(); - } - - // check compaction manager does not receive background compaction submissions - cf->start(); - auto stop_cf = deferred_stop(*cf); - cf->trigger_compaction(); - cm.submit(cf.as_compaction_group_view()); - BOOST_REQUIRE(cm.get_stats().pending_tasks == 0 && cm.get_stats().active_tasks == 0 && ss.completed_tasks == 0); - // enable auto compaction - cf->enable_auto_compaction(); - // check enabled - BOOST_REQUIRE(!cf->is_auto_compaction_disabled_by_user()); - // trigger background compaction - cf->trigger_compaction(); - // wait until compaction finished - do_until([&ss] { return ss.completed_tasks > 0 && ss.pending_tasks == 0; }, [] { - return sleep(std::chrono::milliseconds(1)); - }).wait(); - // test no more running compactions - BOOST_REQUIRE(ss.active_tasks == 0); - // test compaction successfully finished - BOOST_REQUIRE(ss.errors == 0); - BOOST_REQUIRE(ss.completed_tasks == 1); - }); +SEASTAR_FIXTURE_TEST_CASE(autocompaction_control_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { autocompaction_control_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); } // // Test that https://github.com/scylladb/scylla/issues/6472 is gone // -SEASTAR_TEST_CASE(test_bug_6472) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "test_bug_6472") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", int32_type, column_kind::clustering_key) - .with_column("value", int32_type); - 
builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); - std::map opts = { - { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS" }, - { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1" }, - }; - builder.set_compaction_strategy_options(std::move(opts)); - builder.set_gc_grace_seconds(0); - auto s = builder.build(); - auto sst_gen = env.make_sst_factory(s); +void test_bug_6472_fn(test_env& env) { + auto builder = schema_builder("tests", "test_bug_6472") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", int32_type, column_kind::clustering_key) + .with_column("value", int32_type); + builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); + std::map opts = { + { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS" }, + { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1" }, + }; + builder.set_compaction_strategy_options(std::move(opts)); + builder.set_gc_grace_seconds(0); + auto s = builder.build(); - auto next_timestamp = [] (auto step) { - using namespace std::chrono; - return (gc_clock::now().time_since_epoch() - duration_cast(step)).count(); - }; + auto sst_gen = env.make_sst_factory(s); - auto key = tests::generate_partition_key(s); + auto next_timestamp = [] (auto step) { + using namespace std::chrono; + return (gc_clock::now().time_since_epoch() - duration_cast(step)).count(); + }; - auto make_expiring_cell = [&] (std::chrono::hours step) { - static thread_local int32_t value = 1; + auto key = tests::generate_partition_key(s); - mutation m(s, key); - auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step), gc_clock::duration(step + 5s)); - return m; - }; + auto make_expiring_cell = [&] (std::chrono::hours step) { + 
static thread_local int32_t value = 1; - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->start(); + mutation m(s, key); + auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step), gc_clock::duration(step + 5s)); + return m; + }; - // Make 100 expiring cells which belong to different time windows - utils::chunked_vector muts; - muts.reserve(101); - for (auto i = 1; i < 101; i++) { - muts.push_back(make_expiring_cell(std::chrono::hours(i))); - } - muts.push_back(make_expiring_cell(std::chrono::hours(110))); + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->start(); - // - // Reproduce issue 6472 by making an input set which causes both interposer and GC writer to be enabled - // - std::vector sstables_spanning_many_windows = { - make_sstable_containing(sst_gen, muts), - make_sstable_containing(sst_gen, muts), - }; - sstables::run_id run_id = sstables::run_id::create_random_id(); - for (auto& sst : sstables_spanning_many_windows) { - sstables::test(sst).set_run_identifier(run_id); - } + // Make 100 expiring cells which belong to different time windows + utils::chunked_vector muts; + muts.reserve(101); + for (auto i = 1; i < 101; i++) { + muts.push_back(make_expiring_cell(std::chrono::hours(i))); + } + muts.push_back(make_expiring_cell(std::chrono::hours(110))); - // Make sure everything we wanted expired is expired by now. 
- forward_jump_clocks(std::chrono::hours(101)); + // + // Reproduce issue 6472 by making an input set which causes both interposer and GC writer to be enabled + // + std::vector sstables_spanning_many_windows = { + make_sstable_containing(sst_gen, muts), + make_sstable_containing(sst_gen, muts), + }; + sstables::run_id run_id = sstables::run_id::create_random_id(); + for (auto& sst : sstables_spanning_many_windows) { + sstables::test(sst).set_run_identifier(run_id); + } - auto ret = compact_sstables(env, compaction::compaction_descriptor(sstables_spanning_many_windows), cf, sst_gen, replacer_fn_no_op()).get(); - BOOST_REQUIRE(ret.new_sstables.size() == 1); - }); + // Make sure everything we wanted expired is expired by now. + forward_jump_clocks(std::chrono::hours(101)); + + auto ret = compact_sstables(env, compaction::compaction_descriptor(sstables_spanning_many_windows), cf, sst_gen, replacer_fn_no_op()).get(); + BOOST_REQUIRE(ret.new_sstables.size() == 1); } -SEASTAR_TEST_CASE(sstable_needs_cleanup_test) { - return test_env::do_with_async([] (test_env& env) { +SEASTAR_TEST_CASE(test_bug_6472) { + return test_env::do_with_async([](test_env& env) { test_bug_6472_fn(env); }); +} + +SEASTAR_TEST_CASE(test_bug_6472_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { test_bug_6472_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(test_bug_6472_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { test_bug_6472_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void sstable_needs_cleanup_fn(test_env& env) { auto s = schema_builder(some_keyspace, some_column_family).with_column("p1", utf8_type, column_kind::partition_key).build(); const auto keys = tests::generate_partition_keys(10, s); @@ -4300,89 +4899,112 @@ 
SEASTAR_TEST_CASE(sstable_needs_cleanup_test) { auto sst5 = sst_gen(keys[7], keys[7]); BOOST_REQUIRE(compaction::needs_cleanup(sst5, local_ranges)); } - }); +} + +SEASTAR_TEST_CASE(sstable_needs_cleanup_test) { + return test_env::do_with_async([](test_env& env) { sstable_needs_cleanup_fn(env); }); +} + +SEASTAR_TEST_CASE(sstable_needs_cleanup_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { sstable_needs_cleanup_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(sstable_needs_cleanup_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { sstable_needs_cleanup_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void test_twcs_partition_estimate_fn(test_env& env) { + auto builder = schema_builder("tests", "test_bug_6472") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", int32_type, column_kind::clustering_key) + .with_column("value", int32_type); + builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); + std::map opts = { + { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS" }, + { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1" }, + }; + builder.set_compaction_strategy_options(std::move(opts)); + builder.set_gc_grace_seconds(0); + auto s = builder.build(); + + const auto rows_per_partition = 200; + + auto sst_gen = env.make_sst_factory(s); + + auto next_timestamp = [] (int sstable_idx, int ck_idx) { + using namespace std::chrono; + auto window = hours(sstable_idx * rows_per_partition + ck_idx); + return (gc_clock::now().time_since_epoch() - duration_cast(window)).count(); + }; + + auto keys = tests::generate_partition_keys(4, s); + + auto make_sstable = [&] (int 
sstable_idx) { + static thread_local int32_t value = 1; + + auto key = keys[sstable_idx]; + + mutation m(s, key); + for (auto ck = 0; ck < rows_per_partition; ++ck) { + auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(sstable_idx, ck)); + } + return make_sstable_containing(sst_gen, {m}); + }; + + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->start(); + + auto ceil_div = [] (int dividend, int divisor) { return (dividend + divisor - 1) / divisor; }; + + auto estimation_test = [ceil_div] (schema_ptr s, uint64_t window_count) { + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, s->compaction_strategy_options()); + mutation_source_metadata ms_metadata{}; + const int partitions = 100; + BOOST_REQUIRE_EQUAL(cs.adjust_partition_estimate(ms_metadata, partitions, s), + ceil_div(partitions, window_count)); + }; + { + static constexpr int window_count = 20; + builder.set_default_time_to_live(std::chrono::duration_cast(std::chrono::hours(window_count))); + auto s = builder.build(); + estimation_test(s, window_count); + } + + { + builder.set_default_time_to_live(0s); + auto s = builder.build(); + estimation_test(s, compaction::time_window_compaction_strategy::max_data_segregation_window_count); + } + + std::vector sstables_spanning_many_windows = { + make_sstable(0), + make_sstable(1), + make_sstable(2), + make_sstable(3), + }; + + auto ret = compact_sstables(env, compaction::compaction_descriptor(sstables_spanning_many_windows), cf, sst_gen, replacer_fn_no_op()).get(); + // The real test here is that we don't SCYLLA_ASSERT() in + // sstables::prepare_summary() with the compact_sstables() call above, + // this is only here as a sanity check. 
+ BOOST_REQUIRE_EQUAL(ret.new_sstables.size(), std::min(sstables_spanning_many_windows.size() * rows_per_partition, + compaction::time_window_compaction_strategy::max_data_segregation_window_count)); } SEASTAR_TEST_CASE(test_twcs_partition_estimate) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "test_bug_6472") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", int32_type, column_kind::clustering_key) - .with_column("value", int32_type); - builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); - std::map opts = { - { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS" }, - { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1" }, - }; - builder.set_compaction_strategy_options(std::move(opts)); - builder.set_gc_grace_seconds(0); - auto s = builder.build(); + return test_env::do_with_async([](test_env& env) { test_twcs_partition_estimate_fn(env); }); +} - const auto rows_per_partition = 200; +SEASTAR_TEST_CASE(test_twcs_partition_estimate_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { test_twcs_partition_estimate_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto sst_gen = env.make_sst_factory(s); - - auto next_timestamp = [] (int sstable_idx, int ck_idx) { - using namespace std::chrono; - auto window = hours(sstable_idx * rows_per_partition + ck_idx); - return (gc_clock::now().time_since_epoch() - duration_cast(window)).count(); - }; - - auto keys = tests::generate_partition_keys(4, s); - - auto make_sstable = [&] (int sstable_idx) { - static thread_local int32_t value = 1; - - auto key = keys[sstable_idx]; - - mutation m(s, key); - for (auto ck = 0; ck < rows_per_partition; ++ck) { - auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); - 
m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(sstable_idx, ck)); - } - return make_sstable_containing(sst_gen, {m}); - }; - - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->start(); - - auto ceil_div = [] (int dividend, int divisor) { return (dividend + divisor - 1) / divisor; }; - - auto estimation_test = [ceil_div] (schema_ptr s, uint64_t window_count) { - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, s->compaction_strategy_options()); - mutation_source_metadata ms_metadata{}; - const int partitions = 100; - BOOST_REQUIRE_EQUAL(cs.adjust_partition_estimate(ms_metadata, partitions, s), - ceil_div(partitions, window_count)); - }; - { - static constexpr int window_count = 20; - builder.set_default_time_to_live(std::chrono::duration_cast(std::chrono::hours(window_count))); - auto s = builder.build(); - estimation_test(s, window_count); - } - - { - builder.set_default_time_to_live(0s); - auto s = builder.build(); - estimation_test(s, compaction::time_window_compaction_strategy::max_data_segregation_window_count); - } - - std::vector sstables_spanning_many_windows = { - make_sstable(0), - make_sstable(1), - make_sstable(2), - make_sstable(3), - }; - - auto ret = compact_sstables(env, compaction::compaction_descriptor(sstables_spanning_many_windows), cf, sst_gen, replacer_fn_no_op()).get(); - // The real test here is that we don't SCYLLA_ASSERT() in - // sstables::prepare_summary() with the compact_sstables() call above, - // this is only here as a sanity check. 
- BOOST_REQUIRE_EQUAL(ret.new_sstables.size(), std::min(sstables_spanning_many_windows.size() * rows_per_partition, - compaction::time_window_compaction_strategy::max_data_segregation_window_count)); - }); +SEASTAR_FIXTURE_TEST_CASE(test_twcs_partition_estimate_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { test_twcs_partition_estimate_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); } static compaction::compaction_descriptor get_reshaping_job(compaction::compaction_strategy& cs, const std::vector& input, @@ -4394,72 +5016,92 @@ static compaction::compaction_descriptor get_reshaping_job(compaction::compactio return cs.get_reshaping_job(input, s, cfg); } +void stcs_reshape_fn(test_env& env) { + simple_schema ss; + auto s = ss.schema(); + std::vector sstables; + sstables.reserve(s->max_compaction_threshold()); + const auto keys = tests::generate_partition_keys(s->max_compaction_threshold() + 2, s); + for (auto gen = 1; gen <= s->max_compaction_threshold(); gen++) { + auto sst = env.make_sstable(s); + sstables::test(sst).set_data_file_size(1); + sstables::test(sst).set_values(keys[gen - 1].key(), keys[gen + 1].key(), stats_metadata{}); + sstables.push_back(std::move(sst)); + } + + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, + s->compaction_strategy_options()); + + BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size()); + BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::relaxed).sstables.size()); +} + SEASTAR_TEST_CASE(stcs_reshape_test) { - return test_env::do_with_async([] (test_env& env) { - simple_schema ss; - auto s = ss.schema(); - std::vector sstables; - sstables.reserve(s->max_compaction_threshold()); - const auto keys = tests::generate_partition_keys(s->max_compaction_threshold() + 2, s); - for (auto gen = 1; gen 
<= s->max_compaction_threshold(); gen++) { + return test_env::do_with_async([](test_env& env) { stcs_reshape_fn(env); }); +} + +SEASTAR_TEST_CASE(stcs_reshape_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { stcs_reshape_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(stcs_reshape_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { stcs_reshape_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void lcs_reshape_fn(test_env& env) { + simple_schema ss; + auto s = ss.schema(); + const auto keys = tests::generate_partition_keys(256, s); + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::leveled, + s->compaction_strategy_options()); + + // non overlapping + { + std::vector sstables; + for (auto i = 0; i < 256; i++) { auto sst = env.make_sstable(s); - sstables::test(sst).set_data_file_size(1); - sstables::test(sst).set_values(keys[gen - 1].key(), keys[gen + 1].key(), stats_metadata{}); + auto key = keys[i].key(); + sstables::test(sst).set_values_for_leveled_strategy(1 /* size */, 0 /* level */, 0 /* max ts */, key, key); sstables.push_back(std::move(sst)); } - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, - s->compaction_strategy_options()); - - BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size()); - BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::relaxed).sstables.size()); - }); -} - -SEASTAR_TEST_CASE(lcs_reshape_test) { - return test_env::do_with_async([] (test_env& env) { - simple_schema ss; - auto s = ss.schema(); - const auto keys = tests::generate_partition_keys(256, s); - auto cs = 
compaction::make_compaction_strategy(compaction::compaction_strategy_type::leveled, - s->compaction_strategy_options()); - - // non overlapping - { - std::vector sstables; - for (auto i = 0; i < 256; i++) { - auto sst = env.make_sstable(s); - auto key = keys[i].key(); - sstables::test(sst).set_values_for_leveled_strategy(1 /* size */, 0 /* level */, 0 /* max ts */, key, key); - sstables.push_back(std::move(sst)); - } - - BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size() == 256); - } - // all overlapping - { - std::vector sstables; - for (auto i = 0; i < 256; i++) { - auto sst = env.make_sstable(s); - auto key = keys[0].key(); - sstables::test(sst).set_values_for_leveled_strategy(1 /* size */, 0 /* level */, 0 /* max ts */, key, key); - sstables.push_back(std::move(sst)); - } - - BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size() == uint64_t(s->max_compaction_threshold())); - } - // single sstable - { + BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size() == 256); + } + // all overlapping + { + std::vector sstables; + for (auto i = 0; i < 256; i++) { auto sst = env.make_sstable(s); auto key = keys[0].key(); sstables::test(sst).set_values_for_leveled_strategy(1 /* size */, 0 /* level */, 0 /* max ts */, key, key); - - BOOST_REQUIRE(get_reshaping_job(cs, { sst }, s, compaction::reshape_mode::strict).sstables.size() == 0); + sstables.push_back(std::move(sst)); } - }); + + BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size() == uint64_t(s->max_compaction_threshold())); + } + // single sstable + { + auto sst = env.make_sstable(s); + auto key = keys[0].key(); + sstables::test(sst).set_values_for_leveled_strategy(1 /* size */, 0 /* level */, 0 /* max ts */, key, key); + + BOOST_REQUIRE(get_reshaping_job(cs, { sst }, s, compaction::reshape_mode::strict).sstables.size() == 0); + } } 
-future<> test_twcs_interposer_on_memtable_flush(bool split_during_flush) { +SEASTAR_TEST_CASE(lcs_reshape_test) { + return test_env::do_with_async([](test_env& env) { lcs_reshape_fn(env); }); +} + +SEASTAR_TEST_CASE(lcs_reshape_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { lcs_reshape_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(lcs_reshape_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { lcs_reshape_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +future<> test_twcs_interposer_on_memtable_flush(bool split_during_flush, test_env_config cfg = {}) { return test_env::do_with_async([split_during_flush] (test_env& env) { auto builder = schema_builder("tests", "test_twcs_interposer_on_flush") .with_column("id", utf8_type, column_kind::partition_key) @@ -4505,821 +5147,959 @@ future<> test_twcs_interposer_on_memtable_flush(bool split_during_flush) { auto expected_ssts = (split_during_flush) ? 
target_windows_span : 1; testlog.info("split_during_flush={}, actual={}, expected={}", split_during_flush, cf->get_sstables()->size(), expected_ssts); assert_table_sstable_count(cf, expected_ssts); - }); + }, std::move(cfg)); } SEASTAR_TEST_CASE(test_twcs_interposer_on_memtable_flush_split) { return test_twcs_interposer_on_memtable_flush(true); } +SEASTAR_TEST_CASE(test_twcs_interposer_on_memtable_flush_split_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_twcs_interposer_on_memtable_flush(true, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(test_twcs_interposer_on_memtable_flush_split_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_twcs_interposer_on_memtable_flush(true, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + SEASTAR_TEST_CASE(test_twcs_interposer_on_memtable_flush_no_split) { return test_twcs_interposer_on_memtable_flush(false); } +SEASTAR_TEST_CASE(test_twcs_interposer_on_memtable_flush_no_split_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_twcs_interposer_on_memtable_flush(false, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(test_twcs_interposer_on_memtable_flush_no_split_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_twcs_interposer_on_memtable_flush(false, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void test_twcs_compaction_across_buckets_fn(test_env& env) { + auto builder = schema_builder("tests", "test_twcs_compaction_across_buckets") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", int32_type, column_kind::clustering_key) + .with_column("value", int32_type); + builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); + std::map opts = { + { 
compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS" }, + { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1" }, + }; + builder.set_compaction_strategy_options(std::move(opts)); + auto s = builder.build(); + + auto next_timestamp = [] (std::chrono::hours step = std::chrono::hours(0)) { + return (gc_clock::now().time_since_epoch() - std::chrono::duration_cast(step)).count(); + }; + + auto sst_gen = env.make_sst_factory(s); + auto pkey = tests::generate_partition_key(s); + + auto make_row = [&] (std::chrono::hours step) { + static thread_local int32_t value = 1; + mutation m(s, pkey); + auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step)); + return m; + }; + + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + + constexpr unsigned windows = 10; + + std::vector sstables_spanning_many_windows; + sstables_spanning_many_windows.reserve(windows + 1); + + for (unsigned w = 0; w < windows; w++) { + sstables_spanning_many_windows.push_back(make_sstable_containing(sst_gen, {make_row(std::chrono::hours((w + 1) * 2))})); + } + auto deletion_mut = [&] () { + mutation m(s, pkey); + tombstone tomb(next_timestamp(), gc_clock::now()); + m.partition().apply(tomb); + return m; + }(); + sstables_spanning_many_windows.push_back(make_sstable_containing(sst_gen, {deletion_mut})); + + auto ret = compact_sstables(env, compaction::compaction_descriptor(std::move(sstables_spanning_many_windows)), cf, sst_gen, replacer_fn_no_op(), can_purge_tombstones::no).get(); + + BOOST_REQUIRE(ret.new_sstables.size() == 1); + assert_that(sstable_reader(ret.new_sstables[0], s, env.make_reader_permit())) + .produces(deletion_mut) + .produces_end_of_stream(); +} + SEASTAR_TEST_CASE(test_twcs_compaction_across_buckets) { - return test_env::do_with_async([] (test_env& env) { - auto builder = 
schema_builder("tests", "test_twcs_compaction_across_buckets") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", int32_type, column_kind::clustering_key) - .with_column("value", int32_type); - builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); - std::map opts = { - { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS" }, - { compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1" }, - }; - builder.set_compaction_strategy_options(std::move(opts)); - auto s = builder.build(); + return test_env::do_with_async([](test_env& env) { test_twcs_compaction_across_buckets_fn(env); }); +} - auto next_timestamp = [] (std::chrono::hours step = std::chrono::hours(0)) { - return (gc_clock::now().time_since_epoch() - std::chrono::duration_cast(step)).count(); - }; +SEASTAR_TEST_CASE(test_twcs_compaction_across_buckets_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { test_twcs_compaction_across_buckets_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto sst_gen = env.make_sst_factory(s); - auto pkey = tests::generate_partition_key(s); +SEASTAR_FIXTURE_TEST_CASE(test_twcs_compaction_across_buckets_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { test_twcs_compaction_across_buckets_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - auto make_row = [&] (std::chrono::hours step) { - static thread_local int32_t value = 1; - mutation m(s, pkey); - auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step)); - return m; - }; +void test_offstrategy_sstable_compaction_fn(test_env& env) { + auto tmpdirs = 
std::vector(); + for (const auto version : writable_sstable_versions) { + tmpdirs.push_back(tmpdir()); + auto& tmp = tmpdirs.back(); + simple_schema ss; + auto s = ss.schema(); - auto cf = env.make_table_for_tests(s); + auto pk = tests::generate_partition_key(s); + auto mut = mutation(s, pk); + ss.add_row(mut, ss.make_ckey(0), "val"); + + auto cf = env.make_table_for_tests(s, tmp.path().string()); auto close_cf = deferred_stop(cf); + auto sst_gen = [&] () mutable { + return env.make_sstable(cf->schema(), version); + }; - constexpr unsigned windows = 10; + cf->start(); - std::vector sstables_spanning_many_windows; - sstables_spanning_many_windows.reserve(windows + 1); - - for (unsigned w = 0; w < windows; w++) { - sstables_spanning_many_windows.push_back(make_sstable_containing(sst_gen, {make_row(std::chrono::hours((w + 1) * 2))})); + for (auto i = 0; i < cf->schema()->max_compaction_threshold(); i++) { + auto sst = make_sstable_containing(sst_gen, {mut}); + cf->add_sstable_and_update_cache(std::move(sst), sstables::offstrategy::yes).get(); } - auto deletion_mut = [&] () { - mutation m(s, pkey); - tombstone tomb(next_timestamp(), gc_clock::now()); - m.partition().apply(tomb); - return m; - }(); - sstables_spanning_many_windows.push_back(make_sstable_containing(sst_gen, {deletion_mut})); - - auto ret = compact_sstables(env, compaction::compaction_descriptor(std::move(sstables_spanning_many_windows)), cf, sst_gen, replacer_fn_no_op(), can_purge_tombstones::no).get(); - - BOOST_REQUIRE(ret.new_sstables.size() == 1); - assert_that(sstable_reader(ret.new_sstables[0], s, env.make_reader_permit())) - .produces(deletion_mut) - .produces_end_of_stream(); - }); + BOOST_REQUIRE(cf->perform_offstrategy_compaction(tasks::task_info{}).get()); + } } SEASTAR_TEST_CASE(test_offstrategy_sstable_compaction) { - return test_env::do_with_async([tmpdirs = std::vector()] (test_env& env) mutable { - for (const auto version : writable_sstable_versions) { - tmpdirs.push_back(tmpdir()); - 
auto& tmp = tmpdirs.back(); - simple_schema ss; - auto s = ss.schema(); + return test_env::do_with_async([](test_env& env) { test_offstrategy_sstable_compaction_fn(env); }); +} - auto pk = tests::generate_partition_key(s); - auto mut = mutation(s, pk); - ss.add_row(mut, ss.make_ckey(0), "val"); +SEASTAR_TEST_CASE(test_offstrategy_sstable_compaction_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { test_offstrategy_sstable_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto cf = env.make_table_for_tests(s, tmp.path().string()); - auto close_cf = deferred_stop(cf); - auto sst_gen = [&] () mutable { - return env.make_sstable(cf->schema(), version); - }; +SEASTAR_FIXTURE_TEST_CASE(test_offstrategy_sstable_compaction_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { test_offstrategy_sstable_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - cf->start(); +void twcs_reshape_with_disjoint_set_fn(test_env& env) { + static constexpr unsigned disjoint_sstable_count = 256; + auto builder = schema_builder("tests", "twcs_reshape_test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", ::timestamp_type, column_kind::clustering_key) + .with_column("value", int32_type); + builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); + std::map opts = { + {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS"}, + {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "8"}, + {"min_sstable_size", "1"}, + }; + builder.set_compaction_strategy_options(std::move(opts)); + size_t min_threshold = tests::random::get_int(4, 8); + builder.set_min_compaction_threshold(min_threshold); + auto s = builder.build(); + auto cs = 
compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, s->compaction_strategy_options()); - for (auto i = 0; i < cf->schema()->max_compaction_threshold(); i++) { - auto sst = make_sstable_containing(sst_gen, {mut}); - cf->add_sstable_and_update_cache(std::move(sst), sstables::offstrategy::yes).get(); - } - BOOST_REQUIRE(cf->perform_offstrategy_compaction(tasks::task_info{}).get()); + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution<> distrib(1, 3600*24); + + using namespace std::chrono; + + // Make it easier to reproduce timing-based issues by running this test multiple times. + auto offset_duration = duration_cast(minutes(distrib(gen))); + + auto now = gc_clock::now().time_since_epoch() + offset_duration; + // The twcs is configured with 8-hours time window. If the starting time + // is not aligned with that then some buckets may get less than this + // number of sstables in and potentially hit the minimal threshold of + // 4 sstables. Align the starting time not to make this happen. 
+ auto now_in_minutes = duration_cast(now); + constexpr auto window_size_in_minutes = 8 * 60; + forward_jump_clocks(minutes(window_size_in_minutes - now_in_minutes.count() % window_size_in_minutes)); + now = gc_clock::now().time_since_epoch() + offset_duration; + SCYLLA_ASSERT(std::chrono::duration_cast(now).count() % window_size_in_minutes == 0); + + auto next_timestamp = [now](auto step) { + return (now + duration_cast(step)).count(); + }; + + const auto keys = tests::generate_partition_keys(disjoint_sstable_count, s); + + auto make_row = [&](unsigned token_idx, auto step) { + static thread_local int32_t value = 1; + auto key = keys[token_idx]; + + mutation m(s, key); + auto next_ts = next_timestamp(step); + auto c_key = clustering_key::from_exploded(*s, {::timestamp_type->decompose(next_ts)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value++)), next_ts); + return m; + }; + + auto sst_gen = env.make_sst_factory(s); + + { + // create set of 256 disjoint ssts that belong to the same time window and expect that twcs reshape allows them all to be compacted at once + + std::vector sstables; + sstables.reserve(disjoint_sstable_count); + for (unsigned i = 0; i < disjoint_sstable_count; i++) { + auto sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(1))}); + sstables.push_back(std::move(sst)); } - }); + + BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size(), disjoint_sstable_count); + } + + { + // create set of 256 disjoint ssts that belong to different windows and expect that twcs reshape allows them all to be compacted at once + + std::vector sstables; + sstables.reserve(disjoint_sstable_count); + for (unsigned i = 0; i < disjoint_sstable_count; i++) { + auto sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(i))}); + sstables.push_back(std::move(sst)); + } + + auto reshaping_count = get_reshaping_job(cs, sstables, s, 
compaction::reshape_mode::strict).sstables.size(); + BOOST_REQUIRE_GE(reshaping_count, disjoint_sstable_count - min_threshold + 1); + BOOST_REQUIRE_LE(reshaping_count, disjoint_sstable_count); + } + + { + // create set of 256 disjoint ssts that belong to different windows with none over the threshold and expect that twcs reshape selects none of them + + std::vector sstables; + sstables.reserve(disjoint_sstable_count); + for (unsigned i = 0; i < disjoint_sstable_count; i++) { + auto sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(24*i))}); + sstables.push_back(std::move(sst)); + i++; + sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(24*i + 1))}); + sstables.push_back(std::move(sst)); + } + + BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size(), 0); + } + + { + // create set of 256 overlapping ssts that belong to the same time window and expect that twcs reshape allows only 32 to be compacted at once + + std::vector sstables; + sstables.reserve(disjoint_sstable_count); + for (unsigned i = 0; i < disjoint_sstable_count; i++) { + auto sst = make_sstable_containing(sst_gen, {make_row(0, std::chrono::hours(1))}); + sstables.push_back(std::move(sst)); + } + + BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size(), uint64_t(s->max_compaction_threshold())); + } + + { + // create set of 64 files which size is either small or big. 
as STCS reshape logic reused by TWCS favor compaction of smaller files + // first, verify that only 32 small (similar-sized) files are returned + + utils::chunked_vector mutations_for_small_files; + mutations_for_small_files.push_back(make_row(0, std::chrono::hours(1))); + + utils::chunked_vector mutations_for_big_files; + for (unsigned i = 0; i < keys.size(); i++) { + mutations_for_big_files.push_back(make_row(i, std::chrono::hours(1))); + } + + std::unordered_set generations_for_small_files; + + std::vector sstables; + sstables.reserve(64); + + for (unsigned i = 0; i < 64; i++) { + sstables::shared_sstable sst; + // + // intermix big and small files, to make sure STCS logic is really applied to favor similar-sized reshape jobs. + // + if (i % 2 == 0) { + sst = make_sstable_containing(sst_gen, mutations_for_small_files); + generations_for_small_files.insert(sst->generation()); + } else { + sst = make_sstable_containing(sst_gen, mutations_for_big_files); + } + sstables.push_back(std::move(sst)); + } + + auto check_mode_correctness = [&] (compaction::reshape_mode mode) { + auto ret = get_reshaping_job(cs, sstables, s, mode); + BOOST_REQUIRE_EQUAL(ret.sstables.size(), uint64_t(s->max_compaction_threshold())); + // fail if any file doesn't belong to set of small files + bool has_big_sized_files = std::ranges::any_of(ret.sstables, [&] (const sstables::shared_sstable& sst) { + return !generations_for_small_files.contains(sst->generation()); + }); + BOOST_REQUIRE(!has_big_sized_files); + }; + + check_mode_correctness(compaction::reshape_mode::strict); + check_mode_correctness(compaction::reshape_mode::relaxed); + } + + { + // create set of 256 disjoint ssts that spans multiple windows (essentially what happens in off-strategy during node op) + + std::vector sstables; + sstables.reserve(disjoint_sstable_count); + for (auto i = 0U; i < disjoint_sstable_count; i++) { + utils::chunked_vector muts; + muts.reserve(5); + for (auto j = 0; j < 5; j++) { + 
muts.push_back(make_row(i, std::chrono::hours(j * 8))); + } + auto sst = make_sstable_containing(sst_gen, std::move(muts)); + sstables.push_back(std::move(sst)); + } + + auto job_size = [] (auto&& sst_range) { + return std::ranges::fold_left(sst_range | std::views::transform(std::mem_fn(&sstable::bytes_on_disk)), uint64_t(0), std::plus{}); + }; + auto free_space_for_reshaping_sstables = [&job_size] (auto&& sst_range) { + return job_size(std::move(sst_range)) * (compaction::time_window_compaction_strategy::reshape_target_space_overhead * 100); + }; + + // all sstables can be reshaped in a single round if there's enough space + { + uint64_t free_space = free_space_for_reshaping_sstables(std::ranges::subrange(sstables)); + BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict, free_space).sstables.size() == sstables.size()); + } + + // only a subset can be reshaped in a single round to respect the 10% space overhead + { + const size_t sstables_that_fit_in_target_overhead = 10; + uint64_t free_space = free_space_for_reshaping_sstables(std::ranges::subrange(sstables.begin(), sstables.begin() + sstables_that_fit_in_target_overhead)); + auto target_space_overhead = free_space * compaction::time_window_compaction_strategy::reshape_target_space_overhead; + auto job = get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict, free_space); + BOOST_REQUIRE(job.sstables.size() < sstables.size()); + BOOST_REQUIRE(job_size(std::ranges::subrange(job.sstables)) <= target_space_overhead); + } + } } SEASTAR_TEST_CASE(twcs_reshape_with_disjoint_set_test) { - static constexpr unsigned disjoint_sstable_count = 256; - - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "twcs_reshape_test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", ::timestamp_type, column_kind::clustering_key) - .with_column("value", int32_type); - 
builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); - std::map opts = { - {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS"}, - {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "8"}, - {"min_sstable_size", "1"}, - }; - builder.set_compaction_strategy_options(std::move(opts)); - size_t min_threshold = tests::random::get_int(4, 8); - builder.set_min_compaction_threshold(min_threshold); - auto s = builder.build(); - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, s->compaction_strategy_options()); - - std::random_device rd; - std::mt19937 gen(rd()); - std::uniform_int_distribution<> distrib(1, 3600*24); - - using namespace std::chrono; - - // Make it easier to reproduce timing-based issues by running this test multiple times. - auto offset_duration = duration_cast(minutes(distrib(gen))); - - auto now = gc_clock::now().time_since_epoch() + offset_duration; - // The twcs is configured with 8-hours time window. If the starting time - // is not aligned with that then some buckets may get less than this - // number of sstables in and potentially hit the minimal threshold of - // 4 sstables. Align the starting time not to make this happen. 
- auto now_in_minutes = duration_cast(now); - constexpr auto window_size_in_minutes = 8 * 60; - forward_jump_clocks(minutes(window_size_in_minutes - now_in_minutes.count() % window_size_in_minutes)); - now = gc_clock::now().time_since_epoch() + offset_duration; - SCYLLA_ASSERT(std::chrono::duration_cast(now).count() % window_size_in_minutes == 0); - - auto next_timestamp = [now](auto step) { - return (now + duration_cast(step)).count(); - }; - - const auto keys = tests::generate_partition_keys(disjoint_sstable_count, s); - - auto make_row = [&](unsigned token_idx, auto step) { - static thread_local int32_t value = 1; - auto key = keys[token_idx]; - - mutation m(s, key); - auto next_ts = next_timestamp(step); - auto c_key = clustering_key::from_exploded(*s, {::timestamp_type->decompose(next_ts)}); - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value++)), next_ts); - return m; - }; - - auto sst_gen = env.make_sst_factory(s); - - { - // create set of 256 disjoint ssts that belong to the same time window and expect that twcs reshape allows them all to be compacted at once - - std::vector sstables; - sstables.reserve(disjoint_sstable_count); - for (unsigned i = 0; i < disjoint_sstable_count; i++) { - auto sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(1))}); - sstables.push_back(std::move(sst)); - } - - BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size(), disjoint_sstable_count); - } - - { - // create set of 256 disjoint ssts that belong to different windows and expect that twcs reshape allows them all to be compacted at once - - std::vector sstables; - sstables.reserve(disjoint_sstable_count); - for (unsigned i = 0; i < disjoint_sstable_count; i++) { - auto sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(i))}); - sstables.push_back(std::move(sst)); - } - - auto reshaping_count = get_reshaping_job(cs, sstables, s, 
compaction::reshape_mode::strict).sstables.size(); - BOOST_REQUIRE_GE(reshaping_count, disjoint_sstable_count - min_threshold + 1); - BOOST_REQUIRE_LE(reshaping_count, disjoint_sstable_count); - } - - { - // create set of 256 disjoint ssts that belong to different windows with none over the threshold and expect that twcs reshape selects none of them - - std::vector sstables; - sstables.reserve(disjoint_sstable_count); - for (unsigned i = 0; i < disjoint_sstable_count; i++) { - auto sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(24*i))}); - sstables.push_back(std::move(sst)); - i++; - sst = make_sstable_containing(sst_gen, {make_row(i, std::chrono::hours(24*i + 1))}); - sstables.push_back(std::move(sst)); - } - - BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size(), 0); - } - - { - // create set of 256 overlapping ssts that belong to the same time window and expect that twcs reshape allows only 32 to be compacted at once - - std::vector sstables; - sstables.reserve(disjoint_sstable_count); - for (unsigned i = 0; i < disjoint_sstable_count; i++) { - auto sst = make_sstable_containing(sst_gen, {make_row(0, std::chrono::hours(1))}); - sstables.push_back(std::move(sst)); - } - - BOOST_REQUIRE_EQUAL(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size(), uint64_t(s->max_compaction_threshold())); - } - - { - // create set of 64 files which size is either small or big. 
as STCS reshape logic reused by TWCS favor compaction of smaller files - // first, verify that only 32 small (similar-sized) files are returned - - utils::chunked_vector mutations_for_small_files; - mutations_for_small_files.push_back(make_row(0, std::chrono::hours(1))); - - utils::chunked_vector mutations_for_big_files; - for (unsigned i = 0; i < keys.size(); i++) { - mutations_for_big_files.push_back(make_row(i, std::chrono::hours(1))); - } - - std::unordered_set generations_for_small_files; - - std::vector sstables; - sstables.reserve(64); - - for (unsigned i = 0; i < 64; i++) { - sstables::shared_sstable sst; - // - // intermix big and small files, to make sure STCS logic is really applied to favor similar-sized reshape jobs. - // - if (i % 2 == 0) { - sst = make_sstable_containing(sst_gen, mutations_for_small_files); - generations_for_small_files.insert(sst->generation()); - } else { - sst = make_sstable_containing(sst_gen, mutations_for_big_files); - } - sstables.push_back(std::move(sst)); - } - - auto check_mode_correctness = [&] (compaction::reshape_mode mode) { - auto ret = get_reshaping_job(cs, sstables, s, mode); - BOOST_REQUIRE_EQUAL(ret.sstables.size(), uint64_t(s->max_compaction_threshold())); - // fail if any file doesn't belong to set of small files - bool has_big_sized_files = std::ranges::any_of(ret.sstables, [&] (const sstables::shared_sstable& sst) { - return !generations_for_small_files.contains(sst->generation()); - }); - BOOST_REQUIRE(!has_big_sized_files); - }; - - check_mode_correctness(compaction::reshape_mode::strict); - check_mode_correctness(compaction::reshape_mode::relaxed); - } - - { - // create set of 256 disjoint ssts that spans multiple windows (essentially what happens in off-strategy during node op) - - std::vector sstables; - sstables.reserve(disjoint_sstable_count); - for (auto i = 0U; i < disjoint_sstable_count; i++) { - utils::chunked_vector muts; - muts.reserve(5); - for (auto j = 0; j < 5; j++) { - 
muts.push_back(make_row(i, std::chrono::hours(j * 8))); - } - auto sst = make_sstable_containing(sst_gen, std::move(muts)); - sstables.push_back(std::move(sst)); - } - - auto job_size = [] (auto&& sst_range) { - return std::ranges::fold_left(sst_range | std::views::transform(std::mem_fn(&sstable::bytes_on_disk)), uint64_t(0), std::plus{}); - }; - auto free_space_for_reshaping_sstables = [&job_size] (auto&& sst_range) { - return job_size(std::move(sst_range)) * (compaction::time_window_compaction_strategy::reshape_target_space_overhead * 100); - }; - - // all sstables can be reshaped in a single round if there's enough space - { - uint64_t free_space = free_space_for_reshaping_sstables(std::ranges::subrange(sstables)); - BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict, free_space).sstables.size() == sstables.size()); - } - - // only a subset can be reshaped in a single round to respect the 10% space overhead - { - const size_t sstables_that_fit_in_target_overhead = 10; - uint64_t free_space = free_space_for_reshaping_sstables(std::ranges::subrange(sstables.begin(), sstables.begin() + sstables_that_fit_in_target_overhead)); - auto target_space_overhead = free_space * compaction::time_window_compaction_strategy::reshape_target_space_overhead; - auto job = get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict, free_space); - BOOST_REQUIRE(job.sstables.size() < sstables.size()); - BOOST_REQUIRE(job_size(std::ranges::subrange(job.sstables)) <= target_space_overhead); - } - } - }); + return test_env::do_with_async([](test_env& env) { twcs_reshape_with_disjoint_set_fn(env); }); } +SEASTAR_TEST_CASE(twcs_reshape_with_disjoint_set_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + // TODO: Deeper investigation needed to figure out why it takes 4+ minutes to run on S3 storage, while it runs in seconds on local storage. For now, + // skipping the test for S3. 
+ testlog.info("twcs_reshape_with_disjoint_set_s3_test is not supported for S3 storage yet, skipping test"); + return make_ready_future(); +#if 0 + return test_env::do_with_async([](test_env& env) { twcs_reshape_with_disjoint_set_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +#endif +} + +SEASTAR_FIXTURE_TEST_CASE(twcs_reshape_with_disjoint_set_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { twcs_reshape_with_disjoint_set_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void stcs_reshape_overlapping_fn(test_env& env) { + static constexpr unsigned disjoint_sstable_count = 256; + auto builder = schema_builder("tests", "stcs_reshape_test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", ::timestamp_type, column_kind::clustering_key) + .with_column("value", int32_type); + builder.set_compaction_strategy(compaction::compaction_strategy_type::size_tiered); + auto s = builder.build(); + std::map opts; + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, std::move(opts)); + + auto keys = tests::generate_partition_keys(disjoint_sstable_count, s); + + auto make_row = [&](unsigned token_idx) { + auto key = keys[token_idx]; + + mutation m(s, key); + auto value = 1; + auto next_ts = 1; + auto c_key = clustering_key::from_exploded(*s, {::timestamp_type->decompose(next_ts)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_ts); + return m; + }; + + auto sst_gen = env.make_sst_factory(s); + + { + // create set of 256 disjoint ssts and expect that stcs reshape allows them all to be compacted at once + + std::vector sstables; + sstables.reserve(disjoint_sstable_count); + for (unsigned i = 0; i < disjoint_sstable_count; i++) { + auto sst = make_sstable_containing(sst_gen, 
{make_row(i)}); + sstables.push_back(std::move(sst)); + } + + BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size() == disjoint_sstable_count); + } + + { + // create set of 256 overlapping ssts and expect that stcs reshape allows only 32 to be compacted at once + + std::vector sstables; + sstables.reserve(disjoint_sstable_count); + for (unsigned i = 0; i < disjoint_sstable_count; i++) { + auto sst = make_sstable_containing(sst_gen, {make_row(0)}); + sstables.push_back(std::move(sst)); + } + + BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size() == uint64_t(s->max_compaction_threshold())); + } +} SEASTAR_TEST_CASE(stcs_reshape_overlapping_test) { - static constexpr unsigned disjoint_sstable_count = 256; + return test_env::do_with_async([](test_env& env) { stcs_reshape_overlapping_fn(env); }); +} - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "stcs_reshape_test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", ::timestamp_type, column_kind::clustering_key) - .with_column("value", int32_type); - builder.set_compaction_strategy(compaction::compaction_strategy_type::size_tiered); - auto s = builder.build(); - std::map opts; - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::size_tiered, std::move(opts)); +SEASTAR_TEST_CASE(stcs_reshape_overlapping_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { stcs_reshape_overlapping_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto keys = tests::generate_partition_keys(disjoint_sstable_count, s); - - auto make_row = [&](unsigned token_idx) { - auto key = keys[token_idx]; - - mutation m(s, key); - auto value = 1; - auto next_ts = 1; - auto c_key = clustering_key::from_exploded(*s, 
{::timestamp_type->decompose(next_ts)}); - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_ts); - return m; - }; - - auto sst_gen = env.make_sst_factory(s); - - { - // create set of 256 disjoint ssts and expect that stcs reshape allows them all to be compacted at once - - std::vector sstables; - sstables.reserve(disjoint_sstable_count); - for (unsigned i = 0; i < disjoint_sstable_count; i++) { - auto sst = make_sstable_containing(sst_gen, {make_row(i)}); - sstables.push_back(std::move(sst)); - } - - BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size() == disjoint_sstable_count); - } - - { - // create set of 256 overlapping ssts and expect that stcs reshape allows only 32 to be compacted at once - - std::vector sstables; - sstables.reserve(disjoint_sstable_count); - for (unsigned i = 0; i < disjoint_sstable_count; i++) { - auto sst = make_sstable_containing(sst_gen, {make_row(0)}); - sstables.push_back(std::move(sst)); - } - - BOOST_REQUIRE(get_reshaping_job(cs, sstables, s, compaction::reshape_mode::strict).sstables.size() == uint64_t(s->max_compaction_threshold())); - } - }); +SEASTAR_FIXTURE_TEST_CASE(stcs_reshape_overlapping_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { stcs_reshape_overlapping_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); } // Regression test for #8432 + +void test_twcs_single_key_reader_filtering_fn(test_env& env) { + auto builder = schema_builder("tests", "twcs_single_key_reader_filtering") + .with_column("pk", int32_type, column_kind::partition_key) + .with_column("ck", int32_type, column_kind::clustering_key) + .with_column("v", int32_type); + builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); + auto s = builder.build(); + + auto sst_gen = env.make_sst_factory(s); + + auto make_row = [&] (int32_t pk, 
int32_t ck) { + mutation m(s, partition_key::from_single_value(*s, int32_type->decompose(pk))); + m.set_clustered_cell(clustering_key::from_single_value(*s, int32_type->decompose(ck)), to_bytes("v"), int32_t(0), api::new_timestamp()); + return m; + }; + + auto sst1 = make_sstable_containing(sst_gen, {make_row(0, 0)}); + auto sst2 = make_sstable_containing(sst_gen, {make_row(0, 1)}); + auto dkey = sst1->get_first_decorated_key(); + + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->start(); + + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, {}); + + auto set = cs.make_sstable_set(cf.as_compaction_group_view()); + set.insert(std::move(sst1)); + set.insert(std::move(sst2)); + + reader_permit permit = env.make_reader_permit(); + utils::estimated_histogram eh; + auto pr = dht::partition_range::make_singular(dkey); + + auto slice = partition_slice_builder(*s) + .with_range(query::clustering_range { + query::clustering_range::bound { clustering_key_prefix::from_single_value(*s, int32_type->decompose(0)) }, + query::clustering_range::bound { clustering_key_prefix::from_single_value(*s, int32_type->decompose(1)) }, + }).build(); + + auto reader = set.create_single_key_sstable_reader( + &*cf, s, permit, eh, pr, slice, + tracing::trace_state_ptr(), ::streamed_mutation::forwarding::no, + ::mutation_reader::forwarding::no); + auto close_reader = deferred_close(reader); + + auto& cf_stats = cf.cf_stats(); + auto checked_by_ck = cf_stats.sstables_checked_by_clustering_filter; + auto surviving_after_ck = cf_stats.surviving_sstables_after_clustering_filter; + + // consume all fragments + while (reader().get()); + + // At least sst2 should be checked by the CK filter during fragment consumption and should pass. + // With the bug in #8432, sst2 wouldn't even be checked by the CK filter since it would pass right after checking the PK filter. 
+ BOOST_REQUIRE_GE(cf_stats.sstables_checked_by_clustering_filter - checked_by_ck, 1); + BOOST_REQUIRE_EQUAL( + cf_stats.surviving_sstables_after_clustering_filter - surviving_after_ck, + cf_stats.sstables_checked_by_clustering_filter - checked_by_ck); +} + SEASTAR_TEST_CASE(test_twcs_single_key_reader_filtering) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "twcs_single_key_reader_filtering") - .with_column("pk", int32_type, column_kind::partition_key) - .with_column("ck", int32_type, column_kind::clustering_key) - .with_column("v", int32_type); + return test_env::do_with_async([](test_env& env) { test_twcs_single_key_reader_filtering_fn(env); }); +} + +SEASTAR_TEST_CASE(test_twcs_single_key_reader_filtering_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { test_twcs_single_key_reader_filtering_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(test_twcs_single_key_reader_filtering_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { test_twcs_single_key_reader_filtering_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void max_ongoing_compaction_fn(test_env& env) { + BOOST_REQUIRE(smp::count == 1); + + auto make_schema = [] (auto idx) { + auto builder = schema_builder("tests", std::to_string(idx)) + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", int32_type, column_kind::clustering_key) + .with_column("value", int32_type); builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); - auto s = builder.build(); - - auto sst_gen = env.make_sst_factory(s); - - auto make_row = [&] (int32_t pk, int32_t ck) { - mutation m(s, partition_key::from_single_value(*s, int32_type->decompose(pk))); - 
m.set_clustered_cell(clustering_key::from_single_value(*s, int32_type->decompose(ck)), to_bytes("v"), int32_t(0), api::new_timestamp()); - return m; + std::map opts = { + {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS"}, + {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1"}, + {compaction::time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY, "0"}, }; + builder.set_compaction_strategy_options(std::move(opts)); + builder.set_gc_grace_seconds(0); + return builder.build(); + }; - auto sst1 = make_sstable_containing(sst_gen, {make_row(0, 0)}); - auto sst2 = make_sstable_containing(sst_gen, {make_row(0, 1)}); - auto dkey = sst1->get_first_decorated_key(); + // makes sure all data belonging to a table falls into the same time bucket. + auto now = gc_clock::now(); + auto next_timestamp = [&now] (auto step) { + using namespace std::chrono; + return (now.time_since_epoch() - duration_cast(step)).count(); + }; + auto make_expiring_cell = [&] (schema_ptr s, std::chrono::hours step) { + static thread_local int32_t value = 1; - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->start(); + auto key = tests::generate_partition_key(s); - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, {}); + mutation m(s, key); + auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step), gc_clock::duration(step + 5s)); + return m; + }; - auto set = cs.make_sstable_set(cf.as_compaction_group_view()); - set.insert(std::move(sst1)); - set.insert(std::move(sst2)); - - reader_permit permit = env.make_reader_permit(); - utils::estimated_histogram eh; - auto pr = dht::partition_range::make_singular(dkey); - - auto slice = partition_slice_builder(*s) - .with_range(query::clustering_range { - 
query::clustering_range::bound { clustering_key_prefix::from_single_value(*s, int32_type->decompose(0)) }, - query::clustering_range::bound { clustering_key_prefix::from_single_value(*s, int32_type->decompose(1)) }, - }).build(); - - auto reader = set.create_single_key_sstable_reader( - &*cf, s, permit, eh, pr, slice, - tracing::trace_state_ptr(), ::streamed_mutation::forwarding::no, - ::mutation_reader::forwarding::no); - auto close_reader = deferred_close(reader); - - auto& cf_stats = cf.cf_stats(); - auto checked_by_ck = cf_stats.sstables_checked_by_clustering_filter; - auto surviving_after_ck = cf_stats.surviving_sstables_after_clustering_filter; - - // consume all fragments - while (reader().get()); - - // At least sst2 should be checked by the CK filter during fragment consumption and should pass. - // With the bug in #8432, sst2 wouldn't even be checked by the CK filter since it would pass right after checking the PK filter. - BOOST_REQUIRE_GE(cf_stats.sstables_checked_by_clustering_filter - checked_by_ck, 1); - BOOST_REQUIRE_EQUAL( - cf_stats.surviving_sstables_after_clustering_filter - surviving_after_ck, - cf_stats.sstables_checked_by_clustering_filter - checked_by_ck); + constexpr size_t num_tables = 10; + std::vector schemas; + std::vector tables; + auto stop_tables = defer([&tables] { + for (auto& t : tables) { + t->stop().get(); + } }); + + // Make tables + for (unsigned idx = 0; idx < num_tables; idx++) { + auto s = make_schema(idx); + schemas.push_back(s); + auto cf = env.make_table_for_tests(s); + tables.push_back(cf); + } + + auto sst_gen = [&] (size_t idx) mutable { + auto t = tables[idx]; + return t->make_sstable(); + }; + + auto add_single_fully_expired_sstable_to_table = [&] (auto idx) { + auto s = schemas[idx]; + auto cf = tables[idx]; + auto muts = { make_expiring_cell(s, std::chrono::hours(1)) }; + auto sst = make_sstable_containing([&sst_gen, idx] { return sst_gen(idx); }, muts); + column_family_test(cf).add_sstable(sst).get(); + }; + + 
for (unsigned i = 0; i < num_tables; i++) { + add_single_fully_expired_sstable_to_table(i); + } + + // Make sure everything is expired + forward_jump_clocks(std::chrono::hours(100)); + now = gc_clock::now(); + + auto compact_all_tables = [&] (size_t expected_before, size_t expected_after) { + for (auto& t : tables) { + BOOST_REQUIRE_EQUAL(t->sstables_count(), expected_before); + t->trigger_compaction(); + } + + size_t max_ongoing_compaction = 0; + auto& cm = env.test_compaction_manager().get_compaction_manager(); + + // wait for submitted jobs to finish. + auto end = [&cm, &tables, expected_after] { + return cm.get_stats().pending_tasks == 0 && cm.get_stats().active_tasks == 0 + && std::ranges::all_of(tables, [expected_after] (auto& t) { return t->sstables_count() == expected_after; }); + }; + while (!end()) { + if (!cm.get_stats().pending_tasks && !cm.get_stats().active_tasks) { + for (auto& t : tables) { + if (t->sstables_count()) { + t->trigger_compaction(); + } + } + } + max_ongoing_compaction = std::max(cm.get_stats().active_tasks, max_ongoing_compaction); + yield().get(); + } + BOOST_REQUIRE(cm.get_stats().errors == 0); + return max_ongoing_compaction; + }; + + // Allow fully expired sstables to be compacted in parallel, as they have the same weight 0 (== weightless). 
+ BOOST_REQUIRE_LE(compact_all_tables(1, 0), num_tables); + + auto add_sstables_to_table = [&] (auto idx, size_t num_sstables) { + auto s = schemas[idx]; + auto cf = tables[idx]; + auto cft = column_family_test(cf); + for (size_t i = 0; i < num_sstables; i++) { + auto muts = { make_expiring_cell(s, std::chrono::hours(1)) }; + cft.add_sstable(make_sstable_containing([&sst_gen, idx] { return sst_gen(idx); }, muts)).get(); + } + }; + + for (size_t i = 0; i < num_tables; i++) { + add_sstables_to_table(i, DEFAULT_MIN_COMPACTION_THRESHOLD); + } + + // All buckets are expected to have the same weight (>0) + // and therefore their compaction is expected to be serialized + BOOST_REQUIRE_EQUAL(compact_all_tables(DEFAULT_MIN_COMPACTION_THRESHOLD, 1), 1); } SEASTAR_TEST_CASE(max_ongoing_compaction_test) { - return test_env::do_with_async([] (test_env& env) { - BOOST_REQUIRE(smp::count == 1); + return test_env::do_with_async([](test_env& env) { max_ongoing_compaction_fn(env); }); +} - auto make_schema = [] (auto idx) { - auto builder = schema_builder("tests", std::to_string(idx)) - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", int32_type, column_kind::clustering_key) - .with_column("value", int32_type); - builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); - std::map opts = { - {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS"}, - {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1"}, - {compaction::time_window_compaction_strategy_options::EXPIRED_SSTABLE_CHECK_FREQUENCY_SECONDS_KEY, "0"}, - }; - builder.set_compaction_strategy_options(std::move(opts)); - builder.set_gc_grace_seconds(0); - return builder.build(); - }; +SEASTAR_TEST_CASE(max_ongoing_compaction_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { max_ongoing_compaction_fn(env); }, test_env_config{.storage = 
make_test_object_storage_options("S3")}); +} - // makes sure all data belonging to a table falls into the same time bucket. - auto now = gc_clock::now(); - auto next_timestamp = [&now] (auto step) { - using namespace std::chrono; - return (now.time_since_epoch() - duration_cast(step)).count(); - }; - auto make_expiring_cell = [&] (schema_ptr s, std::chrono::hours step) { - static thread_local int32_t value = 1; +SEASTAR_FIXTURE_TEST_CASE(max_ongoing_compaction_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { max_ongoing_compaction_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +} - auto key = tests::generate_partition_key(s); +void compound_sstable_set_incremental_selector_fn(test_env& env) { + auto s = schema_builder(some_keyspace, some_column_family).with_column("p1", utf8_type, column_kind::partition_key).build(); + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::leveled, s->compaction_strategy_options()); + const auto keys = tests::generate_partition_keys(8, s); - mutation m(s, key); - auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step), gc_clock::duration(step + 5s)); - return m; - }; + auto new_sstable = [&] (lw_shared_ptr set, size_t k0, size_t k1, uint32_t level) { + auto key0 = keys[k0]; + auto tok0 = key0.token(); + auto key1 = keys[k1]; + auto tok1 = key1.token(); + testlog.debug("creating sstable with k[{}] token={} k[{}] token={} level={}", k0, tok0, k1, tok1, level); + auto sst = sstable_for_overlapping_test(env, s, key0.key(), key1.key(), level); + set->insert(sst); + return sst; + }; - constexpr size_t num_tables = 10; - std::vector schemas; - std::vector tables; - auto stop_tables = defer([&tables] { - for (auto& t : tables) { - t->stop().get(); - } - }); - - // 
Make tables - for (unsigned idx = 0; idx < num_tables; idx++) { - auto s = make_schema(idx); - schemas.push_back(s); - auto cf = env.make_table_for_tests(s); - tables.push_back(cf); + auto check = [&] (sstable_set::incremental_selector& selector, size_t k, std::unordered_set expected_ssts) { + const dht::decorated_key& key = keys[k]; + auto sstables = selector.select(key).sstables; + testlog.debug("checking sstables for key[{}] token={} found={} expected={}", k, keys[k].token(), sstables.size(), expected_ssts.size()); + BOOST_REQUIRE_EQUAL(sstables.size(), expected_ssts.size()); + for (auto& sst : sstables) { + BOOST_REQUIRE(expected_ssts.contains(sst)); + expected_ssts.erase(sst); } + BOOST_REQUIRE(expected_ssts.empty()); + }; - auto sst_gen = [&] (size_t idx) mutable { - auto t = tables[idx]; - return t->make_sstable(); + { + auto set1 = make_lw_shared(env.make_sstable_set(cs, s)); + auto set2 = make_lw_shared(env.make_sstable_set(cs, s)); + std::vector ssts; + ssts.push_back(new_sstable(set1, 0, 1, 1)); + ssts.push_back(new_sstable(set2, 0, 1, 1)); + ssts.push_back(new_sstable(set1, 3, 4, 1)); + ssts.push_back(new_sstable(set2, 4, 4, 1)); + ssts.push_back(new_sstable(set1, 4, 5, 1)); + + sstable_set compound = sstables::make_compound_sstable_set(s, { set1, set2 }); + sstable_set::incremental_selector sel = compound.make_incremental_selector(); + check(sel, 0, std::unordered_set{ssts[0], ssts[1]}); + check(sel, 1, std::unordered_set{ssts[0], ssts[1]}); + check(sel, 2, std::unordered_set{}); + check(sel, 3, std::unordered_set{ssts[2]}); + check(sel, 4, std::unordered_set{ssts[2], ssts[3], ssts[4]}); + check(sel, 5, std::unordered_set{ssts[4]}); + check(sel, 6, std::unordered_set{}); + check(sel, 7, std::unordered_set{}); + } + + { + auto set1 = make_lw_shared(env.make_sstable_set(cs, s)); + auto set2 = make_lw_shared(env.make_sstable_set(cs, s)); + std::vector ssts; + ssts.push_back(new_sstable(set1, 0, 7, 0)); // simulates L0 sstable spanning most of the range. 
+ ssts.push_back(new_sstable(set2, 0, 1, 1)); + ssts.push_back(new_sstable(set1, 0, 1, 1)); + ssts.push_back(new_sstable(set2, 3, 4, 1)); + ssts.push_back(new_sstable(set1, 4, 4, 1)); + ssts.push_back(new_sstable(set2, 4, 5, 1)); + + sstable_set compound = sstables::make_compound_sstable_set(s, { set1, set2 }); + sstable_set::incremental_selector sel = compound.make_incremental_selector(); + check(sel, 0, std::unordered_set{ssts[0], ssts[1], ssts[2]}); + check(sel, 1, std::unordered_set{ssts[0], ssts[1], ssts[2]}); + check(sel, 2, std::unordered_set{ssts[0]}); + check(sel, 3, std::unordered_set{ssts[0], ssts[3]}); + check(sel, 4, std::unordered_set{ssts[0], ssts[3], ssts[4], ssts[5]}); + check(sel, 5, std::unordered_set{ssts[0], ssts[5]}); + check(sel, 6, std::unordered_set{ssts[0]}); + check(sel, 7, std::unordered_set{ssts[0]}); + } + + { + // reproduces use-after-free failure in incremental reader selector with compound set where the next position + // returned by a set can be used after freed as selector position in another set, producing incorrect results. 
+ + enum class strategy_param : bool { + ICS = false, + LCS = true, }; - auto add_single_fully_expired_sstable_to_table = [&] (auto idx) { - auto s = schemas[idx]; - auto cf = tables[idx]; - auto muts = { make_expiring_cell(s, std::chrono::hours(1)) }; - auto sst = make_sstable_containing([&sst_gen, idx] { return sst_gen(idx); }, muts); - column_family_test(cf).add_sstable(sst).get(); + auto incremental_selection_test = [&] (strategy_param param) { + auto token_range = dht::token_range::make(dht::first_token(), dht::last_token()); + auto set1 = make_lw_shared(sstables::make_partitioned_sstable_set(s, token_range)); + auto set2 = make_lw_shared(sstables::make_partitioned_sstable_set(s, token_range)); + new_sstable(set1, 1, 1, 1); + new_sstable(set2, 0, 2, 1); + new_sstable(set2, 3, 3, 1); + new_sstable(set2, 4, 4, 1); + + sstable_set compound = sstables::make_compound_sstable_set(s, { set1, set2 }); + sstable_set::incremental_selector sel = compound.make_incremental_selector(); + + dht::ring_position_view pos = dht::ring_position_view::min(); + std::unordered_set sstables; + do { + auto ret = sel.select(pos); + pos = ret.next_position; + sstables.insert(ret.sstables.begin(), ret.sstables.end()); + } while (!pos.is_max()); + + BOOST_REQUIRE(sstables.size() == 4); }; - for (unsigned i = 0; i < num_tables; i++) { - add_single_fully_expired_sstable_to_table(i); - } - - // Make sure everything is expired - forward_jump_clocks(std::chrono::hours(100)); - now = gc_clock::now(); - - auto compact_all_tables = [&] (size_t expected_before, size_t expected_after) { - for (auto& t : tables) { - BOOST_REQUIRE_EQUAL(t->sstables_count(), expected_before); - t->trigger_compaction(); - } - - size_t max_ongoing_compaction = 0; - auto& cm = env.test_compaction_manager().get_compaction_manager(); - - // wait for submitted jobs to finish. 
- auto end = [&cm, &tables, expected_after] { - return cm.get_stats().pending_tasks == 0 && cm.get_stats().active_tasks == 0 - && std::ranges::all_of(tables, [expected_after] (auto& t) { return t->sstables_count() == expected_after; }); - }; - while (!end()) { - if (!cm.get_stats().pending_tasks && !cm.get_stats().active_tasks) { - for (auto& t : tables) { - if (t->sstables_count()) { - t->trigger_compaction(); - } - } - } - max_ongoing_compaction = std::max(cm.get_stats().active_tasks, max_ongoing_compaction); - yield().get(); - } - BOOST_REQUIRE(cm.get_stats().errors == 0); - return max_ongoing_compaction; - }; - - // Allow fully expired sstables to be compacted in parallel, as they have the same weight 0 (== weightless). - BOOST_REQUIRE_LE(compact_all_tables(1, 0), num_tables); - - auto add_sstables_to_table = [&] (auto idx, size_t num_sstables) { - auto s = schemas[idx]; - auto cf = tables[idx]; - auto cft = column_family_test(cf); - for (size_t i = 0; i < num_sstables; i++) { - auto muts = { make_expiring_cell(s, std::chrono::hours(1)) }; - cft.add_sstable(make_sstable_containing([&sst_gen, idx] { return sst_gen(idx); }, muts)).get(); - } - }; - - for (size_t i = 0; i < num_tables; i++) { - add_sstables_to_table(i, DEFAULT_MIN_COMPACTION_THRESHOLD); - } - - // All buckets are expected to have the same weight (>0) - // and therefore their compaction is expected to be serialized - BOOST_REQUIRE_EQUAL(compact_all_tables(DEFAULT_MIN_COMPACTION_THRESHOLD, 1), 1); - }); + incremental_selection_test(strategy_param::ICS); + incremental_selection_test(strategy_param::LCS); + } } SEASTAR_TEST_CASE(compound_sstable_set_incremental_selector_test) { - return test_env::do_with_async([] (test_env& env) { - auto s = schema_builder(some_keyspace, some_column_family).with_column("p1", utf8_type, column_kind::partition_key).build(); - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::leveled, s->compaction_strategy_options()); - const auto keys 
= tests::generate_partition_keys(8, s); + return test_env::do_with_async([](test_env& env) { compound_sstable_set_incremental_selector_fn(env); }); +} - auto new_sstable = [&] (lw_shared_ptr set, size_t k0, size_t k1, uint32_t level) { - auto key0 = keys[k0]; - auto tok0 = key0.token(); - auto key1 = keys[k1]; - auto tok1 = key1.token(); - testlog.debug("creating sstable with k[{}] token={} k[{}] token={} level={}", k0, tok0, k1, tok1, level); - auto sst = sstable_for_overlapping_test(env, s, key0.key(), key1.key(), level); - set->insert(sst); - return sst; - }; +SEASTAR_TEST_CASE(compound_sstable_set_incremental_selector_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { compound_sstable_set_incremental_selector_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto check = [&] (sstable_set::incremental_selector& selector, size_t k, std::unordered_set expected_ssts) { - const dht::decorated_key& key = keys[k]; - auto sstables = selector.select(key).sstables; - testlog.debug("checking sstables for key[{}] token={} found={} expected={}", k, keys[k].token(), sstables.size(), expected_ssts.size()); - BOOST_REQUIRE_EQUAL(sstables.size(), expected_ssts.size()); - for (auto& sst : sstables) { - BOOST_REQUIRE(expected_ssts.contains(sst)); - expected_ssts.erase(sst); - } - BOOST_REQUIRE(expected_ssts.empty()); - }; +SEASTAR_FIXTURE_TEST_CASE(compound_sstable_set_incremental_selector_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { compound_sstable_set_incremental_selector_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - { - auto set1 = make_lw_shared(env.make_sstable_set(cs, s)); - auto set2 = make_lw_shared(env.make_sstable_set(cs, s)); - std::vector ssts; - ssts.push_back(new_sstable(set1, 0, 1, 1)); - 
ssts.push_back(new_sstable(set2, 0, 1, 1)); - ssts.push_back(new_sstable(set1, 3, 4, 1)); - ssts.push_back(new_sstable(set2, 4, 4, 1)); - ssts.push_back(new_sstable(set1, 4, 5, 1)); +void twcs_single_key_reader_through_compound_set_fn(test_env& env) { + auto builder = schema_builder("tests", "single_key_reader_through_compound_set_test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", ::timestamp_type, column_kind::clustering_key) + .with_column("value", int32_type); + builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); + std::map opts = { + {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS"}, + {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1"}, + }; + builder.set_compaction_strategy_options(std::move(opts)); + auto s = builder.build(); + auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, std::move(opts)); - sstable_set compound = sstables::make_compound_sstable_set(s, { set1, set2 }); - sstable_set::incremental_selector sel = compound.make_incremental_selector(); - check(sel, 0, std::unordered_set{ssts[0], ssts[1]}); - check(sel, 1, std::unordered_set{ssts[0], ssts[1]}); - check(sel, 2, std::unordered_set{}); - check(sel, 3, std::unordered_set{ssts[2]}); - check(sel, 4, std::unordered_set{ssts[2], ssts[3], ssts[4]}); - check(sel, 5, std::unordered_set{ssts[4]}); - check(sel, 6, std::unordered_set{}); - check(sel, 7, std::unordered_set{}); - } + auto next_timestamp = [](auto step) { + using namespace std::chrono; + return (gc_clock::now().time_since_epoch() + duration_cast(step)).count(); + }; + auto key = tests::generate_partition_key(s); - { - auto set1 = make_lw_shared(env.make_sstable_set(cs, s)); - auto set2 = make_lw_shared(env.make_sstable_set(cs, s)); - std::vector ssts; - ssts.push_back(new_sstable(set1, 0, 7, 0)); // simulates L0 sstable spanning most of the range. 
- ssts.push_back(new_sstable(set2, 0, 1, 1)); - ssts.push_back(new_sstable(set1, 0, 1, 1)); - ssts.push_back(new_sstable(set2, 3, 4, 1)); - ssts.push_back(new_sstable(set1, 4, 4, 1)); - ssts.push_back(new_sstable(set2, 4, 5, 1)); + auto make_row = [&](std::chrono::hours step) { + static thread_local int32_t value = 1; - sstable_set compound = sstables::make_compound_sstable_set(s, { set1, set2 }); - sstable_set::incremental_selector sel = compound.make_incremental_selector(); - check(sel, 0, std::unordered_set{ssts[0], ssts[1], ssts[2]}); - check(sel, 1, std::unordered_set{ssts[0], ssts[1], ssts[2]}); - check(sel, 2, std::unordered_set{ssts[0]}); - check(sel, 3, std::unordered_set{ssts[0], ssts[3]}); - check(sel, 4, std::unordered_set{ssts[0], ssts[3], ssts[4], ssts[5]}); - check(sel, 5, std::unordered_set{ssts[0], ssts[5]}); - check(sel, 6, std::unordered_set{ssts[0]}); - check(sel, 7, std::unordered_set{ssts[0]}); - } + mutation m(s, key); + auto next_ts = next_timestamp(step); + auto c_key = clustering_key::from_exploded(*s, {::timestamp_type->decompose(next_ts)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value++)), next_ts); + return m; + }; - { - // reproduces use-after-free failure in incremental reader selector with compound set where the next position - // returned by a set can be used after freed as selector position in another set, producing incorrect results. 
+ auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + cf->start(); - enum class strategy_param : bool { - ICS = false, - LCS = true, - }; + auto set1 = make_lw_shared(cs.make_sstable_set(cf.as_compaction_group_view())); + auto set2 = make_lw_shared(cs.make_sstable_set(cf.as_compaction_group_view())); - auto incremental_selection_test = [&] (strategy_param param) { - auto token_range = dht::token_range::make(dht::first_token(), dht::last_token()); - auto set1 = make_lw_shared(sstables::make_partitioned_sstable_set(s, token_range)); - auto set2 = make_lw_shared(sstables::make_partitioned_sstable_set(s, token_range)); - new_sstable(set1, 1, 1, 1); - new_sstable(set2, 0, 2, 1); - new_sstable(set2, 3, 3, 1); - new_sstable(set2, 4, 4, 1); + auto sst_gen = env.make_sst_factory(s); - sstable_set compound = sstables::make_compound_sstable_set(s, { set1, set2 }); - sstable_set::incremental_selector sel = compound.make_incremental_selector(); + // sstables with same key but belonging to different windows + auto sst1 = make_sstable_containing(sst_gen, {make_row(std::chrono::hours(1))}); + auto sst2 = make_sstable_containing(sst_gen, {make_row(std::chrono::hours(5))}); + BOOST_REQUIRE(sst1->get_first_decorated_key().token() == sst2->get_last_decorated_key().token()); + auto dkey = sst1->get_first_decorated_key(); - dht::ring_position_view pos = dht::ring_position_view::min(); - std::unordered_set sstables; - do { - auto ret = sel.select(pos); - pos = ret.next_position; - sstables.insert(ret.sstables.begin(), ret.sstables.end()); - } while (!pos.is_max()); + set1->insert(std::move(sst1)); + set2->insert(std::move(sst2)); + sstable_set compound = sstables::make_compound_sstable_set(s, {set1, set2}); - BOOST_REQUIRE(sstables.size() == 4); - }; + reader_permit permit = env.make_reader_permit(); + utils::estimated_histogram eh; + auto pr = dht::partition_range::make_singular(dkey); - incremental_selection_test(strategy_param::ICS); - 
incremental_selection_test(strategy_param::LCS); - } - }); + auto reader = compound.create_single_key_sstable_reader(&*cf, s, permit, eh, pr, s->full_slice(), + tracing::trace_state_ptr(), ::streamed_mutation::forwarding::no, + ::mutation_reader::forwarding::no); + auto close_reader = deferred_close(reader); + auto mfopt = read_mutation_from_mutation_reader(reader).get(); + BOOST_REQUIRE(mfopt); + mfopt = read_mutation_from_mutation_reader(reader).get(); + BOOST_REQUIRE(!mfopt); + BOOST_REQUIRE(cf.cf_stats().clustering_filter_count > 0); } SEASTAR_TEST_CASE(twcs_single_key_reader_through_compound_set_test) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "single_key_reader_through_compound_set_test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", ::timestamp_type, column_kind::clustering_key) - .with_column("value", int32_type); - builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); - std::map opts = { - {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS"}, - {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1"}, - }; - builder.set_compaction_strategy_options(std::move(opts)); - auto s = builder.build(); - auto cs = compaction::make_compaction_strategy(compaction::compaction_strategy_type::time_window, std::move(opts)); + return test_env::do_with_async([](test_env& env) { twcs_single_key_reader_through_compound_set_fn(env); }); +} - auto next_timestamp = [](auto step) { - using namespace std::chrono; - return (gc_clock::now().time_since_epoch() + duration_cast(step)).count(); - }; - auto key = tests::generate_partition_key(s); +SEASTAR_TEST_CASE(twcs_single_key_reader_through_compound_set_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { twcs_single_key_reader_through_compound_set_fn(env); }, + 
test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto make_row = [&](std::chrono::hours step) { - static thread_local int32_t value = 1; +SEASTAR_FIXTURE_TEST_CASE(twcs_single_key_reader_through_compound_set_test_gcs, + gcs_fixture, + *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { twcs_single_key_reader_through_compound_set_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - mutation m(s, key); - auto next_ts = next_timestamp(step); - auto c_key = clustering_key::from_exploded(*s, {::timestamp_type->decompose(next_ts)}); - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value++)), next_ts); - return m; - }; +void basic_ics_controller_correctness_fn(test_env& env) { + static constexpr uint64_t default_fragment_size = 1UL*1024UL*1024UL*1024UL; - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - cf->start(); + auto backlog = [&env] (compaction::compaction_backlog_tracker backlog_tracker, uint64_t max_fragment_size) { + auto schema = table_for_tests::make_default_schema(); + table_for_tests cf = env.make_table_for_tests(schema); + auto stop_cf = defer([&] { cf.stop().get(); }); - auto set1 = make_lw_shared(cs.make_sstable_set(cf.as_compaction_group_view())); - auto set2 = make_lw_shared(cs.make_sstable_set(cf.as_compaction_group_view())); + uint64_t current_sstable_size = default_fragment_size; + uint64_t data_set_size = 0; + static constexpr uint64_t target_data_set_size = 1000UL*1024UL*1024UL*1024UL; - auto sst_gen = env.make_sst_factory(s); + while (data_set_size < target_data_set_size) { + auto run_identifier = sstables::run_id::create_random_id(); - // sstables with same key but belonging to different windows - auto sst1 = make_sstable_containing(sst_gen, {make_row(std::chrono::hours(1))}); - auto sst2 = make_sstable_containing(sst_gen, {make_row(std::chrono::hours(5))}); - 
BOOST_REQUIRE(sst1->get_first_decorated_key().token() == sst2->get_last_decorated_key().token()); - auto dkey = sst1->get_first_decorated_key(); + auto expected_fragments = std::max(1UL, current_sstable_size / max_fragment_size); + uint64_t fragment_size = std::max(default_fragment_size, current_sstable_size / expected_fragments); + auto tokens = tests::generate_partition_keys(expected_fragments, schema, local_shard_only::yes); - set1->insert(std::move(sst1)); - set2->insert(std::move(sst2)); - sstable_set compound = sstables::make_compound_sstable_set(s, {set1, set2}); + for (auto i = 0UL; i < expected_fragments; i++) { + auto sst = sstable_for_overlapping_test(env, cf->schema(), tokens[i].key(), tokens[i].key()); + sstables::test(sst).set_data_file_size(fragment_size); + sstables::test(sst).set_run_identifier(run_identifier); + backlog_tracker.replace_sstables({}, {std::move(sst)}); + } + data_set_size += current_sstable_size; + current_sstable_size *= 2; + } - reader_permit permit = env.make_reader_permit(); - utils::estimated_histogram eh; - auto pr = dht::partition_range::make_singular(dkey); + return backlog_tracker.backlog(); + }; - auto reader = compound.create_single_key_sstable_reader(&*cf, s, permit, eh, pr, s->full_slice(), - tracing::trace_state_ptr(), ::streamed_mutation::forwarding::no, - ::mutation_reader::forwarding::no); - auto close_reader = deferred_close(reader); - auto mfopt = read_mutation_from_mutation_reader(reader).get(); - BOOST_REQUIRE(mfopt); - mfopt = read_mutation_from_mutation_reader(reader).get(); - BOOST_REQUIRE(!mfopt); - BOOST_REQUIRE(cf.cf_stats().clustering_filter_count > 0); - }); + compaction::incremental_compaction_strategy_options ics_options; + auto ics_backlog = backlog(compaction::compaction_backlog_tracker(std::make_unique(ics_options)), default_fragment_size); + compaction::size_tiered_compaction_strategy_options stcs_options; + auto stcs_backlog = 
backlog(compaction::compaction_backlog_tracker(std::make_unique(stcs_options)), std::numeric_limits::max()); + + // don't expect ics and stcs to yield different backlogs for the same workload. + BOOST_CHECK_CLOSE(ics_backlog, stcs_backlog, 0.0001); } SEASTAR_TEST_CASE(basic_ics_controller_correctness_test) { - return test_env::do_with_async([] (test_env& env) { - static constexpr uint64_t default_fragment_size = 1UL*1024UL*1024UL*1024UL; + return test_env::do_with_async([](test_env& env) { basic_ics_controller_correctness_fn(env); }); +} - auto s = simple_schema().schema(); +SEASTAR_TEST_CASE(basic_ics_controller_correctness_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { basic_ics_controller_correctness_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto backlog = [&] (compaction::compaction_backlog_tracker backlog_tracker, uint64_t max_fragment_size) { - table_for_tests cf = env.make_table_for_tests(); - auto stop_cf = defer([&] { cf.stop().get(); }); +SEASTAR_FIXTURE_TEST_CASE(basic_ics_controller_correctness_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { basic_ics_controller_correctness_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - uint64_t current_sstable_size = default_fragment_size; - uint64_t data_set_size = 0; - static constexpr uint64_t target_data_set_size = 1000UL*1024UL*1024UL*1024UL; +void test_major_does_not_miss_data_in_memtable_fn(test_env& env) { + auto builder = schema_builder("tests", "test_major_does_not_miss_data_in_memtable") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", int32_type, column_kind::clustering_key) + .with_column("value", int32_type); + auto s = builder.build(); - while (data_set_size < target_data_set_size) { - auto run_identifier = 
sstables::run_id::create_random_id(); + auto pkey = tests::generate_partition_key(s); - auto expected_fragments = std::max(1UL, current_sstable_size / max_fragment_size); - uint64_t fragment_size = std::max(default_fragment_size, current_sstable_size / expected_fragments); - auto tokens = tests::generate_partition_keys(expected_fragments, s, local_shard_only::yes); + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + auto sst_gen = [&] () mutable { + return env.make_sstable(cf->schema()); + }; - for (auto i = 0UL; i < expected_fragments; i++) { - auto sst = sstable_for_overlapping_test(env, cf->schema(), tokens[i].key(), tokens[i].key()); - sstables::test(sst).set_data_file_size(fragment_size); - sstables::test(sst).set_run_identifier(run_identifier); - backlog_tracker.replace_sstables({}, {std::move(sst)}); - } - data_set_size += current_sstable_size; - current_sstable_size *= 2; - } + auto row_mut = [&] () { + static thread_local int32_t value = 1; + mutation m(s, pkey); + auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), gc_clock::now().time_since_epoch().count()); + return m; + }(); + auto sst = make_sstable_containing(sst_gen, {std::move(row_mut)}); + cf->add_sstable_and_update_cache(sst).get(); + assert_table_sstable_count(cf, 1); - return backlog_tracker.backlog(); - }; + auto deletion_mut = [&] () { + mutation m(s, pkey); + tombstone tomb(gc_clock::now().time_since_epoch().count(), gc_clock::now()); + m.partition().apply(tomb); + return m; + }(); + cf->apply(deletion_mut); - compaction::incremental_compaction_strategy_options ics_options; - auto ics_backlog = backlog(compaction::compaction_backlog_tracker(std::make_unique(ics_options)), default_fragment_size); - compaction::size_tiered_compaction_strategy_options stcs_options; - auto stcs_backlog = backlog(compaction::compaction_backlog_tracker(std::make_unique(stcs_options)), 
std::numeric_limits::max()); - - // don't expect ics and stcs to yield different backlogs for the same workload. - BOOST_CHECK_CLOSE(ics_backlog, stcs_backlog, 0.0001); - }); + cf->compact_all_sstables(tasks::task_info{}).get(); + assert_table_sstable_count(cf, 1); + auto new_sst = *(cf->get_sstables()->begin()); + BOOST_REQUIRE(new_sst->generation() != sst->generation()); + assert_that(sstable_reader(new_sst, s, env.make_reader_permit())) + .produces(deletion_mut) + .produces_end_of_stream(); } SEASTAR_TEST_CASE(test_major_does_not_miss_data_in_memtable) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "test_major_does_not_miss_data_in_memtable") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", int32_type, column_kind::clustering_key) - .with_column("value", int32_type); - auto s = builder.build(); - - auto pkey = tests::generate_partition_key(s); - - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - auto sst_gen = [&] () mutable { - return env.make_sstable(cf->schema()); - }; - - auto row_mut = [&] () { - static thread_local int32_t value = 1; - mutation m(s, pkey); - auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), gc_clock::now().time_since_epoch().count()); - return m; - }(); - auto sst = make_sstable_containing(sst_gen, {std::move(row_mut)}); - cf->add_sstable_and_update_cache(sst).get(); - assert_table_sstable_count(cf, 1); - - auto deletion_mut = [&] () { - mutation m(s, pkey); - tombstone tomb(gc_clock::now().time_since_epoch().count(), gc_clock::now()); - m.partition().apply(tomb); - return m; - }(); - cf->apply(deletion_mut); - - cf->compact_all_sstables(tasks::task_info{}).get(); - assert_table_sstable_count(cf, 1); - auto new_sst = *(cf->get_sstables()->begin()); - BOOST_REQUIRE(new_sst->generation() != sst->generation()); - 
assert_that(sstable_reader(new_sst, s, env.make_reader_permit())) - .produces(deletion_mut) - .produces_end_of_stream(); - }); + return test_env::do_with_async([](test_env& env) { test_major_does_not_miss_data_in_memtable_fn(env); }); } -future<> run_controller_test(compaction::compaction_strategy_type compaction_strategy_type) { +SEASTAR_TEST_CASE(test_major_does_not_miss_data_in_memtable_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { test_major_does_not_miss_data_in_memtable_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(test_major_does_not_miss_data_in_memtable_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { test_major_does_not_miss_data_in_memtable_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +future<> run_controller_test(compaction::compaction_strategy_type compaction_strategy_type, test_env_config config = {}) { return test_env::do_with_async([compaction_strategy_type] (test_env& env) { ///////////// // settings @@ -5436,7 +6216,7 @@ future<> run_controller_test(compaction::compaction_strategy_type compaction_str auto max_expected = compaction_strategy_type == compaction::compaction_strategy_type::leveled ? 
0.4f : 0.0f; BOOST_REQUIRE(r.normalized_backlog <= max_expected); } - }); + }, std::move(config)); } SEASTAR_TEST_CASE(simple_backlog_controller_test_size_tiered) { @@ -5455,443 +6235,562 @@ SEASTAR_TEST_CASE(simple_backlog_controller_test_incremental) { return run_controller_test(compaction::compaction_strategy_type::incremental); } -SEASTAR_TEST_CASE(test_compaction_strategy_cleanup_method) { - return test_env::do_with_async([] (test_env& env) { - constexpr size_t all_files = 64; - - auto get_cleanup_jobs = [&env] (compaction::compaction_strategy_type compaction_strategy_type, - std::map strategy_options = {}, - const api::timestamp_clock::duration step_base = 0ms, - unsigned sstable_level = 0) { - auto builder = schema_builder("tests", "test_compaction_strategy_cleanup_method") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", int32_type, column_kind::clustering_key) - .with_column("value", int32_type); - builder.set_compaction_strategy(compaction_strategy_type); - builder.set_compaction_strategy_options(std::move(strategy_options)); - auto s = builder.build(); - - auto _ = env.tempdir().make_sweeper(); - auto keys = tests::generate_partition_keys(all_files, s); - - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - auto sst_gen = env.make_sst_factory(s); - - using namespace std::chrono; - auto now = gc_clock::now().time_since_epoch() + duration_cast(seconds(tests::random::get_int(0, 3600*24))); - auto next_timestamp = [&now] (microseconds step) mutable -> api::timestamp_type { - return (now + step).count(); - }; - auto make_mutation = [&] (unsigned pkey_idx, api::timestamp_type ts) { - mutation m(s, keys[pkey_idx]); - auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(1)}); - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(1)), ts); - return m; - }; - - std::vector candidates; - candidates.reserve(all_files); - for (size_t i = 0; i < all_files; i++) { - auto current_step 
= duration_cast(step_base) * i; - auto sst = make_sstable_containing(sst_gen, {make_mutation(i, next_timestamp(current_step))}); - sst->set_sstable_level(sstable_level); - candidates.push_back(std::move(sst)); - } - - auto strategy = cf->get_compaction_strategy(); - auto jobs = strategy.get_cleanup_compaction_jobs(cf.as_compaction_group_view(), candidates); - return std::make_pair(std::move(candidates), std::move(jobs)); - }; - - auto run_cleanup_strategy_test = [&] (compaction::compaction_strategy_type compaction_strategy_type, size_t per_job_files, auto&&... args) { - testlog.info("Running cleanup test for strategy type {}", compaction::compaction_strategy::name(compaction_strategy_type)); - size_t target_job_count = all_files / per_job_files; - auto [candidates, descriptors] = get_cleanup_jobs(compaction_strategy_type, std::forward(args)...); - testlog.info("get_cleanup_jobs() returned {} descriptors; expected={}", descriptors.size(), target_job_count); - BOOST_REQUIRE(descriptors.size() == target_job_count); - auto generations = candidates | std::views::transform(std::mem_fn(&sstables::sstable::generation)) | std::ranges::to>(); - auto check_desc = [&] (const auto& desc) { - BOOST_REQUIRE(desc.sstables.size() == per_job_files); - for (auto& sst: desc.sstables) { - BOOST_REQUIRE(generations.erase(sst->generation())); - } - }; - for (auto& desc : descriptors) { - check_desc(desc); - } - }; - - // STCS: Check that 2 jobs are returned for a size tier containing 2x more files than max threshold. 
- run_cleanup_strategy_test(compaction::compaction_strategy_type::size_tiered, 32); - - // Default implementation: check that it will return one job for each file - run_cleanup_strategy_test(compaction::compaction_strategy_type::null, 1); - - // TWCS: Check that it will return one job for each time window - std::map twcs_opts = { - {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS"}, - {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1"}, - }; - run_cleanup_strategy_test(compaction::compaction_strategy_type::time_window, 1, std::move(twcs_opts), 1h); - - const std::map empty_opts; - // LCS: Check that 2 jobs are returned for all similar-sized files in level 0. - run_cleanup_strategy_test(compaction::compaction_strategy_type::leveled, 32, empty_opts, 0ms, 0); - // LCS: Check that 1 jobs is returned for all non-overlapping files in level 1, as incremental compaction can be employed - // to limit memory usage and space requirement. - run_cleanup_strategy_test(compaction::compaction_strategy_type::leveled, 64, empty_opts, 0ms, 1); - - // ICS: Check that 2 jobs are returned for a size tier containing 2x more files (single-fragment runs) than max threshold. 
- run_cleanup_strategy_test(compaction::compaction_strategy_type::incremental, 32); - }); +SEASTAR_TEST_CASE(simple_backlog_controller_test_size_tiered_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return run_controller_test(compaction::compaction_strategy_type::size_tiered, test_env_config{.storage = make_test_object_storage_options("S3")}); } -SEASTAR_TEST_CASE(test_large_partition_splitting_on_compaction) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "test_large_partition_splitting_on_compaction") +SEASTAR_TEST_CASE(simple_backlog_controller_test_time_window_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return run_controller_test(compaction::compaction_strategy_type::time_window, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_TEST_CASE(simple_backlog_controller_test_leveled_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return run_controller_test(compaction::compaction_strategy_type::leveled, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_TEST_CASE(simple_backlog_controller_test_incremental_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return run_controller_test(compaction::compaction_strategy_type::incremental, test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(simple_backlog_controller_test_size_tiered_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return run_controller_test(compaction::compaction_strategy_type::size_tiered, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +SEASTAR_FIXTURE_TEST_CASE(simple_backlog_controller_test_time_window_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return run_controller_test(compaction::compaction_strategy_type::time_window, test_env_config{.storage = 
make_test_object_storage_options("GS")}); +} + +SEASTAR_FIXTURE_TEST_CASE(simple_backlog_controller_test_leveled_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return run_controller_test(compaction::compaction_strategy_type::leveled, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +SEASTAR_FIXTURE_TEST_CASE(simple_backlog_controller_test_incremental_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return run_controller_test(compaction::compaction_strategy_type::incremental, test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void test_compaction_strategy_cleanup_method_fn(test_env& env) { + constexpr size_t all_files = 64; + + auto get_cleanup_jobs = [&env] (compaction::compaction_strategy_type compaction_strategy_type, + std::map strategy_options = {}, + const api::timestamp_clock::duration step_base = 0ms, + unsigned sstable_level = 0) { + auto builder = schema_builder("tests", "test_compaction_strategy_cleanup_method") .with_column("id", utf8_type, column_kind::partition_key) .with_column("cl", int32_type, column_kind::clustering_key) .with_column("value", int32_type); - builder.set_compressor_params(compression_parameters::no_compression()); - builder.set_gc_grace_seconds(0); // Don't purge any tombstone + builder.set_compaction_strategy(compaction_strategy_type); + builder.set_compaction_strategy_options(std::move(strategy_options)); auto s = builder.build(); - using namespace std::chrono; - auto next_timestamp = [] (std::chrono::seconds step = 0s) { - return (gc_clock::now().time_since_epoch() + duration_cast(step)).count(); - }; - auto sst_gen = env.make_sst_factory(s); - auto pkey = tests::generate_partition_key(s); + auto _ = env.tempdir().make_sweeper(); + auto keys = tests::generate_partition_keys(all_files, s); + auto cf = env.make_table_for_tests(s); auto close_cf = deferred_stop(cf); + auto sst_gen = 
env.make_sst_factory(s); - auto get_next_ckey = [&] { - static thread_local int32_t row_value = 1; - return clustering_key::from_exploded(*s, {int32_type->decompose(row_value++)}); + using namespace std::chrono; + auto now = gc_clock::now().time_since_epoch() + duration_cast(seconds(tests::random::get_int(0, 3600*24))); + auto next_timestamp = [&now] (microseconds step) mutable -> api::timestamp_type { + return (now + step).count(); }; - - auto make_row = [&] () { - mutation m(s, pkey); - auto c_key = get_next_ckey(); - // Use a step to make sure that rows aren't covered by tombstone. - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(0)), next_timestamp(seconds(3600))); + auto make_mutation = [&] (unsigned pkey_idx, api::timestamp_type ts) { + mutation m(s, keys[pkey_idx]); + auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(1)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(1)), ts); return m; }; - auto make_open_ended_range_tombstone = [&] () { - mutation m(s, pkey); - tombstone tomb(api::new_timestamp(), gc_clock::now()); - auto start_key = get_next_ckey(); - auto start_bound = bound_view(start_key, bound_kind::incl_start); - auto end_bound = bound_view::top(); - range_tombstone rt(start_bound, - end_bound, - tomb); - m.partition().apply_delete(*s, std::move(rt)); - return m; - }; - - auto deletion_mut = [&] () { - mutation m(s, pkey); - tombstone tomb(next_timestamp(), gc_clock::now()); - m.partition().apply(tomb); - return m; - }(); - - utils::chunked_vector mutations; - static constexpr size_t rows = 20; - mutations.reserve(1 + rows); - mutations.push_back(std::move(deletion_mut)); - - for (size_t i = 0; i < rows; i++) { - mutations.push_back(make_row()); - mutations.push_back(make_open_ended_range_tombstone()); + std::vector candidates; + candidates.reserve(all_files); + for (size_t i = 0; i < all_files; i++) { + auto current_step = duration_cast(step_base) * i; + auto sst = 
make_sstable_containing(sst_gen, {make_mutation(i, next_timestamp(current_step))}); + sst->set_sstable_level(sstable_level); + candidates.push_back(std::move(sst)); } - auto sst = make_sstable_containing(sst_gen, std::move(mutations)); + auto strategy = cf->get_compaction_strategy(); + auto jobs = strategy.get_cleanup_compaction_jobs(cf.as_compaction_group_view(), candidates); + return std::make_pair(std::move(candidates), std::move(jobs)); + }; - auto desc = compaction::compaction_descriptor({ sst }); - // With max_sstable_bytes of 1, we'll perform the splitting of the partition as soon as possible. - desc.max_sstable_bytes = 1; - desc.can_split_large_partition = true; - // Set block size to 1, so promoted index is generated for every row written, allowing the split to happen as soon as possible. - env.manager().set_promoted_index_block_size(1); - - auto ret = compact_sstables(env, std::move(desc), cf, sst_gen, replacer_fn_no_op(), can_purge_tombstones::no).get(); - - testlog.info("Large partition splitting on compaction created {} sstables", ret.new_sstables.size()); - BOOST_REQUIRE(ret.new_sstables.size() > 1); - - sstable_run sst_run; - - std::optional last_rt; - std::optional last_pos; - position_in_partition::tri_compare pos_tri_cmp(*s); - - for (auto& sst : ret.new_sstables) { - sst = env.reusable_sst(sst).get(); - BOOST_REQUIRE(sst->may_have_partition_tombstones()); - - auto reader = sstable_reader(sst, s, env.make_reader_permit()); - - mutation_opt m = read_mutation_from_mutation_reader(reader).get(); - BOOST_REQUIRE(m); - BOOST_REQUIRE(m->decorated_key().equal(*s, pkey)); - // ASSERT that partition tobmstone is replicated to every fragment. 
- BOOST_REQUIRE(m->partition().partition_tombstone()); - auto rows = m->partition().clustered_rows(); - BOOST_REQUIRE(rows.calculate_size() >= 1); - auto& row = rows.begin()->row(); - auto& cells = row.cells(); - BOOST_REQUIRE_EQUAL(cells.size(), 1); - auto& cdef = *s->get_column_definition("value"); - BOOST_REQUIRE(cells.cell_at(cdef.id).as_atomic_cell(cdef).is_live()); - - testlog.info("SSTable of generation {} has position range [{}, {}]", sst->generation(), sst->first_partition_first_position(), sst->last_partition_last_position()); - - // Check that if we split partition with active range tombstone, check we will issue properly - // the end bound in fragment A and re-emit it as start bound in fragment B. - // Fragment A will contain range [r1, r2] - // And fragment B will contain range (r2, ...] - // assuming the split happened when last position was r2. - auto& current_first_rt = *m->partition().row_tombstones().begin(); - if (auto previous_last_rt = std::exchange(last_rt, *m->partition().row_tombstones().rbegin())) { - testlog.info("\tprevious last rt's end bound: {}", previous_last_rt->end_bound()); - testlog.info("\tcurrent first rt's start bound: {}", current_first_rt.start_bound()); - BOOST_REQUIRE(previous_last_rt->end_bound().prefix() == current_first_rt.start_bound().prefix()); - BOOST_REQUIRE(previous_last_rt->end_bound().kind() == bound_kind::incl_end); - BOOST_REQUIRE(current_first_rt.start_bound().kind() == bound_kind::excl_start); - } - const auto& current_first_pos = sst->first_partition_first_position(); - if (auto previous_last_pos = std::exchange(last_pos, sst->last_partition_last_position())) { - testlog.info("\tprevious last pos: {}", previous_last_pos); - testlog.info("\tcurrent first pos: {}", current_first_pos); - BOOST_REQUIRE(pos_tri_cmp(*previous_last_pos, current_first_pos) == 0); + auto run_cleanup_strategy_test = [&] (compaction::compaction_strategy_type compaction_strategy_type, size_t per_job_files, auto&&... 
args) { + testlog.info("Running cleanup test for strategy type {}", compaction::compaction_strategy::name(compaction_strategy_type)); + size_t target_job_count = all_files / per_job_files; + auto [candidates, descriptors] = get_cleanup_jobs(compaction_strategy_type, std::forward(args)...); + testlog.info("get_cleanup_jobs() returned {} descriptors; expected={}", descriptors.size(), target_job_count); + BOOST_REQUIRE(descriptors.size() == target_job_count); + auto generations = candidates | std::views::transform(std::mem_fn(&sstables::sstable::generation)) | std::ranges::to>(); + auto check_desc = [&] (const auto& desc) { + BOOST_REQUIRE(desc.sstables.size() == per_job_files); + for (auto& sst: desc.sstables) { + BOOST_REQUIRE(generations.erase(sst->generation())); } + }; + for (auto& desc : descriptors) { + check_desc(desc); + } + }; - BOOST_REQUIRE(!(reader)().get()); + // STCS: Check that 2 jobs are returned for a size tier containing 2x more files than max threshold. + run_cleanup_strategy_test(compaction::compaction_strategy_type::size_tiered, 32); - reader.close().get(); + // Default implementation: check that it will return one job for each file + run_cleanup_strategy_test(compaction::compaction_strategy_type::null, 1); - // CHECK that all fragments generated by compaction are disjoint. - BOOST_REQUIRE(sst_run.insert(sst) == true); + // TWCS: Check that it will return one job for each time window + std::map twcs_opts = { + {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_UNIT_KEY, "HOURS"}, + {compaction::time_window_compaction_strategy_options::COMPACTION_WINDOW_SIZE_KEY, "1"}, + }; + run_cleanup_strategy_test(compaction::compaction_strategy_type::time_window, 1, std::move(twcs_opts), 1h); + + const std::map empty_opts; + // LCS: Check that 2 jobs are returned for all similar-sized files in level 0. 
+ run_cleanup_strategy_test(compaction::compaction_strategy_type::leveled, 32, empty_opts, 0ms, 0); + // LCS: Check that 1 jobs is returned for all non-overlapping files in level 1, as incremental compaction can be employed + // to limit memory usage and space requirement. + run_cleanup_strategy_test(compaction::compaction_strategy_type::leveled, 64, empty_opts, 0ms, 1); + + // ICS: Check that 2 jobs are returned for a size tier containing 2x more files (single-fragment runs) than max threshold. + run_cleanup_strategy_test(compaction::compaction_strategy_type::incremental, 32); +} + +SEASTAR_TEST_CASE(test_compaction_strategy_cleanup_method) { + return test_env::do_with_async([](test_env& env) { test_compaction_strategy_cleanup_method_fn(env); }); +} + +SEASTAR_TEST_CASE(test_compaction_strategy_cleanup_method_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { test_compaction_strategy_cleanup_method_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(test_compaction_strategy_cleanup_method_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { test_compaction_strategy_cleanup_method_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void test_large_partition_splitting_on_compaction_fn(test_env& env) { + auto builder = schema_builder("tests", "test_large_partition_splitting_on_compaction") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", int32_type, column_kind::clustering_key) + .with_column("value", int32_type); + builder.set_compressor_params(compression_parameters::no_compression()); + builder.set_gc_grace_seconds(0); // Don't purge any tombstone + auto s = builder.build(); + + using namespace std::chrono; + auto next_timestamp = [] (std::chrono::seconds step = 0s) { + return 
(gc_clock::now().time_since_epoch() + duration_cast(step)).count(); + }; + auto sst_gen = env.make_sst_factory(s); + auto pkey = tests::generate_partition_key(s); + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + + auto get_next_ckey = [&] { + static thread_local int32_t row_value = 1; + return clustering_key::from_exploded(*s, {int32_type->decompose(row_value++)}); + }; + + auto make_row = [&] () { + mutation m(s, pkey); + auto c_key = get_next_ckey(); + // Use a step to make sure that rows aren't covered by tombstone. + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(0)), next_timestamp(seconds(3600))); + return m; + }; + + auto make_open_ended_range_tombstone = [&] () { + mutation m(s, pkey); + tombstone tomb(api::new_timestamp(), gc_clock::now()); + auto start_key = get_next_ckey(); + auto start_bound = bound_view(start_key, bound_kind::incl_start); + auto end_bound = bound_view::top(); + range_tombstone rt(start_bound, + end_bound, + tomb); + m.partition().apply_delete(*s, std::move(rt)); + return m; + }; + + auto deletion_mut = [&] () { + mutation m(s, pkey); + tombstone tomb(next_timestamp(), gc_clock::now()); + m.partition().apply(tomb); + return m; + }(); + + utils::chunked_vector mutations; + static constexpr size_t rows = 20; + mutations.reserve(1 + rows); + mutations.push_back(std::move(deletion_mut)); + + for (size_t i = 0; i < rows; i++) { + mutations.push_back(make_row()); + mutations.push_back(make_open_ended_range_tombstone()); + } + + auto sst = make_sstable_containing(sst_gen, std::move(mutations)); + + auto desc = compaction::compaction_descriptor({ sst }); + // With max_sstable_bytes of 1, we'll perform the splitting of the partition as soon as possible. + desc.max_sstable_bytes = 1; + desc.can_split_large_partition = true; + // Set block size to 1, so promoted index is generated for every row written, allowing the split to happen as soon as possible. 
+ env.manager().set_promoted_index_block_size(1); + + auto ret = compact_sstables(env, std::move(desc), cf, sst_gen, replacer_fn_no_op(), can_purge_tombstones::no).get(); + + testlog.info("Large partition splitting on compaction created {} sstables", ret.new_sstables.size()); + BOOST_REQUIRE(ret.new_sstables.size() > 1); + + sstable_run sst_run; + + std::optional last_rt; + std::optional last_pos; + position_in_partition::tri_compare pos_tri_cmp(*s); + + for (auto& sst : ret.new_sstables) { + sst = env.reusable_sst(sst).get(); + BOOST_REQUIRE(sst->may_have_partition_tombstones()); + + auto reader = sstable_reader(sst, s, env.make_reader_permit()); + + mutation_opt m = read_mutation_from_mutation_reader(reader).get(); + BOOST_REQUIRE(m); + BOOST_REQUIRE(m->decorated_key().equal(*s, pkey)); + // ASSERT that partition tobmstone is replicated to every fragment. + BOOST_REQUIRE(m->partition().partition_tombstone()); + auto rows = m->partition().clustered_rows(); + BOOST_REQUIRE(rows.calculate_size() >= 1); + auto& row = rows.begin()->row(); + auto& cells = row.cells(); + BOOST_REQUIRE_EQUAL(cells.size(), 1); + auto& cdef = *s->get_column_definition("value"); + BOOST_REQUIRE(cells.cell_at(cdef.id).as_atomic_cell(cdef).is_live()); + + testlog.info("SSTable of generation {} has position range [{}, {}]", sst->generation(), sst->first_partition_first_position(), sst->last_partition_last_position()); + + // Check that if we split partition with active range tombstone, check we will issue properly + // the end bound in fragment A and re-emit it as start bound in fragment B. + // Fragment A will contain range [r1, r2] + // And fragment B will contain range (r2, ...] + // assuming the split happened when last position was r2. 
+ auto& current_first_rt = *m->partition().row_tombstones().begin(); + if (auto previous_last_rt = std::exchange(last_rt, *m->partition().row_tombstones().rbegin())) { + testlog.info("\tprevious last rt's end bound: {}", previous_last_rt->end_bound()); + testlog.info("\tcurrent first rt's start bound: {}", current_first_rt.start_bound()); + BOOST_REQUIRE(previous_last_rt->end_bound().prefix() == current_first_rt.start_bound().prefix()); + BOOST_REQUIRE(previous_last_rt->end_bound().kind() == bound_kind::incl_end); + BOOST_REQUIRE(current_first_rt.start_bound().kind() == bound_kind::excl_start); + } + const auto& current_first_pos = sst->first_partition_first_position(); + if (auto previous_last_pos = std::exchange(last_pos, sst->last_partition_last_position())) { + testlog.info("\tprevious last pos: {}", previous_last_pos); + testlog.info("\tcurrent first pos: {}", current_first_pos); + BOOST_REQUIRE(pos_tri_cmp(*previous_last_pos, current_first_pos) == 0); } - }); + BOOST_REQUIRE(!(reader)().get()); + + reader.close().get(); + + // CHECK that all fragments generated by compaction are disjoint. 
+ BOOST_REQUIRE(sst_run.insert(sst) == true); + } + +} + +SEASTAR_TEST_CASE(test_large_partition_splitting_on_compaction) { + return test_env::do_with_async([](test_env& env) { test_large_partition_splitting_on_compaction_fn(env); }); +} + +SEASTAR_TEST_CASE(test_large_partition_splitting_on_compaction_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { test_large_partition_splitting_on_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(test_large_partition_splitting_on_compaction_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { test_large_partition_splitting_on_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +void check_table_sstable_set_includes_maintenance_sstables_fn(test_env& env) { + simple_schema ss; + auto s = ss.schema(); + auto pks = ss.make_pkeys(1); + + auto mut1 = mutation(s, pks[0]); + mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp()); + auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}); + + auto cf = env.make_table_for_tests(s); + auto close_cf = deferred_stop(cf); + + cf->add_sstable_and_update_cache(sst, sstables::offstrategy::yes).get(); + + BOOST_REQUIRE(cf->get_sstable_set().all()->size() == 1); + BOOST_REQUIRE(cf->get_sstable_set().size() == 1); } SEASTAR_TEST_CASE(check_table_sstable_set_includes_maintenance_sstables) { - return test_env::do_with_async([] (test_env& env) { - simple_schema ss; - auto s = ss.schema(); - auto pks = ss.make_pkeys(1); + return test_env::do_with_async([](test_env& env) { check_table_sstable_set_includes_maintenance_sstables_fn(env); }); +} - auto mut1 = mutation(s, pks[0]); - mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp()); - auto sst = 
make_sstable_containing(env.make_sstable(s), {std::move(mut1)}); +SEASTAR_TEST_CASE(check_table_sstable_set_includes_maintenance_sstables_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { check_table_sstable_set_includes_maintenance_sstables_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto cf = env.make_table_for_tests(s); - auto close_cf = deferred_stop(cf); - - cf->add_sstable_and_update_cache(sst, sstables::offstrategy::yes).get(); - - BOOST_REQUIRE(cf->get_sstable_set().all()->size() == 1); - BOOST_REQUIRE(cf->get_sstable_set().size() == 1); - }); +SEASTAR_FIXTURE_TEST_CASE(check_table_sstable_set_includes_maintenance_sstables_gcs, + gcs_fixture, + *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { check_table_sstable_set_includes_maintenance_sstables_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); } // Without commit aba475fe1d24d5c, scylla will fail miserably (either with abort or segfault; depends on the version). 
+void compaction_manager_stop_and_drain_race_fn(test_env& env) { + abort_source as; + + auto cfg = compaction::compaction_manager::config{ .available_memory = 1 }; + auto task_manager = tasks::task_manager({}, as); + auto stop_task_manager = deferred_stop(task_manager); + auto cm = compaction::compaction_manager(cfg, as, task_manager); + auto stop_cm = deferred_stop(cm); + cm.enable(); + + testlog.info("requesting abort"); + as.request_abort(); + + testlog.info("draining compaction manager"); + cm.drain().get(); + + testlog.info("stopping compaction manager"); + stop_cm.stop_now(); +} + SEASTAR_TEST_CASE(compaction_manager_stop_and_drain_race_test) { - return test_env::do_with_async([] (test_env& env) { - abort_source as; + return test_env::do_with_async([](test_env& env) { compaction_manager_stop_and_drain_race_fn(env); }); +} - auto cfg = compaction::compaction_manager::config{ .available_memory = 1 }; - auto task_manager = tasks::task_manager({}, as); - auto stop_task_manager = deferred_stop(task_manager); - auto cm = compaction::compaction_manager(cfg, as, task_manager); - auto stop_cm = deferred_stop(cm); - cm.enable(); +SEASTAR_TEST_CASE(compaction_manager_stop_and_drain_race_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { compaction_manager_stop_and_drain_race_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - testlog.info("requesting abort"); - as.request_abort(); +SEASTAR_FIXTURE_TEST_CASE(compaction_manager_stop_and_drain_race_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { compaction_manager_stop_and_drain_race_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - testlog.info("draining compaction manager"); - cm.drain().get(); +void test_print_shared_sstables_vector_fn(test_env& env) { + simple_schema ss; + auto 
s = ss.schema(); + auto pks = ss.make_pkeys(2); + auto sst_gen = env.make_sst_factory(s); - testlog.info("stopping compaction manager"); - stop_cm.stop_now(); - }); + std::vector ssts(2); + + auto mut0 = mutation(s, pks[0]); + mut0.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp()); + ssts[0] = make_sstable_containing(sst_gen, {std::move(mut0)}); + + auto mut1 = mutation(s, pks[1]); + mut1.partition().apply_insert(*s, ss.make_ckey(1), ss.new_timestamp()); + ssts[1] = make_sstable_containing(sst_gen, {std::move(mut1)}); + + std::string msg = seastar::format("{}", ssts); + for (const auto& sst : ssts) { + auto gen_str = format("{}", sst->generation()); + BOOST_REQUIRE(msg.find(gen_str) != std::string::npos); + } } SEASTAR_TEST_CASE(test_print_shared_sstables_vector) { - return test_env::do_with_async([] (test_env& env) { - simple_schema ss; - auto s = ss.schema(); - auto pks = ss.make_pkeys(2); - auto sst_gen = env.make_sst_factory(s); + return test_env::do_with_async([](test_env& env) { test_print_shared_sstables_vector_fn(env); }); +} - std::vector ssts(2); +SEASTAR_TEST_CASE(test_print_shared_sstables_vector_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { test_print_shared_sstables_vector_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto mut0 = mutation(s, pks[0]); - mut0.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp()); - ssts[0] = make_sstable_containing(sst_gen, {std::move(mut0)}); +SEASTAR_FIXTURE_TEST_CASE(test_print_shared_sstables_vector_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { test_print_shared_sstables_vector_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - auto mut1 = mutation(s, pks[1]); - mut1.partition().apply_insert(*s, ss.make_ckey(1), ss.new_timestamp()); - ssts[1] = 
make_sstable_containing(sst_gen, {std::move(mut1)}); +void tombstone_gc_disabled_fn(test_env& env) { + auto builder = schema_builder("tests", "tombstone_purge") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + builder.set_gc_grace_seconds(0); + auto s = builder.build(); - std::string msg = seastar::format("{}", ssts); - for (const auto& sst : ssts) { - auto gen_str = format("{}", sst->generation()); - BOOST_REQUIRE(msg.find(gen_str) != std::string::npos); + auto sst_gen = env.make_sst_factory(s); + + auto compact = [&, s] (bool tombstone_gc_enabled, bool update_tomb_gc_during_compaction, std::vector all) -> std::vector { + auto t = env.make_table_for_tests(s); + auto my_sst_gen = sst_gen; + if (update_tomb_gc_during_compaction) { + // update tombstone_gc setting after compaction was initialized, + // when it creates the first output SSTable, to stress the + // ability of tombstone gc update taking immediate effect + // even on ongoing compactions. 
+ my_sst_gen = [&] () -> sstables::shared_sstable { + t.set_tombstone_gc_enabled(tombstone_gc_enabled); + return sst_gen(); + }; + } else { + t.set_tombstone_gc_enabled(tombstone_gc_enabled); } - }); + auto stop = deferred_stop(t); + for (auto& sst : all) { + column_family_test(t).add_sstable(sst).get(); + } + return compact_sstables(env, compaction::compaction_descriptor(all), t, my_sst_gen).get().new_sstables; + }; + + auto next_timestamp = [] { + static thread_local api::timestamp_type next = 1; + return next++; + }; + + auto make_insert = [&] (partition_key key) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), next_timestamp()); + return m; + }; + + auto make_delete = [&] (partition_key key) { + mutation m(s, key); + tombstone tomb(next_timestamp(), gc_clock::now()); + m.partition().apply(tomb); + return m; + }; + + auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")}); + auto beta = partition_key::from_exploded(*s, {to_bytes("beta")}); + + auto perform_tombstone_gc_test = [&] (bool tombstone_gc_enabled) { + auto mut1 = make_insert(alpha); + auto mut2 = make_delete(alpha); + auto mut3 = make_insert(beta); + + auto sst1 = make_sstable_containing(sst_gen, {mut1}); + auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); + + forward_jump_clocks(std::chrono::seconds(1)); + + auto do_perform_tombstone_gc_test = [&] (bool update_tomb_gc_during_compaction) { + auto result = compact(tombstone_gc_enabled, update_tomb_gc_during_compaction, {sst1, sst2}); + BOOST_REQUIRE_EQUAL(1, result.size()); + + std::set sorted_mut; + sorted_mut.insert(mut2); + sorted_mut.insert(mut3); + + auto r = assert_that(sstable_reader(result[0], s, env.make_reader_permit())); + for (auto&& mut: sorted_mut) { + bool is_tombstone = bool(mut.partition().partition_tombstone()); + // if tombstone compaction is enabled, expired tombstone is purged + if (is_tombstone && tombstone_gc_enabled) { + continue; + } + 
r.produces(mut); + } + r.produces_end_of_stream(); + }; + + do_perform_tombstone_gc_test(false); + do_perform_tombstone_gc_test(true); + }; + + perform_tombstone_gc_test(false); + perform_tombstone_gc_test(true); } SEASTAR_TEST_CASE(tombstone_gc_disabled_test) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "tombstone_purge") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - builder.set_gc_grace_seconds(0); - auto s = builder.build(); + return test_env::do_with_async([](test_env& env) { tombstone_gc_disabled_fn(env); }); +} - auto sst_gen = env.make_sst_factory(s); +SEASTAR_TEST_CASE(tombstone_gc_disabled_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { tombstone_gc_disabled_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto compact = [&, s] (bool tombstone_gc_enabled, bool update_tomb_gc_during_compaction, std::vector all) -> std::vector { - auto t = env.make_table_for_tests(s); - auto my_sst_gen = sst_gen; - if (update_tomb_gc_during_compaction) { - // update tombstone_gc setting after compaction was initialized, - // when it creates the first output SSTable, to stress the - // ability of tombstone gc update taking immediate effect - // even on ongoing compactions. 
- my_sst_gen = [&] () -> sstables::shared_sstable { - t.set_tombstone_gc_enabled(tombstone_gc_enabled); - return sst_gen(); - }; - } else { - t.set_tombstone_gc_enabled(tombstone_gc_enabled); - } - auto stop = deferred_stop(t); - for (auto& sst : all) { - column_family_test(t).add_sstable(sst).get(); - } - return compact_sstables(env, compaction::compaction_descriptor(all), t, my_sst_gen).get().new_sstables; - }; - - auto next_timestamp = [] { - static thread_local api::timestamp_type next = 1; - return next++; - }; - - auto make_insert = [&] (partition_key key) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), next_timestamp()); - return m; - }; - - auto make_delete = [&] (partition_key key) { - mutation m(s, key); - tombstone tomb(next_timestamp(), gc_clock::now()); - m.partition().apply(tomb); - return m; - }; - - auto alpha = partition_key::from_exploded(*s, {to_bytes("alpha")}); - auto beta = partition_key::from_exploded(*s, {to_bytes("beta")}); - - auto perform_tombstone_gc_test = [&] (bool tombstone_gc_enabled) { - auto mut1 = make_insert(alpha); - auto mut2 = make_delete(alpha); - auto mut3 = make_insert(beta); - - auto sst1 = make_sstable_containing(sst_gen, {mut1}); - auto sst2 = make_sstable_containing(sst_gen, {mut2, mut3}); - - forward_jump_clocks(std::chrono::seconds(1)); - - auto do_perform_tombstone_gc_test = [&] (bool update_tomb_gc_during_compaction) { - auto result = compact(tombstone_gc_enabled, update_tomb_gc_during_compaction, {sst1, sst2}); - BOOST_REQUIRE_EQUAL(1, result.size()); - - std::set sorted_mut; - sorted_mut.insert(mut2); - sorted_mut.insert(mut3); - - auto r = assert_that(sstable_reader(result[0], s, env.make_reader_permit())); - for (auto&& mut: sorted_mut) { - bool is_tombstone = bool(mut.partition().partition_tombstone()); - // if tombstone compaction is enabled, expired tombstone is purged - if (is_tombstone && tombstone_gc_enabled) { - continue; - } - 
r.produces(mut); - } - r.produces_end_of_stream(); - }; - - do_perform_tombstone_gc_test(false); - do_perform_tombstone_gc_test(true); - }; - - perform_tombstone_gc_test(false); - perform_tombstone_gc_test(true); - }); +SEASTAR_FIXTURE_TEST_CASE(tombstone_gc_disabled_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { tombstone_gc_disabled_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); } // Check that tombstone newer than grace period won't trigger bloom filter check // against uncompacting sstable, during compaction. -SEASTAR_TEST_CASE(compaction_optimization_to_avoid_bloom_filter_checks) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "tombstone_purge") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - builder.set_gc_grace_seconds(10000); - auto s = builder.build(); - auto sst_gen = env.make_sst_factory(s); - auto compact = [&, s] (std::vector all, std::vector c) -> compaction::compaction_result { - auto t = env.make_table_for_tests(s); - t->disable_auto_compaction().get(); - auto stop = deferred_stop(t); - for (auto& sst : all) { - column_family_test(t).add_sstable(sst).get(); - } - auto desc = compaction::compaction_descriptor(std::move(c)); - desc.enable_garbage_collection(t->get_sstable_set()); - return compact_sstables(env, std::move(desc), t, sst_gen).get(); - }; +void compaction_optimization_to_avoid_bloom_filter_checks_fn(test_env& env) { + auto builder = schema_builder("tests", "tombstone_purge") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + builder.set_gc_grace_seconds(10000); + auto s = builder.build(); + auto sst_gen = env.make_sst_factory(s); - auto make_insert = [&] (partition_key key) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), 
data_value(int32_t(1)), api::new_timestamp()); - return m; - }; - auto make_delete = [&] (partition_key key) { - mutation m(s, key); - tombstone tomb(api::new_timestamp(), gc_clock::now()); - m.partition().apply(tomb); - return m; - }; + auto compact = [&, s] (std::vector all, std::vector c) -> compaction::compaction_result { + auto t = env.make_table_for_tests(s); + t->disable_auto_compaction().get(); + auto stop = deferred_stop(t); + for (auto& sst : all) { + column_family_test(t).add_sstable(sst).get(); + } + auto desc = compaction::compaction_descriptor(std::move(c)); + desc.enable_garbage_collection(t->get_sstable_set()); + return compact_sstables(env, std::move(desc), t, sst_gen).get(); + }; - auto uncompacting = make_sstable_containing(sst_gen, { make_insert(partition_key::from_exploded(*s, {to_bytes("pk1")}) )}); - auto compacting = make_sstable_containing(sst_gen, { make_delete(partition_key::from_exploded(*s, {to_bytes("pk1")}) )}); + auto make_insert = [&] (partition_key key) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), api::new_timestamp()); + return m; + }; + auto make_delete = [&] (partition_key key) { + mutation m(s, key); + tombstone tomb(api::new_timestamp(), gc_clock::now()); + m.partition().apply(tomb); + return m; + }; - auto result = compact({uncompacting, compacting}, {compacting}); - BOOST_REQUIRE_EQUAL(1, result.new_sstables.size()); - BOOST_REQUIRE_EQUAL(0, result.stats.bloom_filter_checks); + auto uncompacting = make_sstable_containing(sst_gen, { make_insert(partition_key::from_exploded(*s, {to_bytes("pk1")}) )}); + auto compacting = make_sstable_containing(sst_gen, { make_delete(partition_key::from_exploded(*s, {to_bytes("pk1")}) )}); - forward_jump_clocks(std::chrono::seconds(s->gc_grace_seconds()) + 1s); + auto result = compact({uncompacting, compacting}, {compacting}); + BOOST_REQUIRE_EQUAL(1, result.new_sstables.size()); + BOOST_REQUIRE_EQUAL(0, 
result.stats.bloom_filter_checks); - result = compact({uncompacting, compacting}, {compacting}); - BOOST_REQUIRE_EQUAL(1, result.new_sstables.size()); - BOOST_REQUIRE_EQUAL(1, result.stats.bloom_filter_checks); - }); + forward_jump_clocks(std::chrono::seconds(s->gc_grace_seconds()) + 1s); + + result = compact({uncompacting, compacting}, {compacting}); + BOOST_REQUIRE_EQUAL(1, result.new_sstables.size()); + BOOST_REQUIRE_EQUAL(1, result.stats.bloom_filter_checks); } -static future<> run_incremental_compaction_test(sstables::offstrategy offstrategy, std::function(table_for_tests&, compaction::owned_ranges_ptr)> run_compaction) { +SEASTAR_TEST_CASE(compaction_optimization_to_avoid_bloom_filter_checks) { + return test_env::do_with_async([](test_env& env) { compaction_optimization_to_avoid_bloom_filter_checks_fn(env); }); +} + +SEASTAR_TEST_CASE(compaction_optimization_to_avoid_bloom_filter_checks_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { compaction_optimization_to_avoid_bloom_filter_checks_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(compaction_optimization_to_avoid_bloom_filter_checks_gcs, + gcs_fixture, + *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { compaction_optimization_to_avoid_bloom_filter_checks_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + +static future<> run_incremental_compaction_test(sstables::offstrategy offstrategy, std::function(table_for_tests&, compaction::owned_ranges_ptr)> run_compaction, test_env_config cfg = {}) { return test_env::do_with_async([run_compaction = std::move(run_compaction), offstrategy] (test_env& env) { auto builder = schema_builder("tests", "test") .with_column("id", utf8_type, column_kind::partition_key) @@ -5985,15 +6884,33 @@ static future<> 
run_incremental_compaction_test(sstables::offstrategy offstrateg BOOST_REQUIRE(sstables_closed == sstables_nr); BOOST_REQUIRE(sstables_closed_during_cleanup >= sstables_nr / 2); - }); + }, std::move(cfg)); } SEASTAR_TEST_CASE(cleanup_incremental_compaction_test) { - return run_incremental_compaction_test(sstables::offstrategy::no, [] (table_for_tests& t, compaction::owned_ranges_ptr owned_ranges) -> future<> { + return run_incremental_compaction_test(sstables::offstrategy::no, [](table_for_tests& t, compaction::owned_ranges_ptr owned_ranges) -> future<> { return t->perform_cleanup_compaction(std::move(owned_ranges), tasks::task_info{}); }); } +SEASTAR_TEST_CASE(cleanup_incremental_compaction_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return run_incremental_compaction_test( + sstables::offstrategy::no, + [](table_for_tests& t, compaction::owned_ranges_ptr owned_ranges) -> future<> { + return t->perform_cleanup_compaction(std::move(owned_ranges), tasks::task_info{}); + }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} + +SEASTAR_FIXTURE_TEST_CASE(cleanup_incremental_compaction_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return run_incremental_compaction_test( + sstables::offstrategy::no, + [](table_for_tests& t, compaction::owned_ranges_ptr owned_ranges) -> future<> { + return t->perform_cleanup_compaction(std::move(owned_ranges), tasks::task_info{}); + }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} + SEASTAR_TEST_CASE(offstrategy_incremental_compaction_test) { return run_incremental_compaction_test(sstables::offstrategy::yes, [] (table_for_tests& t, compaction::owned_ranges_ptr owned_ranges) -> future<> { bool performed = co_await t->perform_offstrategy_compaction(tasks::task_info{}); @@ -6001,99 +6918,143 @@ SEASTAR_TEST_CASE(offstrategy_incremental_compaction_test) { }); } 
-SEASTAR_TEST_CASE(cleanup_during_offstrategy_incremental_compaction_test) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - builder.set_gc_grace_seconds(10000); - builder.set_compaction_strategy(compaction::compaction_strategy_type::leveled); - std::map opts = { - { "sstable_size_in_mb", "0" }, // makes sure that every mutation produces one fragment, to trigger incremental compaction - }; - builder.set_compaction_strategy_options(std::move(opts)); - auto s = builder.build(); - auto sst_gen = env.make_sst_factory(s); +SEASTAR_TEST_CASE(offstrategy_incremental_compaction_s3_test, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return run_incremental_compaction_test( + sstables::offstrategy::yes, + [](table_for_tests& t, compaction::owned_ranges_ptr owned_ranges) -> future<> { + bool performed = co_await t->perform_offstrategy_compaction(tasks::task_info{}); + BOOST_REQUIRE(performed); + }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto make_insert = [&] (partition_key key) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), api::new_timestamp()); - return m; - }; +SEASTAR_FIXTURE_TEST_CASE(offstrategy_incremental_compaction_gcs_test, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return run_incremental_compaction_test( + sstables::offstrategy::yes, + [](table_for_tests& t, compaction::owned_ranges_ptr owned_ranges) -> future<> { + bool performed = co_await t->perform_offstrategy_compaction(tasks::task_info{}); + BOOST_REQUIRE(performed); + }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - std::vector> observers; - std::vector ssts; - size_t sstables_closed = 0; - size_t sstables_missing_on_delete = 0; - static constexpr size_t 
sstables_nr = 10; +void cleanup_during_offstrategy_incremental_compaction_fn(test_env& env) { + auto builder = schema_builder("tests", "test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + builder.set_gc_grace_seconds(10000); + builder.set_compaction_strategy(compaction::compaction_strategy_type::leveled); + std::map opts = { + { "sstable_size_in_mb", "0" }, // makes sure that every mutation produces one fragment, to trigger incremental compaction + }; + builder.set_compaction_strategy_options(std::move(opts)); + auto s = builder.build(); + auto sst_gen = env.make_sst_factory(s); - dht::token_range_vector owned_token_ranges; + auto make_insert = [&] (partition_key key) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), api::new_timestamp()); + return m; + }; - std::set merged; - for (unsigned i = 0; i < sstables_nr * 2; i++) { - merged.insert(make_insert(partition_key::from_exploded(*s, {to_bytes(to_sstring(i))}))); - } + std::vector> observers; + std::vector ssts; + size_t sstables_closed = 0; + size_t sstables_missing_on_delete = 0; + static constexpr size_t sstables_nr = 10; - std::unordered_set gens; // input sstable generations - auto merged_it = merged.begin(); - for (unsigned i = 0; i < sstables_nr; i++) { - auto mut1 = std::move(*merged_it); - merged_it++; - auto mut2 = std::move(*merged_it); - merged_it++; - auto sst = make_sstable_containing(sst_gen, { - std::move(mut1), - std::move(mut2) - }); - // Force a new run_id to trigger offstrategy compaction - sstables::test(sst).set_run_identifier(run_id::create_random_id()); - // Set level to 0 to trigger offstrategy compaction - sst->set_sstable_level(0); + dht::token_range_vector owned_token_ranges; - // every sstable will be eligible for cleanup, by having both an owned and unowned token. 
- owned_token_ranges.push_back(dht::token_range::make_singular(sst->get_last_decorated_key().token())); + std::set merged; + for (unsigned i = 0; i < sstables_nr * 2; i++) { + merged.insert(make_insert(partition_key::from_exploded(*s, {to_bytes(to_sstring(i))}))); + } - gens.insert(sst->generation()); - ssts.push_back(std::move(sst)); - } + std::unordered_set gens; // input sstable generations + auto merged_it = merged.begin(); + for (unsigned i = 0; i < sstables_nr; i++) { + auto mut1 = std::move(*merged_it); + merged_it++; + auto mut2 = std::move(*merged_it); + merged_it++; + auto sst = make_sstable_containing(sst_gen, { + std::move(mut1), + std::move(mut2) + }); + // Force a new run_id to trigger offstrategy compaction + sstables::test(sst).set_run_identifier(run_id::create_random_id()); + // Set level to 0 to trigger offstrategy compaction + sst->set_sstable_level(0); - { - auto t = env.make_table_for_tests(s); - auto& cm = t->get_compaction_manager(); - auto stop = deferred_stop(t); - t->disable_auto_compaction().get(); - const dht::token_range_vector empty_owned_ranges; - for (auto&& sst : ssts) { - testlog.info("run id {}", sst->run_identifier()); - column_family_test(t).add_sstable(sst, sstables::offstrategy::yes).get(); - observers.push_back(sst->add_on_closed_handler([&] (sstable& sst) mutable { - auto sstables = t->get_sstables(); - testlog.info("Closing sstable of generation {}, table set size: {}", sst.generation(), sstables->size()); - sstables_closed++; - })); + // every sstable will be eligible for cleanup, by having both an owned and unowned token. 
+ owned_token_ranges.push_back(dht::token_range::make_singular(sst->get_last_decorated_key().token())); + + gens.insert(sst->generation()); + ssts.push_back(std::move(sst)); + } + + { + auto t = env.make_table_for_tests(s); + auto& cm = t->get_compaction_manager(); + auto stop = deferred_stop(t); + t->disable_auto_compaction().get(); + const dht::token_range_vector empty_owned_ranges; + for (auto&& sst : ssts) { + testlog.info("run id {}", sst->run_identifier()); + column_family_test(t).add_sstable(sst, sstables::offstrategy::yes).get(); + observers.push_back(sst->add_on_closed_handler([&] (sstable& sst) mutable { + auto sstables = t->get_sstables(); + testlog.info("Closing sstable of generation {}, table set size: {}", sst.generation(), sstables->size()); + sstables_closed++; + })); observers.push_back(sst->add_on_delete_handler([&] (sstable& sst) mutable { - // ATTN -- the _on_delete callback is not necessarily running in thread + // ATTN -- the _on_delete callback is not necessarily running in thread auto missing = (::access(fmt::to_string(sst.get_filename()).c_str(), F_OK) != 0); testlog.info("Deleting sstable of generation {}: missing={}", sst.generation(), missing); sstables_missing_on_delete += missing; - })); - } - ssts = {}; // releases references - auto owned_ranges_ptr = make_lw_shared(std::move(owned_token_ranges)); - t->perform_cleanup_compaction(std::move(owned_ranges_ptr), tasks::task_info{}).get(); - BOOST_REQUIRE(cm.sstables_requiring_cleanup(t->try_get_compaction_group_view_with_static_sharding()).empty()); - testlog.info("Cleanup has finished"); + })); } + ssts = {}; // releases references + auto owned_ranges_ptr = make_lw_shared(std::move(owned_token_ranges)); + t->perform_cleanup_compaction(std::move(owned_ranges_ptr), tasks::task_info{}).get(); + BOOST_REQUIRE(cm.sstables_requiring_cleanup(t->try_get_compaction_group_view_with_static_sharding()).empty()); + testlog.info("Cleanup has finished"); + } - while (sstables_closed != sstables_nr) { - 
yield().get(); - } + while (sstables_closed != sstables_nr) { + yield().get(); + } - testlog.info("Closed sstables {}, missing on delete {}", sstables_closed, sstables_missing_on_delete); + testlog.info("Closed sstables {}, missing on delete {}", sstables_closed, sstables_missing_on_delete); - BOOST_REQUIRE_EQUAL(sstables_closed, sstables_nr); - BOOST_REQUIRE_EQUAL(sstables_missing_on_delete, 0); - }); + BOOST_REQUIRE_EQUAL(sstables_closed, sstables_nr); + BOOST_REQUIRE_EQUAL(sstables_missing_on_delete, 0); +} + +SEASTAR_TEST_CASE(cleanup_during_offstrategy_incremental_compaction_test) { + return test_env::do_with_async([](test_env& env) { cleanup_during_offstrategy_incremental_compaction_fn(env); }); +} + +SEASTAR_TEST_CASE(cleanup_during_offstrategy_incremental_compaction_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + // TODO: Figure out how to make the `add_on_delete_handler` synchronous + testlog.info("cleanup_during_offstrategy_incremental_compaction_test_s3 is not supported for S3 storage yet, skipping test"); + return make_ready_future(); +#if 0 + return test_env::do_with_async([](test_env& env) { cleanup_during_offstrategy_incremental_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +#endif +} + +SEASTAR_FIXTURE_TEST_CASE(cleanup_during_offstrategy_incremental_compaction_test_gcs, + gcs_fixture, + *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + // TODO: Figure out how to make the `add_on_delete_handler` synchronous + testlog.info("cleanup_during_offstrategy_incremental_compaction_test_gcs is not supported for S3 storage yet, skipping test"); + return make_ready_future(); +#if 0 + return test_env::do_with_async([](test_env& env) { cleanup_during_offstrategy_incremental_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +#endif } future<> test_sstables_excluding_staging_correctness(test_env_config cfg) { @@ -6157,238 
+7118,327 @@ SEASTAR_FIXTURE_TEST_CASE(test_sstables_excluding_staging_correctness_gs, gcs_fi } // Reproducer for https://github.com/scylladb/scylladb/issues/15726. + +void produces_optimal_filter_by_estimating_correctly_partitions_per_sstable_fn(test_env& env) { + auto builder = schema_builder("tests", "test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type); + builder.set_compressor_params(compression_parameters::no_compression()); + auto s = builder.build(); + auto sst_gen = env.make_sst_factory(s); + + auto compact = [&, s] (std::vector c, uint64_t max_size) -> compaction::compaction_result { + auto t = env.make_table_for_tests(s); + auto stop = deferred_stop(t); + t->disable_auto_compaction().get(); + auto desc = compaction::compaction_descriptor(std::move(c)); + desc.max_sstable_bytes = max_size; + return compact_sstables(env, std::move(desc), t, sst_gen).get(); + }; + + auto make_insert = [&] (partition_key key) { + mutation m(s, key); + m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), api::new_timestamp()); + return m; + }; + + const sstring shared_key_prefix = "832193982198319823hsdjahdashjdsa81923189381931829sdajidjkas812938219jdsalljdadsajk319820"; + + utils::chunked_vector muts; + constexpr int keys = 200; + muts.reserve(keys); + for (auto i = 0; i < keys; i++) { + muts.push_back(make_insert(partition_key::from_exploded(*s, {to_bytes(shared_key_prefix + to_sstring(i))}))); + } + auto sst = make_sstable_containing(sst_gen, std::move(muts)); + + testlog.info("index size: {}, data_size: {}", sst->index_size(), sst->ondisk_data_size()); + + uint64_t max_sstable_size = std::ceil(double(sst->ondisk_data_size()) / 10); + auto ret = compact({sst}, max_sstable_size); + + uint64_t partitions_per_sstable = keys / ret.new_sstables.size(); + auto filter = utils::i_filter::get_filter(partitions_per_sstable, s->bloom_filter_fp_chance(), utils::filter_format::m_format); + + auto 
comp = ret.new_sstables.front()->get_open_info().get(); + + // Filter for SSTable generated cannot be lower than the one expected + testlog.info("filter size: actual={}, expected>={}", comp.components->filter->memory_size(), filter->memory_size()); + BOOST_REQUIRE(comp.components->filter->memory_size() >= filter->memory_size()); +} + SEASTAR_TEST_CASE(produces_optimal_filter_by_estimating_correctly_partitions_per_sstable) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type); - builder.set_compressor_params(compression_parameters::no_compression()); - auto s = builder.build(); - auto sst_gen = env.make_sst_factory(s); + return test_env::do_with_async([](test_env& env) { produces_optimal_filter_by_estimating_correctly_partitions_per_sstable_fn(env); }); +} - auto compact = [&, s] (std::vector c, uint64_t max_size) -> compaction::compaction_result { - auto t = env.make_table_for_tests(s); - auto stop = deferred_stop(t); - t->disable_auto_compaction().get(); - auto desc = compaction::compaction_descriptor(std::move(c)); - desc.max_sstable_bytes = max_size; - return compact_sstables(env, std::move(desc), t, sst_gen).get(); - }; +SEASTAR_TEST_CASE(produces_optimal_filter_by_estimating_correctly_partitions_per_sstable_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { produces_optimal_filter_by_estimating_correctly_partitions_per_sstable_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto make_insert = [&] (partition_key key) { - mutation m(s, key); - m.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), api::new_timestamp()); - return m; - }; +SEASTAR_FIXTURE_TEST_CASE(produces_optimal_filter_by_estimating_correctly_partitions_per_sstable_gcs, + gcs_fixture, + 
*tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { produces_optimal_filter_by_estimating_correctly_partitions_per_sstable_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +} - const sstring shared_key_prefix = "832193982198319823hsdjahdashjdsa81923189381931829sdajidjkas812938219jdsalljdadsajk319820"; +void splitting_compaction_fn(test_env& env) { + auto builder = schema_builder("tests", "twcs_splitting") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("cl", int32_type, column_kind::clustering_key) + .with_column("value", int32_type); + builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); + auto s = builder.build(); - utils::chunked_vector muts; - constexpr int keys = 200; - muts.reserve(keys); - for (auto i = 0; i < keys; i++) { - muts.push_back(make_insert(partition_key::from_exploded(*s, {to_bytes(shared_key_prefix + to_sstring(i))}))); + auto sst_gen = env.make_sst_factory(s); + + auto next_timestamp = [] (auto step) { + using namespace std::chrono; + return (api::timestamp_clock::now().time_since_epoch() - duration_cast(step)).count(); + }; + + auto make_insert = [&] (const dht::decorated_key& key, api::timestamp_clock::duration step) { + static thread_local int32_t value = 1; + + mutation m(s, key); + auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); + m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step)); + return m; + }; + + auto keys = tests::generate_partition_keys(100, s); + utils::chunked_vector muts; + + muts.reserve(keys.size() * 2); + for (auto& k : keys) { + muts.push_back(make_insert(k, 0ms)); + muts.push_back(make_insert(k, 720h)); + } + + auto t = env.make_table_for_tests(s); + auto close_table = deferred_stop(t); + t->start(); + + auto input = make_sstable_containing(sst_gen, std::move(muts)); + + std::unordered_set 
groups; + auto classify_fn = [&groups] (dht::token t) -> mutation_writer::token_group_id { + auto r = dht::compaction_group_of(1, t); + if (groups.insert(r).second) { + testlog.info("Group {} detected!", r); } - auto sst = make_sstable_containing(sst_gen, std::move(muts)); + return r; + }; - testlog.info("index size: {}, data_size: {}", sst->index_size(), sst->ondisk_data_size()); + auto desc = compaction::compaction_descriptor({input}); + desc.options = compaction::compaction_type_options::make_split(classify_fn); - uint64_t max_sstable_size = std::ceil(double(sst->ondisk_data_size()) / 10); - auto ret = compact({sst}, max_sstable_size); + auto ret = compact_sstables(env, std::move(desc), t, sst_gen, replacer_fn_no_op()).get(); - uint64_t partitions_per_sstable = keys / ret.new_sstables.size(); - auto filter = utils::i_filter::get_filter(partitions_per_sstable, s->bloom_filter_fp_chance(), utils::filter_format::m_format); + auto twcs_options = compaction::time_window_compaction_strategy_options(s->compaction_strategy_options()); + auto window_for = [&twcs_options] (api::timestamp_type timestamp) { + return compaction::time_window_compaction_strategy::get_window_for(twcs_options, timestamp); + }; - auto comp = ret.new_sstables.front()->get_open_info().get(); + // Assert that data was segregated both by token and timestamp. 
+ for (auto& sst : ret.new_sstables) { + testlog.info("{}: token_groups: [{}, {}], windows: [{}, {}]", + sst->get_filename(), + classify_fn(sst->get_first_decorated_key().token()), + classify_fn(sst->get_last_decorated_key().token()), + window_for(sst->get_stats_metadata().min_timestamp), + window_for(sst->get_stats_metadata().max_timestamp)); + BOOST_REQUIRE_EQUAL(classify_fn(sst->get_first_decorated_key().token()), classify_fn(sst->get_last_decorated_key().token())); + BOOST_REQUIRE_EQUAL(window_for(sst->get_stats_metadata().min_timestamp), window_for(sst->get_stats_metadata().max_timestamp)); + } + const size_t expected_output_size = 4; // 2 token groups * 2 windows. + BOOST_REQUIRE(ret.new_sstables.size() == expected_output_size); - // Filter for SSTable generated cannot be lower than the one expected - testlog.info("filter size: actual={}, expected>={}", comp.components->filter->memory_size(), filter->memory_size()); - BOOST_REQUIRE(comp.components->filter->memory_size() >= filter->memory_size()); - }); + auto& cm = t->get_compaction_manager(); + auto split_opt = compaction::compaction_type_options::split{classify_fn}; + auto new_ssts = cm.maybe_split_new_sstable(input, t.as_compaction_group_view(), split_opt).get(); + BOOST_REQUIRE(new_ssts.size() == expected_output_size); + for (auto& sst : new_ssts) { + // split sstables don't require further split. + auto ssts = cm.maybe_split_new_sstable(sst, t.as_compaction_group_view(), split_opt).get(); + BOOST_REQUIRE(ssts.size() == 1); + BOOST_REQUIRE(ssts.front() == sst); + } + // test exception propagation + auto throwing_classifier = [&] (dht::token t) -> mutation_writer::token_group_id { + // skip first and last token, to not trigger exception when checking if sstable needs split. 
+ if (t != input->get_first_decorated_key().token() && t != input->get_last_decorated_key().token()) { + throw std::runtime_error("exception"); + } + return classify_fn(t); + }; + BOOST_REQUIRE_THROW(cm.maybe_split_new_sstable(input, t.as_compaction_group_view(), compaction::compaction_type_options::split{throwing_classifier}).get(), + std::runtime_error); } SEASTAR_TEST_CASE(splitting_compaction_test) { - return test_env::do_with_async([] (test_env& env) { - auto builder = schema_builder("tests", "twcs_splitting") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("cl", int32_type, column_kind::clustering_key) - .with_column("value", int32_type); - builder.set_compaction_strategy(compaction::compaction_strategy_type::time_window); - auto s = builder.build(); + return test_env::do_with_async([](test_env& env) { splitting_compaction_fn(env); }); +} - auto sst_gen = env.make_sst_factory(s); +SEASTAR_TEST_CASE(splitting_compaction_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + // TODO: Needs deeper investigation to figure out why the test fails, looks like some scheduling problem + testlog.info("splitting_compaction_test_s3 is not supported for S3 storage yet, skipping test"); + return make_ready_future(); +#if 0 + return test_env::do_with_async([](test_env& env) { splitting_compaction_fn(env); }, test_env_config{.storage = make_test_object_storage_options("S3")}); +#endif +} - auto next_timestamp = [] (auto step) { - using namespace std::chrono; - return (api::timestamp_clock::now().time_since_epoch() - duration_cast(step)).count(); - }; +SEASTAR_FIXTURE_TEST_CASE(splitting_compaction_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + // TODO: Needs deeper investigation to figure out why the test fails, looks like some scheduling problem + testlog.info("splitting_compaction_test_gcs is not supported for GCP storage yet, skipping test"); + return make_ready_future(); +#if 0 + 
return test_env::do_with_async([](test_env& env) { splitting_compaction_fn(env); }, test_env_config{.storage = make_test_object_storage_options("GS")}); +#endif +} - auto make_insert = [&] (const dht::decorated_key& key, api::timestamp_clock::duration step) { - static thread_local int32_t value = 1; +void unsealed_sstable_compaction_fn(test_env& env) { + BOOST_REQUIRE(smp::count == 1); + auto s = schema_builder("tests", "unsealed_sstable_compaction_test") + .with_column("id", utf8_type, column_kind::partition_key) + .with_column("value", int32_type).build(); - mutation m(s, key); - auto c_key = clustering_key::from_exploded(*s, {int32_type->decompose(value++)}); - m.set_clustered_cell(c_key, bytes("value"), data_value(int32_t(value)), next_timestamp(step)); - return m; - }; + auto t = env.make_table_for_tests(s); + auto close_t = deferred_stop(t); + t->start(); - auto keys = tests::generate_partition_keys(100, s); - utils::chunked_vector muts; + mutation mut(s, partition_key::from_exploded(*s, {to_bytes("alpha")})); + mut.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 0); - muts.reserve(keys.size() * 2); - for (auto& k : keys) { - muts.push_back(make_insert(k, 0ms)); - muts.push_back(make_insert(k, 720h)); - } + sstable_writer_config sst_cfg = env.manager().configure_writer(); + sst_cfg.leave_unsealed = true; + auto unsealed_sstable = make_sstable_easy(env, make_mutation_reader_from_mutations(s, env.make_reader_permit(), std::move(mut)), sst_cfg); - auto t = env.make_table_for_tests(s); - auto close_table = deferred_stop(t); - t->start(); + BOOST_REQUIRE(unsealed_sstable->get_storage().exists(*unsealed_sstable, sstables::component_type::TemporaryTOC).get()); - auto input = make_sstable_containing(sst_gen, std::move(muts)); - - std::unordered_set groups; - auto classify_fn = [&groups] (dht::token t) -> mutation_writer::token_group_id { - auto r = dht::compaction_group_of(1, t); - if (groups.insert(r).second) { - 
testlog.info("Group {} detected!", r); - } - return r; - }; - - auto desc = compaction::compaction_descriptor({input}); - desc.options = compaction::compaction_type_options::make_split(classify_fn); - - auto ret = compact_sstables(env, std::move(desc), t, sst_gen, replacer_fn_no_op()).get(); - - auto twcs_options = compaction::time_window_compaction_strategy_options(s->compaction_strategy_options()); - auto window_for = [&twcs_options] (api::timestamp_type timestamp) { - return compaction::time_window_compaction_strategy::get_window_for(twcs_options, timestamp); - }; - - // Assert that data was segregated both by token and timestamp. - for (auto& sst : ret.new_sstables) { - testlog.info("{}: token_groups: [{}, {}], windows: [{}, {}]", - sst->get_filename(), - classify_fn(sst->get_first_decorated_key().token()), - classify_fn(sst->get_last_decorated_key().token()), - window_for(sst->get_stats_metadata().min_timestamp), - window_for(sst->get_stats_metadata().max_timestamp)); - BOOST_REQUIRE_EQUAL(classify_fn(sst->get_first_decorated_key().token()), classify_fn(sst->get_last_decorated_key().token())); - BOOST_REQUIRE_EQUAL(window_for(sst->get_stats_metadata().min_timestamp), window_for(sst->get_stats_metadata().max_timestamp)); - } - const size_t expected_output_size = 4; // 2 token groups * 2 windows. - BOOST_REQUIRE(ret.new_sstables.size() == expected_output_size); - - auto& cm = t->get_compaction_manager(); - auto split_opt = compaction::compaction_type_options::split{classify_fn}; - auto new_ssts = cm.maybe_split_new_sstable(input, t.as_compaction_group_view(), split_opt).get(); - BOOST_REQUIRE(new_ssts.size() == expected_output_size); - for (auto& sst : new_ssts) { - // split sstables don't require further split. 
- auto ssts = cm.maybe_split_new_sstable(sst, t.as_compaction_group_view(), split_opt).get(); - BOOST_REQUIRE(ssts.size() == 1); - BOOST_REQUIRE(ssts.front() == sst); - } - // test exception propagation - auto throwing_classifier = [&] (dht::token t) -> mutation_writer::token_group_id { - // skip first and last token, to not trigger exception when checking if sstable needs split. - if (t != input->get_first_decorated_key().token() && t != input->get_last_decorated_key().token()) { - throw std::runtime_error("exception"); - } - return classify_fn(t); - }; - BOOST_REQUIRE_THROW(cm.maybe_split_new_sstable(input, t.as_compaction_group_view(), compaction::compaction_type_options::split{throwing_classifier}).get(), - std::runtime_error); - }); + auto sst_gen = env.make_sst_factory(s); + auto info = compact_sstables(env, compaction::compaction_descriptor({ unsealed_sstable }), t, sst_gen).get(); + BOOST_REQUIRE(info.new_sstables.size() == 1); } SEASTAR_TEST_CASE(unsealed_sstable_compaction_test) { - BOOST_REQUIRE(smp::count == 1); - return test_env::do_with_async([] (test_env& env) { - auto s = schema_builder("tests", "unsealed_sstable_compaction_test") - .with_column("id", utf8_type, column_kind::partition_key) - .with_column("value", int32_type).build(); + return test_env::do_with_async([](test_env& env) { unsealed_sstable_compaction_fn(env); }); +} - auto t = env.make_table_for_tests(s); - auto close_t = deferred_stop(t); - t->start(); +SEASTAR_TEST_CASE(unsealed_sstable_compaction_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + // TODO: Needs deeper investigation to figure out why the TemporaryTOC is missing + testlog.info("unsealed_sstable_compaction_test_s3 is not supported for S3 storage yet, skipping test"); + return make_ready_future(); +#if 0 + return test_env::do_with_async([](test_env& env) { unsealed_sstable_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +#endif +} - mutation mut(s, 
partition_key::from_exploded(*s, {to_bytes("alpha")})); - mut.set_clustered_cell(clustering_key::make_empty(), bytes("value"), data_value(int32_t(1)), 0); +SEASTAR_FIXTURE_TEST_CASE(unsealed_sstable_compaction_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + // TODO: Needs deeper investigation to figure out why the TemporaryTOC is missing + testlog.info("unsealed_sstable_compaction_test_gcs is not supported for S3 storage yet, skipping test"); + return make_ready_future(); +#if 0 + return test_env::do_with_async([](test_env& env) { unsealed_sstable_compaction_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +#endif +} - sstable_writer_config sst_cfg = env.manager().configure_writer(); - sst_cfg.leave_unsealed = true; - auto unsealed_sstable = make_sstable_easy(env, make_mutation_reader_from_mutations(s, env.make_reader_permit(), std::move(mut)), sst_cfg); +void sstable_clone_leaving_unsealed_dest_sstable_fn(test_env& env) { + simple_schema ss; + auto s = ss.schema(); + auto pk = ss.make_pkey(); - BOOST_REQUIRE(file_exists(unsealed_sstable->get_filename(sstables::component_type::TemporaryTOC).format()).get()); + auto mut1 = mutation(s, pk); + mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp()); + auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}); - auto sst_gen = env.make_sst_factory(s); - auto info = compact_sstables(env, compaction::compaction_descriptor({ unsealed_sstable }), t, sst_gen).get(); - BOOST_REQUIRE(info.new_sstables.size() == 1); - }); + auto table = env.make_table_for_tests(s); + auto close_table = deferred_stop(table); + + sstables::sstable_generation_generator gen_generator; + + bool leave_unsealed = true; + auto d = sst->clone(gen_generator(), leave_unsealed).get(); + + auto sst2 = env.make_sstable(s, d.generation, d.version, d.format); + sst2->load(s->get_sharder(), sstable_open_config{ .unsealed_sstable = leave_unsealed 
}).get(); + BOOST_REQUIRE(!sst2->get_storage().exists(*sst2, sstables::component_type::TOC).get()); + BOOST_REQUIRE(sst2->get_storage().exists(*sst2, sstables::component_type::TemporaryTOC).get()); + + leave_unsealed = false; + d = sst->clone(gen_generator(), leave_unsealed).get(); + + auto sst3 = env.make_sstable(s, d.generation, d.version, d.format); + sst3->load(s->get_sharder(), sstable_open_config{ .unsealed_sstable = leave_unsealed }).get(); + BOOST_REQUIRE(sst3->get_storage().exists(*sst3, sstables::component_type::TOC).get()); + BOOST_REQUIRE(!sst3->get_storage().exists(*sst3, sstables::component_type::TemporaryTOC).get()); } SEASTAR_TEST_CASE(sstable_clone_leaving_unsealed_dest_sstable) { - return test_env::do_with_async([] (test_env& env) { - simple_schema ss; - auto s = ss.schema(); - auto pk = ss.make_pkey(); + return test_env::do_with_async([](test_env& env) { sstable_clone_leaving_unsealed_dest_sstable_fn(env); }); +} - auto mut1 = mutation(s, pk); - mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp()); - auto sst = make_sstable_containing(env.make_sstable(s), {std::move(mut1)}); +SEASTAR_TEST_CASE(sstable_clone_leaving_unsealed_dest_sstable_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + testlog.info("Clone is not supported for S3 storage yet, skipping test"); + return make_ready_future(); +#if 0 + return test_env::do_with_async([](test_env& env) { sstable_clone_leaving_unsealed_dest_sstable_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +#endif +} - auto table = env.make_table_for_tests(s); - auto close_table = deferred_stop(table); +SEASTAR_FIXTURE_TEST_CASE(sstable_clone_leaving_unsealed_dest_sstable_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + testlog.info("Clone is not supported for GCS storage yet, skipping test"); + return make_ready_future(); +#if 0 + return test_env::do_with_async([](test_env& env) { 
sstable_clone_leaving_unsealed_dest_sstable_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); +#endif +} - sstables::sstable_generation_generator gen_generator; +void failure_when_adding_new_sstable_fn(test_env& env) { + simple_schema ss; + auto s = ss.schema(); + auto pk = ss.make_pkey(); - bool leave_unsealed = true; - auto d = sst->clone(gen_generator(), leave_unsealed).get(); + auto mut1 = mutation(s, pk); + mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp()); + auto sst = make_sstable_containing(env.make_sstable(s), {mut1}); - auto sst2 = env.make_sstable(s, d.generation, d.version, d.format); - sst2->load(s->get_sharder(), sstable_open_config{ .unsealed_sstable = leave_unsealed }).get(); - BOOST_REQUIRE(!file_exists(sst2->get_filename(sstables::component_type::TOC).format()).get()); - BOOST_REQUIRE(file_exists(sst2->get_filename(sstables::component_type::TemporaryTOC).format()).get()); + auto table = env.make_table_for_tests(s); + auto close_table = deferred_stop(table); - leave_unsealed = false; - d = sst->clone(gen_generator(), leave_unsealed).get(); + auto on_add = [] (sstables::shared_sstable) { throw std::runtime_error("fail to seal"); return make_ready_future<>(); }; + BOOST_REQUIRE_THROW(table->add_new_sstable_and_update_cache(sst, on_add).get(), std::runtime_error); - auto sst3 = env.make_sstable(s, d.generation, d.version, d.format); - sst3->load(s->get_sharder(), sstable_open_config{ .unsealed_sstable = leave_unsealed }).get(); - BOOST_REQUIRE(file_exists(sst3->get_filename(sstables::component_type::TOC).format()).get()); - BOOST_REQUIRE(!file_exists(sst3->get_filename(sstables::component_type::TemporaryTOC).format()).get()); - }); + // Verify new sstable was unlinked on failure. 
+ BOOST_REQUIRE(!sst->get_storage().exists(*sst, sstables::component_type::Data).get()); + + auto sst2 = make_sstable_containing(env.make_sstable(s), {mut1}); + auto sst3 = make_sstable_containing(env.make_sstable(s), {mut1}); + BOOST_REQUIRE_THROW(table->add_new_sstables_and_update_cache({sst2, sst3}, on_add).get(), std::runtime_error); + + // Verify both sstables are unlinked on failure. + BOOST_REQUIRE(!sst2->get_storage().exists(*sst2, sstables::component_type::Data).get()); + BOOST_REQUIRE(!sst3->get_storage().exists(*sst3, sstables::component_type::Data).get()); } SEASTAR_TEST_CASE(failure_when_adding_new_sstable_test) { - return test_env::do_with_async([] (test_env& env) { - simple_schema ss; - auto s = ss.schema(); - auto pk = ss.make_pkey(); + return test_env::do_with_async([](test_env& env) { failure_when_adding_new_sstable_fn(env); }); +} - auto mut1 = mutation(s, pk); - mut1.partition().apply_insert(*s, ss.make_ckey(0), ss.new_timestamp()); - auto sst = make_sstable_containing(env.make_sstable(s), {mut1}); +SEASTAR_TEST_CASE(failure_when_adding_new_sstable_test_s3, *boost::unit_test::precondition(tests::has_scylla_test_env)) { + return test_env::do_with_async([](test_env& env) { failure_when_adding_new_sstable_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("S3")}); +} - auto table = env.make_table_for_tests(s); - auto close_table = deferred_stop(table); - - auto on_add = [] (sstables::shared_sstable) { throw std::runtime_error("fail to seal"); return make_ready_future<>(); }; - BOOST_REQUIRE_THROW(table->add_new_sstable_and_update_cache(sst, on_add).get(), std::runtime_error); - - // Verify new sstable was unlinked on failure. 
- BOOST_REQUIRE(!file_exists(sst->get_filename(sstables::component_type::Data).format()).get()); - - auto sst2 = make_sstable_containing(env.make_sstable(s), {mut1}); - auto sst3 = make_sstable_containing(env.make_sstable(s), {mut1}); - BOOST_REQUIRE_THROW(table->add_new_sstables_and_update_cache({sst2, sst3}, on_add).get(), std::runtime_error); - - // Verify both sstables are unlinked on failure. - BOOST_REQUIRE(!file_exists(sst2->get_filename(sstables::component_type::Data).format()).get()); - BOOST_REQUIRE(!file_exists(sst3->get_filename(sstables::component_type::Data).format()).get()); - }); +SEASTAR_FIXTURE_TEST_CASE(failure_when_adding_new_sstable_test_gcs, gcs_fixture, *tests::check_run_test_decorator("ENABLE_GCP_STORAGE_TEST", true)) { + return test_env::do_with_async([](test_env& env) { failure_when_adding_new_sstable_fn(env); }, + test_env_config{.storage = make_test_object_storage_options("GS")}); } static future<> test_perform_component_rewrite_single_sstable(compaction::compaction_type_options::component_rewrite::update_sstable_id update_id) { diff --git a/test/boost/test_config.yaml b/test/boost/test_config.yaml index fedde68719..d87ed73f8c 100644 --- a/test/boost/test_config.yaml +++ b/test/boost/test_config.yaml @@ -32,7 +32,7 @@ custom_args: sstable_datafile_test: - '-c1 -m2G' sstable_compaction_test: - - '-c1 -m2G --logger-log-level compaction=debug --logger-log-level compaction_manager=debug' + - '-c1 -m2G --logger-log-level compaction=debug --logger-log-level compaction_manager=debug --logger-log-level s3=debug --logger-log-level gcp_storage=debug' sstable_3_x_test: - '-c1 -m2G' cql_query_test: diff --git a/test/lib/sstable_utils.hh b/test/lib/sstable_utils.hh index 30915b4e0e..2afceafad6 100644 --- a/test/lib/sstable_utils.hh +++ b/test/lib/sstable_utils.hh @@ -226,6 +226,14 @@ public: void set_digest(std::optional digest) { _sst->_components->digest = digest; } + + storage& get_storage() { + return *_sst->_storage; + } + + future 
open_file(component_type type, open_flags flags, file_open_options opts) { + return _sst->open_file(type, flags, opts); + } }; inline auto replacer_fn_no_op() { diff --git a/test/lib/test_services.cc b/test/lib/test_services.cc index 49f608295b..f8bdafd15b 100644 --- a/test/lib/test_services.cc +++ b/test/lib/test_services.cc @@ -301,6 +301,7 @@ future<> test_env::stop() { } } co_await _impl->mgr.close(); + _impl->mgr.unplug_sstables_registry(); co_await _impl->semaphore.stop(); } @@ -366,7 +367,6 @@ future<> test_env::do_with_async(noncopyable_function func, te test_env env(std::move(cfg), *scf, &sstm.local()); auto close_env = defer([&] { env.stop().get(); }); env.manager().plug_sstables_registry(std::make_unique()); - auto unplu = defer([&env] { env.manager().unplug_sstables_registry(); }); func(env); }); } diff --git a/utils/aws_sigv4.cc b/utils/aws_sigv4.cc index 5fa074b8c9..dc5654a911 100644 --- a/utils/aws_sigv4.cc +++ b/utils/aws_sigv4.cc @@ -10,7 +10,6 @@ #include "utils/aws_sigv4.hh" #include "utils/hashers.hh" #include "bytes.hh" -#include "db_clock.hh" using namespace std::chrono_literals; @@ -48,8 +47,8 @@ static std::string apply_sha256(const std::vector>& msg) return to_hex(hasher.finalize()); } -std::string format_time_point(db_clock::time_point tp) { - time_t time_point_repr = db_clock::to_time_t(tp); +std::string format_time_point(lowres_system_clock::time_point tp) { + time_t time_point_repr = lowres_system_clock::to_time_t(tp); std::string time_point_str; time_point_str.resize(17); ::tm time_buf; @@ -61,8 +60,8 @@ std::string format_time_point(db_clock::time_point tp) { void check_expiry(std::string_view signature_date) { //FIXME: The default 15min can be changed with X-Amz-Expires header - we should honor it - std::string expiration_str = format_time_point(db_clock::now() - 15min); - std::string validity_str = format_time_point(db_clock::now() + 15min); + std::string expiration_str = format_time_point(lowres_system_clock::now() - 15min); + 
std::string validity_str = format_time_point(lowres_system_clock::now() + 15min); if (signature_date < expiration_str) { throw std::runtime_error( fmt::format("Signature expired: {} is now earlier than {} (current time - 15 min.)", diff --git a/utils/aws_sigv4.hh b/utils/aws_sigv4.hh index 023e341f65..444b65a1d2 100644 --- a/utils/aws_sigv4.hh +++ b/utils/aws_sigv4.hh @@ -8,7 +8,8 @@ #pragma once -#include "db_clock.hh" +#include +#include // The declared below get_signature() method makes the Signature string for AWS // authenticated requests as described in [1]. It can be used in two ways. @@ -33,14 +34,14 @@ namespace aws { std::string get_signature(std::string_view access_key_id, std::string_view secret_access_key, std::string_view host, std::string_view canonical_uri, std::string_view method, std::optional orig_datestamp, std::string_view signed_headers_str, const std::map& signed_headers_map, - const std::vector>* body_content, std::string_view region, std::string_view service, std::string_view query_string); + const std::vector>* body_content, std::string_view region, std::string_view service, std::string_view query_string); // Convenience alias not to pass obscure nullptr argument to get_signature() -inline constexpr std::vector>* unsigned_content = nullptr; +inline constexpr std::vector>* unsigned_content = nullptr; // Same for datestamp checking inline auto omit_datestamp_expiration_check = std::nullopt; -std::string format_time_point(db_clock::time_point tp); +std::string format_time_point(seastar::lowres_system_clock::time_point tp); } // aws namespace } // utils namespace diff --git a/utils/gcp/object_storage.cc b/utils/gcp/object_storage.cc index 2a34921c2a..147ad376bd 100644 --- a/utils/gcp/object_storage.cc +++ b/utils/gcp/object_storage.cc @@ -1156,6 +1156,25 @@ seekable_data_source utils::gcp::storage::client::create_download_source(std::st return seekable_data_source(std::make_unique(_impl, bucket, object_name, as)); } +future 
storage::client::object_exists(std::string_view bucket, std::string_view object_name, seastar::abort_source* as) const { + gcp_storage.debug("Get object metadata {}:{}", bucket, object_name); + + auto path = fmt::format("/storage/v1/b/{}/o/{}", bucket, seastar::http::internal::url_encode(object_name)); + try { + auto res = co_await _impl->send_with_retry(path, GCP_OBJECT_SCOPE_READ_ONLY, ""s, ""s, httpclient::method_type::GET, {}, as); + if (res.result() != status_type::ok) { + throw failed_operation( + fmt::format("Could not retrieve object metadata {}:{}: {} ({})", bucket, object_name, res.result(), get_gcp_error_message(res.body()))); + } + } catch (const storage_io_error& e) { + if (e.code().value() == ENOENT) { + co_return false; + } + throw; + } + co_return true; +} + future<> utils::gcp::storage::client::close() { return _impl->close(); } diff --git a/utils/gcp/object_storage.hh b/utils/gcp/object_storage.hh index 1f62ea5d23..86b20774b0 100644 --- a/utils/gcp/object_storage.hh +++ b/utils/gcp/object_storage.hh @@ -154,7 +154,10 @@ namespace utils::gcp::storage { * Creates a data_source for reading from a named object. */ seekable_data_source create_download_source(std::string_view bucket, std::string_view object_name, seastar::abort_source* = nullptr) const; - + /** + * Checks if an object exists. + */ + future object_exists(std::string_view bucket, std::string_view object_name, seastar::abort_source* as = nullptr) const; /** * Destroys resources. 
Must be called before releasing object */ diff --git a/utils/s3/client.cc b/utils/s3/client.cc index 0e8a14628e..cf1e416e59 100644 --- a/utils/s3/client.cc +++ b/utils/s3/client.cc @@ -166,7 +166,7 @@ future<> client::authorize(http::request& req) { } } - auto time_point_str = utils::aws::format_time_point(db_clock::now()); + auto time_point_str = utils::aws::format_time_point(lowres_system_clock::now()); auto time_point_st = time_point_str.substr(0, 8); req._headers["x-amz-date"] = time_point_str; req._headers["x-amz-content-sha256"] = "UNSIGNED-PAYLOAD"; @@ -328,7 +328,7 @@ http::experimental::client::reply_handler client::wrap_handler(http::request& re auto should_retry = possible_error->is_retryable(); if (possible_error->get_error_type() == aws::aws_error_type::REQUEST_TIME_TOO_SKEWED) { s3l.warn("Request failed with REQUEST_TIME_TOO_SKEWED. Machine time: {}, request timestamp: {}", - utils::aws::format_time_point(db_clock::now()), + utils::aws::format_time_point(lowres_system_clock::now()), request.get_header("x-amz-date")); should_retry = utils::http::retryable::yes; co_await authorize(request); @@ -447,6 +447,18 @@ future client::get_object_stats(sstring object_name, seastar::abort_sourc co_return st; } +future client::object_exists(sstring object_name, seastar::abort_source* as) { + try { + co_await get_object_header(object_name, ignore_reply, as); + } catch (const storage_io_error& e) { + if (e.code().value() == ENOENT) { + co_return false; + } + throw; + } + co_return true; +} + static rapidxml::xml_node<>* first_node_of(rapidxml::xml_node<>* root, std::initializer_list names) { SCYLLA_ASSERT(root); diff --git a/utils/s3/client.hh b/utils/s3/client.hh index 8933a2a9da..58757af80c 100644 --- a/utils/s3/client.hh +++ b/utils/s3/client.hh @@ -189,6 +189,7 @@ public: future get_object_size(sstring object_name, seastar::abort_source* = nullptr); future get_object_stats(sstring object_name, seastar::abort_source* = nullptr); + future object_exists(sstring 
object_name, seastar::abort_source* = nullptr); future get_object_tagging(sstring object_name, seastar::abort_source* = nullptr); future<> put_object_tagging(sstring object_name, tag_set tagging, seastar::abort_source* = nullptr); future<> delete_object_tagging(sstring object_name, seastar::abort_source* = nullptr);