From 827cd0d37b1e0895e47c953282c9fd003d6b469e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Mon, 19 Dec 2022 09:27:23 -0500 Subject: [PATCH 1/5] sstables: coroutinize sstable::load() It nicely simplified by it. No regression expected, this method is supposedly only used by tests and tools. --- sstables/sstables.cc | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 5a9ceb1cd3..4a5023f06a 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -1505,25 +1506,21 @@ void sstable::write_filter(const io_priority_class& pc) { // This interface is only used during tests, snapshot loading and early initialization. // No need to set tunable priorities for it. future<> sstable::load(const io_priority_class& pc) noexcept { - return read_toc().then([this, &pc] { + co_await read_toc(); // read scylla-meta after toc. Might need it to parse // rest (hint extensions) - return read_scylla_metadata(pc).then([this, &pc] { + co_await read_scylla_metadata(pc); // Read statistics ahead of others - if summary is missing // we'll attempt to re-generate it and we need statistics for that - return read_statistics(pc).then([this, &pc] { - return seastar::when_all_succeed( - read_compression(pc), - read_filter(pc), - read_summary(pc)).then_unpack([this] { + co_await read_statistics(pc); + co_await coroutine::all( + [&] { return read_compression(pc); }, + [&] { return read_filter(pc); }, + [&] { return read_summary(pc); }); validate_min_max_metadata(); validate_max_local_deletion_time(); validate_partitioner(); - return open_data(); - }); - }); - }); - }); + co_await open_data(); } future<> sstable::load(sstables::foreign_sstable_open_info info) noexcept { From 15966a0b1bc2d7e2790407792acd53eba762f6e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Mon, 19 Dec 2022 09:50:13 -0500 Subject: [PATCH 2/5] sstables: sstable::open_data(): use clear_gently() to clear token ranges Instead of an open-coded loop. It also makes the code easier to coroutinize (next patch). --- sstables/sstables.cc | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 4a5023f06a..951086ad8d 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -60,6 +60,7 @@ #include "utils/bloom_filter.hh" #include "utils/memory_data_sink.hh" #include "utils/cached_file.hh" +#include "utils/stall_free.hh" #include "checked-file-impl.hh" #include "integrity_checked_file_impl.hh" #include "db/extensions.hh" @@ -1377,18 +1378,14 @@ future<> sstable::open_data() noexcept { _shards = compute_shards_for_this_sstable(); } auto* sm = _components->scylla_metadata->data.get(); - if (!sm) { + if (sm) { + // Sharding information uses a lot of memory and once we're doing with this computation we will no longer use it. + return utils::clear_gently(sm->token_ranges.elements).then([sm] { + sm->token_ranges.elements = {}; + }); + } else { return make_ready_future<>(); } - auto c = &sm->token_ranges.elements; - // Sharding information uses a lot of memory and once we're doing with this computation we will no longer use it. - return do_until([c] { return c->empty(); }, [c] { - c->pop_back(); - return make_ready_future<>(); - }).then([this, c] () mutable { - *c = {}; - return make_ready_future<>(); - }); }).then([this] { auto* ld_stats = _components->scylla_metadata->data.get(); if (ld_stats) { From c85ff7945d7ce334c7e8a5b0ddc8876d581a641f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Mon, 19 Dec 2022 09:59:48 -0500 Subject: [PATCH 3/5] sstables: coroutinize sstable::open_data() Used once when sstable is opened on startup, not performance sensitive. --- sstables/sstables.cc | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 951086ad8d..51de8eb547 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -1371,22 +1371,17 @@ future<> sstable::open_or_create_data(open_flags oflags, file_open_options optio } future<> sstable::open_data() noexcept { - return open_or_create_data(open_flags::ro).then([this] { - return this->update_info_for_opened_data(); - }).then([this] { + co_await open_or_create_data(open_flags::ro); + co_await update_info_for_opened_data(); if (_shards.empty()) { _shards = compute_shards_for_this_sstable(); } auto* sm = _components->scylla_metadata->data.get(); if (sm) { // Sharding information uses a lot of memory and once we're doing with this computation we will no longer use it. - return utils::clear_gently(sm->token_ranges.elements).then([sm] { + co_await utils::clear_gently(sm->token_ranges.elements); sm->token_ranges.elements = {}; - }); - } else { - return make_ready_future<>(); } - }).then([this] { auto* ld_stats = _components->scylla_metadata->data.get(); if (ld_stats) { _large_data_stats.emplace(*ld_stats); @@ -1395,10 +1390,8 @@ future<> sstable::open_data() noexcept { if (origin) { _origin = sstring(to_sstring_view(bytes_view(origin->value))); } - }).then([this] { _open_mode.emplace(open_flags::ro); _stats.on_open_for_reading(); - }); } future<> sstable::update_info_for_opened_data() { From bba956c13c911d0b19b3f8eda73f7286ecc4b823 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Mon, 19 Dec 2022 09:34:29 -0500 Subject: [PATCH 4/5] sstables: sstable::{load,open_data}(): fix indentation --- sstables/sstables.cc | 66 ++++++++++++++++++++++---------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 51de8eb547..41fd14e57f 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -1373,25 +1373,25 @@ future<> sstable::open_or_create_data(open_flags oflags, file_open_options optio future<> sstable::open_data() noexcept { co_await open_or_create_data(open_flags::ro); co_await update_info_for_opened_data(); - if (_shards.empty()) { - _shards = compute_shards_for_this_sstable(); - } - auto* sm = _components->scylla_metadata->data.get(); - if (sm) { - // Sharding information uses a lot of memory and once we're doing with this computation we will no longer use it. - co_await utils::clear_gently(sm->token_ranges.elements); - sm->token_ranges.elements = {}; - } - auto* ld_stats = _components->scylla_metadata->data.get(); - if (ld_stats) { - _large_data_stats.emplace(*ld_stats); - } - auto* origin = _components->scylla_metadata->data.get(); - if (origin) { - _origin = sstring(to_sstring_view(bytes_view(origin->value))); - } - _open_mode.emplace(open_flags::ro); - _stats.on_open_for_reading(); + if (_shards.empty()) { + _shards = compute_shards_for_this_sstable(); + } + auto* sm = _components->scylla_metadata->data.get(); + if (sm) { + // Sharding information uses a lot of memory and once we're doing with this computation we will no longer use it. + co_await utils::clear_gently(sm->token_ranges.elements); + sm->token_ranges.elements = {}; + } + auto* ld_stats = _components->scylla_metadata->data.get(); + if (ld_stats) { + _large_data_stats.emplace(*ld_stats); + } + auto* origin = _components->scylla_metadata->data.get(); + if (origin) { + _origin = sstring(to_sstring_view(bytes_view(origin->value))); + } + _open_mode.emplace(open_flags::ro); + _stats.on_open_for_reading(); } future<> sstable::update_info_for_opened_data() { @@ -1497,20 +1497,20 @@ void sstable::write_filter(const io_priority_class& pc) { // No need to set tunable priorities for it. future<> sstable::load(const io_priority_class& pc) noexcept { co_await read_toc(); - // read scylla-meta after toc. Might need it to parse - // rest (hint extensions) - co_await read_scylla_metadata(pc); - // Read statistics ahead of others - if summary is missing - // we'll attempt to re-generate it and we need statistics for that - co_await read_statistics(pc); - co_await coroutine::all( - [&] { return read_compression(pc); }, - [&] { return read_filter(pc); }, - [&] { return read_summary(pc); }); - validate_min_max_metadata(); - validate_max_local_deletion_time(); - validate_partitioner(); - co_await open_data(); + // read scylla-meta after toc. Might need it to parse + // rest (hint extensions) + co_await read_scylla_metadata(pc); + // Read statistics ahead of others - if summary is missing + // we'll attempt to re-generate it and we need statistics for that + co_await read_statistics(pc); + co_await coroutine::all( + [&] { return read_compression(pc); }, + [&] { return read_filter(pc); }, + [&] { return read_summary(pc); }); + validate_min_max_metadata(); + validate_max_local_deletion_time(); + validate_partitioner(); + co_await open_data(); } future<> sstable::load(sstables::foreign_sstable_open_info info) noexcept { From 3c8949d34ce0c1130c67151c9c00061cfa238916 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Botond=20D=C3=A9nes?= Date: Mon, 19 Dec 2022 09:28:13 -0500 Subject: [PATCH 5/5] sstables: allow bypassing first/last position metadata loading When loading an sstable. Tests and tools might want to do this to be able to load a damaged sstable to do tests/diagnostics on it. --- sstables/sstables.cc | 14 ++++++++------ sstables/sstables.hh | 15 ++++++++++++--- tools/scylla-sstable.cc | 2 +- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 41fd14e57f..7df122d300 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -1370,9 +1370,9 @@ future<> sstable::open_or_create_data(open_flags oflags, file_open_options optio ).discard_result(); } -future<> sstable::open_data() noexcept { +future<> sstable::open_data(sstable_open_config cfg) noexcept { co_await open_or_create_data(open_flags::ro); - co_await update_info_for_opened_data(); + co_await update_info_for_opened_data(cfg); if (_shards.empty()) { _shards = compute_shards_for_this_sstable(); } @@ -1394,7 +1394,7 @@ future<> sstable::open_data() noexcept { _stats.on_open_for_reading(); } -future<> sstable::update_info_for_opened_data() { +future<> sstable::update_info_for_opened_data(sstable_open_config cfg) { struct stat st = co_await _data_file.stat(); if (this->has_component(component_type::CompressionInfo)) { @@ -1444,7 +1444,9 @@ future<> sstable::update_info_for_opened_data() { })); _bytes_on_disk += bytes; } - co_await load_first_and_last_position_in_partition(); + if (cfg.load_first_and_last_position_metadata) { + co_await load_first_and_last_position_in_partition(); + } } future<> sstable::create_data() noexcept { @@ -1495,7 +1497,7 @@ void sstable::write_filter(const io_priority_class& pc) { // This interface is only used during tests, snapshot loading and early initialization. // No need to set tunable priorities for it. -future<> sstable::load(const io_priority_class& pc) noexcept { +future<> sstable::load(const io_priority_class& pc, sstable_open_config cfg) noexcept { co_await read_toc(); // read scylla-meta after toc. Might need it to parse // rest (hint extensions) @@ -1510,7 +1512,7 @@ future<> sstable::load(const io_priority_class& pc) noexcept { validate_min_max_metadata(); validate_max_local_deletion_time(); validate_partitioner(); - co_await open_data(); + co_await open_data(cfg); } future<> sstable::load(sstables::foreign_sstable_open_info info) noexcept { diff --git a/sstables/sstables.hh b/sstables/sstables.hh index 038daf6109..f5fd040fb1 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -130,6 +130,15 @@ constexpr auto table_subdirectories = std::to_array({ constexpr const char* repair_origin = "repair"; +struct sstable_open_config { + // Load the first and last position in partition, populating the + // `_first_partition_first_position` and `_last_partition_last_position` + // fields respectively. Problematic sstables might fail to load. Set to + // false if you want to disable this, to be able to read such sstables. + // Should only be disabled for diagnostics purposes. + bool load_first_and_last_position_metadata = true; +}; + class sstable : public enable_lw_shared_from_this { friend ::sstable_assertions; public: @@ -186,9 +195,9 @@ public: // load all components from disk // this variant will be useful for testing purposes and also when loading // a new sstable from scratch for sharing its components. - future<> load(const io_priority_class& pc = default_priority_class()) noexcept; - future<> open_data() noexcept; - future<> update_info_for_opened_data(); + future<> load(const io_priority_class& pc = default_priority_class(), sstable_open_config cfg = {}) noexcept; + future<> open_data(sstable_open_config cfg = {}) noexcept; + future<> update_info_for_opened_data(sstable_open_config cfg = {}); class delayed_commit_changes { std::unordered_set _dirs; diff --git a/tools/scylla-sstable.cc b/tools/scylla-sstable.cc index 063db738b0..5e525cf98c 100644 --- a/tools/scylla-sstable.cc +++ b/tools/scylla-sstable.cc @@ -151,7 +151,7 @@ const std::vector load_sstables(schema_ptr schema, sst auto ed = sstables::entry_descriptor::make_descriptor(dir_path.c_str(), sst_filename.c_str(), schema->ks_name(), schema->cf_name()); auto sst = sst_man.make_sstable(schema, dir_path.c_str(), ed.generation, ed.version, ed.format); - co_await sst->load(); + co_await sst->load(default_priority_class(), sstables::sstable_open_config{.load_first_and_last_position_metadata = false}); sstables[i] = std::move(sst); }).get();