From 2dedf4d03a9702259a8d919c7920623d74ffaf75 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 17 Nov 2022 16:26:41 +0300 Subject: [PATCH 1/8] sstables: Dont write pending log with file_writer It's a wrapper over output_stream with offset tracking and the tracking is not needed to generate a log file. As a bonus of switching back we get a stream.write(sstring) sugar. Signed-off-by: Pavel Emelyanov --- sstables/sstables.cc | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 45ae4d833b..0942dda748 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -3112,18 +3112,17 @@ delete_atomically(std::vector ssts) { // Create temporary pending_delete log file. auto f = open_file_dma(tmp_pending_delete_log, oflags).get0(); // Write all toc names into the log file. - file_output_stream_options options; - options.buffer_size = 4096; - auto w = file_writer::make(std::move(f), options, tmp_pending_delete_log).get0(); + auto out = make_file_output_stream(std::move(f), 4096).get0(); + auto close_out = deferred_close(out); for (const auto& sst : ssts) { auto toc = sst->component_basename(component_type::TOC); - w.write(toc.c_str(), toc.size()); - w.write("\n", 1); + out.write(toc).get(); + out.write("\n").get(); } - w.flush(); - w.close(); + out.flush().get(); + close_out.close_now(); auto dir_f = open_directory(pending_delete_dir).get0(); // Once flushed and closed, the temporary log file can be renamed. From 6f3fd94162ad34ae4a5b46f4be38f79b72292f0a Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 17 Nov 2022 16:51:48 +0300 Subject: [PATCH 2/8] sstables: Read pending delete log with one line helper There's one in seastar since recently Signed-off-by: Pavel Emelyanov --- sstables/sstables.cc | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 0942dda748..7fae94dcf3 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -3161,12 +3161,7 @@ future<> replay_pending_delete_log(sstring pending_delete_log) { assert(sstable::is_pending_delete_dir(fs::path(pending_delete_dir))); try { auto sstdir = parent_path(pending_delete_dir); - auto f = open_file_dma(pending_delete_log, open_flags::ro).get0(); - auto size = f.size().get0(); - auto in = make_file_input_stream(f); - auto text = in.read_exactly(size).get0(); - in.close().get(); - f.close().get(); + auto text = seastar::util::read_entire_file_contiguous(fs::path(pending_delete_log)).get0(); sstring all(text.begin(), text.end()); std::vector basenames; From 85a73ca9c626dff2ed3706519f3736945fed28cc Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 17 Nov 2022 19:39:09 +0300 Subject: [PATCH 3/8] sstables: Coroutinize replay_pending_delete_log Signed-off-by: Pavel Emelyanov --- sstables/sstables.cc | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 7fae94dcf3..212ef167b7 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -3156,12 +3156,11 @@ delete_atomically(std::vector ssts) { // since this is an indication we crashed in the middle of delete_atomically future<> replay_pending_delete_log(sstring pending_delete_log) { sstlog.debug("Reading pending_deletes log file {}", pending_delete_log); - return seastar::async([pending_delete_log = std::move(pending_delete_log)] { sstring pending_delete_dir = parent_path(pending_delete_log); assert(sstable::is_pending_delete_dir(fs::path(pending_delete_dir))); try { auto sstdir = parent_path(pending_delete_dir); - auto text = seastar::util::read_entire_file_contiguous(fs::path(pending_delete_log)).get0(); + auto text = co_await seastar::util::read_entire_file_contiguous(fs::path(pending_delete_log)); sstring all(text.begin(), text.end()); std::vector basenames; @@ -3169,11 +3168,10 @@ future<> replay_pending_delete_log(sstring pending_delete_log) { auto tocs = boost::copy_range>(basenames | boost::adaptors::filtered([] (auto&& basename) { return !basename.empty(); }) | boost::adaptors::transformed([&sstdir] (auto&& basename) { return sstdir + "/" + basename; })); - delete_sstables(tocs).get(); + co_await delete_sstables(tocs); } catch (...) { sstlog.warn("Error replaying {}: {}. Ignoring.", pending_delete_log, std::current_exception()); } - }); } thread_local sstables_stats::stats sstables_stats::_shard_stats; From f5684bcaf0c572d570405f06b7626b8926760d0e Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 17 Nov 2022 19:41:15 +0300 Subject: [PATCH 4/8] sstables: Indentation fix after previous patch Signed-off-by: Pavel Emelyanov --- sstables/sstables.cc | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 212ef167b7..3c4b9423d8 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -3156,22 +3156,22 @@ delete_atomically(std::vector ssts) { // since this is an indication we crashed in the middle of delete_atomically future<> replay_pending_delete_log(sstring pending_delete_log) { sstlog.debug("Reading pending_deletes log file {}", pending_delete_log); - sstring pending_delete_dir = parent_path(pending_delete_log); - assert(sstable::is_pending_delete_dir(fs::path(pending_delete_dir))); - try { - auto sstdir = parent_path(pending_delete_dir); - auto text = co_await seastar::util::read_entire_file_contiguous(fs::path(pending_delete_log)); + sstring pending_delete_dir = parent_path(pending_delete_log); + assert(sstable::is_pending_delete_dir(fs::path(pending_delete_dir))); + try { + auto sstdir = parent_path(pending_delete_dir); + auto text = co_await seastar::util::read_entire_file_contiguous(fs::path(pending_delete_log)); - sstring all(text.begin(), text.end()); - std::vector basenames; - boost::split(basenames, all, boost::is_any_of("\n"), boost::token_compress_on); - auto tocs = boost::copy_range>(basenames - | boost::adaptors::filtered([] (auto&& basename) { return !basename.empty(); }) - | boost::adaptors::transformed([&sstdir] (auto&& basename) { return sstdir + "/" + basename; })); - co_await delete_sstables(tocs); - } catch (...) { - sstlog.warn("Error replaying {}: {}. Ignoring.", pending_delete_log, std::current_exception()); - } + sstring all(text.begin(), text.end()); + std::vector basenames; + boost::split(basenames, all, boost::is_any_of("\n"), boost::token_compress_on); + auto tocs = boost::copy_range>(basenames + | boost::adaptors::filtered([] (auto&& basename) { return !basename.empty(); }) + | boost::adaptors::transformed([&sstdir] (auto&& basename) { return sstdir + "/" + basename; })); + co_await delete_sstables(tocs); + } catch (...) { + sstlog.warn("Error replaying {}: {}. Ignoring.", pending_delete_log, std::current_exception()); + } } thread_local sstables_stats::stats sstables_stats::_shard_stats; From a61c96a6275be4c0fd57b285071579436c38a894 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 17 Nov 2022 19:50:17 +0300 Subject: [PATCH 5/8] sstables: Use fs::path in replay_pending_delete_log() It's called by a code that has fs::path at hand and internally uses helpers that need fs::path too, so no need to convert it back and forth. Signed-off-by: Pavel Emelyanov --- replica/distributed_loader.cc | 2 +- sstables/sstables.cc | 10 +++++----- sstables/sstables.hh | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/replica/distributed_loader.cc b/replica/distributed_loader.cc index 7f34b639e3..8a55ccb2a1 100644 --- a/replica/distributed_loader.cc +++ b/replica/distributed_loader.cc @@ -434,7 +434,7 @@ future<> distributed_loader::handle_sstables_pending_delete(sstring pending_dele futures.push_back(remove_file(file_path.string())); } else if (file_path.extension() == ".log") { dblog.info("Found pending_delete log file: {}, replaying", file_path); - auto f = sstables::replay_pending_delete_log(file_path.string()).then([file_path = std::move(file_path)] { + auto f = sstables::replay_pending_delete_log(file_path).then([file_path = std::move(file_path)] { dblog.debug("Replayed {}, removing", file_path); return remove_file(file_path.string()); }); diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 3c4b9423d8..5813b6e48e 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -3154,13 +3154,13 @@ delete_atomically(std::vector ssts) { // FIXME: Go through maybe_delete_large_partitions_entry on recovery // since this is an indication we crashed in the middle of delete_atomically -future<> replay_pending_delete_log(sstring pending_delete_log) { +future<> replay_pending_delete_log(fs::path pending_delete_log) { sstlog.debug("Reading pending_deletes log file {}", pending_delete_log); - sstring pending_delete_dir = parent_path(pending_delete_log); - assert(sstable::is_pending_delete_dir(fs::path(pending_delete_dir))); + fs::path pending_delete_dir = pending_delete_log.parent_path(); + assert(sstable::is_pending_delete_dir(pending_delete_dir)); try { - auto sstdir = parent_path(pending_delete_dir); - auto text = co_await seastar::util::read_entire_file_contiguous(fs::path(pending_delete_log)); + sstring sstdir = pending_delete_dir.parent_path().native(); + auto text = co_await seastar::util::read_entire_file_contiguous(pending_delete_log); sstring all(text.begin(), text.end()); std::vector basenames; diff --git a/sstables/sstables.hh b/sstables/sstables.hh index 451a552f8b..ffda6bf15e 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -920,7 +920,7 @@ public: // // This function only solves the second problem for now. future<> delete_atomically(std::vector ssts); -future<> replay_pending_delete_log(sstring log_file); +future<> replay_pending_delete_log(fs::path log_file); // Validate checksums // From 865c51c6cf1bbfeda143122cdc00678987897abb Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 17 Nov 2022 20:06:09 +0300 Subject: [PATCH 6/8] sstables: Open-code delete_sstables() call It's no used by any other code, but to be used it requires the caller to tranform TOC file names by prepending sstable directory to them. Things get shorter and simpler if merging the helper code into the caller. Signed-off-by: Pavel Emelyanov --- sstables/sstables.cc | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 5813b6e48e..6dce37b14f 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -3037,13 +3037,6 @@ utils::hashed_key sstable::make_hashed_key(const schema& s, const partition_key& return utils::make_hashed_key(static_cast(key::from_partition_key(s, key))); } -future<> -delete_sstables(std::vector tocs) { - return parallel_for_each(tocs, [] (const sstring& name) { - return remove_by_toc_name(name); - }); -} - future<> sstable::unlink() noexcept { // We must be able to generate toc_filename() @@ -3165,10 +3158,10 @@ future<> replay_pending_delete_log(fs::path pending_delete_log) { sstring all(text.begin(), text.end()); std::vector basenames; boost::split(basenames, all, boost::is_any_of("\n"), boost::token_compress_on); - auto tocs = boost::copy_range>(basenames - | boost::adaptors::filtered([] (auto&& basename) { return !basename.empty(); }) - | boost::adaptors::transformed([&sstdir] (auto&& basename) { return sstdir + "/" + basename; })); - co_await delete_sstables(tocs); + auto tocs = boost::copy_range>(basenames | boost::adaptors::filtered([] (auto&& basename) { return !basename.empty(); })); + co_await parallel_for_each(tocs, [&sstdir] (const sstring& name) { + return remove_by_toc_name(sstdir + "/" + name); + }); } catch (...) { sstlog.warn("Error replaying {}: {}. Ignoring.", pending_delete_log, std::current_exception()); } From bdc47b771757ed87b1d2dc120ca5a23b5757ddbb Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 17 Nov 2022 20:29:29 +0300 Subject: [PATCH 7/8] sstables: Move deletion log manipulations to sstable_directory.cc The deletion log concept uses the fact that files are on a POSIX filesystem. Support for another storage type will have to reimplement this place, so keep the FS-specific code in _directory.cc file. Signed-off-by: Pavel Emelyanov --- replica/distributed_loader.cc | 2 +- replica/table.cc | 4 +- sstables/sstable_directory.cc | 94 ++++++++++++++++++++++++++++ sstables/sstable_directory.hh | 15 +++++ sstables/sstables.cc | 96 +---------------------------- sstables/sstables.hh | 18 +----- test/boost/sstable_datafile_test.cc | 3 +- 7 files changed, 118 insertions(+), 114 deletions(-) diff --git a/replica/distributed_loader.cc b/replica/distributed_loader.cc index 8a55ccb2a1..df6eaba056 100644 --- a/replica/distributed_loader.cc +++ b/replica/distributed_loader.cc @@ -434,7 +434,7 @@ future<> distributed_loader::handle_sstables_pending_delete(sstring pending_dele futures.push_back(remove_file(file_path.string())); } else if (file_path.extension() == ".log") { dblog.info("Found pending_delete log file: {}, replaying", file_path); - auto f = sstables::replay_pending_delete_log(file_path).then([file_path = std::move(file_path)] { + auto f = sstables::sstable_directory::replay_pending_delete_log(file_path).then([file_path = std::move(file_path)] { dblog.debug("Replayed {}, removing", file_path); return remove_file(file_path.string()); }); diff --git a/replica/table.cc b/replica/table.cc index e5d6af1a4d..5850359bde 100644 --- a/replica/table.cc +++ b/replica/table.cc @@ -1086,7 +1086,7 @@ compaction_group::update_main_sstable_list_on_compaction_completion(sstables::co auto f = seastar::try_with_gate(_t._sstable_deletion_gate, [this, sstables_to_remove = desc.old_sstables] { return with_semaphore(_t._sstable_deletion_sem, 1, [sstables_to_remove = std::move(sstables_to_remove)] { - return sstables::delete_atomically(std::move(sstables_to_remove)); + return sstables::sstable_directory::delete_atomically(std::move(sstables_to_remove)); }); }); @@ -1699,7 +1699,7 @@ future table::discard_sstables(db_clock::time_point truncat if (r.enable_backlog_tracker) { remove_sstable_from_backlog_tracker(p->cg.get_backlog_tracker(), r.sst); } - return sstables::delete_atomically({r.sst}); + return sstables::sstable_directory::delete_atomically({r.sst}); }).then([p] { return make_ready_future(p->rp); }); diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index aeab920671..b2140be6ad 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -7,7 +7,9 @@ */ #include +#include #include +#include #include "sstables/sstable_directory.hh" #include "sstables/sstables.hh" #include "compaction/compaction_manager.hh" @@ -463,4 +465,96 @@ sstable_directory::retrieve_shared_sstables() { return std::exchange(_shared_sstable_info, {}); } +future<> sstable_directory::delete_atomically(std::vector ssts) { + if (ssts.empty()) { + return make_ready_future<>(); + } + return seastar::async([ssts = std::move(ssts)] { + sstring sstdir; + min_max_tracker gen_tracker; + + for (const auto& sst : ssts) { + gen_tracker.update(sst->generation()); + + if (sstdir.empty()) { + sstdir = sst->get_dir(); + } else { + // All sstables are assumed to be in the same column_family, hence + // sharing their base directory. + assert (sstdir == sst->get_dir()); + } + } + + sstring pending_delete_dir = sstdir + "/" + sstable::pending_delete_dir_basename(); + sstring pending_delete_log = format("{}/sstables-{}-{}.log", pending_delete_dir, gen_tracker.min(), gen_tracker.max()); + sstring tmp_pending_delete_log = pending_delete_log + ".tmp"; + sstlog.trace("Writing {}", tmp_pending_delete_log); + try { + touch_directory(pending_delete_dir).get(); + auto oflags = open_flags::wo | open_flags::create | open_flags::exclusive; + // Create temporary pending_delete log file. + auto f = open_file_dma(tmp_pending_delete_log, oflags).get0(); + // Write all toc names into the log file. + auto out = make_file_output_stream(std::move(f), 4096).get0(); + auto close_out = deferred_close(out); + + for (const auto& sst : ssts) { + auto toc = sst->component_basename(component_type::TOC); + out.write(toc).get(); + out.write("\n").get(); + } + + out.flush().get(); + close_out.close_now(); + + auto dir_f = open_directory(pending_delete_dir).get0(); + // Once flushed and closed, the temporary log file can be renamed. + rename_file(tmp_pending_delete_log, pending_delete_log).get(); + + // Guarantee that the changes above reached the disk. + dir_f.flush().get(); + dir_f.close().get(); + sstlog.debug("{} written successfully.", pending_delete_log); + } catch (...) { + sstlog.warn("Error while writing {}: {}. Ignoring.", pending_delete_log, std::current_exception()); + } + + parallel_for_each(ssts, [] (shared_sstable sst) { + return sst->unlink(); + }).get(); + + // Once all sstables are deleted, the log file can be removed. + // Note: the log file will be removed also if unlink failed to remove + // any sstable and ignored the error. + try { + remove_file(pending_delete_log).get(); + sstlog.debug("{} removed.", pending_delete_log); + } catch (...) { + sstlog.warn("Error removing {}: {}. Ignoring.", pending_delete_log, std::current_exception()); + } + }); +} + +// FIXME: Go through maybe_delete_large_partitions_entry on recovery +// since this is an indication we crashed in the middle of delete_atomically +future<> sstable_directory::replay_pending_delete_log(fs::path pending_delete_log) { + sstlog.debug("Reading pending_deletes log file {}", pending_delete_log); + fs::path pending_delete_dir = pending_delete_log.parent_path(); + assert(sstable::is_pending_delete_dir(pending_delete_dir)); + try { + sstring sstdir = pending_delete_dir.parent_path().native(); + auto text = co_await seastar::util::read_entire_file_contiguous(pending_delete_log); + + sstring all(text.begin(), text.end()); + std::vector basenames; + boost::split(basenames, all, boost::is_any_of("\n"), boost::token_compress_on); + auto tocs = boost::copy_range>(basenames | boost::adaptors::filtered([] (auto&& basename) { return !basename.empty(); })); + co_await parallel_for_each(tocs, [&sstdir] (const sstring& name) { + return remove_by_toc_name(sstdir + "/" + name); + }); + } catch (...) { + sstlog.warn("Error replaying {}: {}. Ignoring.", pending_delete_log, std::current_exception()); + } +} + } diff --git a/sstables/sstable_directory.hh b/sstables/sstable_directory.hh index 881f672c45..1c80bded55 100644 --- a/sstables/sstable_directory.hh +++ b/sstables/sstable_directory.hh @@ -207,6 +207,21 @@ public: std::filesystem::path sstable_dir() const noexcept { return _sstable_dir; } + + // When we compact sstables, we have to atomically instantiate the new + // sstable and delete the old ones. Otherwise, if we compact A+B into C, + // and if A contained some data that was tombstoned by B, and if B was + // deleted but A survived, then data from A will be resurrected. + // + // There are two violators of the requirement to atomically delete + // sstables: first sstable instantiation and deletion on disk is atomic + // only wrt. itself, not other sstables, and second when an sstable is + // shared among shard, so actual on-disk deletion of an sstable is deferred + // until all shards agree it can be deleted. + // + // This function only solves the second problem for now. + static future<> delete_atomically(std::vector ssts); + static future<> replay_pending_delete_log(std::filesystem::path log_file); }; } diff --git a/sstables/sstables.cc b/sstables/sstables.cc index 6dce37b14f..e66044cdb2 100644 --- a/sstables/sstables.cc +++ b/sstables/sstables.cc @@ -2816,8 +2816,7 @@ fsync_directory(const io_error_handler& error_handler, sstring dirname) { }); } -static future<> -remove_by_toc_name(sstring sstable_toc_name) { +future<> remove_by_toc_name(sstring sstable_toc_name) { sstring prefix = sstable_toc_name.substr(0, sstable_toc_name.size() - sstable_version_constants::TOC_SUFFIX.size()); sstring new_toc_name = prefix + sstable_version_constants::TEMPORARY_TOC_SUFFIX; @@ -3074,99 +3073,6 @@ sstable::unlink() noexcept { _stats.on_delete(); } -future<> -delete_atomically(std::vector ssts) { - if (ssts.empty()) { - return make_ready_future<>(); - } - return seastar::async([ssts = std::move(ssts)] { - sstring sstdir; - min_max_tracker gen_tracker; - - for (const auto& sst : ssts) { - gen_tracker.update(sst->generation()); - - if (sstdir.empty()) { - sstdir = sst->get_dir(); - } else { - // All sstables are assumed to be in the same column_family, hence - // sharing their base directory. - assert (sstdir == sst->get_dir()); - } - } - - sstring pending_delete_dir = sstdir + "/" + sstable::pending_delete_dir_basename(); - sstring pending_delete_log = format("{}/sstables-{}-{}.log", pending_delete_dir, gen_tracker.min(), gen_tracker.max()); - sstring tmp_pending_delete_log = pending_delete_log + ".tmp"; - sstlog.trace("Writing {}", tmp_pending_delete_log); - try { - touch_directory(pending_delete_dir).get(); - auto oflags = open_flags::wo | open_flags::create | open_flags::exclusive; - // Create temporary pending_delete log file. - auto f = open_file_dma(tmp_pending_delete_log, oflags).get0(); - // Write all toc names into the log file. - auto out = make_file_output_stream(std::move(f), 4096).get0(); - auto close_out = deferred_close(out); - - for (const auto& sst : ssts) { - auto toc = sst->component_basename(component_type::TOC); - out.write(toc).get(); - out.write("\n").get(); - } - - out.flush().get(); - close_out.close_now(); - - auto dir_f = open_directory(pending_delete_dir).get0(); - // Once flushed and closed, the temporary log file can be renamed. - rename_file(tmp_pending_delete_log, pending_delete_log).get(); - - // Guarantee that the changes above reached the disk. - dir_f.flush().get(); - dir_f.close().get(); - sstlog.debug("{} written successfully.", pending_delete_log); - } catch (...) { - sstlog.warn("Error while writing {}: {}. Ignoring.", pending_delete_log, std::current_exception()); - } - - parallel_for_each(ssts, [] (shared_sstable sst) { - return sst->unlink(); - }).get(); - - // Once all sstables are deleted, the log file can be removed. - // Note: the log file will be removed also if unlink failed to remove - // any sstable and ignored the error. - try { - remove_file(pending_delete_log).get(); - sstlog.debug("{} removed.", pending_delete_log); - } catch (...) { - sstlog.warn("Error removing {}: {}. Ignoring.", pending_delete_log, std::current_exception()); - } - }); -} - -// FIXME: Go through maybe_delete_large_partitions_entry on recovery -// since this is an indication we crashed in the middle of delete_atomically -future<> replay_pending_delete_log(fs::path pending_delete_log) { - sstlog.debug("Reading pending_deletes log file {}", pending_delete_log); - fs::path pending_delete_dir = pending_delete_log.parent_path(); - assert(sstable::is_pending_delete_dir(pending_delete_dir)); - try { - sstring sstdir = pending_delete_dir.parent_path().native(); - auto text = co_await seastar::util::read_entire_file_contiguous(pending_delete_log); - - sstring all(text.begin(), text.end()); - std::vector basenames; - boost::split(basenames, all, boost::is_any_of("\n"), boost::token_compress_on); - auto tocs = boost::copy_range>(basenames | boost::adaptors::filtered([] (auto&& basename) { return !basename.empty(); })); - co_await parallel_for_each(tocs, [&sstdir] (const sstring& name) { - return remove_by_toc_name(sstdir + "/" + name); - }); - } catch (...) { - sstlog.warn("Error replaying {}: {}. Ignoring.", pending_delete_log, std::current_exception()); - } -} - thread_local sstables_stats::stats sstables_stats::_shard_stats; thread_local partition_index_cache::stats partition_index_cache::_shard_stats; thread_local cached_file::metrics index_page_cache_metrics; diff --git a/sstables/sstables.hh b/sstables/sstables.hh index ffda6bf15e..ecca5cb3c5 100644 --- a/sstables/sstables.hh +++ b/sstables/sstables.hh @@ -907,21 +907,6 @@ public: gc_clock::time_point get_gc_before_for_fully_expire(const gc_clock::time_point& compaction_time, const tombstone_gc_state& gc_state) const; }; -// When we compact sstables, we have to atomically instantiate the new -// sstable and delete the old ones. Otherwise, if we compact A+B into C, -// and if A contained some data that was tombstoned by B, and if B was -// deleted but A survived, then data from A will be resurrected. -// -// There are two violators of the requirement to atomically delete -// sstables: first sstable instantiation and deletion on disk is atomic -// only wrt. itself, not other sstables, and second when an sstable is -// shared among shard, so actual on-disk deletion of an sstable is deferred -// until all shards agree it can be deleted. -// -// This function only solves the second problem for now. -future<> delete_atomically(std::vector ssts); -future<> replay_pending_delete_log(fs::path log_file); - // Validate checksums // // Sstables have two kind of checksums: per-chunk checksums and a @@ -977,4 +962,7 @@ public: // swallows all errors and just reports them to the log. future<> remove_table_directory_if_has_no_snapshots(fs::path table_dir); +// similar to sstable::unlink, but works on a TOC file name +future<> remove_by_toc_name(sstring sstable_toc_name); + } // namespace sstables diff --git a/test/boost/sstable_datafile_test.cc b/test/boost/sstable_datafile_test.cc index 1f08b82c7b..54ada96fef 100644 --- a/test/boost/sstable_datafile_test.cc +++ b/test/boost/sstable_datafile_test.cc @@ -22,6 +22,7 @@ #include "replica/database.hh" #include "sstables/metadata_collector.hh" #include "sstables/sstable_writer.hh" +#include "sstables/sstable_directory.hh" #include #include "test/boost/sstable_test.hh" #include @@ -2103,7 +2104,7 @@ SEASTAR_TEST_CASE(test_unknown_component) { BOOST_REQUIRE(!file_exists(tmp.path().string() + "/la-1-big-UNKNOWN.txt").get0()); BOOST_REQUIRE(file_exists(tmp.path().string() + "/la-2-big-UNKNOWN.txt").get0()); - sstables::delete_atomically({sstp}).get(); + sstables::sstable_directory::delete_atomically({sstp}).get(); // assure unknown component is deleted BOOST_REQUIRE(!file_exists(tmp.path().string() + "/la-2-big-UNKNOWN.txt").get0()); }); From 2f9b7931afddfe13520b47ffbf2b356f16dd5807 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Thu, 17 Nov 2022 21:00:17 +0300 Subject: [PATCH 8/8] sstables: Delete log file in replay_pending_delete_log() It's natural that the replayer cleans up after itself Signed-off-by: Pavel Emelyanov --- replica/distributed_loader.cc | 5 +---- sstables/sstable_directory.cc | 2 ++ 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/replica/distributed_loader.cc b/replica/distributed_loader.cc index df6eaba056..2b952e66d3 100644 --- a/replica/distributed_loader.cc +++ b/replica/distributed_loader.cc @@ -434,10 +434,7 @@ future<> distributed_loader::handle_sstables_pending_delete(sstring pending_dele futures.push_back(remove_file(file_path.string())); } else if (file_path.extension() == ".log") { dblog.info("Found pending_delete log file: {}, replaying", file_path); - auto f = sstables::sstable_directory::replay_pending_delete_log(file_path).then([file_path = std::move(file_path)] { - dblog.debug("Replayed {}, removing", file_path); - return remove_file(file_path.string()); - }); + auto f = sstables::sstable_directory::replay_pending_delete_log(std::move(file_path)); futures.push_back(std::move(f)); } else { dblog.debug("Found unknown file in pending_delete directory: {}, ignoring", file_path); diff --git a/sstables/sstable_directory.cc b/sstables/sstable_directory.cc index b2140be6ad..c7aa9acb82 100644 --- a/sstables/sstable_directory.cc +++ b/sstables/sstable_directory.cc @@ -552,6 +552,8 @@ future<> sstable_directory::replay_pending_delete_log(fs::path pending_delete_lo co_await parallel_for_each(tocs, [&sstdir] (const sstring& name) { return remove_by_toc_name(sstdir + "/" + name); }); + sstlog.debug("Replayed {}, removing", pending_delete_log); + co_await remove_file(pending_delete_log.native()); } catch (...) { sstlog.warn("Error replaying {}: {}. Ignoring.", pending_delete_log, std::current_exception()); }