From 9a4b4afadea748ce190882adff63d0de6839ea1e Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Tue, 11 Mar 2025 19:10:45 +0200 Subject: [PATCH] db: snapshot: backup_task: do_backup: prioritize sstables that are already deleted from the table Detect SSTables that are already deleted from the table in process_snapshot_dir when their number_of_links is equal to 1. Note that the SSTable may be hard-linked by more than one snapshot, so even after it is deleted from the table, its number of links would be greater than one. In that case, however, uploading it earlier won't help to free-up its capacity since it is still held by other snapshots. Signed-off-by: Benny Halevy --- db/snapshot/backup_task.cc | 22 ++++++++++++++++++++++ db/snapshot/backup_task.hh | 1 + 2 files changed, 23 insertions(+) diff --git a/db/snapshot/backup_task.cc b/db/snapshot/backup_task.cc index 19d0122655..4aa5196941 100644 --- a/db/snapshot/backup_task.cc +++ b/db/snapshot/backup_task.cc @@ -136,6 +136,14 @@ future<> backup_task_impl::process_snapshot_dir() { const auto& gen = desc.generation; _sstable_comps[gen].emplace_back(name); ++num_sstable_comps; + + // When the SSTable is only linked-to by the snapshot directory, + // it is already deleted from the table's base directory, and + // therefore it should be uploaded earlier to free up its capacity. + if (desc.component == sstables::component_type::Data && st.number_of_links == 1) { + snap_log.debug("backup_task: SSTable with generation {} is already deleted from the table", gen); + _deleted_sstables.push_back(gen); + } } catch (const sstables::malformed_sstable_exception&) { _files.emplace_back(name); } @@ -217,6 +225,20 @@ void backup_task_impl::dequeue_sstable() { if (to_backup == _sstable_comps.end()) { return; } + // Prioritize sstables that were already deleted + // from the table, to free up their capacity earlier. 
+ while (!_deleted_sstables.empty()) { + auto gen = _deleted_sstables.back(); + _deleted_sstables.pop_back(); + auto it = _sstable_comps.find(gen); + // It is possible that the sstable was already backed up + // so silently skip this generation + // and keep looking for another candidate + if (it != _sstable_comps.end()) { + to_backup = it; + break; + } + } auto ent = _sstable_comps.extract(to_backup); snap_log.debug("Backing up SSTable generation {}", ent.key()); for (auto& name : ent.mapped()) { diff --git a/db/snapshot/backup_task.hh b/db/snapshot/backup_task.hh index 60eb8934ee..a0a75119d8 100644 --- a/db/snapshot/backup_task.hh +++ b/db/snapshot/backup_task.hh @@ -43,6 +43,7 @@ class backup_task_impl : public tasks::task_manager::task::impl { using comps_vector = utils::small_vector; using comps_map = std::unordered_map; comps_map _sstable_comps; + std::vector _deleted_sstables; future<> do_backup(); future<> upload_component(sstring name);