From ed9122a84e38efdb1d66e03034fbb4bb80bb1053 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Sun, 15 Sep 2024 20:16:31 +0300 Subject: [PATCH] time_window_compaction_strategy: get_reshaping_job: restrict sort of multi_window vector to its size Currently the function calls boost::partial_sort with a middle iterator that might be out of bounds and cause undefined behavior. Check the vector size, and do a partial sort only if it is longer than `max_sstables`, otherwise sort the whole vector. Fixes scylladb/scylladb#20608 Signed-off-by: Benny Halevy Closes scylladb/scylladb#20609 (cherry picked from commit 39ce358d82b7ad96f0675adcca9e262d7fb53b91) Refs: scylladb/scylladb#20609 --- compaction/time_window_compaction_strategy.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compaction/time_window_compaction_strategy.cc b/compaction/time_window_compaction_strategy.cc index 7effe8eae2..9d8f5bbef4 100644 --- a/compaction/time_window_compaction_strategy.cc +++ b/compaction/time_window_compaction_strategy.cc @@ -296,7 +296,8 @@ time_window_compaction_strategy::get_reshaping_job(std::vector i // When trimming, let's keep sstables with overlapping time window, so as to reduce write amplification. // For example, if there are N sstables spanning window W, where N <= 32, then we can produce all data for W // in a single compaction round, removing the need to later compact W to reduce its number of files. - boost::partial_sort(multi_window, multi_window.begin() + max_sstables, [](const shared_sstable &a, const shared_sstable &b) { + auto sort_size = std::min(max_sstables, multi_window.size()); + boost::partial_sort(multi_window, multi_window.begin() + sort_size, [](const shared_sstable &a, const shared_sstable &b) { return a->get_stats_metadata().max_timestamp < b->get_stats_metadata().max_timestamp; }); maybe_trim_job(multi_window, job_size, disjoint);