From 73695f80c456bbbbbf5197746fd48bea3a78fcc9 Mon Sep 17 00:00:00 2001 From: "Raphael S. Carvalho" Date: Tue, 8 Sep 2015 15:54:34 -0300 Subject: [PATCH] fix compaction when a bucket grows beyond max threshold The size-tiered compaction strategy works by grouping sstables of similar size into buckets, but if a bucket holds more than max_threshold sstables (max_threshold is defined in the schema), it will not be selected for compaction. This is the scenario described in issue 298: if compaction takes a long time to finish, more than max_threshold sstables will be created, and thus there will be no 'valid' bucket for compaction. The solution is to not add an sstable to a bucket that has reached its limit, so that the bucket will have a chance to be compacted. Fixes issue #298. Signed-off-by: Raphael S. Carvalho --- sstables/compaction.cc | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sstables/compaction.cc b/sstables/compaction.cc index 07bc52199e..db56d0301f 100644 --- a/sstables/compaction.cc +++ b/sstables/compaction.cc @@ -356,7 +356,7 @@ class size_tiered_compaction_strategy : public compaction_strategy_impl { std::vector> create_sstable_and_length_pairs(const sstable_list& sstables); // Group files of similar size into buckets. - std::vector> get_buckets(const sstable_list& sstables); + std::vector> get_buckets(const sstable_list& sstables, unsigned max_threshold); // Maybe return a bucket of sstables to compact std::vector @@ -402,7 +402,7 @@ size_tiered_compaction_strategy::create_sstable_and_length_pairs(const sstable_l } std::vector> -size_tiered_compaction_strategy::get_buckets(const sstable_list& sstables) { +size_tiered_compaction_strategy::get_buckets(const sstable_list& sstables, unsigned max_threshold) { // sstables sorted by size of its data file. 
auto sorted_sstables = create_sstable_and_length_pairs(sstables); @@ -424,8 +424,9 @@ size_tiered_compaction_strategy::get_buckets(const sstable_list& sstables) { std::vector bucket = entry.second; size_t old_average_size = entry.first; - if ((size > (old_average_size * _options.bucket_low) && size < (old_average_size * _options.bucket_high)) + if (((size > (old_average_size * _options.bucket_low) && size < (old_average_size * _options.bucket_high)) || (size < _options.min_sstable_size && old_average_size < _options.min_sstable_size)) + && (bucket.size() < max_threshold)) { size_t total_size = bucket.size() * old_average_size; size_t new_average_size = (total_size + size) / (bucket.size() + 1); @@ -500,7 +501,7 @@ future<> size_tiered_compaction_strategy::compact(column_family& cfs) { // TODO: Add support to filter cold sstables (for reference: SizeTieredCompactionStrategy::filterColdSSTables). - auto buckets = get_buckets(*candidates); + auto buckets = get_buckets(*candidates, max_threshold); std::vector most_interesting = most_interesting_bucket(std::move(buckets), min_threshold, max_threshold); #ifdef __DEBUG__ @@ -517,7 +518,7 @@ future<> size_tiered_compaction_strategy::compact(column_family& cfs) { std::vector size_tiered_most_interesting_bucket(lw_shared_ptr candidates) { size_tiered_compaction_strategy cs; - auto buckets = cs.get_buckets(*candidates); + auto buckets = cs.get_buckets(*candidates, DEFAULT_MAX_COMPACTION_THRESHOLD); std::vector most_interesting = cs.most_interesting_bucket(std::move(buckets), DEFAULT_MIN_COMPACTION_THRESHOLD, DEFAULT_MAX_COMPACTION_THRESHOLD);