fix compaction when a bucket grows beyond max threshold

Size-tiered compaction strategy works by creating buckets with sstables
of similar size, but if a bucket's size is greater than max_threshold
(defined in schema), it will not be selected for compaction.
Scenario described by issue 298 is facing that. If compaction takes a
long time to finish, more than max_threshold sstables will be created,
and thus there wouldn't be a 'valid' bucket for compaction.
Solution is to not add a sstable for a bucket that reached its limit,
so that bucket will have a chance to be compacted.

Fixes issue #298.

Signed-off-by: Raphael S. Carvalho <raphaelsc@cloudius-systems.com>
This commit is contained in:
Raphael S. Carvalho
2015-09-08 15:54:34 -03:00
committed by Avi Kivity
parent 5bbe526738
commit 73695f80c4

View File

@@ -356,7 +356,7 @@ class size_tiered_compaction_strategy : public compaction_strategy_impl {
std::vector<std::pair<sstables::shared_sstable, uint64_t>> create_sstable_and_length_pairs(const sstable_list& sstables);
// Group files of similar size into buckets.
std::vector<std::vector<sstables::shared_sstable>> get_buckets(const sstable_list& sstables);
std::vector<std::vector<sstables::shared_sstable>> get_buckets(const sstable_list& sstables, unsigned max_threshold);
// Maybe return a bucket of sstables to compact
std::vector<sstables::shared_sstable>
@@ -402,7 +402,7 @@ size_tiered_compaction_strategy::create_sstable_and_length_pairs(const sstable_l
}
std::vector<std::vector<sstables::shared_sstable>>
size_tiered_compaction_strategy::get_buckets(const sstable_list& sstables) {
size_tiered_compaction_strategy::get_buckets(const sstable_list& sstables, unsigned max_threshold) {
// sstables sorted by size of its data file.
auto sorted_sstables = create_sstable_and_length_pairs(sstables);
@@ -424,8 +424,9 @@ size_tiered_compaction_strategy::get_buckets(const sstable_list& sstables) {
std::vector<sstables::shared_sstable> bucket = entry.second;
size_t old_average_size = entry.first;
if ((size > (old_average_size * _options.bucket_low) && size < (old_average_size * _options.bucket_high))
if (((size > (old_average_size * _options.bucket_low) && size < (old_average_size * _options.bucket_high))
|| (size < _options.min_sstable_size && old_average_size < _options.min_sstable_size))
&& (bucket.size() < max_threshold))
{
size_t total_size = bucket.size() * old_average_size;
size_t new_average_size = (total_size + size) / (bucket.size() + 1);
@@ -500,7 +501,7 @@ future<> size_tiered_compaction_strategy::compact(column_family& cfs) {
// TODO: Add support to filter cold sstables (for reference: SizeTieredCompactionStrategy::filterColdSSTables).
auto buckets = get_buckets(*candidates);
auto buckets = get_buckets(*candidates, max_threshold);
std::vector<sstables::shared_sstable> most_interesting = most_interesting_bucket(std::move(buckets), min_threshold, max_threshold);
#ifdef __DEBUG__
@@ -517,7 +518,7 @@ future<> size_tiered_compaction_strategy::compact(column_family& cfs) {
std::vector<sstables::shared_sstable> size_tiered_most_interesting_bucket(lw_shared_ptr<sstable_list> candidates) {
size_tiered_compaction_strategy cs;
auto buckets = cs.get_buckets(*candidates);
auto buckets = cs.get_buckets(*candidates, DEFAULT_MAX_COMPACTION_THRESHOLD);
std::vector<sstables::shared_sstable> most_interesting = cs.most_interesting_bucket(std::move(buckets),
DEFAULT_MIN_COMPACTION_THRESHOLD, DEFAULT_MAX_COMPACTION_THRESHOLD);