Files
scylladb/compaction/compaction_descriptor.hh
Raphael S. Carvalho ab0217e30e compaction: Improve overall efficiency by not diluting it with relatively inefficient jobs
Compaction efficiency can be defined as how much backlog is reduced per
byte read or written.
We know a few facts about efficiency:
1) the more files are compacted together (the fan-in) the higher the
efficiency will be, however...
2) the bigger the size difference of input files the worse the
efficiency, i.e. higher write amplification.

So compactions with similar-sized files are the most efficient ones,
and their efficiency increases with a higher number of files.

However, to avoid bad read amplification, the number of files
cannot grow without bound. So we have to allow parallel compaction
on different tiers, but to avoid "dilution" of overall efficiency,
we will only allow a compaction to proceed if its efficiency is
greater than or equal to the efficiency of ongoing compactions.

For the time being, we'll assume that strategies don't pick candidates
with wildly different sizes, so efficiency is only calculated as a
function of compaction fan-in.

Now, when the system is under heavy load, the fan-in threshold will
automatically grow to guarantee that overall efficiency remains
stable.

Please note that fan-in is defined in number of runs. LCS compaction
on higher levels will have a fan-in of 2. Under heavy load, it
may happen that LCS will temporarily switch to size-tiered mode
so that compaction can keep up with the amount of data being produced.

Signed-off-by: Raphael S. Carvalho <raphaelsc@scylladb.com>
Message-Id: <20211103215110.135633-2-raphaelsc@scylladb.com>
2021-11-03 20:03:23 +02:00

205 lines
7.6 KiB
C++

/*
* Copyright (C) 2020-present ScyllaDB
*
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstdint>
#include <functional>
#include <iosfwd>
#include <limits>
#include <optional>
#include <string_view>
#include <type_traits>
#include <utility>
#include <variant>
#include <vector>
#include <seastar/core/smp.hh>
#include <seastar/core/file.hh>
#include "sstables/shared_sstable.hh"
#include "sstables/sstable_set.hh"
#include "utils/UUID.hh"
#include "dht/i_partitioner.hh"
#include "compaction_weight_registration.hh"
namespace sstables {
// Identifies the kind of a compaction job.
// Every enumerator carries an explicitly assigned numeric value.
enum class compaction_type {
    Compaction  = 0,
    Cleanup     = 1,
    // Origin uses this for a compaction that is used exclusively for repair
    Validation  = 2,
    Scrub       = 3,
    Index_build = 4,
    Reshard     = 5,
    Upgrade     = 6,
    Reshape     = 7,
};

// Stream insertion for compaction_type (declaration only; defined elsewhere).
std::ostream& operator<<(std::ostream& os, compaction_type type);
// Describes the outcome of a compaction in terms of changes to the SSTable set:
// which sstables go away, which ones replace them, and which partition ranges
// need their cache entries invalidated.
struct compaction_completion_desc {
    // Pre-existing SSTables that are to be deleted and dropped from the SSTable set.
    std::vector<shared_sstable> old_sstables;

    // Freshly written SSTables that take the place of the old ones in the SSTable set.
    std::vector<shared_sstable> new_sstables;

    // Compacted partition ranges whose cache contents should be invalidated.
    dht::partition_range_vector ranges_for_cache_invalidation;
};
// Callback type: creates a new SSTable for a given shard and returns it.
using compaction_sstable_creator_fn = std::function<shared_sstable(shard_id shard)>;
// Callback type: replaces old sstable(s) by new one(s) which contain all non-expired data.
// The old/new sstables are passed by value in a compaction_completion_desc.
using compaction_sstable_replacer_fn = std::function<void(compaction_completion_desc)>;
// Selects which kind of compaction to run and carries the parameters specific
// to that kind. The kind-specific payload is one of the nested option structs,
// stored in a std::variant. Instances can only be created through the static
// make_*() factories (the constructor is private).
class compaction_type_options {
public:
    // Regular compaction; carries no parameters.
    struct regular {
    };
    // Cleanup compaction.
    struct cleanup {
        // NOTE(review): presumably the token ranges owned by this node - confirm against callers.
        dht::token_range_vector owned_ranges;
    };
    // Upgrade compaction.
    struct upgrade {
        // NOTE(review): presumably the token ranges owned by this node - confirm against callers.
        dht::token_range_vector owned_ranges;
    };
    // Scrub compaction; behaviour on corruption is selected by operation_mode.
    struct scrub {
        enum class mode {
            abort, // abort scrub on the first sign of corruption
            skip, // skip corrupt data, including range of rows and/or partitions that are out-of-order
            segregate, // segregate out-of-order data into streams that all contain data with correct order
            validate, // validate data, printing all errors found (sstables are only read, not rewritten)
        };
        mode operation_mode = mode::abort;
    };
    // Reshard compaction; carries no parameters.
    struct reshard {
    };
    // Reshape compaction; carries no parameters.
    struct reshape {
    };
private:
    using options_variant = std::variant<regular, cleanup, upgrade, scrub, reshard, reshape>;

    // Merges several callables into a single overload set so that they can be
    // handed to std::visit as one visitor.
    template <typename... Fs>
    struct overloaded_visitor : Fs... {
        using Fs::operator()...;
    };

    options_variant _options;

    explicit compaction_type_options(options_variant options) : _options(std::move(options)) {
    }
public:
    static compaction_type_options make_reshape() {
        return compaction_type_options(reshape{});
    }
    static compaction_type_options make_reshard() {
        return compaction_type_options(reshard{});
    }
    static compaction_type_options make_regular() {
        return compaction_type_options(regular{});
    }
    // Takes ownership of `owned_ranges` (moved into the cleanup options).
    static compaction_type_options make_cleanup(dht::token_range_vector&& owned_ranges) {
        return compaction_type_options(cleanup{std::move(owned_ranges)});
    }
    // Takes ownership of `owned_ranges` (moved into the upgrade options).
    static compaction_type_options make_upgrade(dht::token_range_vector&& owned_ranges) {
        return compaction_type_options(upgrade{std::move(owned_ranges)});
    }
    static compaction_type_options make_scrub(scrub::mode mode) {
        return compaction_type_options(scrub{mode});
    }
    // Applies the given visitor(s) to the stored options and returns the result.
    //
    // With a single visitor it is forwarded to std::visit unchanged. With several
    // visitors they are first combined into one overload set; forwarding them
    // individually would make std::visit interpret every visitor after the first
    // as an additional variant, which does not compile.
    template <typename... Visitor>
    auto visit(Visitor&&... visitor) const {
        if constexpr (sizeof...(Visitor) == 1) {
            return std::visit(std::forward<Visitor>(visitor)..., _options);
        } else {
            return std::visit(overloaded_visitor<std::decay_t<Visitor>...>{std::forward<Visitor>(visitor)...}, _options);
        }
    }
    // Read-only access to the underlying variant.
    const options_variant& options() const { return _options; }
    // Returns the compaction_type of these options (defined out of line).
    compaction_type type() const;
};
// String representation of a scrub mode (declaration only; defined elsewhere).
std::string_view to_string(compaction_type_options::scrub::mode);
// Stream insertion for a scrub mode (declaration only; defined elsewhere).
std::ostream& operator<<(std::ostream& os, compaction_type_options::scrub::mode scrub_mode);
// Empty tag type; used only to instantiate the bool_class below.
class dummy_tag {};
// Strongly-typed boolean flag (seastar::bool_class) stored in compaction_descriptor,
// where it denotes whether a compaction task is comprised solely of completely
// expired SSTables.
using has_only_fully_expired = seastar::bool_class<dummy_tag>;
// Describes a single compaction job: the input sstables, the properties of the
// output sstables, the kind of compaction to run and the callbacks the
// compaction code uses to create and replace sstables.
struct compaction_descriptor {
    // Defaults used for `level` and `max_sstable_bytes` when not specified.
    static constexpr int default_level = 0;
    static constexpr uint64_t default_max_sstable_bytes = std::numeric_limits<uint64_t>::max();

    // List of sstables to be compacted.
    std::vector<sstables::shared_sstable> sstables;
    // This is a snapshot of the table's sstable set, used only for the purpose of expiring tombstones.
    // If this sstable set cannot be provided, expiration will be disabled to prevent data from being resurrected.
    std::optional<sstables::sstable_set> all_sstables_snapshot;
    // Level of sstable(s) created by compaction procedure.
    // Initialized in-class so that a default-constructed descriptor never holds an indeterminate value.
    int level = default_level;
    // Threshold size for sstable(s) to be created.
    // Initialized in-class so that a default-constructed descriptor never holds an indeterminate value.
    uint64_t max_sstable_bytes = default_max_sstable_bytes;
    // Run identifier of output sstables.
    utils::UUID run_identifier;
    // Calls compaction manager's task for this compaction to release reference to exhausted sstables.
    std::function<void(const std::vector<shared_sstable>& exhausted_sstables)> release_exhausted;
    // The options passed down to the compaction code.
    // This also selects the kind of compaction to do.
    compaction_type_options options = compaction_type_options::make_regular();
    // Callback that creates a new SSTable for a given shard.
    compaction_sstable_creator_fn creator;
    // Callback that replaces old sstable(s) by new one(s).
    compaction_sstable_replacer_fn replacer;

    ::io_priority_class io_priority = default_priority_class();

    // Denotes if this compaction task is comprised solely of completely expired SSTables
    sstables::has_only_fully_expired has_only_fully_expired = has_only_fully_expired::no;

    compaction_descriptor() = default;

    // Builds a descriptor for the given input sstables.
    // `level`, `max_sstable_bytes`, `run_identifier` and `options` fall back to
    // the defaults shown in the signature when omitted; in particular a random
    // run identifier is generated and a regular compaction is selected.
    explicit compaction_descriptor(std::vector<sstables::shared_sstable> sstables,
                                   std::optional<sstables::sstable_set> all_sstables_snapshot,
                                   ::io_priority_class io_priority,
                                   int level = default_level,
                                   uint64_t max_sstable_bytes = default_max_sstable_bytes,
                                   utils::UUID run_identifier = utils::make_random_uuid(),
                                   compaction_type_options options = compaction_type_options::make_regular())
        : sstables(std::move(sstables))
        , all_sstables_snapshot(std::move(all_sstables_snapshot))
        , level(level)
        , max_sstable_bytes(max_sstable_bytes)
        , run_identifier(run_identifier)
        // `options` is a by-value sink parameter: move it, it may own a vector of token ranges.
        , options(std::move(options))
        , io_priority(io_priority)
    {}

    // Builds a regular-compaction descriptor with an explicit has_only_fully_expired flag.
    // `level`, `max_sstable_bytes` and `options` keep their default member
    // initializers (default_level, default_max_sstable_bytes, regular compaction).
    explicit compaction_descriptor(sstables::has_only_fully_expired has_only_fully_expired,
                                   std::vector<sstables::shared_sstable> sstables,
                                   std::optional<sstables::sstable_set> all_sstables_snapshot,
                                   ::io_priority_class io_priority)
        : sstables(std::move(sstables))
        , all_sstables_snapshot(std::move(all_sstables_snapshot))
        , run_identifier(utils::make_random_uuid())
        , io_priority(io_priority)
        , has_only_fully_expired(has_only_fully_expired)
    {}

    // Return fan-in of this job, which is equal to its number of runs.
    unsigned fan_in() const;
};
}