mirror of
https://github.com/scylladb/scylladb.git
synced 2026-04-22 09:30:45 +00:00
Instead of dht::partition_ranges_vector, which is a std::vector<> and
has been seen to cause large allocations when calculating ranges to be
invalidated after compaction:
seastar_memory - oversized allocation: 147456 bytes. This is non-fatal, but could lead to latency and/or fragmentation issues. Please report: at
[Backtrace #0]
void seastar::backtrace<seastar::current_backtrace_tasklocal()::$_0>(seastar::current_backtrace_tasklocal()::$_0&&, bool) at ./build/release/seastar/./seastar/include/seastar/util/backtrace.hh:89
(inlined by) seastar::current_backtrace_tasklocal() at ./build/release/seastar/./seastar/src/util/backtrace.cc:99
seastar::current_tasktrace() at ./build/release/seastar/./seastar/src/util/backtrace.cc:136
seastar::current_backtrace() at ./build/release/seastar/./seastar/src/util/backtrace.cc:169
seastar::memory::cpu_pages::warn_large_allocation(unsigned long) at ./build/release/seastar/./seastar/src/core/memory.cc:840
seastar::memory::cpu_pages::check_large_allocation(unsigned long) at ./build/release/seastar/./seastar/src/core/memory.cc:903
(inlined by) seastar::memory::cpu_pages::allocate_large(unsigned int, bool) at ./build/release/seastar/./seastar/src/core/memory.cc:910
(inlined by) seastar::memory::allocate_large(unsigned long, bool) at ./build/release/seastar/./seastar/src/core/memory.cc:1533
(inlined by) seastar::memory::allocate_slowpath(unsigned long) at ./build/release/seastar/./seastar/src/core/memory.cc:1679
seastar::memory::allocate(unsigned long) at ././seastar/src/core/memory.cc:1698
(inlined by) operator new(unsigned long) at ././seastar/src/core/memory.cc:2440
(inlined by) std::__new_allocator<interval<dht::ring_position>>::allocate(unsigned long, void const*) at /usr/lib/gcc/x86_64-redhat-linux/15/../../../../include/c++/15/bits/new_allocator.h:151
(inlined by) std::allocator<interval<dht::ring_position>>::allocate(unsigned long) at /usr/lib/gcc/x86_64-redhat-linux/15/../../../../include/c++/15/bits/allocator.h:203
(inlined by) std::allocator_traits<std::allocator<interval<dht::ring_position>>>::allocate(std::allocator<interval<dht::ring_position>>&, unsigned long) at /usr/lib/gcc/x86_64-redhat-linux/15/../../../../include/c++/15/bits/alloc_traits.h:614
(inlined by) std::_Vector_base<interval<dht::ring_position>, std::allocator<interval<dht::ring_position>>>::_M_allocate(unsigned long) at /usr/lib/gcc/x86_64-redhat-linux/15/../../../../include/c++/15/bits/stl_vector.h:387
(inlined by) std::vector<interval<dht::ring_position>, std::allocator<interval<dht::ring_position>>>::reserve(unsigned long) at /usr/lib/gcc/x86_64-redhat-linux/15/../../../../include/c++/15/bits/vector.tcc:79
dht::to_partition_ranges(utils::chunked_vector<interval<dht::token>, 131072ul> const&, seastar::bool_class<utils::can_yield_tag>) at ./dht/i_partitioner.cc:347
compaction::compaction::get_ranges_for_invalidation(std::vector<seastar::lw_shared_ptr<sstables::sstable>, std::allocator<seastar::lw_shared_ptr<sstables::sstable>>> const&) at ./compaction/compaction.cc:619
(inlined by) compaction::compaction::get_compaction_completion_desc(std::vector<seastar::lw_shared_ptr<sstables::sstable>, std::allocator<seastar::lw_shared_ptr<sstables::sstable>>>, std::vector<seastar::lw_shared_ptr<sstables::sstable>, std::allocator<seastar::lw_shared_ptr<sstables::sstable>>>) at ./compaction/compaction.cc:719
(inlined by) compaction::regular_compaction::replace_remaining_exhausted_sstables() at ./compaction/compaction.cc:1362
compaction::compaction::finish(std::chrono::time_point<db_clock, std::chrono::duration<long, std::ratio<1l, 1000l>>>, std::chrono::time_point<db_clock, std::chrono::duration<long, std::ratio<1l, 1000l>>>) at ./compaction/compaction.cc:1021
compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0::operator()() at ./compaction/compaction.cc:1960
(inlined by) compaction::compaction_result std::__invoke_impl<compaction::compaction_result, compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0>(std::__invoke_other, compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0&&) at /usr/lib/gcc/x86_64-redhat-linux/15/../../../../include/c++/15/bits/invoke.h:63
(inlined by) std::__invoke_result<compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0>::type std::__invoke<compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0>(compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0&&) at /usr/lib/gcc/x86_64-redhat-linux/15/../../../../include/c++/15/bits/invoke.h:98
(inlined by) decltype(auto) std::__apply_impl<compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0, std::tuple<>>(compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0&&, std::tuple<>&&, std::integer_sequence<unsigned long, ...>) at /usr/lib/gcc/x86_64-redhat-linux/15/../../../../include/c++/15/tuple:2920
(inlined by) decltype(auto) std::apply<compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0, std::tuple<>>(compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0&&, std::tuple<>&&) at /usr/lib/gcc/x86_64-redhat-linux/15/../../../../include/c++/15/tuple:2935
(inlined by) seastar::future<compaction::compaction_result> seastar::futurize<compaction::compaction_result>::apply<compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0>(compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0&&, std::tuple<>&&) at ././seastar/include/seastar/core/future.hh:1930
(inlined by) seastar::futurize<std::invoke_result<compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0>::type>::type seastar::async<compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0>(seastar::thread_attributes, compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0&&)::'lambda'()::operator()() const at ././seastar/include/seastar/core/thread.hh:267
(inlined by) seastar::noncopyable_function<void ()>::direct_vtable_for<seastar::futurize<std::invoke_result<compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0>::type>::type seastar::async<compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0>(seastar::thread_attributes, compaction::compaction::run(std::unique_ptr<compaction::compaction, std::default_delete<compaction::compaction>>)::$_0&&)::'lambda'()>::call(seastar::noncopyable_function<void ()> const*) at ././seastar/include/seastar/util/noncopyable_function.hh:138
seastar::noncopyable_function<void ()>::operator()() const at ./build/release/seastar/./seastar/include/seastar/util/noncopyable_function.hh:224
(inlined by) seastar::thread_context::main() at ./build/release/seastar/./seastar/src/core/thread.cc:318
dht::partition_ranges_vector is used on the hot path, so just convert
the problematic user -- cache invalidation -- to use
utils::chunked_vector<dht::partition_range> instead.
Fixes: SCYLLADB-121
Closes scylladb/scylladb#28855
(cherry picked from commit 13ff9c4394)
Closes scylladb/scylladb#28975
248 lines
9.8 KiB
C++
248 lines
9.8 KiB
C++
/*
 * Copyright (C) 2020-present ScyllaDB
 *
 */

/*
 * SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
 */
|
|
|
|
#pragma once
|
|
|
|
#include <functional>
|
|
#include <optional>
|
|
#include <variant>
|
|
#include "sstables/types_fwd.hh"
|
|
#include "sstables/sstable_set.hh"
|
|
#include "compaction_fwd.hh"
|
|
#include "mutation_writer/token_group_based_splitting_writer.hh"
|
|
#include "utils/chunked_vector.hh"
|
|
|
|
namespace compaction {
|
|
|
|
// Kinds of compaction operation.
//
// Every enumerator carries an explicit numeric value.
// NOTE(review): the explicit numbering suggests the values are observed outside
// this header (e.g. serialized or reported) -- confirm before renumbering.
enum class compaction_type {
    Compaction = 0, // Used only for regular compactions
    Cleanup = 1,
    Validation = 2, // Origin uses this for a compaction that is used exclusively for repair
    Scrub = 3,
    Index_build = 4,
    Reshard = 5,
    Upgrade = 6,
    Reshape = 7,
    Split = 8,
    Major = 9,
};
|
|
|
|
struct compaction_completion_desc {
|
|
// Old, existing SSTables that should be deleted and removed from the SSTable set.
|
|
std::vector<sstables::shared_sstable> old_sstables;
|
|
// New, fresh SSTables that should be added to SSTable set, replacing the old ones.
|
|
std::vector<sstables::shared_sstable> new_sstables;
|
|
// Set of compacted partition ranges that should be invalidated in the cache.
|
|
utils::chunked_vector<dht::partition_range> ranges_for_cache_invalidation;
|
|
};
|
|
|
|
// creates a new SSTable for a given shard
|
|
using compaction_sstable_creator_fn = std::function<sstables::shared_sstable(shard_id shard)>;
|
|
// Replaces old sstable(s) by new one(s) which contain all non-expired data.
|
|
using compaction_sstable_replacer_fn = std::function<void(compaction_completion_desc)>;
|
|
|
|
class compaction_type_options {
|
|
public:
|
|
struct regular {
|
|
};
|
|
struct major {
|
|
};
|
|
struct cleanup {
|
|
};
|
|
struct upgrade {
|
|
};
|
|
struct scrub {
|
|
enum class mode {
|
|
abort, // abort scrub on the first sign of corruption
|
|
skip, // skip corrupt data, including range of rows and/or partitions that are out-of-order
|
|
segregate, // segregate out-of-order data into streams that all contain data with correct order
|
|
validate, // validate data, printing all errors found (sstables are only read, not rewritten)
|
|
};
|
|
mode operation_mode = mode::abort;
|
|
|
|
enum class quarantine_mode {
|
|
include, // scrub all sstables, including quarantined
|
|
exclude, // scrub only non-quarantined sstables
|
|
only, // scrub only quarantined sstables
|
|
};
|
|
quarantine_mode quarantine_operation_mode = quarantine_mode::include;
|
|
|
|
using quarantine_invalid_sstables = bool_class<class quarantine_invalid_sstables_tag>;
|
|
|
|
// Should invalid sstables be moved into quarantine.
|
|
// Only applies to validate-mode.
|
|
quarantine_invalid_sstables quarantine_sstables = quarantine_invalid_sstables::yes;
|
|
|
|
using drop_unfixable_sstables = bool_class<class drop_unfixable_sstables_tag>;
|
|
// Drop sstables that cannot be fixed.
|
|
// Only applies to segregate-mode.
|
|
drop_unfixable_sstables drop_unfixable = drop_unfixable_sstables::no;
|
|
};
|
|
struct reshard {
|
|
};
|
|
struct reshape {
|
|
};
|
|
struct split {
|
|
mutation_writer::classify_by_token_group classifier;
|
|
};
|
|
private:
|
|
using options_variant = std::variant<regular, cleanup, upgrade, scrub, reshard, reshape, split, major>;
|
|
|
|
private:
|
|
options_variant _options;
|
|
|
|
private:
|
|
explicit compaction_type_options(options_variant options) : _options(std::move(options)) {
|
|
}
|
|
|
|
public:
|
|
static compaction_type_options make_reshape() {
|
|
return compaction_type_options(reshape{});
|
|
}
|
|
|
|
static compaction_type_options make_reshard() {
|
|
return compaction_type_options(reshard{});
|
|
}
|
|
|
|
static compaction_type_options make_regular() {
|
|
return compaction_type_options(regular{});
|
|
}
|
|
|
|
static compaction_type_options make_major() {
|
|
return compaction_type_options(major{});
|
|
}
|
|
|
|
static compaction_type_options make_cleanup() {
|
|
return compaction_type_options(cleanup{});
|
|
}
|
|
|
|
static compaction_type_options make_upgrade() {
|
|
return compaction_type_options(upgrade{});
|
|
}
|
|
|
|
static compaction_type_options make_scrub(scrub::mode mode, scrub::quarantine_invalid_sstables quarantine_sstables = scrub::quarantine_invalid_sstables::yes, scrub::drop_unfixable_sstables drop_unfixable_sstables = scrub::drop_unfixable_sstables::no) {
|
|
return compaction_type_options(scrub{.operation_mode = mode, .quarantine_sstables = quarantine_sstables, .drop_unfixable = drop_unfixable_sstables});
|
|
}
|
|
|
|
static compaction_type_options make_split(mutation_writer::classify_by_token_group classifier) {
|
|
return compaction_type_options(split{std::move(classifier)});
|
|
}
|
|
|
|
template <typename... Visitor>
|
|
auto visit(Visitor&&... visitor) const {
|
|
return std::visit(std::forward<Visitor>(visitor)..., _options);
|
|
}
|
|
|
|
template <typename OptionType>
|
|
const auto& as() const {
|
|
return std::get<OptionType>(_options);
|
|
}
|
|
|
|
const options_variant& options() const { return _options; }
|
|
|
|
compaction_type type() const;
|
|
};
|
|
|
|
std::string_view to_string(compaction_type_options::scrub::mode);
|
|
|
|
std::string_view to_string(compaction_type_options::scrub::quarantine_mode);
|
|
|
|
class dummy_tag {};
|
|
using has_only_fully_expired = seastar::bool_class<dummy_tag>;
|
|
|
|
struct compaction_descriptor {
|
|
// List of sstables to be compacted.
|
|
std::vector<sstables::shared_sstable> sstables;
|
|
// This is a snapshot of the table's sstable set, used only for the purpose of expiring tombstones.
|
|
// If this sstable set cannot be provided, expiration will be disabled to prevent data from being resurrected.
|
|
std::optional<sstables::sstable_set> all_sstables_snapshot;
|
|
// Level of sstable(s) created by compaction procedure.
|
|
int level;
|
|
// Threshold size for sstable(s) to be created.
|
|
uint64_t max_sstable_bytes;
|
|
// Can split large partitions at clustering boundary.
|
|
bool can_split_large_partition = false;
|
|
// Run identifier of output sstables.
|
|
sstables::run_id run_identifier;
|
|
// The options passed down to the compaction code.
|
|
// This also selects the kind of compaction to do.
|
|
compaction_type_options options = compaction_type_options::make_regular();
|
|
// If engaged, compaction will cleanup the input sstables by skipping non-owned ranges.
|
|
compaction::owned_ranges_ptr owned_ranges;
|
|
// Required for reshard compaction.
|
|
const dht::sharder* sharder;
|
|
|
|
compaction_sstable_creator_fn creator;
|
|
compaction_sstable_replacer_fn replacer;
|
|
|
|
// Denotes if this compaction task is comprised solely of completely expired SSTables
|
|
has_only_fully_expired has_only_fully_expired = has_only_fully_expired::no;
|
|
|
|
// If set to true, gc will check only the compacting sstables to collect tombstones.
|
|
// If set to false, gc will check the memtables, commit log and other uncompacting
|
|
// sstables to decide if a tombstone can be collected. Note that these checks are
|
|
// not perfect. W.r.to memtables and uncompacted SSTables, if their minimum timestamp
|
|
// is less than that of the tombstone and they contain the key, the tombstone will
|
|
// not be collected. No row-level, cell-level check takes place. W.r.to the commit
|
|
// log, there is currently no way to check if the key exists; only the minimum
|
|
// timestamp comparison, similar to memtables, is performed.
|
|
bool gc_check_only_compacting_sstables = false;
|
|
|
|
compaction_descriptor() = default;
|
|
|
|
static constexpr int default_level = 0;
|
|
static constexpr uint64_t default_max_sstable_bytes = std::numeric_limits<uint64_t>::max();
|
|
|
|
explicit compaction_descriptor(std::vector<sstables::shared_sstable> sstables,
|
|
int level = default_level,
|
|
uint64_t max_sstable_bytes = default_max_sstable_bytes,
|
|
sstables::run_id run_identifier = sstables::run_id::create_random_id(),
|
|
compaction_type_options options = compaction_type_options::make_regular(),
|
|
compaction::owned_ranges_ptr owned_ranges_ = {})
|
|
: sstables(std::move(sstables))
|
|
, level(level)
|
|
, max_sstable_bytes(max_sstable_bytes)
|
|
, run_identifier(run_identifier)
|
|
, options(options)
|
|
, owned_ranges(std::move(owned_ranges_))
|
|
{}
|
|
|
|
explicit compaction_descriptor(::compaction::has_only_fully_expired has_only_fully_expired,
|
|
std::vector<sstables::shared_sstable> sstables)
|
|
: sstables(std::move(sstables))
|
|
, level(default_level)
|
|
, max_sstable_bytes(default_max_sstable_bytes)
|
|
, run_identifier(sstables::run_id::create_random_id())
|
|
, options(compaction_type_options::make_regular())
|
|
, has_only_fully_expired(has_only_fully_expired)
|
|
{}
|
|
|
|
// Return fan-in of this job, which is equal to its number of runs.
|
|
unsigned fan_in() const;
|
|
// Enables garbage collection for this descriptor, meaning that compaction will be able to purge expired data
|
|
void enable_garbage_collection(sstables::sstable_set snapshot) { all_sstables_snapshot = std::move(snapshot); }
|
|
// Returns total size of all sstables contained in this descriptor
|
|
uint64_t sstables_size() const;
|
|
};
|
|
|
|
}
|
|
|
|
template <>
|
|
struct fmt::formatter<compaction::compaction_type> : fmt::formatter<string_view> {
|
|
auto format(compaction::compaction_type, fmt::format_context& ctx) const -> decltype(ctx.out());
|
|
};
|
|
template <>
|
|
struct fmt::formatter<compaction::compaction_type_options::scrub::mode> : fmt::formatter<string_view> {
|
|
auto format(compaction::compaction_type_options::scrub::mode, fmt::format_context& ctx) const -> decltype(ctx.out());
|
|
};
|
|
template <>
|
|
struct fmt::formatter<compaction::compaction_type_options::scrub::quarantine_mode> : fmt::formatter<string_view> {
|
|
auto format(compaction::compaction_type_options::scrub::quarantine_mode, fmt::format_context& ctx) const -> decltype(ctx.out());
|
|
};
|