mirror of
https://github.com/scylladb/scylladb.git
synced 2026-05-13 11:22:01 +00:00
Now that the timeout is stored in the reader permit use it for admission rather than a timeout parameter. Note that evictable_reader::next_partition currently passes db::no_timeout to resume_or_create_reader, which propagated to maybe_wait_readmission, but it seems to be an oversight of the f_m_r api that doesn't pass a timeout to next_partition(). Signed-off-by: Benny Halevy <bhalevy@scylladb.com>
414 lines
16 KiB
C++
414 lines
16 KiB
C++
/*
|
|
* This file is part of Scylla.
|
|
*
|
|
* Scylla is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Affero General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* Scylla is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
/*
|
|
* Copyright (C) 2017-present ScyllaDB
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <boost/intrusive/list.hpp>
|
|
#include <seastar/core/future.hh>
|
|
#include <seastar/core/gate.hh>
|
|
#include "reader_permit.hh"
|
|
#include "flat_mutation_reader.hh"
|
|
|
|
namespace bi = boost::intrusive;
|
|
|
|
using namespace seastar;
|
|
|
|
/// Specific semaphore for controlling reader concurrency
///
/// Use `obtain_permit()` to create an admitted permit that tracks the
/// resource consumption of a specific read, or `make_tracking_only_permit()`
/// for reads that bypass admission but whose resource usage should still be
/// tracked. The permit should be created before the read is even
/// started so it is available to track resource consumption from the start.
/// Reader concurrency is dual limited by count and memory.
/// The semaphore can be configured with the desired limits on
/// construction. New readers will only be admitted when there is both
/// enough count and memory units available. Readers are admitted in
/// FIFO order.
/// The semaphore's `name` must be provided in the constructor and its only
/// purpose is to increase the readability of exceptions: both timeout
/// exceptions and queue overflow exceptions (read below) include this
/// `name` in their messages.
/// It's also possible to specify the maximum allowed number of waiting
/// readers by the `max_queue_length` constructor parameter (it can be
/// changed later with `set_max_queue_length()`). When the number of
/// waiting readers becomes equal to or greater than `max_queue_length`
/// (upon calling `obtain_permit()`) an exception of type
/// `std::runtime_error` is thrown.
///
/// The semaphore also acts as an execution stage for reads. This
/// functionality is exposed via \ref with_permit() and \ref
/// with_ready_permit().
|
|
class reader_concurrency_semaphore {
|
|
public:
|
|
using resources = reader_resources;
|
|
|
|
friend class reader_permit;
|
|
|
|
enum class evict_reason {
|
|
permit, // evicted due to permit shortage
|
|
time, // evicted due to expiring ttl
|
|
manual, // evicted manually via `try_evict_one_inactive_read()`
|
|
};
|
|
|
|
using eviction_notify_handler = noncopyable_function<void(evict_reason)>;
|
|
|
|
struct stats {
|
|
// The number of inactive reads evicted to free up permits.
|
|
uint64_t permit_based_evictions = 0;
|
|
// The number of inactive reads evicted due to expiring.
|
|
uint64_t time_based_evictions = 0;
|
|
// The number of inactive reads currently registered.
|
|
uint64_t inactive_reads = 0;
|
|
// Total number of successful reads executed through this semaphore.
|
|
uint64_t total_successful_reads = 0;
|
|
// Total number of failed reads executed through this semaphore.
|
|
uint64_t total_failed_reads = 0;
|
|
// Total number of reads rejected because the admission queue reached its max capacity
|
|
uint64_t total_reads_shed_due_to_overload = 0;
|
|
// Total number of reads admitted, via all admission paths.
|
|
uint64_t reads_admitted = 0;
|
|
// Total number of reads enqueued to wait for admission.
|
|
uint64_t reads_enqueued = 0;
|
|
// Total number of permits created so far.
|
|
uint64_t total_permits = 0;
|
|
// Current number of permits.
|
|
uint64_t current_permits = 0;
|
|
// Current number of used permits.
|
|
uint64_t used_permits = 0;
|
|
// Current number of blocked permits.
|
|
uint64_t blocked_permits = 0;
|
|
};
|
|
|
|
using permit_list_type = bi::list<
|
|
reader_permit::impl,
|
|
bi::base_hook<bi::list_base_hook<bi::link_mode<bi::auto_unlink>>>,
|
|
bi::constant_time_size<false>>;
|
|
|
|
class inactive_read_handle;
|
|
|
|
using read_func = noncopyable_function<future<>(reader_permit)>;
|
|
|
|
private:
|
|
struct entry {
|
|
promise<> pr;
|
|
reader_permit permit;
|
|
read_func func;
|
|
entry(promise<>&& pr, reader_permit permit, read_func func)
|
|
: pr(std::move(pr)), permit(std::move(permit)), func(std::move(func)) {}
|
|
};
|
|
|
|
class expiry_handler {
|
|
reader_concurrency_semaphore& _semaphore;
|
|
public:
|
|
explicit expiry_handler(reader_concurrency_semaphore& semaphore)
|
|
: _semaphore(semaphore) {}
|
|
void operator()(entry& e) noexcept;
|
|
};
|
|
|
|
struct inactive_read : public bi::list_base_hook<bi::link_mode<bi::auto_unlink>> {
|
|
flat_mutation_reader reader;
|
|
eviction_notify_handler notify_handler;
|
|
timer<lowres_clock> ttl_timer;
|
|
inactive_read_handle* handle = nullptr;
|
|
|
|
explicit inactive_read(flat_mutation_reader reader_) noexcept
|
|
: reader(std::move(reader_))
|
|
{ }
|
|
~inactive_read();
|
|
void detach() noexcept;
|
|
};
|
|
|
|
using inactive_reads_type = bi::list<inactive_read, bi::constant_time_size<false>>;
|
|
|
|
public:
|
|
class inactive_read_handle {
|
|
reader_concurrency_semaphore* _sem = nullptr;
|
|
inactive_read* _irp = nullptr;
|
|
|
|
friend class reader_concurrency_semaphore;
|
|
|
|
private:
|
|
void abandon() noexcept;
|
|
|
|
explicit inactive_read_handle(reader_concurrency_semaphore& sem, inactive_read& ir) noexcept
|
|
: _sem(&sem), _irp(&ir) {
|
|
_irp->handle = this;
|
|
}
|
|
public:
|
|
inactive_read_handle() = default;
|
|
inactive_read_handle(inactive_read_handle&& o) noexcept
|
|
: _sem(std::exchange(o._sem, nullptr))
|
|
, _irp(std::exchange(o._irp, nullptr)) {
|
|
if (_irp) {
|
|
_irp->handle = this;
|
|
}
|
|
}
|
|
inactive_read_handle& operator=(inactive_read_handle&& o) noexcept {
|
|
if (this == &o) {
|
|
return *this;
|
|
}
|
|
abandon();
|
|
_sem = std::exchange(o._sem, nullptr);
|
|
_irp = std::exchange(o._irp, nullptr);
|
|
if (_irp) {
|
|
_irp->handle = this;
|
|
}
|
|
return *this;
|
|
}
|
|
~inactive_read_handle() {
|
|
abandon();
|
|
}
|
|
explicit operator bool() const noexcept {
|
|
return bool(_irp);
|
|
}
|
|
};
|
|
|
|
private:
|
|
const resources _initial_resources;
|
|
resources _resources;
|
|
|
|
expiring_fifo<entry, expiry_handler, db::timeout_clock> _wait_list;
|
|
queue<entry> _ready_list;
|
|
|
|
sstring _name;
|
|
size_t _max_queue_length = std::numeric_limits<size_t>::max();
|
|
inactive_reads_type _inactive_reads;
|
|
stats _stats;
|
|
permit_list_type _permit_list;
|
|
bool _stopped = false;
|
|
gate _close_readers_gate;
|
|
gate _permit_gate;
|
|
std::optional<future<>> _execution_loop_future;
|
|
|
|
private:
|
|
[[nodiscard]] flat_mutation_reader detach_inactive_reader(inactive_read&, evict_reason reason) noexcept;
|
|
void evict(inactive_read&, evict_reason reason) noexcept;
|
|
|
|
bool has_available_units(const resources& r) const;
|
|
|
|
bool all_used_permits_are_stalled() const;
|
|
|
|
[[nodiscard]] std::exception_ptr check_queue_size(std::string_view queue_name);
|
|
|
|
// Add the permit to the wait queue and return the future which resolves when
|
|
// the permit is admitted (popped from the queue).
|
|
future<> enqueue_waiter(reader_permit permit, read_func func);
|
|
void evict_readers_in_background();
|
|
future<> do_wait_admission(reader_permit permit, read_func func = {});
|
|
void maybe_admit_waiters() noexcept;
|
|
|
|
void on_permit_created(reader_permit::impl&);
|
|
void on_permit_destroyed(reader_permit::impl&) noexcept;
|
|
|
|
void on_permit_used() noexcept;
|
|
void on_permit_unused() noexcept;
|
|
|
|
void on_permit_blocked() noexcept;
|
|
void on_permit_unblocked() noexcept;
|
|
|
|
std::runtime_error stopped_exception();
|
|
|
|
// closes reader in the background.
|
|
void close_reader(flat_mutation_reader reader);
|
|
|
|
future<> execution_loop() noexcept;
|
|
|
|
public:
|
|
struct no_limits { };
|
|
|
|
/// Create a semaphore with the specified limits
|
|
///
|
|
/// The semaphore's name has to be unique!
|
|
reader_concurrency_semaphore(int count,
|
|
ssize_t memory,
|
|
sstring name,
|
|
size_t max_queue_length = std::numeric_limits<size_t>::max());
|
|
|
|
/// Create a semaphore with practically unlimited count and memory.
|
|
///
|
|
/// And conversely, no queue limit either.
|
|
/// The semaphore's name has to be unique!
|
|
explicit reader_concurrency_semaphore(no_limits, sstring name);
|
|
|
|
~reader_concurrency_semaphore();
|
|
|
|
reader_concurrency_semaphore(const reader_concurrency_semaphore&) = delete;
|
|
reader_concurrency_semaphore& operator=(const reader_concurrency_semaphore&) = delete;
|
|
|
|
reader_concurrency_semaphore(reader_concurrency_semaphore&&) = delete;
|
|
reader_concurrency_semaphore& operator=(reader_concurrency_semaphore&&) = delete;
|
|
|
|
/// Returns the name of the semaphore
|
|
///
|
|
/// If the semaphore has no name, "unnamed reader concurrency semaphore" is returned.
|
|
std::string_view name() const {
|
|
return _name.empty() ? "unnamed reader concurrency semaphore" : std::string_view(_name);
|
|
}
|
|
|
|
/// Register an inactive read.
|
|
///
|
|
/// The semaphore will evict this read when there is a shortage of
|
|
/// permits. This might be immediate, during this register call.
|
|
/// Clients can use the returned handle to unregister the read, when it
|
|
/// stops being inactive and hence evictable, or to set the optional
|
|
/// notify_handler and ttl.
|
|
///
|
|
/// The semaphore takes ownership of the passed in reader for the duration
|
|
/// of its inactivity and it may evict it to free up resources if necessary.
|
|
inactive_read_handle register_inactive_read(flat_mutation_reader ir) noexcept;
|
|
|
|
/// Set the inactive read eviction notification handler and optionally eviction ttl.
|
|
///
|
|
/// The semaphore may evict this read when there is a shortage of
|
|
/// permits or after the given ttl expired.
|
|
///
|
|
/// The notification handler will be called when the inactive read is evicted
|
|
/// passing with the reason it was evicted to the handler.
|
|
///
|
|
/// Note that the inactive read might have already been evicted if
|
|
/// the caller may yield after the register_inactive_read returned the handle
|
|
/// and before calling set_notify_handler. In this case, the caller must revalidate
|
|
/// the inactive_read_handle before calling this function.
|
|
void set_notify_handler(inactive_read_handle& irh, eviction_notify_handler&& handler, std::optional<std::chrono::seconds> ttl);
|
|
|
|
/// Unregister the previously registered inactive read.
|
|
///
|
|
/// If the read was not evicted, the inactive read object, passed in to the
|
|
/// register call, will be returned. Otherwise a nullptr is returned.
|
|
flat_mutation_reader_opt unregister_inactive_read(inactive_read_handle irh);
|
|
|
|
/// Try to evict an inactive read.
|
|
///
|
|
/// Return true if an inactive read was evicted and false otherwise
|
|
/// (if there was no reader to evict).
|
|
bool try_evict_one_inactive_read(evict_reason = evict_reason::manual);
|
|
|
|
/// Clear all inactive reads.
|
|
void clear_inactive_reads();
|
|
|
|
/// Stop the reader_concurrency_semaphore and clear all inactive reads.
|
|
///
|
|
/// Wait on all async background work to complete.
|
|
future<> stop() noexcept;
|
|
|
|
const stats& get_stats() const {
|
|
return _stats;
|
|
}
|
|
|
|
stats& get_stats() {
|
|
return _stats;
|
|
}
|
|
|
|
/// Make an admitted permit
|
|
///
|
|
/// The permit is already in an admitted state after being created, this
|
|
/// method includes waiting for admission.
|
|
/// The permit is associated with a schema, which is the schema of the table
|
|
/// the read is executed against, and the operation name, which should be a
|
|
/// name such that we can identify the operation which created this permit.
|
|
/// Ideally this should be a unique enough name that we not only can identify
|
|
/// the kind of read, but the exact code-path that was taken.
|
|
///
|
|
/// Some permits cannot be associated with any table, so passing nullptr as
|
|
/// the schema parameter is allowed.
|
|
future<reader_permit> obtain_permit(const schema* const schema, const char* const op_name, size_t memory, db::timeout_clock::time_point timeout);
|
|
future<reader_permit> obtain_permit(const schema* const schema, sstring&& op_name, size_t memory, db::timeout_clock::time_point timeout);
|
|
|
|
/// Make a tracking only permit
|
|
///
|
|
/// The permit is not admitted. It is intended for reads that bypass the
|
|
/// normal concurrency control, but whose resource usage we still want to
|
|
/// keep track of, as part of that concurrency control.
|
|
/// The permit is associated with a schema, which is the schema of the table
|
|
/// the read is executed against, and the operation name, which should be a
|
|
/// name such that we can identify the operation which created this permit.
|
|
/// Ideally this should be a unique enough name that we not only can identify
|
|
/// the kind of read, but the exact code-path that was taken.
|
|
///
|
|
/// Some permits cannot be associated with any table, so passing nullptr as
|
|
/// the schema parameter is allowed.
|
|
reader_permit make_tracking_only_permit(const schema* const schema, const char* const op_name, db::timeout_clock::time_point timeout);
|
|
reader_permit make_tracking_only_permit(const schema* const schema, sstring&& op_name, db::timeout_clock::time_point timeout);
|
|
|
|
/// Run the function through the semaphore's execution stage with an admitted permit
|
|
///
|
|
/// First a permit is obtained via the normal admission route, as if
|
|
/// it was created with \ref obtain_permit(), then func is enqueued to be
|
|
/// run by the semaphore's execution loop. This emulates an execution stage,
|
|
/// as it allows batching multiple funcs to be run together. Unlike an
|
|
/// execution stage, with_permit() accepts a type-erased function, which
|
|
/// allows for more flexibility in what functions are batched together.
|
|
/// Use only functions that share most of their code to benefit from the
|
|
/// instruction-cache warm-up!
|
|
///
|
|
/// The permit is associated with a schema, which is the schema of the table
|
|
/// the read is executed against, and the operation name, which should be a
|
|
/// name such that we can identify the operation which created this permit.
|
|
/// Ideally this should be a unique enough name that we not only can identify
|
|
/// the kind of read, but the exact code-path that was taken.
|
|
///
|
|
/// Some permits cannot be associated with any table, so passing nullptr as
|
|
/// the schema parameter is allowed.
|
|
future<> with_permit(const schema* const schema, const char* const op_name, size_t memory, db::timeout_clock::time_point timeout, read_func func);
|
|
|
|
/// Run the function through the semaphore's execution stage with a pre-admitted permit
|
|
///
|
|
/// Same as \ref with_permit(), but it uses an already admitted
|
|
/// permit. Should only be used when a permit is already readily
|
|
/// available, e.g. when resuming a saved read. Using
|
|
/// \ref obtain_permit(), then \ref with_ready_permit() is less
|
|
/// optimal then just using \ref with_permit().
|
|
future<> with_ready_permit(reader_permit permit, read_func func);
|
|
|
|
const resources initial_resources() const {
|
|
return _initial_resources;
|
|
}
|
|
|
|
bool is_unlimited() const {
|
|
return _initial_resources == reader_resources{std::numeric_limits<int>::max(), std::numeric_limits<ssize_t>::max()};
|
|
}
|
|
|
|
const resources available_resources() const {
|
|
return _resources;
|
|
}
|
|
|
|
void consume(resources r) {
|
|
_resources -= r;
|
|
}
|
|
|
|
void signal(const resources& r) noexcept;
|
|
|
|
size_t waiters() const {
|
|
return _wait_list.size();
|
|
}
|
|
|
|
void broken(std::exception_ptr ex = {});
|
|
|
|
/// Dump diagnostics printout
|
|
///
|
|
/// Use max-lines to cap the number of (permit) lines in the report.
|
|
/// Use 0 for unlimited.
|
|
std::string dump_diagnostics(unsigned max_lines = 0) const;
|
|
|
|
void set_max_queue_length(size_t size) {
|
|
_max_queue_length = size;
|
|
}
|
|
};
|