/*
* Copyright (C) 2018 ScyllaDB
*/
/*
* This file is part of Scylla.
*
* Scylla is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Scylla is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
*/
#include
#include
#include
#include "reader_concurrency_semaphore.hh"
#include "utils/exceptions.hh"
// Logger used by this file, registered under the name "reader_concurrency_semaphore".
logger rcslog("reader_concurrency_semaphore");
// Builds units that hold `res` on behalf of `permit`.
// The amount is consumed from the permit as part of construction.
reader_permit::resource_units::resource_units(reader_permit permit, reader_resources res) noexcept
    : _permit(std::move(permit))
    , _resources(res)
{
    // _resources was just initialized from `res`, so this consumes exactly `res`.
    _permit.consume(_resources);
}
// Move constructor: takes over the permit and the held amount from `o`.
// `o` is left holding empty resources, so its destructor releases nothing.
reader_permit::resource_units::resource_units(resource_units&& o) noexcept
    : _permit(std::move(o._permit))
    , _resources(o._resources)
{
    o._resources = reader_resources{};
}
// Releases whatever is still held; does nothing when the units are empty
// (for example after having been moved from).
reader_permit::resource_units::~resource_units() {
    if (!_resources) {
        return;
    }
    reset();
}
// Move assignment: gives up the currently held resources via reset(), then
// takes over the permit and the amount from `o`, leaving `o` empty.
// Self-assignment is a no-op.
reader_permit::resource_units& reader_permit::resource_units::operator=(resource_units&& o) noexcept {
    if (this != &o) {
        // Must happen while _permit still refers to the permit the current
        // resources were consumed from.
        reset();
        _permit = std::move(o._permit);
        _resources = std::exchange(o._resources, {});
    }
    return *this;
}
// Merges the amount held by `o` into these units and leaves `o` empty.
// Both units must belong to the same permit (asserted); the permit's own
// accounting is not touched, the amount merely changes owner.
void reader_permit::resource_units::add(resource_units&& o) {
    assert(_permit == o._permit);
    const auto taken = std::exchange(o._resources, {});
    _resources += taken;
}
// Replaces the held amount with `res`.
// The new amount is consumed from the permit first; only afterwards is the
// previously held amount (if any) signalled back.
void reader_permit::resource_units::reset(reader_resources res) {
    _permit.consume(res);
    const bool had_resources = bool(_resources);
    if (had_resources) {
        _permit.signal(_resources);
    }
    _resources = res;
}
// State behind a reader_permit.
//
// Keeps a running total of the resources consumed through the permit. Until
// the permit is admitted the total is only recorded locally; from admission
// onwards every consume/signal is also forwarded to the semaphore.
class reader_permit::impl {
    reader_concurrency_semaphore& _semaphore;
    // Net amount consumed through this permit and not yet signalled back.
    reader_resources _resources;
    // Set once by on_admission(); never cleared.
    bool _admitted = false;

public:
    impl(reader_concurrency_semaphore& semaphore) : _semaphore(semaphore) { }

    // Reports an internal error if resources are still accounted to the
    // permit when its state is destroyed.
    ~impl() {
        if (!_resources) {
            return;
        }
        on_internal_error_noexcept(rcslog, format("reader_permit::impl::~impl(): detected a leak of {{count={}, memory={}}} resources",
                _resources.count, _resources.memory));
    }

    reader_concurrency_semaphore& semaphore() {
        return _semaphore;
    }

    // Marks the permit admitted and charges the semaphore with everything
    // that was recorded so far.
    void on_admission() {
        _admitted = true;
        _semaphore.consume(_resources);
    }

    // Adds `res` to the permit's total; forwarded to the semaphore only once admitted.
    void consume(reader_resources res) {
        _resources += res;
        if (!_admitted) {
            return;
        }
        _semaphore.consume(res);
    }

    // Subtracts `res` from the permit's total; forwarded to the semaphore only once admitted.
    void signal(reader_resources res) {
        _resources -= res;
        if (!_admitted) {
            return;
        }
        _semaphore.signal(res);
    }
};
// Creates a permit bound to `semaphore`. The permit's state is held in an
// `impl` object created via make_shared.
reader_permit::reader_permit(reader_concurrency_semaphore& semaphore)
: _impl(make_shared(semaphore)) {
}
// Forwards to the permit state's on_admission().
void reader_permit::on_admission() {
_impl->on_admission();
}
// Empty, out-of-line destructor.
// NOTE(review): presumably defined in this file because `impl` is only a
// complete type here — confirm against the header.
reader_permit::~reader_permit() {
}
// Returns the semaphore this permit was created for.
reader_concurrency_semaphore& reader_permit::semaphore() {
return _impl->semaphore();
}
// Requests admission for this permit with `memory` bytes, by delegating to
// the owning semaphore's do_wait_admission(). `timeout` is passed through
// unchanged.
future reader_permit::wait_admission(size_t memory, db::timeout_clock::time_point timeout) {
return _impl->semaphore().do_wait_admission(*this, memory, timeout);
}
// Accounts `res` as consumed through this permit (forwards to the permit state).
void reader_permit::consume(reader_resources res) {
_impl->consume(res);
}
// Accounts `res` as released by this permit (forwards to the permit state).
void reader_permit::signal(reader_resources res) {
_impl->signal(res);
}
// Consumes `memory` bytes (and a count of 0) through this permit and returns
// the units object representing that amount.
reader_permit::resource_units reader_permit::consume_memory(size_t memory) {
    const reader_resources memory_only{0, static_cast<ssize_t>(memory)};
    return consume_resources(memory_only);
}
// Consumes `res` through this permit and returns the units object holding it.
// The consumption itself happens in the resource_units constructor.
reader_permit::resource_units reader_permit::consume_resources(reader_resources res) {
return resource_units(*this, res);
}
// Returns `r` to the semaphore and then admits queued waiters, front to back,
// for as long as the front waiter's request fits into what is available.
// A failure while admitting a waiter is delivered to that waiter's promise;
// the waiter is removed from the queue either way.
void reader_concurrency_semaphore::signal(const resources& r) noexcept {
    _resources += r;
    for (;;) {
        if (_wait_list.empty()) {
            break;
        }
        auto& waiter = _wait_list.front();
        if (!has_available_units(waiter.res)) {
            break;
        }
        try {
            waiter.permit.on_admission();
            waiter.pr.set_value(reader_permit::resource_units(std::move(waiter.permit), waiter.res));
        } catch (...) {
            waiter.pr.set_exception(std::current_exception());
        }
        _wait_list.pop_front();
    }
}
// On destruction, calls broken() with a broken_semaphore exception so that
// any requests still queued in the wait list are failed rather than left pending.
reader_concurrency_semaphore::~reader_concurrency_semaphore() {
broken(std::make_exception_ptr(broken_semaphore{}));
}
// Registers `ir` as an inactive read that may later be evicted.
//
// Returns a handle identifying the registered read, or an empty handle when
// the read was evicted on the spot because requests are waiting for admission.
reader_concurrency_semaphore::inactive_read_handle reader_concurrency_semaphore::register_inactive_read(std::unique_ptr ir) {
    if (!_wait_list.empty()) {
        // A non-empty wait list implies _inactive_reads.empty(): new readers
        // are not queued before all inactive reads have been evicted. So do
        // not keep this one around either. The evicted reader will release
        // its permit, hopefully allowing us to admit some readers from the
        // _wait_list.
        ir->evict();
        ++_stats.permit_based_evictions;
        return inactive_read_handle();
    }

    const auto inserted = _inactive_reads.emplace(_next_id++, std::move(ir));
    ++_stats.inactive_reads;
    return inactive_read_handle(*this, inserted.first->first);
}
// Removes the inactive read identified by `irh` and hands it back to the caller.
//
// Returns an empty pointer when no read is registered under the handle's id.
// Throws std::runtime_error when the handle is non-empty but was issued by a
// different semaphore.
std::unique_ptr reader_concurrency_semaphore::unregister_inactive_read(inactive_read_handle irh) {
    const bool foreign_handle = irh && irh._sem != this;
    if (foreign_handle) {
        throw std::runtime_error(fmt::format(
                "reader_concurrency_semaphore::unregister_inactive_read(): "
                "attempted to unregister an inactive read with a handle belonging to another semaphore: "
                "this is {} (0x{:x}) but the handle belongs to {} (0x{:x})",
                name(),
                reinterpret_cast(this),
                irh._sem->name(),
                reinterpret_cast(irh._sem)));
    }

    auto pos = _inactive_reads.find(irh._id);
    if (pos == _inactive_reads.end()) {
        return {};
    }

    auto ir = std::move(pos->second);
    _inactive_reads.erase(pos);
    --_stats.inactive_reads;
    return ir;
}
// Evicts the first registered inactive read, if there is one.
// Returns true when a read was evicted, false when none are registered.
bool reader_concurrency_semaphore::try_evict_one_inactive_read() {
    const auto victim = _inactive_reads.begin();
    if (victim == _inactive_reads.end()) {
        return false;
    }
    victim->second->evict();
    _inactive_reads.erase(victim);
    ++_stats.permit_based_evictions;
    --_stats.inactive_reads;
    return true;
}
// True when the semaphore's remaining resources convert to true and are
// at least `r`.
bool reader_concurrency_semaphore::has_available_units(const resources& r) const {
    if (!_resources) {
        return false;
    }
    return _resources >= r;
}
// Decides whether a request for `r` can be admitted right now.
// Nothing is admitted past requests that are already queued.
bool reader_concurrency_semaphore::may_proceed(const resources& r) const {
    if (!_wait_list.empty()) {
        return false;
    }
    if (has_available_units(r)) {
        return true;
    }
    // Special case: when there is no active reader (based on count) admit one
    // regardless of availability of memory.
    return _resources.count == _initial_resources.count;
}
// Tries to admit `permit` for one reader (count 1) plus `memory` bytes.
//
// Outcomes:
//  - failed future (std::runtime_error) when the wait list already holds
//    _max_queue_length entries or more;
//  - ready future holding the admitted resource units when the request may
//    proceed, possibly after evicting inactive reads;
//  - otherwise a pending future tied to a new wait-list entry, which is queued
//    together with `timeout`.
future reader_concurrency_semaphore::do_wait_admission(reader_permit permit, size_t memory, db::timeout_clock::time_point timeout) {
if (_wait_list.size() >= _max_queue_length) {
// Queue is full: run the optional pre-throw hook, then reject the request.
if (_prethrow_action) {
_prethrow_action();
}
return make_exception_future(
std::make_exception_ptr(std::runtime_error(
format("{}: restricted mutation reader queue overload", _name))));
}
auto r = resources(1, static_cast(memory));
// Evict inactive reads one at a time until the request fits or none are left.
// Each entry is moved out of the map and erased before evict() is called on it.
auto it = _inactive_reads.begin();
while (!may_proceed(r) && it != _inactive_reads.end()) {
auto ir = std::move(it->second);
it = _inactive_reads.erase(it);
ir->evict();
++_stats.permit_based_evictions;
--_stats.inactive_reads;
}
if (may_proceed(r)) {
// Admit immediately: mark the permit admitted, then create the units for `r`.
permit.on_admission();
return make_ready_future(reader_permit::resource_units(std::move(permit), r));
}
// Cannot be admitted yet: queue the request and hand back the promise's future.
promise pr;
auto fut = pr.get_future();
_wait_list.push_back(entry(std::move(pr), std::move(permit), r), timeout);
return fut;
}
// Creates a new permit bound to this semaphore.
reader_permit reader_concurrency_semaphore::make_permit() {
return reader_permit(*this);
}
// Fails every request currently queued in the wait list and empties the list.
//
// `ex` is the exception delivered to each waiter. When `ex` is null, a
// broken_semaphore exception is used instead.
void reader_concurrency_semaphore::broken(std::exception_ptr ex) {
    if (!ex) {
        ex = std::make_exception_ptr(broken_semaphore{});
    }
    while (!_wait_list.empty()) {
        // Deliver the caller-provided exception rather than a hard-coded one.
        _wait_list.front().pr.set_exception(ex);
        _wait_list.pop_front();
    }
}
// A file that tracks the memory usage of buffers resulting from read
// operations.
//
// Wraps another file and forwards every operation to it unchanged, except for
// dma_read_bulk(), which accounts the read against the reader permit.
class tracking_file_impl : public file_impl {
    file _tracked_file;
    reader_permit _permit;

public:
    // Wraps `file`, charging bulk reads to `permit`. The DMA alignment
    // requirements are copied from the wrapped file.
    tracking_file_impl(file file, reader_permit permit)
        : _tracked_file(std::move(file))
        , _permit(std::move(permit)) {
        _memory_dma_alignment = _tracked_file.memory_dma_alignment();
        _disk_read_dma_alignment = _tracked_file.disk_read_dma_alignment();
        _disk_write_dma_alignment = _tracked_file.disk_write_dma_alignment();
    }

    // Move-only.
    tracking_file_impl(const tracking_file_impl&) = delete;
    tracking_file_impl& operator=(const tracking_file_impl&) = delete;
    tracking_file_impl(tracking_file_impl&&) = default;
    tracking_file_impl& operator=(tracking_file_impl&&) = default;

    // The operations below are plain pass-throughs to the wrapped file.

    virtual future write_dma(uint64_t pos, const void* buffer, size_t len, const io_priority_class& pc) override {
        return get_file_impl(_tracked_file)->write_dma(pos, buffer, len, pc);
    }

    virtual future write_dma(uint64_t pos, std::vector iov, const io_priority_class& pc) override {
        return get_file_impl(_tracked_file)->write_dma(pos, std::move(iov), pc);
    }

    virtual future read_dma(uint64_t pos, void* buffer, size_t len, const io_priority_class& pc) override {
        return get_file_impl(_tracked_file)->read_dma(pos, buffer, len, pc);
    }

    virtual future read_dma(uint64_t pos, std::vector iov, const io_priority_class& pc) override {
        // `iov` is a by-value parameter that is not used again: hand it over
        // instead of copying it, as the vectored write_dma() above does.
        return get_file_impl(_tracked_file)->read_dma(pos, std::move(iov), pc);
    }

    virtual future<> flush(void) override {
        return get_file_impl(_tracked_file)->flush();
    }

    virtual future stat(void) override {
        return get_file_impl(_tracked_file)->stat();
    }

    virtual future<> truncate(uint64_t length) override {
        return get_file_impl(_tracked_file)->truncate(length);
    }

    virtual future<> discard(uint64_t offset, uint64_t length) override {
        return get_file_impl(_tracked_file)->discard(offset, length);
    }

    virtual future<> allocate(uint64_t position, uint64_t length) override {
        return get_file_impl(_tracked_file)->allocate(position, length);
    }

    virtual future size(void) override {
        return get_file_impl(_tracked_file)->size();
    }

    virtual future<> close() override {
        return get_file_impl(_tracked_file)->close();
    }

    // Returns the wrapped file's own handle; the result is not wrapped by
    // this class.
    virtual std::unique_ptr dup() override {
        return get_file_impl(_tracked_file)->dup();
    }

    virtual subscription list_directory(std::function (directory_entry de)> next) override {
        return get_file_impl(_tracked_file)->list_directory(std::move(next));
    }

    // Bulk read with memory accounting: `range_size` bytes are consumed from
    // the permit when the read is issued, and the units are owned by the
    // continuation. The buffer produced by the wrapped file is passed through
    // make_tracked_temporary_buffer() together with the permit.
    // The continuation captures `this` to reach _permit.
    // NOTE(review): that requires this object to outlive the pending read — confirm callers guarantee it.
    virtual future> dma_read_bulk(uint64_t offset, size_t range_size, const io_priority_class& pc) override {
        return get_file_impl(_tracked_file)->dma_read_bulk(offset, range_size, pc).then([this, units = _permit.consume_memory(range_size)] (temporary_buffer buf) {
            return make_ready_future>(make_tracked_temporary_buffer(std::move(buf), _permit));
        });
    }
};
// Wraps `f` in a tracking_file_impl that charges bulk reads to permit `p`
// and returns the result as a `file`.
file make_tracked_file(file f, reader_permit p) {
    // Both arguments are by-value parameters that are not used again, so move
    // them into the wrapper instead of copying.
    return file(make_shared(std::move(f), std::move(p)));
}