Mirror of https://github.com/scylladb/scylladb.git, synced 2026-04-23 01:50:35 +00:00.
This patch flips two "switches": 1) It switches admission to be up-front. 2) It changes the admission algorithm. (1) By now all permits are obtained up-front, so this patch just yanks out the restricted reader from all reader stacks and simultaneously switches all `obtain_permit_nowait()` calls to `obtain_permit()`. By doing this, admission is now waited on when creating the permit. (2) We switch to an admission algorithm that adds a new aspect to the existing resource availability: the number of used/blocked reads. Namely, it only admits new reads if, in addition to the necessary amount of resources being available, all currently used readers are blocked. In other words, we only admit new reads if all currently admitted reads require something other than CPU to progress: they are either waiting on I/O, a remote shard, or attention from their consumers (not used currently). We flip these two switches at the same time because up-front admission means cache reads now need to obtain a permit too. For cache reads the optimal concurrency is 1. Anything above that just increases latency (without increasing throughput). So we want to make sure that if a cache read hits, it doesn't get any competition for CPU and it can run to completion. We admit new reads only if the read misses and has to go to disk. Another change made to accommodate this switch is the replacement of the replica-side read execution stages with the reader concurrency semaphore acting as an execution stage. This replacement is needed because with the introduction of up-front admission, reads are not independent of each other anymore. One executed read can influence whether later reads will be admitted or not, and execution stages require independent operations to work well. By moving the execution stage into the semaphore, we have an execution stage which is in control of both admission and running the operations in batches, avoiding the bad interaction between the two.
110 lines
4.5 KiB
C++
/*
 * Copyright (C) 2020-present ScyllaDB
 */

/*
 * This file is part of Scylla.
 *
 * Scylla is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Scylla is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
#pragma once
|
|
|
|
#include "mutation_reader.hh"
|
|
#include <seastar/core/gate.hh>
|
|
|
|
class test_reader_lifecycle_policy
|
|
: public reader_lifecycle_policy
|
|
, public enable_shared_from_this<test_reader_lifecycle_policy> {
|
|
using factory_function = std::function<flat_mutation_reader(
|
|
schema_ptr,
|
|
reader_permit,
|
|
const dht::partition_range&,
|
|
const query::partition_slice&,
|
|
const io_priority_class&,
|
|
tracing::trace_state_ptr,
|
|
mutation_reader::forwarding)>;
|
|
|
|
struct reader_context {
|
|
std::optional<reader_concurrency_semaphore> semaphore;
|
|
std::optional<const dht::partition_range> range;
|
|
std::optional<const query::partition_slice> slice;
|
|
|
|
reader_context() = default;
|
|
reader_context(dht::partition_range range, query::partition_slice slice) : range(std::move(range)), slice(std::move(slice)) {
|
|
}
|
|
};
|
|
|
|
factory_function _factory_function;
|
|
std::vector<foreign_ptr<std::unique_ptr<reader_context>>> _contexts;
|
|
std::vector<future<>> _destroy_futures;
|
|
bool _evict_paused_readers = false;
|
|
|
|
public:
|
|
explicit test_reader_lifecycle_policy(factory_function f, bool evict_paused_readers = false)
|
|
: _factory_function(std::move(f))
|
|
, _contexts(smp::count)
|
|
, _evict_paused_readers(evict_paused_readers) {
|
|
}
|
|
virtual flat_mutation_reader create_reader(
|
|
schema_ptr schema,
|
|
reader_permit permit,
|
|
const dht::partition_range& range,
|
|
const query::partition_slice& slice,
|
|
const io_priority_class& pc,
|
|
tracing::trace_state_ptr trace_state,
|
|
mutation_reader::forwarding fwd_mr) override {
|
|
const auto shard = this_shard_id();
|
|
if (_contexts[shard]) {
|
|
_contexts[shard]->range.emplace(range);
|
|
_contexts[shard]->slice.emplace(slice);
|
|
} else {
|
|
_contexts[shard] = make_foreign(std::make_unique<reader_context>(range, slice));
|
|
}
|
|
return _factory_function(std::move(schema), std::move(permit), *_contexts[shard]->range, *_contexts[shard]->slice, pc, std::move(trace_state), fwd_mr);
|
|
}
|
|
virtual future<> destroy_reader(stopped_reader reader) noexcept override {
|
|
auto& ctx = _contexts[this_shard_id()];
|
|
auto reader_opt = ctx->semaphore->unregister_inactive_read(std::move(reader.handle));
|
|
auto ret = reader_opt ? reader_opt->close() : make_ready_future<>();
|
|
return ret.finally([&ctx] {
|
|
return ctx->semaphore->stop().finally([&ctx] {
|
|
ctx.release();
|
|
});
|
|
});
|
|
}
|
|
virtual reader_concurrency_semaphore& semaphore() override {
|
|
const auto shard = this_shard_id();
|
|
if (!_contexts[shard]) {
|
|
_contexts[shard] = make_foreign(std::make_unique<reader_context>());
|
|
} else if (_contexts[shard]->semaphore) {
|
|
return *_contexts[shard]->semaphore;
|
|
}
|
|
// To support multiple reader life-cycle instances alive at the same
|
|
// time, incorporate `this` into the name, to make their names unique.
|
|
auto name = format("tests::reader_lifecycle_policy@{}@shard_id={}", fmt::ptr(this), shard);
|
|
if (_evict_paused_readers) {
|
|
// Create with no memory, so all inactive reads are immediately evicted.
|
|
_contexts[shard]->semaphore.emplace(1, 0, std::move(name));
|
|
} else {
|
|
_contexts[shard]->semaphore.emplace(reader_concurrency_semaphore::no_limits{}, std::move(name));
|
|
}
|
|
return *_contexts[shard]->semaphore;
|
|
}
|
|
virtual future<reader_permit> obtain_reader_permit(schema_ptr schema, const char* const description, db::timeout_clock::time_point timeout) override {
|
|
return semaphore().obtain_permit(schema.get(), description, 128 * 1024, timeout);
|
|
}
|
|
};
|
|
|