diff --git a/backlog_controller.hh b/backlog_controller.hh
new file mode 100644
index 0000000000..5aff48de3e
--- /dev/null
+++ b/backlog_controller.hh
@@ -0,0 +1,165 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+// NOTE(review): the targets of the original #include lines were lost from the patch text; the
+// seastar headers below are inferred from the types used in this file - confirm before merging.
+#include <seastar/core/thread.hh>
+#include <seastar/core/timer.hh>
+#include <chrono>
+#include <functional>
+#include <utility>
+#include <vector>
+
+// Simple proportional controller to adjust shares for processes for which a backlog can be clearly
+// defined.
+//
+// Goal is to consume the backlog as fast as we can, but not so fast that we steal all the CPU from
+// incoming requests, and at the same time minimize user-visible fluctuations in the quota.
+//
+// What that translates to is we'll try to keep the backlog's first derivative at 0 (IOW, we keep
+// backlog constant). As the backlog grows we increase CPU usage, decreasing CPU usage as the
+// backlog diminishes.
+//
+// The exact point at which the controller stops determines the desired CPU usage. As the backlog
+// grows and approaches the desired maximum, we need to be more aggressive. We will therefore define
+// two thresholds, and increase the constant as we cross them.
+//
+// Doing that divides the range in three (before the first, between first and second, and after
+// second threshold), and we'll be slow to grow in the first region, grow normally in the second
+// region, and aggressively in the third region.
+//
+// The thresholds are given as control points (a backlog value and the output to produce at that
+// backlog). An implicit {0, 0} point is prepended, and adjust() interpolates linearly between
+// consecutive points.
+class backlog_controller {
+protected:
+    // One vertex of the piecewise-linear response: at backlog `input`, produce `output`.
+    struct control_point {
+        float input;
+        float output;
+    };
+
+    // Period of _update_timer. Value-initialized so that a default-constructed controller does
+    // not hold an indeterminate value.
+    std::chrono::milliseconds _interval{};
+    // Calls adjust() when it fires; armed only by the non-default constructor.
+    timer<> _update_timer;
+
+    // The implicit {0, 0} origin followed by the caller's control points, in the order given.
+    std::vector<control_point> _control_points;
+
+    // Returns the current backlog; adjust() compares it against control_point::input.
+    std::function<float()> _current_backlog;
+
+    // Receives the output computed by adjust(); subclasses decide what to apply it to.
+    virtual void update_controller(float quota) = 0;
+
+    // Defined in database.cc.
+    void adjust();
+
+    // NOTE(review): the timer callback captures `this`, and backlog_cpu_controller may store a
+    // pointer to its own _scheduling_group member. Presumably instances must stay put once
+    // constructed - confirm this holds given flush_cpu_controller's defaulted move constructor.
+    backlog_controller(std::chrono::milliseconds interval, std::vector<control_point> control_points, std::function<float()> backlog)
+        : _interval(interval)
+        , _update_timer([this] { adjust(); })
+        , _control_points({{0,0}})
+        , _current_backlog(std::move(backlog))
+    {
+        _control_points.insert(_control_points.end(), control_points.begin(), control_points.end());
+        _update_timer.arm_periodic(_interval);
+    }
+
+    // Used when the controllers are disabled. When we deprecate the --auto-adjust-flush-quota
+    // parameter we can delete this constructor.
+    backlog_controller() = default;
+    virtual ~backlog_controller() = default;
+};
+
+
+// backlog_controller whose output is used as the usage of a seastar thread scheduling group.
+class backlog_cpu_controller : public backlog_controller {
+public:
+    // Argument for the constructor that builds a controller whose timer is never armed and which
+    // only hands out the given backup group.
+    struct disabled {
+        seastar::thread_scheduling_group *backup;
+    };
+
+    // The controller's own group, or the backup group when constructed from disabled.
+    seastar::thread_scheduling_group* scheduling_group() {
+        return _current_scheduling_group;
+    }
+
+    // Quota from the most recent update_controller() call; 0 before the first one.
+    float current_quota() const {
+        return _current_quota;
+    }
+protected:
+    // Declared ahead of _scheduling_group because that member is constructed from it.
+    float _current_quota = 0.0f;
+
+    void update_controller(float quota) override;
+
+    seastar::thread_scheduling_group _scheduling_group;
+    seastar::thread_scheduling_group *_current_scheduling_group = nullptr;
+
+    backlog_cpu_controller(std::chrono::milliseconds interval, std::vector<control_point> control_points, std::function<float()> backlog)
+        : backlog_controller(interval, std::move(control_points), std::move(backlog))
+        , _scheduling_group(std::chrono::milliseconds(1), _current_quota)
+        , _current_scheduling_group(&_scheduling_group)
+    {}
+
+    backlog_cpu_controller(disabled d)
+        : backlog_controller()
+        , _scheduling_group(std::chrono::nanoseconds(0), 0)
+        , _current_scheduling_group(d.backup) {}
+};
+
+// memtable flush CPU controller.
+//
+// - First threshold is the soft limit line,
+// - Maximum is the point in which we'd stop consuming requests,
+// - Second threshold is halfway between them.
+//
+// Below the soft limit, we are in no particular hurry to flush, since it means we're set to
+// complete flushing before a new memtable is ready. The quota grows linearly from 0 to 0.1 over
+// that range.
+//
+// The first half of the virtual dirty region is where we expect to be usually, so we have a low
+// slope corresponding to a sluggish response: the quota goes from 0.1 to 0.2.
+//
+// In the second half, we're getting close to the hard dirty limit so we increase the slope and
+// become more responsive, reaching a quota of 1 at the hard dirty limit.
+class flush_cpu_controller : public backlog_cpu_controller {
+    // current_dirty is compared against this, so it is expected to be normalized to the hard limit.
+    static constexpr float hard_dirty_limit = 1.0f;
+public:
+    flush_cpu_controller(backlog_cpu_controller::disabled d) : backlog_cpu_controller(d) {}
+    flush_cpu_controller(flush_cpu_controller&&) = default;
+    flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function<float()> current_dirty)
+        : backlog_cpu_controller(interval,
+            std::vector<backlog_controller::control_point>({{soft_limit, 0.1f}, {soft_limit + (hard_dirty_limit - soft_limit) / 2, 0.2f}, {hard_dirty_limit, 1.0f}}),
+            std::move(current_dirty)
+        )
+    {}
+};
diff --git a/cpu_controller.hh b/cpu_controller.hh
deleted file mode 100644
index 62bbf962e1..0000000000
--- a/cpu_controller.hh
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (C) 2017 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- * - * Scylla is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with Scylla. If not, see . - */ - -#pragma once -#include -#include -#include - -// Simple proportional controller to adjust shares of memtable/streaming flushes. -// -// Goal is to flush as fast as we can, but not so fast that we steal all the CPU from incoming -// requests, and at the same time minimize user-visible fluctuations in the flush quota. -// -// What that translates to is we'll try to keep virtual dirty's firt derivative at 0 (IOW, we keep -// virtual dirty constant), which means that the rate of incoming writes is equal to the rate of -// flushed bytes. -// -// The exact point at which the controller stops determines the desired flush CPU usage. As we -// approach the hard dirty limit, we need to be more aggressive. We will therefore define two -// thresholds, and increase the constant as we cross them. -// -// 1) the soft limit line -// 2) halfway between soft limit and dirty limit -// -// The constants q1 and q2 are used to determine the proportional factor at each stage. -// -// Below the soft limit, we are in no particular hurry to flush, since it means we're set to -// complete flushing before we a new memtable is ready. The quota is dirty * q1, and q1 is set to a -// low number. -// -// The first half of the virtual dirty region is where we expect to be usually, so we have a low -// slope corresponding to a sluggish response between q1 * soft_limit and q2. -// -// In the second half, we're getting close to the hard dirty limit so we increase the slope and -// become more responsive, up to a maximum quota of qmax. -// -// For now we'll just set them in the structure not to complicate the constructor. 
But q1, q2 and -// qmax can easily become parameters if we find another user. -class flush_cpu_controller { - static constexpr float hard_dirty_limit = 0.50; - static constexpr float q1 = 0.01; - static constexpr float q2 = 0.2; - static constexpr float qmax = 1; - - float _current_quota = 0.0f; - float _goal; - std::function _current_dirty; - std::chrono::milliseconds _interval; - timer<> _update_timer; - - seastar::thread_scheduling_group _scheduling_group; - seastar::thread_scheduling_group *_current_scheduling_group = nullptr; - - void adjust(); -public: - seastar::thread_scheduling_group* scheduling_group() { - return _current_scheduling_group; - } - float current_quota() const { - return _current_quota; - } - - struct disabled { - seastar::thread_scheduling_group *backup; - }; - flush_cpu_controller(disabled d) : _scheduling_group(std::chrono::nanoseconds(0), 0), _current_scheduling_group(d.backup) {} - flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function current_dirty); - flush_cpu_controller(flush_cpu_controller&&) = default; -}; - - diff --git a/database.cc b/database.cc index 3406f43ad1..01f818b649 100644 --- a/database.cc +++ b/database.cc @@ -2049,7 +2049,7 @@ make_flush_cpu_controller(db::config& cfg, seastar::thread_scheduling_group* bac if (cfg.auto_adjust_flush_quota()) { return flush_cpu_controller(250ms, cfg.virtual_dirty_soft_limit(), std::move(fn)); } - return flush_cpu_controller(flush_cpu_controller::disabled{backup}); + return flush_cpu_controller(backlog_cpu_controller::disabled{backup}); } utils::UUID database::empty_version = utils::UUID_gen::get_name_UUID(bytes{}); @@ -2066,7 +2066,7 @@ database::database(const db::config& cfg) , _dirty_memory_manager(*this, memory::stats().total_memory() * 0.45, cfg.virtual_dirty_soft_limit()) , _streaming_dirty_memory_manager(*this, memory::stats().total_memory() * 0.10, cfg.virtual_dirty_soft_limit()) , _background_writer_scheduling_group(1ms, 
_cfg->background_writer_scheduling_quota())
-    , _memtable_cpu_controller(make_flush_cpu_controller(*_cfg, &_background_writer_scheduling_group, [this, limit = 2.0f * _dirty_memory_manager.throttle_threshold()] {
+    , _memtable_cpu_controller(make_flush_cpu_controller(*_cfg, &_background_writer_scheduling_group, [this, limit = float(_dirty_memory_manager.throttle_threshold())] {
         return (_dirty_memory_manager.virtual_dirty_memory()) / limit;
     }))
     , _version(empty_version)
@@ -2079,31 +2079,46 @@ database::database(const db::config& cfg)
     dblog.info("Row: max_vector_size: {}, internal_count: {}", size_t(row::max_vector_size), size_t(row::internal_count));
 }
 
-void flush_cpu_controller::adjust() {
-    auto mid = _goal + (hard_dirty_limit - _goal) / 2;
+// Timer callback of backlog_controller: samples the backlog, maps it through the piecewise-linear
+// function described by _control_points and passes the result to update_controller().
+void backlog_controller::adjust() {
+    // With fewer than two points there is no segment to interpolate on (a default-constructed
+    // controller has none at all), so leave the output untouched.
+    if (_control_points.size() < 2) {
+        return;
+    }
+
+    auto backlog = _current_backlog();
 
-    auto dirty = _current_dirty();
-    if (dirty < _goal) {
-        _current_quota = dirty * q1 / _goal;
-    } else if ((dirty >= _goal) && (dirty < mid)) {
-        _current_quota = q1 + (dirty - _goal) * (q2 - q1)/(mid - _goal);
-    } else {
-        _current_quota = q2 + (dirty - mid) * (qmax - q2) / (hard_dirty_limit - mid);
+    // At or beyond the last control point the output saturates at that point's value instead of
+    // being read from past the end of _control_points.
+    if (backlog >= _control_points.back().input) {
+        update_controller(_control_points.back().output);
+        return;
+    }
+
+    // Find the segment [idx - 1, idx] that contains the backlog. This runs infrequently and there
+    // is a fixed number of points, so a simple loop will do. idx never goes past the last point.
+    size_t idx = 1;
+    while ((idx < _control_points.size() - 1) && (_control_points[idx].input < backlog)) {
+        idx++;
     }
 
-    dblog.trace("dirty {}, goal {}, mid {} quota {}", dirty, _goal, mid, _current_quota);
-    _scheduling_group.update_usage(_current_quota);
+    const control_point& cp = _control_points[idx];
+    const control_point& last = _control_points[idx - 1];
+    const float span = cp.input - last.input;
+    // Two points sharing an input form a zero-width segment; use the upper point's output
+    // rather than dividing by zero.
+    float result = (span != 0.0f)
+        ? last.output + (backlog - last.input) * (cp.output - last.output) / span
+        : cp.output;
+    update_controller(result);
 }
 
-flush_cpu_controller::flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function current_dirty)
-    : _goal(soft_limit / 2)
-    , _current_dirty(std::move(current_dirty))
-    , _interval(interval)
-    , _update_timer([this] { adjust(); })
-    , _scheduling_group(1ms, 0.0f)
-    , _current_scheduling_group(&_scheduling_group)
-{
-    _update_timer.arm_periodic(_interval);
+// Records the quota and applies it to the controller's own scheduling group.
+void backlog_cpu_controller::update_controller(float quota) {
+    _current_quota = quota;
+    _scheduling_group.update_usage(_current_quota);
 }
 
 void
diff --git a/database.hh b/database.hh
index 09b8bcb8eb..b38b6b264f 100644
--- a/database.hh
+++ b/database.hh
@@ -78,7 +78,7 @@
 #include "db/view/view.hh"
 #include "lister.hh"
 #include "utils/phased_barrier.hh"
-#include "cpu_controller.hh"
+#include "backlog_controller.hh"
 #include "dirty_memory_manager.hh"
 #include "reader_resource_tracker.hh"
 