diff --git a/backlog_controller.hh b/backlog_controller.hh
new file mode 100644
index 0000000000..5aff48de3e
--- /dev/null
+++ b/backlog_controller.hh
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2017 ScyllaDB
+ */
+
+/*
+ * This file is part of Scylla.
+ *
+ * Scylla is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * Scylla is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Scylla.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include
+#include
+#include
+
+// Simple proportional controller to adjust shares for processes for which a backlog can be clearly
+// defined.
+//
+// Goal is to consume the backlog as fast as we can, but not so fast that we steal all the CPU from
+// incoming requests, and at the same time minimize user-visible fluctuations in the quota.
+//
+// What that translates to is we'll try to keep the backlog's first derivative at 0 (IOW, we keep
+// backlog constant). As the backlog grows we increase CPU usage, decreasing CPU usage as the
+// backlog diminishes.
+//
+// The exact point at which the controller stops determines the desired CPU usage. As the backlog
+// grows and approaches the desired maximum, we need to be more aggressive. We will therefore define two
+// thresholds, and increase the constant as we cross them.
+//
+// Doing that divides the range in three (before the first, between first and second, and after
+// second threshold), and we'll be slow to grow in the first region, grow normally in the second
+// region, and aggressively in the third region.
+//
+// The control points handed to the constructor determine the proportional factor at each stage.
+class backlog_controller {
+protected:
+    struct control_point {
+        float input;   // backlog value at this point of the curve
+        float output;  // value passed to update_controller() when the backlog equals `input`
+    };
+
+    std::chrono::milliseconds _interval;  // period handed to _update_timer.arm_periodic()
+    timer<> _update_timer;                // its callback is adjust(); armed only by the non-default constructor
+
+    std::vector<control_point> _control_points;  // piecewise-linear curve read by adjust(); seeded with {0,0}
+
+    std::function<float()> _current_backlog;  // called by adjust() to sample the current backlog
+
+    virtual void update_controller(float quota) = 0;  // receives the value adjust() interpolated from the curve
+
+    void adjust();  // samples _current_backlog(), interpolates on _control_points, calls update_controller()
+
+    backlog_controller(std::chrono::milliseconds interval, std::vector<control_point> control_points, std::function<float()> backlog)
+        : _interval(interval)
+        , _update_timer([this] { adjust(); })  // captures this; NOTE(review): object must not be relocated while armed - confirm
+        , _control_points({{0,0}})  // the curve always starts at the origin
+        , _current_backlog(std::move(backlog))
+    {
+        _control_points.insert(_control_points.end(), control_points.begin(), control_points.end());  // caller's points follow, in the order given
+        _update_timer.arm_periodic(_interval);
+    }
+
+    // Used when the controllers are disabled: no timer is armed here, so adjust() is never scheduled. When we
+    // deprecate the --auto-adjust-flush-quota parameter we can delete this constructor.
+    backlog_controller() = default;
+    virtual ~backlog_controller() = default;
+};
+
+
+class backlog_cpu_controller : public backlog_controller {
+public:
+    struct disabled {  // tag selecting the "controller disabled" constructor
+        seastar::thread_scheduling_group *backup;  // group returned by scheduling_group() in that mode
+    };
+
+    seastar::thread_scheduling_group* scheduling_group() {
+        return _current_scheduling_group;  // own group when enabled, disabled::backup otherwise
+    }
+
+    float current_quota() const {
+        return _current_quota;  // last value stored by update_controller(); 0.0f until then
+    }
+protected:
+    float _current_quota = 0.0f;  // keep declared before _scheduling_group: that member's initializer reads it
+
+    void update_controller(float quota) override;
+
+    seastar::thread_scheduling_group _scheduling_group;
+    seastar::thread_scheduling_group *_current_scheduling_group = nullptr;  // points at _scheduling_group or at the backup group
+
+    backlog_cpu_controller(std::chrono::milliseconds interval, std::vector<control_point> control_points, std::function<float()> backlog)
+        : backlog_controller(interval, std::move(control_points), std::move(backlog))  // move, do not copy, the callback
+        , _scheduling_group(std::chrono::milliseconds(1), _current_quota)
+        , _current_scheduling_group(&_scheduling_group)
+    {}
+
+    backlog_cpu_controller(disabled d)
+        : backlog_controller()  // no timer armed, so update_controller() is never driven by adjust()
+        , _scheduling_group(std::chrono::nanoseconds(0), 0)  // own group is built but never handed out in this mode
+        , _current_scheduling_group(d.backup) {}
+};
+
+// memtable flush CPU controller.
+//
+// - First threshold is the soft limit line,
+// - Maximum is the point at which we'd stop consuming requests,
+// - Second threshold is halfway between them.
+//
+// Below the soft limit, we are in no particular hurry to flush, since it means we're set to
+// complete flushing before a new memtable is ready. The quota rises linearly from 0 to 0.1 as the
+// backlog goes from 0 to the soft limit.
+//
+// The first half of the virtual dirty region is where we expect to be usually, so we have a low
+// slope corresponding to a sluggish response: the quota only goes from 0.1 to 0.2.
+//
+// In the second half, we're getting close to the hard dirty limit so we increase the slope and
+// become more responsive, reaching a quota of 1 at the hard dirty limit.
+class flush_cpu_controller : public backlog_cpu_controller {
+    static constexpr float hard_dirty_limit = 1.0f;  // backlog value at which the curve reaches its last point
+public:
+    flush_cpu_controller(backlog_cpu_controller::disabled d) : backlog_cpu_controller(std::move(d)) {}
+    flush_cpu_controller(flush_cpu_controller&&) = default;  // NOTE(review): the enabled mode stores &_scheduling_group and a timer callback capturing this - confirm moves only happen where elided
+    flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function<float()> current_dirty)
+        : backlog_cpu_controller(interval,
+          std::vector<backlog_controller::control_point>({{soft_limit, 0.1}, {soft_limit + (hard_dirty_limit - soft_limit) / 2, 0.2} , {hard_dirty_limit, 1}}),  // soft limit, midpoint, hard limit
+          std::move(current_dirty)
+        )
+    {}
+};
diff --git a/cpu_controller.hh b/cpu_controller.hh
deleted file mode 100644
index 62bbf962e1..0000000000
--- a/cpu_controller.hh
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (C) 2017 ScyllaDB
- */
-
-/*
- * This file is part of Scylla.
- *
- * Scylla is free software: you can redistribute it and/or modify
- * it under the terms of the GNU Affero General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * Scylla is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with Scylla. If not, see .
- */
-
-#pragma once
-#include
-#include
-#include
-
-// Simple proportional controller to adjust shares of memtable/streaming flushes.
-//
-// Goal is to flush as fast as we can, but not so fast that we steal all the CPU from incoming
-// requests, and at the same time minimize user-visible fluctuations in the flush quota.
-//
-// What that translates to is we'll try to keep virtual dirty's firt derivative at 0 (IOW, we keep
-// virtual dirty constant), which means that the rate of incoming writes is equal to the rate of
-// flushed bytes.
-//
-// The exact point at which the controller stops determines the desired flush CPU usage. As we
-// approach the hard dirty limit, we need to be more aggressive. We will therefore define two
-// thresholds, and increase the constant as we cross them.
-//
-// 1) the soft limit line
-// 2) halfway between soft limit and dirty limit
-//
-// The constants q1 and q2 are used to determine the proportional factor at each stage.
-//
-// Below the soft limit, we are in no particular hurry to flush, since it means we're set to
-// complete flushing before we a new memtable is ready. The quota is dirty * q1, and q1 is set to a
-// low number.
-//
-// The first half of the virtual dirty region is where we expect to be usually, so we have a low
-// slope corresponding to a sluggish response between q1 * soft_limit and q2.
-//
-// In the second half, we're getting close to the hard dirty limit so we increase the slope and
-// become more responsive, up to a maximum quota of qmax.
-//
-// For now we'll just set them in the structure not to complicate the constructor. But q1, q2 and
-// qmax can easily become parameters if we find another user.
-class flush_cpu_controller {
- static constexpr float hard_dirty_limit = 0.50;
- static constexpr float q1 = 0.01;
- static constexpr float q2 = 0.2;
- static constexpr float qmax = 1;
-
- float _current_quota = 0.0f;
- float _goal;
- std::function _current_dirty;
- std::chrono::milliseconds _interval;
- timer<> _update_timer;
-
- seastar::thread_scheduling_group _scheduling_group;
- seastar::thread_scheduling_group *_current_scheduling_group = nullptr;
-
- void adjust();
-public:
- seastar::thread_scheduling_group* scheduling_group() {
- return _current_scheduling_group;
- }
- float current_quota() const {
- return _current_quota;
- }
-
- struct disabled {
- seastar::thread_scheduling_group *backup;
- };
- flush_cpu_controller(disabled d) : _scheduling_group(std::chrono::nanoseconds(0), 0), _current_scheduling_group(d.backup) {}
- flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function current_dirty);
- flush_cpu_controller(flush_cpu_controller&&) = default;
-};
-
-
diff --git a/database.cc b/database.cc
index 3406f43ad1..01f818b649 100644
--- a/database.cc
+++ b/database.cc
@@ -2049,7 +2049,7 @@ make_flush_cpu_controller(db::config& cfg, seastar::thread_scheduling_group* bac
if (cfg.auto_adjust_flush_quota()) {
return flush_cpu_controller(250ms, cfg.virtual_dirty_soft_limit(), std::move(fn));
}
- return flush_cpu_controller(flush_cpu_controller::disabled{backup});
+ return flush_cpu_controller(backlog_cpu_controller::disabled{backup});
}
utils::UUID database::empty_version = utils::UUID_gen::get_name_UUID(bytes{});
@@ -2066,7 +2066,7 @@ database::database(const db::config& cfg)
, _dirty_memory_manager(*this, memory::stats().total_memory() * 0.45, cfg.virtual_dirty_soft_limit())
, _streaming_dirty_memory_manager(*this, memory::stats().total_memory() * 0.10, cfg.virtual_dirty_soft_limit())
, _background_writer_scheduling_group(1ms, _cfg->background_writer_scheduling_quota())
- , _memtable_cpu_controller(make_flush_cpu_controller(*_cfg, &_background_writer_scheduling_group, [this, limit = 2.0f * _dirty_memory_manager.throttle_threshold()] {
+ , _memtable_cpu_controller(make_flush_cpu_controller(*_cfg, &_background_writer_scheduling_group, [this, limit = float(_dirty_memory_manager.throttle_threshold())] {
return (_dirty_memory_manager.virtual_dirty_memory()) / limit;
}))
, _version(empty_version)
@@ -2079,31 +2079,25 @@ database::database(const db::config& cfg)
dblog.info("Row: max_vector_size: {}, internal_count: {}", size_t(row::max_vector_size), size_t(row::internal_count));
}
-void flush_cpu_controller::adjust() {
- auto mid = _goal + (hard_dirty_limit - _goal) / 2;
+void backlog_controller::adjust() {  // timer callback: maps the sampled backlog onto the control-point curve
+    auto backlog = _current_backlog();
- auto dirty = _current_dirty();
- if (dirty < _goal) {
- _current_quota = dirty * q1 / _goal;
- } else if ((dirty >= _goal) && (dirty < mid)) {
- _current_quota = q1 + (dirty - _goal) * (q2 - q1)/(mid - _goal);
- } else {
- _current_quota = q2 + (dirty - mid) * (qmax - q2) / (hard_dirty_limit - mid);
+    // At or past the last control point there is no segment left to interpolate on, so clamp to the last output
+    // instead of indexing past the end. The size check covers a curve holding only the seeded {0,0} origin.
+    if (_control_points.size() < 2 || backlog >= _control_points.back().input) {
+        update_controller(_control_points.back().output);
+        return;
}
- dblog.trace("dirty {}, goal {}, mid {} quota {}", dirty, _goal, mid, _current_quota);
- _scheduling_group.update_usage(_current_quota);
+    const control_point* cp = &_control_points[1];  // runs infrequently over a fixed number of points: a linear scan will do
+    while (cp->input < backlog) { ++cp; }  // stops in range: the last point's input is above backlog here
+    const control_point& last = *(cp - 1);  // NOTE: inputs are assumed strictly increasing, else the division below is by zero
+    update_controller(last.output + (backlog - last.input) * (cp->output - last.output) / (cp->input - last.input));
}
-flush_cpu_controller::flush_cpu_controller(std::chrono::milliseconds interval, float soft_limit, std::function current_dirty)
- : _goal(soft_limit / 2)
- , _current_dirty(std::move(current_dirty))
- , _interval(interval)
- , _update_timer([this] { adjust(); })
- , _scheduling_group(1ms, 0.0f)
- , _current_scheduling_group(&_scheduling_group)
-{
- _update_timer.arm_periodic(_interval);
+void backlog_cpu_controller::update_controller(float quota) {  // applies the value computed by backlog_controller::adjust()
+    _current_quota = quota;                 // kept so current_quota() can report it
+    _scheduling_group.update_usage(quota);  // push the new quota to the controller's own scheduling group
}
void
diff --git a/database.hh b/database.hh
index 09b8bcb8eb..b38b6b264f 100644
--- a/database.hh
+++ b/database.hh
@@ -78,7 +78,7 @@
#include "db/view/view.hh"
#include "lister.hh"
#include "utils/phased_barrier.hh"
-#include "cpu_controller.hh"
+#include "backlog_controller.hh"
#include "dirty_memory_manager.hh"
#include "reader_resource_tracker.hh"