Make struct scheduling_group be sub-class of the backlog controller. Its new meaning is now -- the group under controller maintenance. Both database and compaction manager derive their sched groups from this one. This makes backlog controller construction simpler, prepares the ground for sched groups unification in seastar and facilitates next patch. Signed-off-by: Pavel Emelyanov <xemul@scylladb.com>
136 lines
5.3 KiB
C++
136 lines
5.3 KiB
C++
/*
|
|
* Copyright (C) 2017-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
*/
|
|
|
|
#pragma once
|
|
#include <seastar/core/scheduling.hh>
|
|
#include <seastar/core/timer.hh>
|
|
#include <seastar/core/gate.hh>
|
|
#include <seastar/core/file.hh>
|
|
#include <chrono>
|
|
#include <cmath>
|
|
|
|
#include "seastarx.hh"
|
|
|
|
// Simple proportional controller to adjust shares for processes for which a backlog can be clearly
|
|
// defined.
|
|
//
|
|
// Goal is to consume the backlog as fast as we can, but not so fast that we steal all the CPU from
|
|
// incoming requests, and at the same time minimize user-visible fluctuations in the quota.
|
|
//
|
|
// What that translates to is we'll try to keep the backlog's firt derivative at 0 (IOW, we keep
|
|
// backlog constant). As the backlog grows we increase CPU usage, decreasing CPU usage as the
|
|
// backlog diminishes.
|
|
//
|
|
// The exact point at which the controller stops determines the desired CPU usage. As the backlog
|
|
// grows and approach a maximum desired, we need to be more aggressive. We will therefore define two
|
|
// thresholds, and increase the constant as we cross them.
|
|
//
|
|
// Doing that divides the range in three (before the first, between first and second, and after
|
|
// second threshold), and we'll be slow to grow in the first region, grow normally in the second
|
|
// region, and aggressively in the third region.
|
|
//
|
|
// The constants q1 and q2 are used to determine the proportional factor at each stage.
|
|
class backlog_controller {
|
|
public:
|
|
struct scheduling_group {
|
|
seastar::scheduling_group cpu;
|
|
const ::io_priority_class& io;
|
|
};
|
|
future<> shutdown() {
|
|
_update_timer.cancel();
|
|
return std::move(_inflight_update);
|
|
}
|
|
protected:
|
|
struct control_point {
|
|
float input;
|
|
float output;
|
|
};
|
|
|
|
scheduling_group& _scheduling_group;
|
|
std::chrono::milliseconds _interval;
|
|
timer<> _update_timer;
|
|
|
|
std::vector<control_point> _control_points;
|
|
|
|
std::function<float()> _current_backlog;
|
|
// updating shares for an I/O class may contact another shard and returns a future.
|
|
future<> _inflight_update;
|
|
|
|
virtual void update_controller(float quota);
|
|
|
|
void adjust();
|
|
|
|
backlog_controller(scheduling_group& sg, std::chrono::milliseconds interval,
|
|
std::vector<control_point> control_points, std::function<float()> backlog)
|
|
: _scheduling_group(sg)
|
|
, _interval(interval)
|
|
, _update_timer([this] { adjust(); })
|
|
, _control_points()
|
|
, _current_backlog(std::move(backlog))
|
|
, _inflight_update(make_ready_future<>())
|
|
{
|
|
_control_points.insert(_control_points.end(), control_points.begin(), control_points.end());
|
|
_update_timer.arm_periodic(_interval);
|
|
}
|
|
|
|
// Used when the controllers are disabled and a static share is used
|
|
// When that option is deprecated we should remove this.
|
|
backlog_controller(scheduling_group& sg, float static_shares)
|
|
: _scheduling_group(sg)
|
|
, _inflight_update(make_ready_future<>())
|
|
{
|
|
update_controller(static_shares);
|
|
}
|
|
|
|
virtual ~backlog_controller() {}
|
|
public:
|
|
backlog_controller(backlog_controller&&) = default;
|
|
float backlog_of_shares(float shares) const;
|
|
};
|
|
|
|
// memtable flush CPU controller.
|
|
//
|
|
// - First threshold is the soft limit line,
|
|
// - Maximum is the point in which we'd stop consuming request,
|
|
// - Second threshold is halfway between them.
|
|
//
|
|
// Below the soft limit, we are in no particular hurry to flush, since it means we're set to
|
|
// complete flushing before we a new memtable is ready. The quota is dirty * q1, and q1 is set to a
|
|
// low number.
|
|
//
|
|
// The first half of the virtual dirty region is where we expect to be usually, so we have a low
|
|
// slope corresponding to a sluggish response between q1 * soft_limit and q2.
|
|
//
|
|
// In the second half, we're getting close to the hard dirty limit so we increase the slope and
|
|
// become more responsive, up to a maximum quota of qmax.
|
|
class flush_controller : public backlog_controller {
|
|
static constexpr float hard_dirty_limit = 1.0f;
|
|
public:
|
|
flush_controller(backlog_controller::scheduling_group& sg, float static_shares) : backlog_controller(sg, static_shares) {}
|
|
flush_controller(backlog_controller::scheduling_group& sg, std::chrono::milliseconds interval, float soft_limit, std::function<float()> current_dirty)
|
|
: backlog_controller(sg, std::move(interval),
|
|
std::vector<backlog_controller::control_point>({{0.0, 0.0}, {soft_limit, 10}, {soft_limit + (hard_dirty_limit - soft_limit) / 2, 200} , {hard_dirty_limit, 1000}}),
|
|
std::move(current_dirty)
|
|
)
|
|
{}
|
|
};
|
|
|
|
class compaction_controller : public backlog_controller {
|
|
public:
|
|
static constexpr unsigned normalization_factor = 30;
|
|
static constexpr float disable_backlog = std::numeric_limits<double>::infinity();
|
|
static constexpr float backlog_disabled(float backlog) { return std::isinf(backlog); }
|
|
compaction_controller(backlog_controller::scheduling_group& sg, float static_shares) : backlog_controller(sg, static_shares) {}
|
|
compaction_controller(backlog_controller::scheduling_group& sg, std::chrono::milliseconds interval, std::function<float()> current_backlog)
|
|
: backlog_controller(sg, std::move(interval),
|
|
std::vector<backlog_controller::control_point>({{0.0, 50}, {1.5, 100} , {normalization_factor, 1000}}),
|
|
std::move(current_backlog)
|
|
)
|
|
{}
|
|
};
|