Merge 'logalloc reclaim_timer improvements' from Michael Livshin

* round up reported time to microseconds
* add backtrace if stall detected
* add call site name (hierarchical when timers are nested)
* put timers in more places
* reduce possible logspam in nested timers by making sure to report on things only once and to not report on durations smaller than those already reported on

Closes #10576

* github.com:scylladb/scylla:
  utils: logalloc: fix indentation
  utils: logalloc: split the reclaim_timer in compact_and_evict_locked()
  utils: logalloc: report segment stats if reclaim_segments() times out
  utils: logalloc: reclaim_timer: add optional extra log callback
  utils: logalloc: reclaim_timer: report non-decreasing durations
  utils: logalloc: have reclaim_timer print reserve limits
  utils: logalloc: move reclaim timer destructor for more readability
  utils: logalloc: define a proper bundle type for reclaim_timer stats
  utils: logalloc: add arithmetic operations to segment_pool::stats
  utils: logalloc: have reclaim timers detect being nested
  utils: logalloc: add more reclaim_timers
  utils: logalloc: move reclaim_timer to compact_and_evict_locked
  utils: logalloc: pull reclaim_timer definition forward
  utils: logalloc: reclaim_timer make tracker optional
  utils: logalloc: reclaim_timer: print backtrace if stall detected
  utils: logalloc: reclaim_timer: get call site name
  utils: logalloc: reclaim_timer: rename set_result
  utils: logalloc: reclaim_timer: rename _reserve_segments member
  utils: logalloc: reclaim_timer round up microseconds
This commit is contained in:
Avi Kivity
2022-07-25 17:02:04 +03:00

View File

@@ -866,6 +866,35 @@ public:
uint64_t memory_freed;
uint64_t memory_compacted;
uint64_t memory_evicted;
friend stats operator+(const stats& s1, const stats& s2) {
stats result(s1);
result += s2;
return result;
}
friend stats operator-(const stats& s1, const stats& s2) {
stats result(s1);
result -= s2;
return result;
}
stats& operator+=(const stats& other) {
segments_compacted += other.segments_compacted;
lsa_buffer_segments += other.lsa_buffer_segments;
memory_allocated += other.memory_allocated;
memory_freed += other.memory_freed;
memory_compacted += other.memory_compacted;
memory_evicted += other.memory_evicted;
return *this;
}
stats& operator-=(const stats& other) {
segments_compacted -= other.segments_compacted;
lsa_buffer_segments -= other.lsa_buffer_segments;
memory_allocated -= other.memory_allocated;
memory_freed -= other.memory_freed;
memory_compacted -= other.memory_compacted;
memory_evicted -= other.memory_evicted;
return *this;
}
};
private:
stats _stats{};
@@ -879,15 +908,106 @@ public:
size_t free_segments() const { return _free_segments; }
};
struct reclaim_timer {
using extra_logger = noncopyable_function<void(log_level)>;
private:
using clock = utils::coarse_steady_clock;
struct stats {
occupancy_stats region_occupancy;
segment_pool::stats pool_stats;
friend stats operator+(const stats& s1, const stats& s2) {
stats result(s1);
result += s2;
return result;
}
friend stats operator-(const stats& s1, const stats& s2) {
stats result(s1);
result -= s2;
return result;
}
stats& operator+=(const stats& other) {
region_occupancy += other.region_occupancy;
pool_stats += other.pool_stats;
return *this;
}
stats& operator-=(const stats& other) {
region_occupancy -= other.region_occupancy;
pool_stats -= other.pool_stats;
return *this;
}
};
static thread_local reclaim_timer* _active_timer;
static thread_local clock::duration _duration_threshold;
const char* _name;
const is_preemptible _preemptible;
const size_t _memory_to_release;
const size_t _segments_to_release;
const size_t _reserve_goal, _reserve_max;
tracker::impl* _tracker;
extra_logger _extra_logs;
const bool _debug_enabled;
bool _stall_detected = false;
size_t _memory_released = 0;
clock::time_point _start;
stats _start_stats, _end_stats, _stat_diff;
clock::duration _duration;
public:
inline reclaim_timer(const char* name, is_preemptible preemptible, size_t memory_to_release, size_t segments_to_release, tracker::impl* tracker, extra_logger extra_logs = [](log_level){});
inline reclaim_timer(const char* name, is_preemptible preemptible, size_t memory_to_release, size_t segments_to_release, extra_logger extra_logs = [](log_level){})
: reclaim_timer(name, preemptible, memory_to_release, segments_to_release, nullptr, std::move(extra_logs))
{}
inline ~reclaim_timer();
size_t set_memory_released(size_t memory_released) noexcept {
return this->_memory_released = memory_released;
}
private:
void sample_stats(stats& data);
template <typename T>
void log_if_changed(log_level level, const char* name, T before, T now) const noexcept {
if (now != before) {
timing_logger.log(level, "- {}: {:.3f} -> {:.3f}", name, before, now);
}
}
template <typename T>
void log_if_any(log_level level, const char* name, T value) const noexcept {
if (value != 0) {
timing_logger.log(level, "- {}: {}", name, value);
}
}
template <typename T>
void log_if_any_mem(log_level level, const char* name, T value) const noexcept {
if (value != 0) {
timing_logger.log(level, "- {}: {:.3f} MiB", name, (float)value / (1024*1024));
}
}
void report() const noexcept;
};
size_t segment_pool::reclaim_segments(size_t target, is_preemptible preempt) {
// Reclaimer tries to release segments occupying lower parts of the address
// space.
llogger.debug("Trying to reclaim {} segments", target);
// Reclamation. Migrate segments to higher addresses and shrink segment pool.
size_t reclaimed_segments = 0;
reclaim_timer timing_guard("reclaim_segments", preempt, target * segment::size, target, [&] (log_level level) {
timing_logger.log(level, "- reclaimed {} out of requested {} segments", reclaimed_segments, target);
});
// We may fail to reclaim because a region has reclaim disabled (usually because
// it is in an allocating_section. Failed reclaims can cause high CPU usage
// if all of the lower addresses happen to be in a reclaim-disabled region (this
@@ -928,6 +1048,7 @@ size_t segment_pool::reclaim_segments(size_t target, is_preemptible preempt) {
}
llogger.debug("Reclaimed {} segments (requested {})", reclaimed_segments, target);
timing_guard.set_memory_released(reclaimed_segments * segment::size);
return reclaimed_segments;
}
@@ -1135,6 +1256,86 @@ segment::occupancy() {
return { shard_segment_pool.descriptor(this).free_space(), segment::size };
}
thread_local reclaim_timer* reclaim_timer::_active_timer;
thread_local clock::duration reclaim_timer::_duration_threshold = clock::duration::zero();
reclaim_timer::reclaim_timer(const char* name, is_preemptible preemptible, size_t memory_to_release, size_t segments_to_release, tracker::impl* tracker, extra_logger extra_logs)
: _name(name)
, _preemptible(preemptible)
, _memory_to_release(memory_to_release)
, _segments_to_release(segments_to_release)
, _reserve_goal(shard_segment_pool.current_emergency_reserve_goal())
, _reserve_max(shard_segment_pool.emergency_reserve_max())
, _tracker(tracker)
, _extra_logs(std::move(extra_logs))
, _debug_enabled(timing_logger.is_enabled(logging::log_level::debug))
{
if (_active_timer) {
return;
}
_active_timer = this;
_start = clock::now();
sample_stats(_start_stats);
if (_duration_threshold == clock::duration::zero()) {
_duration_threshold = engine().get_blocked_reactor_notify_ms();
}
}
reclaim_timer::~reclaim_timer() {
if (_active_timer != this) {
return;
}
_duration = clock::now() - _start;
_stall_detected = _duration >= _duration_threshold;
if (_debug_enabled || _stall_detected) {
sample_stats(_end_stats);
_stat_diff = _end_stats - _start_stats;
report();
}
_active_timer = nullptr;
}
void reclaim_timer::sample_stats(stats& data) {
if (_debug_enabled && _tracker) {
data.region_occupancy = _tracker->region_occupancy();
}
data.pool_stats = shard_segment_pool.statistics();
}
void reclaim_timer::report() const noexcept {
auto time_level = _stall_detected ? log_level::warn : log_level::debug;
auto info_level = _stall_detected ? log_level::info : log_level::debug;
auto MiB = 1024*1024;
auto msg_extra = _stall_detected ? fmt::format(", at {}", current_backtrace()) : "";
timing_logger.log(time_level, "{} took {} us, trying to release {:.3f} MiB {}preemptibly, reserve: {{goal: {}, max: {}}}{}",
_name, (_duration + 500ns) / 1us, (float)_memory_to_release / MiB, _preemptible ? "" : "non-",
_reserve_goal, _reserve_max,
msg_extra);
log_if_any(info_level, "segments to release", _segments_to_release);
_extra_logs(info_level);
if (_memory_released > 0) {
auto bytes_per_second =
static_cast<float>(_memory_released) / std::chrono::duration_cast<std::chrono::duration<float>>(_duration).count();
timing_logger.log(info_level, "- reclamation rate = {} MiB/s", format("{:.3f}", bytes_per_second / MiB));
}
if (_debug_enabled && _tracker) {
log_if_changed(info_level, "occupancy of regions",
_start_stats.region_occupancy.used_fraction(), _end_stats.region_occupancy.used_fraction());
}
auto pool_diff = _stat_diff.pool_stats;
log_if_any_mem(info_level, "evicted memory", pool_diff.memory_evicted);
log_if_any(info_level, "compacted segments", pool_diff.segments_compacted);
log_if_any_mem(info_level, "compacted memory", pool_diff.memory_compacted);
log_if_any_mem(info_level, "allocated memory", pool_diff.memory_allocated);
}
//
// For interface documentation see logalloc::region and allocation_strategy.
//
@@ -2122,97 +2323,6 @@ static void reclaim_from_evictable(region::impl& r, size_t target_mem_in_use, is
}
}
class reclaim_timer {
using clock = utils::coarse_steady_clock;
const is_preemptible _preemptible;
const size_t _memory_to_release;
const size_t _reserve_segments;
tracker::impl& _tracker;
const bool _debug_enabled;
bool _stall_detected = false;
size_t _memory_released = 0;
clock::time_point _start;
clock::duration _duration;
occupancy_stats _old_region_occupancy;
segment_pool::stats _old_pool_stats;
public:
reclaim_timer(is_preemptible preemptible, size_t memory_to_release, size_t reserve_segments, tracker::impl& tracker)
: _preemptible(preemptible)
, _memory_to_release(memory_to_release)
, _reserve_segments(reserve_segments)
, _tracker(tracker)
, _debug_enabled(timing_logger.is_enabled(logging::log_level::debug))
{
_start = clock::now();
if (_debug_enabled) {
_old_region_occupancy = tracker.region_occupancy();
}
_old_pool_stats = shard_segment_pool.statistics();
}
size_t set_result(size_t memory_released) noexcept {
return this->_memory_released = memory_released;
}
~reclaim_timer() {
_duration = clock::now() - _start;
_stall_detected = _duration >= engine().get_blocked_reactor_notify_ms();
if (_debug_enabled || _stall_detected) {
report();
}
}
private:
template <typename T>
void log_if_changed(log_level level, const char* name, T before, T now) const noexcept {
if (now != before) {
timing_logger.log(level, "- {}: {:.3f} -> {:.3f}", name, before, now);
}
}
template <typename T>
void log_if_any(log_level level, const char* name, T value) const noexcept {
if (value != 0) {
timing_logger.log(level, "- {}: {}", name, value);
}
}
template <typename T>
void log_if_any_mem(log_level level, const char* name, T value) const noexcept {
if (value != 0) {
timing_logger.log(level, "- {}: {:.3f} MiB", name, (float)value / (1024*1024));
}
}
void report() const noexcept {
auto time_level = _stall_detected ? log_level::warn : log_level::debug;
auto info_level = _stall_detected ? log_level::info : log_level::debug;
auto MiB = 1024*1024;
timing_logger.log(time_level, "Reclamation cycle took {} us, trying to release {:.3f} MiB {}preemptibly",
_duration / 1us, (float)_memory_to_release / MiB, _preemptible ? "" : "non-");
log_if_any(info_level, "reserved segments", _reserve_segments);
if (_memory_released > 0) {
auto bytes_per_second =
static_cast<float>(_memory_released) / std::chrono::duration_cast<std::chrono::duration<float>>(_duration).count();
timing_logger.log(info_level, "- reclamation rate = {} MiB/s", format("{:.3f}", bytes_per_second / MiB));
}
if (_debug_enabled) {
log_if_changed(info_level, "occupancy of regions",
_old_region_occupancy.used_fraction(), _tracker.region_occupancy().used_fraction());
}
auto pool_stats = shard_segment_pool.statistics();
log_if_any_mem(info_level, "evicted memory", pool_stats.memory_evicted - _old_pool_stats.memory_evicted);
log_if_any(info_level, "compacted segments", pool_stats.segments_compacted - _old_pool_stats.segments_compacted);
log_if_any_mem(info_level, "compacted memory", pool_stats.memory_compacted - _old_pool_stats.memory_compacted);
log_if_any_mem(info_level, "allocated memory", pool_stats.memory_allocated - _old_pool_stats.memory_allocated);
}
};
idle_cpu_handler_result tracker::impl::compact_on_idle(work_waiting_on_reactor check_for_work) {
if (!_reclaiming_enabled) {
return idle_cpu_handler_result::no_more_work;
@@ -2252,8 +2362,8 @@ size_t tracker::impl::reclaim(size_t memory_to_release, is_preemptible preempt)
return 0;
}
reclaiming_lock rl(*this);
reclaim_timer timing_guard(preempt, memory_to_release, 0, *this);
return timing_guard.set_result(reclaim_locked(memory_to_release, preempt));
reclaim_timer timing_guard("reclaim", preempt, memory_to_release, 0, this);
return timing_guard.set_memory_released(reclaim_locked(memory_to_release, preempt));
}
size_t tracker::impl::reclaim_locked(size_t memory_to_release, is_preemptible preempt) {
@@ -2295,8 +2405,7 @@ size_t tracker::impl::compact_and_evict(size_t reserve_segments, size_t memory_t
return 0;
}
reclaiming_lock rl(*this);
reclaim_timer timing_guard(preempt, memory_to_release, reserve_segments, *this);
return timing_guard.set_result(compact_and_evict_locked(reserve_segments, memory_to_release, preempt));
return compact_and_evict_locked(reserve_segments, memory_to_release, preempt);
}
size_t tracker::impl::compact_and_evict_locked(size_t reserve_segments, size_t memory_to_release, is_preemptible preempt) {
@@ -2348,42 +2457,57 @@ size_t tracker::impl::compact_and_evict_locked(size_t reserve_segments, size_t m
}
}
while (shard_segment_pool.total_memory_in_use() > target_mem) {
boost::range::pop_heap(_regions, cmp);
region::impl* r = _regions.back();
{
int regions = 0, evictable_regions = 0;
reclaim_timer timing_guard("compact", preempt, memory_to_release, reserve_segments, this, [&] (log_level level) {
timing_logger.log(level, "- processed {} regions: reclaimed from {}, compacted {}",
regions, evictable_regions, regions - evictable_regions);
});
while (shard_segment_pool.total_memory_in_use() > target_mem) {
boost::range::pop_heap(_regions, cmp);
region::impl* r = _regions.back();
if (!r->is_compactible()) {
llogger.trace("Unable to release segments, no compactible pools.");
break;
}
if (!r->is_compactible()) {
llogger.trace("Unable to release segments, no compactible pools.");
break;
}
++regions;
// Prefer evicting if average occupancy ratio is above the compaction threshold to avoid
// overhead of compaction in workloads where allocation order matches eviction order, where
// we can reclaim memory by eviction only. In some cases the cost of compaction on allocation
// would be higher than the cost of repopulating the region with evicted items.
if (r->is_evictable() && r->occupancy().used_space() >= max_used_space_ratio_for_compaction * r->occupancy().total_space()) {
reclaim_from_evictable(*r, target_mem, preempt);
} else {
r->compact();
}
// Prefer evicting if average occupancy ratio is above the compaction threshold to avoid
// overhead of compaction in workloads where allocation order matches eviction order, where
// we can reclaim memory by eviction only. In some cases the cost of compaction on allocation
// would be higher than the cost of repopulating the region with evicted items.
if (r->is_evictable() && r->occupancy().used_space() >= max_used_space_ratio_for_compaction * r->occupancy().total_space()) {
reclaim_from_evictable(*r, target_mem, preempt);
++evictable_regions;
} else {
r->compact();
}
boost::range::push_heap(_regions, cmp);
boost::range::push_heap(_regions, cmp);
if (preempt && need_preempt()) {
break;
if (preempt && need_preempt()) {
break;
}
}
}
auto released_during_compaction = mem_in_use - shard_segment_pool.total_memory_in_use();
if (shard_segment_pool.total_memory_in_use() > target_mem) {
int regions = 0, evictable_regions = 0;
reclaim_timer timing_guard("evict", preempt, memory_to_release, reserve_segments, this, [&] (log_level level) {
timing_logger.log(level, "- processed {} regions, reclaimed from {}", regions, evictable_regions);
});
llogger.debug("Considering evictable regions.");
// FIXME: Fair eviction
for (region::impl* r : _regions) {
if (preempt && need_preempt()) {
break;
}
++regions;
if (r->is_evictable()) {
++evictable_regions;
reclaim_from_evictable(*r, target_mem, preempt);
if (shard_segment_pool.total_memory_in_use() <= target_mem) {
break;