reclaim_timer uses a coarse clock, but does not account for the measurement error that this introduces -- it can falsely report reclaims as stalls, even if they are shorter than the requested threshold (blocked-reactor-notify-ms) by a full coarse clock tick. Notably, if the stall threshold happens to be smaller than or equal to the coarse clock resolution, Scylla's log gets spammed with false stall reports. The resolution of coarse clocks in Linux is 1/CONFIG_HZ. This is typically equal to 1 ms or 4 ms, and stall thresholds of this order can occur in practice. Eliminate false positives by requiring the measured reclaim duration to be at least 1 clock tick longer than the configured threshold for it to be considered a stall. Fixes #10981. Closes #11680.
44 lines
1.1 KiB
C++
44 lines
1.1 KiB
C++
/*
|
|
* Copyright (C) 2021-present ScyllaDB
|
|
*/
|
|
|
|
/*
|
|
* SPDX-License-Identifier: AGPL-3.0-or-later
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
// A coarser and faster version of std::chrono::steady_clock, using
|
|
// CLOCK_MONOTONIC_COARSE instead of CLOCK_MONOTONIC.
|
|
//
|
|
// Intended for measuring time taken by synchronous code paths (where
|
|
// seastar::lowres_clock is not suitable).
|
|
|
|
#include <chrono>
|
|
#include <ctime>
|
|
|
|
namespace utils {
|
|
|
|
/// Clock type backed by CLOCK_MONOTONIC_COARSE.
///
/// Exposes the member types and static `now()` that <chrono> expects of a
/// clock, so it can be used wherever a std::chrono clock type is accepted.
/// Time points are expressed in nanoseconds, but readings only advance in
/// steps of get_resolution(); a measured interval can therefore be off by
/// up to one such step.
struct coarse_steady_clock {
    using duration = std::chrono::nanoseconds;
    using rep = duration::rep;
    using period = duration::period;
    using time_point = std::chrono::time_point<coarse_steady_clock, duration>;

    static constexpr bool is_steady = true;

    /// Returns the current CLOCK_MONOTONIC_COARSE reading as a time_point.
    ///
    /// The status of clock_gettime() is not reported (the function is
    /// noexcept); if the call fails, the zero time_point is returned.
    static time_point now() noexcept {
        // Value-initialized so that a failed clock_gettime() yields a
        // well-defined zero reading rather than indeterminate stack contents.
        timespec ts{};
        clock_gettime(CLOCK_MONOTONIC_COARSE, &ts);
        return time_point(to_duration(ts));
    }

    /// Returns the resolution reported by clock_getres() for
    /// CLOCK_MONOTONIC_COARSE, i.e. the step by which now() advances.
    ///
    /// The status of clock_getres() is not reported; if the call fails,
    /// a zero duration is returned.
    static duration get_resolution() noexcept {
        // Value-initialized for the same reason as in now().
        timespec ts{};
        clock_getres(CLOCK_MONOTONIC_COARSE, &ts);
        return to_duration(ts);
    }

private:
    /// Converts a timespec (seconds + nanoseconds) into a single duration.
    static duration to_duration(const timespec& ts) noexcept {
        return std::chrono::seconds(ts.tv_sec) + std::chrono::nanoseconds(ts.tv_nsec);
    }
};
|
|
|
|
};
|