Track and log quorum heartbeat delays

Add tracking and reporting of delays in sending or receiving quorum
heartbeat messages.  We measure the time between back to back sends or
receives of heartbeat messages.  We record these delays truncated down
to second granularity in the quorum sysfs status file.  We log messages
to the console for each longest measured delay up to the maximum
configurable heartbeat timeout.

Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
Zach Brown
2023-05-16 15:48:21 -07:00
parent 46640e4ff9
commit 7b65767803

View File

@@ -100,6 +100,11 @@ struct last_msg {
ktime_t ts;
};
struct count_recent {
u64 count;
ktime_t recent;
};
enum quorum_role { FOLLOWER, CANDIDATE, LEADER };
struct quorum_status {
@@ -112,6 +117,8 @@ struct quorum_status {
ktime_t timeout;
};
#define HB_DELAY_NR (SCOUTFS_QUORUM_MAX_HB_TIMEO_MS / MSEC_PER_SEC)
struct quorum_info {
struct super_block *sb;
struct scoutfs_quorum_config qconf;
@@ -127,6 +134,8 @@ struct quorum_info {
struct quorum_status show_status;
struct last_msg last_send[SCOUTFS_QUORUM_MAX_SLOTS];
struct last_msg last_recv[SCOUTFS_QUORUM_MAX_SLOTS];
struct count_recent *hb_delay;
unsigned long max_hb_delay;
struct scoutfs_sysfs_attrs ssa;
};
@@ -608,6 +617,71 @@ out:
return ret;
}
static void clear_hb_delay(struct quorum_info *qinf)
{
int i;
spin_lock(&qinf->show_lock);
qinf->max_hb_delay = 0;
for (i = 0; i < HB_DELAY_NR; i++) {
qinf->hb_delay[i].recent = ns_to_ktime(0);
qinf->hb_delay[i].count = 0;
}
spin_unlock(&qinf->show_lock);
}
struct hb_recording {
ktime_t prev;
int count;
};
/*
* Record long heartbeat delays. We only record the delay between back
* to back send attempts in the leader or back to back recv messages in
* the followers. The worker caller sets record_hb when their iteration
* sent or received a heartbeat. An iteration that does anything else
* resets the tracking.
*/
static void record_hb_delay(struct super_block *sb, struct quorum_info *qinf,
struct hb_recording *hbr, bool record_hb, int role)
{
bool log = false;
ktime_t now;
s64 s;
if (!record_hb) {
hbr->count = 0;
return;
}
now = ktime_get();
if (hbr->count < 2 && ++hbr->count < 2) {
hbr->prev = now;
return;
}
s = ktime_ms_delta(now, hbr->prev) / MSEC_PER_SEC;
hbr->prev = now;
if (s <= 0 || s >= HB_DELAY_NR)
return;
spin_lock(&qinf->show_lock);
if (qinf->max_hb_delay < s) {
qinf->max_hb_delay = s;
if (s >= 3)
log = true;
}
qinf->hb_delay[s].recent = now;
qinf->hb_delay[s].count++;
spin_unlock(&qinf->show_lock);
if (log)
scoutfs_info(sb, "longest quorum heartbeat %s delay of %lld sec",
role == LEADER ? "send" : "recv", s);
}
/*
* The main quorum task maintains its private status. It seemed cleaner
* to occasionally copy the status for showing in sysfs/debugfs files
@@ -637,6 +711,8 @@ static void scoutfs_quorum_worker(struct work_struct *work)
struct sockaddr_in unused;
struct quorum_host_msg msg;
struct quorum_status qst = {0,};
struct hb_recording hbr = {{0,},};
bool record_hb;
int ret;
int err;
@@ -679,6 +755,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
}
scoutfs_options_read(sb, &opts);
record_hb = false;
/* ignore messages from older terms */
if (msg.type != SCOUTFS_QUORUM_MSG_INVALID &&
@@ -695,6 +772,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
if (qst.role == LEADER) {
scoutfs_warn(sb, "saw msg type %u from %u for term %llu while leader in term %llu, shutting down server.",
msg.type, msg.from, msg.term, qst.term);
clear_hb_delay(qinf);
}
qst.role = FOLLOWER;
qst.term = msg.term;
@@ -717,6 +795,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
if (msg.type == SCOUTFS_QUORUM_MSG_HEARTBEAT) {
qst.timeout = heartbeat_timeout(&opts);
scoutfs_inc_counter(sb, quorum_recv_heartbeat);
record_hb = true;
}
/* receiving a resignation from server starts election */
@@ -779,6 +858,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
qst.timeout = heartbeat_interval();
update_show_status(qinf, &qst);
clear_hb_delay(qinf);
/* record that we've been elected before starting up server */
ret = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_ELECT, qst.term, true);
@@ -833,6 +913,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION,
qst.server_start_term);
scoutfs_inc_counter(sb, quorum_send_resignation);
clear_hb_delay(qinf);
}
ret = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP,
@@ -854,6 +935,8 @@ static void scoutfs_quorum_worker(struct work_struct *work)
qst.timeout = heartbeat_interval();
scoutfs_inc_counter(sb, quorum_send_heartbeat);
record_hb = true;
}
/* followers vote once per term */
@@ -865,6 +948,8 @@ static void scoutfs_quorum_worker(struct work_struct *work)
msg.from);
scoutfs_inc_counter(sb, quorum_send_vote);
}
record_hb_delay(sb, qinf, &hbr, record_hb, qst.role);
}
update_show_status(qinf, &qst);
@@ -1001,9 +1086,11 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
{
DECLARE_QUORUM_INFO_KOBJ(kobj, qinf);
struct quorum_status qst;
struct count_recent cr;
struct last_msg last;
struct timespec64 ts;
const ktime_t now = ktime_get();
unsigned long ul;
size_t size;
int ret;
int i;
@@ -1061,6 +1148,26 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
(s64)ts.tv_sec, (int)ts.tv_nsec);
}
spin_lock(&qinf->show_lock);
ul = qinf->max_hb_delay;
spin_unlock(&qinf->show_lock);
if (ul)
snprintf_ret(buf, size, &ret, "HB Delay(s) Count Secs Since\n");
for (i = 1; i <= ul && i < HB_DELAY_NR; i++) {
spin_lock(&qinf->show_lock);
cr = qinf->hb_delay[i];
spin_unlock(&qinf->show_lock);
if (cr.count == 0)
continue;
ts = ktime_to_timespec64(ktime_sub(now, cr.recent));
snprintf_ret(buf, size, &ret,
"%11u %9llu %lld.%09u\n",
i, cr.count, (s64)ts.tv_sec, (int)ts.tv_nsec);
}
return ret;
}
SCOUTFS_ATTR_RO(status);
@@ -1198,7 +1305,12 @@ int scoutfs_quorum_setup(struct super_block *sb)
qinf = kzalloc(sizeof(struct quorum_info), GFP_KERNEL);
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_KERNEL);
if (!qinf || !super) {
if (qinf)
qinf->hb_delay = __vmalloc(HB_DELAY_NR * sizeof(struct count_recent),
GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL);
if (!qinf || !super || !qinf->hb_delay) {
if (qinf)
vfree(qinf->hb_delay);
kfree(qinf);
ret = -ENOMEM;
goto out;
@@ -1277,6 +1389,7 @@ void scoutfs_quorum_destroy(struct super_block *sb)
if (qinf->sock)
sock_release(qinf->sock);
vfree(qinf->hb_delay);
kfree(qinf);
sbi->quorum_info = NULL;
}