mirror of
https://github.com/versity/scoutfs.git
synced 2025-12-23 05:25:18 +00:00
Merge pull request #239 from versity/auke/keepalive
Add tcp_keepalive_timeout_ms option, change default to 60s
This commit is contained in:
@@ -32,6 +32,7 @@
|
||||
#include "endian_swap.h"
|
||||
#include "tseq.h"
|
||||
#include "fence.h"
|
||||
#include "options.h"
|
||||
|
||||
/*
|
||||
* scoutfs networking delivers requests and responses between nodes.
|
||||
@@ -998,7 +999,7 @@ static void destroy_conn(struct scoutfs_net_connection *conn)
|
||||
* The TCP_KEEP* and TCP_USER_TIMEOUT option interaction is subtle.
|
||||
* TCP_USER_TIMEOUT only applies if there is unacked written data in the
|
||||
* send queue. It doesn't work if the connection is idle. Adding
|
||||
* keepalice probes with user_timeout set changes how the keepalive
|
||||
* keepalive probes with user_timeout set changes how the keepalive
|
||||
* timeout is calculated. CNT no longer matters. Each time
|
||||
* additional probes (not the first) are sent the user timeout is
|
||||
* checked against the last time data was received. If none of the
|
||||
@@ -1010,14 +1011,16 @@ static void destroy_conn(struct scoutfs_net_connection *conn)
|
||||
* elapses during the probe timer processing after the unsuccessful
|
||||
* probes.
|
||||
*/
|
||||
#define UNRESPONSIVE_TIMEOUT_SECS 10
|
||||
#define UNRESPONSIVE_PROBES 3
|
||||
static int sock_opts_and_names(struct scoutfs_net_connection *conn,
|
||||
static int sock_opts_and_names(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
struct socket *sock)
|
||||
{
|
||||
struct scoutfs_mount_options opts;
|
||||
int optval;
|
||||
int ret;
|
||||
|
||||
scoutfs_options_read(sb, &opts);
|
||||
|
||||
/* we use a keepalive timeout instead of send timeout */
|
||||
ret = kc_sock_set_sndtimeo(sock, 0);
|
||||
if (ret)
|
||||
@@ -1030,8 +1033,7 @@ static int sock_opts_and_names(struct scoutfs_net_connection *conn,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
BUILD_BUG_ON(UNRESPONSIVE_PROBES >= UNRESPONSIVE_TIMEOUT_SECS);
|
||||
optval = UNRESPONSIVE_TIMEOUT_SECS - (UNRESPONSIVE_PROBES);
|
||||
optval = (opts.tcp_keepalive_timeout_ms / MSEC_PER_SEC) - UNRESPONSIVE_PROBES;
|
||||
ret = kc_tcp_sock_set_keepidle(sock, optval);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -1041,7 +1043,7 @@ static int sock_opts_and_names(struct scoutfs_net_connection *conn,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
optval = UNRESPONSIVE_TIMEOUT_SECS * MSEC_PER_SEC;
|
||||
optval = opts.tcp_keepalive_timeout_ms;
|
||||
ret = kc_tcp_sock_set_user_timeout(sock, optval);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -1115,7 +1117,7 @@ static void scoutfs_net_listen_worker(struct work_struct *work)
|
||||
continue;
|
||||
}
|
||||
|
||||
ret = sock_opts_and_names(acc_conn, acc_sock);
|
||||
ret = sock_opts_and_names(sb, acc_conn, acc_sock);
|
||||
if (ret) {
|
||||
sock_release(acc_sock);
|
||||
destroy_conn(acc_conn);
|
||||
@@ -1186,7 +1188,7 @@ static void scoutfs_net_connect_worker(struct work_struct *work)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = sock_opts_and_names(conn, sock);
|
||||
ret = sock_opts_and_names(sb, conn, sock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@ enum {
|
||||
Opt_orphan_scan_delay_ms,
|
||||
Opt_quorum_heartbeat_timeout_ms,
|
||||
Opt_quorum_slot_nr,
|
||||
Opt_tcp_keepalive_timeout_ms,
|
||||
Opt_err,
|
||||
};
|
||||
|
||||
@@ -52,6 +53,7 @@ static const match_table_t tokens = {
|
||||
{Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"},
|
||||
{Opt_quorum_heartbeat_timeout_ms, "quorum_heartbeat_timeout_ms=%s"},
|
||||
{Opt_quorum_slot_nr, "quorum_slot_nr=%s"},
|
||||
{Opt_tcp_keepalive_timeout_ms, "tcp_keepalive_timeout_ms=%s"},
|
||||
{Opt_err, NULL}
|
||||
};
|
||||
|
||||
@@ -126,6 +128,8 @@ static void free_options(struct scoutfs_mount_options *opts)
|
||||
#define MIN_DATA_PREALLOC_BLOCKS 1ULL
|
||||
#define MAX_DATA_PREALLOC_BLOCKS ((unsigned long long)SCOUTFS_BLOCK_SM_MAX)
|
||||
|
||||
#define DEFAULT_TCP_KEEPALIVE_TIMEOUT_MS (60 * MSEC_PER_SEC)
|
||||
|
||||
static void init_default_options(struct scoutfs_mount_options *opts)
|
||||
{
|
||||
memset(opts, 0, sizeof(*opts));
|
||||
@@ -136,6 +140,7 @@ static void init_default_options(struct scoutfs_mount_options *opts)
|
||||
opts->orphan_scan_delay_ms = -1;
|
||||
opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
|
||||
opts->quorum_slot_nr = -1;
|
||||
opts->tcp_keepalive_timeout_ms = DEFAULT_TCP_KEEPALIVE_TIMEOUT_MS;
|
||||
}
|
||||
|
||||
static int verify_log_merge_wait_timeout_ms(struct super_block *sb, int ret, int val)
|
||||
@@ -168,6 +173,21 @@ static int verify_quorum_heartbeat_timeout_ms(struct super_block *sb, int ret, u
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_tcp_keepalive_timeout_ms(struct super_block *sb, int ret, int val)
|
||||
{
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "failed to parse tcp_keepalive_timeout_ms value");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (val <= (UNRESPONSIVE_PROBES * MSEC_PER_SEC)) {
|
||||
scoutfs_err(sb, "invalid tcp_keepalive_timeout_ms value %d, must be larger than %lu",
|
||||
val, (UNRESPONSIVE_PROBES * MSEC_PER_SEC));
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse the option string into our options struct. This can allocate
|
||||
* memory in the struct. The caller is responsible for always calling
|
||||
@@ -218,6 +238,14 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
|
||||
opts->data_prealloc_contig_only = nr;
|
||||
break;
|
||||
|
||||
case Opt_tcp_keepalive_timeout_ms:
|
||||
ret = match_int(args, &nr);
|
||||
ret = verify_tcp_keepalive_timeout_ms(sb, ret, nr);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
opts->tcp_keepalive_timeout_ms = nr;
|
||||
break;
|
||||
|
||||
case Opt_log_merge_wait_timeout_ms:
|
||||
ret = match_int(args, &nr);
|
||||
ret = verify_log_merge_wait_timeout_ms(sb, ret, nr);
|
||||
@@ -371,6 +399,7 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
|
||||
seq_printf(seq, ",orphan_scan_delay_ms=%u", opts.orphan_scan_delay_ms);
|
||||
if (opts.quorum_slot_nr >= 0)
|
||||
seq_printf(seq, ",quorum_slot_nr=%d", opts.quorum_slot_nr);
|
||||
seq_printf(seq, ",tcp_keepalive_timeout_ms=%d", opts.tcp_keepalive_timeout_ms);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -13,8 +13,11 @@ struct scoutfs_mount_options {
|
||||
unsigned int orphan_scan_delay_ms;
|
||||
int quorum_slot_nr;
|
||||
u64 quorum_heartbeat_timeout_ms;
|
||||
int tcp_keepalive_timeout_ms;
|
||||
};
|
||||
|
||||
#define UNRESPONSIVE_PROBES 3
|
||||
|
||||
void scoutfs_options_read(struct super_block *sb, struct scoutfs_mount_options *opts);
|
||||
int scoutfs_options_show(struct seq_file *seq, struct dentry *root);
|
||||
|
||||
|
||||
@@ -130,6 +130,23 @@ the server for the filesystem if it is elected leader.
|
||||
The assigned number must match one of the slots defined with \-Q options
|
||||
when the filesystem was created with mkfs. If the number assigned
|
||||
doesn't match a number created during mkfs then the mount will fail.
|
||||
.TP
|
||||
.B tcp_keepalive_timeout_ms=<number>
|
||||
This option sets the amount of time, in milliseconds, that a client
|
||||
connection will wait for active TCP packets, before deciding that
|
||||
the connection is dead. This setting is per-mount and only changes
|
||||
the behavior of that mount.
|
||||
.sp
|
||||
The default value of this setting is 60000msec (60s). Any precision
|
||||
beyond a whole second is likely unrealistic due to the nature of
|
||||
TCP keepalive mechanisms in the Linux kernel. Valid values are any
|
||||
value higher than 3000 (3s).
|
||||
.sp
|
||||
The TCP keepalive mechanism is complex and observing a lost connection
|
||||
quickly is important to maintain cluster stability. If the local
|
||||
network suffers from intermittent outages this option may provide
|
||||
some respite to overcome these outages without the cluster becoming
|
||||
desynchronized.
|
||||
.SH VOLUME OPTIONS
|
||||
Volume options are persistent options which are stored in the super
|
||||
block in the metadata device and which apply to all mounts of the volume.
|
||||
|
||||
Reference in New Issue
Block a user