mirror of
https://github.com/versity/scoutfs.git
synced 2026-06-01 17:26:20 +00:00
Fix client/server race btwn lock recov and farewell
Tear down client lock server state and set a boolean so that there is no race between client/server processing lock recovery at the same time as farewell. Currently there is a bug where if server and clients are unmounted then work from the client is processed out of order, which leaves behind a server_lock for a RID that no longer exists. In order to fix this we need to serialize SCOUTFS_NET_CMD_FAREWELL in recv_worker. Signed-off-by: Bryant G. Duffy-Ly <bduffyly@versity.com>
This commit is contained in:
@@ -675,18 +675,28 @@ static void scoutfs_net_recv_worker(struct work_struct *work)
|
||||
|
||||
scoutfs_tseq_add(&ninf->msg_tseq_tree, &mrecv->tseq_entry);
|
||||
|
||||
/*
|
||||
* We want to drain the proc_workq in order to ensure that
|
||||
* that the inflight lock recovery work is fully flushed out
|
||||
* so that we can prevent the client/server racing trying to
|
||||
* do lock recovery and processing farewell at the same time.
|
||||
*/
|
||||
if (nh.cmd == SCOUTFS_NET_CMD_FAREWELL && conn->listening_conn)
|
||||
drain_workqueue(conn->proc_workq);
|
||||
|
||||
/*
|
||||
* Initial received greetings are processed
|
||||
* Initial received greetings and farewell are processed
|
||||
* synchronously before any other incoming messages.
|
||||
*
|
||||
* Incoming requests or responses to the lock client are
|
||||
* called synchronously to avoid reordering.
|
||||
*/
|
||||
if (nh.cmd == SCOUTFS_NET_CMD_GREETING ||
|
||||
(nh.cmd == SCOUTFS_NET_CMD_FAREWELL && conn->listening_conn) ||
|
||||
(nh.cmd == SCOUTFS_NET_CMD_LOCK && !conn->listening_conn))
|
||||
scoutfs_net_proc_worker(&mrecv->proc_work);
|
||||
else
|
||||
queue_work(conn->workq, &mrecv->proc_work);
|
||||
queue_work(conn->proc_workq, &mrecv->proc_work);
|
||||
}
|
||||
|
||||
if (ret)
|
||||
@@ -851,6 +861,7 @@ static void scoutfs_net_destroy_worker(struct work_struct *work)
|
||||
}
|
||||
|
||||
destroy_workqueue(conn->workq);
|
||||
destroy_workqueue(conn->proc_workq);
|
||||
scoutfs_tseq_del(&ninf->conn_tseq_tree, &conn->tseq_entry);
|
||||
kfree(conn->info);
|
||||
trace_scoutfs_conn_destroy_free(conn);
|
||||
@@ -1150,6 +1161,8 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
|
||||
/* wait for socket and proc work to finish, includes chained work */
|
||||
drain_workqueue(conn->workq);
|
||||
|
||||
drain_workqueue(conn->proc_workq);
|
||||
|
||||
/* tear down the sock now that all work is done */
|
||||
if (conn->sock) {
|
||||
sock_release(conn->sock);
|
||||
@@ -1347,7 +1360,7 @@ scoutfs_net_alloc_conn(struct super_block *sb,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
conn->workq = alloc_workqueue("scoutfs_net_%s",
|
||||
conn->workq = alloc_workqueue("scoutfs_net_workq_%s",
|
||||
WQ_UNBOUND | WQ_NON_REENTRANT, 0,
|
||||
name_suffix);
|
||||
if (!conn->workq) {
|
||||
@@ -1356,6 +1369,16 @@ scoutfs_net_alloc_conn(struct super_block *sb,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
conn->proc_workq = alloc_workqueue("scoutfs_net_proc_workq_%s",
|
||||
WQ_UNBOUND | WQ_NON_REENTRANT, 0,
|
||||
name_suffix);
|
||||
if (!conn->proc_workq) {
|
||||
destroy_workqueue(conn->workq);
|
||||
kfree(conn->info);
|
||||
kfree(conn);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
conn->sb = sb;
|
||||
conn->notify_up = notify_up;
|
||||
conn->notify_down = notify_down;
|
||||
@@ -1388,6 +1411,14 @@ scoutfs_net_alloc_conn(struct super_block *sb,
|
||||
return conn;
|
||||
}
|
||||
|
||||
/* Give the caller the client info of the connection. This is used by
|
||||
* server processing the server_farewell, and lock response/recovery.
|
||||
*/
|
||||
void *scoutfs_net_client_info(struct scoutfs_net_connection *conn)
|
||||
{
|
||||
return conn->info;
|
||||
}
|
||||
|
||||
/*
|
||||
* Give the caller the client rid of the connection. This used by rare
|
||||
* server processing callers who want to send async responses after
|
||||
|
||||
@@ -63,6 +63,7 @@ struct scoutfs_net_connection {
|
||||
atomic64_t recv_seq;
|
||||
|
||||
struct workqueue_struct *workq;
|
||||
struct workqueue_struct *proc_workq;
|
||||
struct work_struct listen_work;
|
||||
struct work_struct connect_work;
|
||||
struct work_struct send_work;
|
||||
@@ -115,6 +116,7 @@ scoutfs_net_alloc_conn(struct super_block *sb,
|
||||
scoutfs_net_notify_t notify_up,
|
||||
scoutfs_net_notify_t notify_down, size_t info_size,
|
||||
scoutfs_net_request_t *req_funcs, char *name_suffix);
|
||||
void *scoutfs_net_client_info(struct scoutfs_net_connection *conn);
|
||||
u64 scoutfs_net_client_rid(struct scoutfs_net_connection *conn);
|
||||
int scoutfs_net_connect(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
|
||||
@@ -122,6 +122,7 @@ struct server_info {
|
||||
struct server_client_info {
|
||||
u64 rid;
|
||||
struct list_head head;
|
||||
bool received_farewell;
|
||||
};
|
||||
|
||||
static __le64 *first_valopt(struct scoutfs_volume_options *valopt)
|
||||
@@ -1505,11 +1506,15 @@ static int server_lock(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct server_client_info *sci = scoutfs_net_client_info(conn);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
|
||||
if (arg_len != sizeof(struct scoutfs_net_lock))
|
||||
return -EINVAL;
|
||||
|
||||
if (sci->received_farewell)
|
||||
return scoutfs_net_response(sb, conn, cmd, id, -EINVAL, NULL, 0);
|
||||
|
||||
return scoutfs_lock_server_request(sb, rid, id, arg);
|
||||
}
|
||||
|
||||
@@ -1518,11 +1523,15 @@ static int lock_response(struct super_block *sb,
|
||||
void *resp, unsigned int resp_len,
|
||||
int error, void *data)
|
||||
{
|
||||
struct server_client_info *sci = scoutfs_net_client_info(conn);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
|
||||
if (resp_len != sizeof(struct scoutfs_net_lock))
|
||||
return -EINVAL;
|
||||
|
||||
if (sci->received_farewell)
|
||||
return 0;
|
||||
|
||||
return scoutfs_lock_server_response(sb, rid, resp);
|
||||
}
|
||||
|
||||
@@ -1560,11 +1569,15 @@ static int lock_recover_response(struct super_block *sb,
|
||||
void *resp, unsigned int resp_len,
|
||||
int error, void *data)
|
||||
{
|
||||
struct server_client_info *sci = scoutfs_net_client_info(conn);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
|
||||
if (invalid_recover(resp, resp_len))
|
||||
return -EINVAL;
|
||||
|
||||
if (sci->received_farewell)
|
||||
return 0;
|
||||
|
||||
return scoutfs_lock_server_recover_response(sb, rid, resp);
|
||||
}
|
||||
|
||||
@@ -3516,9 +3529,11 @@ static int server_farewell(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct server_client_info *sci = scoutfs_net_client_info(conn);
|
||||
struct server_info *server = SCOUTFS_SB(sb)->server_info;
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct farewell_request *fw;
|
||||
int ret;
|
||||
|
||||
if (arg_len != 0)
|
||||
return -EINVAL;
|
||||
@@ -3536,6 +3551,20 @@ static int server_farewell(struct super_block *sb,
|
||||
list_add_tail(&fw->entry, &server->farewell_requests);
|
||||
spin_unlock(&server->farewell_lock);
|
||||
|
||||
/*
|
||||
* Tear down client lock server state and set that we recieved farewell
|
||||
* to ensure that we do not race between client and server trying to process
|
||||
* lock recovery at the same time (race). We also want to mark that the recovery
|
||||
* finished so that if client's try to send stuff later; the server doesnt care.
|
||||
*/
|
||||
sci->received_farewell = true;
|
||||
ret = scoutfs_lock_server_farewell(sb, rid);
|
||||
if (ret < 0) {
|
||||
kfree(fw);
|
||||
return ret;
|
||||
}
|
||||
scoutfs_server_recov_finish(sb, rid, SCOUTFS_RECOV_LOCKS);
|
||||
|
||||
queue_farewell_work(server);
|
||||
|
||||
/* response will be sent later */
|
||||
|
||||
Reference in New Issue
Block a user