diff --git a/kmod/src/lock_server.c b/kmod/src/lock_server.c
index 2de5e9e7..9623f866 100644
--- a/kmod/src/lock_server.c
+++ b/kmod/src/lock_server.c
@@ -153,30 +153,30 @@ enum {
  */
 static void add_client_entry(struct server_lock_node *snode,
                              struct list_head *list,
-                             struct client_lock_entry *clent)
+                             struct client_lock_entry *c_ent)
 {
         WARN_ON_ONCE(!mutex_is_locked(&snode->mutex));
 
-        if (list_empty(&clent->head))
-                list_add_tail(&clent->head, list);
+        if (list_empty(&c_ent->head))
+                list_add_tail(&c_ent->head, list);
         else
-                list_move_tail(&clent->head, list);
+                list_move_tail(&c_ent->head, list);
 
-        clent->on_list = list == &snode->granted ? OL_GRANTED :
+        c_ent->on_list = list == &snode->granted ? OL_GRANTED :
                          list == &snode->requested ? OL_REQUESTED :
                          OL_INVALIDATED;
 }
 
 static void free_client_entry(struct lock_server_info *inf,
                               struct server_lock_node *snode,
-                              struct client_lock_entry *clent)
+                              struct client_lock_entry *c_ent)
 {
         WARN_ON_ONCE(!mutex_is_locked(&snode->mutex));
 
-        if (!list_empty(&clent->head))
-                list_del_init(&clent->head);
-        scoutfs_tseq_del(&inf->tseq_tree, &clent->tseq_entry);
-        kfree(clent);
+        if (!list_empty(&c_ent->head))
+                list_del_init(&c_ent->head);
+        scoutfs_tseq_del(&inf->tseq_tree, &c_ent->tseq_entry);
+        kfree(c_ent);
 }
 
 static bool invalid_mode(u8 mode)
@@ -339,13 +339,13 @@ static struct client_lock_entry *find_entry(struct server_lock_node *snode,
                                             struct list_head *list, u64 rid)
 {
-        struct client_lock_entry *clent;
+        struct client_lock_entry *c_ent;
 
         WARN_ON_ONCE(!mutex_is_locked(&snode->mutex));
 
-        list_for_each_entry(clent, list, head) {
-                if (clent->rid == rid)
-                        return clent;
+        list_for_each_entry(c_ent, list, head) {
+                if (c_ent->rid == rid)
+                        return c_ent;
         }
 
         return NULL;
@@ -364,7 +364,7 @@ int scoutfs_lock_server_request(struct super_block *sb, u64 rid, u64 net_id,
                                 struct scoutfs_net_lock *nl)
 {
         DECLARE_LOCK_SERVER_INFO(sb, inf);
-        struct client_lock_entry *clent;
+        struct client_lock_entry *c_ent;
         struct server_lock_node *snode;
         int ret;
 
@@ -376,29 +376,29 @@ int scoutfs_lock_server_request(struct super_block *sb, u64 rid,
                 goto out;
         }
 
-        clent = kzalloc(sizeof(struct client_lock_entry), GFP_NOFS);
-        if (!clent) {
+        c_ent = kzalloc(sizeof(struct client_lock_entry), GFP_NOFS);
+        if (!c_ent) {
                 ret = -ENOMEM;
                 goto out;
         }
 
-        INIT_LIST_HEAD(&clent->head);
-        clent->rid = rid;
-        clent->net_id = net_id;
-        clent->mode = nl->new_mode;
+        INIT_LIST_HEAD(&c_ent->head);
+        c_ent->rid = rid;
+        c_ent->net_id = net_id;
+        c_ent->mode = nl->new_mode;
 
         snode = alloc_server_lock(inf, &nl->key);
         if (snode == NULL) {
-                kfree(clent);
+                kfree(c_ent);
                 ret = -ENOMEM;
                 goto out;
         }
 
         snode->stats[SLT_REQUEST]++;
 
-        clent->snode = snode;
-        add_client_entry(snode, &snode->requested, clent);
-        scoutfs_tseq_add(&inf->tseq_tree, &clent->tseq_entry);
+        c_ent->snode = snode;
+        add_client_entry(snode, &snode->requested, c_ent);
+        scoutfs_tseq_add(&inf->tseq_tree, &c_ent->tseq_entry);
 
         ret = process_waiting_requests(sb, snode);
 out:
@@ -417,7 +417,7 @@ int scoutfs_lock_server_response(struct super_block *sb, u64 rid,
                                  struct scoutfs_net_lock *nl)
 {
         DECLARE_LOCK_SERVER_INFO(sb, inf);
-        struct client_lock_entry *clent;
+        struct client_lock_entry *c_ent;
         struct server_lock_node *snode;
         int ret;
 
@@ -438,18 +438,18 @@ int scoutfs_lock_server_response(struct super_block *sb, u64 rid,
 
         snode->stats[SLT_RESPONSE]++;
 
-        clent = find_entry(snode, &snode->invalidated, rid);
-        if (!clent) {
+        c_ent = find_entry(snode, &snode->invalidated, rid);
+        if (!c_ent) {
                 put_server_lock(inf, snode);
                 ret = -EINVAL;
                 goto out;
         }
 
         if (nl->new_mode == SCOUTFS_LOCK_NULL) {
-                free_client_entry(inf, snode, clent);
+                free_client_entry(inf, snode, c_ent);
         } else {
-                clent->mode = nl->new_mode;
-                add_client_entry(snode, &snode->granted, clent);
+                c_ent->mode = nl->new_mode;
+                add_client_entry(snode, &snode->granted, c_ent);
         }
 
         ret = process_waiting_requests(sb, snode);
@@ -632,7 +632,7 @@ int scoutfs_lock_server_recover_response(struct super_block *sb, u64 rid,
 {
         DECLARE_LOCK_SERVER_INFO(sb, inf);
         struct client_lock_entry *existing;
-        struct client_lock_entry *clent;
+        struct client_lock_entry *c_ent;
         struct server_lock_node *snode;
         struct scoutfs_key key;
         int ret = 0;
@@ -652,35 +652,35 @@ int scoutfs_lock_server_recover_response(struct super_block *sb, u64 rid,
         }
 
         for (i = 0; i < le16_to_cpu(nlr->nr); i++) {
-                clent = kzalloc(sizeof(struct client_lock_entry), GFP_NOFS);
-                if (!clent) {
+                c_ent = kzalloc(sizeof(struct client_lock_entry), GFP_NOFS);
+                if (!c_ent) {
                         ret = -ENOMEM;
                         goto out;
                 }
 
-                INIT_LIST_HEAD(&clent->head);
-                clent->rid = rid;
-                clent->net_id = 0;
-                clent->mode = nlr->locks[i].new_mode;
+                INIT_LIST_HEAD(&c_ent->head);
+                c_ent->rid = rid;
+                c_ent->net_id = 0;
+                c_ent->mode = nlr->locks[i].new_mode;
 
                 snode = alloc_server_lock(inf, &nlr->locks[i].key);
                 if (snode == NULL) {
-                        kfree(clent);
+                        kfree(c_ent);
                         ret = -ENOMEM;
                         goto out;
                 }
 
                 existing = find_entry(snode, &snode->granted, rid);
                 if (existing) {
-                        kfree(clent);
+                        kfree(c_ent);
                         put_server_lock(inf, snode);
                         ret = -EEXIST;
                         goto out;
                 }
 
-                clent->snode = snode;
-                add_client_entry(snode, &snode->granted, clent);
-                scoutfs_tseq_add(&inf->tseq_tree, &clent->tseq_entry);
+                c_ent->snode = snode;
+                add_client_entry(snode, &snode->granted, c_ent);
+                scoutfs_tseq_add(&inf->tseq_tree, &c_ent->tseq_entry);
 
                 put_server_lock(inf, snode);
 
@@ -707,7 +707,7 @@ out:
 int scoutfs_lock_server_farewell(struct super_block *sb, u64 rid)
 {
         DECLARE_LOCK_SERVER_INFO(sb, inf);
-        struct client_lock_entry *clent;
+        struct client_lock_entry *c_ent;
         struct client_lock_entry *tmp;
         struct server_lock_node *snode;
         struct scoutfs_key key;
@@ -724,9 +724,9 @@ int scoutfs_lock_server_farewell(struct super_block *sb, u64 rid)
                                             (list == &snode->requested) ?
                                             &snode->invalidated : NULL) {
-                        list_for_each_entry_safe(clent, tmp, list, head) {
-                                if (clent->rid == rid) {
-                                        free_client_entry(inf, snode, clent);
+                        list_for_each_entry_safe(c_ent, tmp, list, head) {
+                                if (c_ent->rid == rid) {
+                                        free_client_entry(inf, snode, c_ent);
                                         freed = true;
                                 }
                         }
@@ -787,15 +787,15 @@ static char *lock_on_list_string(u8 on_list)
 static void lock_server_tseq_show(struct seq_file *m,
                                   struct scoutfs_tseq_entry *ent)
 {
-        struct client_lock_entry *clent = container_of(ent,
+        struct client_lock_entry *c_ent = container_of(ent,
                                 struct client_lock_entry, tseq_entry);
-        struct server_lock_node *snode = clent->snode;
+        struct server_lock_node *snode = c_ent->snode;
 
         seq_printf(m, SK_FMT" %s %s rid %016llx net_id %llu\n",
-                   SK_ARG(&snode->key), lock_mode_string(clent->mode),
-                   lock_on_list_string(clent->on_list), clent->rid,
-                   clent->net_id);
+                   SK_ARG(&snode->key), lock_mode_string(c_ent->mode),
+                   lock_on_list_string(c_ent->on_list), c_ent->rid,
+                   c_ent->net_id);
 }
 
 static void stats_tseq_show(struct seq_file *m, struct scoutfs_tseq_entry *ent)
@@ -857,7 +857,7 @@ void scoutfs_lock_server_destroy(struct super_block *sb)
         DECLARE_LOCK_SERVER_INFO(sb, inf);
         struct server_lock_node *snode;
         struct server_lock_node *stmp;
-        struct client_lock_entry *clent;
+        struct client_lock_entry *c_ent;
         struct client_lock_entry *ctmp;
         LIST_HEAD(list);
 
@@ -873,8 +873,8 @@ void scoutfs_lock_server_destroy(struct super_block *sb)
                 list_splice_init(&snode->invalidated, &list);
                 mutex_lock(&snode->mutex);
 
-                list_for_each_entry_safe(clent, ctmp, &list, head) {
-                        free_client_entry(inf, snode, clent);
+                list_for_each_entry_safe(c_ent, ctmp, &list, head) {
+                        free_client_entry(inf, snode, c_ent);
                 }
                 mutex_unlock(&snode->mutex);
diff --git a/kmod/src/net.c b/kmod/src/net.c
index e028ce22..e3b6a4a6 100644
--- a/kmod/src/net.c
+++ b/kmod/src/net.c
@@ -675,18 +675,28 @@ static void scoutfs_net_recv_worker(struct work_struct *work)
 
                 scoutfs_tseq_add(&ninf->msg_tseq_tree, &mrecv->tseq_entry);
 
+                /*
+                 * Drain the proc_workq to ensure that in-flight lock
+                 * recovery work is fully flushed before the farewell
+                 * is processed, so the client and server can't race to
+                 * do lock recovery and farewell processing at once.
+                 */
+                if (nh.cmd == SCOUTFS_NET_CMD_FAREWELL && conn->listening_conn)
+                        drain_workqueue(conn->proc_workq);
+
                 /*
-                 * Initial received greetings are processed
+                 * Initial received greetings and farewells are processed
                  * synchronously before any other incoming messages.
                  *
                  * Incoming requests or responses to the lock client are
                  * called synchronously to avoid reordering.
                  */
                 if (nh.cmd == SCOUTFS_NET_CMD_GREETING ||
+                    (nh.cmd == SCOUTFS_NET_CMD_FAREWELL && conn->listening_conn) ||
                     (nh.cmd == SCOUTFS_NET_CMD_LOCK && !conn->listening_conn))
                         scoutfs_net_proc_worker(&mrecv->proc_work);
                 else
-                        queue_work(conn->workq, &mrecv->proc_work);
+                        queue_work(conn->proc_workq, &mrecv->proc_work);
         }
 
         if (ret)
@@ -851,6 +861,7 @@ static void scoutfs_net_destroy_worker(struct work_struct *work)
         }
 
         destroy_workqueue(conn->workq);
+        destroy_workqueue(conn->proc_workq);
         scoutfs_tseq_del(&ninf->conn_tseq_tree, &conn->tseq_entry);
         kfree(conn->info);
         trace_scoutfs_conn_destroy_free(conn);
@@ -1150,6 +1161,8 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
         /* wait for socket and proc work to finish, includes chained work */
         drain_workqueue(conn->workq);
 
+        drain_workqueue(conn->proc_workq);
+
         /* tear down the sock now that all work is done */
         if (conn->sock) {
                 sock_release(conn->sock);
@@ -1347,7 +1360,7 @@ scoutfs_net_alloc_conn(struct super_block *sb,
                 return NULL;
         }
 
-        conn->workq = alloc_workqueue("scoutfs_net_%s",
+        conn->workq = alloc_workqueue("scoutfs_net_workq_%s",
                                       WQ_UNBOUND | WQ_NON_REENTRANT, 0,
                                       name_suffix);
         if (!conn->workq) {
@@ -1356,6 +1369,16 @@ scoutfs_net_alloc_conn(struct super_block *sb,
                 return NULL;
         }
 
+        conn->proc_workq = alloc_workqueue("scoutfs_net_proc_workq_%s",
+                                           WQ_UNBOUND | WQ_NON_REENTRANT, 0,
+                                           name_suffix);
+        if (!conn->proc_workq) {
+                destroy_workqueue(conn->workq);
+                kfree(conn->info);
+                kfree(conn);
+                return NULL;
+        }
+
         conn->sb = sb;
         conn->notify_up = notify_up;
         conn->notify_down = notify_down;
@@ -1388,6 +1411,14 @@ scoutfs_net_alloc_conn(struct super_block *sb,
         return conn;
 }
 
+/* Give the caller the client info of the connection.  This is used by
+ * the server when processing server_farewell and lock response/recovery.
+ */
+void *scoutfs_net_client_info(struct scoutfs_net_connection *conn)
+{
+        return conn->info;
+}
+
 /*
  * Give the caller the client rid of the connection.  This is used by rare
  * server processing callers who want to send async responses after
diff --git a/kmod/src/net.h b/kmod/src/net.h
index 847e9204..dd8408a7 100644
--- a/kmod/src/net.h
+++ b/kmod/src/net.h
@@ -63,6 +63,7 @@ struct scoutfs_net_connection {
         atomic64_t recv_seq;
 
         struct workqueue_struct *workq;
+        struct workqueue_struct *proc_workq;
         struct work_struct listen_work;
         struct work_struct connect_work;
         struct work_struct send_work;
@@ -115,6 +116,7 @@ scoutfs_net_alloc_conn(struct super_block *sb,
                        scoutfs_net_notify_t notify_up,
                        scoutfs_net_notify_t notify_down, size_t info_size,
                        scoutfs_net_request_t *req_funcs, char *name_suffix);
+void *scoutfs_net_client_info(struct scoutfs_net_connection *conn);
 u64 scoutfs_net_client_rid(struct scoutfs_net_connection *conn);
 int scoutfs_net_connect(struct super_block *sb,
                         struct scoutfs_net_connection *conn,
diff --git a/kmod/src/server.c b/kmod/src/server.c
index 50d271ac..e84d1198 100644
--- a/kmod/src/server.c
+++ b/kmod/src/server.c
@@ -122,6 +122,7 @@ struct server_info {
 struct server_client_info {
         u64 rid;
         struct list_head head;
+        bool received_farewell;
 };
 
 static __le64 *first_valopt(struct scoutfs_volume_options *valopt)
@@ -1505,11 +1506,15 @@ static int server_lock(struct super_block *sb,
                        struct scoutfs_net_connection *conn,
                        u8 cmd, u64 id, void *arg, u16 arg_len)
 {
+        struct server_client_info *sci = scoutfs_net_client_info(conn);
         u64 rid = scoutfs_net_client_rid(conn);
 
         if (arg_len != sizeof(struct scoutfs_net_lock))
                 return -EINVAL;
 
+        if (sci->received_farewell)
+                return scoutfs_net_response(sb, conn, cmd, id, -EINVAL, NULL, 0);
+
         return scoutfs_lock_server_request(sb, rid, id, arg);
 }
 
@@ -1518,11 +1523,15 @@ static int lock_response(struct super_block *sb,
                          void *resp, unsigned int resp_len,
                          int error, void *data)
 {
+        struct server_client_info *sci = scoutfs_net_client_info(conn);
         u64 rid = scoutfs_net_client_rid(conn);
 
         if (resp_len != sizeof(struct scoutfs_net_lock))
                 return -EINVAL;
 
+        if (sci->received_farewell)
+                return 0;
+
         return scoutfs_lock_server_response(sb, rid, resp);
 }
 
@@ -1560,11 +1569,15 @@ static int lock_recover_response(struct super_block *sb,
                                  void *resp, unsigned int resp_len,
                                  int error, void *data)
 {
+        struct server_client_info *sci = scoutfs_net_client_info(conn);
         u64 rid = scoutfs_net_client_rid(conn);
 
         if (invalid_recover(resp, resp_len))
                 return -EINVAL;
 
+        if (sci->received_farewell)
+                return 0;
+
         return scoutfs_lock_server_recover_response(sb, rid, resp);
 }
 
@@ -3516,9 +3529,11 @@ static int server_farewell(struct super_block *sb,
                            struct scoutfs_net_connection *conn,
                            u8 cmd, u64 id, void *arg, u16 arg_len)
 {
+        struct server_client_info *sci = scoutfs_net_client_info(conn);
         struct server_info *server = SCOUTFS_SB(sb)->server_info;
         u64 rid = scoutfs_net_client_rid(conn);
         struct farewell_request *fw;
+        int ret;
 
         if (arg_len != 0)
                 return -EINVAL;
@@ -3536,6 +3551,23 @@ static int server_farewell(struct super_block *sb,
         list_add_tail(&fw->entry, &server->farewell_requests);
         spin_unlock(&server->farewell_lock);
 
+        /*
+         * Tear down the client's lock server state and record that we
+         * received its farewell, so the client and server can't race
+         * to process lock recovery at the same time.  Marking recovery
+         * finished also lets the server ignore later client messages.
+         */
+        sci->received_farewell = true;
+        ret = scoutfs_lock_server_farewell(sb, rid);
+        if (ret < 0) {
+                spin_lock(&server->farewell_lock);
+                list_del(&fw->entry);
+                spin_unlock(&server->farewell_lock);
+                kfree(fw);
+                return ret;
+        }
+        scoutfs_server_recov_finish(sb, rid, SCOUTFS_RECOV_LOCKS);
+
         queue_farewell_work(server);
 
         /* response will be sent later */
diff --git a/tests/golden/client-unmount-recovery b/tests/golden/client-unmount-recovery
new file mode 100644
index 00000000..866ad611
--- /dev/null
+++ b/tests/golden/client-unmount-recovery
@@ -0,0 +1 @@
+== 60s of unmounting non-quorum clients during recovery
diff --git a/tests/sequence b/tests/sequence
index a168e287..c6e463f1 100644
--- a/tests/sequence
+++ b/tests/sequence
@@ -33,6 +33,7 @@ resize-devices.sh
 fence-and-reclaim.sh
 orphan-inodes.sh
 mount-unmount-race.sh
+client-unmount-recovery.sh
 createmany-parallel-mounts.sh
 archive-light-cycle.sh
 block-stale-reads.sh
diff --git a/tests/tests/client-unmount-recovery.sh b/tests/tests/client-unmount-recovery.sh
new file mode 100644
index 00000000..44e6233a
--- /dev/null
+++ b/tests/tests/client-unmount-recovery.sh
@@ -0,0 +1,61 @@
+#
+# Unmount the server and non-quorum clients while lock state is being replayed to a remaining server
+#
+
+majority_nr=$(t_majority_count)
+quorum_nr=$T_QUORUM
+
+test "$quorum_nr" == "$majority_nr" && \
+        t_skip "all quorum members make up majority, need more mounts to unmount"
+
+test "$T_NR_MOUNTS" -lt "$T_QUORUM" && \
+        t_skip "need enough non-quorum clients to unmount"
+
+for i in $(t_fs_nrs); do
+        mounted[$i]=1
+done
+
+LENGTH=60
+echo "== ${LENGTH}s of unmounting non-quorum clients during recovery"
+END=$((SECONDS + LENGTH))
+while [ "$SECONDS" -lt "$END" ]; do
+        sv=$(t_server_nr)
+        rid=$(t_mount_rid $sv)
+        echo "sv $sv rid $rid" >> "$T_TMP.log"
+        sync
+        t_umount $sv &
+
+        for i in $(t_fs_nrs); do
+                if [ "$i" -ge "$quorum_nr" ]; then
+                        t_umount $i &
+                        echo "umount $i pid $! quo $quorum_nr" \
+                                >> $T_TMP.log
+                        mounted[$i]=0
+                fi
+        done
+
+        wait
+
+        t_mount $sv &
+        for i in $(t_fs_nrs); do
+                if [ "${mounted[$i]}" == 0 ]; then
+                        t_mount $i &
+                fi
+        done
+
+        wait
+
+        declare RID_LIST=$(cat /sys/fs/scoutfs/*/rid | sort -u)
+        read -a rid_arr <<< $RID_LIST
+
+        declare LOCK_LIST=$(cut -d' ' -f 5 /sys/kernel/debug/scoutfs/*/server_locks | sort -u)
+        read -a lock_arr <<< $LOCK_LIST
+
+        for i in "${lock_arr[@]}"; do
+                if [[ ! " ${rid_arr[*]} " =~ " $i " ]]; then
+                        t_fail "RID($i): exists when not mounted"
+                fi
+        done
+done
+
+t_pass
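
The heart of this change is an ordering rule: the server drains any queued lock/recovery processing for a connection, marks the connection as having sent its farewell, and from then on drops or rejects lock traffic from that client. Below is a minimal userspace sketch of just that pattern, with pthreads standing in for kernel workqueues; every name is hypothetical and none of it is scoutfs code.

/* toy_farewell.c: sketch of the drain-then-flag farewell ordering.
 * Build: cc -pthread toy_farewell.c -o toy_farewell
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_conn {
        pthread_mutex_t lock;
        pthread_cond_t drained;
        int inflight;           /* queued "lock recovery" work items */
        bool received_farewell; /* set once the farewell is handled */
};

/* a worker processing one queued lock message */
static void *lock_worker(void *arg)
{
        struct toy_conn *c = arg;

        pthread_mutex_lock(&c->lock);
        if (!c->received_farewell)
                printf("processed in-flight lock message\n");
        if (--c->inflight == 0)
                pthread_cond_signal(&c->drained);
        pthread_mutex_unlock(&c->lock);
        return NULL;
}

/* drain in-flight work, then mark the farewell as received */
static void handle_farewell(struct toy_conn *c)
{
        pthread_mutex_lock(&c->lock);
        while (c->inflight > 0)
                pthread_cond_wait(&c->drained, &c->lock);
        c->received_farewell = true;
        pthread_mutex_unlock(&c->lock);
        printf("farewell handled after drain\n");
}

/* lock traffic arriving after the farewell is rejected */
static int handle_late_lock(struct toy_conn *c)
{
        int ret = 0;

        pthread_mutex_lock(&c->lock);
        if (c->received_farewell)
                ret = -1;       /* analogous to the -EINVAL response */
        pthread_mutex_unlock(&c->lock);
        return ret;
}

int main(void)
{
        static struct toy_conn c = {
                .lock = PTHREAD_MUTEX_INITIALIZER,
                .drained = PTHREAD_COND_INITIALIZER,
                .inflight = 2,
        };
        pthread_t t[2];
        int i;

        for (i = 0; i < 2; i++)
                pthread_create(&t[i], NULL, lock_worker, &c);

        handle_farewell(&c);
        if (handle_late_lock(&c) < 0)
                printf("late lock message rejected\n");

        for (i = 0; i < 2; i++)
                pthread_join(t[i], NULL);
        return 0;
}

The same three steps appear in the patch itself: drain_workqueue(conn->proc_workq) before the farewell is processed synchronously, sci->received_farewell = true in server_farewell(), and the received_farewell checks in server_lock() and the lock response callbacks.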