Merge pull request #54 from bgly/bduffyly/abort_conn

Fix client/server connection abort on forced unmount
This commit is contained in:
Zach Brown
2021-11-09 13:29:20 -08:00
committed by GitHub
7 changed files with 75 additions and 19 deletions

View File

@@ -1233,10 +1233,6 @@ static int btree_walk(struct super_block *sb,
WARN_ON_ONCE((flags & (BTW_GET_PAR|BTW_SET_PAR)) && !par_root))
return -EINVAL;
/* all ops come through walk and walk calls all reads */
if (scoutfs_forcing_unmount(sb))
return -EIO;
scoutfs_inc_counter(sb, btree_walk);
restart:

View File

@@ -668,3 +668,11 @@ void scoutfs_client_destroy(struct super_block *sb)
kfree(client);
sbi->client_info = NULL;
}
/*
 * Shut down the client's network connection, if there is one.
 *
 * This is a no-op when the super block has no client info or when the
 * client has no connection; otherwise the connection is handed to
 * scoutfs_net_shutdown().
 */
void scoutfs_client_net_shutdown(struct super_block *sb)
{
	struct client_info *client = SCOUTFS_SB(sb)->client_info;

	/* nothing to shut down without a client and its connection */
	if (!client || !client->conn)
		return;

	scoutfs_net_shutdown(sb, client->conn);
}

View File

@@ -35,6 +35,7 @@ int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_op
int scoutfs_client_resize_devices(struct super_block *sb, struct scoutfs_net_resize_devices *nrd);
int scoutfs_client_statfs(struct super_block *sb, struct scoutfs_net_statfs *nst);
/*
 * Shut down the client's network connection if the client info and its
 * connection exist; otherwise do nothing.  Called from umount_begin
 * once a forced unmount has been flagged.
 */
void scoutfs_client_net_shutdown(struct super_block *sb);
int scoutfs_client_setup(struct super_block *sb);
void scoutfs_client_destroy(struct super_block *sb);

View File

@@ -835,17 +835,9 @@ static void scoutfs_net_destroy_worker(struct work_struct *work)
if (conn->listening_conn && conn->notify_down)
conn->notify_down(sb, conn, conn->info, conn->rid);
/*
* Usually networking is idle and we destroy pending sends, but when forcing unmount
* we can have to wake up waiters by failing pending sends.
*/
list_splice_init(&conn->resend_queue, &conn->send_queue);
list_for_each_entry_safe(msend, tmp, &conn->send_queue, head) {
if (scoutfs_forcing_unmount(sb))
call_resp_func(sb, conn, msend->resp_func, msend->resp_data,
NULL, 0, -ECONNABORTED);
list_for_each_entry_safe(msend, tmp, &conn->send_queue, head)
free_msend(ninf, msend);
}
/* accepted sockets are removed from their listener's list */
if (conn->listening_conn) {
@@ -1134,9 +1126,11 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
struct net_info *ninf = SCOUTFS_SB(sb)->net_info;
struct scoutfs_net_connection *listener;
struct scoutfs_net_connection *acc_conn;
scoutfs_net_response_t resp_func;
struct message_send *msend;
struct message_send *tmp;
unsigned long delay;
void *resp_data;
trace_scoutfs_net_shutdown_work_enter(sb, 0, 0);
trace_scoutfs_conn_shutdown_start(conn);
@@ -1182,6 +1176,30 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
/* and wait for accepted conn shutdown work to finish */
wait_event(conn->waitq, empty_accepted_list(conn));
/*
* Forced unmount will cause net submit to fail once it's
* started and it calls shutdown to interrupt any previous
* senders waiting for a response. The response callbacks can
* do quite a lot of work so we're careful to call them outside
* the lock.
*/
if (scoutfs_forcing_unmount(sb)) {
spin_lock(&conn->lock);
list_splice_tail_init(&conn->send_queue, &conn->resend_queue);
while ((msend = list_first_entry_or_null(&conn->resend_queue,
struct message_send, head))) {
resp_func = msend->resp_func;
resp_data = msend->resp_data;
free_msend(ninf, msend);
spin_unlock(&conn->lock);
call_resp_func(sb, conn, resp_func, resp_data, NULL, 0, -ECONNABORTED);
spin_lock(&conn->lock);
}
spin_unlock(&conn->lock);
}
spin_lock(&conn->lock);
/* greetings aren't resent across sockets */

View File

@@ -2068,6 +2068,19 @@ static void server_log_merge_free_work(struct work_struct *work)
break;
}
/* Dirty the btree before freeing so that we can pin it
* so that later touches will succeed.
*/
init_log_merge_key(&key, SCOUTFS_LOG_MERGE_FREEING_ZONE,
le64_to_cpu(fr.seq), 0);
ret = scoutfs_btree_dirty(sb, &server->alloc,
&server->wri, &super->log_merge,
&key);
if (ret < 0) {
err_str = "dirtying log btree";
break;
}
ret = scoutfs_btree_free_blocks(sb, &server->alloc,
&server->wri, &fr.key,
&fr.root, 10);
@@ -2077,8 +2090,6 @@ static void server_log_merge_free_work(struct work_struct *work)
}
/* freed blocks are in allocator, we *have* to update key */
init_log_merge_key(&key, SCOUTFS_LOG_MERGE_FREEING_ZONE,
le64_to_cpu(fr.seq), 0);
if (scoutfs_key_is_ones(&fr.key))
ret = scoutfs_btree_delete(sb, &server->alloc,
&server->wri,
@@ -2415,7 +2426,9 @@ static int server_commit_log_merge(struct super_block *sb,
struct scoutfs_log_merge_range rng;
struct scoutfs_key key;
char *err_str = NULL;
int ret;
bool deleted = false;
int ret = 0;
int err = 0;
scoutfs_key_set_zeros(&rng.end);
@@ -2463,6 +2476,7 @@ static int server_commit_log_merge(struct super_block *sb,
err_str = "deleting orig request";
goto out;
}
deleted = true;
if (le64_to_cpu(comp->flags) & SCOUTFS_LOG_MERGE_COMP_ERROR) {
/* restore the range and reclaim the allocator if it failed */
@@ -2522,8 +2536,11 @@ out:
if (ret < 0)
scoutfs_err(sb, "error %d committing log merge: %s", ret, err_str);
ret = scoutfs_server_apply_commit(sb, ret);
BUG_ON(ret < 0); /* inconsistent */
err = scoutfs_server_apply_commit(sb, ret);
BUG_ON(ret < 0 && deleted); /* inconsistent */
if (ret == 0)
ret = err;
return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
}
@@ -3812,6 +3829,7 @@ static void scoutfs_server_worker(struct work_struct *work)
struct scoutfs_net_connection *conn = NULL;
DECLARE_WAIT_QUEUE_HEAD(waitq);
struct sockaddr_in sin;
bool alloc_init = false;
u64 max_seq;
int ret;
@@ -3820,6 +3838,8 @@ static void scoutfs_server_worker(struct work_struct *work)
scoutfs_quorum_slot_sin(super, opts->quorum_slot_nr, &sin);
scoutfs_info(sb, "server starting at "SIN_FMT, SIN_ARG(&sin));
scoutfs_block_writer_init(sb, &server->wri);
/* first make sure no other servers are still running */
ret = scoutfs_quorum_fence_leaders(sb, server->term);
if (ret < 0) {
@@ -3859,7 +3879,6 @@ static void scoutfs_server_worker(struct work_struct *work)
atomic64_set(&server->seq_atomic, le64_to_cpu(super->seq));
set_roots(server, &super->fs_root, &super->logs_root,
&super->srch_root);
scoutfs_block_writer_init(sb, &server->wri);
/* prepare server alloc for this transaction, larger first */
if (le64_to_cpu(super->server_meta_avail[0].total_nr) <
@@ -3870,6 +3889,7 @@ static void scoutfs_server_worker(struct work_struct *work)
scoutfs_alloc_init(&server->alloc,
&super->server_meta_avail[server->other_ind ^ 1],
&super->server_meta_freed[server->other_ind ^ 1]);
alloc_init = true;
server->other_avail = &super->server_meta_avail[server->other_ind];
server->other_freed = &super->server_meta_freed[server->other_ind];
@@ -3931,6 +3951,11 @@ shutdown:
/* wait for extra queues by requests, won't find waiters */
flush_work(&server->commit_work);
if (alloc_init)
scoutfs_alloc_prepare_commit(sb, &server->alloc, &server->wri);
scoutfs_block_writer_forget_all(sb, &server->wri);
scoutfs_lock_server_destroy(sb);
scoutfs_omap_server_shutdown(sb);

View File

@@ -271,6 +271,8 @@ static void scoutfs_umount_begin(struct super_block *sb)
scoutfs_warn(sb, "forcing unmount, can return errors and lose unsynced data");
sbi->forced_unmount = true;
scoutfs_client_net_shutdown(sb);
}
static const struct super_operations scoutfs_super_ops = {

View File

@@ -72,6 +72,12 @@ t_filter_dmesg()
re="$re|scoutfs .* error reading quorum block"
re="$re|scoutfs .* error .* writing quorum block"
re="$re|scoutfs .* error .* while checking to delete inode"
re="$re|scoutfs .* error .*writing btree blocks.*"
re="$re|scoutfs .* error .*writing super block.*"
re="$re|scoutfs .* error .* freeing merged btree blocks.*.looping commit del.*upd freeing item"
re="$re|scoutfs .* error .* freeing merged btree blocks.*.final commit del.upd freeing item"
re="$re|scoutfs .* error .*reading quorum block.*to update event.*"
re="$re|scoutfs .* error.*server failed to bind to.*"
egrep -v "($re)"
}