mirror of
https://github.com/versity/scoutfs.git
synced 2026-02-09 04:00:10 +00:00
scoutfs: centralize teardown in put_super
We were trying to tear down our mounted file system resources in the ->kill_sb() callback. This happens relatively early in the unmount process. We call kill_block_super() in our teardown which syncs the mount and tears down the vfs structures. By tearing down in ->kill_sb() we were forced to juggle tearing down before and after the call to kill_block_super(). When we got that wrong we'd tear down too many resources and crash in kill_block_super() or we wouldn't tear down enough and leave work still pending that'd explode as we tried to shut down after kill_block_super(). It turns out the vfs has a callback specifcally to solve this ordering problem. The put_super callback is called after having synced the mount but before its totally torn down. By putting all our shutdown in there we no longer have to worry about racing with active use. Auditing the shutdown dependencies also found some bad cases where we were tearding down subsystems that were still in use. The biggest problem was shutting down locking and networking before shutting down the transaction processing which relies on both. Now we first shut down all the client processing, then all the server processing, then the lowest level common infrastructure. The trickiest part in understanding this is knowing that kill_block_super() only calls put_super during mount failure if mount got far enough to assign the root dentry to s_root. We call put_super manually ourselves in mount failure if it didn't get far enough so that all teardown goes through put_super. (You'll see this s_root test in other upstream file system error paths.) Finally while auding the setup and shutdown paths I noticed a few, trans and counters, that needed simple fixes to properly cleanup errors and only shutdown if they've been setup. This all was stressed with an xfstests that races mount and unmount across the cluster. Before this change it'd crash/hang almost instantly and with this change it runs to completion. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -82,13 +82,14 @@ int scoutfs_setup_counters(struct super_block *sb)
|
||||
scoutfs_foreach_counter(sb, pcpu) {
|
||||
ret = percpu_counter_init(pcpu, 0, GFP_KERNEL);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
}
|
||||
|
||||
counters->kobj.kset = sbi->kset;
|
||||
init_completion(&counters->comp);
|
||||
ret = kobject_init_and_add(&counters->kobj, &scoutfs_counters_ktype,
|
||||
NULL, "counters");
|
||||
out:
|
||||
if (ret) {
|
||||
/* tear down partial to avoid destroying null kobjs */
|
||||
scoutfs_foreach_counter(sb, pcpu)
|
||||
|
||||
@@ -1909,6 +1909,40 @@ TRACE_EVENT(scoutfs_rename,
|
||||
__entry->new_inode_ino)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_super_lifecycle_class,
|
||||
TP_PROTO(struct super_block *sb),
|
||||
TP_ARGS(sb),
|
||||
TP_STRUCT__entry(
|
||||
__field(__u64, fsid)
|
||||
__field(void *, sb)
|
||||
__field(void *, sbi)
|
||||
__field(void *, s_root)
|
||||
),
|
||||
TP_fast_assign(
|
||||
__entry->fsid = SCOUTFS_SB(sb) ? FSID_ARG(sb) : 0;
|
||||
__entry->sb = sb;
|
||||
__entry->sbi = SCOUTFS_SB(sb);
|
||||
__entry->s_root = sb->s_root;
|
||||
),
|
||||
TP_printk("fsid "FSID_FMT" sb %p sbi %p s_root %p",
|
||||
__entry->fsid, __entry->sb, __entry->sbi, __entry->s_root)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_super_lifecycle_class, scoutfs_fill_super,
|
||||
TP_PROTO(struct super_block *sb),
|
||||
TP_ARGS(sb)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_super_lifecycle_class, scoutfs_put_super,
|
||||
TP_PROTO(struct super_block *sb),
|
||||
TP_ARGS(sb)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_super_lifecycle_class, scoutfs_kill_sb,
|
||||
TP_PROTO(struct super_block *sb),
|
||||
TP_ARGS(sb)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_SCOUTFS_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
||||
113
kmod/src/super.c
113
kmod/src/super.c
@@ -98,6 +98,42 @@ static int scoutfs_sync_fs(struct super_block *sb, int wait)
|
||||
return scoutfs_trans_sync(sb, wait);
|
||||
}
|
||||
|
||||
/*
|
||||
* This destroys all the state that's built up in the sb info during
|
||||
* mount. It's called by us on errors during mount if we haven't set
|
||||
* s_root, by mount after returning errors if we have set s_root, and by
|
||||
* unmount after having synced the super.
|
||||
*/
|
||||
static void scoutfs_put_super(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
trace_scoutfs_put_super(sb);
|
||||
|
||||
scoutfs_unlock_flags(sb, sbi->node_id_lock, DLM_LOCK_EX,
|
||||
SCOUTFS_LKF_NO_TASK_REF);
|
||||
sbi->node_id_lock = NULL;
|
||||
|
||||
scoutfs_shutdown_trans(sb);
|
||||
scoutfs_client_destroy(sb);
|
||||
scoutfs_data_destroy(sb);
|
||||
scoutfs_inode_destroy(sb);
|
||||
scoutfs_item_destroy(sb);
|
||||
|
||||
/* the server locks the listen address and compacts */
|
||||
scoutfs_server_destroy(sb);
|
||||
scoutfs_seg_destroy(sb);
|
||||
scoutfs_lock_destroy(sb);
|
||||
|
||||
debugfs_remove(sbi->debug_root);
|
||||
scoutfs_destroy_counters(sb);
|
||||
if (sbi->kset)
|
||||
kset_unregister(sbi->kset);
|
||||
kfree(sbi);
|
||||
|
||||
sb->s_fs_info = NULL;
|
||||
}
|
||||
|
||||
static const struct super_operations scoutfs_super_ops = {
|
||||
.alloc_inode = scoutfs_alloc_inode,
|
||||
.drop_inode = scoutfs_drop_inode,
|
||||
@@ -105,6 +141,7 @@ static const struct super_operations scoutfs_super_ops = {
|
||||
.destroy_inode = scoutfs_destroy_inode,
|
||||
.sync_fs = scoutfs_sync_fs,
|
||||
.statfs = scoutfs_statfs,
|
||||
.put_super = scoutfs_put_super,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -244,6 +281,8 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
struct inode *inode;
|
||||
int ret;
|
||||
|
||||
trace_scoutfs_fill_super(sb);
|
||||
|
||||
sb->s_magic = SCOUTFS_SUPER_MAGIC;
|
||||
sb->s_maxbytes = MAX_LFS_FILESIZE;
|
||||
sb->s_op = &scoutfs_super_ops;
|
||||
@@ -271,12 +310,14 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
|
||||
/* XXX can have multiple mounts of a device, need mount id */
|
||||
sbi->kset = kset_create_and_add(sb->s_id, NULL, &scoutfs_kset->kobj);
|
||||
if (!sbi->kset)
|
||||
return -ENOMEM;
|
||||
if (!sbi->kset) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_parse_options(sb, data, &opts);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
sbi->opts = opts;
|
||||
|
||||
@@ -288,23 +329,9 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
scoutfs_inode_setup(sb) ?:
|
||||
scoutfs_data_setup(sb) ?:
|
||||
scoutfs_setup_trans(sb) ?:
|
||||
scoutfs_lock_setup(sb);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
/*
|
||||
* The server is a bit magical because it can try to read the
|
||||
* device in async work context. Once we return an error from
|
||||
* here the kernel starts tearing down the mount and it isn't
|
||||
* safe to do IO. So we shut the server down before returning
|
||||
* an error.
|
||||
*
|
||||
* But we still want to start the server before the client to
|
||||
* help single mounts come up without passing through connection
|
||||
* timeouts.
|
||||
*/
|
||||
ret = scoutfs_server_setup(sb) ?:
|
||||
scoutfs_client_setup(sb);
|
||||
scoutfs_lock_setup(sb) ?:
|
||||
scoutfs_server_setup(sb) ?:
|
||||
scoutfs_client_setup(sb) ?:
|
||||
scoutfs_lock_node_id(sb, DLM_LOCK_EX, 0, sbi->node_id,
|
||||
&sbi->node_id_lock);
|
||||
if (ret)
|
||||
@@ -330,12 +357,10 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
// scoutfs_scan_orphans(sb);
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret) {
|
||||
scoutfs_server_destroy(sb);
|
||||
scoutfs_unlock_flags(sb, sbi->node_id_lock, DLM_LOCK_EX,
|
||||
SCOUTFS_LKF_NO_TASK_REF);
|
||||
sbi->node_id_lock = NULL;
|
||||
}
|
||||
/* on error, generic_shutdown_super calls put_super if s_root */
|
||||
if (ret && !sb->s_root)
|
||||
scoutfs_put_super(sb);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -345,42 +370,14 @@ static struct dentry *scoutfs_mount(struct file_system_type *fs_type, int flags,
|
||||
return mount_bdev(fs_type, flags, dev_name, data, scoutfs_fill_super);
|
||||
}
|
||||
|
||||
/*
|
||||
* kill_block_super eventually calls ->put_super if s_root is set
|
||||
*/
|
||||
static void scoutfs_kill_sb(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
/*
|
||||
* If we had successfully mounted then make sure dirty data
|
||||
* writeback and compaction is done before we kill the block
|
||||
* super and start tearing everything down.
|
||||
*/
|
||||
if (sb->s_root) {
|
||||
sync_filesystem(sb);
|
||||
|
||||
scoutfs_server_destroy(sb);
|
||||
}
|
||||
trace_scoutfs_kill_sb(sb);
|
||||
|
||||
kill_block_super(sb);
|
||||
|
||||
if (sbi) {
|
||||
scoutfs_unlock_flags(sb, sbi->node_id_lock, DLM_LOCK_EX,
|
||||
SCOUTFS_LKF_NO_TASK_REF);
|
||||
sbi->node_id_lock = NULL;
|
||||
|
||||
scoutfs_lock_destroy(sb);
|
||||
scoutfs_client_destroy(sb);
|
||||
scoutfs_server_destroy(sb);
|
||||
scoutfs_shutdown_trans(sb);
|
||||
scoutfs_data_destroy(sb);
|
||||
scoutfs_inode_destroy(sb);
|
||||
scoutfs_item_destroy(sb);
|
||||
scoutfs_seg_destroy(sb);
|
||||
debugfs_remove(sbi->debug_root);
|
||||
scoutfs_destroy_counters(sb);
|
||||
if (sbi->kset)
|
||||
kset_unregister(sbi->kset);
|
||||
kfree(sbi);
|
||||
}
|
||||
}
|
||||
|
||||
static struct file_system_type scoutfs_fs_type = {
|
||||
|
||||
@@ -497,10 +497,14 @@ void scoutfs_shutdown_trans(struct super_block *sb)
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
DECLARE_TRANS_INFO(sb, tri);
|
||||
|
||||
if (sbi->trans_write_workq) {
|
||||
cancel_delayed_work_sync(&sbi->trans_write_work);
|
||||
destroy_workqueue(sbi->trans_write_workq);
|
||||
if (tri) {
|
||||
if (sbi->trans_write_workq) {
|
||||
cancel_delayed_work_sync(&sbi->trans_write_work);
|
||||
destroy_workqueue(sbi->trans_write_workq);
|
||||
/* trans work schedules after shutdown see null */
|
||||
sbi->trans_write_workq = NULL;
|
||||
}
|
||||
kfree(tri);
|
||||
sbi->trans_info = NULL;
|
||||
}
|
||||
|
||||
kfree(tri);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user