Compare commits

..

1 Commits

Author SHA1 Message Date
Zach Brown 96f2ad29dc Add inode crtime creation time
Add an inode creation time field.  It's created for all new inodes.
It's visible to stat_more.  setattr_more can set it during
restore.

Signed-off-by: Zach Brown <zab@versity.com>
2021-07-08 11:00:30 -07:00
38 changed files with 330 additions and 891 deletions
+20 -62
View File
@@ -261,17 +261,20 @@ static bool invalid_extent(u64 start, u64 end, u64 first, u64 last)
static bool invalid_meta_blkno(struct super_block *sb, u64 blkno)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
u64 last_meta = (i_size_read(sbi->meta_bdev->bd_inode) >> SCOUTFS_BLOCK_LG_SHIFT) - 1;
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
return invalid_extent(blkno, blkno, SCOUTFS_META_DEV_START_BLKNO, last_meta);
return invalid_extent(blkno, blkno,
le64_to_cpu(super->first_meta_blkno),
le64_to_cpu(super->last_meta_blkno));
}
static bool invalid_data_extent(struct super_block *sb, u64 start, u64 len)
{
u64 last_data = (i_size_read(sb->s_bdev->bd_inode) >> SCOUTFS_BLOCK_SM_SHIFT) - 1;
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
return invalid_extent(start, start + len - 1, SCOUTFS_DATA_DEV_START_BLKNO, last_data);
return invalid_extent(start, start + len - 1,
le64_to_cpu(super->first_data_blkno),
le64_to_cpu(super->last_data_blkno));
}
void scoutfs_alloc_init(struct scoutfs_alloc *alloc,
@@ -977,39 +980,6 @@ int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
return ret;
}
/*
* Add new free space to an allocator. _ext_insert will make sure that it doesn't
* overlap with any existing extents. This is done by the server in a transaction that
* also updates total_*_blocks in the super so we don't verify.
*/
int scoutfs_alloc_insert(struct super_block *sb, struct scoutfs_alloc *alloc,
			 struct scoutfs_block_writer *wri, struct scoutfs_alloc_root *root,
			 u64 start, u64 len)
{
	/* context handed to the extent callbacks: which root to modify and how */
	struct alloc_ext_args ext_args = {
		.zone = SCOUTFS_FREE_EXTENT_BLKNO_ZONE,
		.root = root,
		.wri = wri,
		.alloc = alloc,
	};
	int ret;

	/* insert [start, start + len) with zeroed trailing arguments */
	ret = scoutfs_ext_insert(sb, &alloc_ext_ops, &ext_args, start, len, 0, 0);

	return ret;
}
/*
 * Remove the extent starting at @start and spanning @len from the
 * allocator btree at @root, using the blkno-sorted free extent zone.
 * Returns whatever scoutfs_ext_remove() returns.
 */
int scoutfs_alloc_remove(struct super_block *sb, struct scoutfs_alloc *alloc,
			 struct scoutfs_block_writer *wri, struct scoutfs_alloc_root *root,
			 u64 start, u64 len)
{
	/* context handed to the extent callbacks: which root to modify and how */
	struct alloc_ext_args ext_args = {
		.zone = SCOUTFS_FREE_EXTENT_BLKNO_ZONE,
		.root = root,
		.wri = wri,
		.alloc = alloc,
	};
	int ret;

	ret = scoutfs_ext_remove(sb, &alloc_ext_ops, &ext_args, start, len);

	return ret;
}
/*
* We only trim one block, instead of looping trimming all, because the
* caller is assuming that we do a fixed amount of work when they check
@@ -1056,31 +1026,18 @@ out:
}
/*
* True if the allocator has enough blocks in the avail list and space
* in the freed list to be able to perform the callers operations. If
* false the caller should back off and return partial progress rather
* than completely exhausting the avail list or overflowing the freed
* list.
* True if the allocator has enough free blocks to cow (alloc and free)
* a list block and all the btree blocks that store extent items.
*
* An extent modification dirties three distinct leaves of an allocator
* btree as it adds and removes the blkno and size sorted items for the
* old and new lengths of the extent. Dirtying the paths to these
* leaves can grow the tree and grow/shrink neighbours at each level.
* We over-estimate the number of blocks allocated and freed (the paths
* share a root, growth doesn't free) to err on the simpler and safer
* side. The overhead is minimal given the relatively large list blocks
* and relatively short allocator trees.
*
* The caller tells us how many extents they're about to modify and how
* many other additional blocks they may cow manually. And finally, the
* caller could be the first to dirty the avail and freed blocks in the
* allocator,
* At most, an extent operation can dirty down three paths of the tree
* to modify a blkno item and two distant order items. We can grow and
* split the root, and then those three paths could share blocks but each
* modify two leaf blocks.
*/
static bool list_has_blocks(struct super_block *sb, struct scoutfs_alloc *alloc,
struct scoutfs_alloc_root *root, u32 extents, u32 addl_blocks)
static bool list_can_cow(struct super_block *sb, struct scoutfs_alloc *alloc,
struct scoutfs_alloc_root *root)
{
u32 tree_blocks = (((1 + root->root.height) * 2) * 3) * extents;
u32 most = 1 + tree_blocks + addl_blocks;
u32 most = 1 + (1 + 1 + (3 * (1 - root->root.height + 1)));
if (le32_to_cpu(alloc->avail.first_nr) < most) {
scoutfs_inc_counter(sb, alloc_list_avail_lo);
@@ -1144,7 +1101,8 @@ int scoutfs_alloc_fill_list(struct super_block *sb,
goto out;
lblk = bl->data;
while (le32_to_cpu(lblk->nr) < target && list_has_blocks(sb, alloc, root, 1, 0)) {
while (le32_to_cpu(lblk->nr) < target &&
list_can_cow(sb, alloc, root)) {
ret = scoutfs_ext_alloc(sb, &alloc_ext_ops, &args, 0, 0,
target - le32_to_cpu(lblk->nr), &ext);
@@ -1188,7 +1146,7 @@ int scoutfs_alloc_empty_list(struct super_block *sb,
if (WARN_ON_ONCE(lhead_in_alloc(alloc, lhead)))
return -EINVAL;
while (lhead->ref.blkno && list_has_blocks(sb, alloc, args.root, 1, 1)) {
while (lhead->ref.blkno && list_can_cow(sb, alloc, args.root)) {
if (lhead->first_nr == 0) {
ret = trim_empty_first_block(sb, alloc, wri, lhead);
-6
View File
@@ -132,12 +132,6 @@ int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
struct scoutfs_alloc_root *dst,
struct scoutfs_alloc_root *src, u64 total,
__le64 *exclusive, __le64 *vacant, u64 zone_blocks);
int scoutfs_alloc_insert(struct super_block *sb, struct scoutfs_alloc *alloc,
struct scoutfs_block_writer *wri, struct scoutfs_alloc_root *root,
u64 start, u64 len);
int scoutfs_alloc_remove(struct super_block *sb, struct scoutfs_alloc *alloc,
struct scoutfs_block_writer *wri, struct scoutfs_alloc_root *root,
u64 start, u64 len);
int scoutfs_alloc_fill_list(struct super_block *sb,
struct scoutfs_alloc *alloc,
+2 -4
View File
@@ -645,11 +645,9 @@ static struct block_private *block_read(struct super_block *sb, u64 blkno)
goto out;
}
wait_event(binf->waitq, uptodate_or_error(bp));
if (test_bit(BLOCK_BIT_ERROR, &bp->bits))
ret = wait_event_interruptible(binf->waitq, uptodate_or_error(bp));
if (ret == 0 && test_bit(BLOCK_BIT_ERROR, &bp->bits))
ret = -EIO;
else
ret = 0;
out:
if (ret < 0) {
+4 -10
View File
@@ -297,14 +297,6 @@ int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_op
volopt, sizeof(*volopt), NULL, 0);
}
int scoutfs_client_resize_devices(struct super_block *sb, struct scoutfs_net_resize_devices *nrd)
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_RESIZE_DEVICES,
nrd, sizeof(*nrd), NULL, 0);
}
/* The client is receiving an invalidation request from the server */
static int client_lock(struct super_block *sb,
struct scoutfs_net_connection *conn, u8 cmd, u64 id,
@@ -631,8 +623,10 @@ void scoutfs_client_destroy(struct super_block *sb)
client_farewell_response,
NULL, NULL);
if (ret == 0) {
wait_for_completion(&client->farewell_comp);
ret = client->farewell_error;
ret = wait_for_completion_interruptible(
&client->farewell_comp);
if (ret == 0)
ret = client->farewell_error;
}
if (ret) {
scoutfs_inc_counter(sb, client_farewell_error);
-1
View File
@@ -33,7 +33,6 @@ int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
int scoutfs_client_resize_devices(struct super_block *sb, struct scoutfs_net_resize_devices *nrd);
int scoutfs_client_setup(struct super_block *sb);
void scoutfs_client_destroy(struct super_block *sb);
+1
View File
@@ -88,6 +88,7 @@
EXPAND_COUNTER(forest_read_items) \
EXPAND_COUNTER(forest_roots_next_hint) \
EXPAND_COUNTER(forest_set_bloom_bits) \
EXPAND_COUNTER(inode_evict_intr) \
EXPAND_COUNTER(item_clear_dirty) \
EXPAND_COUNTER(item_create) \
EXPAND_COUNTER(item_delete) \
+8 -23
View File
@@ -207,7 +207,6 @@ static s64 truncate_extents(struct super_block *sb, struct inode *inode,
u64 offset;
s64 ret;
u8 flags;
int err;
int i;
flags = offline ? SEF_OFFLINE : 0;
@@ -247,18 +246,6 @@ static s64 truncate_extents(struct super_block *sb, struct inode *inode,
tr.len = min(ext.len - offset, last - iblock + 1);
tr.flags = ext.flags;
trace_scoutfs_data_extent_truncated(sb, ino, &tr);
ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
tr.start, tr.len, 0, flags);
if (ret < 0) {
if (WARN_ON_ONCE(ret == -EINVAL)) {
scoutfs_err(sb, "unexpected truncate inconsistency: ino %llu iblock %llu last %llu, start %llu len %llu",
ino, iblock, last, tr.start, tr.len);
}
break;
}
if (tr.map) {
mutex_lock(&datinf->mutex);
ret = scoutfs_free_data(sb, datinf->alloc,
@@ -266,16 +253,16 @@ static s64 truncate_extents(struct super_block *sb, struct inode *inode,
&datinf->data_freed,
tr.map, tr.len);
mutex_unlock(&datinf->mutex);
if (ret < 0) {
err = scoutfs_ext_set(sb, &data_ext_ops, &args,
tr.start, tr.len, tr.map, tr.flags);
if (err < 0)
scoutfs_err(sb, "truncate err %d restoring extent after error %lld: ino %llu start %llu len %llu",
err, ret, ino, tr.start, tr.len);
if (ret < 0)
break;
}
}
trace_scoutfs_data_extent_truncated(sb, ino, &tr);
ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
tr.start, tr.len, 0, flags);
BUG_ON(ret); /* inconsistent, could prealloc items */
iblock += tr.len;
}
@@ -1031,10 +1018,8 @@ long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
end = (iblock + ret) << SCOUTFS_BLOCK_SM_SHIFT;
if (end > offset + len)
end = offset + len;
if (end > i_size_read(inode)) {
if (end > i_size_read(inode))
i_size_write(inode, end);
scoutfs_inode_inc_data_version(inode);
}
}
if (ret >= 0)
scoutfs_update_inode_item(inode, lock, &ind_locks);
+11 -2
View File
@@ -253,7 +253,7 @@ static u64 dirent_name_hash(const char *name, unsigned int name_len)
((u64)dirent_name_fingerprint(name, name_len) << 32);
}
static bool dirent_names_equal(const char *a_name, unsigned int a_len,
static u64 dirent_names_equal(const char *a_name, unsigned int a_len,
const char *b_name, unsigned int b_len)
{
return a_len == b_len && memcmp(a_name, b_name, a_len) == 0;
@@ -462,7 +462,7 @@ out:
else if (ino == 0)
inode = NULL;
else
inode = scoutfs_iget(sb, ino, 0);
inode = scoutfs_iget(sb, ino);
/*
* We can't splice dir aliases into the dcache. dir entries
@@ -753,6 +753,7 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
struct inode *inode = NULL;
struct scoutfs_lock *dir_lock = NULL;
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_inode_info *si;
LIST_HEAD(ind_locks);
u64 hash;
u64 pos;
@@ -766,6 +767,7 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
&dir_lock, &inode_lock, NULL, &ind_locks);
if (IS_ERR(inode))
return PTR_ERR(inode);
si = SCOUTFS_I(inode);
pos = SCOUTFS_I(dir)->next_readdir_pos++;
@@ -781,6 +783,7 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
inode->i_mtime = inode->i_atime = inode->i_ctime = dir->i_mtime;
si->crtime = inode->i_mtime;
if (S_ISDIR(mode)) {
inc_nlink(inode);
@@ -1185,6 +1188,7 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
struct inode *inode = NULL;
struct scoutfs_lock *dir_lock = NULL;
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_inode_info *si;
LIST_HEAD(ind_locks);
u64 hash;
u64 pos;
@@ -1205,6 +1209,7 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
&dir_lock, &inode_lock, NULL, &ind_locks);
if (IS_ERR(inode))
return PTR_ERR(inode);
si = SCOUTFS_I(inode);
ret = symlink_item_ops(sb, SYM_CREATE, scoutfs_ino(inode), inode_lock,
symname, name_len);
@@ -1226,6 +1231,7 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
inode->i_ctime = dir->i_mtime;
si->crtime = inode->i_ctime;
i_size_write(inode, name_len);
scoutfs_update_inode_item(inode, inode_lock, &ind_locks);
@@ -1817,6 +1823,7 @@ static int scoutfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mod
struct scoutfs_lock *dir_lock = NULL;
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_lock *orph_lock = NULL;
struct scoutfs_inode_info *si;
LIST_HEAD(ind_locks);
int ret;
@@ -1827,6 +1834,7 @@ static int scoutfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mod
&dir_lock, &inode_lock, &orph_lock, &ind_locks);
if (IS_ERR(inode))
return PTR_ERR(inode);
si = SCOUTFS_I(inode);
ret = scoutfs_inode_orphan_create(sb, scoutfs_ino(inode), orph_lock);
if (ret < 0) {
@@ -1835,6 +1843,7 @@ static int scoutfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mod
}
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
si->crtime = inode->i_mtime;
insert_inode_hash(inode);
ihold(inode); /* need to update inode modifications in d_tmpfile */
d_tmpfile(dentry, inode);
+3 -3
View File
@@ -81,7 +81,7 @@ static struct dentry *scoutfs_fh_to_dentry(struct super_block *sb,
trace_scoutfs_fh_to_dentry(sb, fh_type, sfid);
if (scoutfs_valid_fileid(fh_type))
inode = scoutfs_iget(sb, le64_to_cpu(sfid->ino), 0);
inode = scoutfs_iget(sb, le64_to_cpu(sfid->ino));
return d_obtain_alias(inode);
}
@@ -100,7 +100,7 @@ static struct dentry *scoutfs_fh_to_parent(struct super_block *sb,
if (scoutfs_valid_fileid(fh_type) &&
fh_type == FILEID_SCOUTFS_WITH_PARENT)
inode = scoutfs_iget(sb, le64_to_cpu(sfid->parent_ino), 0);
inode = scoutfs_iget(sb, le64_to_cpu(sfid->parent_ino));
return d_obtain_alias(inode);
}
@@ -123,7 +123,7 @@ static struct dentry *scoutfs_get_parent(struct dentry *child)
scoutfs_dir_free_backref_path(sb, &list);
trace_scoutfs_get_parent(sb, inode, ino);
inode = scoutfs_iget(sb, ino, 0);
inode = scoutfs_iget(sb, ino);
return d_obtain_alias(inode);
}
+1 -1
View File
@@ -376,7 +376,7 @@ int scoutfs_fence_wait_fenced(struct super_block *sb, long timeout_jiffies)
bool error;
long ret;
ret = wait_event_timeout(fi->waitq, all_fenced(fi, &error), timeout_jiffies);
ret = wait_event_interruptible_timeout(fi->waitq, all_fenced(fi, &error), timeout_jiffies);
if (ret == 0)
ret = -ETIMEDOUT;
else if (ret > 0)
+3 -8
View File
@@ -747,6 +747,9 @@ int scoutfs_forest_setup(struct super_block *sb)
goto out;
}
queue_delayed_work(finf->workq, &finf->log_merge_dwork,
msecs_to_jiffies(LOG_MERGE_DELAY_MS));
ret = 0;
out:
if (ret)
@@ -755,14 +758,6 @@ out:
return 0;
}
/*
 * Queue the first run of the log merge delayed work on the forest's
 * workqueue, LOG_MERGE_DELAY_MS milliseconds from now.
 */
void scoutfs_forest_start(struct super_block *sb)
{
	DECLARE_FOREST_INFO(sb, finf);
	unsigned long delay = msecs_to_jiffies(LOG_MERGE_DELAY_MS);

	queue_delayed_work(finf->workq, &finf->log_merge_dwork, delay);
}
void scoutfs_forest_stop(struct super_block *sb)
{
DECLARE_FOREST_INFO(sb, finf);
-1
View File
@@ -39,7 +39,6 @@ void scoutfs_forest_get_btrees(struct super_block *sb,
struct scoutfs_log_trees *lt);
int scoutfs_forest_setup(struct super_block *sb);
void scoutfs_forest_start(struct super_block *sb);
void scoutfs_forest_stop(struct super_block *sb);
void scoutfs_forest_destroy(struct super_block *sb);
+5 -7
View File
@@ -779,7 +779,11 @@ struct scoutfs_super_block {
__le64 seq;
__le64 next_ino;
__le64 total_meta_blocks; /* both static and dynamic */
__le64 first_meta_blkno; /* first dynamically allocated */
__le64 last_meta_blkno;
__le64 total_data_blocks;
__le64 first_data_blkno;
__le64 last_data_blkno;
struct scoutfs_quorum_config qconf;
struct scoutfs_alloc_root meta_alloc[2];
struct scoutfs_alloc_root data_alloc;
@@ -817,7 +821,6 @@ struct scoutfs_super_block {
* online by staging.
*
* XXX
* - otime?
* - compat flags?
* - version?
* - generation?
@@ -841,6 +844,7 @@ struct scoutfs_inode {
struct scoutfs_timespec atime;
struct scoutfs_timespec ctime;
struct scoutfs_timespec mtime;
struct scoutfs_timespec crtime;
};
#define SCOUTFS_INO_FLAG_TRUNCATE 0x1
@@ -986,7 +990,6 @@ enum scoutfs_net_cmd {
SCOUTFS_NET_CMD_GET_VOLOPT,
SCOUTFS_NET_CMD_SET_VOLOPT,
SCOUTFS_NET_CMD_CLEAR_VOLOPT,
SCOUTFS_NET_CMD_RESIZE_DEVICES,
SCOUTFS_NET_CMD_FAREWELL,
SCOUTFS_NET_CMD_UNKNOWN,
};
@@ -1029,11 +1032,6 @@ struct scoutfs_net_roots {
struct scoutfs_btree_root srch_root;
};
/*
 * Payload of a SCOUTFS_NET_CMD_RESIZE_DEVICES request.  Both fields are
 * little-endian; the resize ioctl fills them from its userspace
 * argument with cpu_to_le64().
 *
 * NOTE(review): the names suggest these are the new total block counts
 * of the metadata and data devices -- confirm units against the server.
 */
struct scoutfs_net_resize_devices {
__le64 new_total_meta_blocks;
__le64 new_total_data_blocks;
};
struct scoutfs_net_lock {
struct scoutfs_key key;
__le64 write_seq;
+85 -82
View File
@@ -59,7 +59,7 @@ struct inode_sb_info {
bool stopped;
spinlock_t writeback_lock;
struct list_head writeback_list;
struct rb_root writeback_inodes;
struct inode_allocator dir_ino_alloc;
struct inode_allocator ino_alloc;
@@ -68,9 +68,6 @@ struct inode_sb_info {
/* serialize multiple inode ->evict trying to delete same ino's items */
spinlock_t deleting_items_lock;
struct list_head deleting_items_list;
struct work_struct iput_work;
struct llist_head iput_llist;
};
#define DECLARE_INODE_SB_INFO(sb, name) \
@@ -95,9 +92,9 @@ static void scoutfs_inode_ctor(void *obj)
atomic64_set(&si->data_waitq.changed, 0);
init_waitqueue_head(&si->data_waitq.waitq);
init_rwsem(&si->xattr_rwsem);
INIT_LIST_HEAD(&si->writeback_entry);
RB_CLEAR_NODE(&si->writeback_node);
scoutfs_lock_init_coverage(&si->ino_lock_cov);
atomic_set(&si->iput_count, 0);
atomic_set(&si->inv_iput_count, 0);
inode_init_once(&si->inode);
}
@@ -121,14 +118,47 @@ static void scoutfs_i_callback(struct rcu_head *head)
kmem_cache_free(scoutfs_inode_cachep, SCOUTFS_I(inode));
}
/*
 * Link @ins into the writeback rbtree, which is keyed by inode number.
 * Finding an inode with the same number already in the tree is treated
 * as a fatal bug.
 */
static void insert_writeback_inode(struct inode_sb_info *inf,
				   struct scoutfs_inode_info *ins)
{
	struct rb_root *root = &inf->writeback_inodes;
	struct rb_node **link = &root->rb_node;
	struct rb_node *parent = NULL;
	struct scoutfs_inode_info *cur;

	/* descend to the empty child slot where the new node belongs */
	while (*link) {
		parent = *link;
		cur = container_of(parent, struct scoutfs_inode_info,
				   writeback_node);

		if (ins->ino == cur->ino)
			BUG();
		else if (ins->ino < cur->ino)
			link = &parent->rb_left;
		else
			link = &parent->rb_right;
	}

	rb_link_node(&ins->writeback_node, parent, link);
	rb_insert_color(&ins->writeback_node, root);
}
/*
 * Unlink @si from the writeback rbtree if it is currently linked.  The
 * node is cleared afterwards so that it tests as empty again.
 */
static void remove_writeback_inode(struct inode_sb_info *inf,
				   struct scoutfs_inode_info *si)
{
	struct rb_node *node = &si->writeback_node;

	/* nothing to do when the inode isn't in the tree */
	if (RB_EMPTY_NODE(node))
		return;

	rb_erase(node, &inf->writeback_inodes);
	RB_CLEAR_NODE(node);
}
void scoutfs_destroy_inode(struct inode *inode)
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
DECLARE_INODE_SB_INFO(inode->i_sb, inf);
spin_lock(&inf->writeback_lock);
if (!list_empty(&si->writeback_entry))
list_del_init(&si->writeback_entry);
remove_writeback_inode(inf, SCOUTFS_I(inode));
spin_unlock(&inf->writeback_lock);
scoutfs_lock_del_coverage(inode->i_sb, &si->ino_lock_cov);
@@ -232,6 +262,8 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
si->next_readdir_pos = le64_to_cpu(cinode->next_readdir_pos);
si->next_xattr_id = le64_to_cpu(cinode->next_xattr_id);
si->flags = le32_to_cpu(cinode->flags);
si->crtime.tv_sec = le64_to_cpu(cinode->crtime.sec);
si->crtime.tv_nsec = le32_to_cpu(cinode->crtime.nsec);
/*
* i_blocks is initialized from online and offline and is then
@@ -662,14 +694,14 @@ struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino)
return ilookup5(sb, ino, scoutfs_iget_test, &ino);
}
struct inode *scoutfs_iget(struct super_block *sb, u64 ino, int lkf)
struct inode *scoutfs_iget(struct super_block *sb, u64 ino)
{
struct scoutfs_lock *lock = NULL;
struct scoutfs_inode_info *si;
struct inode *inode;
int ret;
ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_READ, lkf, ino, &lock);
ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_READ, 0, ino, &lock);
if (ret)
return ERR_PTR(ret);
@@ -734,6 +766,9 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
cinode->next_readdir_pos = cpu_to_le64(si->next_readdir_pos);
cinode->next_xattr_id = cpu_to_le64(si->next_xattr_id);
cinode->flags = cpu_to_le32(si->flags);
cinode->crtime.sec = cpu_to_le64(si->crtime.tv_sec);
cinode->crtime.nsec = cpu_to_le32(si->crtime.tv_nsec);
memset(cinode->crtime.__pad, 0, sizeof(cinode->crtime.__pad));
}
/*
@@ -1627,6 +1662,11 @@ void scoutfs_evict_inode(struct inode *inode)
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
scoutfs_unlock(sb, orph_lock, SCOUTFS_LOCK_WRITE_ONLY);
}
if (ret == -ERESTARTSYS) {
/* can be in task with pending, could be found as orphan */
scoutfs_inc_counter(sb, inode_evict_intr);
ret = 0;
}
if (ret < 0) {
scoutfs_err(sb, "error %d while checking to delete inode nr %llu, it might linger.",
ret, ino);
@@ -1664,49 +1704,6 @@ int scoutfs_drop_inode(struct inode *inode)
generic_drop_inode(inode);
}
/*
 * Work function that performs the iput calls queued by
 * scoutfs_inode_queue_iput().  Each queued inode gets one iput per
 * recorded put; the loop stops once the decremented count is no longer
 * positive.
 */
static void iput_worker(struct work_struct *work)
{
	struct inode_sb_info *inf = container_of(work, struct inode_sb_info, iput_work);
	struct scoutfs_inode_info *next;
	struct scoutfs_inode_info *si;
	struct llist_node *batch;
	int remaining;

	/* detach everything queued so far in one go */
	batch = llist_del_all(&inf->iput_llist);

	llist_for_each_entry_safe(si, next, batch, iput_llnode) {
		for (;;) {
			/* sample the count before the iput, as the put may be the last */
			remaining = atomic_dec_return(&si->iput_count);
			iput(&si->inode);
			if (remaining <= 0)
				break;
		}
	}
}
/*
* Final iput can get into evict and perform final inode deletion which
* can delete a lot of items spanning multiple cluster locks and
* transactions. It should be understood as a heavy high level
* operation, more like file writing and less like dropping a refcount.
*
* Unfortunately we also have incentives to use igrab/iput from internal
* contexts that have no business doing that work, like lock
* invalidation or dirty inode writeback during transaction commit.
*
* In those cases we can kick iput off to background work context.
* Nothing stops multiple puts of an inode before the work runs so we
* can track multiple puts in flight.
*/
/*
 * Record one deferred iput for @inode and schedule the work that will
 * perform it.  The iput itself is issued later by iput_worker(), once
 * per recorded put.
 */
void scoutfs_inode_queue_iput(struct inode *inode)
{
DECLARE_INODE_SB_INFO(inode->i_sb, inf);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
/*
 * Only the put that raises the count to 1 links the inode onto the
 * list.  Later puts just bump the count, which iput_worker() drains
 * with repeated iput calls.
 */
if (atomic_inc_return(&si->iput_count) == 1)
llist_add(&si->iput_llnode, &inf->iput_llist);
smp_wmb(); /* count and list visible before work executes */
/* scheduled on every call, not only when the inode was newly added */
schedule_work(&inf->iput_work);
}
/*
* All mounts are performing this work concurrently. We introduce
* significant jitter between them to try and keep them from all
@@ -1822,7 +1819,7 @@ static void inode_orphan_scan_worker(struct work_struct *work)
}
/* try to cache and evict the unused inode to delete it, can be racing */
inode = scoutfs_iget(sb, ino, 0);
inode = scoutfs_iget(sb, ino);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
if (ret == -ENOENT)
@@ -1851,33 +1848,30 @@ out:
* ourselves in knots trying to call through the high level vfs sync
* methods.
*
* File data block allocations tend to advance through free space so we
* add the inode to the end of the list to roughly encourage sequential
* IO.
*
* This is called by writers who hold the inode and transaction. The
* inode is removed from the list by evict->destroy if it's unlinked
* during the transaction or by committing the transaction. Pruning the
* icache won't try to evict the inode as long as it has dirty buffers.
* inode's presence in the rbtree is removed by destroy_inode, prevented
* by the inode hold, and by committing the transaction, which is
* prevented by holding the transaction. The inode can only go from
* empty to on the rbtree while we're here.
*/
void scoutfs_inode_queue_writeback(struct inode *inode)
{
DECLARE_INODE_SB_INFO(inode->i_sb, inf);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
if (list_empty(&si->writeback_entry)) {
if (RB_EMPTY_NODE(&si->writeback_node)) {
spin_lock(&inf->writeback_lock);
if (list_empty(&si->writeback_entry))
list_add_tail(&si->writeback_entry, &inf->writeback_list);
if (RB_EMPTY_NODE(&si->writeback_node))
insert_writeback_inode(inf, si);
spin_unlock(&inf->writeback_lock);
}
}
/*
* Walk our dirty inodes and either start dirty page writeback or wait
* for writeback to complete.
* Walk our dirty inodes in ino order and either start dirty page
* writeback or wait for writeback to complete.
*
* This is called by transaction committing so other writers are
* This is called by transaction committing so other writers are
* excluded. We're still very careful to iterate over the tree while it
* and the inodes could be changing.
*
@@ -1890,19 +1884,29 @@ int scoutfs_inode_walk_writeback(struct super_block *sb, bool write)
{
DECLARE_INODE_SB_INFO(sb, inf);
struct scoutfs_inode_info *si;
struct scoutfs_inode_info *tmp;
struct rb_node *node;
struct inode *inode;
struct inode *defer_iput = NULL;
int ret;
spin_lock(&inf->writeback_lock);
list_for_each_entry_safe(si, tmp, &inf->writeback_list, writeback_entry) {
node = rb_first(&inf->writeback_inodes);
while (node) {
si = container_of(node, struct scoutfs_inode_info,
writeback_node);
node = rb_next(node);
inode = igrab(&si->inode);
if (!inode)
continue;
spin_unlock(&inf->writeback_lock);
if (defer_iput) {
iput(defer_iput);
defer_iput = NULL;
}
if (write)
ret = filemap_fdatawrite(inode->i_mapping);
else
@@ -1910,28 +1914,28 @@ int scoutfs_inode_walk_writeback(struct super_block *sb, bool write)
trace_scoutfs_inode_walk_writeback(sb, scoutfs_ino(inode),
write, ret);
if (ret) {
scoutfs_inode_queue_iput(inode);
iput(inode);
goto out;
}
spin_lock(&inf->writeback_lock);
/* restore tmp after reacquiring lock */
if (WARN_ON_ONCE(list_empty(&si->writeback_entry)))
tmp = list_first_entry(&inf->writeback_list, struct scoutfs_inode_info,
writeback_entry);
if (WARN_ON_ONCE(RB_EMPTY_NODE(&si->writeback_node)))
node = rb_first(&inf->writeback_inodes);
else
tmp = list_next_entry(si, writeback_entry);
node = rb_next(&si->writeback_node);
if (!write)
list_del_init(&si->writeback_entry);
remove_writeback_inode(inf, si);
scoutfs_inode_queue_iput(inode);
/* avoid iput->destroy lock deadlock */
defer_iput = inode;
}
spin_unlock(&inf->writeback_lock);
out:
if (defer_iput)
iput(defer_iput);
return ret;
}
@@ -1946,14 +1950,12 @@ int scoutfs_inode_setup(struct super_block *sb)
inf->sb = sb;
spin_lock_init(&inf->writeback_lock);
INIT_LIST_HEAD(&inf->writeback_list);
inf->writeback_inodes = RB_ROOT;
spin_lock_init(&inf->dir_ino_alloc.lock);
spin_lock_init(&inf->ino_alloc.lock);
INIT_DELAYED_WORK(&inf->orphan_scan_dwork, inode_orphan_scan_worker);
spin_lock_init(&inf->deleting_items_lock);
INIT_LIST_HEAD(&inf->deleting_items_list);
INIT_WORK(&inf->iput_work, iput_worker);
init_llist_head(&inf->iput_llist);
sbi->inode_sb_info = inf;
@@ -1965,11 +1967,12 @@ int scoutfs_inode_setup(struct super_block *sb)
* many other subsystems like networking and the server. We only kick
* it off once everything is ready.
*/
void scoutfs_inode_start(struct super_block *sb)
int scoutfs_inode_start(struct super_block *sb)
{
DECLARE_INODE_SB_INFO(sb, inf);
schedule_orphan_dwork(inf);
return 0;
}
void scoutfs_inode_stop(struct super_block *sb)
+6 -6
View File
@@ -20,6 +20,7 @@ struct scoutfs_inode_info {
u64 online_blocks;
u64 offline_blocks;
u32 flags;
struct timespec crtime;
/*
* Protects per-inode extent items, most particularly readers
@@ -49,14 +50,14 @@ struct scoutfs_inode_info {
struct scoutfs_per_task pt_data_lock;
struct scoutfs_data_waitq data_waitq;
struct rw_semaphore xattr_rwsem;
struct list_head writeback_entry;
struct rb_node writeback_node;
struct scoutfs_lock_coverage ino_lock_cov;
/* drop if i_count hits 0, allows drop while invalidate holds coverage */
bool drop_invalidated;
struct llist_node iput_llnode;
atomic_t iput_count;
struct llist_node inv_iput_llnode;
atomic_t inv_iput_count;
struct inode inode;
};
@@ -75,9 +76,8 @@ struct inode *scoutfs_alloc_inode(struct super_block *sb);
void scoutfs_destroy_inode(struct inode *inode);
int scoutfs_drop_inode(struct inode *inode);
void scoutfs_evict_inode(struct inode *inode);
void scoutfs_inode_queue_iput(struct inode *inode);
struct inode *scoutfs_iget(struct super_block *sb, u64 ino, int lkf);
struct inode *scoutfs_iget(struct super_block *sb, u64 ino);
struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino);
void scoutfs_inode_init_index_key(struct scoutfs_key *key, u8 type, u64 major,
@@ -132,7 +132,7 @@ void scoutfs_inode_exit(void);
int scoutfs_inode_init(void);
int scoutfs_inode_setup(struct super_block *sb);
void scoutfs_inode_start(struct super_block *sb);
int scoutfs_inode_start(struct super_block *sb);
void scoutfs_inode_stop(struct super_block *sb);
void scoutfs_inode_destroy(struct super_block *sb);
+11 -49
View File
@@ -541,6 +541,7 @@ out:
static long scoutfs_ioc_stat_more(struct file *file, unsigned long arg)
{
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct scoutfs_ioctl_stat_more stm;
if (get_user(stm.valid_bytes, (__u64 __user *)arg))
@@ -552,6 +553,8 @@ static long scoutfs_ioc_stat_more(struct file *file, unsigned long arg)
stm.data_seq = scoutfs_inode_data_seq(inode);
stm.data_version = scoutfs_inode_data_version(inode);
scoutfs_inode_get_onoff(inode, &stm.online_blocks, &stm.offline_blocks);
stm.crtime_sec = si->crtime.tv_sec;
stm.crtime_nsec = si->crtime.tv_nsec;
if (copy_to_user((void __user *)arg, &stm, stm.valid_bytes))
return -EFAULT;
@@ -617,6 +620,7 @@ static long scoutfs_ioc_data_waiting(struct file *file, unsigned long arg)
static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
{
struct inode *inode = file->f_inode;
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_ioctl_setattr_more __user *usm = (void __user *)arg;
struct scoutfs_ioctl_setattr_more sm;
@@ -685,6 +689,8 @@ static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
i_size_write(inode, sm.i_size);
inode->i_ctime.tv_sec = sm.ctime_sec;
inode->i_ctime.tv_nsec = sm.ctime_nsec;
si->crtime.tv_sec = sm.crtime_sec;
si->crtime.tv_nsec = sm.crtime_nsec;
scoutfs_update_inode_item(inode, lock, &ind_locks);
ret = 0;
@@ -867,21 +873,13 @@ static long scoutfs_ioc_statfs_more(struct file *file, unsigned long arg)
{
struct super_block *sb = file_inode(file)->i_sb;
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_super_block *super;
struct scoutfs_super_block *super = &sbi->super;
struct scoutfs_ioctl_statfs_more sfm;
int ret;
if (get_user(sfm.valid_bytes, (__u64 __user *)arg))
return -EFAULT;
super = kzalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
if (!super)
return -ENOMEM;
ret = scoutfs_read_super(sb, super);
if (ret)
goto out;
sfm.valid_bytes = min_t(u64, sfm.valid_bytes,
sizeof(struct scoutfs_ioctl_statfs_more));
sfm.fsid = le64_to_cpu(super->hdr.fsid);
@@ -892,15 +890,12 @@ static long scoutfs_ioc_statfs_more(struct file *file, unsigned long arg)
ret = scoutfs_client_get_last_seq(sb, &sfm.committed_seq);
if (ret)
goto out;
return ret;
if (copy_to_user((void __user *)arg, &sfm, sfm.valid_bytes))
ret = -EFAULT;
else
ret = 0;
out:
kfree(super);
return ret;
return -EFAULT;
return 0;
}
struct copy_alloc_detail_args {
@@ -1004,37 +999,6 @@ out:
return ret;
}
/*
 * Resize-devices ioctl handler.  The file must be open for reading and
 * the caller needs CAP_SYS_ADMIN.  The userspace argument is copied in,
 * converted to little-endian, and sent as a client resize request.
 *
 * Returns -EBADF, -EPERM or -EFAULT for the failed checks above,
 * otherwise the result of scoutfs_client_resize_devices().
 */
static long scoutfs_ioc_resize_devices(struct file *file, unsigned long arg)
{
	struct scoutfs_ioctl_resize_devices __user *urd = (void __user *)arg;
	struct super_block *sb = file_inode(file)->i_sb;
	struct scoutfs_ioctl_resize_devices rd;
	struct scoutfs_net_resize_devices nrd;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&rd, urd, sizeof(rd)))
		return -EFAULT;

	/* the network struct carries the same two totals in little-endian */
	nrd.new_total_meta_blocks = cpu_to_le64(rd.new_total_meta_blocks);
	nrd.new_total_data_blocks = cpu_to_le64(rd.new_total_data_blocks);

	return scoutfs_client_resize_devices(sb, &nrd);
}
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -1064,8 +1028,6 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return scoutfs_ioc_alloc_detail(file, arg);
case SCOUTFS_IOC_MOVE_BLOCKS:
return scoutfs_ioc_move_blocks(file, arg);
case SCOUTFS_IOC_RESIZE_DEVICES:
return scoutfs_ioc_resize_devices(file, arg);
}
return -ENOTTY;
+5 -9
View File
@@ -232,6 +232,9 @@ struct scoutfs_ioctl_stat_more {
__u64 data_version;
__u64 online_blocks;
__u64 offline_blocks;
__u64 crtime_sec;
__u32 crtime_nsec;
__u8 _pad[4];
};
#define SCOUTFS_IOC_STAT_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 5, \
@@ -275,7 +278,8 @@ struct scoutfs_ioctl_setattr_more {
__u64 flags;
__u64 ctime_sec;
__u32 ctime_nsec;
__u8 _pad[4];
__u32 crtime_nsec;
__u64 crtime_sec;
};
#define SCOUTFS_IOC_SETATTR_MORE_OFFLINE (1 << 0)
@@ -477,12 +481,4 @@ struct scoutfs_ioctl_move_blocks {
#define SCOUTFS_IOC_MOVE_BLOCKS _IOR(SCOUTFS_IOCTL_MAGIC, 13, \
struct scoutfs_ioctl_move_blocks)
/*
 * Argument for SCOUTFS_IOC_RESIZE_DEVICES.
 *
 * Each field is the desired new total size of a device, counted in that
 * device's blocks.  The server handler treats a value of 0, or a value
 * equal to the current total, as "no change" for that device, and
 * rejects values that are smaller than the current total or larger than
 * the underlying block device with -EINVAL.
 */
struct scoutfs_ioctl_resize_devices {
/* new total_meta_blocks for the metadata device */
__u64 new_total_meta_blocks;
/* new total_data_blocks for the data device */
__u64 new_total_data_blocks;
};
#define SCOUTFS_IOC_RESIZE_DEVICES \
_IOR(SCOUTFS_IOCTL_MAGIC, 14, struct scoutfs_ioctl_resize_devices)
#endif
+39 -10
View File
@@ -89,6 +89,8 @@ struct lock_info {
struct work_struct shrink_work;
struct list_head shrink_list;
atomic64_t next_refresh_gen;
struct work_struct inv_iput_work;
struct llist_head inv_iput_llist;
struct dentry *tseq_dentry;
struct scoutfs_tseq_tree tseq_tree;
@@ -124,6 +126,34 @@ static bool lock_modes_match(int granted, int requested)
requested == SCOUTFS_LOCK_READ);
}
/*
* Final iput can get into evict and perform final inode deletion which
* can delete a lot of items under locks and transactions. We really
* don't want to be doing all that in an iput during invalidation. When
* invalidation sees that iput might perform final deletion it puts them
* on a list and queues this work.
*
* Nothing stops multiple puts for multiple invalidations of an inode
* before the work runs so we can track multiple puts in flight.
*/
static void lock_inv_iput_worker(struct work_struct *work)
{
struct lock_info *linfo = container_of(work, struct lock_info, inv_iput_work);
struct scoutfs_inode_info *si;
struct scoutfs_inode_info *tmp;
struct llist_node *inodes;
bool more;
/* detach every inode queued so far in one step; the list head is left empty */
inodes = llist_del_all(&linfo->inv_iput_llist);
/* _safe iteration: the next entry is sampled before the body runs iput on si */
llist_for_each_entry_safe(si, tmp, inodes, inv_iput_llnode) {
/*
 * Perform one iput for each deferred put recorded in inv_iput_count.
 * The count is decremented and tested before the iput and si is not
 * touched again after the iput that follows the last decrement.
 * NOTE(review): presumably because that iput can be the final
 * reference and free the inode -- confirm against the inode lifetime.
 */
do {
more = atomic_dec_return(&si->inv_iput_count) > 0;
iput(&si->inode);
} while (more);
}
}
/*
* Invalidate cached data associated with an inode whose lock is going
* away.
@@ -164,8 +194,11 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
if (scoutfs_lock_is_covered(sb, &si->ino_lock_cov) && inode->i_nlink > 0) {
iput(inode);
} else {
/* defer iput to work context so we don't evict inodes from invalidation */
scoutfs_inode_queue_iput(inode);
/* defer iput to work context so we don't evict inodes from invalidation */
if (atomic_inc_return(&si->inv_iput_count) == 1)
llist_add(&si->inv_iput_llnode, &linfo->inv_iput_llist);
smp_wmb(); /* count and list visible before work executes */
queue_work(linfo->workq, &linfo->inv_iput_work);
}
}
}
@@ -1095,14 +1128,8 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
trace_scoutfs_lock_wait(sb, lock);
if (flags & SCOUTFS_LKF_INTERRUPTIBLE) {
ret = wait_event_interruptible(lock->waitq,
lock_wait_cond(sb, lock, mode));
} else {
wait_event(lock->waitq, lock_wait_cond(sb, lock, mode));
ret = 0;
}
ret = wait_event_interruptible(lock->waitq,
lock_wait_cond(sb, lock, mode));
spin_lock(&linfo->lock);
if (ret)
break;
@@ -1762,6 +1789,8 @@ int scoutfs_lock_setup(struct super_block *sb)
INIT_WORK(&linfo->shrink_work, lock_shrink_worker);
INIT_LIST_HEAD(&linfo->shrink_list);
atomic64_set(&linfo->next_refresh_gen, 0);
INIT_WORK(&linfo->inv_iput_work, lock_inv_iput_worker);
init_llist_head(&linfo->inv_iput_llist);
scoutfs_tseq_tree_init(&linfo->tseq_tree, lock_tseq_show);
sbi->lock_info = linfo;
+1 -2
View File
@@ -6,8 +6,7 @@
#define SCOUTFS_LKF_REFRESH_INODE 0x01 /* update stale inode from item */
#define SCOUTFS_LKF_NONBLOCK 0x02 /* only use already held locks */
#define SCOUTFS_LKF_INTERRUPTIBLE 0x04 /* pending signals return -ERESTARTSYS */
#define SCOUTFS_LKF_INVALID (~((SCOUTFS_LKF_INTERRUPTIBLE << 1) - 1))
#define SCOUTFS_LKF_INVALID (~((SCOUTFS_LKF_NONBLOCK << 1) - 1))
#define SCOUTFS_LOCK_NR_MODES SCOUTFS_LOCK_INVALID
+13 -9
View File
@@ -1486,7 +1486,8 @@ int scoutfs_net_connect(struct super_block *sb,
struct scoutfs_net_connection *conn,
struct sockaddr_in *sin, unsigned long timeout_ms)
{
int ret = 0;
int error = 0;
int ret;
spin_lock(&conn->lock);
conn->connect_sin = *sin;
@@ -1494,8 +1495,10 @@ int scoutfs_net_connect(struct super_block *sb,
spin_unlock(&conn->lock);
queue_work(conn->workq, &conn->connect_work);
wait_event(conn->waitq, connect_result(conn, &ret));
return ret;
ret = wait_event_interruptible(conn->waitq,
connect_result(conn, &error));
return ret ?: error;
}
static void set_valid_greeting(struct scoutfs_net_connection *conn)
@@ -1631,10 +1634,10 @@ restart:
conn->next_send_id = reconn->next_send_id;
atomic64_set(&conn->recv_seq, atomic64_read(&reconn->recv_seq));
/* reconn should be idle while in reconn_wait */
/* greeting response/ack will be on conn send queue */
BUG_ON(!list_empty(&reconn->send_queue));
/* queued greeting response is racing, can be in send or resend queue */
list_splice_tail_init(&reconn->resend_queue, &conn->resend_queue);
BUG_ON(!list_empty(&conn->resend_queue));
list_splice_init(&reconn->resend_queue, &conn->resend_queue);
/* new conn info is unused, swap, old won't call down */
swap(conn->info, reconn->info);
@@ -1798,10 +1801,11 @@ int scoutfs_net_sync_request(struct super_block *sb,
ret = scoutfs_net_submit_request(sb, conn, cmd, arg, arg_len,
sync_response, &sreq, &id);
if (ret == 0) {
wait_for_completion(&sreq.comp);
ret = wait_for_completion_interruptible(&sreq.comp);
if (ret == -ERESTARTSYS)
scoutfs_net_cancel_request(sb, conn, cmd, id);
else
ret = sreq.error;
}
return ret;
}
+20 -26
View File
@@ -97,7 +97,7 @@ struct quorum_host_msg {
struct last_msg {
struct quorum_host_msg msg;
ktime_t ts;
struct timespec64 ts;
};
enum quorum_role { FOLLOWER, CANDIDATE, LEADER };
@@ -209,7 +209,7 @@ static void send_msg_members(struct super_block *sb, int type, u64 term,
DECLARE_QUORUM_INFO(sb, qinf);
struct mount_options *opts = &SCOUTFS_SB(sb)->opts;
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
ktime_t now;
struct timespec64 ts;
int i;
struct scoutfs_quorum_message qmes = {
@@ -235,6 +235,7 @@ static void send_msg_members(struct super_block *sb, int type, u64 term,
qmes.crc = quorum_message_crc(&qmes);
ts = ktime_to_timespec64(ktime_get());
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
if (!quorum_slot_present(super, i) ||
@@ -242,13 +243,12 @@ static void send_msg_members(struct super_block *sb, int type, u64 term,
continue;
scoutfs_quorum_slot_sin(super, i, &sin);
now = ktime_get();
kernel_sendmsg(qinf->sock, &mh, &kv, 1, kv.iov_len);
spin_lock(&qinf->show_lock);
qinf->last_send[i].msg.term = term;
qinf->last_send[i].msg.type = type;
qinf->last_send[i].ts = now;
qinf->last_send[i].ts = ts;
spin_unlock(&qinf->show_lock);
if (i == only)
@@ -308,8 +308,6 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
if (ret < 0)
return ret;
now = ktime_get();
if (ret != sizeof(qmes) ||
qmes.crc != quorum_message_crc(&qmes) ||
qmes.fsid != super->hdr.fsid ||
@@ -329,7 +327,7 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
spin_lock(&qinf->show_lock);
qinf->last_recv[msg->from].msg = *msg;
qinf->last_recv[msg->from].ts = now;
qinf->last_recv[msg->from].ts = ktime_to_timespec64(ktime_get());
spin_unlock(&qinf->show_lock);
return 0;
@@ -558,8 +556,10 @@ out:
ret = err;
}
if (ret < 0)
if (ret < 0) {
scoutfs_err(sb, "error %d attempting to find and fence previous leaders", ret);
scoutfs_inc_counter(sb, quorum_fence_error);
}
return ret;
}
@@ -610,7 +610,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
if (ret < 0)
goto out;
while (!(qinf->shutdown || scoutfs_forcing_unmount(sb))) {
while (!qinf->shutdown) {
ret = recv_msg(sb, &msg, qst.timeout);
if (ret < 0) {
@@ -733,15 +733,13 @@ static void scoutfs_quorum_worker(struct work_struct *work)
ret = scoutfs_server_start(sb, qst.term);
if (ret < 0) {
clear_bit(QINF_FLAG_SERVER, &qinf->flags);
scoutfs_err(sb, "server startup failed with %d", ret);
/* store our increased term */
err = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP, qst.term,
true);
if (err < 0) {
if (err < 0 && ret == 0)
ret = err;
goto out;
}
ret = 0;
continue;
goto out;
}
}
@@ -791,7 +789,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_END, qst.term, true);
out:
if (ret < 0) {
scoutfs_err(sb, "quorum service saw error %d, shutting down. This mount is no longer participating in quorum. It should be remounted to restore service.",
scoutfs_err(sb, "quorum service saw error %d, shutting down. Cluster will be degraded until this slot is remounted to restart the quorum service",
ret);
}
}
@@ -917,7 +915,6 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
struct quorum_status qst;
struct last_msg last;
struct timespec64 ts;
const ktime_t now = ktime_get();
size_t size;
int ret;
int i;
@@ -939,9 +936,9 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
qst.vote_for);
snprintf_ret(buf, size, &ret, "vote_bits 0x%lx (count %lu)\n",
qst.vote_bits, hweight_long(qst.vote_bits));
ts = ktime_to_timespec64(ktime_sub(qst.timeout, now));
snprintf_ret(buf, size, &ret, "timeout_in_secs %lld.%09u\n",
(s64)ts.tv_sec, (int)ts.tv_nsec);
ts = ktime_to_timespec64(qst.timeout);
snprintf_ret(buf, size, &ret, "timeout %llu.%u\n",
(u64)ts.tv_sec, (int)ts.tv_nsec);
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
spin_lock(&qinf->show_lock);
@@ -951,11 +948,10 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
if (last.msg.term == 0)
continue;
ts = ktime_to_timespec64(ktime_sub(now, last.ts));
snprintf_ret(buf, size, &ret,
"last_send to %u term %llu type %u secs_since %lld.%09u\n",
"last_send to %u term %llu type %u ts %llu.%u\n",
i, last.msg.term, last.msg.type,
(s64)ts.tv_sec, (int)ts.tv_nsec);
(u64)last.ts.tv_sec, (int)last.ts.tv_nsec);
}
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
@@ -965,12 +961,10 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
if (last.msg.term == 0)
continue;
ts = ktime_to_timespec64(ktime_sub(now, last.ts));
snprintf_ret(buf, size, &ret,
"last_recv from %u term %llu type %u secs_since %lld.%09u\n",
"last_recv from %u term %llu type %u ts %llu.%u\n",
i, last.msg.term, last.msg.type,
(s64)ts.tv_sec, (int)ts.tv_nsec);
(u64)last.ts.tv_sec, (int)last.ts.tv_nsec);
}
return ret;
+21 -142
View File
@@ -323,6 +323,7 @@ static void scoutfs_server_commit_func(struct work_struct *work)
struct commit_waiter *cw;
struct commit_waiter *pos;
struct llist_node *node;
u64 reserved;
int ret;
trace_scoutfs_server_commit_work_enter(sb, 0, 0);
@@ -388,19 +389,16 @@ static void scoutfs_server_commit_func(struct work_struct *work)
server->other_freed = &super->server_meta_freed[server->other_ind];
/*
* get_log_trees sets ALLOC_LOW when its allocator drops below
* the reserved blocks after having filled the log trees's avail
* allocator during its transaction. To avoid prematurely
* setting the low flag and causing enospc we make sure that the
* next transaction's meta_avail has 2x the reserved blocks so
* that it can consume a full reserved amount and still have
* enough to avoid enospc. We swap to freed if avail is under
* the buffer and freed is larger.
* The reserved metadata blocks includes the max size of
* outstanding allocators and a server transaction could be
* asked to refill all those allocators from meta_avail. If our
* meta_avail falls below the reserved count, and freed is still
* above it, then swap so that we don't start returning enospc
* until we're truly low.
*/
if ((le64_to_cpu(server->meta_avail->total_len) <
(scoutfs_server_reserved_meta_blocks(sb) * 2)) &&
(le64_to_cpu(server->meta_freed->total_len) >
le64_to_cpu(server->meta_avail->total_len)))
reserved = scoutfs_server_reserved_meta_blocks(sb);
if (le64_to_cpu(server->meta_avail->total_len) <= reserved &&
le64_to_cpu(server->meta_freed->total_len) > reserved)
swap(server->meta_avail, server->meta_freed);
ret = 0;
@@ -2357,25 +2355,15 @@ static int open_ino_map_response(struct super_block *sb, struct scoutfs_net_conn
return scoutfs_omap_server_handle_response(sb, rid, resp);
}
/*
* The server is sending an omap requests to all the clients it thought
* were connected when it received a request from another client.
* This send can race with the client's connection being removed. We
* can drop those sends on the floor and mask ENOTCONN. The client's rid
* will soon be removed from the request which will be correctly handled.
*/
/* The server is sending an omap request to the client */
int scoutfs_server_send_omap_request(struct super_block *sb, u64 rid,
struct scoutfs_open_ino_map_args *args)
{
struct server_info *server = SCOUTFS_SB(sb)->server_info;
int ret;
ret = scoutfs_net_submit_request_node(sb, server->conn, rid, SCOUTFS_NET_CMD_OPEN_INO_MAP,
return scoutfs_net_submit_request_node(sb, server->conn, rid, SCOUTFS_NET_CMD_OPEN_INO_MAP,
args, sizeof(*args),
open_ino_map_response, NULL, NULL);
if (ret == -ENOTCONN)
ret = 0;
return ret;
}
/*
@@ -2565,103 +2553,6 @@ out:
return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
}
static u64 device_blocks(struct block_device *bdev, int shift)
{
return i_size_read(bdev->bd_inode) >> shift;
}
/*
 * Handle a client's request to grow the metadata and/or data devices.
 *
 * The request carries the desired new totals in blocks.  A total of 0,
 * or one equal to the current total in the super block, leaves that
 * device alone.  Shrinking, or growing past the size of the underlying
 * block device, fails with -EINVAL and changes nothing.
 *
 * For each device that grows, the region between the old and new totals
 * is inserted as a free extent (meta into the server's meta_avail, data
 * into the super's data_alloc) and the total in the super is updated.
 * This happens under alloc_mutex inside a held server commit.  If the
 * data insertion fails after the meta insertion succeeded, the meta
 * extent is removed again.
 *
 * The result of applying the commit is sent back to the client as a
 * response with no payload; a malformed request is answered with
 * -EINVAL without holding a commit.
 */
static int server_resize_devices(struct super_block *sb, struct scoutfs_net_connection *conn,
				 u8 cmd, u64 id, void *arg, u16 arg_len)
{
	DECLARE_SERVER_INFO(sb, server);
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
	struct scoutfs_net_resize_devices *nrd;
	u64 meta_tot;
	u64 meta_start;
	u64 meta_len;
	u64 data_tot;
	u64 data_start;
	u64 data_len;
	int ret;
	int err;

	if (arg_len != sizeof(struct scoutfs_net_resize_devices)) {
		ret = -EINVAL;
		goto out;
	}
	nrd = arg;

	meta_tot = le64_to_cpu(nrd->new_total_meta_blocks);
	data_tot = le64_to_cpu(nrd->new_total_data_blocks);

	scoutfs_server_hold_commit(sb);
	mutex_lock(&server->alloc_mutex);

	/* from here on a zero _tot means "leave this device alone" */
	if (meta_tot == le64_to_cpu(super->total_meta_blocks))
		meta_tot = 0;
	if (data_tot == le64_to_cpu(super->total_data_blocks))
		data_tot = 0;

	if (!meta_tot && !data_tot) {
		ret = 0;
		goto unlock;
	}

	/* we don't support shrinking */
	if ((meta_tot && (meta_tot < le64_to_cpu(super->total_meta_blocks))) ||
	    (data_tot && (data_tot < le64_to_cpu(super->total_data_blocks)))) {
		ret = -EINVAL;
		goto unlock;
	}

	/* must be within devices */
	if ((meta_tot > device_blocks(sbi->meta_bdev, SCOUTFS_BLOCK_LG_SHIFT)) ||
	    (data_tot > device_blocks(sb->s_bdev, SCOUTFS_BLOCK_SM_SHIFT))) {
		ret = -EINVAL;
		goto unlock;
	}

	/* extents are only used if _tot is set */
	meta_start = le64_to_cpu(super->total_meta_blocks);
	meta_len = meta_tot - meta_start;
	data_start = le64_to_cpu(super->total_data_blocks);
	data_len = data_tot - data_start;

	if (meta_tot) {
		ret = scoutfs_alloc_insert(sb, &server->alloc, &server->wri,
					   server->meta_avail, meta_start, meta_len);
		if (ret < 0)
			goto unlock;
	}

	if (data_tot) {
		ret = scoutfs_alloc_insert(sb, &server->alloc, &server->wri,
					   &super->data_alloc, data_start, data_len);
		if (ret < 0) {
			/* undo the meta insertion so a failed resize changes neither device */
			if (meta_tot) {
				err = scoutfs_alloc_remove(sb, &server->alloc, &server->wri,
							   server->meta_avail, meta_start,
							   meta_len);
				WARN_ON_ONCE(err); /* btree blocks are dirty.. really unlikely? */
			}
			goto unlock;
		}
	}

	if (meta_tot)
		super->total_meta_blocks = cpu_to_le64(meta_tot);
	if (data_tot)
		super->total_data_blocks = cpu_to_le64(data_tot);

	ret = 0;
unlock:
	mutex_unlock(&server->alloc_mutex);
	/* the commit is applied with whatever result we reached, including errors */
	ret = scoutfs_server_apply_commit(sb, ret);
out:
	return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
}
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
{
*key = (struct scoutfs_key) {
@@ -3298,7 +3189,6 @@ static scoutfs_net_request_t server_req_funcs[] = {
[SCOUTFS_NET_CMD_GET_VOLOPT] = server_get_volopt,
[SCOUTFS_NET_CMD_SET_VOLOPT] = server_set_volopt,
[SCOUTFS_NET_CMD_CLEAR_VOLOPT] = server_clear_volopt,
[SCOUTFS_NET_CMD_RESIZE_DEVICES] = server_resize_devices,
[SCOUTFS_NET_CMD_FAREWELL] = server_farewell,
};
@@ -3573,15 +3463,13 @@ static void scoutfs_server_worker(struct work_struct *work)
trace_scoutfs_server_work_enter(sb, 0, 0);
scoutfs_quorum_slot_sin(super, opts->quorum_slot_nr, &sin);
scoutfs_info(sb, "server starting at "SIN_FMT, SIN_ARG(&sin));
/* first make sure no other servers are still running */
ret = scoutfs_quorum_fence_leaders(sb, server->term);
if (ret < 0) {
scoutfs_err(sb, "server error %d attempting to fence previous leaders", ret);
if (ret < 0)
goto out;
}
scoutfs_quorum_slot_sin(super, opts->quorum_slot_nr, &sin);
scoutfs_info(sb, "server setting up at "SIN_FMT, SIN_ARG(&sin));
conn = scoutfs_net_alloc_conn(sb, server_notify_up, server_notify_down,
sizeof(struct server_client_info),
@@ -3602,10 +3490,8 @@ static void scoutfs_server_worker(struct work_struct *work)
/* start up the server subsystems before accepting */
ret = scoutfs_read_super(sb, super);
if (ret < 0) {
scoutfs_err(sb, "server error %d reading super block", ret);
if (ret < 0)
goto shutdown;
}
/* update volume options early, possibly for use during startup */
write_seqcount_begin(&server->volopt_seqcount);
@@ -3643,17 +3529,10 @@ static void scoutfs_server_worker(struct work_struct *work)
}
scoutfs_server_set_seq_if_greater(sb, max_seq);
ret = scoutfs_lock_server_setup(sb, &server->alloc, &server->wri);
if (ret) {
scoutfs_err(sb, "server error %d starting lock server", ret);
ret = scoutfs_lock_server_setup(sb, &server->alloc, &server->wri) ?:
start_recovery(sb);
if (ret)
goto shutdown;
}
ret = start_recovery(sb);
if (ret) {
scoutfs_err(sb, "server error %d starting client recovery", ret);
goto shutdown;
}
/* start accepting connections and processing work */
server->conn = conn;
@@ -3664,7 +3543,7 @@ static void scoutfs_server_worker(struct work_struct *work)
queue_reclaim_work(server, 0);
/* interruptible mostly to avoid stuck messages */
/* wait_event/wake_up provide barriers */
wait_event_interruptible(server->waitq, test_shutting_down(server));
shutdown:
+34 -30
View File
@@ -230,15 +230,7 @@ static void scoutfs_metadev_close(struct super_block *sb)
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
if (sbi->meta_bdev) {
/*
* Some kernels have blkdev_reread_part which calls
* fsync_bdev while holding the bd_mutex which inverts
* the s_umount hold in deactivate_super and blkdev_put
* from kill_sb->put_super.
*/
lockdep_off();
blkdev_put(sbi->meta_bdev, SCOUTFS_META_BDEV_MODE);
lockdep_on();
sbi->meta_bdev = NULL;
}
}
@@ -336,16 +328,28 @@ int scoutfs_write_super(struct super_block *sb,
sizeof(struct scoutfs_super_block));
}
static bool small_bdev(struct super_block *sb, char *which, u64 blocks,
struct block_device *bdev, int shift)
static bool invalid_blkno_limits(struct super_block *sb, char *which,
u64 start, __le64 first, __le64 last,
struct block_device *bdev, int shift)
{
u64 size = (u64)i_size_read(bdev->bd_inode);
u64 count = size >> shift;
u64 blkno;
if (blocks > count) {
scoutfs_err(sb, "super block records %llu %s blocks, but device %u:%u size %llu only allows %llu blocks",
blocks, which, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev), size, count);
if (le64_to_cpu(first) < start) {
scoutfs_err(sb, "super block first %s blkno %llu is within first valid blkno %llu",
which, le64_to_cpu(first), start);
return true;
}
if (le64_to_cpu(first) > le64_to_cpu(last)) {
scoutfs_err(sb, "super block first %s blkno %llu is greater than last %s blkno %llu",
which, le64_to_cpu(first), which, le64_to_cpu(last));
return true;
}
blkno = (i_size_read(bdev->bd_inode) >> shift) - 1;
if (le64_to_cpu(last) > blkno) {
scoutfs_err(sb, "super block last %s blkno %llu is beyond device size last blkno %llu",
which, le64_to_cpu(last), blkno);
return true;
}
@@ -405,10 +409,16 @@ static int scoutfs_read_super_from_bdev(struct super_block *sb,
/* XXX do we want more rigorous invalid super checking? */
if (small_bdev(sb, "metadata", le64_to_cpu(super->total_meta_blocks), sbi->meta_bdev,
SCOUTFS_BLOCK_LG_SHIFT) ||
small_bdev(sb, "data", le64_to_cpu(super->total_data_blocks), sb->s_bdev,
SCOUTFS_BLOCK_SM_SHIFT)) {
if (invalid_blkno_limits(sb, "meta",
SCOUTFS_META_DEV_START_BLKNO,
super->first_meta_blkno,
super->last_meta_blkno, sbi->meta_bdev,
SCOUTFS_BLOCK_LG_SHIFT) ||
invalid_blkno_limits(sb, "data",
SCOUTFS_DATA_DEV_START_BLKNO,
super->first_data_blkno,
super->last_data_blkno, sb->s_bdev,
SCOUTFS_BLOCK_SM_SHIFT)) {
ret = -EINVAL;
}
@@ -612,16 +622,15 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
scoutfs_quorum_setup(sb) ?:
scoutfs_client_setup(sb) ?:
scoutfs_volopt_setup(sb) ?:
scoutfs_srch_setup(sb);
scoutfs_trans_get_log_trees(sb) ?:
scoutfs_srch_setup(sb) ?:
scoutfs_inode_start(sb);
if (ret)
goto out;
/* this interruptible iget lets hung mount be aborted with ctl-c */
inode = scoutfs_iget(sb, SCOUTFS_ROOT_INO, SCOUTFS_LKF_INTERRUPTIBLE);
inode = scoutfs_iget(sb, SCOUTFS_ROOT_INO);
if (IS_ERR(inode)) {
ret = PTR_ERR(inode);
if (ret == -ERESTARTSYS)
ret = -EINTR;
goto out;
}
@@ -631,15 +640,10 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
goto out;
}
/* send requests once iget progress shows we had a server */
ret = scoutfs_trans_get_log_trees(sb) ?:
scoutfs_client_advance_seq(sb, &sbi->trans_seq);
ret = scoutfs_client_advance_seq(sb, &sbi->trans_seq);
if (ret)
goto out;
/* start up background services that use everything else */
scoutfs_inode_start(sb);
scoutfs_forest_start(sb);
scoutfs_trans_restart_sync_deadline(sb);
ret = 0;
out:
+12 -5
View File
@@ -291,7 +291,7 @@ static void queue_trans_work(struct scoutfs_sb_info *sbi)
int scoutfs_trans_sync(struct super_block *sb, int wait)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct write_attempt attempt = { .ret = 0 };
struct write_attempt attempt;
int ret;
@@ -306,8 +306,10 @@ int scoutfs_trans_sync(struct super_block *sb, int wait)
queue_trans_work(sbi);
wait_event(sbi->trans_write_wq, write_attempted(sbi, &attempt));
ret = attempt.ret;
ret = wait_event_interruptible(sbi->trans_write_wq,
write_attempted(sbi, &attempt));
if (ret == 0)
ret = attempt.ret;
return ret;
}
@@ -494,7 +496,9 @@ int scoutfs_hold_trans(struct super_block *sb, bool allocing)
/* wait until the writer work is finished */
if (!inc_holders_unless_writer(tri)) {
dec_journal_info_holders();
wait_event(sbi->trans_hold_wq, holders_no_writer(tri));
ret = wait_event_interruptible(sbi->trans_hold_wq, holders_no_writer(tri));
if (ret < 0)
break;
continue;
}
@@ -510,7 +514,10 @@ int scoutfs_hold_trans(struct super_block *sb, bool allocing)
seq = scoutfs_trans_sample_seq(sb);
release_holders(sb);
queue_trans_work(sbi);
wait_event(sbi->trans_hold_wq, scoutfs_trans_sample_seq(sb) != seq);
ret = wait_event_interruptible(sbi->trans_hold_wq,
scoutfs_trans_sample_seq(sb) != seq);
if (ret < 0)
break;
continue;
}
+1 -1
View File
@@ -40,7 +40,7 @@ t_filter_dmesg()
# mount and unmount spew a bunch
re="$re|scoutfs.*client connected"
re="$re|scoutfs.*client disconnected"
re="$re|scoutfs.*server starting"
re="$re|scoutfs.*server setting up"
re="$re|scoutfs.*server ready"
re="$re|scoutfs.*server accepted"
re="$re|scoutfs.*server closing"
-27
View File
@@ -1,27 +0,0 @@
== make initial small fs
== 0s do nothing
== shrinking fails
resize_devices ioctl failed: Invalid argument (22)
scoutfs: resize-devices failed: Invalid argument (22)
resize_devices ioctl failed: Invalid argument (22)
scoutfs: resize-devices failed: Invalid argument (22)
resize_devices ioctl failed: Invalid argument (22)
scoutfs: resize-devices failed: Invalid argument (22)
== existing sizes do nothing
== growing outside device fails
resize_devices ioctl failed: Invalid argument (22)
scoutfs: resize-devices failed: Invalid argument (22)
resize_devices ioctl failed: Invalid argument (22)
scoutfs: resize-devices failed: Invalid argument (22)
resize_devices ioctl failed: Invalid argument (22)
scoutfs: resize-devices failed: Invalid argument (22)
== resizing meta works
== resizing data works
== shrinking back fails
resize_devices ioctl failed: Invalid argument (22)
scoutfs: resize-devices failed: Invalid argument (22)
resize_devices ioctl failed: Invalid argument (22)
scoutfs: resize-devices failed: Invalid argument (22)
== resizing again does nothing
== resizing to full works
== cleanup extra fs
-1
View File
@@ -29,7 +29,6 @@ lock-conflicting-batch-commit.sh
cross-mount-data-free.sh
persistent-item-vers.sh
setup-error-teardown.sh
resize-devices.sh
fence-and-reclaim.sh
orphan-inodes.sh
mount-unmount-race.sh
+6 -15
View File
@@ -48,9 +48,8 @@ char buf[SZ];
int main(int argc, char **argv)
{
struct scoutfs_ioctl_release rel = {0};
struct scoutfs_ioctl_release ioctl_args = {0};
struct scoutfs_ioctl_move_blocks mb;
struct scoutfs_ioctl_stat_more stm;
struct sub_tmp_info sub_tmps[8];
int tot_size = 0;
char *dest_file;
@@ -112,20 +111,12 @@ int main(int argc, char **argv)
exit(1);
}
// get current data_version after fallocate's size extensions
stm.valid_bytes = sizeof(struct scoutfs_ioctl_stat_more);
ret = ioctl(dest_fd, SCOUTFS_IOC_STAT_MORE, &stm);
if (ret < 0) {
perror("stat_more ioctl error");
exit(1);
}
// release everything in dest file
rel.offset = 0;
rel.length = tot_size;
rel.data_version = stm.data_version;
ioctl_args.offset = 0;
ioctl_args.length = tot_size;
ioctl_args.data_version = 0;
ret = ioctl(dest_fd, SCOUTFS_IOC_RELEASE, &rel);
ret = ioctl(dest_fd, SCOUTFS_IOC_RELEASE, &ioctl_args);
if (ret < 0) {
perror("error");
exit(1);
@@ -139,7 +130,7 @@ int main(int argc, char **argv)
mb.from_off = 0;
mb.len = sub_tmp->length;
mb.to_off = sub_tmp->offset;
mb.data_version = stm.data_version;
mb.data_version = 0;
mb.flags = SCOUTFS_IOC_MB_STAGE;
ret = ioctl(dest_fd, SCOUTFS_IOC_MOVE_BLOCKS, &mb);
-149
View File
@@ -1,149 +0,0 @@
#
# Some basic tests of online resizing metadata and data devices.
#
# Print the total_<which>_blocks statfs field ($1 is "meta" or "data")
# of the scoutfs mount at path $2.
statfs_total() {
	local mnt="$2"
	local field="total_${1}_blocks"

	scoutfs statfs -s $field -p "$mnt"
}
# Print the fifth column of the first "scoutfs df" output row whose first
# column equals $1 (e.g. "MetaData" or "Data") for the mount at path $2.
df_free() {
	local md="$1"
	local mnt="$2"

	# hand the row label to awk as a variable rather than splicing it
	# into the program text, so quoting in the label can't alter the script
	scoutfs df -p "$mnt" | awk -v md="$md" '($1 == md) { print $5; exit }'
}
# Fail the test unless the current total block counts of both devices
# still equal the saved exp_meta_tot and exp_data_tot values.  The
# sampled values are left in the cur_*_tot globals.
same_totals() {
	cur_meta_tot=$(statfs_total meta "$SCR")
	cur_data_tot=$(statfs_total data "$SCR")

	if [ "$cur_meta_tot" != "$exp_meta_tot" ]; then
		t_fail "cur total_meta_blocks $cur_meta_tot != expected $exp_meta_tot"
	fi

	if [ "$cur_data_tot" != "$exp_data_tot" ]; then
		t_fail "cur total_data_blocks $cur_data_tot != expected $exp_data_tot"
	fi
}
#
# make sure that the specified devices have grown by doubling. The
# total blocks can be tested exactly but the df reported total needs
# some slop to account for reserved blocks and concurrent allocation.
#
# usage: devices_grew [meta] [data]
# Arguments name the devices expected to have doubled, "meta" before
# "data" when both are given.
devices_grew() {
# sample the current totals and df values of both devices
cur_meta_tot=$(statfs_total meta "$SCR")
cur_data_tot=$(statfs_total data "$SCR")
cur_meta_df=$(df_free MetaData "$SCR")
cur_data_df=$(df_free Data "$SCR")
# totals must double exactly; df only has to reach 1.95x the previous
# value, and the "/1" has bc truncate that product to an integer
local grow_meta_tot=$(echo "$exp_meta_tot * 2" | bc)
local grow_data_tot=$(echo "$exp_data_tot * 2" | bc)
local grow_meta_df=$(echo "($exp_meta_df * 1.95)/1" | bc)
local grow_data_df=$(echo "($exp_data_df * 1.95)/1" | bc)
if [ "$1" == "meta" ]; then
test "$cur_meta_tot" == "$grow_meta_tot" || \
t_fail "cur total_meta_blocks $cur_meta_tot != grown $grow_meta_tot"
test "$cur_meta_df" -lt "$grow_meta_df" && \
t_fail "cur meta df total $cur_meta_df < grown $grow_meta_df"
# the grown values become the baseline for the next check
exp_meta_tot=$cur_meta_tot
exp_meta_df=$cur_meta_df
# drop "meta" so a following "data" argument is seen as $1 below
shift
fi
if [ "$1" == "data" ]; then
test "$cur_data_tot" == "$grow_data_tot" || \
t_fail "cur total_data_blocks $cur_data_tot != grown $grow_data_tot"
test "$cur_data_df" -lt "$grow_data_df" && \
t_fail "cur data df total $cur_data_df < grown $grow_data_df"
# the grown values become the baseline for the next check
exp_data_tot=$cur_data_tot
exp_data_df=$cur_data_df
fi
}
# first calculate small mkfs based on device size
size_meta=$(blockdev --getsize64 "$T_EX_META_DEV")
size_data=$(blockdev --getsize64 "$T_EX_DATA_DEV")
quarter_meta=$(echo "$size_meta / 4" | bc)
quarter_data=$(echo "$size_data / 4" | bc)
# XXX this is all pretty manual, would be nice to have helpers
echo "== make initial small fs"
scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m $quarter_meta -d $quarter_data \
"$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
t_fail "mkfs failed"
SCR="/mnt/scoutfs.enospc"
mkdir -p "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
# then calculate sizes based on blocks that mkfs used
quarter_meta=$(echo "$(statfs_total meta "$SCR") * 64 * 1024" | bc)
quarter_data=$(echo "$(statfs_total data "$SCR") * 4 * 1024" | bc)
whole_meta=$(echo "$quarter_meta * 4" | bc)
whole_data=$(echo "$quarter_data * 4" | bc)
outsize_meta=$(echo "$whole_meta * 2" | bc)
outsize_data=$(echo "$whole_data * 2" | bc)
half_meta=$(echo "$whole_meta / 2" | bc)
half_data=$(echo "$whole_data / 2" | bc)
shrink_meta=$(echo "$quarter_meta / 2" | bc)
shrink_data=$(echo "$quarter_data / 2" | bc)
# and save expected values for checks
exp_meta_tot=$(statfs_total meta "$SCR")
exp_meta_df=$(df_free MetaData "$SCR")
exp_data_tot=$(statfs_total data "$SCR")
exp_data_df=$(df_free Data "$SCR")
echo "== 0s do nothing"
scoutfs resize-devices -p "$SCR"
scoutfs resize-devices -p "$SCR" -m 0
scoutfs resize-devices -p "$SCR" -d 0
scoutfs resize-devices -p "$SCR" -m 0 -d 0
echo "== shrinking fails"
scoutfs resize-devices -p "$SCR" -m $shrink_meta
scoutfs resize-devices -p "$SCR" -d $shrink_data
scoutfs resize-devices -p "$SCR" -m $shrink_meta -d $shrink_data
same_totals
echo "== existing sizes do nothing"
scoutfs resize-devices -p "$SCR" -m $quarter_meta
scoutfs resize-devices -p "$SCR" -d $quarter_data
scoutfs resize-devices -p "$SCR" -m $quarter_meta -d $quarter_data
same_totals
echo "== growing outside device fails"
scoutfs resize-devices -p "$SCR" -m $outsize_meta
scoutfs resize-devices -p "$SCR" -d $outsize_data
scoutfs resize-devices -p "$SCR" -m $outsize_meta -d $outsize_data
same_totals
echo "== resizing meta works"
scoutfs resize-devices -p "$SCR" -m $half_meta
devices_grew meta
echo "== resizing data works"
scoutfs resize-devices -p "$SCR" -d $half_data
devices_grew data
# both devices are now at half size, asking for the quarter size of
# either one is a shrink and must be refused
echo "== shrinking back fails"
scoutfs resize-devices -p "$SCR" -m $quarter_meta
scoutfs resize-devices -p "$SCR" -d $quarter_data
same_totals

# asking for the size each device already has must change nothing
echo "== resizing again does nothing"
scoutfs resize-devices -p "$SCR" -m $half_meta
scoutfs resize-devices -p "$SCR" -d $half_data
same_totals
echo "== resizing to full works"
scoutfs resize-devices -p "$SCR" -m $whole_meta -d $whole_data
devices_grew meta data
echo "== cleanup extra fs"
umount "$SCR"
rmdir "$SCR"
t_pass
+2 -2
View File
@@ -7,7 +7,7 @@ message_output()
error_message()
{
message_output "$@" >&2
message_output "$@" >> /dev/stderr
}
error_exit()
@@ -18,7 +18,7 @@ error_exit()
log_message()
{
message_output "$@"
message_output "$@" >> /dev/stdout
}
# restart if we catch hup to re-read the config
-3
View File
@@ -1,3 +0,0 @@
SCOUTFS_FENCED_DELAY=1
SCOUTFS_FENCED_RUN=/usr/libexec/scoutfs-fenced/run/local-force-unmount
SCOUTFS_FENCED_RUN_ARGS=""
-11
View File
@@ -1,11 +0,0 @@
[Unit]
Description=ScoutFS fenced
[Service]
Restart=on-failure
RestartSec=5s
StartLimitBurst=5
ExecStart=/usr/libexec/scoutfs-fenced/scoutfs-fenced
[Install]
WantedBy=default.target
-57
View File
@@ -103,63 +103,6 @@ Ignore presence of existing data on the data and metadata devices.
.PD
.TP
.BI "resize-devices [-p|--path PATH] [-m|--meta-size SIZE] [-d|--data-size SIZE]"
.sp
Resize the metadata or data devices of a mounted ScoutFS filesystem.
.sp
ScoutFS metadata has free extent records and fields in the super block
that reflect the size of the devices in use. This command sends a
request to the server to change the size of the device that can be used
by updating free extents and setting the super block fields.
.sp
The specified sizes are in bytes and are translated into block counts.
If the specified sizes are not a multiple of the metadata or data block
sizes then a message is output and the resized size is truncated down to
the next whole block. Specifying either a size of 0 or the current
device size makes no change. The current size of the devices can be
seen, in units of their respective block sizes, in the total_meta_blocks
and total_data_blocks fields returned by the scoutfs statfs command (via
the statfs_more ioctl).
.sp
Shrinking is not supported. Specifying a smaller size for either device
will return an error and neither device will be resized.
.sp
Specifying a larger size will expand the initial size of the device that
will be used. Free space records are added for the expanded region and
can be used once the resizing transaction is complete.
.sp
The resizing action is performed in a transaction on the server. This
command will hang until a server is elected and running and can service
the request. The server serializes any concurrent requests to resize.
.sp
The new sizes must fit within the current sizes of the mounted devices.
Presumably this command is being performed as part of a larger
coordinated resize of the underlying devices. The device must be
expanded before ScoutFS can use the larger device and ScoutFS must stop
using a region to shrink before it could be removed from the device
(which is not currently supported).
.sp
The resize will be committed by the server before the response is sent
to the client. The system can be using the new device size before the
result is communicated through the client and this command completes.
The client could crash and the server could still have performed the
resize.
.RS 1.0i
.PD 0
.TP
.sp
.B "-p, --path PATH"
A path in the mounted ScoutFS filesystem which will have its devices
resized.
.TP
.B "-m, --meta-size SIZE"
.B "-d, --data-size SIZE"
The new size of the metadata or data device to use, in bytes. Size is given as
an integer optionally followed by a unit suffix: "K", "M", "G", "T", "P", to denote
kibibytes, mebibytes, etc.
.RE
.PD
.BI "stat FILE [-s|--single-field FIELD-NAME]"
.sp
Display ScoutFS-specific metadata fields for the given file.
-4
View File
@@ -56,14 +56,10 @@ install -m 644 -D src/ioctl.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/ioctl.h
install -m 644 -D src/format.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/format.h
install -m 755 -D fenced/scoutfs-fenced $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/scoutfs-fenced
install -m 755 -D fenced/local-force-unmount $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/run/local-force-unmount
install -m 644 -D fenced/scoutfs-fenced.service $RPM_BUILD_ROOT%{_unitdir}/scoutfs-fenced.service
install -m 644 -D fenced/scoutfs-fenced.conf.example $RPM_BUILD_ROOT%{_sysconfdir}/scoutfs/scoutfs-fenced.conf.example
%files
%defattr(644,root,root,755)
%{_mandir}/man*/scoutfs*.gz
%{_unitdir}/scoutfs-fenced.service
%{_sysconfdir}/scoutfs
%defattr(755,root,root,755)
%{_sbindir}/scoutfs
%{_libexecdir}/scoutfs-fenced
+6 -2
View File
@@ -241,7 +241,11 @@ static int do_mkfs(struct mkfs_args *args)
super->next_ino = cpu_to_le64(SCOUTFS_ROOT_INO + 1);
super->seq = cpu_to_le64(1);
super->total_meta_blocks = cpu_to_le64(last_meta + 1);
super->total_data_blocks = cpu_to_le64(last_data + 1);
super->first_meta_blkno = cpu_to_le64(next_meta);
super->last_meta_blkno = cpu_to_le64(last_meta);
super->total_data_blocks = cpu_to_le64(last_data - first_data + 1);
super->first_data_blkno = cpu_to_le64(first_data);
super->last_data_blkno = cpu_to_le64(last_data);
assert(sizeof(args->slots) ==
member_sizeof(struct scoutfs_super_block, qconf.slots));
@@ -316,7 +320,7 @@ static int do_mkfs(struct mkfs_args *args)
blkno = next_meta++;
ret = write_alloc_root(meta_fd, fsid, &super->data_alloc, bt,
1, blkno, first_data,
last_data - first_data + 1);
le64_to_cpu(super->total_data_blocks));
if (ret < 0)
goto out;
+6 -1
View File
@@ -951,7 +951,8 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
/* XXX these are all in a crazy order */
printf(" next_ino %llu seq %llu\n"
" total_meta_blocks %llu total_data_blocks %llu\n"
" total_meta_blocks %llu first_meta_blkno %llu last_meta_blkno %llu\n"
" total_data_blocks %llu first_data_blkno %llu last_data_blkno %llu\n"
" meta_alloc[0]: "ALCROOT_F"\n"
" meta_alloc[1]: "ALCROOT_F"\n"
" data_alloc: "ALCROOT_F"\n"
@@ -968,7 +969,11 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
le64_to_cpu(super->next_ino),
le64_to_cpu(super->seq),
le64_to_cpu(super->total_meta_blocks),
le64_to_cpu(super->first_meta_blkno),
le64_to_cpu(super->last_meta_blkno),
le64_to_cpu(super->total_data_blocks),
le64_to_cpu(super->first_data_blkno),
le64_to_cpu(super->last_data_blkno),
ALCROOT_A(&super->meta_alloc[0]),
ALCROOT_A(&super->meta_alloc[1]),
ALCROOT_A(&super->data_alloc),
-120
View File
@@ -1,120 +0,0 @@
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <argp.h>
#include "sparse.h"
#include "parse.h"
#include "util.h"
#include "format.h"
#include "ioctl.h"
#include "cmd.h"
/*
 * Command line arguments for the resize-devices command, filled in by
 * parse_opt() and consumed by do_resize_devices().
 */
struct resize_args {
/* path given with -p, handed to get_path() to open the filesystem */
char *path;
/* requested metadata device size in bytes from -m, converted to large blocks before the ioctl */
u64 meta_size;
/* requested data device size in bytes from -d, converted to small blocks before the ioctl */
u64 data_size;
};
/*
 * Issue the resize-devices ioctl against the filesystem found at
 * args->path.
 *
 * The byte sizes in args are converted to whole block counts by
 * shifting; any partial trailing block is dropped and a message
 * describing the truncation is printed first.
 *
 * Returns 0 on success, the negative value returned by get_path() if
 * the path couldn't be opened, or -errno if the ioctl failed.
 */
static int do_resize_devices(struct resize_args *args)
{
	struct scoutfs_ioctl_resize_devices rd;
	int ret;
	int fd;
	int err;

	if (args->meta_size & SCOUTFS_BLOCK_LG_MASK) {
		printf("metadata device size %llu is not a multiple of %u metadata block size, truncating down to %llu byte size\n",
		       args->meta_size, SCOUTFS_BLOCK_LG_SIZE,
		       args->meta_size & ~(u64)SCOUTFS_BLOCK_LG_MASK);
	}

	if (args->data_size & SCOUTFS_BLOCK_SM_MASK) {
		printf("data device size %llu is not a multiple of %u data block size, truncating down to %llu byte size\n",
		       args->data_size, SCOUTFS_BLOCK_SM_SIZE,
		       args->data_size & ~(u64)SCOUTFS_BLOCK_SM_MASK);
	}

	fd = get_path(args->path, O_RDONLY);
	if (fd < 0)
		return fd;

	/* don't hand uninitialized stack bytes to the kernel */
	memset(&rd, 0, sizeof(rd));
	rd.new_total_meta_blocks = args->meta_size >> SCOUTFS_BLOCK_LG_SHIFT;
	rd.new_total_data_blocks = args->data_size >> SCOUTFS_BLOCK_SM_SHIFT;

	ret = ioctl(fd, SCOUTFS_IOC_RESIZE_DEVICES, &rd);
	if (ret < 0) {
		/* capture errno once so the message and return value agree */
		err = errno;
		ret = -err;
		fprintf(stderr, "resize_devices ioctl failed: %s (%d)\n", strerror(err), err);
	}

	close(fd);
	return ret;
}
/*
 * argp parser callback for resize-devices.
 *
 * Size options are parsed with parse_human() into the resize_args
 * passed as the argp input, and a parse failure is returned to argp.
 * The path option is duplicated with strdup_or_error().  Every other
 * key is accepted and returns 0.
 */
static int parse_opt(int key, char *arg, struct argp_state *state)
{
	struct resize_args *ra = state->input;

	/* meta-size */
	if (key == 'm')
		return parse_human(arg, &ra->meta_size);

	/* data-size */
	if (key == 'd')
		return parse_human(arg, &ra->data_size);

	if (key == 'p')
		ra->path = strdup_or_error(state, arg);

	return 0;
}
/* Options accepted by resize-devices; the keys match the cases in parse_opt(). */
static struct argp_option options[] = {
	{
		.name = "path",
		.key = 'p',
		.arg = "PATH",
		.flags = 0,
		.doc = "Path to ScoutFS filesystem",
	},
	{
		.name = "meta-size",
		.key = 'm',
		.arg = "SIZE",
		.flags = 0,
		.doc = "New metadata device size (bytes or KMGTP units)",
	},
	{
		.name = "data-size",
		.key = 'd',
		.arg = "SIZE",
		.flags = 0,
		.doc = "New data device size (bytes or KMGTP units)",
	},
	/* all-zero entry terminates the table */
	{ .name = NULL }
};
/* argp description of the resize-devices command; it takes no positional arguments. */
static struct argp argp = {
	.options = options,
	.parser = parse_opt,
	.args_doc = "",
	.doc = "Online resize of metadata and/or data devices",
};
static int resize_devices_cmd(int argc, char **argv)
{
struct resize_args resize_args = {NULL,};
int ret;
ret = argp_parse(&argp, argc, argv, 0, NULL, &resize_args);
if (ret)
return ret;
return do_resize_devices(&resize_args);
}
static void __attribute__((constructor)) read_xattr_totals_ctor(void)
{
cmd_register_argp("resize-devices", &argp, GROUP_CORE, resize_devices_cmd);
}
+4
View File
@@ -37,6 +37,7 @@ static struct stat_more_field inode_fields[] = {
INODE_FIELD(data_version),
INODE_FIELD(online_blocks),
INODE_FIELD(offline_blocks),
{ .name = "crtime", .offset = INODE_FIELD_OFF(crtime_sec) },
{ NULL, }
};
@@ -60,6 +61,9 @@ static void print_inode_field(void *st, size_t off)
case INODE_FIELD_OFF(offline_blocks):
printf("%llu", stm->offline_blocks);
break;
case INODE_FIELD_OFF(crtime_sec):
printf("%llu.%09u", stm->crtime_sec, stm->crtime_nsec);
break;
};
}