mirror of
https://github.com/versity/scoutfs.git
synced 2026-06-09 21:22:36 +00:00
Compare commits
23 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| cb1726681c | |||
| cdff272163 | |||
| 7e935898ab | |||
| 6d0694f1b0 | |||
| fd686cab86 | |||
| 4c1181c055 | |||
| d6bed7181f | |||
| 4893a6f915 | |||
| 384590f016 | |||
| 192f077c16 | |||
| a9baeab22e | |||
| b7ab26539a | |||
| c51f0c37da | |||
| 52107424dd | |||
| 099a65ab07 | |||
| 21c5724dd5 | |||
| 3974d98f6b | |||
| 2901b43906 | |||
| 03d7a4e7fe | |||
| d5d3b12986 | |||
| e4dca8ddcc | |||
| 011b7d52e5 | |||
| 3a9db45194 |
+62
-20
@@ -261,20 +261,17 @@ static bool invalid_extent(u64 start, u64 end, u64 first, u64 last)
|
||||
|
||||
static bool invalid_meta_blkno(struct super_block *sb, u64 blkno)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
u64 last_meta = (i_size_read(sbi->meta_bdev->bd_inode) >> SCOUTFS_BLOCK_LG_SHIFT) - 1;
|
||||
|
||||
return invalid_extent(blkno, blkno,
|
||||
le64_to_cpu(super->first_meta_blkno),
|
||||
le64_to_cpu(super->last_meta_blkno));
|
||||
return invalid_extent(blkno, blkno, SCOUTFS_META_DEV_START_BLKNO, last_meta);
|
||||
}
|
||||
|
||||
static bool invalid_data_extent(struct super_block *sb, u64 start, u64 len)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
u64 last_data = (i_size_read(sb->s_bdev->bd_inode) >> SCOUTFS_BLOCK_SM_SHIFT) - 1;
|
||||
|
||||
return invalid_extent(start, start + len - 1,
|
||||
le64_to_cpu(super->first_data_blkno),
|
||||
le64_to_cpu(super->last_data_blkno));
|
||||
return invalid_extent(start, start + len - 1, SCOUTFS_DATA_DEV_START_BLKNO, last_data);
|
||||
}
|
||||
|
||||
void scoutfs_alloc_init(struct scoutfs_alloc *alloc,
|
||||
@@ -980,6 +977,39 @@ int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add new free space to an allocator. _ext_insert will make sure that it doesn't
|
||||
* overlap with any existing extents. This is done by the server in a transaction that
|
||||
* also updates total_*_blocks in the super so we don't verify.
|
||||
*/
|
||||
int scoutfs_alloc_insert(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, struct scoutfs_alloc_root *root,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
struct alloc_ext_args args = {
|
||||
.alloc = alloc,
|
||||
.wri = wri,
|
||||
.root = root,
|
||||
.zone = SCOUTFS_FREE_EXTENT_BLKNO_ZONE,
|
||||
};
|
||||
|
||||
return scoutfs_ext_insert(sb, &alloc_ext_ops, &args, start, len, 0, 0);
|
||||
}
|
||||
|
||||
int scoutfs_alloc_remove(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, struct scoutfs_alloc_root *root,
|
||||
u64 start, u64 len)
|
||||
{
|
||||
struct alloc_ext_args args = {
|
||||
.alloc = alloc,
|
||||
.wri = wri,
|
||||
.root = root,
|
||||
.zone = SCOUTFS_FREE_EXTENT_BLKNO_ZONE,
|
||||
};
|
||||
|
||||
return scoutfs_ext_remove(sb, &alloc_ext_ops, &args, start, len);
|
||||
}
|
||||
|
||||
/*
|
||||
* We only trim one block, instead of looping trimming all, because the
|
||||
* caller is assuming that we do a fixed amount of work when they check
|
||||
@@ -1026,18 +1056,31 @@ out:
|
||||
}
|
||||
|
||||
/*
|
||||
* True if the allocator has enough free blocks to cow (alloc and free)
|
||||
* a list block and all the btree blocks that store extent items.
|
||||
* True if the allocator has enough blocks in the avail list and space
|
||||
* in the freed list to be able to perform the callers operations. If
|
||||
* false the caller should back off and return partial progress rather
|
||||
* than completely exhausting the avail list or overflowing the freed
|
||||
* list.
|
||||
*
|
||||
* At most, an extent operation can dirty down three paths of the tree
|
||||
* to modify a blkno item and two distant order items. We can grow and
|
||||
* split the root, and then those three paths could share blocks but each
|
||||
* modify two leaf blocks.
|
||||
* An extent modification dirties three distinct leaves of an allocator
|
||||
* btree as it adds and removes the blkno and size sorted items for the
|
||||
* old and new lengths of the extent. Dirtying the paths to these
|
||||
* leaves can grow the tree and grow/shrink neighbours at each level.
|
||||
* We over-estimate the number of blocks allocated and freed (the paths
|
||||
* share a root, growth doesn't free) to err on the simpler and safer
|
||||
* side. The overhead is minimal given the relatively large list blocks
|
||||
* and relatively short allocator trees.
|
||||
*
|
||||
* The caller tells us how many extents they're about to modify and how
|
||||
* many other additional blocks they may cow manually. And finally, the
|
||||
* caller could be the first to dirty the avail and freed blocks in the
|
||||
* allocator,
|
||||
*/
|
||||
static bool list_can_cow(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_alloc_root *root)
|
||||
static bool list_has_blocks(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_alloc_root *root, u32 extents, u32 addl_blocks)
|
||||
{
|
||||
u32 most = 1 + (1 + 1 + (3 * (1 - root->root.height + 1)));
|
||||
u32 tree_blocks = (((1 + root->root.height) * 2) * 3) * extents;
|
||||
u32 most = 1 + tree_blocks + addl_blocks;
|
||||
|
||||
if (le32_to_cpu(alloc->avail.first_nr) < most) {
|
||||
scoutfs_inc_counter(sb, alloc_list_avail_lo);
|
||||
@@ -1101,8 +1144,7 @@ int scoutfs_alloc_fill_list(struct super_block *sb,
|
||||
goto out;
|
||||
lblk = bl->data;
|
||||
|
||||
while (le32_to_cpu(lblk->nr) < target &&
|
||||
list_can_cow(sb, alloc, root)) {
|
||||
while (le32_to_cpu(lblk->nr) < target && list_has_blocks(sb, alloc, root, 1, 0)) {
|
||||
|
||||
ret = scoutfs_ext_alloc(sb, &alloc_ext_ops, &args, 0, 0,
|
||||
target - le32_to_cpu(lblk->nr), &ext);
|
||||
@@ -1146,7 +1188,7 @@ int scoutfs_alloc_empty_list(struct super_block *sb,
|
||||
if (WARN_ON_ONCE(lhead_in_alloc(alloc, lhead)))
|
||||
return -EINVAL;
|
||||
|
||||
while (lhead->ref.blkno && list_can_cow(sb, alloc, args.root)) {
|
||||
while (lhead->ref.blkno && list_has_blocks(sb, alloc, args.root, 1, 1)) {
|
||||
|
||||
if (lhead->first_nr == 0) {
|
||||
ret = trim_empty_first_block(sb, alloc, wri, lhead);
|
||||
|
||||
@@ -132,6 +132,12 @@ int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_alloc_root *dst,
|
||||
struct scoutfs_alloc_root *src, u64 total,
|
||||
__le64 *exclusive, __le64 *vacant, u64 zone_blocks);
|
||||
int scoutfs_alloc_insert(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, struct scoutfs_alloc_root *root,
|
||||
u64 start, u64 len);
|
||||
int scoutfs_alloc_remove(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, struct scoutfs_alloc_root *root,
|
||||
u64 start, u64 len);
|
||||
|
||||
int scoutfs_alloc_fill_list(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
|
||||
+4
-2
@@ -645,9 +645,11 @@ static struct block_private *block_read(struct super_block *sb, u64 blkno)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = wait_event_interruptible(binf->waitq, uptodate_or_error(bp));
|
||||
if (ret == 0 && test_bit(BLOCK_BIT_ERROR, &bp->bits))
|
||||
wait_event(binf->waitq, uptodate_or_error(bp));
|
||||
if (test_bit(BLOCK_BIT_ERROR, &bp->bits))
|
||||
ret = -EIO;
|
||||
else
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
if (ret < 0) {
|
||||
|
||||
+10
-4
@@ -297,6 +297,14 @@ int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_op
|
||||
volopt, sizeof(*volopt), NULL, 0);
|
||||
}
|
||||
|
||||
int scoutfs_client_resize_devices(struct super_block *sb, struct scoutfs_net_resize_devices *nrd)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_RESIZE_DEVICES,
|
||||
nrd, sizeof(*nrd), NULL, 0);
|
||||
}
|
||||
|
||||
/* The client is receiving a invalidation request from the server */
|
||||
static int client_lock(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn, u8 cmd, u64 id,
|
||||
@@ -623,10 +631,8 @@ void scoutfs_client_destroy(struct super_block *sb)
|
||||
client_farewell_response,
|
||||
NULL, NULL);
|
||||
if (ret == 0) {
|
||||
ret = wait_for_completion_interruptible(
|
||||
&client->farewell_comp);
|
||||
if (ret == 0)
|
||||
ret = client->farewell_error;
|
||||
wait_for_completion(&client->farewell_comp);
|
||||
ret = client->farewell_error;
|
||||
}
|
||||
if (ret) {
|
||||
scoutfs_inc_counter(sb, client_farewell_error);
|
||||
|
||||
@@ -33,6 +33,7 @@ int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
|
||||
int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
|
||||
int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
|
||||
int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
|
||||
int scoutfs_client_resize_devices(struct super_block *sb, struct scoutfs_net_resize_devices *nrd);
|
||||
|
||||
int scoutfs_client_setup(struct super_block *sb);
|
||||
void scoutfs_client_destroy(struct super_block *sb);
|
||||
|
||||
@@ -88,7 +88,6 @@
|
||||
EXPAND_COUNTER(forest_read_items) \
|
||||
EXPAND_COUNTER(forest_roots_next_hint) \
|
||||
EXPAND_COUNTER(forest_set_bloom_bits) \
|
||||
EXPAND_COUNTER(inode_evict_intr) \
|
||||
EXPAND_COUNTER(item_clear_dirty) \
|
||||
EXPAND_COUNTER(item_create) \
|
||||
EXPAND_COUNTER(item_delete) \
|
||||
|
||||
+23
-8
@@ -207,6 +207,7 @@ static s64 truncate_extents(struct super_block *sb, struct inode *inode,
|
||||
u64 offset;
|
||||
s64 ret;
|
||||
u8 flags;
|
||||
int err;
|
||||
int i;
|
||||
|
||||
flags = offline ? SEF_OFFLINE : 0;
|
||||
@@ -246,6 +247,18 @@ static s64 truncate_extents(struct super_block *sb, struct inode *inode,
|
||||
tr.len = min(ext.len - offset, last - iblock + 1);
|
||||
tr.flags = ext.flags;
|
||||
|
||||
trace_scoutfs_data_extent_truncated(sb, ino, &tr);
|
||||
|
||||
ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
|
||||
tr.start, tr.len, 0, flags);
|
||||
if (ret < 0) {
|
||||
if (WARN_ON_ONCE(ret == -EINVAL)) {
|
||||
scoutfs_err(sb, "unexpected truncate inconsistency: ino %llu iblock %llu last %llu, start %llu len %llu",
|
||||
ino, iblock, last, tr.start, tr.len);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (tr.map) {
|
||||
mutex_lock(&datinf->mutex);
|
||||
ret = scoutfs_free_data(sb, datinf->alloc,
|
||||
@@ -253,16 +266,16 @@ static s64 truncate_extents(struct super_block *sb, struct inode *inode,
|
||||
&datinf->data_freed,
|
||||
tr.map, tr.len);
|
||||
mutex_unlock(&datinf->mutex);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
err = scoutfs_ext_set(sb, &data_ext_ops, &args,
|
||||
tr.start, tr.len, tr.map, tr.flags);
|
||||
if (err < 0)
|
||||
scoutfs_err(sb, "truncate err %d restoring extent after error %lld: ino %llu start %llu len %llu",
|
||||
err, ret, ino, tr.start, tr.len);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
trace_scoutfs_data_extent_truncated(sb, ino, &tr);
|
||||
|
||||
ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
|
||||
tr.start, tr.len, 0, flags);
|
||||
BUG_ON(ret); /* inconsistent, could prealloc items */
|
||||
|
||||
iblock += tr.len;
|
||||
}
|
||||
|
||||
@@ -1018,8 +1031,10 @@ long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
|
||||
end = (iblock + ret) << SCOUTFS_BLOCK_SM_SHIFT;
|
||||
if (end > offset + len)
|
||||
end = offset + len;
|
||||
if (end > i_size_read(inode))
|
||||
if (end > i_size_read(inode)) {
|
||||
i_size_write(inode, end);
|
||||
scoutfs_inode_inc_data_version(inode);
|
||||
}
|
||||
}
|
||||
if (ret >= 0)
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
|
||||
+2
-11
@@ -253,7 +253,7 @@ static u64 dirent_name_hash(const char *name, unsigned int name_len)
|
||||
((u64)dirent_name_fingerprint(name, name_len) << 32);
|
||||
}
|
||||
|
||||
static u64 dirent_names_equal(const char *a_name, unsigned int a_len,
|
||||
static bool dirent_names_equal(const char *a_name, unsigned int a_len,
|
||||
const char *b_name, unsigned int b_len)
|
||||
{
|
||||
return a_len == b_len && memcmp(a_name, b_name, a_len) == 0;
|
||||
@@ -462,7 +462,7 @@ out:
|
||||
else if (ino == 0)
|
||||
inode = NULL;
|
||||
else
|
||||
inode = scoutfs_iget(sb, ino);
|
||||
inode = scoutfs_iget(sb, ino, 0);
|
||||
|
||||
/*
|
||||
* We can't splice dir aliases into the dcache. dir entries
|
||||
@@ -753,7 +753,6 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
struct inode *inode = NULL;
|
||||
struct scoutfs_lock *dir_lock = NULL;
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
struct scoutfs_inode_info *si;
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 hash;
|
||||
u64 pos;
|
||||
@@ -767,7 +766,6 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
&dir_lock, &inode_lock, NULL, &ind_locks);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
si = SCOUTFS_I(inode);
|
||||
|
||||
pos = SCOUTFS_I(dir)->next_readdir_pos++;
|
||||
|
||||
@@ -783,7 +781,6 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
inode->i_mtime = inode->i_atime = inode->i_ctime = dir->i_mtime;
|
||||
si->crtime = inode->i_mtime;
|
||||
|
||||
if (S_ISDIR(mode)) {
|
||||
inc_nlink(inode);
|
||||
@@ -1188,7 +1185,6 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
struct inode *inode = NULL;
|
||||
struct scoutfs_lock *dir_lock = NULL;
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
struct scoutfs_inode_info *si;
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 hash;
|
||||
u64 pos;
|
||||
@@ -1209,7 +1205,6 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
&dir_lock, &inode_lock, NULL, &ind_locks);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
si = SCOUTFS_I(inode);
|
||||
|
||||
ret = symlink_item_ops(sb, SYM_CREATE, scoutfs_ino(inode), inode_lock,
|
||||
symname, name_len);
|
||||
@@ -1231,7 +1226,6 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
|
||||
inode->i_ctime = dir->i_mtime;
|
||||
si->crtime = inode->i_ctime;
|
||||
i_size_write(inode, name_len);
|
||||
|
||||
scoutfs_update_inode_item(inode, inode_lock, &ind_locks);
|
||||
@@ -1823,7 +1817,6 @@ static int scoutfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mod
|
||||
struct scoutfs_lock *dir_lock = NULL;
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
struct scoutfs_lock *orph_lock = NULL;
|
||||
struct scoutfs_inode_info *si;
|
||||
LIST_HEAD(ind_locks);
|
||||
int ret;
|
||||
|
||||
@@ -1834,7 +1827,6 @@ static int scoutfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mod
|
||||
&dir_lock, &inode_lock, &orph_lock, &ind_locks);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
si = SCOUTFS_I(inode);
|
||||
|
||||
ret = scoutfs_inode_orphan_create(sb, scoutfs_ino(inode), orph_lock);
|
||||
if (ret < 0) {
|
||||
@@ -1843,7 +1835,6 @@ static int scoutfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mod
|
||||
}
|
||||
|
||||
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
|
||||
si->crtime = inode->i_mtime;
|
||||
insert_inode_hash(inode);
|
||||
ihold(inode); /* need to update inode modifications in d_tmpfile */
|
||||
d_tmpfile(dentry, inode);
|
||||
|
||||
+3
-3
@@ -81,7 +81,7 @@ static struct dentry *scoutfs_fh_to_dentry(struct super_block *sb,
|
||||
trace_scoutfs_fh_to_dentry(sb, fh_type, sfid);
|
||||
|
||||
if (scoutfs_valid_fileid(fh_type))
|
||||
inode = scoutfs_iget(sb, le64_to_cpu(sfid->ino));
|
||||
inode = scoutfs_iget(sb, le64_to_cpu(sfid->ino), 0);
|
||||
|
||||
return d_obtain_alias(inode);
|
||||
}
|
||||
@@ -100,7 +100,7 @@ static struct dentry *scoutfs_fh_to_parent(struct super_block *sb,
|
||||
|
||||
if (scoutfs_valid_fileid(fh_type) &&
|
||||
fh_type == FILEID_SCOUTFS_WITH_PARENT)
|
||||
inode = scoutfs_iget(sb, le64_to_cpu(sfid->parent_ino));
|
||||
inode = scoutfs_iget(sb, le64_to_cpu(sfid->parent_ino), 0);
|
||||
|
||||
return d_obtain_alias(inode);
|
||||
}
|
||||
@@ -123,7 +123,7 @@ static struct dentry *scoutfs_get_parent(struct dentry *child)
|
||||
scoutfs_dir_free_backref_path(sb, &list);
|
||||
trace_scoutfs_get_parent(sb, inode, ino);
|
||||
|
||||
inode = scoutfs_iget(sb, ino);
|
||||
inode = scoutfs_iget(sb, ino, 0);
|
||||
|
||||
return d_obtain_alias(inode);
|
||||
}
|
||||
|
||||
+1
-1
@@ -376,7 +376,7 @@ int scoutfs_fence_wait_fenced(struct super_block *sb, long timeout_jiffies)
|
||||
bool error;
|
||||
long ret;
|
||||
|
||||
ret = wait_event_interruptible_timeout(fi->waitq, all_fenced(fi, &error), timeout_jiffies);
|
||||
ret = wait_event_timeout(fi->waitq, all_fenced(fi, &error), timeout_jiffies);
|
||||
if (ret == 0)
|
||||
ret = -ETIMEDOUT;
|
||||
else if (ret > 0)
|
||||
|
||||
+8
-3
@@ -747,9 +747,6 @@ int scoutfs_forest_setup(struct super_block *sb)
|
||||
goto out;
|
||||
}
|
||||
|
||||
queue_delayed_work(finf->workq, &finf->log_merge_dwork,
|
||||
msecs_to_jiffies(LOG_MERGE_DELAY_MS));
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret)
|
||||
@@ -758,6 +755,14 @@ out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
void scoutfs_forest_start(struct super_block *sb)
|
||||
{
|
||||
DECLARE_FOREST_INFO(sb, finf);
|
||||
|
||||
queue_delayed_work(finf->workq, &finf->log_merge_dwork,
|
||||
msecs_to_jiffies(LOG_MERGE_DELAY_MS));
|
||||
}
|
||||
|
||||
void scoutfs_forest_stop(struct super_block *sb)
|
||||
{
|
||||
DECLARE_FOREST_INFO(sb, finf);
|
||||
|
||||
@@ -39,6 +39,7 @@ void scoutfs_forest_get_btrees(struct super_block *sb,
|
||||
struct scoutfs_log_trees *lt);
|
||||
|
||||
int scoutfs_forest_setup(struct super_block *sb);
|
||||
void scoutfs_forest_start(struct super_block *sb);
|
||||
void scoutfs_forest_stop(struct super_block *sb);
|
||||
void scoutfs_forest_destroy(struct super_block *sb);
|
||||
|
||||
|
||||
+7
-5
@@ -779,11 +779,7 @@ struct scoutfs_super_block {
|
||||
__le64 seq;
|
||||
__le64 next_ino;
|
||||
__le64 total_meta_blocks; /* both static and dynamic */
|
||||
__le64 first_meta_blkno; /* first dynamically allocated */
|
||||
__le64 last_meta_blkno;
|
||||
__le64 total_data_blocks;
|
||||
__le64 first_data_blkno;
|
||||
__le64 last_data_blkno;
|
||||
struct scoutfs_quorum_config qconf;
|
||||
struct scoutfs_alloc_root meta_alloc[2];
|
||||
struct scoutfs_alloc_root data_alloc;
|
||||
@@ -821,6 +817,7 @@ struct scoutfs_super_block {
|
||||
* online by staging.
|
||||
*
|
||||
* XXX
|
||||
* - otime?
|
||||
* - compat flags?
|
||||
* - version?
|
||||
* - generation?
|
||||
@@ -844,7 +841,6 @@ struct scoutfs_inode {
|
||||
struct scoutfs_timespec atime;
|
||||
struct scoutfs_timespec ctime;
|
||||
struct scoutfs_timespec mtime;
|
||||
struct scoutfs_timespec crtime;
|
||||
};
|
||||
|
||||
#define SCOUTFS_INO_FLAG_TRUNCATE 0x1
|
||||
@@ -990,6 +986,7 @@ enum scoutfs_net_cmd {
|
||||
SCOUTFS_NET_CMD_GET_VOLOPT,
|
||||
SCOUTFS_NET_CMD_SET_VOLOPT,
|
||||
SCOUTFS_NET_CMD_CLEAR_VOLOPT,
|
||||
SCOUTFS_NET_CMD_RESIZE_DEVICES,
|
||||
SCOUTFS_NET_CMD_FAREWELL,
|
||||
SCOUTFS_NET_CMD_UNKNOWN,
|
||||
};
|
||||
@@ -1032,6 +1029,11 @@ struct scoutfs_net_roots {
|
||||
struct scoutfs_btree_root srch_root;
|
||||
};
|
||||
|
||||
struct scoutfs_net_resize_devices {
|
||||
__le64 new_total_meta_blocks;
|
||||
__le64 new_total_data_blocks;
|
||||
};
|
||||
|
||||
struct scoutfs_net_lock {
|
||||
struct scoutfs_key key;
|
||||
__le64 write_seq;
|
||||
|
||||
+82
-85
@@ -59,7 +59,7 @@ struct inode_sb_info {
|
||||
bool stopped;
|
||||
|
||||
spinlock_t writeback_lock;
|
||||
struct rb_root writeback_inodes;
|
||||
struct list_head writeback_list;
|
||||
struct inode_allocator dir_ino_alloc;
|
||||
struct inode_allocator ino_alloc;
|
||||
|
||||
@@ -68,6 +68,9 @@ struct inode_sb_info {
|
||||
/* serialize multiple inode ->evict trying to delete same ino's items */
|
||||
spinlock_t deleting_items_lock;
|
||||
struct list_head deleting_items_list;
|
||||
|
||||
struct work_struct iput_work;
|
||||
struct llist_head iput_llist;
|
||||
};
|
||||
|
||||
#define DECLARE_INODE_SB_INFO(sb, name) \
|
||||
@@ -92,9 +95,9 @@ static void scoutfs_inode_ctor(void *obj)
|
||||
atomic64_set(&si->data_waitq.changed, 0);
|
||||
init_waitqueue_head(&si->data_waitq.waitq);
|
||||
init_rwsem(&si->xattr_rwsem);
|
||||
RB_CLEAR_NODE(&si->writeback_node);
|
||||
INIT_LIST_HEAD(&si->writeback_entry);
|
||||
scoutfs_lock_init_coverage(&si->ino_lock_cov);
|
||||
atomic_set(&si->inv_iput_count, 0);
|
||||
atomic_set(&si->iput_count, 0);
|
||||
|
||||
inode_init_once(&si->inode);
|
||||
}
|
||||
@@ -118,47 +121,14 @@ static void scoutfs_i_callback(struct rcu_head *head)
|
||||
kmem_cache_free(scoutfs_inode_cachep, SCOUTFS_I(inode));
|
||||
}
|
||||
|
||||
static void insert_writeback_inode(struct inode_sb_info *inf,
|
||||
struct scoutfs_inode_info *ins)
|
||||
{
|
||||
struct rb_root *root = &inf->writeback_inodes;
|
||||
struct rb_node **node = &root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct scoutfs_inode_info *si;
|
||||
|
||||
while (*node) {
|
||||
parent = *node;
|
||||
si = container_of(*node, struct scoutfs_inode_info,
|
||||
writeback_node);
|
||||
|
||||
if (ins->ino < si->ino)
|
||||
node = &(*node)->rb_left;
|
||||
else if (ins->ino > si->ino)
|
||||
node = &(*node)->rb_right;
|
||||
else
|
||||
BUG();
|
||||
}
|
||||
|
||||
rb_link_node(&ins->writeback_node, parent, node);
|
||||
rb_insert_color(&ins->writeback_node, root);
|
||||
}
|
||||
|
||||
static void remove_writeback_inode(struct inode_sb_info *inf,
|
||||
struct scoutfs_inode_info *si)
|
||||
{
|
||||
if (!RB_EMPTY_NODE(&si->writeback_node)) {
|
||||
rb_erase(&si->writeback_node, &inf->writeback_inodes);
|
||||
RB_CLEAR_NODE(&si->writeback_node);
|
||||
}
|
||||
}
|
||||
|
||||
void scoutfs_destroy_inode(struct inode *inode)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
DECLARE_INODE_SB_INFO(inode->i_sb, inf);
|
||||
|
||||
spin_lock(&inf->writeback_lock);
|
||||
remove_writeback_inode(inf, SCOUTFS_I(inode));
|
||||
if (!list_empty(&si->writeback_entry))
|
||||
list_del_init(&si->writeback_entry);
|
||||
spin_unlock(&inf->writeback_lock);
|
||||
|
||||
scoutfs_lock_del_coverage(inode->i_sb, &si->ino_lock_cov);
|
||||
@@ -262,8 +232,6 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
si->next_readdir_pos = le64_to_cpu(cinode->next_readdir_pos);
|
||||
si->next_xattr_id = le64_to_cpu(cinode->next_xattr_id);
|
||||
si->flags = le32_to_cpu(cinode->flags);
|
||||
si->crtime.tv_sec = le64_to_cpu(cinode->crtime.sec);
|
||||
si->crtime.tv_nsec = le32_to_cpu(cinode->crtime.nsec);
|
||||
|
||||
/*
|
||||
* i_blocks is initialized from online and offline and is then
|
||||
@@ -694,14 +662,14 @@ struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino)
|
||||
return ilookup5(sb, ino, scoutfs_iget_test, &ino);
|
||||
}
|
||||
|
||||
struct inode *scoutfs_iget(struct super_block *sb, u64 ino)
|
||||
struct inode *scoutfs_iget(struct super_block *sb, u64 ino, int lkf)
|
||||
{
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct scoutfs_inode_info *si;
|
||||
struct inode *inode;
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_READ, 0, ino, &lock);
|
||||
ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_READ, lkf, ino, &lock);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
@@ -766,9 +734,6 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
|
||||
cinode->next_readdir_pos = cpu_to_le64(si->next_readdir_pos);
|
||||
cinode->next_xattr_id = cpu_to_le64(si->next_xattr_id);
|
||||
cinode->flags = cpu_to_le32(si->flags);
|
||||
cinode->crtime.sec = cpu_to_le64(si->crtime.tv_sec);
|
||||
cinode->crtime.nsec = cpu_to_le32(si->crtime.tv_nsec);
|
||||
memset(cinode->crtime.__pad, 0, sizeof(cinode->crtime.__pad));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1662,11 +1627,6 @@ void scoutfs_evict_inode(struct inode *inode)
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
scoutfs_unlock(sb, orph_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
}
|
||||
if (ret == -ERESTARTSYS) {
|
||||
/* can be in task with pending, could be found as orphan */
|
||||
scoutfs_inc_counter(sb, inode_evict_intr);
|
||||
ret = 0;
|
||||
}
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "error %d while checking to delete inode nr %llu, it might linger.",
|
||||
ret, ino);
|
||||
@@ -1704,6 +1664,49 @@ int scoutfs_drop_inode(struct inode *inode)
|
||||
generic_drop_inode(inode);
|
||||
}
|
||||
|
||||
static void iput_worker(struct work_struct *work)
|
||||
{
|
||||
struct inode_sb_info *inf = container_of(work, struct inode_sb_info, iput_work);
|
||||
struct scoutfs_inode_info *si;
|
||||
struct scoutfs_inode_info *tmp;
|
||||
struct llist_node *inodes;
|
||||
bool more;
|
||||
|
||||
inodes = llist_del_all(&inf->iput_llist);
|
||||
|
||||
llist_for_each_entry_safe(si, tmp, inodes, iput_llnode) {
|
||||
do {
|
||||
more = atomic_dec_return(&si->iput_count) > 0;
|
||||
iput(&si->inode);
|
||||
} while (more);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Final iput can get into evict and perform final inode deletion which
|
||||
* can delete a lot of items spanning multiple cluster locks and
|
||||
* transactions. It should be understood as a heavy high level
|
||||
* operation, more like file writing and less like dropping a refcount.
|
||||
*
|
||||
* Unfortunately we also have incentives to use igrab/iput from internal
|
||||
* contexts that have no business doing that work, like lock
|
||||
* invalidation or dirty inode writeback during transaction commit.
|
||||
*
|
||||
* In those cases we can kick iput off to background work context.
|
||||
* Nothing stops multiple puts of an inode before the work runs so we
|
||||
* can track multiple puts in flight.
|
||||
*/
|
||||
void scoutfs_inode_queue_iput(struct inode *inode)
|
||||
{
|
||||
DECLARE_INODE_SB_INFO(inode->i_sb, inf);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
if (atomic_inc_return(&si->iput_count) == 1)
|
||||
llist_add(&si->iput_llnode, &inf->iput_llist);
|
||||
smp_wmb(); /* count and list visible before work executes */
|
||||
schedule_work(&inf->iput_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* All mounts are performing this work concurrently. We introduce
|
||||
* significant jitter between them to try and keep them from all
|
||||
@@ -1819,7 +1822,7 @@ static void inode_orphan_scan_worker(struct work_struct *work)
|
||||
}
|
||||
|
||||
/* try to cached and evict unused inode to delete, can be racing */
|
||||
inode = scoutfs_iget(sb, ino);
|
||||
inode = scoutfs_iget(sb, ino, 0);
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
if (ret == -ENOENT)
|
||||
@@ -1848,30 +1851,33 @@ out:
|
||||
* ourselves in knots trying to call through the high level vfs sync
|
||||
* methods.
|
||||
*
|
||||
* File data block allocations tend to advance through free space so we
|
||||
* add the inode to the end of the list to roughly encourage sequential
|
||||
* IO.
|
||||
*
|
||||
* This is called by writers who hold the inode and transaction. The
|
||||
* inode's presence in the rbtree is removed by destroy_inode, prevented
|
||||
* by the inode hold, and by committing the transaction, which is
|
||||
* prevented by holding the transaction. The inode can only go from
|
||||
* empty to on the rbtree while we're here.
|
||||
* inode is removed from the list by evict->destroy if it's unlinked
|
||||
* during the transaction or by committing the transaction. Pruning the
|
||||
* icache won't try to evict the inode as long as it has dirty buffers.
|
||||
*/
|
||||
void scoutfs_inode_queue_writeback(struct inode *inode)
|
||||
{
|
||||
DECLARE_INODE_SB_INFO(inode->i_sb, inf);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
if (RB_EMPTY_NODE(&si->writeback_node)) {
|
||||
if (list_empty(&si->writeback_entry)) {
|
||||
spin_lock(&inf->writeback_lock);
|
||||
if (RB_EMPTY_NODE(&si->writeback_node))
|
||||
insert_writeback_inode(inf, si);
|
||||
if (list_empty(&si->writeback_entry))
|
||||
list_add_tail(&si->writeback_entry, &inf->writeback_list);
|
||||
spin_unlock(&inf->writeback_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk our dirty inodes in ino order and either start dirty page
|
||||
* writeback or wait for writeback to complete.
|
||||
* Walk our dirty inodes and either start dirty page writeback or wait
|
||||
* for writeback to complete.
|
||||
*
|
||||
* This is called by transaction commiting so other writers are
|
||||
* This is called by transaction committing so other writers are
|
||||
* excluded. We're still very careful to iterate over the tree while it
|
||||
* and the inodes could be changing.
|
||||
*
|
||||
@@ -1884,29 +1890,19 @@ int scoutfs_inode_walk_writeback(struct super_block *sb, bool write)
|
||||
{
|
||||
DECLARE_INODE_SB_INFO(sb, inf);
|
||||
struct scoutfs_inode_info *si;
|
||||
struct rb_node *node;
|
||||
struct scoutfs_inode_info *tmp;
|
||||
struct inode *inode;
|
||||
struct inode *defer_iput = NULL;
|
||||
int ret;
|
||||
|
||||
spin_lock(&inf->writeback_lock);
|
||||
|
||||
node = rb_first(&inf->writeback_inodes);
|
||||
while (node) {
|
||||
si = container_of(node, struct scoutfs_inode_info,
|
||||
writeback_node);
|
||||
node = rb_next(node);
|
||||
list_for_each_entry_safe(si, tmp, &inf->writeback_list, writeback_entry) {
|
||||
inode = igrab(&si->inode);
|
||||
if (!inode)
|
||||
continue;
|
||||
|
||||
spin_unlock(&inf->writeback_lock);
|
||||
|
||||
if (defer_iput) {
|
||||
iput(defer_iput);
|
||||
defer_iput = NULL;
|
||||
}
|
||||
|
||||
if (write)
|
||||
ret = filemap_fdatawrite(inode->i_mapping);
|
||||
else
|
||||
@@ -1914,28 +1910,28 @@ int scoutfs_inode_walk_writeback(struct super_block *sb, bool write)
|
||||
trace_scoutfs_inode_walk_writeback(sb, scoutfs_ino(inode),
|
||||
write, ret);
|
||||
if (ret) {
|
||||
iput(inode);
|
||||
scoutfs_inode_queue_iput(inode);
|
||||
goto out;
|
||||
}
|
||||
|
||||
spin_lock(&inf->writeback_lock);
|
||||
|
||||
if (WARN_ON_ONCE(RB_EMPTY_NODE(&si->writeback_node)))
|
||||
node = rb_first(&inf->writeback_inodes);
|
||||
/* restore tmp after reacquiring lock */
|
||||
if (WARN_ON_ONCE(list_empty(&si->writeback_entry)))
|
||||
tmp = list_first_entry(&inf->writeback_list, struct scoutfs_inode_info,
|
||||
writeback_entry);
|
||||
else
|
||||
node = rb_next(&si->writeback_node);
|
||||
tmp = list_next_entry(si, writeback_entry);
|
||||
|
||||
if (!write)
|
||||
remove_writeback_inode(inf, si);
|
||||
list_del_init(&si->writeback_entry);
|
||||
|
||||
/* avoid iput->destroy lock deadlock */
|
||||
defer_iput = inode;
|
||||
scoutfs_inode_queue_iput(inode);
|
||||
}
|
||||
|
||||
spin_unlock(&inf->writeback_lock);
|
||||
out:
|
||||
if (defer_iput)
|
||||
iput(defer_iput);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1950,12 +1946,14 @@ int scoutfs_inode_setup(struct super_block *sb)
|
||||
|
||||
inf->sb = sb;
|
||||
spin_lock_init(&inf->writeback_lock);
|
||||
inf->writeback_inodes = RB_ROOT;
|
||||
INIT_LIST_HEAD(&inf->writeback_list);
|
||||
spin_lock_init(&inf->dir_ino_alloc.lock);
|
||||
spin_lock_init(&inf->ino_alloc.lock);
|
||||
INIT_DELAYED_WORK(&inf->orphan_scan_dwork, inode_orphan_scan_worker);
|
||||
spin_lock_init(&inf->deleting_items_lock);
|
||||
INIT_LIST_HEAD(&inf->deleting_items_list);
|
||||
INIT_WORK(&inf->iput_work, iput_worker);
|
||||
init_llist_head(&inf->iput_llist);
|
||||
|
||||
sbi->inode_sb_info = inf;
|
||||
|
||||
@@ -1967,12 +1965,11 @@ int scoutfs_inode_setup(struct super_block *sb)
|
||||
* many other subsystems like networking and the server. We only kick
|
||||
* it off once everything is ready.
|
||||
*/
|
||||
int scoutfs_inode_start(struct super_block *sb)
|
||||
void scoutfs_inode_start(struct super_block *sb)
|
||||
{
|
||||
DECLARE_INODE_SB_INFO(sb, inf);
|
||||
|
||||
schedule_orphan_dwork(inf);
|
||||
return 0;
|
||||
}
|
||||
|
||||
void scoutfs_inode_stop(struct super_block *sb)
|
||||
|
||||
+6
-6
@@ -20,7 +20,6 @@ struct scoutfs_inode_info {
|
||||
u64 online_blocks;
|
||||
u64 offline_blocks;
|
||||
u32 flags;
|
||||
struct timespec crtime;
|
||||
|
||||
/*
|
||||
* Protects per-inode extent items, most particularly readers
|
||||
@@ -50,14 +49,14 @@ struct scoutfs_inode_info {
|
||||
struct scoutfs_per_task pt_data_lock;
|
||||
struct scoutfs_data_waitq data_waitq;
|
||||
struct rw_semaphore xattr_rwsem;
|
||||
struct rb_node writeback_node;
|
||||
struct list_head writeback_entry;
|
||||
|
||||
struct scoutfs_lock_coverage ino_lock_cov;
|
||||
|
||||
/* drop if i_count hits 0, allows drop while invalidate holds coverage */
|
||||
bool drop_invalidated;
|
||||
struct llist_node inv_iput_llnode;
|
||||
atomic_t inv_iput_count;
|
||||
struct llist_node iput_llnode;
|
||||
atomic_t iput_count;
|
||||
|
||||
struct inode inode;
|
||||
};
|
||||
@@ -76,8 +75,9 @@ struct inode *scoutfs_alloc_inode(struct super_block *sb);
|
||||
void scoutfs_destroy_inode(struct inode *inode);
|
||||
int scoutfs_drop_inode(struct inode *inode);
|
||||
void scoutfs_evict_inode(struct inode *inode);
|
||||
void scoutfs_inode_queue_iput(struct inode *inode);
|
||||
|
||||
struct inode *scoutfs_iget(struct super_block *sb, u64 ino);
|
||||
struct inode *scoutfs_iget(struct super_block *sb, u64 ino, int lkf);
|
||||
struct inode *scoutfs_ilookup(struct super_block *sb, u64 ino);
|
||||
|
||||
void scoutfs_inode_init_index_key(struct scoutfs_key *key, u8 type, u64 major,
|
||||
@@ -132,7 +132,7 @@ void scoutfs_inode_exit(void);
|
||||
int scoutfs_inode_init(void);
|
||||
|
||||
int scoutfs_inode_setup(struct super_block *sb);
|
||||
int scoutfs_inode_start(struct super_block *sb);
|
||||
void scoutfs_inode_start(struct super_block *sb);
|
||||
void scoutfs_inode_stop(struct super_block *sb);
|
||||
void scoutfs_inode_destroy(struct super_block *sb);
|
||||
|
||||
|
||||
+49
-11
@@ -541,7 +541,6 @@ out:
|
||||
static long scoutfs_ioc_stat_more(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct scoutfs_ioctl_stat_more stm;
|
||||
|
||||
if (get_user(stm.valid_bytes, (__u64 __user *)arg))
|
||||
@@ -553,8 +552,6 @@ static long scoutfs_ioc_stat_more(struct file *file, unsigned long arg)
|
||||
stm.data_seq = scoutfs_inode_data_seq(inode);
|
||||
stm.data_version = scoutfs_inode_data_version(inode);
|
||||
scoutfs_inode_get_onoff(inode, &stm.online_blocks, &stm.offline_blocks);
|
||||
stm.crtime_sec = si->crtime.tv_sec;
|
||||
stm.crtime_nsec = si->crtime.tv_nsec;
|
||||
|
||||
if (copy_to_user((void __user *)arg, &stm, stm.valid_bytes))
|
||||
return -EFAULT;
|
||||
@@ -620,7 +617,6 @@ static long scoutfs_ioc_data_waiting(struct file *file, unsigned long arg)
|
||||
static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file->f_inode;
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_ioctl_setattr_more __user *usm = (void __user *)arg;
|
||||
struct scoutfs_ioctl_setattr_more sm;
|
||||
@@ -689,8 +685,6 @@ static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
|
||||
i_size_write(inode, sm.i_size);
|
||||
inode->i_ctime.tv_sec = sm.ctime_sec;
|
||||
inode->i_ctime.tv_nsec = sm.ctime_nsec;
|
||||
si->crtime.tv_sec = sm.crtime_sec;
|
||||
si->crtime.tv_nsec = sm.crtime_nsec;
|
||||
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
ret = 0;
|
||||
@@ -873,13 +867,21 @@ static long scoutfs_ioc_statfs_more(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_super_block *super;
|
||||
struct scoutfs_ioctl_statfs_more sfm;
|
||||
int ret;
|
||||
|
||||
if (get_user(sfm.valid_bytes, (__u64 __user *)arg))
|
||||
return -EFAULT;
|
||||
|
||||
super = kzalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
if (!super)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
sfm.valid_bytes = min_t(u64, sfm.valid_bytes,
|
||||
sizeof(struct scoutfs_ioctl_statfs_more));
|
||||
sfm.fsid = le64_to_cpu(super->hdr.fsid);
|
||||
@@ -890,12 +892,15 @@ static long scoutfs_ioc_statfs_more(struct file *file, unsigned long arg)
|
||||
|
||||
ret = scoutfs_client_get_last_seq(sb, &sfm.committed_seq);
|
||||
if (ret)
|
||||
return ret;
|
||||
goto out;
|
||||
|
||||
if (copy_to_user((void __user *)arg, &sfm, sfm.valid_bytes))
|
||||
return -EFAULT;
|
||||
|
||||
return 0;
|
||||
ret = -EFAULT;
|
||||
else
|
||||
ret = 0;
|
||||
out:
|
||||
kfree(super);
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct copy_alloc_detail_args {
|
||||
@@ -999,6 +1004,37 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_resize_devices(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_resize_devices __user *urd = (void __user *)arg;
|
||||
struct scoutfs_ioctl_resize_devices rd;
|
||||
struct scoutfs_net_resize_devices nrd;
|
||||
int ret;
|
||||
|
||||
if (!(file->f_mode & FMODE_READ)) {
|
||||
ret = -EBADF;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(&rd, urd, sizeof(rd))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
nrd.new_total_meta_blocks = cpu_to_le64(rd.new_total_meta_blocks);
|
||||
nrd.new_total_data_blocks = cpu_to_le64(rd.new_total_data_blocks);
|
||||
|
||||
ret = scoutfs_client_resize_devices(sb, &nrd);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
switch (cmd) {
|
||||
@@ -1028,6 +1064,8 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
return scoutfs_ioc_alloc_detail(file, arg);
|
||||
case SCOUTFS_IOC_MOVE_BLOCKS:
|
||||
return scoutfs_ioc_move_blocks(file, arg);
|
||||
case SCOUTFS_IOC_RESIZE_DEVICES:
|
||||
return scoutfs_ioc_resize_devices(file, arg);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
||||
+9
-5
@@ -232,9 +232,6 @@ struct scoutfs_ioctl_stat_more {
|
||||
__u64 data_version;
|
||||
__u64 online_blocks;
|
||||
__u64 offline_blocks;
|
||||
__u64 crtime_sec;
|
||||
__u32 crtime_nsec;
|
||||
__u8 _pad[4];
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_STAT_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 5, \
|
||||
@@ -278,8 +275,7 @@ struct scoutfs_ioctl_setattr_more {
|
||||
__u64 flags;
|
||||
__u64 ctime_sec;
|
||||
__u32 ctime_nsec;
|
||||
__u32 crtime_nsec;
|
||||
__u64 crtime_sec;
|
||||
__u8 _pad[4];
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_SETATTR_MORE_OFFLINE (1 << 0)
|
||||
@@ -481,4 +477,12 @@ struct scoutfs_ioctl_move_blocks {
|
||||
#define SCOUTFS_IOC_MOVE_BLOCKS _IOR(SCOUTFS_IOCTL_MAGIC, 13, \
|
||||
struct scoutfs_ioctl_move_blocks)
|
||||
|
||||
struct scoutfs_ioctl_resize_devices {
|
||||
__u64 new_total_meta_blocks;
|
||||
__u64 new_total_data_blocks;
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_RESIZE_DEVICES \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 14, struct scoutfs_ioctl_resize_devices)
|
||||
|
||||
#endif
|
||||
|
||||
+10
-39
@@ -89,8 +89,6 @@ struct lock_info {
|
||||
struct work_struct shrink_work;
|
||||
struct list_head shrink_list;
|
||||
atomic64_t next_refresh_gen;
|
||||
struct work_struct inv_iput_work;
|
||||
struct llist_head inv_iput_llist;
|
||||
|
||||
struct dentry *tseq_dentry;
|
||||
struct scoutfs_tseq_tree tseq_tree;
|
||||
@@ -126,34 +124,6 @@ static bool lock_modes_match(int granted, int requested)
|
||||
requested == SCOUTFS_LOCK_READ);
|
||||
}
|
||||
|
||||
/*
|
||||
* Final iput can get into evict and perform final inode deletion which
|
||||
* can delete a lot of items under locks and transactions. We really
|
||||
* don't want to be doing all that in an iput during invalidation. When
|
||||
* invalidation sees that iput might perform final deletion it puts them
|
||||
* on a list and queues this work.
|
||||
*
|
||||
* Nothing stops multiple puts for multiple invalidations of an inode
|
||||
* before the work runs so we can track multiple puts in flight.
|
||||
*/
|
||||
static void lock_inv_iput_worker(struct work_struct *work)
|
||||
{
|
||||
struct lock_info *linfo = container_of(work, struct lock_info, inv_iput_work);
|
||||
struct scoutfs_inode_info *si;
|
||||
struct scoutfs_inode_info *tmp;
|
||||
struct llist_node *inodes;
|
||||
bool more;
|
||||
|
||||
inodes = llist_del_all(&linfo->inv_iput_llist);
|
||||
|
||||
llist_for_each_entry_safe(si, tmp, inodes, inv_iput_llnode) {
|
||||
do {
|
||||
more = atomic_dec_return(&si->inv_iput_count) > 0;
|
||||
iput(&si->inode);
|
||||
} while (more);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidate cached data associated with an inode whose lock is going
|
||||
* away.
|
||||
@@ -194,11 +164,8 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
|
||||
if (scoutfs_lock_is_covered(sb, &si->ino_lock_cov) && inode->i_nlink > 0) {
|
||||
iput(inode);
|
||||
} else {
|
||||
/* defer iput to work context so we don't evict inodes from invalidation */
|
||||
if (atomic_inc_return(&si->inv_iput_count) == 1)
|
||||
llist_add(&si->inv_iput_llnode, &linfo->inv_iput_llist);
|
||||
smp_wmb(); /* count and list visible before work executes */
|
||||
queue_work(linfo->workq, &linfo->inv_iput_work);
|
||||
/* defer iput to work context so we don't evict inodes from invalidation */
|
||||
scoutfs_inode_queue_iput(inode);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1128,8 +1095,14 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
|
||||
|
||||
trace_scoutfs_lock_wait(sb, lock);
|
||||
|
||||
ret = wait_event_interruptible(lock->waitq,
|
||||
lock_wait_cond(sb, lock, mode));
|
||||
if (flags & SCOUTFS_LKF_INTERRUPTIBLE) {
|
||||
ret = wait_event_interruptible(lock->waitq,
|
||||
lock_wait_cond(sb, lock, mode));
|
||||
} else {
|
||||
wait_event(lock->waitq, lock_wait_cond(sb, lock, mode));
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
if (ret)
|
||||
break;
|
||||
@@ -1789,8 +1762,6 @@ int scoutfs_lock_setup(struct super_block *sb)
|
||||
INIT_WORK(&linfo->shrink_work, lock_shrink_worker);
|
||||
INIT_LIST_HEAD(&linfo->shrink_list);
|
||||
atomic64_set(&linfo->next_refresh_gen, 0);
|
||||
INIT_WORK(&linfo->inv_iput_work, lock_inv_iput_worker);
|
||||
init_llist_head(&linfo->inv_iput_llist);
|
||||
scoutfs_tseq_tree_init(&linfo->tseq_tree, lock_tseq_show);
|
||||
|
||||
sbi->lock_info = linfo;
|
||||
|
||||
+2
-1
@@ -6,7 +6,8 @@
|
||||
|
||||
#define SCOUTFS_LKF_REFRESH_INODE 0x01 /* update stale inode from item */
|
||||
#define SCOUTFS_LKF_NONBLOCK 0x02 /* only use already held locks */
|
||||
#define SCOUTFS_LKF_INVALID (~((SCOUTFS_LKF_NONBLOCK << 1) - 1))
|
||||
#define SCOUTFS_LKF_INTERRUPTIBLE 0x04 /* pending signals return -ERESTARTSYS */
|
||||
#define SCOUTFS_LKF_INVALID (~((SCOUTFS_LKF_INTERRUPTIBLE << 1) - 1))
|
||||
|
||||
#define SCOUTFS_LOCK_NR_MODES SCOUTFS_LOCK_INVALID
|
||||
|
||||
|
||||
+9
-13
@@ -1486,8 +1486,7 @@ int scoutfs_net_connect(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
struct sockaddr_in *sin, unsigned long timeout_ms)
|
||||
{
|
||||
int error = 0;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
spin_lock(&conn->lock);
|
||||
conn->connect_sin = *sin;
|
||||
@@ -1495,10 +1494,8 @@ int scoutfs_net_connect(struct super_block *sb,
|
||||
spin_unlock(&conn->lock);
|
||||
|
||||
queue_work(conn->workq, &conn->connect_work);
|
||||
|
||||
ret = wait_event_interruptible(conn->waitq,
|
||||
connect_result(conn, &error));
|
||||
return ret ?: error;
|
||||
wait_event(conn->waitq, connect_result(conn, &ret));
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void set_valid_greeting(struct scoutfs_net_connection *conn)
|
||||
@@ -1634,10 +1631,10 @@ restart:
|
||||
conn->next_send_id = reconn->next_send_id;
|
||||
atomic64_set(&conn->recv_seq, atomic64_read(&reconn->recv_seq));
|
||||
|
||||
/* greeting response/ack will be on conn send queue */
|
||||
/* reconn should be idle while in reconn_wait */
|
||||
BUG_ON(!list_empty(&reconn->send_queue));
|
||||
BUG_ON(!list_empty(&conn->resend_queue));
|
||||
list_splice_init(&reconn->resend_queue, &conn->resend_queue);
|
||||
/* queued greeting response is racing, can be in send or resend queue */
|
||||
list_splice_tail_init(&reconn->resend_queue, &conn->resend_queue);
|
||||
|
||||
/* new conn info is unused, swap, old won't call down */
|
||||
swap(conn->info, reconn->info);
|
||||
@@ -1801,11 +1798,10 @@ int scoutfs_net_sync_request(struct super_block *sb,
|
||||
ret = scoutfs_net_submit_request(sb, conn, cmd, arg, arg_len,
|
||||
sync_response, &sreq, &id);
|
||||
|
||||
ret = wait_for_completion_interruptible(&sreq.comp);
|
||||
if (ret == -ERESTARTSYS)
|
||||
scoutfs_net_cancel_request(sb, conn, cmd, id);
|
||||
else
|
||||
if (ret == 0) {
|
||||
wait_for_completion(&sreq.comp);
|
||||
ret = sreq.error;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
+26
-20
@@ -97,7 +97,7 @@ struct quorum_host_msg {
|
||||
|
||||
struct last_msg {
|
||||
struct quorum_host_msg msg;
|
||||
struct timespec64 ts;
|
||||
ktime_t ts;
|
||||
};
|
||||
|
||||
enum quorum_role { FOLLOWER, CANDIDATE, LEADER };
|
||||
@@ -209,7 +209,7 @@ static void send_msg_members(struct super_block *sb, int type, u64 term,
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct mount_options *opts = &SCOUTFS_SB(sb)->opts;
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct timespec64 ts;
|
||||
ktime_t now;
|
||||
int i;
|
||||
|
||||
struct scoutfs_quorum_message qmes = {
|
||||
@@ -235,7 +235,6 @@ static void send_msg_members(struct super_block *sb, int type, u64 term,
|
||||
|
||||
qmes.crc = quorum_message_crc(&qmes);
|
||||
|
||||
ts = ktime_to_timespec64(ktime_get());
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (!quorum_slot_present(super, i) ||
|
||||
@@ -243,12 +242,13 @@ static void send_msg_members(struct super_block *sb, int type, u64 term,
|
||||
continue;
|
||||
|
||||
scoutfs_quorum_slot_sin(super, i, &sin);
|
||||
now = ktime_get();
|
||||
kernel_sendmsg(qinf->sock, &mh, &kv, 1, kv.iov_len);
|
||||
|
||||
spin_lock(&qinf->show_lock);
|
||||
qinf->last_send[i].msg.term = term;
|
||||
qinf->last_send[i].msg.type = type;
|
||||
qinf->last_send[i].ts = ts;
|
||||
qinf->last_send[i].ts = now;
|
||||
spin_unlock(&qinf->show_lock);
|
||||
|
||||
if (i == only)
|
||||
@@ -308,6 +308,8 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
now = ktime_get();
|
||||
|
||||
if (ret != sizeof(qmes) ||
|
||||
qmes.crc != quorum_message_crc(&qmes) ||
|
||||
qmes.fsid != super->hdr.fsid ||
|
||||
@@ -327,7 +329,7 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
|
||||
|
||||
spin_lock(&qinf->show_lock);
|
||||
qinf->last_recv[msg->from].msg = *msg;
|
||||
qinf->last_recv[msg->from].ts = ktime_to_timespec64(ktime_get());
|
||||
qinf->last_recv[msg->from].ts = now;
|
||||
spin_unlock(&qinf->show_lock);
|
||||
|
||||
return 0;
|
||||
@@ -556,10 +558,8 @@ out:
|
||||
ret = err;
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "error %d attempting to find and fence previous leaders", ret);
|
||||
if (ret < 0)
|
||||
scoutfs_inc_counter(sb, quorum_fence_error);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -610,7 +610,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
while (!qinf->shutdown) {
|
||||
while (!(qinf->shutdown || scoutfs_forcing_unmount(sb))) {
|
||||
|
||||
ret = recv_msg(sb, &msg, qst.timeout);
|
||||
if (ret < 0) {
|
||||
@@ -733,13 +733,15 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
ret = scoutfs_server_start(sb, qst.term);
|
||||
if (ret < 0) {
|
||||
clear_bit(QINF_FLAG_SERVER, &qinf->flags);
|
||||
scoutfs_err(sb, "server startup failed with %d", ret);
|
||||
/* store our increased term */
|
||||
err = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP, qst.term,
|
||||
true);
|
||||
if (err < 0 && ret == 0)
|
||||
if (err < 0) {
|
||||
ret = err;
|
||||
goto out;
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -789,7 +791,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_END, qst.term, true);
|
||||
out:
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "quorum service saw error %d, shutting down. Cluster will be degraded until this slot is remounted to restart the quorum service",
|
||||
scoutfs_err(sb, "quorum service saw error %d, shutting down. This mount is no longer participating in quorum. It should be remounted to restore service.",
|
||||
ret);
|
||||
}
|
||||
}
|
||||
@@ -915,6 +917,7 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
struct quorum_status qst;
|
||||
struct last_msg last;
|
||||
struct timespec64 ts;
|
||||
const ktime_t now = ktime_get();
|
||||
size_t size;
|
||||
int ret;
|
||||
int i;
|
||||
@@ -936,9 +939,9 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
qst.vote_for);
|
||||
snprintf_ret(buf, size, &ret, "vote_bits 0x%lx (count %lu)\n",
|
||||
qst.vote_bits, hweight_long(qst.vote_bits));
|
||||
ts = ktime_to_timespec64(qst.timeout);
|
||||
snprintf_ret(buf, size, &ret, "timeout %llu.%u\n",
|
||||
(u64)ts.tv_sec, (int)ts.tv_nsec);
|
||||
ts = ktime_to_timespec64(ktime_sub(qst.timeout, now));
|
||||
snprintf_ret(buf, size, &ret, "timeout_in_secs %lld.%09u\n",
|
||||
(s64)ts.tv_sec, (int)ts.tv_nsec);
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
spin_lock(&qinf->show_lock);
|
||||
@@ -948,10 +951,11 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
if (last.msg.term == 0)
|
||||
continue;
|
||||
|
||||
ts = ktime_to_timespec64(ktime_sub(now, last.ts));
|
||||
snprintf_ret(buf, size, &ret,
|
||||
"last_send to %u term %llu type %u ts %llu.%u\n",
|
||||
"last_send to %u term %llu type %u secs_since %lld.%09u\n",
|
||||
i, last.msg.term, last.msg.type,
|
||||
(u64)last.ts.tv_sec, (int)last.ts.tv_nsec);
|
||||
(s64)ts.tv_sec, (int)ts.tv_nsec);
|
||||
}
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
@@ -961,10 +965,12 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
|
||||
if (last.msg.term == 0)
|
||||
continue;
|
||||
|
||||
ts = ktime_to_timespec64(ktime_sub(now, last.ts));
|
||||
snprintf_ret(buf, size, &ret,
|
||||
"last_recv from %u term %llu type %u ts %llu.%u\n",
|
||||
"last_recv from %u term %llu type %u secs_since %lld.%09u\n",
|
||||
i, last.msg.term, last.msg.type,
|
||||
(u64)last.ts.tv_sec, (int)last.ts.tv_nsec);
|
||||
(s64)ts.tv_sec, (int)ts.tv_nsec);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
+142
-21
@@ -323,7 +323,6 @@ static void scoutfs_server_commit_func(struct work_struct *work)
|
||||
struct commit_waiter *cw;
|
||||
struct commit_waiter *pos;
|
||||
struct llist_node *node;
|
||||
u64 reserved;
|
||||
int ret;
|
||||
|
||||
trace_scoutfs_server_commit_work_enter(sb, 0, 0);
|
||||
@@ -389,16 +388,19 @@ static void scoutfs_server_commit_func(struct work_struct *work)
|
||||
server->other_freed = &super->server_meta_freed[server->other_ind];
|
||||
|
||||
/*
|
||||
* The reserved metadata blocks includes the max size of
|
||||
* outstanding allocators and a server transaction could be
|
||||
* asked to refill all those allocators from meta_avail. If our
|
||||
* meta_avail falls below the reserved count, and freed is still
|
||||
* above it, then swap so that we don't start returning enospc
|
||||
* until we're truly low.
|
||||
* get_log_trees sets ALLOC_LOW when its allocator drops below
|
||||
* the reserved blocks after having filled the log trees's avail
|
||||
* allocator during its transaction. To avoid prematurely
|
||||
* setting the low flag and causing enospc we make sure that the
|
||||
* next transaction's meta_avail has 2x the reserved blocks so
|
||||
* that it can consume a full reserved amount and still have
|
||||
* enough to avoid enospc. We swap to freed if avail is under
|
||||
* the buffer and freed is larger.
|
||||
*/
|
||||
reserved = scoutfs_server_reserved_meta_blocks(sb);
|
||||
if (le64_to_cpu(server->meta_avail->total_len) <= reserved &&
|
||||
le64_to_cpu(server->meta_freed->total_len) > reserved)
|
||||
if ((le64_to_cpu(server->meta_avail->total_len) <
|
||||
(scoutfs_server_reserved_meta_blocks(sb) * 2)) &&
|
||||
(le64_to_cpu(server->meta_freed->total_len) >
|
||||
le64_to_cpu(server->meta_avail->total_len)))
|
||||
swap(server->meta_avail, server->meta_freed);
|
||||
|
||||
ret = 0;
|
||||
@@ -2355,15 +2357,25 @@ static int open_ino_map_response(struct super_block *sb, struct scoutfs_net_conn
|
||||
return scoutfs_omap_server_handle_response(sb, rid, resp);
|
||||
}
|
||||
|
||||
/* The server is sending an omap request to the client */
|
||||
/*
|
||||
* The server is sending an omap requests to all the clients it thought
|
||||
* were connected when it received a request from another client.
|
||||
* This send can race with the client's connection being removed. We
|
||||
* can drop those sends on the floor and mask ENOTCONN. The client's rid
|
||||
* will soon be removed from the request which will be correctly handled.
|
||||
*/
|
||||
int scoutfs_server_send_omap_request(struct super_block *sb, u64 rid,
|
||||
struct scoutfs_open_ino_map_args *args)
|
||||
{
|
||||
struct server_info *server = SCOUTFS_SB(sb)->server_info;
|
||||
int ret;
|
||||
|
||||
return scoutfs_net_submit_request_node(sb, server->conn, rid, SCOUTFS_NET_CMD_OPEN_INO_MAP,
|
||||
ret = scoutfs_net_submit_request_node(sb, server->conn, rid, SCOUTFS_NET_CMD_OPEN_INO_MAP,
|
||||
args, sizeof(*args),
|
||||
open_ino_map_response, NULL, NULL);
|
||||
if (ret == -ENOTCONN)
|
||||
ret = 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2553,6 +2565,103 @@ out:
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
|
||||
}
|
||||
|
||||
static u64 device_blocks(struct block_device *bdev, int shift)
|
||||
{
|
||||
return i_size_read(bdev->bd_inode) >> shift;
|
||||
}
|
||||
|
||||
static int server_resize_devices(struct super_block *sb, struct scoutfs_net_connection *conn,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_net_resize_devices *nrd;
|
||||
u64 meta_tot;
|
||||
u64 meta_start;
|
||||
u64 meta_len;
|
||||
u64 data_tot;
|
||||
u64 data_start;
|
||||
u64 data_len;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
if (arg_len != sizeof(struct scoutfs_net_resize_devices)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
nrd = arg;
|
||||
|
||||
meta_tot = le64_to_cpu(nrd->new_total_meta_blocks);
|
||||
data_tot = le64_to_cpu(nrd->new_total_data_blocks);
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
mutex_lock(&server->alloc_mutex);
|
||||
|
||||
if (meta_tot == le64_to_cpu(super->total_meta_blocks))
|
||||
meta_tot = 0;
|
||||
if (data_tot == le64_to_cpu(super->total_data_blocks))
|
||||
data_tot = 0;
|
||||
|
||||
if (!meta_tot && !data_tot) {
|
||||
ret = 0;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* we don't support shrinking */
|
||||
if ((meta_tot && (meta_tot < le64_to_cpu(super->total_meta_blocks))) ||
|
||||
(data_tot && (data_tot < le64_to_cpu(super->total_data_blocks)))) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* must be within devices */
|
||||
if ((meta_tot > device_blocks(sbi->meta_bdev, SCOUTFS_BLOCK_LG_SHIFT)) ||
|
||||
(data_tot > device_blocks(sb->s_bdev, SCOUTFS_BLOCK_SM_SHIFT))) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* extents are only used if _tot is set */
|
||||
meta_start = le64_to_cpu(super->total_meta_blocks);
|
||||
meta_len = meta_tot - meta_start;
|
||||
data_start = le64_to_cpu(super->total_data_blocks);
|
||||
data_len = data_tot - data_start;
|
||||
|
||||
if (meta_tot) {
|
||||
ret = scoutfs_alloc_insert(sb, &server->alloc, &server->wri,
|
||||
server->meta_avail, meta_start, meta_len);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (data_tot) {
|
||||
ret = scoutfs_alloc_insert(sb, &server->alloc, &server->wri,
|
||||
&super->data_alloc, data_start, data_len);
|
||||
if (ret < 0) {
|
||||
if (meta_tot) {
|
||||
err = scoutfs_alloc_remove(sb, &server->alloc, &server->wri,
|
||||
server->meta_avail, meta_start,
|
||||
meta_len);
|
||||
WARN_ON_ONCE(err); /* btree blocks are dirty.. really unlikely? */
|
||||
}
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
|
||||
if (meta_tot)
|
||||
super->total_meta_blocks = cpu_to_le64(meta_tot);
|
||||
if (data_tot)
|
||||
super->total_data_blocks = cpu_to_le64(data_tot);
|
||||
|
||||
ret = 0;
|
||||
unlock:
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
out:
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
|
||||
};
|
||||
|
||||
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
|
||||
{
|
||||
*key = (struct scoutfs_key) {
|
||||
@@ -3189,6 +3298,7 @@ static scoutfs_net_request_t server_req_funcs[] = {
|
||||
[SCOUTFS_NET_CMD_GET_VOLOPT] = server_get_volopt,
|
||||
[SCOUTFS_NET_CMD_SET_VOLOPT] = server_set_volopt,
|
||||
[SCOUTFS_NET_CMD_CLEAR_VOLOPT] = server_clear_volopt,
|
||||
[SCOUTFS_NET_CMD_RESIZE_DEVICES] = server_resize_devices,
|
||||
[SCOUTFS_NET_CMD_FAREWELL] = server_farewell,
|
||||
};
|
||||
|
||||
@@ -3463,13 +3573,15 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
|
||||
trace_scoutfs_server_work_enter(sb, 0, 0);
|
||||
|
||||
scoutfs_quorum_slot_sin(super, opts->quorum_slot_nr, &sin);
|
||||
scoutfs_info(sb, "server starting at "SIN_FMT, SIN_ARG(&sin));
|
||||
|
||||
/* first make sure no other servers are still running */
|
||||
ret = scoutfs_quorum_fence_leaders(sb, server->term);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "server error %d attempting to fence previous leaders", ret);
|
||||
goto out;
|
||||
|
||||
scoutfs_quorum_slot_sin(super, opts->quorum_slot_nr, &sin);
|
||||
scoutfs_info(sb, "server setting up at "SIN_FMT, SIN_ARG(&sin));
|
||||
}
|
||||
|
||||
conn = scoutfs_net_alloc_conn(sb, server_notify_up, server_notify_down,
|
||||
sizeof(struct server_client_info),
|
||||
@@ -3490,8 +3602,10 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
|
||||
/* start up the server subsystems before accepting */
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "server error %d reading super block", ret);
|
||||
goto shutdown;
|
||||
}
|
||||
|
||||
/* update volume options early, possibly for use during startup */
|
||||
write_seqcount_begin(&server->volopt_seqcount);
|
||||
@@ -3529,10 +3643,17 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
}
|
||||
scoutfs_server_set_seq_if_greater(sb, max_seq);
|
||||
|
||||
ret = scoutfs_lock_server_setup(sb, &server->alloc, &server->wri) ?:
|
||||
start_recovery(sb);
|
||||
if (ret)
|
||||
ret = scoutfs_lock_server_setup(sb, &server->alloc, &server->wri);
|
||||
if (ret) {
|
||||
scoutfs_err(sb, "server error %d starting lock server", ret);
|
||||
goto shutdown;
|
||||
}
|
||||
|
||||
ret = start_recovery(sb);
|
||||
if (ret) {
|
||||
scoutfs_err(sb, "server error %d starting client recovery", ret);
|
||||
goto shutdown;
|
||||
}
|
||||
|
||||
/* start accepting connections and processing work */
|
||||
server->conn = conn;
|
||||
@@ -3543,7 +3664,7 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
|
||||
queue_reclaim_work(server, 0);
|
||||
|
||||
/* wait_event/wake_up provide barriers */
|
||||
/* interruptible mostly to avoid stuck messages */
|
||||
wait_event_interruptible(server->waitq, test_shutting_down(server));
|
||||
|
||||
shutdown:
|
||||
|
||||
+30
-34
@@ -230,7 +230,15 @@ static void scoutfs_metadev_close(struct super_block *sb)
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
if (sbi->meta_bdev) {
|
||||
/*
|
||||
* Some kernels have blkdev_reread_part which calls
|
||||
* fsync_bdev while holding the bd_mutex which inverts
|
||||
* the s_umount hold in deactivate_super and blkdev_put
|
||||
* from kill_sb->put_super.
|
||||
*/
|
||||
lockdep_off();
|
||||
blkdev_put(sbi->meta_bdev, SCOUTFS_META_BDEV_MODE);
|
||||
lockdep_on();
|
||||
sbi->meta_bdev = NULL;
|
||||
}
|
||||
}
|
||||
@@ -328,28 +336,16 @@ int scoutfs_write_super(struct super_block *sb,
|
||||
sizeof(struct scoutfs_super_block));
|
||||
}
|
||||
|
||||
static bool invalid_blkno_limits(struct super_block *sb, char *which,
|
||||
u64 start, __le64 first, __le64 last,
|
||||
struct block_device *bdev, int shift)
|
||||
static bool small_bdev(struct super_block *sb, char *which, u64 blocks,
|
||||
struct block_device *bdev, int shift)
|
||||
{
|
||||
u64 blkno;
|
||||
u64 size = (u64)i_size_read(bdev->bd_inode);
|
||||
u64 count = size >> shift;
|
||||
|
||||
if (le64_to_cpu(first) < start) {
|
||||
scoutfs_err(sb, "super block first %s blkno %llu is within first valid blkno %llu",
|
||||
which, le64_to_cpu(first), start);
|
||||
return true;
|
||||
}
|
||||
if (blocks > count) {
|
||||
scoutfs_err(sb, "super block records %llu %s blocks, but device %u:%u size %llu only allows %llu blocks",
|
||||
blocks, which, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev), size, count);
|
||||
|
||||
if (le64_to_cpu(first) > le64_to_cpu(last)) {
|
||||
scoutfs_err(sb, "super block first %s blkno %llu is greater than last %s blkno %llu",
|
||||
which, le64_to_cpu(first), which, le64_to_cpu(last));
|
||||
return true;
|
||||
}
|
||||
|
||||
blkno = (i_size_read(bdev->bd_inode) >> shift) - 1;
|
||||
if (le64_to_cpu(last) > blkno) {
|
||||
scoutfs_err(sb, "super block last %s blkno %llu is beyond device size last blkno %llu",
|
||||
which, le64_to_cpu(last), blkno);
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -409,16 +405,10 @@ static int scoutfs_read_super_from_bdev(struct super_block *sb,
|
||||
|
||||
/* XXX do we want more rigorous invalid super checking? */
|
||||
|
||||
if (invalid_blkno_limits(sb, "meta",
|
||||
SCOUTFS_META_DEV_START_BLKNO,
|
||||
super->first_meta_blkno,
|
||||
super->last_meta_blkno, sbi->meta_bdev,
|
||||
SCOUTFS_BLOCK_LG_SHIFT) ||
|
||||
invalid_blkno_limits(sb, "data",
|
||||
SCOUTFS_DATA_DEV_START_BLKNO,
|
||||
super->first_data_blkno,
|
||||
super->last_data_blkno, sb->s_bdev,
|
||||
SCOUTFS_BLOCK_SM_SHIFT)) {
|
||||
if (small_bdev(sb, "metadata", le64_to_cpu(super->total_meta_blocks), sbi->meta_bdev,
|
||||
SCOUTFS_BLOCK_LG_SHIFT) ||
|
||||
small_bdev(sb, "data", le64_to_cpu(super->total_data_blocks), sb->s_bdev,
|
||||
SCOUTFS_BLOCK_SM_SHIFT)) {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
@@ -622,15 +612,16 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
scoutfs_quorum_setup(sb) ?:
|
||||
scoutfs_client_setup(sb) ?:
|
||||
scoutfs_volopt_setup(sb) ?:
|
||||
scoutfs_trans_get_log_trees(sb) ?:
|
||||
scoutfs_srch_setup(sb) ?:
|
||||
scoutfs_inode_start(sb);
|
||||
scoutfs_srch_setup(sb);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
inode = scoutfs_iget(sb, SCOUTFS_ROOT_INO);
|
||||
/* this interruptible iget lets hung mount be aborted with ctl-c */
|
||||
inode = scoutfs_iget(sb, SCOUTFS_ROOT_INO, SCOUTFS_LKF_INTERRUPTIBLE);
|
||||
if (IS_ERR(inode)) {
|
||||
ret = PTR_ERR(inode);
|
||||
if (ret == -ERESTARTSYS)
|
||||
ret = -EINTR;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -640,10 +631,15 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_client_advance_seq(sb, &sbi->trans_seq);
|
||||
/* send requests once iget progress shows we had a server */
|
||||
ret = scoutfs_trans_get_log_trees(sb) ?:
|
||||
scoutfs_client_advance_seq(sb, &sbi->trans_seq);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* start up background services that use everything else */
|
||||
scoutfs_inode_start(sb);
|
||||
scoutfs_forest_start(sb);
|
||||
scoutfs_trans_restart_sync_deadline(sb);
|
||||
ret = 0;
|
||||
out:
|
||||
|
||||
+5
-12
@@ -291,7 +291,7 @@ static void queue_trans_work(struct scoutfs_sb_info *sbi)
|
||||
int scoutfs_trans_sync(struct super_block *sb, int wait)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct write_attempt attempt;
|
||||
struct write_attempt attempt = { .ret = 0 };
|
||||
int ret;
|
||||
|
||||
|
||||
@@ -306,10 +306,8 @@ int scoutfs_trans_sync(struct super_block *sb, int wait)
|
||||
|
||||
queue_trans_work(sbi);
|
||||
|
||||
ret = wait_event_interruptible(sbi->trans_write_wq,
|
||||
write_attempted(sbi, &attempt));
|
||||
if (ret == 0)
|
||||
ret = attempt.ret;
|
||||
wait_event(sbi->trans_write_wq, write_attempted(sbi, &attempt));
|
||||
ret = attempt.ret;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -496,9 +494,7 @@ int scoutfs_hold_trans(struct super_block *sb, bool allocing)
|
||||
/* wait until the writer work is finished */
|
||||
if (!inc_holders_unless_writer(tri)) {
|
||||
dec_journal_info_holders();
|
||||
ret = wait_event_interruptible(sbi->trans_hold_wq, holders_no_writer(tri));
|
||||
if (ret < 0)
|
||||
break;
|
||||
wait_event(sbi->trans_hold_wq, holders_no_writer(tri));
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -514,10 +510,7 @@ int scoutfs_hold_trans(struct super_block *sb, bool allocing)
|
||||
seq = scoutfs_trans_sample_seq(sb);
|
||||
release_holders(sb);
|
||||
queue_trans_work(sbi);
|
||||
ret = wait_event_interruptible(sbi->trans_hold_wq,
|
||||
scoutfs_trans_sample_seq(sb) != seq);
|
||||
if (ret < 0)
|
||||
break;
|
||||
wait_event(sbi->trans_hold_wq, scoutfs_trans_sample_seq(sb) != seq);
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -40,7 +40,7 @@ t_filter_dmesg()
|
||||
# mount and unmount spew a bunch
|
||||
re="$re|scoutfs.*client connected"
|
||||
re="$re|scoutfs.*client disconnected"
|
||||
re="$re|scoutfs.*server setting up"
|
||||
re="$re|scoutfs.*server starting"
|
||||
re="$re|scoutfs.*server ready"
|
||||
re="$re|scoutfs.*server accepted"
|
||||
re="$re|scoutfs.*server closing"
|
||||
|
||||
@@ -0,0 +1,27 @@
|
||||
== make initial small fs
|
||||
== 0s do nothing
|
||||
== shrinking fails
|
||||
resize_devices ioctl failed: Invalid argument (22)
|
||||
scoutfs: resize-devices failed: Invalid argument (22)
|
||||
resize_devices ioctl failed: Invalid argument (22)
|
||||
scoutfs: resize-devices failed: Invalid argument (22)
|
||||
resize_devices ioctl failed: Invalid argument (22)
|
||||
scoutfs: resize-devices failed: Invalid argument (22)
|
||||
== existing sizes do nothing
|
||||
== growing outside device fails
|
||||
resize_devices ioctl failed: Invalid argument (22)
|
||||
scoutfs: resize-devices failed: Invalid argument (22)
|
||||
resize_devices ioctl failed: Invalid argument (22)
|
||||
scoutfs: resize-devices failed: Invalid argument (22)
|
||||
resize_devices ioctl failed: Invalid argument (22)
|
||||
scoutfs: resize-devices failed: Invalid argument (22)
|
||||
== resizing meta works
|
||||
== resizing data works
|
||||
== shrinking back fails
|
||||
resize_devices ioctl failed: Invalid argument (22)
|
||||
scoutfs: resize-devices failed: Invalid argument (22)
|
||||
resize_devices ioctl failed: Invalid argument (22)
|
||||
scoutfs: resize-devices failed: Invalid argument (22)
|
||||
== resizing again does nothing
|
||||
== resizing to full works
|
||||
== cleanup extra fs
|
||||
@@ -29,6 +29,7 @@ lock-conflicting-batch-commit.sh
|
||||
cross-mount-data-free.sh
|
||||
persistent-item-vers.sh
|
||||
setup-error-teardown.sh
|
||||
resize-devices.sh
|
||||
fence-and-reclaim.sh
|
||||
orphan-inodes.sh
|
||||
mount-unmount-race.sh
|
||||
|
||||
@@ -48,8 +48,9 @@ char buf[SZ];
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct scoutfs_ioctl_release ioctl_args = {0};
|
||||
struct scoutfs_ioctl_release rel = {0};
|
||||
struct scoutfs_ioctl_move_blocks mb;
|
||||
struct scoutfs_ioctl_stat_more stm;
|
||||
struct sub_tmp_info sub_tmps[8];
|
||||
int tot_size = 0;
|
||||
char *dest_file;
|
||||
@@ -111,12 +112,20 @@ int main(int argc, char **argv)
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// release everything in dest file
|
||||
ioctl_args.offset = 0;
|
||||
ioctl_args.length = tot_size;
|
||||
ioctl_args.data_version = 0;
|
||||
// get current data_version after fallocate's size extensions
|
||||
stm.valid_bytes = sizeof(struct scoutfs_ioctl_stat_more);
|
||||
ret = ioctl(dest_fd, SCOUTFS_IOC_STAT_MORE, &stm);
|
||||
if (ret < 0) {
|
||||
perror("stat_more ioctl error");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ret = ioctl(dest_fd, SCOUTFS_IOC_RELEASE, &ioctl_args);
|
||||
// release everything in dest file
|
||||
rel.offset = 0;
|
||||
rel.length = tot_size;
|
||||
rel.data_version = stm.data_version;
|
||||
|
||||
ret = ioctl(dest_fd, SCOUTFS_IOC_RELEASE, &rel);
|
||||
if (ret < 0) {
|
||||
perror("error");
|
||||
exit(1);
|
||||
@@ -130,7 +139,7 @@ int main(int argc, char **argv)
|
||||
mb.from_off = 0;
|
||||
mb.len = sub_tmp->length;
|
||||
mb.to_off = sub_tmp->offset;
|
||||
mb.data_version = 0;
|
||||
mb.data_version = stm.data_version;
|
||||
mb.flags = SCOUTFS_IOC_MB_STAGE;
|
||||
|
||||
ret = ioctl(dest_fd, SCOUTFS_IOC_MOVE_BLOCKS, &mb);
|
||||
|
||||
@@ -0,0 +1,149 @@
|
||||
#
|
||||
# Some basic tests of online resizing metadata and data devices.
|
||||
#
|
||||
|
||||
statfs_total() {
|
||||
local single="total_$1_blocks"
|
||||
local mnt="$2"
|
||||
|
||||
scoutfs statfs -s $single -p "$mnt"
|
||||
}
|
||||
|
||||
df_free() {
|
||||
local md="$1"
|
||||
local mnt="$2"
|
||||
|
||||
scoutfs df -p "$mnt" | awk '($1 == "'$md'") { print $5; exit }'
|
||||
}
|
||||
|
||||
same_totals() {
|
||||
cur_meta_tot=$(statfs_total meta "$SCR")
|
||||
cur_data_tot=$(statfs_total data "$SCR")
|
||||
|
||||
test "$cur_meta_tot" == "$exp_meta_tot" || \
|
||||
t_fail "cur total_meta_blocks $cur_meta_tot != expected $exp_meta_tot"
|
||||
test "$cur_data_tot" == "$exp_data_tot" || \
|
||||
t_fail "cur total_data_blocks $cur_data_tot != expected $exp_data_tot"
|
||||
}
|
||||
|
||||
#
|
||||
# make sure that the specified devices have grown by doubling. The
|
||||
# total blocks can be tested exactly but the df reported total needs
|
||||
# some slop to account for reserved blocks and concurrent allocation.
|
||||
#
|
||||
devices_grew() {
|
||||
cur_meta_tot=$(statfs_total meta "$SCR")
|
||||
cur_data_tot=$(statfs_total data "$SCR")
|
||||
cur_meta_df=$(df_free MetaData "$SCR")
|
||||
cur_data_df=$(df_free Data "$SCR")
|
||||
|
||||
local grow_meta_tot=$(echo "$exp_meta_tot * 2" | bc)
|
||||
local grow_data_tot=$(echo "$exp_data_tot * 2" | bc)
|
||||
local grow_meta_df=$(echo "($exp_meta_df * 1.95)/1" | bc)
|
||||
local grow_data_df=$(echo "($exp_data_df * 1.95)/1" | bc)
|
||||
|
||||
if [ "$1" == "meta" ]; then
|
||||
test "$cur_meta_tot" == "$grow_meta_tot" || \
|
||||
t_fail "cur total_meta_blocks $cur_meta_tot != grown $grow_meta_tot"
|
||||
test "$cur_meta_df" -lt "$grow_meta_df" && \
|
||||
t_fail "cur meta df total $cur_meta_df < grown $grow_meta_df"
|
||||
exp_meta_tot=$cur_meta_tot
|
||||
exp_meta_df=$cur_meta_df
|
||||
shift
|
||||
fi
|
||||
|
||||
if [ "$1" == "data" ]; then
|
||||
test "$cur_data_tot" == "$grow_data_tot" || \
|
||||
t_fail "cur total_data_blocks $cur_data_tot != grown $grow_data_tot"
|
||||
test "$cur_data_df" -lt "$grow_data_df" && \
|
||||
t_fail "cur data df total $cur_data_df < grown $grow_data_df"
|
||||
exp_data_tot=$cur_data_tot
|
||||
exp_data_df=$cur_data_df
|
||||
fi
|
||||
}
|
||||
|
||||
# first calculate small mkfs based on device size
|
||||
size_meta=$(blockdev --getsize64 "$T_EX_META_DEV")
|
||||
size_data=$(blockdev --getsize64 "$T_EX_DATA_DEV")
|
||||
quarter_meta=$(echo "$size_meta / 4" | bc)
|
||||
quarter_data=$(echo "$size_data / 4" | bc)
|
||||
|
||||
# XXX this is all pretty manual, would be nice to have helpers
|
||||
echo "== make initial small fs"
|
||||
scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m $quarter_meta -d $quarter_data \
|
||||
"$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
|
||||
t_fail "mkfs failed"
|
||||
SCR="/mnt/scoutfs.enospc"
|
||||
mkdir -p "$SCR"
|
||||
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
|
||||
# then calculate sizes based on blocks that mkfs used
|
||||
quarter_meta=$(echo "$(statfs_total meta "$SCR") * 64 * 1024" | bc)
|
||||
quarter_data=$(echo "$(statfs_total data "$SCR") * 4 * 1024" | bc)
|
||||
whole_meta=$(echo "$quarter_meta * 4" | bc)
|
||||
whole_data=$(echo "$quarter_data * 4" | bc)
|
||||
outsize_meta=$(echo "$whole_meta * 2" | bc)
|
||||
outsize_data=$(echo "$whole_data * 2" | bc)
|
||||
half_meta=$(echo "$whole_meta / 2" | bc)
|
||||
half_data=$(echo "$whole_data / 2" | bc)
|
||||
shrink_meta=$(echo "$quarter_meta / 2" | bc)
|
||||
shrink_data=$(echo "$quarter_data / 2" | bc)
|
||||
|
||||
# and save expected values for checks
|
||||
exp_meta_tot=$(statfs_total meta "$SCR")
|
||||
exp_meta_df=$(df_free MetaData "$SCR")
|
||||
exp_data_tot=$(statfs_total data "$SCR")
|
||||
exp_data_df=$(df_free Data "$SCR")
|
||||
|
||||
echo "== 0s do nothing"
|
||||
scoutfs resize-devices -p "$SCR"
|
||||
scoutfs resize-devices -p "$SCR" -m 0
|
||||
scoutfs resize-devices -p "$SCR" -d 0
|
||||
scoutfs resize-devices -p "$SCR" -m 0 -d 0
|
||||
|
||||
echo "== shrinking fails"
|
||||
scoutfs resize-devices -p "$SCR" -m $shrink_meta
|
||||
scoutfs resize-devices -p "$SCR" -d $shrink_data
|
||||
scoutfs resize-devices -p "$SCR" -m $shrink_meta -d $shrink_data
|
||||
same_totals
|
||||
|
||||
echo "== existing sizes do nothing"
|
||||
scoutfs resize-devices -p "$SCR" -m $quarter_meta
|
||||
scoutfs resize-devices -p "$SCR" -d $quarter_data
|
||||
scoutfs resize-devices -p "$SCR" -m $quarter_meta -d $quarter_data
|
||||
same_totals
|
||||
|
||||
echo "== growing outside device fails"
|
||||
scoutfs resize-devices -p "$SCR" -m $outsize_meta
|
||||
scoutfs resize-devices -p "$SCR" -d $outsize_data
|
||||
scoutfs resize-devices -p "$SCR" -m $outsize_meta -d $outsize_data
|
||||
same_totals
|
||||
|
||||
echo "== resizing meta works"
|
||||
scoutfs resize-devices -p "$SCR" -m $half_meta
|
||||
devices_grew meta
|
||||
|
||||
echo "== resizing data works"
|
||||
scoutfs resize-devices -p "$SCR" -d $half_data
|
||||
devices_grew data
|
||||
|
||||
echo "== shrinking back fails"
|
||||
scoutfs resize-devices -p "$SCR" -m $quarter_meta
|
||||
scoutfs resize-devices -p "$SCR" -m $quarter_data
|
||||
same_totals
|
||||
|
||||
echo "== resizing again does nothing"
|
||||
scoutfs resize-devices -p "$SCR" -m $half_meta
|
||||
scoutfs resize-devices -p "$SCR" -m $half_data
|
||||
same_totals
|
||||
|
||||
echo "== resizing to full works"
|
||||
scoutfs resize-devices -p "$SCR" -m $whole_meta -d $whole_data
|
||||
devices_grew meta data
|
||||
|
||||
echo "== cleanup extra fs"
|
||||
umount "$SCR"
|
||||
rmdir "$SCR"
|
||||
|
||||
t_pass
|
||||
@@ -7,7 +7,7 @@ message_output()
|
||||
|
||||
error_message()
|
||||
{
|
||||
message_output "$@" >> /dev/stderr
|
||||
message_output "$@" >&2
|
||||
}
|
||||
|
||||
error_exit()
|
||||
@@ -18,7 +18,7 @@ error_exit()
|
||||
|
||||
log_message()
|
||||
{
|
||||
message_output "$@" >> /dev/stdout
|
||||
message_output "$@"
|
||||
}
|
||||
|
||||
# restart if we catch hup to re-read the config
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
SCOUTFS_FENCED_DELAY=1
|
||||
SCOUTFS_FENCED_RUN=/usr/libexec/scoutfs-fenced/run/local-force-unmount
|
||||
SCOUTFS_FENCED_RUN_ARGS=""
|
||||
@@ -0,0 +1,11 @@
|
||||
[Unit]
|
||||
Description=ScoutFS fenced
|
||||
|
||||
[Service]
|
||||
Restart=on-failure
|
||||
RestartSec=5s
|
||||
StartLimitBurst=5
|
||||
ExecStart=/usr/libexec/scoutfs-fenced/scoutfs-fenced
|
||||
|
||||
[Install]
|
||||
WantedBy=default.target
|
||||
@@ -103,6 +103,63 @@ Ignore presence of existing data on the data and metadata devices.
|
||||
.PD
|
||||
|
||||
.TP
|
||||
.BI "resize-devices [-p|--path PATH] [-m|--meta-size SIZE] [-d|--data-size SIZE]"
|
||||
.sp
|
||||
Resize the metadata or data devices of a mounted ScoutFS filesystem.
|
||||
.sp
|
||||
ScoutFS metadata has free extent records and fields in the super block
|
||||
that reflect the size of the devices in use. This command sends a
|
||||
request to the server to change the size of the device that can be used
|
||||
by updating free extents and setting the super block fields.
|
||||
.sp
|
||||
The specified sizes are in bytes and are translated into block counts.
|
||||
If the specified sizes are not a multiple of the metadata or data block
|
||||
sizes then a message is output and the resized size is truncated down to
|
||||
the next whole block. Specifying either a size of 0 or the current
|
||||
device size makes no change. The current size of the devices can be
|
||||
seen, in units of their respective block sizes, in the total_meta_blocks
|
||||
and total_data_blocks fields returned by the scoutfs statfs command (via
|
||||
the statfs_more ioctl).
|
||||
.sp
|
||||
Shrinking is not supported. Specifying a smaller size for either device
|
||||
will return an error and neither device will be resized.
|
||||
.sp
|
||||
Specifying a larger size will expand the initial size of the device that
|
||||
will be used. Free space records are added for the expanded region and
|
||||
can be used once the resizing transaction is complete.
|
||||
.sp
|
||||
The resizing action is performed in a transaction on the server. This
|
||||
command will hang until a server is elected and running and can service
|
||||
the reqeust. The server serializes any concurrent requests to resize.
|
||||
.sp
|
||||
The new sizes must fit within the current sizes of the mounted devices.
|
||||
Presumably this command is being performed as part of a larger
|
||||
coordinated resize of the underlying devices. The device must be
|
||||
expanded before ScoutFS can use the larger device and ScoutFS must stop
|
||||
using a region to shrink before it could be removed from the device
|
||||
(which is not currently supported).
|
||||
.sp
|
||||
The resize will be committed by the server before the response is sent
|
||||
to the client. The system can be using the new device size before the
|
||||
result is communicated through the client and this command completes.
|
||||
The client could crash and the server could still have performed the
|
||||
resize.
|
||||
.RS 1.0i
|
||||
.PD 0
|
||||
.TP
|
||||
.sp
|
||||
.B "-p, --path PATH"
|
||||
A path in the mounted ScoutFS filesystem which will have its devices
|
||||
resized.
|
||||
.TP
|
||||
.B "-m, --meta-size SIZE"
|
||||
.B "-d, --data-size SIZE"
|
||||
The new size of the metadata or data device to use, in bytes. Size is given as
|
||||
an integer followed by a units digit: "K", "M", "G", "T", "P", to denote
|
||||
kibibytes, mebibytes, etc.
|
||||
.RE
|
||||
.PD
|
||||
|
||||
.BI "stat FILE [-s|--single-field FIELD-NAME]"
|
||||
.sp
|
||||
Display ScoutFS-specific metadata fields for the given file.
|
||||
|
||||
@@ -56,10 +56,14 @@ install -m 644 -D src/ioctl.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/ioctl.h
|
||||
install -m 644 -D src/format.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/format.h
|
||||
install -m 755 -D fenced/scoutfs-fenced $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/scoutfs-fenced
|
||||
install -m 755 -D fenced/local-force-unmount $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/run/local-force-unmount
|
||||
install -m 644 -D fenced/scoutfs-fenced.service $RPM_BUILD_ROOT%{_unitdir}/scoutfs-fenced.service
|
||||
install -m 644 -D fenced/scoutfs-fenced.conf.example $RPM_BUILD_ROOT%{_sysconfdir}/scoutfs/scoutfs-fenced.conf.example
|
||||
|
||||
%files
|
||||
%defattr(644,root,root,755)
|
||||
%{_mandir}/man*/scoutfs*.gz
|
||||
%{_unitdir}/scoutfs-fenced.service
|
||||
%{_sysconfdir}/scoutfs
|
||||
%defattr(755,root,root,755)
|
||||
%{_sbindir}/scoutfs
|
||||
%{_libexecdir}/scoutfs-fenced
|
||||
|
||||
+2
-6
@@ -241,11 +241,7 @@ static int do_mkfs(struct mkfs_args *args)
|
||||
super->next_ino = cpu_to_le64(SCOUTFS_ROOT_INO + 1);
|
||||
super->seq = cpu_to_le64(1);
|
||||
super->total_meta_blocks = cpu_to_le64(last_meta + 1);
|
||||
super->first_meta_blkno = cpu_to_le64(next_meta);
|
||||
super->last_meta_blkno = cpu_to_le64(last_meta);
|
||||
super->total_data_blocks = cpu_to_le64(last_data - first_data + 1);
|
||||
super->first_data_blkno = cpu_to_le64(first_data);
|
||||
super->last_data_blkno = cpu_to_le64(last_data);
|
||||
super->total_data_blocks = cpu_to_le64(last_data + 1);
|
||||
|
||||
assert(sizeof(args->slots) ==
|
||||
member_sizeof(struct scoutfs_super_block, qconf.slots));
|
||||
@@ -320,7 +316,7 @@ static int do_mkfs(struct mkfs_args *args)
|
||||
blkno = next_meta++;
|
||||
ret = write_alloc_root(meta_fd, fsid, &super->data_alloc, bt,
|
||||
1, blkno, first_data,
|
||||
le64_to_cpu(super->total_data_blocks));
|
||||
last_data - first_data + 1);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
|
||||
+1
-6
@@ -951,8 +951,7 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
|
||||
|
||||
/* XXX these are all in a crazy order */
|
||||
printf(" next_ino %llu seq %llu\n"
|
||||
" total_meta_blocks %llu first_meta_blkno %llu last_meta_blkno %llu\n"
|
||||
" total_data_blocks %llu first_data_blkno %llu last_data_blkno %llu\n"
|
||||
" total_meta_blocks %llu total_data_blocks %llu\n"
|
||||
" meta_alloc[0]: "ALCROOT_F"\n"
|
||||
" meta_alloc[1]: "ALCROOT_F"\n"
|
||||
" data_alloc: "ALCROOT_F"\n"
|
||||
@@ -969,11 +968,7 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
|
||||
le64_to_cpu(super->next_ino),
|
||||
le64_to_cpu(super->seq),
|
||||
le64_to_cpu(super->total_meta_blocks),
|
||||
le64_to_cpu(super->first_meta_blkno),
|
||||
le64_to_cpu(super->last_meta_blkno),
|
||||
le64_to_cpu(super->total_data_blocks),
|
||||
le64_to_cpu(super->first_data_blkno),
|
||||
le64_to_cpu(super->last_data_blkno),
|
||||
ALCROOT_A(&super->meta_alloc[0]),
|
||||
ALCROOT_A(&super->meta_alloc[1]),
|
||||
ALCROOT_A(&super->data_alloc),
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <argp.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "parse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "ioctl.h"
|
||||
#include "cmd.h"
|
||||
|
||||
struct resize_args {
|
||||
char *path;
|
||||
u64 meta_size;
|
||||
u64 data_size;
|
||||
};
|
||||
|
||||
static int do_resize_devices(struct resize_args *args)
|
||||
{
|
||||
struct scoutfs_ioctl_resize_devices rd;
|
||||
int ret;
|
||||
int fd;
|
||||
|
||||
if (args->meta_size & SCOUTFS_BLOCK_LG_MASK) {
|
||||
printf("metadata device size %llu is not a multiple of %u metadata block size, truncating down to %llu byte size\n",
|
||||
args->meta_size, SCOUTFS_BLOCK_LG_SIZE,
|
||||
args->meta_size & ~(u64)SCOUTFS_BLOCK_LG_MASK);
|
||||
}
|
||||
|
||||
if (args->data_size & SCOUTFS_BLOCK_SM_MASK) {
|
||||
printf("data device size %llu is not a multiple of %u data block size, truncating down to %llu byte size\n",
|
||||
args->data_size, SCOUTFS_BLOCK_SM_SIZE,
|
||||
args->data_size & ~(u64)SCOUTFS_BLOCK_SM_MASK);
|
||||
}
|
||||
|
||||
fd = get_path(args->path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
rd.new_total_meta_blocks = args->meta_size >> SCOUTFS_BLOCK_LG_SHIFT;
|
||||
rd.new_total_data_blocks = args->data_size >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
|
||||
ret = ioctl(fd, SCOUTFS_IOC_RESIZE_DEVICES, &rd);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "resize_devices ioctl failed: %s (%d)\n", strerror(errno), errno);
|
||||
}
|
||||
|
||||
close(fd);
|
||||
return ret;
|
||||
};
|
||||
|
||||
static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct resize_args *args = state->input;
|
||||
int ret;
|
||||
|
||||
switch (key) {
|
||||
case 'm': /* meta-size */
|
||||
{
|
||||
ret = parse_human(arg, &args->meta_size);
|
||||
if (ret)
|
||||
return ret;
|
||||
break;
|
||||
}
|
||||
case 'd': /* data-size */
|
||||
{
|
||||
ret = parse_human(arg, &args->data_size);
|
||||
if (ret)
|
||||
return ret;
|
||||
break;
|
||||
}
|
||||
case 'p':
|
||||
args->path = strdup_or_error(state, arg);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct argp_option options[] = {
|
||||
{ "path", 'p', "PATH", 0, "Path to ScoutFS filesystem"},
|
||||
{ "meta-size", 'm', "SIZE", 0, "New metadata device size (bytes or KMGTP units)"},
|
||||
{ "data-size", 'd', "SIZE", 0, "New data device size (bytes or KMGTP units)"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static struct argp argp = {
|
||||
options,
|
||||
parse_opt,
|
||||
"",
|
||||
"Online resize of metadata and/or data devices",
|
||||
};
|
||||
|
||||
static int resize_devices_cmd(int argc, char **argv)
|
||||
{
|
||||
|
||||
struct resize_args resize_args = {NULL,};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&argp, argc, argv, 0, NULL, &resize_args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_resize_devices(&resize_args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) read_xattr_totals_ctor(void)
|
||||
{
|
||||
cmd_register_argp("resize-devices", &argp, GROUP_CORE, resize_devices_cmd);
|
||||
}
|
||||
@@ -37,7 +37,6 @@ static struct stat_more_field inode_fields[] = {
|
||||
INODE_FIELD(data_version),
|
||||
INODE_FIELD(online_blocks),
|
||||
INODE_FIELD(offline_blocks),
|
||||
{ .name = "crtime", .offset = INODE_FIELD_OFF(crtime_sec) },
|
||||
{ NULL, }
|
||||
};
|
||||
|
||||
@@ -61,9 +60,6 @@ static void print_inode_field(void *st, size_t off)
|
||||
case INODE_FIELD_OFF(offline_blocks):
|
||||
printf("%llu", stm->offline_blocks);
|
||||
break;
|
||||
case INODE_FIELD_OFF(crtime_sec):
|
||||
printf("%llu.%09u", stm->crtime_sec, stm->crtime_nsec);
|
||||
break;
|
||||
};
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user