Compare commits

..

3 Commits

Author SHA1 Message Date
Auke Kok
4e2c1e83be Optionally print out xattr values
We cannot validate that totl keys have the correct count/value without
also extracting and printing the value for xattrs, which by default
is omitted (a sane default). Add a default-disabled --xattr-values/-V
flag to enable printing these out.

Because xattr values can span multiple items, this only will print
out the first one, and ellipsize it if it continues elsewhere. It is
filtered through isprint() to avoid printing non-printable characters.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-16 11:26:29 -07:00
Auke Kok
8c4e9bfa3e Add print filters for remaining types.
Chris already added print filters for most common types, but
omitted totl, indx, orphan, quota, and inode_index items. This
adds those as well, completing the set.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-16 11:01:47 -07:00
Chris Kirby
91638191de Add finer grained options to scoutfs print
The default output from scoutfs print can be very large, even
when using the -S option. Add three new command line options
to allow more targeted selection of btrees and their items.

--allocs prints the metadata and data allocators
--roots allows the selection of btree roots to walk (logs, srch, fs)
--items allows the selection of items to print from the selected btrees

Signed-off-by: Chris Kirby <ckirby@versity.com>
2025-11-21 10:39:58 -06:00
53 changed files with 816 additions and 2299 deletions

View File

@@ -1,66 +1,6 @@
Versity ScoutFS Release Notes
=============================
---
v1.29
\
*Mar 25, 2026*
Add a repair mechanism for mount logs that weren't properly resolved as
mounts left the cluster. The presence of these logs prevents log
merging from making forward progress and the backlog of logs over time
can cause operations to slow to a crawl. With the repair mechanism in
place the orphaned logs don't stop merging and operations proceed as
usual.
Add an ioctl for turning offline unmapped file regions into sparse
regions.
---
v1.28
\
*Feb 5, 2026*
Fix a bug that led to incorrect negative caching of ACL entries
starting in version 9.6 of distribution kernels in the enterprise linux
family. This would manifest as ACLs seemingly disappearing,
particularly default ACLs on directories. The persistent ACLs always
existed but because of internal API incompatibility some readers
couldn't see them and would cache that they didn't exist.
---
v1.27
\
*Jan 15, 2026*
Switch away from using the general VM cache reclaim machinery to reduce
idle cluster locks in the client. The VM treated locks like a cache and
let many accumulate, presuming that it would be efficient to free them
in batches. Lock freeing requires network communication so this could
result in enormous backlogs in network messages (on the order of
hundreds of thousands) and could result in significant delays of other
network messaging.
Fix inefficient network receive processing while many messages are in
the send queue. This consumed sufficient CPU to cause significant
stalls, perhaps resulting in hung task warning messages due to delayed
lock message delivery.
Fix a server livelock case that could happen while committing client
transactions that contain a large amount of freed file data extents.
This would present as client tasks hanging and a server task spinning
consuming cpu.
Fix a rare server request processing failure that doesn't deal with
retransmission of a request that a previous server partially processed.
This would present as hung client tasks and repeated "error -2
committing log merge: getting merge status item" kernel messages.
Fix an unnecessary server shutdown during specific circumstances in
client lock recovery. The shutdown was due to server state and was
ultimately harmless. The next server that started up would proceed
accordingly.
---
v1.26
\

View File

@@ -479,20 +479,10 @@ ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h
ccflags-y += -DKC_STACK_TRACE_SAVE
endif
# v6.1-rc1-4-g7420332a6ff4
#
# v6.1-rc1-2-g138060ba92b3
#
# set_acl now passed a struct dentry instead of inode.
#
ifneq (,$(shell grep 'int ..set_acl.*struct dentry' include/linux/fs.h))
ccflags-y += -DKC_SET_ACL_DENTRY
endif
#
# v6.1-rc1-3-gcac2f8b8d8b5
#
# get_acl renamed to get_inode_acl.
#
ifneq (,$(shell grep 'struct posix_acl.*get_inode_acl' include/linux/fs.h))
ccflags-y += -DKC_GET_INODE_ACL
# .get_acl() method now has dentry arg (and mnt_idmap). The old get_acl has been renamed
# to get_inode_acl() and is still available as well, but has an extra rcu param.
ifneq (,$(shell grep 'struct posix_acl ...get_acl..struct mnt_idmap ., struct dentry' include/linux/fs.h))
ccflags-y += -DKC_GET_ACL_DENTRY
endif

View File

@@ -107,22 +107,20 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
return acl;
}
#ifdef KC_GET_INODE_ACL
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu)
#ifdef KC_GET_ACL_DENTRY
struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF
struct dentry *dentry, int type)
{
struct inode *inode = dentry->d_inode;
#else
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
#endif
{
#endif
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *lock = NULL;
struct posix_acl *acl;
int ret;
#ifdef KC_GET_INODE_ACL
if (rcu)
return ERR_PTR(-ECHILD);
#endif
#ifndef KC___POSIX_ACL_CREATE
if (!IS_POSIXACL(inode))
return NULL;
@@ -210,7 +208,7 @@ out:
return ret;
}
#ifdef KC_SET_ACL_DENTRY
#ifdef KC_GET_ACL_DENTRY
int scoutfs_set_acl(KC_VFS_NS_DEF
struct dentry *dentry, struct posix_acl *acl, int type)
{
@@ -256,8 +254,9 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
if (!IS_POSIXACL(dentry->d_inode))
return -EOPNOTSUPP;
#ifdef KC_GET_INODE_ACL
acl = scoutfs_get_acl(dentry->d_inode, type, false);
#ifdef KC_GET_ACL_DENTRY
acl = scoutfs_get_acl(KC_VFS_INIT_NS
dentry, type);
#else
acl = scoutfs_get_acl(dentry->d_inode, type);
#endif
@@ -306,7 +305,7 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
}
}
#ifdef KC_SET_ACL_DENTRY
#ifdef KC_GET_ACL_DENTRY
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
#else
ret = scoutfs_set_acl(dentry->d_inode, acl, type);

View File

@@ -1,16 +1,12 @@
#ifndef _SCOUTFS_ACL_H_
#define _SCOUTFS_ACL_H_
#ifdef KC_SET_ACL_DENTRY
int scoutfs_set_acl(KC_VFS_NS_DEF
struct dentry *dentry, struct posix_acl *acl, int type);
#else
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif
#ifdef KC_GET_INODE_ACL
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu);
#ifdef KC_GET_ACL_DENTRY
struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF struct dentry *dentry, int type);
int scoutfs_set_acl(KC_VFS_NS_DEF struct dentry *dentry, struct posix_acl *acl, int type);
#else
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif
struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,

View File

@@ -125,6 +125,7 @@
EXPAND_COUNTER(item_update) \
EXPAND_COUNTER(item_write_dirty) \
EXPAND_COUNTER(lock_alloc) \
EXPAND_COUNTER(lock_count_objects) \
EXPAND_COUNTER(lock_free) \
EXPAND_COUNTER(lock_grant_request) \
EXPAND_COUNTER(lock_grant_response) \
@@ -138,13 +139,13 @@
EXPAND_COUNTER(lock_lock_error) \
EXPAND_COUNTER(lock_nonblock_eagain) \
EXPAND_COUNTER(lock_recover_request) \
EXPAND_COUNTER(lock_scan_objects) \
EXPAND_COUNTER(lock_shrink_attempted) \
EXPAND_COUNTER(lock_shrink_request_failed) \
EXPAND_COUNTER(lock_shrink_aborted) \
EXPAND_COUNTER(lock_shrink_work) \
EXPAND_COUNTER(lock_unlock) \
EXPAND_COUNTER(lock_wait) \
EXPAND_COUNTER(log_merge_complete) \
EXPAND_COUNTER(log_merge_no_finalized) \
EXPAND_COUNTER(log_merge_start) \
EXPAND_COUNTER(log_merge_wait_timeout) \
EXPAND_COUNTER(net_dropped_response) \
EXPAND_COUNTER(net_send_bytes) \
@@ -159,7 +160,6 @@
EXPAND_COUNTER(orphan_scan) \
EXPAND_COUNTER(orphan_scan_attempts) \
EXPAND_COUNTER(orphan_scan_cached) \
EXPAND_COUNTER(orphan_scan_empty) \
EXPAND_COUNTER(orphan_scan_error) \
EXPAND_COUNTER(orphan_scan_item) \
EXPAND_COUNTER(orphan_scan_omap_set) \

View File

@@ -79,10 +79,8 @@ static void item_from_extent(struct scoutfs_key *key,
.skdx_end = cpu_to_le64(start + len - 1),
.skdx_len = cpu_to_le64(len),
};
*dv = (struct scoutfs_data_extent_val) {
.blkno = cpu_to_le64(map),
.flags = flags,
};
dv->blkno = cpu_to_le64(map);
dv->flags = flags;
}
static void ext_from_item(struct scoutfs_extent *ext,
@@ -1517,101 +1515,6 @@ out:
return ret;
}
/*
* Punch holes in offline extents. This is a very specific tool that
* only does one job: it converts extents from offline to sparse. It
* returns an error if it encounters an extent that isn't offline or has
* a block mapping. It ignores i_size completely; it does not test it,
* and does not update it.
*
* The caller has the inode locked in the vfs and performed basic sanity
* checks. We manage transactions and the extent_sem which is ordered
* inside the transaction.
*/
/*
 * Walk the extents covering blocks [iblock, last] and clear SEF_OFFLINE
 * on every unmapped offline extent found, trimming each extent to the
 * requested range first.
 *
 * Returns 0 on success, -EINVAL if iblock > last or if an extent with a
 * block mapping is found, or the negative error from a called helper.
 * Extents modified before an error is hit are not rolled back here.
 *
 * NOTE(review): data_version is accepted but not referenced in this body.
 */
int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
struct scoutfs_lock *lock)
{
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct data_ext_args args = {
.ino = scoutfs_ino(inode),
.inode = inode,
.lock = lock,
};
struct scoutfs_extent ext;
LIST_HEAD(ind_locks);
int ret;
int i;
if (WARN_ON_ONCE(iblock > last)) {
ret = -EINVAL;
goto out;
}
/* idiomatic to call start,last with 0,~0, clamp last to last possible */
last = min(last, SCOUTFS_BLOCK_SM_MAX);
ret = 0;
/* each pass of the outer loop is one index-lock hold / release cycle */
while (iblock <= last) {
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false) ?:
scoutfs_dirty_inode_item(inode, lock);
if (ret < 0)
break;
down_write(&si->extent_sem);
/* at most 32 extents are visited per pass before everything is released */
for (i = 0; i < 32 && (iblock <= last); i++) {
ret = scoutfs_ext_next(sb, &data_ext_ops, &args, iblock, 1, &ext);
if (ret == -ENOENT) {
/* no further extents: move iblock past last so the outer loop ends */
iblock = last + 1;
ret = 0;
break;
}
if (ret < 0)
break;
if (ext.start > last) {
/* next extent begins beyond the requested range, nothing left to do */
iblock = last + 1;
break;
}
if (ext.map) {
/* extents with a block mapping are refused */
ret = -EINVAL;
break;
}
if (ext.flags & SEF_OFFLINE) {
/* trim the front of the extent down to iblock */
if (iblock > ext.start) {
ext.len -= iblock - ext.start;
ext.start = iblock;
}
/* trim the tail so the extent does not run past last */
ext.len = min(ext.len, last - ext.start + 1);
ext.flags &= ~SEF_OFFLINE;
ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
ext.start, ext.len, ext.map, ext.flags);
if (ret < 0)
break;
}
/* continue from the block after this (possibly trimmed) extent */
iblock = ext.start + ext.len;
}
up_write(&si->extent_sem);
/* these three calls run whether or not the inner loop set an error */
scoutfs_update_inode_item(inode, lock, &ind_locks);
scoutfs_release_trans(sb);
scoutfs_inode_index_unlock(sb, &ind_locks);
if (ret < 0)
break;
}
out:
return ret;
}
/*
* This copies to userspace :/
*/

View File

@@ -57,8 +57,6 @@ int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
u64 byte_len, struct inode *to, u64 to_off, bool to_stage,
u64 data_version);
int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
struct scoutfs_lock *lock);
int scoutfs_data_wait_check(struct inode *inode, loff_t pos, loff_t len,
u8 sef, u8 op, struct scoutfs_data_wait *ow,

View File

@@ -587,12 +587,10 @@ static int add_entry_items(struct super_block *sb, u64 dir_ino, u64 hash,
}
/* initialize the dent */
*dent = (struct scoutfs_dirent) {
.ino = cpu_to_le64(ino),
.hash = cpu_to_le64(hash),
.pos = cpu_to_le64(pos),
.type = mode_to_type(mode),
};
dent->ino = cpu_to_le64(ino);
dent->hash = cpu_to_le64(hash);
dent->pos = cpu_to_le64(pos);
dent->type = mode_to_type(mode);
memcpy(dent->name, name, name_len);
init_dirent_key(&ent_key, SCOUTFS_DIRENT_TYPE, dir_ino, hash, pos);
@@ -2008,11 +2006,7 @@ const struct inode_operations scoutfs_symlink_iops = {
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.removexattr = generic_removexattr,
#endif
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.tmpfile = scoutfs_tmpfile,
.rename = scoutfs_rename_common,
@@ -2058,12 +2052,8 @@ const struct inode_operations scoutfs_dir_iops = {
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_SET_ACL_DENTRY
#ifdef KC_GET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.symlink = scoutfs_symlink,

View File

@@ -793,7 +793,7 @@ out:
if (ret)
scoutfs_forest_destroy(sb);
return ret;
return 0;
}
void scoutfs_forest_start(struct super_block *sb)

View File

@@ -149,12 +149,8 @@ static const struct inode_operations scoutfs_file_iops = {
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_SET_ACL_DENTRY
#ifdef KC_GET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.fiemap = scoutfs_data_fiemap,
@@ -169,12 +165,8 @@ static const struct inode_operations scoutfs_special_iops = {
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_SET_ACL_DENTRY
#ifdef KC_GET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
};
@@ -1645,14 +1637,10 @@ int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_
struct scoutfs_lock *primary)
{
struct scoutfs_key key;
int ret;
init_orphan_key(&key, ino);
ret = scoutfs_item_delete_force(sb, &key, lock, primary);
trace_scoutfs_inode_orphan_delete(sb, ino, ret);
return ret;
return scoutfs_item_delete_force(sb, &key, lock, primary);
}
/*
@@ -1734,8 +1722,6 @@ out:
scoutfs_release_trans(sb);
scoutfs_inode_index_unlock(sb, &ind_locks);
trace_scoutfs_delete_inode_end(sb, ino, mode, size, ret);
return ret;
}
@@ -1831,9 +1817,6 @@ out:
* they've checked that the inode could really be deleted. We serialize
* on a bit in the lock data so that we only have one deletion attempt
* per inode under this mount's cluster lock.
*
* Returns -EAGAIN if we either did some cleanup work or are unable to finish
* cleaning up this inode right now.
*/
static int try_delete_inode_items(struct super_block *sb, u64 ino)
{
@@ -1847,8 +1830,6 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
int bit_nr;
int ret;
trace_scoutfs_try_delete(sb, ino);
ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_WRITE, 0, ino, &lock);
if (ret < 0)
goto out;
@@ -1861,32 +1842,27 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
/* only one local attempt per inode at a time */
if (test_and_set_bit(bit_nr, ldata->trying)) {
trace_scoutfs_try_delete_local_busy(sb, ino);
ret = -EAGAIN;
ret = 0;
goto out;
}
clear_trying = true;
/* can't delete if it's cached in local or remote mounts */
if (scoutfs_omap_test(sb, ino) || test_bit_le(bit_nr, ldata->map.bits)) {
trace_scoutfs_try_delete_cached(sb, ino);
ret = -EAGAIN;
ret = 0;
goto out;
}
scoutfs_inode_init_key(&key, ino);
ret = lookup_inode_item(sb, &key, &sinode, lock);
if (ret < 0) {
if (ret == -ENOENT) {
trace_scoutfs_try_delete_no_item(sb, ino);
if (ret == -ENOENT)
ret = 0;
}
goto out;
}
if (le32_to_cpu(sinode.nlink) > 0) {
trace_scoutfs_try_delete_has_links(sb, ino, le32_to_cpu(sinode.nlink));
ret = -EAGAIN;
ret = 0;
goto out;
}
@@ -1895,10 +1871,8 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
goto out;
ret = delete_inode_items(sb, ino, &sinode, lock, orph_lock);
if (ret == 0) {
ret = -EAGAIN;
if (ret == 0)
scoutfs_inc_counter(sb, inode_deleted);
}
out:
if (clear_trying)
@@ -2100,10 +2074,6 @@ void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb)
* a locally cached inode. Then we ask the server for the open map
* containing the inode. Only if we don't see any cached users do we do
* the expensive work of acquiring locks to try and delete the items.
*
* We need to track whether there is any orphan cleanup work remaining so
* that tests such as inode-deletion can watch the orphan_scan_empty counter
* to determine when inode cleanup from open-unlink scenarios is complete.
*/
static void inode_orphan_scan_worker(struct work_struct *work)
{
@@ -2115,14 +2085,11 @@ static void inode_orphan_scan_worker(struct work_struct *work)
SCOUTFS_BTREE_ITEM_REF(iref);
struct scoutfs_key last;
struct scoutfs_key key;
bool work_todo = false;
u64 group_nr;
int bit_nr;
u64 ino;
int ret;
trace_scoutfs_orphan_scan_start(sb);
scoutfs_inc_counter(sb, orphan_scan);
init_orphan_key(&last, U64_MAX);
@@ -2142,10 +2109,8 @@ static void inode_orphan_scan_worker(struct work_struct *work)
init_orphan_key(&key, ino);
ret = scoutfs_btree_next(sb, &roots.fs_root, &key, &iref);
if (ret < 0) {
if (ret == -ENOENT) {
trace_scoutfs_orphan_scan_work(sb, 0);
if (ret == -ENOENT)
break;
}
goto out;
}
@@ -2160,7 +2125,6 @@ static void inode_orphan_scan_worker(struct work_struct *work)
/* locally cached inodes will try to delete as they evict */
if (scoutfs_omap_test(sb, ino)) {
work_todo = true;
scoutfs_inc_counter(sb, orphan_scan_cached);
continue;
}
@@ -2176,22 +2140,13 @@ static void inode_orphan_scan_worker(struct work_struct *work)
/* remote cached inodes will also try to delete */
if (test_bit_le(bit_nr, omap.bits)) {
work_todo = true;
scoutfs_inc_counter(sb, orphan_scan_omap_set);
continue;
}
/* seemingly orphaned and unused, get locks and check for sure */
scoutfs_inc_counter(sb, orphan_scan_attempts);
trace_scoutfs_orphan_scan_work(sb, ino);
ret = try_delete_inode_items(sb, ino);
if (ret == -EAGAIN) {
work_todo = true;
ret = 0;
}
trace_scoutfs_orphan_scan_end(sb, ino, ret);
}
ret = 0;
@@ -2200,11 +2155,6 @@ out:
if (ret < 0)
scoutfs_inc_counter(sb, orphan_scan_error);
if (!work_todo)
scoutfs_inc_counter(sb, orphan_scan_empty);
trace_scoutfs_orphan_scan_stop(sb, work_todo);
scoutfs_inode_schedule_orphan_dwork(sb);
}

View File

@@ -415,6 +415,8 @@ static long scoutfs_ioc_data_wait_err(struct file *file, unsigned long arg)
return 0;
if ((args.op & SCOUTFS_IOC_DWO_UNKNOWN) || !IS_ERR_VALUE(args.err))
return -EINVAL;
if ((args.op & SCOUTFS_IOC_DWO_UNKNOWN) || !IS_ERR_VALUE(args.err))
return -EINVAL;
trace_scoutfs_ioc_data_wait_err(sb, &args);
@@ -1667,78 +1669,6 @@ out:
return ret;
}
/*
 * ioctl handler for SCOUTFS_IOC_PUNCH_OFFLINE.  Copies the argument
 * struct from userspace, validates it, takes the vfs inode lock and a
 * write cluster lock, and then calls scoutfs_data_punch_offline() on
 * the small blocks covered by [offset, offset + len - 1].
 *
 * Returns 0 on success or when len is 0, otherwise:
 *  -EFAULT     the argument struct could not be copied in
 *  -EOVERFLOW  offset + len - 1 overflows, or offset/len are not
 *              aligned to the small block size
 *  -EINVAL     flags is non-zero, the inode is not a regular file, or
 *              the file was not opened for writing
 *  -ESTALE     data_version does not match the inode
 *  or the error returned by any of the helpers called below.
 */
static long scoutfs_ioc_punch_offline(struct file *file, unsigned long arg)
{
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct scoutfs_ioctl_punch_offline __user *upo = (void __user *)arg;
struct scoutfs_ioctl_punch_offline po;
struct scoutfs_lock *lock = NULL;
u64 iblock;
u64 last;
u64 tmp;
int ret;
if (copy_from_user(&po, upo, sizeof(po)))
return -EFAULT;
if (po.len == 0)
return 0;
/* tmp only receives the overflow-checked sum, it is not read afterwards */
if (check_add_overflow(po.offset, po.len - 1, &tmp) ||
(po.offset & SCOUTFS_BLOCK_SM_MASK) ||
(po.len & SCOUTFS_BLOCK_SM_MASK))
return -EOVERFLOW;
if (po.flags)
return -EINVAL;
ret = mnt_want_write_file(file);
if (ret < 0)
return ret;
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
if (ret)
goto out;
if (!S_ISREG(inode->i_mode)) {
ret = -EINVAL;
goto out;
}
if (!(file->f_mode & FMODE_WRITE)) {
ret = -EINVAL;
goto out;
}
ret = inode_permission(KC_VFS_INIT_NS inode, MAY_WRITE);
if (ret < 0)
goto out;
if (scoutfs_inode_data_version(inode) != po.data_version) {
ret = -ESTALE;
goto out;
}
if ((ret = scoutfs_inode_check_retention(inode)))
goto out;
/* convert the inclusive byte range into inclusive small block numbers */
iblock = po.offset >> SCOUTFS_BLOCK_SM_SHIFT;
last = (po.offset + po.len - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
ret = scoutfs_data_punch_offline(inode, iblock, last, po.data_version, lock);
out:
/*
 * lock is still NULL here when scoutfs_lock_inode() failed.
 * NOTE(review): presumably scoutfs_unlock() tolerates a NULL lock -- confirm.
 */
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
inode_unlock(inode);
mnt_drop_write_file(file);
return ret;
}
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -1788,8 +1718,6 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return scoutfs_ioc_mod_quota_rule(file, arg, false);
case SCOUTFS_IOC_READ_XATTR_INDEX:
return scoutfs_ioc_read_xattr_index(file, arg);
case SCOUTFS_IOC_PUNCH_OFFLINE:
return scoutfs_ioc_punch_offline(file, arg);
}
return -ENOTTY;

View File

@@ -848,32 +848,4 @@ struct scoutfs_ioctl_read_xattr_index {
#define SCOUTFS_IOC_READ_XATTR_INDEX \
_IOR(SCOUTFS_IOCTL_MAGIC, 23, struct scoutfs_ioctl_read_xattr_index)
/*
* This is a limited and specific version of hole punching. It's an
* archive layer operation that only converts unmapped offline extents
* into sparse extents. It is intended to be used when restoring sparse
* files after the initial creation set the entire file size offline.
*
* The offset and len fields are in units of bytes and must be aligned
* to the small (4KiB) block size. All regions of offline extents
* covered by the region will be converted into sparse online extents,
* including regions that straddle the boundaries of the region. Any
* existing sparse extents in the region are ignored.
*
* The data_version must match the inode or ESTALE is returned. The
* data_version is not modified by this operation.
*
* EINVAL is returned if any mapped extents are found in the region. If
* an error is returned then partial progress may have been made.
*/
struct scoutfs_ioctl_punch_offline {
__u64 offset;
__u64 len;
__u64 data_version;
__u64 flags;
};
#define SCOUTFS_IOC_PUNCH_OFFLINE \
_IOW(SCOUTFS_IOCTL_MAGIC, 24, struct scoutfs_ioctl_punch_offline)
#endif

View File

@@ -53,10 +53,8 @@
* all access to the lock (by revoking it down to a null mode) then the
* lock is freed.
*
* Each client has a configurable number of locks that are allowed to
* remain idle after being granted, for use by future tasks. Past the
* limit locks are freed by requesting a null mode from the server,
* governed by a LRU.
* Memory pressure on the client can cause the client to request a null
* mode from the server so that once it's granted the lock can be freed.
*
* So far we've only needed a minimal trylock. We return -EAGAIN if a
* lock attempt can't immediately match an existing granted lock. This
@@ -81,11 +79,14 @@ struct lock_info {
bool unmounting;
struct rb_root lock_tree;
struct rb_root lock_range_tree;
u64 nr_locks;
KC_DEFINE_SHRINKER(shrinker);
struct list_head lru_list;
unsigned long long lru_nr;
struct workqueue_struct *workq;
struct work_struct inv_work;
struct list_head inv_list;
struct work_struct shrink_work;
struct list_head shrink_list;
atomic64_t next_refresh_gen;
struct dentry *tseq_dentry;
@@ -248,6 +249,7 @@ static void lock_free(struct lock_info *linfo, struct scoutfs_lock *lock)
BUG_ON(!RB_EMPTY_NODE(&lock->range_node));
BUG_ON(!list_empty(&lock->lru_head));
BUG_ON(!list_empty(&lock->inv_head));
BUG_ON(!list_empty(&lock->shrink_head));
BUG_ON(!list_empty(&lock->cov_list));
kfree(lock->inode_deletion_data);
@@ -275,6 +277,7 @@ static struct scoutfs_lock *lock_alloc(struct super_block *sb,
INIT_LIST_HEAD(&lock->lru_head);
INIT_LIST_HEAD(&lock->inv_head);
INIT_LIST_HEAD(&lock->inv_list);
INIT_LIST_HEAD(&lock->shrink_head);
spin_lock_init(&lock->cov_list_lock);
INIT_LIST_HEAD(&lock->cov_list);
@@ -407,7 +410,6 @@ static bool lock_insert(struct super_block *sb, struct scoutfs_lock *ins)
rb_link_node(&ins->node, parent, node);
rb_insert_color(&ins->node, &linfo->lock_tree);
linfo->nr_locks++;
scoutfs_tseq_add(&linfo->tseq_tree, &ins->tseq_entry);
return true;
@@ -422,7 +424,6 @@ static void lock_remove(struct lock_info *linfo, struct scoutfs_lock *lock)
rb_erase(&lock->range_node, &linfo->lock_range_tree);
RB_CLEAR_NODE(&lock->range_node);
linfo->nr_locks--;
scoutfs_tseq_del(&linfo->tseq_tree, &lock->tseq_entry);
}
@@ -462,8 +463,10 @@ static void __lock_del_lru(struct lock_info *linfo, struct scoutfs_lock *lock)
{
assert_spin_locked(&linfo->lock);
if (!list_empty(&lock->lru_head))
if (!list_empty(&lock->lru_head)) {
list_del_init(&lock->lru_head);
linfo->lru_nr--;
}
}
/*
@@ -522,16 +525,14 @@ static struct scoutfs_lock *create_lock(struct super_block *sb,
* indicate that the lock wasn't idle. If it really is idle then we
* either free it if it's null or put it back on the lru.
*/
static void __put_lock(struct lock_info *linfo, struct scoutfs_lock *lock, bool tail)
static void put_lock(struct lock_info *linfo,struct scoutfs_lock *lock)
{
assert_spin_locked(&linfo->lock);
if (lock_idle(lock)) {
if (lock->mode != SCOUTFS_LOCK_NULL) {
if (tail)
list_add_tail(&lock->lru_head, &linfo->lru_list);
else
list_add(&lock->lru_head, &linfo->lru_list);
list_add_tail(&lock->lru_head, &linfo->lru_list);
linfo->lru_nr++;
} else {
lock_remove(linfo, lock);
lock_free(linfo, lock);
@@ -539,11 +540,6 @@ static void __put_lock(struct lock_info *linfo, struct scoutfs_lock *lock, bool
}
}
static inline void put_lock(struct lock_info *linfo, struct scoutfs_lock *lock)
{
__put_lock(linfo, lock, true);
}
/*
* The caller has made a change (set a lock mode) which can let one of the
* invalidating locks make forward progress.
@@ -717,14 +713,14 @@ static void lock_invalidate_worker(struct work_struct *work)
/* only lock protocol, inv can't call subsystems after shutdown */
if (!linfo->shutdown) {
ret = lock_invalidate(sb, lock, nl->old_mode, nl->new_mode);
BUG_ON(ret < 0 && ret != -ENOLINK);
BUG_ON(ret);
}
/* respond with the key and modes from the request, server might have died */
ret = scoutfs_client_lock_response(sb, ireq->net_id, nl);
if (ret == -ENOTCONN)
ret = 0;
BUG_ON(ret < 0 && ret != -ENOLINK);
BUG_ON(ret);
scoutfs_inc_counter(sb, lock_invalidate_response);
}
@@ -879,69 +875,6 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
return ret;
}
/*
* This is called on every _lock call to try and keep the number of
* locks under the idle count. We're intentionally trying to throttle
* shrinking bursts by tying its frequency to lock use. It will only
* send requests to free unused locks, though, so it's always possible
* to exceed the high water mark under heavy load.
*
* We send a null request and the lock will be freed by the response
* once all users drain. If this races with invalidation then the
* server will only send the grant response once the invalidation is
* finished.
*/
/*
 * Take the first lock off the lru, if any, and send the server a
 * request to move it to the null mode.  Without force this only
 * happens while nr_locks exceeds the lock_idle_count mount option.
 *
 * Returns true only when a lock was removed from the lru and its null
 * request was sent without error.
 */
static bool try_shrink_lock(struct super_block *sb, struct lock_info *linfo, bool force)
{
struct scoutfs_mount_options opts;
struct scoutfs_lock *lock = NULL;
struct scoutfs_net_lock nl;
int ret = 0;
scoutfs_options_read(sb, &opts);
/* avoiding lock contention with unsynchronized test, don't mind temp false results */
if (!force && (list_empty(&linfo->lru_list) ||
READ_ONCE(linfo->nr_locks) <= opts.lock_idle_count))
return false;
spin_lock(&linfo->lock);
lock = list_first_entry_or_null(&linfo->lru_list, struct scoutfs_lock, lru_head);
/* repeat the limit test now that the spinlock is held */
if (lock && (force || (linfo->nr_locks > opts.lock_idle_count))) {
__lock_del_lru(linfo, lock);
lock->request_pending = 1;
/* build the request while locked, it is sent after the spinlock is dropped */
nl.key = lock->start;
nl.old_mode = lock->mode;
nl.new_mode = SCOUTFS_LOCK_NULL;
} else {
lock = NULL;
}
spin_unlock(&linfo->lock);
if (lock) {
ret = scoutfs_client_lock_request(sb, &nl);
if (ret < 0) {
scoutfs_inc_counter(sb, lock_shrink_request_failed);
spin_lock(&linfo->lock);
/* undo the pending state and wake anyone waiting on the lock */
lock->request_pending = 0;
wake_up(&lock->waitq);
/* tail == false: an idle lock is put back at the head of the lru */
__put_lock(linfo, lock, false);
spin_unlock(&linfo->lock);
} else {
scoutfs_inc_counter(sb, lock_shrink_attempted);
trace_scoutfs_lock_shrink(sb, lock);
}
}
return lock && ret == 0;
}
static bool lock_wait_cond(struct super_block *sb, struct scoutfs_lock *lock,
enum scoutfs_lock_mode mode)
{
@@ -1004,8 +937,6 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
if (WARN_ON_ONCE(scoutfs_trans_held()))
return -EDEADLK;
try_shrink_lock(sb, linfo, false);
spin_lock(&linfo->lock);
/* drops and re-acquires lock if it allocates */
@@ -1449,12 +1380,134 @@ bool scoutfs_lock_protected(struct scoutfs_lock *lock, struct scoutfs_key *key,
&lock->start, &lock->end) == 0;
}
/*
* The shrink callback got the lock, marked it request_pending, and put
* it on the shrink list. We send a null request and the lock will be
* freed by the response once all users drain. If this races with
* invalidation then the server will only send the grant response once
* the invalidation is finished.
*/
/*
 * Work function for linfo->shrink_work.  Sends a null-mode request for
 * every lock currently on linfo->shrink_list.  A lock whose request
 * fails has request_pending cleared and is handed back to put_lock().
 */
static void lock_shrink_worker(struct work_struct *work)
{
struct lock_info *linfo = container_of(work, struct lock_info,
shrink_work);
struct super_block *sb = linfo->sb;
struct scoutfs_net_lock nl;
struct scoutfs_lock *lock;
struct scoutfs_lock *tmp;
LIST_HEAD(list);
int ret;
scoutfs_inc_counter(sb, lock_shrink_work);
/* move the whole queue to a private list so requests are sent without the spinlock */
spin_lock(&linfo->lock);
list_splice_init(&linfo->shrink_list, &list);
spin_unlock(&linfo->lock);
list_for_each_entry_safe(lock, tmp, &list, shrink_head) {
list_del_init(&lock->shrink_head);
/* unlocked lock access, but should be stable since we queued */
nl.key = lock->start;
nl.old_mode = lock->mode;
nl.new_mode = SCOUTFS_LOCK_NULL;
ret = scoutfs_client_lock_request(sb, &nl);
if (ret) {
/* oh well, not freeing */
scoutfs_inc_counter(sb, lock_shrink_aborted);
spin_lock(&linfo->lock);
/* clear the pending state set by the scan and wake any waiters */
lock->request_pending = 0;
wake_up(&lock->waitq);
put_lock(linfo, lock);
spin_unlock(&linfo->lock);
}
}
}
/*
 * Shrinker count callback.  Bumps the lock_count_objects counter and
 * returns linfo->lru_nr passed through shrinker_min_long().  lru_nr is
 * read here without taking linfo->lock, and sc is not used.
 */
static unsigned long lock_count_objects(struct shrinker *shrink,
struct shrink_control *sc)
{
struct lock_info *linfo = KC_SHRINKER_CONTAINER_OF(shrink, struct lock_info);
struct super_block *sb = linfo->sb;
scoutfs_inc_counter(sb, lock_count_objects);
return shrinker_min_long(linfo->lru_nr);
}
/*
* Start the shrinking process for locks on the lru. If a lock is on
* the lru then it can't have any active users. We don't want to block
* or allocate here so all we do is get the lock, mark it request
* pending, and kick off the work. The work sends a null request and
* eventually the lock is freed by its response.
*
* Only a racing lock attempt that isn't matched can prevent the lock
* from being freed. It'll block waiting to send its request for its
* mode which will prevent the lock from being freed when the null
* response arrives.
*/
/*
 * Shrinker scan callback.  Moves up to sc->nr_to_scan locks from the
 * lru onto linfo->shrink_list, marks each one request_pending, and
 * queues shrink_work if anything was moved.  Returns the number of
 * locks moved.
 */
static unsigned long lock_scan_objects(struct shrinker *shrink,
struct shrink_control *sc)
{
struct lock_info *linfo = KC_SHRINKER_CONTAINER_OF(shrink, struct lock_info);
struct super_block *sb = linfo->sb;
struct scoutfs_lock *lock;
struct scoutfs_lock *tmp;
unsigned long freed = 0;
unsigned long nr = sc->nr_to_scan;
bool added = false;
scoutfs_inc_counter(sb, lock_scan_objects);
spin_lock(&linfo->lock);
restart:
list_for_each_entry_safe(lock, tmp, &linfo->lru_list, lru_head) {
/* everything on the lru is expected to be idle, non-null, and not already queued */
BUG_ON(!lock_idle(lock));
BUG_ON(lock->mode == SCOUTFS_LOCK_NULL);
BUG_ON(!list_empty(&lock->shrink_head));
/* nr is not reset on restart, so the budget spans restarts */
if (nr-- == 0)
break;
__lock_del_lru(linfo, lock);
lock->request_pending = 1;
list_add_tail(&lock->shrink_head, &linfo->shrink_list);
added = true;
freed++;
scoutfs_inc_counter(sb, lock_shrink_attempted);
trace_scoutfs_lock_shrink(sb, lock);
/* could have bazillions of idle locks */
/* begin again from the list head when cond_resched_lock() returns non-zero */
if (cond_resched_lock(&linfo->lock))
goto restart;
}
spin_unlock(&linfo->lock);
/* the null requests themselves are sent later by lock_shrink_worker() */
if (added)
queue_work(linfo->workq, &linfo->shrink_work);
trace_scoutfs_lock_shrink_exit(sb, sc->nr_to_scan, freed);
return freed;
}
void scoutfs_free_unused_locks(struct super_block *sb)
{
DECLARE_LOCK_INFO(sb, linfo);
struct lock_info *linfo = SCOUTFS_SB(sb)->lock_info;
struct shrink_control sc = {
.gfp_mask = GFP_NOFS,
.nr_to_scan = INT_MAX,
};
while (try_shrink_lock(sb, linfo, true))
cond_resched();
lock_scan_objects(KC_SHRINKER_FN(&linfo->shrinker), &sc);
}
static void lock_tseq_show(struct seq_file *m, struct scoutfs_tseq_entry *ent)
@@ -1537,10 +1590,10 @@ u64 scoutfs_lock_ino_refresh_gen(struct super_block *sb, u64 ino)
* transitions and sending requests. We set the shutdown flag to catch
* anyone who breaks this rule.
*
* With no more lock callers, we'll no longer try to shrink the pool of
* granted locks. We'll free all of them as _destroy() is called after
* the farewell response indicates that the server tore down all our
* lock state.
* We unregister the shrinker so that we won't try and send null
* requests in response to memory pressure. The locks will all be
* unceremoniously dropped once we get a farewell response from the
* server which indicates that they destroyed our locking state.
*
* We will still respond to invalidation requests that have to be
* processed to let unmount in other mounts acquire locks and make
@@ -1560,6 +1613,10 @@ void scoutfs_lock_shutdown(struct super_block *sb)
trace_scoutfs_lock_shutdown(sb, linfo);
/* stop the shrinker from queueing work */
KC_UNREGISTER_SHRINKER(&linfo->shrinker);
flush_work(&linfo->shrink_work);
/* cause current and future lock calls to return errors */
spin_lock(&linfo->lock);
linfo->shutdown = true;
@@ -1650,6 +1707,8 @@ void scoutfs_lock_destroy(struct super_block *sb)
list_del_init(&lock->inv_head);
lock->invalidate_pending = 0;
}
if (!list_empty(&lock->shrink_head))
list_del_init(&lock->shrink_head);
lock_remove(linfo, lock);
lock_free(linfo, lock);
}
@@ -1674,9 +1733,14 @@ int scoutfs_lock_setup(struct super_block *sb)
spin_lock_init(&linfo->lock);
linfo->lock_tree = RB_ROOT;
linfo->lock_range_tree = RB_ROOT;
KC_INIT_SHRINKER_FUNCS(&linfo->shrinker, lock_count_objects,
lock_scan_objects);
KC_REGISTER_SHRINKER(&linfo->shrinker, "scoutfs-lock:" SCSBF, SCSB_ARGS(sb));
INIT_LIST_HEAD(&linfo->lru_list);
INIT_WORK(&linfo->inv_work, lock_invalidate_worker);
INIT_LIST_HEAD(&linfo->inv_list);
INIT_WORK(&linfo->shrink_work, lock_shrink_worker);
INIT_LIST_HEAD(&linfo->shrink_list);
atomic64_set(&linfo->next_refresh_gen, 0);
scoutfs_tseq_tree_init(&linfo->tseq_tree, lock_tseq_show);

View File

@@ -506,19 +506,6 @@ out:
* because we don't know which locks they'll hold. Once recover
* finishes the server calls us to kick all the locks that were waiting
* during recovery.
*
* The calling server shuts down if we return errors indicating that we
* weren't able to ensure forward progress in the lock state machine.
*
* Failure to send to a disconnected client is not a fatal error.
* During normal disconnection the client's state is removed before
* their connection is destroyed. We can't use state to try and send to
* a non-existing connection. But a client that fails to reconnect is
* disconnected before being fenced. If we have multiple disconnected
* clients we can try to send to one while cleaning up another. If
* they've uncleanly disconnected their locks are going to be removed
* and the lock can make forward progress again. Or we'll shut down for
* failure to fence.
*/
static int process_waiting_requests(struct super_block *sb,
struct server_lock_node *snode)
@@ -610,10 +597,6 @@ static int process_waiting_requests(struct super_block *sb,
out:
put_server_lock(inf, snode);
/* disconnected clients will be fenced, trying to send to them isn't fatal */
if (ret == -ENOTCONN)
ret = 0;
return ret;
}

View File

@@ -21,7 +21,6 @@
#include <net/tcp.h>
#include <linux/log2.h>
#include <linux/jhash.h>
#include <linux/rbtree.h>
#include "format.h"
#include "counters.h"
@@ -126,7 +125,6 @@ struct message_send {
unsigned long dead:1;
struct list_head head;
scoutfs_net_response_t resp_func;
struct rb_node node;
void *resp_data;
struct scoutfs_net_header nh;
};
@@ -163,118 +161,49 @@ static bool nh_is_request(struct scoutfs_net_header *nh)
return !nh_is_response(nh);
}
/*
 * Three-way compare of a search position against a queued message.
 * Requests are keyed by their header id and everything else by its
 * header seq.  Returns -1, 0 or 1 as pos is less than, equal to, or
 * greater than the message's key.
 */
static int cmp_sorted_msend(u64 pos, struct message_send *msend)
{
	u64 key;

	if (nh_is_request(&msend->nh))
		key = le64_to_cpu(msend->nh.id);
	else
		key = le64_to_cpu(msend->nh.seq);

	if (pos < key)
		return -1;
	if (pos > key)
		return 1;
	return 0;
}
/*
 * Walk the rbtree at root looking for pos, ordering nodes with
 * cmp_sorted_msend().
 *
 * Returns the message whose key equals pos or, if there is no exact
 * match, the message with the smallest key greater than pos.  Returns
 * NULL when every key in the tree is less than pos (or the tree is
 * empty).
 *
 * When ins is non-NULL it is linked into the tree at the leaf slot where
 * the search ended and the tree is rebalanced.  Inserting a key that is
 * already present is a BUG.
 */
static struct message_send *search_sorted_msends(struct rb_root *root, u64 pos, struct rb_node *ins)
{
struct rb_node **node = &root->rb_node;
struct rb_node *parent = NULL;
struct message_send *msend = NULL;
struct message_send *next = NULL;
/* starts non-zero so an empty tree doesn't look like an exact match */
int cmp = -1;
while (*node) {
parent = *node;
msend = container_of(*node, struct message_send, node);
cmp = cmp_sorted_msend(pos, msend);
if (cmp < 0) {
/* pos sorts before this node, it's the best successor so far */
next = msend;
node = &(*node)->rb_left;
} else if (cmp > 0) {
node = &(*node)->rb_right;
} else {
next = msend;
break;
}
}
/* duplicate keys are never inserted */
BUG_ON(cmp == 0 && ins);
if (ins) {
rb_link_node(ins, parent, node);
rb_insert_color(ins, root);
}
return next;
}
/*
 * Return the message that follows msend in its rbtree's sort order, or
 * NULL if msend is the last node.
 */
static struct message_send *next_sorted_msend(struct message_send *msend)
{
	struct rb_node *succ = rb_next(&msend->node);

	if (!succ)
		return NULL;

	return rb_entry(succ, struct message_send, node);
}
/*
 * Iterate over the messages in ROOT_ in sorted order, starting with the
 * first message whose key is greater than or equal to POS_.  TMP_ is
 * loaded with the successor before each pass through the body, so the
 * body can erase MSEND_ from the tree without breaking the iteration.
 */
#define for_each_sorted_msend(MSEND_, TMP_, ROOT_, POS_) \
for (MSEND_ = search_sorted_msends(ROOT_, POS_, NULL); \
MSEND_ != NULL && ({ TMP_ = next_sorted_msend(MSEND_); true; }); \
MSEND_ = TMP_)
static void insert_sorted_msend(struct scoutfs_net_connection *conn, struct message_send *msend)
{
BUG_ON(!RB_EMPTY_NODE(&msend->node));
if (nh_is_request(&msend->nh))
search_sorted_msends(&conn->req_root, le64_to_cpu(msend->nh.id), &msend->node);
else
search_sorted_msends(&conn->resp_root, le64_to_cpu(msend->nh.seq), &msend->node);
}
/*
 * Unlink a message from whichever of the connection's sorted trees holds
 * it and mark its node empty.  Does nothing if the message isn't
 * currently linked, so it's safe to call more than once.
 */
static void erase_sorted_msend(struct scoutfs_net_connection *conn, struct message_send *msend)
{
	struct rb_root *root;

	if (RB_EMPTY_NODE(&msend->node))
		return;

	if (nh_is_request(&msend->nh))
		root = &conn->req_root;
	else
		root = &conn->resp_root;

	rb_erase(&msend->node, root);
	RB_CLEAR_NODE(&msend->node);
}
static void move_sorted_msends(struct scoutfs_net_connection *dst_conn, struct rb_root *dst_root,
struct scoutfs_net_connection *src_conn, struct rb_root *src_root)
{
struct message_send *msend;
struct message_send *tmp;
for_each_sorted_msend(msend, tmp, src_root, 0) {
erase_sorted_msend(src_conn, msend);
insert_sorted_msend(dst_conn, msend);
}
}
/*
* Pending requests are uniquely identified by the id they were assigned
* as they were first put on the send queue.
* We return dead requests so that the caller can stop searching other
* lists for the dead request that we found.
*/
static struct message_send *find_request(struct scoutfs_net_connection *conn, u8 cmd, u64 id)
static struct message_send *search_list(struct scoutfs_net_connection *conn,
struct list_head *list,
u8 cmd, u64 id)
{
struct message_send *msend;
assert_spin_locked(&conn->lock);
msend = search_sorted_msends(&conn->req_root, id, NULL);
if (msend && !(msend->nh.cmd == cmd && le64_to_cpu(msend->nh.id) == id))
msend = NULL;
list_for_each_entry(msend, list, head) {
if (nh_is_request(&msend->nh) && msend->nh.cmd == cmd &&
le64_to_cpu(msend->nh.id) == id)
return msend;
}
return NULL;
}
/*
 * Look up a live send request by cmd and id.  The resend queue is
 * searched first and the send queue is only searched if that comes up
 * empty.  A match that has been marked dead is reported as not found.
 */
static struct message_send *find_request(struct scoutfs_net_connection *conn,
					 u8 cmd, u64 id)
{
	struct message_send *found;

	found = search_list(conn, &conn->resend_queue, cmd, id);
	if (!found)
		found = search_list(conn, &conn->send_queue, cmd, id);

	if (found && found->dead)
		return NULL;

	return found;
}
/*
* Free a send message by moving it to the send queue and marking it
* dead. It is removed from the sorted rb roots so it won't be visible
* as a request for response processing.
* Complete a send message by moving it to the send queue and marking it
* to be freed. It won't be visible to callers trying to find sends.
*/
static void queue_dead_free(struct scoutfs_net_connection *conn, struct message_send *msend)
static void complete_send(struct scoutfs_net_connection *conn,
struct message_send *msend)
{
assert_spin_locked(&conn->lock);
@@ -284,7 +213,6 @@ static void queue_dead_free(struct scoutfs_net_connection *conn, struct message_
msend->dead = 1;
list_move(&msend->head, &conn->send_queue);
erase_sorted_msend(conn, msend);
queue_work(conn->workq, &conn->send_work);
}
@@ -336,7 +264,7 @@ static inline u8 net_err_from_host(struct super_block *sb, int error)
error);
}
return SCOUTFS_NET_ERR_EINVAL;
return -EINVAL;
}
return net_errs[ind];
@@ -442,7 +370,6 @@ static int submit_send(struct super_block *sb,
msend->resp_func = resp_func;
msend->resp_data = resp_data;
msend->dead = 0;
RB_CLEAR_NODE(&msend->node);
msend->nh.seq = cpu_to_le64(seq);
msend->nh.recv_seq = 0; /* set when sent, not when queued */
@@ -463,7 +390,6 @@ static int submit_send(struct super_block *sb,
} else {
list_add_tail(&msend->head, &conn->resend_queue);
}
insert_sorted_msend(conn, msend);
if (id_ret)
*id_ret = le64_to_cpu(msend->nh.id);
@@ -533,7 +459,7 @@ static int process_response(struct scoutfs_net_connection *conn,
if (msend) {
resp_func = msend->resp_func;
resp_data = msend->resp_data;
queue_dead_free(conn, msend);
complete_send(conn, msend);
} else {
scoutfs_inc_counter(sb, net_dropped_response);
}
@@ -624,21 +550,43 @@ static void queue_ordered_proc(struct scoutfs_net_connection *conn, struct messa
* Free live responses up to and including the seq by marking them dead
* and moving them to the send queue to be freed.
*/
static void free_acked_responses(struct scoutfs_net_connection *conn, u64 seq)
/*
 * Mark live responses on the given list dead and move them to the send
 * queue, stopping at the first message whose seq is past the given seq.
 * Requests and already-dead messages are left where they are.  Returns
 * true if anything was moved.  The caller holds the connection lock.
 */
static bool move_acked_responses(struct scoutfs_net_connection *conn,
				 struct list_head *list, u64 seq)
{
	struct message_send *cur;
	struct message_send *next;
	bool any_moved = false;

	assert_spin_locked(&conn->lock);

	list_for_each_entry_safe(cur, next, list, head) {
		if (le64_to_cpu(cur->nh.seq) > seq)
			break;

		if (nh_is_response(&cur->nh) && !cur->dead) {
			cur->dead = 1;
			list_move(&cur->head, &conn->send_queue);
			any_moved = true;
		}
	}

	return any_moved;
}
/* acks are processed inline in the recv worker */
static void free_acked_responses(struct scoutfs_net_connection *conn, u64 seq)
{
bool moved;
spin_lock(&conn->lock);
for_each_sorted_msend(msend, tmp, &conn->resp_root, 0) {
if (le64_to_cpu(msend->nh.seq) > seq)
break;
queue_dead_free(conn, msend);
}
moved = move_acked_responses(conn, &conn->send_queue, seq) |
move_acked_responses(conn, &conn->resend_queue, seq);
spin_unlock(&conn->lock);
if (moved)
queue_work(conn->workq, &conn->send_work);
}
static int k_recvmsg(struct socket *sock, void *buf, unsigned len)
@@ -876,11 +824,9 @@ static int k_sendmsg_full(struct socket *sock, struct kvec *kv, unsigned long nr
return ret;
}
static void free_msend(struct net_info *ninf, struct scoutfs_net_connection *conn,
struct message_send *msend)
static void free_msend(struct net_info *ninf, struct message_send *msend)
{
list_del_init(&msend->head);
erase_sorted_msend(conn, msend);
scoutfs_tseq_del(&ninf->msg_tseq_tree, &msend->tseq_entry);
kfree(msend);
}
@@ -920,10 +866,9 @@ static void scoutfs_net_send_worker(struct work_struct *work)
count = 0;
spin_lock(&conn->lock);
list_for_each_entry_safe(msend, _msend_, &conn->send_queue, head) {
if (msend->dead) {
free_msend(ninf, conn, msend);
free_msend(ninf, msend);
continue;
}
@@ -1012,7 +957,7 @@ static void scoutfs_net_destroy_worker(struct work_struct *work)
list_splice_init(&conn->resend_queue, &conn->send_queue);
list_for_each_entry_safe(msend, tmp, &conn->send_queue, head)
free_msend(ninf, conn, msend);
free_msend(ninf, msend);
/* accepted sockets are removed from their listener's list */
if (conn->listening_conn) {
@@ -1358,7 +1303,7 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
struct message_send, head))) {
resp_func = msend->resp_func;
resp_data = msend->resp_data;
free_msend(ninf, conn, msend);
free_msend(ninf, msend);
spin_unlock(&conn->lock);
call_resp_func(sb, conn, resp_func, resp_data, NULL, 0, -ECONNABORTED);
@@ -1374,7 +1319,7 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
list_splice_tail_init(&conn->send_queue, &conn->resend_queue);
list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head) {
if (msend->nh.cmd == SCOUTFS_NET_CMD_GREETING)
free_msend(ninf, conn, msend);
free_msend(ninf, msend);
}
clear_conn_fl(conn, saw_greeting);
@@ -1548,8 +1493,6 @@ scoutfs_net_alloc_conn(struct super_block *sb,
atomic64_set(&conn->recv_seq, 0);
INIT_LIST_HEAD(&conn->send_queue);
INIT_LIST_HEAD(&conn->resend_queue);
conn->req_root = RB_ROOT;
conn->resp_root = RB_ROOT;
INIT_WORK(&conn->listen_work, scoutfs_net_listen_worker);
INIT_WORK(&conn->connect_work, scoutfs_net_connect_worker);
INIT_WORK(&conn->send_work, scoutfs_net_send_worker);
@@ -1762,7 +1705,7 @@ void scoutfs_net_client_greeting(struct super_block *sb,
atomic64_set(&conn->recv_seq, 0);
list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head){
if (nh_is_response(&msend->nh))
free_msend(ninf, conn, msend);
free_msend(ninf, msend);
}
}
@@ -1865,8 +1808,6 @@ restart:
BUG_ON(!list_empty(&reconn->send_queue));
/* queued greeting response is racing, can be in send or resend queue */
list_splice_tail_init(&reconn->resend_queue, &conn->resend_queue);
move_sorted_msends(conn, &conn->req_root, reconn, &reconn->req_root);
move_sorted_msends(conn, &conn->resp_root, reconn, &reconn->resp_root);
/* new conn info is unused, swap, old won't call down */
swap(conn->info, reconn->info);

View File

@@ -67,8 +67,6 @@ struct scoutfs_net_connection {
u64 next_send_id;
struct list_head send_queue;
struct list_head resend_queue;
struct rb_root req_root;
struct rb_root resp_root;
atomic64_t recv_seq;
unsigned int ordered_proc_nr;

View File

@@ -34,7 +34,6 @@ enum {
Opt_data_prealloc_blocks,
Opt_data_prealloc_contig_only,
Opt_ino_alloc_per_lock,
Opt_lock_idle_count,
Opt_log_merge_wait_timeout_ms,
Opt_metadev_path,
Opt_noacl,
@@ -50,7 +49,6 @@ static const match_table_t tokens = {
{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
{Opt_ino_alloc_per_lock, "ino_alloc_per_lock=%s"},
{Opt_lock_idle_count, "lock_idle_count=%s"},
{Opt_log_merge_wait_timeout_ms, "log_merge_wait_timeout_ms=%s"},
{Opt_metadev_path, "metadev_path=%s"},
{Opt_noacl, "noacl"},
@@ -121,10 +119,6 @@ static void free_options(struct scoutfs_mount_options *opts)
kfree(opts->metadev_path);
}
#define MIN_LOCK_IDLE_COUNT 32
#define DEFAULT_LOCK_IDLE_COUNT (10 * 1000)
#define MAX_LOCK_IDLE_COUNT (100 * 1000)
#define MIN_LOG_MERGE_WAIT_TIMEOUT_MS 100UL
#define DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS 500
#define MAX_LOG_MERGE_WAIT_TIMEOUT_MS (60 * MSEC_PER_SEC)
@@ -145,7 +139,6 @@ static void init_default_options(struct scoutfs_mount_options *opts)
opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS;
opts->data_prealloc_contig_only = 1;
opts->ino_alloc_per_lock = SCOUTFS_LOCK_INODE_GROUP_NR;
opts->lock_idle_count = DEFAULT_LOCK_IDLE_COUNT;
opts->log_merge_wait_timeout_ms = DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS;
opts->orphan_scan_delay_ms = -1;
opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
@@ -153,21 +146,6 @@ static void init_default_options(struct scoutfs_mount_options *opts)
opts->tcp_keepalive_timeout_ms = DEFAULT_TCP_KEEPALIVE_TIMEOUT_MS;
}
/*
 * Validate a parsed lock_idle_count value.
 *
 * @ret: result of the caller's integer parse; negative means the parse
 *       failed and val must not be trusted.
 * @val: the parsed value.
 *
 * Returns 0 if the value parsed and lies within
 * [MIN_LOCK_IDLE_COUNT, MAX_LOCK_IDLE_COUNT], otherwise logs an error
 * and returns -EINVAL.
 */
static int verify_lock_idle_count(struct super_block *sb, int ret, int val)
{
	if (ret < 0) {
		scoutfs_err(sb, "failed to parse lock_idle_count value");
		return -EINVAL;
	}

	if (val < MIN_LOCK_IDLE_COUNT || val > MAX_LOCK_IDLE_COUNT) {
		/* the limits are plain int constants, format them as such */
		scoutfs_err(sb, "invalid lock_idle_count value %d, must be between %d and %d",
			    val, MIN_LOCK_IDLE_COUNT, MAX_LOCK_IDLE_COUNT);
		return -EINVAL;
	}

	return 0;
}
static int verify_log_merge_wait_timeout_ms(struct super_block *sb, int ret, int val)
{
if (ret < 0) {
@@ -283,14 +261,6 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
opts->tcp_keepalive_timeout_ms = nr;
break;
case Opt_lock_idle_count:
ret = match_int(args, &nr);
ret = verify_lock_idle_count(sb, ret, nr);
if (ret < 0)
return ret;
opts->lock_idle_count = nr;
break;
case Opt_log_merge_wait_timeout_ms:
ret = match_int(args, &nr);
ret = verify_log_merge_wait_timeout_ms(sb, ret, nr);
@@ -566,43 +536,6 @@ static ssize_t ino_alloc_per_lock_store(struct kobject *kobj, struct kobj_attrib
}
SCOUTFS_ATTR_RW(ino_alloc_per_lock);
/*
 * sysfs show handler: format the mount's current lock_idle_count option
 * into buf and return the number of characters produced.
 */
static ssize_t lock_idle_count_show(struct kobject *kobj, struct kobj_attribute *attr,
				    char *buf)
{
	struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
	struct scoutfs_mount_options opts;

	scoutfs_options_read(sb, &opts);

	/* lock_idle_count is a signed int in the options struct */
	return snprintf(buf, PAGE_SIZE, "%d", opts.lock_idle_count);
}
/*
 * sysfs store handler: parse an integer from the written bytes, check it
 * with verify_lock_idle_count(), and publish it to the mount's options
 * under the options seqlock.  Returns count on success or the negative
 * error from verification.
 */
static ssize_t lock_idle_count_store(struct kobject *kobj, struct kobj_attribute *attr,
				     const char *buf, size_t count)
{
	struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
	DECLARE_OPTIONS_INFO(sb, optinf);
	char nullterm[30]; /* more than enough for octal -U64_MAX */
	int parsed;
	int copy_len;
	int ret;

	/* parse from a bounded, nul-terminated copy of the input */
	copy_len = min(count, sizeof(nullterm) - 1);
	memcpy(nullterm, buf, copy_len);
	nullterm[copy_len] = '\0';

	/* verification looks at the parse result before it trusts the value */
	ret = kstrtoint(nullterm, 0, &parsed);
	ret = verify_lock_idle_count(sb, ret, parsed);
	if (ret != 0)
		return ret;

	write_seqlock(&optinf->seqlock);
	optinf->opts.lock_idle_count = parsed;
	write_sequnlock(&optinf->seqlock);

	ret = count;
	return ret;
}
SCOUTFS_ATTR_RW(lock_idle_count);
static ssize_t log_merge_wait_timeout_ms_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
{
@@ -744,7 +677,6 @@ static struct attribute *options_attrs[] = {
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
SCOUTFS_ATTR_PTR(ino_alloc_per_lock),
SCOUTFS_ATTR_PTR(lock_idle_count),
SCOUTFS_ATTR_PTR(log_merge_wait_timeout_ms),
SCOUTFS_ATTR_PTR(metadev_path),
SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),

View File

@@ -9,7 +9,6 @@ struct scoutfs_mount_options {
u64 data_prealloc_blocks;
bool data_prealloc_contig_only;
unsigned int ino_alloc_per_lock;
int lock_idle_count;
unsigned int log_merge_wait_timeout_ms;
char *metadev_path;
unsigned int orphan_scan_delay_ms;

View File

@@ -1195,8 +1195,8 @@ static struct attribute *quorum_attrs[] = {
/*
 * Return true if addr can be used as a unicast IPv4 address.
 *
 * The address is rejected if it belongs to ANY of the classes tested
 * below.  The tests have to be joined with ||: no address is, for
 * example, both the limited broadcast address and in the zero network,
 * so joining them with && could never be true and every address would
 * be accepted.
 */
static inline bool valid_ipv4_unicast(__be32 addr)
{
	return !(ipv4_is_multicast(addr) || ipv4_is_lbcast(addr) ||
		 ipv4_is_zeronet(addr) || ipv4_is_local_multicast(addr));
}
static inline bool valid_ipv4_port(__be16 port)

View File

@@ -789,80 +789,6 @@ TRACE_EVENT(scoutfs_inode_walk_writeback,
__entry->ino, __entry->write, __entry->ret)
);
/*
 * Tracepoint that takes only the super block and records the fields
 * provided by SCSB_TRACE_FIELDS.
 * NOTE(review): presumably emitted as an orphan scan begins -- confirm
 * at the call site.
 */
TRACE_EVENT(scoutfs_orphan_scan_start,
TP_PROTO(struct super_block *sb),
TP_ARGS(sb),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
),
TP_printk(SCSBF, SCSB_TRACE_ARGS)
);
/*
 * Tracepoint recording the super block fields and a work_todo flag,
 * which is printed as an integer.
 * NOTE(review): presumably emitted as an orphan scan finishes, with
 * work_todo saying whether more work remains -- confirm at the call
 * site.
 */
TRACE_EVENT(scoutfs_orphan_scan_stop,
TP_PROTO(struct super_block *sb, bool work_todo),
TP_ARGS(sb, work_todo),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(bool, work_todo)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->work_todo = work_todo;
),
TP_printk(SCSBF" work_todo %d", SCSB_TRACE_ARGS, __entry->work_todo)
);
/*
 * Tracepoint recording the super block fields and one inode number.
 * NOTE(review): presumably emitted for each inode the orphan scan
 * decides to work on -- confirm at the call site.
 */
TRACE_EVENT(scoutfs_orphan_scan_work,
TP_PROTO(struct super_block *sb, __u64 ino),
TP_ARGS(sb, ino),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
),
TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS,
__entry->ino)
);
/*
 * Tracepoint recording the super block fields, an inode number and an
 * integer result.
 * NOTE(review): presumably emitted once orphan scan work on the inode
 * has completed, with ret as its outcome -- confirm at the call site.
 */
TRACE_EVENT(scoutfs_orphan_scan_end,
TP_PROTO(struct super_block *sb, __u64 ino, int ret),
TP_ARGS(sb, ino, ret),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(int, ret)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->ret = ret;
),
TP_printk(SCSBF" ino %llu ret %d", SCSB_TRACE_ARGS,
__entry->ino, __entry->ret)
);
DECLARE_EVENT_CLASS(scoutfs_lock_info_class,
TP_PROTO(struct super_block *sb, struct lock_info *linfo),
@@ -1110,82 +1036,6 @@ TRACE_EVENT(scoutfs_orphan_inode,
MINOR(__entry->dev), __entry->ino)
);
/*
 * Shared event class for the try_delete tracepoints: each event records
 * the super block fields and the inode number being considered.
 */
DECLARE_EVENT_CLASS(scoutfs_try_delete_class,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
),
TP_printk(SCSBF" ino %llu", SCSB_TRACE_ARGS, __entry->ino)
);
/*
 * The events below share the class layout and differ only by name.
 * NOTE(review): the names suggest one event per outcome of a deletion
 * attempt (local busy, cached, no item) -- confirm at the call sites.
 */
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_local_busy,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_cached,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
DEFINE_EVENT(scoutfs_try_delete_class, scoutfs_try_delete_no_item,
TP_PROTO(struct super_block *sb, u64 ino),
TP_ARGS(sb, ino)
);
/*
 * Tracepoint recording the super block fields, an inode number and a
 * link count.  It is a standalone event rather than a member of
 * scoutfs_try_delete_class because it carries the extra nlink field.
 */
TRACE_EVENT(scoutfs_try_delete_has_links,
TP_PROTO(struct super_block *sb, u64 ino, unsigned int nlink),
TP_ARGS(sb, ino, nlink),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(unsigned int, nlink)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->nlink = nlink;
),
TP_printk(SCSBF" ino %llu nlink %u", SCSB_TRACE_ARGS, __entry->ino,
__entry->nlink)
);
/*
 * Tracepoint recording the super block fields, an inode number and an
 * integer result.
 * NOTE(review): presumably reports the outcome of deleting an orphaned
 * inode -- confirm at the call site.
 */
TRACE_EVENT(scoutfs_inode_orphan_delete,
TP_PROTO(struct super_block *sb, u64 ino, int ret),
TP_ARGS(sb, ino, ret),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(__u64, ino)
__field(int, ret)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->ino = ino;
__entry->ret = ret;
),
TP_printk(SCSBF" ino %llu ret %d", SCSB_TRACE_ARGS, __entry->ino,
__entry->ret)
);
TRACE_EVENT(scoutfs_delete_inode,
TP_PROTO(struct super_block *sb, u64 ino, umode_t mode, u64 size),
@@ -1210,32 +1060,6 @@ TRACE_EVENT(scoutfs_delete_inode,
__entry->mode, __entry->size)
);
/*
 * Tracepoint recording an inode's number, mode and size together with
 * an integer result.  Unlike the events that use SCSB_TRACE_FIELDS, the
 * mount is identified here by the super block's s_dev, printed as
 * major,minor.
 */
TRACE_EVENT(scoutfs_delete_inode_end,
TP_PROTO(struct super_block *sb, u64 ino, umode_t mode, u64 size, int ret),
TP_ARGS(sb, ino, mode, size, ret),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(__u64, ino)
__field(umode_t, mode)
__field(__u64, size)
__field(int, ret)
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->ino = ino;
__entry->mode = mode;
__entry->size = size;
__entry->ret = ret;
),
TP_printk("dev %d,%d ino %llu, mode 0x%x size %llu, ret %d",
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->ino,
__entry->mode, __entry->size, __entry->ret)
);
DECLARE_EVENT_CLASS(scoutfs_key_class,
TP_PROTO(struct super_block *sb, struct scoutfs_key *key),
TP_ARGS(sb, key),
@@ -1619,6 +1443,28 @@ DEFINE_EVENT(scoutfs_work_class, scoutfs_data_return_server_extents_exit,
TP_ARGS(sb, data, ret)
);
/*
 * Event class for the end of a shrinker scan: records how many objects
 * the scan was asked to look at and the value it returned.
 *
 * NOTE(review): the super block is recorded as a raw pointer and printed
 * with %p here, where other events in this file use SCSB_TRACE_FIELDS --
 * confirm that this is intentional.
 */
DECLARE_EVENT_CLASS(scoutfs_shrink_exit_class,
TP_PROTO(struct super_block *sb, unsigned long nr_to_scan, int ret),
TP_ARGS(sb, nr_to_scan, ret),
TP_STRUCT__entry(
__field(void *, sb)
__field(unsigned long, nr_to_scan)
__field(int, ret)
),
TP_fast_assign(
__entry->sb = sb;
__entry->nr_to_scan = nr_to_scan;
__entry->ret = ret;
),
TP_printk("sb %p nr_to_scan %lu ret %d",
__entry->sb, __entry->nr_to_scan, __entry->ret)
);
/* shrink exit event for the lock shrinker */
DEFINE_EVENT(scoutfs_shrink_exit_class, scoutfs_lock_shrink_exit,
TP_PROTO(struct super_block *sb, unsigned long nr_to_scan, int ret),
TP_ARGS(sb, nr_to_scan, ret)
);
TRACE_EVENT(scoutfs_rename,
TP_PROTO(struct super_block *sb, struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
@@ -3251,24 +3097,6 @@ TRACE_EVENT(scoutfs_ioc_search_xattrs,
__entry->ino, __entry->last_ino)
);
/*
 * Tracepoint recording the super block fields and the name of a trigger.
 *
 * Only the name pointer is stored in the trace entry; the string itself
 * is read through that pointer when the entry is printed with %s.
 * NOTE(review): that requires the string to still be valid when the
 * trace is read.  If callers can't guarantee that, the tracing
 * __string() helpers copy the text into the entry instead -- confirm.
 */
TRACE_EVENT(scoutfs_trigger_fired,
TP_PROTO(struct super_block *sb, const char *name),
TP_ARGS(sb, name),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
__field(const char *, name)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
__entry->name = name;
),
TP_printk(SCSBF" %s", SCSB_TRACE_ARGS, __entry->name)
);
#endif /* _TRACE_SCOUTFS_H */
/* This part must be outside protection */

View File

@@ -41,7 +41,6 @@
#include "recov.h"
#include "omap.h"
#include "fence.h"
#include "triggers.h"
/*
* Every active mount can act as the server that listens on a net
@@ -256,14 +255,6 @@ static void server_down(struct server_info *server)
cmpxchg(&server->status, was, SERVER_DOWN);
}
/*
 * Fill in the key of the mounted client item for the given rid.  Every
 * key field that isn't named here is zeroed.
 */
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
{
	struct scoutfs_key mounted = {
		.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
		.skmc_rid = cpu_to_le64(rid),
	};

	*key = mounted;
}
/*
* The per-holder allocation block use budget balances batching
* efficiency and concurrency. The larger this gets, the fewer
@@ -971,28 +962,6 @@ static int find_log_trees_item(struct super_block *sb,
return ret;
}
/*
 * Report whether the mounted_clients btree has an item for the given
 * rid.  The lookup runs under mounted_clients_mutex.  Any lookup failure,
 * not just a missing item, is reported as not mounted.
 */
static bool rid_is_mounted(struct super_block *sb, u64 rid)
{
	DECLARE_SERVER_INFO(sb, server);
	struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
	SCOUTFS_BTREE_ITEM_REF(iref);
	struct scoutfs_key key;
	bool mounted;
	int ret;

	init_mounted_client_key(&key, rid);

	mutex_lock(&server->mounted_clients_mutex);
	ret = scoutfs_btree_lookup(sb, &super->mounted_clients, &key, &iref);
	mounted = (ret == 0);
	/* only the item's existence matters, drop the reference right away */
	if (mounted)
		scoutfs_btree_put_iref(&iref);
	mutex_unlock(&server->mounted_clients_mutex);

	return mounted;
}
/*
* Find the log_trees item with the greatest nr for each rid. Fills the
* caller's log_trees and sets the key before the returned log_trees for
@@ -1251,60 +1220,6 @@ static int do_finalize_ours(struct super_block *sb,
* happens to arrive at just the right time. That's fine, merging will
* ignore and tear down the empty input.
*/
static int reclaim_open_log_tree(struct super_block *sb, u64 rid);
/*
* Reclaim log trees for rids that have no mounted_clients entry.
* They block merges by appearing active. reclaim_open_log_tree
* may need multiple commits to drain allocators (-EINPROGRESS).
*
* The caller holds logs_mutex and a commit, both are dropped and
* re-acquired around each reclaim call. Returns >0 if any orphans
* were reclaimed so the caller can re-check state that may have
* changed while the lock was dropped.
*
* The caller's own rid is never treated as an orphan. Returns 0 if
* nothing needed reclaiming and a negative errno if iterating the log
* trees or applying a commit failed.
*/
static int reclaim_orphan_log_trees(struct super_block *sb, u64 rid,
struct commit_hold *hold)
{
struct server_info *server = SCOUTFS_SB(sb)->server_info;
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
struct scoutfs_log_trees lt;
struct scoutfs_key key;
bool found = false;
u64 orphan_rid;
int ret;
int err;
/* start from the greatest possible key, one log_trees item is visited per rid */
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &lt)) > 0) {
/* skip trees that are finalized, that are the caller's, or whose rid is still mounted */
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) ||
le64_to_cpu(lt.rid) == rid ||
rid_is_mounted(sb, le64_to_cpu(lt.rid)))
continue;
orphan_rid = le64_to_cpu(lt.rid);
scoutfs_err(sb, "reclaiming orphan log trees for rid %016llx nr %llu",
orphan_rid, le64_to_cpu(lt.nr));
found = true;
/*
 * Each pass drops logs_mutex for the reclaim call, applies the held
 * commit, then takes a new hold and the mutex again.  -EINPROGRESS
 * from reclaim isn't passed to the commit as an error, it only means
 * that another pass is needed.  A commit error ends the loop.
 */
do {
mutex_unlock(&server->logs_mutex);
err = reclaim_open_log_tree(sb, orphan_rid);
ret = server_apply_commit(sb, hold,
err == -EINPROGRESS ? 0 : err);
server_hold_commit(sb, hold);
mutex_lock(&server->logs_mutex);
} while (err == -EINPROGRESS && ret == 0);
if (ret < 0)
break;
}
/* found converts to 1 when at least one orphan was processed */
return ret < 0 ? ret : found;
}
#define FINALIZE_POLL_MIN_DELAY_MS 5U
#define FINALIZE_POLL_MAX_DELAY_MS 100U
#define FINALIZE_POLL_DELAY_GROWTH_PCT 150U
@@ -1345,16 +1260,6 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
break;
}
ret = reclaim_orphan_log_trees(sb, rid, hold);
if (ret < 0) {
err_str = "reclaiming orphan log trees";
break;
}
if (ret > 0) {
/* lock was dropped, re-check merge status */
continue;
}
/* look for finalized and other active log btrees */
saw_finalized = false;
others_active = false;
@@ -1386,13 +1291,9 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
* meta was low so that deleted items are merged
* promptly and freed blocks can bring the client out of
* enospc.
*
* The trigger can be used to force a log merge in cases where
* a test only generates small amounts of change.
*/
finalize_ours = (lt->item_root.height > 2) ||
(le32_to_cpu(lt->meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW) ||
scoutfs_trigger(sb, LOG_MERGE_FORCE_FINALIZE_OURS);
(le32_to_cpu(lt->meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW);
trace_scoutfs_server_finalize_decision(sb, rid, saw_finalized, others_active,
ours_visible, finalize_ours, delay_ms,
@@ -1501,8 +1402,6 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
BUG_ON(err); /* inconsistent */
}
scoutfs_inc_counter(sb, log_merge_start);
/* we're done, caller can make forward progress */
break;
}
@@ -1719,8 +1618,7 @@ static int server_get_log_trees(struct super_block *sb,
goto update;
}
ret = alloc_move_empty(sb, &super->data_alloc, &lt.data_freed,
COMMIT_HOLD_ALLOC_BUDGET / 2);
ret = alloc_move_empty(sb, &super->data_alloc, &lt.data_freed, 100);
if (ret == -EINPROGRESS)
ret = 0;
if (ret < 0) {
@@ -2015,15 +1913,13 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri, server->other_freed,
&lt.meta_avail)) ?:
(err_str = "empty data_avail",
alloc_move_empty(sb, &super->data_alloc, &lt.data_avail,
COMMIT_HOLD_ALLOC_BUDGET / 2)) ?:
alloc_move_empty(sb, &super->data_alloc, &lt.data_avail, 100)) ?:
(err_str = "empty data_freed",
alloc_move_empty(sb, &super->data_alloc, &lt.data_freed,
COMMIT_HOLD_ALLOC_BUDGET / 2));
alloc_move_empty(sb, &super->data_alloc, &lt.data_freed, 100));
mutex_unlock(&server->alloc_mutex);
/* only finalize, allowing merging, once the allocators are fully freed */
if (ret == 0 && !scoutfs_trigger(sb, RECLAIM_SKIP_FINALIZE)) {
if (ret == 0) {
/* the transaction is no longer open */
lt.commit_trans_seq = lt.get_trans_seq;
@@ -2075,8 +1971,7 @@ static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret)
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &lt)) > 0) {
if ((le64_to_cpu(lt.get_trans_seq) > le64_to_cpu(lt.commit_trans_seq)) &&
le64_to_cpu(lt.get_trans_seq) <= last_seq &&
rid_is_mounted(sb, le64_to_cpu(lt.rid))) {
le64_to_cpu(lt.get_trans_seq) <= last_seq) {
last_seq = le64_to_cpu(lt.get_trans_seq) - 1;
}
}
@@ -2611,8 +2506,6 @@ static int splice_log_merge_completions(struct super_block *sb,
queue_work(server->wq, &server->log_merge_free_work);
else
err_str = "deleting merge status item";
scoutfs_inc_counter(sb, log_merge_complete);
out:
if (upd_stat) {
init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
@@ -3143,13 +3036,7 @@ static int server_commit_log_merge(struct super_block *sb,
SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0,
&stat, sizeof(stat));
if (ret < 0) {
/*
* During a retransmission, it's possible that the server
* already committed and resolved this log merge. ENOENT
* is expected in that case.
*/
if (ret != -ENOENT)
err_str = "getting merge status item";
err_str = "getting merge status item";
goto out;
}
@@ -3628,6 +3515,14 @@ out:
return scoutfs_net_response(sb, conn, cmd, id, ret, &nst, sizeof(nst));
}
/*
 * Fill in the key of the mounted client item for the given rid.  Every
 * key field that isn't named here is zeroed.
 */
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
{
	struct scoutfs_key mounted = {
		.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
		.skmc_rid = cpu_to_le64(rid),
	};

	*key = mounted;
}
static bool invalid_mounted_client_item(struct scoutfs_btree_item_ref *iref)
{
return (iref->val_len != sizeof(struct scoutfs_mounted_client_btree_val));

View File

@@ -18,7 +18,6 @@
#include "super.h"
#include "triggers.h"
#include "scoutfs_trace.h"
/*
* We have debugfs files we can write to which arm triggers which
@@ -40,12 +39,10 @@ struct scoutfs_triggers {
/*
 * Trigger names, indexed by enum scoutfs_trigger.  Designated
 * initializers keep each string tied to its enum value regardless of
 * the order of the entries here.
 */
static char *names[] = {
[SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE] = "block_remove_stale",
[SCOUTFS_TRIGGER_LOG_MERGE_FORCE_FINALIZE_OURS] = "log_merge_force_finalize_ours",
[SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE] = "srch_compact_logs_pad_safe",
[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
[SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE] = "reclaim_skip_finalize",
};
bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
@@ -54,7 +51,6 @@ bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
atomic_t *atom;
int old;
int mem;
bool fired;
BUG_ON(t >= SCOUTFS_TRIGGER_NR);
atom = &triggers->atomics[t];
@@ -68,12 +64,7 @@ bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
mem = atomic_cmpxchg(atom, old, 0);
} while (mem && mem != old);
fired = !!mem;
if (fired)
trace_scoutfs_trigger_fired(sb, names[t]);
return fired;
return !!mem;
}
int scoutfs_setup_triggers(struct super_block *sb)

View File

@@ -3,12 +3,10 @@
/*
 * Identifiers for the individual triggers.  SCOUTFS_TRIGGER_NR stays
 * last so that it equals the number of triggers defined above it.
 */
enum scoutfs_trigger {
SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE,
SCOUTFS_TRIGGER_LOG_MERGE_FORCE_FINALIZE_OURS,
SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE,
SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE,
SCOUTFS_TRIGGER_NR,
};

View File

@@ -9,7 +9,7 @@
echo "$0 running rid '$SCOUTFS_FENCED_REQ_RID' ip '$SCOUTFS_FENCED_REQ_IP' args '$@'"
# Print the arguments to stderr once and exit with failure.
#
# The message is written through the already-open stderr descriptor
# (>&2) rather than by re-opening /dev/stderr by path, so it doesn't
# depend on that path being openable by this process.
echo_fail() {
	echo "$@" >&2
	exit 1
}
@@ -27,7 +27,8 @@ for fs in /sys/fs/scoutfs/*; do
nr="$(quiet_cat $fs/data_device_maj_min)"
[ ! -d "$fs" -o "$fs_rid" != "$rid" ] && continue
mnt=$(findmnt -l -n -t scoutfs -o TARGET -S $nr)
mnt=$(findmnt -l -n -t scoutfs -o TARGET -S $nr) || \
echo_fail "findmnt -t scoutfs -S $nr failed"
[ -z "$mnt" ] && continue
if ! umount -qf "$mnt"; then

View File

@@ -123,9 +123,6 @@ t_filter_dmesg()
re="$re|hrtimer: interrupt took .*"
re="$re|clocksource: Long readout interval"
# orphan log trees reclaim is handled, not an error
re="$re|scoutfs .* reclaiming orphan log trees"
# fencing tests force unmounts and trigger timeouts
re="$re|scoutfs .* forcing unmount"
re="$re|scoutfs .* reconnect timed out"
@@ -173,9 +170,6 @@ t_filter_dmesg()
# some ci test guests are unresponsive
re="$re|longest quorum heartbeat .* delay"
# creating block devices may trigger this
re="$re|block device autoloading is deprecated and will be removed."
egrep -v "($re)" | \
ignore_harmless_unwind_kasan_stack_oob
}

View File

@@ -283,30 +283,6 @@ t_reinsert_remount_all()
t_quiet t_mount_all || t_fail "mounting all failed"
}
#
# scratch helpers
#
# Run mkfs on the scratch meta and data devices with a single quorum
# slot on 127.0.0.1:$T_SCRATCH_PORT.  Any arguments are appended to the
# mkfs command line.  All mkfs output is captured in $T_TMP.mkfs.out and
# t_fail is called if mkfs returns failure.
t_scratch_mkfs()
{
	if ! scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" "$@" > $T_TMP.mkfs.out 2>&1; then
		t_fail "scratch mkfs failed"
	fi
}
# Mount the scratch filesystem on $T_MSCR, creating the mount point
# first.  Any arguments are passed to mount ahead of the data device and
# mount point.  t_fail is called if the mount returns failure.
t_scratch_mount()
{
	mkdir -p "$T_MSCR"

	if ! mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$@" "$T_EX_DATA_DEV" "$T_MSCR"; then
		t_fail "scratch mount failed"
	fi
}
# Unmount the scratch filesystem from $T_MSCR, calling t_fail if umount
# returns failure, and then remove the mount point directory.
t_scratch_umount()
{
	if ! umount "$T_MSCR"; then
		t_fail "scratch umount failed"
	fi

	rmdir "$T_MSCR"
}
t_trigger_path() {
local nr="$1"
@@ -522,121 +498,3 @@ t_restore_all_sysfs_mount_options() {
t_set_sysfs_mount_option $i $name "${_saved_opts[$ind]}"
done
}
#
# Keep arming the log_merge_force_finalize_ours trigger on the server's
# mount until a log merge is seen to start, then wait for a merge to
# complete.
#
# Each attempt samples the log_merge_start and log_merge_complete
# counters, arms the trigger, calls t_sync_seq_index, and polls until
# the trigger no longer reads back as "1".  If log_merge_start didn't
# advance during the attempt the whole sequence is retried.
#
t_force_log_merge() {
	local sv=$(t_server_nr)
	local started_before
	local started_after
	local completed_before
	local completed_after

	while true; do
		started_before=$(t_counter log_merge_start $sv)
		completed_before=$(t_counter log_merge_complete $sv)

		t_trigger_arm_silent log_merge_force_finalize_ours $sv
		t_sync_seq_index

		# poll for as long as the trigger still reads back as "1"
		while [[ "$(t_trigger_get log_merge_force_finalize_ours $sv)" == "1" ]]; do
			sleep .5
		done

		started_after=$(t_counter log_merge_start $sv)
		if (( started_after > started_before )); then
			# a merge started, wait for the completion counter to move
			completed_after=$(t_counter log_merge_complete $sv)
			until (( completed_after != completed_before )); do
				sleep .5
				completed_after=$(t_counter log_merge_complete $sv)
			done
			break
		fi
	done
}
# orphan_scan counter value last sampled for each mount, keyed by fs nr
declare -A _last_scan

# Sample the orphan_scan counter of every mount into _last_scan.
t_get_orphan_scan_runs() {
	local nr

	for nr in $(t_fs_nrs); do
		_last_scan[$nr]=$(t_counter orphan_scan $nr)
	done
}
# Take a fresh snapshot of every mount's orphan_scan counter and then
# block, polling every half second, until each mount's counter differs
# from its snapshot.  Mounts are waited on one at a time in t_fs_nrs
# order.
t_wait_for_orphan_scan_runs() {
	local nr
	local now

	t_get_orphan_scan_runs

	for nr in $(t_fs_nrs); do
		while true; do
			now=$(t_counter orphan_scan $nr)
			(( now != _last_scan[$nr] )) && break
			sleep .5
		done
	done
}
# orphan_scan_empty counter value last sampled for each mount, keyed by fs nr
declare -A _last_empty

# Sample the orphan_scan_empty counter of every mount into _last_empty.
t_get_orphan_scan_empty() {
	local nr

	for nr in $(t_fs_nrs); do
		_last_empty[$nr]=$(t_counter orphan_scan_empty $nr)
	done
}
#
# Block until a single pass observes every mount's orphan_scan_empty
# counter advancing.
#
# Each pass first waits for every mount's orphan_scan counter to change,
# then compares each mount's orphan_scan_empty counter with its last
# recorded value.  A mount whose counter didn't move is counted as still
# busy; a mount whose counter moved has its recorded value refreshed.
# If any mount was busy we sleep a second and run another pass.
#
t_wait_for_no_orphans() {
	local nr
	local busy
	local cur

	t_get_orphan_scan_empty

	while true; do
		busy=0

		t_wait_for_orphan_scan_runs

		for nr in $(t_fs_nrs); do
			cur=$(t_counter orphan_scan_empty $nr)
			if (( cur != _last_empty[$nr] )); then
				(( _last_empty[$nr] = cur ))
			else
				(( busy++ ))
			fi
		done

		(( busy == 0 )) && break

		sleep 1
	done
}
#
# Repeatedly run the arguments as a command, sleeping a second in
# between, until it returns success.  The first argument is a relative
# timeout in seconds.  The remaining arguments are the command and its
# arguments.
#
# Returns as soon as the command returns 0.  If the timeout expires
# without the command returning 0 then t_fail is called with a single
# message that includes the timeout and the full command line.
#
t_wait_until_timeout() {
	local relative="$1"
	local expire="$((SECONDS + relative))"

	shift

	while (( SECONDS < expire )); do
		"$@" && return
		sleep 1
	done

	# "$*" joins the command into one word; "$@" inside a larger string
	# would split the message into a separate argument per command word.
	t_fail "command failed for $relative sec: $*"
}

View File

@@ -43,14 +43,9 @@ t_tap_progress()
local testname=$1
local result=$2
local stmsg=""
local diff=""
local dmsg=""
if [[ -s $T_RESULTS/tmp/${testname}/status.msg ]]; then
stmsg="1"
fi
if [[ -s "$T_RESULTS/tmp/${testname}/dmesg.new" ]]; then
dmsg="1"
fi
@@ -66,7 +61,6 @@ t_tap_progress()
echo "# ${testname} ** skipped - permitted **"
else
echo "not ok ${i} - ${testname}"
case ${result} in
101)
echo "# ${testname} ** skipped **"
@@ -76,13 +70,6 @@ t_tap_progress()
;;
esac
if [[ -n "${stmsg}" ]]; then
echo "#"
echo "# status:"
echo "#"
cat $T_RESULTS/tmp/${testname}/status.msg | sed 's/^/# - /'
fi
if [[ -n "${diff}" ]]; then
echo "#"
echo "# diff:"

View File

@@ -1,6 +0,0 @@
== make scratch fs
== create uid/gids
== set acls and permissions
== compare output
== drop caches and compare again
== cleanup scratch fs

View File

@@ -17,7 +17,7 @@ ino not found in dseq index
mount 0 contents after mount 1 rm: contents
ino found in dseq index
ino found in dseq index
stat: cannot stat '/mnt/test/test/inode-deletion/badfile': No such file or directory
stat: cannot stat '/mnt/test/test/inode-deletion/file': No such file or directory
ino not found in dseq index
ino not found in dseq index
== lots of deletions use one open map

View File

View File

@@ -1,3 +0,0 @@
== create orphan log_trees entry via trigger
== verify orphan is reclaimed and merge completes
== verify orphan reclaim was logged

View File

@@ -1,460 +0,0 @@
== missing options should fail ==
punch-offline: must provide offset
Try `punch-offline --help' or `punch-offline --usage' for more information.
punch-offline: must provide length
Try `punch-offline --help' or `punch-offline --usage' for more information.
punch-offline: must provide data_version
Try `punch-offline --help' or `punch-offline --usage' for more information.
== can't hole punch dir or special ==
failed to open '/mnt/test.0/test/punch-offline/dir': Is a directory (21)
scoutfs: punch-offline failed: Is a directory (21)
== punching an empty file does nothing ==
== punch outside of i_size does nothing ==
== can't hole punch online extent ==
0: offset: 0 length: 4096 flags: ..L
extents: 1
punch_offline ioctl failed: Invalid argument (22)
scoutfs: punch-offline failed: Invalid argument (22)
0: offset: 0 length: 4096 flags: ..L
extents: 1
== can't hole punch unwritten extent ==
0: offset: 0 length: 12288 flags: .UL
extents: 1
punch_offline ioctl failed: Invalid argument (22)
scoutfs: punch-offline failed: Invalid argument (22)
0: offset: 0 length: 12288 flags: .UL
extents: 1
== hole punch offline extent ==
0: offset: 0 length: 12288 flags: O.L
extents: 1
0: offset: 0 length: 4096 flags: O..
1: offset: 8192 length: 4096 flags: O.L
extents: 2
== can't hole punch non-aligned bsz offset or len ==
0: offset: 0 length: 12288 flags: O.L
extents: 1
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
0: offset: 0 length: 12288 flags: O.L
extents: 1
== can't hole punch mismatched data_version ==
0: offset: 0 length: 12288 flags: O.L
extents: 1
punch_offline ioctl failed: Stale file handle (116)
scoutfs: punch-offline failed: Stale file handle (116)
punch_offline ioctl failed: Stale file handle (116)
scoutfs: punch-offline failed: Stale file handle (116)
punch_offline ioctl failed: Stale file handle (116)
scoutfs: punch-offline failed: Stale file handle (116)
0: offset: 0 length: 12288 flags: O.L
extents: 1
== Punch hole crossing multiple extents ==
0: offset: 0 length: 7 flags: O.L
extents: 1
0: offset: 0 length: 1 flags: O..
1: offset: 2 length: 1 flags: O..
2: offset: 4 length: 1 flags: O..
3: offset: 6 length: 1 flags: O.L
extents: 4
0: offset: 0 length: 1 flags: O..
1: offset: 6 length: 1 flags: O.L
extents: 2
== punch hole starting at a hole ==
0: offset: 0 length: 7 flags: O.L
extents: 1
0: offset: 0 length: 1 flags: O..
1: offset: 2 length: 1 flags: O..
2: offset: 4 length: 1 flags: O..
3: offset: 6 length: 1 flags: O.L
extents: 4
0: offset: 0 length: 1 flags: O..
1: offset: 6 length: 1 flags: O.L
extents: 2
== large punch ==
0: offset: 0 length: 1572864 flags: O.L
extents: 1
0: offset: 0 length: 134123 flags: O..
1: offset: 202466 length: 264807 flags: O..
2: offset: 535616 length: 199007 flags: O..
3: offset: 802966 length: 769898 flags: O.L
extents: 4
== overlapping punches with lots of extents ==
0: offset: 0 length: 4194304 flags: O.L
extents: 1
extents: 512
extents: 505
extents: 378
extents: 252
0: offset: 0 length: 4096 flags: O..
1: offset: 8192 length: 4096 flags: O..
2: offset: 32768 length: 4096 flags: O..
3: offset: 40960 length: 4096 flags: O..
4: offset: 65536 length: 4096 flags: O..
5: offset: 73728 length: 4096 flags: O..
6: offset: 98304 length: 4096 flags: O..
7: offset: 106496 length: 4096 flags: O..
8: offset: 196608 length: 4096 flags: O..
9: offset: 204800 length: 4096 flags: O..
10: offset: 229376 length: 4096 flags: O..
11: offset: 237568 length: 4096 flags: O..
12: offset: 262144 length: 4096 flags: O..
13: offset: 270336 length: 4096 flags: O..
14: offset: 294912 length: 4096 flags: O..
15: offset: 303104 length: 4096 flags: O..
16: offset: 327680 length: 4096 flags: O..
17: offset: 335872 length: 4096 flags: O..
18: offset: 360448 length: 4096 flags: O..
19: offset: 368640 length: 4096 flags: O..
20: offset: 393216 length: 4096 flags: O..
21: offset: 401408 length: 4096 flags: O..
22: offset: 425984 length: 4096 flags: O..
23: offset: 434176 length: 4096 flags: O..
24: offset: 458752 length: 4096 flags: O..
25: offset: 466944 length: 4096 flags: O..
26: offset: 491520 length: 4096 flags: O..
27: offset: 499712 length: 4096 flags: O..
28: offset: 720896 length: 4096 flags: O..
29: offset: 729088 length: 4096 flags: O..
30: offset: 753664 length: 4096 flags: O..
31: offset: 761856 length: 4096 flags: O..
32: offset: 786432 length: 4096 flags: O..
33: offset: 794624 length: 4096 flags: O..
34: offset: 819200 length: 4096 flags: O..
35: offset: 827392 length: 4096 flags: O..
36: offset: 851968 length: 4096 flags: O..
37: offset: 860160 length: 4096 flags: O..
38: offset: 884736 length: 4096 flags: O..
39: offset: 892928 length: 4096 flags: O..
40: offset: 917504 length: 4096 flags: O..
41: offset: 925696 length: 4096 flags: O..
42: offset: 950272 length: 4096 flags: O..
43: offset: 958464 length: 4096 flags: O..
44: offset: 983040 length: 4096 flags: O..
45: offset: 991232 length: 4096 flags: O..
46: offset: 1015808 length: 4096 flags: O..
47: offset: 1024000 length: 4096 flags: O..
48: offset: 1048576 length: 4096 flags: O..
49: offset: 1056768 length: 4096 flags: O..
50: offset: 1081344 length: 4096 flags: O..
51: offset: 1089536 length: 4096 flags: O..
52: offset: 1114112 length: 4096 flags: O..
53: offset: 1122304 length: 4096 flags: O..
54: offset: 1146880 length: 4096 flags: O..
55: offset: 1155072 length: 4096 flags: O..
56: offset: 1179648 length: 4096 flags: O..
57: offset: 1187840 length: 4096 flags: O..
58: offset: 1212416 length: 4096 flags: O..
59: offset: 1220608 length: 4096 flags: O..
60: offset: 1245184 length: 4096 flags: O..
61: offset: 1253376 length: 4096 flags: O..
62: offset: 1277952 length: 4096 flags: O..
63: offset: 1286144 length: 4096 flags: O..
64: offset: 1310720 length: 4096 flags: O..
65: offset: 1318912 length: 4096 flags: O..
66: offset: 1343488 length: 4096 flags: O..
67: offset: 1351680 length: 4096 flags: O..
68: offset: 1376256 length: 4096 flags: O..
69: offset: 1384448 length: 4096 flags: O..
70: offset: 1409024 length: 4096 flags: O..
71: offset: 1417216 length: 4096 flags: O..
72: offset: 1441792 length: 4096 flags: O..
73: offset: 1449984 length: 4096 flags: O..
74: offset: 1474560 length: 4096 flags: O..
75: offset: 1482752 length: 4096 flags: O..
76: offset: 1507328 length: 4096 flags: O..
77: offset: 1515520 length: 4096 flags: O..
78: offset: 1540096 length: 4096 flags: O..
79: offset: 1548288 length: 4096 flags: O..
80: offset: 1572864 length: 4096 flags: O..
81: offset: 1581056 length: 4096 flags: O..
82: offset: 1605632 length: 4096 flags: O..
83: offset: 1613824 length: 4096 flags: O..
84: offset: 1638400 length: 4096 flags: O..
85: offset: 1646592 length: 4096 flags: O..
86: offset: 1671168 length: 4096 flags: O..
87: offset: 1679360 length: 4096 flags: O..
88: offset: 1703936 length: 4096 flags: O..
89: offset: 1712128 length: 4096 flags: O..
90: offset: 1736704 length: 4096 flags: O..
91: offset: 1744896 length: 4096 flags: O..
92: offset: 1769472 length: 4096 flags: O..
93: offset: 1777664 length: 4096 flags: O..
94: offset: 1802240 length: 4096 flags: O..
95: offset: 1810432 length: 4096 flags: O..
96: offset: 1835008 length: 4096 flags: O..
97: offset: 1843200 length: 4096 flags: O..
98: offset: 1867776 length: 4096 flags: O..
99: offset: 1875968 length: 4096 flags: O..
100: offset: 1900544 length: 4096 flags: O..
101: offset: 1908736 length: 4096 flags: O..
102: offset: 1933312 length: 4096 flags: O..
103: offset: 1941504 length: 4096 flags: O..
104: offset: 1966080 length: 4096 flags: O..
105: offset: 1974272 length: 4096 flags: O..
106: offset: 1998848 length: 4096 flags: O..
107: offset: 2007040 length: 4096 flags: O..
108: offset: 2031616 length: 4096 flags: O..
109: offset: 2039808 length: 4096 flags: O..
110: offset: 2064384 length: 4096 flags: O..
111: offset: 2072576 length: 4096 flags: O..
112: offset: 2097152 length: 4096 flags: O..
113: offset: 2105344 length: 4096 flags: O..
114: offset: 2129920 length: 4096 flags: O..
115: offset: 2138112 length: 4096 flags: O..
116: offset: 2162688 length: 4096 flags: O..
117: offset: 2170880 length: 4096 flags: O..
118: offset: 2195456 length: 4096 flags: O..
119: offset: 2203648 length: 4096 flags: O..
120: offset: 2228224 length: 4096 flags: O..
121: offset: 2236416 length: 4096 flags: O..
122: offset: 2260992 length: 4096 flags: O..
123: offset: 2269184 length: 4096 flags: O..
124: offset: 2293760 length: 4096 flags: O..
125: offset: 2301952 length: 4096 flags: O..
126: offset: 2326528 length: 4096 flags: O..
127: offset: 2334720 length: 4096 flags: O..
128: offset: 2359296 length: 4096 flags: O..
129: offset: 2367488 length: 4096 flags: O..
130: offset: 2392064 length: 4096 flags: O..
131: offset: 2400256 length: 4096 flags: O..
132: offset: 2424832 length: 4096 flags: O..
133: offset: 2433024 length: 4096 flags: O..
134: offset: 2457600 length: 4096 flags: O..
135: offset: 2465792 length: 4096 flags: O..
136: offset: 2490368 length: 4096 flags: O..
137: offset: 2498560 length: 4096 flags: O..
138: offset: 2523136 length: 4096 flags: O..
139: offset: 2531328 length: 4096 flags: O..
140: offset: 2555904 length: 4096 flags: O..
141: offset: 2564096 length: 4096 flags: O..
142: offset: 2588672 length: 4096 flags: O..
143: offset: 2596864 length: 4096 flags: O..
144: offset: 2621440 length: 4096 flags: O..
145: offset: 2629632 length: 4096 flags: O..
146: offset: 2654208 length: 4096 flags: O..
147: offset: 2662400 length: 4096 flags: O..
148: offset: 2686976 length: 4096 flags: O..
149: offset: 2695168 length: 4096 flags: O..
150: offset: 2719744 length: 4096 flags: O..
151: offset: 2727936 length: 4096 flags: O..
152: offset: 2752512 length: 4096 flags: O..
153: offset: 2760704 length: 4096 flags: O..
154: offset: 2785280 length: 4096 flags: O..
155: offset: 2793472 length: 4096 flags: O..
156: offset: 2818048 length: 4096 flags: O..
157: offset: 2826240 length: 4096 flags: O..
158: offset: 2850816 length: 4096 flags: O..
159: offset: 2859008 length: 4096 flags: O..
160: offset: 2883584 length: 4096 flags: O..
161: offset: 2891776 length: 4096 flags: O..
162: offset: 2916352 length: 4096 flags: O..
163: offset: 2924544 length: 4096 flags: O..
164: offset: 2949120 length: 4096 flags: O..
165: offset: 2957312 length: 4096 flags: O..
166: offset: 2981888 length: 4096 flags: O..
167: offset: 2990080 length: 4096 flags: O..
168: offset: 3014656 length: 4096 flags: O..
169: offset: 3022848 length: 4096 flags: O..
170: offset: 3047424 length: 4096 flags: O..
171: offset: 3055616 length: 4096 flags: O..
172: offset: 3080192 length: 4096 flags: O..
173: offset: 3088384 length: 4096 flags: O..
174: offset: 3112960 length: 4096 flags: O..
175: offset: 3121152 length: 4096 flags: O..
176: offset: 3145728 length: 4096 flags: O..
177: offset: 3153920 length: 4096 flags: O..
178: offset: 3178496 length: 4096 flags: O..
179: offset: 3186688 length: 4096 flags: O..
180: offset: 3211264 length: 4096 flags: O..
181: offset: 3219456 length: 4096 flags: O..
182: offset: 3244032 length: 4096 flags: O..
183: offset: 3252224 length: 4096 flags: O..
184: offset: 3276800 length: 4096 flags: O..
185: offset: 3284992 length: 4096 flags: O..
186: offset: 3309568 length: 4096 flags: O..
187: offset: 3317760 length: 4096 flags: O..
188: offset: 3342336 length: 4096 flags: O..
189: offset: 3350528 length: 4096 flags: O..
190: offset: 3375104 length: 4096 flags: O..
191: offset: 3383296 length: 4096 flags: O..
192: offset: 3407872 length: 4096 flags: O..
193: offset: 3416064 length: 4096 flags: O..
194: offset: 3440640 length: 4096 flags: O..
195: offset: 3448832 length: 4096 flags: O..
196: offset: 3473408 length: 4096 flags: O..
197: offset: 3481600 length: 4096 flags: O..
198: offset: 3506176 length: 4096 flags: O..
199: offset: 3514368 length: 4096 flags: O..
200: offset: 3538944 length: 4096 flags: O..
201: offset: 3547136 length: 4096 flags: O..
202: offset: 3571712 length: 4096 flags: O..
203: offset: 3579904 length: 4096 flags: O..
204: offset: 3604480 length: 4096 flags: O..
205: offset: 3612672 length: 4096 flags: O..
206: offset: 3637248 length: 4096 flags: O..
207: offset: 3645440 length: 4096 flags: O..
208: offset: 3670016 length: 4096 flags: O..
209: offset: 3678208 length: 4096 flags: O..
210: offset: 3702784 length: 4096 flags: O..
211: offset: 3710976 length: 4096 flags: O..
212: offset: 3735552 length: 4096 flags: O..
213: offset: 3743744 length: 4096 flags: O..
214: offset: 3768320 length: 4096 flags: O..
215: offset: 3776512 length: 4096 flags: O..
216: offset: 3801088 length: 4096 flags: O..
217: offset: 3809280 length: 4096 flags: O..
218: offset: 3833856 length: 4096 flags: O..
219: offset: 3842048 length: 4096 flags: O..
220: offset: 3866624 length: 4096 flags: O..
221: offset: 3874816 length: 4096 flags: O..
222: offset: 3899392 length: 4096 flags: O..
223: offset: 3907584 length: 4096 flags: O..
224: offset: 3932160 length: 4096 flags: O..
225: offset: 3940352 length: 4096 flags: O..
226: offset: 3964928 length: 4096 flags: O..
227: offset: 3973120 length: 4096 flags: O..
228: offset: 3997696 length: 4096 flags: O..
229: offset: 4005888 length: 4096 flags: O..
230: offset: 4030464 length: 4096 flags: O..
231: offset: 4038656 length: 4096 flags: O..
232: offset: 4063232 length: 4096 flags: O..
233: offset: 4071424 length: 4096 flags: O..
234: offset: 4096000 length: 4096 flags: O..
235: offset: 4104192 length: 4096 flags: O..
236: offset: 4128768 length: 4096 flags: O..
237: offset: 4136960 length: 4096 flags: O..
238: offset: 4161536 length: 4096 flags: O..
239: offset: 4169728 length: 4096 flags: O.L
extents: 240
0: offset: 0 length: 1 flags: O..
1: offset: 8 length: 1 flags: O..
2: offset: 16 length: 1 flags: O..
3: offset: 24 length: 1 flags: O..
4: offset: 48 length: 1 flags: O..
5: offset: 56 length: 1 flags: O..
6: offset: 64 length: 1 flags: O..
7: offset: 72 length: 1 flags: O..
8: offset: 80 length: 1 flags: O..
9: offset: 88 length: 1 flags: O..
10: offset: 96 length: 1 flags: O..
11: offset: 104 length: 1 flags: O..
12: offset: 112 length: 1 flags: O..
13: offset: 120 length: 1 flags: O..
14: offset: 176 length: 1 flags: O..
15: offset: 184 length: 1 flags: O..
16: offset: 192 length: 1 flags: O..
17: offset: 200 length: 1 flags: O..
18: offset: 208 length: 1 flags: O..
19: offset: 216 length: 1 flags: O..
20: offset: 224 length: 1 flags: O..
21: offset: 232 length: 1 flags: O..
22: offset: 240 length: 1 flags: O..
23: offset: 248 length: 1 flags: O..
24: offset: 256 length: 1 flags: O..
25: offset: 264 length: 1 flags: O..
26: offset: 272 length: 1 flags: O..
27: offset: 280 length: 1 flags: O..
28: offset: 288 length: 1 flags: O..
29: offset: 296 length: 1 flags: O..
30: offset: 304 length: 1 flags: O..
31: offset: 312 length: 1 flags: O..
32: offset: 320 length: 1 flags: O..
33: offset: 328 length: 1 flags: O..
34: offset: 336 length: 1 flags: O..
35: offset: 344 length: 1 flags: O..
36: offset: 352 length: 1 flags: O..
37: offset: 360 length: 1 flags: O..
38: offset: 368 length: 1 flags: O..
39: offset: 376 length: 1 flags: O..
40: offset: 384 length: 1 flags: O..
41: offset: 392 length: 1 flags: O..
42: offset: 400 length: 1 flags: O..
43: offset: 408 length: 1 flags: O..
44: offset: 416 length: 1 flags: O..
45: offset: 424 length: 1 flags: O..
46: offset: 432 length: 1 flags: O..
47: offset: 440 length: 1 flags: O..
48: offset: 448 length: 1 flags: O..
49: offset: 456 length: 1 flags: O..
50: offset: 464 length: 1 flags: O..
51: offset: 472 length: 1 flags: O..
52: offset: 480 length: 1 flags: O..
53: offset: 488 length: 1 flags: O..
54: offset: 496 length: 1 flags: O..
55: offset: 504 length: 1 flags: O..
56: offset: 512 length: 1 flags: O..
57: offset: 520 length: 1 flags: O..
58: offset: 528 length: 1 flags: O..
59: offset: 536 length: 1 flags: O..
60: offset: 544 length: 1 flags: O..
61: offset: 552 length: 1 flags: O..
62: offset: 560 length: 1 flags: O..
63: offset: 568 length: 1 flags: O..
64: offset: 576 length: 1 flags: O..
65: offset: 584 length: 1 flags: O..
66: offset: 592 length: 1 flags: O..
67: offset: 600 length: 1 flags: O..
68: offset: 608 length: 1 flags: O..
69: offset: 616 length: 1 flags: O..
70: offset: 624 length: 1 flags: O..
71: offset: 632 length: 1 flags: O..
72: offset: 640 length: 1 flags: O..
73: offset: 648 length: 1 flags: O..
74: offset: 656 length: 1 flags: O..
75: offset: 664 length: 1 flags: O..
76: offset: 672 length: 1 flags: O..
77: offset: 680 length: 1 flags: O..
78: offset: 688 length: 1 flags: O..
79: offset: 696 length: 1 flags: O..
80: offset: 704 length: 1 flags: O..
81: offset: 712 length: 1 flags: O..
82: offset: 720 length: 1 flags: O..
83: offset: 728 length: 1 flags: O..
84: offset: 736 length: 1 flags: O..
85: offset: 744 length: 1 flags: O..
86: offset: 752 length: 1 flags: O..
87: offset: 760 length: 1 flags: O..
88: offset: 768 length: 1 flags: O..
89: offset: 776 length: 1 flags: O..
90: offset: 784 length: 1 flags: O..
91: offset: 792 length: 1 flags: O..
92: offset: 800 length: 1 flags: O..
93: offset: 808 length: 1 flags: O..
94: offset: 816 length: 1 flags: O..
95: offset: 824 length: 1 flags: O..
96: offset: 832 length: 1 flags: O..
97: offset: 840 length: 1 flags: O..
98: offset: 848 length: 1 flags: O..
99: offset: 856 length: 1 flags: O..
100: offset: 864 length: 1 flags: O..
101: offset: 872 length: 1 flags: O..
102: offset: 880 length: 1 flags: O..
103: offset: 888 length: 1 flags: O..
104: offset: 896 length: 1 flags: O..
105: offset: 904 length: 1 flags: O..
106: offset: 912 length: 1 flags: O..
107: offset: 920 length: 1 flags: O..
108: offset: 928 length: 1 flags: O..
109: offset: 936 length: 1 flags: O..
110: offset: 944 length: 1 flags: O..
111: offset: 952 length: 1 flags: O..
112: offset: 960 length: 1 flags: O..
113: offset: 968 length: 1 flags: O..
114: offset: 976 length: 1 flags: O..
115: offset: 984 length: 1 flags: O..
116: offset: 992 length: 1 flags: O..
117: offset: 1000 length: 1 flags: O..
118: offset: 1008 length: 1 flags: O..
119: offset: 1016 length: 1 flags: O.L
extents: 120
extents: 0

View File

@@ -400,8 +400,7 @@ if [ -n "$T_INSMOD" ]; then
fi
if [ -n "$T_TRACE_MULT" ]; then
# orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb)
orig_trace_size=1408
orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb)
mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
@@ -505,10 +504,7 @@ crash_monitor()
fi
if [ "$bad" != 0 ]; then
echo "run-tests monitor syncing and triggering crash"
# hail mary, the sync could well hang
(echo s > /proc/sysrq-trigger) &
sleep 5
echo "run-tests monitor triggering crash"
echo c > /proc/sysrq-trigger
exit 1
fi
@@ -628,9 +624,6 @@ for t in $tests; do
cmd rm -rf "$T_TMPDIR"
cmd mkdir -p "$T_TMPDIR"
# assign scratch mount point in temporary dir
T_MSCR="$T_TMPDIR/scratch"
# create a test name dir in the fs, clean up old data as needed
T_DS=""
for i in $(seq 0 $((T_NR_MOUNTS - 1))); do

View File

@@ -2,7 +2,6 @@ export-get-name-parent.sh
basic-block-counts.sh
basic-bad-mounts.sh
basic-posix-acl.sh
basic-acl-consistency.sh
inode-items-updated.sh
simple-inode-index.sh
simple-staging.sh
@@ -11,7 +10,6 @@ simple-readdir.sh
get-referring-entries.sh
fallocate.sh
basic-truncate.sh
punch-offline.sh
data-prealloc.sh
setattr_more.sh
offline-extent-waiting.sh
@@ -50,7 +48,6 @@ setup-error-teardown.sh
resize-devices.sh
change-devices.sh
fence-and-reclaim.sh
orphan-log-trees.sh
quorum-heartbeat-timeout.sh
orphan-inodes.sh
mount-unmount-race.sh

View File

@@ -1,117 +0,0 @@
#
# Test basic clustered posix acl consistency.
#
t_require_commands getfacl setfacl
GETFACL="getfacl --absolute-names"
# Filter stdin to stdout, replacing every occurrence of the scratch
# mount point path ($T_MSCR) with the fixed string "t_mscr".
filter_scratch() {
	sed -e "s@$T_MSCR@t_mscr@g"
}
#
# Compare the getfacl output of dir_b, dir_d, and dir_c under the
# scratch mount against the expected ACLs embedded in the heredocs
# below.  The scratch mount path is rewritten to "t_mscr" by
# filter_scratch before comparing.  t_fail is called for the first
# directory whose output differs from what is expected.
#
acl_compare()
{
# dir_b: the directory the access and default ACL entries were set on
diff -u - <($GETFACL $T_MSCR/data/dir_a/dir_b | filter_scratch) <<EOF1
# file: t_mscr/data/dir_a/dir_b
# owner: t_usr_3
# group: t_grp_3
# flags: -s-
user::rwx
group::rwx
group:t_grp_2:r-x
mask::rwx
other::---
default:user::rwx
default:group::rwx
default:group:t_grp_2:r-x
default:group:t_grp_3:rwx
default:mask::rwx
default:other::---
EOF1
test $? -eq 0 || t_fail "dir_b differs"
# dir_d: created two levels below dir_b
diff -u - <($GETFACL -p $T_MSCR/data/dir_a/dir_b/dir_c/dir_d | filter_scratch) <<EOF3
# file: t_mscr/data/dir_a/dir_b/dir_c/dir_d
# owner: t_usr_1
# group: t_grp_1
# flags: -s-
user::rwx
group::rwx
group:t_grp_2:r-x
mask::rwx
other::---
default:user::rwx
default:group::rwx
default:group:t_grp_2:r-x
default:group:t_grp_3:rwx
default:mask::rwx
default:other::---
EOF3
test $? -eq 0 || t_fail "dir_d differs"
# dir_c: created directly below dir_b
diff -u - <($GETFACL $T_MSCR/data/dir_a/dir_b/dir_c | filter_scratch) <<EOF2
# file: t_mscr/data/dir_a/dir_b/dir_c
# owner: t_usr_3
# group: t_grp_2
# flags: -s-
user::rwx
group::rwx
group:t_grp_2:r-x
mask::rwx
other::---
default:user::rwx
default:group::rwx
default:group:t_grp_2:r-x
default:group:t_grp_3:rwx
default:mask::rwx
default:other::---
EOF2
test $? -eq 0 || t_fail "dir_c differs"
}
# Build a fresh scratch fs so the test starts from an empty tree.
echo "== make scratch fs"
t_scratch_mkfs
t_scratch_mount
rm -rf $T_MSCR/data
# Output is discarded so that accounts left over from an earlier run
# don't add "already exists" noise to the test output.
echo "== create uid/gids"
groupadd -g 7101 t_grp_1 > /dev/null 2>&1
useradd -g 7101 -u 7101 t_usr_1 > /dev/null 2>&1
groupadd -g 7102 t_grp_2 > /dev/null 2>&1
groupadd -g 7103 t_grp_3 > /dev/null 2>&1
useradd -g 7103 -u 7103 t_usr_3 > /dev/null 2>&1
echo "== set acls and permissions"
# dir_b gets setgid (2770) plus access and default ACL entries
mkdir -p $T_MSCR/data/dir_a/dir_b
chown t_usr_3:t_grp_3 $T_MSCR/data/dir_a/dir_b
chmod 2770 $T_MSCR/data/dir_a/dir_b
setfacl -m g:t_grp_2:rx $T_MSCR/data/dir_a/dir_b
setfacl -m d:g:t_grp_2:rx $T_MSCR/data/dir_a/dir_b
setfacl -m d:g:t_grp_3:rwx $T_MSCR/data/dir_a/dir_b
# dir_c and dir_d are created below dir_b; the t_grp_3 entry is then
# removed from each one's access ACL (acl_compare expects it to remain
# only in their default ACLs)
mkdir -p $T_MSCR/data/dir_a/dir_b/dir_c
chown t_usr_3:t_grp_2 $T_MSCR/data/dir_a/dir_b/dir_c
setfacl -x g:t_grp_3 $T_MSCR/data/dir_a/dir_b/dir_c
mkdir -p $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
chown t_usr_1:t_grp_1 $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
setfacl -x g:t_grp_3 $T_MSCR/data/dir_a/dir_b/dir_c/dir_d
echo "== compare output"
acl_compare
# Repeat the comparison after a sync and a drop of the page, dentry,
# and inode caches.
echo "== drop caches and compare again"
sync
echo 3 > /proc/sys/vm/drop_caches
acl_compare
echo "== cleanup scratch fs"
t_scratch_umount
t_pass

View File

@@ -12,22 +12,25 @@ mount_fail()
}
echo "== prepare devices, mount point, and logs"
t_scratch_mkfs
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
> $T_TMP.mount.out
scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 \
|| t_fail "mkfs failed"
echo "== bad devices, bad options"
mount_fail -o _bad /dev/null /dev/null "$T_MSCR"
mount_fail -o _bad /dev/null /dev/null "$SCR"
echo "== swapped devices"
mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$T_MSCR"
mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$SCR"
echo "== both meta devices"
mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$T_MSCR"
mount_fail -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_META_DEV" "$SCR"
echo "== both data devices"
mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
mount_fail -o metadev_path=$T_EX_DATA_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
echo "== good volume, bad option and good options"
mount_fail -o _bad,metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
mount_fail -o _bad,metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
t_pass

View File

@@ -11,8 +11,9 @@ truncate -s $sz "$T_TMP.equal"
truncate -s $large_sz "$T_TMP.large"
echo "== make scratch fs"
t_scratch_mkfs
mkdir -p "$T_MSCR"
t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
echo "== small new data device fails"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.small"
@@ -22,13 +23,13 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.small"
t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"
echo "== preparing while mounted fails"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
umount "$T_MSCR"
umount "$SCR"
echo "== preparing without recovery fails"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
umount -f "$T_MSCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
umount -f "$SCR"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
echo "== check sees metadata errors"
@@ -36,16 +37,16 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV"
t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal"
echo "== preparing with file data fails"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
echo hi > "$T_MSCR"/file
umount "$T_MSCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
echo hi > "$SCR"/file
umount "$SCR"
scoutfs print "$T_EX_META_DEV" > "$T_TMP.print"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
echo "== preparing after emptied"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$T_MSCR"
rm -f "$T_MSCR"/file
umount "$T_MSCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$T_EX_DATA_DEV" "$SCR"
rm -f "$SCR"/file
umount "$SCR"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.equal"
echo "== checks pass"
@@ -54,22 +55,22 @@ t_rc scoutfs prepare-empty-data-device --check "$T_EX_META_DEV" "$T_TMP.equal"
echo "== using prepared"
scr_loop=$(losetup --find --show "$T_TMP.equal")
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$T_MSCR"
touch "$T_MSCR"/equal_prepared
equal_tot=$(scoutfs statfs -s total_data_blocks -p "$T_MSCR")
umount "$T_MSCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR"
touch "$SCR"/equal_prepared
equal_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR")
umount "$SCR"
losetup -d "$scr_loop"
echo "== preparing larger and resizing"
t_rc scoutfs prepare-empty-data-device "$T_EX_META_DEV" "$T_TMP.large"
scr_loop=$(losetup --find --show "$T_TMP.large")
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$T_MSCR"
touch "$T_MSCR"/large_prepared
ls "$T_MSCR"
scoutfs resize-devices -p "$T_MSCR" -d $large_sz
large_tot=$(scoutfs statfs -s total_data_blocks -p "$T_MSCR")
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 "$scr_loop" "$SCR"
touch "$SCR"/large_prepared
ls "$SCR"
scoutfs resize-devices -p "$SCR" -d $large_sz
large_tot=$(scoutfs statfs -s total_data_blocks -p "$SCR")
test "$large_tot" -gt "$equal_tot" ; echo "resized larger test rc: $?"
umount "$T_MSCR"
umount "$SCR"
losetup -d "$scr_loop"
echo "== cleanup"

View File

@@ -54,16 +54,21 @@ after=$(free_blocks Data "$T_M0")
test "$before" == "$after" || \
t_fail "$after free data blocks after rm, expected $before"
# XXX this is all pretty manual, would be nice to have helpers
echo "== make small meta fs"
# meta device just big enough for reserves and the metadata we'll fill
t_scratch_mkfs -A -m 10G
t_scratch_mount
scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
t_fail "mkfs failed"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
echo "== create large xattrs until we fill up metadata"
mkdir -p "$T_MSCR/xattrs"
mkdir -p "$SCR/xattrs"
for f in $(seq 1 100000); do
file="$T_MSCR/xattrs/file-$f"
file="$SCR/xattrs/file-$f"
touch "$file"
LC_ALL=C create_xattr_loop -c 1000 -n user.scoutfs-enospc -p "$file" -s 65535 > $T_TMP.cxl 2>&1
@@ -79,10 +84,10 @@ for f in $(seq 1 100000); do
done
echo "== remove files with xattrs after enospc"
rm -rf "$T_MSCR/xattrs"
rm -rf "$SCR/xattrs"
echo "== make sure we can create again"
file="$T_MSCR/file-after"
file="$SCR/file-after"
C=120
while (( C-- )); do
touch $file 2> /dev/null && break
@@ -94,6 +99,7 @@ sync
rm -f "$file"
echo "== cleanup small meta fs"
t_scratch_umount
umount "$SCR"
rmdir "$SCR"
t_pass

View File

@@ -5,9 +5,6 @@
t_require_commands sleep touch grep sync scoutfs
t_require_mounts 2
# regularly see ~20/~30s
VERIFY_TIMEOUT_SECS=90
#
# Make sure that all mounts can read the results of a write from each
# mount.
@@ -43,10 +40,8 @@ verify_fenced_run()
for rid in $rids; do
grep -q ".* running rid '$rid'.* args 'ignored run args'" "$T_FENCED_LOG" || \
return 1
t_fail "fenced didn't execute RUN script for rid $rid"
done
return 0
}
echo "== make sure all mounts can see each other"
@@ -59,7 +54,14 @@ rid=$(t_mount_rid $cl)
echo "cl $cl sv $sv rid $rid" >> "$T_TMP.log"
sync
t_force_umount $cl
t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rid
# wait for client reconnection to timeout
while grep -q $rid $(t_debugfs_path $sv)/connections; do
sleep .5
done
while t_rid_is_fencing $rid; do
sleep .5
done
verify_fenced_run $rid
t_mount $cl
check_read_write
@@ -81,7 +83,15 @@ for cl in $(t_fs_nrs); do
t_force_umount $cl
done
t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rids
# wait for all client reconnections to timeout
while egrep -q "($pattern)" $(t_debugfs_path $sv)/connections; do
sleep .5
done
# wait for all fence requests to complete
while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
sleep .5
done
verify_fenced_run $rids
# remount all the clients
for cl in $(t_fs_nrs); do
if [ $cl == $sv ]; then
@@ -97,7 +107,12 @@ rid=$(t_mount_rid $sv)
echo "sv $sv rid $rid" >> "$T_TMP.log"
sync
t_force_umount $sv
t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rid
t_wait_for_leader
# wait until new server is done fencing unmounted leader rid
while t_rid_is_fencing $rid; do
sleep .5
done
verify_fenced_run $rid
t_mount $sv
check_read_write
@@ -112,7 +127,11 @@ for nr in $(t_fs_nrs); do
t_force_umount $nr
done
t_mount_all
t_wait_until_timeout $VERIFY_TIMEOUT_SECS verify_fenced_run $rids
# wait for all fence requests to complete
while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
sleep .5
done
verify_fenced_run $rids
check_read_write
t_pass

View File

@@ -72,7 +72,7 @@ touch $T_D0/dir/file
mkdir $T_D0/dir/dir
ln -s $T_D0/dir/file $T_D0/dir/symlink
mknod $T_D0/dir/char c 1 3 # null
mknod $T_D0/dir/block b 42 0 # SAMPLE block dev - nonexistant/demo use only number
mknod $T_D0/dir/block b 7 0 # loop0
for name in $(ls -UA $T_D0/dir | sort); do
ino=$(stat -c '%i' $T_D0/dir/$name)
$GRE $ino | filter_types

View File

@@ -53,40 +53,26 @@ exec {FD1}>&- # close
exec {FD2}>&- # close
check_ino_index "$ino" "$dseq" "$T_M0"
# Hurry along the orphan scanners. If any are currently asleep, we will
# have to wait at least their current scan interval before they wake up,
# run, and notice their new interval.
t_save_all_sysfs_mount_options orphan_scan_delay_ms
t_set_all_sysfs_mount_options orphan_scan_delay_ms 500
t_wait_for_orphan_scan_runs
echo "== remote unopened unlink deletes"
echo "contents" > "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
rm -f "$T_D1/file"
# cross-mount deletion falls back to the orphan scanner when the
# creating mount still has the inode cached, wait for it to complete
t_force_log_merge
# wait for orphan scanners to pick up the unlinked inode and become idle
t_wait_for_no_orphans
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
echo "== unlink wait for open on other mount"
echo "contents" > "$T_D0/badfile"
ino=$(stat -c "%i" "$T_D0/badfile")
dseq=$(scoutfs stat -s data_seq "$T_D0/badfile")
exec {FD}<"$T_D0/badfile"
rm -f "$T_D1/badfile"
echo "contents" > "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
exec {FD}<"$T_D0/file"
rm -f "$T_D1/file"
echo "mount 0 contents after mount 1 rm: $(cat <&$FD)"
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
exec {FD}>&- # close
# we know that revalidating will unhash the remote dentry
stat "$T_D0/badfile" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
t_force_log_merge
t_wait_for_no_orphans
stat "$T_D0/file" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
@@ -97,20 +83,16 @@ rm -f "$T_D0/dir"/files-*
rmdir "$T_D0/dir"
echo "== open files survive remote scanning orphans"
echo "contents" > "$T_D0/lastfile"
ino=$(stat -c "%i" "$T_D0/lastfile")
dseq=$(scoutfs stat -s data_seq "$T_D0/lastfile")
exec {FD}<"$T_D0/lastfile"
rm -f "$T_D0/lastfile"
echo "contents" > "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
exec {FD}<"$T_D0/file"
rm -f "$T_D0/file"
t_umount 1
t_mount 1
echo "mount 0 contents after mount 1 remounted: $(cat <&$FD)"
exec {FD}>&- # close
t_force_log_merge
t_wait_for_no_orphans
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
t_restore_all_sysfs_mount_options orphan_scan_delay_ms
t_pass

View File

@@ -1,52 +0,0 @@
#
# Test that orphaned log_trees entries from unmounted rids are
# finalized and merged.
#
# An orphan log_trees entry is one whose rid has no mounted_clients
# entry. This can happen from incomplete reclaim across server
# failovers. We simulate it with the reclaim_skip_finalize trigger
# which makes reclaim_open_log_tree skip the finalization step.
#
t_require_commands touch scoutfs
t_require_mounts 2
TIMEOUT=90
echo "== create orphan log_trees entry via trigger"
sv=$(t_server_nr)
cl=$(t_first_client_nr)
rid=$(t_mount_rid $cl)
touch "$T_D0/file" "$T_D1/file"
sync
# arm the trigger so reclaim skips finalization
t_trigger_arm_silent reclaim_skip_finalize $sv
# force unmount the client, server will fence and reclaim it
# but the trigger makes reclaim leave log_trees unfinalized
t_force_umount $cl
# wait for fencing to run
verify_fenced() {
grep -q "running rid '$rid'" "$T_FENCED_LOG" 2>/dev/null
}
t_wait_until_timeout $TIMEOUT verify_fenced
# give the server time to complete reclaim after fence
sleep 5
# remount the client so t_force_log_merge can sync all mounts.
# the client gets a new rid; the old rid's log_trees is the orphan.
t_mount $cl
echo "== verify orphan is reclaimed and merge completes"
t_force_log_merge
echo "== verify orphan reclaim was logged"
if ! dmesg | grep -q "reclaiming orphan log trees for rid $rid"; then
t_fail "expected orphan reclaim message for rid $rid in dmesg"
fi
t_pass

View File

@@ -1,152 +0,0 @@
t_require_commands scoutfs dd fallocate
FILE="$T_D0/file"
DIR="$T_D0/dir"
echo "== missing options should fail =="
rm -rf $DIR && mkdir -p $DIR
scoutfs punch-offline $DIR -l 4096 -V 0
scoutfs punch-offline $DIR -o 0 -V 0
scoutfs punch-offline $DIR -o 0 -l 4096
echo "== can't hole punch dir or special =="
rm -rf $DIR && mkdir -p $DIR
scoutfs punch-offline $DIR -o 0 -l 4096 -V 0
echo "== punching an empty file does nothing =="
rm -f $FILE && touch $FILE
scoutfs punch-offline $FILE -o 0 -l 4096 -V 0
echo "== punch outside of i_size does nothing =="
dd if=/dev/zero of=$FILE bs=4096 count=1 status=none
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 1
echo "== can't hole punch online extent =="
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 0 -l 4096 -V 1
scoutfs get-fiemap -Lb $FILE
echo "== can't hole punch unwritten extent =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4096 -l 4096 -V $vers
scoutfs get-fiemap -Lb $FILE
echo "== hole punch offline extent =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4096 -l 4096 -V $vers
scoutfs get-fiemap -Lb $FILE
echo "== can't hole punch non-aligned bsz offset or len =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4095 -l 4096 -V $vers
scoutfs punch-offline $FILE -o 1 -l 4096 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 409700 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 4097 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 4095 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 1 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 0 -V $vers
scoutfs get-fiemap -Lb $FILE
echo "== can't hole punch mismatched data_version =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 0
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 2
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 9999
scoutfs get-fiemap -Lb $FILE
echo "== Punch hole crossing multiple extents =="
rm -rf $FILE && touch $FILE
fallocate -l $((7 * 4096)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((1 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((3 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((5 * 4096)) -l 4096 -V $vers
# 0.1.2.3
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((2 * 4096)) -l $((3 * 4096)) -V $vers
# 0.....1
scoutfs get-fiemap -L $FILE
echo "== punch hole starting at a hole =="
rm -rf $FILE && touch $FILE
fallocate -l $((7 * 4096)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((1 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((3 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((5 * 4096)) -l 4096 -V $vers
# 0.1.2.3
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((1 * 4096)) -l $((5 * 4096)) -V $vers
# 0.....1
scoutfs get-fiemap -L $FILE
echo "== large punch =="
rm -rf $FILE && touch $FILE
fallocate -l $((6 * 1024 * 1024 * 1024)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((134123 * 4096)) -l $((68343 * 4096)) -V $vers
scoutfs punch-offline $FILE -o $((467273 * 4096)) -l $((68343 * 4096)) -V $vers
scoutfs punch-offline $FILE -o $((734623 * 4096)) -l $((68343 * 4096)) -V $vers
scoutfs get-fiemap -L $FILE
echo "== overlapping punches with lots of extents =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 1024)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version 1
scoutfs get-fiemap -Lb $FILE
# punch odd ones away
for h in $(seq 1 2 1023); do
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -Lb $FILE | tail -n 1
# punch a large hole from 32 to 45, removing 7 extents
scoutfs punch-offline $FILE -o $((32 * 4096)) -l $((13 * 4096)) -V $vers
scoutfs get-fiemap -Lb $FILE | tail -n 1
# punch every 8th @6
for h in $(seq 6 8 1024); do
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
# again @4
scoutfs get-fiemap -Lb $FILE | tail -n 1
for h in $(seq 4 8 1024); do
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -Lb $FILE | tail -n 1
# punching a large hole from 127 to 175, removing 12 extents
scoutfs punch-offline $FILE -o $((127 * 4096)) -l $((48 * 4096)) -V $vers
scoutfs get-fiemap -Lb $FILE
# again @2
for h in $(seq 2 8 1024); do
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -L $FILE
# and again @0, punching away every remaining extent
for h in $(seq 0 8 1024); do
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -Lb $FILE
t_pass

View File

@@ -62,7 +62,7 @@ test_timeout()
sleep 1
# tear down the current server/leader
t_force_umount $sv &
t_force_umount $sv
# see how long it takes for the next leader to start
start=$(time_ms)
@@ -73,7 +73,6 @@ test_timeout()
echo "to $to delay $delay" >> $T_TMP.delay
# restore the mount that we tore down
wait
t_mount $sv
# make sure the new leader delay was reasonable, allowing for some slack

View File

@@ -8,19 +8,19 @@ t_require_mounts 2
echo "=== renameat2 noreplace flag test"
# give each mount their own dir (lock group) to minimize create contention
mkdir $T_D0/dir0
mkdir $T_D1/dir1
mkdir $T_M0/dir0
mkdir $T_M1/dir1
echo "=== run two asynchronous calls to renameat2 NOREPLACE"
for i in $(seq 0 100); do
# prepare inputs in isolation
touch "$T_D0/dir0/old0"
touch "$T_D1/dir1/old1"
touch "$T_M0/dir0/old0"
touch "$T_M1/dir1/old1"
# race doing noreplace renames, both can't succeed
dumb_renameat2 -n "$T_D0/dir0/old0" "$T_D0/dir0/sharednew" 2> /dev/null &
dumb_renameat2 -n "$T_M0/dir0/old0" "$T_M0/dir0/sharednew" 2> /dev/null &
pid0=$!
dumb_renameat2 -n "$T_D1/dir1/old1" "$T_D1/dir0/sharednew" 2> /dev/null &
dumb_renameat2 -n "$T_M1/dir1/old1" "$T_M1/dir0/sharednew" 2> /dev/null &
pid1=$!
wait $pid0
@@ -31,7 +31,7 @@ for i in $(seq 0 100); do
test "$rc0" == 0 -a "$rc1" == 0 && t_fail "both renames succeeded"
# blow away possible files for either race outcome
rm -f "$T_D0/dir0/old0" "$T_D1/dir1/old1" "$T_D0/dir0/sharednew" "$T_D1/dir1/sharednew"
rm -f "$T_M0/dir0/old0" "$T_M1/dir1/old1" "$T_M0/dir0/sharednew" "$T_M1/dir1/sharednew"
done
t_pass

View File

@@ -19,8 +19,8 @@ df_free() {
}
same_totals() {
cur_meta_tot=$(statfs_total meta "$T_MSCR")
cur_data_tot=$(statfs_total data "$T_MSCR")
cur_meta_tot=$(statfs_total meta "$SCR")
cur_data_tot=$(statfs_total data "$SCR")
test "$cur_meta_tot" == "$exp_meta_tot" || \
t_fail "cur total_meta_blocks $cur_meta_tot != expected $exp_meta_tot"
@@ -34,10 +34,10 @@ same_totals() {
# some slop to account for reserved blocks and concurrent allocation.
#
devices_grew() {
cur_meta_tot=$(statfs_total meta "$T_MSCR")
cur_data_tot=$(statfs_total data "$T_MSCR")
cur_meta_df=$(df_free MetaData "$T_MSCR")
cur_data_df=$(df_free Data "$T_MSCR")
cur_meta_tot=$(statfs_total meta "$SCR")
cur_data_tot=$(statfs_total data "$SCR")
cur_meta_df=$(df_free MetaData "$SCR")
cur_data_df=$(df_free Data "$SCR")
local grow_meta_tot=$(echo "$exp_meta_tot * 2" | bc)
local grow_data_tot=$(echo "$exp_data_tot * 2" | bc)
@@ -70,13 +70,19 @@ size_data=$(blockdev --getsize64 "$T_EX_DATA_DEV")
quarter_meta=$(echo "$size_meta / 4" | bc)
quarter_data=$(echo "$size_data / 4" | bc)
# XXX this is all pretty manual, would be nice to have helpers
echo "== make initial small fs"
t_scratch_mkfs -A -m $quarter_meta -d $quarter_data
t_scratch_mount
scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m $quarter_meta -d $quarter_data \
"$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
t_fail "mkfs failed"
SCR="$T_TMPDIR/mnt.scratch"
mkdir -p "$SCR"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
# then calculate sizes based on blocks that mkfs used
quarter_meta=$(echo "$(statfs_total meta "$T_MSCR") * 64 * 1024" | bc)
quarter_data=$(echo "$(statfs_total data "$T_MSCR") * 4 * 1024" | bc)
quarter_meta=$(echo "$(statfs_total meta "$SCR") * 64 * 1024" | bc)
quarter_data=$(echo "$(statfs_total data "$SCR") * 4 * 1024" | bc)
whole_meta=$(echo "$quarter_meta * 4" | bc)
whole_data=$(echo "$quarter_data * 4" | bc)
outsize_meta=$(echo "$whole_meta * 2" | bc)
@@ -87,58 +93,59 @@ shrink_meta=$(echo "$quarter_meta / 2" | bc)
shrink_data=$(echo "$quarter_data / 2" | bc)
# and save expected values for checks
exp_meta_tot=$(statfs_total meta "$T_MSCR")
exp_meta_df=$(df_free MetaData "$T_MSCR")
exp_data_tot=$(statfs_total data "$T_MSCR")
exp_data_df=$(df_free Data "$T_MSCR")
exp_meta_tot=$(statfs_total meta "$SCR")
exp_meta_df=$(df_free MetaData "$SCR")
exp_data_tot=$(statfs_total data "$SCR")
exp_data_df=$(df_free Data "$SCR")
echo "== 0s do nothing"
scoutfs resize-devices -p "$T_MSCR"
scoutfs resize-devices -p "$T_MSCR" -m 0
scoutfs resize-devices -p "$T_MSCR" -d 0
scoutfs resize-devices -p "$T_MSCR" -m 0 -d 0
scoutfs resize-devices -p "$SCR"
scoutfs resize-devices -p "$SCR" -m 0
scoutfs resize-devices -p "$SCR" -d 0
scoutfs resize-devices -p "$SCR" -m 0 -d 0
echo "== shrinking fails"
scoutfs resize-devices -p "$T_MSCR" -m $shrink_meta
scoutfs resize-devices -p "$T_MSCR" -d $shrink_data
scoutfs resize-devices -p "$T_MSCR" -m $shrink_meta -d $shrink_data
scoutfs resize-devices -p "$SCR" -m $shrink_meta
scoutfs resize-devices -p "$SCR" -d $shrink_data
scoutfs resize-devices -p "$SCR" -m $shrink_meta -d $shrink_data
same_totals
echo "== existing sizes do nothing"
scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta
scoutfs resize-devices -p "$T_MSCR" -d $quarter_data
scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta -d $quarter_data
scoutfs resize-devices -p "$SCR" -m $quarter_meta
scoutfs resize-devices -p "$SCR" -d $quarter_data
scoutfs resize-devices -p "$SCR" -m $quarter_meta -d $quarter_data
same_totals
echo "== growing outside device fails"
scoutfs resize-devices -p "$T_MSCR" -m $outsize_meta
scoutfs resize-devices -p "$T_MSCR" -d $outsize_data
scoutfs resize-devices -p "$T_MSCR" -m $outsize_meta -d $outsize_data
scoutfs resize-devices -p "$SCR" -m $outsize_meta
scoutfs resize-devices -p "$SCR" -d $outsize_data
scoutfs resize-devices -p "$SCR" -m $outsize_meta -d $outsize_data
same_totals
echo "== resizing meta works"
scoutfs resize-devices -p "$T_MSCR" -m $half_meta
scoutfs resize-devices -p "$SCR" -m $half_meta
devices_grew meta
echo "== resizing data works"
scoutfs resize-devices -p "$T_MSCR" -d $half_data
scoutfs resize-devices -p "$SCR" -d $half_data
devices_grew data
echo "== shrinking back fails"
scoutfs resize-devices -p "$T_MSCR" -m $quarter_meta
scoutfs resize-devices -p "$T_MSCR" -m $quarter_data
scoutfs resize-devices -p "$SCR" -m $quarter_meta
scoutfs resize-devices -p "$SCR" -m $quarter_data
same_totals
echo "== resizing again does nothing"
scoutfs resize-devices -p "$T_MSCR" -m $half_meta
scoutfs resize-devices -p "$T_MSCR" -m $half_data
scoutfs resize-devices -p "$SCR" -m $half_meta
scoutfs resize-devices -p "$SCR" -m $half_data
same_totals
echo "== resizing to full works"
scoutfs resize-devices -p "$T_MSCR" -m $whole_meta -d $whole_data
scoutfs resize-devices -p "$SCR" -m $whole_meta -d $whole_data
devices_grew meta data
echo "== cleanup extra fs"
t_scratch_umount
umount "$SCR"
rmdir "$SCR"
t_pass

View File

@@ -32,7 +32,7 @@ echo "== dirs shouldn't appear in data_seq queries"
mkdir "$DIR"
ino=$(stat -c "%i" "$DIR")
t_sync_seq_index
query_index data_seq | awk '($4 == "'$ino'")'
query_index data_seq | grep "$ino\>"
echo "== two created files are present and come after each other"
touch "$DIR/first"
@@ -92,13 +92,13 @@ test "$before" -lt "$after" || \
# didn't skip past deleted dirty items
#
echo "== make sure dirtying doesn't livelock walk"
dd if=/dev/urandom of="$DIR/dirtying" bs=4K count=1 >> "$T_TMPDIR/seqres.full" 2>&1
dd if=/dev/urandom of="$DIR/dirtying" bs=4K count=1 >> $seqres.full 2>&1
nr=1
while [ "$nr" -lt 100 ]; do
echo "dirty/walk attempt $nr" >> "$T_TMPDIR/seqres.full"
echo "dirty/walk attempt $nr" >> $seqres.full
sync
dd if=/dev/urandom of="$DIR/dirtying" bs=4K count=1 conv=notrunc \
>> "$T_TMPDIR/seqres.full" 2>&1
>> $seqres.full 2>&1
scoutfs walk-inodes data_seq 0 -1 $DIR/dirtying >& /dev/null
((nr++))
done

View File

@@ -12,12 +12,12 @@ create_file() {
if [ "$blocks" != 0 ]; then
dd if=/dev/urandom bs=4096 count=$blocks of="$file" \
>> "$T_TMPDIR/seqres.full" 2>&1
>> $seqres.full 2>&1
fi
if [ "$remainder" != 0 ]; then
dd if=/dev/urandom bs="$remainder" count=1 of="$file" \
conv=notrunc oflag=append >> "$T_TMPDIR/seqres.full" 2>&1
conv=notrunc oflag=append >> $seqres.full 2>&1
fi
}
@@ -78,7 +78,7 @@ create_file "$FILE" $((4096 * 1024))
cp "$FILE" "$T_TMP"
nr=1
while [ "$nr" -lt 10 ]; do
echo "attempt $nr" >> "$T_TMPDIR/$seqres.full" 2>&1
echo "attempt $nr" >> $seqres.full 2>&1
release_vers "$FILE" stat 0 4096K
sync
echo 3 > /proc/sys/vm/drop_caches

View File

@@ -7,7 +7,7 @@ message_output()
error_message()
{
message_output "$@" >&2
message_output "$@" >> /dev/stderr
}
error_exit()

View File

@@ -63,22 +63,6 @@ mounts because there are more locks that cover the same number of
created files. This can be helpful when working with smaller numbers of
large files.
.TP
.B lock_idle_count=<number>
This option sets the number of locks that the client will allow to
remain idle after being granted. If the number of locks exceeds this
count then the client will try to free the oldest locks. This setting
is per-mount and only changes the behavior of that mount.
.sp
Idle locks are not reclaimed by memory pressure so this option
determines the limit of how much memory is likely to be pinned by
allocated idle locks. Setting this too low can increase latency of
operations as repeated use of a working set of locks has to request the
locks from the network rather than using granted idle locks.
.sp
The count is not strictly enforced. Operations are allowed to use locks
while over the limit to avoid deadlocks under heavy concurrent load.
Exceeding the count only attempts freeing of idle locks.
.TP
.B log_merge_wait_timeout_ms=<number>
This option sets the amount of time, in milliseconds, that log merge
creation can wait before timing out. This setting is per-mount, only

View File

@@ -402,25 +402,45 @@ before destroying an old empty data device.
.PD
.TP
.BI "print {-S|--skip-likely-huge} META-DEVICE"
.BI "print {-a|--allocs} {-i|--items ITEMS} {-r|--roots ROOTS} {-S|--skip-likely-huge} {-V|--xattr-values} META-DEVICE"
.sp
Prints out all of the metadata in the file system. This makes no effort
Prints out some or all of the metadata in the file system. This makes no effort
to ensure that the structures are consistent as they're traversed and
can present structures that seem corrupt as they change as they're
output.
.sp
Structures that are related to the number of mounts and are maintained at a
relatively reasonable size are always printed. These include per-mount log
trees, srch files, allocators, and the metadata allocators used by server
commits. Other btrees and their items can be selected as desired.
.RS 1.0i
.PD 0
.TP
.sp
.TP
.B "-a, --allocs"
Print the metadata and data allocators. Enabled by default.
.TP
.B "-r, --roots ROOTS"
This option can be used to select which btrees are traversed. It is a comma-separated list containing one or more of the following btree roots: logs, srch, fs. Default is all roots.
.TP
.B "-i, --items ITEMS"
This option can be used to choose which btree items are printed from the
selected btree roots. It is a comma-separated list containing one or
more of the following items: inode, xattr, dirent, symlink, backref, extent,
totl, indx, inoindex, orphan, quota.
Default is all items.
.TP
.B "-V, --xattr-values"
Print xattr values alongside the xattr item. Non-printable bytes are
rendered as '.'. A trailing '...' indicates the value continues in
additional item parts that aren't shown.
.TP
.B "-S, --skip-likely-huge"
Skip printing structures that are likely to be very large. The
structures that are skipped tend to be global and whose size tends to be
related to the size of the volume. Examples of skipped structures include
the global fs items, srch files, and metadata and data
allocators. Similar structures that are not skipped are related to the
number of mounts and are maintained at a relatively reasonable size.
These include per-mount log trees, srch files, allocators, and the
metadata allocators used by server commits.
allocators.
.sp
Skipping the larger structures limits the print output to a relatively
constant size rather than being a large multiple of the used metadata

View File

@@ -29,6 +29,54 @@
#include "leaf_item_hash.h"
#include "dev.h"
/*
 * Options that control what the print command walks and outputs.  A
 * single file-scope instance (print_args) is consulted directly by
 * do_print(), print_volume(), find_printer() and print_xattr().
 */
struct print_args {
/* path of the metadata device; do_print() opens it O_RDONLY */
char *meta_device;
/*
 * -S/--skip-likely-huge.  NOTE(review): how this maps onto the walk_*
 * flags below is decided in option parsing that isn't visible here.
 */
bool skip_likely_huge;
/*
 * NOTE(review): presumably record whether --roots, --items and
 * --allocs were given explicitly; the code that sets and reads these
 * is outside this view -- confirm.
 */
bool roots_requested;
bool items_requested;
bool allocs_requested;
/* print_volume() prints the allocator btrees (meta_alloc[], ...) when set */
bool walk_allocs;
/* print_volume() walks super->logs_root leaf items when set */
bool walk_logs_root;
/* print_volume() prints the fs_root btree when set */
bool walk_fs_root;
/* print_volume() walks super->srch_root leaf items when set */
bool walk_srch_root;
/*
 * Per-item filters.  find_printer() still returns the matching
 * printer but flags the item as suppressed when its filter is false,
 * so filtered items aren't reported as unknown.
 */
/* FS zone, INODE type */
bool print_inodes;
/* FS zone, XATTR type */
bool print_xattrs;
/* FS zone, DIRENT and READDIR types */
bool print_dirents;
/* FS zone, SYMLINK type */
bool print_symlinks;
/* FS zone, LINK_BACKREF type */
bool print_backrefs;
/* FS zone, DATA_EXTENT type */
bool print_extents;
/* XATTR_TOTL zone */
bool print_totl;
/* XATTR_INDX zone */
bool print_indx;
/* INODE_INDEX zone, META_SEQ through DATA_SEQ types */
bool print_inode_index;
/* ORPHAN zone, ORPHAN type */
bool print_orphan;
/* QUOTA zone */
bool print_quota;
/*
 * -V/--xattr-values: print_xattr() appends the value bytes held in
 * the first xattr item part, with non-printable bytes shown as '.'
 */
bool print_xattr_values;
};
/*
 * Defaults for the print command: no device yet, every allocator, btree
 * root and item type enabled, and xattr values omitted.
 * NOTE(review): presumably option parsing narrows these from the
 * command line; that code is outside this view.
 */
static struct print_args print_args = {
.meta_device = NULL,
.skip_likely_huge = false,
.roots_requested = false,
.items_requested = false,
.allocs_requested = false,
/* walk everything unless told otherwise */
.walk_allocs = true,
.walk_logs_root = true,
.walk_fs_root = true,
.walk_srch_root = true,
/* print every item type unless told otherwise */
.print_inodes = true,
.print_xattrs = true,
.print_dirents = true,
.print_symlinks = true,
.print_backrefs = true,
.print_extents = true,
.print_totl = true,
.print_indx = true,
.print_inode_index = true,
.print_orphan = true,
.print_quota = true,
/* values are only printed on request (-V/--xattr-values) */
.print_xattr_values = false
};
static void print_block_header(struct scoutfs_block_header *hdr, int size)
{
u32 crc = crc_block(hdr, size);
@@ -135,15 +183,42 @@ static u8 *global_printable_name(u8 *name, int name_len)
static void print_xattr(struct scoutfs_key *key, void *val, int val_len)
{
struct scoutfs_xattr *xat = val;
unsigned int full_val_len;
int avail;
int show;
int i;
printf(" xattr: ino %llu name_hash %08x id %llu part %u\n",
le64_to_cpu(key->skx_ino), (u32)le64_to_cpu(key->skx_name_hash),
le64_to_cpu(key->skx_id), key->skx_part);
if (key->skx_part == 0)
printf(" name_len %u val_len %u name %s\n",
xat->name_len, le16_to_cpu(xat->val_len),
global_printable_name(xat->name, xat->name_len));
if (key->skx_part != 0)
return;
full_val_len = le16_to_cpu(xat->val_len);
printf(" name_len %u val_len %u name %s",
xat->name_len, full_val_len,
global_printable_name(xat->name, xat->name_len));
if (!print_args.print_xattr_values) {
putchar('\n');
return;
}
avail = val_len - (int)sizeof(*xat) - xat->name_len;
if (avail < 0)
avail = 0;
show = avail < (int)full_val_len ? avail : (int)full_val_len;
printf(" value ");
for (i = 0; i < show; i++) {
u8 c = xat->name[xat->name_len + i];
putchar(isprint(c) ? c : '.');
}
if (show < (int)full_val_len)
printf("...");
putchar('\n');
}
static void print_dirent(struct scoutfs_key *key, void *val, int val_len)
@@ -195,36 +270,72 @@ static void print_inode_index(struct scoutfs_key *key, void *val, int val_len)
typedef void (*print_func_t)(struct scoutfs_key *key, void *val, int val_len);
static print_func_t find_printer(u8 zone, u8 type)
static print_func_t find_printer(u8 zone, u8 type, bool *suppress)
{
if (zone == SCOUTFS_INODE_INDEX_ZONE &&
type >= SCOUTFS_INODE_INDEX_META_SEQ_TYPE &&
type <= SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE)
type <= SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE) {
if (!print_args.print_inode_index)
*suppress = true;
return print_inode_index;
if (zone == SCOUTFS_ORPHAN_ZONE) {
if (type == SCOUTFS_ORPHAN_TYPE)
return print_orphan;
}
if (zone == SCOUTFS_QUOTA_ZONE)
if (zone == SCOUTFS_ORPHAN_ZONE) {
if (type == SCOUTFS_ORPHAN_TYPE) {
if (!print_args.print_orphan)
*suppress = true;
return print_orphan;
}
}
if (zone == SCOUTFS_QUOTA_ZONE) {
if (!print_args.print_quota)
*suppress = true;
return print_quota;
}
if (zone == SCOUTFS_XATTR_TOTL_ZONE)
if (zone == SCOUTFS_XATTR_TOTL_ZONE) {
if (!print_args.print_totl)
*suppress = true;
return print_xattr_totl;
}
if (zone == SCOUTFS_XATTR_INDX_ZONE)
if (zone == SCOUTFS_XATTR_INDX_ZONE) {
if (!print_args.print_indx)
*suppress = true;
return print_xattr_indx;
}
if (zone == SCOUTFS_FS_ZONE) {
switch(type) {
case SCOUTFS_INODE_TYPE: return print_inode;
case SCOUTFS_XATTR_TYPE: return print_xattr;
case SCOUTFS_DIRENT_TYPE: return print_dirent;
case SCOUTFS_READDIR_TYPE: return print_dirent;
case SCOUTFS_SYMLINK_TYPE: return print_symlink;
case SCOUTFS_LINK_BACKREF_TYPE: return print_dirent;
case SCOUTFS_DATA_EXTENT_TYPE: return print_data_extent;
case SCOUTFS_INODE_TYPE:
if (!print_args.print_inodes)
*suppress = true;
return print_inode;
case SCOUTFS_XATTR_TYPE:
if (!print_args.print_xattrs)
*suppress = true;
return print_xattr;
case SCOUTFS_DIRENT_TYPE:
if (!print_args.print_dirents)
*suppress = true;
return print_dirent;
case SCOUTFS_READDIR_TYPE:
if (!print_args.print_dirents)
*suppress = true;
return print_dirent;
case SCOUTFS_SYMLINK_TYPE:
if (!print_args.print_symlinks)
*suppress = true;
return print_symlink;
case SCOUTFS_LINK_BACKREF_TYPE:
if (!print_args.print_backrefs)
*suppress = true;
return print_dirent;
case SCOUTFS_DATA_EXTENT_TYPE:
if (!print_args.print_extents)
*suppress = true;
return print_data_extent;
}
}
@@ -244,12 +355,16 @@ static int print_fs_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
/* only items in leaf blocks have values */
if (val != NULL && !(flags & SCOUTFS_ITEM_FLAG_DELETION)) {
printer = find_printer(key->sk_zone, key->sk_type);
if (printer)
printer(key, val, val_len);
else
bool suppress = false;
printer = find_printer(key->sk_zone, key->sk_type, &suppress);
if (printer) {
if (!suppress)
printer(key, val, val_len);
} else {
printf(" (unknown zone %u type %u)\n",
key->sk_zone, key->sk_type);
}
}
return 0;
@@ -1037,12 +1152,7 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
}
}
struct print_args {
char *meta_device;
bool skip_likely_huge;
};
static int print_volume(int fd, struct print_args *args)
static int print_volume(int fd)
{
struct scoutfs_super_block *super = NULL;
struct print_recursion_args pa;
@@ -1092,7 +1202,7 @@ static int print_volume(int fd, struct print_args *args)
ret = err;
}
if (!args->skip_likely_huge) {
if (print_args.walk_allocs) {
for (i = 0; i < array_size(super->meta_alloc); i++) {
snprintf(str, sizeof(str), "meta_alloc[%u]", i);
err = print_btree(fd, super, str, &super->meta_alloc[i].root,
@@ -1119,18 +1229,21 @@ static int print_volume(int fd, struct print_args *args)
pa.super = super;
pa.fd = fd;
if (!args->skip_likely_huge) {
if (print_args.walk_srch_root) {
err = print_btree_leaf_items(fd, super, &super->srch_root.ref,
print_srch_root_files, &pa);
if (err && !ret)
ret = err;
}
err = print_btree_leaf_items(fd, super, &super->logs_root.ref,
print_log_trees_roots, &pa);
if (err && !ret)
ret = err;
if (!args->skip_likely_huge) {
if (print_args.walk_logs_root) {
err = print_btree_leaf_items(fd, super, &super->logs_root.ref,
print_log_trees_roots, &pa);
if (err && !ret)
ret = err;
}
if (print_args.walk_fs_root) {
err = print_btree(fd, super, "fs_root", &super->fs_root,
print_fs_item, NULL);
if (err && !ret)
@@ -1143,16 +1256,16 @@ out:
return ret;
}
static int do_print(struct print_args *args)
static int do_print(void)
{
int ret;
int fd;
fd = open(args->meta_device, O_RDONLY);
fd = open(print_args.meta_device, O_RDONLY);
if (fd < 0) {
ret = -errno;
fprintf(stderr, "failed to open '%s': %s (%d)\n",
args->meta_device, strerror(errno), errno);
print_args.meta_device, strerror(errno), errno);
return ret;
}
@@ -1160,30 +1273,203 @@ static int do_print(struct print_args *args)
if (ret < 0)
goto out;
ret = print_volume(fd, args);
ret = print_volume(fd);
out:
close(fd);
return ret;
};
/*
 * Positions of the btree root names in root_tokens[].  parse_opt() switches
 * on the index getsubopt() returns for the --roots argument, so these values
 * and the table below must stay in step.
 */
enum {
	LOGS_OPT = 0,
	FS_OPT   = 1,
	SRCH_OPT = 2,
};

/* NULL-terminated token table handed to getsubopt() for --roots. */
static char *const root_tokens[] = {
	[LOGS_OPT]	= "logs",
	[FS_OPT]	= "fs",
	[SRCH_OPT]	= "srch",
	[SRCH_OPT + 1]	= NULL,
};
/*
 * Positions of the item type names in item_tokens[].  parse_opt() switches
 * on the index getsubopt() returns for the --items argument, so these values
 * and the table below must stay in step.
 */
enum {
	INODE_OPT    = 0,
	XATTR_OPT    = 1,
	DIRENT_OPT   = 2,
	SYMLINK_OPT  = 3,
	BACKREF_OPT  = 4,
	EXTENT_OPT   = 5,
	TOTL_OPT     = 6,
	INDX_OPT     = 7,
	INOINDEX_OPT = 8,
	ORPHAN_OPT   = 9,
	QUOTA_OPT    = 10,
};

/* NULL-terminated token table handed to getsubopt() for --items. */
static char *const item_tokens[] = {
	[INODE_OPT]	= "inode",
	[XATTR_OPT]	= "xattr",
	[DIRENT_OPT]	= "dirent",
	[SYMLINK_OPT]	= "symlink",
	[BACKREF_OPT]	= "backref",
	[EXTENT_OPT]	= "extent",
	[TOTL_OPT]	= "totl",
	[INDX_OPT]	= "indx",
	[INOINDEX_OPT]	= "inoindex",
	[ORPHAN_OPT]	= "orphan",
	[QUOTA_OPT]	= "quota",
	[QUOTA_OPT + 1]	= NULL,
};
/*
 * Switch off every per-item print filter in the global print_args.
 * parse_opt() calls this once, before the first --items list is applied,
 * so that only the item types named on the command line end up enabled.
 */
static void clear_items(void)
{
	/* filters consulted for item types in the fs zone */
	print_args.print_inodes =
	print_args.print_xattrs =
	print_args.print_dirents =
	print_args.print_symlinks =
	print_args.print_backrefs =
	print_args.print_extents = false;

	/* the remaining item filters */
	print_args.print_totl =
	print_args.print_indx =
	print_args.print_inode_index =
	print_args.print_orphan =
	print_args.print_quota = false;
}
/*
 * Deselect all three btree roots (logs, fs, srch) in the global print_args.
 * parse_opt() calls this once, before the first --roots list is applied.
 */
static void clear_roots(void)
{
	print_args.walk_logs_root =
	print_args.walk_fs_root =
	print_args.walk_srch_root = false;
}
/*
 * argp parser callback for the print command.
 *
 * state->input is the struct print_args being filled in.  Handles the
 * -S, -a, -V, -i and -r options, the single positional metadata device
 * argument, and the final checks at ARGP_KEY_FINI.
 *
 * NOTE(review): clear_items() and clear_roots() write to the global
 * print_args while this function writes through args; that only lines
 * up when argp_parse() is given &print_args as its input.
 */
static int parse_opt(int key, char *arg, struct argp_state *state)
{
struct print_args *args = state->input;
char *subopts;
char *value;
/* set when an unknown suboption token stops the getsubopt() loops */
bool parse_err = false;
switch (key) {
case 'S':
/* only recorded here; translated into walk_* flags at ARGP_KEY_FINI */
args->skip_likely_huge = true;
break;
case 'a':
/*
 * Remember that allocs were asked for explicitly so the -i, -r and
 * -S handling below does not switch walk_allocs back off.
 */
args->allocs_requested = true;
args->walk_allocs = true;
break;
case 'V':
args->print_xattr_values = true;
break;
case 'i':
/*
 * Specific items being requested- clear them all to start.  Only the
 * first -i clears; later -i options add to the selection.
 */
if (!args->items_requested) {
clear_items();
if (!args->allocs_requested)
args->walk_allocs = false;
args->items_requested = true;
}
/* walk the suboption list; any "=value" part returned in value is ignored */
subopts = arg;
while (*subopts != '\0' && !parse_err) {
switch (getsubopt(&subopts, item_tokens, &value)) {
case INODE_OPT:
args->print_inodes = true;
break;
case XATTR_OPT:
args->print_xattrs = true;
break;
case DIRENT_OPT:
args->print_dirents = true;
break;
case SYMLINK_OPT:
args->print_symlinks = true;
break;
case BACKREF_OPT:
args->print_backrefs = true;
break;
case EXTENT_OPT:
args->print_extents = true;
break;
case TOTL_OPT:
args->print_totl = true;
break;
case INDX_OPT:
args->print_indx = true;
break;
case INOINDEX_OPT:
args->print_inode_index = true;
break;
case ORPHAN_OPT:
args->print_orphan = true;
break;
case QUOTA_OPT:
args->print_quota = true;
break;
default:
/* token not in item_tokens: show usage and stop parsing this list */
argp_usage(state);
parse_err = true;
break;
}
}
break;
case 'r':
/*
 * Specific roots being requested- clear them all to start.  Only the
 * first -r clears; later -r options add to the selection.
 */
if (!args->roots_requested) {
clear_roots();
if (!args->allocs_requested)
args->walk_allocs = false;
args->roots_requested = true;
}
/* walk the suboption list; any "=value" part returned in value is ignored */
subopts = arg;
while (*subopts != '\0' && !parse_err) {
switch (getsubopt(&subopts, root_tokens, &value)) {
case LOGS_OPT:
args->walk_logs_root = true;
break;
case FS_OPT:
args->walk_fs_root = true;
break;
case SRCH_OPT:
args->walk_srch_root = true;
break;
default:
/* token not in root_tokens: show usage and stop parsing this list */
argp_usage(state);
parse_err = true;
break;
}
}
break;
case ARGP_KEY_ARG:
/* exactly one positional argument is accepted: the metadata device */
if (!args->meta_device)
args->meta_device = strdup_or_error(state, arg);
else
argp_error(state, "more than one argument given");
break;
case ARGP_KEY_FINI:
if (!args->meta_device)
argp_error(state, "no metadata device argument given");
/*
 * For backwards compatibility, translate -S. Should we warn if
 * this conflicts with other explicit options?
 *
 * As written, -S turns the fs and srch roots off even when they were
 * named with -r; allocs are kept only if -a was given, and the logs
 * root selection is left as it was.
 */
if (args->skip_likely_huge) {
if (!args->allocs_requested)
args->walk_allocs = false;
args->walk_fs_root = false;
args->walk_srch_root = false;
}
break;
default:
break;
}
@@ -1192,7 +1478,11 @@ static int parse_opt(int key, char *arg, struct argp_state *state)
}
static struct argp_option options[] = {
{ "skip-likely-huge", 'S', NULL, 0, "Skip large structures to minimize output size"},
{ "allocs", 'a', NULL, 0, "Print metadata and data alloc lists" },
{ "items", 'i', "ITEMS", 0, "Item(s) to print (inode, xattr, dirent, symlink, backref, extent, totl, indx, inoindex, orphan, quota)" },
{ "roots", 'r', "ROOTS", 0, "Tree root(s) to walk (logs, srch, fs)" },
{ "skip-likely-huge", 'S', NULL, 0, "Skip allocs, srch root and fs root to minimize output size" },
{ "xattr-values", 'V', NULL, 0, "Print xattr values (non-printable bytes rendered as '.')" },
{ NULL }
};
@@ -1205,17 +1495,15 @@ static struct argp argp = {
static int print_cmd(int argc, char **argv)
{
struct print_args print_args = {NULL};
int ret;
ret = argp_parse(&argp, argc, argv, 0, NULL, &print_args);
if (ret)
return ret;
return do_print(&print_args);
return do_print();
}
static void __attribute__((constructor)) print_ctor(void)
{
cmd_register_argp("print", &argp, GROUP_DEBUG, print_cmd);

View File

@@ -1,127 +0,0 @@
#include <sys/ioctl.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <argp.h>
#include "sparse.h"
#include "parse.h"
#include "util.h"
#include "ioctl.h"
#include "cmd.h"
/* Command line state collected by parse_opt() for punch-offline. */
struct po_args {
	char *path;			/* positional PATH argument */
	u64 offset;			/* -o / --offset */
	u64 length;			/* -l / --length */
	u64 data_version;		/* -V / --data-version */

	/* each flag is set once its option has been parsed successfully */
	unsigned offset_set:1;
	unsigned length_set:1;
	unsigned data_version_set:1;
};
static int do_punch_offline(struct po_args *args)
{
struct scoutfs_ioctl_punch_offline ioctl_args;
int ret;
int fd;
fd = get_path(args->path, O_RDWR);
if (fd < 0)
return fd;
ioctl_args.offset = args->offset;
ioctl_args.len = args->length;
ioctl_args.data_version = args->data_version;
ioctl_args.flags = 0;
ret = ioctl(fd, SCOUTFS_IOC_PUNCH_OFFLINE, &ioctl_args);
if (ret < 0) {
ret = -errno;
fprintf(stderr, "punch_offline ioctl failed: %s (%d)\n",
strerror(errno), errno);
}
close(fd);
return ret;
}
/*
 * argp parser callback for punch-offline.
 *
 * Stores -V, -o and -l into the struct po_args found in state->input and
 * marks each one as seen, takes a single positional path, and at
 * ARGP_KEY_FINI reports any of the four that is still missing.  A non-zero
 * result from parse_u64()/parse_human() is returned as-is; every other
 * path returns 0.
 */
static int parse_opt(int key, char *arg, struct argp_state *state)
{
	struct po_args *po = state->input;
	int err;

	switch (key) {
	case 'V':	/* data version */
		err = parse_u64(arg, &po->data_version);
		if (err == 0)
			po->data_version_set = 1;
		return err;

	case 'o':	/* offset */
		err = parse_human(arg, &po->offset);
		if (err == 0)
			po->offset_set = 1;
		return err;

	case 'l':	/* length */
		err = parse_human(arg, &po->length);
		if (err == 0)
			po->length_set = 1;
		return err;

	case ARGP_KEY_ARG:
		/* only one path is accepted */
		if (po->path)
			argp_error(state, "unknown extra argument given");
		else
			po->path = strdup_or_error(state, arg);
		return 0;

	case ARGP_KEY_FINI:
		/* all four inputs are mandatory */
		if (!po->path)
			argp_error(state, "must provide path to file");
		if (!po->offset_set)
			argp_error(state, "must provide offset");
		if (!po->length_set)
			argp_error(state, "must provide length");
		if (!po->data_version_set)
			argp_error(state, "must provide data_version");
		return 0;

	default:
		return 0;
	}
}
/*
 * Options accepted by punch-offline.  All three are required; parse_opt()
 * enforces that at ARGP_KEY_FINI.  The help text describes the range being
 * punched (it previously said "to stage").
 */
static struct argp_option options[] = {
	{ "data-version", 'V', "VERSION", 0, "Data version of the file [Required]"},
	{ "offset", 'o', "OFFSET", 0, "Offset (bytes or KMGTP units) in file to punch [Required]"},
	{ "length", 'l', "LENGTH", 0, "Length of range (bytes or KMGTP units) of file to punch [Required]"},
	{ NULL }
};
/* argp description of the punch-offline command: options, parser and help. */
static struct argp argp = {
	.options	= options,
	.parser		= parse_opt,
	.args_doc	= "PATH",
	.doc		= "Make a (sparse) hole in the file at offset and with length",
};
static int punch_offline_cmd(int argc, char **argv)
{
struct po_args po_args = {NULL};
int ret;
ret = argp_parse(&argp, argc, argv, 0, NULL, &po_args);
if (ret)
return ret;
return do_punch_offline(&po_args);
}
/*
 * Marked with the constructor attribute; registers the "punch-offline"
 * command, its argp description and its handler under GROUP_AGENT.
 */
__attribute__((constructor))
static void punch_offline_ctor(void)
{
	cmd_register_argp("punch-offline", &argp, GROUP_AGENT,
			  punch_offline_cmd);
}