Compare commits

..

1 Commits

Author SHA1 Message Date
Auke Kok
d84ba603b6 Check for fenced old leader in mounted test.
The old mounted check only considered begin/end quorum data, and
not whether the old leader that is now disconnected was fenced by
a new quorum leader.

Since this is the guaranteed case if the leader is disconnected
forcefully, this check must account for this case, so that quorum slots
can be modified if the node is permanently removed or replaced.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-03-13 12:22:21 -07:00
43 changed files with 199 additions and 3436 deletions

View File

@@ -1,38 +1,6 @@
Versity ScoutFS Release Notes
=============================
---
v1.30
\
*Apr 21, 2026*
Fix a problem reading the accumulated totals of contributing .totl.
xattrs when log merging is in progress. The problem would have readers
of the totals calculate the sums incorrectly.
Fix a problem updating quota rules. There was a race where updates
could be corrupted if they happened while a transaction was being
written.
Fix a problem deleting files with .indx. xattrs. The internal indexing
metadata wouldn't be properly deleted so the files would still claim to
be present and visible in the index, though the file no longer existed.
---
v1.29
\
*Mar 25, 2026*
Add a repair mechanism for mount logs that weren't properly resolved as
mounts left the cluster. The presence of these logs prevents log
merging from making forward progress and the backlog of logs over time
can cause operations to slow to a crawl. With the repair mechanism in
place the orphaned logs don't stop merging and operations proceed as
usual.
Add an ioctl for turning offline unmapped file regions into sparse
regions.
---
v1.28
\

View File

@@ -13,7 +13,6 @@ scoutfs-y += \
avl.o \
alloc.o \
block.o \
bsearch_index.o \
btree.o \
client.o \
counters.o \
@@ -37,7 +36,6 @@ scoutfs-y += \
per_task.o \
quorum.o \
quota.o \
raw.o \
recov.o \
scoutfs_trace.o \
server.o \

View File

@@ -1,8 +1,6 @@
#ifndef _SCOUTFS_BLOCK_H_
#define _SCOUTFS_BLOCK_H_
struct scoutfs_alloc;
struct scoutfs_block_writer {
spinlock_t lock;
struct list_head dirty_list;

View File

@@ -1,59 +0,0 @@
/*
* Copyright (C) 2026 Versity Software, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/bsearch.h>
#include "bsearch_index.h"
/*
 * Context threaded through bsearch() as its "key" argument so the
 * comparison callback can record where the search ends up.
 */
struct bsearch_index_key {
	int (*cmp)(const void *key, const void *elt);	/* caller's element comparison */
	/* the key has to be const, so we have to update the index through a pointer */
	void **index_elt;	/* out: position in the array the key maps to */
	const void *key;	/* caller's search key */
	size_t size;		/* size of one array element in bytes */
};
/*
 * bsearch() comparison shim: forward to the caller's comparison and
 * record the would-be insertion point as a side effect.  When the key
 * sorts after the visited element the insertion point is just past it;
 * otherwise it is the element itself.
 */
static int cmp_index(const void *key, const void *elt)
{
	const struct bsearch_index_key *bik = key;
	int result;

	result = bik->cmp(bik->key, elt);

	if (result <= 0)
		*(bik->index_elt) = (void *)elt;
	else
		*(bik->index_elt) = (void *)elt + bik->size;

	return result;
}
/*
 * A bsearch() wrapper that returns the array index at which the key
 * would be stored to maintain sort order: the index of the first
 * element that compares greater than the key.  Returns the size of
 * the array when the key is greater than every existing element.
 */
size_t bsearch_index(const void *key, const void *base, size_t num, size_t size,
		     int (*cmp)(const void *key, const void *elt))
{
	void *pos = (void *)base;
	struct bsearch_index_key bik;

	bik.cmp = cmp;
	bik.index_elt = &pos;
	bik.key = key;
	bik.size = size;

	/* cmp_index records the landing position in pos as bsearch probes */
	bsearch(&bik, base, num, size, cmp_index);

	return ((char *)pos - (char *)base) / size;
}

View File

@@ -1,7 +0,0 @@
#ifndef _SCOUTFS_BSEARCH_INDEX_H_
#define _SCOUTFS_BSEARCH_INDEX_H_
size_t bsearch_index(const void *key, const void *base, size_t num, size_t size,
int (*cmp)(const void *key, const void *elt));
#endif

View File

@@ -1816,11 +1816,6 @@ int scoutfs_btree_dirty(struct super_block *sb,
* Call the users callback on all the items in the leaf that we find.
* We also set the caller's keys for the first and last possible keys
* that could exist in the leaf block.
*
* The callback can set a new key to continue reading from rather than
* iterating over all the items. It modifies the key and returns
* -ESRCH, which performs a new avl search. If the modified key falls
* outside of the range of keys in the block then we return.
*/
int scoutfs_btree_read_items(struct super_block *sb,
struct scoutfs_btree_root *root,
@@ -1834,7 +1829,6 @@ int scoutfs_btree_read_items(struct super_block *sb,
struct scoutfs_avl_node *next_node;
struct scoutfs_avl_node *node;
struct btree_walk_key_range kr;
struct scoutfs_key cb_key;
struct scoutfs_block *bl;
int ret;
@@ -1848,32 +1842,22 @@ int scoutfs_btree_read_items(struct super_block *sb,
if (scoutfs_key_compare(&kr.end, end) < 0)
*end = kr.end;
cb_key = *start;
search:
node = scoutfs_avl_search(&bt->item_root, cmp_key_item, &cb_key, NULL,
node = scoutfs_avl_search(&bt->item_root, cmp_key_item, start, NULL,
NULL, &next_node, NULL) ?: next_node;
while (node) {
item = node_item(node);
if (scoutfs_key_compare(&item->key, end) > 0)
break;
cb_key = *item_key(item);
ret = cb(sb, &cb_key, le64_to_cpu(item->seq), item->flags,
ret = cb(sb, item_key(item), le64_to_cpu(item->seq), item->flags,
item_val(bt, item), item_val_len(item), arg);
if (ret < 0) {
if (ret == -ESRCH) {
if (scoutfs_key_compare(&cb_key, start) >= 0)
goto search;
ret = 0;
}
goto out;
}
if (ret < 0)
break;
node = scoutfs_avl_next(&bt->item_root, node);
}
scoutfs_block_put(sb, bl);
ret = 0;
out:
return ret;
}
@@ -2199,8 +2183,6 @@ static int merge_read_item(struct super_block *sb, struct scoutfs_key *key, u64
if (ret > 0) {
if (ret == SCOUTFS_DELTA_COMBINED) {
scoutfs_inc_counter(sb, btree_merge_delta_combined);
if (seq > found->seq)
found->seq = seq;
} else if (ret == SCOUTFS_DELTA_COMBINED_NULL) {
scoutfs_inc_counter(sb, btree_merge_delta_null);
free_mitem(rng, found);
@@ -2504,14 +2486,6 @@ int scoutfs_btree_merge(struct super_block *sb,
mitem = next_mitem(mitem);
free_mitem(&rng, tmp);
}
if (mitem && walk_val_len == 0 &&
!(walk_flags & (BTW_INSERT | BTW_DELETE)) &&
scoutfs_trigger(sb, LOG_MERGE_FORCE_PARTIAL)) {
ret = -ERANGE;
*next_ret = mitem->key;
goto out;
}
}
ret = 0;

View File

@@ -1517,101 +1517,6 @@ out:
return ret;
}
/*
 * Punch holes in offline extents. This is a very specific tool that
 * only does one job: it converts extents from offline to sparse. It
 * returns an error if it encounters an extent that isn't offline or has
 * a block mapping. It ignores i_size completely; it does not test it,
 * and does not update it.
 *
 * The caller has the inode locked in the vfs and performed basic sanity
 * checks. We manage transactions and the extent_sem which is ordered
 * inside the transaction.
 *
 * NOTE(review): data_version is not referenced in this body — the ioctl
 * caller validates it before calling.  Confirm it's only kept for
 * interface symmetry.
 */
int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
			       struct scoutfs_lock *lock)
{
	struct scoutfs_inode_info *si = SCOUTFS_I(inode);
	struct super_block *sb = inode->i_sb;
	struct data_ext_args args = {
		.ino = scoutfs_ino(inode),
		.inode = inode,
		.lock = lock,
	};
	struct scoutfs_extent ext;
	LIST_HEAD(ind_locks);
	int ret;
	int i;

	if (WARN_ON_ONCE(iblock > last)) {
		ret = -EINVAL;
		goto out;
	}

	/* idiomatic to call start,last with 0,~0, clamp last to last possible */
	last = min(last, SCOUTFS_BLOCK_SM_MAX);

	ret = 0;
	/* each pass holds one transaction and converts a bounded batch */
	while (iblock <= last) {
		ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false) ?:
		      scoutfs_dirty_inode_item(inode, lock);
		if (ret < 0)
			break;

		/* extent_sem is ordered inside the held transaction */
		down_write(&si->extent_sem);
		/* cap work per transaction at 32 extents */
		for (i = 0; i < 32 && (iblock <= last); i++) {
			ret = scoutfs_ext_next(sb, &data_ext_ops, &args, iblock, 1, &ext);
			if (ret == -ENOENT) {
				/* no more extents, we're done */
				iblock = last + 1;
				ret = 0;
				break;
			}
			if (ret < 0)
				break;
			if (ext.start > last) {
				/* next extent starts past the region, done */
				iblock = last + 1;
				break;
			}
			if (ext.map) {
				/* mapped extents are an error, only offline allowed */
				ret = -EINVAL;
				break;
			}
			if (ext.flags & SEF_OFFLINE) {
				/* trim the extent to the requested region */
				if (iblock > ext.start) {
					ext.len -= iblock - ext.start;
					ext.start = iblock;
				}
				ext.len = min(ext.len, last - ext.start + 1);
				/* clearing OFFLINE with no mapping makes it sparse */
				ext.flags &= ~SEF_OFFLINE;
				ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
						      ext.start, ext.len, ext.map, ext.flags);
				if (ret < 0)
					break;
			}
			iblock = ext.start + ext.len;
		}
		up_write(&si->extent_sem);

		scoutfs_update_inode_item(inode, lock, &ind_locks);
		scoutfs_release_trans(sb);
		scoutfs_inode_index_unlock(sb, &ind_locks);
		if (ret < 0)
			break;
	}

out:
	return ret;
}
/*
* This copies to userspace :/
*/

View File

@@ -57,8 +57,6 @@ int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
u64 byte_len, struct inode *to, u64 to_off, bool to_stage,
u64 data_version);
int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
struct scoutfs_lock *lock);
int scoutfs_data_wait_check(struct inode *inode, loff_t pos, loff_t len,
u8 sef, u8 op, struct scoutfs_data_wait *ow,

View File

@@ -114,42 +114,6 @@ static struct scoutfs_block *read_bloom_ref(struct super_block *sb, struct scout
return bl;
}
/*
* Returns >0 if there was a bloom block and all the bits were present.
*/
static int all_bloom_bits_present(struct super_block *sb, struct scoutfs_block_ref *ref,
struct forest_bloom_nrs *bloom)
{
struct scoutfs_bloom_block *bb;
struct scoutfs_block *bl;
int i;
if (ref->blkno == 0)
return 0;
bl = read_bloom_ref(sb, ref);
if (IS_ERR(bl))
return PTR_ERR(bl);
bb = bl->data;
for (i = 0; i < ARRAY_SIZE(bloom->nrs); i++) {
if (!test_bit_le(bloom->nrs[i], bb->bits))
break;
}
scoutfs_block_put(sb, bl);
/* one of the bloom bits wasn't set */
if (i != ARRAY_SIZE(bloom->nrs)) {
scoutfs_inc_counter(sb, forest_bloom_fail);
return 0;
}
scoutfs_inc_counter(sb, forest_bloom_pass);
return 1;
}
/*
* This is an unlocked iteration across all the btrees to find a hint at
* the next key that the caller could read. It's used to find out what
@@ -263,13 +227,9 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
}
/*
* Call the caller's callback for every item in the leaf blocks in each
* forest btree that contain the caller's key.
*
* If a bloom key is provided then each log tree's bloom block is
* checked and only trees with all the bloom key's bloom bits set will
* be read from. When the bloom key is null all trees will be read
* from.
* For each forest btree whose bloom block indicates that the lock might
* have items stored, call the caller's callback for every item in the
* leaf block in each tree which contains the key.
*
* The btree iter calls clamp the caller's range to the tightest range
* that covers all the blocks. Any keys outside of this range can't be
@@ -279,26 +239,33 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
* to reset their state and retry with a newer version of the btrees.
*/
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
u64 merge_input_seq, struct scoutfs_key *key,
struct scoutfs_key *bloom_key, struct scoutfs_key *start,
struct scoutfs_key *end, scoutfs_forest_item_cb cb, void *arg)
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
struct scoutfs_key *start, struct scoutfs_key *end,
scoutfs_forest_item_cb cb, void *arg)
{
struct forest_read_items_data rid = {
.cb = cb,
.cb_arg = arg,
};
struct scoutfs_log_trees lt;
struct scoutfs_bloom_block *bb;
struct forest_bloom_nrs bloom;
SCOUTFS_BTREE_ITEM_REF(iref);
struct scoutfs_block *bl;
struct scoutfs_key ltk;
struct scoutfs_key orig_start = *start;
struct scoutfs_key orig_end = *end;
int ret;
int i;
scoutfs_inc_counter(sb, forest_read_items);
if (bloom_key)
calc_bloom_nrs(&bloom, bloom_key);
calc_bloom_nrs(&bloom, bloom_key);
trace_scoutfs_forest_using_roots(sb, &roots->fs_root, &roots->logs_root);
*start = orig_start;
*end = orig_end;
/* start with fs root items */
rid.fic |= FIC_FS_ROOT;
ret = scoutfs_btree_read_items(sb, &roots->fs_root, key, start, end,
@@ -325,29 +292,40 @@ int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_r
goto out; /* including stale */
}
/* we're not expecting -ENOENT from _read_items */
if (lt.item_root.ref.blkno == 0)
if (lt.bloom_ref.blkno == 0)
continue;
if (bloom_key) {
ret = all_bloom_bits_present(sb, &lt.bloom_ref, &bloom);
if (ret < 0)
goto out;
if (ret == 0)
continue;
bl = read_bloom_ref(sb, &lt.bloom_ref);
if (IS_ERR(bl)) {
ret = PTR_ERR(bl);
goto out;
}
bb = bl->data;
for (i = 0; i < ARRAY_SIZE(bloom.nrs); i++) {
if (!test_bit_le(bloom.nrs[i], bb->bits))
break;
}
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) &&
(merge_input_seq == 0 ||
le64_to_cpu(lt.finalize_seq) < merge_input_seq))
rid.fic |= FIC_MERGE_INPUT;
scoutfs_block_put(sb, bl);
/* one of the bloom bits wasn't set */
if (i != ARRAY_SIZE(bloom.nrs)) {
scoutfs_inc_counter(sb, forest_bloom_fail);
continue;
}
scoutfs_inc_counter(sb, forest_bloom_pass);
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED))
rid.fic |= FIC_FINALIZED;
ret = scoutfs_btree_read_items(sb, &lt.item_root, key, start,
end, forest_read_items, &rid);
if (ret < 0)
goto out;
rid.fic &= ~FIC_MERGE_INPUT;
rid.fic &= ~FIC_FINALIZED;
}
ret = 0;
@@ -367,7 +345,7 @@ int scoutfs_forest_read_items(struct super_block *sb,
ret = scoutfs_client_get_roots(sb, &roots);
if (ret == 0)
ret = scoutfs_forest_read_items_roots(sb, &roots, 0, key, bloom_key, start, end,
ret = scoutfs_forest_read_items_roots(sb, &roots, key, bloom_key, start, end,
cb, arg);
return ret;
}

View File

@@ -11,7 +11,7 @@ struct scoutfs_lock;
/* caller gives an item to the callback */
enum {
FIC_FS_ROOT = (1 << 0),
FIC_MERGE_INPUT = (1 << 1),
FIC_FINALIZED = (1 << 1),
};
typedef int (*scoutfs_forest_item_cb)(struct super_block *sb, struct scoutfs_key *key, u64 seq,
u8 flags, void *val, int val_len, int fic, void *arg);
@@ -25,9 +25,9 @@ int scoutfs_forest_read_items(struct super_block *sb,
struct scoutfs_key *end,
scoutfs_forest_item_cb cb, void *arg);
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
u64 merge_input_seq, struct scoutfs_key *key,
struct scoutfs_key *bloom_key, struct scoutfs_key *start,
struct scoutfs_key *end, scoutfs_forest_item_cb cb, void *arg);
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
struct scoutfs_key *start, struct scoutfs_key *end,
scoutfs_forest_item_cb cb, void *arg);
int scoutfs_forest_set_bloom_bits(struct super_block *sb,
struct scoutfs_lock *lock);
void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq);

View File

@@ -49,7 +49,6 @@
#include "quota.h"
#include "scoutfs_trace.h"
#include "util.h"
#include "raw.h"
/*
* We make inode index items coherent by locking fixed size regions of
@@ -1668,141 +1667,6 @@ out:
return ret;
}
/*
 * Convert offline extents in a byte region of a regular file into
 * sparse extents.  Validates the user's arguments and permissions,
 * takes the vfs inode lock and a cluster write lock, then hands the
 * block-aligned region to scoutfs_data_punch_offline().
 */
static long scoutfs_ioc_punch_offline(struct file *file, unsigned long arg)
{
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	struct scoutfs_ioctl_punch_offline __user *upo = (void __user *)arg;
	struct scoutfs_ioctl_punch_offline po;
	struct scoutfs_lock *lock = NULL;
	u64 iblock;
	u64 last;
	u64 tmp;
	int ret;

	if (copy_from_user(&po, upo, sizeof(po)))
		return -EFAULT;

	/* an empty region is trivially done */
	if (po.len == 0)
		return 0;

	/* offset and len must be 4KiB block aligned and must not wrap u64 */
	if (check_add_overflow(po.offset, po.len - 1, &tmp) ||
	    (po.offset & SCOUTFS_BLOCK_SM_MASK) ||
	    (po.len & SCOUTFS_BLOCK_SM_MASK))
		return -EOVERFLOW;

	/* no flags defined yet */
	if (po.flags)
		return -EINVAL;

	ret = mnt_want_write_file(file);
	if (ret < 0)
		return ret;

	inode_lock(inode);

	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
				 SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
	if (ret)
		goto out;

	if (!S_ISREG(inode->i_mode)) {
		ret = -EINVAL;
		goto out;
	}

	if (!(file->f_mode & FMODE_WRITE)) {
		ret = -EINVAL;
		goto out;
	}

	ret = inode_permission(KC_VFS_INIT_NS inode, MAY_WRITE);
	if (ret < 0)
		goto out;

	/* the caller's data_version must match the current inode */
	if (scoutfs_inode_data_version(inode) != po.data_version) {
		ret = -ESTALE;
		goto out;
	}

	if ((ret = scoutfs_inode_check_retention(inode)))
		goto out;

	/* translate the byte region into inclusive 4KiB block numbers */
	iblock = po.offset >> SCOUTFS_BLOCK_SM_SHIFT;
	last = (po.offset + po.len - 1) >> SCOUTFS_BLOCK_SM_SHIFT;

	ret = scoutfs_data_punch_offline(inode, iblock, last, po.data_version, lock);
out:
	scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
	inode_unlock(inode);
	mnt_drop_write_file(file);
	return ret;
}
/*
 * Read meta_seq index items without cluster locking.  This only
 * validates the ioctl argument struct; the reading and copying of
 * results to userspace is done by scoutfs_raw_read_meta_seq().
 */
static long scoutfs_ioc_raw_read_meta_seq(struct file *file, unsigned long arg)
{
	struct super_block *sb = file_inode(file)->i_sb;
	struct scoutfs_ioctl_raw_read_meta_seq __user *urms = (void __user *)arg;
	struct scoutfs_ioctl_raw_read_meta_seq rms;
	int ret;

	/* reads raw persistent items, restrict to admins */
	if (!capable(CAP_SYS_ADMIN)) {
		ret = -EPERM;
		goto out;
	}

	if (copy_from_user(&rms, urms, sizeof(rms))) {
		ret = -EFAULT;
		goto out;
	}

	/* an empty results buffer trivially returns no items */
	if (rms.results_size == 0) {
		ret = 0;
		goto out;
	}

	/* buffer must hold at least one result and fit the int return */
	if (rms.results_size < sizeof(struct scoutfs_ioctl_meta_seq) ||
	    rms.results_size > INT_MAX) {
		ret = -EINVAL;
		goto out;
	}

	ret = scoutfs_raw_read_meta_seq(sb, &rms, &rms.last);
	/* on success hand the updated last position back to the caller */
	if (ret >= 0 && copy_to_user(&urms->last, &rms.last, sizeof(rms.last)))
		ret = -EFAULT;
out:
	return ret;
}
/*
 * Read inode metadata and selected xattrs without cluster locking.
 * This only validates the argument struct; scoutfs_raw_read_inode_info()
 * performs the reads and fills the caller's results buffer.
 */
static long scoutfs_ioc_raw_read_inode_info(struct file *file, unsigned long arg)
{
	struct super_block *sb = file_inode(file)->i_sb;
	struct scoutfs_ioctl_raw_read_inode_info __user *urii = (void __user *)arg;
	struct scoutfs_ioctl_raw_read_inode_info rii;
	int ret;

	/* reads raw persistent items, restrict to admins */
	if (!capable(CAP_SYS_ADMIN)) {
		ret = -EPERM;
		goto out;
	}

	if (copy_from_user(&rii, urii, sizeof(rii))) {
		ret = -EFAULT;
		goto out;
	}

	/* need inodes to read, a sane buffer size, and an aligned ino array */
	if (rii.inos_count == 0 || rii.results_size > INT_MAX ||
	    !IS_ALIGNED(rii.inos_ptr, __alignof__(__u64))) {
		ret = -EINVAL;
		goto out;
	}

	ret = scoutfs_raw_read_inode_info(sb, &rii);
out:
	return ret;
}
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
switch (cmd) {
@@ -1852,12 +1716,6 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
return scoutfs_ioc_mod_quota_rule(file, arg, false);
case SCOUTFS_IOC_READ_XATTR_INDEX:
return scoutfs_ioc_read_xattr_index(file, arg);
case SCOUTFS_IOC_PUNCH_OFFLINE:
return scoutfs_ioc_punch_offline(file, arg);
case SCOUTFS_IOC_RAW_READ_META_SEQ:
return scoutfs_ioc_raw_read_meta_seq(file, arg);
case SCOUTFS_IOC_RAW_READ_INODE_INFO:
return scoutfs_ioc_raw_read_inode_info(file, arg);
}
return -ENOTTY;

View File

@@ -15,6 +15,20 @@
#define SCOUTFS_IOCTL_MAGIC 0xE8 /* arbitrarily chosen hole in ioctl-number.rst */
/*
 * Packed scoutfs keys rarely cross the ioctl boundary so we have a
 * translation struct.
 */
struct scoutfs_ioctl_key {
	/* _-prefixed fields hold zone/type-specific values — presumably
	 * mirroring the in-kernel key layout; verify against format.h */
	__le64 _sk_first;
	__le64 _sk_second;
	__le64 _sk_third;
	__u8 _sk_fourth;
	__u8 sk_type;		/* item type within the zone */
	__u8 sk_zone;		/* top-level key namespace */
	__u8 _pad[5];		/* pad to 8-byte multiple, must be zero */
};
struct scoutfs_ioctl_walk_inodes_entry {
__u64 major;
__u64 ino;
@@ -834,197 +848,4 @@ struct scoutfs_ioctl_read_xattr_index {
#define SCOUTFS_IOC_READ_XATTR_INDEX \
_IOR(SCOUTFS_IOCTL_MAGIC, 23, struct scoutfs_ioctl_read_xattr_index)
/*
* This is a limited and specific version of hole punching. It's an
* archive layer operation that only converts unmapped offline extents
* into sparse extents. It is intended to be used when restoring sparse
* files after the initial creation set the entire file size offline.
*
* The offset and len fields are in units of bytes and must be aligned
* to the small (4KiB) block size. All regions of offline extents
* covered by the region will be converted into sparse online extents,
* including regions that straddle the boundaries of the region. Any
* existing sparse extents in the region are ignored.
*
* The data_version must match the inode's current data_version or
* ESTALE is returned. The data_version is not modified by this
* operation.
*
* EINVAL is returned if any mapped extents are found in the region. If
* an error is returned then partial progress may have been made.
*/
struct scoutfs_ioctl_punch_offline {
	__u64 offset;		/* byte offset of region start, 4KiB aligned */
	__u64 len;		/* byte length of region, 4KiB aligned */
	__u64 data_version;	/* must match the inode's current data_version */
	__u64 flags;		/* reserved, must be zero */
};
#define SCOUTFS_IOC_PUNCH_OFFLINE \
_IOW(SCOUTFS_IOCTL_MAGIC, 24, struct scoutfs_ioctl_punch_offline)
/*
* Read meta_seq items without cluster locking.
*
* @start is the first meta_seq item value that could be returned.
* {0,0} is the minimum.
*
* @end is the last meta_seq item value that could be returned.
* {U64_MAX, U64_MAX} is the maximum.
*
* @last is only set on success from the call. It's the last meta_seq
* item that could have been returned. This lets the caller detect that
* the full input range wasn't explored. Another call can be made with
* start set to just after this.
*
* @results_ptr is a pointer to an array of (struct
* scoutfs_ioctl_meta_seq) elements that were found in the input range.
*
* @results_size is the count of elements in the results_ptr array and
* the maximum number of results that can be returned. There must be
* room for at least one result.
*
* Return existing meta_seq items starting from @start until @last.
* Partial results can be returned and is indicated by @last being set
* to an item before @last.
*
* The results are sorted first by increasing meta_seq and then by
* increasing ino. All of the results are from one version of file
* system metadata. This means that an inode can not be found multiple
* times within the results of one call.
*
* This call ignores currently dirty transactions and reads persistent
* items directly. A transaction can be written after this call and
* cause meta_seq items to appear before or within the results from this
* call.
*
* The number of meta_seq items stored in the results buffer is returned
* and @last is updated. 0 items can be returned if none are found
* within the input range.
*
* Unique errors:
*
* -EINVAL: The results buffer was smaller than a single result or
* larger than INT_MAX bytes. (A zero-size buffer returns 0 results.)
*
* -ESTALE: The results could not be read from one stable version of
* file system metadata. Decrease the number of inodes requested.
*/
/* one meta_seq index entry: an inode and the metadata seq it was indexed at */
struct scoutfs_ioctl_meta_seq {
	__u64 meta_seq;		/* major sort component */
	__u64 ino;		/* minor sort component */
};
struct scoutfs_ioctl_raw_read_meta_seq {
	struct scoutfs_ioctl_meta_seq start;	/* first item that could be returned */
	struct scoutfs_ioctl_meta_seq end;	/* last item that could be returned */
	struct scoutfs_ioctl_meta_seq last;	/* out: last position actually explored */
	__u64 results_ptr;	/* userspace array of scoutfs_ioctl_meta_seq */
	__u32 results_size;	/* element count of the results array */
	__u32 _pad;		/* must be zero */
};
#define SCOUTFS_IOC_RAW_READ_META_SEQ \
_IOR(SCOUTFS_IOCTL_MAGIC, 25, struct scoutfs_ioctl_raw_read_meta_seq)
/*
* Read inode metadata without cluster locking.
*
* @inos_ptr is a pointer to an aligned array of 64bit inode numbers.
*
* @inos_count is the number of elements in the array. The inode
* numbers must not be zero, must strictly increase, and must not
* contain any duplicates.
*
* @names_ptr is a pointer to a byte array of xattr names to return with
* each inode. The names are identical to those used in
* {get,set}xattr(2). The names must be null terminated and no two
* names may be equal.
*
* @names_count is the number of names that will be found in the
* names_ptr buffer.
*
* @results_ptr is a pointer to a buffer that will be filled by the read
* inode info results. The result structs and payloads are not aligned.
* Callers will almost certainly need to copy them into aligned
* addresses before referencing their contents.
*
* @results_size is the number of bytes available in the results_ptr
* buffer.
*
* For each inode an _INODE result will always be returned. Then a
* _XATTR result will be returned for each xattr on the inode that
* matches one of the given input names.
*
* Each call will not return partial results. -ERANGE is returned if the
* results for the requested inodes do not fit in the results buffer.
*
* The info for one call is from one consistent version of the file
* system metadata. The call can have to retry if it sees metadata
* change during its call. -ESTALE will be returned if it was not able
* to read all the inodes info from one metadata version. The number of
* inodes being read can be decreased to avoid this.
*
* Inodes with an nlink of 0 are not returned.
*
* The size in bytes of filled results is returned. A non-zero return
* will always include at least one full
* (struct scoutfs_ioctl_raw_read_result) header.
*
* Unique errors:
*
* -EINVAL: The inode count can't be zero. The inos ptr must be aligned
* to __u64 alignment. The results buffer size can't be larger than
* INT_MAX. Inode numbers can't be zero, must be sorted, and can't
* have duplicates. The xattr names must be unique, null terminated,
* and less than 256 bytes long.
*
* -ERANGE: The results for the requested inodes do not fit in the
* results buffer. Increase the buffer size (perhaps allowing for all
* xattrs with large values) or decrease the number of inodes per call.
*
* -ESTALE: The results could not be read from one stable version of
* file system metadata. Decrease the number of inodes requested.
*
* -EUCLEAN: Internal xattr metadata is inconsistent.
*/
struct scoutfs_ioctl_raw_read_inode_info {
	__u64 inos_ptr;		/* u64-aligned array of increasing inode numbers */
	__u32 inos_count;	/* elements in inos_ptr, must be non-zero */
	__u32 names_count;	/* number of names in the names_ptr buffer */
	__u64 names_ptr;	/* null-terminated xattr names, back to back */
	__u64 results_ptr;	/* buffer filled with unaligned result records */
	__u32 results_size;	/* bytes available at results_ptr */
	__u8 _pad[4];		/* must be zero */
};
/*
* @type is one of the enums that determines the type of the following
* result payload.
*
* @size is the number of bytes of result payload immediately following
* the result struct. It does not include the size of the result struct
* header.
*/
struct scoutfs_ioctl_raw_read_result {
	__u32 size;		/* payload bytes following this header, header excluded */
	__u8 _pad[7];		/* must be zero */
	__u8 type;		/* SCOUTFS_IOC_RAW_READ_RESULT_* payload type */
};
/*
* The _INODE result contains an initial 64bit inode number followed by a
* struct scoutfs_inode as defined in format.h. The size includes the
* 8byte initial inode number. With that subtracted the size of the
* inode struct defines its version (and so the fields it supports).
*/
#define SCOUTFS_IOC_RAW_READ_RESULT_INODE 1
/*
* The result payload contains the null terminated name and the value.
* The value size can be found by subtracting the null terminated name
* length from the result size.
*/
#define SCOUTFS_IOC_RAW_READ_RESULT_XATTR 2
/* nr 26: 25 is already SCOUTFS_IOC_RAW_READ_META_SEQ; ioctl nrs must be unique */
#define SCOUTFS_IOC_RAW_READ_INODE_INFO \
	_IOR(SCOUTFS_IOCTL_MAGIC, 26, struct scoutfs_ioctl_raw_read_inode_info)
#endif

View File

@@ -1093,24 +1093,19 @@ out_unlock:
return ret;
}
void scoutfs_lock_get_fs_item_range(u64 ino, struct scoutfs_key *start, struct scoutfs_key *end)
{
scoutfs_key_set_zeros(start);
start->sk_zone = SCOUTFS_FS_ZONE;
start->ski_ino = cpu_to_le64(ino & ~(u64)SCOUTFS_LOCK_INODE_GROUP_MASK);
scoutfs_key_set_ones(end);
end->sk_zone = SCOUTFS_FS_ZONE;
end->ski_ino = cpu_to_le64(ino | SCOUTFS_LOCK_INODE_GROUP_MASK);
}
int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int flags, u64 ino,
struct scoutfs_lock **ret_lock)
{
struct scoutfs_key start;
struct scoutfs_key end;
scoutfs_lock_get_fs_item_range(ino, &start, &end);
scoutfs_key_set_zeros(&start);
start.sk_zone = SCOUTFS_FS_ZONE;
start.ski_ino = cpu_to_le64(ino & ~(u64)SCOUTFS_LOCK_INODE_GROUP_MASK);
scoutfs_key_set_ones(&end);
end.sk_zone = SCOUTFS_FS_ZONE;
end.ski_ino = cpu_to_le64(ino | SCOUTFS_LOCK_INODE_GROUP_MASK);
return lock_key_range(sb, mode, flags, &start, &end, ret_lock);
}

View File

@@ -65,7 +65,6 @@ int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
struct scoutfs_key *key);
void scoutfs_lock_get_fs_item_range(u64 ino, struct scoutfs_key *start, struct scoutfs_key *end);
int scoutfs_lock_inode(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
struct inode *inode, struct scoutfs_lock **ret_lock);
int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int flags, u64 ino,

View File

@@ -34,7 +34,6 @@
#include "totl.h"
#include "util.h"
#include "quota.h"
#include "trans.h"
#include "counters.h"
#include "scoutfs_trace.h"
@@ -1087,10 +1086,6 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
if (ret < 0)
goto out;
ret = scoutfs_hold_trans(sb, true);
if (ret < 0)
goto out;
down_write(&qtinf->rwsem);
if (is_add) {
@@ -1100,30 +1095,28 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
else if (ret == 0)
ret = -EEXIST;
if (ret < 0)
goto release;
goto unlock;
rule_to_rule_val(&rv, &rule);
ret = scoutfs_item_create(sb, &key, &rv, sizeof(rv), lock);
if (ret < 0)
goto release;
goto unlock;
} else {
ret = find_rule(sb, &rule, &key, lock) ?:
scoutfs_item_delete(sb, &key, lock);
if (ret < 0)
goto release;
goto unlock;
}
scoutfs_quota_invalidate(sb);
ret = 0;
release:
unlock:
up_write(&qtinf->rwsem);
scoutfs_release_trans(sb);
out:
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
out:
if (is_add)
trace_scoutfs_quota_add_rule(sb, &rule, ret);
else

View File

@@ -1,744 +0,0 @@
/*
* Copyright (C) 2026 Versity Software, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/list_sort.h>
#include <linux/sort.h>
#include "format.h"
#include "key.h"
#include "block.h"
#include "inode.h"
#include "forest.h"
#include "client.h"
#include "ioctl.h"
#include "lock.h"
#include "xattr.h"
#include "attr_x.h"
#include "bsearch_index.h"
#include "raw.h"
/*
 * An item read from the forest of btrees, saved on a list so versions
 * from multiple log btrees can be sorted and resolved before use.
 */
struct fs_item {
	struct list_head head;	/* entry on the caller's item list */
	struct scoutfs_key key;
	u64 seq;		/* item version; greatest seq wins */
	int val_len;		/* bytes stored in val[] */
	bool deletion;		/* deletion items hide all older versions */
	/* val is aligned so we can deref structs in vals */
	u8 val[0] __aligned(ARCH_KMALLOC_MINALIGN);
};
/*
 * Allocate an fs_item holding a copy of the given key, metadata, and
 * value bytes and append it to the caller's list.  Returns 0 or
 * -ENOMEM.
 */
static int save_fs_item(struct list_head *list, struct scoutfs_key *key, u64 seq, u8 flags,
			void *val, int val_len)
{
	struct fs_item *item;

	/* small allocation: max btree val len is hundreds of bytes */
	item = kmalloc(offsetof(struct fs_item, val[val_len]), GFP_NOFS);
	if (item == NULL)
		return -ENOMEM;

	item->deletion = !!(flags & SCOUTFS_ITEM_FLAG_DELETION);
	item->val_len = val_len;
	item->seq = seq;
	item->key = *key;
	if (val_len > 0)
		memcpy(item->val, val, val_len);

	list_add_tail(&item->head, list);

	return 0;
}
/* unlink the item from its list, if it's on one, and free it */
static void free_fs_item(struct fs_item *item)
{
	if (!list_empty(&item->head))
		list_del_init(&item->head);

	kfree(item);
}
/* free every item remaining on the list */
static void free_fs_items(struct list_head *list)
{
	struct fs_item *fsi;
	struct fs_item *tmp;

	list_for_each_entry_safe(fsi, tmp, list, head)
		free_fs_item(fsi);
}
/*
 * Return the item after fsi on the list, or NULL when fsi is the last
 * entry.  The continue-iteration loop returns on the first entry it
 * visits, so it never walks more than one step.
 */
static struct fs_item *next_fs_item(struct list_head *list, struct fs_item *fsi)
{
	list_for_each_entry_continue(fsi, list, head)
		return fsi;
	return NULL;
}
/*
 * list_sort() comparison: order items by ascending key, then items with
 * equal keys by descending seq so the newest version sorts first.
 * KC_LIST_CMP_CONST presumably papers over list_sort() constness
 * differences across kernel versions — defined elsewhere; confirm.
 */
static int cmp_fs_items(void *priv, KC_LIST_CMP_CONST struct list_head *A,
			KC_LIST_CMP_CONST struct list_head *B)
{
	KC_LIST_CMP_CONST struct fs_item *a =
		container_of(A, KC_LIST_CMP_CONST struct fs_item, head);
	KC_LIST_CMP_CONST struct fs_item *b =
		container_of(B, KC_LIST_CMP_CONST struct fs_item, head);

	return scoutfs_key_compare(&a->key, &b->key) ?: -scoutfs_cmp(a->seq, b->seq);
}
/*
 * Resolve the raw items read from all the btrees into the single
 * visible version of each key: sort by key and descending seq, drop
 * items past the clamped end key, keep only the newest version of each
 * key, and drop deletion items once they've hidden the older versions.
 */
static void sort_and_remove(struct list_head *list, struct scoutfs_key *end)
{
	struct fs_item *prev;
	struct fs_item *fsi;
	struct fs_item *tmp;

	list_sort(NULL, list, cmp_fs_items);

	/* start by removing any items read before end was decreased by later blocks */
	list_for_each_entry_safe_reverse(fsi, tmp, list, head) {
		if (scoutfs_key_compare(&fsi->key, end) > 0)
			free_fs_item(fsi);
		else
			break;
	}

	prev = NULL;
	list_for_each_entry_safe(fsi, tmp, list, head) {
		/* remove this item if it's an older version of previous item */
		if (prev && scoutfs_key_compare(&prev->key, &fsi->key) == 0) {
			free_fs_item(fsi);
			continue;
		}

		/* remove previous deletion item once it has removed all older versions */
		if (prev && prev->deletion)
			free_fs_item(prev);

		/* next item might match this, record to compare */
		prev = fsi;
	}

	/* remove the last item if it's a deletion */
	list_for_each_entry_reverse(fsi, list, head) {
		if (fsi->deletion)
			free_fs_item(fsi);
		break;
	}
}
/*
 * scoutfs_forest_item_cb that saves every item it's given onto the list
 * in arg; version resolution happens later in sort_and_remove().
 */
static int save_all_items(struct super_block *sb, struct scoutfs_key *key, u64 seq, u8 flags,
			  void *val, int val_len, int fic, void *arg)
{
	struct list_head *list = arg;

	return save_fs_item(list, key, seq, flags, val, val_len);
}
/* -------------- */
/* translate an inode index key into the ioctl's meta_seq result struct */
static void ms_from_key(struct scoutfs_ioctl_meta_seq *ms, struct scoutfs_key *key)
{
	ms->meta_seq = le64_to_cpu(key->skii_major);
	ms->ino = le64_to_cpu(key->skii_ino);
}
/*
 * Increment the key's ino->meta_seq so that we don't land between items.
 * ino is the minor component; when it wraps to zero the carry
 * propagates into the major (meta_seq) component.
 */
static void inc_meta_seq(struct scoutfs_key *key)
{
	le64_add_cpu(&key->skii_ino, 1);
	if (key->skii_ino == 0)
		le64_add_cpu(&key->skii_major, 1);
}
/*
 * Read meta_seq inode index items between the caller's start and end
 * positions and copy (meta_seq, ino) pairs to the userspace results
 * array.
 *
 * Items are read from a stable set of btree roots in batches bounded
 * by each forest call's returned end key; each batch is sorted and
 * pruned so only the newest visible version of each item remains.
 * Stale block reads retry a bounded number of times, but only while
 * nothing has been copied yet.
 *
 * Returns the number of results copied, or -errno, and sets *last_ret
 * to the position reached so the caller can continue from there.
 */
int scoutfs_raw_read_meta_seq(struct super_block *sb,
			      struct scoutfs_ioctl_raw_read_meta_seq *rms,
			      struct scoutfs_ioctl_meta_seq *last_ret)
{
	struct scoutfs_ioctl_meta_seq __user *ums;
	struct scoutfs_ioctl_meta_seq ms;
	struct scoutfs_net_roots roots;
	DECLARE_SAVED_REFS(saved);
	struct scoutfs_key start;
	struct scoutfs_key last;
	struct scoutfs_key key;
	struct scoutfs_key end;
	struct fs_item *fsi;
	struct fs_item *tmp;
	LIST_HEAD(list);
	int retries;
	int copied;
	int count;
	int ret;

	/*
	 * The out: path reads both of these even when get_roots fails
	 * before they're otherwise set, so initialize them.  end starts
	 * at the caller's start position: on an early error last_ret
	 * reports that no progress was made.
	 */
	memset(&roots, 0, sizeof(roots));
	scoutfs_inode_init_index_key(&end, SCOUTFS_INODE_INDEX_META_SEQ_TYPE,
				     rms->start.meta_seq, 0, rms->start.ino);

	ums = (void __user *)rms->results_ptr;
	count = rms->results_size / sizeof(struct scoutfs_ioctl_meta_seq);
	retries = 10;
	copied = 0;
	scoutfs_inode_init_index_key(&last, SCOUTFS_INODE_INDEX_META_SEQ_TYPE,
				     rms->end.meta_seq, 0, rms->end.ino);
retry:
	ret = scoutfs_client_get_roots(sb, &roots);
	if (ret)
		goto out;
	scoutfs_inode_init_index_key(&key, SCOUTFS_INODE_INDEX_META_SEQ_TYPE,
				     rms->start.meta_seq, 0, rms->start.ino);
	for (;;) {
		start = key;
		end = last;
		ret = scoutfs_forest_read_items_roots(sb, &roots, 0, &key, NULL, &start, &end,
						      save_all_items, &list);
		if (ret < 0)
			goto out;
		/* keep only the newest visible version of each item in the batch */
		sort_and_remove(&list, &end);
		list_for_each_entry_safe(fsi, tmp, &list, head) {
			if (copied == count) {
				/* results are full, set end to before item can't return */
				end = fsi->key;
				/*
				 * NOTE(review): assumes index items never have a
				 * zero skii_ino, so this decrement can't borrow
				 * out of the major seq -- confirm.
				 */
				le64_add_cpu(&end.skii_ino, -1ULL);
				ret = 0;
				goto out;
			}
			ms_from_key(&ms, &fsi->key);
			if (copy_to_user(&ums[copied], &ms, sizeof(ms))) {
				ret = -EFAULT;
				goto out;
			}
			free_fs_item(fsi);
			copied++;
		}
		/* the forest call read all the way to our last position, done */
		if (scoutfs_key_compare(&end, &last) >= 0) {
			end = last;
			break;
		}
		key = end;
		inc_meta_seq(&key);
	}
	ret = 0;
out:
	free_fs_items(&list);
	ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
	if (ret == -ESTALE && copied == 0 && retries-- > 0)
		goto retry;
	ms_from_key(last_ret, &end);
	return ret ?: copied;
}
/* -------------- */
/*
 * Search context for a raw inode info read: the inode numbers to look
 * up, the xattr names to match, and the fs items saved so far.
 */
struct inode_info_context {
	size_t nr_inos;		/* number of entries in inos[] */
	u64 *inos;		/* sorted, unique inode numbers from userspace */
	size_t nr_names;	/* number of entries in names[] */
	struct xattr_name {
		u64 hash;	/* scoutfs_xattr_name_hash() of name */
		char *name;	/* kmalloced copy of the user's name */
		u8 name_len; /* no null */
	} *names;		/* left sorted by hash by setup_context() */
	struct list_head fs_items;	/* saved fs_item results */
};
/* sort()/bsearch comparator for u64s in ascending order */
static int cmp_u64(const void *A, const void *B)
{
	const u64 a = *(const u64 *)A;
	const u64 b = *(const u64 *)B;

	return scoutfs_cmp(a, b);
}
/* order xattr search names by their name hash */
static int cmp_name_hash(const void *A, const void *B)
{
	const struct xattr_name *na = A;
	const struct xattr_name *nb = B;

	return scoutfs_cmp(na->hash, nb->hash);
}
/*
 * Order xattr names by length first, then by contents, so equality
 * means the names match exactly.
 */
static int cmp_name_string(const void *A, const void *B)
{
	const struct xattr_name *na = A;
	const struct xattr_name *nb = B;
	int cmp;

	cmp = scoutfs_cmp(na->name_len, nb->name_len);
	if (cmp == 0)
		cmp = memcmp(na->name, nb->name, na->name_len);

	return cmp;
}
/*
 * Validate and copy the caller's search arguments into the context.
 *
 * The ino array must be non-empty, contain no zero inos, and be
 * strictly increasing (sorted, no duplicates).  Each xattr name is
 * copied from its null-terminated user string and must be non-empty
 * and within the xattr name length limit; all names must differ.  The
 * names are left sorted by hash so item callbacks can bsearch them.
 *
 * Returns 0 or -errno.  The caller always calls free_context() and
 * free_fs_items(&ctx->fs_items), so partial allocations are left in
 * place and the list is initialized before any return.
 */
static int setup_context(struct inode_info_context *ctx,
			 struct scoutfs_ioctl_raw_read_inode_info *rii)
{
	__u64 __user *uinos = (void __user *)rii->inos_ptr;
	char __user *uname;
	long len_null;
	long len;
	int ret;
	u32 i;

	ctx->nr_inos = rii->inos_count;
	ctx->nr_names = rii->names_count;
	INIT_LIST_HEAD(&ctx->fs_items);

	/*
	 * The search unconditionally reads inos[0] and inos[nr_inos - 1];
	 * reject an empty array rather than reading past a zero-size
	 * allocation.
	 */
	if (ctx->nr_inos == 0) {
		ret = -EINVAL;
		goto out;
	}

	ctx->inos = kvmalloc_array(ctx->nr_inos, sizeof(ctx->inos[0]), GFP_KERNEL);
	ctx->names = kvcalloc(ctx->nr_names, sizeof(ctx->names[0]), GFP_KERNEL);
	if (!ctx->inos || !ctx->names) {
		ret = -ENOMEM;
		goto out;
	}

	if (copy_from_user(ctx->inos, uinos, ctx->nr_inos * sizeof(ctx->inos[0]))) {
		ret = -EFAULT;
		goto out;
	}

	/* inos must not be 0 and must increase and contain no duplicates */
	if (ctx->inos[0] == 0) {
		ret = -EINVAL;
		goto out;
	}
	for (i = 1; i < ctx->nr_inos; i++) {
		if (ctx->inos[i] <= ctx->inos[i - 1]) {
			ret = -EINVAL;
			goto out;
		}
	}

	uname = (void __user *)rii->names_ptr;
	for (i = 0; i < ctx->nr_names; i++) {
		/* strnlen_user counts the null; > max means the name is too long */
		len_null = SCOUTFS_XATTR_MAX_NAME_LEN + 1;
		ret = strnlen_user(uname, len_null);
		if (ret <= 1 || ret > len_null) {
			if (ret >= 0)
				ret = -EINVAL;
			goto out;
		}
		len_null = ret;
		len = len_null - 1;
		ctx->names[i].name_len = len;
		ctx->names[i].name = kmalloc(len_null, GFP_KERNEL);
		if (!ctx->names[i].name) {
			ret = -ENOMEM;
			goto out;
		}
		/* a short copy means the string changed underneath us */
		ret = strncpy_from_user(ctx->names[i].name, uname, len_null);
		if (ret != len) {
			if (ret >= 0)
				ret = -EINVAL;
			goto out;
		}
		ctx->names[i].hash = scoutfs_xattr_name_hash(ctx->names[i].name, len);
		uname += len_null;
	}

	/* make sure all the names differ */
	sort(ctx->names, ctx->nr_names, sizeof(ctx->names[0]), cmp_name_string, NULL);
	for (i = 1; i < ctx->nr_names; i++) {
		if (cmp_name_string(&ctx->names[i - 1], &ctx->names[i]) == 0) {
			ret = -EINVAL;
			goto out;
		}
	}
	/* then leave them sorted by hash */
	sort(ctx->names, ctx->nr_names, sizeof(ctx->names[0]), cmp_name_hash, NULL);

	ret = 0;
out:
	return ret;
}
static void free_context(struct inode_info_context *ctx)
{
int i;
kvfree(ctx->inos);
if (ctx->names) {
for (i = 0; i < ctx->nr_names; i++) {
if (!ctx->names[i].name)
break;
kfree(ctx->names[i].name);
}
kvfree(ctx->names);
}
}
/*
 * Iterate over fs items and save any that we're interested in. We want
 * inode struct items and any xattr items whose hashes collide with the
 * xattr names we're searching for.
 *
 * Our forest calls can be advancing through the key space as we see
 * slices that intersect with blocks in trees. And each forest caller
 * can be resetting the key position to the start of each forest block
 * it reads in an intersection.
 *
 * From this callback's perspective, the key can be jumping all over the
 * place. We don't have any iterative position state. For each key we
 * decide if we want to save it and then set the key to the next key we
 * want after the current key. We'll combine all the saved keys later.
 *
 * Returning -ESRCH with an updated *key asks the forest walk to jump
 * to that position rather than continuing from the current item;
 * returning 0 continues the walk in place.
 */
static int save_info_items(struct super_block *sb, struct scoutfs_key *key, u64 seq,
			   u8 flags, void *val, int val_len, int fic, void *arg)
{
	u64 ino = le64_to_cpu(key->_sk_first);
	struct inode_info_context *ctx = arg;
	struct xattr_name name;
	size_t name_ind;
	size_t ino_ind;
	bool hash_match;
	bool ino_match;
	int ret;
	ino_ind = bsearch_index(&ino, ctx->inos, ctx->nr_inos, sizeof(ctx->inos[0]), cmp_u64);
	ino_match = ino_ind < ctx->nr_inos && ctx->inos[ino_ind] == ino;
	/* jump to to next ino, could be for this key if we're before the ino struct */
	if (!ino_match || key->sk_type < SCOUTFS_INODE_TYPE)
		goto next_inode;
	/* find our search position in xattrs */
	if (key->sk_type < SCOUTFS_XATTR_TYPE) {
		name_ind = 0;
		hash_match = false;
	} else if (key->sk_type == SCOUTFS_XATTR_TYPE) {
		name = (struct xattr_name) { .hash = le64_to_cpu(key->skx_name_hash) };
		name_ind = bsearch_index(&name, ctx->names, ctx->nr_names, sizeof(ctx->names[0]),
					 cmp_name_hash);
		hash_match = name_ind < ctx->nr_names && ctx->names[name_ind].hash == name.hash;
	} else {
		/* past the xattr zone for this ino */
		name_ind = ctx->nr_names;
		hash_match = false;
	}
	/* save inode items for our search and all xattr items that match search hashes */
	if (key->sk_type == SCOUTFS_INODE_TYPE || hash_match) {
		ret = save_fs_item(&ctx->fs_items, key, seq, flags, val, val_len);
		if (ret < 0)
			goto out;
	}
	/* let the caller continue iterating through matching xattr items */
	if (hash_match) {
		ret = 0;
		goto out;
	}
	/* jump to the next xattr */
	if (name_ind < ctx->nr_names) {
		scoutfs_xattr_init_key(key, ino, ctx->names[name_ind].hash, 0);
		ret = -ESRCH;
		goto out;
	}
	/* no more xattrs, must be done with this ino */
	ino_ind++;
next_inode:
	/* now jump to next inode struct key, or we're done */
	if (ino_ind < ctx->nr_inos)
		scoutfs_inode_init_key(key, ctx->inos[ino_ind]);
	else
		scoutfs_key_set_ones(key);
	ret = -ESRCH;
out:
	return ret;
}
/*
 * Copy a buffer to the user's destination at *dst_off, advancing the
 * offset on success.  Returns 0, -ERANGE if the copy doesn't fit in
 * dst_size, or -EFAULT.
 */
static int copy_to_user_off(void __user *dst, size_t *dst_off, size_t dst_size,
			    void *src, size_t copy_size)
{
	if (copy_size == 0)
		return 0;

	/*
	 * Check against the remaining space rather than computing
	 * *dst_off + copy_size, which could wrap around size_t and
	 * falsely pass the bounds check.
	 */
	if (*dst_off > dst_size || copy_size > dst_size - *dst_off)
		return -ERANGE;

	if (copy_to_user(dst + *dst_off, src, copy_size))
		return -EFAULT;

	*dst_off += copy_size;
	return 0;
}
/*
 * Copy a result header to userspace followed by up to two data
 * payloads.  The header's size covers both payloads plus extra_size
 * bytes the caller will append later (partial xattr values).
 */
static int copy_result_to_user(void __user *ures, size_t *off, size_t size, u8 type,
			       void *a_data, size_t a_len, void *b_data, size_t b_len,
			       size_t extra_size)
{
	struct scoutfs_ioctl_raw_read_result res;

	/*
	 * Zero the whole struct, padding included, then assign fields
	 * individually.  Whole-struct assignment from a compound
	 * literal may leave padding bytes indeterminate, and res is
	 * copied to userspace, so stale padding would leak stack bytes.
	 */
	memset(&res, 0, sizeof(res));
	res.size = a_len + b_len + extra_size;
	res.type = type;

	return copy_to_user_off(ures, off, size, &res, sizeof(res)) ?:
	       (a_len ? copy_to_user_off(ures, off, size, a_data, a_len) : 0) ?:
	       (b_len ? copy_to_user_off(ures, off, size, b_data, b_len) : 0);
}
/*
 * Copy one saved fs item to the userspace results stream at *off.
 *
 * Inode items are emitted as a result header followed by the ino and
 * the raw inode struct.  The first part of an xattr is emitted as a
 * header whose size covers the entire xattr, followed by the
 * null-terminated name and however much of the value this part holds.
 * Additional xattr parts are appended raw, with no header, so the
 * value bytes form one contiguous stream.  Items of other types are
 * silently skipped.
 */
static int copy_item_results_to_user(struct super_block *sb, struct inode_info_context *ctx,
				     void __user *ures, size_t *off, size_t size,
				     struct fs_item *fsi)
{
	struct scoutfs_inode *cinode;
	struct scoutfs_xattr *xat;
	static char null = '\0';
	size_t len;
	u64 ino;
	int ret = 0;
	if (fsi->key.sk_type == SCOUTFS_INODE_TYPE) {
		cinode = (void *)fsi->val;
		ino = le64_to_cpu(fsi->key.ski_ino);
		ret = copy_result_to_user(ures, off, size, SCOUTFS_IOC_RAW_READ_RESULT_INODE,
					  &ino, sizeof(ino), cinode, sizeof(struct scoutfs_inode),
					  0);
	} else if (fsi->key.sk_type == SCOUTFS_XATTR_TYPE) {
		if (fsi->key.skx_part == 0) {
			/* header size spans all parts; extra_size covers bytes in later parts */
			xat = (void *)fsi->val;
			ret = copy_result_to_user(ures, off, size,
						  SCOUTFS_IOC_RAW_READ_RESULT_XATTR, xat->name,
						  xat->name_len, &null, sizeof(null),
						  le16_to_cpu(xat->val_len));
			if (ret == 0 && xat->val_len != 0) {
				/* then append the start of the value */
				len = fsi->val_len -
				      offsetof(struct scoutfs_xattr, name[xat->name_len]);
				ret = copy_to_user_off(ures, off, size, xat->name + xat->name_len,
						       len);
			}
		} else {
			/* continue appending partial values */
			ret = copy_to_user_off(ures, off, size, fsi->val, fsi->val_len);
		}
	}
	return ret;
}
/* inode items with no remaining links are invisible to the search */
static bool ignore_zero_nlink(struct inode_info_context *ctx, struct fs_item *fsi)
{
	struct scoutfs_inode *sino = (void *)fsi->val;

	return sino->nlink == 0;
}
/*
 * Return true if the first part of a saved xattr doesn't exactly match
 * any of the names we're searching for.  Items were saved on a hash
 * collision, so scan the run of search names with this hash for one
 * whose string compares equal.
 */
static bool ignore_xattr_name(struct inode_info_context *ctx, struct fs_item *fsi)
{
	struct scoutfs_xattr *xat = (void *)fsi->val;
	struct xattr_name name = {
		.hash = le64_to_cpu(fsi->key.skx_name_hash),
		.name = xat->name,
		.name_len = xat->name_len,
	};
	size_t i = bsearch_index(&name, ctx->names, ctx->nr_names,
				 sizeof(ctx->names[0]), cmp_name_hash);

	while (i < ctx->nr_names && ctx->names[i].hash == name.hash) {
		if (cmp_name_string(&name, &ctx->names[i]) == 0)
			return false;
		i++;
	}

	return true;
}
/*
 * Walk the sorted saved items and copy results to userspace, filtering
 * out inodes and xattrs the caller shouldn't see.  Returns the number
 * of bytes copied, or -errno.
 *
 * A small state machine tracks the current inode (in_ino) and xattr id
 * (in_id) so that every item after an ignored inode struct, and every
 * later part after an ignored first xattr part, is also ignored.
 * xattr_end records the offset the stream must reach once all parts of
 * a multi-part xattr have been copied; a mismatch means parts were
 * missing or sizes were inconsistent and is returned as -EUCLEAN.
 */
static int copy_results_to_user(struct super_block *sb, struct inode_info_context *ctx,
				struct scoutfs_ioctl_raw_read_inode_info *rii)
{
	void __user *ures = (void __user *)rii->results_ptr;
	struct scoutfs_xattr *xat;
	struct fs_item *next;
	struct fs_item *fsi;
	struct fs_item *tmp;
	size_t xattr_end;
	size_t off;
	__le64 in_ino;
	__le64 in_id;
	int ret;
	in_ino = 0;
	xattr_end = 0;
	in_id = 0;
	off = 0;
	list_for_each_entry_safe(fsi, tmp, &ctx->fs_items, head) {
		/*
		 * ignore:
		 * - inodes with an nlink of 0
		 * - all items for an ino after the inode struct that we're ignoring
		 * - first xattr parts with a name we don't need
		 * - additional xattr parts when we ignored the first
		 */
		if ((fsi->key.sk_type == SCOUTFS_INODE_TYPE && ignore_zero_nlink(ctx, fsi)) ||
		    (fsi->key.sk_type > SCOUTFS_INODE_TYPE && fsi->key._sk_first != in_ino) ||
		    (fsi->key.sk_type == SCOUTFS_XATTR_TYPE &&
		     ((fsi->key.skx_part == 0 && ignore_xattr_name(ctx, fsi)) ||
		      (fsi->key.skx_part > 0 && fsi->key.skx_id != in_id)))) {
			free_fs_item(fsi);
			in_ino = 0;
			in_id = 0;
			continue;
		}
		/* advance ino/xattr stream context state machine */
		if (fsi->key.sk_type == SCOUTFS_INODE_TYPE) {
			in_ino = fsi->key.ski_ino;
			in_id = 0;
		} else if (fsi->key.sk_type == SCOUTFS_XATTR_TYPE && fsi->key.skx_part == 0) {
			in_id = fsi->key.skx_id;
			/* save the required offset after the complete xattr */
			xat = (void *)fsi->val;
			xattr_end = off + sizeof(struct scoutfs_ioctl_raw_read_result) +
				    xat->name_len + 1 + le16_to_cpu(xat->val_len);
		}
		/* copy results, usually with header, but additional xattr parts copied raw */
		ret = copy_item_results_to_user(sb, ctx, ures, &off, rii->results_size, fsi);
		if (ret < 0)
			goto out;
		/* make sure we saw all xattr parts and copied the correct size */
		if (xattr_end > 0 &&
		    !((next = next_fs_item(&ctx->fs_items, fsi)) &&
		      next->key.sk_type == SCOUTFS_XATTR_TYPE && next->key.skx_ino == in_ino &&
		      next->key.skx_id == in_id)) {
			if (off != xattr_end) {
				ret = -EUCLEAN;
				goto out;
			}
			xattr_end = 0;
		}
	}
	ret = 0;
out:
	return ret ?: off;
}
/*
 * If the key is for an inode we're not interested in, or if it's past
 * that inode's xattr items, advance it to the inode struct key of the
 * next searched ino (or to all-ones when the search is exhausted).
 * This is used between forest read calls to skip uninteresting leaf
 * blocks; the item callback handles iteration within an inode across
 * multiple leaves.
 */
static void advance_key_ino(struct scoutfs_key *key, struct inode_info_context *ctx)
{
	u64 ino = le64_to_cpu(key->_sk_first);
	size_t ind = bsearch_index(&ino, ctx->inos, ctx->nr_inos,
				   sizeof(ctx->inos[0]), cmp_u64);

	if (ind < ctx->nr_inos && ctx->inos[ind] == ino) {
		/* still within an interesting inode's items, leave the key alone */
		if (key->sk_type <= SCOUTFS_XATTR_TYPE)
			return;
		/* past its xattrs, move on to the next searched ino */
		ind++;
	}

	if (ind == ctx->nr_inos)
		scoutfs_key_set_ones(key);
	else
		scoutfs_inode_init_key(key, ctx->inos[ind]);
}
/*
 * Read raw inode structs, and the xattrs matching the caller's names,
 * for a set of inodes from a stable set of btree roots and copy the
 * results to userspace.
 *
 * Items are read in batches bounded by each ino's lock range so
 * results are consistent with locked writers.  Batches are sorted and
 * pruned before being appended to the full item list, which is
 * filtered and copied out at the end.  Stale block reads restart the
 * whole walk a bounded number of times.
 *
 * Returns the number of result bytes copied, or -errno.
 */
int scoutfs_raw_read_inode_info(struct super_block *sb,
				struct scoutfs_ioctl_raw_read_inode_info *rii)
{
	struct inode_info_context ctx = {0, };
	struct scoutfs_net_roots roots;
	DECLARE_SAVED_REFS(saved);
	struct scoutfs_key lock_start;
	struct scoutfs_key lock_end;
	struct scoutfs_key start;
	struct scoutfs_key last;
	struct scoutfs_key key;
	struct scoutfs_key end;
	LIST_HEAD(list);
	int retries = 10;
	int ret;

	/*
	 * The out: path passes roots refs to the stale check even when
	 * setup_context() fails before get_roots runs, so zero them.
	 */
	memset(&roots, 0, sizeof(roots));

	ret = setup_context(&ctx, rii);
	if (ret < 0)
		goto out;

	/* the final interesting key: last ino's last xattr, or its inode struct */
	if (ctx.nr_names > 0)
		scoutfs_xattr_init_key(&last, ctx.inos[ctx.nr_inos - 1],
				       ctx.names[ctx.nr_names - 1].hash, U64_MAX);
	else
		scoutfs_inode_init_key(&last, ctx.inos[ctx.nr_inos - 1]);
retry:
	ret = scoutfs_client_get_roots(sb, &roots);
	if (ret)
		goto out;

	scoutfs_inode_init_key(&key, ctx.inos[0]);
	while (scoutfs_key_compare(&key, &last) <= 0) {
		/* bound each batch by the current ino's lock range */
		scoutfs_lock_get_fs_item_range(le64_to_cpu(key._sk_first), &lock_start, &lock_end);
		start = key;
		end = last;
		if (scoutfs_key_compare(&lock_end, &end) < 0)
			end = lock_end;
		ret = scoutfs_forest_read_items_roots(sb, &roots, 0, &key, &lock_start,
						      &start, &end, save_info_items, &ctx);
		if (ret < 0)
			goto out;
		/* save each sorted batch, might have partial results for an inode */
		sort_and_remove(&ctx.fs_items, &end);
		list_splice_tail_init(&ctx.fs_items, &list);
		key = end;
		if (!scoutfs_key_is_ones(&key)) {
			scoutfs_key_inc(&key);
			advance_key_ino(&key, &ctx);
		}
	}

	/* return accumulated batches for filtering and copying */
	list_splice_tail_init(&list, &ctx.fs_items);
	ret = copy_results_to_user(sb, &ctx, rii);
out:
	free_fs_items(&list);
	free_fs_items(&ctx.fs_items);
	ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
	if (ret == -ESTALE && retries-- > 0)
		goto retry;
	free_context(&ctx);
	return ret;
}

View File

@@ -1,10 +0,0 @@
#ifndef _SCOUTFS_RAW_H_
#define _SCOUTFS_RAW_H_
int scoutfs_raw_read_meta_seq(struct super_block *sb,
struct scoutfs_ioctl_raw_read_meta_seq *rms,
struct scoutfs_ioctl_meta_seq *last_ret);
int scoutfs_raw_read_inode_info(struct super_block *sb,
struct scoutfs_ioctl_raw_read_inode_info *rii);
#endif

View File

@@ -256,14 +256,6 @@ static void server_down(struct server_info *server)
cmpxchg(&server->status, was, SERVER_DOWN);
}
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
{
*key = (struct scoutfs_key) {
.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
.skmc_rid = cpu_to_le64(rid),
};
}
/*
* The per-holder allocation block use budget balances batching
* efficiency and concurrency. The larger this gets, the fewer
@@ -971,28 +963,6 @@ static int find_log_trees_item(struct super_block *sb,
return ret;
}
/*
* Return true if the given rid has a mounted_clients entry.
*/
static bool rid_is_mounted(struct super_block *sb, u64 rid)
{
DECLARE_SERVER_INFO(sb, server);
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
SCOUTFS_BTREE_ITEM_REF(iref);
struct scoutfs_key key;
int ret;
init_mounted_client_key(&key, rid);
mutex_lock(&server->mounted_clients_mutex);
ret = scoutfs_btree_lookup(sb, &super->mounted_clients, &key, &iref);
if (ret == 0)
scoutfs_btree_put_iref(&iref);
mutex_unlock(&server->mounted_clients_mutex);
return ret == 0;
}
/*
* Find the log_trees item with the greatest nr for each rid. Fills the
* caller's log_trees and sets the key before the returned log_trees for
@@ -1251,60 +1221,6 @@ static int do_finalize_ours(struct super_block *sb,
* happens to arrive at just the right time. That's fine, merging will
* ignore and tear down the empty input.
*/
static int reclaim_open_log_tree(struct super_block *sb, u64 rid);
/*
* Reclaim log trees for rids that have no mounted_clients entry.
* They block merges by appearing active. reclaim_open_log_tree
* may need multiple commits to drain allocators (-EINPROGRESS).
*
* The caller holds logs_mutex and a commit, both are dropped and
* re-acquired around each reclaim call. Returns >0 if any orphans
* were reclaimed so the caller can re-check state that may have
* changed while the lock was dropped.
*/
static int reclaim_orphan_log_trees(struct super_block *sb, u64 rid,
struct commit_hold *hold)
{
struct server_info *server = SCOUTFS_SB(sb)->server_info;
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
struct scoutfs_log_trees lt;
struct scoutfs_key key;
bool found = false;
u64 orphan_rid;
int ret;
int err;
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &lt)) > 0) {
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) ||
le64_to_cpu(lt.rid) == rid ||
rid_is_mounted(sb, le64_to_cpu(lt.rid)))
continue;
orphan_rid = le64_to_cpu(lt.rid);
scoutfs_err(sb, "reclaiming orphan log trees for rid %016llx nr %llu",
orphan_rid, le64_to_cpu(lt.nr));
found = true;
do {
mutex_unlock(&server->logs_mutex);
err = reclaim_open_log_tree(sb, orphan_rid);
ret = server_apply_commit(sb, hold,
err == -EINPROGRESS ? 0 : err);
server_hold_commit(sb, hold);
mutex_lock(&server->logs_mutex);
} while (err == -EINPROGRESS && ret == 0);
if (ret < 0)
break;
}
return ret < 0 ? ret : found;
}
#define FINALIZE_POLL_MIN_DELAY_MS 5U
#define FINALIZE_POLL_MAX_DELAY_MS 100U
#define FINALIZE_POLL_DELAY_GROWTH_PCT 150U
@@ -1345,16 +1261,6 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
break;
}
ret = reclaim_orphan_log_trees(sb, rid, hold);
if (ret < 0) {
err_str = "reclaiming orphan log trees";
break;
}
if (ret > 0) {
/* lock was dropped, re-check merge status */
continue;
}
/* look for finalized and other active log btrees */
saw_finalized = false;
others_active = false;
@@ -2023,7 +1929,7 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
mutex_unlock(&server->alloc_mutex);
/* only finalize, allowing merging, once the allocators are fully freed */
if (ret == 0 && !scoutfs_trigger(sb, RECLAIM_SKIP_FINALIZE)) {
if (ret == 0) {
/* the transaction is no longer open */
lt.commit_trans_seq = lt.get_trans_seq;
@@ -2075,8 +1981,7 @@ static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret)
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &lt)) > 0) {
if ((le64_to_cpu(lt.get_trans_seq) > le64_to_cpu(lt.commit_trans_seq)) &&
le64_to_cpu(lt.get_trans_seq) <= last_seq &&
rid_is_mounted(sb, le64_to_cpu(lt.rid))) {
le64_to_cpu(lt.get_trans_seq) <= last_seq) {
last_seq = le64_to_cpu(lt.get_trans_seq) - 1;
}
}
@@ -3628,6 +3533,14 @@ out:
return scoutfs_net_response(sb, conn, cmd, id, ret, &nst, sizeof(nst));
}
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
{
*key = (struct scoutfs_key) {
.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
.skmc_rid = cpu_to_le64(rid),
};
}
static bool invalid_mounted_client_item(struct scoutfs_btree_item_ref *iref)
{
return (iref->val_len != sizeof(struct scoutfs_mounted_client_btree_val));

View File

@@ -30,11 +30,6 @@ void scoutfs_totl_merge_init(struct scoutfs_totl_merging *merg)
memset(merg, 0, sizeof(struct scoutfs_totl_merging));
}
/*
* bin the incoming merge inputs so that we can resolve delta items
* properly. Finalized logs that are merge inputs are kept separately
* from those that are not.
*/
void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
u64 seq, u8 flags, void *val, int val_len, int fic)
{
@@ -44,10 +39,10 @@ void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
merg->fs_seq = seq;
merg->fs_total = le64_to_cpu(tval->total);
merg->fs_count = le64_to_cpu(tval->count);
} else if (fic & FIC_MERGE_INPUT) {
merg->inp_seq = seq;
merg->inp_total += le64_to_cpu(tval->total);
merg->inp_count += le64_to_cpu(tval->count);
} else if (fic & FIC_FINALIZED) {
merg->fin_seq = seq;
merg->fin_total += le64_to_cpu(tval->total);
merg->fin_count += le64_to_cpu(tval->count);
} else {
merg->log_seq = seq;
merg->log_total += le64_to_cpu(tval->total);
@@ -58,18 +53,15 @@ void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
/*
* .totl. item merging has to be careful because the log btree merging
* code can write partial results to the fs_root. This means that a
* reader can see both cases where merge input deltas should be applied
* to the old fs items and where they have already been applied to the
* partially merged fs items.
*
* Only finalized log trees that are inputs to the current merge cycle
* are tracked in the inp_ bucket. Finalized trees that aren't merge
* inputs and active log trees are always applied unconditionally since
* they cannot be in fs_root.
* reader can see both cases where new finalized logs should be applied
* to the old fs items and where old finalized logs have already been
* applied to the partially merged fs items. Currently active logged
* items are always applied on top of all cases.
*
* These cases are differentiated with a combination of sequence numbers
* in items and the count of contributing xattrs. This lets us
* recognize all cases, including when merge inputs were merged and
* in items, the count of contributing xattrs, and a flag
* differentiating finalized and active logged items. This lets us
* recognize all cases, including when finalized logs were merged and
* deleted the fs item.
*/
void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total, __u64 *count)
@@ -83,14 +75,14 @@ void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total,
*count = merg->fs_count;
}
/* apply merge input deltas if they're newer or creating */
if (((merg->fs_seq != 0) && (merg->inp_seq > merg->fs_seq)) ||
((merg->fs_seq == 0) && (merg->inp_count > 0))) {
*total += merg->inp_total;
*count += merg->inp_count;
/* apply finalized logs if they're newer or creating */
if (((merg->fs_seq != 0) && (merg->fin_seq > merg->fs_seq)) ||
((merg->fs_seq == 0) && (merg->fin_count > 0))) {
*total += merg->fin_total;
*count += merg->fin_count;
}
/* always apply non-input finalized and active logs */
/* always apply active logs which must be newer than fs and finalized */
if (merg->log_seq > 0) {
*total += merg->log_total;
*count += merg->log_count;

View File

@@ -7,9 +7,9 @@ struct scoutfs_totl_merging {
u64 fs_seq;
u64 fs_total;
u64 fs_count;
u64 inp_seq;
u64 inp_total;
s64 inp_count;
u64 fin_seq;
u64 fin_total;
s64 fin_count;
u64 log_seq;
u64 log_total;
s64 log_count;

View File

@@ -45,8 +45,6 @@ static char *names[] = {
[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
[SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE] = "reclaim_skip_finalize",
[SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL] = "log_merge_force_partial",
};
bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)

View File

@@ -8,8 +8,6 @@ enum scoutfs_trigger {
SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE,
SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL,
SCOUTFS_TRIGGER_NR,
};

View File

@@ -95,7 +95,6 @@ struct wkic_info {
/* block reading slow path */
struct mutex roots_mutex;
struct scoutfs_net_roots roots;
u64 merge_input_seq;
u64 roots_read_seq;
ktime_t roots_expire;
@@ -806,79 +805,29 @@ static void free_page_list(struct super_block *sb, struct list_head *list)
* read_seq number so that we can compare the age of the items in cached
* pages. Only one request to refresh the roots is in progress at a
* time. This is the slow path that's only used when the cache isn't
* populated and the roots aren't cached.
*
* We read roots directly from the on-disk superblock rather than
* requesting them from the server so that we can also read the
* log_merge btree from the same superblock. The merge status item
* seq tells us which finalized log trees are inputs to the current
* merge, which is needed to correctly resolve totl delta items.
* populated and the roots aren't cached. The root request is fast
* enough, especially compared to the resulting item reading IO, that we
* don't mind hiding it behind a trivial mutex.
*/
static int refresh_roots(struct super_block *sb, struct wkic_info *winf)
{
struct scoutfs_super_block *super;
struct scoutfs_log_merge_status *stat;
SCOUTFS_BTREE_ITEM_REF(iref);
struct scoutfs_key key;
int ret;
super = kmalloc(sizeof(*super), GFP_NOFS);
if (!super)
return -ENOMEM;
ret = scoutfs_read_super(sb, super);
if (ret < 0)
goto out;
winf->roots = (struct scoutfs_net_roots){
.fs_root = super->fs_root,
.logs_root = super->logs_root,
.srch_root = super->srch_root,
};
winf->merge_input_seq = 0;
if (super->log_merge.ref.blkno) {
scoutfs_key_set_zeros(&key);
key.sk_zone = SCOUTFS_LOG_MERGE_STATUS_ZONE;
ret = scoutfs_btree_lookup(sb, &super->log_merge, &key, &iref);
if (ret == 0) {
if (iref.val_len == sizeof(*stat)) {
stat = iref.val;
winf->merge_input_seq = le64_to_cpu(stat->seq);
} else {
ret = -EUCLEAN;
}
scoutfs_btree_put_iref(&iref);
} else if (ret == -ENOENT) {
ret = 0;
}
if (ret < 0)
goto out;
}
winf->roots_read_seq++;
winf->roots_expire = ktime_add_ms(ktime_get_raw(), WKIC_CACHE_LIFETIME_MS);
out:
kfree(super);
return ret;
}
static int get_roots(struct super_block *sb, struct wkic_info *winf,
struct scoutfs_net_roots *roots_ret, u64 *merge_input_seq,
u64 *read_seq, bool force_new)
struct scoutfs_net_roots *roots_ret, u64 *read_seq, bool force_new)
{
struct scoutfs_net_roots roots;
int ret;
mutex_lock(&winf->roots_mutex);
if (force_new || ktime_before(winf->roots_expire, ktime_get_raw())) {
ret = refresh_roots(sb, winf);
ret = scoutfs_client_get_roots(sb, &roots);
if (ret < 0)
goto out;
winf->roots = roots;
winf->roots_read_seq++;
winf->roots_expire = ktime_add_ms(ktime_get_raw(), WKIC_CACHE_LIFETIME_MS);
}
*roots_ret = winf->roots;
*merge_input_seq = winf->merge_input_seq;
*read_seq = winf->roots_read_seq;
ret = 0;
out:
@@ -921,30 +870,24 @@ static int insert_read_pages(struct super_block *sb, struct wkic_info *winf,
struct scoutfs_key end;
struct wkic_page *wpage;
LIST_HEAD(pages);
u64 merge_input_seq;
u64 read_seq = 0;
u64 read_seq;
int ret;
ret = 0;
retry_stale:
ret = get_roots(sb, winf, &roots, &merge_input_seq, &read_seq, ret == -ESTALE);
ret = get_roots(sb, winf, &roots, &read_seq, ret == -ESTALE);
if (ret < 0)
goto check_stale;
goto out;
start = *range_start;
end = *range_end;
ret = scoutfs_forest_read_items_roots(sb, &roots, merge_input_seq, key, range_start,
&start, &end, read_items_cb, &root);
ret = scoutfs_forest_read_items_roots(sb, &roots, key, range_start, &start, &end,
read_items_cb, &root);
trace_scoutfs_wkic_read_items(sb, key, &start, &end);
check_stale:
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
if (ret < 0) {
if (ret == -ESTALE) {
/* not safe to retry due to delta items, must restart clean */
free_item_tree(&root);
root = RB_ROOT;
if (ret == -ESTALE)
goto retry_stale;
}
goto out;
}

View File

@@ -47,7 +47,7 @@
* - add acl support and call generic xattr->handlers for SYSTEM
*/
u32 scoutfs_xattr_name_hash(const char *name, unsigned int name_len)
static u32 xattr_name_hash(const char *name, unsigned int name_len)
{
return crc32c(U32_MAX, name, name_len);
}
@@ -65,7 +65,8 @@ static unsigned int xattr_nr_parts(struct scoutfs_xattr *xat)
le16_to_cpu(xat->val_len));
}
void scoutfs_xattr_init_key(struct scoutfs_key *key, u64 ino, u32 name_hash, u64 id)
static void init_xattr_key(struct scoutfs_key *key, u64 ino, u32 name_hash,
u64 id)
{
*key = (struct scoutfs_key) {
.sk_zone = SCOUTFS_FS_ZONE,
@@ -186,10 +187,10 @@ static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
return -EINVAL;
if (name_len)
name_hash = scoutfs_xattr_name_hash(name, name_len);
name_hash = xattr_name_hash(name, name_len);
scoutfs_xattr_init_key(key, scoutfs_ino(inode), name_hash, id);
scoutfs_xattr_init_key(&last, scoutfs_ino(inode), U32_MAX, U64_MAX);
init_xattr_key(key, scoutfs_ino(inode), name_hash, id);
init_xattr_key(&last, scoutfs_ino(inode), U32_MAX, U64_MAX);
for (;;) {
ret = scoutfs_item_next(sb, key, &last, xat, xat_bytes, lock);
@@ -334,8 +335,8 @@ static int create_xattr_items(struct inode *inode, u64 id, struct scoutfs_xattr
int len;
int i;
scoutfs_xattr_init_key(&key, scoutfs_ino(inode),
scoutfs_xattr_name_hash(xat->name, xat->name_len), id);
init_xattr_key(&key, scoutfs_ino(inode),
xattr_name_hash(xat->name, xat->name_len), id);
for (i = 0; i < new_parts; i++) {
key.skx_part = i;
@@ -364,7 +365,7 @@ static int delete_xattr_items(struct inode *inode, u32 name_hash, u64 id,
int ret = 0;
int i;
scoutfs_xattr_init_key(&key, scoutfs_ino(inode), name_hash, id);
init_xattr_key(&key, scoutfs_ino(inode), name_hash, id);
/* dirty additional existing old items */
for (i = 1; i < nr_parts; i++) {
@@ -406,8 +407,8 @@ static int change_xattr_items(struct inode *inode, u64 id,
int i;
int ret;
scoutfs_xattr_init_key(&key, scoutfs_ino(inode),
scoutfs_xattr_name_hash(xat->name, xat->name_len), id);
init_xattr_key(&key, scoutfs_ino(inode),
xattr_name_hash(xat->name, xat->name_len), id);
/* dirty existing old items */
for (i = 0; i < old_parts; i++) {
@@ -1223,8 +1224,8 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
goto out;
}
scoutfs_xattr_init_key(&key, ino, 0, 0);
scoutfs_xattr_init_key(&last, ino, U32_MAX, U64_MAX);
init_xattr_key(&key, ino, 0, 0);
init_xattr_key(&last, ino, U32_MAX, U64_MAX);
for (;;) {
ret = scoutfs_item_next(sb, &key, &last, (void *)xat, bytes,
@@ -1264,7 +1265,6 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
ret = parse_indx_key(&tag_key, xat->name, xat->name_len, ino);
if (ret < 0)
goto out;
scoutfs_xattr_set_indx_key_xid(&tag_key, le64_to_cpu(key.skx_id));
}
if ((tgs.totl || tgs.indx) && locked_zone != tag_key.sk_zone) {

View File

@@ -10,9 +10,6 @@ struct scoutfs_xattr_prefix_tags {
extern const struct xattr_handler *scoutfs_xattr_handlers[];
u32 scoutfs_xattr_name_hash(const char *name, unsigned int name_len);
void scoutfs_xattr_init_key(struct scoutfs_key *key, u64 ino, u32 name_hash, u64 id);
int scoutfs_xattr_get_locked(struct inode *inode, const char *name, void *buffer, size_t size,
struct scoutfs_lock *lck);
int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_len,

1
tests/.gitignore vendored
View File

@@ -12,4 +12,3 @@ src/o_tmpfile_umask
src/o_tmpfile_linkat
src/mmap_stress
src/mmap_validate
src/watch_raw_inode_change

View File

@@ -15,8 +15,7 @@ BIN := src/createmany \
src/o_tmpfile_umask \
src/o_tmpfile_linkat \
src/mmap_stress \
src/mmap_validate \
src/watch_raw_inode_change
src/mmap_validate
DEPS := $(wildcard src/*.d)

View File

@@ -20,6 +20,9 @@ t_filter_fs()
# [ 2687.691366] BUG: KASAN: stack-out-of-bounds in get_reg+0x1bc/0x230
# ...
# [ 2687.706220] ==================================================================
# [ 2687.707284] Disabling lock debugging due to kernel taint
#
# That final lock debugging message may not be included.
#
ignore_harmless_unwind_kasan_stack_oob()
{
@@ -43,6 +46,10 @@ awk '
saved=""
}
( in_soob == 2 && $0 ~ /==================================================================/ ) {
in_soob = 3
soob_nr = NR
}
( in_soob == 3 && NR > soob_nr && $0 !~ /Disabling lock debugging/ ) {
in_soob = 0
}
( !in_soob ) { print $0 }
@@ -54,58 +61,6 @@ awk '
'
}
#
# in el97+, XFS can generate a spurious lockdep circular dependency
# warning about reclaim. Fixed upstream in e.g. v5.7-rc4-129-g6dcde60efd94
#
ignore_harmless_xfs_lockdep_warning()
{
awk '
BEGIN {
in_block = 0
block_nr = 0
buf = ""
}
( !in_block && $0 ~ /======================================================/ ) {
in_block = 1
block_nr = NR
buf = $0 "\n"
next
}
( in_block == 1 && NR == (block_nr + 1) ) {
if (match($0, /WARNING: possible circular locking dependency detected/) != 0) {
in_block = 2
buf = buf $0 "\n"
} else {
in_block = 0
printf "%s", buf
print $0
buf = ""
}
next
}
( in_block == 2 ) {
buf = buf $0 "\n"
if ($0 ~ /<\/TASK>/) {
if (buf ~ /xfs_(nondir_|dir_)?ilock_class/ && buf ~ /fs_reclaim/) {
# known xfs lockdep false positive, discard
} else {
printf "%s", buf
}
in_block = 0
buf = ""
}
next
}
{ print $0 }
END {
if (buf) {
printf "%s", buf
}
}
'
}
#
# Filter out expected messages. Putting messages here implies that
# tests aren't relying on messages to discover failures.. they're
@@ -168,9 +123,6 @@ t_filter_dmesg()
re="$re|hrtimer: interrupt took .*"
re="$re|clocksource: Long readout interval"
# orphan log trees reclaim is handled, not an error
re="$re|scoutfs .* reclaiming orphan log trees"
# fencing tests force unmounts and trigger timeouts
re="$re|scoutfs .* forcing unmount"
re="$re|scoutfs .* reconnect timed out"
@@ -221,10 +173,6 @@ t_filter_dmesg()
# creating block devices may trigger this
re="$re|block device autoloading is deprecated and will be removed."
# lockdep or kasan warnings can cause this
re="$re|Disabling lock debugging due to kernel taint"
egrep -v "($re)" | \
ignore_harmless_unwind_kasan_stack_oob | \
ignore_harmless_xfs_lockdep_warning
ignore_harmless_unwind_kasan_stack_oob
}

View File

@@ -1,54 +0,0 @@
== testing invalid read-xattr-index arguments
bad index position entry argument 'bad', it must be in the form "a.b.ino" where each value can be prefixed by '0' for octal or '0x' for hex
scoutfs: read-xattr-index failed: Invalid argument (22)
bad index position entry argument '1.2', it must be in the form "a.b.ino" where each value can be prefixed by '0' for octal or '0x' for hex
scoutfs: read-xattr-index failed: Invalid argument (22)
initial major index position '256' must be between 0 and 255, inclusive.
scoutfs: read-xattr-index failed: Invalid argument (22)
first index position 1.2.3 must be less than last index position 0.0.0
scoutfs: read-xattr-index failed: Invalid argument (22)
first index position 1.2.0 must be less than last index position 1.1.2
scoutfs: read-xattr-index failed: Invalid argument (22)
first index position 2.2.2 must be less than last index position 2.2.1
scoutfs: read-xattr-index failed: Invalid argument (22)
== testing invalid names
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Numerical result out of range
== testing boundary values
0.0 found
255.max found
== indx xattr must have no value
setfattr: /mnt/test/test/basic-xattr-indx/noval: Invalid argument
setfattr: /mnt/test/test/basic-xattr-indx/noval: Invalid argument
== set indx xattr and verify index entry
found
== setting same indx xattr again is a no-op
found
== removing non-existent indx xattr succeeds
setfattr: /mnt/test/test/basic-xattr-indx/file: No such attribute
still found
== explicit xattr removal cleans up index entry
== file deletion cleans up index entry
found before delete
== multiple indx xattrs on one file cleaned up by deletion
entries before delete: 2
entries after delete: 0
== partial removal leaves other entries
300 found
== multiple files at same index position
files at same position: 2
surviving file found
== cross-mount visibility
found on mount 1
== duplicate position deduplication
entries for same position: 1

View File

@@ -1,3 +0,0 @@
== create orphan log_trees entry via trigger
== verify orphan is reclaimed and merge completes
== verify orphan reclaim was logged

View File

@@ -1,460 +0,0 @@
== missing options should fail ==
punch-offline: must provide offset
Try `punch-offline --help' or `punch-offline --usage' for more information.
punch-offline: must provide length
Try `punch-offline --help' or `punch-offline --usage' for more information.
punch-offline: must provide data_version
Try `punch-offline --help' or `punch-offline --usage' for more information.
== can't hole punch dir or special ==
failed to open '/mnt/test.0/test/punch-offline/dir': Is a directory (21)
scoutfs: punch-offline failed: Is a directory (21)
== punching an empty file does nothing ==
== punch outside of i_size does nothing ==
== can't hole punch online extent ==
0: offset: 0 length: 4096 flags: ..L
extents: 1
punch_offline ioctl failed: Invalid argument (22)
scoutfs: punch-offline failed: Invalid argument (22)
0: offset: 0 length: 4096 flags: ..L
extents: 1
== can't hole punch unwritten extent ==
0: offset: 0 length: 12288 flags: .UL
extents: 1
punch_offline ioctl failed: Invalid argument (22)
scoutfs: punch-offline failed: Invalid argument (22)
0: offset: 0 length: 12288 flags: .UL
extents: 1
== hole punch offline extent ==
0: offset: 0 length: 12288 flags: O.L
extents: 1
0: offset: 0 length: 4096 flags: O..
1: offset: 8192 length: 4096 flags: O.L
extents: 2
== can't hole punch non-aligned bsz offset or len ==
0: offset: 0 length: 12288 flags: O.L
extents: 1
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
punch_offline ioctl failed: Value too large for defined data type (75)
scoutfs: punch-offline failed: Value too large for defined data type (75)
0: offset: 0 length: 12288 flags: O.L
extents: 1
== can't hole punch mismatched data_version ==
0: offset: 0 length: 12288 flags: O.L
extents: 1
punch_offline ioctl failed: Stale file handle (116)
scoutfs: punch-offline failed: Stale file handle (116)
punch_offline ioctl failed: Stale file handle (116)
scoutfs: punch-offline failed: Stale file handle (116)
punch_offline ioctl failed: Stale file handle (116)
scoutfs: punch-offline failed: Stale file handle (116)
0: offset: 0 length: 12288 flags: O.L
extents: 1
== Punch hole crossing multiple extents ==
0: offset: 0 length: 7 flags: O.L
extents: 1
0: offset: 0 length: 1 flags: O..
1: offset: 2 length: 1 flags: O..
2: offset: 4 length: 1 flags: O..
3: offset: 6 length: 1 flags: O.L
extents: 4
0: offset: 0 length: 1 flags: O..
1: offset: 6 length: 1 flags: O.L
extents: 2
== punch hole starting at a hole ==
0: offset: 0 length: 7 flags: O.L
extents: 1
0: offset: 0 length: 1 flags: O..
1: offset: 2 length: 1 flags: O..
2: offset: 4 length: 1 flags: O..
3: offset: 6 length: 1 flags: O.L
extents: 4
0: offset: 0 length: 1 flags: O..
1: offset: 6 length: 1 flags: O.L
extents: 2
== large punch ==
0: offset: 0 length: 1572864 flags: O.L
extents: 1
0: offset: 0 length: 134123 flags: O..
1: offset: 202466 length: 264807 flags: O..
2: offset: 535616 length: 199007 flags: O..
3: offset: 802966 length: 769898 flags: O.L
extents: 4
== overlapping punches with lots of extents ==
0: offset: 0 length: 4194304 flags: O.L
extents: 1
extents: 512
extents: 505
extents: 378
extents: 252
0: offset: 0 length: 4096 flags: O..
1: offset: 8192 length: 4096 flags: O..
2: offset: 32768 length: 4096 flags: O..
3: offset: 40960 length: 4096 flags: O..
4: offset: 65536 length: 4096 flags: O..
5: offset: 73728 length: 4096 flags: O..
6: offset: 98304 length: 4096 flags: O..
7: offset: 106496 length: 4096 flags: O..
8: offset: 196608 length: 4096 flags: O..
9: offset: 204800 length: 4096 flags: O..
10: offset: 229376 length: 4096 flags: O..
11: offset: 237568 length: 4096 flags: O..
12: offset: 262144 length: 4096 flags: O..
13: offset: 270336 length: 4096 flags: O..
14: offset: 294912 length: 4096 flags: O..
15: offset: 303104 length: 4096 flags: O..
16: offset: 327680 length: 4096 flags: O..
17: offset: 335872 length: 4096 flags: O..
18: offset: 360448 length: 4096 flags: O..
19: offset: 368640 length: 4096 flags: O..
20: offset: 393216 length: 4096 flags: O..
21: offset: 401408 length: 4096 flags: O..
22: offset: 425984 length: 4096 flags: O..
23: offset: 434176 length: 4096 flags: O..
24: offset: 458752 length: 4096 flags: O..
25: offset: 466944 length: 4096 flags: O..
26: offset: 491520 length: 4096 flags: O..
27: offset: 499712 length: 4096 flags: O..
28: offset: 720896 length: 4096 flags: O..
29: offset: 729088 length: 4096 flags: O..
30: offset: 753664 length: 4096 flags: O..
31: offset: 761856 length: 4096 flags: O..
32: offset: 786432 length: 4096 flags: O..
33: offset: 794624 length: 4096 flags: O..
34: offset: 819200 length: 4096 flags: O..
35: offset: 827392 length: 4096 flags: O..
36: offset: 851968 length: 4096 flags: O..
37: offset: 860160 length: 4096 flags: O..
38: offset: 884736 length: 4096 flags: O..
39: offset: 892928 length: 4096 flags: O..
40: offset: 917504 length: 4096 flags: O..
41: offset: 925696 length: 4096 flags: O..
42: offset: 950272 length: 4096 flags: O..
43: offset: 958464 length: 4096 flags: O..
44: offset: 983040 length: 4096 flags: O..
45: offset: 991232 length: 4096 flags: O..
46: offset: 1015808 length: 4096 flags: O..
47: offset: 1024000 length: 4096 flags: O..
48: offset: 1048576 length: 4096 flags: O..
49: offset: 1056768 length: 4096 flags: O..
50: offset: 1081344 length: 4096 flags: O..
51: offset: 1089536 length: 4096 flags: O..
52: offset: 1114112 length: 4096 flags: O..
53: offset: 1122304 length: 4096 flags: O..
54: offset: 1146880 length: 4096 flags: O..
55: offset: 1155072 length: 4096 flags: O..
56: offset: 1179648 length: 4096 flags: O..
57: offset: 1187840 length: 4096 flags: O..
58: offset: 1212416 length: 4096 flags: O..
59: offset: 1220608 length: 4096 flags: O..
60: offset: 1245184 length: 4096 flags: O..
61: offset: 1253376 length: 4096 flags: O..
62: offset: 1277952 length: 4096 flags: O..
63: offset: 1286144 length: 4096 flags: O..
64: offset: 1310720 length: 4096 flags: O..
65: offset: 1318912 length: 4096 flags: O..
66: offset: 1343488 length: 4096 flags: O..
67: offset: 1351680 length: 4096 flags: O..
68: offset: 1376256 length: 4096 flags: O..
69: offset: 1384448 length: 4096 flags: O..
70: offset: 1409024 length: 4096 flags: O..
71: offset: 1417216 length: 4096 flags: O..
72: offset: 1441792 length: 4096 flags: O..
73: offset: 1449984 length: 4096 flags: O..
74: offset: 1474560 length: 4096 flags: O..
75: offset: 1482752 length: 4096 flags: O..
76: offset: 1507328 length: 4096 flags: O..
77: offset: 1515520 length: 4096 flags: O..
78: offset: 1540096 length: 4096 flags: O..
79: offset: 1548288 length: 4096 flags: O..
80: offset: 1572864 length: 4096 flags: O..
81: offset: 1581056 length: 4096 flags: O..
82: offset: 1605632 length: 4096 flags: O..
83: offset: 1613824 length: 4096 flags: O..
84: offset: 1638400 length: 4096 flags: O..
85: offset: 1646592 length: 4096 flags: O..
86: offset: 1671168 length: 4096 flags: O..
87: offset: 1679360 length: 4096 flags: O..
88: offset: 1703936 length: 4096 flags: O..
89: offset: 1712128 length: 4096 flags: O..
90: offset: 1736704 length: 4096 flags: O..
91: offset: 1744896 length: 4096 flags: O..
92: offset: 1769472 length: 4096 flags: O..
93: offset: 1777664 length: 4096 flags: O..
94: offset: 1802240 length: 4096 flags: O..
95: offset: 1810432 length: 4096 flags: O..
96: offset: 1835008 length: 4096 flags: O..
97: offset: 1843200 length: 4096 flags: O..
98: offset: 1867776 length: 4096 flags: O..
99: offset: 1875968 length: 4096 flags: O..
100: offset: 1900544 length: 4096 flags: O..
101: offset: 1908736 length: 4096 flags: O..
102: offset: 1933312 length: 4096 flags: O..
103: offset: 1941504 length: 4096 flags: O..
104: offset: 1966080 length: 4096 flags: O..
105: offset: 1974272 length: 4096 flags: O..
106: offset: 1998848 length: 4096 flags: O..
107: offset: 2007040 length: 4096 flags: O..
108: offset: 2031616 length: 4096 flags: O..
109: offset: 2039808 length: 4096 flags: O..
110: offset: 2064384 length: 4096 flags: O..
111: offset: 2072576 length: 4096 flags: O..
112: offset: 2097152 length: 4096 flags: O..
113: offset: 2105344 length: 4096 flags: O..
114: offset: 2129920 length: 4096 flags: O..
115: offset: 2138112 length: 4096 flags: O..
116: offset: 2162688 length: 4096 flags: O..
117: offset: 2170880 length: 4096 flags: O..
118: offset: 2195456 length: 4096 flags: O..
119: offset: 2203648 length: 4096 flags: O..
120: offset: 2228224 length: 4096 flags: O..
121: offset: 2236416 length: 4096 flags: O..
122: offset: 2260992 length: 4096 flags: O..
123: offset: 2269184 length: 4096 flags: O..
124: offset: 2293760 length: 4096 flags: O..
125: offset: 2301952 length: 4096 flags: O..
126: offset: 2326528 length: 4096 flags: O..
127: offset: 2334720 length: 4096 flags: O..
128: offset: 2359296 length: 4096 flags: O..
129: offset: 2367488 length: 4096 flags: O..
130: offset: 2392064 length: 4096 flags: O..
131: offset: 2400256 length: 4096 flags: O..
132: offset: 2424832 length: 4096 flags: O..
133: offset: 2433024 length: 4096 flags: O..
134: offset: 2457600 length: 4096 flags: O..
135: offset: 2465792 length: 4096 flags: O..
136: offset: 2490368 length: 4096 flags: O..
137: offset: 2498560 length: 4096 flags: O..
138: offset: 2523136 length: 4096 flags: O..
139: offset: 2531328 length: 4096 flags: O..
140: offset: 2555904 length: 4096 flags: O..
141: offset: 2564096 length: 4096 flags: O..
142: offset: 2588672 length: 4096 flags: O..
143: offset: 2596864 length: 4096 flags: O..
144: offset: 2621440 length: 4096 flags: O..
145: offset: 2629632 length: 4096 flags: O..
146: offset: 2654208 length: 4096 flags: O..
147: offset: 2662400 length: 4096 flags: O..
148: offset: 2686976 length: 4096 flags: O..
149: offset: 2695168 length: 4096 flags: O..
150: offset: 2719744 length: 4096 flags: O..
151: offset: 2727936 length: 4096 flags: O..
152: offset: 2752512 length: 4096 flags: O..
153: offset: 2760704 length: 4096 flags: O..
154: offset: 2785280 length: 4096 flags: O..
155: offset: 2793472 length: 4096 flags: O..
156: offset: 2818048 length: 4096 flags: O..
157: offset: 2826240 length: 4096 flags: O..
158: offset: 2850816 length: 4096 flags: O..
159: offset: 2859008 length: 4096 flags: O..
160: offset: 2883584 length: 4096 flags: O..
161: offset: 2891776 length: 4096 flags: O..
162: offset: 2916352 length: 4096 flags: O..
163: offset: 2924544 length: 4096 flags: O..
164: offset: 2949120 length: 4096 flags: O..
165: offset: 2957312 length: 4096 flags: O..
166: offset: 2981888 length: 4096 flags: O..
167: offset: 2990080 length: 4096 flags: O..
168: offset: 3014656 length: 4096 flags: O..
169: offset: 3022848 length: 4096 flags: O..
170: offset: 3047424 length: 4096 flags: O..
171: offset: 3055616 length: 4096 flags: O..
172: offset: 3080192 length: 4096 flags: O..
173: offset: 3088384 length: 4096 flags: O..
174: offset: 3112960 length: 4096 flags: O..
175: offset: 3121152 length: 4096 flags: O..
176: offset: 3145728 length: 4096 flags: O..
177: offset: 3153920 length: 4096 flags: O..
178: offset: 3178496 length: 4096 flags: O..
179: offset: 3186688 length: 4096 flags: O..
180: offset: 3211264 length: 4096 flags: O..
181: offset: 3219456 length: 4096 flags: O..
182: offset: 3244032 length: 4096 flags: O..
183: offset: 3252224 length: 4096 flags: O..
184: offset: 3276800 length: 4096 flags: O..
185: offset: 3284992 length: 4096 flags: O..
186: offset: 3309568 length: 4096 flags: O..
187: offset: 3317760 length: 4096 flags: O..
188: offset: 3342336 length: 4096 flags: O..
189: offset: 3350528 length: 4096 flags: O..
190: offset: 3375104 length: 4096 flags: O..
191: offset: 3383296 length: 4096 flags: O..
192: offset: 3407872 length: 4096 flags: O..
193: offset: 3416064 length: 4096 flags: O..
194: offset: 3440640 length: 4096 flags: O..
195: offset: 3448832 length: 4096 flags: O..
196: offset: 3473408 length: 4096 flags: O..
197: offset: 3481600 length: 4096 flags: O..
198: offset: 3506176 length: 4096 flags: O..
199: offset: 3514368 length: 4096 flags: O..
200: offset: 3538944 length: 4096 flags: O..
201: offset: 3547136 length: 4096 flags: O..
202: offset: 3571712 length: 4096 flags: O..
203: offset: 3579904 length: 4096 flags: O..
204: offset: 3604480 length: 4096 flags: O..
205: offset: 3612672 length: 4096 flags: O..
206: offset: 3637248 length: 4096 flags: O..
207: offset: 3645440 length: 4096 flags: O..
208: offset: 3670016 length: 4096 flags: O..
209: offset: 3678208 length: 4096 flags: O..
210: offset: 3702784 length: 4096 flags: O..
211: offset: 3710976 length: 4096 flags: O..
212: offset: 3735552 length: 4096 flags: O..
213: offset: 3743744 length: 4096 flags: O..
214: offset: 3768320 length: 4096 flags: O..
215: offset: 3776512 length: 4096 flags: O..
216: offset: 3801088 length: 4096 flags: O..
217: offset: 3809280 length: 4096 flags: O..
218: offset: 3833856 length: 4096 flags: O..
219: offset: 3842048 length: 4096 flags: O..
220: offset: 3866624 length: 4096 flags: O..
221: offset: 3874816 length: 4096 flags: O..
222: offset: 3899392 length: 4096 flags: O..
223: offset: 3907584 length: 4096 flags: O..
224: offset: 3932160 length: 4096 flags: O..
225: offset: 3940352 length: 4096 flags: O..
226: offset: 3964928 length: 4096 flags: O..
227: offset: 3973120 length: 4096 flags: O..
228: offset: 3997696 length: 4096 flags: O..
229: offset: 4005888 length: 4096 flags: O..
230: offset: 4030464 length: 4096 flags: O..
231: offset: 4038656 length: 4096 flags: O..
232: offset: 4063232 length: 4096 flags: O..
233: offset: 4071424 length: 4096 flags: O..
234: offset: 4096000 length: 4096 flags: O..
235: offset: 4104192 length: 4096 flags: O..
236: offset: 4128768 length: 4096 flags: O..
237: offset: 4136960 length: 4096 flags: O..
238: offset: 4161536 length: 4096 flags: O..
239: offset: 4169728 length: 4096 flags: O.L
extents: 240
0: offset: 0 length: 1 flags: O..
1: offset: 8 length: 1 flags: O..
2: offset: 16 length: 1 flags: O..
3: offset: 24 length: 1 flags: O..
4: offset: 48 length: 1 flags: O..
5: offset: 56 length: 1 flags: O..
6: offset: 64 length: 1 flags: O..
7: offset: 72 length: 1 flags: O..
8: offset: 80 length: 1 flags: O..
9: offset: 88 length: 1 flags: O..
10: offset: 96 length: 1 flags: O..
11: offset: 104 length: 1 flags: O..
12: offset: 112 length: 1 flags: O..
13: offset: 120 length: 1 flags: O..
14: offset: 176 length: 1 flags: O..
15: offset: 184 length: 1 flags: O..
16: offset: 192 length: 1 flags: O..
17: offset: 200 length: 1 flags: O..
18: offset: 208 length: 1 flags: O..
19: offset: 216 length: 1 flags: O..
20: offset: 224 length: 1 flags: O..
21: offset: 232 length: 1 flags: O..
22: offset: 240 length: 1 flags: O..
23: offset: 248 length: 1 flags: O..
24: offset: 256 length: 1 flags: O..
25: offset: 264 length: 1 flags: O..
26: offset: 272 length: 1 flags: O..
27: offset: 280 length: 1 flags: O..
28: offset: 288 length: 1 flags: O..
29: offset: 296 length: 1 flags: O..
30: offset: 304 length: 1 flags: O..
31: offset: 312 length: 1 flags: O..
32: offset: 320 length: 1 flags: O..
33: offset: 328 length: 1 flags: O..
34: offset: 336 length: 1 flags: O..
35: offset: 344 length: 1 flags: O..
36: offset: 352 length: 1 flags: O..
37: offset: 360 length: 1 flags: O..
38: offset: 368 length: 1 flags: O..
39: offset: 376 length: 1 flags: O..
40: offset: 384 length: 1 flags: O..
41: offset: 392 length: 1 flags: O..
42: offset: 400 length: 1 flags: O..
43: offset: 408 length: 1 flags: O..
44: offset: 416 length: 1 flags: O..
45: offset: 424 length: 1 flags: O..
46: offset: 432 length: 1 flags: O..
47: offset: 440 length: 1 flags: O..
48: offset: 448 length: 1 flags: O..
49: offset: 456 length: 1 flags: O..
50: offset: 464 length: 1 flags: O..
51: offset: 472 length: 1 flags: O..
52: offset: 480 length: 1 flags: O..
53: offset: 488 length: 1 flags: O..
54: offset: 496 length: 1 flags: O..
55: offset: 504 length: 1 flags: O..
56: offset: 512 length: 1 flags: O..
57: offset: 520 length: 1 flags: O..
58: offset: 528 length: 1 flags: O..
59: offset: 536 length: 1 flags: O..
60: offset: 544 length: 1 flags: O..
61: offset: 552 length: 1 flags: O..
62: offset: 560 length: 1 flags: O..
63: offset: 568 length: 1 flags: O..
64: offset: 576 length: 1 flags: O..
65: offset: 584 length: 1 flags: O..
66: offset: 592 length: 1 flags: O..
67: offset: 600 length: 1 flags: O..
68: offset: 608 length: 1 flags: O..
69: offset: 616 length: 1 flags: O..
70: offset: 624 length: 1 flags: O..
71: offset: 632 length: 1 flags: O..
72: offset: 640 length: 1 flags: O..
73: offset: 648 length: 1 flags: O..
74: offset: 656 length: 1 flags: O..
75: offset: 664 length: 1 flags: O..
76: offset: 672 length: 1 flags: O..
77: offset: 680 length: 1 flags: O..
78: offset: 688 length: 1 flags: O..
79: offset: 696 length: 1 flags: O..
80: offset: 704 length: 1 flags: O..
81: offset: 712 length: 1 flags: O..
82: offset: 720 length: 1 flags: O..
83: offset: 728 length: 1 flags: O..
84: offset: 736 length: 1 flags: O..
85: offset: 744 length: 1 flags: O..
86: offset: 752 length: 1 flags: O..
87: offset: 760 length: 1 flags: O..
88: offset: 768 length: 1 flags: O..
89: offset: 776 length: 1 flags: O..
90: offset: 784 length: 1 flags: O..
91: offset: 792 length: 1 flags: O..
92: offset: 800 length: 1 flags: O..
93: offset: 808 length: 1 flags: O..
94: offset: 816 length: 1 flags: O..
95: offset: 824 length: 1 flags: O..
96: offset: 832 length: 1 flags: O..
97: offset: 840 length: 1 flags: O..
98: offset: 848 length: 1 flags: O..
99: offset: 856 length: 1 flags: O..
100: offset: 864 length: 1 flags: O..
101: offset: 872 length: 1 flags: O..
102: offset: 880 length: 1 flags: O..
103: offset: 888 length: 1 flags: O..
104: offset: 896 length: 1 flags: O..
105: offset: 904 length: 1 flags: O..
106: offset: 912 length: 1 flags: O..
107: offset: 920 length: 1 flags: O..
108: offset: 928 length: 1 flags: O..
109: offset: 936 length: 1 flags: O..
110: offset: 944 length: 1 flags: O..
111: offset: 952 length: 1 flags: O..
112: offset: 960 length: 1 flags: O..
113: offset: 968 length: 1 flags: O..
114: offset: 976 length: 1 flags: O..
115: offset: 984 length: 1 flags: O..
116: offset: 992 length: 1 flags: O..
117: offset: 1000 length: 1 flags: O..
118: offset: 1008 length: 1 flags: O..
119: offset: 1016 length: 1 flags: O.L
extents: 120
extents: 0

View File

@@ -1,3 +0,0 @@
== setup
expected 4681
== cleanup

View File

@@ -694,8 +694,8 @@ for t in $tests; do
if [ "$sts" == "$T_PASS_STATUS" ]; then
dmesg | t_filter_dmesg > "$T_TMPDIR/dmesg.after"
diff --old-line-format="" --unchanged-line-format="" \
"$T_TMPDIR/dmesg.before" "$T_TMPDIR/dmesg.after" | \
grep -v '^$' > "$T_TMPDIR/dmesg.new"
"$T_TMPDIR/dmesg.before" "$T_TMPDIR/dmesg.after" > \
"$T_TMPDIR/dmesg.new"
if [ -s "$T_TMPDIR/dmesg.new" ]; then
message="unexpected messages in dmesg"

View File

@@ -11,7 +11,6 @@ simple-readdir.sh
get-referring-entries.sh
fallocate.sh
basic-truncate.sh
punch-offline.sh
data-prealloc.sh
setattr_more.sh
offline-extent-waiting.sh
@@ -26,9 +25,7 @@ srch-basic-functionality.sh
simple-xattr-unit.sh
retention-basic.sh
totl-xattr-tag.sh
basic-xattr-indx.sh
quota.sh
totl-merge-read.sh
lock-refleak.sh
lock-shrink-consistency.sh
lock-shrink-read-race.sh
@@ -52,7 +49,6 @@ setup-error-teardown.sh
resize-devices.sh
change-devices.sh
fence-and-reclaim.sh
orphan-log-trees.sh
quorum-heartbeat-timeout.sh
orphan-inodes.sh
mount-unmount-race.sh

View File

@@ -1,664 +0,0 @@
/*
* Copyright (C) 2026 Versity Software, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <fcntl.h>
#include <errno.h>
#include <time.h>
#include <linux/types.h>
#include <assert.h>
#include <stdbool.h>
#include "../../utils/src/util.h"
#include "ioctl.h"
#include "format.h"
/*
* This is a quick example of using the raw reading ioctls to get info
* on inodes as they change. We maintain an array of meta_seq items for
* inodes that we've seen. If we read the current meta_seq items and
* see differences then we get inode info and update our array with what
* we find.
*
* This only maintains one array and sorts it back and forth as we walk
* the meta_seq items and then search by inode number. This will
* eventually use far too much cpu as the number of inodes increases.
*/
#define MSF "%llu.%llu"
#define MSA(ms) (ms)->meta_seq, (ms)->ino
#define NERRF "nerr %d (\"%s\")"
#define NERRA(nerr) nerr, strerror(-nerr)
#define prerror(fmt, args...) \
fprintf(stderr, "error: "fmt"\n", ##args)
#define prdebug(fmt, args...) \
do { \
if (opts.debug) \
printf(fmt"\n", ##args); \
} while (0)
static struct opts {
bool debug;
char *path;
char *names;
size_t names_size;
size_t names_count;
} opts;
struct stats {
__u64 start;
__u64 last;
struct per_call {
__u64 begin;
__u64 calls;
__u64 time;
__u64 inos;
} rms, rii;
__u64 inodes;
__u64 add;
__u64 remove;
__u64 update;
unsigned lines;
} stats;
struct meta_seq_array {
size_t nr;
size_t alloc;
struct scoutfs_ioctl_meta_seq *ms;
};
#define INO_BATCH 1000
/* *2 for gratuitous allowance for struct expansion */
#define RESULTS_SIZE (INO_BATCH * 2 * (sizeof(struct scoutfs_ioctl_raw_read_result) + \
sizeof(__u64) + \
180 /* ~= sizeof(struct scoutfs_inode) */ + \
sizeof(struct scoutfs_ioctl_inode_attr_x)))
#define NSEC_PER_SEC 1000000000
static __u64 get_ns(void)
{
struct timespec tp;
int ret;
ret = clock_gettime(CLOCK_MONOTONIC, &tp);
if (ret != 0) {
ret = -errno;
prerror("clock_gettime() error: "NERRF, NERRA(ret));
exit(2);
}
return ((__u64)tp.tv_sec * NSEC_PER_SEC) + (__u64)tp.tv_nsec;
}
/* Record the start time of one ioctl call so end_call() can charge its duration. */
static void begin_call(struct per_call *pc)
{
	pc->begin = get_ns();
}
/*
 * Finish timing a call started by begin_call(): bump the call count and
 * accumulate the elapsed nanoseconds into the per-call stats.
 */
static void end_call(struct per_call *pc)
{
	pc->calls++;
	pc->time += get_ns() - pc->begin;
}
/*
 * Make sure the array has room for 'additional' more elements beyond
 * the arr->nr already in use, growing the allocation in ALLOC_BATCH
 * sized chunks.  Returns 0 on success or -ENOMEM, leaving the existing
 * allocation intact on failure.
 */
static int expand_array(struct meta_seq_array *arr, size_t additional)
{
#define ALLOC_BATCH (1024 * 1024 / (sizeof(struct scoutfs_ioctl_meta_seq)))
	struct scoutfs_ioctl_meta_seq *ms;
	size_t expand;

	if (arr->nr + additional <= arr->alloc)
		return 0;

	/*
	 * Grow by whole batches until the request fits.  The previous
	 * version only ever added a single batch, which could still be
	 * too small when additional exceeds ALLOC_BATCH.
	 */
	expand = arr->alloc;
	do {
		expand += ALLOC_BATCH;
	} while (arr->nr + additional > expand);

	/* reallocarray checks the element-count multiplication for overflow */
	ms = reallocarray(arr->ms, expand, sizeof(arr->ms[0]));
	if (!ms) {
		prerror("allocating ms array with %zu elements failed", expand);
		return -ENOMEM;
	}

	arr->alloc = expand;
	arr->ms = ms;
	return 0;
}
/*
 * Advance a meta_seq position to the next inode number, carrying into
 * the meta_seq field when the ino counter wraps around to zero.
 */
static void inc_ms(struct scoutfs_ioctl_meta_seq *ms)
{
	ms->ino++;
	if (ms->ino == 0)
		ms->meta_seq++;
}
/* Set a meta_seq position to the given (meta_seq, ino) pair. */
static void set_ms(struct scoutfs_ioctl_meta_seq *ms, __u64 meta_seq, __u64 ino)
{
	ms->meta_seq = meta_seq;
	ms->ino = ino;
}
/* qsort/bsearch comparison of two meta_seq entries by inode number only. */
static int compar_ms_ino(const void *A, const void *B)
{
	const struct scoutfs_ioctl_meta_seq *a = A;
	const struct scoutfs_ioctl_meta_seq *b = B;

	if (a->ino < b->ino)
		return -1;
	if (a->ino > b->ino)
		return 1;
	return 0;
}
/*
 * Comparison of two meta_seq entries, ordered first by meta_seq and
 * then by inode number to break ties.
 */
static int compar_ms_meta_seq(const void *A, const void *B)
{
	const struct scoutfs_ioctl_meta_seq *a = A;
	const struct scoutfs_ioctl_meta_seq *b = B;

	if (a->meta_seq < b->meta_seq)
		return -1;
	if (a->meta_seq > b->meta_seq)
		return 1;
	return compar_ms_ino(A, B);
}
/* qsort comparison of two bare __u64 values in ascending order. */
static int compar_u64(const void *A, const void *B)
{
	const __u64 a = *(const __u64 *)A;
	const __u64 b = *(const __u64 *)B;

	if (a < b)
		return -1;
	return a > b ? 1 : 0;
}
/*
 * State threaded through bsearch() by bsearch_ind() so the comparison
 * callback can record where the search would insert the key.
 */
struct bsearch_ind_key {
	int (*compar)(const void *a, const void *b);	/* caller's element comparison */
	void *key;					/* key being searched for */
	size_t size;					/* element size in bytes */
	void **index;					/* out: insertion position within base[] */
};
/*
 * Comparison callback for bsearch_ind().  The "key" handed to
 * bsearch() is really a struct bsearch_ind_key pointer with its low
 * bit set (a stack struct is always more strictly aligned, so the bit
 * is otherwise clear).  The tag lets us tell the key argument apart
 * from the &base[..] element argument and record the would-be
 * insertion position as the search narrows.
 */
static int bsearch_ind_compar(const void *a, const void *b)
{
	const struct bsearch_ind_key *bik = (const void *)((unsigned long)a ^ 1);
	int cmp;

	/* this key hack only works if compar is always called where a is key and b is &base[..] */
	assert((unsigned long)a & 1);
	assert(!((unsigned long)b & 1));

	cmp = bik->compar(bik->key, b);
	/* track the first element >= the key, or one past a lesser element */
	if (cmp > 0)
		*(bik->index) = (void *)b + bik->size;
	else
		*(bik->index) = (void *)b;

	return cmp;
}
/*
 * Like bsearch() but instead of a pointer to a matching element it
 * returns the index where the key is, or would be inserted, in the
 * sorted array (so it can be == nmemb when the key sorts past the end).
 */
static size_t bsearch_ind(const void *key, const void *base, size_t nmemb, size_t size,
			  int (*compar)(const void *a, const void *b))
{
	void *index = (void *)base;
	struct bsearch_ind_key bik;

	bik.compar = compar;
	bik.key = (void *)key;
	bik.size = size;
	bik.index = &index;

	/* tag the low pointer bit so the callback can recognize the key */
	bsearch((void *)(((unsigned long)&bik) | 1), base, nmemb, size, bsearch_ind_compar);

	return (index - base) / size;
}
/*
 * Generate a sorted list of inode numbers for the meta_seq items that
 * differ between the results from raw_read_meta_seq and the items we
 * have saved in our array.
 *
 * Both arr->ms and ms[] are sorted by (meta_seq, ino), so this is a
 * merge walk over the [start, last] window: an entry present on only
 * one side means that inode's meta_seq changed.  At most INO_BATCH
 * inode numbers are emitted per call, and *last is clamped down when
 * not all of ms[] could be consumed so the caller resumes correctly.
 * Returns the count of unique inode numbers stored in inos[].
 */
static int differing_inos(__u64 *inos, struct meta_seq_array *arr,
			  struct scoutfs_ioctl_meta_seq *start,
			  struct scoutfs_ioctl_meta_seq *last,
			  struct scoutfs_ioctl_meta_seq *ms, size_t nr)
{
	size_t arr_last;
	size_t a;
	size_t m;
	int nr_inos;
	int cmp;
	int i;
	int n;

	/* find where we're going to stop in arr */
	arr_last = bsearch_ind(last, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);
	if (arr_last < arr->nr && compar_ms_meta_seq(&arr->ms[arr_last], last) == 0)
		arr_last++;

	/* and where to start walking arr from */
	a = bsearch_ind(start, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);

	for (m = 0, nr_inos = 0; (a < arr_last || m < nr) && nr_inos < INO_BATCH; ) {
		prdebug("diffing: m %zu nr %zu | a %zu arr_last %zu | nr_inos %d",
			m, nr, a, arr_last, nr_inos);
		if (a < arr_last)
			prdebug(" arr->ms[%zu] = "MSF, a, MSA(&arr->ms[a]));
		if (m < nr)
			prdebug(" ms[%zu] = "MSF, m, MSA(&ms[m]));

		/* setup comparison to copy lesser or only */
		if (a < arr_last && m < nr)
			cmp = compar_ms_meta_seq(&arr->ms[a], &ms[m]);
		else if (a < arr_last)
			cmp = -1;
		else
			cmp = 1;

		prdebug(" cmp %d", cmp);

		if (cmp == 0) {
			/* ignore both when they match */
			a++;
			m++;
		} else if (cmp < 0) {
			inos[nr_inos++] = arr->ms[a++].ino;
		} else { /* cmp > 0 */
			inos[nr_inos++] = ms[m++].ino;
		}
	}

	/* if we didn't consume all the read meta_seq then we might need to clamp last */
	if (m < nr && compar_ms_meta_seq(&ms[m], last) <= 0) {
		*last = ms[m];
		last->ino--; /* must be non-zero, can't wrap */
	}

	/* sort and remove duplicate inode numbers */
	if (nr_inos > 0) {
		qsort(inos, nr_inos, sizeof(inos[0]), compar_u64);
		for (i = 1, n = 1; i < nr_inos; i++) {
			if (inos[i] != inos[n - 1])
				inos[n++] = inos[i];
		}
		nr_inos = n;
	}

	return nr_inos;
}
/*
 * We're not really validating the result stream.  We assume that the
 * offset currently points at an inode.  We fill the caller's found ms
 * with its info then iterate through all its results until the next
 * ino.  found->ino is left 0 when no inode result was seen.
 *
 * Returns the offset of the next unconsumed result header.
 */
static ssize_t read_inode_results(void *buf, size_t off, size_t size,
				  struct scoutfs_ioctl_meta_seq *found)
{
	struct scoutfs_ioctl_raw_read_result res;
	size_t len;
	__le64 ms;

	found->ino = 0;

	/* stop before reading a truncated result header past the buffer end */
	while (off + sizeof(res) <= size) {
		memcpy(&res, buf + off, sizeof(res));
		prdebug("res %u %u", res.type, res.size);

		if (res.type == SCOUTFS_IOC_RAW_READ_RESULT_INODE && found->ino != 0)
			break;
		off += sizeof(res);

		/* a bogus payload size would walk us out of the buffer; stop instead */
		if (res.size > size - off)
			break;

		switch(res.type) {
		case SCOUTFS_IOC_RAW_READ_RESULT_INODE:
			memcpy(&found->ino, buf + off, sizeof(__u64));
			memcpy(&ms, buf + off + sizeof(__u64) +
			       offsetof(struct scoutfs_inode, meta_seq), sizeof(__le64));
			found->meta_seq = le64_to_cpu(ms);
			prdebug("res ino %llu ms %llu", found->ino, found->meta_seq);
			break;
		case SCOUTFS_IOC_RAW_READ_RESULT_XATTR:
			/* payload is a name followed by value bytes; assumes the
			 * kernel always NUL-terminates the name - TODO confirm */
			len = strlen((char *)buf + off) + 1;
			prdebug("res xattr '%s' len %d: '%.*s'",
				(char *)buf + off,
				(int)(res.size - len),
				(int)(res.size - len),
				(char *)buf + off + len);
			break;
		};
		off += res.size;
	}

	return off;
}
/*
* inos[] contains the inode numbers that we're interested in. Get
* their info and update our array with what we find.
*/
static int read_inode_info(int fd, void *buf, struct meta_seq_array *arr, __u64 *inos, int nr_inos)
{
	struct scoutfs_ioctl_raw_read_inode_info rii;
	struct scoutfs_ioctl_meta_seq found;
	struct scoutfs_ioctl_meta_seq ms;
	ssize_t off;
	size_t size;
	size_t ind;
	size_t added;
	int i;
	int ret;

	rii = (struct scoutfs_ioctl_raw_read_inode_info) {
		.inos_ptr = (unsigned long)inos,
		.inos_count = nr_inos,
		.names_ptr = (unsigned long)opts.names,
		.names_count = opts.names_count,
		.results_ptr = (unsigned long)buf,
		.results_size = RESULTS_SIZE,
	};

	begin_call(&stats.rii);
	ret = ioctl(fd, SCOUTFS_IOC_RAW_READ_INODE_INFO, &rii);
	if (ret < 0) {
		ret = -errno;
		prerror("READ_INODE_INFO ioctl failed: "NERRF, NERRA(ret));
		goto out;
	}
	end_call(&stats.rii);

	/* a non-negative return is used as the byte length of results in buf */
	prdebug("gii ret %d", ret);
	off = 0;
	size = ret;
	set_ms(&found, 0, 0);
	added = 0;
	i = 0;

	/* sort by ino so we can search by ino for updates */
	qsort(arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_ino);

	/*
	 * Walk the requested inos[] and the returned result stream in
	 * parallel.  Requested inodes with no result are marked for
	 * deletion (meta_seq UINT64_MAX); found inodes update or append
	 * array entries.  Appends go past arr->nr so they don't disturb
	 * the ino-sorted region being binary searched.
	 */
	while (i < nr_inos) {
		/* find next ino */
		if (!found.ino && off < size) {
			off = read_inode_results(buf, off, size, &found);
			if (off < 0) {
				ret = off;
				goto out;
			}
			stats.rii.inos++;
		}
		if (i < nr_inos && (!found.ino || inos[i] < found.ino)) {
			/* delete any record of inodes we didn't find */
			set_ms(&ms, UINT64_MAX, inos[i]);
			i++;
		} else if (found.ino) {
			/* update/add arr to match the found ino */
			ms = found;
			if (i < nr_inos && inos[i] == found.ino)
				i++;
			set_ms(&found, 0, 0);
		}

		/* find existing record */
		ind = bsearch_ind(&ms, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_ino);
		if (ind < arr->nr && arr->ms[ind].ino == ms.ino) {
			/* update existing ino, can be marking for deletion */
			prdebug("updating arr [%zu] ino %llu ms %llu -> %llu",
				ind, ms.ino, arr->ms[ind].meta_seq, ms.meta_seq);
			arr->ms[ind].meta_seq = ms.meta_seq;
			if (ms.meta_seq == UINT64_MAX)
				stats.remove++;
			else
				stats.update++;
		} else if (ms.meta_seq != UINT64_MAX) {
			/* append new found, maintaining existing sorting */
			arr->ms[arr->nr + added] = ms;
			prdebug("adding arr [%zu] ino %llu ms %llu",
				arr->nr + added, ms.ino, ms.meta_seq);
			added++;
			stats.add++;
		}
	}

	/* sort by seq again for next meta seq read */
	arr->nr += added;
	qsort(arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);

	/* and trim off any deletions (UINT64_MAX sorts to the end) */
	while (arr->nr > 0 && arr->ms[arr->nr - 1].meta_seq == UINT64_MAX)
		arr->nr--;

	ret = 0;
out:
	/* NOTE(review): the early ioctl error path skips end_call() on stats.rii — confirm intended */
	return ret;
}
/*
 * Convert the difference between two nanosecond timestamps into
 * fractional seconds.
 */
static double secs(u64 a_ns, u64 b_ns)
{
	u64 delta_ns = a_ns - b_ns;

	return (double)delta_ns / NSEC_PER_SEC;
}
/*
 * Return the rate of events per second over a duration measured in
 * nanoseconds, or 0 when no time has elapsed.
 */
static double nr_per_sec(u64 nr, __u64 nsec)
{
	double elapsed;

	if (nsec == 0)
		return 0;

	elapsed = secs(nsec, 0);
	return (double)nr / elapsed;
}
/*
 * Print a line of per-interval statistics, rate limited to roughly one
 * line per second, with column headers repeated every 16 lines.  The
 * per-interval counters are reset after printing while the run-wide
 * fields (start, last, lines) are preserved.
 *
 * Fix: the final inode_info rate column used stats.rms.inos instead of
 * stats.rii.inos, so the inos/s for READ_INODE_INFO was computed from
 * the READ_META_SEQ counter.
 */
static void print_stats(void)
{
	u64 now = get_ns();

	/* rate limit output to about one line per second */
	if (secs(now, stats.last) < 1.0)
		return;

	if ((stats.lines++ % 16) == 0) {
		printf("%6s | %-29s | %-23s | %-23s\n",
			"", "inodes", "meta_seq", "inode_info");
		printf("%6s | %8s %6s %6s %6s | %7s %7s %7s | %7s %7s %7s\n",
			"now",
			"total", "add", "remove", "update",
			"calls", "inos", "inos/s",
			"calls", "inos", "inos/s");
	}
	printf("%6.3lf | %8llu %6llu %6llu %6llu | %7llu %7llu %7.0lf | %7llu %7llu %7.0lf\n",
		secs(now, stats.start),
		stats.inodes, stats.add, stats.remove, stats.update,
		stats.rms.calls, stats.rms.inos, nr_per_sec(stats.rms.inos, stats.rms.time),
		stats.rii.calls, stats.rii.inos, nr_per_sec(stats.rii.inos, stats.rii.time));
	stats.last = now;

	/* reset the interval counters, carrying over the run-wide fields */
	{
		struct stats save = stats;
		stats = (struct stats) {
			.start = save.start,
			.last = save.last,
			.lines = save.lines,
		};
	}
}
/*
 * Append a null terminated xattr name to the packed names buffer in
 * opts, growing the buffer as needed.  Exits the process on allocation
 * failure.
 */
static void add_xattr(char *name)
{
	size_t need = strlen(name) + 1;
	char *grown;
	int err;

	grown = realloc(opts.names, opts.names_size + need);
	if (grown == NULL) {
		err = -errno;
		prerror("allocation of xattr names buffer failed: "NERRF, NERRA(err));
		exit(3);
	}

	memcpy(grown + opts.names_size, name, need);
	opts.names = grown;
	opts.names_size += need;
	opts.names_count++;
}
/*
 * Parse command line options into the global opts struct.  Returns
 * true when parsing succeeded and the required path was given, false
 * after printing usage otherwise.
 */
static bool parse_opts(int argc, char **argv)
{
	bool usage = false;
	int c;

	opts = (struct opts) {
		.debug = false,
	};

	for (;;) {
		c = getopt(argc, argv, "dp:x:");
		if (c == -1)
			break;

		switch(c) {
		case 'd':
			opts.debug = true;
			break;
		case 'p':
			opts.path = strdup(optarg);
			break;
		case 'x':
			add_xattr(optarg);
			break;
		case '?':
			printf("Unknown option '%c'\n", optopt);
			usage = true;
		}
	}

	/* the path option is required */
	if (!usage && !opts.path) {
		printf("need -p path option\n");
		usage = true;
	}

	if (usage) {
		printf("\nusage:\n"
		       " -d | enable verbose debugging output\n"
		       " -p PATH | path to file system to watch\n"
		       " -x NAME | try to read named xattr with inodes, can be many\n"
		       );
		return false;
	}

	return true;
}
/*
 * Repeatedly sweep the full meta_seq index with READ_META_SEQ, diff
 * each batch against our cached array, and look up changed inodes with
 * READ_INODE_INFO, printing rate statistics as we go.  Runs until
 * killed or an ioctl fails.
 */
int main(int argc, char **argv)
{
	struct scoutfs_ioctl_raw_read_meta_seq rms = {0,};
	struct scoutfs_ioctl_meta_seq *ms;
	struct meta_seq_array arr = {0,};
	__u64 *inos = NULL;
	void *buf = NULL;
	int fd = -1;
	int nr_inos;
	int nr;
	int i;
	int ret;

	if (!parse_opts(argc, argv))
		exit(1);

	/* inos batches lookups; buf is shared by both ioctls' results */
	inos = calloc(INO_BATCH, sizeof(inos[0]));
	buf = malloc(RESULTS_SIZE);
	if (!inos || !buf) {
		ret = -ENOMEM;
		goto out;
	}

	/* limit meta_seq results so a full batch of inos fits per call */
	rms.results_ptr = (unsigned long)buf;
	rms.results_size = min(RESULTS_SIZE, INO_BATCH * sizeof(struct scoutfs_ioctl_meta_seq));

	fd = open(opts.path, O_RDONLY);
	if (fd == -1) {
		perror("error");
		exit(1);
	}

	stats.start = get_ns();

	for (;;) {
		/* each outer pass sweeps the whole meta_seq key space */
		set_ms(&rms.start, 0, 0);
		set_ms(&rms.end, UINT64_MAX, UINT64_MAX);

		do {
			begin_call(&stats.rms);
			/* a non-negative return is the count of meta_seq entries in buf */
			ret = ioctl(fd, SCOUTFS_IOC_RAW_READ_META_SEQ, &rms);
			if (ret < 0) {
				ret = -errno;
				prerror("READ_META_SEQ ioctl failed, "
					"start "MSF" end "MSF", "NERRF,
					MSA(&rms.start), MSA(&rms.end), NERRA(ret));
				goto out;
			}
			end_call(&stats.rms);
			stats.rms.inos += ret;

			prdebug("RMS last "MSF" ret %d:", MSA(&rms.last), ret);
			nr = ret;
			ms = buf;
			if (opts.debug && nr > 0) {
				for (i = 0; i < nr; i++)
					prdebug(" [%u] "MSF"", i, MSA(&ms[i]));
			}

			/* diff this batch against the cached array, then refresh changed inodes */
			nr_inos = differing_inos(inos, &arr, &rms.start, &rms.last, ms, nr);
			if (nr_inos > 0) {
				prdebug("diff inos %d:", nr_inos);
				for (i = 0; i < nr_inos; i++)
					prdebug(" [%u] %llu", i, inos[i]);

				ret = expand_array(&arr, nr_inos) ?:
				      read_inode_info(fd, buf, &arr, inos, nr_inos);
				if (ret < 0)
					goto out;
			}

			stats.inodes = arr.nr;
			print_stats();

			/* resume the sweep just past what we've consumed */
			rms.start = rms.last;
			inc_ms(&rms.start);
			/* last == (MAX, MAX) means the sweep reached the end of the key space */
		} while (rms.last.meta_seq != UINT64_MAX || rms.last.ino != UINT64_MAX);

		sleep(1);
	}

	ret = 0;
out:
	if (fd >= 0)
		close(fd);
	free(inos);
	free(buf);
	free(arr.ms);
	free(opts.names);
	/* NOTE(review): error paths return a negative errno from main — confirm callers only test non-zero */
	return ret;
}

View File

@@ -1,143 +0,0 @@
#
# Test basic .indx. xattr tag functionality and index entry lifecycle
#
t_require_commands touch rm setfattr scoutfs stat
t_require_mounts 2

# query index from a specific mount, default mount 0
read_xattr_index()
{
	local nr="${1:-0}"
	local mnt="$(eval echo \$T_M$nr)"

	shift

	# sync and drop the cache so the query reflects persistent items
	sync
	echo 1 > $(t_debugfs_path $nr)/drop_weak_item_cache
	scoutfs read-xattr-index -p "$mnt" "$@"
}

# index position used by most of the cases below
MAJOR=5
MINOR=100

echo "== testing invalid read-xattr-index arguments"
scoutfs read-xattr-index -p "$T_M0" bad 2>&1
scoutfs read-xattr-index -p "$T_M0" 1.2 2>&1
scoutfs read-xattr-index -p "$T_M0" 1.2.3 256.0.0 2>&1
scoutfs read-xattr-index -p "$T_M0" 1.2.3 0.0.0 2>&1
scoutfs read-xattr-index -p "$T_M0" 1.2.0 1.1.2 2>&1
scoutfs read-xattr-index -p "$T_M0" 2.2.2 2.2.1 2>&1

# malformed .indx. names must be rejected when set
echo "== testing invalid names"
touch "$T_D0/invalid"
setfattr -n scoutfs.hide.indx.test.$MAJOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.. "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test..$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.$MAJOR. "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.256.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.abc.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.$MAJOR.abc "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.-1.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.$MAJOR.-1 "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.18446744073709551616.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.$(printf 'x%.0s' $(seq 1 240)).$MAJOR.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
rm -f "$T_D0/invalid"

# smallest and largest legal major.minor values round trip
echo "== testing boundary values"
touch "$T_D0/boundary"
INO=$(stat -c "%i" "$T_D0/boundary")
setfattr -n scoutfs.hide.indx.test.0.0 "$T_D0/boundary"
read_xattr_index 0 0.0.0 0.0.-1 | awk '($3 == "'$INO'") {print "0.0 found"}'
setfattr -x scoutfs.hide.indx.test.0.0 "$T_D0/boundary"
setfattr -n scoutfs.hide.indx.test.255.18446744073709551615 "$T_D0/boundary"
read_xattr_index 0 255.0.0 255.-1.-1 | awk '($3 == "'$INO'") {print "255.max found"}'
setfattr -x scoutfs.hide.indx.test.255.18446744073709551615 "$T_D0/boundary"
rm -f "$T_D0/boundary"

echo "== indx xattr must have no value"
touch "$T_D0/noval"
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v "" "$T_D0/noval" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v 0 "$T_D0/noval" 2>&1 | t_filter_fs
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v 1 "$T_D0/noval" 2>&1 | t_filter_fs
rm -f "$T_D0/noval"

echo "== set indx xattr and verify index entry"
touch "$T_D0/file"
INO=$(stat -c "%i" "$T_D0/file")
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found"}'

echo "== setting same indx xattr again is a no-op"
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found"}'

echo "== removing non-existent indx xattr succeeds"
setfattr -x scoutfs.hide.indx.nonexistent.$MAJOR.999 "$T_D0/file" 2>&1 | t_filter_fs
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "still found"}'

echo "== explicit xattr removal cleans up index entry"
setfattr -x scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan"}'
rm -f "$T_D0/file"

echo "== file deletion cleans up index entry"
touch "$T_D0/file2"
INO=$(stat -c "%i" "$T_D0/file2")
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file2"
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found before delete"}'
rm -f "$T_D0/file2"
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan after delete"}'

echo "== multiple indx xattrs on one file cleaned up by deletion"
touch "$T_D0/file3"
INO=$(stat -c "%i" "$T_D0/file3")
setfattr -n scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/file3"
setfattr -n scoutfs.hide.indx.b.$MAJOR.300 "$T_D0/file3"
BEFORE=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
echo "entries before delete: $BEFORE"
rm -f "$T_D0/file3"
AFTER=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
echo "entries after delete: $AFTER"

echo "== partial removal leaves other entries"
touch "$T_D0/partial"
INO=$(stat -c "%i" "$T_D0/partial")
setfattr -n scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/partial"
setfattr -n scoutfs.hide.indx.b.$MAJOR.300 "$T_D0/partial"
setfattr -x scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/partial"
read_xattr_index 0 $MAJOR.200.0 $MAJOR.200.-1 | awk '($3 == "'$INO'") {print "200 found"}'
read_xattr_index 0 $MAJOR.300.0 $MAJOR.300.-1 | awk '($3 == "'$INO'") {print "300 found"}'
rm -f "$T_D0/partial"

echo "== multiple files at same index position"
touch "$T_D0/multi_a" "$T_D0/multi_b"
INO_A=$(stat -c "%i" "$T_D0/multi_a")
INO_B=$(stat -c "%i" "$T_D0/multi_b")
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/multi_a"
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/multi_b"
COUNT=$(read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | wc -l)
echo "files at same position: $COUNT"
rm -f "$T_D0/multi_a"
read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | awk '($3 == "'$INO_A'") {print "deleted file still found"}'
read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | awk '($3 == "'$INO_B'") {print "surviving file found"}'
rm -f "$T_D0/multi_b"

# entries written on mount 0 must be visible from mount 1
echo "== cross-mount visibility"
touch "$T_D0/file4"
INO=$(stat -c "%i" "$T_D0/file4")
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file4"
read_xattr_index 1 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found on mount 1"}'
rm -f "$T_D0/file4"
read_xattr_index 1 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan on mount 1"}'

# two xattrs at the same position should produce one index entry
echo "== duplicate position deduplication"
touch "$T_D0/file5"
INO=$(stat -c "%i" "$T_D0/file5")
setfattr -n scoutfs.hide.indx.aa.$MAJOR.$MINOR "$T_D0/file5"
setfattr -n scoutfs.hide.indx.bb.$MAJOR.$MINOR "$T_D0/file5"
COUNT=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
echo "entries for same position: $COUNT"
rm -f "$T_D0/file5"

t_pass

View File

@@ -53,6 +53,14 @@ exec {FD1}>&- # close
exec {FD2}>&- # close
check_ino_index "$ino" "$dseq" "$T_M0"
echo "== remote unopened unlink deletes"
echo "contents" > "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
rm -f "$T_D1/file"
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
# Hurry along the orphan scanners. If any are currently asleep, we will
# have to wait at least their current scan interval before they wake up,
# run, and notice their new interval.
@@ -60,19 +68,6 @@ t_save_all_sysfs_mount_options orphan_scan_delay_ms
t_set_all_sysfs_mount_options orphan_scan_delay_ms 500
t_wait_for_orphan_scan_runs
echo "== remote unopened unlink deletes"
echo "contents" > "$T_D0/file"
ino=$(stat -c "%i" "$T_D0/file")
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
rm -f "$T_D1/file"
# cross-mount deletion falls back to the orphan scanner when the
# creating mount still has the inode cached, wait for it to complete
t_force_log_merge
# wait for orphan scanners to pick up the unlinked inode and become idle
t_wait_for_no_orphans
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"
echo "== unlink wait for open on other mount"
echo "contents" > "$T_D0/badfile"
ino=$(stat -c "%i" "$T_D0/badfile")
@@ -86,6 +81,7 @@ exec {FD}>&- # close
# we know that revalidating will unhash the remote dentry
stat "$T_D0/badfile" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
t_force_log_merge
# wait for orphan scanners to pick up the unlinked inode and become idle
t_wait_for_no_orphans
check_ino_index "$ino" "$dseq" "$T_M0"
check_ino_index "$ino" "$dseq" "$T_M1"

View File

@@ -1,52 +0,0 @@
#
# Test that orphaned log_trees entries from unmounted rids are
# finalized and merged.
#
# An orphan log_trees entry is one whose rid has no mounted_clients
# entry. This can happen from incomplete reclaim across server
# failovers. We simulate it with the reclaim_skip_finalize trigger
# which makes reclaim_open_log_tree skip the finalization step.
#
t_require_commands touch scoutfs
t_require_mounts 2

# upper bound on waiting for the fence daemon to act
TIMEOUT=90

echo "== create orphan log_trees entry via trigger"
sv=$(t_server_nr)
cl=$(t_first_client_nr)
rid=$(t_mount_rid $cl)

# dirty both mounts so each has log trees worth reclaiming
touch "$T_D0/file" "$T_D1/file"
sync

# arm the trigger so reclaim skips finalization
t_trigger_arm_silent reclaim_skip_finalize $sv

# force unmount the client, server will fence and reclaim it
# but the trigger makes reclaim leave log_trees unfinalized
t_force_umount $cl

# wait for fencing to run
verify_fenced() {
	grep -q "running rid '$rid'" "$T_FENCED_LOG" 2>/dev/null
}
t_wait_until_timeout $TIMEOUT verify_fenced

# give the server time to complete reclaim after fence
sleep 5

# remount the client so t_force_log_merge can sync all mounts.
# the client gets a new rid; the old rid's log_trees is the orphan.
t_mount $cl

echo "== verify orphan is reclaimed and merge completes"
t_force_log_merge

echo "== verify orphan reclaim was logged"
if ! dmesg | grep -q "reclaiming orphan log trees for rid $rid"; then
	t_fail "expected orphan reclaim message for rid $rid in dmesg"
fi

t_pass

View File

@@ -1,152 +0,0 @@
t_require_commands scoutfs dd fallocate

FILE="$T_D0/file"
DIR="$T_D0/dir"

echo "== missing options should fail =="
rm -rf $DIR && mkdir -p $DIR
scoutfs punch-offline $DIR -l 4096 -V 0
scoutfs punch-offline $DIR -o 0 -V 0
scoutfs punch-offline $DIR -o 0 -l 4096

echo "== can't hole punch dir or special =="
rm -rf $DIR && mkdir -p $DIR
scoutfs punch-offline $DIR -o 0 -l 4096 -V 0

echo "== punching an empty file does nothing =="
rm -f $FILE && touch $FILE
scoutfs punch-offline $FILE -o 0 -l 4096 -V 0

echo "== punch outside of i_size does nothing =="
dd if=/dev/zero of=$FILE bs=4096 count=1 status=none
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 1

echo "== can't hole punch online extent =="
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 0 -l 4096 -V 1
scoutfs get-fiemap -Lb $FILE

echo "== can't hole punch unwritten extent =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4096 -l 4096 -V $vers
scoutfs get-fiemap -Lb $FILE

echo "== hole punch offline extent =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4096 -l 4096 -V $vers
scoutfs get-fiemap -Lb $FILE

echo "== can't hole punch non-aligned bsz offset or len =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4095 -l 4096 -V $vers
scoutfs punch-offline $FILE -o 1 -l 4096 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 409700 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 4097 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 4095 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 1 -V $vers
scoutfs punch-offline $FILE -o 4096 -l 0 -V $vers
scoutfs get-fiemap -Lb $FILE

echo "== can't hole punch mismatched data_version =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 3)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -Lb $FILE
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 0
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 2
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 9999
scoutfs get-fiemap -Lb $FILE

echo "== Punch hole crossing multiple extents =="
rm -rf $FILE && touch $FILE
fallocate -l $((7 * 4096)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((1 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((3 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((5 * 4096)) -l 4096 -V $vers
# 0.1.2.3
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((2 * 4096)) -l $((3 * 4096)) -V $vers
# 0.....1
scoutfs get-fiemap -L $FILE

echo "== punch hole starting at a hole =="
rm -rf $FILE && touch $FILE
fallocate -l $((7 * 4096)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((1 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((3 * 4096)) -l 4096 -V $vers
scoutfs punch-offline $FILE -o $((5 * 4096)) -l 4096 -V $vers
# 0.1.2.3
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((1 * 4096)) -l $((5 * 4096)) -V $vers
# 0.....1
scoutfs get-fiemap -L $FILE

echo "== large punch =="
rm -rf $FILE && touch $FILE
fallocate -l $((6 * 1024 * 1024 * 1024)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
scoutfs release $FILE --data-version $vers
scoutfs get-fiemap -L $FILE
scoutfs punch-offline $FILE -o $((134123 * 4096)) -l $((68343 * 4096)) -V $vers
scoutfs punch-offline $FILE -o $((467273 * 4096)) -l $((68343 * 4096)) -V $vers
scoutfs punch-offline $FILE -o $((734623 * 4096)) -l $((68343 * 4096)) -V $vers
scoutfs get-fiemap -L $FILE

echo "== overlapping punches with lots of extents =="
rm -rf $FILE && touch $FILE
fallocate -l $((4096 * 1024)) $FILE
vers=$(scoutfs stat -s data_version "$FILE")
# NOTE(review): literal data_version 1 here while every other case uses
# $vers -- presumably a fresh file's first data_version is 1; confirm
scoutfs release $FILE --data-version 1
scoutfs get-fiemap -Lb $FILE
# punch odd ones away
for h in $(seq 1 2 1023); do
	scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -Lb $FILE | tail -n 1
# punch a large hole from 32 to 55, removing 7 extents
scoutfs punch-offline $FILE -o $((32 * 4096)) -l $((13 * 4096)) -V $vers
scoutfs get-fiemap -Lb $FILE | tail -n 1
# punch every 8th @6
for h in $(seq 6 8 1024); do
	scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
# again @4
scoutfs get-fiemap -Lb $FILE | tail -n 1
for h in $(seq 4 8 1024); do
	scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -Lb $FILE | tail -n 1
# punching a large hole from 127 to 175, removing 12 extents
scoutfs punch-offline $FILE -o $((127 * 4096)) -l $((48 * 4096)) -V $vers
scoutfs get-fiemap -Lb $FILE
# again @2
for h in $(seq 2 8 1024); do
	scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -L $FILE
# and again @0, punching away every remaining extent
for h in $(seq 0 8 1024); do
	scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
done
scoutfs get-fiemap -Lb $FILE

t_pass

View File

@@ -32,7 +32,7 @@ echo "== dirs shouldn't appear in data_seq queries"
mkdir "$DIR"
ino=$(stat -c "%i" "$DIR")
t_sync_seq_index
query_index data_seq | awk '($4 == "'$ino'")'
query_index data_seq | grep "$ino\>"
echo "== two created files are present and come after each other"
touch "$DIR/first"

View File

@@ -1,50 +0,0 @@
#
# Test that merge_read_item() correctly updates the sequence number when
# combining delta items from multiple finalized log trees. Each mount
# sets a totl value in its own 3-bit lane (powers of 8) so that any
# double-counting overflows the lane and is caught by: or(v, exp) != exp.
#
t_require_commands setfattr scoutfs
t_require_mounts 5

echo "== setup"
# create 2500 files per mount so each mount has plenty of log items
for nr in $(t_fs_nrs); do
	d=$(eval echo \$T_D$nr)
	for i in $(seq 1 2500); do : > "$d/f$nr$i"; done
done
sync
t_force_log_merge

# each mount contributes in its own 3-bit lane: 1+8+64+512+4096 = 4681
vals=(1 8 64 512 4096)
expected=4681
n=0
for nr in $(t_fs_nrs); do
	d=$(eval echo \$T_D$nr)
	v=${vals[$((n++))]}
	for i in $(seq 1 2500); do
		setfattr -n "scoutfs.totl.t.$i.0.0" -v $v "$d/f$nr$i"
	done
done

# make the merge run in multiple partial passes while readers watch
t_trigger_arm_silent log_merge_force_partial $(t_server_nr)

# background readers on every mount record any total whose lanes
# overflow, which only happens if a contribution is counted twice
bad="$T_TMPDIR/bad"
for nr in $(t_fs_nrs); do
	( while true; do
		echo 1 > "$(t_debugfs_path $nr)/drop_weak_item_cache"
		scoutfs read-xattr-totals -p "$(eval echo \$T_M$nr)" | \
			awk -F'[ =,]+' -v e=$expected 'or($2+0,e) != e'
	done ) >> "$bad" &
done

echo "expected $expected"
t_force_log_merge
t_silent_kill $(jobs -p)

# any captured output means a reader saw a double-counted total
test -s "$bad" && echo "double-counted:" && cat "$bad"

echo "== cleanup"
for nr in $(t_fs_nrs); do
	find "$(eval echo \$T_D$nr)" -name "f$nr*" -delete
done

t_pass

View File

@@ -1,127 +0,0 @@
#include <sys/ioctl.h>
#include <fcntl.h>
#include <errno.h>
#include <string.h>
#include <argp.h>
#include "sparse.h"
#include "parse.h"
#include "util.h"
#include "ioctl.h"
#include "cmd.h"
/* parsed command line arguments for the punch-offline command */
struct po_args {
	char *path;		/* path to the file to punch */
	u64 offset;		/* byte offset of the start of the hole */
	u64 length;		/* byte length of the hole */
	u64 data_version;	/* expected data_version of the file */
	/* the _set bits record which required options were given */
	unsigned offset_set:1,
		 length_set:1,
		 data_version_set:1;
};
/*
 * Open the file at args->path and issue the PUNCH_OFFLINE ioctl with
 * the parsed offset, length, and expected data_version.  Returns 0 on
 * success or a negative errno.
 */
static int do_punch_offline(struct po_args *args)
{
	struct scoutfs_ioctl_punch_offline ioctl_args;
	int ret;
	int fd;

	fd = get_path(args->path, O_RDWR);
	if (fd < 0)
		return fd;

	/*
	 * Zero the whole struct before filling it in so that padding or
	 * any fields we don't set aren't handed to the kernel as
	 * uninitialized stack contents.
	 */
	memset(&ioctl_args, 0, sizeof(ioctl_args));
	ioctl_args.offset = args->offset;
	ioctl_args.len = args->length;
	ioctl_args.data_version = args->data_version;
	ioctl_args.flags = 0;

	ret = ioctl(fd, SCOUTFS_IOC_PUNCH_OFFLINE, &ioctl_args);
	if (ret < 0) {
		ret = -errno;
		fprintf(stderr, "punch_offline ioctl failed: %s (%d)\n",
			strerror(errno), errno);
	}

	close(fd);
	return ret;
}
/*
 * argp callback that fills in struct po_args as options are seen and,
 * once parsing finishes, verifies that all required options were given.
 */
static int parse_opt(int key, char *arg, struct argp_state *state)
{
	struct po_args *args = state->input;
	int err;

	switch (key) {
	case 'o': /* offset */
		err = parse_human(arg, &args->offset);
		if (err)
			return err;
		args->offset_set = 1;
		break;
	case 'l': /* length */
		err = parse_human(arg, &args->length);
		if (err)
			return err;
		args->length_set = 1;
		break;
	case 'V': /* data version */
		err = parse_u64(arg, &args->data_version);
		if (err)
			return err;
		args->data_version_set = 1;
		break;
	case ARGP_KEY_ARG:
		/* the single positional argument is the file path */
		if (args->path)
			argp_error(state, "unknown extra argument given");
		else
			args->path = strdup_or_error(state, arg);
		break;
	case ARGP_KEY_FINI:
		/* path, offset, length, and data_version are all required */
		if (!args->path)
			argp_error(state, "must provide path to file");
		if (!args->offset_set)
			argp_error(state, "must provide offset");
		if (!args->length_set)
			argp_error(state, "must provide length");
		if (!args->data_version_set)
			argp_error(state, "must provide data_version");
		break;
	default:
		break;
	}

	return 0;
}
/*
 * Option table and argp description for punch-offline.
 *
 * Fix: the offset and length help strings said "to stage", copied from
 * the stage command; this command punches a hole, not stages data.
 */
static struct argp_option options[] = {
	{ "data-version", 'V', "VERSION", 0, "Data version of the file [Required]"},
	{ "offset", 'o', "OFFSET", 0, "Offset (bytes or KMGTP units) in file to punch [Required]"},
	{ "length", 'l', "LENGTH", 0, "Length of range (bytes or KMGTP units) of file to punch. [Required]"},
	{ NULL }
};

static struct argp argp = {
	options,
	parse_opt,
	"PATH",
	"Make a (sparse) hole in the file at offset and with length"
};
static int punch_offline_cmd(int argc, char **argv)
{
struct po_args po_args = {NULL};
int ret;
ret = argp_parse(&argp, argc, argv, 0, NULL, &po_args);
if (ret)
return ret;
return do_punch_offline(&po_args);
}
/* register the punch-offline command with the command table at load time */
static void __attribute__((constructor)) punch_offline_ctor(void)
{
	cmd_register_argp("punch-offline", &argp, GROUP_AGENT, punch_offline_cmd);
}

View File

@@ -198,11 +198,13 @@ int write_block_sync(int fd, u32 magic, __le64 fsid, u64 seq, u64 blkno,
*/
int meta_super_in_use(int meta_fd, struct scoutfs_super_block *meta_super)
{
struct scoutfs_quorum_block *qblk = NULL;
struct scoutfs_quorum_block *qblk[SCOUTFS_QUORUM_BLOCKS] = {NULL,};
struct scoutfs_quorum_block_event *beg;
struct scoutfs_quorum_block_event *end;
struct scoutfs_quorum_block_event *fence;
bool beg_was_fenced;
int ret = 0;
int i;
int i, j;
if (meta_super->mounted_clients.ref.blkno != 0) {
fprintf(stderr, "meta superblock mounted clients btree is not empty.\n");
@@ -210,36 +212,61 @@ int meta_super_in_use(int meta_fd, struct scoutfs_super_block *meta_super)
goto out;
}
/* check for active quorum slots */
/* read all blocks */
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) {
if (!quorum_slot_present(meta_super, i))
continue;
ret = read_block(meta_fd, SCOUTFS_QUORUM_BLKNO + i, SCOUTFS_BLOCK_SM_SHIFT,
(void **)&qblk);
(void **)&qblk[i]);
if (ret < 0) {
fprintf(stderr, "error reading quorum block for slot %u\n", i);
goto out;
}
}
beg = &qblk->events[SCOUTFS_QUORUM_EVENT_BEGIN];
end = &qblk->events[SCOUTFS_QUORUM_EVENT_END];
/* check for active quorum slots */
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) {
if (!qblk[i])
continue;
if (le64_to_cpu(beg->write_nr) > le64_to_cpu(end->write_nr)) {
fprintf(stderr, "mount in quorum slot %u could still be running.\n"
" begin event: write_nr %llu timestamp %llu.%08u\n"
" end event: write_nr %llu timestamp %llu.%08u\n",
i, le64_to_cpu(beg->write_nr), le64_to_cpu(beg->ts.sec),
le32_to_cpu(beg->ts.nsec),
le64_to_cpu(end->write_nr), le64_to_cpu(end->ts.sec),
le32_to_cpu(end->ts.nsec));
ret = -EBUSY;
goto out;
beg = &qblk[i]->events[SCOUTFS_QUORUM_EVENT_BEGIN];
end = &qblk[i]->events[SCOUTFS_QUORUM_EVENT_END];
if (le64_to_cpu(beg->write_nr) <= le64_to_cpu(end->write_nr))
continue;
/* check if this term was fenced by others in a later term */
beg_was_fenced = false;
for (j = 0; j < SCOUTFS_QUORUM_BLOCKS; j++) {
if ((!qblk[j]) || (i == j))
continue;
fence = &qblk[j]->events[SCOUTFS_QUORUM_EVENT_FENCE];
if (le64_to_cpu(fence->term) > le64_to_cpu(beg->term)) {
beg_was_fenced = true;
break;
}
}
free(qblk);
qblk = NULL;
if (beg_was_fenced)
continue;
fprintf(stderr, "mount in quorum slot %u could still be running.\n"
" begin event: write_nr %llu timestamp %llu.%08u\n"
" end event: write_nr %llu timestamp %llu.%08u\n",
i, le64_to_cpu(beg->write_nr), le64_to_cpu(beg->ts.sec),
le32_to_cpu(beg->ts.nsec),
le64_to_cpu(end->write_nr), le64_to_cpu(end->ts.sec),
le32_to_cpu(end->ts.nsec));
ret = -EBUSY;
goto out;
}
out:
/* free any allocated blocks */
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++)
if (qblk[i] != NULL)
free(qblk[i]);
return ret;
}