mirror of
https://github.com/versity/scoutfs.git
synced 2026-04-30 09:56:55 +00:00
Compare commits
1 Commits
zab/get_ch
...
auke/utils
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d84ba603b6 |
@@ -1,38 +1,6 @@
|
||||
Versity ScoutFS Release Notes
|
||||
=============================
|
||||
|
||||
---
|
||||
v1.30
|
||||
\
|
||||
*Apr 21, 2026*
|
||||
|
||||
Fix a problem reading the accumulated totals of contributing .totl.
|
||||
xattrs when log merging is in progress. The problem would have readers
|
||||
of the totals calculate the sums incorrectly.
|
||||
|
||||
Fix a problem updating quota rules. There was a race where updates
|
||||
could be corrupted if they happened while a transaction was being
|
||||
written.
|
||||
|
||||
Fix a problem deleting files with .indx. xattrs. The internal indexing
|
||||
metadata wouldn't be properly deleted so the files would still claim to
|
||||
be present and visible in the index, though the file no longer existed.
|
||||
|
||||
---
|
||||
v1.29
|
||||
\
|
||||
*Mar 25, 2026*
|
||||
|
||||
Add a repair mechanism for mount logs that weren't properly resolved as
|
||||
mounts left the cluster. The presence of these logs prevents log
|
||||
merging from making forward progress and the backlog of logs over time
|
||||
can cause operations to slow to a crawl. With the repair mechanism in
|
||||
place the orphaned logs don't stop merging and operations proceed as
|
||||
usual.
|
||||
|
||||
Add an ioctl for turning offline unmapped file regions into sparse
|
||||
regions.
|
||||
|
||||
---
|
||||
v1.28
|
||||
\
|
||||
|
||||
@@ -13,7 +13,6 @@ scoutfs-y += \
|
||||
avl.o \
|
||||
alloc.o \
|
||||
block.o \
|
||||
bsearch_index.o \
|
||||
btree.o \
|
||||
client.o \
|
||||
counters.o \
|
||||
@@ -37,7 +36,6 @@ scoutfs-y += \
|
||||
per_task.o \
|
||||
quorum.o \
|
||||
quota.o \
|
||||
raw.o \
|
||||
recov.o \
|
||||
scoutfs_trace.o \
|
||||
server.o \
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
#ifndef _SCOUTFS_BLOCK_H_
|
||||
#define _SCOUTFS_BLOCK_H_
|
||||
|
||||
struct scoutfs_alloc;
|
||||
|
||||
struct scoutfs_block_writer {
|
||||
spinlock_t lock;
|
||||
struct list_head dirty_list;
|
||||
|
||||
@@ -1,59 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/bsearch.h>
|
||||
|
||||
#include "bsearch_index.h"
|
||||
|
||||
struct bsearch_index_key {
|
||||
int (*cmp)(const void *key, const void *elt);
|
||||
/* the key has to be const, so we have to update the index through a pointer */
|
||||
void **index_elt;
|
||||
const void *key;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
static int cmp_index(const void *key, const void *elt)
|
||||
{
|
||||
const struct bsearch_index_key *bik = key;
|
||||
int cmp = bik->cmp(bik->key, elt);
|
||||
|
||||
if (cmp > 0)
|
||||
*(bik->index_elt) = (void *)elt + bik->size;
|
||||
else
|
||||
*(bik->index_elt) = (void *)elt;
|
||||
|
||||
return cmp;
|
||||
}
|
||||
|
||||
/*
|
||||
* A bsearch() wrapper that returns the index of the element of the
|
||||
* array that the key would be stored in to maintain sort order. It's
|
||||
* the first element where the existing element is greater than the key.
|
||||
* It returns the size of the array if the key is greater than the last
|
||||
* element in the array.
|
||||
*/
|
||||
size_t bsearch_index(const void *key, const void *base, size_t num, size_t size,
|
||||
int (*cmp)(const void *key, const void *elt))
|
||||
{
|
||||
void *index_elt = (void *)base;
|
||||
struct bsearch_index_key bik = {
|
||||
.cmp = cmp,
|
||||
.index_elt = &index_elt,
|
||||
.key = key,
|
||||
.size = size,
|
||||
};
|
||||
|
||||
bsearch(&bik, base, num, size, cmp_index);
|
||||
return ((unsigned long)index_elt - (unsigned long)base) / size;
|
||||
}
|
||||
@@ -1,7 +0,0 @@
|
||||
#ifndef _SCOUTFS_BSEARCH_INDEX_H_
|
||||
#define _SCOUTFS_BSEARCH_INDEX_H_
|
||||
|
||||
size_t bsearch_index(const void *key, const void *base, size_t num, size_t size,
|
||||
int (*cmp)(const void *key, const void *elt));
|
||||
|
||||
#endif
|
||||
@@ -1816,11 +1816,6 @@ int scoutfs_btree_dirty(struct super_block *sb,
|
||||
* Call the user's callback on all the items in the leaf that we find.
|
||||
* We also set the caller's keys for the first and last possible keys
|
||||
* that could exist in the leaf block.
|
||||
*
|
||||
* The callback can set a new key to continue reading from rather than
|
||||
* iterating over all the items. It modifies the key and returns
|
||||
* -ESRCH, which performs a new avl search. If the modified key falls
|
||||
* outside of the range of keys in the block then we return.
|
||||
*/
|
||||
int scoutfs_btree_read_items(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
@@ -1834,7 +1829,6 @@ int scoutfs_btree_read_items(struct super_block *sb,
|
||||
struct scoutfs_avl_node *next_node;
|
||||
struct scoutfs_avl_node *node;
|
||||
struct btree_walk_key_range kr;
|
||||
struct scoutfs_key cb_key;
|
||||
struct scoutfs_block *bl;
|
||||
int ret;
|
||||
|
||||
@@ -1848,32 +1842,22 @@ int scoutfs_btree_read_items(struct super_block *sb,
|
||||
if (scoutfs_key_compare(&kr.end, end) < 0)
|
||||
*end = kr.end;
|
||||
|
||||
cb_key = *start;
|
||||
search:
|
||||
node = scoutfs_avl_search(&bt->item_root, cmp_key_item, &cb_key, NULL,
|
||||
node = scoutfs_avl_search(&bt->item_root, cmp_key_item, start, NULL,
|
||||
NULL, &next_node, NULL) ?: next_node;
|
||||
while (node) {
|
||||
item = node_item(node);
|
||||
if (scoutfs_key_compare(&item->key, end) > 0)
|
||||
break;
|
||||
|
||||
cb_key = *item_key(item);
|
||||
ret = cb(sb, &cb_key, le64_to_cpu(item->seq), item->flags,
|
||||
ret = cb(sb, item_key(item), le64_to_cpu(item->seq), item->flags,
|
||||
item_val(bt, item), item_val_len(item), arg);
|
||||
if (ret < 0) {
|
||||
if (ret == -ESRCH) {
|
||||
if (scoutfs_key_compare(&cb_key, start) >= 0)
|
||||
goto search;
|
||||
ret = 0;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
node = scoutfs_avl_next(&bt->item_root, node);
|
||||
}
|
||||
|
||||
scoutfs_block_put(sb, bl);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
@@ -2199,8 +2183,6 @@ static int merge_read_item(struct super_block *sb, struct scoutfs_key *key, u64
|
||||
if (ret > 0) {
|
||||
if (ret == SCOUTFS_DELTA_COMBINED) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_combined);
|
||||
if (seq > found->seq)
|
||||
found->seq = seq;
|
||||
} else if (ret == SCOUTFS_DELTA_COMBINED_NULL) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_null);
|
||||
free_mitem(rng, found);
|
||||
@@ -2504,14 +2486,6 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
mitem = next_mitem(mitem);
|
||||
free_mitem(&rng, tmp);
|
||||
}
|
||||
|
||||
if (mitem && walk_val_len == 0 &&
|
||||
!(walk_flags & (BTW_INSERT | BTW_DELETE)) &&
|
||||
scoutfs_trigger(sb, LOG_MERGE_FORCE_PARTIAL)) {
|
||||
ret = -ERANGE;
|
||||
*next_ret = mitem->key;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
@@ -1517,101 +1517,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Punch holes in offline extents. This is a very specific tool that
|
||||
* only does one job: it converts extents from offline to sparse. It
|
||||
* returns an error if it encounters an extent that isn't offline or has
|
||||
* a block mapping. It ignores i_size completely; it does not test it,
|
||||
* and does not update it.
|
||||
*
|
||||
* The caller has the inode locked in the vfs and performed basic sanity
|
||||
* checks. We manage transactions and the extent_sem which is ordered
|
||||
* inside the transaction.
|
||||
*/
|
||||
int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct data_ext_args args = {
|
||||
.ino = scoutfs_ino(inode),
|
||||
.inode = inode,
|
||||
.lock = lock,
|
||||
};
|
||||
struct scoutfs_extent ext;
|
||||
LIST_HEAD(ind_locks);
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
if (WARN_ON_ONCE(iblock > last)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* idiomatic to call start,last with 0,~0, clamp last to last possible */
|
||||
last = min(last, SCOUTFS_BLOCK_SM_MAX);
|
||||
|
||||
ret = 0;
|
||||
while (iblock <= last) {
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false) ?:
|
||||
scoutfs_dirty_inode_item(inode, lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
down_write(&si->extent_sem);
|
||||
|
||||
for (i = 0; i < 32 && (iblock <= last); i++) {
|
||||
ret = scoutfs_ext_next(sb, &data_ext_ops, &args, iblock, 1, &ext);
|
||||
if (ret == -ENOENT) {
|
||||
iblock = last + 1;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (ext.start > last) {
|
||||
iblock = last + 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ext.map) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
if (ext.flags & SEF_OFFLINE) {
|
||||
if (iblock > ext.start) {
|
||||
ext.len -= iblock - ext.start;
|
||||
ext.start = iblock;
|
||||
}
|
||||
ext.len = min(ext.len, last - ext.start + 1);
|
||||
ext.flags &= ~SEF_OFFLINE;
|
||||
|
||||
ret = scoutfs_ext_set(sb, &data_ext_ops, &args,
|
||||
ext.start, ext.len, ext.map, ext.flags);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
iblock = ext.start + ext.len;
|
||||
}
|
||||
|
||||
up_write(&si->extent_sem);
|
||||
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This copies to userspace :/
|
||||
*/
|
||||
|
||||
@@ -57,8 +57,6 @@ int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
|
||||
int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
u64 byte_len, struct inode *to, u64 to_off, bool to_stage,
|
||||
u64 data_version);
|
||||
int scoutfs_data_punch_offline(struct inode *inode, u64 iblock, u64 last, u64 data_version,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
int scoutfs_data_wait_check(struct inode *inode, loff_t pos, loff_t len,
|
||||
u8 sef, u8 op, struct scoutfs_data_wait *ow,
|
||||
|
||||
@@ -114,42 +114,6 @@ static struct scoutfs_block *read_bloom_ref(struct super_block *sb, struct scout
|
||||
return bl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns >0 if there was a bloom block and all the bits were present.
|
||||
*/
|
||||
static int all_bloom_bits_present(struct super_block *sb, struct scoutfs_block_ref *ref,
|
||||
struct forest_bloom_nrs *bloom)
|
||||
{
|
||||
struct scoutfs_bloom_block *bb;
|
||||
struct scoutfs_block *bl;
|
||||
int i;
|
||||
|
||||
if (ref->blkno == 0)
|
||||
return 0;
|
||||
|
||||
bl = read_bloom_ref(sb, ref);
|
||||
if (IS_ERR(bl))
|
||||
return PTR_ERR(bl);
|
||||
|
||||
bb = bl->data;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(bloom->nrs); i++) {
|
||||
if (!test_bit_le(bloom->nrs[i], bb->bits))
|
||||
break;
|
||||
}
|
||||
|
||||
scoutfs_block_put(sb, bl);
|
||||
|
||||
/* one of the bloom bits wasn't set */
|
||||
if (i != ARRAY_SIZE(bloom->nrs)) {
|
||||
scoutfs_inc_counter(sb, forest_bloom_fail);
|
||||
return 0;
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, forest_bloom_pass);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is an unlocked iteration across all the btrees to find a hint at
|
||||
* the next key that the caller could read. It's used to find out what
|
||||
@@ -263,13 +227,9 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
|
||||
}
|
||||
|
||||
/*
|
||||
* Call the caller's callback for every item in the leaf blocks in each
|
||||
* forest btree that contain the caller's key.
|
||||
*
|
||||
* If a bloom key is provided then each log tree's bloom block is
|
||||
* checked and only trees with all the bloom key's bloom bits set will
|
||||
* be read from. When the bloom key is null all trees will be read
|
||||
* from.
|
||||
* For each forest btree whose bloom block indicates that the lock might
|
||||
* have items stored, call the caller's callback for every item in the
|
||||
* leaf block in each tree which contains the key.
|
||||
*
|
||||
* The btree iter calls clamp the caller's range to the tightest range
|
||||
* that covers all the blocks. Any keys outside of this range can't be
|
||||
@@ -279,26 +239,33 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
|
||||
* to reset their state and retry with a newer version of the btrees.
|
||||
*/
|
||||
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
|
||||
u64 merge_input_seq, struct scoutfs_key *key,
|
||||
struct scoutfs_key *bloom_key, struct scoutfs_key *start,
|
||||
struct scoutfs_key *end, scoutfs_forest_item_cb cb, void *arg)
|
||||
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg)
|
||||
{
|
||||
struct forest_read_items_data rid = {
|
||||
.cb = cb,
|
||||
.cb_arg = arg,
|
||||
};
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_bloom_block *bb;
|
||||
struct forest_bloom_nrs bloom;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_block *bl;
|
||||
struct scoutfs_key ltk;
|
||||
struct scoutfs_key orig_start = *start;
|
||||
struct scoutfs_key orig_end = *end;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
scoutfs_inc_counter(sb, forest_read_items);
|
||||
if (bloom_key)
|
||||
calc_bloom_nrs(&bloom, bloom_key);
|
||||
calc_bloom_nrs(&bloom, bloom_key);
|
||||
|
||||
trace_scoutfs_forest_using_roots(sb, &roots->fs_root, &roots->logs_root);
|
||||
|
||||
*start = orig_start;
|
||||
*end = orig_end;
|
||||
|
||||
/* start with fs root items */
|
||||
rid.fic |= FIC_FS_ROOT;
|
||||
ret = scoutfs_btree_read_items(sb, &roots->fs_root, key, start, end,
|
||||
@@ -325,29 +292,40 @@ int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_r
|
||||
goto out; /* including stale */
|
||||
}
|
||||
|
||||
/* we're not expecting -ENOENT from _read_items */
|
||||
if (lt.item_root.ref.blkno == 0)
|
||||
if (lt.bloom_ref.blkno == 0)
|
||||
continue;
|
||||
|
||||
if (bloom_key) {
|
||||
ret = all_bloom_bits_present(sb, <.bloom_ref, &bloom);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret == 0)
|
||||
continue;
|
||||
bl = read_bloom_ref(sb, <.bloom_ref);
|
||||
if (IS_ERR(bl)) {
|
||||
ret = PTR_ERR(bl);
|
||||
goto out;
|
||||
}
|
||||
bb = bl->data;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(bloom.nrs); i++) {
|
||||
if (!test_bit_le(bloom.nrs[i], bb->bits))
|
||||
break;
|
||||
}
|
||||
|
||||
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) &&
|
||||
(merge_input_seq == 0 ||
|
||||
le64_to_cpu(lt.finalize_seq) < merge_input_seq))
|
||||
rid.fic |= FIC_MERGE_INPUT;
|
||||
scoutfs_block_put(sb, bl);
|
||||
|
||||
/* one of the bloom bits wasn't set */
|
||||
if (i != ARRAY_SIZE(bloom.nrs)) {
|
||||
scoutfs_inc_counter(sb, forest_bloom_fail);
|
||||
continue;
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, forest_bloom_pass);
|
||||
|
||||
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED))
|
||||
rid.fic |= FIC_FINALIZED;
|
||||
|
||||
ret = scoutfs_btree_read_items(sb, <.item_root, key, start,
|
||||
end, forest_read_items, &rid);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
rid.fic &= ~FIC_MERGE_INPUT;
|
||||
rid.fic &= ~FIC_FINALIZED;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
@@ -367,7 +345,7 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret == 0)
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, 0, key, bloom_key, start, end,
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, key, bloom_key, start, end,
|
||||
cb, arg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ struct scoutfs_lock;
|
||||
/* caller gives an item to the callback */
|
||||
enum {
|
||||
FIC_FS_ROOT = (1 << 0),
|
||||
FIC_MERGE_INPUT = (1 << 1),
|
||||
FIC_FINALIZED = (1 << 1),
|
||||
};
|
||||
typedef int (*scoutfs_forest_item_cb)(struct super_block *sb, struct scoutfs_key *key, u64 seq,
|
||||
u8 flags, void *val, int val_len, int fic, void *arg);
|
||||
@@ -25,9 +25,9 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
|
||||
u64 merge_input_seq, struct scoutfs_key *key,
|
||||
struct scoutfs_key *bloom_key, struct scoutfs_key *start,
|
||||
struct scoutfs_key *end, scoutfs_forest_item_cb cb, void *arg);
|
||||
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_set_bloom_bits(struct super_block *sb,
|
||||
struct scoutfs_lock *lock);
|
||||
void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq);
|
||||
|
||||
142
kmod/src/ioctl.c
142
kmod/src/ioctl.c
@@ -49,7 +49,6 @@
|
||||
#include "quota.h"
|
||||
#include "scoutfs_trace.h"
|
||||
#include "util.h"
|
||||
#include "raw.h"
|
||||
|
||||
/*
|
||||
* We make inode index items coherent by locking fixed size regions of
|
||||
@@ -1668,141 +1667,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_punch_offline(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_ioctl_punch_offline __user *upo = (void __user *)arg;
|
||||
struct scoutfs_ioctl_punch_offline po;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
u64 iblock;
|
||||
u64 last;
|
||||
u64 tmp;
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(&po, upo, sizeof(po)))
|
||||
return -EFAULT;
|
||||
|
||||
if (po.len == 0)
|
||||
return 0;
|
||||
|
||||
if (check_add_overflow(po.offset, po.len - 1, &tmp) ||
|
||||
(po.offset & SCOUTFS_BLOCK_SM_MASK) ||
|
||||
(po.len & SCOUTFS_BLOCK_SM_MASK))
|
||||
return -EOVERFLOW;
|
||||
|
||||
if (po.flags)
|
||||
return -EINVAL;
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
inode_lock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (!S_ISREG(inode->i_mode)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!(file->f_mode & FMODE_WRITE)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = inode_permission(KC_VFS_INIT_NS inode, MAY_WRITE);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_inode_data_version(inode) != po.data_version) {
|
||||
ret = -ESTALE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((ret = scoutfs_inode_check_retention(inode)))
|
||||
goto out;
|
||||
|
||||
iblock = po.offset >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
last = (po.offset + po.len - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
|
||||
ret = scoutfs_data_punch_offline(inode, iblock, last, po.data_version, lock);
|
||||
|
||||
out:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
inode_unlock(inode);
|
||||
mnt_drop_write_file(file);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_raw_read_meta_seq(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_raw_read_meta_seq __user *urms = (void __user *)arg;
|
||||
struct scoutfs_ioctl_raw_read_meta_seq rms;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(&rms, urms, sizeof(rms))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rms.results_size == 0) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rms.results_size < sizeof(struct scoutfs_ioctl_meta_seq) ||
|
||||
rms.results_size > INT_MAX) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_raw_read_meta_seq(sb, &rms, &rms.last);
|
||||
if (ret >= 0 && copy_to_user(&urms->last, &rms.last, sizeof(rms.last)))
|
||||
ret = -EFAULT;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_raw_read_inode_info(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_raw_read_inode_info __user *urii = (void __user *)arg;
|
||||
struct scoutfs_ioctl_raw_read_inode_info rii;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(&rii, urii, sizeof(rii))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rii.inos_count == 0 || rii.results_size > INT_MAX ||
|
||||
!IS_ALIGNED(rii.inos_ptr, __alignof__(__u64))) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_raw_read_inode_info(sb, &rii);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
switch (cmd) {
|
||||
@@ -1852,12 +1716,6 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
return scoutfs_ioc_mod_quota_rule(file, arg, false);
|
||||
case SCOUTFS_IOC_READ_XATTR_INDEX:
|
||||
return scoutfs_ioc_read_xattr_index(file, arg);
|
||||
case SCOUTFS_IOC_PUNCH_OFFLINE:
|
||||
return scoutfs_ioc_punch_offline(file, arg);
|
||||
case SCOUTFS_IOC_RAW_READ_META_SEQ:
|
||||
return scoutfs_ioc_raw_read_meta_seq(file, arg);
|
||||
case SCOUTFS_IOC_RAW_READ_INODE_INFO:
|
||||
return scoutfs_ioc_raw_read_inode_info(file, arg);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
||||
207
kmod/src/ioctl.h
207
kmod/src/ioctl.h
@@ -15,6 +15,20 @@
|
||||
|
||||
#define SCOUTFS_IOCTL_MAGIC 0xE8 /* arbitrarily chosen hole in ioctl-number.rst */
|
||||
|
||||
/*
|
||||
* Packed scoutfs keys rarely cross the ioctl boundary so we have a
|
||||
* translation struct.
|
||||
*/
|
||||
struct scoutfs_ioctl_key {
|
||||
__le64 _sk_first;
|
||||
__le64 _sk_second;
|
||||
__le64 _sk_third;
|
||||
__u8 _sk_fourth;
|
||||
__u8 sk_type;
|
||||
__u8 sk_zone;
|
||||
__u8 _pad[5];
|
||||
};
|
||||
|
||||
struct scoutfs_ioctl_walk_inodes_entry {
|
||||
__u64 major;
|
||||
__u64 ino;
|
||||
@@ -834,197 +848,4 @@ struct scoutfs_ioctl_read_xattr_index {
|
||||
#define SCOUTFS_IOC_READ_XATTR_INDEX \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 23, struct scoutfs_ioctl_read_xattr_index)
|
||||
|
||||
/*
|
||||
* This is a limited and specific version of hole punching. It's an
|
||||
* archive layer operation that only converts unmapped offline extents
|
||||
* into sparse extents. It is intended to be used when restoring sparse
|
||||
* files after the initial creation set the entire file size offline.
|
||||
*
|
||||
* The offset and len fields are in units of bytes and must be aligned
|
||||
* to the small (4KiB) block size. All regions of offline extents
|
||||
* covered by the region will be converted into sparse online extents,
|
||||
* including regions that straddle the boundaries of the region. Any
|
||||
* existing sparse extents in the region are ignored.
|
||||
*
|
||||
* The data_version must match the inode or ESTALE is returned. The
|
||||
* data_version is not modified by this operation.
|
||||
*
|
||||
* EINVAL is returned if any mapped extents are found in the region. If
|
||||
* an error is returned then partial progress may have been made.
|
||||
*/
|
||||
struct scoutfs_ioctl_punch_offline {
|
||||
__u64 offset;
|
||||
__u64 len;
|
||||
__u64 data_version;
|
||||
__u64 flags;
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_PUNCH_OFFLINE \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 24, struct scoutfs_ioctl_punch_offline)
|
||||
|
||||
/*
|
||||
* Read meta_seq items without cluster locking.
|
||||
*
|
||||
* @start is the first meta_seq item value that could be returned.
|
||||
* {0,0} is the minimum.
|
||||
*
|
||||
* @end is the last meta_seq item value that could be returned.
|
||||
* {U64_MAX, U64_MAX} is the maximum.
|
||||
*
|
||||
* @last is only set on success from the call. It's the last meta_seq
|
||||
* item that could have been returned. This lets the caller detect that
|
||||
* the full input range wasn't explored. Another call can be made with
|
||||
* start set to just after this.
|
||||
*
|
||||
* @results_ptr is a pointer to an array of (struct
|
||||
* scoutfs_ioctl_meta_seq) elements that were found in the input range.
|
||||
*
|
||||
* @results_size is the count of elements in the results_ptr array and
|
||||
* the maximum number of results that can be returned. There must be
|
||||
* room for at least one result.
|
||||
*
|
||||
* Return existing meta_seq items starting from @start until @last.
|
||||
* Partial results can be returned and are indicated by @last being set
|
||||
* to an item before @end.
|
||||
*
|
||||
* The results are sorted first by increasing meta_seq and then by
|
||||
* increasing ino. All of the results are from one version of file
|
||||
* system metadata. This means that an inode can not be found multiple
|
||||
* times within the results of one call.
|
||||
*
|
||||
* This call ignores currently dirty transactions and reads persistent
|
||||
* items directly. A transaction can be written after this call and
|
||||
* cause meta_seq items to appear before or within the results from this
|
||||
* call.
|
||||
*
|
||||
* The number of meta_seq items stored in the results buffer is returned
|
||||
* and @last is updated. 0 items can be returned if none are found
|
||||
* within the input range.
|
||||
*
|
||||
* Unique errors:
|
||||
*
|
||||
* -EINVAL: The results size was non-zero but smaller than sizeof(struct scoutfs_ioctl_meta_seq), or was greater than INT_MAX.
|
||||
*
|
||||
* -ESTALE: The results could not be read from one stable version of
|
||||
* file system metadata. Decrease the number of inodes requested.
|
||||
*/
|
||||
struct scoutfs_ioctl_meta_seq {
|
||||
__u64 meta_seq;
|
||||
__u64 ino;
|
||||
};
|
||||
struct scoutfs_ioctl_raw_read_meta_seq {
|
||||
struct scoutfs_ioctl_meta_seq start;
|
||||
struct scoutfs_ioctl_meta_seq end;
|
||||
struct scoutfs_ioctl_meta_seq last;
|
||||
__u64 results_ptr;
|
||||
__u32 results_size;
|
||||
__u32 _pad;
|
||||
};
|
||||
#define SCOUTFS_IOC_RAW_READ_META_SEQ \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 25, struct scoutfs_ioctl_raw_read_meta_seq)
|
||||
|
||||
|
||||
/*
|
||||
* Read inode metadata without cluster locking.
|
||||
*
|
||||
* @inos_ptr is a pointer to an aligned array of 64bit inode numbers.
|
||||
*
|
||||
* @inos_count is the number of elements in the array. The inode
|
||||
* numbers must not be zero, must strictly increase, and must not
|
||||
* contain any duplicates.
|
||||
*
|
||||
* @names_ptr is a pointer to a byte array of xattr names to return with
|
||||
* each inode. The names are identical to those used in
|
||||
* {get,set}xattr(2). The names must be null terminated and no two
|
||||
* names may be equal.
|
||||
*
|
||||
* @names_count is the number of names that will be found in the
|
||||
* names_ptr buffer.
|
||||
*
|
||||
* @results_ptr is a pointer to a buffer that will be filled by the read
|
||||
* inode info results. The result structs and payloads are not aligned.
|
||||
* Callers will almost certainly need to copy them into aligned
|
||||
* addresses before referencing their contents.
|
||||
*
|
||||
* @results_size is the number of bytes available in the results_ptr
|
||||
* buffer.
|
||||
*
|
||||
* For each inode an _INODE result will always be returned. Then a
|
||||
* _XATTR result will be returned for each xattr on the inode that
|
||||
* matches one of the given input names.
|
||||
*
|
||||
* Each call will not return partial results. -ERANGE is returned if the
|
||||
* results for the requested inodes do not fit in the results buffer.
|
||||
*
|
||||
* The info for one call is from one consistent version of the file
|
||||
* system metadata. The call may have to retry if it sees metadata
|
||||
* change during its call. -ESTALE will be returned if it was not able
|
||||
* to read all the inodes' info from one metadata version. The number of
|
||||
* inodes being read can be decreased to avoid this.
|
||||
*
|
||||
* Inodes with an nlink of 0 are not returned.
|
||||
*
|
||||
* The size in bytes of filled results is returned. A non-zero return
|
||||
* will always include at least one full
|
||||
* (struct scoutfs_ioctl_raw_read_result) header.
|
||||
*
|
||||
* Unique errors:
|
||||
*
|
||||
* -EINVAL: The inode count can't be zero. The inos ptr must be aligned
|
||||
* to __u64 alignment. The results buffer size can't be larger than
|
||||
* INT_MAX. Inode numbers can't be zero, must be sorted, and can't
|
||||
* have duplicates. The xattr names must be unique, null terminated,
|
||||
* and less than 256 bytes long.
|
||||
*
|
||||
* -ERANGE: The results for the requested inodes do not fit in the
|
||||
* results buffer. Increase the buffer size (perhaps allowing for all
|
||||
* xattrs with large values) or decrease the number of inodes per call.
|
||||
*
|
||||
* -ESTALE: The results could not be read from one stable version of
|
||||
* file system metadata. Decrease the number of inodes requested.
|
||||
*
|
||||
* -EUCLEAN: Internal xattr metadata is inconsistent.
|
||||
*/
|
||||
|
||||
struct scoutfs_ioctl_raw_read_inode_info {
|
||||
__u64 inos_ptr;
|
||||
__u32 inos_count;
|
||||
__u32 names_count;
|
||||
__u64 names_ptr;
|
||||
__u64 results_ptr;
|
||||
__u32 results_size;
|
||||
__u8 _pad[4];
|
||||
};
|
||||
|
||||
/*
|
||||
* @type is one of the enums that determines the type of the following
|
||||
* result payload.
|
||||
*
|
||||
* @size is the number of bytes of result payload immediately following
|
||||
* the result struct. It does not include the size of the result struct
|
||||
* header.
|
||||
*/
|
||||
struct scoutfs_ioctl_raw_read_result {
|
||||
__u32 size;
|
||||
__u8 _pad[7];
|
||||
__u8 type;
|
||||
};
|
||||
|
||||
/*
|
||||
* The _INODE result contains an initial 64bit inode number followed by a
|
||||
* struct scoutfs_inode as defined in format.h. The size includes the
|
||||
* 8byte initial inode number. With that subtracted the size of the
|
||||
* inode struct defines its version (and so the fields it supports).
|
||||
*/
|
||||
#define SCOUTFS_IOC_RAW_READ_RESULT_INODE 1
|
||||
/*
|
||||
* The result payload contains the null terminated name and the value.
|
||||
* The value size can be found by subtracting the null terminated name
|
||||
* length from the result size.
|
||||
*/
|
||||
#define SCOUTFS_IOC_RAW_READ_RESULT_XATTR 2
|
||||
|
||||
#define SCOUTFS_IOC_RAW_READ_INODE_INFO \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 25, struct scoutfs_ioctl_raw_read_inode_info)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1093,24 +1093,19 @@ out_unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void scoutfs_lock_get_fs_item_range(u64 ino, struct scoutfs_key *start, struct scoutfs_key *end)
|
||||
{
|
||||
scoutfs_key_set_zeros(start);
|
||||
start->sk_zone = SCOUTFS_FS_ZONE;
|
||||
start->ski_ino = cpu_to_le64(ino & ~(u64)SCOUTFS_LOCK_INODE_GROUP_MASK);
|
||||
|
||||
scoutfs_key_set_ones(end);
|
||||
end->sk_zone = SCOUTFS_FS_ZONE;
|
||||
end->ski_ino = cpu_to_le64(ino | SCOUTFS_LOCK_INODE_GROUP_MASK);
|
||||
}
|
||||
|
||||
int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int flags, u64 ino,
|
||||
struct scoutfs_lock **ret_lock)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
|
||||
scoutfs_lock_get_fs_item_range(ino, &start, &end);
|
||||
scoutfs_key_set_zeros(&start);
|
||||
start.sk_zone = SCOUTFS_FS_ZONE;
|
||||
start.ski_ino = cpu_to_le64(ino & ~(u64)SCOUTFS_LOCK_INODE_GROUP_MASK);
|
||||
|
||||
scoutfs_key_set_ones(&end);
|
||||
end.sk_zone = SCOUTFS_FS_ZONE;
|
||||
end.ski_ino = cpu_to_le64(ino | SCOUTFS_LOCK_INODE_GROUP_MASK);
|
||||
|
||||
return lock_key_range(sb, mode, flags, &start, &end, ret_lock);
|
||||
}
|
||||
|
||||
@@ -65,7 +65,6 @@ int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
|
||||
int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_key *key);
|
||||
|
||||
void scoutfs_lock_get_fs_item_range(u64 ino, struct scoutfs_key *start, struct scoutfs_key *end);
|
||||
int scoutfs_lock_inode(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct inode *inode, struct scoutfs_lock **ret_lock);
|
||||
int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int flags, u64 ino,
|
||||
|
||||
@@ -34,7 +34,6 @@
|
||||
#include "totl.h"
|
||||
#include "util.h"
|
||||
#include "quota.h"
|
||||
#include "trans.h"
|
||||
#include "counters.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
@@ -1087,10 +1086,6 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_hold_trans(sb, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
down_write(&qtinf->rwsem);
|
||||
|
||||
if (is_add) {
|
||||
@@ -1100,30 +1095,28 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
|
||||
else if (ret == 0)
|
||||
ret = -EEXIST;
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
goto unlock;
|
||||
|
||||
rule_to_rule_val(&rv, &rule);
|
||||
ret = scoutfs_item_create(sb, &key, &rv, sizeof(rv), lock);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
goto unlock;
|
||||
|
||||
} else {
|
||||
ret = find_rule(sb, &rule, &key, lock) ?:
|
||||
scoutfs_item_delete(sb, &key, lock);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
scoutfs_quota_invalidate(sb);
|
||||
ret = 0;
|
||||
|
||||
release:
|
||||
unlock:
|
||||
up_write(&qtinf->rwsem);
|
||||
scoutfs_release_trans(sb);
|
||||
|
||||
out:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
|
||||
out:
|
||||
if (is_add)
|
||||
trace_scoutfs_quota_add_rule(sb, &rule, ret);
|
||||
else
|
||||
|
||||
744
kmod/src/raw.c
744
kmod/src/raw.c
@@ -1,744 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "key.h"
|
||||
#include "block.h"
|
||||
#include "inode.h"
|
||||
#include "forest.h"
|
||||
#include "client.h"
|
||||
#include "ioctl.h"
|
||||
#include "lock.h"
|
||||
#include "xattr.h"
|
||||
#include "attr_x.h"
|
||||
#include "bsearch_index.h"
|
||||
#include "raw.h"
|
||||
|
||||
struct fs_item {
|
||||
struct list_head head;
|
||||
struct scoutfs_key key;
|
||||
u64 seq;
|
||||
int val_len;
|
||||
bool deletion;
|
||||
/* val is aligned so we can deref structs in vals */
|
||||
u8 val[0] __aligned(ARCH_KMALLOC_MINALIGN);
|
||||
};
|
||||
|
||||
/*
 * Allocate a copy of the item, value included, and append it to the
 * tail of the list.  Returns 0 on success or -ENOMEM.
 */
static int save_fs_item(struct list_head *list, struct scoutfs_key *key, u64 seq, u8 flags,
			void *val, int val_len)
{
	struct fs_item *item;

	/* max btree val len is hundreds of bytes */
	item = kmalloc(offsetof(struct fs_item, val[val_len]), GFP_NOFS);
	if (item == NULL)
		return -ENOMEM;

	if (val_len > 0)
		memcpy(item->val, val, val_len);
	item->val_len = val_len;
	item->deletion = (flags & SCOUTFS_ITEM_FLAG_DELETION) != 0;
	item->seq = seq;
	item->key = *key;

	list_add_tail(&item->head, list);
	return 0;
}
|
||||
|
||||
static void free_fs_item(struct fs_item *fsi)
|
||||
{
|
||||
if (!list_empty(&fsi->head))
|
||||
list_del_init(&fsi->head);
|
||||
kfree(fsi);
|
||||
}
|
||||
|
||||
static void free_fs_items(struct list_head *list)
|
||||
{
|
||||
struct fs_item *fsi;
|
||||
struct fs_item *tmp;
|
||||
|
||||
list_for_each_entry_safe(fsi, tmp, list, head)
|
||||
free_fs_item(fsi);
|
||||
}
|
||||
|
||||
static struct fs_item *next_fs_item(struct list_head *list, struct fs_item *fsi)
|
||||
{
|
||||
list_for_each_entry_continue(fsi, list, head)
|
||||
return fsi;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
 * list_sort() comparison for saved items.  Items are ordered by key
 * first.  The seq comparison is negated for items with equal keys, so
 * the order of seqs within a key is the reverse of scoutfs_cmp().
 */
static int cmp_fs_items(void *priv, KC_LIST_CMP_CONST struct list_head *A,
			KC_LIST_CMP_CONST struct list_head *B)
{
	KC_LIST_CMP_CONST struct fs_item *a =
		container_of(A, KC_LIST_CMP_CONST struct fs_item, head);
	KC_LIST_CMP_CONST struct fs_item *b =
		container_of(B, KC_LIST_CMP_CONST struct fs_item, head);
	int cmp;

	cmp = scoutfs_key_compare(&a->key, &b->key);
	if (cmp == 0)
		cmp = -scoutfs_cmp(a->seq, b->seq);

	return cmp;
}
|
||||
|
||||
static void sort_and_remove(struct list_head *list, struct scoutfs_key *end)
|
||||
{
|
||||
struct fs_item *prev;
|
||||
struct fs_item *fsi;
|
||||
struct fs_item *tmp;
|
||||
|
||||
list_sort(NULL, list, cmp_fs_items);
|
||||
|
||||
/* start by removing any items read before end was decreased by later blocks */
|
||||
list_for_each_entry_safe_reverse(fsi, tmp, list, head) {
|
||||
if (scoutfs_key_compare(&fsi->key, end) > 0)
|
||||
free_fs_item(fsi);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
prev = NULL;
|
||||
list_for_each_entry_safe(fsi, tmp, list, head) {
|
||||
/* remove this item if it's an older version of previous item */
|
||||
if (prev && scoutfs_key_compare(&prev->key, &fsi->key) == 0) {
|
||||
free_fs_item(fsi);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* remove previous deletion item once it has removed all older versions */
|
||||
if (prev && prev->deletion)
|
||||
free_fs_item(prev);
|
||||
|
||||
/* next item might match this, record to compare */
|
||||
prev = fsi;
|
||||
}
|
||||
|
||||
/* remove the last item if it's a deletion */
|
||||
list_for_each_entry_reverse(fsi, list, head) {
|
||||
if (fsi->deletion)
|
||||
free_fs_item(fsi);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* Forest read callback that saves every item it's given on the list passed as arg. */
static int save_all_items(struct super_block *sb, struct scoutfs_key *key, u64 seq, u8 flags,
			  void *val, int val_len, int fic, void *arg)
{
	return save_fs_item((struct list_head *)arg, key, seq, flags, val, val_len);
}
|
||||
|
||||
/* -------------- */
|
||||
|
||||
static void ms_from_key(struct scoutfs_ioctl_meta_seq *ms, struct scoutfs_key *key)
|
||||
{
|
||||
ms->meta_seq = le64_to_cpu(key->skii_major);
|
||||
ms->ino = le64_to_cpu(key->skii_ino);
|
||||
}
|
||||
|
||||
/*
|
||||
* Increment the key's ino->meta_seq so that we don't land between items.
|
||||
*/
|
||||
static void inc_meta_seq(struct scoutfs_key *key)
|
||||
{
|
||||
le64_add_cpu(&key->skii_ino, 1);
|
||||
if (key->skii_ino == 0)
|
||||
le64_add_cpu(&key->skii_major, 1);
|
||||
}
|
||||
|
||||
int scoutfs_raw_read_meta_seq(struct super_block *sb,
|
||||
struct scoutfs_ioctl_raw_read_meta_seq *rms,
|
||||
struct scoutfs_ioctl_meta_seq *last_ret)
|
||||
{
|
||||
struct scoutfs_ioctl_meta_seq __user *ums;
|
||||
struct scoutfs_ioctl_meta_seq ms;
|
||||
struct scoutfs_net_roots roots;
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key end;
|
||||
struct fs_item *fsi;
|
||||
struct fs_item *tmp;
|
||||
LIST_HEAD(list);
|
||||
int retries;
|
||||
int copied;
|
||||
int count;
|
||||
int ret;
|
||||
|
||||
ums = (void __user *)rms->results_ptr;
|
||||
count = rms->results_size / sizeof(struct scoutfs_ioctl_meta_seq);
|
||||
retries = 10;
|
||||
copied = 0;
|
||||
|
||||
scoutfs_inode_init_index_key(&last, SCOUTFS_INODE_INDEX_META_SEQ_TYPE,
|
||||
rms->end.meta_seq, 0, rms->end.ino);
|
||||
|
||||
retry:
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
scoutfs_inode_init_index_key(&key, SCOUTFS_INODE_INDEX_META_SEQ_TYPE,
|
||||
rms->start.meta_seq, 0, rms->start.ino);
|
||||
|
||||
for (;;) {
|
||||
start = key;
|
||||
end = last;
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, 0, &key, NULL, &start, &end,
|
||||
save_all_items, &list);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sort_and_remove(&list, &end);
|
||||
|
||||
list_for_each_entry_safe(fsi, tmp, &list, head) {
|
||||
|
||||
if (copied == count) {
|
||||
/* results are full, set end to before item can't return */
|
||||
end = fsi->key;
|
||||
le64_add_cpu(&end.skii_ino, -1ULL);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ms_from_key(&ms, &fsi->key);
|
||||
if (copy_to_user(&ums[copied], &ms, sizeof(ms))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
free_fs_item(fsi);
|
||||
copied++;
|
||||
}
|
||||
|
||||
if (scoutfs_key_compare(&end, &last) >= 0) {
|
||||
end = last;
|
||||
break;
|
||||
}
|
||||
|
||||
key = end;
|
||||
inc_meta_seq(&key);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
free_fs_items(&list);
|
||||
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
|
||||
if (ret == -ESTALE && copied == 0 && retries-- > 0)
|
||||
goto retry;
|
||||
|
||||
ms_from_key(last_ret, &end);
|
||||
|
||||
return ret ?: copied;
|
||||
}
|
||||
|
||||
/* -------------- */
|
||||
|
||||
struct inode_info_context {
|
||||
size_t nr_inos;
|
||||
u64 *inos;
|
||||
|
||||
size_t nr_names;
|
||||
struct xattr_name {
|
||||
u64 hash;
|
||||
char *name;
|
||||
u8 name_len; /* no null */
|
||||
} *names;
|
||||
|
||||
struct list_head fs_items;
|
||||
};
|
||||
|
||||
static int cmp_u64(const void *A, const void *B)
|
||||
{
|
||||
const u64 *a = A;
|
||||
const u64 *b = B;
|
||||
|
||||
return scoutfs_cmp(*a, *b);
|
||||
}
|
||||
|
||||
static int cmp_name_hash(const void *A, const void *B)
|
||||
{
|
||||
const struct xattr_name *a = A;
|
||||
const struct xattr_name *b = B;
|
||||
|
||||
return scoutfs_cmp(a->hash, b->hash);
|
||||
}
|
||||
|
||||
static int cmp_name_string(const void *A, const void *B)
|
||||
{
|
||||
const struct xattr_name *a = A;
|
||||
const struct xattr_name *b = B;
|
||||
|
||||
return scoutfs_cmp(a->name_len, b->name_len) ?: memcmp(a->name, b->name, a->name_len);
|
||||
}
|
||||
|
||||
static int setup_context(struct inode_info_context *ctx,
|
||||
struct scoutfs_ioctl_raw_read_inode_info *rii)
|
||||
{
|
||||
__u64 __user *uinos = (void __user *)rii->inos_ptr;
|
||||
char __user *uname;
|
||||
long len_null;
|
||||
long len;
|
||||
int ret;
|
||||
u32 i;
|
||||
|
||||
ctx->nr_inos = rii->inos_count;
|
||||
ctx->nr_names = rii->names_count;
|
||||
INIT_LIST_HEAD(&ctx->fs_items);
|
||||
|
||||
ctx->inos = kvmalloc_array(ctx->nr_inos, sizeof(ctx->inos[0]), GFP_KERNEL);
|
||||
ctx->names = kvcalloc(ctx->nr_names, sizeof(ctx->names[0]), GFP_KERNEL);
|
||||
if (!ctx->inos || !ctx->names) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(ctx->inos, uinos, ctx->nr_inos * sizeof(ctx->inos[0]))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* inos must not be 0 and must increase and contain no duplicates */
|
||||
if (ctx->inos[0] == 0) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
for (i = 1; i < ctx->nr_inos; i++) {
|
||||
if (ctx->inos[i] <= ctx->inos[i - 1]) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
uname = (void __user *)rii->names_ptr;
|
||||
for (i = 0; i < ctx->nr_names; i++) {
|
||||
len_null = SCOUTFS_XATTR_MAX_NAME_LEN + 1;
|
||||
ret = strnlen_user(uname, len_null);
|
||||
if (ret <= 1 || ret > len_null) {
|
||||
if (ret >= 0)
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
len_null = ret;
|
||||
len = len_null - 1;
|
||||
|
||||
ctx->names[i].name_len = len;
|
||||
ctx->names[i].name = kmalloc(len_null, GFP_KERNEL);
|
||||
if (!ctx->names[i].name) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = strncpy_from_user(ctx->names[i].name, uname, len_null);
|
||||
if (ret != len) {
|
||||
if (ret >= 0)
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ctx->names[i].hash = scoutfs_xattr_name_hash(ctx->names[i].name, len);
|
||||
uname += len_null;
|
||||
}
|
||||
|
||||
/* make sure all the names differ */
|
||||
sort(ctx->names, ctx->nr_names, sizeof(ctx->names[0]), cmp_name_string, NULL);
|
||||
for (i = 1; i < ctx->nr_names; i++) {
|
||||
if (cmp_name_string(&ctx->names[i - 1], &ctx->names[i]) == 0) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* then leave them sorted by hash */
|
||||
sort(ctx->names, ctx->nr_names, sizeof(ctx->names[0]), cmp_name_hash, NULL);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void free_context(struct inode_info_context *ctx)
|
||||
{
|
||||
int i;
|
||||
|
||||
kvfree(ctx->inos);
|
||||
|
||||
if (ctx->names) {
|
||||
for (i = 0; i < ctx->nr_names; i++) {
|
||||
if (!ctx->names[i].name)
|
||||
break;
|
||||
kfree(ctx->names[i].name);
|
||||
}
|
||||
kvfree(ctx->names);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over fs items and save any that we're interested in. We want
|
||||
* inode struct items and any xattr items whose hashes collide with the
|
||||
* xattr names we're searching for.
|
||||
*
|
||||
* Our forest calls can be advancing through the key space as we see
|
||||
* slices that intersect with blocks in trees. And each forest caller
|
||||
* can be resetting the key position to the start of each forest block
|
||||
* it reads in an intersection.
|
||||
*
|
||||
* From this callback's perspective, the key can be jumping all over the
|
||||
* place. We don't have any iterative position state. For each key we
|
||||
* decide if we want to save it and then set the key to the next key we
|
||||
* want after the current key. We'll combine all the saved keys later.
|
||||
*/
|
||||
static int save_info_items(struct super_block *sb, struct scoutfs_key *key, u64 seq,
|
||||
u8 flags, void *val, int val_len, int fic, void *arg)
|
||||
{
|
||||
u64 ino = le64_to_cpu(key->_sk_first);
|
||||
struct inode_info_context *ctx = arg;
|
||||
struct xattr_name name;
|
||||
size_t name_ind;
|
||||
size_t ino_ind;
|
||||
bool hash_match;
|
||||
bool ino_match;
|
||||
int ret;
|
||||
|
||||
ino_ind = bsearch_index(&ino, ctx->inos, ctx->nr_inos, sizeof(ctx->inos[0]), cmp_u64);
|
||||
ino_match = ino_ind < ctx->nr_inos && ctx->inos[ino_ind] == ino;
|
||||
|
||||
/* jump to to next ino, could be for this key if we're before the ino struct */
|
||||
if (!ino_match || key->sk_type < SCOUTFS_INODE_TYPE)
|
||||
goto next_inode;
|
||||
|
||||
/* find our search position in xattrs */
|
||||
if (key->sk_type < SCOUTFS_XATTR_TYPE) {
|
||||
name_ind = 0;
|
||||
hash_match = false;
|
||||
|
||||
} else if (key->sk_type == SCOUTFS_XATTR_TYPE) {
|
||||
name = (struct xattr_name) { .hash = le64_to_cpu(key->skx_name_hash) };
|
||||
name_ind = bsearch_index(&name, ctx->names, ctx->nr_names, sizeof(ctx->names[0]),
|
||||
cmp_name_hash);
|
||||
hash_match = name_ind < ctx->nr_names && ctx->names[name_ind].hash == name.hash;
|
||||
} else {
|
||||
name_ind = ctx->nr_names;
|
||||
hash_match = false;
|
||||
}
|
||||
|
||||
/* save inode items for our search and all xattr items that match search hashes */
|
||||
if (key->sk_type == SCOUTFS_INODE_TYPE || hash_match) {
|
||||
ret = save_fs_item(&ctx->fs_items, key, seq, flags, val, val_len);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* let the caller continue iterating through matching xattr items */
|
||||
if (hash_match) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* jump to the next xattr */
|
||||
if (name_ind < ctx->nr_names) {
|
||||
scoutfs_xattr_init_key(key, ino, ctx->names[name_ind].hash, 0);
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* no more xattrs, must be done with this ino */
|
||||
ino_ind++;
|
||||
|
||||
next_inode:
|
||||
/* now jump to next inode struct key, or we're done */
|
||||
if (ino_ind < ctx->nr_inos)
|
||||
scoutfs_inode_init_key(key, ctx->inos[ino_ind]);
|
||||
else
|
||||
scoutfs_key_set_ones(key);
|
||||
|
||||
ret = -ESRCH;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Copy copy_size bytes from src to the user buffer at offset *dst_off
 * and advance *dst_off past the copied bytes.  dst_size is the total
 * size of the user buffer.
 *
 * Returns 0 on success (including when there was nothing to copy),
 * -ERANGE if the bytes don't fit in the remainder of the buffer, or
 * -EFAULT if the copy to userspace failed.
 */
static int copy_to_user_off(void __user *dst, size_t *dst_off, size_t dst_size,
			    void *src, size_t copy_size)
{
	if (copy_size == 0)
		return 0;

	/* compare with the remaining space so that a huge copy_size can't wrap the sum */
	if (*dst_off > dst_size || copy_size > dst_size - *dst_off)
		return -ERANGE;

	if (copy_to_user(dst + *dst_off, src, copy_size))
		return -EFAULT;

	*dst_off += copy_size;
	return 0;
}
|
||||
|
||||
/*
 * Copy a result header to the user buffer followed by the a and b
 * payload buffers, either of which can be empty.  The size in the
 * header is the sum of both payload lengths and extra_size, which
 * counts payload bytes that the caller copies itself after we return.
 *
 * Returns 0 or the first error from copying.
 */
static int copy_result_to_user(void __user *ures, size_t *off, size_t size, u8 type,
			       void *a_data, size_t a_len, void *b_data, size_t b_len,
			       size_t extra_size)
{
	struct scoutfs_ioctl_raw_read_result res;
	int ret;

	/* start from zeros so the padding bytes copied to the user are zero */
	memzero_explicit(&res, sizeof(res));
	res.size = a_len + b_len + extra_size;
	res.type = type;

	ret = copy_to_user_off(ures, off, size, &res, sizeof(res));
	if (ret == 0 && a_len != 0)
		ret = copy_to_user_off(ures, off, size, a_data, a_len);
	if (ret == 0 && b_len != 0)
		ret = copy_to_user_off(ures, off, size, b_data, b_len);

	return ret;
}
|
||||
|
||||
static int copy_item_results_to_user(struct super_block *sb, struct inode_info_context *ctx,
|
||||
void __user *ures, size_t *off, size_t size,
|
||||
struct fs_item *fsi)
|
||||
{
|
||||
struct scoutfs_inode *cinode;
|
||||
struct scoutfs_xattr *xat;
|
||||
static char null = '\0';
|
||||
size_t len;
|
||||
u64 ino;
|
||||
int ret = 0;
|
||||
|
||||
if (fsi->key.sk_type == SCOUTFS_INODE_TYPE) {
|
||||
cinode = (void *)fsi->val;
|
||||
ino = le64_to_cpu(fsi->key.ski_ino);
|
||||
|
||||
ret = copy_result_to_user(ures, off, size, SCOUTFS_IOC_RAW_READ_RESULT_INODE,
|
||||
&ino, sizeof(ino), cinode, sizeof(struct scoutfs_inode),
|
||||
0);
|
||||
|
||||
} else if (fsi->key.sk_type == SCOUTFS_XATTR_TYPE) {
|
||||
if (fsi->key.skx_part == 0) {
|
||||
xat = (void *)fsi->val;
|
||||
ret = copy_result_to_user(ures, off, size,
|
||||
SCOUTFS_IOC_RAW_READ_RESULT_XATTR, xat->name,
|
||||
xat->name_len, &null, sizeof(null),
|
||||
le16_to_cpu(xat->val_len));
|
||||
if (ret == 0 && xat->val_len != 0) {
|
||||
/* then append the start of the value */
|
||||
len = fsi->val_len -
|
||||
offsetof(struct scoutfs_xattr, name[xat->name_len]);
|
||||
ret = copy_to_user_off(ures, off, size, xat->name + xat->name_len,
|
||||
len);
|
||||
}
|
||||
} else {
|
||||
/* continue appending partial values */
|
||||
ret = copy_to_user_off(ures, off, size, fsi->val, fsi->val_len);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool ignore_zero_nlink(struct inode_info_context *ctx, struct fs_item *fsi)
|
||||
{
|
||||
struct scoutfs_inode *cinode = (void *)fsi->val;
|
||||
|
||||
return cinode->nlink == 0;
|
||||
}
|
||||
|
||||
static bool ignore_xattr_name(struct inode_info_context *ctx, struct fs_item *fsi)
|
||||
{
|
||||
struct scoutfs_xattr *xat = (void *)fsi->val;
|
||||
struct xattr_name name = {
|
||||
.hash = le64_to_cpu(fsi->key.skx_name_hash),
|
||||
.name = xat->name,
|
||||
.name_len = xat->name_len,
|
||||
};
|
||||
size_t i;
|
||||
|
||||
for (i = bsearch_index(&name, ctx->names, ctx->nr_names, sizeof(ctx->names[0]),
|
||||
cmp_name_hash);
|
||||
i < ctx->nr_names && name.hash == ctx->names[i].hash; i++) {
|
||||
if (cmp_name_string(&name, &ctx->names[i]) == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int copy_results_to_user(struct super_block *sb, struct inode_info_context *ctx,
|
||||
struct scoutfs_ioctl_raw_read_inode_info *rii)
|
||||
{
|
||||
void __user *ures = (void __user *)rii->results_ptr;
|
||||
struct scoutfs_xattr *xat;
|
||||
struct fs_item *next;
|
||||
struct fs_item *fsi;
|
||||
struct fs_item *tmp;
|
||||
size_t xattr_end;
|
||||
size_t off;
|
||||
__le64 in_ino;
|
||||
__le64 in_id;
|
||||
int ret;
|
||||
|
||||
in_ino = 0;
|
||||
xattr_end = 0;
|
||||
in_id = 0;
|
||||
off = 0;
|
||||
|
||||
list_for_each_entry_safe(fsi, tmp, &ctx->fs_items, head) {
|
||||
/*
|
||||
* ignore:
|
||||
* - inodes with an nlink of 0
|
||||
* - all items for an ino after the inode struct that we're ignoring
|
||||
* - first xattr parts with a name we don't need
|
||||
* - additional xattr parts when we ignored the first
|
||||
*/
|
||||
if ((fsi->key.sk_type == SCOUTFS_INODE_TYPE && ignore_zero_nlink(ctx, fsi)) ||
|
||||
(fsi->key.sk_type > SCOUTFS_INODE_TYPE && fsi->key._sk_first != in_ino) ||
|
||||
(fsi->key.sk_type == SCOUTFS_XATTR_TYPE &&
|
||||
((fsi->key.skx_part == 0 && ignore_xattr_name(ctx, fsi)) ||
|
||||
(fsi->key.skx_part > 0 && fsi->key.skx_id != in_id)))) {
|
||||
free_fs_item(fsi);
|
||||
in_ino = 0;
|
||||
in_id = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* advance ino/xattr stream context state machine */
|
||||
if (fsi->key.sk_type == SCOUTFS_INODE_TYPE) {
|
||||
in_ino = fsi->key.ski_ino;
|
||||
in_id = 0;
|
||||
} else if (fsi->key.sk_type == SCOUTFS_XATTR_TYPE && fsi->key.skx_part == 0) {
|
||||
in_id = fsi->key.skx_id;
|
||||
/* save the required offset after the complete xattr */
|
||||
xat = (void *)fsi->val;
|
||||
xattr_end = off + sizeof(struct scoutfs_ioctl_raw_read_result) +
|
||||
xat->name_len + 1 + le16_to_cpu(xat->val_len);
|
||||
}
|
||||
|
||||
/* copy results, usually with header, but additional xattr parts copied raw */
|
||||
ret = copy_item_results_to_user(sb, ctx, ures, &off, rii->results_size, fsi);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* make sure we saw all xattr parts and copied the correct size */
|
||||
if (xattr_end > 0 &&
|
||||
!((next = next_fs_item(&ctx->fs_items, fsi)) &&
|
||||
next->key.sk_type == SCOUTFS_XATTR_TYPE && next->key.skx_ino == in_ino &&
|
||||
next->key.skx_id == in_id)) {
|
||||
if (off != xattr_end) {
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
xattr_end = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret ?: off;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the key is for an inode we're not interested in, or if its past
|
||||
* the xattr items, then advance to the next inode. This is used
|
||||
* between forest read items calls to avoid leaf blocks. The callback
|
||||
* takes care of iterating through the items for an inode across
|
||||
* multiple leaves.
|
||||
*/
|
||||
static void advance_key_ino(struct scoutfs_key *key, struct inode_info_context *ctx)
|
||||
{
|
||||
u64 ino = le64_to_cpu(key->_sk_first);
|
||||
size_t ino_ind;
|
||||
|
||||
ino_ind = bsearch_index(&ino, ctx->inos, ctx->nr_inos, sizeof(ctx->inos[0]), cmp_u64);
|
||||
if (ino_ind < ctx->nr_inos && ctx->inos[ino_ind] == ino) {
|
||||
if (key->sk_type <= SCOUTFS_XATTR_TYPE)
|
||||
return;
|
||||
else
|
||||
ino_ind++;
|
||||
}
|
||||
|
||||
if (ino_ind < ctx->nr_inos)
|
||||
scoutfs_inode_init_key(key, ctx->inos[ino_ind]);
|
||||
else
|
||||
scoutfs_key_set_ones(key);
|
||||
}
|
||||
|
||||
int scoutfs_raw_read_inode_info(struct super_block *sb,
|
||||
struct scoutfs_ioctl_raw_read_inode_info *rii)
|
||||
{
|
||||
struct inode_info_context ctx = {0, };
|
||||
struct scoutfs_net_roots roots;
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
struct scoutfs_key lock_start;
|
||||
struct scoutfs_key lock_end;
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key end;
|
||||
LIST_HEAD(list);
|
||||
int retries = 10;
|
||||
int ret;
|
||||
|
||||
ret = setup_context(&ctx, rii);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (ctx.nr_names > 0)
|
||||
scoutfs_xattr_init_key(&last, ctx.inos[ctx.nr_inos -1],
|
||||
ctx.names[ctx.nr_names - 1].hash, U64_MAX);
|
||||
else
|
||||
scoutfs_inode_init_key(&last, ctx.inos[ctx.nr_inos - 1]);
|
||||
|
||||
retry:
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
scoutfs_inode_init_key(&key, ctx.inos[0]);
|
||||
|
||||
while (scoutfs_key_compare(&key, &last) <= 0) {
|
||||
scoutfs_lock_get_fs_item_range(le64_to_cpu(key._sk_first), &lock_start, &lock_end);
|
||||
|
||||
start = key;
|
||||
end = last;
|
||||
if (scoutfs_key_compare(&lock_end, &end) < 0)
|
||||
end = lock_end;
|
||||
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, 0, &key, &lock_start,
|
||||
&start, &end, save_info_items, &ctx);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* save each sorted batch, might have partial results for an inode */
|
||||
sort_and_remove(&ctx.fs_items, &end);
|
||||
list_splice_tail_init(&ctx.fs_items, &list);
|
||||
|
||||
key = end;
|
||||
if (!scoutfs_key_is_ones(&key)) {
|
||||
scoutfs_key_inc(&key);
|
||||
advance_key_ino(&key, &ctx);
|
||||
}
|
||||
}
|
||||
|
||||
list_splice_tail_init(&list, &ctx.fs_items);
|
||||
ret = copy_results_to_user(sb, &ctx, rii);
|
||||
out:
|
||||
free_fs_items(&list);
|
||||
free_fs_items(&ctx.fs_items);
|
||||
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
|
||||
if (ret == -ESTALE && retries-- > 0)
|
||||
goto retry;
|
||||
|
||||
free_context(&ctx);
|
||||
return ret;
|
||||
}
|
||||
@@ -1,10 +0,0 @@
|
||||
#ifndef _SCOUTFS_RAW_H_
|
||||
#define _SCOUTFS_RAW_H_
|
||||
|
||||
int scoutfs_raw_read_meta_seq(struct super_block *sb,
|
||||
struct scoutfs_ioctl_raw_read_meta_seq *rms,
|
||||
struct scoutfs_ioctl_meta_seq *last_ret);
|
||||
int scoutfs_raw_read_inode_info(struct super_block *sb,
|
||||
struct scoutfs_ioctl_raw_read_inode_info *rii);
|
||||
|
||||
#endif
|
||||
@@ -256,14 +256,6 @@ static void server_down(struct server_info *server)
|
||||
cmpxchg(&server->status, was, SERVER_DOWN);
|
||||
}
|
||||
|
||||
/* Initialize the key of the mounted client item for the given rid, all other fields are zero. */
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
{
	const struct scoutfs_key mounted_client = {
		.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
		.skmc_rid = cpu_to_le64(rid),
	};

	*key = mounted_client;
}
|
||||
|
||||
/*
|
||||
* The per-holder allocation block use budget balances batching
|
||||
* efficiency and concurrency. The larger this gets, the fewer
|
||||
@@ -971,28 +963,6 @@ static int find_log_trees_item(struct super_block *sb,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the given rid has a mounted_clients entry.
|
||||
*/
|
||||
static bool rid_is_mounted(struct super_block *sb, u64 rid)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
init_mounted_client_key(&key, rid);
|
||||
|
||||
mutex_lock(&server->mounted_clients_mutex);
|
||||
ret = scoutfs_btree_lookup(sb, &super->mounted_clients, &key, &iref);
|
||||
if (ret == 0)
|
||||
scoutfs_btree_put_iref(&iref);
|
||||
mutex_unlock(&server->mounted_clients_mutex);
|
||||
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the log_trees item with the greatest nr for each rid. Fills the
|
||||
* caller's log_trees and sets the key before the returned log_trees for
|
||||
@@ -1251,60 +1221,6 @@ static int do_finalize_ours(struct super_block *sb,
|
||||
* happens to arrive at just the right time. That's fine, merging will
|
||||
* ignore and tear down the empty input.
|
||||
*/
|
||||
|
||||
static int reclaim_open_log_tree(struct super_block *sb, u64 rid);
|
||||
|
||||
/*
|
||||
* Reclaim log trees for rids that have no mounted_clients entry.
|
||||
* They block merges by appearing active. reclaim_open_log_tree
|
||||
* may need multiple commits to drain allocators (-EINPROGRESS).
|
||||
*
|
||||
* The caller holds logs_mutex and a commit, both are dropped and
|
||||
* re-acquired around each reclaim call. Returns >0 if any orphans
|
||||
* were reclaimed so the caller can re-check state that may have
|
||||
* changed while the lock was dropped.
|
||||
*/
|
||||
static int reclaim_orphan_log_trees(struct super_block *sb, u64 rid,
|
||||
struct commit_hold *hold)
|
||||
{
|
||||
struct server_info *server = SCOUTFS_SB(sb)->server_info;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_key key;
|
||||
bool found = false;
|
||||
u64 orphan_rid;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
|
||||
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, <)) > 0) {
|
||||
|
||||
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) ||
|
||||
le64_to_cpu(lt.rid) == rid ||
|
||||
rid_is_mounted(sb, le64_to_cpu(lt.rid)))
|
||||
continue;
|
||||
|
||||
orphan_rid = le64_to_cpu(lt.rid);
|
||||
scoutfs_err(sb, "reclaiming orphan log trees for rid %016llx nr %llu",
|
||||
orphan_rid, le64_to_cpu(lt.nr));
|
||||
found = true;
|
||||
|
||||
do {
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
err = reclaim_open_log_tree(sb, orphan_rid);
|
||||
ret = server_apply_commit(sb, hold,
|
||||
err == -EINPROGRESS ? 0 : err);
|
||||
server_hold_commit(sb, hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
} while (err == -EINPROGRESS && ret == 0);
|
||||
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
return ret < 0 ? ret : found;
|
||||
}
|
||||
|
||||
#define FINALIZE_POLL_MIN_DELAY_MS 5U
|
||||
#define FINALIZE_POLL_MAX_DELAY_MS 100U
|
||||
#define FINALIZE_POLL_DELAY_GROWTH_PCT 150U
|
||||
@@ -1345,16 +1261,6 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
break;
|
||||
}
|
||||
|
||||
ret = reclaim_orphan_log_trees(sb, rid, hold);
|
||||
if (ret < 0) {
|
||||
err_str = "reclaiming orphan log trees";
|
||||
break;
|
||||
}
|
||||
if (ret > 0) {
|
||||
/* lock was dropped, re-check merge status */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* look for finalized and other active log btrees */
|
||||
saw_finalized = false;
|
||||
others_active = false;
|
||||
@@ -2023,7 +1929,7 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
|
||||
/* only finalize, allowing merging, once the allocators are fully freed */
|
||||
if (ret == 0 && !scoutfs_trigger(sb, RECLAIM_SKIP_FINALIZE)) {
|
||||
if (ret == 0) {
|
||||
/* the transaction is no longer open */
|
||||
lt.commit_trans_seq = lt.get_trans_seq;
|
||||
|
||||
@@ -2075,8 +1981,7 @@ static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret)
|
||||
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
|
||||
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &lt)) > 0) {
|
||||
if ((le64_to_cpu(lt.get_trans_seq) > le64_to_cpu(lt.commit_trans_seq)) &&
|
||||
le64_to_cpu(lt.get_trans_seq) <= last_seq &&
|
||||
rid_is_mounted(sb, le64_to_cpu(lt.rid))) {
|
||||
le64_to_cpu(lt.get_trans_seq) <= last_seq) {
|
||||
last_seq = le64_to_cpu(lt.get_trans_seq) - 1;
|
||||
}
|
||||
}
|
||||
@@ -3628,6 +3533,14 @@ out:
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, &nst, sizeof(nst));
|
||||
}
|
||||
|
||||
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
|
||||
{
|
||||
*key = (struct scoutfs_key) {
|
||||
.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
|
||||
.skmc_rid = cpu_to_le64(rid),
|
||||
};
|
||||
}
|
||||
|
||||
static bool invalid_mounted_client_item(struct scoutfs_btree_item_ref *iref)
|
||||
{
|
||||
return (iref->val_len != sizeof(struct scoutfs_mounted_client_btree_val));
|
||||
|
||||
@@ -30,11 +30,6 @@ void scoutfs_totl_merge_init(struct scoutfs_totl_merging *merg)
|
||||
memset(merg, 0, sizeof(struct scoutfs_totl_merging));
|
||||
}
|
||||
|
||||
/*
|
||||
* bin the incoming merge inputs so that we can resolve delta items
|
||||
* properly. Finalized logs that are merge inputs are kept separately
|
||||
* from those that are not.
|
||||
*/
|
||||
void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
|
||||
u64 seq, u8 flags, void *val, int val_len, int fic)
|
||||
{
|
||||
@@ -44,10 +39,10 @@ void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
|
||||
merg->fs_seq = seq;
|
||||
merg->fs_total = le64_to_cpu(tval->total);
|
||||
merg->fs_count = le64_to_cpu(tval->count);
|
||||
} else if (fic & FIC_MERGE_INPUT) {
|
||||
merg->inp_seq = seq;
|
||||
merg->inp_total += le64_to_cpu(tval->total);
|
||||
merg->inp_count += le64_to_cpu(tval->count);
|
||||
} else if (fic & FIC_FINALIZED) {
|
||||
merg->fin_seq = seq;
|
||||
merg->fin_total += le64_to_cpu(tval->total);
|
||||
merg->fin_count += le64_to_cpu(tval->count);
|
||||
} else {
|
||||
merg->log_seq = seq;
|
||||
merg->log_total += le64_to_cpu(tval->total);
|
||||
@@ -58,18 +53,15 @@ void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
|
||||
/*
|
||||
* .totl. item merging has to be careful because the log btree merging
|
||||
* code can write partial results to the fs_root. This means that a
|
||||
* reader can see both cases where merge input deltas should be applied
|
||||
* to the old fs items and where they have already been applied to the
|
||||
* partially merged fs items.
|
||||
*
|
||||
* Only finalized log trees that are inputs to the current merge cycle
|
||||
* are tracked in the inp_ bucket. Finalized trees that aren't merge
|
||||
* inputs and active log trees are always applied unconditionally since
|
||||
* they cannot be in fs_root.
|
||||
* reader can see both cases where new finalized logs should be applied
|
||||
* to the old fs items and where old finalized logs have already been
|
||||
* applied to the partially merged fs items. Currently active logged
|
||||
* items are always applied on top of all cases.
|
||||
*
|
||||
* These cases are differentiated with a combination of sequence numbers
|
||||
* in items and the count of contributing xattrs. This lets us
|
||||
* recognize all cases, including when merge inputs were merged and
|
||||
* in items, the count of contributing xattrs, and a flag
|
||||
* differentiating finalized and active logged items. This lets us
|
||||
* recognize all cases, including when finalized logs were merged and
|
||||
* deleted the fs item.
|
||||
*/
|
||||
void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total, __u64 *count)
|
||||
@@ -83,14 +75,14 @@ void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total,
|
||||
*count = merg->fs_count;
|
||||
}
|
||||
|
||||
/* apply merge input deltas if they're newer or creating */
|
||||
if (((merg->fs_seq != 0) && (merg->inp_seq > merg->fs_seq)) ||
|
||||
((merg->fs_seq == 0) && (merg->inp_count > 0))) {
|
||||
*total += merg->inp_total;
|
||||
*count += merg->inp_count;
|
||||
/* apply finalized logs if they're newer or creating */
|
||||
if (((merg->fs_seq != 0) && (merg->fin_seq > merg->fs_seq)) ||
|
||||
((merg->fs_seq == 0) && (merg->fin_count > 0))) {
|
||||
*total += merg->fin_total;
|
||||
*count += merg->fin_count;
|
||||
}
|
||||
|
||||
/* always apply non-input finalized and active logs */
|
||||
/* always apply active logs which must be newer than fs and finalized */
|
||||
if (merg->log_seq > 0) {
|
||||
*total += merg->log_total;
|
||||
*count += merg->log_count;
|
||||
|
||||
@@ -7,9 +7,9 @@ struct scoutfs_totl_merging {
|
||||
u64 fs_seq;
|
||||
u64 fs_total;
|
||||
u64 fs_count;
|
||||
u64 inp_seq;
|
||||
u64 inp_total;
|
||||
s64 inp_count;
|
||||
u64 fin_seq;
|
||||
u64 fin_total;
|
||||
s64 fin_count;
|
||||
u64 log_seq;
|
||||
u64 log_total;
|
||||
s64 log_count;
|
||||
|
||||
@@ -45,8 +45,6 @@ static char *names[] = {
|
||||
[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
|
||||
[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
|
||||
[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
|
||||
[SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE] = "reclaim_skip_finalize",
|
||||
[SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL] = "log_merge_force_partial",
|
||||
};
|
||||
|
||||
bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
|
||||
|
||||
@@ -8,8 +8,6 @@ enum scoutfs_trigger {
|
||||
SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
|
||||
SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
|
||||
SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
|
||||
SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE,
|
||||
SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL,
|
||||
SCOUTFS_TRIGGER_NR,
|
||||
};
|
||||
|
||||
|
||||
@@ -95,7 +95,6 @@ struct wkic_info {
|
||||
/* block reading slow path */
|
||||
struct mutex roots_mutex;
|
||||
struct scoutfs_net_roots roots;
|
||||
u64 merge_input_seq;
|
||||
u64 roots_read_seq;
|
||||
ktime_t roots_expire;
|
||||
|
||||
@@ -806,79 +805,29 @@ static void free_page_list(struct super_block *sb, struct list_head *list)
|
||||
* read_seq number so that we can compare the age of the items in cached
|
||||
* pages. Only one request to refresh the roots is in progress at a
|
||||
* time. This is the slow path that's only used when the cache isn't
|
||||
* populated and the roots aren't cached.
|
||||
*
|
||||
* We read roots directly from the on-disk superblock rather than
|
||||
* requesting them from the server so that we can also read the
|
||||
* log_merge btree from the same superblock. The merge status item
|
||||
* seq tells us which finalized log trees are inputs to the current
|
||||
* merge, which is needed to correctly resolve totl delta items.
|
||||
* populated and the roots aren't cached. The root request is fast
|
||||
* enough, especially compared to the resulting item reading IO, that we
|
||||
* don't mind hiding it behind a trivial mutex.
|
||||
*/
|
||||
static int refresh_roots(struct super_block *sb, struct wkic_info *winf)
|
||||
{
|
||||
struct scoutfs_super_block *super;
|
||||
struct scoutfs_log_merge_status *stat;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
super = kmalloc(sizeof(*super), GFP_NOFS);
|
||||
if (!super)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
winf->roots = (struct scoutfs_net_roots){
|
||||
.fs_root = super->fs_root,
|
||||
.logs_root = super->logs_root,
|
||||
.srch_root = super->srch_root,
|
||||
};
|
||||
|
||||
winf->merge_input_seq = 0;
|
||||
if (super->log_merge.ref.blkno) {
|
||||
scoutfs_key_set_zeros(&key);
|
||||
key.sk_zone = SCOUTFS_LOG_MERGE_STATUS_ZONE;
|
||||
ret = scoutfs_btree_lookup(sb, &super->log_merge, &key, &iref);
|
||||
if (ret == 0) {
|
||||
if (iref.val_len == sizeof(*stat)) {
|
||||
stat = iref.val;
|
||||
winf->merge_input_seq = le64_to_cpu(stat->seq);
|
||||
} else {
|
||||
ret = -EUCLEAN;
|
||||
}
|
||||
scoutfs_btree_put_iref(&iref);
|
||||
} else if (ret == -ENOENT) {
|
||||
ret = 0;
|
||||
}
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
winf->roots_read_seq++;
|
||||
winf->roots_expire = ktime_add_ms(ktime_get_raw(), WKIC_CACHE_LIFETIME_MS);
|
||||
out:
|
||||
kfree(super);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int get_roots(struct super_block *sb, struct wkic_info *winf,
|
||||
struct scoutfs_net_roots *roots_ret, u64 *merge_input_seq,
|
||||
u64 *read_seq, bool force_new)
|
||||
struct scoutfs_net_roots *roots_ret, u64 *read_seq, bool force_new)
|
||||
{
|
||||
struct scoutfs_net_roots roots;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&winf->roots_mutex);
|
||||
|
||||
if (force_new || ktime_before(winf->roots_expire, ktime_get_raw())) {
|
||||
ret = refresh_roots(sb, winf);
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
winf->roots = roots;
|
||||
winf->roots_read_seq++;
|
||||
winf->roots_expire = ktime_add_ms(ktime_get_raw(), WKIC_CACHE_LIFETIME_MS);
|
||||
}
|
||||
|
||||
*roots_ret = winf->roots;
|
||||
*merge_input_seq = winf->merge_input_seq;
|
||||
*read_seq = winf->roots_read_seq;
|
||||
ret = 0;
|
||||
out:
|
||||
@@ -921,30 +870,24 @@ static int insert_read_pages(struct super_block *sb, struct wkic_info *winf,
|
||||
struct scoutfs_key end;
|
||||
struct wkic_page *wpage;
|
||||
LIST_HEAD(pages);
|
||||
u64 merge_input_seq;
|
||||
u64 read_seq = 0;
|
||||
u64 read_seq;
|
||||
int ret;
|
||||
|
||||
ret = 0;
|
||||
retry_stale:
|
||||
ret = get_roots(sb, winf, &roots, &merge_input_seq, &read_seq, ret == -ESTALE);
|
||||
ret = get_roots(sb, winf, &roots, &read_seq, ret == -ESTALE);
|
||||
if (ret < 0)
|
||||
goto check_stale;
|
||||
goto out;
|
||||
|
||||
start = *range_start;
|
||||
end = *range_end;
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, merge_input_seq, key, range_start,
|
||||
&start, &end, read_items_cb, &root);
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, key, range_start, &start, &end,
|
||||
read_items_cb, &root);
|
||||
trace_scoutfs_wkic_read_items(sb, key, &start, &end);
|
||||
check_stale:
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
|
||||
if (ret < 0) {
|
||||
if (ret == -ESTALE) {
|
||||
/* not safe to retry due to delta items, must restart clean */
|
||||
free_item_tree(&root);
|
||||
root = RB_ROOT;
|
||||
if (ret == -ESTALE)
|
||||
goto retry_stale;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@
|
||||
* - add acl support and call generic xattr->handlers for SYSTEM
|
||||
*/
|
||||
|
||||
u32 scoutfs_xattr_name_hash(const char *name, unsigned int name_len)
|
||||
static u32 xattr_name_hash(const char *name, unsigned int name_len)
|
||||
{
|
||||
return crc32c(U32_MAX, name, name_len);
|
||||
}
|
||||
@@ -65,7 +65,8 @@ static unsigned int xattr_nr_parts(struct scoutfs_xattr *xat)
|
||||
le16_to_cpu(xat->val_len));
|
||||
}
|
||||
|
||||
void scoutfs_xattr_init_key(struct scoutfs_key *key, u64 ino, u32 name_hash, u64 id)
|
||||
static void init_xattr_key(struct scoutfs_key *key, u64 ino, u32 name_hash,
|
||||
u64 id)
|
||||
{
|
||||
*key = (struct scoutfs_key) {
|
||||
.sk_zone = SCOUTFS_FS_ZONE,
|
||||
@@ -186,10 +187,10 @@ static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
|
||||
return -EINVAL;
|
||||
|
||||
if (name_len)
|
||||
name_hash = scoutfs_xattr_name_hash(name, name_len);
|
||||
name_hash = xattr_name_hash(name, name_len);
|
||||
|
||||
scoutfs_xattr_init_key(key, scoutfs_ino(inode), name_hash, id);
|
||||
scoutfs_xattr_init_key(&last, scoutfs_ino(inode), U32_MAX, U64_MAX);
|
||||
init_xattr_key(key, scoutfs_ino(inode), name_hash, id);
|
||||
init_xattr_key(&last, scoutfs_ino(inode), U32_MAX, U64_MAX);
|
||||
|
||||
for (;;) {
|
||||
ret = scoutfs_item_next(sb, key, &last, xat, xat_bytes, lock);
|
||||
@@ -334,8 +335,8 @@ static int create_xattr_items(struct inode *inode, u64 id, struct scoutfs_xattr
|
||||
int len;
|
||||
int i;
|
||||
|
||||
scoutfs_xattr_init_key(&key, scoutfs_ino(inode),
|
||||
scoutfs_xattr_name_hash(xat->name, xat->name_len), id);
|
||||
init_xattr_key(&key, scoutfs_ino(inode),
|
||||
xattr_name_hash(xat->name, xat->name_len), id);
|
||||
|
||||
for (i = 0; i < new_parts; i++) {
|
||||
key.skx_part = i;
|
||||
@@ -364,7 +365,7 @@ static int delete_xattr_items(struct inode *inode, u32 name_hash, u64 id,
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
scoutfs_xattr_init_key(&key, scoutfs_ino(inode), name_hash, id);
|
||||
init_xattr_key(&key, scoutfs_ino(inode), name_hash, id);
|
||||
|
||||
/* dirty additional existing old items */
|
||||
for (i = 1; i < nr_parts; i++) {
|
||||
@@ -406,8 +407,8 @@ static int change_xattr_items(struct inode *inode, u64 id,
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
scoutfs_xattr_init_key(&key, scoutfs_ino(inode),
|
||||
scoutfs_xattr_name_hash(xat->name, xat->name_len), id);
|
||||
init_xattr_key(&key, scoutfs_ino(inode),
|
||||
xattr_name_hash(xat->name, xat->name_len), id);
|
||||
|
||||
/* dirty existing old items */
|
||||
for (i = 0; i < old_parts; i++) {
|
||||
@@ -1223,8 +1224,8 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
goto out;
|
||||
}
|
||||
|
||||
scoutfs_xattr_init_key(&key, ino, 0, 0);
|
||||
scoutfs_xattr_init_key(&last, ino, U32_MAX, U64_MAX);
|
||||
init_xattr_key(&key, ino, 0, 0);
|
||||
init_xattr_key(&last, ino, U32_MAX, U64_MAX);
|
||||
|
||||
for (;;) {
|
||||
ret = scoutfs_item_next(sb, &key, &last, (void *)xat, bytes,
|
||||
@@ -1264,7 +1265,6 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
ret = parse_indx_key(&tag_key, xat->name, xat->name_len, ino);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
scoutfs_xattr_set_indx_key_xid(&tag_key, le64_to_cpu(key.skx_id));
|
||||
}
|
||||
|
||||
if ((tgs.totl || tgs.indx) && locked_zone != tag_key.sk_zone) {
|
||||
|
||||
@@ -10,9 +10,6 @@ struct scoutfs_xattr_prefix_tags {
|
||||
|
||||
extern const struct xattr_handler *scoutfs_xattr_handlers[];
|
||||
|
||||
u32 scoutfs_xattr_name_hash(const char *name, unsigned int name_len);
|
||||
void scoutfs_xattr_init_key(struct scoutfs_key *key, u64 ino, u32 name_hash, u64 id);
|
||||
|
||||
int scoutfs_xattr_get_locked(struct inode *inode, const char *name, void *buffer, size_t size,
|
||||
struct scoutfs_lock *lck);
|
||||
int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_len,
|
||||
|
||||
1
tests/.gitignore
vendored
1
tests/.gitignore
vendored
@@ -12,4 +12,3 @@ src/o_tmpfile_umask
|
||||
src/o_tmpfile_linkat
|
||||
src/mmap_stress
|
||||
src/mmap_validate
|
||||
src/watch_raw_inode_change
|
||||
|
||||
@@ -15,8 +15,7 @@ BIN := src/createmany \
|
||||
src/o_tmpfile_umask \
|
||||
src/o_tmpfile_linkat \
|
||||
src/mmap_stress \
|
||||
src/mmap_validate \
|
||||
src/watch_raw_inode_change
|
||||
src/mmap_validate
|
||||
|
||||
DEPS := $(wildcard src/*.d)
|
||||
|
||||
|
||||
@@ -20,6 +20,9 @@ t_filter_fs()
|
||||
# [ 2687.691366] BUG: KASAN: stack-out-of-bounds in get_reg+0x1bc/0x230
|
||||
# ...
|
||||
# [ 2687.706220] ==================================================================
|
||||
# [ 2687.707284] Disabling lock debugging due to kernel taint
|
||||
#
|
||||
# That final lock debugging message may not be included.
|
||||
#
|
||||
ignore_harmless_unwind_kasan_stack_oob()
|
||||
{
|
||||
@@ -43,6 +46,10 @@ awk '
|
||||
saved=""
|
||||
}
|
||||
( in_soob == 2 && $0 ~ /==================================================================/ ) {
|
||||
in_soob = 3
|
||||
soob_nr = NR
|
||||
}
|
||||
( in_soob == 3 && NR > soob_nr && $0 !~ /Disabling lock debugging/ ) {
|
||||
in_soob = 0
|
||||
}
|
||||
( !in_soob ) { print $0 }
|
||||
@@ -54,58 +61,6 @@ awk '
|
||||
'
|
||||
}
|
||||
|
||||
#
|
||||
# in el97+, XFS can generate a spurious lockdep circular dependency
|
||||
# warning about reclaim. Fixed upstream in e.g. v5.7-rc4-129-g6dcde60efd94
|
||||
#
|
||||
ignore_harmless_xfs_lockdep_warning()
|
||||
{
|
||||
awk '
|
||||
BEGIN {
|
||||
in_block = 0
|
||||
block_nr = 0
|
||||
buf = ""
|
||||
}
|
||||
( !in_block && $0 ~ /======================================================/ ) {
|
||||
in_block = 1
|
||||
block_nr = NR
|
||||
buf = $0 "\n"
|
||||
next
|
||||
}
|
||||
( in_block == 1 && NR == (block_nr + 1) ) {
|
||||
if (match($0, /WARNING: possible circular locking dependency detected/) != 0) {
|
||||
in_block = 2
|
||||
buf = buf $0 "\n"
|
||||
} else {
|
||||
in_block = 0
|
||||
printf "%s", buf
|
||||
print $0
|
||||
buf = ""
|
||||
}
|
||||
next
|
||||
}
|
||||
( in_block == 2 ) {
|
||||
buf = buf $0 "\n"
|
||||
if ($0 ~ /<\/TASK>/) {
|
||||
if (buf ~ /xfs_(nondir_|dir_)?ilock_class/ && buf ~ /fs_reclaim/) {
|
||||
# known xfs lockdep false positive, discard
|
||||
} else {
|
||||
printf "%s", buf
|
||||
}
|
||||
in_block = 0
|
||||
buf = ""
|
||||
}
|
||||
next
|
||||
}
|
||||
{ print $0 }
|
||||
END {
|
||||
if (buf) {
|
||||
printf "%s", buf
|
||||
}
|
||||
}
|
||||
'
|
||||
}
|
||||
|
||||
#
|
||||
# Filter out expected messages. Putting messages here implies that
|
||||
# tests aren't relying on messages to discover failures.. they're
|
||||
@@ -168,9 +123,6 @@ t_filter_dmesg()
|
||||
re="$re|hrtimer: interrupt took .*"
|
||||
re="$re|clocksource: Long readout interval"
|
||||
|
||||
# orphan log trees reclaim is handled, not an error
|
||||
re="$re|scoutfs .* reclaiming orphan log trees"
|
||||
|
||||
# fencing tests force unmounts and trigger timeouts
|
||||
re="$re|scoutfs .* forcing unmount"
|
||||
re="$re|scoutfs .* reconnect timed out"
|
||||
@@ -221,10 +173,6 @@ t_filter_dmesg()
|
||||
# creating block devices may trigger this
|
||||
re="$re|block device autoloading is deprecated and will be removed."
|
||||
|
||||
# lockdep or kasan warnings can cause this
|
||||
re="$re|Disabling lock debugging due to kernel taint"
|
||||
|
||||
egrep -v "($re)" | \
|
||||
ignore_harmless_unwind_kasan_stack_oob | \
|
||||
ignore_harmless_xfs_lockdep_warning
|
||||
ignore_harmless_unwind_kasan_stack_oob
|
||||
}
|
||||
|
||||
@@ -1,54 +0,0 @@
|
||||
== testing invalid read-xattr-index arguments
|
||||
bad index position entry argument 'bad', it must be in the form "a.b.ino" where each value can be prefixed by '0' for octal or '0x' for hex
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
bad index position entry argument '1.2', it must be in the form "a.b.ino" where each value can be prefixed by '0' for octal or '0x' for hex
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
initial major index position '256' must be between 0 and 255, inclusive.
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
first index position 1.2.3 must be less than last index position 0.0.0
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
first index position 1.2.0 must be less than last index position 1.1.2
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
first index position 2.2.2 must be less than last index position 2.2.1
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
== testing invalid names
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Numerical result out of range
|
||||
== testing boundary values
|
||||
0.0 found
|
||||
255.max found
|
||||
== indx xattr must have no value
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/noval: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/noval: Invalid argument
|
||||
== set indx xattr and verify index entry
|
||||
found
|
||||
== setting same indx xattr again is a no-op
|
||||
found
|
||||
== removing non-existent indx xattr succeeds
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/file: No such attribute
|
||||
still found
|
||||
== explicit xattr removal cleans up index entry
|
||||
== file deletion cleans up index entry
|
||||
found before delete
|
||||
== multiple indx xattrs on one file cleaned up by deletion
|
||||
entries before delete: 2
|
||||
entries after delete: 0
|
||||
== partial removal leaves other entries
|
||||
300 found
|
||||
== multiple files at same index position
|
||||
files at same position: 2
|
||||
surviving file found
|
||||
== cross-mount visibility
|
||||
found on mount 1
|
||||
== duplicate position deduplication
|
||||
entries for same position: 1
|
||||
@@ -1,3 +0,0 @@
|
||||
== create orphan log_trees entry via trigger
|
||||
== verify orphan is reclaimed and merge completes
|
||||
== verify orphan reclaim was logged
|
||||
@@ -1,460 +0,0 @@
|
||||
== missing options should fail ==
|
||||
punch-offline: must provide offset
|
||||
Try `punch-offline --help' or `punch-offline --usage' for more information.
|
||||
punch-offline: must provide length
|
||||
Try `punch-offline --help' or `punch-offline --usage' for more information.
|
||||
punch-offline: must provide data_version
|
||||
Try `punch-offline --help' or `punch-offline --usage' for more information.
|
||||
== can't hole punch dir or special ==
|
||||
failed to open '/mnt/test.0/test/punch-offline/dir': Is a directory (21)
|
||||
scoutfs: punch-offline failed: Is a directory (21)
|
||||
== punching an empty file does nothing ==
|
||||
== punch outside of i_size does nothing ==
|
||||
== can't hole punch online extent ==
|
||||
0: offset: 0 length: 4096 flags: ..L
|
||||
extents: 1
|
||||
punch_offline ioctl failed: Invalid argument (22)
|
||||
scoutfs: punch-offline failed: Invalid argument (22)
|
||||
0: offset: 0 length: 4096 flags: ..L
|
||||
extents: 1
|
||||
== can't hole punch unwritten extent ==
|
||||
0: offset: 0 length: 12288 flags: .UL
|
||||
extents: 1
|
||||
punch_offline ioctl failed: Invalid argument (22)
|
||||
scoutfs: punch-offline failed: Invalid argument (22)
|
||||
0: offset: 0 length: 12288 flags: .UL
|
||||
extents: 1
|
||||
== hole punch offline extent ==
|
||||
0: offset: 0 length: 12288 flags: O.L
|
||||
extents: 1
|
||||
0: offset: 0 length: 4096 flags: O..
|
||||
1: offset: 8192 length: 4096 flags: O.L
|
||||
extents: 2
|
||||
== can't hole punch non-aligned bsz offset or len ==
|
||||
0: offset: 0 length: 12288 flags: O.L
|
||||
extents: 1
|
||||
punch_offline ioctl failed: Value too large for defined data type (75)
|
||||
scoutfs: punch-offline failed: Value too large for defined data type (75)
|
||||
punch_offline ioctl failed: Value too large for defined data type (75)
|
||||
scoutfs: punch-offline failed: Value too large for defined data type (75)
|
||||
punch_offline ioctl failed: Value too large for defined data type (75)
|
||||
scoutfs: punch-offline failed: Value too large for defined data type (75)
|
||||
punch_offline ioctl failed: Value too large for defined data type (75)
|
||||
scoutfs: punch-offline failed: Value too large for defined data type (75)
|
||||
punch_offline ioctl failed: Value too large for defined data type (75)
|
||||
scoutfs: punch-offline failed: Value too large for defined data type (75)
|
||||
punch_offline ioctl failed: Value too large for defined data type (75)
|
||||
scoutfs: punch-offline failed: Value too large for defined data type (75)
|
||||
0: offset: 0 length: 12288 flags: O.L
|
||||
extents: 1
|
||||
== can't hole punch mismatched data_version ==
|
||||
0: offset: 0 length: 12288 flags: O.L
|
||||
extents: 1
|
||||
punch_offline ioctl failed: Stale file handle (116)
|
||||
scoutfs: punch-offline failed: Stale file handle (116)
|
||||
punch_offline ioctl failed: Stale file handle (116)
|
||||
scoutfs: punch-offline failed: Stale file handle (116)
|
||||
punch_offline ioctl failed: Stale file handle (116)
|
||||
scoutfs: punch-offline failed: Stale file handle (116)
|
||||
0: offset: 0 length: 12288 flags: O.L
|
||||
extents: 1
|
||||
== Punch hole crossing multiple extents ==
|
||||
0: offset: 0 length: 7 flags: O.L
|
||||
extents: 1
|
||||
0: offset: 0 length: 1 flags: O..
|
||||
1: offset: 2 length: 1 flags: O..
|
||||
2: offset: 4 length: 1 flags: O..
|
||||
3: offset: 6 length: 1 flags: O.L
|
||||
extents: 4
|
||||
0: offset: 0 length: 1 flags: O..
|
||||
1: offset: 6 length: 1 flags: O.L
|
||||
extents: 2
|
||||
== punch hole starting at a hole ==
|
||||
0: offset: 0 length: 7 flags: O.L
|
||||
extents: 1
|
||||
0: offset: 0 length: 1 flags: O..
|
||||
1: offset: 2 length: 1 flags: O..
|
||||
2: offset: 4 length: 1 flags: O..
|
||||
3: offset: 6 length: 1 flags: O.L
|
||||
extents: 4
|
||||
0: offset: 0 length: 1 flags: O..
|
||||
1: offset: 6 length: 1 flags: O.L
|
||||
extents: 2
|
||||
== large punch ==
|
||||
0: offset: 0 length: 1572864 flags: O.L
|
||||
extents: 1
|
||||
0: offset: 0 length: 134123 flags: O..
|
||||
1: offset: 202466 length: 264807 flags: O..
|
||||
2: offset: 535616 length: 199007 flags: O..
|
||||
3: offset: 802966 length: 769898 flags: O.L
|
||||
extents: 4
|
||||
== overlapping punches with lots of extents ==
|
||||
0: offset: 0 length: 4194304 flags: O.L
|
||||
extents: 1
|
||||
extents: 512
|
||||
extents: 505
|
||||
extents: 378
|
||||
extents: 252
|
||||
0: offset: 0 length: 4096 flags: O..
|
||||
1: offset: 8192 length: 4096 flags: O..
|
||||
2: offset: 32768 length: 4096 flags: O..
|
||||
3: offset: 40960 length: 4096 flags: O..
|
||||
4: offset: 65536 length: 4096 flags: O..
|
||||
5: offset: 73728 length: 4096 flags: O..
|
||||
6: offset: 98304 length: 4096 flags: O..
|
||||
7: offset: 106496 length: 4096 flags: O..
|
||||
8: offset: 196608 length: 4096 flags: O..
|
||||
9: offset: 204800 length: 4096 flags: O..
|
||||
10: offset: 229376 length: 4096 flags: O..
|
||||
11: offset: 237568 length: 4096 flags: O..
|
||||
12: offset: 262144 length: 4096 flags: O..
|
||||
13: offset: 270336 length: 4096 flags: O..
|
||||
14: offset: 294912 length: 4096 flags: O..
|
||||
15: offset: 303104 length: 4096 flags: O..
|
||||
16: offset: 327680 length: 4096 flags: O..
|
||||
17: offset: 335872 length: 4096 flags: O..
|
||||
18: offset: 360448 length: 4096 flags: O..
|
||||
19: offset: 368640 length: 4096 flags: O..
|
||||
20: offset: 393216 length: 4096 flags: O..
|
||||
21: offset: 401408 length: 4096 flags: O..
|
||||
22: offset: 425984 length: 4096 flags: O..
|
||||
23: offset: 434176 length: 4096 flags: O..
|
||||
24: offset: 458752 length: 4096 flags: O..
|
||||
25: offset: 466944 length: 4096 flags: O..
|
||||
26: offset: 491520 length: 4096 flags: O..
|
||||
27: offset: 499712 length: 4096 flags: O..
|
||||
28: offset: 720896 length: 4096 flags: O..
|
||||
29: offset: 729088 length: 4096 flags: O..
|
||||
30: offset: 753664 length: 4096 flags: O..
|
||||
31: offset: 761856 length: 4096 flags: O..
|
||||
32: offset: 786432 length: 4096 flags: O..
|
||||
33: offset: 794624 length: 4096 flags: O..
|
||||
34: offset: 819200 length: 4096 flags: O..
|
||||
35: offset: 827392 length: 4096 flags: O..
|
||||
36: offset: 851968 length: 4096 flags: O..
|
||||
37: offset: 860160 length: 4096 flags: O..
|
||||
38: offset: 884736 length: 4096 flags: O..
|
||||
39: offset: 892928 length: 4096 flags: O..
|
||||
40: offset: 917504 length: 4096 flags: O..
|
||||
41: offset: 925696 length: 4096 flags: O..
|
||||
42: offset: 950272 length: 4096 flags: O..
|
||||
43: offset: 958464 length: 4096 flags: O..
|
||||
44: offset: 983040 length: 4096 flags: O..
|
||||
45: offset: 991232 length: 4096 flags: O..
|
||||
46: offset: 1015808 length: 4096 flags: O..
|
||||
47: offset: 1024000 length: 4096 flags: O..
|
||||
48: offset: 1048576 length: 4096 flags: O..
|
||||
49: offset: 1056768 length: 4096 flags: O..
|
||||
50: offset: 1081344 length: 4096 flags: O..
|
||||
51: offset: 1089536 length: 4096 flags: O..
|
||||
52: offset: 1114112 length: 4096 flags: O..
|
||||
53: offset: 1122304 length: 4096 flags: O..
|
||||
54: offset: 1146880 length: 4096 flags: O..
|
||||
55: offset: 1155072 length: 4096 flags: O..
|
||||
56: offset: 1179648 length: 4096 flags: O..
|
||||
57: offset: 1187840 length: 4096 flags: O..
|
||||
58: offset: 1212416 length: 4096 flags: O..
|
||||
59: offset: 1220608 length: 4096 flags: O..
|
||||
60: offset: 1245184 length: 4096 flags: O..
|
||||
61: offset: 1253376 length: 4096 flags: O..
|
||||
62: offset: 1277952 length: 4096 flags: O..
|
||||
63: offset: 1286144 length: 4096 flags: O..
|
||||
64: offset: 1310720 length: 4096 flags: O..
|
||||
65: offset: 1318912 length: 4096 flags: O..
|
||||
66: offset: 1343488 length: 4096 flags: O..
|
||||
67: offset: 1351680 length: 4096 flags: O..
|
||||
68: offset: 1376256 length: 4096 flags: O..
|
||||
69: offset: 1384448 length: 4096 flags: O..
|
||||
70: offset: 1409024 length: 4096 flags: O..
|
||||
71: offset: 1417216 length: 4096 flags: O..
|
||||
72: offset: 1441792 length: 4096 flags: O..
|
||||
73: offset: 1449984 length: 4096 flags: O..
|
||||
74: offset: 1474560 length: 4096 flags: O..
|
||||
75: offset: 1482752 length: 4096 flags: O..
|
||||
76: offset: 1507328 length: 4096 flags: O..
|
||||
77: offset: 1515520 length: 4096 flags: O..
|
||||
78: offset: 1540096 length: 4096 flags: O..
|
||||
79: offset: 1548288 length: 4096 flags: O..
|
||||
80: offset: 1572864 length: 4096 flags: O..
|
||||
81: offset: 1581056 length: 4096 flags: O..
|
||||
82: offset: 1605632 length: 4096 flags: O..
|
||||
83: offset: 1613824 length: 4096 flags: O..
|
||||
84: offset: 1638400 length: 4096 flags: O..
|
||||
85: offset: 1646592 length: 4096 flags: O..
|
||||
86: offset: 1671168 length: 4096 flags: O..
|
||||
87: offset: 1679360 length: 4096 flags: O..
|
||||
88: offset: 1703936 length: 4096 flags: O..
|
||||
89: offset: 1712128 length: 4096 flags: O..
|
||||
90: offset: 1736704 length: 4096 flags: O..
|
||||
91: offset: 1744896 length: 4096 flags: O..
|
||||
92: offset: 1769472 length: 4096 flags: O..
|
||||
93: offset: 1777664 length: 4096 flags: O..
|
||||
94: offset: 1802240 length: 4096 flags: O..
|
||||
95: offset: 1810432 length: 4096 flags: O..
|
||||
96: offset: 1835008 length: 4096 flags: O..
|
||||
97: offset: 1843200 length: 4096 flags: O..
|
||||
98: offset: 1867776 length: 4096 flags: O..
|
||||
99: offset: 1875968 length: 4096 flags: O..
|
||||
100: offset: 1900544 length: 4096 flags: O..
|
||||
101: offset: 1908736 length: 4096 flags: O..
|
||||
102: offset: 1933312 length: 4096 flags: O..
|
||||
103: offset: 1941504 length: 4096 flags: O..
|
||||
104: offset: 1966080 length: 4096 flags: O..
|
||||
105: offset: 1974272 length: 4096 flags: O..
|
||||
106: offset: 1998848 length: 4096 flags: O..
|
||||
107: offset: 2007040 length: 4096 flags: O..
|
||||
108: offset: 2031616 length: 4096 flags: O..
|
||||
109: offset: 2039808 length: 4096 flags: O..
|
||||
110: offset: 2064384 length: 4096 flags: O..
|
||||
111: offset: 2072576 length: 4096 flags: O..
|
||||
112: offset: 2097152 length: 4096 flags: O..
|
||||
113: offset: 2105344 length: 4096 flags: O..
|
||||
114: offset: 2129920 length: 4096 flags: O..
|
||||
115: offset: 2138112 length: 4096 flags: O..
|
||||
116: offset: 2162688 length: 4096 flags: O..
|
||||
117: offset: 2170880 length: 4096 flags: O..
|
||||
118: offset: 2195456 length: 4096 flags: O..
|
||||
119: offset: 2203648 length: 4096 flags: O..
|
||||
120: offset: 2228224 length: 4096 flags: O..
|
||||
121: offset: 2236416 length: 4096 flags: O..
|
||||
122: offset: 2260992 length: 4096 flags: O..
|
||||
123: offset: 2269184 length: 4096 flags: O..
|
||||
124: offset: 2293760 length: 4096 flags: O..
|
||||
125: offset: 2301952 length: 4096 flags: O..
|
||||
126: offset: 2326528 length: 4096 flags: O..
|
||||
127: offset: 2334720 length: 4096 flags: O..
|
||||
128: offset: 2359296 length: 4096 flags: O..
|
||||
129: offset: 2367488 length: 4096 flags: O..
|
||||
130: offset: 2392064 length: 4096 flags: O..
|
||||
131: offset: 2400256 length: 4096 flags: O..
|
||||
132: offset: 2424832 length: 4096 flags: O..
|
||||
133: offset: 2433024 length: 4096 flags: O..
|
||||
134: offset: 2457600 length: 4096 flags: O..
|
||||
135: offset: 2465792 length: 4096 flags: O..
|
||||
136: offset: 2490368 length: 4096 flags: O..
|
||||
137: offset: 2498560 length: 4096 flags: O..
|
||||
138: offset: 2523136 length: 4096 flags: O..
|
||||
139: offset: 2531328 length: 4096 flags: O..
|
||||
140: offset: 2555904 length: 4096 flags: O..
|
||||
141: offset: 2564096 length: 4096 flags: O..
|
||||
142: offset: 2588672 length: 4096 flags: O..
|
||||
143: offset: 2596864 length: 4096 flags: O..
|
||||
144: offset: 2621440 length: 4096 flags: O..
|
||||
145: offset: 2629632 length: 4096 flags: O..
|
||||
146: offset: 2654208 length: 4096 flags: O..
|
||||
147: offset: 2662400 length: 4096 flags: O..
|
||||
148: offset: 2686976 length: 4096 flags: O..
|
||||
149: offset: 2695168 length: 4096 flags: O..
|
||||
150: offset: 2719744 length: 4096 flags: O..
|
||||
151: offset: 2727936 length: 4096 flags: O..
|
||||
152: offset: 2752512 length: 4096 flags: O..
|
||||
153: offset: 2760704 length: 4096 flags: O..
|
||||
154: offset: 2785280 length: 4096 flags: O..
|
||||
155: offset: 2793472 length: 4096 flags: O..
|
||||
156: offset: 2818048 length: 4096 flags: O..
|
||||
157: offset: 2826240 length: 4096 flags: O..
|
||||
158: offset: 2850816 length: 4096 flags: O..
|
||||
159: offset: 2859008 length: 4096 flags: O..
|
||||
160: offset: 2883584 length: 4096 flags: O..
|
||||
161: offset: 2891776 length: 4096 flags: O..
|
||||
162: offset: 2916352 length: 4096 flags: O..
|
||||
163: offset: 2924544 length: 4096 flags: O..
|
||||
164: offset: 2949120 length: 4096 flags: O..
|
||||
165: offset: 2957312 length: 4096 flags: O..
|
||||
166: offset: 2981888 length: 4096 flags: O..
|
||||
167: offset: 2990080 length: 4096 flags: O..
|
||||
168: offset: 3014656 length: 4096 flags: O..
|
||||
169: offset: 3022848 length: 4096 flags: O..
|
||||
170: offset: 3047424 length: 4096 flags: O..
|
||||
171: offset: 3055616 length: 4096 flags: O..
|
||||
172: offset: 3080192 length: 4096 flags: O..
|
||||
173: offset: 3088384 length: 4096 flags: O..
|
||||
174: offset: 3112960 length: 4096 flags: O..
|
||||
175: offset: 3121152 length: 4096 flags: O..
|
||||
176: offset: 3145728 length: 4096 flags: O..
|
||||
177: offset: 3153920 length: 4096 flags: O..
|
||||
178: offset: 3178496 length: 4096 flags: O..
|
||||
179: offset: 3186688 length: 4096 flags: O..
|
||||
180: offset: 3211264 length: 4096 flags: O..
|
||||
181: offset: 3219456 length: 4096 flags: O..
|
||||
182: offset: 3244032 length: 4096 flags: O..
|
||||
183: offset: 3252224 length: 4096 flags: O..
|
||||
184: offset: 3276800 length: 4096 flags: O..
|
||||
185: offset: 3284992 length: 4096 flags: O..
|
||||
186: offset: 3309568 length: 4096 flags: O..
|
||||
187: offset: 3317760 length: 4096 flags: O..
|
||||
188: offset: 3342336 length: 4096 flags: O..
|
||||
189: offset: 3350528 length: 4096 flags: O..
|
||||
190: offset: 3375104 length: 4096 flags: O..
|
||||
191: offset: 3383296 length: 4096 flags: O..
|
||||
192: offset: 3407872 length: 4096 flags: O..
|
||||
193: offset: 3416064 length: 4096 flags: O..
|
||||
194: offset: 3440640 length: 4096 flags: O..
|
||||
195: offset: 3448832 length: 4096 flags: O..
|
||||
196: offset: 3473408 length: 4096 flags: O..
|
||||
197: offset: 3481600 length: 4096 flags: O..
|
||||
198: offset: 3506176 length: 4096 flags: O..
|
||||
199: offset: 3514368 length: 4096 flags: O..
|
||||
200: offset: 3538944 length: 4096 flags: O..
|
||||
201: offset: 3547136 length: 4096 flags: O..
|
||||
202: offset: 3571712 length: 4096 flags: O..
|
||||
203: offset: 3579904 length: 4096 flags: O..
|
||||
204: offset: 3604480 length: 4096 flags: O..
|
||||
205: offset: 3612672 length: 4096 flags: O..
|
||||
206: offset: 3637248 length: 4096 flags: O..
|
||||
207: offset: 3645440 length: 4096 flags: O..
|
||||
208: offset: 3670016 length: 4096 flags: O..
|
||||
209: offset: 3678208 length: 4096 flags: O..
|
||||
210: offset: 3702784 length: 4096 flags: O..
|
||||
211: offset: 3710976 length: 4096 flags: O..
|
||||
212: offset: 3735552 length: 4096 flags: O..
|
||||
213: offset: 3743744 length: 4096 flags: O..
|
||||
214: offset: 3768320 length: 4096 flags: O..
|
||||
215: offset: 3776512 length: 4096 flags: O..
|
||||
216: offset: 3801088 length: 4096 flags: O..
|
||||
217: offset: 3809280 length: 4096 flags: O..
|
||||
218: offset: 3833856 length: 4096 flags: O..
|
||||
219: offset: 3842048 length: 4096 flags: O..
|
||||
220: offset: 3866624 length: 4096 flags: O..
|
||||
221: offset: 3874816 length: 4096 flags: O..
|
||||
222: offset: 3899392 length: 4096 flags: O..
|
||||
223: offset: 3907584 length: 4096 flags: O..
|
||||
224: offset: 3932160 length: 4096 flags: O..
|
||||
225: offset: 3940352 length: 4096 flags: O..
|
||||
226: offset: 3964928 length: 4096 flags: O..
|
||||
227: offset: 3973120 length: 4096 flags: O..
|
||||
228: offset: 3997696 length: 4096 flags: O..
|
||||
229: offset: 4005888 length: 4096 flags: O..
|
||||
230: offset: 4030464 length: 4096 flags: O..
|
||||
231: offset: 4038656 length: 4096 flags: O..
|
||||
232: offset: 4063232 length: 4096 flags: O..
|
||||
233: offset: 4071424 length: 4096 flags: O..
|
||||
234: offset: 4096000 length: 4096 flags: O..
|
||||
235: offset: 4104192 length: 4096 flags: O..
|
||||
236: offset: 4128768 length: 4096 flags: O..
|
||||
237: offset: 4136960 length: 4096 flags: O..
|
||||
238: offset: 4161536 length: 4096 flags: O..
|
||||
239: offset: 4169728 length: 4096 flags: O.L
|
||||
extents: 240
|
||||
0: offset: 0 length: 1 flags: O..
|
||||
1: offset: 8 length: 1 flags: O..
|
||||
2: offset: 16 length: 1 flags: O..
|
||||
3: offset: 24 length: 1 flags: O..
|
||||
4: offset: 48 length: 1 flags: O..
|
||||
5: offset: 56 length: 1 flags: O..
|
||||
6: offset: 64 length: 1 flags: O..
|
||||
7: offset: 72 length: 1 flags: O..
|
||||
8: offset: 80 length: 1 flags: O..
|
||||
9: offset: 88 length: 1 flags: O..
|
||||
10: offset: 96 length: 1 flags: O..
|
||||
11: offset: 104 length: 1 flags: O..
|
||||
12: offset: 112 length: 1 flags: O..
|
||||
13: offset: 120 length: 1 flags: O..
|
||||
14: offset: 176 length: 1 flags: O..
|
||||
15: offset: 184 length: 1 flags: O..
|
||||
16: offset: 192 length: 1 flags: O..
|
||||
17: offset: 200 length: 1 flags: O..
|
||||
18: offset: 208 length: 1 flags: O..
|
||||
19: offset: 216 length: 1 flags: O..
|
||||
20: offset: 224 length: 1 flags: O..
|
||||
21: offset: 232 length: 1 flags: O..
|
||||
22: offset: 240 length: 1 flags: O..
|
||||
23: offset: 248 length: 1 flags: O..
|
||||
24: offset: 256 length: 1 flags: O..
|
||||
25: offset: 264 length: 1 flags: O..
|
||||
26: offset: 272 length: 1 flags: O..
|
||||
27: offset: 280 length: 1 flags: O..
|
||||
28: offset: 288 length: 1 flags: O..
|
||||
29: offset: 296 length: 1 flags: O..
|
||||
30: offset: 304 length: 1 flags: O..
|
||||
31: offset: 312 length: 1 flags: O..
|
||||
32: offset: 320 length: 1 flags: O..
|
||||
33: offset: 328 length: 1 flags: O..
|
||||
34: offset: 336 length: 1 flags: O..
|
||||
35: offset: 344 length: 1 flags: O..
|
||||
36: offset: 352 length: 1 flags: O..
|
||||
37: offset: 360 length: 1 flags: O..
|
||||
38: offset: 368 length: 1 flags: O..
|
||||
39: offset: 376 length: 1 flags: O..
|
||||
40: offset: 384 length: 1 flags: O..
|
||||
41: offset: 392 length: 1 flags: O..
|
||||
42: offset: 400 length: 1 flags: O..
|
||||
43: offset: 408 length: 1 flags: O..
|
||||
44: offset: 416 length: 1 flags: O..
|
||||
45: offset: 424 length: 1 flags: O..
|
||||
46: offset: 432 length: 1 flags: O..
|
||||
47: offset: 440 length: 1 flags: O..
|
||||
48: offset: 448 length: 1 flags: O..
|
||||
49: offset: 456 length: 1 flags: O..
|
||||
50: offset: 464 length: 1 flags: O..
|
||||
51: offset: 472 length: 1 flags: O..
|
||||
52: offset: 480 length: 1 flags: O..
|
||||
53: offset: 488 length: 1 flags: O..
|
||||
54: offset: 496 length: 1 flags: O..
|
||||
55: offset: 504 length: 1 flags: O..
|
||||
56: offset: 512 length: 1 flags: O..
|
||||
57: offset: 520 length: 1 flags: O..
|
||||
58: offset: 528 length: 1 flags: O..
|
||||
59: offset: 536 length: 1 flags: O..
|
||||
60: offset: 544 length: 1 flags: O..
|
||||
61: offset: 552 length: 1 flags: O..
|
||||
62: offset: 560 length: 1 flags: O..
|
||||
63: offset: 568 length: 1 flags: O..
|
||||
64: offset: 576 length: 1 flags: O..
|
||||
65: offset: 584 length: 1 flags: O..
|
||||
66: offset: 592 length: 1 flags: O..
|
||||
67: offset: 600 length: 1 flags: O..
|
||||
68: offset: 608 length: 1 flags: O..
|
||||
69: offset: 616 length: 1 flags: O..
|
||||
70: offset: 624 length: 1 flags: O..
|
||||
71: offset: 632 length: 1 flags: O..
|
||||
72: offset: 640 length: 1 flags: O..
|
||||
73: offset: 648 length: 1 flags: O..
|
||||
74: offset: 656 length: 1 flags: O..
|
||||
75: offset: 664 length: 1 flags: O..
|
||||
76: offset: 672 length: 1 flags: O..
|
||||
77: offset: 680 length: 1 flags: O..
|
||||
78: offset: 688 length: 1 flags: O..
|
||||
79: offset: 696 length: 1 flags: O..
|
||||
80: offset: 704 length: 1 flags: O..
|
||||
81: offset: 712 length: 1 flags: O..
|
||||
82: offset: 720 length: 1 flags: O..
|
||||
83: offset: 728 length: 1 flags: O..
|
||||
84: offset: 736 length: 1 flags: O..
|
||||
85: offset: 744 length: 1 flags: O..
|
||||
86: offset: 752 length: 1 flags: O..
|
||||
87: offset: 760 length: 1 flags: O..
|
||||
88: offset: 768 length: 1 flags: O..
|
||||
89: offset: 776 length: 1 flags: O..
|
||||
90: offset: 784 length: 1 flags: O..
|
||||
91: offset: 792 length: 1 flags: O..
|
||||
92: offset: 800 length: 1 flags: O..
|
||||
93: offset: 808 length: 1 flags: O..
|
||||
94: offset: 816 length: 1 flags: O..
|
||||
95: offset: 824 length: 1 flags: O..
|
||||
96: offset: 832 length: 1 flags: O..
|
||||
97: offset: 840 length: 1 flags: O..
|
||||
98: offset: 848 length: 1 flags: O..
|
||||
99: offset: 856 length: 1 flags: O..
|
||||
100: offset: 864 length: 1 flags: O..
|
||||
101: offset: 872 length: 1 flags: O..
|
||||
102: offset: 880 length: 1 flags: O..
|
||||
103: offset: 888 length: 1 flags: O..
|
||||
104: offset: 896 length: 1 flags: O..
|
||||
105: offset: 904 length: 1 flags: O..
|
||||
106: offset: 912 length: 1 flags: O..
|
||||
107: offset: 920 length: 1 flags: O..
|
||||
108: offset: 928 length: 1 flags: O..
|
||||
109: offset: 936 length: 1 flags: O..
|
||||
110: offset: 944 length: 1 flags: O..
|
||||
111: offset: 952 length: 1 flags: O..
|
||||
112: offset: 960 length: 1 flags: O..
|
||||
113: offset: 968 length: 1 flags: O..
|
||||
114: offset: 976 length: 1 flags: O..
|
||||
115: offset: 984 length: 1 flags: O..
|
||||
116: offset: 992 length: 1 flags: O..
|
||||
117: offset: 1000 length: 1 flags: O..
|
||||
118: offset: 1008 length: 1 flags: O..
|
||||
119: offset: 1016 length: 1 flags: O.L
|
||||
extents: 120
|
||||
extents: 0
|
||||
@@ -1,3 +0,0 @@
|
||||
== setup
|
||||
expected 4681
|
||||
== cleanup
|
||||
@@ -694,8 +694,8 @@ for t in $tests; do
|
||||
if [ "$sts" == "$T_PASS_STATUS" ]; then
|
||||
dmesg | t_filter_dmesg > "$T_TMPDIR/dmesg.after"
|
||||
diff --old-line-format="" --unchanged-line-format="" \
|
||||
"$T_TMPDIR/dmesg.before" "$T_TMPDIR/dmesg.after" | \
|
||||
grep -v '^$' > "$T_TMPDIR/dmesg.new"
|
||||
"$T_TMPDIR/dmesg.before" "$T_TMPDIR/dmesg.after" > \
|
||||
"$T_TMPDIR/dmesg.new"
|
||||
|
||||
if [ -s "$T_TMPDIR/dmesg.new" ]; then
|
||||
message="unexpected messages in dmesg"
|
||||
|
||||
@@ -11,7 +11,6 @@ simple-readdir.sh
|
||||
get-referring-entries.sh
|
||||
fallocate.sh
|
||||
basic-truncate.sh
|
||||
punch-offline.sh
|
||||
data-prealloc.sh
|
||||
setattr_more.sh
|
||||
offline-extent-waiting.sh
|
||||
@@ -26,9 +25,7 @@ srch-basic-functionality.sh
|
||||
simple-xattr-unit.sh
|
||||
retention-basic.sh
|
||||
totl-xattr-tag.sh
|
||||
basic-xattr-indx.sh
|
||||
quota.sh
|
||||
totl-merge-read.sh
|
||||
lock-refleak.sh
|
||||
lock-shrink-consistency.sh
|
||||
lock-shrink-read-race.sh
|
||||
@@ -52,7 +49,6 @@ setup-error-teardown.sh
|
||||
resize-devices.sh
|
||||
change-devices.sh
|
||||
fence-and-reclaim.sh
|
||||
orphan-log-trees.sh
|
||||
quorum-heartbeat-timeout.sh
|
||||
orphan-inodes.sh
|
||||
mount-unmount-race.sh
|
||||
|
||||
@@ -1,664 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <time.h>
|
||||
#include <linux/types.h>
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "../../utils/src/util.h"
|
||||
#include "ioctl.h"
|
||||
#include "format.h"
|
||||
|
||||
/*
|
||||
* This is a quick example of using the raw reading ioctls to get info
|
||||
* on inodes as they change. We maintain an array of meta_seq items for
|
||||
* inodes that we've seen. If we read the current meta_seq items and
|
||||
* see differences then we get inode info and update our array with what
|
||||
* we find.
|
||||
*
|
||||
* This only maintains one array and sorts it back and forth as we walk
|
||||
* the meta_seq items and then search by inode number. This will
|
||||
* eventually use far too much cpu as the number of inodes increases.
|
||||
*/
|
||||
|
||||
/* printf format and matching arguments for a meta_seq item, shown as "meta_seq.ino" */
#define MSF "%llu.%llu"
#define MSA(ms) (ms)->meta_seq, (ms)->ino
/*
 * printf format and matching arguments for a negative errno and its
 * strerror() text.  The argument is parenthesized so that expressions
 * like NERRA(a - b) negate the whole expression; note that it is
 * still evaluated twice.
 */
#define NERRF "nerr %d (\"%s\")"
#define NERRA(nerr) (nerr), strerror(-(nerr))
|
||||
|
||||
/* print a formatted error message to stderr with an "error: " prefix and trailing newline */
#define prerror(fmt, ...)						\
	fprintf(stderr, "error: "fmt"\n", ##__VA_ARGS__)

/* print a formatted line to stdout, but only when opts.debug is set */
#define prdebug(fmt, ...)						\
do {									\
	if (opts.debug)							\
		printf(fmt"\n", ##__VA_ARGS__);				\
} while (0)
|
||||
|
||||
/* command line options, filled in by parse_opts() and add_xattr() */
static struct opts {
	/* -d: prdebug() output is enabled */
	bool debug;
	/* -p: path that main() opens to issue the ioctls on */
	char *path;
	/* -x: concatenated null-terminated xattr names */
	char *names;
	/* total bytes used in names, including the null terminators */
	size_t names_size;
	/* number of names stored in names */
	size_t names_count;
} opts;
|
||||
|
||||
/* timing and counts for one kind of ioctl call */
struct per_call {
	/* get_ns() time saved by begin_call() */
	__u64 begin;
	/* number of completed calls counted by end_call() */
	__u64 calls;
	/* total ns between begin_call() and end_call() */
	__u64 time;
	/* number of inodes the calls returned */
	__u64 inos;
};

/* counters that print_stats() reports and then resets */
struct stats {
	/* get_ns() time when main() started watching */
	__u64 start;
	/* get_ns() time of the most recent print_stats() output */
	__u64 last;

	/* read meta_seq (rms) and read inode info (rii) ioctl calls */
	struct per_call rms, rii;

	/* number of items currently in the meta_seq array */
	__u64 inodes;
	/* array items added, marked for removal, and updated */
	__u64 add;
	__u64 remove;
	__u64 update;

	/* lines printed so far, used to repeat the header every 16 lines */
	unsigned lines;
} stats;
|
||||
|
||||
/* growable array of the meta_seq items for the inodes we've seen */
struct meta_seq_array {
	/* number of items in use at the front of ms */
	size_t nr;
	/* number of items that ms has room for */
	size_t alloc;
	/* allocated by expand_array(), NULL until the first expansion */
	struct scoutfs_ioctl_meta_seq *ms;
};
|
||||
|
||||
/* most inode numbers gathered for a single inode info read */
#define INO_BATCH 1000

/*
 * Size of the buffer that receives ioctl results: room for a result
 * header, inode number, inode struct, and attr_x struct per inode in a
 * batch, doubled as a gratuitous allowance for struct expansion.
 */
#define RESULTS_SIZE							\
	(INO_BATCH * 2 * (sizeof(struct scoutfs_ioctl_raw_read_result) +	\
			  sizeof(__u64) +				\
			  180 /* ~= sizeof(struct scoutfs_inode) */ +	\
			  sizeof(struct scoutfs_ioctl_inode_attr_x)))

#define NSEC_PER_SEC 1000000000
|
||||
|
||||
static __u64 get_ns(void)
|
||||
{
|
||||
struct timespec tp;
|
||||
int ret;
|
||||
|
||||
ret = clock_gettime(CLOCK_MONOTONIC, &tp);
|
||||
if (ret != 0) {
|
||||
ret = -errno;
|
||||
prerror("clock_gettime() error: "NERRF, NERRA(ret));
|
||||
exit(2);
|
||||
}
|
||||
|
||||
return ((__u64)tp.tv_sec * NSEC_PER_SEC) + (__u64)tp.tv_nsec;
|
||||
}
|
||||
static void begin_call(struct per_call *pc)
|
||||
{
|
||||
pc->begin = get_ns();
|
||||
}
|
||||
|
||||
static void end_call(struct per_call *pc)
|
||||
{
|
||||
pc->calls++;
|
||||
pc->time += get_ns() - pc->begin;
|
||||
}
|
||||
|
||||
static int expand_array(struct meta_seq_array *arr, size_t additional)
|
||||
{
|
||||
#define ALLOC_BATCH (1024 * 1024 / (sizeof(struct scoutfs_ioctl_meta_seq)))
|
||||
struct scoutfs_ioctl_meta_seq *ms;
|
||||
size_t expand;
|
||||
|
||||
if (arr->nr + additional <= arr->alloc)
|
||||
return 0;
|
||||
|
||||
expand = arr->alloc + ALLOC_BATCH;
|
||||
ms = reallocarray(arr->ms, expand, sizeof(arr->ms[0]));
|
||||
if (!ms) {
|
||||
prerror("allocating ms array with %zu elements failed", expand);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
arr->alloc = expand;
|
||||
arr->ms = ms;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void inc_ms(struct scoutfs_ioctl_meta_seq *ms)
|
||||
{
|
||||
if (++ms->ino == 0)
|
||||
ms->meta_seq++;
|
||||
}
|
||||
|
||||
/* set both fields of a meta_seq item */
static void set_ms(struct scoutfs_ioctl_meta_seq *ms, __u64 meta_seq, __u64 ino)
{
	ms->ino = ino;
	ms->meta_seq = meta_seq;
}
|
||||
|
||||
static int compar_ms_ino(const void *A, const void *B)
|
||||
{
|
||||
const struct scoutfs_ioctl_meta_seq *a = A;
|
||||
const struct scoutfs_ioctl_meta_seq *b = B;
|
||||
|
||||
return a->ino < b->ino ? -1 : a->ino > b->ino ? 1 : 0;
|
||||
}
|
||||
|
||||
static int compar_ms_meta_seq(const void *A, const void *B)
|
||||
{
|
||||
const struct scoutfs_ioctl_meta_seq *a = A;
|
||||
const struct scoutfs_ioctl_meta_seq *b = B;
|
||||
|
||||
return a->meta_seq < b->meta_seq ? -1 : a->meta_seq > b->meta_seq ? 1 :
|
||||
compar_ms_ino(A, B);
|
||||
}
|
||||
|
||||
/* qsort comparison of two __u64 values in ascending order */
static int compar_u64(const void *A, const void *B)
{
	const __u64 lhs = *(const __u64 *)A;
	const __u64 rhs = *(const __u64 *)B;

	if (lhs < rhs)
		return -1;
	if (lhs > rhs)
		return 1;
	return 0;
}
|
||||
|
||||
/*
 * Binary search a sorted array and return an index instead of an
 * element pointer.  If an element compares equal to the key then its
 * index is returned.  Otherwise the index of the first element greater
 * than the key is returned, which is nmemb if the key is greater than
 * every element (and 0 if the array is empty).
 *
 * compar is always called with the key as its first argument and a
 * pointer to an array element as its second, like bsearch().
 *
 * This searches directly rather than wrapping bsearch() with a
 * comparison callback that records the last element probed: that
 * relied on the order in which the C library's bsearch() happens to
 * probe elements and on smuggling context through a tagged key
 * pointer.
 */
static size_t bsearch_ind(const void *key, const void *base, size_t nmemb, size_t size,
			  int (*compar)(const void *a, const void *b))
{
	const char *elems = base;
	size_t lower = 0;
	size_t upper = nmemb;
	size_t mid;
	int cmp;

	while (lower < upper) {
		/* can't overflow the way (lower + upper) / 2 could */
		mid = lower + ((upper - lower) / 2);

		cmp = compar(key, elems + (mid * size));
		if (cmp < 0)
			upper = mid;
		else if (cmp > 0)
			lower = mid + 1;
		else
			return mid;
	}

	return lower;
}
|
||||
|
||||
/*
|
||||
* Generate a sorted list of inode numbers for the meta_seq items that
|
||||
* differ between the results from raw_read_meta_seq and the items we
|
||||
* have saved in our array.
|
||||
*/
|
||||
static int differing_inos(__u64 *inos, struct meta_seq_array *arr,
|
||||
struct scoutfs_ioctl_meta_seq *start,
|
||||
struct scoutfs_ioctl_meta_seq *last,
|
||||
struct scoutfs_ioctl_meta_seq *ms, size_t nr)
|
||||
{
|
||||
size_t arr_last;
|
||||
size_t a;
|
||||
size_t m;
|
||||
int nr_inos;
|
||||
int cmp;
|
||||
int i;
|
||||
int n;
|
||||
|
||||
/* find where we're going to stop in arr */
|
||||
arr_last = bsearch_ind(last, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);
|
||||
if (arr_last < arr->nr && compar_ms_meta_seq(&arr->ms[arr_last], last) == 0)
|
||||
arr_last++;
|
||||
|
||||
a = bsearch_ind(start, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);
|
||||
|
||||
for (m = 0, nr_inos = 0; (a < arr_last || m < nr) && nr_inos < INO_BATCH; ) {
|
||||
|
||||
prdebug("diffing: m %zu nr %zu | a %zu arr_last %zu | nr_inos %d",
|
||||
m, nr, a, arr_last, nr_inos);
|
||||
if (a < arr_last)
|
||||
prdebug(" arr->ms[%zu] = "MSF, a, MSA(&arr->ms[a]));
|
||||
if (m < nr)
|
||||
prdebug(" ms[%zu] = "MSF, m, MSA(&ms[m]));
|
||||
|
||||
/* setup comparison to copy lesser or only */
|
||||
if (a < arr_last && m < nr)
|
||||
cmp = compar_ms_meta_seq(&arr->ms[a], &ms[m]);
|
||||
else if (a < arr_last)
|
||||
cmp = -1;
|
||||
else
|
||||
cmp = 1;
|
||||
|
||||
prdebug(" cmp %d", cmp);
|
||||
|
||||
if (cmp == 0) {
|
||||
/* ignore both when they match */
|
||||
a++;
|
||||
m++;
|
||||
} else if (cmp < 0) {
|
||||
inos[nr_inos++] = arr->ms[a++].ino;
|
||||
} else { /* cmp > 0 */
|
||||
inos[nr_inos++] = ms[m++].ino;
|
||||
}
|
||||
}
|
||||
|
||||
/* if we didn't consume all the read meta_seq then we might need to clamp last */
|
||||
if (m < nr && compar_ms_meta_seq(&ms[m], last) <= 0) {
|
||||
*last = ms[m];
|
||||
last->ino--; /* must be non-zero, can't wrap */
|
||||
}
|
||||
|
||||
/* sort and remove duplicate inode numbers */
|
||||
if (nr_inos > 0) {
|
||||
qsort(inos, nr_inos, sizeof(inos[0]), compar_u64);
|
||||
for (i = 1, n = 1; i < nr_inos; i++) {
|
||||
if (inos[i] != inos[n - 1])
|
||||
inos[n++] = inos[i];
|
||||
}
|
||||
nr_inos = n;
|
||||
}
|
||||
|
||||
return nr_inos;
|
||||
}
|
||||
|
||||
/*
|
||||
* We're not really validating the result stream. We assume that the offset currently
|
||||
* points at an inode. We fill the caller's ms with its info then iterate through
|
||||
* all its results until the next ino.
|
||||
*/
|
||||
static ssize_t read_inode_results(void *buf, size_t off, size_t size,
|
||||
struct scoutfs_ioctl_meta_seq *found)
|
||||
{
|
||||
struct scoutfs_ioctl_raw_read_result res;
|
||||
size_t len;
|
||||
__le64 ms;
|
||||
|
||||
found->ino = 0;
|
||||
|
||||
while (off < size) {
|
||||
memcpy(&res, buf + off, sizeof(res));
|
||||
prdebug("res %u %u", res.type, res.size);
|
||||
|
||||
if (res.type == SCOUTFS_IOC_RAW_READ_RESULT_INODE && found->ino != 0)
|
||||
break;
|
||||
|
||||
off += sizeof(res);
|
||||
|
||||
switch(res.type) {
|
||||
case SCOUTFS_IOC_RAW_READ_RESULT_INODE:
|
||||
memcpy(&found->ino, buf + off, sizeof(__u64));
|
||||
memcpy(&ms, buf + off + sizeof(__u64) +
|
||||
offsetof(struct scoutfs_inode, meta_seq), sizeof(__le64));
|
||||
found->meta_seq = le64_to_cpu(ms);
|
||||
prdebug("res ino %llu ms %llu", found->ino, found->meta_seq);
|
||||
break;
|
||||
|
||||
case SCOUTFS_IOC_RAW_READ_RESULT_XATTR:
|
||||
len = strlen((char *)buf + off) + 1;
|
||||
prdebug("res xattr '%s' len %d: '%.*s'",
|
||||
(char *)buf + off,
|
||||
(int)(res.size - len),
|
||||
(int)(res.size - len),
|
||||
(char *)buf + off + len);
|
||||
break;
|
||||
};
|
||||
off += res.size;
|
||||
}
|
||||
|
||||
return off;
|
||||
}
|
||||
|
||||
/*
|
||||
* inos[] contains the inode numbers that we're interested in. Get
|
||||
* their info and update our array with what we find.
|
||||
*/
|
||||
static int read_inode_info(int fd, void *buf, struct meta_seq_array *arr, __u64 *inos, int nr_inos)
|
||||
{
|
||||
struct scoutfs_ioctl_raw_read_inode_info rii;
|
||||
struct scoutfs_ioctl_meta_seq found;
|
||||
struct scoutfs_ioctl_meta_seq ms;
|
||||
ssize_t off;
|
||||
size_t size;
|
||||
size_t ind;
|
||||
size_t added;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
rii = (struct scoutfs_ioctl_raw_read_inode_info) {
|
||||
.inos_ptr = (unsigned long)inos,
|
||||
.inos_count = nr_inos,
|
||||
.names_ptr = (unsigned long)opts.names,
|
||||
.names_count = opts.names_count,
|
||||
.results_ptr = (unsigned long)buf,
|
||||
.results_size = RESULTS_SIZE,
|
||||
};
|
||||
|
||||
begin_call(&stats.rii);
|
||||
ret = ioctl(fd, SCOUTFS_IOC_RAW_READ_INODE_INFO, &rii);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
prerror("READ_INODE_INFO ioctl failed: "NERRF, NERRA(ret));
|
||||
goto out;
|
||||
}
|
||||
end_call(&stats.rii);
|
||||
|
||||
prdebug("gii ret %d", ret);
|
||||
|
||||
off = 0;
|
||||
size = ret;
|
||||
set_ms(&found, 0, 0);
|
||||
added = 0;
|
||||
i = 0;
|
||||
|
||||
/* sort by ino so we can search by ino for updates */
|
||||
qsort(arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_ino);
|
||||
|
||||
while (i < nr_inos) {
|
||||
/* find next ino */
|
||||
if (!found.ino && off < size) {
|
||||
off = read_inode_results(buf, off, size, &found);
|
||||
if (off < 0) {
|
||||
ret = off;
|
||||
goto out;
|
||||
}
|
||||
stats.rii.inos++;
|
||||
}
|
||||
|
||||
if (i < nr_inos && (!found.ino || inos[i] < found.ino)) {
|
||||
/* delete any record of inodes we didn't find */
|
||||
set_ms(&ms, UINT64_MAX, inos[i]);
|
||||
i++;
|
||||
|
||||
} else if (found.ino) {
|
||||
/* update/add arr to match the found ino */
|
||||
ms = found;
|
||||
if (i < nr_inos && inos[i] == found.ino)
|
||||
i++;
|
||||
set_ms(&found, 0, 0);
|
||||
}
|
||||
|
||||
/* find existing record */
|
||||
ind = bsearch_ind(&ms, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_ino);
|
||||
if (ind < arr->nr && arr->ms[ind].ino == ms.ino) {
|
||||
/* update existing ino, can be marking for deletion */
|
||||
prdebug("updating arr [%zu] ino %llu ms %llu -> %llu",
|
||||
ind, ms.ino, arr->ms[ind].meta_seq, ms.meta_seq);
|
||||
arr->ms[ind].meta_seq = ms.meta_seq;
|
||||
if (ms.meta_seq == UINT64_MAX)
|
||||
stats.remove++;
|
||||
else
|
||||
stats.update++;
|
||||
|
||||
} else if (ms.meta_seq != UINT64_MAX) {
|
||||
/* append new found, maintaining existing sorting */
|
||||
arr->ms[arr->nr + added] = ms;
|
||||
prdebug("adding arr [%zu] ino %llu ms %llu",
|
||||
arr->nr + added, ms.ino, ms.meta_seq);
|
||||
added++;
|
||||
stats.add++;
|
||||
}
|
||||
}
|
||||
|
||||
/* sort by seq again for next meta seq read */
|
||||
arr->nr += added;
|
||||
qsort(arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);
|
||||
|
||||
/* and trim off any deletions */
|
||||
while (arr->nr > 0 && arr->ms[arr->nr - 1].meta_seq == UINT64_MAX)
|
||||
arr->nr--;
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* seconds, as a double, from the earlier b_ns to the later a_ns nanosecond times */
static double secs(u64 a_ns, u64 b_ns)
{
	u64 delta_ns = a_ns - b_ns;

	return (double)delta_ns / NSEC_PER_SEC;
}
|
||||
|
||||
/* rate of nr events per second over nsec nanoseconds, 0 if no time has passed */
static double nr_per_sec(u64 nr, __u64 nsec)
{
	return nsec == 0 ? 0 : (double)nr / secs(nsec, 0);
}
|
||||
|
||||
static void print_stats(void)
|
||||
{
|
||||
u64 now = get_ns();
|
||||
|
||||
if (secs(now, stats.last) < 1.0)
|
||||
return;
|
||||
|
||||
if ((stats.lines++ % 16) == 0) {
|
||||
printf("%6s | %-29s | %-23s | %-23s\n",
|
||||
"", "inodes", "meta_seq", "inode_info");
|
||||
printf("%6s | %8s %6s %6s %6s | %7s %7s %7s | %7s %7s %7s\n",
|
||||
"now",
|
||||
"total", "add", "remove", "update",
|
||||
"calls", "inos", "inos/s",
|
||||
"calls", "inos", "inos/s");
|
||||
}
|
||||
|
||||
printf("%6.3lf | %8llu %6llu %6llu %6llu | %7llu %7llu %7.0lf | %7llu %7llu %7.0lf\n",
|
||||
secs(now, stats.start),
|
||||
stats.inodes, stats.add, stats.remove, stats.update,
|
||||
stats.rms.calls, stats.rms.inos, nr_per_sec(stats.rms.inos, stats.rms.time),
|
||||
stats.rii.calls, stats.rii.inos, nr_per_sec(stats.rms.inos, stats.rii.time));
|
||||
|
||||
stats.last = now;
|
||||
|
||||
{
|
||||
struct stats save = stats;
|
||||
stats = (struct stats) {
|
||||
.start = save.start,
|
||||
.last = save.last,
|
||||
.lines = save.lines,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static void add_xattr(char *name)
|
||||
{
|
||||
size_t len_null;
|
||||
char *names;
|
||||
int ret;
|
||||
|
||||
len_null = strlen(name) + 1;
|
||||
names = realloc(opts.names, opts.names_size + len_null);
|
||||
if (!names) {
|
||||
ret = -errno;
|
||||
prerror("allocation of xattr names buffer failed: "NERRF, NERRA(ret));
|
||||
exit(3);
|
||||
}
|
||||
|
||||
memcpy(names + opts.names_size, name, len_null);
|
||||
|
||||
opts.names = names;
|
||||
opts.names_size += len_null;
|
||||
opts.names_count++;
|
||||
}
|
||||
|
||||
static bool parse_opts(int argc, char **argv)
|
||||
{
|
||||
bool usage = false;
|
||||
int c;
|
||||
|
||||
opts = (struct opts) {
|
||||
.debug = false,
|
||||
};
|
||||
|
||||
while ((c = getopt(argc, argv, "dp:x:")) != -1) {
|
||||
switch(c) {
|
||||
case 'd':
|
||||
opts.debug = true;
|
||||
break;
|
||||
case 'p':
|
||||
opts.path = strdup(optarg);
|
||||
break;
|
||||
case 'x':
|
||||
add_xattr(optarg);
|
||||
break;
|
||||
case '?':
|
||||
printf("Unknown option '%c'\n", optopt);
|
||||
usage = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!usage) {
|
||||
usage = true;
|
||||
if (!opts.path)
|
||||
printf("need -p path option\n");
|
||||
else
|
||||
usage = false;
|
||||
}
|
||||
|
||||
if (usage) {
|
||||
printf("\nusage:\n"
|
||||
" -d | enable verbose debugging output\n"
|
||||
" -p PATH | path to file system to watch\n"
|
||||
" -x NAME | try to read named xattr with inodes, can be many\n"
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct scoutfs_ioctl_raw_read_meta_seq rms = {0,};
|
||||
struct scoutfs_ioctl_meta_seq *ms;
|
||||
struct meta_seq_array arr = {0,};
|
||||
__u64 *inos = NULL;
|
||||
void *buf = NULL;
|
||||
int fd = -1;
|
||||
int nr_inos;
|
||||
int nr;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
if (!parse_opts(argc, argv))
|
||||
exit(1);
|
||||
|
||||
inos = calloc(INO_BATCH, sizeof(inos[0]));
|
||||
buf = malloc(RESULTS_SIZE);
|
||||
if (!inos || !buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
rms.results_ptr = (unsigned long)buf;
|
||||
rms.results_size = min(RESULTS_SIZE, INO_BATCH * sizeof(struct scoutfs_ioctl_meta_seq));
|
||||
|
||||
fd = open(opts.path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
perror("error");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
stats.start = get_ns();
|
||||
|
||||
for (;;) {
|
||||
set_ms(&rms.start, 0, 0);
|
||||
set_ms(&rms.end, UINT64_MAX, UINT64_MAX);
|
||||
|
||||
do {
|
||||
begin_call(&stats.rms);
|
||||
ret = ioctl(fd, SCOUTFS_IOC_RAW_READ_META_SEQ, &rms);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
prerror("READ_META_SEQ ioctl failed, "
|
||||
"start "MSF" end "MSF", "NERRF,
|
||||
MSA(&rms.start), MSA(&rms.end), NERRA(ret));
|
||||
goto out;
|
||||
}
|
||||
end_call(&stats.rms);
|
||||
stats.rms.inos += ret;
|
||||
|
||||
prdebug("RMS last "MSF" ret %d:", MSA(&rms.last), ret);
|
||||
|
||||
nr = ret;
|
||||
ms = buf;
|
||||
|
||||
if (opts.debug && nr > 0) {
|
||||
for (i = 0; i < nr; i++)
|
||||
prdebug(" [%u] "MSF"", i, MSA(&ms[i]));
|
||||
}
|
||||
|
||||
nr_inos = differing_inos(inos, &arr, &rms.start, &rms.last, ms, nr);
|
||||
|
||||
if (nr_inos > 0) {
|
||||
prdebug("diff inos %d:", nr_inos);
|
||||
for (i = 0; i < nr_inos; i++)
|
||||
prdebug(" [%u] %llu", i, inos[i]);
|
||||
|
||||
ret = expand_array(&arr, nr_inos) ?:
|
||||
read_inode_info(fd, buf, &arr, inos, nr_inos);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
stats.inodes = arr.nr;
|
||||
print_stats();
|
||||
|
||||
rms.start = rms.last;
|
||||
inc_ms(&rms.start);
|
||||
|
||||
} while (rms.last.meta_seq != UINT64_MAX || rms.last.ino != UINT64_MAX);
|
||||
|
||||
|
||||
sleep(1);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
|
||||
free(inos);
|
||||
free(buf);
|
||||
free(arr.ms);
|
||||
free(opts.names);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1,143 +0,0 @@
|
||||
#
|
||||
# Test basic .indx. xattr tag functionality and index entry lifecycle
|
||||
#
|
||||
|
||||
t_require_commands touch rm setfattr scoutfs stat
|
||||
t_require_mounts 2
|
||||
|
||||
# query index from a specific mount, default mount 0
|
||||
# Print index entries as seen from one mount.  The first argument is the
# mount number (0 when omitted); every remaining argument is handed to
# "scoutfs read-xattr-index" unchanged.
read_xattr_index()
{
	local nr
	local mnt
	local dbg

	nr="${1:-0}"
	mnt="$(eval echo \$T_M$nr)"
	shift

	# flush dirty state, then drop this mount's weak item cache
	# before running the query
	sync
	dbg=$(t_debugfs_path $nr)
	echo 1 > $dbg/drop_weak_item_cache

	scoutfs read-xattr-index -p "$mnt" "$@"
}
|
||||
|
||||
MAJOR=5
|
||||
MINOR=100
|
||||
|
||||
echo "== testing invalid read-xattr-index arguments"
|
||||
scoutfs read-xattr-index -p "$T_M0" bad 2>&1
|
||||
scoutfs read-xattr-index -p "$T_M0" 1.2 2>&1
|
||||
scoutfs read-xattr-index -p "$T_M0" 1.2.3 256.0.0 2>&1
|
||||
scoutfs read-xattr-index -p "$T_M0" 1.2.3 0.0.0 2>&1
|
||||
scoutfs read-xattr-index -p "$T_M0" 1.2.0 1.1.2 2>&1
|
||||
scoutfs read-xattr-index -p "$T_M0" 2.2.2 2.2.1 2>&1
|
||||
|
||||
echo "== testing invalid names"
|
||||
touch "$T_D0/invalid"
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test.. "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test..$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR. "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test.256.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test.abc.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.abc "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test.-1.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.-1 "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test.18446744073709551616.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.$(printf 'x%.0s' $(seq 1 240)).$MAJOR.$MINOR "$T_D0/invalid" 2>&1 | t_filter_fs
|
||||
rm -f "$T_D0/invalid"
|
||||
|
||||
echo "== testing boundary values"
|
||||
touch "$T_D0/boundary"
|
||||
INO=$(stat -c "%i" "$T_D0/boundary")
|
||||
setfattr -n scoutfs.hide.indx.test.0.0 "$T_D0/boundary"
|
||||
read_xattr_index 0 0.0.0 0.0.-1 | awk '($3 == "'$INO'") {print "0.0 found"}'
|
||||
setfattr -x scoutfs.hide.indx.test.0.0 "$T_D0/boundary"
|
||||
setfattr -n scoutfs.hide.indx.test.255.18446744073709551615 "$T_D0/boundary"
|
||||
read_xattr_index 0 255.0.0 255.-1.-1 | awk '($3 == "'$INO'") {print "255.max found"}'
|
||||
setfattr -x scoutfs.hide.indx.test.255.18446744073709551615 "$T_D0/boundary"
|
||||
rm -f "$T_D0/boundary"
|
||||
|
||||
echo "== indx xattr must have no value"
|
||||
touch "$T_D0/noval"
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v "" "$T_D0/noval" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v 0 "$T_D0/noval" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR -v 1 "$T_D0/noval" 2>&1 | t_filter_fs
|
||||
rm -f "$T_D0/noval"
|
||||
|
||||
echo "== set indx xattr and verify index entry"
|
||||
touch "$T_D0/file"
|
||||
INO=$(stat -c "%i" "$T_D0/file")
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
|
||||
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found"}'
|
||||
|
||||
echo "== setting same indx xattr again is a no-op"
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
|
||||
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found"}'
|
||||
|
||||
echo "== removing non-existent indx xattr succeeds"
|
||||
setfattr -x scoutfs.hide.indx.nonexistent.$MAJOR.999 "$T_D0/file" 2>&1 | t_filter_fs
|
||||
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "still found"}'
|
||||
|
||||
echo "== explicit xattr removal cleans up index entry"
|
||||
setfattr -x scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file"
|
||||
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan"}'
|
||||
rm -f "$T_D0/file"
|
||||
|
||||
echo "== file deletion cleans up index entry"
|
||||
touch "$T_D0/file2"
|
||||
INO=$(stat -c "%i" "$T_D0/file2")
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file2"
|
||||
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found before delete"}'
|
||||
rm -f "$T_D0/file2"
|
||||
read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan after delete"}'
|
||||
|
||||
echo "== multiple indx xattrs on one file cleaned up by deletion"
|
||||
touch "$T_D0/file3"
|
||||
INO=$(stat -c "%i" "$T_D0/file3")
|
||||
setfattr -n scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/file3"
|
||||
setfattr -n scoutfs.hide.indx.b.$MAJOR.300 "$T_D0/file3"
|
||||
BEFORE=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
|
||||
echo "entries before delete: $BEFORE"
|
||||
rm -f "$T_D0/file3"
|
||||
AFTER=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
|
||||
echo "entries after delete: $AFTER"
|
||||
|
||||
echo "== partial removal leaves other entries"
|
||||
touch "$T_D0/partial"
|
||||
INO=$(stat -c "%i" "$T_D0/partial")
|
||||
setfattr -n scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/partial"
|
||||
setfattr -n scoutfs.hide.indx.b.$MAJOR.300 "$T_D0/partial"
|
||||
setfattr -x scoutfs.hide.indx.a.$MAJOR.200 "$T_D0/partial"
|
||||
read_xattr_index 0 $MAJOR.200.0 $MAJOR.200.-1 | awk '($3 == "'$INO'") {print "200 found"}'
|
||||
read_xattr_index 0 $MAJOR.300.0 $MAJOR.300.-1 | awk '($3 == "'$INO'") {print "300 found"}'
|
||||
rm -f "$T_D0/partial"
|
||||
|
||||
echo "== multiple files at same index position"
|
||||
touch "$T_D0/multi_a" "$T_D0/multi_b"
|
||||
INO_A=$(stat -c "%i" "$T_D0/multi_a")
|
||||
INO_B=$(stat -c "%i" "$T_D0/multi_b")
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/multi_a"
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/multi_b"
|
||||
COUNT=$(read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | wc -l)
|
||||
echo "files at same position: $COUNT"
|
||||
rm -f "$T_D0/multi_a"
|
||||
read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | awk '($3 == "'$INO_A'") {print "deleted file still found"}'
|
||||
read_xattr_index 0 $MAJOR.$MINOR.0 $MAJOR.$MINOR.-1 | awk '($3 == "'$INO_B'") {print "surviving file found"}'
|
||||
rm -f "$T_D0/multi_b"
|
||||
|
||||
echo "== cross-mount visibility"
|
||||
touch "$T_D0/file4"
|
||||
INO=$(stat -c "%i" "$T_D0/file4")
|
||||
setfattr -n scoutfs.hide.indx.test.$MAJOR.$MINOR "$T_D0/file4"
|
||||
read_xattr_index 1 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found on mount 1"}'
|
||||
rm -f "$T_D0/file4"
|
||||
read_xattr_index 1 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'") {print "found orphan on mount 1"}'
|
||||
|
||||
echo "== duplicate position deduplication"
|
||||
touch "$T_D0/file5"
|
||||
INO=$(stat -c "%i" "$T_D0/file5")
|
||||
setfattr -n scoutfs.hide.indx.aa.$MAJOR.$MINOR "$T_D0/file5"
|
||||
setfattr -n scoutfs.hide.indx.bb.$MAJOR.$MINOR "$T_D0/file5"
|
||||
COUNT=$(read_xattr_index 0 $MAJOR.0.0 $MAJOR.-1.-1 | awk '($3 == "'$INO'")' | wc -l)
|
||||
echo "entries for same position: $COUNT"
|
||||
rm -f "$T_D0/file5"
|
||||
|
||||
t_pass
|
||||
@@ -53,6 +53,14 @@ exec {FD1}>&- # close
|
||||
exec {FD2}>&- # close
|
||||
check_ino_index "$ino" "$dseq" "$T_M0"
|
||||
|
||||
echo "== remote unopened unlink deletes"
|
||||
echo "contents" > "$T_D0/file"
|
||||
ino=$(stat -c "%i" "$T_D0/file")
|
||||
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
|
||||
rm -f "$T_D1/file"
|
||||
check_ino_index "$ino" "$dseq" "$T_M0"
|
||||
check_ino_index "$ino" "$dseq" "$T_M1"
|
||||
|
||||
# Hurry along the orphan scanners. If any are currently asleep, we will
|
||||
# have to wait at least their current scan interval before they wake up,
|
||||
# run, and notice their new interval.
|
||||
@@ -60,19 +68,6 @@ t_save_all_sysfs_mount_options orphan_scan_delay_ms
|
||||
t_set_all_sysfs_mount_options orphan_scan_delay_ms 500
|
||||
t_wait_for_orphan_scan_runs
|
||||
|
||||
echo "== remote unopened unlink deletes"
|
||||
echo "contents" > "$T_D0/file"
|
||||
ino=$(stat -c "%i" "$T_D0/file")
|
||||
dseq=$(scoutfs stat -s data_seq "$T_D0/file")
|
||||
rm -f "$T_D1/file"
|
||||
# cross-mount deletion falls back to the orphan scanner when the
|
||||
# creating mount still has the inode cached, wait for it to complete
|
||||
t_force_log_merge
|
||||
# wait for orphan scanners to pick up the unlinked inode and become idle
|
||||
t_wait_for_no_orphans
|
||||
check_ino_index "$ino" "$dseq" "$T_M0"
|
||||
check_ino_index "$ino" "$dseq" "$T_M1"
|
||||
|
||||
echo "== unlink wait for open on other mount"
|
||||
echo "contents" > "$T_D0/badfile"
|
||||
ino=$(stat -c "%i" "$T_D0/badfile")
|
||||
@@ -86,6 +81,7 @@ exec {FD}>&- # close
|
||||
# we know that revalidating will unhash the remote dentry
|
||||
stat "$T_D0/badfile" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
|
||||
t_force_log_merge
|
||||
# wait for orphan scanners to pick up the unlinked inode and become idle
|
||||
t_wait_for_no_orphans
|
||||
check_ino_index "$ino" "$dseq" "$T_M0"
|
||||
check_ino_index "$ino" "$dseq" "$T_M1"
|
||||
|
||||
@@ -1,52 +0,0 @@
|
||||
#
|
||||
# Test that orphaned log_trees entries from unmounted rids are
|
||||
# finalized and merged.
|
||||
#
|
||||
# An orphan log_trees entry is one whose rid has no mounted_clients
|
||||
# entry. This can happen from incomplete reclaim across server
|
||||
# failovers. We simulate it with the reclaim_skip_finalize trigger
|
||||
# which makes reclaim_open_log_tree skip the finalization step.
|
||||
#
|
||||
|
||||
t_require_commands touch scoutfs
|
||||
t_require_mounts 2
|
||||
|
||||
TIMEOUT=90
|
||||
|
||||
echo "== create orphan log_trees entry via trigger"
|
||||
sv=$(t_server_nr)
|
||||
cl=$(t_first_client_nr)
|
||||
rid=$(t_mount_rid $cl)
|
||||
|
||||
touch "$T_D0/file" "$T_D1/file"
|
||||
sync
|
||||
|
||||
# arm the trigger so reclaim skips finalization
|
||||
t_trigger_arm_silent reclaim_skip_finalize $sv
|
||||
|
||||
# force unmount the client, server will fence and reclaim it
|
||||
# but the trigger makes reclaim leave log_trees unfinalized
|
||||
t_force_umount $cl
|
||||
|
||||
# wait for fencing to run
|
||||
# Succeeds once the fenced log contains a "running rid" line for $rid.
# A missing or unreadable log just counts as "not yet".
verify_fenced() {
	local pattern="running rid '$rid'"

	grep -q "$pattern" "$T_FENCED_LOG" 2>/dev/null
}
|
||||
t_wait_until_timeout $TIMEOUT verify_fenced
|
||||
|
||||
# give the server time to complete reclaim after fence
|
||||
sleep 5
|
||||
|
||||
# remount the client so t_force_log_merge can sync all mounts.
|
||||
# the client gets a new rid; the old rid's log_trees is the orphan.
|
||||
t_mount $cl
|
||||
|
||||
echo "== verify orphan is reclaimed and merge completes"
|
||||
t_force_log_merge
|
||||
|
||||
echo "== verify orphan reclaim was logged"
|
||||
if ! dmesg | grep -q "reclaiming orphan log trees for rid $rid"; then
|
||||
t_fail "expected orphan reclaim message for rid $rid in dmesg"
|
||||
fi
|
||||
|
||||
t_pass
|
||||
@@ -1,152 +0,0 @@
|
||||
|
||||
t_require_commands scoutfs dd fallocate
|
||||
|
||||
FILE="$T_D0/file"
|
||||
DIR="$T_D0/dir"
|
||||
|
||||
echo "== missing options should fail =="
|
||||
rm -rf $DIR && mkdir -p $DIR
|
||||
scoutfs punch-offline $DIR -l 4096 -V 0
|
||||
scoutfs punch-offline $DIR -o 0 -V 0
|
||||
scoutfs punch-offline $DIR -o 0 -l 4096
|
||||
|
||||
echo "== can't hole punch dir or special =="
|
||||
rm -rf $DIR && mkdir -p $DIR
|
||||
scoutfs punch-offline $DIR -o 0 -l 4096 -V 0
|
||||
|
||||
echo "== punching an empty file does nothing =="
|
||||
rm -f $FILE && touch $FILE
|
||||
scoutfs punch-offline $FILE -o 0 -l 4096 -V 0
|
||||
|
||||
echo "== punch outside of i_size does nothing =="
|
||||
dd if=/dev/zero of=$FILE bs=4096 count=1 status=none
|
||||
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 1
|
||||
|
||||
echo "== can't hole punch online extent =="
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
scoutfs punch-offline $FILE -o 0 -l 4096 -V 1
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
|
||||
echo "== can't hole punch unwritten extent =="
|
||||
rm -rf $FILE && touch $FILE
|
||||
fallocate -l $((4096 * 3)) $FILE
|
||||
vers=$(scoutfs stat -s data_version "$FILE")
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
scoutfs punch-offline $FILE -o 4096 -l 4096 -V $vers
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
|
||||
echo "== hole punch offline extent =="
|
||||
rm -rf $FILE && touch $FILE
|
||||
fallocate -l $((4096 * 3)) $FILE
|
||||
vers=$(scoutfs stat -s data_version "$FILE")
|
||||
scoutfs release $FILE --data-version $vers
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
scoutfs punch-offline $FILE -o 4096 -l 4096 -V $vers
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
|
||||
echo "== can't hole punch non-aligned bsz offset or len =="
|
||||
rm -rf $FILE && touch $FILE
|
||||
fallocate -l $((4096 * 3)) $FILE
|
||||
vers=$(scoutfs stat -s data_version "$FILE")
|
||||
scoutfs release $FILE --data-version $vers
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
scoutfs punch-offline $FILE -o 4095 -l 4096 -V $vers
|
||||
scoutfs punch-offline $FILE -o 1 -l 4096 -V $vers
|
||||
scoutfs punch-offline $FILE -o 4096 -l 409700 -V $vers
|
||||
scoutfs punch-offline $FILE -o 4096 -l 4097 -V $vers
|
||||
scoutfs punch-offline $FILE -o 4096 -l 4095 -V $vers
|
||||
scoutfs punch-offline $FILE -o 4096 -l 1 -V $vers
|
||||
scoutfs punch-offline $FILE -o 4096 -l 0 -V $vers
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
|
||||
echo "== can't hole punch mismatched data_version =="
|
||||
rm -rf $FILE && touch $FILE
|
||||
fallocate -l $((4096 * 3)) $FILE
|
||||
vers=$(scoutfs stat -s data_version "$FILE")
|
||||
scoutfs release $FILE --data-version $vers
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 0
|
||||
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 2
|
||||
scoutfs punch-offline $FILE -o 4096 -l 4096 -V 9999
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
|
||||
echo "== Punch hole crossing multiple extents =="
|
||||
rm -rf $FILE && touch $FILE
|
||||
fallocate -l $((7 * 4096)) $FILE
|
||||
vers=$(scoutfs stat -s data_version "$FILE")
|
||||
scoutfs release $FILE --data-version $vers
|
||||
scoutfs get-fiemap -L $FILE
|
||||
scoutfs punch-offline $FILE -o $((1 * 4096)) -l 4096 -V $vers
|
||||
scoutfs punch-offline $FILE -o $((3 * 4096)) -l 4096 -V $vers
|
||||
scoutfs punch-offline $FILE -o $((5 * 4096)) -l 4096 -V $vers
|
||||
# 0.1.2.3
|
||||
scoutfs get-fiemap -L $FILE
|
||||
scoutfs punch-offline $FILE -o $((2 * 4096)) -l $((3 * 4096)) -V $vers
|
||||
# 0.....1
|
||||
scoutfs get-fiemap -L $FILE
|
||||
|
||||
echo "== punch hole starting at a hole =="
|
||||
rm -rf $FILE && touch $FILE
|
||||
fallocate -l $((7 * 4096)) $FILE
|
||||
vers=$(scoutfs stat -s data_version "$FILE")
|
||||
scoutfs release $FILE --data-version $vers
|
||||
scoutfs get-fiemap -L $FILE
|
||||
scoutfs punch-offline $FILE -o $((1 * 4096)) -l 4096 -V $vers
|
||||
scoutfs punch-offline $FILE -o $((3 * 4096)) -l 4096 -V $vers
|
||||
scoutfs punch-offline $FILE -o $((5 * 4096)) -l 4096 -V $vers
|
||||
# 0.1.2.3
|
||||
scoutfs get-fiemap -L $FILE
|
||||
scoutfs punch-offline $FILE -o $((1 * 4096)) -l $((5 * 4096)) -V $vers
|
||||
# 0.....1
|
||||
scoutfs get-fiemap -L $FILE
|
||||
|
||||
echo "== large punch =="
|
||||
rm -rf $FILE && touch $FILE
|
||||
fallocate -l $((6 * 1024 * 1024 * 1024)) $FILE
|
||||
vers=$(scoutfs stat -s data_version "$FILE")
|
||||
scoutfs release $FILE --data-version $vers
|
||||
scoutfs get-fiemap -L $FILE
|
||||
scoutfs punch-offline $FILE -o $((134123 * 4096)) -l $((68343 * 4096)) -V $vers
|
||||
scoutfs punch-offline $FILE -o $((467273 * 4096)) -l $((68343 * 4096)) -V $vers
|
||||
scoutfs punch-offline $FILE -o $((734623 * 4096)) -l $((68343 * 4096)) -V $vers
|
||||
scoutfs get-fiemap -L $FILE
|
||||
|
||||
echo "== overlapping punches with lots of extents =="
|
||||
rm -rf $FILE && touch $FILE
|
||||
fallocate -l $((4096 * 1024)) $FILE
|
||||
vers=$(scoutfs stat -s data_version "$FILE")
|
||||
scoutfs release $FILE --data-version 1
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
# punch odd ones away
|
||||
for h in $(seq 1 2 1023); do
|
||||
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
|
||||
done
|
||||
scoutfs get-fiemap -Lb $FILE | tail -n 1
|
||||
# punch a large hole from 32 to 55, removing 7 extents
|
||||
scoutfs punch-offline $FILE -o $((32 * 4096)) -l $((13 * 4096)) -V $vers
|
||||
scoutfs get-fiemap -Lb $FILE | tail -n 1
|
||||
# punch every 8th @6
|
||||
for h in $(seq 6 8 1024); do
|
||||
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
|
||||
done
|
||||
# again @4
|
||||
scoutfs get-fiemap -Lb $FILE | tail -n 1
|
||||
for h in $(seq 4 8 1024); do
|
||||
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
|
||||
done
|
||||
scoutfs get-fiemap -Lb $FILE | tail -n 1
|
||||
# punching a large hole from 127 to 175, removing 12 extents
|
||||
scoutfs punch-offline $FILE -o $((127 * 4096)) -l $((48 * 4096)) -V $vers
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
# again @2
|
||||
for h in $(seq 2 8 1024); do
|
||||
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
|
||||
done
|
||||
scoutfs get-fiemap -L $FILE
|
||||
# and again @0, punching away every remaining extent
|
||||
for h in $(seq 0 8 1024); do
|
||||
scoutfs punch-offline $FILE -o $((h * 4096)) -l 4096 -V $vers
|
||||
done
|
||||
scoutfs get-fiemap -Lb $FILE
|
||||
|
||||
t_pass
|
||||
@@ -32,7 +32,7 @@ echo "== dirs shouldn't appear in data_seq queries"
|
||||
mkdir "$DIR"
|
||||
ino=$(stat -c "%i" "$DIR")
|
||||
t_sync_seq_index
|
||||
query_index data_seq | awk '($4 == "'$ino'")'
|
||||
query_index data_seq | grep "$ino\>"
|
||||
|
||||
echo "== two created files are present and come after each other"
|
||||
touch "$DIR/first"
|
||||
|
||||
@@ -1,50 +0,0 @@
|
||||
#
|
||||
# Test that merge_read_item() correctly updates the sequence number when
|
||||
# combining delta items from multiple finalized log trees. Each mount
|
||||
# sets a totl value in its own 3-bit lane (powers of 8) so that any
|
||||
# double-counting overflows the lane and is caught by: or(v, exp) != exp.
|
||||
#
|
||||
|
||||
t_require_commands setfattr scoutfs
|
||||
t_require_mounts 5
|
||||
|
||||
echo "== setup"
|
||||
for nr in $(t_fs_nrs); do
|
||||
d=$(eval echo \$T_D$nr)
|
||||
for i in $(seq 1 2500); do : > "$d/f$nr$i"; done
|
||||
done
|
||||
sync
|
||||
t_force_log_merge
|
||||
|
||||
vals=(1 8 64 512 4096)
|
||||
expected=4681
|
||||
n=0
|
||||
for nr in $(t_fs_nrs); do
|
||||
d=$(eval echo \$T_D$nr)
|
||||
v=${vals[$((n++))]}
|
||||
for i in $(seq 1 2500); do
|
||||
setfattr -n "scoutfs.totl.t.$i.0.0" -v $v "$d/f$nr$i"
|
||||
done
|
||||
done
|
||||
|
||||
t_trigger_arm_silent log_merge_force_partial $(t_server_nr)
|
||||
|
||||
bad="$T_TMPDIR/bad"
|
||||
for nr in $(t_fs_nrs); do
|
||||
( while true; do
|
||||
echo 1 > "$(t_debugfs_path $nr)/drop_weak_item_cache"
|
||||
scoutfs read-xattr-totals -p "$(eval echo \$T_M$nr)" | \
|
||||
awk -F'[ =,]+' -v e=$expected 'or($2+0,e) != e'
|
||||
done ) >> "$bad" &
|
||||
done
|
||||
|
||||
echo "expected $expected"
|
||||
t_force_log_merge
|
||||
t_silent_kill $(jobs -p)
|
||||
test -s "$bad" && echo "double-counted:" && cat "$bad"
|
||||
|
||||
echo "== cleanup"
|
||||
for nr in $(t_fs_nrs); do
|
||||
find "$(eval echo \$T_D$nr)" -name "f$nr*" -delete
|
||||
done
|
||||
t_pass
|
||||
@@ -1,127 +0,0 @@
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <argp.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "parse.h"
|
||||
#include "util.h"
|
||||
#include "ioctl.h"
|
||||
#include "cmd.h"
|
||||
|
||||
struct po_args {
|
||||
char *path;
|
||||
u64 offset;
|
||||
u64 length;
|
||||
u64 data_version;
|
||||
|
||||
unsigned offset_set:1,
|
||||
length_set:1,
|
||||
data_version_set:1;
|
||||
};
|
||||
|
||||
static int do_punch_offline(struct po_args *args)
|
||||
{
|
||||
struct scoutfs_ioctl_punch_offline ioctl_args;
|
||||
int ret;
|
||||
int fd;
|
||||
|
||||
fd = get_path(args->path, O_RDWR);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
ioctl_args.offset = args->offset;
|
||||
ioctl_args.len = args->length;
|
||||
ioctl_args.data_version = args->data_version;
|
||||
ioctl_args.flags = 0;
|
||||
|
||||
ret = ioctl(fd, SCOUTFS_IOC_PUNCH_OFFLINE, &ioctl_args);
|
||||
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "punch_offline ioctl failed: %s (%d)\n",
|
||||
strerror(errno), errno);
|
||||
}
|
||||
|
||||
close(fd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct po_args *args = state->input;
|
||||
int ret = 0;
|
||||
|
||||
switch (key) {
|
||||
case 'V':
|
||||
ret = parse_u64(arg, &args->data_version);
|
||||
if (ret)
|
||||
return ret;
|
||||
args->data_version_set = 1;
|
||||
break;
|
||||
case 'o': /* offset */
|
||||
ret = parse_human(arg, &args->offset);
|
||||
if (ret)
|
||||
return ret;
|
||||
args->offset_set = 1;
|
||||
break;
|
||||
case 'l': /* length */
|
||||
ret = parse_human(arg, &args->length);
|
||||
if (ret)
|
||||
return ret;
|
||||
args->length_set = 1;
|
||||
break;
|
||||
case ARGP_KEY_ARG:
|
||||
if (!args->path)
|
||||
args->path = strdup_or_error(state, arg);
|
||||
else
|
||||
argp_error(state, "unknown extra argument given");
|
||||
break;
|
||||
case ARGP_KEY_FINI:
|
||||
if (!args->path)
|
||||
argp_error(state, "must provide path to file");
|
||||
if (!args->offset_set)
|
||||
argp_error(state, "must provide offset");
|
||||
if (!args->length_set)
|
||||
argp_error(state, "must provide length");
|
||||
if (!args->data_version_set)
|
||||
argp_error(state, "must provide data_version");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Command line options accepted by punch-offline.  The help text
 * describes the range being punched; it previously said "to stage".
 */
static struct argp_option options[] = {
	{ "data-version", 'V', "VERSION", 0, "Data version of the file [Required]"},
	{ "offset", 'o', "OFFSET", 0, "Offset (bytes or KMGTP units) in file to punch [Required]"},
	{ "length", 'l', "LENGTH", 0, "Length of range (bytes or KMGTP units) of file to punch. [Required]"},
	{ NULL }
};
|
||||
|
||||
static struct argp argp = {
|
||||
options,
|
||||
parse_opt,
|
||||
"PATH",
|
||||
"Make a (sparse) hole in the file at offset and with length"
|
||||
};
|
||||
|
||||
static int punch_offline_cmd(int argc, char **argv)
|
||||
{
|
||||
struct po_args po_args = {NULL};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&argp, argc, argv, 0, NULL, &po_args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_punch_offline(&po_args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) punch_offline_ctor(void)
|
||||
{
|
||||
cmd_register_argp("punch-offline", &argp, GROUP_AGENT, punch_offline_cmd);
|
||||
}
|
||||
@@ -198,11 +198,13 @@ int write_block_sync(int fd, u32 magic, __le64 fsid, u64 seq, u64 blkno,
|
||||
*/
|
||||
int meta_super_in_use(int meta_fd, struct scoutfs_super_block *meta_super)
|
||||
{
|
||||
struct scoutfs_quorum_block *qblk = NULL;
|
||||
struct scoutfs_quorum_block *qblk[SCOUTFS_QUORUM_BLOCKS] = {NULL,};
|
||||
struct scoutfs_quorum_block_event *beg;
|
||||
struct scoutfs_quorum_block_event *end;
|
||||
struct scoutfs_quorum_block_event *fence;
|
||||
bool beg_was_fenced;
|
||||
int ret = 0;
|
||||
int i;
|
||||
int i, j;
|
||||
|
||||
if (meta_super->mounted_clients.ref.blkno != 0) {
|
||||
fprintf(stderr, "meta superblock mounted clients btree is not empty.\n");
|
||||
@@ -210,36 +212,61 @@ int meta_super_in_use(int meta_fd, struct scoutfs_super_block *meta_super)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* check for active quorum slots */
|
||||
/* read all blocks */
|
||||
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) {
|
||||
if (!quorum_slot_present(meta_super, i))
|
||||
continue;
|
||||
ret = read_block(meta_fd, SCOUTFS_QUORUM_BLKNO + i, SCOUTFS_BLOCK_SM_SHIFT,
|
||||
(void **)&qblk);
|
||||
(void **)&qblk[i]);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "error reading quorum block for slot %u\n", i);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
beg = &qblk->events[SCOUTFS_QUORUM_EVENT_BEGIN];
|
||||
end = &qblk->events[SCOUTFS_QUORUM_EVENT_END];
|
||||
/* check for active quorum slots */
|
||||
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) {
|
||||
if (!qblk[i])
|
||||
continue;
|
||||
|
||||
if (le64_to_cpu(beg->write_nr) > le64_to_cpu(end->write_nr)) {
|
||||
fprintf(stderr, "mount in quorum slot %u could still be running.\n"
|
||||
" begin event: write_nr %llu timestamp %llu.%08u\n"
|
||||
" end event: write_nr %llu timestamp %llu.%08u\n",
|
||||
i, le64_to_cpu(beg->write_nr), le64_to_cpu(beg->ts.sec),
|
||||
le32_to_cpu(beg->ts.nsec),
|
||||
le64_to_cpu(end->write_nr), le64_to_cpu(end->ts.sec),
|
||||
le32_to_cpu(end->ts.nsec));
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
beg = &qblk[i]->events[SCOUTFS_QUORUM_EVENT_BEGIN];
|
||||
end = &qblk[i]->events[SCOUTFS_QUORUM_EVENT_END];
|
||||
|
||||
if (le64_to_cpu(beg->write_nr) <= le64_to_cpu(end->write_nr))
|
||||
continue;
|
||||
|
||||
/* check if this term was fenced by others in a later term */
|
||||
beg_was_fenced = false;
|
||||
for (j = 0; j < SCOUTFS_QUORUM_BLOCKS; j++) {
|
||||
if ((!qblk[j]) || (i == j))
|
||||
continue;
|
||||
|
||||
fence = &qblk[j]->events[SCOUTFS_QUORUM_EVENT_FENCE];
|
||||
if (le64_to_cpu(fence->term) > le64_to_cpu(beg->term)) {
|
||||
beg_was_fenced = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
free(qblk);
|
||||
qblk = NULL;
|
||||
if (beg_was_fenced)
|
||||
continue;
|
||||
|
||||
fprintf(stderr, "mount in quorum slot %u could still be running.\n"
|
||||
" begin event: write_nr %llu timestamp %llu.%08u\n"
|
||||
" end event: write_nr %llu timestamp %llu.%08u\n",
|
||||
i, le64_to_cpu(beg->write_nr), le64_to_cpu(beg->ts.sec),
|
||||
le32_to_cpu(beg->ts.nsec),
|
||||
le64_to_cpu(end->write_nr), le64_to_cpu(end->ts.sec),
|
||||
le32_to_cpu(end->ts.nsec));
|
||||
ret = -EBUSY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
/* free any allocated blocks */
|
||||
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++)
|
||||
if (qblk[i] != NULL)
|
||||
free(qblk[i]);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user