mirror of
https://github.com/versity/scoutfs.git
synced 2026-02-01 00:02:04 +00:00
Compare commits
19 Commits
zab/server
...
auke/work
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
732637d372 | ||
|
|
963591cc9a | ||
|
|
ad79ee94f9 | ||
|
|
65ea250de9 | ||
|
|
86ca09ed7d | ||
|
|
5681920bfe | ||
|
|
6c2ccf75ea | ||
|
|
a818b9e461 | ||
|
|
b9f8eee59e | ||
|
|
d8fcbb9564 | ||
|
|
4d58252e1a | ||
|
|
293df47589 | ||
|
|
2a58e4c147 | ||
|
|
1b7917e063 | ||
|
|
4f9c3503c8 | ||
|
|
541cb47af0 | ||
|
|
d537365d0a | ||
|
|
7375627861 | ||
|
|
48d849e2f4 |
@@ -14,7 +14,6 @@ scoutfs-y += \
|
||||
alloc.o \
|
||||
block.o \
|
||||
btree.o \
|
||||
check.o \
|
||||
client.o \
|
||||
counters.o \
|
||||
data.o \
|
||||
|
||||
@@ -278,14 +278,6 @@ ifneq (,$(shell grep 'int ..mknod. .struct user_namespace' include/linux/fs.h))
|
||||
ccflags-y += -DKC_VFS_METHOD_USER_NAMESPACE_ARG
|
||||
endif
|
||||
|
||||
#
|
||||
# v6.2-rc1-2-gabf08576afe3
|
||||
#
|
||||
# fs: vfs methods use struct mnt_idmap instead of struct user_namespace
|
||||
ifneq (,$(shell grep 'int vfs_mknod.struct mnt_idmap' include/linux/fs.h))
|
||||
ccflags-y += -DKC_VFS_METHOD_MNT_IDMAP_ARG
|
||||
endif
|
||||
|
||||
#
|
||||
# v5.17-rc2-21-g07888c665b40
|
||||
#
|
||||
@@ -470,19 +462,3 @@ ifneq (,$(shell grep 'struct list_lru_one \*list, spinlock_t \*lock, void \*cb_a
|
||||
ccflags-y += -DKC_LIST_LRU_WALK_CB_LIST_LOCK
|
||||
endif
|
||||
|
||||
#
|
||||
# v5.1-rc4-273-ge9b98e162aa5
|
||||
#
|
||||
# introduce stack trace helpers
|
||||
#
|
||||
ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h))
|
||||
ccflags-y += -DKC_STACK_TRACE_SAVE
|
||||
endif
|
||||
|
||||
# v6.1-rc1-4-g7420332a6ff4
|
||||
#
|
||||
# .get_acl() method now has dentry arg (and mnt_idmap). The old get_acl has been renamed
|
||||
# to get_inode_acl() and is still available as well, but has an extra rcu param.
|
||||
ifneq (,$(shell grep 'struct posix_acl ...get_acl..struct mnt_idmap ., struct dentry' include/linux/fs.h))
|
||||
ccflags-y += -DKC_GET_ACL_DENTRY
|
||||
endif
|
||||
|
||||
@@ -107,15 +107,8 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
|
||||
return acl;
|
||||
}
|
||||
|
||||
#ifdef KC_GET_ACL_DENTRY
|
||||
struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF
|
||||
struct dentry *dentry, int type)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
#else
|
||||
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
|
||||
{
|
||||
#endif
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct posix_acl *acl;
|
||||
@@ -208,15 +201,8 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_GET_ACL_DENTRY
|
||||
int scoutfs_set_acl(KC_VFS_NS_DEF
|
||||
struct dentry *dentry, struct posix_acl *acl, int type)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
#else
|
||||
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
|
||||
{
|
||||
#endif
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
LIST_HEAD(ind_locks);
|
||||
@@ -254,12 +240,7 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
|
||||
if (!IS_POSIXACL(dentry->d_inode))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
#ifdef KC_GET_ACL_DENTRY
|
||||
acl = scoutfs_get_acl(KC_VFS_INIT_NS
|
||||
dentry, type);
|
||||
#else
|
||||
acl = scoutfs_get_acl(dentry->d_inode, type);
|
||||
#endif
|
||||
if (IS_ERR(acl))
|
||||
return PTR_ERR(acl);
|
||||
if (acl == NULL)
|
||||
@@ -305,11 +286,7 @@ int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *v
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef KC_GET_ACL_DENTRY
|
||||
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
|
||||
#else
|
||||
ret = scoutfs_set_acl(dentry->d_inode, acl, type);
|
||||
#endif
|
||||
out:
|
||||
posix_acl_release(acl);
|
||||
|
||||
|
||||
@@ -1,14 +1,9 @@
|
||||
#ifndef _SCOUTFS_ACL_H_
|
||||
#define _SCOUTFS_ACL_H_
|
||||
|
||||
#ifdef KC_GET_ACL_DENTRY
|
||||
struct posix_acl *scoutfs_get_acl(KC_VFS_NS_DEF struct dentry *dentry, int type);
|
||||
int scoutfs_set_acl(KC_VFS_NS_DEF struct dentry *dentry, struct posix_acl *acl, int type);
|
||||
#else
|
||||
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
|
||||
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
|
||||
#endif
|
||||
struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
|
||||
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
|
||||
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
|
||||
struct scoutfs_lock *lock, struct list_head *ind_locks);
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
|
||||
@@ -857,7 +857,7 @@ static int find_zone_extent(struct super_block *sb, struct scoutfs_alloc_root *r
|
||||
.zone = SCOUTFS_FREE_EXTENT_ORDER_ZONE,
|
||||
};
|
||||
struct scoutfs_extent found;
|
||||
struct scoutfs_extent ext = {0,};
|
||||
struct scoutfs_extent ext;
|
||||
u64 start;
|
||||
u64 len;
|
||||
int nr;
|
||||
|
||||
@@ -23,7 +23,6 @@
|
||||
#include <linux/random.h>
|
||||
#include <linux/sched/mm.h>
|
||||
#include <linux/list_lru.h>
|
||||
#include <linux/stacktrace.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "super.h"
|
||||
@@ -81,8 +80,6 @@ struct block_private {
|
||||
struct page *page;
|
||||
void *virt;
|
||||
};
|
||||
unsigned int stack_len;
|
||||
unsigned long stack[10];
|
||||
};
|
||||
|
||||
#define TRACE_BLOCK(which, bp) \
|
||||
@@ -103,17 +100,7 @@ static __le32 block_calc_crc(struct scoutfs_block_header *hdr, u32 size)
|
||||
return cpu_to_le32(calc);
|
||||
}
|
||||
|
||||
static noinline void save_block_stack(struct block_private *bp)
|
||||
{
|
||||
bp->stack_len = stack_trace_save(bp->stack, ARRAY_SIZE(bp->stack), 2);
|
||||
}
|
||||
|
||||
static void print_block_stack(struct block_private *bp)
|
||||
{
|
||||
stack_trace_print(bp->stack, bp->stack_len, 1);
|
||||
}
|
||||
|
||||
static noinline struct block_private *block_alloc(struct super_block *sb, u64 blkno)
|
||||
static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
|
||||
{
|
||||
struct block_private *bp;
|
||||
unsigned int nofs_flags;
|
||||
@@ -169,7 +156,6 @@ static noinline struct block_private *block_alloc(struct super_block *sb, u64 bl
|
||||
atomic_set(&bp->io_count, 0);
|
||||
|
||||
TRACE_BLOCK(allocate, bp);
|
||||
save_block_stack(bp);
|
||||
|
||||
out:
|
||||
if (!bp)
|
||||
@@ -1127,19 +1113,6 @@ static unsigned long block_scan_objects(struct shrinker *shrink, struct shrink_c
|
||||
return freed;
|
||||
}
|
||||
|
||||
static enum lru_status dump_lru_block(struct list_head *item, struct list_lru_one *list,
|
||||
void *cb_arg)
|
||||
{
|
||||
struct block_private *bp = container_of(item, struct block_private, lru_head);
|
||||
|
||||
printk("blkno %llu refcount 0x%x io_count %d bits 0x%lx\n",
|
||||
bp->bl.blkno, atomic_read(&bp->refcount), atomic_read(&bp->io_count),
|
||||
bp->bits);
|
||||
print_block_stack(bp);
|
||||
|
||||
return LRU_SKIP;
|
||||
}
|
||||
|
||||
/*
|
||||
* Called during shutdown with no other users. The isolating walk must
|
||||
* find blocks on the lru that only have references for presence on the
|
||||
@@ -1149,19 +1122,11 @@ static void block_shrink_all(struct super_block *sb)
|
||||
{
|
||||
DECLARE_BLOCK_INFO(sb, binf);
|
||||
DECLARE_ISOLATE_ARGS(sb, ia);
|
||||
long count;
|
||||
|
||||
count = DIV_ROUND_UP(list_lru_count(&binf->lru), 128) * 2;
|
||||
do {
|
||||
kc_list_lru_walk(&binf->lru, isolate_lru_block, &ia, 128);
|
||||
shrink_dispose_blocks(sb, &ia.dispose);
|
||||
} while (list_lru_count(&binf->lru) > 0 && --count > 0);
|
||||
|
||||
count = list_lru_count(&binf->lru);
|
||||
if (count > 0) {
|
||||
scoutfs_err(sb, "failed to isolate/dispose %ld blocks", count);
|
||||
kc_list_lru_walk(&binf->lru, dump_lru_block, sb, count);
|
||||
}
|
||||
} while (list_lru_count(&binf->lru) > 0);
|
||||
}
|
||||
|
||||
struct sm_block_completion {
|
||||
|
||||
356
kmod/src/check.c
356
kmod/src/check.c
@@ -1,356 +0,0 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/vmalloc.h>
|
||||
#include <linux/bitmap.h>
|
||||
|
||||
#include "super.h"
|
||||
#include "format.h"
|
||||
#include "block.h"
|
||||
#include "msg.h"
|
||||
#include "avl.h"
|
||||
#include "check.h"
|
||||
|
||||
/*
 * Bitmap with one bit per metadata block number.  Bits are set as
 * references to blocks are found.
 */
struct bit_map {
	/* bitmap storage handed to the kernel bit helpers */
	unsigned long *addr;
	/* number of bits (block numbers) covered by addr */
	long size;
	/* not set or read by the checking code in this file */
	long bytes;
};
|
||||
|
||||
/* set to false by warn_once_disable() the first time a check fails */
static bool enabled = true;

/*
 * Evaluate cond once and yield its truth value.  The first time a given
 * call site sees a true condition it logs the message with a "check: "
 * prefix and clears the global enabled flag.  Each expansion has its own
 * static flag so every call site logs at most once.
 */
#define warn_once_disable(sb, cond, fmt, args...)			\
({									\
	static bool warned_ = false;					\
	bool hit_ = (cond);						\
									\
	if (hit_ && !warned_) {						\
		scoutfs_err(sb, "check: " fmt, ##args);			\
		enabled = false;					\
		warned_ = true;						\
	}								\
									\
	hit_;								\
})
|
||||
|
||||
static void check_blkno(struct super_block *sb, struct bit_map *map, long nr)
|
||||
{
|
||||
if (nr != 0 && !warn_once_disable(sb, nr < 0 || nr >= map->size,
|
||||
"nr %ld outside map->size %ld", nr, map->size))
|
||||
warn_once_disable(sb, test_and_set_bit(nr, map->addr),
|
||||
"nr %ld already set", nr);
|
||||
}
|
||||
|
||||
/*
 * Record references to the len blocks starting at start.
 *
 * An extent that doesn't fit inside the map is reported and otherwise
 * ignored.  An extent that overlaps any bit that is already set is
 * reported, and its bits are then set regardless.
 */
static void check_extent(struct super_block *sb, struct bit_map *map, u64 start, u64 len)
{
	unsigned long nr;

	/* start and len are each bounded first so start + len can't wrap */
	if (warn_once_disable(sb, start >= map->size || len > map->size ||
				  (start + len) > map->size,
			      "start %llu len %llu outside map->size %ld",
			      start, len, map->size))
		return;

	/* only the extent itself needs to be searched for set bits */
	nr = find_next_bit(map->addr, start + len, start);
	warn_once_disable(sb, nr < start + len,
			  "start %llu len %llu has bits already set, first %lu",
			  start, len, nr);

	bitmap_set(map->addr, start, len);
}
|
||||
|
||||
static void check_block_ref(struct super_block *sb, struct bit_map *map,
|
||||
struct scoutfs_block_ref *ref)
|
||||
{
|
||||
check_blkno(sb, map, le64_to_cpu(ref->blkno));
|
||||
}
|
||||
|
||||
/*
|
||||
* As long as we're not handling errors, we can have this return the
|
||||
* pointer to the block data if it was read successfully. Everything
|
||||
* else returns null and the caller backs off.
|
||||
*/
|
||||
static void *read_block_ref(struct super_block *sb, struct bit_map *map,
|
||||
struct scoutfs_block_ref *ref, u32 magic,
|
||||
struct scoutfs_block **bl_ret)
|
||||
{
|
||||
check_block_ref(sb, map, ref);
|
||||
|
||||
if (ref->blkno != 0 && scoutfs_block_read_ref(sb, ref, magic, bl_ret) == 0)
|
||||
return (*bl_ret)->data;
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* returns false if caller should stop iterating */
|
||||
typedef bool (*check_btree_item_cb)(struct super_block *sb, struct bit_map *map,
|
||||
struct scoutfs_key *key, void *val, u16 val_len);
|
||||
|
||||
/*
|
||||
* We walk the items in key order via the avl so that the item callbacks
|
||||
* can have us stop iterating based on their knowledge of key ordering.
|
||||
*/
|
||||
static void check_btree_block_ref(struct super_block *sb, struct bit_map *map,
|
||||
u8 level, struct scoutfs_block_ref *ref,
|
||||
check_btree_item_cb item_cb)
|
||||
{
|
||||
struct scoutfs_block *bl = NULL;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_avl_node *node;
|
||||
void *val;
|
||||
u16 val_off;
|
||||
u16 val_len;
|
||||
|
||||
if (!(bt = read_block_ref(sb, map, ref, SCOUTFS_BLOCK_MAGIC_BTREE, &bl)))
|
||||
return;
|
||||
|
||||
if (bt->level != level)
|
||||
goto out;
|
||||
|
||||
for (node = scoutfs_avl_first(&bt->item_root);
|
||||
node != NULL;
|
||||
node = scoutfs_avl_next(&bt->item_root, node)) {
|
||||
item = container_of(node, struct scoutfs_btree_item, node);
|
||||
|
||||
val_off = le16_to_cpu(item->val_off);
|
||||
val_len = le16_to_cpu(item->val_len);
|
||||
val = (void *)bt + val_off;
|
||||
|
||||
if (bt->level > 0)
|
||||
check_btree_block_ref(sb, map, bt->level - 1, val, item_cb);
|
||||
else if (item_cb && !item_cb(sb, map, &item->key, val, val_len))
|
||||
break;
|
||||
}
|
||||
out:
|
||||
scoutfs_block_put(sb, bl);
|
||||
}
|
||||
|
||||
/*
 * Walk all the blocks of a btree from its root.  A root without any
 * height has no blocks to walk.
 */
static void check_btree_root(struct super_block *sb, struct bit_map *map,
			     struct scoutfs_btree_root *root, check_btree_item_cb item_cb)
{
	if (root->height <= 0)
		return;

	/* the root block sits at the highest level, height - 1 */
	check_btree_block_ref(sb, map, root->height - 1, &root->ref, item_cb);
}
|
||||
|
||||
/*
 * Item callback for allocator btrees.  Items in the free extent blkno
 * zone are recorded as extents; the first item in any other zone stops
 * the iteration.
 */
static bool check_alloc_extent_item(struct super_block *sb, struct bit_map *map,
				    struct scoutfs_key *key, void *val, u16 val_len)
{
	u64 end;
	u64 len;

	/* XXX only checking primary blkno items */
	if (key->sk_zone != SCOUTFS_FREE_EXTENT_BLKNO_ZONE) {
		/* otherwise stop iterating over items */
		return false;
	}

	/* the key stores the last block and the length of the extent */
	end = le64_to_cpu(key->skfb_end);
	len = le64_to_cpu(key->skfb_len);
	check_extent(sb, map, end - len + 1, len);

	return true;
}
|
||||
|
||||
static void check_alloc_root(struct super_block *sb, struct bit_map *map,
|
||||
struct scoutfs_alloc_root *root)
|
||||
{
|
||||
check_btree_root(sb, map, &root->root, check_alloc_extent_item);
|
||||
}
|
||||
|
||||
static void check_alloc_list_block_ref(struct super_block *sb, struct bit_map *map,
|
||||
struct scoutfs_block_ref *caller_ref)
|
||||
{
|
||||
struct scoutfs_alloc_list_block *lblk;
|
||||
struct scoutfs_block_ref ref;
|
||||
struct scoutfs_block *bl;
|
||||
u32 start;
|
||||
u32 nr;
|
||||
u32 i;
|
||||
|
||||
ref = *caller_ref;
|
||||
|
||||
while ((lblk = read_block_ref(sb, map, &ref, SCOUTFS_BLOCK_MAGIC_ALLOC_LIST, &bl))) {
|
||||
|
||||
start = le32_to_cpu(lblk->start);
|
||||
nr = le32_to_cpu(lblk->nr);
|
||||
|
||||
/* could sort and combine into extents */
|
||||
for (i = 0; i < nr; i++)
|
||||
check_blkno(sb, map, le64_to_cpu(lblk->blknos[start + i]));
|
||||
|
||||
ref = lblk->next;
|
||||
scoutfs_block_put(sb, bl);
|
||||
}
|
||||
}
|
||||
|
||||
static void check_alloc_list_head(struct super_block *sb, struct bit_map *map,
|
||||
struct scoutfs_alloc_list_head *lhead)
|
||||
{
|
||||
check_alloc_list_block_ref(sb, map, &lhead->ref);
|
||||
}
|
||||
|
||||
/*
 * Item callback for the log_merge btree.  The item's zone determines
 * how its value is interpreted and which references it contributes.
 * Items in other zones are skipped.  Always continues iteration.
 */
static bool check_log_merge_item(struct super_block *sb, struct bit_map *map,
				 struct scoutfs_key *key, void *val, u16 val_len)
{
	switch (key->sk_zone) {
	case SCOUTFS_LOG_MERGE_REQUEST_ZONE: {
		struct scoutfs_log_merge_request *req = val;

		check_alloc_list_head(sb, map, &req->meta_avail);
		check_alloc_list_head(sb, map, &req->meta_freed);
		/* logs_root and root are shared refs */
		break;
	}

	case SCOUTFS_LOG_MERGE_COMPLETE_ZONE: {
		struct scoutfs_log_merge_complete *comp = val;

		check_alloc_list_head(sb, map, &comp->meta_avail);
		check_alloc_list_head(sb, map, &comp->meta_freed);
		/* XXX merged subtree? hmm. */
		break;
	}

	case SCOUTFS_LOG_MERGE_FREEING_ZONE: {
		struct scoutfs_log_merge_freeing *fr = val;

		check_btree_root(sb, map, &fr->root, NULL);
		break;
	}

	default:
		break;
	}

	return true;
}
|
||||
|
||||
static void check_srch_file_block_ref(struct super_block *sb, struct bit_map *map,
|
||||
u8 level, struct scoutfs_block_ref *ref)
|
||||
{
|
||||
struct scoutfs_block *bl = NULL;
|
||||
struct scoutfs_srch_parent *srp;
|
||||
int i;
|
||||
|
||||
if (level == 0) {
|
||||
check_block_ref(sb, map, ref);
|
||||
return;
|
||||
}
|
||||
|
||||
if (!(srp = read_block_ref(sb, map, ref, SCOUTFS_BLOCK_MAGIC_SRCH_PARENT, &bl)))
|
||||
return;
|
||||
|
||||
for (i = 0; i < SCOUTFS_SRCH_PARENT_REFS; i++)
|
||||
check_srch_file_block_ref(sb, map, level - 1, &srp->refs[i]);
|
||||
|
||||
scoutfs_block_put(sb, bl);
|
||||
}
|
||||
|
||||
static void check_srch_file(struct super_block *sb, struct bit_map *map,
|
||||
struct scoutfs_srch_file *sfl)
|
||||
{
|
||||
if (sfl->height > 0)
|
||||
check_srch_file_block_ref(sb, map, sfl->height - 1, &sfl->ref);
|
||||
}
|
||||
|
||||
/*
 * Item callback for the srch_root btree.  The item's type determines
 * whether its value is a srch file or a srch compaction, and so which
 * references it contributes.  Items of other types are skipped.  Always
 * continues iteration.
 */
static bool check_srch_item(struct super_block *sb, struct bit_map *map,
			    struct scoutfs_key *key, void *val, u16 val_len)
{
	switch (key->sk_type) {
	case SCOUTFS_SRCH_BLOCKS_TYPE:
	case SCOUTFS_SRCH_LOG_TYPE: {
		struct scoutfs_srch_file *sfl = val;

		check_srch_file(sb, map, sfl);
		break;
	}

	case SCOUTFS_SRCH_PENDING_TYPE:
	case SCOUTFS_SRCH_BUSY_TYPE: {
		struct scoutfs_srch_compact *sc = val;

		check_alloc_list_head(sb, map, &sc->meta_avail);
		check_alloc_list_head(sb, map, &sc->meta_freed);
		check_srch_file(sb, map, &sc->out);
		break;
	}

	default:
		break;
	}

	return true;
}
|
||||
|
||||
/*
 * Item callback for btrees whose item values are log trees structs.
 * Records the references held by each of the struct's allocator lists,
 * btree roots, bloom block ref, and srch file.  Always continues
 * iteration.
 */
static bool check_log_trees_item(struct super_block *sb, struct bit_map *map,
				 struct scoutfs_key *key, void *val, u16 val_len)
{
	struct scoutfs_log_trees *lt = val;

	check_alloc_list_head(sb, map, &lt->meta_avail);
	check_alloc_list_head(sb, map, &lt->meta_freed);
	check_btree_root(sb, map, &lt->item_root, NULL);
	check_block_ref(sb, map, &lt->bloom_ref);
	check_btree_root(sb, map, &lt->data_avail.root, NULL);
	check_btree_root(sb, map, &lt->data_freed.root, NULL);
	check_srch_file(sb, map, &lt->srch_file);

	return true;
}
|
||||
|
||||
static void check_super(struct super_block *sb, struct bit_map *map,
|
||||
struct scoutfs_super_block *super)
|
||||
{
|
||||
check_alloc_root(sb, map, &super->meta_alloc[0]);
|
||||
check_alloc_root(sb, map, &super->meta_alloc[1]);
|
||||
check_btree_root(sb, map, &super->data_alloc.root, NULL);
|
||||
check_alloc_list_head(sb, map, &super->server_meta_avail[0]);
|
||||
check_alloc_list_head(sb, map, &super->server_meta_avail[1]);
|
||||
check_alloc_list_head(sb, map, &super->server_meta_freed[0]);
|
||||
check_alloc_list_head(sb, map, &super->server_meta_freed[1]);
|
||||
check_btree_root(sb, map, &super->fs_root, NULL);
|
||||
check_btree_root(sb, map, &super->logs_root, check_log_trees_item);
|
||||
check_btree_root(sb, map, &super->log_merge, check_log_merge_item);
|
||||
check_btree_root(sb, map, &super->mounted_clients, NULL);
|
||||
check_btree_root(sb, map, &super->srch_root, check_srch_item);
|
||||
}
|
||||
|
||||
static void check_map(struct super_block *sb, struct bit_map *map)
|
||||
{
|
||||
unsigned long nr = find_next_zero_bit(map->addr, map->size, 0);
|
||||
|
||||
warn_once_disable(sb, nr < map->size,
|
||||
"final map has missing bits, first %lu", nr);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called while the persistent block structures are stable.
|
||||
* While we might have to drop stale cache as we read these blocks, we
|
||||
* should be able to walk stable block references from the super.
|
||||
*/
|
||||
void scoutfs_check_meta_refs(struct super_block *sb, struct scoutfs_super_block *super)
|
||||
{
|
||||
static struct bit_map map = {NULL,};
|
||||
unsigned long bytes;
|
||||
u64 size;
|
||||
|
||||
if (!enabled)
|
||||
return;
|
||||
|
||||
size = le64_to_cpu(super->total_meta_blocks);
|
||||
|
||||
if (warn_once_disable(sb, size <= SCOUTFS_META_DEV_START_BLKNO,
|
||||
"total_meta %llu too small", size) ||
|
||||
warn_once_disable(sb, size > LONG_MAX,
|
||||
"total_meta %llu too large", size))
|
||||
return;
|
||||
|
||||
bytes = DIV_ROUND_UP(size, 8);
|
||||
if (size != map.size) {
|
||||
if (map.addr) {
|
||||
vfree(map.addr);
|
||||
map.addr = NULL;
|
||||
}
|
||||
|
||||
map.addr = vmalloc(bytes);
|
||||
if (warn_once_disable(sb, !map.addr, "couldn't alloc %lu byte vmalloc", bytes))
|
||||
return;
|
||||
|
||||
map.size = size;
|
||||
}
|
||||
|
||||
memset(map.addr, 0, bytes);
|
||||
/* initial large block numbers used by padding and 4k super and quorum blocks */
|
||||
bitmap_set(map.addr, 0, SCOUTFS_META_DEV_START_BLKNO);
|
||||
|
||||
check_super(sb, &map, super);
|
||||
check_map(sb, &map);
|
||||
|
||||
if (!enabled)
|
||||
panic("found inconsistent meta refs");
|
||||
}
|
||||
@@ -1,6 +0,0 @@
|
||||
#ifndef _SCOUTFS_CHECK_H_
#define _SCOUTFS_CHECK_H_

struct super_block;
struct scoutfs_super_block;

/* Check the metadata block references that are reachable from the super. */
void scoutfs_check_meta_refs(struct super_block *sb,
			     struct scoutfs_super_block *super);

#endif
|
||||
@@ -435,8 +435,8 @@ static int lookup_mounted_client_item(struct super_block *sb, u64 rid)
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
kfree(super);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -2053,9 +2053,6 @@ const struct inode_operations scoutfs_dir_iops = {
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
#ifdef KC_GET_ACL_DENTRY
|
||||
.set_acl = scoutfs_set_acl,
|
||||
#endif
|
||||
.symlink = scoutfs_symlink,
|
||||
.permission = scoutfs_permission,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
|
||||
@@ -150,9 +150,6 @@ static const struct inode_operations scoutfs_file_iops = {
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
#ifdef KC_GET_ACL_DENTRY
|
||||
.set_acl = scoutfs_set_acl,
|
||||
#endif
|
||||
.fiemap = scoutfs_data_fiemap,
|
||||
};
|
||||
|
||||
@@ -166,9 +163,6 @@ static const struct inode_operations scoutfs_special_iops = {
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
#ifdef KC_GET_ACL_DENTRY
|
||||
.set_acl = scoutfs_set_acl,
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -2194,7 +2188,7 @@ int scoutfs_inode_walk_writeback(struct super_block *sb, bool write)
|
||||
struct scoutfs_inode_info *si;
|
||||
struct scoutfs_inode_info *tmp;
|
||||
struct inode *inode;
|
||||
int ret = 0;
|
||||
int ret;
|
||||
|
||||
spin_lock(&inf->writeback_lock);
|
||||
|
||||
|
||||
@@ -954,9 +954,6 @@ static int copy_alloc_detail_to_user(struct super_block *sb, void *arg,
|
||||
if (args->copied == args->nr)
|
||||
return -EOVERFLOW;
|
||||
|
||||
/* .type and .pad need clearing */
|
||||
memset(&ade, 0, sizeof(struct scoutfs_ioctl_alloc_detail_entry));
|
||||
|
||||
ade.blocks = blocks;
|
||||
ade.id = id;
|
||||
ade.meta = !!meta;
|
||||
@@ -1372,7 +1369,7 @@ static long scoutfs_ioc_get_referring_entries(struct file *file, unsigned long a
|
||||
ent.d_type = bref->d_type;
|
||||
ent.name_len = name_len;
|
||||
|
||||
if (copy_to_user(uent, &ent, offsetof(struct scoutfs_ioctl_dirent, name[0])) ||
|
||||
if (copy_to_user(uent, &ent, sizeof(struct scoutfs_ioctl_dirent)) ||
|
||||
copy_to_user(&uent->name[0], bref->dent.name, name_len) ||
|
||||
put_user('\0', &uent->name[name_len])) {
|
||||
ret = -EFAULT;
|
||||
|
||||
@@ -86,8 +86,6 @@ struct item_cache_info {
|
||||
/* often walked, but per-cpu refs are fast path */
|
||||
rwlock_t rwlock;
|
||||
struct rb_root pg_root;
|
||||
/* stop readers from caching stale items behind reclaimed cleaned written items */
|
||||
u64 read_dirty_barrier;
|
||||
|
||||
/* page-granular modification by writers, then exclusive to commit */
|
||||
spinlock_t dirty_lock;
|
||||
@@ -98,6 +96,9 @@ struct item_cache_info {
|
||||
spinlock_t lru_lock;
|
||||
struct list_head lru_list;
|
||||
unsigned long lru_pages;
|
||||
|
||||
/* stop readers from caching stale items behind reclaimed cleaned written items */
|
||||
atomic64_t read_dirty_barrier;
|
||||
};
|
||||
|
||||
#define DECLARE_ITEM_CACHE_INFO(sb, name) \
|
||||
@@ -1430,9 +1431,7 @@ static int read_pages(struct super_block *sb, struct item_cache_info *cinf,
|
||||
pg->end = lock->end;
|
||||
rbtree_insert(&pg->node, NULL, &root.rb_node, &root);
|
||||
|
||||
read_lock(&cinf->rwlock);
|
||||
rdbar = cinf->read_dirty_barrier;
|
||||
read_unlock(&cinf->rwlock);
|
||||
rdbar = atomic64_read(&cinf->read_dirty_barrier);
|
||||
|
||||
start = lock->start;
|
||||
end = lock->end;
|
||||
@@ -1471,18 +1470,19 @@ static int read_pages(struct super_block *sb, struct item_cache_info *cinf,
|
||||
retry:
|
||||
write_lock(&cinf->rwlock);
|
||||
|
||||
/* can't insert if write has cleaned since we read */
|
||||
if (cinf->read_dirty_barrier != rdbar) {
|
||||
scoutfs_inc_counter(sb, item_read_pages_barrier);
|
||||
ret = -ESTALE;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
while ((rd = first_page(&root))) {
|
||||
|
||||
pg = page_rbtree_walk(sb, &cinf->pg_root, &rd->start, &rd->end,
|
||||
NULL, NULL, &par, &pnode);
|
||||
if (!pg) {
|
||||
/* can't insert if write is cleaning (write_lock is read barrier) */
|
||||
if (atomic64_read(&cinf->read_dirty_barrier) != rdbar) {
|
||||
scoutfs_inc_counter(sb, item_read_pages_barrier);
|
||||
ret = -ESTALE;
|
||||
break;
|
||||
}
|
||||
|
||||
/* insert read pages that don't intersect */
|
||||
rbtree_erase(&rd->node, &root);
|
||||
rbtree_insert(&rd->node, par, pnode, &cinf->pg_root);
|
||||
@@ -1515,9 +1515,6 @@ retry:
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
unlock:
|
||||
write_unlock(&cinf->rwlock);
|
||||
|
||||
out:
|
||||
@@ -2361,10 +2358,9 @@ int scoutfs_item_write_done(struct super_block *sb)
|
||||
struct cached_item *tmp;
|
||||
struct cached_page *pg;
|
||||
|
||||
/* don't let read_pages miss written+cleaned items */
|
||||
write_lock(&cinf->rwlock);
|
||||
cinf->read_dirty_barrier++;
|
||||
write_unlock(&cinf->rwlock);
|
||||
/* don't let read_pages insert possibly stale items */
|
||||
atomic64_inc(&cinf->read_dirty_barrier);
|
||||
smp_mb__after_atomic();
|
||||
|
||||
spin_lock(&cinf->dirty_lock);
|
||||
while ((pg = list_first_entry_or_null(&cinf->dirty_list, struct cached_page, dirty_head))) {
|
||||
@@ -2619,6 +2615,7 @@ int scoutfs_item_setup(struct super_block *sb)
|
||||
atomic_set(&cinf->dirty_pages, 0);
|
||||
spin_lock_init(&cinf->lru_lock);
|
||||
INIT_LIST_HEAD(&cinf->lru_list);
|
||||
atomic64_set(&cinf->read_dirty_barrier, 0);
|
||||
|
||||
cinf->pcpu_pages = alloc_percpu(struct item_percpu_pages);
|
||||
if (!cinf->pcpu_pages)
|
||||
|
||||
@@ -263,11 +263,6 @@ typedef unsigned int blk_opf_t;
|
||||
#define kc__vmalloc __vmalloc
|
||||
#endif
|
||||
|
||||
#ifdef KC_VFS_METHOD_MNT_IDMAP_ARG
|
||||
#define KC_VFS_NS_DEF struct mnt_idmap *mnt_idmap,
|
||||
#define KC_VFS_NS mnt_idmap,
|
||||
#define KC_VFS_INIT_NS &nop_mnt_idmap,
|
||||
#else
|
||||
#ifdef KC_VFS_METHOD_USER_NAMESPACE_ARG
|
||||
#define KC_VFS_NS_DEF struct user_namespace *mnt_user_ns,
|
||||
#define KC_VFS_NS mnt_user_ns,
|
||||
@@ -277,7 +272,6 @@ typedef unsigned int blk_opf_t;
|
||||
#define KC_VFS_NS
|
||||
#define KC_VFS_INIT_NS
|
||||
#endif
|
||||
#endif /* KC_VFS_METHOD_MNT_IDMAP_ARG */
|
||||
|
||||
#ifdef KC_BIO_ALLOC_DEV_OPF_ARGS
|
||||
#define kc_bio_alloc bio_alloc
|
||||
@@ -463,30 +457,4 @@ static inline void list_lru_isolate_move(struct list_lru_one *list, struct list_
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KC_STACK_TRACE_SAVE
#include <linux/stacktrace.h>

/*
 * Fallback used when KC_STACK_TRACE_SAVE isn't defined: wrap the
 * caller's array in a struct stack_trace, fill it with
 * save_stack_trace(), and return the number of entries stored.
 */
static inline unsigned int stack_trace_save(unsigned long *store, unsigned int size,
					    unsigned int skipnr)
{
	struct stack_trace trace = {0,};

	trace.entries = store;
	trace.max_entries = size;
	trace.skip = skipnr;

	save_stack_trace(&trace);

	return trace.nr_entries;
}

/*
 * Fallback used when KC_STACK_TRACE_SAVE isn't defined: wrap the
 * caller's entries in a struct stack_trace and print them with
 * print_stack_trace().
 */
static inline void stack_trace_print(unsigned long *entries, unsigned int nr_entries, int spaces)
{
	struct stack_trace trace = {0,};

	trace.entries = entries;
	trace.nr_entries = nr_entries;

	print_stack_trace(&trace, spaces);
}
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1105,15 +1105,9 @@ static void scoutfs_net_listen_worker(struct work_struct *work)
|
||||
conn->notify_down,
|
||||
conn->info_size,
|
||||
conn->req_funcs, "accepted");
|
||||
/*
|
||||
* scoutfs_net_alloc_conn() can fail due to ENOMEM. If this
|
||||
* is the only thing that does so, there's no harm in trying
|
||||
* to see if kernel_accept() can get enough memory to try accepting
|
||||
* a new connection again. If that then fails with ENOMEM, it'll
|
||||
* shut down the conn anyway. So just retry here.
|
||||
*/
|
||||
if (!acc_conn) {
|
||||
sock_release(acc_sock);
|
||||
ret = -ENOMEM;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
@@ -592,7 +592,7 @@ static int handle_request(struct super_block *sb, struct omap_request *req)
|
||||
ret = 0;
|
||||
out:
|
||||
free_rids(&priv_rids);
|
||||
if ((ret < 0) && (req != NULL)) {
|
||||
if (ret < 0) {
|
||||
ret = scoutfs_server_send_omap_response(sb, req->client_rid, req->client_id,
|
||||
NULL, ret);
|
||||
free_req(req);
|
||||
|
||||
@@ -128,7 +128,7 @@ static void free_options(struct scoutfs_mount_options *opts)
|
||||
#define MIN_DATA_PREALLOC_BLOCKS 1ULL
|
||||
#define MAX_DATA_PREALLOC_BLOCKS ((unsigned long long)SCOUTFS_BLOCK_SM_MAX)
|
||||
|
||||
#define DEFAULT_TCP_KEEPALIVE_TIMEOUT_MS (60 * MSEC_PER_SEC)
|
||||
#define DEFAULT_TCP_KEEPALIVE_TIMEOUT_MS (10 * MSEC_PER_SEC)
|
||||
|
||||
static void init_default_options(struct scoutfs_mount_options *opts)
|
||||
{
|
||||
|
||||
@@ -507,10 +507,10 @@ static int update_quorum_block(struct super_block *sb, int event, u64 term, bool
|
||||
set_quorum_block_event(sb, &blk, event, term);
|
||||
ret = write_quorum_block(sb, blkno, &blk);
|
||||
if (ret < 0)
|
||||
scoutfs_err(sb, "error %d writing quorum block %llu after updating event %d term %llu",
|
||||
scoutfs_err(sb, "error %d reading quorum block %llu to update event %d term %llu",
|
||||
ret, blkno, event, term);
|
||||
} else {
|
||||
scoutfs_err(sb, "error %d reading quorum block %llu to update event %d term %llu",
|
||||
scoutfs_err(sb, "error %d writing quorum block %llu after updating event %d term %llu",
|
||||
ret, blkno, event, term);
|
||||
}
|
||||
|
||||
@@ -713,6 +713,8 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
struct quorum_status qst = {0,};
|
||||
struct hb_recording hbr;
|
||||
bool record_hb;
|
||||
bool recv_failed;
|
||||
bool initializing = true;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
@@ -745,6 +747,8 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
|
||||
update_show_status(qinf, &qst);
|
||||
|
||||
recv_failed = false;
|
||||
|
||||
ret = recv_msg(sb, &msg, qst.timeout);
|
||||
if (ret < 0) {
|
||||
if (ret != -ETIMEDOUT && ret != -EAGAIN) {
|
||||
@@ -752,6 +756,9 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
scoutfs_inc_counter(sb, quorum_recv_error);
|
||||
goto out;
|
||||
}
|
||||
|
||||
recv_failed = true;
|
||||
|
||||
msg.type = SCOUTFS_QUORUM_MSG_INVALID;
|
||||
ret = 0;
|
||||
}
|
||||
@@ -809,13 +816,13 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
|
||||
/* followers and candidates start new election on timeout */
|
||||
if (qst.role != LEADER &&
|
||||
msg.type == SCOUTFS_QUORUM_MSG_INVALID &&
|
||||
(initializing || recv_failed) &&
|
||||
ktime_after(ktime_get(), qst.timeout)) {
|
||||
/* .. but only if their server has stopped */
|
||||
if (!scoutfs_server_is_down(sb)) {
|
||||
qst.timeout = election_timeout();
|
||||
scoutfs_inc_counter(sb, quorum_candidate_server_stopping);
|
||||
continue;
|
||||
goto again;
|
||||
}
|
||||
|
||||
qst.role = CANDIDATE;
|
||||
@@ -952,6 +959,9 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
}
|
||||
|
||||
record_hb_delay(sb, qinf, &hbr, record_hb, qst.role);
|
||||
|
||||
again:
|
||||
initializing = false;
|
||||
}
|
||||
|
||||
update_show_status(qinf, &qst);
|
||||
@@ -970,10 +980,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
}
|
||||
|
||||
/* record that this slot no longer has an active quorum */
|
||||
err = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_END, qst.term, true);
|
||||
if (err < 0 && ret == 0)
|
||||
ret = err;
|
||||
|
||||
update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_END, qst.term, true);
|
||||
out:
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "quorum service saw error %d, shutting down. This mount is no longer participating in quorum. It should be remounted to restore service.",
|
||||
@@ -1062,7 +1069,7 @@ static char *role_str(int role)
|
||||
[LEADER] = "leader",
|
||||
};
|
||||
|
||||
if (role < 0 || role >= ARRAY_SIZE(roles) || !roles[role])
|
||||
if (role < 0 || role > ARRAY_SIZE(roles) || !roles[role])
|
||||
return "invalid";
|
||||
|
||||
return roles[role];
|
||||
|
||||
@@ -2134,7 +2134,7 @@ static int server_srch_commit_compact(struct super_block *sb,
|
||||
&super->srch_root, rid, sc,
|
||||
&av, &fr);
|
||||
mutex_unlock(&server->srch_mutex);
|
||||
if (ret < 0)
|
||||
if (ret < 0) /* XXX very bad, leaks allocators */
|
||||
goto apply;
|
||||
|
||||
/* reclaim allocators if they were set by _srch_commit_ */
|
||||
@@ -2144,10 +2144,10 @@ static int server_srch_commit_compact(struct super_block *sb,
|
||||
scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri,
|
||||
server->other_freed, &fr);
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
WARN_ON(ret < 0); /* XXX leaks allocators */
|
||||
apply:
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
out:
|
||||
WARN_ON(ret < 0); /* XXX leaks allocators */
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
|
||||
}
|
||||
|
||||
|
||||
124
kmod/src/srch.c
124
kmod/src/srch.c
@@ -537,35 +537,23 @@ out:
|
||||
* the pairs cancel each other out for all readers (the second encoding
|
||||
* looks like deletion) so they aren't visible to the first/last bounds of
|
||||
* the block or file.
|
||||
*
|
||||
* We use the same entry repeatedly, so the diff between them will be empty.
|
||||
* This lets us just emit the two-byte count word, leaving the other bytes
|
||||
* as zero.
|
||||
*
|
||||
* Split the desired total len into two pieces, adding any remainder to the
|
||||
* first four-bit value.
|
||||
*/
|
||||
static void append_padded_entry(struct scoutfs_srch_file *sfl,
|
||||
struct scoutfs_srch_block *srb,
|
||||
int len)
|
||||
static int append_padded_entry(struct scoutfs_srch_file *sfl, u64 blk,
|
||||
struct scoutfs_srch_block *srb, struct scoutfs_srch_entry *sre)
|
||||
{
|
||||
int each;
|
||||
int rem;
|
||||
u16 lengths = 0;
|
||||
u8 *buf = srb->entries + le32_to_cpu(srb->entry_bytes);
|
||||
int ret;
|
||||
|
||||
each = (len - 2) >> 1;
|
||||
rem = (len - 2) & 1;
|
||||
ret = encode_entry(srb->entries + le32_to_cpu(srb->entry_bytes),
|
||||
sre, &srb->tail);
|
||||
if (ret > 0) {
|
||||
srb->tail = *sre;
|
||||
le32_add_cpu(&srb->entry_nr, 1);
|
||||
le32_add_cpu(&srb->entry_bytes, ret);
|
||||
le64_add_cpu(&sfl->entries, 1);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
lengths |= each + rem;
|
||||
lengths |= each << 4;
|
||||
|
||||
memset(buf, 0, len);
|
||||
put_unaligned_le16(lengths, buf);
|
||||
|
||||
le32_add_cpu(&srb->entry_nr, 1);
|
||||
le32_add_cpu(&srb->entry_bytes, len);
|
||||
le64_add_cpu(&sfl->entries, 1);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -576,41 +564,61 @@ static void append_padded_entry(struct scoutfs_srch_file *sfl,
|
||||
* This is called when there is a single existing entry in the block.
|
||||
* We have the entire block to work with. We encode pairs of matching
|
||||
* entries. This hides them from readers (both searches and merging) as
|
||||
* they're interpreted as creation and deletion and are deleted.
|
||||
* they're interpreted as creation and deletion and are deleted. We use
|
||||
* the existing hash value of the first entry in the block but then set
|
||||
* the inode to an impossibly large number so it doesn't interfere with
|
||||
* anything.
|
||||
*
|
||||
* For simplicity and to maintain sort ordering within the block, we reuse
|
||||
* the existing entry. This lets us skip the encoding step, because we know
|
||||
* the diff will be zero. We can zero-pad the resulting entries to hit the
|
||||
* target offset exactly.
|
||||
* To hit the specific offset we very carefully manage the amount of
|
||||
* bytes of change between fields in the entry. We know that if we
|
||||
* change all the bytes of the ino and id we end up with a 20 byte
|
||||
* (2+8+8,2) encoding of the pair of entries. To have the last entry
|
||||
* start at the _SAFE_POS offset we know that the final 20 byte pair
|
||||
* encoding needs to end at 2 bytes (second entry encoding) after the
|
||||
* _SAFE_POS offset.
|
||||
*
|
||||
* Because we can't predict the exact number of entry_bytes when we start,
|
||||
* we adjust the byte count of subsequent entries until we wind up at a
|
||||
* multiple of 20 bytes away from our goal and then use that length for
|
||||
* the remaining entries.
|
||||
*
|
||||
* We could just use a single pair of unnaturally large entries to consume
|
||||
* the needed space, adjusting for an odd number of entry_bytes if necessary.
|
||||
* The use of 19 or 20 bytes for the entry pair matches what we would see with
|
||||
* real (non-zero) entries that vary from the existing entry.
|
||||
* So as we encode pairs we watch the delta of our current offset from
|
||||
* that desired final offset of 2 past _SAFE_POS. If we're a multiple
|
||||
* of 20 away then we encode the full 20 byte pairs. If we're not, then
|
||||
* we drop a byte to encode 19 bytes. That'll slowly change the offset
|
||||
* to be a multiple of 20 again while encoding large entries.
|
||||
*/
|
||||
static void pad_entries_at_safe(struct scoutfs_srch_file *sfl,
|
||||
static void pad_entries_at_safe(struct scoutfs_srch_file *sfl, u64 blk,
|
||||
struct scoutfs_srch_block *srb)
|
||||
{
|
||||
struct scoutfs_srch_entry sre;
|
||||
u32 target;
|
||||
s32 diff;
|
||||
u64 hash;
|
||||
u64 ino;
|
||||
u64 id;
|
||||
int ret;
|
||||
|
||||
hash = le64_to_cpu(srb->tail.hash);
|
||||
ino = le64_to_cpu(srb->tail.ino) | (1ULL << 62);
|
||||
id = le64_to_cpu(srb->tail.id);
|
||||
|
||||
target = SCOUTFS_SRCH_BLOCK_SAFE_BYTES + 2;
|
||||
|
||||
while ((diff = target - le32_to_cpu(srb->entry_bytes)) > 0) {
|
||||
append_padded_entry(sfl, srb, 10);
|
||||
ino ^= 1ULL << (7 * 8);
|
||||
if (diff % 20 == 0) {
|
||||
append_padded_entry(sfl, srb, 10);
|
||||
id ^= 1ULL << (7 * 8);
|
||||
} else {
|
||||
append_padded_entry(sfl, srb, 9);
|
||||
id ^= 1ULL << (6 * 8);
|
||||
}
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(diff != 0);
|
||||
sre.hash = cpu_to_le64(hash);
|
||||
sre.ino = cpu_to_le64(ino);
|
||||
sre.id = cpu_to_le64(id);
|
||||
|
||||
ret = append_padded_entry(sfl, blk, srb, &sre);
|
||||
if (ret == 0)
|
||||
ret = append_padded_entry(sfl, blk, srb, &sre);
|
||||
BUG_ON(ret != 0);
|
||||
|
||||
diff = target - le32_to_cpu(srb->entry_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -856,14 +864,14 @@ static int search_sorted_file(struct super_block *sb,
|
||||
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
|
||||
/* can only be inconsistency :/ */
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = decode_entry(srb->entries + pos, &sre, &prev);
|
||||
if (ret <= 0) {
|
||||
/* can only be inconsistency :/ */
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
pos += ret;
|
||||
prev = sre;
|
||||
@@ -1406,7 +1414,7 @@ int scoutfs_srch_commit_compact(struct super_block *sb,
|
||||
ret = -EIO;
|
||||
scoutfs_btree_put_iref(&iref);
|
||||
}
|
||||
if (ret < 0)
|
||||
if (ret < 0) /* XXX leaks allocators */
|
||||
goto out;
|
||||
|
||||
/* restore busy to pending if the operation failed */
|
||||
@@ -1426,8 +1434,10 @@ int scoutfs_srch_commit_compact(struct super_block *sb,
|
||||
/* update file references if we finished compaction (!deleting) */
|
||||
if (!(res->flags & SCOUTFS_SRCH_COMPACT_FLAG_DELETE)) {
|
||||
ret = commit_files(sb, alloc, wri, root, res);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
/* XXX we can't commit, shutdown? */
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* transition flags for deleting input files */
|
||||
for (i = 0; i < res->nr; i++) {
|
||||
@@ -1454,7 +1464,7 @@ update:
|
||||
le64_to_cpu(pending->id), 0);
|
||||
ret = scoutfs_btree_insert(sb, alloc, wri, root, &key,
|
||||
pending, sizeof(*pending));
|
||||
if (WARN_ON_ONCE(ret < 0)) /* XXX inconsistency */
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1467,6 +1477,7 @@ update:
|
||||
BUG_ON(err); /* both busy and pending present */
|
||||
}
|
||||
out:
|
||||
WARN_ON_ONCE(ret < 0); /* XXX inconsistency */
|
||||
kfree(busy);
|
||||
return ret;
|
||||
}
|
||||
@@ -1664,7 +1675,7 @@ static int kway_merge(struct super_block *sb,
|
||||
/* end sorted block on _SAFE offset for testing */
|
||||
if (bl && le32_to_cpu(srb->entry_nr) == 1 && logs_input &&
|
||||
scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
|
||||
pad_entries_at_safe(sfl, srb);
|
||||
pad_entries_at_safe(sfl, blk, srb);
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
blk++;
|
||||
@@ -1862,7 +1873,7 @@ static int compact_logs(struct super_block *sb,
|
||||
if (pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
|
||||
/* can only be inconsistency :/ */
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = decode_entry(srb->entries + pos, sre, &prev);
|
||||
@@ -2276,11 +2287,12 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
|
||||
} else {
|
||||
ret = -EINVAL;
|
||||
}
|
||||
if (ret < 0)
|
||||
goto commit;
|
||||
|
||||
scoutfs_alloc_prepare_commit(sb, &alloc, &wri);
|
||||
if (ret == 0)
|
||||
ret = scoutfs_alloc_prepare_commit(sb, &alloc, &wri) ?:
|
||||
scoutfs_block_writer_write(sb, &wri);
|
||||
|
||||
commit:
|
||||
/* the server won't use our partial compact if _ERROR is set */
|
||||
sc->meta_avail = alloc.avail;
|
||||
sc->meta_freed = alloc.freed;
|
||||
@@ -2297,7 +2309,7 @@ out:
|
||||
scoutfs_inc_counter(sb, srch_compact_error);
|
||||
|
||||
scoutfs_block_writer_forget_all(sb, &wri);
|
||||
queue_compact_work(srinf, sc != NULL && sc->nr > 0 && ret == 0);
|
||||
queue_compact_work(srinf, sc->nr > 0 && ret == 0);
|
||||
|
||||
kfree(sc);
|
||||
}
|
||||
|
||||
@@ -512,9 +512,9 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
|
||||
sbi = kzalloc(sizeof(struct scoutfs_sb_info), GFP_KERNEL);
|
||||
sb->s_fs_info = sbi;
|
||||
sbi->sb = sb;
|
||||
if (!sbi)
|
||||
return -ENOMEM;
|
||||
sbi->sb = sb;
|
||||
|
||||
ret = assign_random_id(sbi);
|
||||
if (ret < 0)
|
||||
|
||||
@@ -69,7 +69,6 @@ $(basename $0) options:
|
||||
-r <dir> | Specify the directory in which to store results of
|
||||
| test runs. The directory will be created if it doesn't
|
||||
| exist. Previous results will be deleted as each test runs.
|
||||
-R | shuffle the test order randomly using shuf
|
||||
-s | Skip git repo checkouts.
|
||||
-t | Enabled trace events that match the given glob argument.
|
||||
| Multiple options enable multiple globbed events.
|
||||
@@ -90,7 +89,6 @@ done
|
||||
# set some T_ defaults
|
||||
T_TRACE_DUMP="0"
|
||||
T_TRACE_PRINTK="0"
|
||||
T_PORT_START="19700"
|
||||
|
||||
# array declarations to be able to use array ops
|
||||
declare -a T_TRACE_GLOB
|
||||
@@ -166,9 +164,6 @@ while true; do
|
||||
T_RESULTS="$2"
|
||||
shift
|
||||
;;
|
||||
-R)
|
||||
T_SHUF="1"
|
||||
;;
|
||||
-s)
|
||||
T_SKIP_CHECKOUT="1"
|
||||
;;
|
||||
@@ -266,37 +261,13 @@ for e in T_META_DEVICE T_DATA_DEVICE T_EX_META_DEV T_EX_DATA_DEV T_KMOD T_RESULT
|
||||
eval $e=\"$(readlink -f "${!e}")\"
|
||||
done
|
||||
|
||||
# try and check ports, but not necessary
|
||||
T_TEST_PORT="$T_PORT_START"
|
||||
T_SCRATCH_PORT="$((T_PORT_START + 100))"
|
||||
T_DEV_PORT="$((T_PORT_START + 200))"
|
||||
read local_start local_end < /proc/sys/net/ipv4/ip_local_port_range
|
||||
if [ -n "$local_start" -a -n "$local_end" -a "$local_start" -lt "$local_end" ]; then
|
||||
if [ ! "$T_DEV_PORT" -lt "$local_start" -a ! "$T_TEST_PORT" -gt "$local_end" ]; then
|
||||
die "listening port range $T_TEST_PORT - $T_DEV_PORT is within local dynamic port range $local_start - $local_end in /proc/sys/net/ipv4/ip_local_port_range"
|
||||
fi
|
||||
fi
|
||||
|
||||
# permute sequence?
|
||||
T_SEQUENCE=sequence
|
||||
if [ -n "$T_SHUF" ]; then
|
||||
msg "shuffling test order"
|
||||
shuf sequence -o sequence.shuf
|
||||
# keep xfstests at the end
|
||||
if grep -q 'xfstests.sh' sequence.shuf ; then
|
||||
sed -i '/xfstests.sh/d' sequence.shuf
|
||||
echo "xfstests.sh" >> sequence.shuf
|
||||
fi
|
||||
T_SEQUENCE=sequence.shuf
|
||||
fi
|
||||
|
||||
# include everything by default
|
||||
test -z "$T_INCLUDE" && T_INCLUDE="-e '.*'"
|
||||
# (quickly) exclude nothing by default
|
||||
test -z "$T_EXCLUDE" && T_EXCLUDE="-e '\Zx'"
|
||||
|
||||
# eval to strip re ticks but not expand
|
||||
tests=$(grep -v "^#" $T_SEQUENCE |
|
||||
tests=$(grep -v "^#" sequence |
|
||||
eval grep "$T_INCLUDE" | eval grep -v "$T_EXCLUDE")
|
||||
test -z "$tests" && \
|
||||
die "no tests found by including $T_INCLUDE and excluding $T_EXCLUDE"
|
||||
@@ -375,7 +346,7 @@ fi
|
||||
quo=""
|
||||
if [ -n "$T_MKFS" ]; then
|
||||
for i in $(seq -0 $((T_QUORUM - 1))); do
|
||||
quo="$quo -Q $i,127.0.0.1,$((T_TEST_PORT + i))"
|
||||
quo="$quo -Q $i,127.0.0.1,$((42000 + i))"
|
||||
done
|
||||
|
||||
msg "making new filesystem with $T_QUORUM quorum members"
|
||||
|
||||
@@ -15,7 +15,7 @@ echo "== prepare devices, mount point, and logs"
|
||||
SCR="$T_TMPDIR/mnt.scratch"
|
||||
mkdir -p "$SCR"
|
||||
> $T_TMP.mount.out
|
||||
scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 \
|
||||
scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 \
|
||||
|| t_fail "mkfs failed"
|
||||
|
||||
echo "== bad devices, bad options"
|
||||
|
||||
@@ -11,7 +11,7 @@ truncate -s $sz "$T_TMP.equal"
|
||||
truncate -s $large_sz "$T_TMP.large"
|
||||
|
||||
echo "== make scratch fs"
|
||||
t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV"
|
||||
t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV"
|
||||
SCR="$T_TMPDIR/mnt.scratch"
|
||||
mkdir -p "$SCR"
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ test "$before" == "$after" || \
|
||||
# XXX this is all pretty manual, would be nice to have helpers
|
||||
echo "== make small meta fs"
|
||||
# meta device just big enough for reserves and the metadata we'll fill
|
||||
scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
|
||||
scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
|
||||
t_fail "mkfs failed"
|
||||
SCR="$T_TMPDIR/mnt.scratch"
|
||||
mkdir -p "$SCR"
|
||||
|
||||
@@ -89,7 +89,7 @@ for vers in $(seq $MIN $((MAX - 1))); do
|
||||
old_module="$builds/$vers/scoutfs.ko"
|
||||
|
||||
echo "mkfs $vers" >> "$T_TMP.log"
|
||||
t_quiet $old_scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" \
|
||||
t_quiet $old_scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV" \
|
||||
|| t_fail "mkfs $vers failed"
|
||||
|
||||
echo "mount $vers with $vers" >> "$T_TMP.log"
|
||||
|
||||
@@ -72,7 +72,7 @@ quarter_data=$(echo "$size_data / 4" | bc)
|
||||
|
||||
# XXX this is all pretty manual, would be nice to have helpers
|
||||
echo "== make initial small fs"
|
||||
scoutfs mkfs -A -f -Q 0,127.0.0.1,$T_SCRATCH_PORT -m $quarter_meta -d $quarter_data \
|
||||
scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m $quarter_meta -d $quarter_data \
|
||||
"$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
|
||||
t_fail "mkfs failed"
|
||||
SCR="$T_TMPDIR/mnt.scratch"
|
||||
|
||||
@@ -50,9 +50,9 @@ t_quiet sync
|
||||
cat << EOF > local.config
|
||||
export FSTYP=scoutfs
|
||||
export MKFS_OPTIONS="-f"
|
||||
export MKFS_TEST_OPTIONS="-Q 0,127.0.0.1,$T_TEST_PORT"
|
||||
export MKFS_SCRATCH_OPTIONS="-Q 0,127.0.0.1,$T_SCRATCH_PORT"
|
||||
export MKFS_DEV_OPTIONS="-Q 0,127.0.0.1,$T_DEV_PORT"
|
||||
export MKFS_TEST_OPTIONS="-Q 0,127.0.0.1,42000"
|
||||
export MKFS_SCRATCH_OPTIONS="-Q 0,127.0.0.1,43000"
|
||||
export MKFS_DEV_OPTIONS="-Q 0,127.0.0.1,44000"
|
||||
export TEST_DEV=$T_DB0
|
||||
export TEST_DIR=$T_M0
|
||||
export SCRATCH_META_DEV=$T_EX_META_DEV
|
||||
|
||||
@@ -137,10 +137,11 @@ connection will wait for active TCP packets, before deciding that
|
||||
the connection is dead. This setting is per-mount and only changes
|
||||
the behavior of that mount.
|
||||
.sp
|
||||
The default value of this setting is 60000msec (60s). Any precision
|
||||
The default value of this setting is 10000msec (10s). Any precision
|
||||
beyond a whole second is likely unrealistic due to the nature of
|
||||
TCP keepalive mechanisms in the Linux kernel. Valid values are any
|
||||
value higher than 3000 (3s).
|
||||
value higher than 3000 (3s). Values that are higher than 30000msec
|
||||
(30s) will likely interfere with other embedded timeout values.
|
||||
.sp
|
||||
The TCP keepalive mechanism is complex and observing a lost connection
|
||||
quickly is important to maintain cluster stability. If the local
|
||||
|
||||
Reference in New Issue
Block a user