mirror of
https://github.com/versity/scoutfs.git
synced 2026-05-01 18:35:43 +00:00
Merge pull request #43 from versity/zab/btree_merging
Zab/btree merging
This commit is contained in:
@@ -1272,9 +1272,15 @@ int scoutfs_alloc_foreach(struct super_block *sb,
|
||||
struct scoutfs_block_ref refs[2] = {{0,}};
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct scoutfs_srch_compact *sc;
|
||||
struct scoutfs_log_merge_request *lmreq;
|
||||
struct scoutfs_log_merge_complete *lmcomp;
|
||||
struct scoutfs_log_trees lt;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
int expected;
|
||||
u64 avail_tot;
|
||||
u64 freed_tot;
|
||||
u64 id;
|
||||
int ret;
|
||||
|
||||
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
@@ -1381,6 +1387,57 @@ retry:
|
||||
scoutfs_key_inc(&key);
|
||||
}
|
||||
|
||||
/* log merge allocators */
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.sk_zone = SCOUTFS_LOG_MERGE_REQUEST_ZONE;
|
||||
expected = sizeof(*lmreq);
|
||||
id = 0;
|
||||
avail_tot = 0;
|
||||
freed_tot = 0;
|
||||
|
||||
for (;;) {
|
||||
ret = scoutfs_btree_next(sb, &super->log_merge, &key, &iref);
|
||||
if (ret == 0) {
|
||||
if (iref.key->sk_zone != key.sk_zone) {
|
||||
ret = -ENOENT;
|
||||
} else if (iref.val_len == expected) {
|
||||
key = *iref.key;
|
||||
if (key.sk_zone == SCOUTFS_LOG_MERGE_REQUEST_ZONE) {
|
||||
lmreq = iref.val;
|
||||
id = le64_to_cpu(lmreq->rid);
|
||||
avail_tot = le64_to_cpu(lmreq->meta_avail.total_nr);
|
||||
freed_tot = le64_to_cpu(lmreq->meta_freed.total_nr);
|
||||
} else {
|
||||
lmcomp = iref.val;
|
||||
id = le64_to_cpu(lmcomp->rid);
|
||||
avail_tot = le64_to_cpu(lmcomp->meta_avail.total_nr);
|
||||
freed_tot = le64_to_cpu(lmcomp->meta_freed.total_nr);
|
||||
}
|
||||
} else {
|
||||
ret = -EIO;
|
||||
}
|
||||
scoutfs_btree_put_iref(&iref);
|
||||
}
|
||||
if (ret == -ENOENT) {
|
||||
if (key.sk_zone == SCOUTFS_LOG_MERGE_REQUEST_ZONE) {
|
||||
memset(&key, 0, sizeof(key));
|
||||
key.sk_zone = SCOUTFS_LOG_MERGE_COMPLETE_ZONE;
|
||||
expected = sizeof(*lmcomp);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = cb(sb, arg, SCOUTFS_ALLOC_OWNER_LOG_MERGE, id, true, true, avail_tot) ?:
|
||||
cb(sb, arg, SCOUTFS_ALLOC_OWNER_LOG_MERGE, id, true, false, freed_tot);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
scoutfs_key_inc(&key);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret == -ESTALE) {
|
||||
|
||||
@@ -55,6 +55,16 @@
|
||||
#define SCOUTFS_SERVER_DATA_FILL_LO \
|
||||
(1ULL * 1024 * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
/*
 * A log merge request gets its own meta allocation that is used for
 * that one request only.  The merge stops dirtying blocks once it hits
 * the dirty byte limit, so the allocation never needs to cover more
 * than that many bytes of blocks.
 */
#define SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT	(64ULL << 20)
/* the dirty limit in large blocks, plus a few extra blocks for alloc blocks */
#define SCOUTFS_SERVER_MERGE_FILL_TARGET \
	(4 + (SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT >> SCOUTFS_BLOCK_LG_SHIFT))
/* the low threshold is the same as the fill target */
#define SCOUTFS_SERVER_MERGE_FILL_LO		SCOUTFS_SERVER_MERGE_FILL_TARGET
|
||||
|
||||
/*
|
||||
* Each of the server meta_alloc roots will try to keep a minimum amount
|
||||
* of free blocks. The server will swap roots when its current avail
|
||||
|
||||
786
kmod/src/btree.c
786
kmod/src/btree.c
@@ -83,6 +83,10 @@ enum btree_walk_flags {
|
||||
BTW_ALLOC = (1 << 3), /* allocate a new block for 0 ref, requires dirty */
|
||||
BTW_INSERT = (1 << 4), /* walking to insert, try splitting */
|
||||
BTW_DELETE = (1 << 5), /* walking to delete, try joining */
|
||||
BTW_PAR_RNG = (1 << 6), /* return range through final parent */
|
||||
BTW_GET_PAR = (1 << 7), /* get reference to final parent */
|
||||
BTW_SET_PAR = (1 << 8), /* override reference to final parent */
|
||||
BTW_SUBTREE = (1 << 9), /* root is parent subtree, return -ERANGE if split/join */
|
||||
};
|
||||
|
||||
/* total length of the value payload */
|
||||
@@ -104,16 +108,22 @@ static inline unsigned int item_bytes(struct scoutfs_btree_item *item)
|
||||
}
|
||||
|
||||
/*
|
||||
* Join blocks when they both are 1/4 full. This puts some distance
|
||||
* between the join threshold and the full threshold for splitting.
|
||||
* Blocks that just split or joined need to undergo a reasonable amount
|
||||
* of item modification before they'll split or join again.
|
||||
* Refill blocks from their siblings when they're under 1/4 full. This
|
||||
* puts some distance between the join threshold and the full threshold
|
||||
* for splitting. Blocks that just split or joined need to undergo a
|
||||
* reasonable amount of item modification before they'll split or join
|
||||
* again.
|
||||
*/
|
||||
static unsigned int join_low_watermark(void)
|
||||
{
|
||||
return (SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_btree_block)) / 4;
|
||||
}
|
||||
|
||||
static bool total_above_join_low_water(struct scoutfs_btree_block *bt)
|
||||
{
|
||||
return le16_to_cpu(bt->total_item_bytes) >= join_low_watermark();
|
||||
}
|
||||
|
||||
/*
|
||||
* return the integer percentages of total space the block could have
|
||||
* consumed by items that is currently consumed.
|
||||
@@ -512,6 +522,7 @@ static void create_item(struct scoutfs_btree_block *bt,
|
||||
|
||||
item->val_off = insert_value(bt, ptr_off(bt, item), val, val_len);
|
||||
item->val_len = cpu_to_le16(val_len);
|
||||
memset(item->__pad, 0, sizeof(item->__pad));
|
||||
|
||||
le16_add_cpu(&bt->total_item_bytes, item_bytes(item));
|
||||
}
|
||||
@@ -805,12 +816,13 @@ static int try_join(struct super_block *sb,
|
||||
struct scoutfs_btree_block *sib;
|
||||
struct scoutfs_block *sib_bl;
|
||||
struct scoutfs_block_ref *ref;
|
||||
const unsigned int lwm = join_low_watermark();
|
||||
unsigned int sib_tot;
|
||||
bool move_right;
|
||||
int to_move;
|
||||
int ret;
|
||||
|
||||
if (le16_to_cpu(bt->total_item_bytes) >= join_low_watermark())
|
||||
if (total_above_join_low_water(bt))
|
||||
return 0;
|
||||
|
||||
scoutfs_inc_counter(sb, btree_join);
|
||||
@@ -830,18 +842,23 @@ static int try_join(struct super_block *sb,
|
||||
return ret;
|
||||
sib = sib_bl->data;
|
||||
|
||||
sib_tot = le16_to_cpu(bt->total_item_bytes);
|
||||
if (sib_tot < join_low_watermark())
|
||||
/* combine if resulting block would be up to 75% full, move big chunk otherwise */
|
||||
sib_tot = le16_to_cpu(sib->total_item_bytes);
|
||||
if (sib_tot <= lwm * 2)
|
||||
to_move = sib_tot;
|
||||
else
|
||||
to_move = sib_tot - join_low_watermark();
|
||||
to_move = lwm;
|
||||
|
||||
if (le16_to_cpu(bt->mid_free_len) < to_move) {
|
||||
/* compact to make room for over-estimate of worst case move overrun */
|
||||
if (le16_to_cpu(bt->mid_free_len) <
|
||||
(to_move + item_len_bytes(SCOUTFS_BTREE_MAX_VAL_LEN))) {
|
||||
ret = compact_values(sb, bt);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
scoutfs_block_put(sb, sib_bl);
|
||||
return ret;
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
move_items(bt, sib, move_right, to_move);
|
||||
|
||||
/* update our parent's item */
|
||||
@@ -904,20 +921,21 @@ static bool bad_avl_node_off(__le16 node_off, int nr)
|
||||
* - call after leaf modification
|
||||
* - padding is zero
|
||||
*/
|
||||
static void verify_btree_block(struct super_block *sb,
|
||||
__attribute__((unused))
|
||||
static void verify_btree_block(struct super_block *sb, char *str,
|
||||
struct scoutfs_btree_block *bt, int level,
|
||||
struct scoutfs_key *start,
|
||||
bool last_ref, struct scoutfs_key *start,
|
||||
struct scoutfs_key *end)
|
||||
{
|
||||
__le16 *buckets = leaf_item_hash_buckets(bt);
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_avl_node *node;
|
||||
char *reason = NULL;
|
||||
int first_val = 0;
|
||||
int hashed = 0;
|
||||
int end_off;
|
||||
int tot = 0;
|
||||
int i = 0;
|
||||
int j = 0;
|
||||
int nr;
|
||||
|
||||
if (bt->level != level) {
|
||||
@@ -956,8 +974,9 @@ static void verify_btree_block(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (j = 0; j < sizeof(item->__pad); j++) {
|
||||
WARN_ON_ONCE(item->__pad[j] != 0);
|
||||
if (memchr_inv(item->__pad, '\0', sizeof(item->__pad))) {
|
||||
reason = "item struct __pad isn't zero";
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (scoutfs_key_compare(&item->key, start) < 0 ||
|
||||
@@ -972,19 +991,29 @@ static void verify_btree_block(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (level > 0 && le16_to_cpu(item->val_len) !=
|
||||
sizeof(struct scoutfs_block_ref)) {
|
||||
reason = "parent item val not sizeof ref";
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (le16_to_cpu(item->val_len) > SCOUTFS_BTREE_MAX_VAL_LEN) {
|
||||
reason = "bad item val len";
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (le16_to_cpu(item->val_off) % SCOUTFS_BTREE_VALUE_ALIGN) {
|
||||
reason = "item value not aligned";
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (((int)le16_to_cpu(item->val_off) +
|
||||
le16_to_cpu(item->val_len)) > end_off) {
|
||||
reason = "item value outside valid";
|
||||
goto out;
|
||||
}
|
||||
|
||||
tot += sizeof(struct scoutfs_btree_item) +
|
||||
le16_to_cpu(item->val_len);
|
||||
tot += item_len_bytes(le16_to_cpu(item->val_len));
|
||||
|
||||
if (item->val_len != 0) {
|
||||
first_val = min_t(int, first_val,
|
||||
@@ -992,6 +1021,15 @@ static void verify_btree_block(struct super_block *sb,
|
||||
}
|
||||
}
|
||||
|
||||
if (last_ref && level > 0 &&
|
||||
(node = scoutfs_avl_last(&bt->item_root)) != NULL) {
|
||||
item = node_item(node);
|
||||
if (scoutfs_key_compare(&item->key, end) != 0) {
|
||||
reason = "final ref item key not range end";
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; level == 0 && i < SCOUTFS_BTREE_LEAF_ITEM_HASH_NR; i++) {
|
||||
if (buckets[i] == 0)
|
||||
continue;
|
||||
@@ -1024,17 +1062,18 @@ out:
|
||||
if (!reason)
|
||||
return;
|
||||
|
||||
printk("found btree block inconsistency: %s\n", reason);
|
||||
printk("start "SK_FMT" end "SK_FMT"\n", SK_ARG(start), SK_ARG(end));
|
||||
printk("verifying btree %s: %s\n", str, reason);
|
||||
printk("args: level %u last_ref %u start "SK_FMT" end "SK_FMT"\n",
|
||||
level, last_ref, SK_ARG(start), SK_ARG(end));
|
||||
printk("calced: i %u tot %u hashed %u fv %u\n",
|
||||
i, tot, hashed, first_val);
|
||||
|
||||
printk("hdr: crc %x magic %x fsid %llx seq %llx blkno %llu\n",
|
||||
printk("bt hdr: crc %x magic %x fsid %llx seq %llx blkno %llu\n",
|
||||
le32_to_cpu(bt->hdr.crc), le32_to_cpu(bt->hdr.magic),
|
||||
le64_to_cpu(bt->hdr.fsid), le64_to_cpu(bt->hdr.seq),
|
||||
le64_to_cpu(bt->hdr.blkno));
|
||||
printk("item_root: node %u\n", le16_to_cpu(bt->item_root.node));
|
||||
printk("nr %u tib %u mfl %u lvl %u\n",
|
||||
printk("bt: nr %u tib %u mfl %u lvl %u\n",
|
||||
le16_to_cpu(bt->nr_items), le16_to_cpu(bt->total_item_bytes),
|
||||
le16_to_cpu(bt->mid_free_len), bt->level);
|
||||
|
||||
@@ -1051,6 +1090,92 @@ out:
|
||||
BUG();
|
||||
}
|
||||
|
||||
/*
|
||||
* Walk from the root to the leaf, verifying the blocks traversed.
|
||||
*/
|
||||
__attribute__((unused))
|
||||
static void verify_btree_walk(struct super_block *sb, char *str,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key)
|
||||
{
|
||||
struct scoutfs_avl_node *next_node;
|
||||
struct scoutfs_avl_node *node;
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_btree_item *prev;
|
||||
struct scoutfs_block *bl = NULL;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_block_ref ref;
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
bool last_ref;
|
||||
int level;
|
||||
int ret;
|
||||
|
||||
if (root->height == 0 && root->ref.blkno != 0) {
|
||||
WARN_ONCE(1, "invalid btree root height %u blkno %llu seq %016llx\n",
|
||||
root->height, le64_to_cpu(root->ref.blkno),
|
||||
le64_to_cpu(root->ref.seq));
|
||||
return;
|
||||
}
|
||||
|
||||
if (root->height == 0)
|
||||
return;
|
||||
|
||||
scoutfs_key_set_zeros(&start);
|
||||
scoutfs_key_set_ones(&end);
|
||||
level = root->height;
|
||||
ref = root->ref;
|
||||
/* first parent last ref isn't all ones in subtrees */
|
||||
last_ref = false;
|
||||
|
||||
while(level-- > 0) {
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
ret = get_ref_block(sb, NULL, NULL, 0, &ref, &bl);
|
||||
if (ret) {
|
||||
printk("verifying btree %s: read error %d\n",
|
||||
str, ret);
|
||||
break;
|
||||
}
|
||||
bt = bl->data;
|
||||
|
||||
verify_btree_block(sb, str, bt, level, last_ref, &start, &end);
|
||||
|
||||
if (level == 0)
|
||||
break;
|
||||
|
||||
node = scoutfs_avl_search(&bt->item_root, cmp_key_item, key,
|
||||
NULL, NULL, &next_node, NULL);
|
||||
item = node_item(node ?: next_node);
|
||||
|
||||
if (item == NULL) {
|
||||
printk("verifying btree %s: no ref item\n", str);
|
||||
printk("root: height %u blkno %llu seq %016llx\n",
|
||||
root->height, le64_to_cpu(root->ref.blkno),
|
||||
le64_to_cpu(root->ref.seq));
|
||||
printk("walk level %u start "SK_FMT" end "SK_FMT"\n",
|
||||
level, SK_ARG(&start), SK_ARG(&end));
|
||||
|
||||
printk("block: level %u blkno %llu seq %016llx\n",
|
||||
bt->level, le64_to_cpu(bt->hdr.blkno),
|
||||
le64_to_cpu(bt->hdr.seq));
|
||||
printk("key: "SK_FMT"\n", SK_ARG(key));
|
||||
BUG();
|
||||
}
|
||||
|
||||
if ((prev = prev_item(bt, item))) {
|
||||
start = *item_key(prev);
|
||||
scoutfs_key_inc(&start);
|
||||
}
|
||||
end = *item_key(item);
|
||||
|
||||
memcpy(&ref, item_val(bt, item), sizeof(ref));
|
||||
last_ref = !next_item(bt, item);
|
||||
}
|
||||
|
||||
scoutfs_block_put(sb, bl);
|
||||
}
|
||||
|
||||
struct btree_walk_key_range {
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
@@ -1082,7 +1207,8 @@ static int btree_walk(struct super_block *sb,
|
||||
int flags, struct scoutfs_key *key,
|
||||
unsigned int val_len,
|
||||
struct scoutfs_block **bl_ret,
|
||||
struct btree_walk_key_range *kr)
|
||||
struct btree_walk_key_range *kr,
|
||||
struct scoutfs_btree_root *par_root)
|
||||
{
|
||||
struct scoutfs_block *par_bl = NULL;
|
||||
struct scoutfs_block *bl = NULL;
|
||||
@@ -1098,7 +1224,9 @@ static int btree_walk(struct super_block *sb,
|
||||
unsigned int nr;
|
||||
int ret;
|
||||
|
||||
if (WARN_ON_ONCE((flags & BTW_DIRTY) && (!alloc || !wri)))
|
||||
if (WARN_ON_ONCE((flags & BTW_DIRTY) && (!alloc || !wri)) ||
|
||||
WARN_ON_ONCE((flags & BTW_PAR_RNG) && !kr) ||
|
||||
WARN_ON_ONCE((flags & (BTW_GET_PAR|BTW_SET_PAR)) && !par_root))
|
||||
return -EINVAL;
|
||||
|
||||
/* all ops come through walk and walk calls all reads */
|
||||
@@ -1125,7 +1253,14 @@ restart:
|
||||
ret = 0;
|
||||
|
||||
if (!root->height) {
|
||||
if (!(flags & BTW_INSERT)) {
|
||||
if (flags & BTW_GET_PAR) {
|
||||
memset(par_root, 0, sizeof(*par_root));
|
||||
*root = *par_root;
|
||||
ret = 0;
|
||||
} else if (flags & BTW_SET_PAR) {
|
||||
*root = *par_root;
|
||||
ret = 0;
|
||||
} else if (!(flags & BTW_INSERT)) {
|
||||
ret = -ENOENT;
|
||||
} else {
|
||||
ret = get_ref_block(sb, alloc, wri, BTW_ALLOC | BTW_DIRTY, &root->ref, &bl);
|
||||
@@ -1144,14 +1279,40 @@ restart:
|
||||
|
||||
trace_scoutfs_btree_walk(sb, root, key, flags, level, ref);
|
||||
|
||||
/* par range set by ref to last parent block */
|
||||
if (level < 2 && (flags & BTW_PAR_RNG)) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (level < 2 && (flags & BTW_GET_PAR)) {
|
||||
par_root->ref = *ref;
|
||||
par_root->height = level + 1;
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
if (level < 2 && (flags & BTW_SET_PAR)) {
|
||||
if (ref == &root->ref) {
|
||||
/* single parent block is replaced, can shrink/grow */
|
||||
*root = *par_root;
|
||||
} else {
|
||||
/* subtree replacing one of parents must match height */
|
||||
if (par_root->height != level + 1) {
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
*ref = par_root->ref;
|
||||
}
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
ret = get_ref_block(sb, alloc, wri, flags, ref, &bl);
|
||||
if (ret)
|
||||
break;
|
||||
bt = bl->data;
|
||||
|
||||
if (0 && kr)
|
||||
verify_btree_block(sb, bt, level, &kr->start, &kr->end);
|
||||
|
||||
/* XXX more aggressive block verification, before ref updates? */
|
||||
if (bt->level != level) {
|
||||
scoutfs_corruption(sb, SC_BTREE_BLOCK_LEVEL,
|
||||
@@ -1167,6 +1328,17 @@ restart:
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* join/split won't check subtree parent root, let
|
||||
* caller know when it needs to be split/join.
|
||||
*/
|
||||
if ((flags & BTW_SUBTREE) && level == 1 &&
|
||||
(!total_above_join_low_water(bt) ||
|
||||
!mid_free_item_room(bt, sizeof(struct scoutfs_block_ref)))) {
|
||||
ret = -ERANGE;
|
||||
break;
|
||||
}
|
||||
|
||||
/*
|
||||
* Splitting and joining can add or remove parents or
|
||||
* change the parent item we use to reach the child
|
||||
@@ -1292,7 +1464,7 @@ int scoutfs_btree_lookup(struct super_block *sb,
|
||||
if (WARN_ON_ONCE(iref->key))
|
||||
return -EINVAL;
|
||||
|
||||
ret = btree_walk(sb, NULL, NULL, root, 0, key, 0, &bl, NULL);
|
||||
ret = btree_walk(sb, NULL, NULL, root, 0, key, 0, &bl, NULL, NULL);
|
||||
if (ret == 0) {
|
||||
bt = bl->data;
|
||||
|
||||
@@ -1344,7 +1516,7 @@ int scoutfs_btree_insert(struct super_block *sb,
|
||||
return -EINVAL;
|
||||
|
||||
ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY | BTW_INSERT, key,
|
||||
val_len, &bl, NULL);
|
||||
val_len, &bl, NULL, NULL);
|
||||
if (ret == 0) {
|
||||
bt = bl->data;
|
||||
|
||||
@@ -1406,7 +1578,7 @@ int scoutfs_btree_update(struct super_block *sb,
|
||||
return -EINVAL;
|
||||
|
||||
ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY | BTW_INSERT, key,
|
||||
val_len, &bl, NULL);
|
||||
val_len, &bl, NULL, NULL);
|
||||
if (ret == 0) {
|
||||
bt = bl->data;
|
||||
|
||||
@@ -1448,7 +1620,7 @@ int scoutfs_btree_force(struct super_block *sb,
|
||||
return -EINVAL;
|
||||
|
||||
ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY | BTW_INSERT, key,
|
||||
val_len, &bl, NULL);
|
||||
val_len, &bl, NULL, NULL);
|
||||
if (ret == 0) {
|
||||
bt = bl->data;
|
||||
|
||||
@@ -1486,7 +1658,7 @@ int scoutfs_btree_delete(struct super_block *sb,
|
||||
scoutfs_inc_counter(sb, btree_delete);
|
||||
|
||||
ret = btree_walk(sb, alloc, wri, root, BTW_DELETE | BTW_DIRTY, key,
|
||||
0, &bl, NULL);
|
||||
0, &bl, NULL, NULL);
|
||||
if (ret == 0) {
|
||||
bt = bl->data;
|
||||
|
||||
@@ -1550,7 +1722,7 @@ static int btree_iter(struct super_block *sb,struct scoutfs_btree_root *root,
|
||||
|
||||
for (;;) {
|
||||
ret = btree_walk(sb, NULL, NULL, root, flags, &walk_key,
|
||||
0, &bl, &kr);
|
||||
0, &bl, &kr, NULL);
|
||||
if (ret < 0)
|
||||
break;
|
||||
bt = bl->data;
|
||||
@@ -1623,7 +1795,8 @@ int scoutfs_btree_dirty(struct super_block *sb,
|
||||
|
||||
scoutfs_inc_counter(sb, btree_dirty);
|
||||
|
||||
ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY, key, 0, &bl, NULL);
|
||||
ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY, key, 0, &bl,
|
||||
NULL, NULL);
|
||||
if (ret == 0) {
|
||||
bt = bl->data;
|
||||
|
||||
@@ -1659,7 +1832,7 @@ int scoutfs_btree_read_items(struct super_block *sb,
|
||||
struct scoutfs_block *bl;
|
||||
int ret;
|
||||
|
||||
ret = btree_walk(sb, NULL, NULL, root, 0, key, 0, &bl, &kr);
|
||||
ret = btree_walk(sb, NULL, NULL, root, 0, key, 0, &bl, &kr, NULL);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
bt = bl->data;
|
||||
@@ -1714,7 +1887,7 @@ int scoutfs_btree_insert_list(struct super_block *sb,
|
||||
|
||||
while (lst) {
|
||||
ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY | BTW_INSERT,
|
||||
&lst->key, lst->val_len, &bl, &kr);
|
||||
&lst->key, lst->val_len, &bl, &kr, NULL);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
bt = bl->data;
|
||||
@@ -1742,3 +1915,542 @@ int scoutfs_btree_insert_list(struct super_block *sb,
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Descend towards the leaf that would contain the key. As we arrive at
|
||||
* the last parent block, set start and end to the range of keys that
|
||||
* could be found through traversal of that last parent.
|
||||
*
|
||||
* If the tree is too short for parent blocks then the max key range
|
||||
* is returned.
|
||||
*/
|
||||
int scoutfs_btree_parent_range(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end)
|
||||
{
|
||||
struct btree_walk_key_range kr;
|
||||
int ret;
|
||||
|
||||
ret = btree_walk(sb, NULL, NULL, root, BTW_PAR_RNG, key, 0, NULL,
|
||||
&kr, NULL);
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
|
||||
*start = kr.start;
|
||||
*end = kr.end;
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the caller's root as a subtree whose ref points to the
|
||||
* last parent found as we traverse towards the leaf containing the key.
|
||||
* If the tree is too small to have multiple blocks at the final parent
|
||||
* level then the caller's root will be initialized to equal full input
|
||||
* root. If the tree is empty then the par root will also be empty.
|
||||
*/
|
||||
int scoutfs_btree_get_parent(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_root *par_root)
|
||||
{
|
||||
return btree_walk(sb, NULL, NULL, root, BTW_GET_PAR, key, 0, NULL,
|
||||
NULL, par_root);
|
||||
}
|
||||
|
||||
/*
|
||||
* Dirty a path towards the leaf block containing the key. As we reach
|
||||
* the reference to the final parent block override it with the ref in
|
||||
* the caller's block. If the tree only has a single block at the final
|
||||
* parent level, or a single leaf block, then the entire tree is
|
||||
* replaced with the caller's root.
|
||||
*
|
||||
* This manages allocs and frees while dirtying blocks in the path to
|
||||
* the ref, but it doesn't account for allocating the blocks that are
|
||||
* referenced by the ref nor freeing blocks referenced by the old ref
|
||||
* that's overwritten. Keeping allocators in sync with the result of
|
||||
* the ref override is the responsibility of the caller.
|
||||
*/
|
||||
int scoutfs_btree_set_parent(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_root *par_root)
|
||||
{
|
||||
|
||||
trace_scoutfs_btree_set_parent(sb, root, key, par_root);
|
||||
|
||||
return btree_walk(sb, alloc, wri, root, BTW_DIRTY | BTW_SET_PAR,
|
||||
key, 0, NULL, NULL, par_root);
|
||||
}
|
||||
|
||||
/*
|
||||
* Descend to the leaf, making sure that all the blocks conform to the
|
||||
* balance constraints. Blocks below the low threshold will be joined.
|
||||
* This is called to split blocks that were too large for insertions,
|
||||
* but those insertions were in a distant context and we don't bother
|
||||
* communicating the val_len back here. We just try to insert a max
|
||||
* value.
|
||||
*
|
||||
* This always dirties all the way to the leaf. It could be made more
|
||||
* efficient with more btree walk flags to walk and check for blocks
|
||||
* that need balancing, and then walks that don't dirty unless they need
|
||||
* to join/split.
|
||||
*/
|
||||
int scoutfs_btree_rebalance(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key)
|
||||
{
|
||||
return btree_walk(sb, alloc, wri, root,
|
||||
BTW_DIRTY | BTW_INSERT | BTW_DELETE,
|
||||
key, SCOUTFS_BTREE_MAX_VAL_LEN, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
struct merge_pos {
|
||||
struct rb_node node;
|
||||
struct scoutfs_btree_root *root;
|
||||
struct scoutfs_key key;
|
||||
unsigned int val_len;
|
||||
u8 val[SCOUTFS_BTREE_MAX_VAL_LEN];
|
||||
};
|
||||
|
||||
/*
|
||||
* Find the next item in the mpos's root after its key and make sure
|
||||
* that it's in its sorted position in the rbtree. We're responsible
|
||||
* for freeing the mpos if we don't put it back in the pos_root. This
|
||||
* happens naturally naturally when its item_root has no more items to
|
||||
* merge.
|
||||
*/
|
||||
static int reset_mpos(struct super_block *sb, struct rb_root *pos_root,
|
||||
struct merge_pos *mpos, struct scoutfs_key *end,
|
||||
scoutfs_btree_merge_cmp_t merge_cmp)
|
||||
{
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct merge_pos *walk;
|
||||
struct rb_node *parent;
|
||||
struct rb_node **node;
|
||||
int key_cmp;
|
||||
int val_cmp;
|
||||
int ret;
|
||||
|
||||
restart:
|
||||
if (!RB_EMPTY_NODE(&mpos->node)) {
|
||||
rb_erase(&mpos->node, pos_root);
|
||||
RB_CLEAR_NODE(&mpos->node);
|
||||
}
|
||||
|
||||
/* find the next item in the root within end */
|
||||
ret = scoutfs_btree_next(sb, mpos->root, &mpos->key, &iref);
|
||||
if (ret == 0) {
|
||||
if (scoutfs_key_compare(iref.key, end) > 0) {
|
||||
ret = -ENOENT;
|
||||
} else {
|
||||
mpos->key = *iref.key;
|
||||
mpos->val_len = iref.val_len;
|
||||
memcpy(mpos->val, iref.val, iref.val_len);
|
||||
}
|
||||
scoutfs_btree_put_iref(&iref);
|
||||
}
|
||||
if (ret < 0) {
|
||||
kfree(mpos);
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
rewalk:
|
||||
/* sort merge items by key then oldest to newest */
|
||||
node = &pos_root->rb_node;
|
||||
parent = NULL;
|
||||
while (*node) {
|
||||
parent = *node;
|
||||
walk = container_of(*node, struct merge_pos, node);
|
||||
|
||||
key_cmp = scoutfs_key_compare(&mpos->key, &walk->key);
|
||||
val_cmp = merge_cmp(mpos->val, mpos->val_len,
|
||||
walk->val, walk->val_len);
|
||||
|
||||
/* drop old versions of logged keys as we discover them */
|
||||
if (key_cmp == 0) {
|
||||
scoutfs_inc_counter(sb, btree_merge_drop_old);
|
||||
if (val_cmp < 0) {
|
||||
scoutfs_key_inc(&mpos->key);
|
||||
goto restart;
|
||||
} else {
|
||||
BUG_ON(val_cmp == 0);
|
||||
rb_erase(&walk->node, pos_root);
|
||||
kfree(walk);
|
||||
goto rewalk;
|
||||
}
|
||||
}
|
||||
|
||||
if ((key_cmp ?: val_cmp) < 0)
|
||||
node = &(*node)->rb_left;
|
||||
else
|
||||
node = &(*node)->rb_right;
|
||||
}
|
||||
|
||||
rb_link_node(&mpos->node, parent, node);
|
||||
rb_insert_color(&mpos->node, pos_root);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static struct merge_pos *first_mpos(struct rb_root *root)
|
||||
{
|
||||
struct rb_node *node = rb_first(root);
|
||||
if (node)
|
||||
return container_of(node, struct merge_pos, node);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Merge items from a number of read-only input roots into a writable
|
||||
* destination root. The order of the input roots doesn't matter, the
|
||||
* items are merged in sorted key order.
|
||||
*
|
||||
* The merge_cmp callback determines the order that the input items are
|
||||
* merged in. The is_del callback determines if a merging item should
|
||||
* be removed from the destination.
|
||||
*
|
||||
* subtree indicates that the destination root is in fact one of many
|
||||
* parent blocks and shouldn't be split or allowed to fall below the
|
||||
* join low water mark.
|
||||
*
|
||||
* drop_val indicates the initial length of the value that should be
|
||||
* dropped when merging items into destination items.
|
||||
*
|
||||
* -ERANGE is returned if the merge doesn't fully exhaust the range, due
|
||||
* to allocators running low or needing to join/split the parent.
|
||||
* *next_ret is set to the next key which hasn't been merged so that the
|
||||
* caller can retry with a new allocator and subtree.
|
||||
*/
|
||||
int scoutfs_btree_merge(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
struct scoutfs_key *next_ret,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct list_head *inputs,
|
||||
scoutfs_btree_merge_cmp_t merge_cmp,
|
||||
scoutfs_btree_merge_is_del_t merge_is_del, bool subtree,
|
||||
int drop_val, int dirty_limit, int alloc_low)
|
||||
{
|
||||
struct scoutfs_btree_root_head *rhead;
|
||||
struct rb_root pos_root = RB_ROOT;
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_block *bl = NULL;
|
||||
struct btree_walk_key_range kr;
|
||||
struct scoutfs_avl_node *par;
|
||||
struct merge_pos *mpos;
|
||||
struct merge_pos *tmp;
|
||||
int walk_val_len;
|
||||
int walk_flags;
|
||||
bool is_del;
|
||||
int cmp;
|
||||
int ret;
|
||||
|
||||
trace_scoutfs_btree_merge(sb, root, start, end);
|
||||
scoutfs_inc_counter(sb, btree_merge);
|
||||
|
||||
list_for_each_entry(rhead, inputs, head) {
|
||||
mpos = kmalloc(sizeof(*mpos), GFP_NOFS);
|
||||
if (!mpos) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
RB_CLEAR_NODE(&mpos->node);
|
||||
mpos->key = *start;
|
||||
mpos->root = &rhead->root;
|
||||
|
||||
ret = reset_mpos(sb, &pos_root, mpos, end, merge_cmp);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
walk_flags = BTW_DIRTY;
|
||||
if (subtree)
|
||||
walk_flags |= BTW_SUBTREE;
|
||||
walk_val_len = 0;
|
||||
|
||||
while ((mpos = first_mpos(&pos_root))) {
|
||||
|
||||
if (scoutfs_block_writer_dirty_bytes(sb, wri) >= dirty_limit) {
|
||||
scoutfs_inc_counter(sb, btree_merge_dirty_limit);
|
||||
ret = -ERANGE;
|
||||
*next_ret = mpos->key;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (scoutfs_alloc_meta_low(sb, alloc, alloc_low)) {
|
||||
scoutfs_inc_counter(sb, btree_merge_alloc_low);
|
||||
ret = -ERANGE;
|
||||
*next_ret = mpos->key;
|
||||
goto out;
|
||||
}
|
||||
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
ret = btree_walk(sb, alloc, wri, root, walk_flags,
|
||||
&mpos->key, walk_val_len, &bl, &kr, NULL);
|
||||
if (ret < 0) {
|
||||
if (ret == -ERANGE)
|
||||
*next_ret = mpos->key;
|
||||
goto out;
|
||||
}
|
||||
bt = bl->data;
|
||||
scoutfs_inc_counter(sb, btree_merge_walk);
|
||||
|
||||
for (; mpos; mpos = first_mpos(&pos_root)) {
|
||||
|
||||
/* val must have at least what we need to drop */
|
||||
if (mpos->val_len < drop_val) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* walk to new leaf if we exceed parent ref key */
|
||||
if (scoutfs_key_compare(&mpos->key, &kr.end) > 0)
|
||||
break;
|
||||
|
||||
/* see if there's an existing item */
|
||||
item = leaf_item_hash_search(sb, bt, &mpos->key);
|
||||
is_del = merge_is_del(mpos->val, mpos->val_len);
|
||||
|
||||
trace_scoutfs_btree_merge_items(sb, mpos->root,
|
||||
&mpos->key, mpos->val_len,
|
||||
item ? root : NULL,
|
||||
item ? item_key(item) : NULL,
|
||||
item ? item_val_len(item) : 0, is_del);
|
||||
|
||||
/* rewalk and split if ins/update needs room */
|
||||
if (!is_del && !mid_free_item_room(bt, mpos->val_len)) {
|
||||
walk_flags |= BTW_INSERT;
|
||||
walk_val_len = mpos->val_len;
|
||||
break;
|
||||
}
|
||||
|
||||
/* insert missing non-deletion merge items */
|
||||
if (!item && !is_del) {
|
||||
scoutfs_avl_search(&bt->item_root,
|
||||
cmp_key_item, &mpos->key,
|
||||
&cmp, &par, NULL, NULL);
|
||||
create_item(bt, &mpos->key,
|
||||
mpos->val + drop_val,
|
||||
mpos->val_len - drop_val, par, cmp);
|
||||
scoutfs_inc_counter(sb, btree_merge_insert);
|
||||
}
|
||||
|
||||
/* update existing items */
|
||||
if (item && !is_del) {
|
||||
update_item_value(bt, item,
|
||||
mpos->val + drop_val,
|
||||
mpos->val_len - drop_val);
|
||||
scoutfs_inc_counter(sb, btree_merge_update);
|
||||
}
|
||||
|
||||
/* delete if merge item was deletion */
|
||||
if (item && is_del) {
|
||||
/* rewalk and join if non-root falls under low water mark */
|
||||
if (root->ref.blkno != bt->hdr.blkno &&
|
||||
!total_above_join_low_water(bt)) {
|
||||
walk_flags |= BTW_DELETE;
|
||||
break;
|
||||
}
|
||||
delete_item(bt, item, NULL);
|
||||
scoutfs_inc_counter(sb, btree_merge_delete);
|
||||
}
|
||||
|
||||
/* reset walk args now that we're not split/join */
|
||||
walk_flags &= ~(BTW_INSERT | BTW_DELETE);
|
||||
walk_val_len = 0;
|
||||
|
||||
/* finished with this merge item */
|
||||
scoutfs_key_inc(&mpos->key);
|
||||
ret = reset_mpos(sb, &pos_root, mpos, end, merge_cmp);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
mpos = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
scoutfs_block_put(sb, bl);
|
||||
rbtree_postorder_for_each_entry_safe(mpos, tmp, &pos_root, node) {
|
||||
kfree(mpos);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Free all the blocks referenced by a btree. The btree is only read,
|
||||
* this does not update the blocks as it frees. The caller ensures that
|
||||
 * these btrees aren't being modified.
|
||||
*
|
||||
* The caller's key tracks which blocks have been freed. It must be
|
||||
* initialized to zeros before the first call to start freeing blocks.
|
||||
* Once a block is freed the key is updated such that the freed block
|
||||
* will not be read again.
|
||||
*
|
||||
* Returns 0 when progress has been made successfully, which includes
|
||||
* partial progress. The key is set to all ones once we've freed all
|
||||
* the blocks.
|
||||
*
|
||||
* This works by descending to the last parent block and freeing all its
|
||||
* leaf blocks without reading them. As it descends it remembers the
|
||||
* number of parent blocks which were traversed through their final
|
||||
* child ref. If we free all the leaf blocks then all these parent
|
||||
* blocks are no longer needed and can be freed. The caller's key is
|
||||
* updated to past the subtree that we just freed and we retry the
|
||||
* descent from the root through the next set of parents to the next set
|
||||
* of leaf blocks to free.
|
||||
*/
|
||||
int scoutfs_btree_free_blocks(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_root *root, int alloc_low)
|
||||
{
|
||||
u64 blknos[SCOUTFS_BTREE_MAX_HEIGHT];
|
||||
struct scoutfs_block *bl = NULL;
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_block_ref ref;
|
||||
struct scoutfs_avl_node *node;
|
||||
struct scoutfs_avl_node *next;
|
||||
struct scoutfs_key par_next;
|
||||
int nr_par;
|
||||
int level;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
if (WARN_ON_ONCE(root->height > ARRAY_SIZE(blknos)))
|
||||
return -EIO; /* XXX corruption */
|
||||
|
||||
if (root->height == 0) {
|
||||
scoutfs_key_set_ones(key);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (scoutfs_key_is_ones(key))
|
||||
return 0;
|
||||
|
||||
/* just free a single leaf block */
|
||||
if (root->height == 1) {
|
||||
ret = scoutfs_free_meta(sb, alloc, wri,
|
||||
le64_to_cpu(root->ref.blkno));
|
||||
if (ret == 0) {
|
||||
trace_scoutfs_btree_free_blocks_single(sb, root,
|
||||
le64_to_cpu(root->ref.blkno));
|
||||
scoutfs_key_set_ones(key);
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
/* start the walk at the root block */
|
||||
level = root->height - 1;
|
||||
ref = root->ref;
|
||||
scoutfs_key_set_ones(&par_next);
|
||||
nr_par = 0;
|
||||
|
||||
/* read blocks until we read the last parent */
|
||||
for (;;) {
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
ret = get_ref_block(sb, alloc, wri, 0, &ref, &bl);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
bt = bl->data;
|
||||
|
||||
node = scoutfs_avl_search(&bt->item_root, cmp_key_item,
|
||||
key, NULL, NULL, &next, NULL);
|
||||
if (node == NULL)
|
||||
node = next;
|
||||
|
||||
/* should never descend into parent with no more refs */
|
||||
if (WARN_ON_ONCE(node == NULL)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* we'll free refs in the last parent */
|
||||
if (level == 1)
|
||||
break;
|
||||
|
||||
item = node_item(node);
|
||||
next = scoutfs_avl_next(&bt->item_root, node);
|
||||
if (next) {
|
||||
/* didn't take last ref, still need parents */
|
||||
nr_par = 0;
|
||||
par_next = *item_key(item);
|
||||
scoutfs_key_inc(&par_next);
|
||||
} else {
|
||||
/* final ref, could free after all leaves */
|
||||
blknos[nr_par++] = le64_to_cpu(bt->hdr.blkno);
|
||||
}
|
||||
|
||||
memcpy(&ref, item_val(bt, item), sizeof(ref));
|
||||
level--;
|
||||
}
|
||||
|
||||
/* free all leaf block refs in last parent */
|
||||
while (node) {
|
||||
|
||||
/* make sure we can always free parents after leaves */
|
||||
if (scoutfs_alloc_meta_low(sb, alloc,
|
||||
alloc_low + nr_par + 1)) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
item = node_item(node);
|
||||
memcpy(&ref, item_val(bt, item), sizeof(ref));
|
||||
|
||||
trace_scoutfs_btree_free_blocks_leaf(sb, root,
|
||||
le64_to_cpu(ref.blkno));
|
||||
ret = scoutfs_free_meta(sb, alloc, wri,
|
||||
le64_to_cpu(ref.blkno));
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
node = scoutfs_avl_next(&bt->item_root, node);
|
||||
if (node) {
|
||||
/* done with keys in child we just freed */
|
||||
*key = *item_key(item);
|
||||
scoutfs_key_inc(key);
|
||||
}
|
||||
}
|
||||
|
||||
/* now that leaves are freed, free any empty parents */
|
||||
for (i = 0; i < nr_par; i++) {
|
||||
trace_scoutfs_btree_free_blocks_parent(sb, root,
|
||||
blknos[i]);
|
||||
ret = scoutfs_free_meta(sb, alloc, wri, blknos[i]);
|
||||
BUG_ON(ret); /* checked meta low, freed should fit */
|
||||
}
|
||||
|
||||
/* restart walk past the subtree we just freed */
|
||||
*key = par_next;
|
||||
|
||||
/* but done if we just freed all parents down right spine */
|
||||
if (scoutfs_key_is_ones(&par_next)) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
scoutfs_block_put(sb, bl);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -82,6 +82,58 @@ int scoutfs_btree_insert_list(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_btree_item_list *lst);
|
||||
|
||||
int scoutfs_btree_parent_range(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end);
|
||||
int scoutfs_btree_get_parent(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_root *par_root);
|
||||
int scoutfs_btree_set_parent(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_root *par_root);
|
||||
int scoutfs_btree_rebalance(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key);
|
||||
|
||||
/* merge input is a list of roots */
|
||||
/* One merge input: a btree root that callers link onto the input list. */
struct scoutfs_btree_root_head {
|
||||
/* list linkage, the log merge worker adds entries with list_add_tail() */
struct list_head head;
|
||||
/* input btree root, the log merge worker copies a log_trees item_root here */
struct scoutfs_btree_root root;
|
||||
};
|
||||
/*
|
||||
* Compare the values of merge input items whose keys are equal to
|
||||
* determine their merge order.
|
||||
*/
|
||||
typedef int (*scoutfs_btree_merge_cmp_t)(void *a_val, int a_val_len,
|
||||
void *b_val, int b_val_len);
|
||||
/* whether merging item should be removed from destination */
|
||||
typedef bool (*scoutfs_btree_merge_is_del_t)(void *val, int val_len);
|
||||
int scoutfs_btree_merge(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
struct scoutfs_key *next_ret,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct list_head *input_list,
|
||||
scoutfs_btree_merge_cmp_t merge_cmp,
|
||||
scoutfs_btree_merge_is_del_t merge_is_del, bool subtree,
|
||||
int drop_val, int dirty_limit, int alloc_low);
|
||||
|
||||
int scoutfs_btree_free_blocks(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_root *root, int alloc_low);
|
||||
|
||||
void scoutfs_btree_put_iref(struct scoutfs_btree_item_ref *iref);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -217,6 +217,26 @@ int scoutfs_client_srch_commit_compact(struct super_block *sb,
|
||||
res, sizeof(*res), NULL, 0);
|
||||
}
|
||||
|
||||
int scoutfs_client_get_log_merge(struct super_block *sb,
|
||||
struct scoutfs_log_merge_request *req)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_LOG_MERGE,
|
||||
NULL, 0, req, sizeof(*req));
|
||||
}
|
||||
|
||||
int scoutfs_client_commit_log_merge(struct super_block *sb,
|
||||
struct scoutfs_log_merge_complete *comp)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_COMMIT_LOG_MERGE,
|
||||
comp, sizeof(*comp), NULL, 0);
|
||||
}
|
||||
|
||||
int scoutfs_client_send_omap_response(struct super_block *sb, u64 id,
|
||||
struct scoutfs_open_ino_map *map)
|
||||
{
|
||||
|
||||
@@ -22,6 +22,10 @@ int scoutfs_client_srch_get_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *sc);
|
||||
int scoutfs_client_srch_commit_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *res);
|
||||
int scoutfs_client_get_log_merge(struct super_block *sb,
|
||||
struct scoutfs_log_merge_request *req);
|
||||
int scoutfs_client_commit_log_merge(struct super_block *sb,
|
||||
struct scoutfs_log_merge_complete *comp);
|
||||
int scoutfs_client_send_omap_response(struct super_block *sb, u64 id,
|
||||
struct scoutfs_open_ino_map *map);
|
||||
int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
|
||||
|
||||
@@ -44,6 +44,14 @@
|
||||
EXPAND_COUNTER(btree_insert) \
|
||||
EXPAND_COUNTER(btree_leaf_item_hash_search) \
|
||||
EXPAND_COUNTER(btree_lookup) \
|
||||
EXPAND_COUNTER(btree_merge) \
|
||||
EXPAND_COUNTER(btree_merge_alloc_low) \
|
||||
EXPAND_COUNTER(btree_merge_delete) \
|
||||
EXPAND_COUNTER(btree_merge_dirty_limit) \
|
||||
EXPAND_COUNTER(btree_merge_drop_old) \
|
||||
EXPAND_COUNTER(btree_merge_insert) \
|
||||
EXPAND_COUNTER(btree_merge_update) \
|
||||
EXPAND_COUNTER(btree_merge_walk) \
|
||||
EXPAND_COUNTER(btree_next) \
|
||||
EXPAND_COUNTER(btree_prev) \
|
||||
EXPAND_COUNTER(btree_split) \
|
||||
|
||||
@@ -37,9 +37,9 @@
|
||||
*
|
||||
* The log btrees are modified by multiple transactions over time so
|
||||
* there is no consistent ordering relationship between the items in
|
||||
* different btrees. Each item in a log btree stores a version number
|
||||
* for the item. Readers check log btrees for the most recent version
|
||||
* that it should use.
|
||||
* different btrees. Each item in a log btree stores a seq for the
|
||||
* item. Readers check log btrees for the most recent seq that it
|
||||
* should use.
|
||||
*
|
||||
* The item cache reads items in bulk from stable btrees, and writes a
|
||||
* transaction's worth of dirty items into the item log btree.
|
||||
@@ -52,6 +52,8 @@
|
||||
*/
|
||||
|
||||
struct forest_info {
|
||||
struct super_block *sb;
|
||||
|
||||
struct mutex mutex;
|
||||
struct scoutfs_alloc *alloc;
|
||||
struct scoutfs_block_writer *wri;
|
||||
@@ -60,6 +62,9 @@ struct forest_info {
|
||||
struct mutex srch_mutex;
|
||||
struct scoutfs_srch_file srch_file;
|
||||
struct scoutfs_block *srch_bl;
|
||||
|
||||
struct workqueue_struct *workq;
|
||||
struct delayed_work log_merge_dwork;
|
||||
};
|
||||
|
||||
#define DECLARE_FOREST_INFO(sb, name) \
|
||||
@@ -249,7 +254,7 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key,
|
||||
* If we hit stale blocks and retry we can call the callback for
|
||||
* duplicate items. This is harmless because the items are stable while
|
||||
* the caller holds their cluster lock and the caller has to filter out
|
||||
* item versions anyway.
|
||||
* item seqs anyway.
|
||||
*/
|
||||
int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_lock *lock,
|
||||
@@ -426,29 +431,29 @@ out:
|
||||
|
||||
/*
|
||||
* The caller is committing items in the transaction and has found the
|
||||
* greatest item version amongst them. We store it in the log_trees root
|
||||
* greatest item seq amongst them. We store it in the log_trees root
|
||||
* to send to the server.
|
||||
*/
|
||||
void scoutfs_forest_set_max_vers(struct super_block *sb, u64 max_vers)
|
||||
void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq)
|
||||
{
|
||||
DECLARE_FOREST_INFO(sb, finf);
|
||||
|
||||
finf->our_log.max_item_vers = cpu_to_le64(max_vers);
|
||||
finf->our_log.max_item_seq = cpu_to_le64(max_seq);
|
||||
}
|
||||
|
||||
/*
|
||||
* The server is calling during setup to find the greatest item version
|
||||
* The server is calling during setup to find the greatest item seq
|
||||
* amongst all the log tree roots. They have the authoritative current
|
||||
* super.
|
||||
*
|
||||
* Item versions are only used to compare items in log trees, not in the
|
||||
* main fs tree. All we have to do is find the greatest version amongst
|
||||
* the log_trees so that new locks will have a write_version greater
|
||||
* than all the items in the log_trees.
|
||||
* Item seqs are only used to compare items in log trees, not in the
|
||||
* main fs tree. All we have to do is find the greatest seq amongst the
|
||||
* log_trees so that the core seq will have a greater seq than all the
|
||||
* items in the log_trees.
|
||||
*/
|
||||
int scoutfs_forest_get_max_vers(struct super_block *sb,
|
||||
struct scoutfs_super_block *super,
|
||||
u64 *vers)
|
||||
int scoutfs_forest_get_max_seq(struct super_block *sb,
|
||||
struct scoutfs_super_block *super,
|
||||
u64 *seq)
|
||||
{
|
||||
struct scoutfs_log_trees *lt;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
@@ -456,7 +461,7 @@ int scoutfs_forest_get_max_vers(struct super_block *sb,
|
||||
int ret;
|
||||
|
||||
scoutfs_key_init_log_trees(<k, 0, 0);
|
||||
*vers = 0;
|
||||
*seq = 0;
|
||||
|
||||
for (;; scoutfs_key_inc(<k)) {
|
||||
ret = scoutfs_btree_next(sb, &super->logs_root, <k, &iref);
|
||||
@@ -464,8 +469,7 @@ int scoutfs_forest_get_max_vers(struct super_block *sb,
|
||||
if (iref.val_len == sizeof(struct scoutfs_log_trees)) {
|
||||
ltk = *iref.key;
|
||||
lt = iref.val;
|
||||
*vers = max(*vers,
|
||||
le64_to_cpu(lt->max_item_vers));
|
||||
*seq = max(*seq, le64_to_cpu(lt->max_item_seq));
|
||||
} else {
|
||||
ret = -EIO;
|
||||
}
|
||||
@@ -534,7 +538,7 @@ void scoutfs_forest_init_btrees(struct super_block *sb,
|
||||
memset(&finf->our_log, 0, sizeof(finf->our_log));
|
||||
finf->our_log.item_root = lt->item_root;
|
||||
finf->our_log.bloom_ref = lt->bloom_ref;
|
||||
finf->our_log.max_item_vers = lt->max_item_vers;
|
||||
finf->our_log.max_item_seq = lt->max_item_seq;
|
||||
finf->our_log.rid = lt->rid;
|
||||
finf->our_log.nr = lt->nr;
|
||||
finf->srch_file = lt->srch_file;
|
||||
@@ -564,7 +568,7 @@ void scoutfs_forest_get_btrees(struct super_block *sb,
|
||||
lt->item_root = finf->our_log.item_root;
|
||||
lt->bloom_ref = finf->our_log.bloom_ref;
|
||||
lt->srch_file = finf->srch_file;
|
||||
lt->max_item_vers = finf->our_log.max_item_vers;
|
||||
lt->max_item_seq = finf->our_log.max_item_seq;
|
||||
|
||||
scoutfs_block_put(sb, finf->srch_bl);
|
||||
finf->srch_bl = NULL;
|
||||
@@ -573,6 +577,149 @@ void scoutfs_forest_get_btrees(struct super_block *sb,
|
||||
<->bloom_ref);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare input items to merge by their log item value seq when their
|
||||
* keys match.
|
||||
*/
|
||||
static int merge_cmp(void *a_val, int a_val_len, void *b_val, int b_val_len)
|
||||
{
|
||||
struct scoutfs_log_item_value *a = a_val;
|
||||
struct scoutfs_log_item_value *b = b_val;
|
||||
|
||||
/* sort merge item by seq */
|
||||
return scoutfs_cmp(le64_to_cpu(a->seq), le64_to_cpu(b->seq));
|
||||
}
|
||||
|
||||
static bool merge_is_del(void *val, int val_len)
|
||||
{
|
||||
struct scoutfs_log_item_value *liv = val;
|
||||
|
||||
return !!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION);
|
||||
}
|
||||
|
||||
#define LOG_MERGE_DELAY_MS (5 * MSEC_PER_SEC)
|
||||
|
||||
/*
|
||||
* Regularly try to get a log merge request from the server. If we get
|
||||
* a request we walk the log_trees items to find input trees and pass
|
||||
* them to btree_merge. All of our work is done in dirty blocks
|
||||
* allocated from available free blocks that the server gave us. If we
|
||||
* hit an error then we drop our dirty blocks without writing them and
|
||||
* send an error flag to the server so they can reclaim our allocators
|
||||
* and ignore the rest of our work.
|
||||
*/
|
||||
static void scoutfs_forest_log_merge_worker(struct work_struct *work)
|
||||
{
|
||||
struct forest_info *finf = container_of(work, struct forest_info,
|
||||
log_merge_dwork.work);
|
||||
struct super_block *sb = finf->sb;
|
||||
struct scoutfs_btree_root_head *rhead = NULL;
|
||||
struct scoutfs_btree_root_head *tmp;
|
||||
struct scoutfs_log_merge_complete comp;
|
||||
struct scoutfs_log_merge_request req;
|
||||
struct scoutfs_log_trees *lt;
|
||||
struct scoutfs_block_writer wri;
|
||||
struct scoutfs_alloc alloc;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key next;
|
||||
struct scoutfs_key key;
|
||||
unsigned long delay;
|
||||
LIST_HEAD(inputs);
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_client_get_log_merge(sb, &req);
|
||||
if (ret < 0)
|
||||
goto resched;
|
||||
|
||||
comp.root = req.root;
|
||||
comp.start = req.start;
|
||||
comp.end = req.end;
|
||||
comp.remain = req.end;
|
||||
comp.rid = req.rid;
|
||||
comp.seq = req.seq;
|
||||
comp.flags = 0;
|
||||
|
||||
scoutfs_alloc_init(&alloc, &req.meta_avail, &req.meta_freed);
|
||||
scoutfs_block_writer_init(sb, &wri);
|
||||
|
||||
/* find finalized input log trees up to last_seq */
|
||||
for (scoutfs_key_init_log_trees(&key, 0, 0); ; scoutfs_key_inc(&key)) {
|
||||
|
||||
if (!rhead) {
|
||||
rhead = kmalloc(sizeof(*rhead), GFP_NOFS);
|
||||
if (!rhead) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = scoutfs_btree_next(sb, &req.logs_root, &key, &iref);
|
||||
if (ret == 0) {
|
||||
if (iref.val_len == sizeof(*lt)) {
|
||||
key = *iref.key;
|
||||
lt = iref.val;
|
||||
if ((le64_to_cpu(lt->flags) &
|
||||
SCOUTFS_LOG_TREES_FINALIZED) &&
|
||||
(le64_to_cpu(lt->max_item_seq) <=
|
||||
le64_to_cpu(req.last_seq))) {
|
||||
rhead->root = lt->item_root;
|
||||
list_add_tail(&rhead->head, &inputs);
|
||||
rhead = NULL;
|
||||
}
|
||||
} else {
|
||||
ret = -EIO;
|
||||
}
|
||||
scoutfs_btree_put_iref(&iref);
|
||||
}
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* shouldn't be possible, but it's harmless */
|
||||
if (list_empty(&inputs)) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_btree_merge(sb, &alloc, &wri, &req.start, &req.end,
|
||||
&next, &comp.root, &inputs, merge_cmp,
|
||||
merge_is_del,
|
||||
!!(req.flags & cpu_to_le64(SCOUTFS_LOG_MERGE_REQUEST_SUBTREE)),
|
||||
sizeof(struct scoutfs_log_item_value),
|
||||
SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT, 10);
|
||||
if (ret == -ERANGE) {
|
||||
comp.remain = next;
|
||||
le64_add_cpu(&comp.flags, SCOUTFS_LOG_MERGE_COMP_REMAIN);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
out:
|
||||
scoutfs_alloc_prepare_commit(sb, &alloc, &wri);
|
||||
if (ret == 0)
|
||||
ret = scoutfs_block_writer_write(sb, &wri);
|
||||
scoutfs_block_writer_forget_all(sb, &wri);
|
||||
|
||||
comp.meta_avail = alloc.avail;
|
||||
comp.meta_freed = alloc.freed;
|
||||
if (ret < 0)
|
||||
le64_add_cpu(&comp.flags, SCOUTFS_LOG_MERGE_COMP_ERROR);
|
||||
|
||||
ret = scoutfs_client_commit_log_merge(sb, &comp);
|
||||
|
||||
kfree(rhead);
|
||||
list_for_each_entry_safe(rhead, tmp, &inputs, head)
|
||||
kfree(rhead);
|
||||
|
||||
resched:
|
||||
delay = ret == 0 ? 0 : msecs_to_jiffies(LOG_MERGE_DELAY_MS);
|
||||
queue_delayed_work(finf->workq, &finf->log_merge_dwork, delay);
|
||||
}
|
||||
|
||||
int scoutfs_forest_setup(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
@@ -586,10 +733,23 @@ int scoutfs_forest_setup(struct super_block *sb)
|
||||
}
|
||||
|
||||
/* the finf fields will be setup as we open a transaction */
|
||||
finf->sb = sb;
|
||||
mutex_init(&finf->mutex);
|
||||
mutex_init(&finf->srch_mutex);
|
||||
|
||||
INIT_DELAYED_WORK(&finf->log_merge_dwork,
|
||||
scoutfs_forest_log_merge_worker);
|
||||
sbi->forest_info = finf;
|
||||
|
||||
finf->workq = alloc_workqueue("scoutfs_log_merge", WQ_NON_REENTRANT |
|
||||
WQ_UNBOUND | WQ_HIGHPRI, 0);
|
||||
if (!finf->workq) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
queue_delayed_work(finf->workq, &finf->log_merge_dwork,
|
||||
msecs_to_jiffies(LOG_MERGE_DELAY_MS));
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret)
|
||||
@@ -605,6 +765,12 @@ void scoutfs_forest_destroy(struct super_block *sb)
|
||||
|
||||
if (finf) {
|
||||
scoutfs_block_put(sb, finf->srch_bl);
|
||||
|
||||
if (finf->workq) {
|
||||
cancel_delayed_work_sync(&finf->log_merge_dwork);
|
||||
destroy_workqueue(finf->workq);
|
||||
}
|
||||
|
||||
kfree(finf);
|
||||
sbi->forest_info = NULL;
|
||||
}
|
||||
|
||||
@@ -23,10 +23,10 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_set_bloom_bits(struct super_block *sb,
|
||||
struct scoutfs_lock *lock);
|
||||
void scoutfs_forest_set_max_vers(struct super_block *sb, u64 max_vers);
|
||||
int scoutfs_forest_get_max_vers(struct super_block *sb,
|
||||
struct scoutfs_super_block *super,
|
||||
u64 *vers);
|
||||
void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq);
|
||||
int scoutfs_forest_get_max_seq(struct super_block *sb,
|
||||
struct scoutfs_super_block *super,
|
||||
u64 *seq);
|
||||
int scoutfs_forest_insert_list(struct super_block *sb,
|
||||
struct scoutfs_btree_item_list *lst);
|
||||
int scoutfs_forest_srch_add(struct super_block *sb, u64 hash, u64 ino, u64 id);
|
||||
|
||||
@@ -325,6 +325,7 @@ struct scoutfs_alloc_root {
|
||||
#define SCOUTFS_ALLOC_OWNER_SERVER 1
|
||||
#define SCOUTFS_ALLOC_OWNER_MOUNT 2
|
||||
#define SCOUTFS_ALLOC_OWNER_SRCH 3
|
||||
#define SCOUTFS_ALLOC_OWNER_LOG_MERGE 4
|
||||
|
||||
struct scoutfs_mounted_client_btree_val {
|
||||
union scoutfs_inet_addr addr;
|
||||
@@ -449,13 +450,16 @@ struct scoutfs_log_trees {
|
||||
struct scoutfs_srch_file srch_file;
|
||||
__le64 data_alloc_zone_blocks;
|
||||
__le64 data_alloc_zones[SCOUTFS_DATA_ALLOC_ZONE_LE64S];
|
||||
__le64 max_item_vers;
|
||||
__le64 max_item_seq;
|
||||
__le64 rid;
|
||||
__le64 nr;
|
||||
__le64 flags;
|
||||
};
|
||||
|
||||
#define SCOUTFS_LOG_TREES_FINALIZED (1ULL << 0)
|
||||
|
||||
struct scoutfs_log_item_value {
|
||||
__le64 vers;
|
||||
__le64 seq;
|
||||
__u8 flags;
|
||||
__u8 __pad[7];
|
||||
__u8 data[];
|
||||
@@ -490,6 +494,78 @@ struct scoutfs_bloom_block {
|
||||
member_sizeof(struct scoutfs_bloom_block, bits[0]) * 8)
|
||||
#define SCOUTFS_FOREST_BLOOM_FUNC_BITS (SCOUTFS_BLOCK_LG_SHIFT + 3)
|
||||
|
||||
/*
|
||||
* A private server btree item which records the status of a log merge
|
||||
* operation that is in progress.
|
||||
*/
|
||||
struct scoutfs_log_merge_status {
|
||||
struct scoutfs_key next_range_key;
|
||||
__le64 nr_requests;
|
||||
__le64 nr_complete;
|
||||
__le64 last_seq;
|
||||
__le64 seq;
|
||||
};
|
||||
|
||||
/*
|
||||
* A request is sent to the client and stored in a server btree item to
|
||||
* record resources that would be reclaimed if the client failed. It
|
||||
* has all the inputs needed for the client to perform its portion of a
|
||||
* merge.
|
||||
*/
|
||||
struct scoutfs_log_merge_request {
|
||||
/* allocator lists, the client's merge worker passes both to scoutfs_alloc_init() */
struct scoutfs_alloc_list_head meta_avail;
|
||||
struct scoutfs_alloc_list_head meta_freed;
|
||||
/* btree of log_trees items that the merge worker walks to find its inputs */
struct scoutfs_btree_root logs_root;
|
||||
/* root that the merge modifies, the worker copies it into the completion */
struct scoutfs_btree_root root;
|
||||
/* key range passed to scoutfs_btree_merge() as its start and end */
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
/* only finalized log trees with max_item_seq <= last_seq are used as inputs */
__le64 last_seq;
|
||||
/* rid and seq are copied unchanged into the completion */
__le64 rid;
|
||||
__le64 seq;
|
||||
/* SCOUTFS_LOG_MERGE_REQUEST_ flags */
__le64 flags;
|
||||
};
|
||||
|
||||
/* request root is subtree of fs root at parent, restricted merging modifications */
|
||||
#define SCOUTFS_LOG_MERGE_REQUEST_SUBTREE (1ULL << 0)
|
||||
|
||||
/*
|
||||
* The output of a client's merge of log btree items into a subtree
|
||||
* rooted at a parent in the fs_root. The client sends it to the
|
||||
* server, who stores it in a btree item for later splicing/rebalancing.
|
||||
*/
|
||||
struct scoutfs_log_merge_complete {
|
||||
/* the merge worker's allocator avail and freed lists after it prepared its commit */
struct scoutfs_alloc_list_head meta_avail;
|
||||
struct scoutfs_alloc_list_head meta_freed;
|
||||
/* starts as a copy of the request's root and is updated by the merge */
struct scoutfs_btree_root root;
|
||||
/* copied from the request's start and end */
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
/*
 * Initialized to the request's end.  When the merge returns -ERANGE the
 * worker stores the merge's next key here and sets COMP_REMAIN.
 */
struct scoutfs_key remain;
|
||||
/* copied from the request's rid and seq */
__le64 rid;
|
||||
__le64 seq;
|
||||
/* SCOUTFS_LOG_MERGE_COMP_ flags */
__le64 flags;
|
||||
};
|
||||
|
||||
/* merge failed, ignore completion and reclaim stored request */
|
||||
#define SCOUTFS_LOG_MERGE_COMP_ERROR (1ULL << 0)
|
||||
/* merge didn't complete range, restart from remain */
|
||||
#define SCOUTFS_LOG_MERGE_COMP_REMAIN (1ULL << 1)
|
||||
|
||||
/*
|
||||
* Range items record the ranges of the fs keyspace that still need to
|
||||
* be merged. They're added as a merge starts, removed as requests are
|
||||
* sent and added back if the request didn't consume its entire range.
|
||||
*/
|
||||
struct scoutfs_log_merge_range {
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
};
|
||||
|
||||
struct scoutfs_log_merge_freeing {
|
||||
struct scoutfs_btree_root root;
|
||||
struct scoutfs_key key;
|
||||
__le64 seq;
|
||||
};
|
||||
|
||||
/*
|
||||
* Keys are first sorted by major key zones.
|
||||
*/
|
||||
@@ -504,6 +580,12 @@ struct scoutfs_bloom_block {
|
||||
#define SCOUTFS_SRCH_ZONE 9
|
||||
#define SCOUTFS_FREE_EXTENT_BLKNO_ZONE 10
|
||||
#define SCOUTFS_FREE_EXTENT_ORDER_ZONE 11
|
||||
/* Items only stored in log merge server btrees */
|
||||
#define SCOUTFS_LOG_MERGE_STATUS_ZONE 12
|
||||
#define SCOUTFS_LOG_MERGE_RANGE_ZONE 13
|
||||
#define SCOUTFS_LOG_MERGE_REQUEST_ZONE 14
|
||||
#define SCOUTFS_LOG_MERGE_COMPLETE_ZONE 15
|
||||
#define SCOUTFS_LOG_MERGE_FREEING_ZONE 16
|
||||
|
||||
/* inode index zone */
|
||||
#define SCOUTFS_INODE_INDEX_META_SEQ_TYPE 1
|
||||
@@ -688,8 +770,8 @@ struct scoutfs_super_block {
|
||||
__le64 version;
|
||||
__le64 flags;
|
||||
__u8 uuid[SCOUTFS_UUID_BYTES];
|
||||
__le64 seq;
|
||||
__le64 next_ino;
|
||||
__le64 next_trans_seq;
|
||||
__le64 total_meta_blocks; /* both static and dynamic */
|
||||
__le64 first_meta_blkno; /* first dynamically allocated */
|
||||
__le64 last_meta_blkno;
|
||||
@@ -703,6 +785,7 @@ struct scoutfs_super_block {
|
||||
struct scoutfs_alloc_list_head server_meta_freed[2];
|
||||
struct scoutfs_btree_root fs_root;
|
||||
struct scoutfs_btree_root logs_root;
|
||||
struct scoutfs_btree_root log_merge;
|
||||
struct scoutfs_btree_root trans_seqs;
|
||||
struct scoutfs_btree_root mounted_clients;
|
||||
struct scoutfs_btree_root srch_root;
|
||||
@@ -895,6 +978,8 @@ enum scoutfs_net_cmd {
|
||||
SCOUTFS_NET_CMD_LOCK_RECOVER,
|
||||
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
|
||||
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
|
||||
SCOUTFS_NET_CMD_GET_LOG_MERGE,
|
||||
SCOUTFS_NET_CMD_COMMIT_LOG_MERGE,
|
||||
SCOUTFS_NET_CMD_OPEN_INO_MAP,
|
||||
SCOUTFS_NET_CMD_GET_VOLOPT,
|
||||
SCOUTFS_NET_CMD_SET_VOLOPT,
|
||||
@@ -943,7 +1028,7 @@ struct scoutfs_net_roots {
|
||||
|
||||
struct scoutfs_net_lock {
|
||||
struct scoutfs_key key;
|
||||
__le64 write_version;
|
||||
__le64 write_seq;
|
||||
__u8 old_mode;
|
||||
__u8 new_mode;
|
||||
__u8 __pad[6];
|
||||
|
||||
@@ -149,7 +149,8 @@ struct cached_item {
|
||||
|
||||
static int item_val_bytes(int val_len)
|
||||
{
|
||||
return round_up(offsetof(struct cached_item, val[val_len]), CACHED_ITEM_ALIGN);
|
||||
return round_up(offsetof(struct cached_item, val[val_len]),
|
||||
CACHED_ITEM_ALIGN);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -345,7 +346,8 @@ static struct cached_page *alloc_pg(struct super_block *sb, gfp_t gfp)
|
||||
page = alloc_page(GFP_NOFS | gfp);
|
||||
if (!page || !pg) {
|
||||
kfree(pg);
|
||||
__free_page(page);
|
||||
if (page)
|
||||
__free_page(page);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -420,8 +422,7 @@ static struct cached_item *alloc_item(struct cached_page *pg,
|
||||
static void erase_item(struct cached_page *pg, struct cached_item *item)
|
||||
{
|
||||
rbtree_erase(&item->node, &pg->item_root);
|
||||
pg->erased_bytes += round_up(item_val_bytes(item->val_len),
|
||||
CACHED_ITEM_ALIGN);
|
||||
pg->erased_bytes += item_val_bytes(item->val_len);
|
||||
}
|
||||
|
||||
static void lru_add(struct super_block *sb, struct item_cache_info *cinf,
|
||||
@@ -852,8 +853,7 @@ static void compact_page_items(struct super_block *sb,
|
||||
|
||||
for (from = first_item(&pg->item_root); from; from = next_item(from)) {
|
||||
to = page_address(empty->page) + page_off;
|
||||
page_off += round_up(item_val_bytes(from->val_len),
|
||||
CACHED_ITEM_ALIGN);
|
||||
page_off += item_val_bytes(from->val_len);
|
||||
|
||||
/* copy the entire item, struct members and all */
|
||||
memcpy(to, from, item_val_bytes(from->val_len));
|
||||
@@ -1308,10 +1308,10 @@ static struct active_reader *active_rbtree_walk(struct rb_root *root,
|
||||
* on our root and aren't in dirty or lru lists.
|
||||
*
|
||||
* We need to store deletion items here as we read items from all the
|
||||
* btrees so that they can override older versions of the items. The
|
||||
* deletion items will be deleted before we insert the pages into the
|
||||
* cache. We don't insert old versions of items into the tree here so
|
||||
* that the trees don't have to compare versions.
|
||||
* btrees so that they can override older items. The deletion items
|
||||
* will be deleted before we insert the pages into the cache. We don't
|
||||
* insert old versions of items into the tree here so that the trees
|
||||
* don't have to compare seqs.
|
||||
*/
|
||||
static int read_page_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_log_item_value *liv, void *val,
|
||||
@@ -1331,7 +1331,7 @@ static int read_page_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
pg = page_rbtree_walk(sb, root, key, key, NULL, NULL, &p_par, &p_pnode);
|
||||
found = item_rbtree_walk(&pg->item_root, key, NULL, &par, &pnode);
|
||||
if (found && (le64_to_cpu(found->liv.vers) >= le64_to_cpu(liv->vers)))
|
||||
if (found && (le64_to_cpu(found->liv.seq) >= le64_to_cpu(liv->seq)))
|
||||
return 0;
|
||||
|
||||
if (!page_has_room(pg, val_len)) {
|
||||
@@ -1783,6 +1783,21 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* An item's seq is greater of the client transaction's seq and the
|
||||
* lock's write_seq. This ensures that multiple commits in one lock
|
||||
* grant will have increasing seqs, and new locks in open commits will
|
||||
* also increase the seqs. It lets us limit the inputs of item merging
|
||||
* to the last stable seq and ensure that all the items in open
|
||||
* transactions and granted locks will have greater seqs.
|
||||
*/
|
||||
static __le64 item_seq(struct super_block *sb, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
return cpu_to_le64(max(sbi->trans_seq, lock->write_seq));
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the item dirty. Dirtying while holding a transaction pins the
|
||||
* page holding the item and guarantees that the item can be deleted or
|
||||
@@ -1816,7 +1831,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key *key,
|
||||
ret = -ENOENT;
|
||||
} else {
|
||||
mark_item_dirty(sb, cinf, pg, NULL, item);
|
||||
item->liv.vers = cpu_to_le64(lock->write_version);
|
||||
item->liv.seq = item_seq(sb, lock);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
@@ -1836,7 +1851,7 @@ static int item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct scoutfs_log_item_value liv = {
|
||||
.vers = cpu_to_le64(lock->write_version),
|
||||
.seq = item_seq(sb, lock),
|
||||
};
|
||||
struct cached_item *found;
|
||||
struct cached_item *item;
|
||||
@@ -1911,7 +1926,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct scoutfs_log_item_value liv = {
|
||||
.vers = cpu_to_le64(lock->write_version),
|
||||
.seq = item_seq(sb, lock),
|
||||
};
|
||||
struct cached_item *item;
|
||||
struct cached_item *found;
|
||||
@@ -1944,9 +1959,10 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
if (val_len)
|
||||
memcpy(found->val, val, val_len);
|
||||
if (val_len < found->val_len)
|
||||
pg->erased_bytes += found->val_len - val_len;
|
||||
pg->erased_bytes += item_val_bytes(found->val_len) -
|
||||
item_val_bytes(val_len);
|
||||
found->val_len = val_len;
|
||||
found->liv.vers = liv.vers;
|
||||
found->liv.seq = liv.seq;
|
||||
mark_item_dirty(sb, cinf, pg, NULL, found);
|
||||
} else {
|
||||
item = alloc_item(pg, key, &liv, val, val_len);
|
||||
@@ -1978,7 +1994,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct scoutfs_log_item_value liv = {
|
||||
.vers = cpu_to_le64(lock->write_version),
|
||||
.seq = item_seq(sb, lock),
|
||||
};
|
||||
struct cached_item *item;
|
||||
struct cached_page *pg;
|
||||
@@ -2020,10 +2036,11 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
erase_item(pg, item);
|
||||
} else {
|
||||
/* must emit deletion to clobber old persistent item */
|
||||
item->liv.vers = cpu_to_le64(lock->write_version);
|
||||
item->liv.seq = liv.seq;
|
||||
item->liv.flags |= SCOUTFS_LOG_ITEM_FLAG_DELETION;
|
||||
item->deletion = 1;
|
||||
pg->erased_bytes += item->val_len;
|
||||
pg->erased_bytes += item_val_bytes(item->val_len) -
|
||||
item_val_bytes(0);
|
||||
item->val_len = 0;
|
||||
mark_item_dirty(sb, cinf, pg, NULL, item);
|
||||
}
|
||||
@@ -2106,7 +2123,7 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
struct page *page;
|
||||
LIST_HEAD(pages);
|
||||
LIST_HEAD(pos);
|
||||
u64 max_vers = 0;
|
||||
u64 max_seq = 0;
|
||||
int val_len;
|
||||
int bytes;
|
||||
int off;
|
||||
@@ -2171,7 +2188,7 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
val_len = sizeof(item->liv) + item->val_len;
|
||||
bytes = offsetof(struct scoutfs_btree_item_list,
|
||||
val[val_len]);
|
||||
max_vers = max(max_vers, le64_to_cpu(item->liv.vers));
|
||||
max_seq = max(max_seq, le64_to_cpu(item->liv.seq));
|
||||
|
||||
if (off + bytes > PAGE_SIZE) {
|
||||
page = second;
|
||||
@@ -2201,8 +2218,8 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
read_unlock(&pg->rwlock);
|
||||
}
|
||||
|
||||
/* store max item vers in forest's log_trees */
|
||||
scoutfs_forest_set_max_vers(sb, max_vers);
|
||||
/* store max item seq in forest's log_trees */
|
||||
scoutfs_forest_set_max_seq(sb, max_seq);
|
||||
|
||||
/* write all the dirty items into log btree blocks */
|
||||
ret = scoutfs_forest_insert_list(sb, first);
|
||||
|
||||
@@ -108,6 +108,16 @@ static inline void scoutfs_key_set_ones(struct scoutfs_key *key)
|
||||
memset(key->__pad, 0, sizeof(key->__pad));
|
||||
}
|
||||
|
||||
static inline bool scoutfs_key_is_ones(struct scoutfs_key *key)
|
||||
{
|
||||
return key->sk_zone == U8_MAX &&
|
||||
key->_sk_first == cpu_to_le64(U64_MAX) &&
|
||||
key->sk_type == U8_MAX &&
|
||||
key->_sk_second == cpu_to_le64(U64_MAX) &&
|
||||
key->_sk_third == cpu_to_le64(U64_MAX) &&
|
||||
key->_sk_fourth == U8_MAX;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return a -1/0/1 comparison of keys.
|
||||
*
|
||||
|
||||
@@ -730,7 +730,7 @@ static void lock_grant_worker(struct work_struct *work)
|
||||
|
||||
lock->request_pending = 0;
|
||||
lock->mode = nl->new_mode;
|
||||
lock->write_version = le64_to_cpu(nl->write_version);
|
||||
lock->write_seq = le64_to_cpu(nl->write_seq);
|
||||
|
||||
if (lock_count_match_exists(nl->new_mode, lock->waiters))
|
||||
extend_grace(sb, lock);
|
||||
@@ -988,7 +988,7 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
|
||||
for (i = 0; lock && i < SCOUTFS_NET_LOCK_MAX_RECOVER_NR; i++) {
|
||||
|
||||
nlr->locks[i].key = lock->start;
|
||||
nlr->locks[i].write_version = cpu_to_le64(lock->write_version);
|
||||
nlr->locks[i].write_seq = cpu_to_le64(lock->write_seq);
|
||||
nlr->locks[i].old_mode = lock->mode;
|
||||
nlr->locks[i].new_mode = lock->mode;
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@
|
||||
struct scoutfs_omap_lock;
|
||||
|
||||
/*
|
||||
* A few fields (start, end, refresh_gen, write_version, granted_mode)
|
||||
* A few fields (start, end, refresh_gen, write_seq, granted_mode)
|
||||
* are referenced by code outside lock.c.
|
||||
*/
|
||||
struct scoutfs_lock {
|
||||
@@ -23,7 +23,7 @@ struct scoutfs_lock {
|
||||
struct rb_node node;
|
||||
struct rb_node range_node;
|
||||
u64 refresh_gen;
|
||||
u64 write_version;
|
||||
u64 write_seq;
|
||||
u64 dirty_trans_seq;
|
||||
struct list_head lru_head;
|
||||
wait_queue_head_t waitq;
|
||||
|
||||
@@ -81,8 +81,6 @@ struct lock_server_info {
|
||||
|
||||
struct scoutfs_alloc *alloc;
|
||||
struct scoutfs_block_writer *wri;
|
||||
|
||||
atomic64_t write_version;
|
||||
};
|
||||
|
||||
#define DECLARE_LOCK_SERVER_INFO(sb, name) \
|
||||
@@ -479,7 +477,7 @@ static int process_waiting_requests(struct super_block *sb,
|
||||
struct client_lock_entry *req_tmp;
|
||||
struct client_lock_entry *gr;
|
||||
struct client_lock_entry *gr_tmp;
|
||||
u64 wv;
|
||||
u64 seq;
|
||||
int ret;
|
||||
|
||||
BUG_ON(!mutex_is_locked(&snode->mutex));
|
||||
@@ -520,6 +518,7 @@ static int process_waiting_requests(struct super_block *sb,
|
||||
|
||||
nl.key = snode->key;
|
||||
nl.new_mode = req->mode;
|
||||
nl.write_seq = 0;
|
||||
|
||||
/* see if there's an existing compatible grant to replace */
|
||||
gr = find_entry(snode, &snode->granted, req->rid);
|
||||
@@ -532,8 +531,9 @@ static int process_waiting_requests(struct super_block *sb,
|
||||
|
||||
if (nl.new_mode == SCOUTFS_LOCK_WRITE ||
|
||||
nl.new_mode == SCOUTFS_LOCK_WRITE_ONLY) {
|
||||
wv = atomic64_inc_return(&inf->write_version);
|
||||
nl.write_version = cpu_to_le64(wv);
|
||||
/* doesn't commit seq update, recovered with locks */
|
||||
seq = scoutfs_server_next_seq(sb);
|
||||
nl.write_seq = cpu_to_le64(seq);
|
||||
}
|
||||
|
||||
ret = scoutfs_server_lock_response(sb, req->rid,
|
||||
@@ -609,14 +609,6 @@ int scoutfs_lock_server_finished_recovery(struct super_block *sb)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Raise inf->write_version to 'new' if 'new' is greater than the value
 * currently stored; a stored value that is already >= 'new' is left
 * alone.
 */
static void set_max_write_version(struct lock_server_info *inf, u64 new)
|
||||
{
|
||||
u64 old;
|
||||
|
||||
/*
 * Re-read the counter on every pass.  The loop ends either when 'new'
 * is no longer greater than the value read, or when the cmpxchg
 * returns the 'old' value we expected.  The loop body is intentionally
 * empty; all of the work happens in the condition.
 */
while (new > (old = atomic64_read(&inf->write_version)) &&
|
||||
(atomic64_cmpxchg(&inf->write_version, old, new) != old));
|
||||
}
|
||||
|
||||
/*
|
||||
* We sent a lock recover request to the client when we received its
|
||||
* greeting while in recovery. Here we instantiate all the locks it
|
||||
@@ -680,9 +672,9 @@ int scoutfs_lock_server_recover_response(struct super_block *sb, u64 rid,
|
||||
|
||||
put_server_lock(inf, snode);
|
||||
|
||||
/* make sure next write lock is greater than all recovered */
|
||||
set_max_write_version(inf,
|
||||
le64_to_cpu(nlr->locks[i].write_version));
|
||||
/* make sure next core seq is greater than all lock write seq */
|
||||
scoutfs_server_set_seq_if_greater(sb,
|
||||
le64_to_cpu(nlr->locks[i].write_seq));
|
||||
}
|
||||
|
||||
/* send request for next batch of keys */
|
||||
@@ -800,7 +792,7 @@ static void lock_server_tseq_show(struct seq_file *m,
|
||||
*/
|
||||
int scoutfs_lock_server_setup(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, u64 max_vers)
|
||||
struct scoutfs_block_writer *wri)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct lock_server_info *inf;
|
||||
@@ -815,7 +807,6 @@ int scoutfs_lock_server_setup(struct super_block *sb,
|
||||
scoutfs_tseq_tree_init(&inf->tseq_tree, lock_server_tseq_show);
|
||||
inf->alloc = alloc;
|
||||
inf->wri = wri;
|
||||
atomic64_set(&inf->write_version, max_vers); /* inc_return gives +1 */
|
||||
|
||||
inf->tseq_dentry = scoutfs_tseq_create("server_locks", sbi->debug_root,
|
||||
&inf->tseq_tree);
|
||||
|
||||
@@ -13,7 +13,7 @@ int scoutfs_lock_server_farewell(struct super_block *sb, u64 rid);
|
||||
|
||||
int scoutfs_lock_server_setup(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, u64 max_vers);
|
||||
struct scoutfs_block_writer *wri);
|
||||
void scoutfs_lock_server_destroy(struct super_block *sb);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -137,11 +137,10 @@ struct omap_request {
|
||||
/*
|
||||
* In each inode group cluster lock we store data to track the open ino
|
||||
* map which tracks all the inodes that the cluster lock covers. When
|
||||
* the version shows that the map is stale we send a request to update
|
||||
* it.
|
||||
* the seq shows that the map is stale we send a request to update it.
|
||||
*/
|
||||
struct scoutfs_omap_lock_data {
|
||||
u64 version;
|
||||
u64 seq;
|
||||
bool req_in_flight;
|
||||
wait_queue_head_t waitq;
|
||||
struct scoutfs_open_ino_map map;
|
||||
@@ -833,8 +832,7 @@ static bool omap_req_in_flight(struct scoutfs_lock *lock, struct scoutfs_omap_lo
|
||||
/*
|
||||
* Make sure the map covered by the cluster lock is current. The caller
|
||||
* holds the cluster lock so once we store lock_data on the cluster lock
|
||||
* it won't be freed and the write_version in the cluster lock won't
|
||||
* change.
|
||||
* it won't be freed and the write_seq in the cluster lock won't change.
|
||||
*
|
||||
* The omap_spinlock protects the omap_data in the cluster lock. We
|
||||
* have to drop it if we have to block to allocate lock_data, send a
|
||||
@@ -861,7 +859,7 @@ static int get_current_lock_data(struct super_block *sb, struct scoutfs_lock *lo
|
||||
}
|
||||
|
||||
if (lock->omap_data == NULL) {
|
||||
ldata->version = lock->write_version - 1; /* ensure refresh */
|
||||
ldata->seq = lock->write_seq - 1; /* ensure refresh */
|
||||
init_waitqueue_head(&ldata->waitq);
|
||||
|
||||
lock->omap_data = ldata;
|
||||
@@ -871,7 +869,7 @@ static int get_current_lock_data(struct super_block *sb, struct scoutfs_lock *lo
|
||||
}
|
||||
}
|
||||
|
||||
while (ldata->version != lock->write_version) {
|
||||
while (ldata->seq != lock->write_seq) {
|
||||
/* only one waiter sends a request at a time */
|
||||
if (!ldata->req_in_flight) {
|
||||
ldata->req_in_flight = true;
|
||||
@@ -891,7 +889,7 @@ static int get_current_lock_data(struct super_block *sb, struct scoutfs_lock *lo
|
||||
if (send_req) {
|
||||
ldata->req_in_flight = false;
|
||||
if (ret == 0)
|
||||
ldata->version = lock->write_version;
|
||||
ldata->seq = lock->write_seq;
|
||||
wake_up(&ldata->waitq);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -1644,6 +1644,164 @@ TRACE_EVENT(scoutfs_btree_walk,
|
||||
__entry->level, __entry->ref_blkno, __entry->ref_seq)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_btree_set_parent,
|
||||
TP_PROTO(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root, struct scoutfs_key *key,
|
||||
struct scoutfs_btree_root *par_root),
|
||||
|
||||
TP_ARGS(sb, root, key, par_root),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, root_blkno)
|
||||
__field(__u64, root_seq)
|
||||
__field(__u8, root_height)
|
||||
sk_trace_define(key)
|
||||
__field(__u64, par_root_blkno)
|
||||
__field(__u64, par_root_seq)
|
||||
__field(__u8, par_root_height)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->root_blkno = le64_to_cpu(root->ref.blkno);
|
||||
__entry->root_seq = le64_to_cpu(root->ref.seq);
|
||||
__entry->root_height = root->height;
|
||||
sk_trace_assign(key, key);
|
||||
__entry->par_root_blkno = le64_to_cpu(par_root->ref.blkno);
|
||||
__entry->par_root_seq = le64_to_cpu(par_root->ref.seq);
|
||||
__entry->par_root_height = par_root->height;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" root blkno %llu seq %llu height %u, key "SK_FMT", par_root blkno %llu seq %llu height %u",
|
||||
SCSB_TRACE_ARGS, __entry->root_blkno, __entry->root_seq,
|
||||
__entry->root_height, sk_trace_args(key),
|
||||
__entry->par_root_blkno, __entry->par_root_seq,
|
||||
__entry->par_root_height)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_btree_merge,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
|
||||
TP_ARGS(sb, root, start, end),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, root_blkno)
|
||||
__field(__u64, root_seq)
|
||||
__field(__u8, root_height)
|
||||
sk_trace_define(start)
|
||||
sk_trace_define(end)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->root_blkno = le64_to_cpu(root->ref.blkno);
|
||||
__entry->root_seq = le64_to_cpu(root->ref.seq);
|
||||
__entry->root_height = root->height;
|
||||
sk_trace_assign(start, start);
|
||||
sk_trace_assign(end, end);
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" root blkno %llu seq %llu height %u start "SK_FMT" end "SK_FMT,
|
||||
SCSB_TRACE_ARGS, __entry->root_blkno, __entry->root_seq,
|
||||
__entry->root_height, sk_trace_args(start),
|
||||
sk_trace_args(end))
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_btree_merge_items,
|
||||
TP_PROTO(struct super_block *sb,
|
||||
struct scoutfs_btree_root *m_root,
|
||||
struct scoutfs_key *m_key, int m_val_len,
|
||||
struct scoutfs_btree_root *f_root,
|
||||
struct scoutfs_key *f_key, int f_val_len,
|
||||
int is_del),
|
||||
|
||||
TP_ARGS(sb, m_root, m_key, m_val_len, f_root, f_key, f_val_len, is_del),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, m_root_blkno)
|
||||
__field(__u64, m_root_seq)
|
||||
__field(__u8, m_root_height)
|
||||
sk_trace_define(m_key)
|
||||
__field(int, m_val_len)
|
||||
__field(__u64, f_root_blkno)
|
||||
__field(__u64, f_root_seq)
|
||||
__field(__u8, f_root_height)
|
||||
sk_trace_define(f_key)
|
||||
__field(int, f_val_len)
|
||||
__field(int, is_del)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->m_root_blkno = m_root ?
|
||||
le64_to_cpu(m_root->ref.blkno) : 0;
|
||||
__entry->m_root_seq = m_root ? le64_to_cpu(m_root->ref.seq) : 0;
|
||||
__entry->m_root_height = m_root ? m_root->height : 0;
|
||||
sk_trace_assign(m_key, m_key);
|
||||
__entry->m_val_len = m_val_len;
|
||||
__entry->f_root_blkno = f_root ?
|
||||
le64_to_cpu(f_root->ref.blkno) : 0;
|
||||
__entry->f_root_seq = f_root ? le64_to_cpu(f_root->ref.seq) : 0;
|
||||
__entry->f_root_height = f_root ? f_root->height : 0;
|
||||
sk_trace_assign(f_key, f_key);
|
||||
__entry->f_val_len = f_val_len;
|
||||
__entry->is_del = !!is_del;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" merge item root blkno %llu seq %llu height %u key "SK_FMT" val_len %d, fs item root blkno %llu seq %llu height %u key "SK_FMT" val_len %d, is_del %d",
|
||||
SCSB_TRACE_ARGS, __entry->m_root_blkno, __entry->m_root_seq,
|
||||
__entry->m_root_height, sk_trace_args(m_key),
|
||||
__entry->m_val_len, __entry->f_root_blkno,
|
||||
__entry->f_root_seq, __entry->f_root_height,
|
||||
sk_trace_args(f_key), __entry->f_val_len, __entry->is_del)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_btree_free_blocks,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
u64 blkno),
|
||||
|
||||
TP_ARGS(sb, root, blkno),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, root_blkno)
|
||||
__field(__u64, root_seq)
|
||||
__field(__u8, root_height)
|
||||
__field(__u64, blkno)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->root_blkno = le64_to_cpu(root->ref.blkno);
|
||||
__entry->root_seq = le64_to_cpu(root->ref.seq);
|
||||
__entry->root_height = root->height;
|
||||
__entry->blkno = blkno;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" root blkno %llu seq %llu height %u, free blkno %llu",
|
||||
SCSB_TRACE_ARGS, __entry->root_blkno, __entry->root_seq,
|
||||
__entry->root_height, __entry->blkno)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_btree_free_blocks, scoutfs_btree_free_blocks_single,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
u64 blkno),
|
||||
TP_ARGS(sb, root, blkno)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_btree_free_blocks, scoutfs_btree_free_blocks_leaf,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
u64 blkno),
|
||||
TP_ARGS(sb, root, blkno)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_btree_free_blocks, scoutfs_btree_free_blocks_parent,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
u64 blkno),
|
||||
TP_ARGS(sb, root, blkno)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_online_offline_blocks,
|
||||
TP_PROTO(struct inode *inode, s64 on_delta, s64 off_delta,
|
||||
u64 on_now, u64 off_now),
|
||||
@@ -1900,6 +2058,116 @@ TRACE_EVENT(scoutfs_trans_seq_last,
|
||||
SCSB_TRACE_ARGS, __entry->s_rid, __entry->trans_seq)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_get_log_merge_status,
|
||||
TP_PROTO(struct super_block *sb, u64 rid, struct scoutfs_key *next_range_key,
|
||||
u64 nr_requests, u64 nr_complete, u64 last_seq, u64 seq),
|
||||
|
||||
TP_ARGS(sb, rid, next_range_key, nr_requests, nr_complete, last_seq, seq),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, s_rid)
|
||||
sk_trace_define(next_range_key)
|
||||
__field(__u64, nr_requests)
|
||||
__field(__u64, nr_complete)
|
||||
__field(__u64, last_seq)
|
||||
__field(__u64, seq)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->s_rid = rid;
|
||||
sk_trace_assign(next_range_key, next_range_key);
|
||||
__entry->nr_requests = nr_requests;
|
||||
__entry->nr_complete = nr_complete;
|
||||
__entry->last_seq = last_seq;
|
||||
__entry->seq = seq;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" rid %016llx next_range_key "SK_FMT" nr_requests %llu nr_complete %llu last_seq %llu seq %llu",
|
||||
SCSB_TRACE_ARGS, __entry->s_rid, sk_trace_args(next_range_key),
|
||||
__entry->nr_requests, __entry->nr_complete, __entry->last_seq, __entry->seq)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_get_log_merge_request,
|
||||
TP_PROTO(struct super_block *sb, u64 rid,
|
||||
struct scoutfs_btree_root *root, struct scoutfs_key *start,
|
||||
struct scoutfs_key *end, u64 last_seq, u64 seq),
|
||||
|
||||
TP_ARGS(sb, rid, root, start, end, last_seq, seq),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, s_rid)
|
||||
__field(__u64, root_blkno)
|
||||
__field(__u64, root_seq)
|
||||
__field(__u8, root_height)
|
||||
sk_trace_define(start)
|
||||
sk_trace_define(end)
|
||||
__field(__u64, last_seq)
|
||||
__field(__u64, seq)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->s_rid = rid;
|
||||
__entry->root_blkno = le64_to_cpu(root->ref.blkno);
|
||||
__entry->root_seq = le64_to_cpu(root->ref.seq);
|
||||
__entry->root_height = root->height;
|
||||
sk_trace_assign(start, start);
|
||||
sk_trace_assign(end, end);
|
||||
__entry->last_seq = last_seq;
|
||||
__entry->seq = seq;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" rid %016llx root blkno %llu seq %llu height %u start "SK_FMT" end "SK_FMT" last_seq %llu seq %llu",
|
||||
SCSB_TRACE_ARGS, __entry->s_rid, __entry->root_blkno,
|
||||
__entry->root_seq, __entry->root_height,
|
||||
sk_trace_args(start), sk_trace_args(end), __entry->last_seq,
|
||||
__entry->seq)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_get_log_merge_complete,
|
||||
TP_PROTO(struct super_block *sb, u64 rid,
|
||||
struct scoutfs_btree_root *root, struct scoutfs_key *start,
|
||||
struct scoutfs_key *end, struct scoutfs_key *remain,
|
||||
u64 seq, u64 flags),
|
||||
|
||||
TP_ARGS(sb, rid, root, start, end, remain, seq, flags),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, s_rid)
|
||||
__field(__u64, root_blkno)
|
||||
__field(__u64, root_seq)
|
||||
__field(__u8, root_height)
|
||||
sk_trace_define(start)
|
||||
sk_trace_define(end)
|
||||
sk_trace_define(remain)
|
||||
__field(__u64, seq)
|
||||
__field(__u64, flags)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->s_rid = rid;
|
||||
__entry->root_blkno = le64_to_cpu(root->ref.blkno);
|
||||
__entry->root_seq = le64_to_cpu(root->ref.seq);
|
||||
__entry->root_height = root->height;
|
||||
sk_trace_assign(start, start);
|
||||
sk_trace_assign(end, end);
|
||||
sk_trace_assign(remain, remain);
|
||||
__entry->seq = seq;
|
||||
__entry->flags = flags;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" rid %016llx root blkno %llu seq %llu height %u start "SK_FMT" end "SK_FMT" remain "SK_FMT" seq %llu flags 0x%llx",
|
||||
SCSB_TRACE_ARGS, __entry->s_rid, __entry->root_blkno,
|
||||
__entry->root_seq, __entry->root_height,
|
||||
sk_trace_args(start), sk_trace_args(end),
|
||||
sk_trace_args(remain), __entry->seq, __entry->flags)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_forest_bloom_class,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *key,
|
||||
u64 rid, u64 nr, u64 blkno, u64 seq, unsigned int count),
|
||||
|
||||
1319
kmod/src/server.c
1319
kmod/src/server.c
File diff suppressed because it is too large
Load Diff
@@ -62,7 +62,7 @@ int scoutfs_server_lock_response(struct super_block *sb, u64 rid, u64 id,
|
||||
struct scoutfs_net_lock *nl);
|
||||
int scoutfs_server_lock_recover_request(struct super_block *sb, u64 rid,
|
||||
struct scoutfs_key *key);
|
||||
int scoutfs_server_hold_commit(struct super_block *sb);
|
||||
void scoutfs_server_hold_commit(struct super_block *sb);
|
||||
int scoutfs_server_apply_commit(struct super_block *sb, int err);
|
||||
void scoutfs_server_recov_finish(struct super_block *sb, u64 rid, int which);
|
||||
|
||||
@@ -71,6 +71,10 @@ int scoutfs_server_send_omap_request(struct super_block *sb, u64 rid,
|
||||
int scoutfs_server_send_omap_response(struct super_block *sb, u64 rid, u64 id,
|
||||
struct scoutfs_open_ino_map *map, int err);
|
||||
|
||||
u64 scoutfs_server_seq(struct super_block *sb);
|
||||
u64 scoutfs_server_next_seq(struct super_block *sb);
|
||||
void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq);
|
||||
|
||||
struct sockaddr_in;
|
||||
struct scoutfs_quorum_elected_info;
|
||||
int scoutfs_server_start(struct super_block *sb, u64 term);
|
||||
|
||||
@@ -989,12 +989,13 @@ int scoutfs_srch_rotate_log(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_srch_file *sfl)
|
||||
struct scoutfs_srch_file *sfl, bool force)
|
||||
{
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
if (le64_to_cpu(sfl->blocks) < SCOUTFS_SRCH_LOG_BLOCK_LIMIT)
|
||||
if (sfl->ref.blkno == 0 ||
|
||||
(!force && le64_to_cpu(sfl->blocks) < SCOUTFS_SRCH_LOG_BLOCK_LIMIT))
|
||||
return 0;
|
||||
|
||||
init_srch_key(&key, SCOUTFS_SRCH_LOG_TYPE,
|
||||
|
||||
@@ -37,7 +37,7 @@ int scoutfs_srch_rotate_log(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_srch_file *sfl);
|
||||
struct scoutfs_srch_file *sfl, bool force);
|
||||
int scoutfs_srch_get_compact(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
|
||||
@@ -40,7 +40,7 @@ static void *alloc_val(struct scoutfs_btree_block *bt, int len)
|
||||
{
|
||||
le16_add_cpu(&bt->mid_free_len, -len);
|
||||
le16_add_cpu(&bt->total_item_bytes, len);
|
||||
return (void *)bt + le16_to_cpu(bt->mid_free_len);
|
||||
return (void *)&bt->items[le16_to_cpu(bt->nr_items)] + le16_to_cpu(bt->mid_free_len);
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -236,7 +236,7 @@ static int do_mkfs(struct mkfs_args *args)
|
||||
super->version = cpu_to_le64(SCOUTFS_INTEROP_VERSION);
|
||||
uuid_generate(super->uuid);
|
||||
super->next_ino = cpu_to_le64(SCOUTFS_ROOT_INO + 1);
|
||||
super->next_trans_seq = cpu_to_le64(1);
|
||||
super->seq = cpu_to_le64(1);
|
||||
super->total_meta_blocks = cpu_to_le64(last_meta + 1);
|
||||
super->first_meta_blkno = cpu_to_le64(next_meta);
|
||||
super->last_meta_blkno = cpu_to_le64(last_meta);
|
||||
|
||||
@@ -210,8 +210,8 @@ static int print_logs_item(struct scoutfs_key *key, void *val,
|
||||
/* only items in leaf blocks have values */
|
||||
if (val) {
|
||||
liv = val;
|
||||
printf(" log_item_value: vers %llu flags %x\n",
|
||||
le64_to_cpu(liv->vers), liv->flags);
|
||||
printf(" log_item_value: seq %llu flags %x\n",
|
||||
le64_to_cpu(liv->seq), liv->flags);
|
||||
|
||||
/* deletion items don't have values */
|
||||
if (!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION)) {
|
||||
@@ -289,9 +289,10 @@ static int print_log_trees_item(struct scoutfs_key *key, void *val,
|
||||
" data_avail: "ALCROOT_F"\n"
|
||||
" data_freed: "ALCROOT_F"\n"
|
||||
" srch_file: "SRF_FMT"\n"
|
||||
" max_item_vers: %llu\n"
|
||||
" max_item_seq: %llu\n"
|
||||
" rid: %016llx\n"
|
||||
" nr: %llu\n"
|
||||
" flags: %llx\n"
|
||||
" data_alloc_zone_blocks: %llu\n"
|
||||
" data_alloc_zones: ",
|
||||
AL_HEAD_A(<->meta_avail),
|
||||
@@ -304,9 +305,10 @@ static int print_log_trees_item(struct scoutfs_key *key, void *val,
|
||||
ALCROOT_A(<->data_avail),
|
||||
ALCROOT_A(<->data_freed),
|
||||
SRF_A(<->srch_file),
|
||||
le64_to_cpu(lt->max_item_vers),
|
||||
le64_to_cpu(lt->max_item_seq),
|
||||
le64_to_cpu(lt->rid),
|
||||
le64_to_cpu(lt->nr),
|
||||
le64_to_cpu(lt->flags),
|
||||
le64_to_cpu(lt->data_alloc_zone_blocks));
|
||||
|
||||
for (i = 0; i < SCOUTFS_DATA_ALLOC_ZONE_LE64S; i++) {
|
||||
@@ -383,6 +385,72 @@ static int print_mounted_client_entry(struct scoutfs_key *key, void *val,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int print_log_merge_item(struct scoutfs_key *key, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
struct scoutfs_log_merge_status *stat;
|
||||
struct scoutfs_log_merge_range *rng;
|
||||
struct scoutfs_log_merge_request *req;
|
||||
struct scoutfs_log_merge_complete *comp;
|
||||
struct scoutfs_log_merge_freeing *fr;
|
||||
|
||||
switch (key->sk_zone) {
|
||||
case SCOUTFS_LOG_MERGE_STATUS_ZONE:
|
||||
stat = val;
|
||||
printf(" status: next_range_key "SK_FMT" nr_req %llu nr_comp %llu"
|
||||
" last_seq %llu seq %llu\n",
|
||||
SK_ARG(&stat->next_range_key),
|
||||
le64_to_cpu(stat->nr_requests),
|
||||
le64_to_cpu(stat->nr_complete),
|
||||
le64_to_cpu(stat->last_seq),
|
||||
le64_to_cpu(stat->seq));
|
||||
break;
|
||||
case SCOUTFS_LOG_MERGE_RANGE_ZONE:
|
||||
rng = val;
|
||||
printf(" range: start "SK_FMT" end "SK_FMT"\n",
|
||||
SK_ARG(&rng->start),
|
||||
SK_ARG(&rng->end));
|
||||
break;
|
||||
case SCOUTFS_LOG_MERGE_REQUEST_ZONE:
|
||||
req = val;
|
||||
printf(" request: logs_root "BTROOT_F" logs_root "BTROOT_F" start "SK_FMT
|
||||
" end "SK_FMT" last_seq %llu rid %016llx seq %llu flags 0x%llx\n",
|
||||
BTROOT_A(&req->logs_root),
|
||||
BTROOT_A(&req->root),
|
||||
SK_ARG(&req->start),
|
||||
SK_ARG(&req->end),
|
||||
le64_to_cpu(req->last_seq),
|
||||
le64_to_cpu(req->rid),
|
||||
le64_to_cpu(req->seq),
|
||||
le64_to_cpu(req->flags));
|
||||
break;
|
||||
case SCOUTFS_LOG_MERGE_COMPLETE_ZONE:
|
||||
comp = val;
|
||||
printf(" complete: root "BTROOT_F" start "SK_FMT" end "SK_FMT
|
||||
" remain "SK_FMT" rid %016llx seq %llu flags %llx\n",
|
||||
BTROOT_A(&comp->root),
|
||||
SK_ARG(&comp->start),
|
||||
SK_ARG(&comp->end),
|
||||
SK_ARG(&comp->remain),
|
||||
le64_to_cpu(comp->rid),
|
||||
le64_to_cpu(comp->seq),
|
||||
le64_to_cpu(comp->flags));
|
||||
break;
|
||||
case SCOUTFS_LOG_MERGE_FREEING_ZONE:
|
||||
fr = val;
|
||||
printf(" freeing: root "BTROOT_F" key "SK_FMT" seq %llu\n",
|
||||
BTROOT_A(&fr->root),
|
||||
SK_ARG(&fr->key),
|
||||
le64_to_cpu(fr->seq));
|
||||
break;
|
||||
default:
|
||||
printf(" (unknown log merge key zone %u)\n", key->sk_zone);
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int print_alloc_item(struct scoutfs_key *key, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
@@ -859,6 +927,10 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Paired printf helpers for a btree root: BTR_FMT is the format fragment
 * and BTR_ARG(rt) expands to the three matching arguments in the same
 * order (ref.blkno, ref.seq, height).  The seq is printed as zero-padded
 * hex.  BTR_ARG evaluates 'rt' three times.
 */
#define BTR_FMT "blkno %llu seq %016llx height %u"
|
||||
#define BTR_ARG(rt) \
|
||||
le64_to_cpu((rt)->ref.blkno), le64_to_cpu((rt)->ref.seq), (rt)->height
|
||||
|
||||
static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
|
||||
{
|
||||
char uuid_str[37];
|
||||
@@ -878,7 +950,7 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
|
||||
printf(" flags: 0x%016llx\n", le64_to_cpu(super->flags));
|
||||
|
||||
/* XXX these are all in a crazy order */
|
||||
printf(" next_ino %llu next_trans_seq %llu\n"
|
||||
printf(" next_ino %llu seq %llu\n"
|
||||
" total_meta_blocks %llu first_meta_blkno %llu last_meta_blkno %llu\n"
|
||||
" total_data_blocks %llu first_data_blkno %llu last_data_blkno %llu\n"
|
||||
" meta_alloc[0]: "ALCROOT_F"\n"
|
||||
@@ -888,12 +960,14 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
|
||||
" server_meta_avail[1]: "AL_HEAD_F"\n"
|
||||
" server_meta_freed[0]: "AL_HEAD_F"\n"
|
||||
" server_meta_freed[1]: "AL_HEAD_F"\n"
|
||||
" mounted_clients root: height %u blkno %llu seq %llu\n"
|
||||
" srch_root root: height %u blkno %llu seq %llu\n"
|
||||
" trans_seqs root: height %u blkno %llu seq %llu\n"
|
||||
" fs_root btree root: height %u blkno %llu seq %llu\n",
|
||||
" fs_root: "BTR_FMT"\n"
|
||||
" logs_root: "BTR_FMT"\n"
|
||||
" log_merge: "BTR_FMT"\n"
|
||||
" trans_seqs: "BTR_FMT"\n"
|
||||
" mounted_clients: "BTR_FMT"\n"
|
||||
" srch_root: "BTR_FMT"\n",
|
||||
le64_to_cpu(super->next_ino),
|
||||
le64_to_cpu(super->next_trans_seq),
|
||||
le64_to_cpu(super->seq),
|
||||
le64_to_cpu(super->total_meta_blocks),
|
||||
le64_to_cpu(super->first_meta_blkno),
|
||||
le64_to_cpu(super->last_meta_blkno),
|
||||
@@ -907,18 +981,12 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
|
||||
AL_HEAD_A(&super->server_meta_avail[1]),
|
||||
AL_HEAD_A(&super->server_meta_freed[0]),
|
||||
AL_HEAD_A(&super->server_meta_freed[1]),
|
||||
super->mounted_clients.height,
|
||||
le64_to_cpu(super->mounted_clients.ref.blkno),
|
||||
le64_to_cpu(super->mounted_clients.ref.seq),
|
||||
super->srch_root.height,
|
||||
le64_to_cpu(super->srch_root.ref.blkno),
|
||||
le64_to_cpu(super->srch_root.ref.seq),
|
||||
super->trans_seqs.height,
|
||||
le64_to_cpu(super->trans_seqs.ref.blkno),
|
||||
le64_to_cpu(super->trans_seqs.ref.seq),
|
||||
super->fs_root.height,
|
||||
le64_to_cpu(super->fs_root.ref.blkno),
|
||||
le64_to_cpu(super->fs_root.ref.seq));
|
||||
BTR_ARG(&super->fs_root),
|
||||
BTR_ARG(&super->logs_root),
|
||||
BTR_ARG(&super->log_merge),
|
||||
BTR_ARG(&super->trans_seqs),
|
||||
BTR_ARG(&super->mounted_clients),
|
||||
BTR_ARG(&super->srch_root));
|
||||
|
||||
printf(" volume options:\n"
|
||||
" set_bits: %016llx\n",
|
||||
@@ -973,6 +1041,11 @@ static int print_volume(int fd)
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
err = print_btree(fd, super, "log_merge", &super->log_merge,
|
||||
print_log_merge_item, NULL);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
for (i = 0; i < array_size(super->server_meta_avail); i++) {
|
||||
snprintf(str, sizeof(str), "server_meta_avail[%u]", i);
|
||||
err = print_alloc_list_block(fd, str,
|
||||
|
||||
Reference in New Issue
Block a user