mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-05 11:45:09 +00:00
Add seq and flags to btree items
The fs log btrees have values that start with a header that stores the item's seq and flags. There's a lot of sketchy code that manipulates the value header as items are passed around. This adds the seq and flags as core item fields in the btree. They're only set by the interfaces that are used to store fs items: _insert_list and _merge. The rest of the btree items that use the main interface don't work with the fields. This was done to help delta items discover when logged items have been merged before the finalized log btrees are deleted, and the code ends up being quite a bit cleaner. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
@@ -1526,8 +1526,8 @@ struct foreach_cb_args {
|
||||
void *cb_arg;
|
||||
};
|
||||
|
||||
static int alloc_btree_extent_item_cb(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, void *arg)
|
||||
static int alloc_btree_extent_item_cb(struct super_block *sb, struct scoutfs_key *key, u64 seq,
|
||||
u8 flags, void *val, int val_len, void *arg)
|
||||
{
|
||||
struct foreach_cb_args *cba = arg;
|
||||
struct scoutfs_extent ext;
|
||||
|
||||
@@ -502,9 +502,8 @@ static __le16 insert_value(struct scoutfs_btree_block *bt, __le16 item_off,
|
||||
* This only consumes free space. It's safe to use references to block
|
||||
* structures after this call.
|
||||
*/
|
||||
static void create_item(struct scoutfs_btree_block *bt,
|
||||
struct scoutfs_key *key, void *val, unsigned val_len,
|
||||
struct scoutfs_avl_node *parent, int cmp)
|
||||
static void create_item(struct scoutfs_btree_block *bt, struct scoutfs_key *key, u64 seq, u8 flags,
|
||||
void *val, unsigned val_len, struct scoutfs_avl_node *parent, int cmp)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
|
||||
@@ -516,6 +515,8 @@ static void create_item(struct scoutfs_btree_block *bt,
|
||||
item = end_item(bt);
|
||||
|
||||
item->key = *key;
|
||||
item->seq = cpu_to_le64(seq);
|
||||
item->flags = flags;
|
||||
|
||||
scoutfs_avl_insert(&bt->item_root, parent, &item->node, cmp);
|
||||
leaf_item_hash_insert(bt, item_key(item), ptr_off(bt, item));
|
||||
@@ -558,6 +559,8 @@ static void delete_item(struct scoutfs_btree_block *bt,
|
||||
/* move the final item into the deleted space */
|
||||
if (end != item) {
|
||||
item->key = end->key;
|
||||
item->seq = end->seq;
|
||||
item->flags = end->flags;
|
||||
item->val_off = end->val_off;
|
||||
item->val_len = end->val_len;
|
||||
leaf_item_hash_change(bt, &end->key, ptr_off(bt, item),
|
||||
@@ -606,8 +609,8 @@ static void move_items(struct scoutfs_btree_block *dst,
|
||||
else
|
||||
next = next_item(src, from);
|
||||
|
||||
create_item(dst, item_key(from), item_val(src, from),
|
||||
item_val_len(from), par, cmp);
|
||||
create_item(dst, item_key(from), le64_to_cpu(from->seq), from->flags,
|
||||
item_val(src, from), item_val_len(from), par, cmp);
|
||||
|
||||
if (move_right) {
|
||||
if (par)
|
||||
@@ -680,7 +683,7 @@ static void create_parent_item(struct scoutfs_btree_block *parent,
|
||||
|
||||
scoutfs_avl_search(&parent->item_root, cmp_key_item, key, &cmp, &par,
|
||||
NULL, NULL);
|
||||
create_item(parent, key, &ref, sizeof(ref), par, cmp);
|
||||
create_item(parent, key, 0, 0, &ref, sizeof(ref), par, cmp);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1529,7 +1532,7 @@ int scoutfs_btree_insert(struct super_block *sb,
|
||||
if (node) {
|
||||
ret = -EEXIST;
|
||||
} else {
|
||||
create_item(bt, key, val, val_len, par, cmp);
|
||||
create_item(bt, key, 0, 0, val, val_len, par, cmp);
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
@@ -1630,7 +1633,7 @@ int scoutfs_btree_force(struct super_block *sb,
|
||||
} else {
|
||||
scoutfs_avl_search(&bt->item_root, cmp_key_item, key,
|
||||
&cmp, &par, NULL, NULL);
|
||||
create_item(bt, key, val, val_len, par, cmp);
|
||||
create_item(bt, key, 0, 0, val, val_len, par, cmp);
|
||||
}
|
||||
ret = 0;
|
||||
|
||||
@@ -1849,8 +1852,8 @@ int scoutfs_btree_read_items(struct super_block *sb,
|
||||
if (scoutfs_key_compare(&item->key, end) > 0)
|
||||
break;
|
||||
|
||||
ret = cb(sb, item_key(item), item_val(bt, item),
|
||||
item_val_len(item), arg);
|
||||
ret = cb(sb, item_key(item), le64_to_cpu(item->seq), item->flags,
|
||||
item_val(bt, item), item_val_len(item), arg);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
@@ -1870,6 +1873,10 @@ out:
|
||||
* This can make partial progress before returning an error, leaving
|
||||
* dirty btree blocks with only some of the caller's items. It's up to
|
||||
* the caller to resolve this.
|
||||
*
|
||||
* This, along with merging, are the only places that seq and flags are
|
||||
* set in btree items. They're only used for fs items written through
|
||||
* the item cache and forest of log btrees.
|
||||
*/
|
||||
int scoutfs_btree_insert_list(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
@@ -1895,13 +1902,14 @@ int scoutfs_btree_insert_list(struct super_block *sb,
|
||||
do {
|
||||
item = leaf_item_hash_search(sb, bt, &lst->key);
|
||||
if (item) {
|
||||
update_item_value(bt, item, lst->val,
|
||||
lst->val_len);
|
||||
item->seq = cpu_to_le64(lst->seq);
|
||||
item->flags = lst->flags;
|
||||
update_item_value(bt, item, lst->val, lst->val_len);
|
||||
} else {
|
||||
scoutfs_avl_search(&bt->item_root,
|
||||
cmp_key_item, &lst->key,
|
||||
&cmp, &par, NULL, NULL);
|
||||
create_item(bt, &lst->key, lst->val,
|
||||
create_item(bt, &lst->key, lst->seq, lst->flags, lst->val,
|
||||
lst->val_len, par, cmp);
|
||||
}
|
||||
|
||||
@@ -2017,6 +2025,8 @@ struct merge_pos {
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_avl_node *avl;
|
||||
struct scoutfs_key *key;
|
||||
u64 seq;
|
||||
u8 flags;
|
||||
unsigned int val_len;
|
||||
u8 *val;
|
||||
};
|
||||
@@ -2035,8 +2045,7 @@ static void free_mpos(struct super_block *sb, struct merge_pos *mpos)
|
||||
kfree(mpos);
|
||||
}
|
||||
|
||||
static void insert_mpos(struct rb_root *pos_root, struct merge_pos *ins,
|
||||
scoutfs_btree_merge_cmp_t merge_cmp)
|
||||
static void insert_mpos(struct rb_root *pos_root, struct merge_pos *ins)
|
||||
{
|
||||
struct rb_node **node = &pos_root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
@@ -2050,7 +2059,7 @@ static void insert_mpos(struct rb_root *pos_root, struct merge_pos *ins,
|
||||
|
||||
/* sort merge items by key then newest to oldest */
|
||||
cmp = scoutfs_key_compare(ins->key, mpos->key) ?:
|
||||
-merge_cmp(ins->val, ins->val_len, mpos->val, mpos->val_len);
|
||||
-scoutfs_cmp(ins->seq, mpos->seq);
|
||||
|
||||
if (cmp < 0)
|
||||
node = &(*node)->rb_left;
|
||||
@@ -2069,8 +2078,7 @@ static void insert_mpos(struct rb_root *pos_root, struct merge_pos *ins,
|
||||
* the mpos on error or if there are no more items in the range.
|
||||
*/
|
||||
static int reset_mpos(struct super_block *sb, struct rb_root *pos_root, struct merge_pos *mpos,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
scoutfs_btree_merge_cmp_t merge_cmp)
|
||||
struct scoutfs_key *start, struct scoutfs_key *end)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_avl_node *next;
|
||||
@@ -2123,10 +2131,12 @@ static int reset_mpos(struct super_block *sb, struct rb_root *pos_root, struct m
|
||||
|
||||
/* insert the next item within range at its version */
|
||||
mpos->key = item_key(item);
|
||||
mpos->seq = le64_to_cpu(item->seq);
|
||||
mpos->flags = item->flags;
|
||||
mpos->val_len = item_val_len(item);
|
||||
mpos->val = item_val(mpos->bt, item);
|
||||
|
||||
insert_mpos(pos_root, mpos, merge_cmp);
|
||||
insert_mpos(pos_root, mpos);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
@@ -2137,17 +2147,10 @@ out:
|
||||
* destination root. The order of the input roots doesn't matter, the
|
||||
* items are merged in sorted key order.
|
||||
*
|
||||
* The merge_cmp callback determines the order that the input items are
|
||||
* merged in. The is_del callback determines if a merging item should
|
||||
* be removed from the destination.
|
||||
*
|
||||
* subtree indicates that the destination root is in fact one of many
|
||||
* parent blocks and shouldn't be split or allowed to fall below the
|
||||
* join low water mark.
|
||||
*
|
||||
* drop_val indicates the initial length of the value that should be
|
||||
* dropped when merging items into destination items.
|
||||
*
|
||||
* -ERANGE is returned if the merge doesn't fully exhaust the range, due
|
||||
* to allocators running low or needing to join/split the parent.
|
||||
* *next_ret is set to the next key which hasn't been merged so that the
|
||||
@@ -2161,9 +2164,7 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
struct scoutfs_key *next_ret,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct list_head *inputs,
|
||||
scoutfs_btree_merge_cmp_t merge_cmp,
|
||||
scoutfs_btree_merge_is_del_t merge_is_del, bool subtree,
|
||||
int drop_val, int dirty_limit, int alloc_low)
|
||||
bool subtree, int dirty_limit, int alloc_low)
|
||||
{
|
||||
struct scoutfs_btree_root_head *rhead;
|
||||
struct rb_root pos_root = RB_ROOT;
|
||||
@@ -2194,7 +2195,7 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
RB_CLEAR_NODE(&mpos->node);
|
||||
mpos->root = &rhead->root;
|
||||
|
||||
ret = reset_mpos(sb, &pos_root, mpos, start, end, merge_cmp);
|
||||
ret = reset_mpos(sb, &pos_root, mpos, start, end);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
@@ -2234,19 +2235,13 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
|
||||
for (; mpos; mpos = first_mpos(&pos_root)) {
|
||||
|
||||
/* val must have at least what we need to drop */
|
||||
if (mpos->val_len < drop_val) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* walk to new leaf if we exceed parent ref key */
|
||||
if (scoutfs_key_compare(mpos->key, &kr.end) > 0)
|
||||
break;
|
||||
|
||||
/* see if there's an existing item */
|
||||
item = leaf_item_hash_search(sb, bt, mpos->key);
|
||||
is_del = merge_is_del(mpos->val, mpos->val_len);
|
||||
is_del = !!(mpos->flags & SCOUTFS_ITEM_FLAG_DELETION);
|
||||
|
||||
trace_scoutfs_btree_merge_items(sb, mpos->root,
|
||||
mpos->key, mpos->val_len,
|
||||
@@ -2266,17 +2261,16 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
scoutfs_avl_search(&bt->item_root,
|
||||
cmp_key_item, mpos->key,
|
||||
&cmp, &par, NULL, NULL);
|
||||
create_item(bt, mpos->key,
|
||||
mpos->val + drop_val,
|
||||
mpos->val_len - drop_val, par, cmp);
|
||||
create_item(bt, mpos->key, mpos->seq, mpos->flags,
|
||||
mpos->val, mpos->val_len, par, cmp);
|
||||
scoutfs_inc_counter(sb, btree_merge_insert);
|
||||
}
|
||||
|
||||
/* update existing items */
|
||||
if (item && !is_del) {
|
||||
update_item_value(bt, item,
|
||||
mpos->val + drop_val,
|
||||
mpos->val_len - drop_val);
|
||||
item->seq = cpu_to_le64(mpos->seq);
|
||||
item->flags = mpos->flags;
|
||||
update_item_value(bt, item, mpos->val, mpos->val_len);
|
||||
scoutfs_inc_counter(sb, btree_merge_update);
|
||||
}
|
||||
|
||||
@@ -2300,7 +2294,7 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
next = *mpos->key;
|
||||
scoutfs_key_inc(&next);
|
||||
while (mpos && scoutfs_key_compare(mpos->key, &next) < 0) {
|
||||
ret = reset_mpos(sb, &pos_root, mpos, &next, end, merge_cmp);
|
||||
ret = reset_mpos(sb, &pos_root, mpos, &next, end);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
mpos = first_mpos(&pos_root);
|
||||
|
||||
@@ -20,13 +20,15 @@ struct scoutfs_btree_item_ref {
|
||||
|
||||
/* caller gives an item to the callback */
|
||||
typedef int (*scoutfs_btree_item_cb)(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *key, u64 seq, u8 flags,
|
||||
void *val, int val_len, void *arg);
|
||||
|
||||
/* simple singly-linked list of items */
|
||||
struct scoutfs_btree_item_list {
|
||||
struct scoutfs_btree_item_list *next;
|
||||
struct scoutfs_key key;
|
||||
u64 seq;
|
||||
u8 flags;
|
||||
int val_len;
|
||||
u8 val[0];
|
||||
};
|
||||
@@ -108,14 +110,7 @@ struct scoutfs_btree_root_head {
|
||||
struct list_head head;
|
||||
struct scoutfs_btree_root root;
|
||||
};
|
||||
/*
|
||||
* Compare the values of merge input items whose keys are equal to
|
||||
* determine their merge order.
|
||||
*/
|
||||
typedef int (*scoutfs_btree_merge_cmp_t)(void *a_val, int a_val_len,
|
||||
void *b_val, int b_val_len);
|
||||
/* whether merging item should be removed from destination */
|
||||
typedef bool (*scoutfs_btree_merge_is_del_t)(void *val, int val_len);
|
||||
|
||||
int scoutfs_btree_merge(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
@@ -124,9 +119,7 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
struct scoutfs_key *next_ret,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct list_head *input_list,
|
||||
scoutfs_btree_merge_cmp_t merge_cmp,
|
||||
scoutfs_btree_merge_is_del_t merge_is_del, bool subtree,
|
||||
int drop_val, int dirty_limit, int alloc_low);
|
||||
bool subtree, int dirty_limit, int alloc_low);
|
||||
|
||||
int scoutfs_btree_free_blocks(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
|
||||
@@ -226,20 +226,12 @@ struct forest_read_items_data {
|
||||
void *cb_arg;
|
||||
};
|
||||
|
||||
static int forest_read_items(struct super_block *sb, struct scoutfs_key *key,
|
||||
static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u64 seq, u8 flags,
|
||||
void *val, int val_len, void *arg)
|
||||
{
|
||||
struct forest_read_items_data *rid = arg;
|
||||
struct scoutfs_log_item_value _liv = {0,};
|
||||
struct scoutfs_log_item_value *liv = &_liv;
|
||||
|
||||
if (!rid->is_fs) {
|
||||
liv = val;
|
||||
val += sizeof(struct scoutfs_log_item_value);
|
||||
val_len -= sizeof(struct scoutfs_log_item_value);
|
||||
}
|
||||
|
||||
return rid->cb(sb, key, liv, val, val_len, rid->cb_arg);
|
||||
return rid->cb(sb, key, seq, flags, val, val_len, rid->cb_arg);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -564,26 +556,6 @@ void scoutfs_forest_get_btrees(struct super_block *sb,
|
||||
<->bloom_ref);
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare input items to merge by their log item value seq when their
|
||||
* keys match.
|
||||
*/
|
||||
static int merge_cmp(void *a_val, int a_val_len, void *b_val, int b_val_len)
|
||||
{
|
||||
struct scoutfs_log_item_value *a = a_val;
|
||||
struct scoutfs_log_item_value *b = b_val;
|
||||
|
||||
/* sort merge item by seq */
|
||||
return scoutfs_cmp(le64_to_cpu(a->seq), le64_to_cpu(b->seq));
|
||||
}
|
||||
|
||||
static bool merge_is_del(void *val, int val_len)
|
||||
{
|
||||
struct scoutfs_log_item_value *liv = val;
|
||||
|
||||
return !!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION);
|
||||
}
|
||||
|
||||
#define LOG_MERGE_DELAY_MS (5 * MSEC_PER_SEC)
|
||||
|
||||
/*
|
||||
@@ -673,10 +645,8 @@ static void scoutfs_forest_log_merge_worker(struct work_struct *work)
|
||||
}
|
||||
|
||||
ret = scoutfs_btree_merge(sb, &alloc, &wri, &req.start, &req.end,
|
||||
&next, &comp.root, &inputs, merge_cmp,
|
||||
merge_is_del,
|
||||
&next, &comp.root, &inputs,
|
||||
!!(req.flags & cpu_to_le64(SCOUTFS_LOG_MERGE_REQUEST_SUBTREE)),
|
||||
sizeof(struct scoutfs_log_item_value),
|
||||
SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT, 10);
|
||||
if (ret == -ERANGE) {
|
||||
comp.remain = next;
|
||||
|
||||
@@ -8,10 +8,8 @@ struct scoutfs_block;
|
||||
#include "btree.h"
|
||||
|
||||
/* caller gives an item to the callback */
|
||||
typedef int (*scoutfs_forest_item_cb)(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_log_item_value *liv,
|
||||
void *val, int val_len, void *arg);
|
||||
typedef int (*scoutfs_forest_item_cb)(struct super_block *sb, struct scoutfs_key *key, u64 seq,
|
||||
u8 flags, void *val, int val_len, void *arg);
|
||||
|
||||
int scoutfs_forest_next_hint(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_key *next);
|
||||
|
||||
@@ -244,11 +244,15 @@ struct scoutfs_btree_root {
|
||||
struct scoutfs_btree_item {
|
||||
struct scoutfs_avl_node node;
|
||||
struct scoutfs_key key;
|
||||
__le64 seq;
|
||||
__le16 val_off;
|
||||
__le16 val_len;
|
||||
__u8 __pad[4];
|
||||
__u8 flags;
|
||||
__u8 __pad[3];
|
||||
};
|
||||
|
||||
#define SCOUTFS_ITEM_FLAG_DELETION (1 << 0)
|
||||
|
||||
struct scoutfs_btree_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
struct scoutfs_avl_root item_root;
|
||||
@@ -465,21 +469,8 @@ struct scoutfs_log_trees {
|
||||
|
||||
#define SCOUTFS_LOG_TREES_FINALIZED (1ULL << 0)
|
||||
|
||||
struct scoutfs_log_item_value {
|
||||
__le64 seq;
|
||||
__u8 flags;
|
||||
__u8 __pad[7];
|
||||
__u8 data[];
|
||||
};
|
||||
|
||||
/*
|
||||
* FS items are limited by the max btree value length with the log item
|
||||
* value header.
|
||||
*/
|
||||
#define SCOUTFS_MAX_VAL_SIZE \
|
||||
(SCOUTFS_BTREE_MAX_VAL_LEN - sizeof(struct scoutfs_log_item_value))
|
||||
|
||||
#define SCOUTFS_LOG_ITEM_FLAG_DELETION (1 << 0)
|
||||
/* FS items are limited by the max btree value length */
|
||||
#define SCOUTFS_MAX_VAL_SIZE SCOUTFS_BTREE_MAX_VAL_LEN
|
||||
|
||||
struct scoutfs_bloom_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
|
||||
@@ -127,7 +127,7 @@ struct cached_page {
|
||||
unsigned long lru_time;
|
||||
struct list_head dirty_list;
|
||||
struct list_head dirty_head;
|
||||
u64 max_liv_seq;
|
||||
u64 max_seq;
|
||||
struct page *page;
|
||||
unsigned int page_off;
|
||||
unsigned int erased_bytes;
|
||||
@@ -142,7 +142,7 @@ struct cached_item {
|
||||
deletion:1; /* negative del item for writing */
|
||||
unsigned int val_len;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_log_item_value liv;
|
||||
u64 seq;
|
||||
char val[0];
|
||||
};
|
||||
|
||||
@@ -386,12 +386,10 @@ static void put_pg(struct super_block *sb, struct cached_page *pg)
|
||||
}
|
||||
}
|
||||
|
||||
static void update_pg_max_liv_seq(struct cached_page *pg, struct cached_item *item)
|
||||
static void update_pg_max_seq(struct cached_page *pg, struct cached_item *item)
|
||||
{
|
||||
u64 liv_seq = le64_to_cpu(item->liv.seq);
|
||||
|
||||
if (liv_seq > pg->max_liv_seq)
|
||||
pg->max_liv_seq = liv_seq;
|
||||
if (item->seq > pg->max_seq)
|
||||
pg->max_seq = item->seq;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -401,8 +399,7 @@ static void update_pg_max_liv_seq(struct cached_page *pg, struct cached_item *it
|
||||
* page or checking the free space first.
|
||||
*/
|
||||
static struct cached_item *alloc_item(struct cached_page *pg,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_log_item_value *liv,
|
||||
struct scoutfs_key *key, u64 seq, bool deletion,
|
||||
void *val, int val_len)
|
||||
{
|
||||
struct cached_item *item;
|
||||
@@ -417,15 +414,15 @@ static struct cached_item *alloc_item(struct cached_page *pg,
|
||||
INIT_LIST_HEAD(&item->dirty_head);
|
||||
item->dirty = 0;
|
||||
item->persistent = 0;
|
||||
item->deletion = !!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION);
|
||||
item->deletion = !!deletion;
|
||||
item->val_len = val_len;
|
||||
item->key = *key;
|
||||
item->liv = *liv;
|
||||
item->seq = seq;
|
||||
|
||||
if (val_len)
|
||||
memcpy(item->val, val, val_len);
|
||||
|
||||
update_pg_max_liv_seq(pg, item);
|
||||
update_pg_max_seq(pg, item);
|
||||
|
||||
return item;
|
||||
}
|
||||
@@ -634,7 +631,7 @@ static void mark_item_dirty(struct super_block *sb,
|
||||
item->dirty = 1;
|
||||
}
|
||||
|
||||
update_pg_max_liv_seq(pg, item);
|
||||
update_pg_max_seq(pg, item);
|
||||
}
|
||||
|
||||
static void clear_item_dirty(struct super_block *sb,
|
||||
@@ -711,7 +708,7 @@ static void move_page_items(struct super_block *sb,
|
||||
if (stop && scoutfs_key_compare(&from->key, stop) >= 0)
|
||||
break;
|
||||
|
||||
to = alloc_item(right, &from->key, &from->liv, from->val,
|
||||
to = alloc_item(right, &from->key, from->seq, from->deletion, from->val,
|
||||
from->val_len);
|
||||
rbtree_insert(&to->node, par, pnode, &right->item_root);
|
||||
par = &to->node;
|
||||
@@ -723,7 +720,6 @@ static void move_page_items(struct super_block *sb,
|
||||
}
|
||||
|
||||
to->persistent = from->persistent;
|
||||
to->deletion = from->deletion;
|
||||
|
||||
erase_item(left, from);
|
||||
}
|
||||
@@ -1356,11 +1352,11 @@ static void del_active_reader(struct item_cache_info *cinf, struct active_reader
|
||||
* insert old versions of items into the tree here so that the trees
|
||||
* don't have to compare seqs.
|
||||
*/
|
||||
static int read_page_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_log_item_value *liv, void *val,
|
||||
int val_len, void *arg)
|
||||
static int read_page_item(struct super_block *sb, struct scoutfs_key *key, u64 seq, u8 flags,
|
||||
void *val, int val_len, void *arg)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
const bool deletion = !!(flags & SCOUTFS_ITEM_FLAG_DELETION);
|
||||
struct rb_root *root = arg;
|
||||
struct cached_page *right = NULL;
|
||||
struct cached_page *left = NULL;
|
||||
@@ -1374,7 +1370,7 @@ static int read_page_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
pg = page_rbtree_walk(sb, root, key, key, NULL, NULL, &p_par, &p_pnode);
|
||||
found = item_rbtree_walk(&pg->item_root, key, NULL, &par, &pnode);
|
||||
if (found && (le64_to_cpu(found->liv.seq) >= le64_to_cpu(liv->seq)))
|
||||
if (found && (found->seq >= seq))
|
||||
return 0;
|
||||
|
||||
if (!page_has_room(pg, val_len)) {
|
||||
@@ -1388,7 +1384,7 @@ static int read_page_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
&pnode);
|
||||
}
|
||||
|
||||
item = alloc_item(pg, key, liv, val, val_len);
|
||||
item = alloc_item(pg, key, seq, deletion, val, val_len);
|
||||
if (!item) {
|
||||
/* simpler split of private pages, no locking/dirty/lru */
|
||||
if (!left)
|
||||
@@ -1411,7 +1407,7 @@ static int read_page_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
put_pg(sb, pg);
|
||||
|
||||
pg = scoutfs_key_compare(key, &left->end) <= 0 ? left : right;
|
||||
item = alloc_item(pg, key, liv, val, val_len);
|
||||
item = alloc_item(pg, key, seq, deletion, val, val_len);
|
||||
found = item_rbtree_walk(&pg->item_root, key, NULL, &par,
|
||||
&pnode);
|
||||
|
||||
@@ -1824,11 +1820,11 @@ out:
|
||||
* to the last stable seq and ensure that all the items in open
|
||||
* transactions and granted locks will have greater seqs.
|
||||
*/
|
||||
static __le64 item_seq(struct super_block *sb, struct scoutfs_lock *lock)
|
||||
static u64 item_seq(struct super_block *sb, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
return cpu_to_le64(max(sbi->trans_seq, lock->write_seq));
|
||||
return max(sbi->trans_seq, lock->write_seq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1863,7 +1859,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key *key,
|
||||
if (!item || item->deletion) {
|
||||
ret = -ENOENT;
|
||||
} else {
|
||||
item->liv.seq = item_seq(sb, lock);
|
||||
item->seq = item_seq(sb, lock);
|
||||
mark_item_dirty(sb, cinf, pg, NULL, item);
|
||||
ret = 0;
|
||||
}
|
||||
@@ -1883,9 +1879,7 @@ static int item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
int mode, bool force)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct scoutfs_log_item_value liv = {
|
||||
.seq = item_seq(sb, lock),
|
||||
};
|
||||
const u64 seq = item_seq(sb, lock);
|
||||
struct cached_item *found;
|
||||
struct cached_item *item;
|
||||
struct cached_page *pg;
|
||||
@@ -1913,7 +1907,7 @@ static int item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
item = alloc_item(pg, key, &liv, val, val_len);
|
||||
item = alloc_item(pg, key, seq, false, val, val_len);
|
||||
rbtree_insert(&item->node, par, pnode, &pg->item_root);
|
||||
mark_item_dirty(sb, cinf, pg, NULL, item);
|
||||
|
||||
@@ -1958,9 +1952,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct scoutfs_log_item_value liv = {
|
||||
.seq = item_seq(sb, lock),
|
||||
};
|
||||
const u64 seq = item_seq(sb, lock);
|
||||
struct cached_item *item;
|
||||
struct cached_item *found;
|
||||
struct cached_page *pg;
|
||||
@@ -1995,10 +1987,10 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
pg->erased_bytes += item_val_bytes(found->val_len) -
|
||||
item_val_bytes(val_len);
|
||||
found->val_len = val_len;
|
||||
found->liv.seq = liv.seq;
|
||||
found->seq = seq;
|
||||
mark_item_dirty(sb, cinf, pg, NULL, found);
|
||||
} else {
|
||||
item = alloc_item(pg, key, &liv, val, val_len);
|
||||
item = alloc_item(pg, key, seq, false, val, val_len);
|
||||
item->persistent = found->persistent;
|
||||
rbtree_insert(&item->node, par, pnode, &pg->item_root);
|
||||
mark_item_dirty(sb, cinf, pg, NULL, item);
|
||||
@@ -2026,9 +2018,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock, int mode, bool force)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct scoutfs_log_item_value liv = {
|
||||
.seq = item_seq(sb, lock),
|
||||
};
|
||||
const u64 seq = item_seq(sb, lock);
|
||||
struct cached_item *item;
|
||||
struct cached_page *pg;
|
||||
struct rb_node **pnode;
|
||||
@@ -2056,7 +2046,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
}
|
||||
|
||||
if (!item) {
|
||||
item = alloc_item(pg, key, &liv, NULL, 0);
|
||||
item = alloc_item(pg, key, seq, false, NULL, 0);
|
||||
rbtree_insert(&item->node, par, pnode, &pg->item_root);
|
||||
}
|
||||
|
||||
@@ -2069,8 +2059,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
erase_item(pg, item);
|
||||
} else {
|
||||
/* must emit deletion to clobber old persistent item */
|
||||
item->liv.seq = liv.seq;
|
||||
item->liv.flags |= SCOUTFS_LOG_ITEM_FLAG_DELETION;
|
||||
item->seq = seq;
|
||||
item->deletion = 1;
|
||||
pg->erased_bytes += item_val_bytes(item->val_len) -
|
||||
item_val_bytes(0);
|
||||
@@ -2157,16 +2146,10 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
LIST_HEAD(pages);
|
||||
LIST_HEAD(pos);
|
||||
u64 max_seq = 0;
|
||||
int val_len;
|
||||
int bytes;
|
||||
int off;
|
||||
int ret;
|
||||
|
||||
/* we're relying on struct layout to prepend item value headers */
|
||||
BUILD_BUG_ON(offsetof(struct cached_item, val) !=
|
||||
(offsetof(struct cached_item, liv) +
|
||||
member_sizeof(struct cached_item, liv)));
|
||||
|
||||
if (atomic_read(&cinf->dirty_pages) == 0)
|
||||
return 0;
|
||||
|
||||
@@ -2218,10 +2201,9 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
list_sort(NULL, &pg->dirty_list, cmp_item_key);
|
||||
|
||||
list_for_each_entry(item, &pg->dirty_list, dirty_head) {
|
||||
val_len = sizeof(item->liv) + item->val_len;
|
||||
bytes = offsetof(struct scoutfs_btree_item_list,
|
||||
val[val_len]);
|
||||
max_seq = max(max_seq, le64_to_cpu(item->liv.seq));
|
||||
val[item->val_len]);
|
||||
max_seq = max(max_seq, item->seq);
|
||||
|
||||
if (off + bytes > PAGE_SIZE) {
|
||||
page = second;
|
||||
@@ -2237,8 +2219,10 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
prev = &lst->next;
|
||||
|
||||
lst->key = item->key;
|
||||
lst->val_len = val_len;
|
||||
memcpy(lst->val, &item->liv, val_len);
|
||||
lst->seq = item->seq;
|
||||
lst->flags = item->deletion ? SCOUTFS_ITEM_FLAG_DELETION : 0;
|
||||
lst->val_len = item->val_len;
|
||||
memcpy(lst->val, item->val, item->val_len);
|
||||
}
|
||||
|
||||
spin_lock(&cinf->dirty_lock);
|
||||
@@ -2467,7 +2451,7 @@ static int item_lru_shrink(struct shrinker *shrink,
|
||||
|
||||
list_for_each_entry_safe(pg, tmp, &cinf->lru_list, lru_head) {
|
||||
|
||||
if (first_reader_seq <= pg->max_liv_seq) {
|
||||
if (first_reader_seq <= pg->max_seq) {
|
||||
scoutfs_inc_counter(sb, item_shrink_page_reader);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -75,6 +75,9 @@ void btree_append_item(struct scoutfs_btree_block *bt,
|
||||
le16_add_cpu(&bt->total_item_bytes, sizeof(struct scoutfs_btree_item));
|
||||
|
||||
item->key = *key;
|
||||
item->seq = cpu_to_le64(1);
|
||||
item->flags = 0;
|
||||
|
||||
leaf_item_hash_insert(bt, &item->key,
|
||||
cpu_to_le16((void *)item - (void *)bt));
|
||||
if (val_len == 0)
|
||||
|
||||
@@ -178,15 +178,19 @@ static print_func_t find_printer(u8 zone, u8 type)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int print_fs_item(struct scoutfs_key *key, void *val,
|
||||
#define flag_char(val, bit, c) \
|
||||
(((val) & (bit)) ? (c) : '-')
|
||||
|
||||
static int print_fs_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
print_func_t printer;
|
||||
|
||||
printf(" "SK_FMT"\n", SK_ARG(key));
|
||||
printf(" "SK_FMT" %llu %c\n",
|
||||
SK_ARG(key), seq, flag_char(flags, SCOUTFS_ITEM_FLAG_DELETION, 'd'));
|
||||
|
||||
/* only items in leaf blocks have values */
|
||||
if (val) {
|
||||
if (val != NULL && !(flags & SCOUTFS_ITEM_FLAG_DELETION)) {
|
||||
printer = find_printer(key->sk_zone, key->sk_type);
|
||||
if (printer)
|
||||
printer(key, val, val_len);
|
||||
@@ -198,37 +202,6 @@ static int print_fs_item(struct scoutfs_key *key, void *val,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* same as fs item but with a small header in the value */
|
||||
static int print_logs_item(struct scoutfs_key *key, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
struct scoutfs_log_item_value *liv;
|
||||
print_func_t printer;
|
||||
|
||||
printf(" "SK_FMT"\n", SK_ARG(key));
|
||||
|
||||
/* only items in leaf blocks have values */
|
||||
if (val) {
|
||||
liv = val;
|
||||
printf(" log_item_value: seq %llu flags %x\n",
|
||||
le64_to_cpu(liv->seq), liv->flags);
|
||||
|
||||
/* deletion items don't have values */
|
||||
if (!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION)) {
|
||||
printer = find_printer(key->sk_zone,
|
||||
key->sk_type);
|
||||
if (printer)
|
||||
printer(key, val + sizeof(*liv),
|
||||
val_len - sizeof(*liv));
|
||||
else
|
||||
printf(" (unknown zone %u type %u)\n",
|
||||
key->sk_zone, key->sk_type);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define BTREF_F \
|
||||
"blkno %llu seq %llu"
|
||||
#define BTREF_A(ref) \
|
||||
@@ -269,7 +242,7 @@ static int print_logs_item(struct scoutfs_key *key, void *val,
|
||||
le64_to_cpu((srf)->ref.seq)
|
||||
|
||||
/* same as fs item but with a small header in the value */
|
||||
static int print_log_trees_item(struct scoutfs_key *key, void *val,
|
||||
static int print_log_trees_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
struct scoutfs_log_trees *lt = val;
|
||||
@@ -330,7 +303,7 @@ static int print_log_trees_item(struct scoutfs_key *key, void *val,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int print_srch_root_item(struct scoutfs_key *key, void *val,
|
||||
static int print_srch_root_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
struct scoutfs_srch_compact *sc;
|
||||
@@ -363,7 +336,7 @@ static int print_srch_root_item(struct scoutfs_key *key, void *val,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int print_trans_seqs_entry(struct scoutfs_key *key, void *val,
|
||||
static int print_trans_seqs_entry(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
printf(" trans_seq %llu rid %016llx\n",
|
||||
@@ -372,7 +345,7 @@ static int print_trans_seqs_entry(struct scoutfs_key *key, void *val,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int print_mounted_client_entry(struct scoutfs_key *key, void *val,
|
||||
static int print_mounted_client_entry(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
struct scoutfs_mounted_client_btree_val *mcv = val;
|
||||
@@ -387,8 +360,8 @@ static int print_mounted_client_entry(struct scoutfs_key *key, void *val,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int print_log_merge_item(struct scoutfs_key *key, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
static int print_log_merge_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
struct scoutfs_log_merge_status *stat;
|
||||
struct scoutfs_log_merge_range *rng;
|
||||
@@ -451,7 +424,7 @@ static int print_log_merge_item(struct scoutfs_key *key, void *val,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int print_alloc_item(struct scoutfs_key *key, void *val,
|
||||
static int print_alloc_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
if (key->sk_zone == SCOUTFS_FREE_EXTENT_BLKNO_ZONE)
|
||||
@@ -469,7 +442,7 @@ static int print_alloc_item(struct scoutfs_key *key, void *val,
|
||||
return 0;
|
||||
}
|
||||
|
||||
typedef int (*print_item_func)(struct scoutfs_key *key, void *val,
|
||||
typedef int (*print_item_func)(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
|
||||
unsigned val_len, void *arg);
|
||||
|
||||
static int print_block_ref(struct scoutfs_key *key, void *val,
|
||||
@@ -477,7 +450,7 @@ static int print_block_ref(struct scoutfs_key *key, void *val,
|
||||
{
|
||||
struct scoutfs_block_ref *ref = val;
|
||||
|
||||
func(key, NULL, 0, arg);
|
||||
func(key, 0, 0, NULL, 0, arg);
|
||||
printf(" ref blkno %llu seq %llu\n",
|
||||
le64_to_cpu(ref->blkno), le64_to_cpu(ref->seq));
|
||||
|
||||
@@ -586,7 +559,7 @@ static int print_btree_block(int fd, struct scoutfs_super_block *super,
|
||||
if (level)
|
||||
print_block_ref(key, val, val_len, func, arg);
|
||||
else
|
||||
func(key, val, val_len, arg);
|
||||
func(key, le64_to_cpu(item->seq), item->flags, val, val_len, arg);
|
||||
}
|
||||
|
||||
free(bt);
|
||||
@@ -744,8 +717,8 @@ struct print_recursion_args {
|
||||
};
|
||||
|
||||
/* same as fs item but with a small header in the value */
|
||||
static int print_log_trees_roots(struct scoutfs_key *key, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
static int print_log_trees_roots(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
struct scoutfs_log_trees *lt = val;
|
||||
struct print_recursion_args *pa = arg;
|
||||
@@ -776,14 +749,14 @@ static int print_log_trees_roots(struct scoutfs_key *key, void *val,
|
||||
ret = err;
|
||||
|
||||
err = print_btree(pa->fd, pa->super, "", <->item_root,
|
||||
print_logs_item, NULL);
|
||||
print_fs_item, NULL);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int print_srch_root_files(struct scoutfs_key *key, void *val,
|
||||
static int print_srch_root_files(struct scoutfs_key *key, u64 seq, u8 flags, void *val,
|
||||
unsigned val_len, void *arg)
|
||||
{
|
||||
struct print_recursion_args *pa = arg;
|
||||
@@ -843,7 +816,7 @@ static int print_btree_leaf_items(int fd, struct scoutfs_super_block *super,
|
||||
break;
|
||||
continue;
|
||||
} else {
|
||||
func(key, val, val_len, arg);
|
||||
func(key, le64_to_cpu(item->seq), item->flags, val, val_len, arg);
|
||||
}
|
||||
|
||||
node = avl_next(&bt->item_root, node);
|
||||
|
||||
Reference in New Issue
Block a user