diff --git a/kmod/src/alloc.c b/kmod/src/alloc.c index 392558ef..d27c4df6 100644 --- a/kmod/src/alloc.c +++ b/kmod/src/alloc.c @@ -1526,8 +1526,8 @@ struct foreach_cb_args { void *cb_arg; }; -static int alloc_btree_extent_item_cb(struct super_block *sb, struct scoutfs_key *key, - void *val, int val_len, void *arg) +static int alloc_btree_extent_item_cb(struct super_block *sb, struct scoutfs_key *key, u64 seq, + u8 flags, void *val, int val_len, void *arg) { struct foreach_cb_args *cba = arg; struct scoutfs_extent ext; diff --git a/kmod/src/btree.c b/kmod/src/btree.c index 132edafc..c05d0b06 100644 --- a/kmod/src/btree.c +++ b/kmod/src/btree.c @@ -502,9 +502,8 @@ static __le16 insert_value(struct scoutfs_btree_block *bt, __le16 item_off, * This only consumes free space. It's safe to use references to block * structures after this call. */ -static void create_item(struct scoutfs_btree_block *bt, - struct scoutfs_key *key, void *val, unsigned val_len, - struct scoutfs_avl_node *parent, int cmp) +static void create_item(struct scoutfs_btree_block *bt, struct scoutfs_key *key, u64 seq, u8 flags, + void *val, unsigned val_len, struct scoutfs_avl_node *parent, int cmp) { struct scoutfs_btree_item *item; @@ -516,6 +515,8 @@ static void create_item(struct scoutfs_btree_block *bt, item = end_item(bt); item->key = *key; + item->seq = cpu_to_le64(seq); + item->flags = flags; scoutfs_avl_insert(&bt->item_root, parent, &item->node, cmp); leaf_item_hash_insert(bt, item_key(item), ptr_off(bt, item)); @@ -558,6 +559,8 @@ static void delete_item(struct scoutfs_btree_block *bt, /* move the final item into the deleted space */ if (end != item) { item->key = end->key; + item->seq = end->seq; + item->flags = end->flags; item->val_off = end->val_off; item->val_len = end->val_len; leaf_item_hash_change(bt, &end->key, ptr_off(bt, item), @@ -606,8 +609,8 @@ static void move_items(struct scoutfs_btree_block *dst, else next = next_item(src, from); - create_item(dst, item_key(from), 
item_val(src, from), - item_val_len(from), par, cmp); + create_item(dst, item_key(from), le64_to_cpu(from->seq), from->flags, + item_val(src, from), item_val_len(from), par, cmp); if (move_right) { if (par) @@ -680,7 +683,7 @@ static void create_parent_item(struct scoutfs_btree_block *parent, scoutfs_avl_search(&parent->item_root, cmp_key_item, key, &cmp, &par, NULL, NULL); - create_item(parent, key, &ref, sizeof(ref), par, cmp); + create_item(parent, key, 0, 0, &ref, sizeof(ref), par, cmp); } /* @@ -1529,7 +1532,7 @@ int scoutfs_btree_insert(struct super_block *sb, if (node) { ret = -EEXIST; } else { - create_item(bt, key, val, val_len, par, cmp); + create_item(bt, key, 0, 0, val, val_len, par, cmp); ret = 0; } } @@ -1630,7 +1633,7 @@ int scoutfs_btree_force(struct super_block *sb, } else { scoutfs_avl_search(&bt->item_root, cmp_key_item, key, &cmp, &par, NULL, NULL); - create_item(bt, key, val, val_len, par, cmp); + create_item(bt, key, 0, 0, val, val_len, par, cmp); } ret = 0; @@ -1849,8 +1852,8 @@ int scoutfs_btree_read_items(struct super_block *sb, if (scoutfs_key_compare(&item->key, end) > 0) break; - ret = cb(sb, item_key(item), item_val(bt, item), - item_val_len(item), arg); + ret = cb(sb, item_key(item), le64_to_cpu(item->seq), item->flags, + item_val(bt, item), item_val_len(item), arg); if (ret < 0) break; @@ -1870,6 +1873,10 @@ out: * This can make partial progress before returning an error, leaving * dirty btree blocks with only some of the caller's items. It's up to * the caller to resolve this. + * + * Only this and merging store caller-provided seq and flags in btree items; + * other insertions zero them. They're only used for fs items written through + * the item cache and forest of log btrees. 
*/ int scoutfs_btree_insert_list(struct super_block *sb, struct scoutfs_alloc *alloc, @@ -1895,13 +1902,14 @@ int scoutfs_btree_insert_list(struct super_block *sb, do { item = leaf_item_hash_search(sb, bt, &lst->key); if (item) { - update_item_value(bt, item, lst->val, - lst->val_len); + item->seq = cpu_to_le64(lst->seq); + item->flags = lst->flags; + update_item_value(bt, item, lst->val, lst->val_len); } else { scoutfs_avl_search(&bt->item_root, cmp_key_item, &lst->key, &cmp, &par, NULL, NULL); - create_item(bt, &lst->key, lst->val, + create_item(bt, &lst->key, lst->seq, lst->flags, lst->val, lst->val_len, par, cmp); } @@ -2017,6 +2025,8 @@ struct merge_pos { struct scoutfs_btree_block *bt; struct scoutfs_avl_node *avl; struct scoutfs_key *key; + u64 seq; + u8 flags; unsigned int val_len; u8 *val; }; @@ -2035,8 +2045,7 @@ static void free_mpos(struct super_block *sb, struct merge_pos *mpos) kfree(mpos); } -static void insert_mpos(struct rb_root *pos_root, struct merge_pos *ins, - scoutfs_btree_merge_cmp_t merge_cmp) +static void insert_mpos(struct rb_root *pos_root, struct merge_pos *ins) { struct rb_node **node = &pos_root->rb_node; struct rb_node *parent = NULL; @@ -2050,7 +2059,7 @@ static void insert_mpos(struct rb_root *pos_root, struct merge_pos *ins, /* sort merge items by key then newest to oldest */ cmp = scoutfs_key_compare(ins->key, mpos->key) ?: - -merge_cmp(ins->val, ins->val_len, mpos->val, mpos->val_len); + -scoutfs_cmp(ins->seq, mpos->seq); if (cmp < 0) node = &(*node)->rb_left; @@ -2069,8 +2078,7 @@ static void insert_mpos(struct rb_root *pos_root, struct merge_pos *ins, * the mpos on error or if there are no more items in the range. 
*/ static int reset_mpos(struct super_block *sb, struct rb_root *pos_root, struct merge_pos *mpos, - struct scoutfs_key *start, struct scoutfs_key *end, - scoutfs_btree_merge_cmp_t merge_cmp) + struct scoutfs_key *start, struct scoutfs_key *end) { struct scoutfs_btree_item *item; struct scoutfs_avl_node *next; @@ -2123,10 +2131,12 @@ static int reset_mpos(struct super_block *sb, struct rb_root *pos_root, struct m /* insert the next item within range at its version */ mpos->key = item_key(item); + mpos->seq = le64_to_cpu(item->seq); + mpos->flags = item->flags; mpos->val_len = item_val_len(item); mpos->val = item_val(mpos->bt, item); - insert_mpos(pos_root, mpos, merge_cmp); + insert_mpos(pos_root, mpos); ret = 0; out: return ret; @@ -2137,17 +2147,10 @@ out: * destination root. The order of the input roots doesn't matter, the * items are merged in sorted key order. * - * The merge_cmp callback determines the order that the input items are - * merged in. The is_del callback determines if a merging item should - * be removed from the destination. - * * subtree indicates that the destination root is in fact one of many * parent blocks and shouldn't be split or allowed to fall below the * join low water mark. * - * drop_val indicates the initial length of the value that should be - * dropped when merging items into destination items. - * * -ERANGE is returned if the merge doesn't fully exhaust the range, due * to allocators running low or needing to join/split the parent. 
* *next_ret is set to the next key which hasn't been merged so that the @@ -2161,9 +2164,7 @@ int scoutfs_btree_merge(struct super_block *sb, struct scoutfs_key *next_ret, struct scoutfs_btree_root *root, struct list_head *inputs, - scoutfs_btree_merge_cmp_t merge_cmp, - scoutfs_btree_merge_is_del_t merge_is_del, bool subtree, - int drop_val, int dirty_limit, int alloc_low) + bool subtree, int dirty_limit, int alloc_low) { struct scoutfs_btree_root_head *rhead; struct rb_root pos_root = RB_ROOT; @@ -2194,7 +2195,7 @@ int scoutfs_btree_merge(struct super_block *sb, RB_CLEAR_NODE(&mpos->node); mpos->root = &rhead->root; - ret = reset_mpos(sb, &pos_root, mpos, start, end, merge_cmp); + ret = reset_mpos(sb, &pos_root, mpos, start, end); if (ret < 0) goto out; } @@ -2234,19 +2235,13 @@ int scoutfs_btree_merge(struct super_block *sb, for (; mpos; mpos = first_mpos(&pos_root)) { - /* val must have at least what we need to drop */ - if (mpos->val_len < drop_val) { - ret = -EIO; - goto out; - } - /* walk to new leaf if we exceed parent ref key */ if (scoutfs_key_compare(mpos->key, &kr.end) > 0) break; /* see if there's an existing item */ item = leaf_item_hash_search(sb, bt, mpos->key); - is_del = merge_is_del(mpos->val, mpos->val_len); + is_del = !!(mpos->flags & SCOUTFS_ITEM_FLAG_DELETION); trace_scoutfs_btree_merge_items(sb, mpos->root, mpos->key, mpos->val_len, @@ -2266,17 +2261,16 @@ int scoutfs_btree_merge(struct super_block *sb, scoutfs_avl_search(&bt->item_root, cmp_key_item, mpos->key, &cmp, &par, NULL, NULL); - create_item(bt, mpos->key, - mpos->val + drop_val, - mpos->val_len - drop_val, par, cmp); + create_item(bt, mpos->key, mpos->seq, mpos->flags, + mpos->val, mpos->val_len, par, cmp); scoutfs_inc_counter(sb, btree_merge_insert); } /* update existing items */ if (item && !is_del) { - update_item_value(bt, item, - mpos->val + drop_val, - mpos->val_len - drop_val); + item->seq = cpu_to_le64(mpos->seq); + item->flags = mpos->flags; + update_item_value(bt, item, 
mpos->val, mpos->val_len); scoutfs_inc_counter(sb, btree_merge_update); } @@ -2300,7 +2294,7 @@ int scoutfs_btree_merge(struct super_block *sb, next = *mpos->key; scoutfs_key_inc(&next); while (mpos && scoutfs_key_compare(mpos->key, &next) < 0) { - ret = reset_mpos(sb, &pos_root, mpos, &next, end, merge_cmp); + ret = reset_mpos(sb, &pos_root, mpos, &next, end); if (ret < 0) goto out; mpos = first_mpos(&pos_root); diff --git a/kmod/src/btree.h b/kmod/src/btree.h index 3d27fec2..057aa779 100644 --- a/kmod/src/btree.h +++ b/kmod/src/btree.h @@ -20,13 +20,15 @@ struct scoutfs_btree_item_ref { /* caller gives an item to the callback */ typedef int (*scoutfs_btree_item_cb)(struct super_block *sb, - struct scoutfs_key *key, + struct scoutfs_key *key, u64 seq, u8 flags, void *val, int val_len, void *arg); /* simple singly-linked list of items */ struct scoutfs_btree_item_list { struct scoutfs_btree_item_list *next; struct scoutfs_key key; + u64 seq; + u8 flags; int val_len; u8 val[0]; }; @@ -108,14 +110,7 @@ struct scoutfs_btree_root_head { struct list_head head; struct scoutfs_btree_root root; }; -/* - * Compare the values of merge input items whose keys are equal to - * determine their merge order. 
- */ -typedef int (*scoutfs_btree_merge_cmp_t)(void *a_val, int a_val_len, - void *b_val, int b_val_len); -/* whether merging item should be removed from destination */ -typedef bool (*scoutfs_btree_merge_is_del_t)(void *val, int val_len); + int scoutfs_btree_merge(struct super_block *sb, struct scoutfs_alloc *alloc, struct scoutfs_block_writer *wri, @@ -124,9 +119,7 @@ int scoutfs_btree_merge(struct super_block *sb, struct scoutfs_key *next_ret, struct scoutfs_btree_root *root, struct list_head *input_list, - scoutfs_btree_merge_cmp_t merge_cmp, - scoutfs_btree_merge_is_del_t merge_is_del, bool subtree, - int drop_val, int dirty_limit, int alloc_low); + bool subtree, int dirty_limit, int alloc_low); int scoutfs_btree_free_blocks(struct super_block *sb, struct scoutfs_alloc *alloc, diff --git a/kmod/src/forest.c b/kmod/src/forest.c index 03d1c486..65cf26f0 100644 --- a/kmod/src/forest.c +++ b/kmod/src/forest.c @@ -226,20 +226,12 @@ struct forest_read_items_data { void *cb_arg; }; -static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, +static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u64 seq, u8 flags, void *val, int val_len, void *arg) { struct forest_read_items_data *rid = arg; - struct scoutfs_log_item_value _liv = {0,}; - struct scoutfs_log_item_value *liv = &_liv; - if (!rid->is_fs) { - liv = val; - val += sizeof(struct scoutfs_log_item_value); - val_len -= sizeof(struct scoutfs_log_item_value); - } - - return rid->cb(sb, key, liv, val, val_len, rid->cb_arg); + return rid->cb(sb, key, seq, flags, val, val_len, rid->cb_arg); } /* @@ -564,26 +556,6 @@ void scoutfs_forest_get_btrees(struct super_block *sb, <->bloom_ref); } -/* - * Compare input items to merge by their log item value seq when their - * keys match. 
- */ -static int merge_cmp(void *a_val, int a_val_len, void *b_val, int b_val_len) -{ - struct scoutfs_log_item_value *a = a_val; - struct scoutfs_log_item_value *b = b_val; - - /* sort merge item by seq */ - return scoutfs_cmp(le64_to_cpu(a->seq), le64_to_cpu(b->seq)); -} - -static bool merge_is_del(void *val, int val_len) -{ - struct scoutfs_log_item_value *liv = val; - - return !!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION); -} - #define LOG_MERGE_DELAY_MS (5 * MSEC_PER_SEC) /* @@ -673,10 +645,8 @@ static void scoutfs_forest_log_merge_worker(struct work_struct *work) } ret = scoutfs_btree_merge(sb, &alloc, &wri, &req.start, &req.end, - &next, &comp.root, &inputs, merge_cmp, - merge_is_del, + &next, &comp.root, &inputs, !!(req.flags & cpu_to_le64(SCOUTFS_LOG_MERGE_REQUEST_SUBTREE)), - sizeof(struct scoutfs_log_item_value), SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT, 10); if (ret == -ERANGE) { comp.remain = next; diff --git a/kmod/src/forest.h b/kmod/src/forest.h index 0f134e77..1d95b038 100644 --- a/kmod/src/forest.h +++ b/kmod/src/forest.h @@ -8,10 +8,8 @@ struct scoutfs_block; #include "btree.h" /* caller gives an item to the callback */ -typedef int (*scoutfs_forest_item_cb)(struct super_block *sb, - struct scoutfs_key *key, - struct scoutfs_log_item_value *liv, - void *val, int val_len, void *arg); +typedef int (*scoutfs_forest_item_cb)(struct super_block *sb, struct scoutfs_key *key, u64 seq, + u8 flags, void *val, int val_len, void *arg); int scoutfs_forest_next_hint(struct super_block *sb, struct scoutfs_key *key, struct scoutfs_key *next); diff --git a/kmod/src/format.h b/kmod/src/format.h index 1daefcb4..8d9475b7 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -244,11 +244,15 @@ struct scoutfs_btree_root { struct scoutfs_btree_item { struct scoutfs_avl_node node; struct scoutfs_key key; + __le64 seq; __le16 val_off; __le16 val_len; - __u8 __pad[4]; + __u8 flags; + __u8 __pad[3]; }; +#define SCOUTFS_ITEM_FLAG_DELETION (1 << 0) + struct 
scoutfs_btree_block { struct scoutfs_block_header hdr; struct scoutfs_avl_root item_root; @@ -465,21 +469,8 @@ struct scoutfs_log_trees { #define SCOUTFS_LOG_TREES_FINALIZED (1ULL << 0) -struct scoutfs_log_item_value { - __le64 seq; - __u8 flags; - __u8 __pad[7]; - __u8 data[]; -}; - -/* - * FS items are limited by the max btree value length with the log item - * value header. - */ -#define SCOUTFS_MAX_VAL_SIZE \ - (SCOUTFS_BTREE_MAX_VAL_LEN - sizeof(struct scoutfs_log_item_value)) - -#define SCOUTFS_LOG_ITEM_FLAG_DELETION (1 << 0) +/* FS items are limited by the max btree value length */ +#define SCOUTFS_MAX_VAL_SIZE SCOUTFS_BTREE_MAX_VAL_LEN struct scoutfs_bloom_block { struct scoutfs_block_header hdr; diff --git a/kmod/src/item.c b/kmod/src/item.c index 01c787cd..c05198df 100644 --- a/kmod/src/item.c +++ b/kmod/src/item.c @@ -127,7 +127,7 @@ struct cached_page { unsigned long lru_time; struct list_head dirty_list; struct list_head dirty_head; - u64 max_liv_seq; + u64 max_seq; struct page *page; unsigned int page_off; unsigned int erased_bytes; @@ -142,7 +142,7 @@ struct cached_item { deletion:1; /* negative del item for writing */ unsigned int val_len; struct scoutfs_key key; - struct scoutfs_log_item_value liv; + u64 seq; char val[0]; }; @@ -386,12 +386,10 @@ static void put_pg(struct super_block *sb, struct cached_page *pg) } } -static void update_pg_max_liv_seq(struct cached_page *pg, struct cached_item *item) +static void update_pg_max_seq(struct cached_page *pg, struct cached_item *item) { - u64 liv_seq = le64_to_cpu(item->liv.seq); - - if (liv_seq > pg->max_liv_seq) - pg->max_liv_seq = liv_seq; + if (item->seq > pg->max_seq) + pg->max_seq = item->seq; } /* @@ -401,8 +399,7 @@ static void update_pg_max_liv_seq(struct cached_page *pg, struct cached_item *it * page or checking the free space first. 
*/ static struct cached_item *alloc_item(struct cached_page *pg, - struct scoutfs_key *key, - struct scoutfs_log_item_value *liv, + struct scoutfs_key *key, u64 seq, bool deletion, void *val, int val_len) { struct cached_item *item; @@ -417,15 +414,15 @@ static struct cached_item *alloc_item(struct cached_page *pg, INIT_LIST_HEAD(&item->dirty_head); item->dirty = 0; item->persistent = 0; - item->deletion = !!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION); + item->deletion = !!deletion; item->val_len = val_len; item->key = *key; - item->liv = *liv; + item->seq = seq; if (val_len) memcpy(item->val, val, val_len); - update_pg_max_liv_seq(pg, item); + update_pg_max_seq(pg, item); return item; } @@ -634,7 +631,7 @@ static void mark_item_dirty(struct super_block *sb, item->dirty = 1; } - update_pg_max_liv_seq(pg, item); + update_pg_max_seq(pg, item); } static void clear_item_dirty(struct super_block *sb, @@ -711,7 +708,7 @@ static void move_page_items(struct super_block *sb, if (stop && scoutfs_key_compare(&from->key, stop) >= 0) break; - to = alloc_item(right, &from->key, &from->liv, from->val, + to = alloc_item(right, &from->key, from->seq, from->deletion, from->val, from->val_len); rbtree_insert(&to->node, par, pnode, &right->item_root); par = &to->node; @@ -723,7 +720,6 @@ static void move_page_items(struct super_block *sb, } to->persistent = from->persistent; - to->deletion = from->deletion; erase_item(left, from); } @@ -1356,11 +1352,11 @@ static void del_active_reader(struct item_cache_info *cinf, struct active_reader * insert old versions of items into the tree here so that the trees * don't have to compare seqs. 
*/ -static int read_page_item(struct super_block *sb, struct scoutfs_key *key, - struct scoutfs_log_item_value *liv, void *val, - int val_len, void *arg) +static int read_page_item(struct super_block *sb, struct scoutfs_key *key, u64 seq, u8 flags, + void *val, int val_len, void *arg) { DECLARE_ITEM_CACHE_INFO(sb, cinf); + const bool deletion = !!(flags & SCOUTFS_ITEM_FLAG_DELETION); struct rb_root *root = arg; struct cached_page *right = NULL; struct cached_page *left = NULL; @@ -1374,7 +1370,7 @@ static int read_page_item(struct super_block *sb, struct scoutfs_key *key, pg = page_rbtree_walk(sb, root, key, key, NULL, NULL, &p_par, &p_pnode); found = item_rbtree_walk(&pg->item_root, key, NULL, &par, &pnode); - if (found && (le64_to_cpu(found->liv.seq) >= le64_to_cpu(liv->seq))) + if (found && (found->seq >= seq)) return 0; if (!page_has_room(pg, val_len)) { @@ -1388,7 +1384,7 @@ static int read_page_item(struct super_block *sb, struct scoutfs_key *key, &pnode); } - item = alloc_item(pg, key, liv, val, val_len); + item = alloc_item(pg, key, seq, deletion, val, val_len); if (!item) { /* simpler split of private pages, no locking/dirty/lru */ if (!left) @@ -1411,7 +1407,7 @@ static int read_page_item(struct super_block *sb, struct scoutfs_key *key, put_pg(sb, pg); pg = scoutfs_key_compare(key, &left->end) <= 0 ? left : right; - item = alloc_item(pg, key, liv, val, val_len); + item = alloc_item(pg, key, seq, deletion, val, val_len); found = item_rbtree_walk(&pg->item_root, key, NULL, &par, &pnode); @@ -1824,11 +1820,11 @@ out: * to the last stable seq and ensure that all the items in open * transactions and granted locks will have greater seqs. 
*/ -static __le64 item_seq(struct super_block *sb, struct scoutfs_lock *lock) +static u64 item_seq(struct super_block *sb, struct scoutfs_lock *lock) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - return cpu_to_le64(max(sbi->trans_seq, lock->write_seq)); + return max(sbi->trans_seq, lock->write_seq); } /* @@ -1863,7 +1859,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key *key, if (!item || item->deletion) { ret = -ENOENT; } else { - item->liv.seq = item_seq(sb, lock); + item->seq = item_seq(sb, lock); mark_item_dirty(sb, cinf, pg, NULL, item); ret = 0; } @@ -1883,9 +1879,7 @@ static int item_create(struct super_block *sb, struct scoutfs_key *key, int mode, bool force) { DECLARE_ITEM_CACHE_INFO(sb, cinf); - struct scoutfs_log_item_value liv = { - .seq = item_seq(sb, lock), - }; + const u64 seq = item_seq(sb, lock); struct cached_item *found; struct cached_item *item; struct cached_page *pg; @@ -1913,7 +1907,7 @@ static int item_create(struct super_block *sb, struct scoutfs_key *key, goto unlock; } - item = alloc_item(pg, key, &liv, val, val_len); + item = alloc_item(pg, key, seq, false, val, val_len); rbtree_insert(&item->node, par, pnode, &pg->item_root); mark_item_dirty(sb, cinf, pg, NULL, item); @@ -1958,9 +1952,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key, void *val, int val_len, struct scoutfs_lock *lock) { DECLARE_ITEM_CACHE_INFO(sb, cinf); - struct scoutfs_log_item_value liv = { - .seq = item_seq(sb, lock), - }; + const u64 seq = item_seq(sb, lock); struct cached_item *item; struct cached_item *found; struct cached_page *pg; @@ -1995,10 +1987,10 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key, pg->erased_bytes += item_val_bytes(found->val_len) - item_val_bytes(val_len); found->val_len = val_len; - found->liv.seq = liv.seq; + found->seq = seq; mark_item_dirty(sb, cinf, pg, NULL, found); } else { - item = alloc_item(pg, key, &liv, val, val_len); + item = alloc_item(pg, key, seq, 
false, val, val_len); item->persistent = found->persistent; rbtree_insert(&item->node, par, pnode, &pg->item_root); mark_item_dirty(sb, cinf, pg, NULL, item); @@ -2026,9 +2018,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key, struct scoutfs_lock *lock, int mode, bool force) { DECLARE_ITEM_CACHE_INFO(sb, cinf); - struct scoutfs_log_item_value liv = { - .seq = item_seq(sb, lock), - }; + const u64 seq = item_seq(sb, lock); struct cached_item *item; struct cached_page *pg; struct rb_node **pnode; @@ -2056,7 +2046,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key, } if (!item) { - item = alloc_item(pg, key, &liv, NULL, 0); + item = alloc_item(pg, key, seq, false, NULL, 0); rbtree_insert(&item->node, par, pnode, &pg->item_root); } @@ -2069,8 +2059,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key, erase_item(pg, item); } else { /* must emit deletion to clobber old persistent item */ - item->liv.seq = liv.seq; - item->liv.flags |= SCOUTFS_LOG_ITEM_FLAG_DELETION; + item->seq = seq; item->deletion = 1; pg->erased_bytes += item_val_bytes(item->val_len) - item_val_bytes(0); @@ -2157,16 +2146,10 @@ int scoutfs_item_write_dirty(struct super_block *sb) LIST_HEAD(pages); LIST_HEAD(pos); u64 max_seq = 0; - int val_len; int bytes; int off; int ret; - /* we're relying on struct layout to prepend item value headers */ - BUILD_BUG_ON(offsetof(struct cached_item, val) != - (offsetof(struct cached_item, liv) + - member_sizeof(struct cached_item, liv))); - if (atomic_read(&cinf->dirty_pages) == 0) return 0; @@ -2218,10 +2201,9 @@ int scoutfs_item_write_dirty(struct super_block *sb) list_sort(NULL, &pg->dirty_list, cmp_item_key); list_for_each_entry(item, &pg->dirty_list, dirty_head) { - val_len = sizeof(item->liv) + item->val_len; bytes = offsetof(struct scoutfs_btree_item_list, - val[val_len]); - max_seq = max(max_seq, le64_to_cpu(item->liv.seq)); + val[item->val_len]); + max_seq = max(max_seq, item->seq); if 
(off + bytes > PAGE_SIZE) { page = second; @@ -2237,8 +2219,10 @@ int scoutfs_item_write_dirty(struct super_block *sb) prev = &lst->next; lst->key = item->key; - lst->val_len = val_len; - memcpy(lst->val, &item->liv, val_len); + lst->seq = item->seq; + lst->flags = item->deletion ? SCOUTFS_ITEM_FLAG_DELETION : 0; + lst->val_len = item->val_len; + memcpy(lst->val, item->val, item->val_len); } spin_lock(&cinf->dirty_lock); @@ -2467,7 +2451,7 @@ static int item_lru_shrink(struct shrinker *shrink, list_for_each_entry_safe(pg, tmp, &cinf->lru_list, lru_head) { - if (first_reader_seq <= pg->max_liv_seq) { + if (first_reader_seq <= pg->max_seq) { scoutfs_inc_counter(sb, item_shrink_page_reader); continue; } diff --git a/utils/src/btree.c b/utils/src/btree.c index 201c47a5..5a02ca8a 100644 --- a/utils/src/btree.c +++ b/utils/src/btree.c @@ -75,6 +75,9 @@ void btree_append_item(struct scoutfs_btree_block *bt, le16_add_cpu(&bt->total_item_bytes, sizeof(struct scoutfs_btree_item)); item->key = *key; + item->seq = cpu_to_le64(1); + item->flags = 0; + leaf_item_hash_insert(bt, &item->key, cpu_to_le16((void *)item - (void *)bt)); if (val_len == 0) diff --git a/utils/src/print.c b/utils/src/print.c index 05e884b3..efcb4f85 100644 --- a/utils/src/print.c +++ b/utils/src/print.c @@ -178,15 +178,19 @@ static print_func_t find_printer(u8 zone, u8 type) return NULL; } -static int print_fs_item(struct scoutfs_key *key, void *val, +#define flag_char(val, bit, c) \ + (((val) & (bit)) ? 
(c) : '-') + +static int print_fs_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val, unsigned val_len, void *arg) { print_func_t printer; - printf(" "SK_FMT"\n", SK_ARG(key)); + printf(" "SK_FMT" %llu %c\n", + SK_ARG(key), seq, flag_char(flags, SCOUTFS_ITEM_FLAG_DELETION, 'd')); /* only items in leaf blocks have values */ - if (val) { + if (val != NULL && !(flags & SCOUTFS_ITEM_FLAG_DELETION)) { printer = find_printer(key->sk_zone, key->sk_type); if (printer) printer(key, val, val_len); @@ -198,37 +202,6 @@ static int print_fs_item(struct scoutfs_key *key, void *val, return 0; } -/* same as fs item but with a small header in the value */ -static int print_logs_item(struct scoutfs_key *key, void *val, - unsigned val_len, void *arg) -{ - struct scoutfs_log_item_value *liv; - print_func_t printer; - - printf(" "SK_FMT"\n", SK_ARG(key)); - - /* only items in leaf blocks have values */ - if (val) { - liv = val; - printf(" log_item_value: seq %llu flags %x\n", - le64_to_cpu(liv->seq), liv->flags); - - /* deletion items don't have values */ - if (!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION)) { - printer = find_printer(key->sk_zone, - key->sk_type); - if (printer) - printer(key, val + sizeof(*liv), - val_len - sizeof(*liv)); - else - printf(" (unknown zone %u type %u)\n", - key->sk_zone, key->sk_type); - } - } - - return 0; -} - #define BTREF_F \ "blkno %llu seq %llu" #define BTREF_A(ref) \ @@ -269,7 +242,7 @@ static int print_logs_item(struct scoutfs_key *key, void *val, le64_to_cpu((srf)->ref.seq) /* same as fs item but with a small header in the value */ -static int print_log_trees_item(struct scoutfs_key *key, void *val, +static int print_log_trees_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val, unsigned val_len, void *arg) { struct scoutfs_log_trees *lt = val; @@ -330,7 +303,7 @@ static int print_log_trees_item(struct scoutfs_key *key, void *val, return 0; } -static int print_srch_root_item(struct scoutfs_key *key, void *val, +static int 
print_srch_root_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val, unsigned val_len, void *arg) { struct scoutfs_srch_compact *sc; @@ -363,7 +336,7 @@ static int print_srch_root_item(struct scoutfs_key *key, void *val, return 0; } -static int print_trans_seqs_entry(struct scoutfs_key *key, void *val, +static int print_trans_seqs_entry(struct scoutfs_key *key, u64 seq, u8 flags, void *val, unsigned val_len, void *arg) { printf(" trans_seq %llu rid %016llx\n", @@ -372,7 +345,7 @@ static int print_trans_seqs_entry(struct scoutfs_key *key, void *val, return 0; } -static int print_mounted_client_entry(struct scoutfs_key *key, void *val, +static int print_mounted_client_entry(struct scoutfs_key *key, u64 seq, u8 flags, void *val, unsigned val_len, void *arg) { struct scoutfs_mounted_client_btree_val *mcv = val; @@ -387,8 +360,8 @@ static int print_mounted_client_entry(struct scoutfs_key *key, void *val, return 0; } -static int print_log_merge_item(struct scoutfs_key *key, void *val, - unsigned val_len, void *arg) +static int print_log_merge_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val, + unsigned val_len, void *arg) { struct scoutfs_log_merge_status *stat; struct scoutfs_log_merge_range *rng; @@ -451,7 +424,7 @@ static int print_log_merge_item(struct scoutfs_key *key, void *val, return 0; } -static int print_alloc_item(struct scoutfs_key *key, void *val, +static int print_alloc_item(struct scoutfs_key *key, u64 seq, u8 flags, void *val, unsigned val_len, void *arg) { if (key->sk_zone == SCOUTFS_FREE_EXTENT_BLKNO_ZONE) @@ -469,7 +442,7 @@ static int print_alloc_item(struct scoutfs_key *key, void *val, return 0; } -typedef int (*print_item_func)(struct scoutfs_key *key, void *val, +typedef int (*print_item_func)(struct scoutfs_key *key, u64 seq, u8 flags, void *val, unsigned val_len, void *arg); static int print_block_ref(struct scoutfs_key *key, void *val, @@ -477,7 +450,7 @@ static int print_block_ref(struct scoutfs_key *key, void *val, { struct 
scoutfs_block_ref *ref = val; - func(key, NULL, 0, arg); + func(key, 0, 0, NULL, 0, arg); printf(" ref blkno %llu seq %llu\n", le64_to_cpu(ref->blkno), le64_to_cpu(ref->seq)); @@ -586,7 +559,7 @@ static int print_btree_block(int fd, struct scoutfs_super_block *super, if (level) print_block_ref(key, val, val_len, func, arg); else - func(key, val, val_len, arg); + func(key, le64_to_cpu(item->seq), item->flags, val, val_len, arg); } free(bt); @@ -744,8 +717,8 @@ struct print_recursion_args { }; /* same as fs item but with a small header in the value */ -static int print_log_trees_roots(struct scoutfs_key *key, void *val, - unsigned val_len, void *arg) +static int print_log_trees_roots(struct scoutfs_key *key, u64 seq, u8 flags, void *val, + unsigned val_len, void *arg) { struct scoutfs_log_trees *lt = val; struct print_recursion_args *pa = arg; @@ -776,14 +749,14 @@ static int print_log_trees_roots(struct scoutfs_key *key, void *val, ret = err; err = print_btree(pa->fd, pa->super, "", <->item_root, - print_logs_item, NULL); + print_fs_item, NULL); if (err && !ret) ret = err; return ret; } -static int print_srch_root_files(struct scoutfs_key *key, void *val, +static int print_srch_root_files(struct scoutfs_key *key, u64 seq, u8 flags, void *val, unsigned val_len, void *arg) { struct print_recursion_args *pa = arg; @@ -843,7 +816,7 @@ static int print_btree_leaf_items(int fd, struct scoutfs_super_block *super, break; continue; } else { - func(key, val, val_len, arg); + func(key, le64_to_cpu(item->seq), item->flags, val, val_len, arg); } node = avl_next(&bt->item_root, node);