diff --git a/kmod/src/btree.c b/kmod/src/btree.c index 90493967..d17a1b76 100644 --- a/kmod/src/btree.c +++ b/kmod/src/btree.c @@ -56,7 +56,7 @@ * XXX * - do we want a level in the btree header? seems like we would? * - validate structures on read? - * - internal bh/pos/cmp interface is clumsy.. could use cursor + * - internal bh/pos/cmp interface is clumsy.. */ /* number of contiguous bytes used by the item header and val of given len */ @@ -121,6 +121,73 @@ static inline struct scoutfs_key *greatest_key(struct scoutfs_btree_block *bt) return &pos_item(bt, bt->nr_items - 1)->key; } +/* + * Copy as much of the item's value as fits in the value vector. The + * lesser of the value vector length and the item value length is + * returned, possibly 0. + */ +static int copy_to_val(struct scoutfs_btree_val *val, + struct scoutfs_btree_item *item) +{ + size_t val_len = le16_to_cpu(item->val_len); + char *val_ptr = item->val; + struct kvec *kv; + size_t bytes; + size_t off; + int i; + + for (i = 0, off = 0; val_len > 0 && i < ARRAY_SIZE(val->vec); i++) { + kv = &val->vec[i]; + + if (WARN_ON_ONCE(kv->iov_len && !kv->iov_base)) + return -EINVAL; + + bytes = min(val_len, kv->iov_len); + if (bytes) + memcpy(kv->iov_base, val_ptr + off, bytes); + + val_len -= bytes; + off += bytes; + } + + return off; +} + +/* + * Copy the caller's value vector into the item in the tree block. This + * is only called when the item should exactly match the value vector. + * + * -EINVAL is returned if the lengths don't match. + */ +static int copy_to_item(struct scoutfs_btree_item *item, + struct scoutfs_btree_val *val) +{ + size_t val_len = le16_to_cpu(item->val_len); + char *val_ptr = item->val; + struct kvec *kv; + size_t bytes; + int i; + + if (val_len != scoutfs_btree_val_length(val)) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(val->vec); i++) { + kv = &val->vec[i]; + + if (WARN_ON_ONCE(kv->iov_len && !kv->iov_base)) + return -EINVAL; + + bytes = min(val_len, kv->iov_len); + if (bytes) + memcpy(val_ptr, kv->iov_base, bytes); + + val_len -= bytes; + val_ptr += bytes; + } + + return 0; +} + /* * Returns the sorted item position that an item with the given key * should occupy. @@ -964,38 +1031,25 @@ static struct buffer_head *btree_walk(struct super_block *sb, return bh; } -static void set_cursor(struct scoutfs_btree_cursor *curs, - struct buffer_head *bh, unsigned int pos, bool write) -{ - struct scoutfs_btree_block *bt = bh_data(bh); - struct scoutfs_btree_item *item = pos_item(bt, pos); - - curs->bh = bh; - curs->pos = pos; - curs->write = write; - - curs->key = &item->key; - curs->seq = le64_to_cpu(item->seq); - curs->val = item->val; - curs->val_len = le16_to_cpu(item->val_len); -} - /* - * Point the caller's cursor at the item if it's found. It can't be - * modified. -ENOENT is returned if the key isn't found in the tree. + * Copy the value of the item with the given key into the caller's + * buffer. The number of bytes copied is returned, -ENOENT if the key + * wasn't found, or -errno on errors.
*/ int scoutfs_btree_lookup(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *key, - struct scoutfs_btree_cursor *curs) + struct scoutfs_btree_val *val) { + struct scoutfs_btree_item *item; struct scoutfs_btree_block *bt; struct buffer_head *bh; unsigned int pos; int cmp; int ret; - BUG_ON(curs->bh); + trace_printk("key "CKF" val_len %d\n", + CKA(key), scoutfs_btree_val_length(val)); bh = btree_walk(sb, root, key, NULL, 0, 0, 0); if (IS_ERR(bh)) @@ -1004,37 +1058,49 @@ int scoutfs_btree_lookup(struct super_block *sb, pos = find_pos(bt, key, &cmp); if (cmp == 0) { - set_cursor(curs, bh, pos, false); - ret = 0; + item = pos_item(bt, pos); + ret = copy_to_val(val, item); } else { - unlock_block(NULL, bh, false); - scoutfs_block_put(bh); ret = -ENOENT; } + unlock_block(NULL, bh, false); + scoutfs_block_put(bh); + + trace_printk("key "CKF" ret %d\n", CKA(key), ret); + return ret; } /* - * Insert a new item in the tree and point the caller's cursor at it. - * The caller is responsible for setting the value. + * Insert a new item in the tree. * - * -EEXIST is returned if the key is already present in the tree. + * 0 is returned on success. -EEXIST is returned if the key is already + * present in the tree. * - * XXX this walks the treap twice, which isn't great + * If no value pointer is given then the item is created with a zero + * length value. */ int scoutfs_btree_insert(struct super_block *sb, struct scoutfs_btree_root *root, - struct scoutfs_key *key, unsigned int val_len, - struct scoutfs_btree_cursor *curs) + struct scoutfs_key *key, + struct scoutfs_btree_val *val) { + struct scoutfs_btree_item *item; struct scoutfs_btree_block *bt; struct buffer_head *bh; + unsigned int val_len; int pos; int cmp; int ret; - BUG_ON(curs->bh); + if (val) + val_len = scoutfs_btree_val_length(val); + else + val_len = 0; + + if (WARN_ON_ONCE(val_len > SCOUTFS_MAX_ITEM_LEN)) + return -EINVAL; bh = btree_walk(sb, root, key, NULL, val_len, 0, WALK_INSERT); if (IS_ERR(bh)) @@ -1043,15 +1109,18 @@ int scoutfs_btree_insert(struct super_block *sb, pos = find_pos(bt, key, &cmp); if (cmp) { - create_item(bt, pos, key, val_len); - set_cursor(curs, bh, pos, true); - ret = 0; + item = create_item(bt, pos, key, val_len); + if (val) + ret = copy_to_item(item, val); + else + ret = 0; } else { - unlock_block(NULL, bh, true); - scoutfs_block_put(bh); ret = -EEXIST; } + unlock_block(NULL, bh, true); + scoutfs_block_put(bh); + return ret; } @@ -1104,48 +1173,46 @@ out: } /* - * Iterate over items in the tree starting with first and ending with - * last. We point the cursor at each item and return to the caller. - * The caller continues the search with the cursor. + * Find the next key in the tree starting from 'first' and ending at + * 'last'. 'found', 'found_seq', and 'val' are set to the discovered + * item if they're provided. * * The caller can limit results to items with a sequence number greater * than or equal to their sequence number. * - * When there isn't an item in the cursor then we walk the btree to the - * leaf that should contain the key and look for items from there. When - * we exhaust leaves we search the tree again from the next key that was - * increased past the leaf's parent's item. + * The only tricky bit is that the key we're searching for might not + * exist in the tree. We can get to the leaf and find that there are no + * greater items in the leaf. We have to search again from the keys + * greater than the parent item's keys which the walk gives us.
We also + * start the search over from this next key if walking while filtering + * based on seqs terminates early. * - * Returns > 0 when the cursor has an item, 0 when done, and -errno on error. + * Returns the bytes copied into the value (0 if none is provided), -ENOENT + * if there is no item from first through last, or -errno on errors. + * + * It's a common pattern to use the same key for first and found so we're + * careful to copy first before we modify found. */ static int btree_next(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *first, struct scoutfs_key *last, - u64 seq, int op, struct scoutfs_btree_cursor *curs) + u64 seq, int op, struct scoutfs_key *found, + u64 *found_seq, struct scoutfs_btree_val *val) { + struct scoutfs_btree_item *item; struct scoutfs_btree_block *bt; - struct buffer_head *bh; + struct scoutfs_key start = *first; struct scoutfs_key key = *first; struct scoutfs_key next_key; + struct buffer_head *bh; + int pos; int ret; - if (scoutfs_key_cmp(first, last) > 0) - return 0; - - /* find the next item after the cursor, releasing if we're done */ - if (curs->bh) { - bt = bh_data(curs->bh); - key = *curs->key; - scoutfs_inc_key(&key); - - curs->pos = next_pos_seq(bt, curs->pos, 0, seq, op); - if (curs->pos < bt->nr_items) - set_cursor(curs, curs->bh, curs->pos, curs->write); - else - scoutfs_btree_release(curs); - } + trace_printk("finding next first "CKF" last "CKF"\n", + CKA(&start), CKA(last)); /* find the leaf that contains the next item after the key */ - while (!curs->bh && scoutfs_key_cmp(&key, last) <= 0) { + ret = -ENOENT; + while (scoutfs_key_cmp(&key, last) <= 0) { bh = btree_walk(sb, root, &key, &next_key, 0, seq, op); @@ -1156,49 +1223,60 @@ static int btree_next(struct super_block *sb, struct scoutfs_btree_root *root, } if (IS_ERR(bh)) { - if (bh == ERR_PTR(-ENOENT)) - break; - return PTR_ERR(bh); + ret = PTR_ERR(bh); + break; } bt = bh_data(bh); /* keep trying leaves until next_key passes last */ - curs->pos = find_pos_after_seq(bt, &key, 0, seq, op); - if (curs->pos >= bt->nr_items) { + pos = find_pos_after_seq(bt, &key, 0, seq, op); + if (pos >= bt->nr_items) { key = next_key; unlock_block(NULL, bh, false); scoutfs_block_put(bh); continue; } - set_cursor(curs, bh, curs->pos, false); + item = pos_item(bt, pos); + if (scoutfs_key_cmp(&item->key, last) <= 0) { + *found = item->key; + if (found_seq) + *found_seq = le64_to_cpu(item->seq); + if (val) + ret = copy_to_val(val, item); + else + ret = 0; + } else { + ret = -ENOENT; + } + + unlock_block(NULL, bh, false); + scoutfs_block_put(bh); break; } - /* only return the next item if it's within last */ - if (curs->bh && scoutfs_key_cmp(curs->key, last) <= 0) { - ret = 1; - } else { - scoutfs_btree_release(curs); - ret = 0; - } - + trace_printk("next first "CKF" last "CKF" found "CKF" ret %d\n", + CKA(&start), CKA(last), CKA(found), ret); return ret; } int scoutfs_btree_next(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *first, struct scoutfs_key *last, - struct scoutfs_btree_cursor *curs) + struct scoutfs_key *found, + struct scoutfs_btree_val *val) { - return btree_next(sb, root, first, last, 0, WALK_NEXT, curs); + return btree_next(sb, root, first, last, 0, WALK_NEXT, + found, NULL, val); } int scoutfs_btree_since(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *first, struct scoutfs_key *last, - u64 seq, struct scoutfs_btree_cursor *curs) + u64 seq, struct scoutfs_key *found, u64 *found_seq, + struct scoutfs_btree_val
*val) { - return btree_next(sb, root, first, last, seq, WALK_NEXT_SEQ, curs); + return btree_next(sb, root, first, last, seq, WALK_NEXT_SEQ, + found, found_seq, val); } /* @@ -1217,6 +1295,8 @@ int scoutfs_btree_dirty(struct super_block *sb, int cmp; int ret; + trace_printk("key "CKF"\n", CKA(key)); + bh = btree_walk(sb, root, key, NULL, 0, 0, WALK_DIRTY); if (IS_ERR(bh)) return PTR_ERR(bh); @@ -1232,17 +1312,22 @@ int scoutfs_btree_dirty(struct super_block *sb, unlock_block(NULL, bh, true); scoutfs_block_put(bh); + trace_printk("key "CKF" ret %d\n", CKA(key), ret); + return ret; } /* * This is guaranteed not to fail if the caller has already dirtied the * block that contains the item in the current transaction. + * + * 0 is returned on success. -EINVAL is returned if the caller's value + * length doesn't match the existing item's value length. */ int scoutfs_btree_update(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *key, - struct scoutfs_btree_cursor *curs) + struct scoutfs_btree_val *val) { struct scoutfs_btree_item *item; struct scoutfs_btree_block *bt; @@ -1251,8 +1336,6 @@ int scoutfs_btree_update(struct super_block *sb, int cmp; int ret; - BUG_ON(curs->bh); - bh = btree_walk(sb, root, key, NULL, 0, 0, WALK_DIRTY); if (IS_ERR(bh)) return PTR_ERR(bh); @@ -1261,59 +1344,64 @@ int scoutfs_btree_update(struct super_block *sb, pos = find_pos(bt, key, &cmp); if (cmp == 0) { item = pos_item(bt, pos); - item->seq = bt->hdr.seq; - set_cursor(curs, bh, pos, true); - ret = 0; + ret = copy_to_item(item, val); + if (ret == 0) + item->seq = bt->hdr.seq; } else { - unlock_block(NULL, bh, true); - scoutfs_block_put(bh); ret = -ENOENT; } + unlock_block(NULL, bh, true); + scoutfs_block_put(bh); + return ret; } -void scoutfs_btree_release(struct scoutfs_btree_cursor *curs) -{ - if (curs->bh) { - unlock_block(NULL, curs->bh, curs->write); - scoutfs_block_put(curs->bh); - } - curs->bh = NULL; -} - /* - * Find the first missing key between the caller's keys, inclusive. Set - * the caller's hole key and return 0 if we find a missing key. Return - * -ENOSPC if all the keys in the range were present or -errno on errors. + * Set hole to a missing key in the caller's range. * - * The caller ensures that it's safe for us to be walking this region - * of the tree. + * 0 is returned if we find a missing key, -ENOSPC is returned if all + * the keys in the range are present in the tree, and -errno is returned + * if we saw an error. + * + * We try to find the first key in the range. If the next key is past + * the first key then we return the key before the found key. This will + * tend to let us find the hole with one btree search. + * + * We keep searching as long as we keep finding the first key and will + * return -ENOSPC if we fall off the end of the range doing so. 
*/ int scoutfs_btree_hole(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *first, struct scoutfs_key *last, struct scoutfs_key *hole) { - DECLARE_SCOUTFS_BTREE_CURSOR(curs); + struct scoutfs_key key = *first; + struct scoutfs_key found; int ret; - *hole = *first; - while ((ret = scoutfs_btree_next(sb, root, first, last, &curs)) > 0) { - /* return our expected hole if we skipped it */ - if (scoutfs_key_cmp(hole, curs.key) < 0) - break; - - *hole = *curs.key; - scoutfs_inc_key(hole); + if (WARN_ON_ONCE(scoutfs_key_cmp(first, last) > 0)) { + scoutfs_key_set_zero(hole); + return -EINVAL; } - scoutfs_btree_release(&curs); - if (ret >= 0) { - if (scoutfs_key_cmp(hole, last) <= 0) - ret = 0; - else - ret = -ENOSPC; + /* search as long as we keep finding our first key */ + do { + ret = scoutfs_btree_next(sb, root, &key, last, &found, NULL); + } while (ret == 0 && + scoutfs_key_cmp(&found, &key) == 0 && + (scoutfs_inc_key(&key), ret = -ENOSPC, + scoutfs_key_cmp(&key, last) <= 0)); + + if (ret == 0) { + *hole = found; + scoutfs_dec_key(hole); + } else if (ret == -ENOENT) { + *hole = *last; + ret = 0; } + trace_printk("first "CKF" last "CKF" hole "CKF" ret %d\n", + CKA(first), CKA(last), CKA(hole), ret); + return ret; } diff --git a/kmod/src/btree.h b/kmod/src/btree.h index a7a200fd..dc22b4c6 100644 --- a/kmod/src/btree.h +++ b/kmod/src/btree.h @@ -1,51 +1,71 @@ #ifndef _SCOUTFS_BTREE_H_ #define _SCOUTFS_BTREE_H_ -struct scoutfs_btree_cursor { - /* for btree.c */ - struct buffer_head *bh; - unsigned int pos; - bool write; +#include <linux/uio.h> - /* for callers */ - struct scoutfs_key *key; - u64 seq; - void *val; - u16 val_len; +struct scoutfs_btree_val { + struct kvec vec[3]; }; -#define DECLARE_SCOUTFS_BTREE_CURSOR(name) \ - struct scoutfs_btree_cursor name = {NULL,} +static inline void __scoutfs_btree_init_val(struct scoutfs_btree_val *val, + void *ptr0, unsigned int len0, + void *ptr1, unsigned int len1, + void *ptr2, unsigned int len2) +{ + *val = (struct scoutfs_btree_val) { + { { ptr0, len0 }, { ptr1, len1 }, { ptr2, len2 } } + }; +} + +#define _scoutfs_btree_init_val(v, p0, l0, p1, l1, p2, l2, ...) \ + __scoutfs_btree_init_val(v, p0, l0, p1, l1, p2, l2) + +/* + * Provide a nice variadic initialization function without having to + * iterate over the caller's arg types. We play some macro games to pad + * out the caller's ptr/len pairs to the full possible number. This will + * produce confusing errors if an odd number of arguments is given and + * the padded ptr/length types aren't compatible with the fixed + * arguments in the static inline. + */ +#define scoutfs_btree_init_val(val, ...)
\ + _scoutfs_btree_init_val(val, __VA_ARGS__, NULL, 0, NULL, 0, NULL, 0) + +static inline int scoutfs_btree_val_length(struct scoutfs_btree_val *val) +{ + + return iov_length((struct iovec *)val->vec, ARRAY_SIZE(val->vec)); +} int scoutfs_btree_lookup(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *key, - struct scoutfs_btree_cursor *curs); + struct scoutfs_btree_val *val); int scoutfs_btree_insert(struct super_block *sb, struct scoutfs_btree_root *root, - struct scoutfs_key *key, unsigned int val_len, - struct scoutfs_btree_cursor *curs); + struct scoutfs_key *key, + struct scoutfs_btree_val *val); int scoutfs_btree_delete(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *key); int scoutfs_btree_next(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *first, struct scoutfs_key *last, - struct scoutfs_btree_cursor *curs); + struct scoutfs_key *found, + struct scoutfs_btree_val *val); int scoutfs_btree_dirty(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *key); int scoutfs_btree_update(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *key, - struct scoutfs_btree_cursor *curs); + struct scoutfs_btree_val *val); int scoutfs_btree_hole(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *first, struct scoutfs_key *last, struct scoutfs_key *hole); int scoutfs_btree_since(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_key *first, struct scoutfs_key *last, - u64 seq, struct scoutfs_btree_cursor *curs); - -void scoutfs_btree_release(struct scoutfs_btree_cursor *curs); + u64 seq, struct scoutfs_key *found, u64 *found_seq, + struct scoutfs_btree_val *val); #endif diff --git a/kmod/src/dir.c b/kmod/src/dir.c index 13f51a7d..e1c90f31 100644 --- a/kmod/src/dir.c +++ b/kmod/src/dir.c @@ -112,11 +112,6 @@ static unsigned int dent_bytes(unsigned int name_len) return sizeof(struct scoutfs_dirent) + name_len; } -static unsigned int item_name_len(struct scoutfs_btree_cursor *curs) -{ - return curs->val_len - sizeof(struct scoutfs_dirent); -} - /* * Each dirent stores the values that are needed to build the keys of * the items that are removed on unlink so that we don't to search through @@ -190,13 +185,14 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct scoutfs_inode_info *si = SCOUTFS_I(dir); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); struct super_block *sb = dir->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - struct scoutfs_dirent *dent; + struct scoutfs_dirent *dent = NULL; + struct scoutfs_btree_val val; struct dentry_info *di; - struct scoutfs_key first; struct scoutfs_key last; + struct scoutfs_key key; + unsigned int item_len; unsigned int name_len; struct inode *inode; u64 ino = 0; @@ -214,29 +210,52 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry, goto out; } + item_len = offsetof(struct scoutfs_dirent, name[dentry->d_name.len]); + dent = kmalloc(item_len, GFP_KERNEL); + if (!dent) { + ret = -ENOMEM; + goto out; + } + h = name_hash(dentry->d_name.name, dentry->d_name.len, si->salt); - scoutfs_set_key(&first, scoutfs_ino(dir), SCOUTFS_DIRENT_KEY, h); + scoutfs_set_key(&key, scoutfs_ino(dir), SCOUTFS_DIRENT_KEY, h); scoutfs_set_key(&last, scoutfs_ino(dir), SCOUTFS_DIRENT_KEY, last_dirent_key_offset(h)); - while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) { + scoutfs_btree_init_val(&val, dent, 
item_len); - /* XXX verify */ + for (;;) { + ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + break; + } - dent = curs.val; - name_len = item_name_len(&curs); + /* XXX more verification */ + /* XXX corruption */ + if (ret <= sizeof(struct scoutfs_dirent)) { + ret = -EIO; + break; + } + + name_len = ret - sizeof(struct scoutfs_dirent); if (scoutfs_names_equal(dentry->d_name.name, dentry->d_name.len, dent->name, name_len)) { ino = le64_to_cpu(dent->ino); - update_dentry_info(di, curs.key, dent); + update_dentry_info(di, &key, dent); + ret = 0; break; } + + scoutfs_inc_key(&key); } - scoutfs_btree_release(&curs); - out: + kfree(dent); + if (ret < 0) inode = ERR_PTR(ret); else if (ino == 0) @@ -281,26 +300,46 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir) struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); + struct scoutfs_btree_val val; struct scoutfs_dirent *dent; - struct scoutfs_key first; + struct scoutfs_key key; struct scoutfs_key last; + unsigned int item_len; unsigned int name_len; - int ret; u32 pos; + int ret; if (!dir_emit_dots(file, dirent, filldir)) return 0; - scoutfs_set_key(&first, scoutfs_ino(inode), SCOUTFS_DIRENT_KEY, + item_len = offsetof(struct scoutfs_dirent, name[SCOUTFS_NAME_LEN]); + dent = kmalloc(item_len, GFP_KERNEL); + if (!dent) + return -ENOMEM; + + scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_DIRENT_KEY, file->f_pos); scoutfs_set_key(&last, scoutfs_ino(inode), SCOUTFS_DIRENT_KEY, SCOUTFS_DIRENT_LAST_POS); - while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) { - dent = curs.val; - name_len = item_name_len(&curs); - pos = scoutfs_key_offset(curs.key); + scoutfs_btree_init_val(&val, dent, item_len); + + for (;;) { + ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + break; + } + + /* XXX corruption */ + if (ret <= sizeof(struct scoutfs_dirent)) { + ret = -EIO; + break; + } + + name_len = ret - sizeof(struct scoutfs_dirent); + pos = scoutfs_key_offset(&key); if (filldir(dirent, dent->name, name_len, pos, le64_to_cpu(dent->ino), dentry_type(dent->type))) { @@ -309,10 +348,10 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir) file->f_pos = pos + 1; + scoutfs_inc_key(&key); } - scoutfs_btree_release(&curs); - + kfree(dent); return ret; } @@ -325,22 +364,19 @@ static int update_lref_item(struct super_block *sb, struct scoutfs_key *key, u64 dir_ino, u64 dir_off, bool update) { struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); - struct scoutfs_link_backref *lref; + struct scoutfs_link_backref lref; + struct scoutfs_btree_val val; int ret; - if (update) - ret = scoutfs_btree_update(sb, meta, key, &curs); - else - ret = scoutfs_btree_insert(sb, meta, key, sizeof(*lref), &curs); + lref.ino = cpu_to_le64(dir_ino); + lref.offset = cpu_to_le64(dir_off); - /* XXX verify size */ - if (ret == 0) { - lref = curs.val; - lref->ino = cpu_to_le64(dir_ino); - lref->offset = cpu_to_le64(dir_off); - scoutfs_btree_release(&curs); - } + scoutfs_btree_init_val(&val, &lref, sizeof(lref)); + + if (update) + ret = scoutfs_btree_update(sb, meta, key, &val); + else + ret = scoutfs_btree_insert(sb, meta, key, &val); return ret; } @@ -352,8 +388,8 @@ static int add_entry_items(struct inode *dir, struct dentry *dentry, struct super_block *sb =
dir->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); struct scoutfs_inode_info *si = SCOUTFS_I(dir); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); - struct scoutfs_dirent *dent; + struct scoutfs_btree_val val; + struct scoutfs_dirent dent; struct scoutfs_key first; struct scoutfs_key last; struct scoutfs_key key; @@ -390,20 +426,19 @@ static int add_entry_items(struct inode *dir, struct dentry *dentry, if (ret) goto out; - ret = scoutfs_btree_insert(sb, meta, &key, bytes, &curs); - if (ret) { + dent.ino = cpu_to_le64(scoutfs_ino(inode)); + dent.counter = lref_key.offset; + dent.type = mode_to_type(inode->i_mode); + + scoutfs_btree_init_val(&val, &dent, sizeof(dent), + (void *)dentry->d_name.name, + dentry->d_name.len); + + ret = scoutfs_btree_insert(sb, meta, &key, &val); + if (ret) scoutfs_btree_delete(sb, meta, &lref_key); - goto out; - } - - dent = curs.val; - dent->ino = cpu_to_le64(scoutfs_ino(inode)); - dent->counter = lref_key.offset; - dent->type = mode_to_type(inode->i_mode); - memcpy(dent->name, dentry->d_name.name, dentry->d_name.len); - update_dentry_info(di, &key, dent); - - scoutfs_btree_release(&curs); + else + update_dentry_info(di, &key, &dent); out: return ret; } @@ -579,11 +614,11 @@ static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd) struct inode *inode = dentry->d_inode; struct super_block *sb = inode->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); loff_t size = i_size_read(inode); - struct scoutfs_key first; - struct scoutfs_key last; + struct scoutfs_btree_val val; + struct scoutfs_key key; char *path; + int bytes; int off; int ret; int k; @@ -600,24 +635,28 @@ static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd) if (!path) return ERR_PTR(-ENOMEM); - scoutfs_set_key(&first, scoutfs_ino(inode), SCOUTFS_SYMLINK_KEY, 0); - scoutfs_set_key(&last, scoutfs_ino(inode), SCOUTFS_SYMLINK_KEY, ~0ULL); + for (off = 0, k = 0; off < size ; k++) { + scoutfs_set_key(&key, scoutfs_ino(inode), + SCOUTFS_SYMLINK_KEY, k); + bytes = min_t(int, size - off, SCOUTFS_MAX_ITEM_LEN); + scoutfs_btree_init_val(&val, path + off, bytes); - off = 0; - k = 0; - while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) { - if (scoutfs_key_offset(curs.key) != k || - off + curs.val_len > size) { + ret = scoutfs_btree_lookup(sb, meta, &key, &val); + if (ret < 0) { /* XXX corruption */ - scoutfs_btree_release(&curs); + if (ret == -ENOENT) + ret = -EIO; + break; + } + + /* XXX corruption */ + if (ret != bytes) { ret = -EIO; break; } - memcpy(path + off, curs.val, curs.val_len); - - off += curs.val_len; - k++; + off += bytes; + ret = 0; } /* XXX corruption */ @@ -661,7 +700,7 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry, { struct super_block *sb = dir->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); + struct scoutfs_btree_val val; struct inode *inode = NULL; struct scoutfs_key key; struct dentry_info *di; @@ -694,12 +733,11 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry, k); bytes = min(name_len - off, SCOUTFS_MAX_ITEM_LEN); - ret = scoutfs_btree_insert(sb, meta, &key, bytes, &curs); + scoutfs_btree_init_val(&val, (char *)symname + off, bytes); + + ret = scoutfs_btree_insert(sb, meta, &key, &val); if (ret) goto out; - - memcpy(curs.val, symname + off, bytes); - scoutfs_btree_release(&curs); } ret = add_entry_items(dir, dentry, inode); @@ -741,24 +779,22 @@ out: int scoutfs_symlink_drop(struct 
super_block *sb, u64 ino) { struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); - struct scoutfs_key first; - struct scoutfs_key last; struct scoutfs_key key; int ret; + int nr; + int k; - scoutfs_set_key(&first, ino, SCOUTFS_SYMLINK_KEY, 0); - scoutfs_set_key(&last, ino, SCOUTFS_SYMLINK_KEY, ~0ULL); + nr = DIV_ROUND_UP(SCOUTFS_SYMLINK_MAX_SIZE, SCOUTFS_MAX_ITEM_LEN); - while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) { - key = *curs.key; - first = *curs.key; - scoutfs_inc_key(&first); - scoutfs_btree_release(&curs); + for (k = 0; k < nr; k++) { + scoutfs_set_key(&key, ino, SCOUTFS_SYMLINK_KEY, k); ret = scoutfs_btree_delete(sb, meta, &key); - if (ret) + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; break; + } } return ret; } @@ -787,9 +823,9 @@ static int add_linkref_name(struct super_block *sb, u64 *dir_ino, u64 ino, { struct scoutfs_btree_root *meta = SCOUTFS_META(sb); struct scoutfs_path_component *comp; - DECLARE_SCOUTFS_BTREE_CURSOR(curs); - struct scoutfs_link_backref *lref; - struct scoutfs_dirent *dent; + struct scoutfs_link_backref lref; + struct scoutfs_btree_val val; + struct scoutfs_dirent dent; struct inode *inode = NULL; struct scoutfs_key first; struct scoutfs_key last; @@ -807,20 +843,28 @@ retry: scoutfs_set_key(&first, ino, SCOUTFS_LINK_BACKREF_KEY, *ctr); scoutfs_set_key(&last, ino, SCOUTFS_LINK_BACKREF_KEY, ~0ULL); - ret = scoutfs_btree_next(sb, meta, &first, &last, &curs); - if (ret <= 0) - goto out; + scoutfs_btree_init_val(&val, &lref, sizeof(lref)); - lref = curs.val; - *dir_ino = le64_to_cpu(lref->ino), - off = le64_to_cpu(lref->offset); - *ctr = scoutfs_key_offset(curs.key); + ret = scoutfs_btree_next(sb, meta, &first, &last, &key, &val); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + goto out; + } + + /* XXX corruption */ + if (ret != sizeof(lref)) { + ret = -EIO; + goto out; + } + + *dir_ino = le64_to_cpu(lref.ino); + off = le64_to_cpu(lref.offset); + *ctr = scoutfs_key_offset(&key); trace_printk("ino %llu ctr %llu dir_ino %llu off %llu\n", ino, *ctr, *dir_ino, off); - scoutfs_btree_release(&curs); - /* XXX corruption, should never be key == U64_MAX */ if (*ctr == U64_MAX) { ret = -EIO; @@ -852,8 +896,10 @@ retry: } scoutfs_set_key(&key, *dir_ino, SCOUTFS_DIRENT_KEY, off); + scoutfs_btree_init_val(&val, &dent, sizeof(dent), + comp->name, SCOUTFS_NAME_LEN); - ret = scoutfs_btree_lookup(sb, meta, &key, &curs); + ret = scoutfs_btree_lookup(sb, meta, &key, &val); if (ret < 0) { /* XXX corruption, should always have dirent for backref */ if (ret == -ENOENT) @@ -861,10 +907,14 @@ retry: goto out; } - dent = curs.val; - len = item_name_len(&curs); + /* XXX corruption */ + if (ret < sizeof(dent)) { + ret = -EIO; + goto out; + } - trace_printk("dent ino %llu len %d\n", le64_to_cpu(dent->ino), len); + len = ret - sizeof(dent); + trace_printk("dent ino %llu len %d\n", le64_to_cpu(dent.ino), len); /* XXX corruption */ if (len < 1 || len > SCOUTFS_NAME_LEN) { @@ -873,18 +923,16 @@ retry: } /* XXX corruption, dirents should always match link backref */ - if (le64_to_cpu(dent->ino) != ino) { + if (le64_to_cpu(dent.ino) != ino) { ret = -EIO; goto out; } (*ctr)++; comp->len = len; - memcpy(comp->name, dent->name, len); list_add(&comp->head, list); comp = NULL; /* won't be freed */ - scoutfs_btree_release(&curs); ret = 1; out: if (inode) { diff --git a/kmod/src/filerw.c b/kmod/src/filerw.c index 75faacc0..4cbac7b3 100644 --- a/kmod/src/filerw.c +++ b/kmod/src/filerw.c @@ -203,12 +203,11 @@ static bool
bmap_has_blocks(struct scoutfs_block_map *bmap) int scoutfs_truncate_block_items(struct super_block *sb, u64 ino, u64 size) { struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); - struct scoutfs_block_map *bmap; - struct scoutfs_key first; + struct scoutfs_block_map bmap; + struct scoutfs_btree_val val; struct scoutfs_key last; struct scoutfs_key key; - bool delete; + bool modified; u64 iblock; u64 blkno; int ret; @@ -217,27 +216,38 @@ int scoutfs_truncate_block_items(struct super_block *sb, u64 ino, u64 size) iblock = DIV_ROUND_UP(size, SCOUTFS_BLOCK_SIZE); i = iblock & SCOUTFS_BLOCK_MAP_MASK; - scoutfs_set_key(&first, ino, SCOUTFS_BMAP_KEY, + scoutfs_set_key(&key, ino, SCOUTFS_BMAP_KEY, iblock & ~(u64)SCOUTFS_BLOCK_MAP_MASK); scoutfs_set_key(&last, ino, SCOUTFS_BMAP_KEY, ~0ULL); trace_printk("iblock %llu i %d\n", iblock, i); - while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) { - key = *curs.key; - first = *curs.key; - scoutfs_inc_key(&first); - scoutfs_btree_release(&curs); + scoutfs_btree_init_val(&val, &bmap, sizeof(bmap)); - ret = scoutfs_btree_update(sb, meta, &key, &curs); + for (;;) { + ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + break; + } + + /* XXX corruption */ + if (ret != sizeof(bmap)) { + ret = -EIO; + break; + } + + /* XXX check bmap sanity */ + + /* make sure we can update bmap after freeing */ + ret = scoutfs_btree_dirty(sb, meta, &key); if (ret) break; - /* XXX check sanity */ - bmap = curs.val; - + modified = false; for (; i < SCOUTFS_BLOCK_MAP_COUNT; i++) { - blkno = le64_to_cpu(bmap->blkno[i]); + blkno = le64_to_cpu(bmap.blkno[i]); if (blkno == 0) continue; @@ -245,23 +255,22 @@ int scoutfs_truncate_block_items(struct super_block *sb, u64 ino, u64 size) if (ret) break; - bmap->blkno[i] = 0; + bmap.blkno[i] = 0; + modified = true; } - delete = !bmap_has_blocks(bmap); + i = 0; + + /* dirtying should have prevented these from failing */ + if (!bmap_has_blocks(&bmap)) + scoutfs_btree_delete(sb, meta, &key); + else if (modified) + scoutfs_btree_update(sb, meta, &key, &val); - scoutfs_btree_release(&curs); if (ret) break; - i = 0; - - if (delete) { - ret = scoutfs_btree_delete(sb, meta, &key); - if (ret) - break; - } - /* XXX sync transaction if it's enormous */ + scoutfs_inc_key(&key); } return ret; @@ -303,8 +312,8 @@ static int contig_mapped_blocks(struct inode *inode, u64 iblock, u64 *blkno) { struct super_block *sb = inode->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); - struct scoutfs_block_map *bmap; + struct scoutfs_btree_val val; + struct scoutfs_block_map bmap; struct scoutfs_key key; int ret; int i; @@ -312,18 +321,21 @@ static int contig_mapped_blocks(struct inode *inode, u64 iblock, u64 *blkno) *blkno = 0; set_bmap_key(&key, inode, iblock); - ret = scoutfs_btree_lookup(sb, meta, &key, &curs); - if (!ret) { - bmap = curs.val; + scoutfs_btree_init_val(&val, &bmap, sizeof(bmap)); + ret = scoutfs_btree_lookup(sb, meta, &key, &val); + if (ret == sizeof(bmap)) { i = iblock & SCOUTFS_BLOCK_MAP_MASK; - *blkno = le64_to_cpu(bmap->blkno[i]); + *blkno = le64_to_cpu(bmap.blkno[i]); - while (i < SCOUTFS_BLOCK_MAP_COUNT && bmap->blkno[i]) { + ret = 0; + while (i < SCOUTFS_BLOCK_MAP_COUNT && bmap.blkno[i]) { ret++; i++; } - scoutfs_btree_release(&curs); + } else if (ret >= 0) { + /* XXX corruption */ + ret = -EIO; } else if (ret == -ENOENT) { ret = 0; } @@ -350,8 +362,8 @@ static int 
map_writable_block(struct inode *inode, u64 iblock, u64 *blkno_ret) { struct super_block *sb = inode->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); - struct scoutfs_block_map *bmap; + struct scoutfs_block_map bmap; + struct scoutfs_btree_val val; struct scoutfs_key key; bool inserted = false; u64 old_blkno = 0; @@ -361,25 +373,35 @@ static int map_writable_block(struct inode *inode, u64 iblock, u64 *blkno_ret) int i; set_bmap_key(&key, inode, iblock); + scoutfs_btree_init_val(&val, &bmap, sizeof(bmap)); - /* we always need a writable block map item */ - ret = scoutfs_btree_update(sb, meta, &key, &curs); + /* see if there's an existing mapping */ + ret = scoutfs_btree_lookup(sb, meta, &key, &val); if (ret < 0 && ret != -ENOENT) goto out; - /* might need to create a new item and delete it after errors */ + /* make sure that updating the bmap item won't fail */ if (ret == -ENOENT) { - ret = scoutfs_btree_insert(sb, meta, &key, sizeof(*bmap), - &curs); - if (ret < 0) + memset(&bmap, 0, sizeof(bmap)); + ret = scoutfs_btree_insert(sb, meta, &key, &val); + if (ret) goto out; - memset(curs.val, 0, sizeof(*bmap)); inserted = true; + + } else { + /* XXX corruption */ + if (ret != sizeof(bmap)) { + ret = -EIO; + goto out; + } + + ret = scoutfs_btree_dirty(sb, meta, &key); + if (ret) + goto out; } - bmap = curs.val; i = iblock & SCOUTFS_BLOCK_MAP_MASK; - old_blkno = le64_to_cpu(bmap->blkno[i]); + old_blkno = le64_to_cpu(bmap.blkno[i]); /* * If the existing block was free in stable then its dirty in @@ -406,12 +428,16 @@ static int map_writable_block(struct inode *inode, u64 iblock, u64 *blkno_ret) goto out; } - bmap->blkno[i] = cpu_to_le64(new_blkno); + bmap.blkno[i] = cpu_to_le64(new_blkno); + + /* dirtying guarantees success */ + err = scoutfs_btree_update(sb, meta, &key, &val); + BUG_ON(err); + *blkno_ret = new_blkno; new_blkno = 0; ret = 0; out: - scoutfs_btree_release(&curs); if (ret) { if (new_blkno) return_file_block(sb, new_blkno); diff --git a/kmod/src/inode.c b/kmod/src/inode.c index 779dee62..e60d92c6 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -125,21 +125,25 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode) static int scoutfs_read_locked_inode(struct inode *inode) { - DECLARE_SCOUTFS_BTREE_CURSOR(curs); struct super_block *sb = inode->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); + struct scoutfs_btree_val val; + struct scoutfs_inode sinode; struct scoutfs_key key; int ret; scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_INODE_KEY, 0); + scoutfs_btree_init_val(&val, &sinode, sizeof(sinode)); - ret = scoutfs_btree_lookup(sb, meta, &key, &curs); - if (!ret) { - load_inode(inode, curs.val); - scoutfs_btree_release(&curs); + ret = scoutfs_btree_lookup(sb, meta, &key, &val); + if (ret == sizeof(sinode)) { + load_inode(inode, &sinode); + ret = 0; + } else if (ret >= 0) { + ret = -EIO; } - return 0; + return ret; } static int scoutfs_iget_test(struct inode *inode, void *arg) @@ -252,19 +256,20 @@ int scoutfs_dirty_inode_item(struct inode *inode) */ void scoutfs_update_inode_item(struct inode *inode) { - DECLARE_SCOUTFS_BTREE_CURSOR(curs); struct super_block *sb = inode->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); + struct scoutfs_btree_val val; + struct scoutfs_inode sinode; struct scoutfs_key key; int err; scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_INODE_KEY, 0); + scoutfs_btree_init_val(&val, &sinode, sizeof(sinode)); + store_inode(&sinode, inode); - err = 
scoutfs_btree_update(sb, meta, &key, &curs); + err = scoutfs_btree_update(sb, meta, &key, &val); BUG_ON(err); - store_inode(curs.val, inode); - scoutfs_btree_release(&curs); trace_scoutfs_update_inode(inode); } @@ -313,8 +318,9 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, dev_t rdev) { struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); struct scoutfs_inode_info *ci; + struct scoutfs_btree_val val; + struct scoutfs_inode sinode; struct scoutfs_key key; struct inode *inode; u64 ino; @@ -341,15 +347,15 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir, set_inode_ops(inode); scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_INODE_KEY, 0); + scoutfs_btree_init_val(&val, &sinode, sizeof(sinode)); + store_inode(&sinode, inode); - ret = scoutfs_btree_insert(inode->i_sb, meta, &key, - sizeof(struct scoutfs_inode), &curs); + ret = scoutfs_btree_insert(inode->i_sb, meta, &key, &val); if (ret) { iput(inode); return ERR_PTR(ret); } - scoutfs_btree_release(&curs); return inode; } @@ -359,22 +365,28 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir, static void drop_inode_items(struct super_block *sb, u64 ino) { struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); - struct scoutfs_inode *sinode; + struct scoutfs_btree_val val; + struct scoutfs_inode sinode; struct scoutfs_key key; bool release = false; umode_t mode; int ret; - /* sample the inode mode */ + /* sample the inode mode, XXX don't need to copy whole thing here */ scoutfs_set_key(&key, ino, SCOUTFS_INODE_KEY, 0); - ret = scoutfs_btree_lookup(sb, meta, &key, &curs); - if (ret) + scoutfs_btree_init_val(&val, &sinode, sizeof(sinode)); + + ret = scoutfs_btree_lookup(sb, meta, &key, &val); + if (ret < 0) goto out; - sinode = curs.val; - mode = le32_to_cpu(sinode->mode); - scoutfs_btree_release(&curs); + /* XXX corruption */ + if (ret != sizeof(sinode)) { + ret = -EIO; + goto out; + } + + mode = le32_to_cpu(sinode.mode); ret = scoutfs_hold_trans(sb); if (ret) diff --git a/kmod/src/ioctl.c b/kmod/src/ioctl.c index 4f5c20e7..ff7036ae 100644 --- a/kmod/src/ioctl.c +++ b/kmod/src/ioctl.c @@ -39,9 +39,9 @@ static long scoutfs_ioc_inodes_since(struct file *file, unsigned long arg) struct scoutfs_ioctl_inodes_since args; struct scoutfs_ioctl_ino_seq __user *uiseq; struct scoutfs_ioctl_ino_seq iseq; - struct scoutfs_key first; + struct scoutfs_key key; struct scoutfs_key last; - DECLARE_SCOUTFS_BTREE_CURSOR(curs); + u64 seq; long bytes; int ret; @@ -52,34 +52,25 @@ static long scoutfs_ioc_inodes_since(struct file *file, unsigned long arg) if (args.buf_len < sizeof(iseq) || args.buf_len > INT_MAX) return -EINVAL; - scoutfs_set_key(&first, args.first_ino, SCOUTFS_INODE_KEY, 0); + scoutfs_set_key(&key, args.first_ino, SCOUTFS_INODE_KEY, 0); scoutfs_set_key(&last, args.last_ino, SCOUTFS_INODE_KEY, 0); bytes = 0; - while ((ret = scoutfs_btree_since(sb, meta, &first, &last, - args.seq, &curs)) > 0) { + for (;;) { + ret = scoutfs_btree_since(sb, meta, &key, &last, args.seq, + &key, &seq, NULL); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + break; + } - iseq.ino = scoutfs_key_inode(curs.key); - iseq.seq = curs.seq; + iseq.ino = scoutfs_key_inode(&key); + iseq.seq = seq; - /* - * We can't copy to userspace with our locks held - * because faults could try to use tree blocks that we - * have locked. 
If a non-faulting copy fails we release - * the cursor and try a blocking copy and pick up where - * we left off. - */ - pagefault_disable(); - ret = __copy_to_user_inatomic(uiseq, &iseq, sizeof(iseq)); - pagefault_enable(); - if (ret) { - first = *curs.key; - scoutfs_inc_key(&first); - scoutfs_btree_release(&curs); - if (copy_to_user(uiseq, &iseq, sizeof(iseq))) { - ret = -EFAULT; - break; - } + if (copy_to_user(uiseq, &iseq, sizeof(iseq))) { + ret = -EFAULT; + break; } uiseq++; @@ -88,9 +79,9 @@ static long scoutfs_ioc_inodes_since(struct file *file, unsigned long arg) ret = 0; break; } - } - scoutfs_btree_release(&curs); + scoutfs_inc_key(&key); + } if (bytes) ret = bytes; @@ -219,16 +210,14 @@ static long scoutfs_ioc_find_xattr(struct file *file, unsigned long arg, struct super_block *sb = file_inode(file)->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); struct scoutfs_ioctl_find_xattr args; - DECLARE_SCOUTFS_BTREE_CURSOR(curs); - struct scoutfs_key first; + struct scoutfs_key key; struct scoutfs_key last; char __user *ustr; u64 __user *uino; - u64 inos[32]; char *str; - int nr_inos = 0; int copied = 0; - int ret; + int ret = 0; + u64 ino; u8 type; u64 h; @@ -238,6 +227,9 @@ static long scoutfs_ioc_find_xattr(struct file *file, unsigned long arg, if (args.str_len > SCOUTFS_MAX_XATTR_LEN || args.ino_count > INT_MAX) return -EINVAL; + if (args.first_ino > args.last_ino) + return -EINVAL; + if (args.ino_count == 0) return 0; @@ -262,36 +254,27 @@ static long scoutfs_ioc_find_xattr(struct file *file, unsigned long arg, type = SCOUTFS_XATTR_VAL_HASH_KEY; } - scoutfs_set_key(&first, h, type, args.first_ino); + scoutfs_set_key(&key, h, type, args.first_ino); scoutfs_set_key(&last, h, type, args.last_ino); while (copied < args.ino_count) { - while ((ret = scoutfs_btree_next(sb, meta, &first, &last, - &curs)) > 0) { - inos[nr_inos++] = scoutfs_key_offset(curs.key); - - first = *curs.key; - scoutfs_inc_key(&first); - - if (nr_inos == ARRAY_SIZE(inos) || - (nr_inos + copied) == args.ino_count) { - scoutfs_btree_release(&curs); + ret = scoutfs_btree_next(sb, meta, &key, &last, &key, NULL); + if (ret < 0) { + if (ret == -ENOENT) ret = 0; - break; - } - } - if (ret < 0 || nr_inos == 0) break; + } - if (copy_to_user(uino, inos, nr_inos * sizeof(u64))) { + ino = scoutfs_key_offset(&key); + if (put_user(ino, uino)) { ret = -EFAULT; break; } - uino += nr_inos; - copied += nr_inos; - nr_inos = 0; + uino++; + copied++; + scoutfs_inc_key(&key); } out: diff --git a/kmod/src/xattr.c b/kmod/src/xattr.c index 97ad19d7..abfc6a8c 100644 --- a/kmod/src/xattr.c +++ b/kmod/src/xattr.c @@ -115,30 +115,49 @@ static int search_xattr_items(struct inode *inode, const char *name, { struct super_block *sb = inode->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); - struct scoutfs_key first; - struct scoutfs_key last; + struct scoutfs_btree_val val; struct scoutfs_xattr *xat; + struct scoutfs_key last; + struct scoutfs_key key; + unsigned int max_len; int ret; - set_xattr_keys(inode, &first, &last, name, name_len); + max_len = xat_bytes(SCOUTFS_MAX_XATTR_LEN, SCOUTFS_MAX_XATTR_LEN); + xat = kmalloc(max_len, GFP_KERNEL); + if (!xat) + return -ENOMEM; + + set_xattr_keys(inode, &key, &last, name, name_len); + scoutfs_btree_init_val(&val, xat, max_len); res->found = false; res->other_coll = false; res->found_hole = false; - res->hole_key = first; + res->hole_key = key; - while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) { - xat = curs.val; + for (;;)
{ + ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + break; + } + + /* XXX corruption */ + if (ret < sizeof(struct scoutfs_xattr) || + ret != xat_bytes(xat->name_len, xat->value_len)) { + ret = -EIO; + break; + } /* found a hole when we skip past next expected key */ if (!res->found_hole && - scoutfs_key_cmp(&res->hole_key, curs.key) < 0) + scoutfs_key_cmp(&res->hole_key, &key) < 0) res->found_hole = true; - /* keep searching for a hole past this cursor key */ + /* keep searching for a hole past this key */ if (!res->found_hole) { - res->hole_key = *curs.key; + res->hole_key = key; scoutfs_inc_key(&res->hole_key); } @@ -147,7 +166,7 @@ static int search_xattr_items(struct inode *inode, const char *name, scoutfs_names_equal(name, name_len, xat->name, xat->name_len)) { res->found = true; - res->key = *curs.key; + res->key = key; res->val_hash = scoutfs_name_hash(xat_value(xat), xat->value_len); } else { @@ -157,11 +176,13 @@ static int search_xattr_items(struct inode *inode, const char *name, /* finished once we have all the caller needs */ if (res->found && res->other_coll && res->found_hole) { ret = 0; - scoutfs_btree_release(&curs); break; } + + scoutfs_inc_key(&key); } + kfree(xat); return ret; } @@ -178,56 +199,55 @@ static int insert_xattr(struct inode *inode, const char *name, { struct super_block *sb = inode->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); bool inserted_name_hash_item = false; - __le64 * __packed refcount; + struct scoutfs_btree_val val; + __le64 refcount; struct scoutfs_key name_key; struct scoutfs_key val_key; - struct scoutfs_xattr *xat; + struct scoutfs_xattr xat; int ret; + /* insert the main xattr item */ set_name_val_keys(&name_key, &val_key, key, val_hash); + scoutfs_btree_init_val(&val, &xat, sizeof(xat), (void *)name, name_len, + (void *)value, size); - ret = scoutfs_btree_insert(sb, meta, key, - xat_bytes(name_len, size), &curs); + xat.name_len = name_len; + xat.value_len = size; + + ret = scoutfs_btree_insert(sb, meta, key, &val); if (ret) return ret; - xat = curs.val; - xat->name_len = name_len; - xat->value_len = size; - memcpy(xat->name, name, name_len); - memcpy(xat_value(xat), value, size); - - scoutfs_btree_release(&curs); - /* insert the name hash item for find_xattr if we're first */ if (!other_coll) { - ret = scoutfs_btree_insert(sb, meta, &name_key, 0, &curs); + ret = scoutfs_btree_insert(sb, meta, &name_key, NULL); /* XXX eexist would be corruption */ if (ret) goto out; - scoutfs_btree_release(&curs); inserted_name_hash_item = true; } /* increment the val hash item for find_xattr, inserting if first */ - ret = scoutfs_btree_update(sb, meta, &val_key, &curs); - if (ret == -ENOENT) { - ret = scoutfs_btree_insert(sb, meta, &val_key, - sizeof(*refcount), &curs); - if (ret == 0) { - /* XXX test sane item size */ - refcount = curs.val; - *refcount = 0; - } - } - if (ret == 0) { - refcount = curs.val; - le64_add_cpu(refcount, 1); - scoutfs_btree_release(&curs); - } + scoutfs_btree_init_val(&val, &refcount, sizeof(refcount)); + ret = scoutfs_btree_lookup(sb, meta, &val_key, &val); + if (ret < 0 && ret != -ENOENT) + goto out; + + if (ret == -ENOENT) { + refcount = cpu_to_le64(1); + ret = scoutfs_btree_insert(sb, meta, &val_key, &val); + } else { + /* XXX corruption */ + if (ret != sizeof(refcount)) { + ret = -EIO; + goto out; + } + + le64_add_cpu(&refcount, 1); + ret = scoutfs_btree_update(sb, meta, &val_key, &val); + } out: if (ret) { 
scoutfs_btree_delete(sb, meta, key); @@ -247,15 +267,29 @@ static int delete_xattr(struct super_block *sb, struct scoutfs_key *key, bool other_coll, u64 val_hash) { struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); + struct scoutfs_btree_val val; struct scoutfs_key name_key; struct scoutfs_key val_key; - __le64 * __packed refcount; - bool del_val = false; + __le64 refcount; int ret; set_name_val_keys(&name_key, &val_key, key, val_hash); + /* update the val_hash refcount, making sure it's not nonsense */ + scoutfs_btree_init_val(&val, &refcount, sizeof(refcount)); + ret = scoutfs_btree_lookup(sb, meta, &val_key, &val); + if (ret < 0) + goto out; + + /* XXX corruption */ + if (ret != sizeof(refcount)) { + ret = -EIO; + goto out; + } + + le64_add_cpu(&refcount, -1ULL); + + /* ensure that we can update and delete name_ and val_ keys */ if (!other_coll) { ret = scoutfs_btree_dirty(sb, meta, &name_key); if (ret) @@ -272,14 +306,9 @@ static int delete_xattr(struct super_block *sb, struct scoutfs_key *key, if (!other_coll) scoutfs_btree_delete(sb, meta, &name_key); - scoutfs_btree_update(sb, meta, &val_key, &curs); - refcount = curs.val; - le64_add_cpu(refcount, -1ULL); - if (*refcount == 0) - del_val = true; - scoutfs_btree_release(&curs); - - if (del_val) + if (refcount) + scoutfs_btree_update(sb, meta, &val_key, &val); + else scoutfs_btree_delete(sb, meta, &val_key); ret = 0; out: @@ -295,6 +324,11 @@ static int unknown_prefix(const char *name) return strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN); } +/* + * Look up an xattr matching the given name. We walk our xattr items stored + * at the hashed name. We'll only be able to copy out a value that fits + * in the callers buffer. + */ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer, size_t size) { @@ -302,27 +336,49 @@ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer, struct super_block *sb = inode->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); struct scoutfs_inode_info *si = SCOUTFS_I(inode); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); size_t name_len = strlen(name); + struct scoutfs_btree_val val; struct scoutfs_xattr *xat; - struct scoutfs_key first; + struct scoutfs_key key; struct scoutfs_key last; + unsigned int item_len; int ret; if (unknown_prefix(name)) return -EOPNOTSUPP; - set_xattr_keys(inode, &first, &last, name, name_len); + /* make sure we don't allocate an enormous item */ + if (name_len > SCOUTFS_MAX_XATTR_LEN) + return -ENODATA; + size = min_t(size_t, size, SCOUTFS_MAX_XATTR_LEN); + + item_len = xat_bytes(name_len, size); + xat = kmalloc(item_len, GFP_KERNEL); + if (!xat) + return -ENOMEM; + + set_xattr_keys(inode, &key, &last, name, name_len); + scoutfs_btree_init_val(&val, xat, item_len); down_read(&si->xattr_rwsem); - ret = -ENODATA; - while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) { - xat = curs.val; + for (;;) { + ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val); + if (ret < 0) { + if (ret == -ENOENT) + ret = -ENODATA; + break; + } + + /* XXX corruption */ + if (ret < sizeof(struct scoutfs_xattr)) { + ret = -EIO; + break; + } if (!scoutfs_names_equal(name, name_len, xat->name, xat->name_len)) { - ret = -ENODATA; + scoutfs_inc_key(&key); continue; } @@ -333,12 +389,12 @@ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer, else ret = -ERANGE; } - scoutfs_btree_release(&curs); break; } up_read(&si->xattr_rwsem); + kfree(xat); return ret; } @@ -458,39 
+514,60 @@ ssize_t scoutfs_listxattr(struct dentry *dentry, char *buffer, size_t size) struct scoutfs_inode_info *si = SCOUTFS_I(inode); struct super_block *sb = inode->i_sb; struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); + struct scoutfs_btree_val val; struct scoutfs_xattr *xat; - struct scoutfs_key first; + struct scoutfs_key key; struct scoutfs_key last; + unsigned int item_len; ssize_t total; int ret; - scoutfs_set_key(&first, scoutfs_ino(inode), SCOUTFS_XATTR_KEY, 0); + item_len = xat_bytes(SCOUTFS_MAX_XATTR_LEN, 0); + xat = kmalloc(item_len, GFP_KERNEL); + if (!xat) + return -ENOMEM; + + scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_XATTR_KEY, 0); scoutfs_set_key(&last, scoutfs_ino(inode), SCOUTFS_XATTR_KEY, ~0ULL); + scoutfs_btree_init_val(&val, xat, item_len); down_read(&si->xattr_rwsem); total = 0; - while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) { - xat = curs.val; - - total += xat->name_len + 1; - if (!size) - continue; - if (!buffer || total > size) { - ret = -ERANGE; + for (;;) { + ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; break; } - memcpy(buffer, xat->name, xat->name_len); - buffer += xat->name_len; - *(buffer++) = '\0'; + /* XXX corruption */ + if (ret < sizeof(struct scoutfs_xattr)) { + ret = -EIO; + break; + } + + total += xat->name_len + 1; + + if (size) { + if (!buffer || total > size) { + ret = -ERANGE; + break; + } + + memcpy(buffer, xat->name, xat->name_len); + buffer += xat->name_len; + *(buffer++) = '\0'; + } + + scoutfs_inc_key(&key); } - scoutfs_btree_release(&curs); - up_read(&si->xattr_rwsem); + kfree(xat); + return ret < 0 ? ret : total; } @@ -504,60 +581,66 @@ ssize_t scoutfs_listxattr(struct dentry *dentry, char *buffer, size_t size) * * Hash items can be shared amongst xattrs whose names or values hash to * the same hash value. We don't bother trying to remove the hash items - * as the last xattr is removed. We remove it the first chance we get, - * try to avoid obviously removing the same hash item next, and allow + * as the last xattr is removed. We always try to remove them and allow * failure when we try to remove a hash item that wasn't found. 
*/ int scoutfs_xattr_drop(struct super_block *sb, u64 ino) { struct scoutfs_btree_root *meta = SCOUTFS_META(sb); - DECLARE_SCOUTFS_BTREE_CURSOR(curs); + struct scoutfs_btree_val val; struct scoutfs_xattr *xat; - struct scoutfs_key first; struct scoutfs_key last; struct scoutfs_key key; struct scoutfs_key name_key; struct scoutfs_key val_key; - __le64 last_name; - __le64 last_val; + unsigned int item_len; u64 val_hash; - bool have_last; int ret; - scoutfs_set_key(&first, ino, SCOUTFS_XATTR_KEY, 0); + scoutfs_set_key(&key, ino, SCOUTFS_XATTR_KEY, 0); scoutfs_set_key(&last, ino, SCOUTFS_XATTR_KEY, ~0ULL); - have_last = false; - while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) { - xat = curs.val; - key = *curs.key; + item_len = xat_bytes(SCOUTFS_MAX_XATTR_LEN, SCOUTFS_MAX_XATTR_LEN); + xat = kmalloc(item_len, GFP_KERNEL); + if (!xat) + return -ENOMEM; + + scoutfs_btree_init_val(&val, xat, item_len); + + for (;;) { + ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + break; + } + + /* XXX corruption */ + if (ret < sizeof(struct scoutfs_xattr) || + ret != xat_bytes(xat->name_len, xat->value_len)) { + ret = -EIO; + break; + } + val_hash = scoutfs_name_hash(xat_value(xat), xat->value_len); set_name_val_keys(&name_key, &val_key, &key, val_hash); - first = *curs.key; - scoutfs_inc_key(&first); - scoutfs_btree_release(&curs); + ret = scoutfs_btree_delete(sb, meta, &name_key); + if (ret && ret != -ENOENT) + break; - if (!have_last || last_name != name_key.inode) { - ret = scoutfs_btree_delete(sb, meta, &name_key); - if (ret && ret != -ENOENT) - break; - last_name = name_key.inode; - } - - if (!have_last || last_val != val_key.inode) { - ret = scoutfs_btree_delete(sb, meta, &val_key); - if (ret && ret != -ENOENT) - break; - last_val = val_key.inode; - } - - have_last = true; + ret = scoutfs_btree_delete(sb, meta, &val_key); + if (ret && ret != -ENOENT) + break; ret = scoutfs_btree_delete(sb, meta, &key); if (ret && ret != -ENOENT) break; + + scoutfs_inc_key(&key); } + kfree(xat); + return ret; }
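
Reviewer's note: below is a minimal, userspace-compilable sketch of the scatter copy that copy_to_val() implements in this patch. struct kvec is re-declared locally as a stand-in for the kernel's definition in linux/uio.h so the example builds with a plain C compiler, and copy_to_vec() plus the sample buffers in main() are hypothetical test scaffolding rather than scoutfs code.

#include <stdio.h>
#include <string.h>

/* stand-in for the kernel's struct kvec */
struct kvec {
	void *iov_base;
	size_t iov_len;
};

#define VEC_SIZE 3

/* copy up to the vector's total length from a contiguous item value */
static int copy_to_vec(struct kvec *vec, const char *val_ptr, size_t val_len)
{
	size_t bytes;
	size_t off = 0;
	int i;

	for (i = 0; val_len > 0 && i < VEC_SIZE; i++) {
		bytes = val_len < vec[i].iov_len ? val_len : vec[i].iov_len;
		if (bytes)
			memcpy(vec[i].iov_base, val_ptr + off, bytes);
		val_len -= bytes;
		off += bytes;
	}

	return off;
}

int main(void)
{
	/* an 11 byte "item value": a 4 byte header then a 7 byte name */
	char item_val[] = "hdr:payload";
	char hdr[4];
	char name[8];
	struct kvec vec[VEC_SIZE] = {
		{ hdr, sizeof(hdr) },
		{ name, sizeof(name) },
		{ NULL, 0 },
	};
	int copied = copy_to_vec(vec, item_val, sizeof(item_val) - 1);

	/* the first 4 bytes land in hdr, the remaining 7 in name */
	printf("copied %d: %.4s %.7s\n", copied, hdr, name);
	return 0;
}

This is the same split-buffer pattern the dir.c and xattr.c callers in the patch rely on when they point the first kvec at a fixed-size struct and a later kvec at the variable-length name, and it is why scoutfs_btree_next() and scoutfs_btree_lookup() can return fewer bytes than the item holds without failing.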