mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-04 11:24:21 +00:00
scoutfs: remove btree cursor
The btree cursor was built to address two problems. First it accelerates iteration by avoiding full descents down the tree by holding on to leaf blocks. Second it lets callers reference item value contents directly to avoid copies. But it also has serious complexity costs. It pushes refcounting and locking out to the caller. There have already been a few bugs where callers did things while holding the cursor without realizing that they're holding a btree lock and can't perform certain btree operations or even copies to user space. Future changes to the allocator to use the btree motivates cleaning up the tree locking which is complicated by the cursor being a stand alone lock reference. Instead of continuing to layer complexity onto this construct let's remove it. The iteration acceleration will be addressed the same way we're going to accelerate the other btree operations: with per-cpu cached leaf block references. Unlike the cursor this doesn't push interface changes out to callers who want repeated btree calls to perform well. We'll leave the value copying for now. If it becomes an issue we can add variants that call a function to operate on the value. Let's hope we don't have to go there. This change replaces the cursor with a vector to memory that the value should be copied to and from. The vector has a fixed number of elements and is wrapped in a struct for easy declaration and initialization. This change to the interface looks noisy but each caller's change is pretty mechanical. 
They tend to involve: - replace the cursor with the value struct and initialization - allocate some memory to copy the value in to - reading functions return the number of value bytes copied - verify copied bytes makes sense for item being read - getting rid of confusing ((ret = _next())) looping - _next now returns -ENOENT instead of 0 for no next item - _next iterators now need to increase the key themselves - make sure to free allocated mem Sometimes the order of operations changes significantly. Now that we can't modify in place we need to read, modify, write. This looks like changing a modification of the item through the cursor to a lookup/update pattern. The symlink item iterators didn't need to use next because they walk a contiguous set of keys. They're changed to use simple insert or lookup. Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
324
kmod/src/btree.c
324
kmod/src/btree.c
@@ -56,7 +56,7 @@
|
||||
* XXX
|
||||
* - do we want a level in the btree header? seems like we would?
|
||||
* - validate structures on read?
|
||||
* - internal bh/pos/cmp interface is clumsy.. could use cursor
|
||||
* - internal bh/pos/cmp interface is clumsy..
|
||||
*/
|
||||
|
||||
/* number of contiguous bytes used by the item header and val of given len */
|
||||
@@ -121,6 +121,73 @@ static inline struct scoutfs_key *greatest_key(struct scoutfs_btree_block *bt)
|
||||
return &pos_item(bt, bt->nr_items - 1)->key;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy as much of the item as fits in the value vector. The min of the
|
||||
* value vec length and the item length is returned, including possibly
|
||||
* 0.
|
||||
*/
|
||||
static int copy_to_val(struct scoutfs_btree_val *val,
|
||||
struct scoutfs_btree_item *item)
|
||||
{
|
||||
size_t val_len = le16_to_cpu(item->val_len);
|
||||
char *val_ptr = item->val;
|
||||
struct kvec *kv;
|
||||
size_t bytes;
|
||||
size_t off;
|
||||
int i;
|
||||
|
||||
for (i = 0, off = 0; val_len > 0 && i < ARRAY_SIZE(val->vec); i++) {
|
||||
kv = &val->vec[i];
|
||||
|
||||
if (WARN_ON_ONCE(kv->iov_len && !kv->iov_base))
|
||||
return -EINVAL;
|
||||
|
||||
bytes = min(val_len, kv->iov_len);
|
||||
if (bytes)
|
||||
memcpy(kv->iov_base, val_ptr + off, bytes);
|
||||
|
||||
val_len -= bytes;
|
||||
off += bytes;
|
||||
}
|
||||
|
||||
return off;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the caller's value vector into the item in the tree block. This
|
||||
* is only called when the item should exactly match the value vector.
|
||||
*
|
||||
* -EINVAL is returned if the lengths don't match.
|
||||
*/
|
||||
static int copy_to_item(struct scoutfs_btree_item *item,
|
||||
struct scoutfs_btree_val *val)
|
||||
{
|
||||
size_t val_len = le16_to_cpu(item->val_len);
|
||||
char *val_ptr = item->val;
|
||||
struct kvec *kv;
|
||||
size_t bytes;
|
||||
int i;
|
||||
|
||||
if (val_len != scoutfs_btree_val_length(val))
|
||||
return -EINVAL;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(val->vec); i++) {
|
||||
kv = &val->vec[i];
|
||||
|
||||
if (WARN_ON_ONCE(kv->iov_len && !kv->iov_base))
|
||||
return -EINVAL;
|
||||
|
||||
bytes = min(val_len, kv->iov_len);
|
||||
if (bytes)
|
||||
memcpy(val_ptr, kv->iov_base, bytes);
|
||||
|
||||
val_len -= bytes;
|
||||
val_ptr += bytes;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the sorted item position that an item with the given key
|
||||
* should occupy.
|
||||
@@ -964,38 +1031,25 @@ static struct buffer_head *btree_walk(struct super_block *sb,
|
||||
return bh;
|
||||
}
|
||||
|
||||
static void set_cursor(struct scoutfs_btree_cursor *curs,
|
||||
struct buffer_head *bh, unsigned int pos, bool write)
|
||||
{
|
||||
struct scoutfs_btree_block *bt = bh_data(bh);
|
||||
struct scoutfs_btree_item *item = pos_item(bt, pos);
|
||||
|
||||
curs->bh = bh;
|
||||
curs->pos = pos;
|
||||
curs->write = write;
|
||||
|
||||
curs->key = &item->key;
|
||||
curs->seq = le64_to_cpu(item->seq);
|
||||
curs->val = item->val;
|
||||
curs->val_len = le16_to_cpu(item->val_len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Point the caller's cursor at the item if it's found. It can't be
|
||||
* modified. -ENOENT is returned if the key isn't found in the tree.
|
||||
* Copy the given value identified by the given key into the caller's
|
||||
* buffer. The number of bytes copied is returned, -ENOENT if the key
|
||||
* wasn't found, or -errno on errors.
|
||||
*/
|
||||
int scoutfs_btree_lookup(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_cursor *curs)
|
||||
struct scoutfs_btree_val *val)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct buffer_head *bh;
|
||||
unsigned int pos;
|
||||
int cmp;
|
||||
int ret;
|
||||
|
||||
BUG_ON(curs->bh);
|
||||
trace_printk("key "CKF" val_len %d\n",
|
||||
CKA(key), scoutfs_btree_val_length(val));
|
||||
|
||||
bh = btree_walk(sb, root, key, NULL, 0, 0, 0);
|
||||
if (IS_ERR(bh))
|
||||
@@ -1004,37 +1058,49 @@ int scoutfs_btree_lookup(struct super_block *sb,
|
||||
|
||||
pos = find_pos(bt, key, &cmp);
|
||||
if (cmp == 0) {
|
||||
set_cursor(curs, bh, pos, false);
|
||||
ret = 0;
|
||||
item = pos_item(bt, pos);
|
||||
ret = copy_to_val(val, item);
|
||||
} else {
|
||||
unlock_block(NULL, bh, false);
|
||||
scoutfs_block_put(bh);
|
||||
ret = -ENOENT;
|
||||
}
|
||||
|
||||
unlock_block(NULL, bh, false);
|
||||
scoutfs_block_put(bh);
|
||||
|
||||
trace_printk("key "CKF" ret %d\n", CKA(key), ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert a new item in the tree and point the caller's cursor at it.
|
||||
* The caller is responsible for setting the value.
|
||||
* Insert a new item in the tree.
|
||||
*
|
||||
* -EEXIST is returned if the key is already present in the tree.
|
||||
* 0 is returned on success. -EEXIST is returned if the key is already
|
||||
* present in the tree.
|
||||
*
|
||||
* XXX this walks the treap twice, which isn't great
|
||||
* If no value pointer is given then the item is created with a zero
|
||||
* length value.
|
||||
*/
|
||||
int scoutfs_btree_insert(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key, unsigned int val_len,
|
||||
struct scoutfs_btree_cursor *curs)
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_val *val)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct buffer_head *bh;
|
||||
unsigned int val_len;
|
||||
int pos;
|
||||
int cmp;
|
||||
int ret;
|
||||
|
||||
BUG_ON(curs->bh);
|
||||
if (val)
|
||||
val_len = scoutfs_btree_val_length(val);
|
||||
else
|
||||
val_len = 0;
|
||||
|
||||
if (WARN_ON_ONCE(val_len > SCOUTFS_MAX_ITEM_LEN))
|
||||
return -EINVAL;
|
||||
|
||||
bh = btree_walk(sb, root, key, NULL, val_len, 0, WALK_INSERT);
|
||||
if (IS_ERR(bh))
|
||||
@@ -1043,15 +1109,18 @@ int scoutfs_btree_insert(struct super_block *sb,
|
||||
|
||||
pos = find_pos(bt, key, &cmp);
|
||||
if (cmp) {
|
||||
create_item(bt, pos, key, val_len);
|
||||
set_cursor(curs, bh, pos, true);
|
||||
ret = 0;
|
||||
item = create_item(bt, pos, key, val_len);
|
||||
if (val)
|
||||
ret = copy_to_item(item, val);
|
||||
else
|
||||
ret = 0;
|
||||
} else {
|
||||
unlock_block(NULL, bh, true);
|
||||
scoutfs_block_put(bh);
|
||||
ret = -EEXIST;
|
||||
}
|
||||
|
||||
unlock_block(NULL, bh, true);
|
||||
scoutfs_block_put(bh);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1104,48 +1173,46 @@ out:
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over items in the tree starting with first and ending with
|
||||
* last. We point the cursor at each item and return to the caller.
|
||||
* The caller continues the search with the cursor.
|
||||
* Find the next key in the tree starting from 'first', and ending at
|
||||
* 'last'. 'found', 'found_seq', and 'val' are set to the discovered
|
||||
* item if they're provided.
|
||||
*
|
||||
* The caller can limit results to items with a sequence number greater
|
||||
* than or equal to their sequence number.
|
||||
*
|
||||
* When there isn't an item in the cursor then we walk the btree to the
|
||||
* leaf that should contain the key and look for items from there. When
|
||||
* we exhaust leaves we search the tree again from the next key that was
|
||||
* increased past the leaf's parent's item.
|
||||
 * The only tricky bit is that the key we're searching for might not
|
||||
* exist in the tree. We can get to the leaf and find that there are no
|
||||
* greater items in the leaf. We have to search again from the keys
|
||||
* greater than the parent item's keys which the walk gives us. We also
|
||||
 * start the search over from this next key if walking while filtering
|
||||
* based on seqs terminates early.
|
||||
*
|
||||
* Returns > 0 when the cursor has an item, 0 when done, and -errno on error.
|
||||
* Returns the bytes copied into the value (0 if not provided), -ENOENT
|
||||
* if there is no item past first until last, or -errno on errors.
|
||||
*
|
||||
* It's a common pattern to use the same key for first and found so we're
|
||||
* careful to copy first before we modify found.
|
||||
*/
|
||||
static int btree_next(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *first, struct scoutfs_key *last,
|
||||
u64 seq, int op, struct scoutfs_btree_cursor *curs)
|
||||
u64 seq, int op, struct scoutfs_key *found,
|
||||
u64 *found_seq, struct scoutfs_btree_val *val)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct buffer_head *bh;
|
||||
struct scoutfs_key start = *first;
|
||||
struct scoutfs_key key = *first;
|
||||
struct scoutfs_key next_key;
|
||||
struct buffer_head *bh;
|
||||
int pos;
|
||||
int ret;
|
||||
|
||||
if (scoutfs_key_cmp(first, last) > 0)
|
||||
return 0;
|
||||
|
||||
/* find the next item after the cursor, releasing if we're done */
|
||||
if (curs->bh) {
|
||||
bt = bh_data(curs->bh);
|
||||
key = *curs->key;
|
||||
scoutfs_inc_key(&key);
|
||||
|
||||
curs->pos = next_pos_seq(bt, curs->pos, 0, seq, op);
|
||||
if (curs->pos < bt->nr_items)
|
||||
set_cursor(curs, curs->bh, curs->pos, curs->write);
|
||||
else
|
||||
scoutfs_btree_release(curs);
|
||||
}
|
||||
trace_printk("finding next first "CKF" last "CKF"\n",
|
||||
CKA(&start), CKA(last));
|
||||
|
||||
/* find the leaf that contains the next item after the key */
|
||||
while (!curs->bh && scoutfs_key_cmp(&key, last) <= 0) {
|
||||
ret = -ENOENT;
|
||||
while (scoutfs_key_cmp(&key, last) <= 0) {
|
||||
|
||||
bh = btree_walk(sb, root, &key, &next_key, 0, seq, op);
|
||||
|
||||
@@ -1156,49 +1223,60 @@ static int btree_next(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
}
|
||||
|
||||
if (IS_ERR(bh)) {
|
||||
if (bh == ERR_PTR(-ENOENT))
|
||||
break;
|
||||
return PTR_ERR(bh);
|
||||
ret = PTR_ERR(bh);
|
||||
break;
|
||||
}
|
||||
bt = bh_data(bh);
|
||||
|
||||
/* keep trying leaves until next_key passes last */
|
||||
curs->pos = find_pos_after_seq(bt, &key, 0, seq, op);
|
||||
if (curs->pos >= bt->nr_items) {
|
||||
pos = find_pos_after_seq(bt, &key, 0, seq, op);
|
||||
if (pos >= bt->nr_items) {
|
||||
key = next_key;
|
||||
unlock_block(NULL, bh, false);
|
||||
scoutfs_block_put(bh);
|
||||
continue;
|
||||
}
|
||||
|
||||
set_cursor(curs, bh, curs->pos, false);
|
||||
item = pos_item(bt, pos);
|
||||
if (scoutfs_key_cmp(&item->key, last) <= 0) {
|
||||
*found = item->key;
|
||||
if (found_seq)
|
||||
*found_seq = le64_to_cpu(item->seq);
|
||||
if (val)
|
||||
ret = copy_to_val(val, item);
|
||||
else
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = -ENOENT;
|
||||
}
|
||||
|
||||
unlock_block(NULL, bh, false);
|
||||
scoutfs_block_put(bh);
|
||||
break;
|
||||
}
|
||||
|
||||
/* only return the next item if it's within last */
|
||||
if (curs->bh && scoutfs_key_cmp(curs->key, last) <= 0) {
|
||||
ret = 1;
|
||||
} else {
|
||||
scoutfs_btree_release(curs);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
trace_printk("next first "CKF" last "CKF" found "CKF" ret %d\n",
|
||||
CKA(&start), CKA(last), CKA(found), ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_btree_next(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *first, struct scoutfs_key *last,
|
||||
struct scoutfs_btree_cursor *curs)
|
||||
struct scoutfs_key *found,
|
||||
struct scoutfs_btree_val *val)
|
||||
{
|
||||
return btree_next(sb, root, first, last, 0, WALK_NEXT, curs);
|
||||
return btree_next(sb, root, first, last, 0, WALK_NEXT,
|
||||
found, NULL, val);
|
||||
}
|
||||
|
||||
int scoutfs_btree_since(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *first, struct scoutfs_key *last,
|
||||
u64 seq, struct scoutfs_btree_cursor *curs)
|
||||
u64 seq, struct scoutfs_key *found, u64 *found_seq,
|
||||
struct scoutfs_btree_val *val)
|
||||
{
|
||||
return btree_next(sb, root, first, last, seq, WALK_NEXT_SEQ, curs);
|
||||
return btree_next(sb, root, first, last, seq, WALK_NEXT_SEQ,
|
||||
found, found_seq, val);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1217,6 +1295,8 @@ int scoutfs_btree_dirty(struct super_block *sb,
|
||||
int cmp;
|
||||
int ret;
|
||||
|
||||
trace_printk("key "CKF"\n", CKA(key));
|
||||
|
||||
bh = btree_walk(sb, root, key, NULL, 0, 0, WALK_DIRTY);
|
||||
if (IS_ERR(bh))
|
||||
return PTR_ERR(bh);
|
||||
@@ -1232,17 +1312,22 @@ int scoutfs_btree_dirty(struct super_block *sb,
|
||||
unlock_block(NULL, bh, true);
|
||||
scoutfs_block_put(bh);
|
||||
|
||||
trace_printk("key "CKF" ret %d\n", CKA(key), ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is guaranteed not to fail if the caller has already dirtied the
|
||||
* block that contains the item in the current transaction.
|
||||
*
|
||||
* 0 is returned on success. -EINVAL is returned if the caller's value
|
||||
* length doesn't match the existing item's value length.
|
||||
*/
|
||||
int scoutfs_btree_update(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_cursor *curs)
|
||||
struct scoutfs_btree_val *val)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_btree_block *bt;
|
||||
@@ -1251,8 +1336,6 @@ int scoutfs_btree_update(struct super_block *sb,
|
||||
int cmp;
|
||||
int ret;
|
||||
|
||||
BUG_ON(curs->bh);
|
||||
|
||||
bh = btree_walk(sb, root, key, NULL, 0, 0, WALK_DIRTY);
|
||||
if (IS_ERR(bh))
|
||||
return PTR_ERR(bh);
|
||||
@@ -1261,59 +1344,64 @@ int scoutfs_btree_update(struct super_block *sb,
|
||||
pos = find_pos(bt, key, &cmp);
|
||||
if (cmp == 0) {
|
||||
item = pos_item(bt, pos);
|
||||
item->seq = bt->hdr.seq;
|
||||
set_cursor(curs, bh, pos, true);
|
||||
ret = 0;
|
||||
ret = copy_to_item(item, val);
|
||||
if (ret == 0)
|
||||
item->seq = bt->hdr.seq;
|
||||
} else {
|
||||
unlock_block(NULL, bh, true);
|
||||
scoutfs_block_put(bh);
|
||||
ret = -ENOENT;
|
||||
}
|
||||
|
||||
unlock_block(NULL, bh, true);
|
||||
scoutfs_block_put(bh);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void scoutfs_btree_release(struct scoutfs_btree_cursor *curs)
|
||||
{
|
||||
if (curs->bh) {
|
||||
unlock_block(NULL, curs->bh, curs->write);
|
||||
scoutfs_block_put(curs->bh);
|
||||
}
|
||||
curs->bh = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the first missing key between the caller's keys, inclusive. Set
|
||||
* the caller's hole key and return 0 if we find a missing key. Return
|
||||
* -ENOSPC if all the keys in the range were present or -errno on errors.
|
||||
* Set hole to a missing key in the caller's range.
|
||||
*
|
||||
* The caller ensures that it's safe for us to be walking this region
|
||||
* of the tree.
|
||||
* 0 is returned if we find a missing key, -ENOSPC is returned if all
|
||||
* the keys in the range are present in the tree, and -errno is returned
|
||||
* if we saw an error.
|
||||
*
|
||||
* We try to find the first key in the range. If the next key is past
|
||||
* the first key then we return the key before the found key. This will
|
||||
* tend to let us find the hole with one btree search.
|
||||
*
|
||||
* We keep searching as long as we keep finding the first key and will
|
||||
* return -ENOSPC if we fall off the end of the range doing so.
|
||||
*/
|
||||
int scoutfs_btree_hole(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *first,
|
||||
struct scoutfs_key *last, struct scoutfs_key *hole)
|
||||
{
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_key key = *first;
|
||||
struct scoutfs_key found;
|
||||
int ret;
|
||||
|
||||
*hole = *first;
|
||||
while ((ret = scoutfs_btree_next(sb, root, first, last, &curs)) > 0) {
|
||||
/* return our expected hole if we skipped it */
|
||||
if (scoutfs_key_cmp(hole, curs.key) < 0)
|
||||
break;
|
||||
|
||||
*hole = *curs.key;
|
||||
scoutfs_inc_key(hole);
|
||||
if (WARN_ON_ONCE(scoutfs_key_cmp(first, last) > 0)) {
|
||||
scoutfs_key_set_zero(hole);
|
||||
return -EINVAL;
|
||||
}
|
||||
scoutfs_btree_release(&curs);
|
||||
|
||||
if (ret >= 0) {
|
||||
if (scoutfs_key_cmp(hole, last) <= 0)
|
||||
ret = 0;
|
||||
else
|
||||
ret = -ENOSPC;
|
||||
/* search as long as we keep finding our first key */
|
||||
do {
|
||||
ret = scoutfs_btree_next(sb, root, &key, last, &found, NULL);
|
||||
} while (ret == 0 &&
|
||||
scoutfs_key_cmp(&found, &key) == 0 &&
|
||||
(scoutfs_inc_key(&key), ret = -ENOSPC,
|
||||
scoutfs_key_cmp(&key, last) <= 0));
|
||||
|
||||
if (ret == 0) {
|
||||
*hole = found;
|
||||
scoutfs_dec_key(hole);
|
||||
} else if (ret == -ENOENT) {
|
||||
*hole = *last;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
trace_printk("first "CKF" last "CKF" hole "CKF" ret %d\n",
|
||||
CKA(first), CKA(last), CKA(hole), ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1,51 +1,71 @@
|
||||
#ifndef _SCOUTFS_BTREE_H_
|
||||
#define _SCOUTFS_BTREE_H_
|
||||
|
||||
struct scoutfs_btree_cursor {
|
||||
/* for btree.c */
|
||||
struct buffer_head *bh;
|
||||
unsigned int pos;
|
||||
bool write;
|
||||
#include <linux/uio.h>
|
||||
|
||||
/* for callers */
|
||||
struct scoutfs_key *key;
|
||||
u64 seq;
|
||||
void *val;
|
||||
u16 val_len;
|
||||
struct scoutfs_btree_val {
|
||||
struct kvec vec[3];
|
||||
};
|
||||
|
||||
#define DECLARE_SCOUTFS_BTREE_CURSOR(name) \
|
||||
struct scoutfs_btree_cursor name = {NULL,}
|
||||
static inline void __scoutfs_btree_init_val(struct scoutfs_btree_val *val,
|
||||
void *ptr0, unsigned int len0,
|
||||
void *ptr1, unsigned int len1,
|
||||
void *ptr2, unsigned int len2)
|
||||
{
|
||||
*val = (struct scoutfs_btree_val) {
|
||||
{ { ptr0, len0 }, { ptr1, len1 }, { ptr2, len2 } }
|
||||
};
|
||||
}
|
||||
|
||||
#define _scoutfs_btree_init_val(v, p0, l0, p1, l1, p2, l2, ...) \
|
||||
__scoutfs_btree_init_val(v, p0, l0, p1, l1, p2, l2)
|
||||
|
||||
/*
|
||||
* Provide a nice variadic initialization function without having to
|
||||
* iterate over the callers arg types. We play some macro games to pad
|
||||
* out the callers ptr/len pairs to the full possible number. This will
|
||||
* produce confusing errors if an odd number of arguments is given and
|
||||
* the padded ptr/length types aren't compatible with the fixed
|
||||
* arguments in the static inline.
|
||||
*/
|
||||
#define scoutfs_btree_init_val(val, ...) \
|
||||
_scoutfs_btree_init_val(val, __VA_ARGS__, NULL, 0, NULL, 0, NULL, 0)
|
||||
|
||||
static inline int scoutfs_btree_val_length(struct scoutfs_btree_val *val)
|
||||
{
|
||||
|
||||
return iov_length((struct iovec *)val->vec, ARRAY_SIZE(val->vec));
|
||||
}
|
||||
|
||||
int scoutfs_btree_lookup(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_cursor *curs);
|
||||
struct scoutfs_btree_val *val);
|
||||
int scoutfs_btree_insert(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key, unsigned int val_len,
|
||||
struct scoutfs_btree_cursor *curs);
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_val *val);
|
||||
int scoutfs_btree_delete(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key);
|
||||
int scoutfs_btree_next(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *first, struct scoutfs_key *last,
|
||||
struct scoutfs_btree_cursor *curs);
|
||||
struct scoutfs_key *found,
|
||||
struct scoutfs_btree_val *val);
|
||||
int scoutfs_btree_dirty(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key);
|
||||
int scoutfs_btree_update(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_cursor *curs);
|
||||
struct scoutfs_btree_val *val);
|
||||
int scoutfs_btree_hole(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *first,
|
||||
struct scoutfs_key *last, struct scoutfs_key *hole);
|
||||
int scoutfs_btree_since(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *first, struct scoutfs_key *last,
|
||||
u64 seq, struct scoutfs_btree_cursor *curs);
|
||||
|
||||
void scoutfs_btree_release(struct scoutfs_btree_cursor *curs);
|
||||
u64 seq, struct scoutfs_key *found, u64 *found_seq,
|
||||
struct scoutfs_btree_val *val);
|
||||
|
||||
#endif
|
||||
|
||||
256
kmod/src/dir.c
256
kmod/src/dir.c
@@ -112,11 +112,6 @@ static unsigned int dent_bytes(unsigned int name_len)
|
||||
return sizeof(struct scoutfs_dirent) + name_len;
|
||||
}
|
||||
|
||||
static unsigned int item_name_len(struct scoutfs_btree_cursor *curs)
|
||||
{
|
||||
return curs->val_len - sizeof(struct scoutfs_dirent);
|
||||
}
|
||||
|
||||
/*
|
||||
* Each dirent stores the values that are needed to build the keys of
|
||||
 * the items that are removed on unlink so that we don't have to search through
|
||||
@@ -190,13 +185,14 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry,
|
||||
unsigned int flags)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(dir);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct super_block *sb = dir->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
struct scoutfs_dirent *dent;
|
||||
struct scoutfs_dirent *dent = NULL;
|
||||
struct scoutfs_btree_val val;
|
||||
struct dentry_info *di;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
unsigned int item_len;
|
||||
unsigned int name_len;
|
||||
struct inode *inode;
|
||||
u64 ino = 0;
|
||||
@@ -214,29 +210,52 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry,
|
||||
goto out;
|
||||
}
|
||||
|
||||
item_len = offsetof(struct scoutfs_dirent, name[dentry->d_name.len]);
|
||||
dent = kmalloc(item_len, GFP_KERNEL);
|
||||
if (!dent) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
h = name_hash(dentry->d_name.name, dentry->d_name.len, si->salt);
|
||||
|
||||
scoutfs_set_key(&first, scoutfs_ino(dir), SCOUTFS_DIRENT_KEY, h);
|
||||
scoutfs_set_key(&key, scoutfs_ino(dir), SCOUTFS_DIRENT_KEY, h);
|
||||
scoutfs_set_key(&last, scoutfs_ino(dir), SCOUTFS_DIRENT_KEY,
|
||||
last_dirent_key_offset(h));
|
||||
|
||||
while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) {
|
||||
scoutfs_btree_init_val(&val, dent, item_len);
|
||||
|
||||
/* XXX verify */
|
||||
for (;;) {
|
||||
ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
dent = curs.val;
|
||||
name_len = item_name_len(&curs);
|
||||
/* XXX more verification */
|
||||
/* XXX corruption */
|
||||
if (ret <= sizeof(struct scoutfs_dirent)) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
name_len = ret - sizeof(struct scoutfs_dirent);
|
||||
if (scoutfs_names_equal(dentry->d_name.name, dentry->d_name.len,
|
||||
dent->name, name_len)) {
|
||||
ino = le64_to_cpu(dent->ino);
|
||||
update_dentry_info(di, curs.key, dent);
|
||||
update_dentry_info(di, &key, dent);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
scoutfs_inc_key(&key);
|
||||
}
|
||||
|
||||
scoutfs_btree_release(&curs);
|
||||
|
||||
out:
|
||||
kfree(dent);
|
||||
|
||||
if (ret < 0)
|
||||
inode = ERR_PTR(ret);
|
||||
else if (ino == 0)
|
||||
@@ -281,26 +300,46 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir)
|
||||
struct inode *inode = file_inode(file);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_dirent *dent;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key last;
|
||||
unsigned int item_len;
|
||||
unsigned int name_len;
|
||||
int ret;
|
||||
u32 pos;
|
||||
int ret;
|
||||
|
||||
if (!dir_emit_dots(file, dirent, filldir))
|
||||
return 0;
|
||||
|
||||
scoutfs_set_key(&first, scoutfs_ino(inode), SCOUTFS_DIRENT_KEY,
|
||||
item_len = offsetof(struct scoutfs_dirent, name[SCOUTFS_NAME_LEN]);
|
||||
dent = kmalloc(item_len, GFP_KERNEL);
|
||||
if (!dent)
|
||||
return -ENOMEM;
|
||||
|
||||
scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_DIRENT_KEY,
|
||||
file->f_pos);
|
||||
scoutfs_set_key(&last, scoutfs_ino(inode), SCOUTFS_DIRENT_KEY,
|
||||
SCOUTFS_DIRENT_LAST_POS);
|
||||
|
||||
while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) {
|
||||
dent = curs.val;
|
||||
name_len = item_name_len(&curs);
|
||||
pos = scoutfs_key_offset(curs.key);
|
||||
scoutfs_btree_init_val(&val, dent, item_len);
|
||||
|
||||
for (;;) {
|
||||
ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* XXX corruption */
|
||||
if (ret <= sizeof(dent)) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
name_len = ret - sizeof(struct scoutfs_dirent);
|
||||
pos = scoutfs_key_offset(&key);
|
||||
|
||||
if (filldir(dirent, dent->name, name_len, pos,
|
||||
le64_to_cpu(dent->ino), dentry_type(dent->type))) {
|
||||
@@ -309,10 +348,10 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir)
|
||||
}
|
||||
|
||||
file->f_pos = pos + 1;
|
||||
scoutfs_inc_key(&key);
|
||||
}
|
||||
|
||||
scoutfs_btree_release(&curs);
|
||||
|
||||
kfree(dent);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -325,22 +364,19 @@ static int update_lref_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
u64 dir_ino, u64 dir_off, bool update)
|
||||
{
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_link_backref *lref;
|
||||
struct scoutfs_link_backref lref;
|
||||
struct scoutfs_btree_val val;
|
||||
int ret;
|
||||
|
||||
if (update)
|
||||
ret = scoutfs_btree_update(sb, meta, key, &curs);
|
||||
else
|
||||
ret = scoutfs_btree_insert(sb, meta, key, sizeof(*lref), &curs);
|
||||
lref.ino = cpu_to_le64(dir_ino);
|
||||
lref.offset = cpu_to_le64(dir_off);
|
||||
|
||||
/* XXX verify size */
|
||||
if (ret == 0) {
|
||||
lref = curs.val;
|
||||
lref->ino = cpu_to_le64(dir_ino);
|
||||
lref->offset = cpu_to_le64(dir_off);
|
||||
scoutfs_btree_release(&curs);
|
||||
}
|
||||
scoutfs_btree_init_val(&val, &lref, sizeof(lref));
|
||||
|
||||
if (update)
|
||||
ret = scoutfs_btree_update(sb, meta, key, &val);
|
||||
else
|
||||
ret = scoutfs_btree_insert(sb, meta, key, &val);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -352,8 +388,8 @@ static int add_entry_items(struct inode *dir, struct dentry *dentry,
|
||||
struct super_block *sb = dir->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(dir);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_dirent *dent;
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_dirent dent;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
@@ -390,20 +426,19 @@ static int add_entry_items(struct inode *dir, struct dentry *dentry,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_btree_insert(sb, meta, &key, bytes, &curs);
|
||||
if (ret) {
|
||||
dent.ino = cpu_to_le64(scoutfs_ino(inode));
|
||||
dent.counter = lref_key.offset;
|
||||
dent.type = mode_to_type(inode->i_mode);
|
||||
|
||||
scoutfs_btree_init_val(&val, &dent, sizeof(dent),
|
||||
(void *)dentry->d_name.name,
|
||||
dentry->d_name.len);
|
||||
|
||||
ret = scoutfs_btree_insert(sb, meta, &key, &val);
|
||||
if (ret)
|
||||
scoutfs_btree_delete(sb, meta, &lref_key);
|
||||
goto out;
|
||||
}
|
||||
|
||||
dent = curs.val;
|
||||
dent->ino = cpu_to_le64(scoutfs_ino(inode));
|
||||
dent->counter = lref_key.offset;
|
||||
dent->type = mode_to_type(inode->i_mode);
|
||||
memcpy(dent->name, dentry->d_name.name, dentry->d_name.len);
|
||||
update_dentry_info(di, &key, dent);
|
||||
|
||||
scoutfs_btree_release(&curs);
|
||||
else
|
||||
update_dentry_info(di, &key, &dent);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
@@ -579,11 +614,11 @@ static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
loff_t size = i_size_read(inode);
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_key key;
|
||||
char *path;
|
||||
int bytes;
|
||||
int off;
|
||||
int ret;
|
||||
int k;
|
||||
@@ -600,24 +635,28 @@ static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
if (!path)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
scoutfs_set_key(&first, scoutfs_ino(inode), SCOUTFS_SYMLINK_KEY, 0);
|
||||
scoutfs_set_key(&last, scoutfs_ino(inode), SCOUTFS_SYMLINK_KEY, ~0ULL);
|
||||
for (off = 0, k = 0; off < size ; k++) {
|
||||
scoutfs_set_key(&key, scoutfs_ino(inode),
|
||||
SCOUTFS_SYMLINK_KEY, k);
|
||||
bytes = min_t(int, size - off, SCOUTFS_MAX_ITEM_LEN);
|
||||
scoutfs_btree_init_val(&val, path + off, bytes);
|
||||
|
||||
off = 0;
|
||||
k = 0;
|
||||
while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) {
|
||||
if (scoutfs_key_offset(curs.key) != k ||
|
||||
off + curs.val_len > size) {
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &val);
|
||||
if (ret < 0) {
|
||||
/* XXX corruption */
|
||||
scoutfs_btree_release(&curs);
|
||||
if (ret == -ENOENT)
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
/* XXX corruption */
|
||||
if (ret != bytes) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
memcpy(path + off, curs.val, curs.val_len);
|
||||
|
||||
off += curs.val_len;
|
||||
k++;
|
||||
off += bytes;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
/* XXX corruption */
|
||||
@@ -661,7 +700,7 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
{
|
||||
struct super_block *sb = dir->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_btree_val val;
|
||||
struct inode *inode = NULL;
|
||||
struct scoutfs_key key;
|
||||
struct dentry_info *di;
|
||||
@@ -694,12 +733,11 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
k);
|
||||
bytes = min(name_len - off, SCOUTFS_MAX_ITEM_LEN);
|
||||
|
||||
ret = scoutfs_btree_insert(sb, meta, &key, bytes, &curs);
|
||||
scoutfs_btree_init_val(&val, (char *)symname + off, bytes);
|
||||
|
||||
ret = scoutfs_btree_insert(sb, meta, &key, &val);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
memcpy(curs.val, symname + off, bytes);
|
||||
scoutfs_btree_release(&curs);
|
||||
}
|
||||
|
||||
ret = add_entry_items(dir, dentry, inode);
|
||||
@@ -741,24 +779,22 @@ out:
|
||||
int scoutfs_symlink_drop(struct super_block *sb, u64 ino)
|
||||
{
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
int nr;
|
||||
int k;
|
||||
|
||||
scoutfs_set_key(&first, ino, SCOUTFS_SYMLINK_KEY, 0);
|
||||
scoutfs_set_key(&last, ino, SCOUTFS_SYMLINK_KEY, ~0ULL);
|
||||
nr = DIV_ROUND_UP(SCOUTFS_SYMLINK_MAX_SIZE, SCOUTFS_MAX_ITEM_LEN);
|
||||
|
||||
while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) {
|
||||
key = *curs.key;
|
||||
first = *curs.key;
|
||||
scoutfs_inc_key(&first);
|
||||
scoutfs_btree_release(&curs);
|
||||
for (k = 0; k < nr; k++) {
|
||||
scoutfs_set_key(&key, ino, SCOUTFS_SYMLINK_KEY, k);
|
||||
|
||||
ret = scoutfs_btree_delete(sb, meta, &key);
|
||||
if (ret)
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -787,9 +823,9 @@ static int add_linkref_name(struct super_block *sb, u64 *dir_ino, u64 ino,
|
||||
{
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
struct scoutfs_path_component *comp;
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_link_backref *lref;
|
||||
struct scoutfs_dirent *dent;
|
||||
struct scoutfs_link_backref lref;
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_dirent dent;
|
||||
struct inode *inode = NULL;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key last;
|
||||
@@ -807,20 +843,28 @@ retry:
|
||||
scoutfs_set_key(&first, ino, SCOUTFS_LINK_BACKREF_KEY, *ctr);
|
||||
scoutfs_set_key(&last, ino, SCOUTFS_LINK_BACKREF_KEY, ~0ULL);
|
||||
|
||||
ret = scoutfs_btree_next(sb, meta, &first, &last, &curs);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
scoutfs_btree_init_val(&val, &lref, sizeof(lref));
|
||||
|
||||
lref = curs.val;
|
||||
*dir_ino = le64_to_cpu(lref->ino),
|
||||
off = le64_to_cpu(lref->offset);
|
||||
*ctr = scoutfs_key_offset(curs.key);
|
||||
ret = scoutfs_btree_next(sb, meta, &first, &last, &key, &val);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* XXX corruption */
|
||||
if (ret != sizeof(lref)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
*dir_ino = le64_to_cpu(lref.ino),
|
||||
off = le64_to_cpu(lref.offset);
|
||||
*ctr = scoutfs_key_offset(&key);
|
||||
|
||||
trace_printk("ino %llu ctr %llu dir_ino %llu off %llu\n",
|
||||
ino, *ctr, *dir_ino, off);
|
||||
|
||||
scoutfs_btree_release(&curs);
|
||||
|
||||
/* XXX corruption, should never be key == U64_MAX */
|
||||
if (*ctr == U64_MAX) {
|
||||
ret = -EIO;
|
||||
@@ -852,8 +896,10 @@ retry:
|
||||
}
|
||||
|
||||
scoutfs_set_key(&key, *dir_ino, SCOUTFS_DIRENT_KEY, off);
|
||||
scoutfs_btree_init_val(&val, &dent, sizeof(dent),
|
||||
comp->name, SCOUTFS_NAME_LEN);
|
||||
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &curs);
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &val);
|
||||
if (ret < 0) {
|
||||
/* XXX corruption, should always have dirent for backref */
|
||||
if (ret == -ENOENT)
|
||||
@@ -861,10 +907,14 @@ retry:
|
||||
goto out;
|
||||
}
|
||||
|
||||
dent = curs.val;
|
||||
len = item_name_len(&curs);
|
||||
/* XXX corruption */
|
||||
if (ret < sizeof(dent)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
trace_printk("dent ino %llu len %d\n", le64_to_cpu(dent->ino), len);
|
||||
len = ret - sizeof(dent);
|
||||
trace_printk("dent ino %llu len %d\n", le64_to_cpu(dent.ino), len);
|
||||
|
||||
/* XXX corruption */
|
||||
if (len < 1 || len > SCOUTFS_NAME_LEN) {
|
||||
@@ -873,18 +923,16 @@ retry:
|
||||
}
|
||||
|
||||
/* XXX corruption, dirents should always match link backref */
|
||||
if (le64_to_cpu(dent->ino) != ino) {
|
||||
if (le64_to_cpu(dent.ino) != ino) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
(*ctr)++;
|
||||
comp->len = len;
|
||||
memcpy(comp->name, dent->name, len);
|
||||
list_add(&comp->head, list);
|
||||
comp = NULL; /* won't be freed */
|
||||
|
||||
scoutfs_btree_release(&curs);
|
||||
ret = 1;
|
||||
out:
|
||||
if (inode) {
|
||||
|
||||
@@ -203,12 +203,11 @@ static bool bmap_has_blocks(struct scoutfs_block_map *bmap)
|
||||
int scoutfs_truncate_block_items(struct super_block *sb, u64 ino, u64 size)
|
||||
{
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_block_map *bmap;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_block_map bmap;
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
bool delete;
|
||||
bool modified;
|
||||
u64 iblock;
|
||||
u64 blkno;
|
||||
int ret;
|
||||
@@ -217,27 +216,38 @@ int scoutfs_truncate_block_items(struct super_block *sb, u64 ino, u64 size)
|
||||
iblock = DIV_ROUND_UP(size, SCOUTFS_BLOCK_SIZE);
|
||||
i = iblock & SCOUTFS_BLOCK_MAP_MASK;
|
||||
|
||||
scoutfs_set_key(&first, ino, SCOUTFS_BMAP_KEY,
|
||||
scoutfs_set_key(&key, ino, SCOUTFS_BMAP_KEY,
|
||||
iblock & ~(u64)SCOUTFS_BLOCK_MAP_MASK);
|
||||
scoutfs_set_key(&last, ino, SCOUTFS_BMAP_KEY, ~0ULL);
|
||||
|
||||
trace_printk("iblock %llu i %d\n", iblock, i);
|
||||
|
||||
while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) {
|
||||
key = *curs.key;
|
||||
first = *curs.key;
|
||||
scoutfs_inc_key(&first);
|
||||
scoutfs_btree_release(&curs);
|
||||
scoutfs_btree_init_val(&val, &bmap, sizeof(bmap));
|
||||
|
||||
ret = scoutfs_btree_update(sb, meta, &key, &curs);
|
||||
for (;;) {
|
||||
ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* XXX corruption */
|
||||
if (ret != sizeof(bmap)) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
/* XXX check bmap sanity */
|
||||
|
||||
/* make sure we can update bmap after freeing */
|
||||
ret = scoutfs_btree_dirty(sb, meta, &key);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
/* XXX check sanity */
|
||||
bmap = curs.val;
|
||||
|
||||
modified = false;
|
||||
for (; i < SCOUTFS_BLOCK_MAP_COUNT; i++) {
|
||||
blkno = le64_to_cpu(bmap->blkno[i]);
|
||||
blkno = le64_to_cpu(bmap.blkno[i]);
|
||||
if (blkno == 0)
|
||||
continue;
|
||||
|
||||
@@ -245,23 +255,22 @@ int scoutfs_truncate_block_items(struct super_block *sb, u64 ino, u64 size)
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
bmap->blkno[i] = 0;
|
||||
bmap.blkno[i] = 0;
|
||||
modified = true;
|
||||
}
|
||||
delete = !bmap_has_blocks(bmap);
|
||||
i = 0;
|
||||
|
||||
/* dirtying should have prevented these from failing */
|
||||
if (!bmap_has_blocks(&bmap))
|
||||
scoutfs_btree_delete(sb, meta, &key);
|
||||
else if (modified)
|
||||
scoutfs_btree_update(sb, meta, &key, &val);
|
||||
|
||||
scoutfs_btree_release(&curs);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
i = 0;
|
||||
|
||||
if (delete) {
|
||||
ret = scoutfs_btree_delete(sb, meta, &key);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
|
||||
/* XXX sync transaction if it's enormous */
|
||||
scoutfs_inc_key(&key);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -303,8 +312,8 @@ static int contig_mapped_blocks(struct inode *inode, u64 iblock, u64 *blkno)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_block_map *bmap;
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_block_map bmap;
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
int i;
|
||||
@@ -312,18 +321,21 @@ static int contig_mapped_blocks(struct inode *inode, u64 iblock, u64 *blkno)
|
||||
*blkno = 0;
|
||||
|
||||
set_bmap_key(&key, inode, iblock);
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &curs);
|
||||
if (!ret) {
|
||||
bmap = curs.val;
|
||||
scoutfs_btree_init_val(&val, &bmap, sizeof(bmap));
|
||||
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &val);
|
||||
if (ret == sizeof(bmap)) {
|
||||
i = iblock & SCOUTFS_BLOCK_MAP_MASK;
|
||||
*blkno = le64_to_cpu(bmap->blkno[i]);
|
||||
*blkno = le64_to_cpu(bmap.blkno[i]);
|
||||
|
||||
while (i < SCOUTFS_BLOCK_MAP_COUNT && bmap->blkno[i]) {
|
||||
ret = 0;
|
||||
while (i < SCOUTFS_BLOCK_MAP_COUNT && bmap.blkno[i]) {
|
||||
ret++;
|
||||
i++;
|
||||
}
|
||||
scoutfs_btree_release(&curs);
|
||||
} else if (ret >= 0) {
|
||||
/* XXX corruption */
|
||||
ret = -EIO;
|
||||
} else if (ret == -ENOENT) {
|
||||
ret = 0;
|
||||
}
|
||||
@@ -350,8 +362,8 @@ static int map_writable_block(struct inode *inode, u64 iblock, u64 *blkno_ret)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_block_map *bmap;
|
||||
struct scoutfs_block_map bmap;
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_key key;
|
||||
bool inserted = false;
|
||||
u64 old_blkno = 0;
|
||||
@@ -361,25 +373,35 @@ static int map_writable_block(struct inode *inode, u64 iblock, u64 *blkno_ret)
|
||||
int i;
|
||||
|
||||
set_bmap_key(&key, inode, iblock);
|
||||
scoutfs_btree_init_val(&val, &bmap, sizeof(bmap));
|
||||
|
||||
/* we always need a writable block map item */
|
||||
ret = scoutfs_btree_update(sb, meta, &key, &curs);
|
||||
/* see if there's an existing mapping */
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &val);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
|
||||
/* might need to create a new item and delete it after errors */
|
||||
/* make sure that updating the bmap item won't fail */
|
||||
if (ret == -ENOENT) {
|
||||
ret = scoutfs_btree_insert(sb, meta, &key, sizeof(*bmap),
|
||||
&curs);
|
||||
if (ret < 0)
|
||||
memset(&bmap, 0, sizeof(bmap));
|
||||
ret = scoutfs_btree_insert(sb, meta, &key, &val);
|
||||
if (ret)
|
||||
goto out;
|
||||
memset(curs.val, 0, sizeof(*bmap));
|
||||
inserted = true;
|
||||
|
||||
} else {
|
||||
/* XXX corruption */
|
||||
if (ret != sizeof(bmap)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_btree_dirty(sb, meta, &key);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
bmap = curs.val;
|
||||
i = iblock & SCOUTFS_BLOCK_MAP_MASK;
|
||||
old_blkno = le64_to_cpu(bmap->blkno[i]);
|
||||
old_blkno = le64_to_cpu(bmap.blkno[i]);
|
||||
|
||||
/*
|
||||
* If the existing block was free in stable then its dirty in
|
||||
@@ -406,12 +428,16 @@ static int map_writable_block(struct inode *inode, u64 iblock, u64 *blkno_ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
bmap->blkno[i] = cpu_to_le64(new_blkno);
|
||||
bmap.blkno[i] = cpu_to_le64(new_blkno);
|
||||
|
||||
/* dirtying guarantees success */
|
||||
err = scoutfs_btree_update(sb, meta, &key, &val);
|
||||
BUG_ON(err);
|
||||
|
||||
*blkno_ret = new_blkno;
|
||||
new_blkno = 0;
|
||||
ret = 0;
|
||||
out:
|
||||
scoutfs_btree_release(&curs);
|
||||
if (ret) {
|
||||
if (new_blkno)
|
||||
return_file_block(sb, new_blkno);
|
||||
|
||||
@@ -125,21 +125,25 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
|
||||
static int scoutfs_read_locked_inode(struct inode *inode)
|
||||
{
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_INODE_KEY, 0);
|
||||
scoutfs_btree_init_val(&val, &sinode, sizeof(sinode));
|
||||
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &curs);
|
||||
if (!ret) {
|
||||
load_inode(inode, curs.val);
|
||||
scoutfs_btree_release(&curs);
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &val);
|
||||
if (ret == sizeof(sinode)) {
|
||||
load_inode(inode, &sinode);
|
||||
ret = 0;
|
||||
} else if (ret >= 0) {
|
||||
ret = -EIO;
|
||||
}
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int scoutfs_iget_test(struct inode *inode, void *arg)
|
||||
@@ -252,19 +256,20 @@ int scoutfs_dirty_inode_item(struct inode *inode)
|
||||
*/
|
||||
void scoutfs_update_inode_item(struct inode *inode)
|
||||
{
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
int err;
|
||||
|
||||
scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_INODE_KEY, 0);
|
||||
scoutfs_btree_init_val(&val, &sinode, sizeof(sinode));
|
||||
store_inode(&sinode, inode);
|
||||
|
||||
err = scoutfs_btree_update(sb, meta, &key, &curs);
|
||||
err = scoutfs_btree_update(sb, meta, &key, &val);
|
||||
BUG_ON(err);
|
||||
|
||||
store_inode(curs.val, inode);
|
||||
scoutfs_btree_release(&curs);
|
||||
trace_scoutfs_update_inode(inode);
|
||||
}
|
||||
|
||||
@@ -313,8 +318,9 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
|
||||
umode_t mode, dev_t rdev)
|
||||
{
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_inode_info *ci;
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
struct inode *inode;
|
||||
u64 ino;
|
||||
@@ -341,15 +347,15 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
|
||||
set_inode_ops(inode);
|
||||
|
||||
scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_INODE_KEY, 0);
|
||||
scoutfs_btree_init_val(&val, &sinode, sizeof(sinode));
|
||||
store_inode(&sinode, inode);
|
||||
|
||||
ret = scoutfs_btree_insert(inode->i_sb, meta, &key,
|
||||
sizeof(struct scoutfs_inode), &curs);
|
||||
ret = scoutfs_btree_insert(inode->i_sb, meta, &key, &val);
|
||||
if (ret) {
|
||||
iput(inode);
|
||||
return ERR_PTR(ret);
|
||||
}
|
||||
|
||||
scoutfs_btree_release(&curs);
|
||||
return inode;
|
||||
}
|
||||
|
||||
@@ -359,22 +365,28 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
|
||||
static void drop_inode_items(struct super_block *sb, u64 ino)
|
||||
{
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_inode *sinode;
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
bool release = false;
|
||||
umode_t mode;
|
||||
int ret;
|
||||
|
||||
/* sample the inode mode */
|
||||
/* sample the inode mode, XXX don't need to copy whole thing here */
|
||||
scoutfs_set_key(&key, ino, SCOUTFS_INODE_KEY, 0);
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &curs);
|
||||
if (ret)
|
||||
scoutfs_btree_init_val(&val, &sinode, sizeof(sinode));
|
||||
|
||||
ret = scoutfs_btree_lookup(sb, meta, &key, &val);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sinode = curs.val;
|
||||
mode = le32_to_cpu(sinode->mode);
|
||||
scoutfs_btree_release(&curs);
|
||||
/* XXX corruption */
|
||||
if (ret != sizeof(sinode)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
mode = le32_to_cpu(sinode.mode);
|
||||
|
||||
ret = scoutfs_hold_trans(sb);
|
||||
if (ret)
|
||||
|
||||
@@ -39,9 +39,9 @@ static long scoutfs_ioc_inodes_since(struct file *file, unsigned long arg)
|
||||
struct scoutfs_ioctl_inodes_since args;
|
||||
struct scoutfs_ioctl_ino_seq __user *uiseq;
|
||||
struct scoutfs_ioctl_ino_seq iseq;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key last;
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
u64 seq;
|
||||
long bytes;
|
||||
int ret;
|
||||
|
||||
@@ -52,34 +52,25 @@ static long scoutfs_ioc_inodes_since(struct file *file, unsigned long arg)
|
||||
if (args.buf_len < sizeof(iseq) || args.buf_len > INT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
scoutfs_set_key(&first, args.first_ino, SCOUTFS_INODE_KEY, 0);
|
||||
scoutfs_set_key(&key, args.first_ino, SCOUTFS_INODE_KEY, 0);
|
||||
scoutfs_set_key(&last, args.last_ino, SCOUTFS_INODE_KEY, 0);
|
||||
|
||||
bytes = 0;
|
||||
while ((ret = scoutfs_btree_since(sb, meta, &first, &last,
|
||||
args.seq, &curs)) > 0) {
|
||||
for (;;) {
|
||||
ret = scoutfs_btree_since(sb, meta, &key, &last, args.seq,
|
||||
&key, &seq, NULL);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
iseq.ino = scoutfs_key_inode(curs.key);
|
||||
iseq.seq = curs.seq;
|
||||
iseq.ino = scoutfs_key_inode(&key);
|
||||
iseq.seq = seq;
|
||||
|
||||
/*
|
||||
* We can't copy to userspace with our locks held
|
||||
* because faults could try to use tree blocks that we
|
||||
* have locked. If a non-faulting copy fails we release
|
||||
* the cursor and try a blocking copy and pick up where
|
||||
* we left off.
|
||||
*/
|
||||
pagefault_disable();
|
||||
ret = __copy_to_user_inatomic(uiseq, &iseq, sizeof(iseq));
|
||||
pagefault_enable();
|
||||
if (ret) {
|
||||
first = *curs.key;
|
||||
scoutfs_inc_key(&first);
|
||||
scoutfs_btree_release(&curs);
|
||||
if (copy_to_user(uiseq, &iseq, sizeof(iseq))) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
if (copy_to_user(uiseq, &iseq, sizeof(iseq))) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
uiseq++;
|
||||
@@ -88,9 +79,9 @@ static long scoutfs_ioc_inodes_since(struct file *file, unsigned long arg)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
scoutfs_btree_release(&curs);
|
||||
scoutfs_inc_key(&key);
|
||||
}
|
||||
|
||||
if (bytes)
|
||||
ret = bytes;
|
||||
@@ -219,16 +210,14 @@ static long scoutfs_ioc_find_xattr(struct file *file, unsigned long arg,
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
struct scoutfs_ioctl_find_xattr args;
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key last;
|
||||
char __user *ustr;
|
||||
u64 __user *uino;
|
||||
u64 inos[32];
|
||||
char *str;
|
||||
int nr_inos = 0;
|
||||
int copied = 0;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
u64 ino;
|
||||
u8 type;
|
||||
u64 h;
|
||||
|
||||
@@ -238,6 +227,9 @@ static long scoutfs_ioc_find_xattr(struct file *file, unsigned long arg,
|
||||
if (args.str_len > SCOUTFS_MAX_XATTR_LEN || args.ino_count > INT_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
if (args.first_ino > args.last_ino)
|
||||
return -EINVAL;
|
||||
|
||||
if (args.ino_count == 0)
|
||||
return 0;
|
||||
|
||||
@@ -262,36 +254,27 @@ static long scoutfs_ioc_find_xattr(struct file *file, unsigned long arg,
|
||||
type = SCOUTFS_XATTR_VAL_HASH_KEY;
|
||||
}
|
||||
|
||||
scoutfs_set_key(&first, h, type, args.first_ino);
|
||||
scoutfs_set_key(&key, h, type, args.first_ino);
|
||||
scoutfs_set_key(&last, h, type, args.last_ino);
|
||||
|
||||
while (copied < args.ino_count) {
|
||||
|
||||
while ((ret = scoutfs_btree_next(sb, meta, &first, &last,
|
||||
&curs)) > 0) {
|
||||
inos[nr_inos++] = scoutfs_key_offset(curs.key);
|
||||
|
||||
first = *curs.key;
|
||||
scoutfs_inc_key(&first);
|
||||
|
||||
if (nr_inos == ARRAY_SIZE(inos) ||
|
||||
(nr_inos + copied) == args.ino_count) {
|
||||
scoutfs_btree_release(&curs);
|
||||
ret = scoutfs_btree_next(sb, meta, &key, &last, &key, NULL);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ret < 0 || nr_inos == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (copy_to_user(uino, inos, nr_inos * sizeof(u64))) {
|
||||
ino = scoutfs_key_offset(&key);
|
||||
if (put_user(ino, uino)) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
uino += nr_inos;
|
||||
copied += nr_inos;
|
||||
nr_inos = 0;
|
||||
uino++;
|
||||
copied++;
|
||||
scoutfs_inc_key(&key);
|
||||
}
|
||||
|
||||
out:
|
||||
|
||||
297
kmod/src/xattr.c
297
kmod/src/xattr.c
@@ -115,30 +115,49 @@ static int search_xattr_items(struct inode *inode, const char *name,
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_xattr *xat;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
unsigned int max_len;
|
||||
int ret;
|
||||
|
||||
set_xattr_keys(inode, &first, &last, name, name_len);
|
||||
max_len = xat_bytes(SCOUTFS_MAX_XATTR_LEN, SCOUTFS_MAX_XATTR_LEN),
|
||||
xat = kmalloc(max_len, GFP_KERNEL);
|
||||
if (!xat)
|
||||
return -ENOMEM;
|
||||
|
||||
set_xattr_keys(inode, &key, &last, name, name_len);
|
||||
scoutfs_btree_init_val(&val, xat, max_len);
|
||||
|
||||
res->found = false;
|
||||
res->other_coll = false;
|
||||
res->found_hole = false;
|
||||
res->hole_key = first;
|
||||
res->hole_key = key;
|
||||
|
||||
while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) {
|
||||
xat = curs.val;
|
||||
for (;;) {
|
||||
ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* XXX corruption */
|
||||
if (ret < sizeof(struct scoutfs_xattr) ||
|
||||
ret != xat_bytes(xat->name_len, xat->value_len)) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
/* found a hole when we skip past next expected key */
|
||||
if (!res->found_hole &&
|
||||
scoutfs_key_cmp(&res->hole_key, curs.key) < 0)
|
||||
scoutfs_key_cmp(&res->hole_key, &key) < 0)
|
||||
res->found_hole = true;
|
||||
|
||||
/* keep searching for a hole past this cursor key */
|
||||
/* keep searching for a hole past this key */
|
||||
if (!res->found_hole) {
|
||||
res->hole_key = *curs.key;
|
||||
res->hole_key = key;
|
||||
scoutfs_inc_key(&res->hole_key);
|
||||
}
|
||||
|
||||
@@ -147,7 +166,7 @@ static int search_xattr_items(struct inode *inode, const char *name,
|
||||
scoutfs_names_equal(name, name_len, xat->name,
|
||||
xat->name_len)) {
|
||||
res->found = true;
|
||||
res->key = *curs.key;
|
||||
res->key = key;
|
||||
res->val_hash = scoutfs_name_hash(xat_value(xat),
|
||||
xat->value_len);
|
||||
} else {
|
||||
@@ -157,11 +176,13 @@ static int search_xattr_items(struct inode *inode, const char *name,
|
||||
/* finished once we have all the caller needs */
|
||||
if (res->found && res->other_coll && res->found_hole) {
|
||||
ret = 0;
|
||||
scoutfs_btree_release(&curs);
|
||||
break;
|
||||
}
|
||||
|
||||
scoutfs_inc_key(&key);
|
||||
}
|
||||
|
||||
kfree(xat);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -178,56 +199,55 @@ static int insert_xattr(struct inode *inode, const char *name,
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
bool inserted_name_hash_item = false;
|
||||
__le64 * __packed refcount;
|
||||
struct scoutfs_btree_val val;
|
||||
__le64 refcount;
|
||||
struct scoutfs_key name_key;
|
||||
struct scoutfs_key val_key;
|
||||
struct scoutfs_xattr *xat;
|
||||
struct scoutfs_xattr xat;
|
||||
int ret;
|
||||
|
||||
/* insert the main xattr item */
|
||||
set_name_val_keys(&name_key, &val_key, key, val_hash);
|
||||
scoutfs_btree_init_val(&val, &xat, sizeof(xat), (void *)name, name_len,
|
||||
(void *)value, size);
|
||||
|
||||
ret = scoutfs_btree_insert(sb, meta, key,
|
||||
xat_bytes(name_len, size), &curs);
|
||||
xat.name_len = name_len;
|
||||
xat.value_len = size;
|
||||
|
||||
ret = scoutfs_btree_insert(sb, meta, key, &val);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
xat = curs.val;
|
||||
xat->name_len = name_len;
|
||||
xat->value_len = size;
|
||||
memcpy(xat->name, name, name_len);
|
||||
memcpy(xat_value(xat), value, size);
|
||||
|
||||
scoutfs_btree_release(&curs);
|
||||
|
||||
/* insert the name hash item for find_xattr if we're first */
|
||||
if (!other_coll) {
|
||||
ret = scoutfs_btree_insert(sb, meta, &name_key, 0, &curs);
|
||||
ret = scoutfs_btree_insert(sb, meta, &name_key, NULL);
|
||||
/* XXX eexist would be corruption */
|
||||
if (ret)
|
||||
goto out;
|
||||
scoutfs_btree_release(&curs);
|
||||
inserted_name_hash_item = true;
|
||||
}
|
||||
|
||||
/* increment the val hash item for find_xattr, inserting if first */
|
||||
ret = scoutfs_btree_update(sb, meta, &val_key, &curs);
|
||||
if (ret == -ENOENT) {
|
||||
ret = scoutfs_btree_insert(sb, meta, &val_key,
|
||||
sizeof(*refcount), &curs);
|
||||
if (ret == 0) {
|
||||
/* XXX test sane item size */
|
||||
refcount = curs.val;
|
||||
*refcount = 0;
|
||||
}
|
||||
}
|
||||
if (ret == 0) {
|
||||
refcount = curs.val;
|
||||
le64_add_cpu(refcount, 1);
|
||||
scoutfs_btree_release(&curs);
|
||||
}
|
||||
scoutfs_btree_init_val(&val, &refcount, sizeof(refcount));
|
||||
|
||||
ret = scoutfs_btree_lookup(sb, meta, &val_key, &val);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
|
||||
if (ret == -ENOENT) {
|
||||
refcount = cpu_to_le64(1);
|
||||
ret = scoutfs_btree_insert(sb, meta, &val_key, &val);
|
||||
} else {
|
||||
/* XXX corruption */
|
||||
if (ret != sizeof(refcount)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
le64_add_cpu(&refcount, 1);
|
||||
ret = scoutfs_btree_update(sb, meta, &val_key, &val);
|
||||
}
|
||||
out:
|
||||
if (ret) {
|
||||
scoutfs_btree_delete(sb, meta, key);
|
||||
@@ -247,15 +267,29 @@ static int delete_xattr(struct super_block *sb, struct scoutfs_key *key,
|
||||
bool other_coll, u64 val_hash)
|
||||
{
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_key name_key;
|
||||
struct scoutfs_key val_key;
|
||||
__le64 * __packed refcount;
|
||||
bool del_val = false;
|
||||
__le64 refcount;
|
||||
int ret;
|
||||
|
||||
set_name_val_keys(&name_key, &val_key, key, val_hash);
|
||||
|
||||
/* update the val_hash refcount, making sure it's not nonsense */
|
||||
scoutfs_btree_init_val(&val, &refcount, sizeof(refcount));
|
||||
ret = scoutfs_btree_lookup(sb, meta, &val_key, &val);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* XXX corruption */
|
||||
if (ret != sizeof(refcount)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
le64_add_cpu(&refcount, -1ULL);
|
||||
|
||||
/* ensure that we can update and delete name_ and val_ keys */
|
||||
if (!other_coll) {
|
||||
ret = scoutfs_btree_dirty(sb, meta, &name_key);
|
||||
if (ret)
|
||||
@@ -272,14 +306,9 @@ static int delete_xattr(struct super_block *sb, struct scoutfs_key *key,
|
||||
if (!other_coll)
|
||||
scoutfs_btree_delete(sb, meta, &name_key);
|
||||
|
||||
scoutfs_btree_update(sb, meta, &val_key, &curs);
|
||||
refcount = curs.val;
|
||||
le64_add_cpu(refcount, -1ULL);
|
||||
if (*refcount == 0)
|
||||
del_val = true;
|
||||
scoutfs_btree_release(&curs);
|
||||
|
||||
if (del_val)
|
||||
if (refcount)
|
||||
scoutfs_btree_update(sb, meta, &val_key, &val);
|
||||
else
|
||||
scoutfs_btree_delete(sb, meta, &val_key);
|
||||
ret = 0;
|
||||
out:
|
||||
@@ -295,6 +324,11 @@ static int unknown_prefix(const char *name)
|
||||
return strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
|
||||
}
|
||||
|
||||
/*
|
||||
* Look up an xattr matching the given name. We walk our xattr items stored
|
||||
* at the hashed name. We'll only be able to copy out a value that fits
|
||||
* in the callers buffer.
|
||||
*/
|
||||
ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer,
|
||||
size_t size)
|
||||
{
|
||||
@@ -302,27 +336,49 @@ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer,
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
size_t name_len = strlen(name);
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_xattr *xat;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key last;
|
||||
unsigned int item_len;
|
||||
int ret;
|
||||
|
||||
if (unknown_prefix(name))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
set_xattr_keys(inode, &first, &last, name, name_len);
|
||||
/* make sure we don't allocate an enormous item */
|
||||
if (name_len > SCOUTFS_MAX_XATTR_LEN)
|
||||
return -ENODATA;
|
||||
size = min_t(size_t, size, SCOUTFS_MAX_XATTR_LEN);
|
||||
|
||||
item_len = xat_bytes(name_len, size);
|
||||
xat = kmalloc(item_len, GFP_KERNEL);
|
||||
if (!xat)
|
||||
return -ENOMEM;
|
||||
|
||||
set_xattr_keys(inode, &key, &last, name, name_len);
|
||||
scoutfs_btree_init_val(&val, xat, item_len);
|
||||
|
||||
down_read(&si->xattr_rwsem);
|
||||
|
||||
ret = -ENODATA;
|
||||
while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) {
|
||||
xat = curs.val;
|
||||
for (;;) {
|
||||
ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = -ENODATA;
|
||||
break;
|
||||
}
|
||||
|
||||
/* XXX corruption */
|
||||
if (ret < sizeof(struct scoutfs_xattr)) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!scoutfs_names_equal(name, name_len, xat->name,
|
||||
xat->name_len)) {
|
||||
ret = -ENODATA;
|
||||
scoutfs_inc_key(&key);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -333,12 +389,12 @@ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer,
|
||||
else
|
||||
ret = -ERANGE;
|
||||
}
|
||||
scoutfs_btree_release(&curs);
|
||||
break;
|
||||
}
|
||||
|
||||
up_read(&si->xattr_rwsem);
|
||||
|
||||
kfree(xat);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -458,39 +514,60 @@ ssize_t scoutfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_xattr *xat;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key last;
|
||||
unsigned int item_len;
|
||||
ssize_t total;
|
||||
int ret;
|
||||
|
||||
scoutfs_set_key(&first, scoutfs_ino(inode), SCOUTFS_XATTR_KEY, 0);
|
||||
item_len = xat_bytes(SCOUTFS_MAX_XATTR_LEN, 0);
|
||||
xat = kmalloc(item_len, GFP_KERNEL);
|
||||
if (!xat)
|
||||
return -ENOMEM;
|
||||
|
||||
scoutfs_set_key(&key, scoutfs_ino(inode), SCOUTFS_XATTR_KEY, 0);
|
||||
scoutfs_set_key(&last, scoutfs_ino(inode), SCOUTFS_XATTR_KEY, ~0ULL);
|
||||
scoutfs_btree_init_val(&val, xat, item_len);
|
||||
|
||||
down_read(&si->xattr_rwsem);
|
||||
|
||||
total = 0;
|
||||
while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) {
|
||||
xat = curs.val;
|
||||
|
||||
total += xat->name_len + 1;
|
||||
if (!size)
|
||||
continue;
|
||||
if (!buffer || total > size) {
|
||||
ret = -ERANGE;
|
||||
for (;;) {
|
||||
ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
memcpy(buffer, xat->name, xat->name_len);
|
||||
buffer += xat->name_len;
|
||||
*(buffer++) = '\0';
|
||||
/* XXX corruption */
|
||||
if (ret < sizeof(struct scoutfs_xattr)) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
total += xat->name_len + 1;
|
||||
|
||||
if (size) {
|
||||
if (!buffer || total > size) {
|
||||
ret = -ERANGE;
|
||||
break;
|
||||
}
|
||||
|
||||
memcpy(buffer, xat->name, xat->name_len);
|
||||
buffer += xat->name_len;
|
||||
*(buffer++) = '\0';
|
||||
}
|
||||
|
||||
scoutfs_inc_key(&key);
|
||||
}
|
||||
|
||||
scoutfs_btree_release(&curs);
|
||||
|
||||
up_read(&si->xattr_rwsem);
|
||||
|
||||
kfree(xat);
|
||||
|
||||
return ret < 0 ? ret : total;
|
||||
}
|
||||
|
||||
@@ -504,60 +581,66 @@ ssize_t scoutfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
|
||||
*
|
||||
* Hash items can be shared amongst xattrs whose names or values hash to
|
||||
* the same hash value. We don't bother trying to remove the hash items
|
||||
* as the last xattr is removed. We remove it the first chance we get,
|
||||
* try to avoid obviously removing the same hash item next, and allow
|
||||
* as the last xattr is removed. We always try to remove them and allow
|
||||
* failure when we try to remove a hash item that wasn't found.
|
||||
*/
|
||||
int scoutfs_xattr_drop(struct super_block *sb, u64 ino)
|
||||
{
|
||||
struct scoutfs_btree_root *meta = SCOUTFS_META(sb);
|
||||
DECLARE_SCOUTFS_BTREE_CURSOR(curs);
|
||||
struct scoutfs_btree_val val;
|
||||
struct scoutfs_xattr *xat;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key name_key;
|
||||
struct scoutfs_key val_key;
|
||||
__le64 last_name;
|
||||
__le64 last_val;
|
||||
unsigned int item_len;
|
||||
u64 val_hash;
|
||||
bool have_last;
|
||||
int ret;
|
||||
|
||||
scoutfs_set_key(&first, ino, SCOUTFS_XATTR_KEY, 0);
|
||||
scoutfs_set_key(&key, ino, SCOUTFS_XATTR_KEY, 0);
|
||||
scoutfs_set_key(&last, ino, SCOUTFS_XATTR_KEY, ~0ULL);
|
||||
|
||||
have_last = false;
|
||||
while ((ret = scoutfs_btree_next(sb, meta, &first, &last, &curs)) > 0) {
|
||||
xat = curs.val;
|
||||
key = *curs.key;
|
||||
item_len = xat_bytes(SCOUTFS_MAX_XATTR_LEN, SCOUTFS_MAX_XATTR_LEN),
|
||||
xat = kmalloc(item_len, GFP_KERNEL);
|
||||
if (!xat)
|
||||
return -ENOMEM;
|
||||
|
||||
scoutfs_btree_init_val(&val, xat, item_len);
|
||||
|
||||
for (;;) {
|
||||
ret = scoutfs_btree_next(sb, meta, &key, &last, &key, &val);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* XXX corruption */
|
||||
if (ret < sizeof(struct scoutfs_xattr) ||
|
||||
ret != xat_bytes(xat->name_len, xat->value_len)) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
|
||||
val_hash = scoutfs_name_hash(xat_value(xat), xat->value_len);
|
||||
set_name_val_keys(&name_key, &val_key, &key, val_hash);
|
||||
|
||||
first = *curs.key;
|
||||
scoutfs_inc_key(&first);
|
||||
scoutfs_btree_release(&curs);
|
||||
ret = scoutfs_btree_delete(sb, meta, &name_key);
|
||||
if (ret && ret != -ENOENT)
|
||||
break;
|
||||
|
||||
if (!have_last || last_name != name_key.inode) {
|
||||
ret = scoutfs_btree_delete(sb, meta, &name_key);
|
||||
if (ret && ret != -ENOENT)
|
||||
break;
|
||||
last_name = name_key.inode;
|
||||
}
|
||||
|
||||
if (!have_last || last_val != val_key.inode) {
|
||||
ret = scoutfs_btree_delete(sb, meta, &val_key);
|
||||
if (ret && ret != -ENOENT)
|
||||
break;
|
||||
last_val = val_key.inode;
|
||||
}
|
||||
|
||||
have_last = true;
|
||||
ret = scoutfs_btree_delete(sb, meta, &val_key);
|
||||
if (ret && ret != -ENOENT)
|
||||
break;
|
||||
|
||||
ret = scoutfs_btree_delete(sb, meta, &key);
|
||||
if (ret && ret != -ENOENT)
|
||||
break;
|
||||
|
||||
scoutfs_inc_key(&key);
|
||||
}
|
||||
|
||||
kfree(xat);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user