Use contiguous key struct instead of kvecs

Using kvecs for keys seemed like a good idea because there were a few
uses that had keys in fragmented memory: dirent keys made up of an
on-stack struct and the file name in the dentry, and keys straddling the
pages that make up a cached segment.

But it hasn't worked out very well.  The code to perform ops on keys
by iterating over vectors is pretty fiddly.  And the raw kvecs only
describe the actively referenced key; they know nothing about the total
size of the buffer that the key resides in.  Some ops can't check that
they're not clobbering things; they rely on callers not to mess up.

And critically, the kvec iteration's become a bottleneck.  It turns out
that comparing keys is a very hot path in the item cache.  All the code
to initialize and iterate over two key vectors adds up when each high
level fs operation is a few tree descents and each tree descent is a
bunch of compares.

So let's back off and have a specific struct for tracking keys that are
stored in contiguous memory regions.  Users ensure that keys are
contiguous.  The code ends up being a lot clearer, code can now see how
big the full key buffer is, and the rbtree node comparison fast path is
now just a memcmp.
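
Concretely, the new key buffer and its rbtree compare fast path, as
added by this patch (see the scoutfs_key_buf hunks below), boil down to:

	struct scoutfs_key_buf {
		void *data;	/* contiguous key bytes */
		u16 key_len;	/* length of the key currently stored */
		u16 buf_len;	/* total size of the underlying buffer */
	};

	/* memcmp the shared prefix; a shorter key sorts before a longer one */
	static inline int scoutfs_key_compare(struct scoutfs_key_buf *a,
					      struct scoutfs_key_buf *b)
	{
		return memcmp(a->data, b->data, min(a->key_len, b->key_len)) ?:
		       a->key_len < b->key_len ? -1 :
		       a->key_len > b->key_len ? 1 : 0;
	}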

Almost all of the changes in the patch are mechanical semantic changes
involving types, function names, args, and occasionally slightly
different return conventions.

A slightly more involved change is that now dirent key users have to
manage an allocated contiguous key with a copy of the name from the
dentry.
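
For example, the unlink path below ends up with the usual
allocate/use/free pattern around the copied dentry name:

	key = alloc_dirent_key(sb, dir, dentry);
	if (!key) {
		ret = -ENOMEM;
		goto out;
	}

	ret = scoutfs_item_delete(sb, key);
	if (ret)
		goto out;
	/* ... other deletion items and inode updates ... */
out:
	scoutfs_key_free(sb, key);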

Item reading is now a little more clever about calculating the greatest
range it can cache by initially walking all the segments instead of
trying to do it as it runs out of items in each segment.
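
Condensed from the scoutfs_manifest_read_items() hunk below: the
readable end starts at the caller's end key and is clamped up front to
the last key of every intersecting level 1+ segment, before any items
are copied:

	scoutfs_key_clone(&seg_end, end);
	list_for_each_entry(ref, &ref_list, entry) {
		if (ref->level > 0 &&
		    scoutfs_key_compare(ref->last, &seg_end) < 0)
			scoutfs_key_clone(&seg_end, ref->last);
	}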

The largest meaningful change is that now keys can't straddle page
boundaries in memory, which means they can't cross block boundaries in
the segment.  We align key offsets to the next block as we write keys to
segments that would have straddled a block.
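
A minimal sketch of that alignment rule (the seg.c write path isn't
shown in this excerpt, so the helper name and block_size argument here
are hypothetical):

	/*
	 * If writing key_len key bytes at off would straddle a block
	 * boundary, bump off to the start of the next block instead.
	 */
	static u32 key_write_offset(u32 off, u16 key_len, u32 block_size)
	{
		if ((off / block_size) != ((off + key_len - 1) / block_size))
			off = ((off / block_size) + 1) * block_size;
		return off;
	}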

We then also have to account for that padding when building segments.
We add a helper that calculates whether a given number of items will fit
in a segment; it is used by item dirtying, segment writing, and
compaction.
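
A sketch of what that helper presumably checks, based on the accounting
the old inline code did (segment block header plus per-item struct plus
key and value bytes against SCOUTFS_SEGMENT_SIZE); the real
scoutfs_seg_fits_single() also has to allow for the block padding
described above, which isn't shown here:

	static bool seg_fits_single(u32 nr_items, u32 key_bytes, u32 val_bytes)
	{
		u32 total = sizeof(struct scoutfs_segment_block) +
			    nr_items * sizeof(struct scoutfs_segment_item) +
			    key_bytes + val_bytes;

		return total <= SCOUTFS_SEGMENT_SIZE;
	}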

I left the tracepoint formatting for another patch.

Signed-off-by: Zach Brown <zab@versity.com>
Author: Zach Brown
Date:   2017-01-19 13:30:04 -08:00
Parent: 8a302609f2
Commit: 2bc1617280
14 changed files with 794 additions and 459 deletions


@@ -3,5 +3,5 @@ obj-$(CONFIG_SCOUTFS_FS) := scoutfs.o
CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include
scoutfs-y += alloc.o bio.o block.o btree.o buddy.o compact.o counters.o crc.o \
dir.o filerw.o kvec.o inode.o ioctl.o item.o manifest.o msg.o \
name.o seg.o scoutfs_trace.o super.o trans.o treap.o xattr.o
dir.o filerw.o kvec.o inode.o ioctl.o item.o key.o manifest.o \
msg.o name.o seg.o scoutfs_trace.o super.o trans.o treap.o xattr.o


@@ -70,8 +70,8 @@ struct compact_seg {
u64 segno;
u64 seq;
u8 level;
SCOUTFS_DECLARE_KVEC(first);
SCOUTFS_DECLARE_KVEC(last);
struct scoutfs_key_buf *first;
struct scoutfs_key_buf *last;
struct scoutfs_segment *seg;
int pos;
int saved_pos;
@@ -92,25 +92,45 @@ struct compact_cursor {
struct compact_seg *saved_lower;
};
static void free_cseg(struct compact_seg *cseg)
static void free_cseg(struct super_block *sb, struct compact_seg *cseg)
{
WARN_ON_ONCE(!list_empty(&cseg->entry));
scoutfs_seg_put(cseg->seg);
scoutfs_kvec_kfree(cseg->first);
scoutfs_kvec_kfree(cseg->last);
scoutfs_key_free(sb, cseg->first);
scoutfs_key_free(sb, cseg->last);
kfree(cseg);
}
static void free_cseg_list(struct list_head *list)
static struct compact_seg *alloc_cseg(struct super_block *sb,
struct scoutfs_key_buf *first,
struct scoutfs_key_buf *last)
{
struct compact_seg *cseg;
cseg = kzalloc(sizeof(struct compact_seg), GFP_NOFS);
if (cseg) {
INIT_LIST_HEAD(&cseg->entry);
cseg->first = scoutfs_key_dup(sb, first);
cseg->last = scoutfs_key_dup(sb, last);
if (!cseg->first || !cseg->last) {
free_cseg(sb, cseg);
cseg = NULL;
}
}
return cseg;
}
static void free_cseg_list(struct super_block *sb, struct list_head *list)
{
struct compact_seg *cseg;
struct compact_seg *tmp;
list_for_each_entry_safe(cseg, tmp, list, entry) {
list_del_init(&cseg->entry);
free_cseg(cseg);
free_cseg(sb, cseg);
}
}
@@ -177,18 +197,18 @@ static struct compact_seg *next_spos(struct compact_cursor *curs,
* update items.
*/
static int next_item(struct super_block *sb, struct compact_cursor *curs,
struct kvec *item_key, struct kvec *item_val)
struct scoutfs_key_buf *item_key, struct kvec *item_val)
{
struct compact_seg *upper = curs->upper;
struct compact_seg *lower = curs->lower;
SCOUTFS_DECLARE_KVEC(lower_key);
struct scoutfs_key_buf lower_key;
SCOUTFS_DECLARE_KVEC(lower_val);
int cmp;
int ret;
if (upper) {
ret = scoutfs_seg_item_kvecs(upper->seg, upper->pos,
item_key, item_val);
ret = scoutfs_seg_item_ptrs(upper->seg, upper->pos,
item_key, item_val);
if (ret < 0)
upper = NULL;
}
@@ -198,8 +218,8 @@ static int next_item(struct super_block *sb, struct compact_cursor *curs,
if (ret)
goto out;
ret = scoutfs_seg_item_kvecs(lower->seg, lower->pos,
lower_key, lower_val);
ret = scoutfs_seg_item_ptrs(lower->seg, lower->pos,
&lower_key, lower_val);
if (ret == 0)
break;
lower = next_spos(curs, lower);
@@ -217,14 +237,14 @@ static int next_item(struct super_block *sb, struct compact_cursor *curs,
* > 0: return lower, advance lower
*/
if (upper && lower)
cmp = scoutfs_kvec_memcmp(item_key, lower_key);
cmp = scoutfs_key_compare(item_key, &lower_key);
else if (upper)
cmp = -1;
else
cmp = 1;
if (cmp > 0) {
scoutfs_kvec_clone(item_key, lower_key);
scoutfs_key_clone(item_key, &lower_key);
scoutfs_kvec_clone(item_val, lower_val);
}
@@ -248,28 +268,27 @@ out:
static int count_items(struct super_block *sb, struct compact_cursor *curs,
u32 *nr_items, u32 *key_bytes)
{
SCOUTFS_DECLARE_KVEC(item_key);
struct scoutfs_key_buf item_key;
SCOUTFS_DECLARE_KVEC(item_val);
u32 total;
u32 items = 0;
u32 keys = 0;
u32 vals = 0;
int ret;
*nr_items = 0;
*key_bytes = 0;
total = sizeof(struct scoutfs_segment_block);
while ((ret = next_item(sb, curs, item_key, item_val)) > 0) {
while ((ret = next_item(sb, curs, &item_key, item_val)) > 0) {
total += sizeof(struct scoutfs_segment_item) +
scoutfs_kvec_length(item_key) +
scoutfs_kvec_length(item_val);
items++;
keys += item_key.key_len;
vals += scoutfs_kvec_length(item_val);
if (total > SCOUTFS_SEGMENT_SIZE) {
ret = 0;
if (!scoutfs_seg_fits_single(items, keys, vals))
break;
}
(*nr_items)++;
(*key_bytes) += scoutfs_kvec_length(item_key);
*nr_items = items;
*key_bytes = keys;
}
return ret;
@@ -279,23 +298,23 @@ static int compact_items(struct super_block *sb, struct compact_cursor *curs,
struct scoutfs_segment *seg, u32 nr_items,
u32 key_bytes)
{
SCOUTFS_DECLARE_KVEC(item_key);
struct scoutfs_key_buf item_key;
SCOUTFS_DECLARE_KVEC(item_val);
int ret;
ret = next_item(sb, curs, item_key, item_val);
ret = next_item(sb, curs, &item_key, item_val);
if (ret <= 0)
goto out;
scoutfs_seg_first_item(sb, seg, item_key, item_val,
scoutfs_seg_first_item(sb, seg, &item_key, item_val,
nr_items, key_bytes);
while (--nr_items) {
ret = next_item(sb, curs, item_key, item_val);
ret = next_item(sb, curs, &item_key, item_val);
if (ret <= 0)
break;
scoutfs_seg_append_item(sb, seg, item_key, item_val);
scoutfs_seg_append_item(sb, seg, &item_key, item_val);
}
out:
@@ -307,11 +326,11 @@ static int compact_segments(struct super_block *sb,
struct scoutfs_bio_completion *comp,
struct list_head *results)
{
struct scoutfs_key_buf upper_next;
struct scoutfs_segment *seg;
struct compact_seg *cseg;
struct compact_seg *upper;
struct compact_seg *lower;
SCOUTFS_DECLARE_KVEC(upper_next);
u32 key_bytes;
u32 nr_items;
int ret;
@@ -328,13 +347,7 @@ static int compact_segments(struct super_block *sb,
*/
if (upper && upper->pos == 0 &&
(!lower ||
scoutfs_kvec_memcmp(upper->last, lower->first) < 0)) {
cseg = kzalloc(sizeof(struct compact_seg), GFP_NOFS);
if (!cseg) {
ret = -ENOMEM;
break;
}
scoutfs_key_compare(upper->last, lower->first) < 0)) {
/*
* XXX blah! these csegs are getting
@@ -342,11 +355,8 @@ static int compact_segments(struct super_block *sb,
* entry iterator that reading and compacting
* can use.
*/
ret = scoutfs_kvec_dup_flatten(cseg->first,
upper->first) ?:
scoutfs_kvec_dup_flatten(cseg->last, upper->last);
if (ret) {
kfree(cseg);
cseg = alloc_cseg(sb, upper->first, upper->last);
if (!cseg) {
ret = -ENOMEM;
break;
}
@@ -376,14 +386,14 @@ static int compact_segments(struct super_block *sb,
*/
if (lower && lower->pos == 0 &&
(!upper ||
(!scoutfs_seg_item_kvecs(upper->seg, upper->pos,
upper_next, NULL) &&
scoutfs_kvec_memcmp(upper_next, lower->last) > 0))) {
(!scoutfs_seg_item_ptrs(upper->seg, upper->pos,
&upper_next, NULL) &&
scoutfs_key_compare(&upper_next, lower->last) > 0))) {
curs->lower = next_spos(curs, lower);
list_del_init(&lower->entry);
free_cseg(lower);
free_cseg(sb, lower);
scoutfs_inc_counter(sb, compact_segment_skipped);
continue;
@@ -404,6 +414,7 @@ static int compact_segments(struct super_block *sb,
break;
}
/* no cseg keys, manifest update uses seg item keys */
cseg = kzalloc(sizeof(struct compact_seg), GFP_NOFS);
if (!cseg) {
ret = -ENOMEM;
@@ -436,14 +447,16 @@ static int compact_segments(struct super_block *sb,
return ret;
}
int scoutfs_compact_add(struct super_block *sb, void *data, struct kvec *first,
struct kvec *last, u64 segno, u64 seq, u8 level)
int scoutfs_compact_add(struct super_block *sb, void *data,
struct scoutfs_key_buf *first,
struct scoutfs_key_buf *last, u64 segno, u64 seq,
u8 level)
{
struct compact_cursor *curs = data;
struct compact_seg *cseg;
int ret;
cseg = kzalloc(sizeof(struct compact_seg), GFP_NOFS);
cseg = alloc_cseg(sb, first, last);
if (!cseg) {
ret = -ENOMEM;
goto out;
@@ -451,11 +464,6 @@ int scoutfs_compact_add(struct super_block *sb, void *data, struct kvec *first,
list_add_tail(&cseg->entry, &curs->csegs);
ret = scoutfs_kvec_dup_flatten(cseg->first, first) ?:
scoutfs_kvec_dup_flatten(cseg->last, last);
if (ret)
goto out;
cseg->segno = segno;
cseg->seq = seq;
cseg->level = level;
@@ -594,8 +602,8 @@ static void scoutfs_compact_func(struct work_struct *work)
out:
if (ret)
free_result_segnos(sb, &results);
free_cseg_list(&curs.csegs);
free_cseg_list(&results);
free_cseg_list(sb, &curs.csegs);
free_cseg_list(sb, &results);
WARN_ON_ONCE(ret);
trace_printk("ret %d\n", ret);


@@ -3,8 +3,10 @@
void scoutfs_compact_kick(struct super_block *sb);
int scoutfs_compact_add(struct super_block *sb, void *data, struct kvec *first,
struct kvec *last, u64 segno, u64 seq, u8 level);
int scoutfs_compact_add(struct super_block *sb, void *data,
struct scoutfs_key_buf *first,
struct scoutfs_key_buf *last, u64 segno, u64 seq,
u8 level);
int scoutfs_compact_setup(struct super_block *sb);
void scoutfs_compact_destroy(struct super_block *sb);


@@ -97,13 +97,32 @@ static unsigned int dentry_type(unsigned int type)
return DT_UNKNOWN;
}
static struct scoutfs_key_buf *alloc_dirent_key(struct super_block *sb,
struct inode *dir,
struct dentry *dentry)
{
struct scoutfs_dirent_key *dkey;
struct scoutfs_key_buf *key;
key = scoutfs_key_alloc(sb, offsetof(struct scoutfs_dirent_key,
name[dentry->d_name.len]));
if (key) {
dkey = key->data;
dkey->type = SCOUTFS_DIRENT_KEY;
dkey->ino = cpu_to_be64(scoutfs_ino(dir));
memcpy(dkey->name, (void *)dentry->d_name.name,
dentry->d_name.len);
}
return key;
}
static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry,
unsigned int flags)
{
struct super_block *sb = dir->i_sb;
struct scoutfs_dirent_key dkey;
struct scoutfs_key_buf *key = NULL;
struct scoutfs_dirent dent;
SCOUTFS_DECLARE_KVEC(key);
SCOUTFS_DECLARE_KVEC(val);
struct inode *inode;
u64 ino = 0;
@@ -114,10 +133,11 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry,
goto out;
}
dkey.type = SCOUTFS_DIRENT_KEY;
dkey.ino = cpu_to_be64(scoutfs_ino(dir));
scoutfs_kvec_init(key, &dkey, sizeof(dkey),
(void *)dentry->d_name.name, dentry->d_name.len);
key = alloc_dirent_key(sb, dir, dentry);
if (!key) {
ret = -ENOMEM;
goto out;
}
scoutfs_kvec_init(val, &dent, sizeof(dent));
@@ -137,6 +157,8 @@ out:
else
inode = scoutfs_iget(sb, ino);
scoutfs_key_free(sb, key);
return d_splice_alias(inode, dentry);
}
@@ -162,6 +184,17 @@ static int dir_emit_dots(struct file *file, void *dirent, filldir_t filldir)
return 1;
}
static void init_readdir_key(struct scoutfs_key_buf *key,
struct scoutfs_readdir_key *rkey,
struct inode *inode, loff_t pos)
{
rkey->type = SCOUTFS_READDIR_KEY;
rkey->ino = cpu_to_be64(scoutfs_ino(inode));
rkey->pos = cpu_to_be64(pos);
scoutfs_key_init(key, rkey, sizeof(struct scoutfs_readdir_key));
}
/*
* readdir simply iterates over the dirent items for the dir inode and
* uses their offset as the readdir position.
@@ -174,10 +207,10 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir)
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
struct scoutfs_dirent *dent;
struct scoutfs_key_buf key;
struct scoutfs_key_buf last_key;
struct scoutfs_readdir_key rkey;
struct scoutfs_readdir_key last_rkey;
SCOUTFS_DECLARE_KVEC(key);
SCOUTFS_DECLARE_KVEC(last_key);
SCOUTFS_DECLARE_KVEC(val);
unsigned int item_len;
unsigned int name_len;
@@ -187,15 +220,7 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir)
if (!dir_emit_dots(file, dirent, filldir))
return 0;
rkey.type = SCOUTFS_READDIR_KEY;
rkey.ino = cpu_to_be64(scoutfs_ino(inode));
/* pos set in each loop */
scoutfs_kvec_init(key, &rkey, sizeof(rkey));
last_rkey.type = SCOUTFS_READDIR_KEY;
last_rkey.ino = cpu_to_be64(scoutfs_ino(inode));
last_rkey.pos = cpu_to_be64(SCOUTFS_DIRENT_LAST_POS);
scoutfs_kvec_init(last_key, &last_rkey, sizeof(last_rkey));
init_readdir_key(&last_key, &last_rkey, inode, SCOUTFS_DIRENT_LAST_POS);
item_len = offsetof(struct scoutfs_dirent, name[SCOUTFS_NAME_LEN]);
dent = kmalloc(item_len, GFP_KERNEL);
@@ -203,9 +228,10 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir)
return -ENOMEM;
for (;;) {
rkey.pos = cpu_to_be64(file->f_pos);
init_readdir_key(&key, &rkey, inode, file->f_pos);
scoutfs_kvec_init(val, dent, item_len);
ret = scoutfs_item_next_same_min(sb, key, last_key, val,
ret = scoutfs_item_next_same_min(sb, &key, &last_key, val,
offsetof(struct scoutfs_dirent, name[1]));
if (ret < 0) {
if (ret == -ENOENT)
@@ -261,9 +287,8 @@ static int add_entry_items(struct inode *dir, struct dentry *dentry,
struct inode *inode)
{
struct super_block *sb = dir->i_sb;
struct scoutfs_dirent_key dkey;
struct scoutfs_key_buf *key;
struct scoutfs_dirent dent;
SCOUTFS_DECLARE_KVEC(key);
SCOUTFS_DECLARE_KVEC(val);
int ret;
@@ -275,10 +300,9 @@ static int add_entry_items(struct inode *dir, struct dentry *dentry,
return ret;
/* dirent item for lookup */
dkey.type = SCOUTFS_DIRENT_KEY;
dkey.ino = cpu_to_be64(scoutfs_ino(dir));
scoutfs_kvec_init(key, &dkey, sizeof(dkey),
(void *)dentry->d_name.name, dentry->d_name.len);
key = alloc_dirent_key(sb, dir, dentry);
if (!key)
return -ENOMEM;
dent.ino = cpu_to_le64(scoutfs_ino(inode));
dent.type = mode_to_type(inode->i_mode);
@@ -323,6 +347,7 @@ out_dent:
}
#endif
scoutfs_key_free(sb, key);
return ret;
}
@@ -423,8 +448,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
struct super_block *sb = dir->i_sb;
struct inode *inode = dentry->d_inode;
struct timespec ts = current_kernel_time();
struct scoutfs_dirent_key dkey;
SCOUTFS_DECLARE_KVEC(key);
struct scoutfs_key_buf *key = NULL;
int ret = 0;
/* will need to add deletion items */
@@ -443,10 +467,11 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
goto out;
/* XXX same items as add_entry_items */
dkey.type = SCOUTFS_DIRENT_KEY;
dkey.ino = cpu_to_be64(scoutfs_ino(dir));
scoutfs_kvec_init(key, &dkey, sizeof(dkey),
(void *)dentry->d_name.name, dentry->d_name.len);
key = alloc_dirent_key(sb, dir, dentry);
if (!key) {
ret = -ENOMEM;
goto out;
}
ret = scoutfs_item_delete(sb, key);
if (ret)
@@ -478,6 +503,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
scoutfs_update_inode_item(dir);
out:
scoutfs_key_free(sb, key);
scoutfs_release_trans(sb);
return ret;
}


@@ -127,26 +127,28 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
ci->data_version = le64_to_cpu(cinode->data_version);
}
static void set_inode_key(struct scoutfs_inode_key *ikey, u64 ino)
static void init_inode_key(struct scoutfs_key_buf *key,
struct scoutfs_inode_key *ikey, u64 ino)
{
ikey->type = SCOUTFS_INODE_KEY;
ikey->ino = cpu_to_be64(ino);
scoutfs_key_init(key, ikey, sizeof(struct scoutfs_inode_key));
}
static int scoutfs_read_locked_inode(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
struct scoutfs_inode_key ikey;
struct scoutfs_key_buf key;
struct scoutfs_inode sinode;
SCOUTFS_DECLARE_KVEC(key);
SCOUTFS_DECLARE_KVEC(val);
int ret;
set_inode_key(&ikey, scoutfs_ino(inode));
scoutfs_kvec_init(key, &ikey, sizeof(ikey));
init_inode_key(&key, &ikey, scoutfs_ino(inode));
scoutfs_kvec_init(val, &sinode, sizeof(sinode));
ret = scoutfs_item_lookup_exact(sb, key, val, sizeof(sinode));
ret = scoutfs_item_lookup_exact(sb, &key, val, sizeof(sinode));
if (ret == 0)
load_inode(inode, &sinode);
@@ -269,16 +271,15 @@ int scoutfs_dirty_inode_item(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
struct scoutfs_inode_key ikey;
struct scoutfs_key_buf key;
struct scoutfs_inode sinode;
SCOUTFS_DECLARE_KVEC(key);
int ret;
store_inode(&sinode, inode);
set_inode_key(&ikey, scoutfs_ino(inode));
scoutfs_kvec_init(key, &ikey, sizeof(ikey));
init_inode_key(&key, &ikey, scoutfs_ino(inode));
ret = scoutfs_item_dirty(sb, key);
ret = scoutfs_item_dirty(sb, &key);
if (!ret)
trace_scoutfs_dirty_inode(inode);
return ret;
@@ -297,18 +298,17 @@ void scoutfs_update_inode_item(struct inode *inode)
{
struct super_block *sb = inode->i_sb;
struct scoutfs_inode_key ikey;
struct scoutfs_key_buf key;
struct scoutfs_inode sinode;
SCOUTFS_DECLARE_KVEC(key);
SCOUTFS_DECLARE_KVEC(val);
int err;
store_inode(&sinode, inode);
set_inode_key(&ikey, scoutfs_ino(inode));
scoutfs_kvec_init(key, &ikey, sizeof(ikey));
init_inode_key(&key, &ikey, scoutfs_ino(inode));
scoutfs_kvec_init(val, &sinode, sizeof(sinode));
err = scoutfs_item_update(sb, key, val);
err = scoutfs_item_update(sb, &key, val);
BUG_ON(err);
trace_scoutfs_update_inode(inode);
@@ -388,8 +388,8 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
{
struct scoutfs_inode_info *ci;
struct scoutfs_inode_key ikey;
struct scoutfs_key_buf key;
struct scoutfs_inode sinode;
SCOUTFS_DECLARE_KVEC(key);
SCOUTFS_DECLARE_KVEC(val);
struct inode *inode;
u64 ino;
@@ -419,11 +419,10 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
set_inode_ops(inode);
store_inode(&sinode, inode);
set_inode_key(&ikey, scoutfs_ino(inode));
scoutfs_kvec_init(key, &ikey, sizeof(ikey));
init_inode_key(&key, &ikey, scoutfs_ino(inode));
scoutfs_kvec_init(val, &sinode, sizeof(sinode));
ret = scoutfs_item_create(sb, key, val);
ret = scoutfs_item_create(sb, &key, val);
if (ret) {
iput(inode);
return ERR_PTR(ret);


@@ -59,22 +59,24 @@ struct cached_item {
};
long dirty;
SCOUTFS_DECLARE_KVEC(key);
struct scoutfs_key_buf *key;
SCOUTFS_DECLARE_KVEC(val);
};
struct cached_range {
struct rb_node node;
SCOUTFS_DECLARE_KVEC(start);
SCOUTFS_DECLARE_KVEC(end);
struct scoutfs_key_buf *start;
struct scoutfs_key_buf *end;
};
/*
* Walk the item rbtree and return the item found and the next and
* prev items.
*/
static struct cached_item *walk_items(struct rb_root *root, struct kvec *key,
static struct cached_item *walk_items(struct rb_root *root,
struct scoutfs_key_buf *key,
struct cached_item **prev,
struct cached_item **next)
{
@@ -88,7 +90,7 @@ static struct cached_item *walk_items(struct rb_root *root, struct kvec *key,
while (node) {
item = container_of(node, struct cached_item, node);
cmp = scoutfs_kvec_memcmp(key, item->key);
cmp = scoutfs_key_compare(key, item->key);
if (cmp < 0) {
*next = item;
node = node->rb_left;
@@ -104,7 +106,8 @@ static struct cached_item *walk_items(struct rb_root *root, struct kvec *key,
}
static struct cached_item *find_item(struct super_block *sb,
struct rb_root *root, struct kvec *key)
struct rb_root *root,
struct scoutfs_key_buf *key)
{
struct cached_item *prev;
struct cached_item *next;
@@ -120,7 +123,8 @@ static struct cached_item *find_item(struct super_block *sb,
return item;
}
static struct cached_item *next_item(struct rb_root *root, struct kvec *key)
static struct cached_item *next_item(struct rb_root *root,
struct scoutfs_key_buf *key)
{
struct cached_item *prev;
struct cached_item *next;
@@ -234,7 +238,7 @@ static int insert_item(struct rb_root *root, struct cached_item *ins)
parent = *node;
item = container_of(*node, struct cached_item, node);
cmp = scoutfs_kvec_memcmp(ins->key, item->key);
cmp = scoutfs_key_compare(ins->key, item->key);
if (cmp < 0) {
if (ins->dirty)
item->dirty |= LEFT_DIRTY;
@@ -263,7 +267,8 @@ static int insert_item(struct rb_root *root, struct cached_item *ins)
* cached range.
*/
static bool check_range(struct super_block *sb, struct rb_root *root,
struct kvec *key, struct kvec *end)
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *end)
{
struct rb_node *node = root->rb_node;
struct cached_range *next = NULL;
@@ -273,34 +278,34 @@ static bool check_range(struct super_block *sb, struct rb_root *root,
while (node) {
rng = container_of(node, struct cached_range, node);
cmp = scoutfs_kvec_cmp_overlap(key, key,
rng->start, rng->end);
cmp = scoutfs_key_compare_ranges(key, key,
rng->start, rng->end);
if (cmp < 0) {
next = rng;
node = node->rb_left;
} else if (cmp > 0) {
node = node->rb_right;
} else {
scoutfs_kvec_memcpy_truncate(end, rng->end);
scoutfs_key_copy(end, rng->end);
scoutfs_inc_counter(sb, item_range_hit);
return true;
}
}
if (next)
scoutfs_kvec_memcpy_truncate(end, next->start);
scoutfs_key_copy(end, next->start);
else
scoutfs_kvec_set_max_key(end);
scoutfs_key_set_max(end);
scoutfs_inc_counter(sb, item_range_miss);
return false;
}
static void free_range(struct cached_range *rng)
static void free_range(struct super_block *sb, struct cached_range *rng)
{
if (!IS_ERR_OR_NULL(rng)) {
scoutfs_kvec_kfree(rng->start);
scoutfs_kvec_kfree(rng->end);
scoutfs_key_free(sb, rng->start);
scoutfs_key_free(sb, rng->end);
kfree(rng);
}
}
@@ -332,8 +337,8 @@ restart:
parent = *node;
rng = container_of(*node, struct cached_range, node);
cmp = scoutfs_kvec_cmp_overlap(ins->start, ins->end,
rng->start, rng->end);
cmp = scoutfs_key_compare_ranges(ins->start, ins->end,
rng->start, rng->end);
/* simple iteration until we overlap */
if (cmp < 0) {
node = &(*node)->rb_left;
@@ -343,24 +348,24 @@ restart:
continue;
}
start_cmp = scoutfs_kvec_memcmp(ins->start, rng->start);
end_cmp = scoutfs_kvec_memcmp(ins->end, rng->end);
start_cmp = scoutfs_key_compare(ins->start, rng->start);
end_cmp = scoutfs_key_compare(ins->end, rng->end);
/* free our insertion if we're entirely within an existing */
if (start_cmp >= 0 && end_cmp <= 0) {
free_range(ins);
free_range(sb, ins);
return;
}
/* expand to cover partial overlap before freeing */
if (start_cmp < 0 && end_cmp < 0)
scoutfs_kvec_swap(ins->end, rng->end);
swap(ins->end, rng->end);
else if (start_cmp > 0 && end_cmp > 0)
scoutfs_kvec_swap(ins->start, rng->start);
swap(ins->start, rng->start);
/* remove and free all overlaps and restart the descent */
rb_erase(&rng->node, root);
free_range(rng);
free_range(sb, rng);
goto restart;
}
@@ -373,25 +378,25 @@ restart:
* value vector. The amount of bytes copied is returned which can be 0
* or truncated if the caller's buffer isn't big enough.
*/
int scoutfs_item_lookup(struct super_block *sb, struct kvec *key,
int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key_buf *key,
struct kvec *val)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct item_cache *cac = sbi->item_cache;
SCOUTFS_DECLARE_KVEC(end);
struct scoutfs_key_buf *end;
struct cached_item *item;
unsigned long flags;
int ret;
trace_scoutfs_item_lookup(sb, key, val);
// trace_scoutfs_item_lookup(sb, key, val);
ret = scoutfs_kvec_alloc_key(end);
if (ret)
end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE);
if (!end) {
ret = -ENOMEM;
goto out;
}
do {
scoutfs_kvec_init_key(end);
spin_lock_irqsave(&cac->lock, flags);
item = find_item(sb, &cac->items, key);
@@ -407,7 +412,7 @@ int scoutfs_item_lookup(struct super_block *sb, struct kvec *key,
} while (ret == -ENODATA &&
(ret = scoutfs_manifest_read_items(sb, key, end)) == 0);
scoutfs_kvec_kfree(end);
scoutfs_key_free(sb, end);
out:
trace_printk("ret %d\n", ret);
return ret;
@@ -423,8 +428,9 @@ out:
*
* Returns 0 or -errno.
*/
int scoutfs_item_lookup_exact(struct super_block *sb, struct kvec *key,
struct kvec *val, int size)
int scoutfs_item_lookup_exact(struct super_block *sb,
struct scoutfs_key_buf *key, struct kvec *val,
int size)
{
int ret;
@@ -444,55 +450,51 @@ int scoutfs_item_lookup_exact(struct super_block *sb, struct kvec *key,
* -ENOENT is returned if there are no items between the given and last
* keys.
*
* The next item's key is copied to the caller's key. -ENOBUFS is
* returned if the item's key didn't fit in the caller's key.
* The next item's key is copied to the caller's key. The caller is
* responsible for dealing with key lengths and truncation.
*
* The next item's value is copied into the callers value. The number
* of value bytes copied is returned. The copied value can be truncated
* by the caller's value buffer length.
*/
int scoutfs_item_next(struct super_block *sb, struct kvec *key,
struct kvec *last, struct kvec *val)
int scoutfs_item_next(struct super_block *sb, struct scoutfs_key_buf *key,
struct scoutfs_key_buf *last, struct kvec *val)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct item_cache *cac = sbi->item_cache;
SCOUTFS_DECLARE_KVEC(read_start);
SCOUTFS_DECLARE_KVEC(read_end);
SCOUTFS_DECLARE_KVEC(range_end);
struct scoutfs_key_buf *read_start = NULL;
struct scoutfs_key_buf *read_end = NULL;
struct scoutfs_key_buf *range_end = NULL;
struct cached_item *item;
unsigned long flags;
bool cached;
int ret;
/* convenience to avoid searching if caller iterates past their last */
if (scoutfs_kvec_length(key) > scoutfs_kvec_length(last)) {
if (scoutfs_key_compare(key, last) > 0) {
ret = -ENOENT;
goto out;
}
ret = scoutfs_kvec_alloc_key(range_end);
if (ret)
read_start = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE);
read_end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE);
range_end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE);
if (!read_start || !read_end || !range_end) {
ret = -ENOMEM;
goto out;
}
spin_lock_irqsave(&cac->lock, flags);
for(;;) {
scoutfs_kvec_init_key(range_end);
/* see if we have a usable item in cache and before last */
cached = check_range(sb, &cac->ranges, key, range_end);
if (cached && (item = next_item(&cac->items, key)) &&
scoutfs_kvec_memcmp(item->key, range_end) <= 0 &&
scoutfs_kvec_memcmp(item->key, last) <= 0) {
scoutfs_key_compare(item->key, range_end) <= 0 &&
scoutfs_key_compare(item->key, last) <= 0) {
if (scoutfs_kvec_length(item->key) >
scoutfs_kvec_length(key)) {
ret = -ENOBUFS;
break;
}
scoutfs_kvec_memcpy_truncate(key, item->key);
scoutfs_key_copy(key, item->key);
if (val)
ret = scoutfs_kvec_memcpy(val, item->val);
else
@@ -502,13 +504,13 @@ int scoutfs_item_next(struct super_block *sb, struct kvec *key,
if (!cached) {
/* missing cache starts at key */
scoutfs_kvec_clone(read_start, key);
scoutfs_kvec_clone(read_end, range_end);
scoutfs_key_copy(read_start, key);
scoutfs_key_copy(read_end, range_end);
} else if (scoutfs_kvec_memcmp(range_end, last) < 0) {
} else if (scoutfs_key_compare(range_end, last) < 0) {
/* missing cache starts at range_end */
scoutfs_kvec_clone(read_start, range_end);
scoutfs_kvec_clone(read_end, last);
scoutfs_key_copy(read_start, range_end);
scoutfs_key_copy(read_end, last);
} else {
/* no items and we have cache between key and last */
@@ -526,9 +528,11 @@ int scoutfs_item_next(struct super_block *sb, struct kvec *key,
}
spin_unlock_irqrestore(&cac->lock, flags);
scoutfs_kvec_kfree(range_end);
out:
scoutfs_key_free(sb, read_start);
scoutfs_key_free(sb, read_end);
scoutfs_key_free(sb, range_end);
trace_printk("ret %d\n", ret);
return ret;
}
@@ -539,10 +543,12 @@ out:
* size mismatches as a sign of corruption. A found key larger than the
* found key buffer gives -ENOBUFS and is a sign of corruption.
*/
int scoutfs_item_next_same_min(struct super_block *sb, struct kvec *key,
struct kvec *last, struct kvec *val, int len)
int scoutfs_item_next_same_min(struct super_block *sb,
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *last,
struct kvec *val, int len)
{
int key_len = scoutfs_kvec_length(key);
int key_len = key->key_len;
int ret;
trace_printk("key len %u min val len %d\n", key_len, len);
@@ -551,8 +557,7 @@ int scoutfs_item_next_same_min(struct super_block *sb, struct kvec *key,
return -EINVAL;
ret = scoutfs_item_next(sb, key, last, val);
if (ret == -ENOBUFS ||
(ret >= 0 && (scoutfs_kvec_length(key) != key_len || ret < len)))
if (ret >= 0 && (key->key_len != key_len || ret < len))
ret = -EIO;
trace_printk("ret %d\n", ret);
@@ -560,10 +565,10 @@ int scoutfs_item_next_same_min(struct super_block *sb, struct kvec *key,
return ret;
}
static void free_item(struct cached_item *item)
static void free_item(struct super_block *sb, struct cached_item *item)
{
if (!IS_ERR_OR_NULL(item)) {
scoutfs_kvec_kfree(item->key);
scoutfs_key_free(sb, item->key);
scoutfs_kvec_kfree(item->val);
kfree(item);
}
@@ -591,7 +596,7 @@ static void mark_item_dirty(struct item_cache *cac,
item->dirty |= ITEM_DIRTY;
cac->nr_dirty_items++;
cac->dirty_key_bytes += scoutfs_kvec_length(item->key);
cac->dirty_key_bytes += item->key->key_len;
cac->dirty_val_bytes += scoutfs_kvec_length(item->val);
update_dirty_parents(item);
@@ -608,7 +613,7 @@ static void clear_item_dirty(struct item_cache *cac,
item->dirty &= ~ITEM_DIRTY;
cac->nr_dirty_items--;
cac->dirty_key_bytes -= scoutfs_kvec_length(item->key);
cac->dirty_key_bytes -= item->key->key_len;
cac->dirty_val_bytes -= scoutfs_kvec_length(item->val);
WARN_ON_ONCE(cac->nr_dirty_items < 0 || cac->dirty_key_bytes < 0 ||
@@ -617,15 +622,17 @@ static void clear_item_dirty(struct item_cache *cac,
update_dirty_parents(item);
}
static struct cached_item *alloc_item(struct kvec *key, struct kvec *val)
static struct cached_item *alloc_item(struct super_block *sb,
struct scoutfs_key_buf *key,
struct kvec *val)
{
struct cached_item *item;
item = kzalloc(sizeof(struct cached_item), GFP_NOFS);
if (item) {
if (scoutfs_kvec_dup_flatten(item->key, key) ||
scoutfs_kvec_dup_flatten(item->val, val)) {
free_item(item);
item->key = scoutfs_key_dup(sb, key);
if (!item->key || scoutfs_kvec_dup_flatten(item->val, val)) {
free_item(sb, item);
item = NULL;
}
}
@@ -639,7 +646,7 @@ static struct cached_item *alloc_item(struct kvec *key, struct kvec *val)
*
* XXX but it doesn't read.. is that weird? Seems weird.
*/
int scoutfs_item_create(struct super_block *sb, struct kvec *key,
int scoutfs_item_create(struct super_block *sb, struct scoutfs_key_buf *key,
struct kvec *val)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
@@ -648,7 +655,7 @@ int scoutfs_item_create(struct super_block *sb, struct kvec *key,
unsigned long flags;
int ret;
item = alloc_item(key, val);
item = alloc_item(sb, key, val);
if (!item)
return -ENOMEM;
@@ -661,7 +668,7 @@ int scoutfs_item_create(struct super_block *sb, struct kvec *key,
spin_unlock_irqrestore(&cac->lock, flags);
if (ret)
free_item(item);
free_item(sb, item);
return ret;
}
@@ -672,12 +679,12 @@ int scoutfs_item_create(struct super_block *sb, struct kvec *key,
* and we add with _tail to maintain that order.
*/
int scoutfs_item_add_batch(struct super_block *sb, struct list_head *list,
struct kvec *key, struct kvec *val)
struct scoutfs_key_buf *key, struct kvec *val)
{
struct cached_item *item;
int ret;
item = alloc_item(key, val);
item = alloc_item(sb, key, val);
if (item) {
list_add_tail(&item->entry, list);
ret = 0;
@@ -705,7 +712,8 @@ int scoutfs_item_add_batch(struct super_block *sb, struct list_head *list,
* that will be inserted.
*/
int scoutfs_item_insert_batch(struct super_block *sb, struct list_head *list,
struct kvec *start, struct kvec *end)
struct scoutfs_key_buf *start,
struct scoutfs_key_buf *end)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct item_cache *cac = sbi->item_cache;
@@ -715,18 +723,18 @@ int scoutfs_item_insert_batch(struct super_block *sb, struct list_head *list,
unsigned long flags;
int ret;
trace_scoutfs_item_insert_batch(sb, start, end);
// trace_scoutfs_item_insert_batch(sb, start, end);
if (WARN_ON_ONCE(scoutfs_kvec_memcmp(start, end) > 0))
if (WARN_ON_ONCE(scoutfs_key_compare(start, end) > 0))
return -EINVAL;
rng = kzalloc(sizeof(struct cached_range), GFP_NOFS);
if (rng && (scoutfs_kvec_dup_flatten(rng->start, start) ||
scoutfs_kvec_dup_flatten(rng->end, end))) {
free_range(rng);
rng = NULL;
if (rng) {
rng->start = scoutfs_key_dup(sb, start);
rng->end = scoutfs_key_dup(sb, end);
}
if (!rng) {
if (!rng || !rng->start || !rng->end) {
free_range(sb, rng);
ret = -ENOMEM;
goto out;
}
@@ -745,18 +753,18 @@ int scoutfs_item_insert_batch(struct super_block *sb, struct list_head *list,
ret = 0;
out:
scoutfs_item_free_batch(list);
scoutfs_item_free_batch(sb, list);
return ret;
}
void scoutfs_item_free_batch(struct list_head *list)
void scoutfs_item_free_batch(struct super_block *sb, struct list_head *list)
{
struct cached_item *item;
struct cached_item *tmp;
list_for_each_entry_safe(item, tmp, list, entry) {
list_del_init(&item->entry);
free_item(item);
free_item(sb, item);
}
}
@@ -765,22 +773,22 @@ void scoutfs_item_free_batch(struct list_head *list)
* If the item exists make sure it's dirty and pinned. It can be read
* if it wasn't cached. -ENOENT is returned if the item doesn't exist.
*/
int scoutfs_item_dirty(struct super_block *sb, struct kvec *key)
int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key_buf *key)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct item_cache *cac = sbi->item_cache;
SCOUTFS_DECLARE_KVEC(end);
struct scoutfs_key_buf *end;
struct cached_item *item;
unsigned long flags;
int ret;
ret = scoutfs_kvec_alloc_key(end);
if (ret)
end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE);
if (!end) {
ret = -ENOMEM;
goto out;
}
do {
scoutfs_kvec_init_key(end);
spin_lock_irqsave(&cac->lock, flags);
item = find_item(sb, &cac->items, key);
@@ -798,7 +806,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct kvec *key)
} while (ret == -ENODATA &&
(ret = scoutfs_manifest_read_items(sb, key, end)) == 0);
scoutfs_kvec_kfree(end);
scoutfs_key_free(sb, end);
out:
trace_printk("ret %d\n", ret);
return ret;
@@ -810,20 +818,22 @@ out:
*
* Returns -ENOENT if the item doesn't exist.
*/
int scoutfs_item_update(struct super_block *sb, struct kvec *key,
int scoutfs_item_update(struct super_block *sb, struct scoutfs_key_buf *key,
struct kvec *val)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct item_cache *cac = sbi->item_cache;
struct scoutfs_key_buf *end;
SCOUTFS_DECLARE_KVEC(up_val);
SCOUTFS_DECLARE_KVEC(end);
struct cached_item *item;
unsigned long flags;
int ret;
ret = scoutfs_kvec_alloc_key(end);
if (ret)
end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE);
if (!end) {
ret = -ENOMEM;
goto out;
}
if (val) {
ret = scoutfs_kvec_dup_flatten(up_val, val);
@@ -834,8 +844,6 @@ int scoutfs_item_update(struct super_block *sb, struct kvec *key,
}
do {
scoutfs_kvec_init_key(end);
spin_lock_irqsave(&cac->lock, flags);
item = find_item(sb, &cac->items, key);
@@ -855,7 +863,7 @@ int scoutfs_item_update(struct super_block *sb, struct kvec *key,
} while (ret == -ENODATA &&
(ret = scoutfs_manifest_read_items(sb, key, end)) == 0);
out:
scoutfs_kvec_kfree(end);
scoutfs_key_free(sb, end);
scoutfs_kvec_kfree(up_val);
trace_printk("ret %d\n", ret);
@@ -866,7 +874,7 @@ out:
* XXX how nice, it'd just creates a cached deletion item. It doesn't
* have to read.
*/
int scoutfs_item_delete(struct super_block *sb, struct kvec *key)
int scoutfs_item_delete(struct super_block *sb, struct scoutfs_key_buf *key)
{
return WARN_ON_ONCE(-EINVAL);
}
@@ -931,33 +939,39 @@ static struct cached_item *next_dirty(struct cached_item *item)
return NULL;
}
/*
* The total number of bytes that will be stored in segments if we were
* to write out all the currently dirty items.
*
* XXX this isn't strictly correct because item's aren't of a uniform
* size. We might need more segments when large items leave gaps at the
* tail of each segment as it is filled with sorted items. It's close
* enough for now.
*/
long scoutfs_item_dirty_bytes(struct super_block *sb)
bool scoutfs_item_has_dirty(struct super_block *sb)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct item_cache *cac = sbi->item_cache;
unsigned long flags;
long bytes;
bool has;
spin_lock_irqsave(&cac->lock, flags);
bytes = (cac->nr_dirty_items * sizeof(struct scoutfs_segment_item)) +
cac->dirty_key_bytes + cac->dirty_val_bytes;
has = cac->nr_dirty_items != 0;
spin_unlock_irqrestore(&cac->lock, flags);
bytes += DIV_ROUND_UP(bytes, SCOUTFS_SEGMENT_SIZE) *
sizeof(struct scoutfs_segment_block);
return has;
}
return bytes;
/*
* Returns true if adding more items with the given count, keys, and values
* still fits in a single segment along with the current dirty items.
*/
bool scoutfs_item_dirty_fits_single(struct super_block *sb, u32 nr_items,
u32 key_bytes, u32 val_bytes)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct item_cache *cac = sbi->item_cache;
unsigned long flags;
bool fits;
spin_lock_irqsave(&cac->lock, flags);
fits = scoutfs_seg_fits_single(nr_items + cac->nr_dirty_items,
key_bytes + cac->dirty_key_bytes,
val_bytes + cac->dirty_val_bytes);
spin_unlock_irqrestore(&cac->lock, flags);
return fits;
}
/*
@@ -968,24 +982,25 @@ static void count_seg_items(struct item_cache *cac, u32 *nr_items,
u32 *key_bytes)
{
struct cached_item *item;
u32 total;
u32 items = 0;
u32 keys = 0;
u32 vals = 0;
*nr_items = 0;
*key_bytes = 0;
total = sizeof(struct scoutfs_segment_block);
for (item = first_dirty(cac->items.rb_node); item;
item = next_dirty(item)) {
total += sizeof(struct scoutfs_segment_item) +
scoutfs_kvec_length(item->key) +
scoutfs_kvec_length(item->val);
items++;
keys += item->key->key_len;
vals += scoutfs_kvec_length(item->val);
if (total > SCOUTFS_SEGMENT_SIZE)
if (!scoutfs_seg_fits_single(items, keys, vals))
break;
(*nr_items)++;
(*key_bytes) += scoutfs_kvec_length(item->key);
*nr_items = items;
*key_bytes = keys;
}
}
@@ -1062,14 +1077,14 @@ void scoutfs_item_destroy(struct super_block *sb)
item = container_of(node, struct cached_item, node);
node = rb_next(node);
rb_erase(&item->node, &cac->items);
free_item(item);
free_item(sb, item);
}
for (node = rb_first(&cac->ranges); node; ) {
rng = container_of(node, struct cached_range, node);
node = rb_next(node);
rb_erase(&rng->node, &cac->items);
free_range(rng);
free_range(sb, rng);
}
kfree(cac);


@@ -4,31 +4,38 @@
#include <linux/uio.h>
struct scoutfs_segment;
struct scoutfs_key_buf;
int scoutfs_item_lookup(struct super_block *sb, struct kvec *key,
int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key_buf *key,
struct kvec *val);
int scoutfs_item_lookup_exact(struct super_block *sb, struct kvec *key,
struct kvec *val, int size);
int scoutfs_item_next(struct super_block *sb, struct kvec *key,
struct kvec *last, struct kvec *val);
int scoutfs_item_next_same_min(struct super_block *sb, struct kvec *key,
struct kvec *last, struct kvec *val, int len);
int scoutfs_item_insert(struct super_block *sb, struct kvec *key,
int scoutfs_item_lookup_exact(struct super_block *sb,
struct scoutfs_key_buf *key, struct kvec *val,
int size);
int scoutfs_item_next(struct super_block *sb, struct scoutfs_key_buf *key,
struct scoutfs_key_buf *last, struct kvec *val);
int scoutfs_item_next_same_min(struct super_block *sb,
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *last,
struct kvec *val, int len);
int scoutfs_item_insert(struct super_block *sb, struct scoutfs_key_buf *key,
struct kvec *val);
int scoutfs_item_create(struct super_block *sb, struct kvec *key,
int scoutfs_item_create(struct super_block *sb, struct scoutfs_key_buf *key,
struct kvec *val);
int scoutfs_item_dirty(struct super_block *sb, struct kvec *key);
int scoutfs_item_update(struct super_block *sb, struct kvec *key,
int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key_buf *key);
int scoutfs_item_update(struct super_block *sb, struct scoutfs_key_buf *key,
struct kvec *val);
int scoutfs_item_delete(struct super_block *sb, struct kvec *key);
int scoutfs_item_delete(struct super_block *sb, struct scoutfs_key_buf *key);
int scoutfs_item_add_batch(struct super_block *sb, struct list_head *list,
struct kvec *key, struct kvec *val);
struct scoutfs_key_buf *key, struct kvec *val);
int scoutfs_item_insert_batch(struct super_block *sb, struct list_head *list,
struct kvec *start, struct kvec *end);
void scoutfs_item_free_batch(struct list_head *list);
struct scoutfs_key_buf *start,
struct scoutfs_key_buf *end);
void scoutfs_item_free_batch(struct super_block *sb, struct list_head *list);
long scoutfs_item_dirty_bytes(struct super_block *sb);
bool scoutfs_item_has_dirty(struct super_block *sb);
bool scoutfs_item_dirty_fits_single(struct super_block *sb, u32 nr_items,
u32 key_bytes, u32 val_bytes);
int scoutfs_item_dirty_seg(struct super_block *sb, struct scoutfs_segment *seg);
int scoutfs_item_setup(struct super_block *sb);

kmod/src/key.c (new file, 92 lines)

@@ -0,0 +1,92 @@
/*
* Copyright (C) 2017 Versity Software, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include "key.h"
struct scoutfs_key_buf *scoutfs_key_alloc(struct super_block *sb, u16 len)
{
struct scoutfs_key_buf *key;
if (WARN_ON_ONCE(len > SCOUTFS_MAX_KEY_SIZE))
return NULL;
key = kmalloc(sizeof(struct scoutfs_key_buf) + len, GFP_NOFS);
if (key) {
key->data = key + 1;
key->key_len = len;
key->buf_len = len;
}
return key;
}
struct scoutfs_key_buf *scoutfs_key_dup(struct super_block *sb,
struct scoutfs_key_buf *key)
{
struct scoutfs_key_buf *dup;
dup = scoutfs_key_alloc(sb, key->key_len);
if (dup)
memcpy(dup->data, key->data, dup->key_len);
return dup;
}
void scoutfs_key_free(struct super_block *sb, struct scoutfs_key_buf *key)
{
kfree(key);
}
/*
* Keys are large multi-byte big-endian values. To correctly increase
* or decrease keys we need to start by extending the key to the full
* precision using the max key size, setting the least significant bytes
* to 0.
*/
static void extend_zeros(struct scoutfs_key_buf *key)
{
if (key->key_len < SCOUTFS_MAX_KEY_SIZE &&
!WARN_ON_ONCE(key->buf_len != SCOUTFS_MAX_KEY_SIZE)) {
memset(key->data + key->key_len, 0,
key->buf_len - key->key_len);
key->key_len = key->buf_len;
}
}
void scoutfs_key_inc(struct scoutfs_key_buf *key)
{
u8 *bytes = key->data;
int i;
extend_zeros(key);
for (i = key->key_len - 1; i >= 0; i--) {
if (++bytes[i] != 0)
break;
}
}
void scoutfs_key_dec(struct scoutfs_key_buf *key)
{
u8 *bytes = key->data;
int i;
extend_zeros(key);
for (i = key->key_len - 1; i >= 0; i--) {
if (--bytes[i] != 255)
break;
}
}


@@ -4,6 +4,120 @@
#include <linux/types.h>
#include "format.h"
struct scoutfs_key_buf {
void *data;
u16 key_len;
u16 buf_len;
};
struct scoutfs_key_buf *scoutfs_key_alloc(struct super_block *sb, u16 len);
struct scoutfs_key_buf *scoutfs_key_dup(struct super_block *sb,
struct scoutfs_key_buf *key);
void scoutfs_key_free(struct super_block *sb, struct scoutfs_key_buf *key);
void scoutfs_key_inc(struct scoutfs_key_buf *key);
void scoutfs_key_dec(struct scoutfs_key_buf *key);
/*
* Point the key buf, usually statically allocated, at an existing
* contiguous key stored elsewhere.
*/
static inline void scoutfs_key_init(struct scoutfs_key_buf *key,
void *data, u16 len)
{
WARN_ON_ONCE(len > SCOUTFS_MAX_KEY_SIZE);
key->data = data;
key->key_len = len;
key->buf_len = len;
}
/*
* Compare the fs keys in segment sort order.
*/
static inline int scoutfs_key_compare(struct scoutfs_key_buf *a,
struct scoutfs_key_buf *b)
{
return memcmp(a->data, b->data, min(a->key_len, b->key_len)) ?:
a->key_len < b->key_len ? -1 : a->key_len > b->key_len ? 1 : 0;
}
/*
* Compare ranges of keys where overlapping is equality. Returns:
* -1: a_end < b_start
* 1: a_start > b_end
* else 0: ranges overlap
*/
static inline int scoutfs_key_compare_ranges(struct scoutfs_key_buf *a_start,
struct scoutfs_key_buf *a_end,
struct scoutfs_key_buf *b_start,
struct scoutfs_key_buf *b_end)
{
return scoutfs_key_compare(a_end, b_start) < 0 ? -1 :
scoutfs_key_compare(a_start, b_end) > 0 ? 1 :
0;
}
/*
* Copy as much of the contents of the source buffer that fits into the
* dest buffer.
*/
static inline void scoutfs_key_copy(struct scoutfs_key_buf *dst,
struct scoutfs_key_buf *src)
{
dst->key_len = min(dst->buf_len, src->key_len);
memcpy(dst->data, src->data, dst->key_len);
}
/*
* Initialize the dst buffer to point to the source buffer in all ways,
* including the buf len.  The contents of the buffer are shared but the
* fields describing the buffers are not.
*/
static inline void scoutfs_key_clone(struct scoutfs_key_buf *dst,
struct scoutfs_key_buf *src)
{
*dst = *src;
}
/*
* Memset as much of the length as fits in the buffer and set that to
* the new key length.
*/
static inline void scoutfs_key_memset(struct scoutfs_key_buf *key, int c,
u16 len)
{
if (WARN_ON_ONCE(len > SCOUTFS_MAX_KEY_SIZE))
return;
key->key_len = min(key->buf_len, len);
memset(key->data, c, key->key_len);
}
/*
* Set the contents of the buffer to the smallest possible key by sort
* order. It might be truncated if the buffer isn't large enough.
*/
static inline void scoutfs_key_set_min(struct scoutfs_key_buf *key)
{
scoutfs_key_memset(key, 0, sizeof(struct scoutfs_inode_key));
}
/*
* Set the contents of the buffer to the largest possible key by sort
* order. It might be truncated if the buffer isn't large enough.
*/
static inline void scoutfs_key_set_max(struct scoutfs_key_buf *key)
{
scoutfs_key_memset(key, 0xff, sizeof(struct scoutfs_inode_key));
}
/*
* What follows are the key functions for the small fixed size btree
* keys. It will all be removed once the callers are converted from
* the btree to the item cache.
*/
#define CKF "%llu.%u.%llu"
#define CKA(key) \
le64_to_cpu((key)->inode), (key)->type, le64_to_cpu((key)->offset)


@@ -52,7 +52,7 @@ struct manifest {
/* calculated on mount, const thereafter */
u64 level_limits[SCOUTFS_MANIFEST_MAX_LEVEL + 1];
SCOUTFS_DECLARE_KVEC(compact_keys[SCOUTFS_MANIFEST_MAX_LEVEL + 1]);
struct scoutfs_key_buf *compact_keys[SCOUTFS_MANIFEST_MAX_LEVEL + 1];
};
#define DECLARE_MANIFEST(sb, name) \
@@ -75,16 +75,16 @@ struct manifest_ref {
struct scoutfs_segment *seg;
int found_ctr;
int pos;
u16 first_key_len;
u16 last_key_len;
u8 level;
u8 keys[0];
struct scoutfs_key_buf *first;
struct scoutfs_key_buf *last;
};
struct manifest_fill_args {
struct scoutfs_manifest_entry ment;
struct kvec *first;
struct kvec *last;
struct scoutfs_key_buf *first;
struct scoutfs_key_buf *last;
};
/*
@@ -93,41 +93,33 @@ struct manifest_fill_args {
*/
struct manifest_search_key {
u64 seq;
struct kvec *key;
struct scoutfs_key_buf *key;
u8 level;
};
static void init_ment_keys(struct scoutfs_manifest_entry *ment,
struct kvec *first, struct kvec *last)
struct scoutfs_key_buf *first,
struct scoutfs_key_buf *last)
{
if (first)
scoutfs_kvec_init(first, ment->keys,
le16_to_cpu(ment->first_key_len));
scoutfs_key_init(first, ment->keys,
le16_to_cpu(ment->first_key_len));
if (last)
scoutfs_kvec_init(last, ment->keys +
le16_to_cpu(ment->first_key_len),
le16_to_cpu(ment->last_key_len));
scoutfs_key_init(last, ment->keys +
le16_to_cpu(ment->first_key_len),
le16_to_cpu(ment->last_key_len));
}
static void init_ref_keys(struct manifest_ref *ref, struct kvec *first,
struct kvec *last)
{
if (first)
scoutfs_kvec_init(first, ref->keys, ref->first_key_len);
if (last)
scoutfs_kvec_init(last, ref->keys + ref->first_key_len,
ref->last_key_len);
}
static bool cmp_range_ment(struct kvec *key, struct kvec *end,
static bool cmp_range_ment(struct scoutfs_key_buf *key,
struct scoutfs_key_buf *end,
struct scoutfs_manifest_entry *ment)
{
SCOUTFS_DECLARE_KVEC(first);
SCOUTFS_DECLARE_KVEC(last);
struct scoutfs_key_buf first;
struct scoutfs_key_buf last;
init_ment_keys(ment, first, last);
init_ment_keys(ment, &first, &last);
return scoutfs_kvec_cmp_overlap(key, end, first, last);
return scoutfs_key_compare_ranges(key, end, &first, &last);
}
static u64 get_level_count(struct manifest *mani,
@@ -187,8 +179,10 @@ static void add_level_count(struct super_block *sb, struct manifest *mani,
*
* This must be called with the manifest lock held.
*/
int scoutfs_manifest_add(struct super_block *sb, struct kvec *first,
struct kvec *last, u64 segno, u64 seq, u8 level)
int scoutfs_manifest_add(struct super_block *sb,
struct scoutfs_key_buf *first,
struct scoutfs_key_buf *last, u64 segno, u64 seq,
u8 level)
{
DECLARE_MANIFEST(sb, mani);
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
@@ -200,15 +194,15 @@ int scoutfs_manifest_add(struct super_block *sb, struct kvec *first,
unsigned bytes;
int ret;
trace_scoutfs_manifest_add(sb, first, last, segno, seq, level);
// trace_scoutfs_manifest_add(sb, first, last, segno, seq, level);
key_bytes = scoutfs_kvec_length(first) + scoutfs_kvec_length(last);
key_bytes = first->key_len + last->key_len;
bytes = offsetof(struct scoutfs_manifest_entry, keys[key_bytes]);
args.ment.segno = cpu_to_le64(segno);
args.ment.seq = cpu_to_le64(seq);
args.ment.first_key_len = cpu_to_le16(scoutfs_kvec_length(first));
args.ment.last_key_len = cpu_to_le16(scoutfs_kvec_length(last));
args.ment.first_key_len = cpu_to_le16(first->key_len);
args.ment.last_key_len = cpu_to_le16(last->key_len);
args.ment.level = level;
args.first = first;
@@ -233,8 +227,8 @@ int scoutfs_manifest_add(struct super_block *sb, struct kvec *first,
/*
* This must be called with the manifest lock held.
*/
int scoutfs_manifest_dirty(struct super_block *sb, struct kvec *first, u64 seq,
u8 level)
int scoutfs_manifest_dirty(struct super_block *sb,
struct scoutfs_key_buf *first, u64 seq, u8 level)
{
DECLARE_MANIFEST(sb, mani);
struct scoutfs_manifest_entry *ment;
@@ -255,8 +249,8 @@ int scoutfs_manifest_dirty(struct super_block *sb, struct kvec *first, u64 seq,
/*
* This must be called with the manifest lock held.
*/
int scoutfs_manifest_del(struct super_block *sb, struct kvec *first, u64 seq,
u8 level)
int scoutfs_manifest_del(struct super_block *sb, struct scoutfs_key_buf *first,
u64 seq, u8 level)
{
DECLARE_MANIFEST(sb, mani);
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
@@ -299,41 +293,43 @@ int scoutfs_manifest_unlock(struct super_block *sb)
return 0;
}
static int alloc_add_ref(struct list_head *list,
static void free_ref(struct super_block *sb, struct manifest_ref *ref)
{
if (!IS_ERR_OR_NULL(ref)) {
WARN_ON_ONCE(!list_empty(&ref->entry));
scoutfs_seg_put(ref->seg);
scoutfs_key_free(sb, ref->first);
scoutfs_key_free(sb, ref->last);
kfree(ref);
}
}
static int alloc_add_ref(struct super_block *sb, struct list_head *list,
struct scoutfs_manifest_entry *ment)
{
SCOUTFS_DECLARE_KVEC(ment_first);
SCOUTFS_DECLARE_KVEC(ment_last);
SCOUTFS_DECLARE_KVEC(first);
SCOUTFS_DECLARE_KVEC(last);
struct scoutfs_key_buf ment_first;
struct scoutfs_key_buf ment_last;
struct manifest_ref *ref;
unsigned bytes;
init_ment_keys(ment, ment_first, ment_last);
init_ment_keys(ment, &ment_first, &ment_last);
bytes = scoutfs_kvec_length(ment_first) +
scoutfs_kvec_length(ment_first);
ref = kmalloc(offsetof(struct manifest_ref, keys[bytes]), GFP_NOFS);
if (!ref)
ref = kzalloc(sizeof(struct manifest_ref), GFP_NOFS);
if (ref) {
ref->first = scoutfs_key_dup(sb, &ment_first);
ref->last = scoutfs_key_dup(sb, &ment_last);
}
if (!ref || !ref->first || !ref->last) {
free_ref(sb, ref);
return -ENOMEM;
memset(ref, 0, offsetof(struct manifest_ref, keys));
}
ref->segno = le64_to_cpu(ment->segno);
ref->seq = le64_to_cpu(ment->seq);
ref->level = ment->level;
ref->first_key_len = le16_to_cpu(ment->first_key_len);
ref->last_key_len = le16_to_cpu(ment->last_key_len);
init_ref_keys(ref, first, last);
scoutfs_kvec_memcpy(first, ment_first);
scoutfs_kvec_memcpy(last, ment_last);
list_add_tail(&ref->entry, list);
return 0;
}
/*
@@ -349,13 +345,14 @@ static int alloc_add_ref(struct list_head *list,
* segment starting with the key.
*/
static int get_range_refs(struct super_block *sb, struct manifest *mani,
struct kvec *key, struct kvec *end,
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *end,
struct list_head *ref_list)
{
struct scoutfs_manifest_entry *ment;
struct manifest_search_key skey;
SCOUTFS_DECLARE_KVEC(first);
SCOUTFS_DECLARE_KVEC(last);
struct scoutfs_key_buf first;
struct scoutfs_key_buf last;
struct manifest_ref *ref;
struct manifest_ref *tmp;
int ret;
@@ -369,7 +366,7 @@ static int get_range_refs(struct super_block *sb, struct manifest *mani,
ment = scoutfs_treap_lookup_prev(mani->treap, &skey);
while (!IS_ERR_OR_NULL(ment)) {
if (cmp_range_ment(key, end, ment) == 0) {
ret = alloc_add_ref(ref_list, ment);
ret = alloc_add_ref(sb, ref_list, ment);
if (ret)
goto out;
}
@@ -396,8 +393,8 @@ static int get_range_refs(struct super_block *sb, struct manifest *mani,
}
if (ment) {
init_ment_keys(ment, first, last);
ret = alloc_add_ref(ref_list, ment);
init_ment_keys(ment, &first, &last);
ret = alloc_add_ref(sb, ref_list, ment);
if (ret)
goto out;
}
@@ -411,7 +408,7 @@ out:
if (ret) {
list_for_each_entry_safe(ref, tmp, ref_list, entry) {
list_del_init(&ref->entry);
kfree(ref);
free_ref(sb, ref);
}
}
@@ -446,16 +443,17 @@ out:
*/
#define MAX_ITEMS_READ 32
int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
struct kvec *end)
int scoutfs_manifest_read_items(struct super_block *sb,
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *end)
{
DECLARE_MANIFEST(sb, mani);
SCOUTFS_DECLARE_KVEC(item_key);
struct scoutfs_key_buf item_key;
struct scoutfs_key_buf found_key;
struct scoutfs_key_buf batch_end;
struct scoutfs_key_buf seg_end;
SCOUTFS_DECLARE_KVEC(item_val);
SCOUTFS_DECLARE_KVEC(found_key);
SCOUTFS_DECLARE_KVEC(found_val);
SCOUTFS_DECLARE_KVEC(batch_end);
SCOUTFS_DECLARE_KVEC(seg_end);
struct scoutfs_segment *seg;
struct manifest_ref *ref;
struct manifest_ref *tmp;
@@ -486,7 +484,7 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
ref->seg = seg;
}
/* wait for submitted segments and search for starting pos */
/* always wait for submitted segments */
list_for_each_entry(ref, &ref_list, entry) {
if (!ref->seg)
break;
@@ -494,15 +492,29 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
err = scoutfs_seg_wait(sb, ref->seg);
if (err && !ret)
ret = err;
if (ret == 0)
ref->pos = scoutfs_seg_find_pos(ref->seg, key);
}
if (ret)
goto out;
scoutfs_kvec_init_null(batch_end);
scoutfs_kvec_init_null(seg_end);
/* start from the next item from the key in each segment */
list_for_each_entry(ref, &ref_list, entry)
ref->pos = scoutfs_seg_find_pos(ref->seg, key);
/*
* Find the greatest range we can cover if we walk all the
* segments. We only have level 0 segments for the missing
* range so that's the greatest. Then we shrink the range by
* the limit of each higher level segment that intersected with
* our starting key.
*/
scoutfs_key_clone(&seg_end, end);
list_for_each_entry(ref, &ref_list, entry) {
if (ref->level > 0 &&
scoutfs_key_compare(ref->last, &seg_end) < 0) {
scoutfs_key_clone(&seg_end, ref->last);
}
}
found_ctr = 0;
for (n = 0; n < MAX_ITEMS_READ; n++) {
@@ -512,37 +524,26 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
/* find the next least key from the pos in each segment */
list_for_each_entry_safe(ref, tmp, &ref_list, entry) {
if (ref->pos == -1)
continue;
/*
* Check the next item in the segment. We're
* done with the segment if there are no more
* items or if the next item is past the
* caller's end. We record either the caller's
* end or the segment end if it's a l1+ segment for
* use as the batch end if we don't see more items.
* caller's end.
*/
ret = scoutfs_seg_item_kvecs(ref->seg, ref->pos,
item_key, item_val);
if (ret < 0) {
if (ref->level > 0) {
init_ref_keys(ref, NULL, item_key);
scoutfs_kvec_clone_less(seg_end,
item_key);
}
} else if (scoutfs_kvec_memcmp(item_key, end) > 0) {
scoutfs_kvec_clone_less(seg_end, end);
ret = -ENOENT;
}
if (ret < 0) {
list_del_init(&ref->entry);
scoutfs_seg_put(ref->seg);
kfree(ref);
ret = scoutfs_seg_item_ptrs(ref->seg, ref->pos,
&item_key, item_val);
if (ret < 0 || scoutfs_key_compare(&item_key, end) > 0) {
ref->pos = -1;
continue;
}
/* see if it's the new least item */
if (found) {
cmp = scoutfs_kvec_memcmp(item_key, found_key);
cmp = scoutfs_key_compare(&item_key,
&found_key);
if (cmp >= 0) {
if (cmp == 0)
ref->found_ctr = found_ctr;
@@ -551,7 +552,7 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
}
/* remember new least key */
scoutfs_kvec_clone(found_key, item_key);
scoutfs_key_clone(&found_key, &item_key);
scoutfs_kvec_clone(found_val, item_val);
ref->found_ctr = ++found_ctr;
found = true;
@@ -559,7 +560,7 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
/* ran out of keys in segs, range extends to seg end */
if (!found) {
scoutfs_kvec_clone(batch_end, seg_end);
scoutfs_key_clone(&batch_end, &seg_end);
ret = 0;
break;
}
@@ -569,18 +570,18 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
* have items it's not a failure and the end of the cached
* range is the last successfully added item.
*/
ret = scoutfs_item_add_batch(sb, &batch, found_key, found_val);
ret = scoutfs_item_add_batch(sb, &batch, &found_key, found_val);
if (ret) {
if (n > 0)
ret = 0;
break;
}
/* the last successful key determines the range */
scoutfs_kvec_clone(batch_end, found_key);
/* the last successful key determines range end until run out */
scoutfs_key_clone(&batch_end, &found_key);
/* if we just saw the end key then we're done */
if (scoutfs_kvec_memcmp(found_key, end) == 0) {
if (scoutfs_key_compare(&found_key, end) == 0) {
ret = 0;
break;
}
@@ -595,14 +596,13 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
}
if (ret)
scoutfs_item_free_batch(&batch);
scoutfs_item_free_batch(sb, &batch);
else
ret = scoutfs_item_insert_batch(sb, &batch, key, batch_end);
ret = scoutfs_item_insert_batch(sb, &batch, key, &batch_end);
out:
list_for_each_entry_safe(ref, tmp, &ref_list, entry) {
list_del_init(&ref->entry);
scoutfs_seg_put(ref->seg);
kfree(ref);
free_ref(sb, ref);
}
return ret;
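The read loop above is essentially an N-way merge over sorted segment cursors: each round finds the least next key across all refs, adds that item to the batch, and advances every ref whose next key matched so duplicates from lower levels are skipped. A minimal standalone sketch of that shape (plain sorted string arrays; the seq/level tie-breaking that decides which duplicate's value wins is not modeled here) could look like:

	/* Illustrative only: a toy N-way merge, not the scoutfs read loop. */
	#include <stdio.h>
	#include <string.h>

	#define NR_SEGS 3

	int main(void)
	{
		const char *segs[NR_SEGS][3] = {
			{ "a", "d", "g" },
			{ "b", "d", "h" },
			{ "c", "e", "i" },
		};
		int pos[NR_SEGS] = { 0, 0, 0 };

		for (;;) {
			const char *least = NULL;
			int i;

			/* find the least next key across all segment cursors */
			for (i = 0; i < NR_SEGS; i++) {
				if (pos[i] >= 3)
					continue;
				if (!least || strcmp(segs[i][pos[i]], least) < 0)
					least = segs[i][pos[i]];
			}
			if (!least)
				break;

			printf("%s ", least);

			/* advance every cursor that produced this key */
			for (i = 0; i < NR_SEGS; i++) {
				if (pos[i] < 3 && !strcmp(segs[i][pos[i]], least))
					pos[i]++;
			}
		}
		printf("\n");	/* prints: a b c d e g h i */
		return 0;
	}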
@@ -677,10 +677,10 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
struct scoutfs_manifest_entry *ment;
struct scoutfs_manifest_entry *over;
struct manifest_search_key skey;
SCOUTFS_DECLARE_KVEC(ment_first);
SCOUTFS_DECLARE_KVEC(ment_last);
SCOUTFS_DECLARE_KVEC(over_first);
SCOUTFS_DECLARE_KVEC(over_last);
struct scoutfs_key_buf ment_first;
struct scoutfs_key_buf ment_last;
struct scoutfs_key_buf over_first;
struct scoutfs_key_buf over_last;
int level;
int ret;
int i;
@@ -710,9 +710,7 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
skey.seq = 0;
ment = scoutfs_treap_lookup_next(mani->treap, &skey);
if (ment == NULL || ment->level != level) {
/* XXX ugh, these kvecs are the worst */
scoutfs_kvec_init(skey.key,
skey.key[0].iov_base, 0);
scoutfs_key_set_min(skey.key);
ment = scoutfs_treap_lookup_next(mani->treap, &skey);
}
}
@@ -726,17 +724,17 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
goto out;
}
init_ment_keys(ment, ment_first, ment_last);
init_ment_keys(ment, &ment_first, &ment_last);
/* add the upper input segment */
ret = scoutfs_compact_add(sb, data, ment_first, ment_last,
ret = scoutfs_compact_add(sb, data, &ment_first, &ment_last,
le64_to_cpu(ment->segno),
le64_to_cpu(ment->seq), level);
if (ret)
goto out;
/* start with the first overlapping at the next level */
skey.key = ment_first;
skey.key = &ment_first;
skey.level = level + 1;
skey.seq = 0;
over = scoutfs_treap_lookup(mani->treap, &skey);
@@ -750,13 +748,13 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
if (!over || over->level != (ment->level + 1))
break;
init_ment_keys(over, over_first, over_last);
init_ment_keys(over, &over_first, &over_last);
if (scoutfs_kvec_cmp_overlap(ment_first, ment_last,
over_first, over_last) != 0)
if (scoutfs_key_compare_ranges(&ment_first, &ment_last,
&over_first, &over_last) != 0)
break;
ret = scoutfs_compact_add(sb, data, over_first, over_last,
ret = scoutfs_compact_add(sb, data, &over_first, &over_last,
le64_to_cpu(over->segno),
le64_to_cpu(over->seq), level + 1);
if (ret)
@@ -765,10 +763,9 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
over = scoutfs_treap_next(mani->treap, over);
}
/* record the next key to start from, not exact */
scoutfs_kvec_init_key(mani->compact_keys[level]);
scoutfs_kvec_memcpy_truncate(mani->compact_keys[level], ment_last);
scoutfs_kvec_be_inc(mani->compact_keys[level]);
/* record the next key to start from */
scoutfs_key_copy(mani->compact_keys[level], &ment_last);
scoutfs_key_inc(mani->compact_keys[level]);
ret = 0;
out:
@@ -800,8 +797,8 @@ static int manifest_treap_compare(void *key, void *data)
{
struct manifest_search_key *skey = key;
struct scoutfs_manifest_entry *ment = data;
SCOUTFS_DECLARE_KVEC(first);
SCOUTFS_DECLARE_KVEC(last);
struct scoutfs_key_buf first;
struct scoutfs_key_buf last;
int cmp;
if (skey->level < ment->level) {
@@ -818,13 +815,13 @@ static int manifest_treap_compare(void *key, void *data)
goto out;
}
init_ment_keys(ment, first, last);
init_ment_keys(ment, &first, &last);
if (skey->seq == 0) {
cmp = scoutfs_kvec_cmp_overlap(skey->key, skey->key,
first, last);
cmp = scoutfs_key_compare_ranges(skey->key, skey->key,
&first, &last);
} else {
cmp = scoutfs_kvec_memcmp(skey->key, first) ?:
cmp = scoutfs_key_compare(skey->key, &first) ?:
scoutfs_cmp_u64s(skey->seq, le64_to_cpu(ment->seq));
}
@@ -836,14 +833,14 @@ static void manifest_treap_fill(void *data, void *arg)
{
struct scoutfs_manifest_entry *ment = data;
struct manifest_fill_args *args = arg;
SCOUTFS_DECLARE_KVEC(ment_first);
SCOUTFS_DECLARE_KVEC(ment_last);
struct scoutfs_key_buf ment_first;
struct scoutfs_key_buf ment_last;
*ment = args->ment;
init_ment_keys(ment, ment_first, ment_last);
scoutfs_kvec_memcpy(ment_first, args->first);
scoutfs_kvec_memcpy(ment_last, args->last);
init_ment_keys(ment, &ment_first, &ment_last);
scoutfs_key_copy(&ment_first, args->first);
scoutfs_key_copy(&ment_last, args->last);
}
static struct scoutfs_treap_ops manifest_treap_ops = {
@@ -858,7 +855,6 @@ int scoutfs_manifest_setup(struct super_block *sb)
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_super_block *super = &sbi->super;
struct manifest *mani;
int ret;
int i;
mani = kzalloc(sizeof(struct manifest), GFP_KERNEL);
@@ -876,14 +872,17 @@ int scoutfs_manifest_setup(struct super_block *sb)
}
for (i = 0; i < ARRAY_SIZE(mani->compact_keys); i++) {
ret = scoutfs_kvec_alloc_key(mani->compact_keys[i]);
if (ret) {
mani->compact_keys[i] = scoutfs_key_alloc(sb,
SCOUTFS_MAX_KEY_SIZE);
if (!mani->compact_keys[i]) {
while (--i >= 0)
scoutfs_kvec_kfree(mani->compact_keys[i]);
scoutfs_key_free(sb, mani->compact_keys[i]);
scoutfs_treap_free(mani->treap);
kfree(mani);
return -ENOMEM;
}
scoutfs_key_set_min(mani->compact_keys[i]);
}
for (i = ARRAY_SIZE(super->manifest.level_counts) - 1; i >= 0; i--) {
@@ -915,7 +914,7 @@ void scoutfs_manifest_destroy(struct super_block *sb)
if (mani) {
scoutfs_treap_free(mani->treap);
for (i = 0; i < ARRAY_SIZE(mani->compact_keys); i++)
scoutfs_kvec_kfree(mani->compact_keys[i]);
scoutfs_key_free(sb, mani->compact_keys[i]);
kfree(mani);
}
}

View File

@@ -1,20 +1,25 @@
#ifndef _SCOUTFS_MANIFEST_H_
#define _SCOUTFS_MANIFEST_H_
int scoutfs_manifest_add(struct super_block *sb, struct kvec *first,
struct kvec *last, u64 segno, u64 seq, u8 level);
int scoutfs_manifest_dirty(struct super_block *sb, struct kvec *first, u64 seq,
u8 level);
int scoutfs_manifest_del(struct super_block *sb, struct kvec *first, u64 seq,
struct scoutfs_key_buf;
int scoutfs_manifest_add(struct super_block *sb,
struct scoutfs_key_buf *first,
struct scoutfs_key_buf *last, u64 segno, u64 seq,
u8 level);
int scoutfs_manifest_dirty(struct super_block *sb,
struct scoutfs_key_buf *first, u64 seq, u8 level);
int scoutfs_manifest_del(struct super_block *sb, struct scoutfs_key_buf *first,
u64 seq, u8 level);
int scoutfs_manifest_has_dirty(struct super_block *sb);
int scoutfs_manifest_dirty_ring(struct super_block *sb);
int scoutfs_manifest_lock(struct super_block *sb);
int scoutfs_manifest_unlock(struct super_block *sb);
int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
struct kvec *until);
int scoutfs_manifest_read_items(struct super_block *sb,
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *end);
u64 scoutfs_manifest_level_count(struct super_block *sb, u8 level);
int scoutfs_manifest_next_compact(struct super_block *sb, void *data);
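These prototypes now take struct scoutfs_key_buf, whose definition lives in key.h and isn't shown in this diff. Going only by how the patch uses it (a data pointer, a key_len, and, per the commit message, knowledge of the full buffer size), a plausible sketch of the type and its memcmp fast path might be the following; the actual field names and widths in key.h may differ:

	/* Sketch only: inferred from usage in this patch, not the key.h definition. */
	#include <stdint.h>
	#include <string.h>

	struct scoutfs_key_buf_sketch {
		void *data;		/* contiguous key bytes */
		uint16_t key_len;	/* length of the key currently stored */
		uint16_t buf_len;	/* total size of the backing buffer */
	};

	/* the compare fast path is a plain memcmp, shorter key sorts first on ties */
	static int key_compare_sketch(struct scoutfs_key_buf_sketch *a,
				      struct scoutfs_key_buf_sketch *b)
	{
		uint16_t len = a->key_len < b->key_len ? a->key_len : b->key_len;
		int cmp = memcmp(a->data, b->data, len);

		if (cmp)
			return cmp;
		return (int)a->key_len - (int)b->key_len;
	}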

View File

@@ -24,6 +24,7 @@
#include "cmp.h"
#include "manifest.h"
#include "alloc.h"
#include "key.h"
/*
* seg.c should just be about the cache and io, and maybe
@@ -339,7 +340,7 @@ static void *off_ptr(struct scoutfs_segment *seg, u32 off)
return page_address(seg->pages[pg]) + pg_off;
}
static u32 pos_off(struct scoutfs_segment *seg, u32 pos)
static u32 pos_off(u32 pos)
{
/* items need to be a power of two */
BUILD_BUG_ON(!is_power_of_2(sizeof(struct scoutfs_segment_item)));
@@ -352,7 +353,7 @@ static u32 pos_off(struct scoutfs_segment *seg, u32 pos)
static void *pos_ptr(struct scoutfs_segment *seg, u32 pos)
{
return off_ptr(seg, pos_off(seg, pos));
return off_ptr(seg, pos_off(pos));
}
/*
@@ -416,8 +417,8 @@ static void kvec_from_pages(struct scoutfs_segment *seg,
off_ptr(seg, off + first), len - first);
}
int scoutfs_seg_item_kvecs(struct scoutfs_segment *seg, int pos,
struct kvec *key, struct kvec *val)
int scoutfs_seg_item_ptrs(struct scoutfs_segment *seg, int pos,
struct scoutfs_key_buf *key, struct kvec *val)
{
struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
struct native_item item;
@@ -428,7 +429,7 @@ int scoutfs_seg_item_kvecs(struct scoutfs_segment *seg, int pos,
load_item(seg, pos, &item);
if (key)
kvec_from_pages(seg, key, item.key_off, item.key_len);
scoutfs_key_init(key, off_ptr(seg, item.key_off), item.key_len);
if (val)
kvec_from_pages(seg, val, item.val_off, item.val_len);
@@ -440,10 +441,11 @@ int scoutfs_seg_item_kvecs(struct scoutfs_segment *seg, int pos,
* This can return the number of positions if the key is greater than
* all the keys.
*/
static int find_key_pos(struct scoutfs_segment *seg, struct kvec *search)
static int find_key_pos(struct scoutfs_segment *seg,
struct scoutfs_key_buf *search)
{
struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
SCOUTFS_DECLARE_KVEC(key);
struct scoutfs_key_buf key;
unsigned int start = 0;
unsigned int end = le32_to_cpu(sblk->nr_items);
unsigned int pos = 0;
@@ -451,9 +453,9 @@ static int find_key_pos(struct scoutfs_segment *seg, struct kvec *search)
while (start < end) {
pos = start + (end - start) / 2;
scoutfs_seg_item_kvecs(seg, pos, key, NULL);
scoutfs_seg_item_ptrs(seg, pos, &key, NULL);
cmp = scoutfs_kvec_memcmp(search, key);
cmp = scoutfs_key_compare(search, &key);
if (cmp < 0)
end = pos;
else if (cmp > 0)
@@ -465,11 +467,51 @@ static int find_key_pos(struct scoutfs_segment *seg, struct kvec *search)
return pos;
}
int scoutfs_seg_find_pos(struct scoutfs_segment *seg, struct kvec *key)
int scoutfs_seg_find_pos(struct scoutfs_segment *seg,
struct scoutfs_key_buf *key)
{
return find_key_pos(seg, key);
}
/*
* Keys are aligned to the next block boundary if they'd cross a block
* boundary. To find the first value offset we have to assume that
* there will be a worst case key alignment at every block boundary.
*/
static u32 first_val_off(u32 nr_items, u32 key_bytes)
{
u32 key_padding = SCOUTFS_MAX_KEY_SIZE - 1;
u32 partial_block = SCOUTFS_BLOCK_SIZE - key_padding;
u32 first_key_off = pos_off(nr_items);
u32 block_off = first_key_off & SCOUTFS_BLOCK_MASK;
u32 total_padding = ((block_off + key_bytes) / partial_block) *
key_padding;
return first_key_off + key_bytes + total_padding;
}
/*
* Returns true if the given number of items with the given total byte
* counts of keys and values fits inside a single segment.
*/
bool scoutfs_seg_fits_single(u32 nr_items, u32 key_bytes, u32 val_bytes)
{
return (first_val_off(nr_items, key_bytes) + val_bytes)
<= SCOUTFS_SEGMENT_SIZE;
}
static u32 align_key_off(struct scoutfs_segment *seg, u32 key_off, u32 len)
{
u32 space = SCOUTFS_BLOCK_SIZE - (key_off & SCOUTFS_BLOCK_MASK);
if (len > space) {
memset(off_ptr(seg, key_off), 0, space);
return key_off + space;
}
return key_off;
}
/*
* Store the first item in the segment. The caller knows the number
* of items and bytes of keys that determine where the keys and values
@@ -478,14 +520,14 @@ int scoutfs_seg_find_pos(struct scoutfs_segment *seg, struct kvec *key)
* This should never fail because any item must always fit in a segment.
*/
void scoutfs_seg_first_item(struct super_block *sb, struct scoutfs_segment *seg,
struct kvec *key, struct kvec *val,
struct scoutfs_key_buf *key, struct kvec *val,
unsigned int nr_items, unsigned int key_bytes)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_super_block *super = &sbi->super;
struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
struct native_item item;
SCOUTFS_DECLARE_KVEC(item_key);
struct scoutfs_key_buf item_key;
SCOUTFS_DECLARE_KVEC(item_val);
u32 key_off;
u32 val_off;
@@ -495,31 +537,33 @@ void scoutfs_seg_first_item(struct super_block *sb, struct scoutfs_segment *seg,
sblk->seq = super->next_seg_seq;
le64_add_cpu(&super->next_seg_seq, 1);
key_off = pos_off(seg, nr_items);
val_off = key_off + key_bytes;
key_off = align_key_off(seg, pos_off(nr_items), key->key_len);
val_off = first_val_off(nr_items, key_bytes);
sblk->nr_items = cpu_to_le32(1);
trace_printk("first item offs key %u val %u\n", key_off, val_off);
item.seq = 1;
item.key_off = key_off;
item.val_off = val_off;
item.key_len = scoutfs_kvec_length(key);
item.key_len = key->key_len;
item.val_len = scoutfs_kvec_length(val);
store_item(seg, 0, &item);
scoutfs_seg_item_kvecs(seg, 0, item_key, item_val);
scoutfs_kvec_memcpy(item_key, key);
scoutfs_seg_item_ptrs(seg, 0, &item_key, item_val);
scoutfs_key_copy(&item_key, key);
scoutfs_kvec_memcpy(item_val, val);
}
void scoutfs_seg_append_item(struct super_block *sb,
struct scoutfs_segment *seg,
struct kvec *key, struct kvec *val)
struct scoutfs_key_buf *key, struct kvec *val)
{
struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
struct native_item item;
struct native_item prev;
SCOUTFS_DECLARE_KVEC(item_key);
struct scoutfs_key_buf item_key;
SCOUTFS_DECLARE_KVEC(item_val);
u32 pos;
@@ -529,14 +573,18 @@ void scoutfs_seg_append_item(struct super_block *sb,
load_item(seg, pos - 1, &prev);
item.seq = 1;
item.key_off = prev.key_off + prev.key_len;
item.key_len = scoutfs_kvec_length(key);
item.key_off = align_key_off(seg, prev.key_off + prev.key_len,
key->key_len);
item.key_len = key->key_len;
item.val_off = prev.val_off + prev.val_len;
item.val_len = scoutfs_kvec_length(val);
store_item(seg, pos, &item);
scoutfs_seg_item_kvecs(seg, pos, item_key, item_val);
scoutfs_kvec_memcpy(item_key, key);
trace_printk("item %u offs key %u val %u\n",
pos, item.key_off, item.val_off);
scoutfs_seg_item_ptrs(seg, pos, &item_key, item_val);
scoutfs_key_copy(&item_key, key);
scoutfs_kvec_memcpy(item_val, val);
}
@@ -548,16 +596,16 @@ int scoutfs_seg_manifest_add(struct super_block *sb,
{
struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
struct native_item item;
SCOUTFS_DECLARE_KVEC(first);
SCOUTFS_DECLARE_KVEC(last);
struct scoutfs_key_buf first;
struct scoutfs_key_buf last;
load_item(seg, 0, &item);
kvec_from_pages(seg, first, item.key_off, item.key_len);
scoutfs_key_init(&first, off_ptr(seg, item.key_off), item.key_len);
load_item(seg, le32_to_cpu(sblk->nr_items) - 1, &item);
kvec_from_pages(seg, last, item.key_off, item.key_len);
scoutfs_key_init(&last, off_ptr(seg, item.key_off), item.key_len);
return scoutfs_manifest_add(sb, first, last, le64_to_cpu(sblk->segno),
return scoutfs_manifest_add(sb, &first, &last, le64_to_cpu(sblk->segno),
le64_to_cpu(sblk->seq), level);
}
@@ -566,12 +614,12 @@ int scoutfs_seg_manifest_del(struct super_block *sb,
{
struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
struct native_item item;
SCOUTFS_DECLARE_KVEC(first);
struct scoutfs_key_buf first;
load_item(seg, 0, &item);
kvec_from_pages(seg, first, item.key_off, item.key_len);
scoutfs_key_init(&first, off_ptr(seg, item.key_off), item.key_len);
return scoutfs_manifest_del(sb, first, le64_to_cpu(sblk->seq), level);
return scoutfs_manifest_del(sb, &first, le64_to_cpu(sblk->seq), level);
}
int scoutfs_seg_setup(struct super_block *sb)

View File

@@ -3,15 +3,17 @@
struct scoutfs_bio_completion;
struct scoutfs_segment;
struct scoutfs_key_buf;
struct kvec;
struct scoutfs_segment *scoutfs_seg_submit_read(struct super_block *sb,
u64 segno);
int scoutfs_seg_wait(struct super_block *sb, struct scoutfs_segment *seg);
int scoutfs_seg_find_pos(struct scoutfs_segment *seg, struct kvec *key);
int scoutfs_seg_item_kvecs(struct scoutfs_segment *seg, int pos,
struct kvec *key, struct kvec *val);
int scoutfs_seg_find_pos(struct scoutfs_segment *seg,
struct scoutfs_key_buf *key);
int scoutfs_seg_item_ptrs(struct scoutfs_segment *seg, int pos,
struct scoutfs_key_buf *key, struct kvec *val);
void scoutfs_seg_get(struct scoutfs_segment *seg);
void scoutfs_seg_put(struct scoutfs_segment *seg);
@@ -19,12 +21,13 @@ void scoutfs_seg_put(struct scoutfs_segment *seg);
int scoutfs_seg_alloc(struct super_block *sb, struct scoutfs_segment **seg_ret);
int scoutfs_seg_free_segno(struct super_block *sb,
struct scoutfs_segment *seg);
bool scoutfs_seg_fits_single(u32 nr_items, u32 key_bytes, u32 val_bytes);
void scoutfs_seg_first_item(struct super_block *sb, struct scoutfs_segment *seg,
struct kvec *key, struct kvec *val,
struct scoutfs_key_buf *key, struct kvec *val,
unsigned int nr_items, unsigned int key_bytes);
void scoutfs_seg_append_item(struct super_block *sb,
struct scoutfs_segment *seg,
struct kvec *key, struct kvec *val);
struct scoutfs_key_buf *key, struct kvec *val);
int scoutfs_seg_manifest_add(struct super_block *sb,
struct scoutfs_segment *seg, u8 level);
int scoutfs_seg_manifest_del(struct super_block *sb,

View File

@@ -98,15 +98,15 @@ void scoutfs_trans_write_func(struct work_struct *work)
scoutfs_filerw_free_alloc(sb);
#endif
trace_printk("dirty bytes %ld manifest dirty %d alloc dirty %d\n",
scoutfs_item_dirty_bytes(sb),
trace_printk("items dirty %d manifest dirty %d alloc dirty %d\n",
scoutfs_item_has_dirty(sb),
scoutfs_manifest_has_dirty(sb),
scoutfs_alloc_has_dirty(sb));
/*
* XXX this needs serious work to handle errors.
*/
while (scoutfs_item_dirty_bytes(sb)) {
while (scoutfs_item_has_dirty(sb)) {
seg = NULL;
ret = scoutfs_seg_alloc(sb, &seg) ?:
scoutfs_item_dirty_seg(sb, seg) ?:
@@ -222,14 +222,27 @@ int scoutfs_file_fsync(struct file *file, loff_t start, loff_t end,
/*
* I think the holder that creates the most dirty item data is
* symlinking, which can create all the entry items and a symlink target
* item with a full 4k path. We go a little nuts and just set it to two
* blocks.
* symlinking which can create an inode, the three dirent items with a
* full file name, and a symlink item with a full path.
*
* XXX This divides the segment size to set the hard limit on the number of
* concurrent holders so we'll want this to be more precise.
* XXX Assuming the worst case here too aggressively limits the number
* of concurrent holders that can work without being blocked when they
* know they'll dirty much less. We may want to have callers pass in
* their item, key, and val budgets if that's not too fragile.
*
* XXX fix to use real backref and symlink items, placeholders for now
*/
#define MOST_DIRTY (2 * SCOUTFS_BLOCK_SIZE)
#define HOLD_WORST_ITEMS 5
#define HOLD_WORST_KEYS (sizeof(struct scoutfs_inode_key) + \
sizeof(struct scoutfs_dirent_key) + SCOUTFS_NAME_LEN +\
sizeof(struct scoutfs_readdir_key) + \
sizeof(struct scoutfs_readdir_key) + \
sizeof(struct scoutfs_inode_key))
#define HOLD_WORST_VALS (sizeof(struct scoutfs_inode) + \
sizeof(struct scoutfs_dirent) + \
sizeof(struct scoutfs_dirent) + SCOUTFS_NAME_LEN + \
sizeof(struct scoutfs_dirent) + SCOUTFS_NAME_LEN + \
SCOUTFS_SYMLINK_MAX_SIZE)
/*
* We're able to hold the transaction if the current dirty item bytes
@@ -239,10 +252,12 @@ int scoutfs_file_fsync(struct file *file, loff_t start, loff_t end,
static bool hold_acquired(struct super_block *sb)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
long bytes;
int with_us;
int holds;
int before;
u32 items;
u32 keys;
u32 vals;
holds = atomic_read(&sbi->trans_holds);
for (;;) {
@@ -258,8 +273,10 @@ static bool hold_acquired(struct super_block *sb)
/* see if we all would fill the segment */
with_us = holds + 1;
bytes = (with_us * MOST_DIRTY) + scoutfs_item_dirty_bytes(sb);
if (bytes > SCOUTFS_SEGMENT_SIZE) {
items = with_us * HOLD_WORST_ITEMS;
keys = with_us * HOLD_WORST_KEYS;
vals = with_us * HOLD_WORST_VALS;
if (!scoutfs_item_dirty_fits_single(sb, items, keys, vals)) {
scoutfs_sync_fs(sb, 0);
return false;
}