diff --git a/kmod/src/Makefile b/kmod/src/Makefile index 798ba33a..828cce93 100644 --- a/kmod/src/Makefile +++ b/kmod/src/Makefile @@ -3,5 +3,5 @@ obj-$(CONFIG_SCOUTFS_FS) := scoutfs.o CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include scoutfs-y += alloc.o bio.o block.o btree.o buddy.o compact.o counters.o crc.o \ - dir.o filerw.o kvec.o inode.o ioctl.o item.o manifest.o msg.o \ - name.o seg.o scoutfs_trace.o super.o trans.o treap.o xattr.o + dir.o filerw.o kvec.o inode.o ioctl.o item.o key.o manifest.o \ + msg.o name.o seg.o scoutfs_trace.o super.o trans.o treap.o xattr.o diff --git a/kmod/src/compact.c b/kmod/src/compact.c index 14c35c98..b6e0965d 100644 --- a/kmod/src/compact.c +++ b/kmod/src/compact.c @@ -70,8 +70,8 @@ struct compact_seg { u64 segno; u64 seq; u8 level; - SCOUTFS_DECLARE_KVEC(first); - SCOUTFS_DECLARE_KVEC(last); + struct scoutfs_key_buf *first; + struct scoutfs_key_buf *last; struct scoutfs_segment *seg; int pos; int saved_pos; @@ -92,25 +92,45 @@ struct compact_cursor { struct compact_seg *saved_lower; }; -static void free_cseg(struct compact_seg *cseg) +static void free_cseg(struct super_block *sb, struct compact_seg *cseg) { WARN_ON_ONCE(!list_empty(&cseg->entry)); scoutfs_seg_put(cseg->seg); - scoutfs_kvec_kfree(cseg->first); - scoutfs_kvec_kfree(cseg->last); + scoutfs_key_free(sb, cseg->first); + scoutfs_key_free(sb, cseg->last); kfree(cseg); } -static void free_cseg_list(struct list_head *list) +static struct compact_seg *alloc_cseg(struct super_block *sb, + struct scoutfs_key_buf *first, + struct scoutfs_key_buf *last) +{ + struct compact_seg *cseg; + + cseg = kzalloc(sizeof(struct compact_seg), GFP_NOFS); + if (cseg) { + INIT_LIST_HEAD(&cseg->entry); + cseg->first = scoutfs_key_dup(sb, first); + cseg->last = scoutfs_key_dup(sb, last); + if (!cseg->first || !cseg->last) { + free_cseg(sb, cseg); + cseg = NULL; + } + } + + return cseg; +} + +static void free_cseg_list(struct super_block *sb, struct list_head *list) { struct compact_seg *cseg; struct compact_seg *tmp; list_for_each_entry_safe(cseg, tmp, list, entry) { list_del_init(&cseg->entry); - free_cseg(cseg); + free_cseg(sb, cseg); } } @@ -177,18 +197,18 @@ static struct compact_seg *next_spos(struct compact_cursor *curs, * update items. 
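 *
 * For example (illustrative): merging upper items {a, c} with lower
 * items {b, c, d} yields a, b, c, d, where c is taken from the upper
 * segment and the lower segment's older duplicate is skipped.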
*/ static int next_item(struct super_block *sb, struct compact_cursor *curs, - struct kvec *item_key, struct kvec *item_val) + struct scoutfs_key_buf *item_key, struct kvec *item_val) { struct compact_seg *upper = curs->upper; struct compact_seg *lower = curs->lower; - SCOUTFS_DECLARE_KVEC(lower_key); + struct scoutfs_key_buf lower_key; SCOUTFS_DECLARE_KVEC(lower_val); int cmp; int ret; if (upper) { - ret = scoutfs_seg_item_kvecs(upper->seg, upper->pos, - item_key, item_val); + ret = scoutfs_seg_item_ptrs(upper->seg, upper->pos, + item_key, item_val); if (ret < 0) upper = NULL; } @@ -198,8 +218,8 @@ static int next_item(struct super_block *sb, struct compact_cursor *curs, if (ret) goto out; - ret = scoutfs_seg_item_kvecs(lower->seg, lower->pos, - lower_key, lower_val); + ret = scoutfs_seg_item_ptrs(lower->seg, lower->pos, + &lower_key, lower_val); if (ret == 0) break; lower = next_spos(curs, lower); @@ -217,14 +237,14 @@ static int next_item(struct super_block *sb, struct compact_cursor *curs, * > 0: return lower, advance lower */ if (upper && lower) - cmp = scoutfs_kvec_memcmp(item_key, lower_key); + cmp = scoutfs_key_compare(item_key, &lower_key); else if (upper) cmp = -1; else cmp = 1; if (cmp > 0) { - scoutfs_kvec_clone(item_key, lower_key); + scoutfs_key_clone(item_key, &lower_key); scoutfs_kvec_clone(item_val, lower_val); } @@ -248,28 +268,27 @@ out: static int count_items(struct super_block *sb, struct compact_cursor *curs, u32 *nr_items, u32 *key_bytes) { - SCOUTFS_DECLARE_KVEC(item_key); + struct scoutfs_key_buf item_key; SCOUTFS_DECLARE_KVEC(item_val); - u32 total; + u32 items = 0; + u32 keys = 0; + u32 vals = 0; int ret; *nr_items = 0; *key_bytes = 0; - total = sizeof(struct scoutfs_segment_block); - while ((ret = next_item(sb, curs, item_key, item_val)) > 0) { + while ((ret = next_item(sb, curs, &item_key, item_val)) > 0) { - total += sizeof(struct scoutfs_segment_item) + - scoutfs_kvec_length(item_key) + - scoutfs_kvec_length(item_val); + items++; + keys += item_key.key_len; + vals += scoutfs_kvec_length(item_val); - if (total > SCOUTFS_SEGMENT_SIZE) { - ret = 0; + if (!scoutfs_seg_fits_single(items, keys, vals)) break; - } - (*nr_items)++; - (*key_bytes) += scoutfs_kvec_length(item_key); + *nr_items = items; + *key_bytes = keys; } return ret; @@ -279,23 +298,23 @@ static int compact_items(struct super_block *sb, struct compact_cursor *curs, struct scoutfs_segment *seg, u32 nr_items, u32 key_bytes) { - SCOUTFS_DECLARE_KVEC(item_key); + struct scoutfs_key_buf item_key; SCOUTFS_DECLARE_KVEC(item_val); int ret; - ret = next_item(sb, curs, item_key, item_val); + ret = next_item(sb, curs, &item_key, item_val); if (ret <= 0) goto out; - scoutfs_seg_first_item(sb, seg, item_key, item_val, + scoutfs_seg_first_item(sb, seg, &item_key, item_val, nr_items, key_bytes); while (--nr_items) { - ret = next_item(sb, curs, item_key, item_val); + ret = next_item(sb, curs, &item_key, item_val); if (ret <= 0) break; - scoutfs_seg_append_item(sb, seg, item_key, item_val); + scoutfs_seg_append_item(sb, seg, &item_key, item_val); } out: @@ -307,11 +326,11 @@ static int compact_segments(struct super_block *sb, struct scoutfs_bio_completion *comp, struct list_head *results) { + struct scoutfs_key_buf upper_next; struct scoutfs_segment *seg; struct compact_seg *cseg; struct compact_seg *upper; struct compact_seg *lower; - SCOUTFS_DECLARE_KVEC(upper_next); u32 key_bytes; u32 nr_items; int ret; @@ -328,13 +347,7 @@ static int compact_segments(struct super_block *sb, */ if (upper && upper->pos == 0 && 
(!lower || - scoutfs_kvec_memcmp(upper->last, lower->first) < 0)) { - - cseg = kzalloc(sizeof(struct compact_seg), GFP_NOFS); - if (!cseg) { - ret = -ENOMEM; - break; - } + scoutfs_key_compare(upper->last, lower->first) < 0)) { /* * XXX blah! these csegs are getting @@ -342,11 +355,8 @@ static int compact_segments(struct super_block *sb, * entry iterator that reading and compacting * can use. */ - ret = scoutfs_kvec_dup_flatten(cseg->first, - upper->first) ?: - scoutfs_kvec_dup_flatten(cseg->last, upper->last); - if (ret) { - kfree(cseg); + cseg = alloc_cseg(sb, upper->first, upper->last); + if (!cseg) { ret = -ENOMEM; break; } @@ -376,14 +386,14 @@ static int compact_segments(struct super_block *sb, */ if (lower && lower->pos == 0 && (!upper || - (!scoutfs_seg_item_kvecs(upper->seg, upper->pos, - upper_next, NULL) && - scoutfs_kvec_memcmp(upper_next, lower->last) > 0))) { + (!scoutfs_seg_item_ptrs(upper->seg, upper->pos, + &upper_next, NULL) && + scoutfs_key_compare(&upper_next, lower->last) > 0))) { curs->lower = next_spos(curs, lower); list_del_init(&lower->entry); - free_cseg(lower); + free_cseg(sb, lower); scoutfs_inc_counter(sb, compact_segment_skipped); continue; @@ -404,6 +414,7 @@ static int compact_segments(struct super_block *sb, break; } + /* no cseg keys, manifest update uses seg item keys */ cseg = kzalloc(sizeof(struct compact_seg), GFP_NOFS); if (!cseg) { ret = -ENOMEM; @@ -436,14 +447,16 @@ static int compact_segments(struct super_block *sb, return ret; } -int scoutfs_compact_add(struct super_block *sb, void *data, struct kvec *first, - struct kvec *last, u64 segno, u64 seq, u8 level) +int scoutfs_compact_add(struct super_block *sb, void *data, + struct scoutfs_key_buf *first, + struct scoutfs_key_buf *last, u64 segno, u64 seq, + u8 level) { struct compact_cursor *curs = data; struct compact_seg *cseg; int ret; - cseg = kzalloc(sizeof(struct compact_seg), GFP_NOFS); + cseg = alloc_cseg(sb, first, last); if (!cseg) { ret = -ENOMEM; goto out; @@ -451,11 +464,6 @@ int scoutfs_compact_add(struct super_block *sb, void *data, struct kvec *first, list_add_tail(&cseg->entry, &curs->csegs); - ret = scoutfs_kvec_dup_flatten(cseg->first, first) ?: - scoutfs_kvec_dup_flatten(cseg->last, last); - if (ret) - goto out; - cseg->segno = segno; cseg->seq = seq; cseg->level = level; @@ -594,8 +602,8 @@ static void scoutfs_compact_func(struct work_struct *work) out: if (ret) free_result_segnos(sb, &results); - free_cseg_list(&curs.csegs); - free_cseg_list(&results); + free_cseg_list(sb, &curs.csegs); + free_cseg_list(sb, &results); WARN_ON_ONCE(ret); trace_printk("ret %d\n", ret); diff --git a/kmod/src/compact.h b/kmod/src/compact.h index 48312d57..5241ff11 100644 --- a/kmod/src/compact.h +++ b/kmod/src/compact.h @@ -3,8 +3,10 @@ void scoutfs_compact_kick(struct super_block *sb); -int scoutfs_compact_add(struct super_block *sb, void *data, struct kvec *first, - struct kvec *last, u64 segno, u64 seq, u8 level); +int scoutfs_compact_add(struct super_block *sb, void *data, + struct scoutfs_key_buf *first, + struct scoutfs_key_buf *last, u64 segno, u64 seq, + u8 level); int scoutfs_compact_setup(struct super_block *sb); void scoutfs_compact_destroy(struct super_block *sb); diff --git a/kmod/src/dir.c b/kmod/src/dir.c index f979fec6..c67675b5 100644 --- a/kmod/src/dir.c +++ b/kmod/src/dir.c @@ -97,13 +97,32 @@ static unsigned int dentry_type(unsigned int type) return DT_UNKNOWN; } +static struct scoutfs_key_buf *alloc_dirent_key(struct super_block *sb, + struct inode *dir, + struct dentry *dentry) 
+{ + struct scoutfs_dirent_key *dkey; + struct scoutfs_key_buf *key; + + key = scoutfs_key_alloc(sb, offsetof(struct scoutfs_dirent_key, + name[dentry->d_name.len])); + if (key) { + dkey = key->data; + dkey->type = SCOUTFS_DIRENT_KEY; + dkey->ino = cpu_to_be64(scoutfs_ino(dir)); + memcpy(dkey->name, (void *)dentry->d_name.name, + dentry->d_name.len); + } + + return key; +} + static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { struct super_block *sb = dir->i_sb; - struct scoutfs_dirent_key dkey; + struct scoutfs_key_buf *key = NULL; struct scoutfs_dirent dent; - SCOUTFS_DECLARE_KVEC(key); SCOUTFS_DECLARE_KVEC(val); struct inode *inode; u64 ino = 0; @@ -114,10 +133,11 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry, goto out; } - dkey.type = SCOUTFS_DIRENT_KEY; - dkey.ino = cpu_to_be64(scoutfs_ino(dir)); - scoutfs_kvec_init(key, &dkey, sizeof(dkey), - (void *)dentry->d_name.name, dentry->d_name.len); + key = alloc_dirent_key(sb, dir, dentry); + if (!key) { + ret = -ENOMEM; + goto out; + } scoutfs_kvec_init(val, &dent, sizeof(dent)); @@ -137,6 +157,8 @@ out: else inode = scoutfs_iget(sb, ino); + scoutfs_key_free(sb, key); + return d_splice_alias(inode, dentry); } @@ -162,6 +184,17 @@ static int dir_emit_dots(struct file *file, void *dirent, filldir_t filldir) return 1; } +static void init_readdir_key(struct scoutfs_key_buf *key, + struct scoutfs_readdir_key *rkey, + struct inode *inode, loff_t pos) +{ + rkey->type = SCOUTFS_READDIR_KEY; + rkey->ino = cpu_to_be64(scoutfs_ino(inode)); + rkey->pos = cpu_to_be64(pos); + + scoutfs_key_init(key, rkey, sizeof(struct scoutfs_readdir_key)); +} + /* * readdir simply iterates over the dirent items for the dir inode and * uses their offset as the readdir position. 
@@ -174,10 +207,10 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir) struct inode *inode = file_inode(file); struct super_block *sb = inode->i_sb; struct scoutfs_dirent *dent; + struct scoutfs_key_buf key; + struct scoutfs_key_buf last_key; struct scoutfs_readdir_key rkey; struct scoutfs_readdir_key last_rkey; - SCOUTFS_DECLARE_KVEC(key); - SCOUTFS_DECLARE_KVEC(last_key); SCOUTFS_DECLARE_KVEC(val); unsigned int item_len; unsigned int name_len; @@ -187,15 +220,7 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir) if (!dir_emit_dots(file, dirent, filldir)) return 0; - rkey.type = SCOUTFS_READDIR_KEY; - rkey.ino = cpu_to_be64(scoutfs_ino(inode)); - /* pos set in each loop */ - scoutfs_kvec_init(key, &rkey, sizeof(rkey)); - - last_rkey.type = SCOUTFS_READDIR_KEY; - last_rkey.ino = cpu_to_be64(scoutfs_ino(inode)); - last_rkey.pos = cpu_to_be64(SCOUTFS_DIRENT_LAST_POS); - scoutfs_kvec_init(last_key, &last_rkey, sizeof(last_rkey)); + init_readdir_key(&last_key, &last_rkey, inode, SCOUTFS_DIRENT_LAST_POS); item_len = offsetof(struct scoutfs_dirent, name[SCOUTFS_NAME_LEN]); dent = kmalloc(item_len, GFP_KERNEL); @@ -203,9 +228,10 @@ static int scoutfs_readdir(struct file *file, void *dirent, filldir_t filldir) return -ENOMEM; for (;;) { - rkey.pos = cpu_to_be64(file->f_pos); + init_readdir_key(&key, &rkey, inode, file->f_pos); + scoutfs_kvec_init(val, dent, item_len); - ret = scoutfs_item_next_same_min(sb, key, last_key, val, + ret = scoutfs_item_next_same_min(sb, &key, &last_key, val, offsetof(struct scoutfs_dirent, name[1])); if (ret < 0) { if (ret == -ENOENT) @@ -261,9 +287,8 @@ static int add_entry_items(struct inode *dir, struct dentry *dentry, struct inode *inode) { struct super_block *sb = dir->i_sb; - struct scoutfs_dirent_key dkey; + struct scoutfs_key_buf *key; struct scoutfs_dirent dent; - SCOUTFS_DECLARE_KVEC(key); SCOUTFS_DECLARE_KVEC(val); int ret; @@ -275,10 +300,9 @@ static int add_entry_items(struct inode *dir, struct dentry *dentry, return ret; /* dirent item for lookup */ - dkey.type = SCOUTFS_DIRENT_KEY; - dkey.ino = cpu_to_be64(scoutfs_ino(dir)); - scoutfs_kvec_init(key, &dkey, sizeof(dkey), - (void *)dentry->d_name.name, dentry->d_name.len); + key = alloc_dirent_key(sb, dir, dentry); + if (!key) + return -ENOMEM; dent.ino = cpu_to_le64(scoutfs_ino(inode)); dent.type = mode_to_type(inode->i_mode); @@ -323,6 +347,7 @@ out_dent: } #endif + scoutfs_key_free(sb, key); return ret; } @@ -423,8 +448,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry) struct super_block *sb = dir->i_sb; struct inode *inode = dentry->d_inode; struct timespec ts = current_kernel_time(); - struct scoutfs_dirent_key dkey; - SCOUTFS_DECLARE_KVEC(key); + struct scoutfs_key_buf *key = NULL; int ret = 0; /* will need to add deletion items */ @@ -443,10 +467,11 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry) goto out; /* XXX same items as add_entry_items */ - dkey.type = SCOUTFS_DIRENT_KEY; - dkey.ino = cpu_to_be64(scoutfs_ino(dir)); - scoutfs_kvec_init(key, &dkey, sizeof(dkey), - (void *)dentry->d_name.name, dentry->d_name.len); + key = alloc_dirent_key(sb, dir, dentry); + if (!key) { + ret = -ENOMEM; + goto out; + } ret = scoutfs_item_delete(sb, key); if (ret) @@ -478,6 +503,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry) scoutfs_update_inode_item(dir); out: + scoutfs_key_free(sb, key); scoutfs_release_trans(sb); return ret; } diff --git a/kmod/src/inode.c b/kmod/src/inode.c index 
c34babf7..48864f94 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -127,26 +127,28 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode) ci->data_version = le64_to_cpu(cinode->data_version); } -static void set_inode_key(struct scoutfs_inode_key *ikey, u64 ino) +static void init_inode_key(struct scoutfs_key_buf *key, + struct scoutfs_inode_key *ikey, u64 ino) { ikey->type = SCOUTFS_INODE_KEY; ikey->ino = cpu_to_be64(ino); + + scoutfs_key_init(key, ikey, sizeof(struct scoutfs_inode_key)); } static int scoutfs_read_locked_inode(struct inode *inode) { struct super_block *sb = inode->i_sb; struct scoutfs_inode_key ikey; + struct scoutfs_key_buf key; struct scoutfs_inode sinode; - SCOUTFS_DECLARE_KVEC(key); SCOUTFS_DECLARE_KVEC(val); int ret; - set_inode_key(&ikey, scoutfs_ino(inode)); - scoutfs_kvec_init(key, &ikey, sizeof(ikey)); + init_inode_key(&key, &ikey, scoutfs_ino(inode)); scoutfs_kvec_init(val, &sinode, sizeof(sinode)); - ret = scoutfs_item_lookup_exact(sb, key, val, sizeof(sinode)); + ret = scoutfs_item_lookup_exact(sb, &key, val, sizeof(sinode)); if (ret == 0) load_inode(inode, &sinode); @@ -269,16 +271,15 @@ int scoutfs_dirty_inode_item(struct inode *inode) { struct super_block *sb = inode->i_sb; struct scoutfs_inode_key ikey; + struct scoutfs_key_buf key; struct scoutfs_inode sinode; - SCOUTFS_DECLARE_KVEC(key); int ret; store_inode(&sinode, inode); - set_inode_key(&ikey, scoutfs_ino(inode)); - scoutfs_kvec_init(key, &ikey, sizeof(ikey)); + init_inode_key(&key, &ikey, scoutfs_ino(inode)); - ret = scoutfs_item_dirty(sb, key); + ret = scoutfs_item_dirty(sb, &key); if (!ret) trace_scoutfs_dirty_inode(inode); return ret; @@ -297,18 +298,17 @@ void scoutfs_update_inode_item(struct inode *inode) { struct super_block *sb = inode->i_sb; struct scoutfs_inode_key ikey; + struct scoutfs_key_buf key; struct scoutfs_inode sinode; - SCOUTFS_DECLARE_KVEC(key); SCOUTFS_DECLARE_KVEC(val); int err; store_inode(&sinode, inode); - set_inode_key(&ikey, scoutfs_ino(inode)); - scoutfs_kvec_init(key, &ikey, sizeof(ikey)); + init_inode_key(&key, &ikey, scoutfs_ino(inode)); scoutfs_kvec_init(val, &sinode, sizeof(sinode)); - err = scoutfs_item_update(sb, key, val); + err = scoutfs_item_update(sb, &key, val); BUG_ON(err); trace_scoutfs_update_inode(inode); @@ -388,8 +388,8 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir, { struct scoutfs_inode_info *ci; struct scoutfs_inode_key ikey; + struct scoutfs_key_buf key; struct scoutfs_inode sinode; - SCOUTFS_DECLARE_KVEC(key); SCOUTFS_DECLARE_KVEC(val); struct inode *inode; u64 ino; @@ -419,11 +419,10 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir, set_inode_ops(inode); store_inode(&sinode, inode); - set_inode_key(&ikey, scoutfs_ino(inode)); - scoutfs_kvec_init(key, &ikey, sizeof(ikey)); + init_inode_key(&key, &ikey, scoutfs_ino(inode)); scoutfs_kvec_init(val, &sinode, sizeof(sinode)); - ret = scoutfs_item_create(sb, key, val); + ret = scoutfs_item_create(sb, &key, val); if (ret) { iput(inode); return ERR_PTR(ret); diff --git a/kmod/src/item.c b/kmod/src/item.c index b5bb2c5e..85c4942b 100644 --- a/kmod/src/item.c +++ b/kmod/src/item.c @@ -59,22 +59,24 @@ struct cached_item { }; long dirty; - SCOUTFS_DECLARE_KVEC(key); + struct scoutfs_key_buf *key; + SCOUTFS_DECLARE_KVEC(val); }; struct cached_range { struct rb_node node; - SCOUTFS_DECLARE_KVEC(start); - SCOUTFS_DECLARE_KVEC(end); + struct scoutfs_key_buf *start; + struct scoutfs_key_buf *end; }; /* * Walk the item rbtree and 
return the item found and the next and * prev items. */ -static struct cached_item *walk_items(struct rb_root *root, struct kvec *key, +static struct cached_item *walk_items(struct rb_root *root, + struct scoutfs_key_buf *key, struct cached_item **prev, struct cached_item **next) { @@ -88,7 +90,7 @@ static struct cached_item *walk_items(struct rb_root *root, struct kvec *key, while (node) { item = container_of(node, struct cached_item, node); - cmp = scoutfs_kvec_memcmp(key, item->key); + cmp = scoutfs_key_compare(key, item->key); if (cmp < 0) { *next = item; node = node->rb_left; @@ -104,7 +106,8 @@ static struct cached_item *walk_items(struct rb_root *root, struct kvec *key, } static struct cached_item *find_item(struct super_block *sb, - struct rb_root *root, struct kvec *key) + struct rb_root *root, + struct scoutfs_key_buf *key) { struct cached_item *prev; struct cached_item *next; @@ -120,7 +123,8 @@ static struct cached_item *find_item(struct super_block *sb, return item; } -static struct cached_item *next_item(struct rb_root *root, struct kvec *key) +static struct cached_item *next_item(struct rb_root *root, + struct scoutfs_key_buf *key) { struct cached_item *prev; struct cached_item *next; @@ -234,7 +238,7 @@ static int insert_item(struct rb_root *root, struct cached_item *ins) parent = *node; item = container_of(*node, struct cached_item, node); - cmp = scoutfs_kvec_memcmp(ins->key, item->key); + cmp = scoutfs_key_compare(ins->key, item->key); if (cmp < 0) { if (ins->dirty) item->dirty |= LEFT_DIRTY; @@ -263,7 +267,8 @@ static int insert_item(struct rb_root *root, struct cached_item *ins) * cached range. */ static bool check_range(struct super_block *sb, struct rb_root *root, - struct kvec *key, struct kvec *end) + struct scoutfs_key_buf *key, + struct scoutfs_key_buf *end) { struct rb_node *node = root->rb_node; struct cached_range *next = NULL; @@ -273,34 +278,34 @@ static bool check_range(struct super_block *sb, struct rb_root *root, while (node) { rng = container_of(node, struct cached_range, node); - cmp = scoutfs_kvec_cmp_overlap(key, key, - rng->start, rng->end); + cmp = scoutfs_key_compare_ranges(key, key, + rng->start, rng->end); if (cmp < 0) { next = rng; node = node->rb_left; } else if (cmp > 0) { node = node->rb_right; } else { - scoutfs_kvec_memcpy_truncate(end, rng->end); + scoutfs_key_copy(end, rng->end); scoutfs_inc_counter(sb, item_range_hit); return true; } } if (next) - scoutfs_kvec_memcpy_truncate(end, next->start); + scoutfs_key_copy(end, next->start); else - scoutfs_kvec_set_max_key(end); + scoutfs_key_set_max(end); scoutfs_inc_counter(sb, item_range_miss); return false; } -static void free_range(struct cached_range *rng) +static void free_range(struct super_block *sb, struct cached_range *rng) { if (!IS_ERR_OR_NULL(rng)) { - scoutfs_kvec_kfree(rng->start); - scoutfs_kvec_kfree(rng->end); + scoutfs_key_free(sb, rng->start); + scoutfs_key_free(sb, rng->end); kfree(rng); } } @@ -332,8 +337,8 @@ restart: parent = *node; rng = container_of(*node, struct cached_range, node); - cmp = scoutfs_kvec_cmp_overlap(ins->start, ins->end, - rng->start, rng->end); + cmp = scoutfs_key_compare_ranges(ins->start, ins->end, + rng->start, rng->end); /* simple iteration until we overlap */ if (cmp < 0) { node = &(*node)->rb_left; @@ -343,24 +348,24 @@ restart: continue; } - start_cmp = scoutfs_kvec_memcmp(ins->start, rng->start); - end_cmp = scoutfs_kvec_memcmp(ins->end, rng->end); + start_cmp = scoutfs_key_compare(ins->start, rng->start); + end_cmp = 
scoutfs_key_compare(ins->end, rng->end); /* free our insertion if we're entirely within an existing */ if (start_cmp >= 0 && end_cmp <= 0) { - free_range(ins); + free_range(sb, ins); return; } /* expand to cover partial overlap before freeing */ if (start_cmp < 0 && end_cmp < 0) - scoutfs_kvec_swap(ins->end, rng->end); + swap(ins->end, rng->end); else if (start_cmp > 0 && end_cmp > 0) - scoutfs_kvec_swap(ins->start, rng->start); + swap(ins->start, rng->start); /* remove and free all overlaps and restart the descent */ rb_erase(&rng->node, root); - free_range(rng); + free_range(sb, rng); goto restart; } @@ -373,25 +378,25 @@ restart: * value vector. The amount of bytes copied is returned which can be 0 * or truncated if the caller's buffer isn't big enough. */ -int scoutfs_item_lookup(struct super_block *sb, struct kvec *key, +int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key_buf *key, struct kvec *val) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct item_cache *cac = sbi->item_cache; - SCOUTFS_DECLARE_KVEC(end); + struct scoutfs_key_buf *end; struct cached_item *item; unsigned long flags; int ret; - trace_scoutfs_item_lookup(sb, key, val); +// trace_scoutfs_item_lookup(sb, key, val); - ret = scoutfs_kvec_alloc_key(end); - if (ret) + end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE); + if (!end) { + ret = -ENOMEM; goto out; + } do { - scoutfs_kvec_init_key(end); - spin_lock_irqsave(&cac->lock, flags); item = find_item(sb, &cac->items, key); @@ -407,7 +412,7 @@ int scoutfs_item_lookup(struct super_block *sb, struct kvec *key, } while (ret == -ENODATA && (ret = scoutfs_manifest_read_items(sb, key, end)) == 0); - scoutfs_kvec_kfree(end); + scoutfs_key_free(sb, end); out: trace_printk("ret %d\n", ret); return ret; @@ -423,8 +428,9 @@ out: * * Returns 0 or -errno. */ -int scoutfs_item_lookup_exact(struct super_block *sb, struct kvec *key, - struct kvec *val, int size) +int scoutfs_item_lookup_exact(struct super_block *sb, + struct scoutfs_key_buf *key, struct kvec *val, + int size) { int ret; @@ -444,55 +450,51 @@ int scoutfs_item_lookup_exact(struct super_block *sb, struct kvec *key, * -ENOENT is returned if there are no items between the given and last * keys. * - * The next item's key is copied to the caller's key. -ENOBUFS is - * returned if the item's key didn't fit in the caller's key. + * The next item's key is copied to the caller's key. The caller is + * responsible for dealing with key lengths and truncation. * * The next item's value is copied into the callers value. The number * of value bytes copied is returned. The copied value can be truncated * by the caller's value buffer length. 
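 *
 * A typical iteration pattern (an illustrative sketch): load key with
 * the first key of interest, call scoutfs_item_next() in a loop, and
 * advance past each returned item with scoutfs_key_inc() before the
 * next call.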
*/ -int scoutfs_item_next(struct super_block *sb, struct kvec *key, - struct kvec *last, struct kvec *val) +int scoutfs_item_next(struct super_block *sb, struct scoutfs_key_buf *key, + struct scoutfs_key_buf *last, struct kvec *val) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct item_cache *cac = sbi->item_cache; - SCOUTFS_DECLARE_KVEC(read_start); - SCOUTFS_DECLARE_KVEC(read_end); - SCOUTFS_DECLARE_KVEC(range_end); + struct scoutfs_key_buf *read_start = NULL; + struct scoutfs_key_buf *read_end = NULL; + struct scoutfs_key_buf *range_end = NULL; struct cached_item *item; unsigned long flags; bool cached; int ret; /* convenience to avoid searching if caller iterates past their last */ - if (scoutfs_kvec_length(key) > scoutfs_kvec_length(last)) { + if (scoutfs_key_compare(key, last) > 0) { ret = -ENOENT; goto out; } - ret = scoutfs_kvec_alloc_key(range_end); - if (ret) + read_start = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE); + read_end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE); + range_end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE); + if (!read_start || !read_end || !range_end) { + ret = -ENOMEM; goto out; + } spin_lock_irqsave(&cac->lock, flags); for(;;) { - scoutfs_kvec_init_key(range_end); - /* see if we have a usable item in cache and before last */ cached = check_range(sb, &cac->ranges, key, range_end); if (cached && (item = next_item(&cac->items, key)) && - scoutfs_kvec_memcmp(item->key, range_end) <= 0 && - scoutfs_kvec_memcmp(item->key, last) <= 0) { + scoutfs_key_compare(item->key, range_end) <= 0 && + scoutfs_key_compare(item->key, last) <= 0) { - if (scoutfs_kvec_length(item->key) > - scoutfs_kvec_length(key)) { - ret = -ENOBUFS; - break; - } - - scoutfs_kvec_memcpy_truncate(key, item->key); + scoutfs_key_copy(key, item->key); if (val) ret = scoutfs_kvec_memcpy(val, item->val); else @@ -502,13 +504,13 @@ int scoutfs_item_next(struct super_block *sb, struct kvec *key, if (!cached) { /* missing cache starts at key */ - scoutfs_kvec_clone(read_start, key); - scoutfs_kvec_clone(read_end, range_end); + scoutfs_key_copy(read_start, key); + scoutfs_key_copy(read_end, range_end); - } else if (scoutfs_kvec_memcmp(range_end, last) < 0) { + } else if (scoutfs_key_compare(range_end, last) < 0) { /* missing cache starts at range_end */ - scoutfs_kvec_clone(read_start, range_end); - scoutfs_kvec_clone(read_end, last); + scoutfs_key_copy(read_start, range_end); + scoutfs_key_copy(read_end, last); } else { /* no items and we have cache between key and last */ @@ -526,9 +528,11 @@ int scoutfs_item_next(struct super_block *sb, struct kvec *key, } spin_unlock_irqrestore(&cac->lock, flags); - - scoutfs_kvec_kfree(range_end); out: + scoutfs_key_free(sb, read_start); + scoutfs_key_free(sb, read_end); + scoutfs_key_free(sb, range_end); + trace_printk("ret %d\n", ret); return ret; } @@ -539,10 +543,12 @@ out: * size mismatches as a sign of corruption. A found key larger than the * found key buffer gives -ENOBUFS and is a sign of corruption. 
*/ -int scoutfs_item_next_same_min(struct super_block *sb, struct kvec *key, - struct kvec *last, struct kvec *val, int len) +int scoutfs_item_next_same_min(struct super_block *sb, + struct scoutfs_key_buf *key, + struct scoutfs_key_buf *last, + struct kvec *val, int len) { - int key_len = scoutfs_kvec_length(key); + int key_len = key->key_len; int ret; trace_printk("key len %u min val len %d\n", key_len, len); @@ -551,8 +557,7 @@ int scoutfs_item_next_same_min(struct super_block *sb, struct kvec *key, return -EINVAL; ret = scoutfs_item_next(sb, key, last, val); - if (ret == -ENOBUFS || - (ret >= 0 && (scoutfs_kvec_length(key) != key_len || ret < len))) + if (ret >= 0 && (key->key_len != key_len || ret < len)) ret = -EIO; trace_printk("ret %d\n", ret); @@ -560,10 +565,10 @@ int scoutfs_item_next_same_min(struct super_block *sb, struct kvec *key, return ret; } -static void free_item(struct cached_item *item) +static void free_item(struct super_block *sb, struct cached_item *item) { if (!IS_ERR_OR_NULL(item)) { - scoutfs_kvec_kfree(item->key); + scoutfs_key_free(sb, item->key); scoutfs_kvec_kfree(item->val); kfree(item); } @@ -591,7 +596,7 @@ static void mark_item_dirty(struct item_cache *cac, item->dirty |= ITEM_DIRTY; cac->nr_dirty_items++; - cac->dirty_key_bytes += scoutfs_kvec_length(item->key); + cac->dirty_key_bytes += item->key->key_len; cac->dirty_val_bytes += scoutfs_kvec_length(item->val); update_dirty_parents(item); @@ -608,7 +613,7 @@ static void clear_item_dirty(struct item_cache *cac, item->dirty &= ~ITEM_DIRTY; cac->nr_dirty_items--; - cac->dirty_key_bytes -= scoutfs_kvec_length(item->key); + cac->dirty_key_bytes -= item->key->key_len; cac->dirty_val_bytes -= scoutfs_kvec_length(item->val); WARN_ON_ONCE(cac->nr_dirty_items < 0 || cac->dirty_key_bytes < 0 || @@ -617,15 +622,17 @@ static void clear_item_dirty(struct item_cache *cac, update_dirty_parents(item); } -static struct cached_item *alloc_item(struct kvec *key, struct kvec *val) +static struct cached_item *alloc_item(struct super_block *sb, + struct scoutfs_key_buf *key, + struct kvec *val) { struct cached_item *item; item = kzalloc(sizeof(struct cached_item), GFP_NOFS); if (item) { - if (scoutfs_kvec_dup_flatten(item->key, key) || - scoutfs_kvec_dup_flatten(item->val, val)) { - free_item(item); + item->key = scoutfs_key_dup(sb, key); + if (!item->key || scoutfs_kvec_dup_flatten(item->val, val)) { + free_item(sb, item); item = NULL; } } @@ -639,7 +646,7 @@ static struct cached_item *alloc_item(struct kvec *key, struct kvec *val) * * XXX but it doesn't read.. is that weird? Seems weird. */ -int scoutfs_item_create(struct super_block *sb, struct kvec *key, +int scoutfs_item_create(struct super_block *sb, struct scoutfs_key_buf *key, struct kvec *val) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); @@ -648,7 +655,7 @@ int scoutfs_item_create(struct super_block *sb, struct kvec *key, unsigned long flags; int ret; - item = alloc_item(key, val); + item = alloc_item(sb, key, val); if (!item) return -ENOMEM; @@ -661,7 +668,7 @@ int scoutfs_item_create(struct super_block *sb, struct kvec *key, spin_unlock_irqrestore(&cac->lock, flags); if (ret) - free_item(item); + free_item(sb, item); return ret; } @@ -672,12 +679,12 @@ int scoutfs_item_create(struct super_block *sb, struct kvec *key, * and we add with _tail to maintain that order. 
*/ int scoutfs_item_add_batch(struct super_block *sb, struct list_head *list, - struct kvec *key, struct kvec *val) + struct scoutfs_key_buf *key, struct kvec *val) { struct cached_item *item; int ret; - item = alloc_item(key, val); + item = alloc_item(sb, key, val); if (item) { list_add_tail(&item->entry, list); ret = 0; @@ -705,7 +712,8 @@ int scoutfs_item_add_batch(struct super_block *sb, struct list_head *list, * that will be inserted. */ int scoutfs_item_insert_batch(struct super_block *sb, struct list_head *list, - struct kvec *start, struct kvec *end) + struct scoutfs_key_buf *start, + struct scoutfs_key_buf *end) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct item_cache *cac = sbi->item_cache; @@ -715,18 +723,18 @@ int scoutfs_item_insert_batch(struct super_block *sb, struct list_head *list, unsigned long flags; int ret; - trace_scoutfs_item_insert_batch(sb, start, end); +// trace_scoutfs_item_insert_batch(sb, start, end); - if (WARN_ON_ONCE(scoutfs_kvec_memcmp(start, end) > 0)) + if (WARN_ON_ONCE(scoutfs_key_compare(start, end) > 0)) return -EINVAL; rng = kzalloc(sizeof(struct cached_range), GFP_NOFS); - if (rng && (scoutfs_kvec_dup_flatten(rng->start, start) || - scoutfs_kvec_dup_flatten(rng->end, end))) { - free_range(rng); - rng = NULL; + if (rng) { + rng->start = scoutfs_key_dup(sb, start); + rng->end = scoutfs_key_dup(sb, end); } - if (!rng) { + if (!rng || !rng->start || !rng->end) { + free_range(sb, rng); ret = -ENOMEM; goto out; } @@ -745,18 +753,18 @@ int scoutfs_item_insert_batch(struct super_block *sb, struct list_head *list, ret = 0; out: - scoutfs_item_free_batch(list); + scoutfs_item_free_batch(sb, list); return ret; } -void scoutfs_item_free_batch(struct list_head *list) +void scoutfs_item_free_batch(struct super_block *sb, struct list_head *list) { struct cached_item *item; struct cached_item *tmp; list_for_each_entry_safe(item, tmp, list, entry) { list_del_init(&item->entry); - free_item(item); + free_item(sb, item); } } @@ -765,22 +773,22 @@ void scoutfs_item_free_batch(struct list_head *list) * If the item exists make sure it's dirty and pinned. It can be read * if it wasn't cached. -ENOENT is returned if the item doesn't exist. */ -int scoutfs_item_dirty(struct super_block *sb, struct kvec *key) +int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key_buf *key) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct item_cache *cac = sbi->item_cache; - SCOUTFS_DECLARE_KVEC(end); + struct scoutfs_key_buf *end; struct cached_item *item; unsigned long flags; int ret; - ret = scoutfs_kvec_alloc_key(end); - if (ret) + end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE); + if (!end) { + ret = -ENOMEM; goto out; + } do { - scoutfs_kvec_init_key(end); - spin_lock_irqsave(&cac->lock, flags); item = find_item(sb, &cac->items, key); @@ -798,7 +806,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct kvec *key) } while (ret == -ENODATA && (ret = scoutfs_manifest_read_items(sb, key, end)) == 0); - scoutfs_kvec_kfree(end); + scoutfs_key_free(sb, end); out: trace_printk("ret %d\n", ret); return ret; @@ -810,20 +818,22 @@ out: * * Returns -ENOENT if the item doesn't exist. 
*/ -int scoutfs_item_update(struct super_block *sb, struct kvec *key, +int scoutfs_item_update(struct super_block *sb, struct scoutfs_key_buf *key, struct kvec *val) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct item_cache *cac = sbi->item_cache; + struct scoutfs_key_buf *end; SCOUTFS_DECLARE_KVEC(up_val); - SCOUTFS_DECLARE_KVEC(end); struct cached_item *item; unsigned long flags; int ret; - ret = scoutfs_kvec_alloc_key(end); - if (ret) + end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE); + if (!end) { + ret = -ENOMEM; goto out; + } if (val) { ret = scoutfs_kvec_dup_flatten(up_val, val); @@ -834,8 +844,6 @@ int scoutfs_item_update(struct super_block *sb, struct kvec *key, } do { - scoutfs_kvec_init_key(end); - spin_lock_irqsave(&cac->lock, flags); item = find_item(sb, &cac->items, key); @@ -855,7 +863,7 @@ int scoutfs_item_update(struct super_block *sb, struct kvec *key, } while (ret == -ENODATA && (ret = scoutfs_manifest_read_items(sb, key, end)) == 0); out: - scoutfs_kvec_kfree(end); + scoutfs_key_free(sb, end); scoutfs_kvec_kfree(up_val); trace_printk("ret %d\n", ret); @@ -866,7 +874,7 @@ out: * XXX how nice, it'd just creates a cached deletion item. It doesn't * have to read. */ -int scoutfs_item_delete(struct super_block *sb, struct kvec *key) +int scoutfs_item_delete(struct super_block *sb, struct scoutfs_key_buf *key) { return WARN_ON_ONCE(-EINVAL); } @@ -931,33 +939,39 @@ static struct cached_item *next_dirty(struct cached_item *item) return NULL; } -/* - * The total number of bytes that will be stored in segments if we were - * to write out all the currently dirty items. - * - * XXX this isn't strictly correct because item's aren't of a uniform - * size. We might need more segments when large items leave gaps at the - * tail of each segment as it is filled with sorted items. It's close - * enough for now. - */ -long scoutfs_item_dirty_bytes(struct super_block *sb) +bool scoutfs_item_has_dirty(struct super_block *sb) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct item_cache *cac = sbi->item_cache; unsigned long flags; - long bytes; + bool has; spin_lock_irqsave(&cac->lock, flags); - - bytes = (cac->nr_dirty_items * sizeof(struct scoutfs_segment_item)) + - cac->dirty_key_bytes + cac->dirty_val_bytes; - + has = cac->nr_dirty_items != 0; spin_unlock_irqrestore(&cac->lock, flags); - bytes += DIV_ROUND_UP(bytes, SCOUTFS_SEGMENT_SIZE) * - sizeof(struct scoutfs_segment_block); + return has; +} - return bytes; +/* + * Returns true if adding more items with the given count, key bytes, and + * value bytes would still fit in a single segment along with the current + * dirty items.
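+ *
+ * For example (illustrative): a caller about to dirty one new item
+ * would pass nr_items = 1 with that item's key and value sizes; a
+ * false return means the currently dirty items have to be written
+ * out before the new item is added.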
+ */ +bool scoutfs_item_dirty_fits_single(struct super_block *sb, u32 nr_items, + u32 key_bytes, u32 val_bytes) +{ + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); + struct item_cache *cac = sbi->item_cache; + unsigned long flags; + bool fits; + + spin_lock_irqsave(&cac->lock, flags); + fits = scoutfs_seg_fits_single(nr_items + cac->nr_dirty_items, + key_bytes + cac->dirty_key_bytes, + val_bytes + cac->dirty_val_bytes); + spin_unlock_irqrestore(&cac->lock, flags); + + return fits; } /* @@ -968,24 +982,25 @@ static void count_seg_items(struct item_cache *cac, u32 *nr_items, u32 *key_bytes) { struct cached_item *item; - u32 total; + u32 items = 0; + u32 keys = 0; + u32 vals = 0; *nr_items = 0; *key_bytes = 0; - total = sizeof(struct scoutfs_segment_block); for (item = first_dirty(cac->items.rb_node); item; item = next_dirty(item)) { - total += sizeof(struct scoutfs_segment_item) + - scoutfs_kvec_length(item->key) + - scoutfs_kvec_length(item->val); + items++; + keys += item->key->key_len; + vals += scoutfs_kvec_length(item->val); - if (total > SCOUTFS_SEGMENT_SIZE) + if (!scoutfs_seg_fits_single(items, keys, vals)) break; - (*nr_items)++; - (*key_bytes) += scoutfs_kvec_length(item->key); + *nr_items = items; + *key_bytes = keys; } } @@ -1062,14 +1077,14 @@ void scoutfs_item_destroy(struct super_block *sb) item = container_of(node, struct cached_item, node); node = rb_next(node); rb_erase(&item->node, &cac->items); - free_item(item); + free_item(sb, item); } for (node = rb_first(&cac->ranges); node; ) { rng = container_of(node, struct cached_range, node); node = rb_next(node); rb_erase(&rng->node, &cac->items); - free_range(rng); + free_range(sb, rng); } kfree(cac); diff --git a/kmod/src/item.h b/kmod/src/item.h index 81746822..3c7c6057 100644 --- a/kmod/src/item.h +++ b/kmod/src/item.h @@ -4,31 +4,38 @@ #include struct scoutfs_segment; +struct scoutfs_key_buf; -int scoutfs_item_lookup(struct super_block *sb, struct kvec *key, +int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key_buf *key, struct kvec *val); -int scoutfs_item_lookup_exact(struct super_block *sb, struct kvec *key, - struct kvec *val, int size); -int scoutfs_item_next(struct super_block *sb, struct kvec *key, - struct kvec *last, struct kvec *val); -int scoutfs_item_next_same_min(struct super_block *sb, struct kvec *key, - struct kvec *last, struct kvec *val, int len); -int scoutfs_item_insert(struct super_block *sb, struct kvec *key, +int scoutfs_item_lookup_exact(struct super_block *sb, + struct scoutfs_key_buf *key, struct kvec *val, + int size); +int scoutfs_item_next(struct super_block *sb, struct scoutfs_key_buf *key, + struct scoutfs_key_buf *last, struct kvec *val); +int scoutfs_item_next_same_min(struct super_block *sb, + struct scoutfs_key_buf *key, + struct scoutfs_key_buf *last, + struct kvec *val, int len); +int scoutfs_item_insert(struct super_block *sb, struct scoutfs_key_buf *key, struct kvec *val); -int scoutfs_item_create(struct super_block *sb, struct kvec *key, +int scoutfs_item_create(struct super_block *sb, struct scoutfs_key_buf *key, struct kvec *val); -int scoutfs_item_dirty(struct super_block *sb, struct kvec *key); -int scoutfs_item_update(struct super_block *sb, struct kvec *key, +int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key_buf *key); +int scoutfs_item_update(struct super_block *sb, struct scoutfs_key_buf *key, struct kvec *val); -int scoutfs_item_delete(struct super_block *sb, struct kvec *key); +int scoutfs_item_delete(struct super_block *sb, struct 
scoutfs_key_buf *key); int scoutfs_item_add_batch(struct super_block *sb, struct list_head *list, - struct kvec *key, struct kvec *val); + struct scoutfs_key_buf *key, struct kvec *val); int scoutfs_item_insert_batch(struct super_block *sb, struct list_head *list, - struct kvec *start, struct kvec *end); -void scoutfs_item_free_batch(struct list_head *list); + struct scoutfs_key_buf *start, + struct scoutfs_key_buf *end); +void scoutfs_item_free_batch(struct super_block *sb, struct list_head *list); -long scoutfs_item_dirty_bytes(struct super_block *sb); +bool scoutfs_item_has_dirty(struct super_block *sb); +bool scoutfs_item_dirty_fits_single(struct super_block *sb, u32 nr_items, + u32 key_bytes, u32 val_bytes); int scoutfs_item_dirty_seg(struct super_block *sb, struct scoutfs_segment *seg); int scoutfs_item_setup(struct super_block *sb); diff --git a/kmod/src/key.c b/kmod/src/key.c new file mode 100644 index 00000000..9795f797 --- /dev/null +++ b/kmod/src/key.c @@ -0,0 +1,92 @@ +/* + * Copyright (C) 2017 Versity Software, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include + +#include "key.h" + +struct scoutfs_key_buf *scoutfs_key_alloc(struct super_block *sb, u16 len) +{ + struct scoutfs_key_buf *key; + + if (WARN_ON_ONCE(len > SCOUTFS_MAX_KEY_SIZE)) + return NULL; + + key = kmalloc(sizeof(struct scoutfs_key_buf) + len, GFP_NOFS); + if (key) { + key->data = key + 1; + key->key_len = len; + key->buf_len = len; + } + + return key; +} + +struct scoutfs_key_buf *scoutfs_key_dup(struct super_block *sb, + struct scoutfs_key_buf *key) +{ + struct scoutfs_key_buf *dup; + + dup = scoutfs_key_alloc(sb, key->key_len); + if (dup) + memcpy(dup->data, key->data, dup->key_len); + return dup; +} + +void scoutfs_key_free(struct super_block *sb, struct scoutfs_key_buf *key) +{ + kfree(key); +} + +/* + * Keys are large multi-byte big-endian values. To correctly increase + * or decrease keys we need to start by extending the key to the full + * precision using the max key size, setting the least significant bytes + * to 0. 
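+ *
+ * For example, with an (illustrative) three byte maximum key size:
+ *
+ *   00 01 ff  ->  scoutfs_key_inc()  ->  00 02 00
+ *   ff ff ff  ->  scoutfs_key_inc()  ->  00 00 00  (wraps)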
+ */ +static void extend_zeros(struct scoutfs_key_buf *key) +{ + if (key->key_len < SCOUTFS_MAX_KEY_SIZE && + !WARN_ON_ONCE(key->buf_len != SCOUTFS_MAX_KEY_SIZE)) { + memset(key->data + key->key_len, 0, + key->buf_len - key->key_len); + key->key_len = key->buf_len; + } +} + +void scoutfs_key_inc(struct scoutfs_key_buf *key) +{ + u8 *bytes = key->data; + int i; + + extend_zeros(key); + + for (i = key->key_len - 1; i >= 0; i--) { + if (++bytes[i] != 0) + break; + } +} + +void scoutfs_key_dec(struct scoutfs_key_buf *key) +{ + u8 *bytes = key->data; + int i; + + extend_zeros(key); + + for (i = key->key_len - 1; i >= 0; i--) { + if (--bytes[i] != 255) + break; + } +} diff --git a/kmod/src/key.h b/kmod/src/key.h index cb8460d6..a63244ba 100644 --- a/kmod/src/key.h +++ b/kmod/src/key.h @@ -4,6 +4,120 @@ #include #include "format.h" +struct scoutfs_key_buf { + void *data; + u16 key_len; + u16 buf_len; +}; + +struct scoutfs_key_buf *scoutfs_key_alloc(struct super_block *sb, u16 len); +struct scoutfs_key_buf *scoutfs_key_dup(struct super_block *sb, + struct scoutfs_key_buf *key); +void scoutfs_key_free(struct super_block *sb, struct scoutfs_key_buf *key); +void scoutfs_key_inc(struct scoutfs_key_buf *key); +void scoutfs_key_dec(struct scoutfs_key_buf *key); + + +/* + * Point the key buf, usually statically allocated, at an existing + * contiguous key stored elsewhere. + */ +static inline void scoutfs_key_init(struct scoutfs_key_buf *key, + void *data, u16 len) +{ + WARN_ON_ONCE(len > SCOUTFS_MAX_KEY_SIZE); + + key->data = data; + key->key_len = len; + key->buf_len = len; +} + +/* + * Compare the fs keys in segment sort order. + */ +static inline int scoutfs_key_compare(struct scoutfs_key_buf *a, + struct scoutfs_key_buf *b) +{ + return memcmp(a->data, b->data, min(a->key_len, b->key_len)) ?: + a->key_len < b->key_len ? -1 : a->key_len > b->key_len ? 1 : 0; +} + +/* + * Compare ranges of keys where overlapping is equality. Returns: + * -1: a_end < b_start + * 1: a_start > b_end + * else 0: ranges overlap + */ +static inline int scoutfs_key_compare_ranges(struct scoutfs_key_buf *a_start, + struct scoutfs_key_buf *a_end, + struct scoutfs_key_buf *b_start, + struct scoutfs_key_buf *b_end) +{ + return scoutfs_key_compare(a_end, b_start) < 0 ? -1 : + scoutfs_key_compare(a_start, b_end) > 0 ? 1 : + 0; +} + +/* + * Copy as much of the contents of the source buffer as fits into the + * dest buffer. + */ +static inline void scoutfs_key_copy(struct scoutfs_key_buf *dst, + struct scoutfs_key_buf *src) +{ + dst->key_len = min(dst->buf_len, src->key_len); + memcpy(dst->data, src->data, dst->key_len); +} + +/* + * Initialize the dst buffer to point to the source buffer in all ways, + * including the buf len. The contents of the buffer are shared but the + * fields describing the buffers are not. + */ +static inline void scoutfs_key_clone(struct scoutfs_key_buf *dst, + struct scoutfs_key_buf *src) +{ + *dst = *src; +} + +/* + * Memset as much of the length as fits in the buffer and set that to + * the new key length. + */ +static inline void scoutfs_key_memset(struct scoutfs_key_buf *key, int c, + u16 len) +{ + if (WARN_ON_ONCE(len > SCOUTFS_MAX_KEY_SIZE)) + return; + + key->key_len = min(key->buf_len, len); + memset(key->data, c, key->key_len); +} + +/* + * Set the contents of the buffer to the smallest possible key by sort + * order. It might be truncated if the buffer isn't large enough.
+ */ +static inline void scoutfs_key_set_min(struct scoutfs_key_buf *key) +{ + scoutfs_key_memset(key, 0, sizeof(struct scoutfs_inode_key)); +} + +/* + * Set the contents of the buffer to the largest possible key by sort + * order. It might be truncated if the buffer isn't large enough. + */ +static inline void scoutfs_key_set_max(struct scoutfs_key_buf *key) +{ + scoutfs_key_memset(key, 0xff, sizeof(struct scoutfs_inode_key)); +} + +/* + * What follows are the key functions for the small fixed size btree + * keys. It will all be removed once the callers are converted from + * the btree to the item cache. + */ + #define CKF "%llu.%u.%llu" #define CKA(key) \ le64_to_cpu((key)->inode), (key)->type, le64_to_cpu((key)->offset) diff --git a/kmod/src/manifest.c b/kmod/src/manifest.c index 2a96d9da..98ac6234 100644 --- a/kmod/src/manifest.c +++ b/kmod/src/manifest.c @@ -52,7 +52,7 @@ struct manifest { /* calculated on mount, const thereafter */ u64 level_limits[SCOUTFS_MANIFEST_MAX_LEVEL + 1]; - SCOUTFS_DECLARE_KVEC(compact_keys[SCOUTFS_MANIFEST_MAX_LEVEL + 1]); + struct scoutfs_key_buf *compact_keys[SCOUTFS_MANIFEST_MAX_LEVEL + 1]; }; #define DECLARE_MANIFEST(sb, name) \ @@ -75,16 +75,16 @@ struct manifest_ref { struct scoutfs_segment *seg; int found_ctr; int pos; - u16 first_key_len; - u16 last_key_len; u8 level; - u8 keys[0]; + + struct scoutfs_key_buf *first; + struct scoutfs_key_buf *last; }; struct manifest_fill_args { struct scoutfs_manifest_entry ment; - struct kvec *first; - struct kvec *last; + struct scoutfs_key_buf *first; + struct scoutfs_key_buf *last; }; /* @@ -93,41 +93,33 @@ struct manifest_fill_args { */ struct manifest_search_key { u64 seq; - struct kvec *key; + struct scoutfs_key_buf *key; u8 level; }; static void init_ment_keys(struct scoutfs_manifest_entry *ment, - struct kvec *first, struct kvec *last) + struct scoutfs_key_buf *first, + struct scoutfs_key_buf *last) { if (first) - scoutfs_kvec_init(first, ment->keys, - le16_to_cpu(ment->first_key_len)); + scoutfs_key_init(first, ment->keys, + le16_to_cpu(ment->first_key_len)); if (last) - scoutfs_kvec_init(last, ment->keys + - le16_to_cpu(ment->first_key_len), - le16_to_cpu(ment->last_key_len)); + scoutfs_key_init(last, ment->keys + + le16_to_cpu(ment->first_key_len), + le16_to_cpu(ment->last_key_len)); } -static void init_ref_keys(struct manifest_ref *ref, struct kvec *first, - struct kvec *last) -{ - if (first) - scoutfs_kvec_init(first, ref->keys, ref->first_key_len); - if (last) - scoutfs_kvec_init(last, ref->keys + ref->first_key_len, - ref->last_key_len); -} - -static bool cmp_range_ment(struct kvec *key, struct kvec *end, +static bool cmp_range_ment(struct scoutfs_key_buf *key, + struct scoutfs_key_buf *end, struct scoutfs_manifest_entry *ment) { - SCOUTFS_DECLARE_KVEC(first); - SCOUTFS_DECLARE_KVEC(last); + struct scoutfs_key_buf first; + struct scoutfs_key_buf last; - init_ment_keys(ment, first, last); + init_ment_keys(ment, &first, &last); - return scoutfs_kvec_cmp_overlap(key, end, first, last); + return scoutfs_key_compare_ranges(key, end, &first, &last); } static u64 get_level_count(struct manifest *mani, @@ -187,8 +179,10 @@ static void add_level_count(struct super_block *sb, struct manifest *mani, * * This must be called with the manifest lock held. 
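 *
 * (Illustrative note: the entry packs both keys back to back in
 * ment->keys[], first_key_len bytes of the first key followed by
 * last_key_len bytes of the last, which is the layout that
 * init_ment_keys() above points its key bufs at.)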
*/ -int scoutfs_manifest_add(struct super_block *sb, struct kvec *first, - struct kvec *last, u64 segno, u64 seq, u8 level) +int scoutfs_manifest_add(struct super_block *sb, + struct scoutfs_key_buf *first, + struct scoutfs_key_buf *last, u64 segno, u64 seq, + u8 level) { DECLARE_MANIFEST(sb, mani); struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); @@ -200,15 +194,15 @@ int scoutfs_manifest_add(struct super_block *sb, struct kvec *first, unsigned bytes; int ret; - trace_scoutfs_manifest_add(sb, first, last, segno, seq, level); +// trace_scoutfs_manifest_add(sb, first, last, segno, seq, level); - key_bytes = scoutfs_kvec_length(first) + scoutfs_kvec_length(last); + key_bytes = first->key_len + last->key_len; bytes = offsetof(struct scoutfs_manifest_entry, keys[key_bytes]); args.ment.segno = cpu_to_le64(segno); args.ment.seq = cpu_to_le64(seq); - args.ment.first_key_len = cpu_to_le16(scoutfs_kvec_length(first)); - args.ment.last_key_len = cpu_to_le16(scoutfs_kvec_length(last)); + args.ment.first_key_len = cpu_to_le16(first->key_len); + args.ment.last_key_len = cpu_to_le16(last->key_len); args.ment.level = level; args.first = first; @@ -233,8 +227,8 @@ int scoutfs_manifest_add(struct super_block *sb, struct kvec *first, /* * This must be called with the manifest lock held. */ -int scoutfs_manifest_dirty(struct super_block *sb, struct kvec *first, u64 seq, - u8 level) +int scoutfs_manifest_dirty(struct super_block *sb, + struct scoutfs_key_buf *first, u64 seq, u8 level) { DECLARE_MANIFEST(sb, mani); struct scoutfs_manifest_entry *ment; @@ -255,8 +249,8 @@ int scoutfs_manifest_dirty(struct super_block *sb, struct kvec *first, u64 seq, /* * This must be called with the manifest lock held. */ -int scoutfs_manifest_del(struct super_block *sb, struct kvec *first, u64 seq, - u8 level) +int scoutfs_manifest_del(struct super_block *sb, struct scoutfs_key_buf *first, + u64 seq, u8 level) { DECLARE_MANIFEST(sb, mani); struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); @@ -299,41 +293,43 @@ int scoutfs_manifest_unlock(struct super_block *sb) return 0; } -static int alloc_add_ref(struct list_head *list, +static void free_ref(struct super_block *sb, struct manifest_ref *ref) +{ + if (!IS_ERR_OR_NULL(ref)) { + WARN_ON_ONCE(!list_empty(&ref->entry)); + scoutfs_seg_put(ref->seg); + scoutfs_key_free(sb, ref->first); + scoutfs_key_free(sb, ref->last); + kfree(ref); + } +} + +static int alloc_add_ref(struct super_block *sb, struct list_head *list, struct scoutfs_manifest_entry *ment) { - SCOUTFS_DECLARE_KVEC(ment_first); - SCOUTFS_DECLARE_KVEC(ment_last); - SCOUTFS_DECLARE_KVEC(first); - SCOUTFS_DECLARE_KVEC(last); + struct scoutfs_key_buf ment_first; + struct scoutfs_key_buf ment_last; struct manifest_ref *ref; - unsigned bytes; - init_ment_keys(ment, ment_first, ment_last); + init_ment_keys(ment, &ment_first, &ment_last); - bytes = scoutfs_kvec_length(ment_first) + - scoutfs_kvec_length(ment_first); - - ref = kmalloc(offsetof(struct manifest_ref, keys[bytes]), GFP_NOFS); - if (!ref) + ref = kzalloc(sizeof(struct manifest_ref), GFP_NOFS); + if (ref) { + ref->first = scoutfs_key_dup(sb, &ment_first); + ref->last = scoutfs_key_dup(sb, &ment_last); + } + if (!ref || !ref->first || !ref->last) { + free_ref(sb, ref); return -ENOMEM; - - memset(ref, 0, offsetof(struct manifest_ref, keys)); + } ref->segno = le64_to_cpu(ment->segno); ref->seq = le64_to_cpu(ment->seq); ref->level = ment->level; - ref->first_key_len = le16_to_cpu(ment->first_key_len); - ref->last_key_len = le16_to_cpu(ment->last_key_len); - - init_ref_keys(ref, 
first, last); - scoutfs_kvec_memcpy(first, ment_first); - scoutfs_kvec_memcpy(last, ment_last); list_add_tail(&ref->entry, list); return 0; - } /* @@ -349,13 +345,14 @@ static int alloc_add_ref(struct list_head *list, * segment starting with the key. */ static int get_range_refs(struct super_block *sb, struct manifest *mani, - struct kvec *key, struct kvec *end, + struct scoutfs_key_buf *key, + struct scoutfs_key_buf *end, struct list_head *ref_list) { struct scoutfs_manifest_entry *ment; struct manifest_search_key skey; - SCOUTFS_DECLARE_KVEC(first); - SCOUTFS_DECLARE_KVEC(last); + struct scoutfs_key_buf first; + struct scoutfs_key_buf last; struct manifest_ref *ref; struct manifest_ref *tmp; int ret; @@ -369,7 +366,7 @@ static int get_range_refs(struct super_block *sb, struct manifest *mani, ment = scoutfs_treap_lookup_prev(mani->treap, &skey); while (!IS_ERR_OR_NULL(ment)) { if (cmp_range_ment(key, end, ment) == 0) { - ret = alloc_add_ref(ref_list, ment); + ret = alloc_add_ref(sb, ref_list, ment); if (ret) goto out; } @@ -396,8 +393,8 @@ static int get_range_refs(struct super_block *sb, struct manifest *mani, } if (ment) { - init_ment_keys(ment, first, last); - ret = alloc_add_ref(ref_list, ment); + init_ment_keys(ment, &first, &last); + ret = alloc_add_ref(sb, ref_list, ment); if (ret) goto out; } @@ -411,7 +408,7 @@ out: if (ret) { list_for_each_entry_safe(ref, tmp, ref_list, entry) { list_del_init(&ref->entry); - kfree(ref); + free_ref(sb, ref); } } @@ -446,16 +443,17 @@ out: */ #define MAX_ITEMS_READ 32 -int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key, - struct kvec *end) +int scoutfs_manifest_read_items(struct super_block *sb, + struct scoutfs_key_buf *key, + struct scoutfs_key_buf *end) { DECLARE_MANIFEST(sb, mani); - SCOUTFS_DECLARE_KVEC(item_key); + struct scoutfs_key_buf item_key; + struct scoutfs_key_buf found_key; + struct scoutfs_key_buf batch_end; + struct scoutfs_key_buf seg_end; SCOUTFS_DECLARE_KVEC(item_val); - SCOUTFS_DECLARE_KVEC(found_key); SCOUTFS_DECLARE_KVEC(found_val); - SCOUTFS_DECLARE_KVEC(batch_end); - SCOUTFS_DECLARE_KVEC(seg_end); struct scoutfs_segment *seg; struct manifest_ref *ref; struct manifest_ref *tmp; @@ -486,7 +484,7 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key, ref->seg = seg; } - /* wait for submitted segments and search for starting pos */ + /* always wait for submitted segments */ list_for_each_entry(ref, &ref_list, entry) { if (!ref->seg) break; @@ -494,15 +492,29 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key, err = scoutfs_seg_wait(sb, ref->seg); if (err && !ret) ret = err; - - if (ret == 0) - ref->pos = scoutfs_seg_find_pos(ref->seg, key); } if (ret) goto out; - scoutfs_kvec_init_null(batch_end); - scoutfs_kvec_init_null(seg_end); + /* start from the next item from the key in each segment */ + list_for_each_entry(ref, &ref_list, entry) + ref->pos = scoutfs_seg_find_pos(ref->seg, key); + + /* + * Find the greatest range we can cover if we walk all the + * segments. We only have level 0 segments for the missing + * range so that's the greatest. Then we shrink the range by + * the limit of each higher level segment that intersected with + * our starting key. 
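+ *
+ * For example (illustrative): when reading [key, end], a level 1
+ * segment whose last key falls before end caps seg_end at that last
+ * key, since items beyond it could live in the next level 1 segment
+ * that we haven't read.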
+ */ + scoutfs_key_clone(&seg_end, end); + list_for_each_entry(ref, &ref_list, entry) { + if (ref->level > 0 && + scoutfs_key_compare(ref->last, &seg_end) < 0) { + scoutfs_key_clone(&seg_end, ref->last); + } + } + found_ctr = 0; for (n = 0; n < MAX_ITEMS_READ; n++) { @@ -512,37 +524,26 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key, /* find the next least key from the pos in each segment */ list_for_each_entry_safe(ref, tmp, &ref_list, entry) { + if (ref->pos == -1) + continue; /* * Check the next item in the segment. We're * done with the segment if there are no more * items or if the next item is past the - * caller's end. We record either the caller's - * end or the segment end if it's a l1+ segment for - * use as the batch end if we don't see more items. + * caller's end. */ - ret = scoutfs_seg_item_kvecs(ref->seg, ref->pos, - item_key, item_val); - if (ret < 0) { - if (ref->level > 0) { - init_ref_keys(ref, NULL, item_key); - scoutfs_kvec_clone_less(seg_end, - item_key); - } - } else if (scoutfs_kvec_memcmp(item_key, end) > 0) { - scoutfs_kvec_clone_less(seg_end, end); - ret = -ENOENT; - } - if (ret < 0) { - list_del_init(&ref->entry); - scoutfs_seg_put(ref->seg); - kfree(ref); + ret = scoutfs_seg_item_ptrs(ref->seg, ref->pos, + &item_key, item_val); + if (ret < 0 || scoutfs_key_compare(&item_key, end) > 0){ + ref->pos = -1; continue; } /* see if it's the new least item */ if (found) { - cmp = scoutfs_kvec_memcmp(item_key, found_key); + cmp = scoutfs_key_compare(&item_key, + &found_key); if (cmp >= 0) { if (cmp == 0) ref->found_ctr = found_ctr; @@ -551,7 +552,7 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key, } /* remember new least key */ - scoutfs_kvec_clone(found_key, item_key); + scoutfs_key_clone(&found_key, &item_key); scoutfs_kvec_clone(found_val, item_val); ref->found_ctr = ++found_ctr; found = true; @@ -559,7 +560,7 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key, /* ran out of keys in segs, range extends to seg end */ if (!found) { - scoutfs_kvec_clone(batch_end, seg_end); + scoutfs_key_clone(&batch_end, &seg_end); ret = 0; break; } @@ -569,18 +570,18 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key, * have items it's not a failure and the end of the cached * range is the last successfully added item. 
@@ -512,37 +524,26 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
 
 		/* find the next least key from the pos in each segment */
 		list_for_each_entry_safe(ref, tmp, &ref_list, entry) {
+			if (ref->pos == -1)
+				continue;
 			/*
 			 * Check the next item in the segment.  We're
 			 * done with the segment if there are no more
 			 * items or if the next item is past the
-			 * caller's end.  We record either the caller's
-			 * end or the segment end if it's a l1+ segment for
-			 * use as the batch end if we don't see more items.
+			 * caller's end.
 			 */
-			ret = scoutfs_seg_item_kvecs(ref->seg, ref->pos,
-						     item_key, item_val);
-			if (ret < 0) {
-				if (ref->level > 0) {
-					init_ref_keys(ref, NULL, item_key);
-					scoutfs_kvec_clone_less(seg_end,
-								item_key);
-				}
-			} else if (scoutfs_kvec_memcmp(item_key, end) > 0) {
-				scoutfs_kvec_clone_less(seg_end, end);
-				ret = -ENOENT;
-			}
-			if (ret < 0) {
-				list_del_init(&ref->entry);
-				scoutfs_seg_put(ref->seg);
-				kfree(ref);
+			ret = scoutfs_seg_item_ptrs(ref->seg, ref->pos,
+						    &item_key, item_val);
+			if (ret < 0 || scoutfs_key_compare(&item_key, end) > 0) {
+				ref->pos = -1;
 				continue;
 			}
 
 			/* see if it's the new least item */
 			if (found) {
-				cmp = scoutfs_kvec_memcmp(item_key, found_key);
+				cmp = scoutfs_key_compare(&item_key,
+							  &found_key);
 				if (cmp >= 0) {
 					if (cmp == 0)
 						ref->found_ctr = found_ctr;
@@ -551,7 +552,7 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
 			}
 
 			/* remember new least key */
-			scoutfs_kvec_clone(found_key, item_key);
+			scoutfs_key_clone(&found_key, &item_key);
 			scoutfs_kvec_clone(found_val, item_val);
 			ref->found_ctr = ++found_ctr;
 			found = true;
@@ -559,7 +560,7 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
 
 		/* ran out of keys in segs, range extends to seg end */
 		if (!found) {
-			scoutfs_kvec_clone(batch_end, seg_end);
+			scoutfs_key_clone(&batch_end, &seg_end);
 			ret = 0;
 			break;
 		}
@@ -569,18 +570,18 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
 		 * have items it's not a failure and the end of the cached
 		 * range is the last successfully added item.
 		 */
-		ret = scoutfs_item_add_batch(sb, &batch, found_key, found_val);
+		ret = scoutfs_item_add_batch(sb, &batch, &found_key, found_val);
 		if (ret) {
 			if (n > 0)
 				ret = 0;
 			break;
 		}
 
-		/* the last successful key determines the range */
-		scoutfs_kvec_clone(batch_end, found_key);
+		/* the last successful key determines the range end until we run out */
+		scoutfs_key_clone(&batch_end, &found_key);
 
 		/* if we just saw the end key then we're done */
-		if (scoutfs_kvec_memcmp(found_key, end) == 0) {
+		if (scoutfs_key_compare(&found_key, end) == 0) {
 			ret = 0;
 			break;
 		}
@@ -595,14 +596,13 @@ int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
 	}
 
 	if (ret)
-		scoutfs_item_free_batch(&batch);
+		scoutfs_item_free_batch(sb, &batch);
 	else
-		ret = scoutfs_item_insert_batch(sb, &batch, key, batch_end);
+		ret = scoutfs_item_insert_batch(sb, &batch, key, &batch_end);
 out:
 	list_for_each_entry_safe(ref, tmp, &ref_list, entry) {
 		list_del_init(&ref->entry);
-		scoutfs_seg_put(ref->seg);
-		kfree(ref);
+		free_ref(sb, ref);
 	}
 
 	return ret;
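The item loop above is one step of a k-way merge across the per-segment cursors: the least next key wins, and every cursor holding an equal key is advanced so stale duplicates from older segments are skipped. A compressed userspace model with integer keys standing in for items (names are illustrative; the real code additionally keeps the value from the newest segment via the found_ctr bookkeeping):

#include <stdio.h>

/* a per-segment cursor: sorted keys stand in for a segment's items */
struct cursor {
	const int *keys;
	int nr;
	int pos;
};

/* emit the least next key and advance every cursor that carried it */
static int merge_next(struct cursor *curs, int nr_curs, int *key_ret)
{
	int found = 0;
	int least = 0;
	int i;

	for (i = 0; i < nr_curs; i++) {
		if (curs[i].pos >= curs[i].nr)
			continue;
		if (!found || curs[i].keys[curs[i].pos] < least) {
			least = curs[i].keys[curs[i].pos];
			found = 1;
		}
	}
	if (!found)
		return 0;

	for (i = 0; i < nr_curs; i++) {
		if (curs[i].pos < curs[i].nr &&
		    curs[i].keys[curs[i].pos] == least)
			curs[i].pos++;
	}

	*key_ret = least;
	return 1;
}

int main(void)
{
	const int newer[] = { 1, 4, 9 };
	const int older[] = { 1, 2, 9 };
	struct cursor curs[] = { { newer, 3, 0 }, { older, 3, 0 } };
	int key;

	/* prints 1 2 4 9: duplicate keys are emitted only once */
	while (merge_next(curs, 2, &key))
		printf("%d ", key);
	printf("\n");
	return 0;
}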
@@ -677,10 +677,10 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
 	struct scoutfs_manifest_entry *ment;
 	struct scoutfs_manifest_entry *over;
 	struct manifest_search_key skey;
-	SCOUTFS_DECLARE_KVEC(ment_first);
-	SCOUTFS_DECLARE_KVEC(ment_last);
-	SCOUTFS_DECLARE_KVEC(over_first);
-	SCOUTFS_DECLARE_KVEC(over_last);
+	struct scoutfs_key_buf ment_first;
+	struct scoutfs_key_buf ment_last;
+	struct scoutfs_key_buf over_first;
+	struct scoutfs_key_buf over_last;
 	int level;
 	int ret;
 	int i;
@@ -710,9 +710,7 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
 		skey.seq = 0;
 		ment = scoutfs_treap_lookup_next(mani->treap, &skey);
 		if (ment == NULL || ment->level != level) {
-			/* XXX ugh, these kvecs are the worst */
-			scoutfs_kvec_init(skey.key,
-					  skey.key[0].iov_base, 0);
+			scoutfs_key_set_min(skey.key);
 			ment = scoutfs_treap_lookup_next(mani->treap, &skey);
 		}
 	}
@@ -726,17 +724,17 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
 		goto out;
 	}
 
-	init_ment_keys(ment, ment_first, ment_last);
+	init_ment_keys(ment, &ment_first, &ment_last);
 
 	/* add the upper input segment */
-	ret = scoutfs_compact_add(sb, data, ment_first, ment_last,
+	ret = scoutfs_compact_add(sb, data, &ment_first, &ment_last,
 				  le64_to_cpu(ment->segno),
 				  le64_to_cpu(ment->seq), level);
 	if (ret)
 		goto out;
 
 	/* start with the first overlapping at the next level */
-	skey.key = ment_first;
+	skey.key = &ment_first;
 	skey.level = level + 1;
 	skey.seq = 0;
 	over = scoutfs_treap_lookup(mani->treap, &skey);
@@ -750,13 +748,13 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
 		if (!over || over->level != (ment->level + 1))
 			break;
 
-		init_ment_keys(over, over_first, over_last);
+		init_ment_keys(over, &over_first, &over_last);
 
-		if (scoutfs_kvec_cmp_overlap(ment_first, ment_last,
-					     over_first, over_last) != 0)
+		if (scoutfs_key_compare_ranges(&ment_first, &ment_last,
+					       &over_first, &over_last) != 0)
 			break;
 
-		ret = scoutfs_compact_add(sb, data, over_first, over_last,
+		ret = scoutfs_compact_add(sb, data, &over_first, &over_last,
 					  le64_to_cpu(over->segno),
 					  le64_to_cpu(over->seq), level + 1);
 		if (ret)
@@ -765,10 +763,9 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
 		over = scoutfs_treap_next(mani->treap, over);
 	}
 
-	/* record the next key to start from, not exact */
-	scoutfs_kvec_init_key(mani->compact_keys[level]);
-	scoutfs_kvec_memcpy_truncate(mani->compact_keys[level], ment_last);
-	scoutfs_kvec_be_inc(mani->compact_keys[level]);
+	/* record the next key to start from */
+	scoutfs_key_copy(mani->compact_keys[level], &ment_last);
+	scoutfs_key_inc(mani->compact_keys[level]);
 
 	ret = 0;
 out:
@@ -800,8 +797,8 @@ static int manifest_treap_compare(void *key, void *data)
 {
 	struct manifest_search_key *skey = key;
 	struct scoutfs_manifest_entry *ment = data;
-	SCOUTFS_DECLARE_KVEC(first);
-	SCOUTFS_DECLARE_KVEC(last);
+	struct scoutfs_key_buf first;
+	struct scoutfs_key_buf last;
 	int cmp;
 
 	if (skey->level < ment->level) {
@@ -818,13 +815,13 @@ static int manifest_treap_compare(void *key, void *data)
 		goto out;
 	}
 
-	init_ment_keys(ment, first, last);
+	init_ment_keys(ment, &first, &last);
 
 	if (skey->seq == 0) {
-		cmp = scoutfs_kvec_cmp_overlap(skey->key, skey->key,
-					       first, last);
+		cmp = scoutfs_key_compare_ranges(skey->key, skey->key,
+						 &first, &last);
 	} else {
-		cmp = scoutfs_kvec_memcmp(skey->key, first) ?:
+		cmp = scoutfs_key_compare(skey->key, &first) ?:
 		      scoutfs_cmp_u64s(skey->seq, le64_to_cpu(ment->seq));
 	}
 
@@ -836,14 +833,14 @@ static void manifest_treap_fill(void *data, void *arg)
 {
 	struct scoutfs_manifest_entry *ment = data;
 	struct manifest_fill_args *args = arg;
-	SCOUTFS_DECLARE_KVEC(ment_first);
-	SCOUTFS_DECLARE_KVEC(ment_last);
+	struct scoutfs_key_buf ment_first;
+	struct scoutfs_key_buf ment_last;
 
 	*ment = args->ment;
 
-	init_ment_keys(ment, ment_first, ment_last);
-	scoutfs_kvec_memcpy(ment_first, args->first);
-	scoutfs_kvec_memcpy(ment_last, args->last);
+	init_ment_keys(ment, &ment_first, &ment_last);
+	scoutfs_key_copy(&ment_first, args->first);
+	scoutfs_key_copy(&ment_last, args->last);
 }
 
 static struct scoutfs_treap_ops manifest_treap_ops = {
@@ -858,7 +855,6 @@ int scoutfs_manifest_setup(struct super_block *sb)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
 	struct scoutfs_super_block *super = &sbi->super;
 	struct manifest *mani;
-	int ret;
 	int i;
 
 	mani = kzalloc(sizeof(struct manifest), GFP_KERNEL);
@@ -876,14 +872,17 @@ int scoutfs_manifest_setup(struct super_block *sb)
 	}
 
 	for (i = 0; i < ARRAY_SIZE(mani->compact_keys); i++) {
-		ret = scoutfs_kvec_alloc_key(mani->compact_keys[i]);
-		if (ret) {
+		mani->compact_keys[i] = scoutfs_key_alloc(sb,
+							SCOUTFS_MAX_KEY_SIZE);
+		if (!mani->compact_keys[i]) {
 			while (--i >= 0)
-				scoutfs_kvec_kfree(mani->compact_keys[i]);
+				scoutfs_key_free(sb, mani->compact_keys[i]);
 			scoutfs_treap_free(mani->treap);
 			kfree(mani);
 			return -ENOMEM;
 		}
+
+		scoutfs_key_set_min(mani->compact_keys[i]);
 	}
 
 	for (i = ARRAY_SIZE(super->manifest.level_counts) - 1; i >= 0; i--) {
@@ -915,7 +914,7 @@ void scoutfs_manifest_destroy(struct super_block *sb)
 	if (mani) {
 		scoutfs_treap_free(mani->treap);
 		for (i = 0; i < ARRAY_SIZE(mani->compact_keys); i++)
-			scoutfs_kvec_kfree(mani->compact_keys[i]);
+			scoutfs_key_free(sb, mani->compact_keys[i]);
 		kfree(mani);
 	}
 }
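The compaction cursor is now maintained with scoutfs_key_copy() and scoutfs_key_inc() rather than the kvec helpers. A sketch of what a big-endian key increment has to do; the behaviour on an all-0xff key is an assumption here, not something this patch shows:

#include <stdio.h>

/*
 * Add one to a big-endian key: bump the last byte and ripple the
 * carry toward the front.  A key that is already all 0xff simply
 * wraps in this sketch.
 */
static void key_inc(unsigned char *key, unsigned int len)
{
	unsigned int i;

	for (i = len; i-- > 0; ) {
		if (++key[i] != 0)
			return;		/* no carry, done */
	}
}

int main(void)
{
	unsigned char key[4] = { 0x00, 0x01, 0xff, 0xff };

	key_inc(key, sizeof(key));
	/* prints 00 02 00 00: the carry rippled through both 0xff bytes */
	printf("%02x %02x %02x %02x\n", key[0], key[1], key[2], key[3]);
	return 0;
}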
 
diff --git a/kmod/src/manifest.h b/kmod/src/manifest.h
index 5e529cd5..d788aeaf 100644
--- a/kmod/src/manifest.h
+++ b/kmod/src/manifest.h
@@ -1,20 +1,25 @@
 #ifndef _SCOUTFS_MANIFEST_H_
 #define _SCOUTFS_MANIFEST_H_
 
-int scoutfs_manifest_add(struct super_block *sb, struct kvec *first,
-			 struct kvec *last, u64 segno, u64 seq, u8 level);
-int scoutfs_manifest_dirty(struct super_block *sb, struct kvec *first, u64 seq,
-			   u8 level);
-int scoutfs_manifest_del(struct super_block *sb, struct kvec *first, u64 seq,
+struct scoutfs_key_buf;
+
+int scoutfs_manifest_add(struct super_block *sb,
+			 struct scoutfs_key_buf *first,
+			 struct scoutfs_key_buf *last, u64 segno, u64 seq, u8 level);
+int scoutfs_manifest_dirty(struct super_block *sb,
+			   struct scoutfs_key_buf *first, u64 seq, u8 level);
+int scoutfs_manifest_del(struct super_block *sb, struct scoutfs_key_buf *first,
+			 u64 seq, u8 level);
 int scoutfs_manifest_has_dirty(struct super_block *sb);
 int scoutfs_manifest_dirty_ring(struct super_block *sb);
 int scoutfs_manifest_lock(struct super_block *sb);
 int scoutfs_manifest_unlock(struct super_block *sb);
-int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
-				struct kvec *until);
+int scoutfs_manifest_read_items(struct super_block *sb,
+				struct scoutfs_key_buf *key,
+				struct scoutfs_key_buf *end);
 u64 scoutfs_manifest_level_count(struct super_block *sb, u8 level);
 int scoutfs_manifest_next_compact(struct super_block *sb, void *data);
 
diff --git a/kmod/src/seg.c b/kmod/src/seg.c
index f2034b8e..f881e701 100644
--- a/kmod/src/seg.c
+++ b/kmod/src/seg.c
@@ -24,6 +24,7 @@
 #include "cmp.h"
 #include "manifest.h"
 #include "alloc.h"
+#include "key.h"
 
 /*
  * seg.c should just be about the cache and io, and maybe
@@ -339,7 +340,7 @@ static void *off_ptr(struct scoutfs_segment *seg, u32 off)
 	return page_address(seg->pages[pg]) + pg_off;
 }
 
-static u32 pos_off(struct scoutfs_segment *seg, u32 pos)
+static u32 pos_off(u32 pos)
 {
 	/* items need to be a power of two */
 	BUILD_BUG_ON(!is_power_of_2(sizeof(struct scoutfs_segment_item)));
@@ -352,7 +353,7 @@ static u32 pos_off(struct scoutfs_segment *seg, u32 pos)
 
 static void *pos_ptr(struct scoutfs_segment *seg, u32 pos)
 {
-	return off_ptr(seg, pos_off(seg, pos));
+	return off_ptr(seg, pos_off(pos));
 }
 
 /*
@@ -416,8 +417,8 @@ static void kvec_from_pages(struct scoutfs_segment *seg,
 			    off_ptr(seg, off + first), len - first);
 }
 
-int scoutfs_seg_item_kvecs(struct scoutfs_segment *seg, int pos,
-			   struct kvec *key, struct kvec *val)
+int scoutfs_seg_item_ptrs(struct scoutfs_segment *seg, int pos,
+			  struct scoutfs_key_buf *key, struct kvec *val)
 {
 	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
 	struct native_item item;
@@ -428,7 +429,7 @@ int scoutfs_seg_item_kvecs(struct scoutfs_segment *seg, int pos,
 	load_item(seg, pos, &item);
 
 	if (key)
-		kvec_from_pages(seg, key, item.key_off, item.key_len);
+		scoutfs_key_init(key, off_ptr(seg, item.key_off), item.key_len);
 	if (val)
 		kvec_from_pages(seg, val, item.val_off, item.val_len);
 
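pos_off() relies on the item struct size being a power of two so a whole number of item headers fits in each page and off_ptr() can resolve any offset with a single page lookup. A standalone model of that arithmetic; the header and item sizes below are assumptions, not scoutfs's real constants:

#include <stdio.h>

#define PAGE_SIZE	4096u
#define BLOCK_HDR	64u	/* assumed size of the segment block header */
#define ITEM_SIZE	32u	/* must be a power of two */

/* item headers are a fixed-size array right after the block header */
static unsigned int pos_off(unsigned int pos)
{
	return BLOCK_HDR + pos * ITEM_SIZE;
}

int main(void)
{
	unsigned int off = pos_off(200);

	/*
	 * Because ITEM_SIZE divides PAGE_SIZE and the header is a
	 * multiple of ITEM_SIZE, an item never straddles two pages:
	 * a single page index and offset address the whole item.
	 */
	printf("item 200: page %u offset %u\n",
	       off / PAGE_SIZE, off % PAGE_SIZE);
	return 0;
}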
@@ -440,10 +441,11 @@ int scoutfs_seg_item_kvecs(struct scoutfs_segment *seg, int pos,
  * This can return the number of positions if the key is greater than
  * all the keys.
  */
-static int find_key_pos(struct scoutfs_segment *seg, struct kvec *search)
+static int find_key_pos(struct scoutfs_segment *seg,
+			struct scoutfs_key_buf *search)
 {
 	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
-	SCOUTFS_DECLARE_KVEC(key);
+	struct scoutfs_key_buf key;
 	unsigned int start = 0;
 	unsigned int end = le32_to_cpu(sblk->nr_items);
 	unsigned int pos = 0;
@@ -451,9 +453,9 @@ static int find_key_pos(struct scoutfs_segment *seg, struct kvec *search)
 	while (start < end) {
 		pos = start + (end - start) / 2;
 
-		scoutfs_seg_item_kvecs(seg, pos, key, NULL);
+		scoutfs_seg_item_ptrs(seg, pos, &key, NULL);
 
-		cmp = scoutfs_kvec_memcmp(search, key);
+		cmp = scoutfs_key_compare(search, &key);
 		if (cmp < 0)
 			end = pos;
 		else if (cmp > 0)
@@ -465,11 +467,51 @@ static int find_key_pos(struct scoutfs_segment *seg, struct kvec *search)
 	return pos;
 }
 
-int scoutfs_seg_find_pos(struct scoutfs_segment *seg, struct kvec *key)
+int scoutfs_seg_find_pos(struct scoutfs_segment *seg,
+			 struct scoutfs_key_buf *key)
 {
 	return find_key_pos(seg, key);
 }
 
+/*
+ * Keys are aligned to the next block boundary if they'd cross a block
+ * boundary.  To find the first value offset we have to assume that
+ * there will be a worst case key alignment at every block boundary.
+ */
+static u32 first_val_off(u32 nr_items, u32 key_bytes)
+{
+	u32 key_padding = SCOUTFS_MAX_KEY_SIZE - 1;
+	u32 partial_block = SCOUTFS_BLOCK_SIZE - key_padding;
+	u32 first_key_off = pos_off(nr_items);
+	u32 block_off = first_key_off & SCOUTFS_BLOCK_MASK;
+	u32 total_padding = ((block_off + key_bytes) / partial_block) *
+			    key_padding;
+
+	return first_key_off + key_bytes + total_padding;
+}
+
+/*
+ * Returns true if the given number of items with the given total byte
+ * counts of keys and values fits inside a single segment.
+ */
+bool scoutfs_seg_fits_single(u32 nr_items, u32 key_bytes, u32 val_bytes)
+{
+	return (first_val_off(nr_items, key_bytes) + val_bytes)
+		<= SCOUTFS_SEGMENT_SIZE;
+}
+
+static u32 align_key_off(struct scoutfs_segment *seg, u32 key_off, u32 len)
+{
+	u32 space = SCOUTFS_BLOCK_SIZE - (key_off & SCOUTFS_BLOCK_MASK);
+
+	if (len > space) {
+		memset(off_ptr(seg, key_off), 0, space);
+		return key_off + space;
+	}
+
+	return key_off;
+}
+
 /*
  * Store the first item in the segment.  The caller knows the number
  * of items and bytes of keys that determine where the keys and values
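first_val_off() is worth a worked number. A userspace copy of the function with made-up constants (the real SCOUTFS_* values aren't shown in this patch) demonstrates how the worst case alignment charge grows with the bytes of keys laid down:

#include <stdio.h>

#define BLOCK_SIZE	4096u
#define BLOCK_MASK	(BLOCK_SIZE - 1)
#define MAX_KEY_SIZE	256u	/* assumed, not the real constant */
#define ITEM_SIZE	32u	/* assumed item header size */

static unsigned int first_val_off(unsigned int nr_items,
				  unsigned int key_bytes)
{
	unsigned int key_padding = MAX_KEY_SIZE - 1;
	unsigned int partial_block = BLOCK_SIZE - key_padding;
	unsigned int first_key_off = nr_items * ITEM_SIZE;
	unsigned int block_off = first_key_off & BLOCK_MASK;
	unsigned int total_padding = ((block_off + key_bytes) / partial_block) *
				     key_padding;

	return first_key_off + key_bytes + total_padding;
}

int main(void)
{
	/*
	 * 1000 items whose keys total 48KiB: 13 block boundaries could
	 * each cost 255 bytes of padding, so values are assumed to
	 * start no earlier than 32000 + 49152 + 13 * 255 = 84467.
	 */
	printf("%u\n", first_val_off(1000, 48 * 1024));
	return 0;
}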
@@ -478,14 +520,14 @@ int scoutfs_seg_find_pos(struct scoutfs_segment *seg, struct kvec *key)
  * This should never fail because any item must always fit in a segment.
  */
 void scoutfs_seg_first_item(struct super_block *sb,
 			    struct scoutfs_segment *seg,
-			    struct kvec *key, struct kvec *val,
+			    struct scoutfs_key_buf *key, struct kvec *val,
 			    unsigned int nr_items, unsigned int key_bytes)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
 	struct scoutfs_super_block *super = &sbi->super;
 	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
 	struct native_item item;
-	SCOUTFS_DECLARE_KVEC(item_key);
+	struct scoutfs_key_buf item_key;
 	SCOUTFS_DECLARE_KVEC(item_val);
 	u32 key_off;
 	u32 val_off;
@@ -495,31 +537,33 @@ void scoutfs_seg_first_item(struct super_block *sb,
 	sblk->seq = super->next_seg_seq;
 	le64_add_cpu(&super->next_seg_seq, 1);
-	key_off = pos_off(seg, nr_items);
-	val_off = key_off + key_bytes;
+	key_off = align_key_off(seg, pos_off(nr_items), key->key_len);
+	val_off = first_val_off(nr_items, key_bytes);
 	sblk->nr_items = cpu_to_le32(1);
 
+	trace_printk("first item offs key %u val %u\n", key_off, val_off);
+
 	item.seq = 1;
 	item.key_off = key_off;
 	item.val_off = val_off;
-	item.key_len = scoutfs_kvec_length(key);
+	item.key_len = key->key_len;
 	item.val_len = scoutfs_kvec_length(val);
 	store_item(seg, 0, &item);
 
-	scoutfs_seg_item_kvecs(seg, 0, item_key, item_val);
-	scoutfs_kvec_memcpy(item_key, key);
+	scoutfs_seg_item_ptrs(seg, 0, &item_key, item_val);
+	scoutfs_key_copy(&item_key, key);
 	scoutfs_kvec_memcpy(item_val, val);
 }
 
 void scoutfs_seg_append_item(struct super_block *sb,
 			     struct scoutfs_segment *seg,
-			     struct kvec *key, struct kvec *val)
+			     struct scoutfs_key_buf *key, struct kvec *val)
 {
 	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
 	struct native_item item;
 	struct native_item prev;
-	SCOUTFS_DECLARE_KVEC(item_key);
+	struct scoutfs_key_buf item_key;
 	SCOUTFS_DECLARE_KVEC(item_val);
 	u32 pos;
 
@@ -529,14 +573,18 @@ void scoutfs_seg_append_item(struct super_block *sb,
 	load_item(seg, pos - 1, &prev);
 
 	item.seq = 1;
-	item.key_off = prev.key_off + prev.key_len;
-	item.key_len = scoutfs_kvec_length(key);
+	item.key_off = align_key_off(seg, prev.key_off + prev.key_len,
+				     key->key_len);
+	item.key_len = key->key_len;
 	item.val_off = prev.val_off + prev.val_len;
 	item.val_len = scoutfs_kvec_length(val);
 	store_item(seg, pos, &item);
 
-	scoutfs_seg_item_kvecs(seg, pos, item_key, item_val);
-	scoutfs_kvec_memcpy(item_key, key);
+	trace_printk("item %u offs key %u val %u\n",
+		     pos, item.key_off, item.val_off);
+
+	scoutfs_seg_item_ptrs(seg, pos, &item_key, item_val);
+	scoutfs_key_copy(&item_key, key);
 	scoutfs_kvec_memcpy(item_val, val);
 }
 
@@ -548,16 +596,16 @@ int scoutfs_seg_manifest_add(struct super_block *sb,
 {
 	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
 	struct native_item item;
-	SCOUTFS_DECLARE_KVEC(first);
-	SCOUTFS_DECLARE_KVEC(last);
+	struct scoutfs_key_buf first;
+	struct scoutfs_key_buf last;
 
 	load_item(seg, 0, &item);
-	kvec_from_pages(seg, first, item.key_off, item.key_len);
+	scoutfs_key_init(&first, off_ptr(seg, item.key_off), item.key_len);
 
 	load_item(seg, le32_to_cpu(sblk->nr_items) - 1, &item);
-	kvec_from_pages(seg, last, item.key_off, item.key_len);
+	scoutfs_key_init(&last, off_ptr(seg, item.key_off), item.key_len);
 
-	return scoutfs_manifest_add(sb, first, last, le64_to_cpu(sblk->segno),
+	return scoutfs_manifest_add(sb, &first, &last, le64_to_cpu(sblk->segno),
 				    le64_to_cpu(sblk->seq), level);
 }
 
@@ -566,12 +614,12 @@ int scoutfs_seg_manifest_del(struct super_block *sb,
 {
 	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
 	struct native_item item;
-	SCOUTFS_DECLARE_KVEC(first);
+	struct scoutfs_key_buf first;
 
 	load_item(seg, 0, &item);
-	kvec_from_pages(seg, first, item.key_off, item.key_len);
+	scoutfs_key_init(&first, off_ptr(seg, item.key_off), item.key_len);
 
-	return scoutfs_manifest_del(sb, first, le64_to_cpu(sblk->seq), level);
+	return scoutfs_manifest_del(sb, &first, le64_to_cpu(sblk->seq), level);
 }
 
 int scoutfs_seg_setup(struct super_block *sb)
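Both writers above chain each new item off the previous one: item headers sit at the front of the segment, keys are packed together after them, and values are packed after the keys. A simplified userspace sketch of that append layout, ignoring block alignment (all offsets and sizes are invented for the example):

#include <stdio.h>
#include <string.h>

/* a native item: where its key and value live inside the segment */
struct item {
	unsigned int key_off, key_len;
	unsigned int val_off, val_len;
};

int main(void)
{
	unsigned char seg[4096];
	struct item items[2];

	/* the first item fixes where the key and value regions start */
	items[0] = (struct item){ .key_off = 256, .key_len = 4,
				  .val_off = 512, .val_len = 8 };
	memcpy(seg + items[0].key_off, "key0", 4);
	memcpy(seg + items[0].val_off, "value--0", 8);

	/* each append starts where the previous key and value ended */
	items[1] = (struct item){
		.key_off = items[0].key_off + items[0].key_len,
		.key_len = 4,
		.val_off = items[0].val_off + items[0].val_len,
		.val_len = 8,
	};
	memcpy(seg + items[1].key_off, "key1", 4);
	memcpy(seg + items[1].val_off, "value--1", 8);

	/* prints key off 260 val off 520 */
	printf("key off %u val off %u\n", items[1].key_off, items[1].val_off);
	return 0;
}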
 
diff --git a/kmod/src/seg.h b/kmod/src/seg.h
index 597e9955..e43b2268 100644
--- a/kmod/src/seg.h
+++ b/kmod/src/seg.h
@@ -3,15 +3,17 @@
 
 struct scoutfs_bio_completion;
 struct scoutfs_segment;
+struct scoutfs_key_buf;
 struct kvec;
 
 struct scoutfs_segment *scoutfs_seg_submit_read(struct super_block *sb,
 						u64 segno);
 int scoutfs_seg_wait(struct super_block *sb, struct scoutfs_segment *seg);
-int scoutfs_seg_find_pos(struct scoutfs_segment *seg, struct kvec *key);
-int scoutfs_seg_item_kvecs(struct scoutfs_segment *seg, int pos,
-			   struct kvec *key, struct kvec *val);
+int scoutfs_seg_find_pos(struct scoutfs_segment *seg,
+			 struct scoutfs_key_buf *key);
+int scoutfs_seg_item_ptrs(struct scoutfs_segment *seg, int pos,
+			  struct scoutfs_key_buf *key, struct kvec *val);
 void scoutfs_seg_get(struct scoutfs_segment *seg);
 void scoutfs_seg_put(struct scoutfs_segment *seg);
 
@@ -19,12 +21,13 @@ void scoutfs_seg_put(struct scoutfs_segment *seg);
 int scoutfs_seg_alloc(struct super_block *sb, struct scoutfs_segment **seg_ret);
 int scoutfs_seg_free_segno(struct super_block *sb, struct scoutfs_segment *seg);
+bool scoutfs_seg_fits_single(u32 nr_items, u32 key_bytes, u32 val_bytes);
 void scoutfs_seg_first_item(struct super_block *sb,
 			    struct scoutfs_segment *seg,
-			    struct kvec *key, struct kvec *val,
+			    struct scoutfs_key_buf *key, struct kvec *val,
 			    unsigned int nr_items, unsigned int key_bytes);
 void scoutfs_seg_append_item(struct super_block *sb,
 			     struct scoutfs_segment *seg,
-			     struct kvec *key, struct kvec *val);
+			     struct scoutfs_key_buf *key, struct kvec *val);
 int scoutfs_seg_manifest_add(struct super_block *sb,
 			     struct scoutfs_segment *seg, u8 level);
 int scoutfs_seg_manifest_del(struct super_block *sb,
 
diff --git a/kmod/src/trans.c b/kmod/src/trans.c
index bbd543ff..65db65ec 100644
--- a/kmod/src/trans.c
+++ b/kmod/src/trans.c
@@ -98,15 +98,15 @@ void scoutfs_trans_write_func(struct work_struct *work)
 		scoutfs_filerw_free_alloc(sb);
 #endif
 
-	trace_printk("dirty bytes %ld manifest dirty %d alloc dirty %d\n",
-		     scoutfs_item_dirty_bytes(sb),
+	trace_printk("items dirty %d manifest dirty %d alloc dirty %d\n",
+		     scoutfs_item_has_dirty(sb),
 		     scoutfs_manifest_has_dirty(sb),
 		     scoutfs_alloc_has_dirty(sb));
 
 	/*
 	 * XXX this needs serious work to handle errors.
 	 */
-	while (scoutfs_item_dirty_bytes(sb)) {
+	while (scoutfs_item_has_dirty(sb)) {
 		seg = NULL;
 		ret = scoutfs_seg_alloc(sb, &seg) ?:
 		      scoutfs_item_dirty_seg(sb, seg) ?:
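The write loop above composes its steps with the GNU `?:` operator: `a ?: b` evaluates to `a` when `a` is non-zero, so the first step to return an errno short-circuits the rest. A standalone demonstration with stand-in step functions (this is a GCC/Clang extension, which the kernel builds with):

#include <stdio.h>

static int seg_alloc(void)  { return 0; }	/* succeeds */
static int dirty_seg(void)  { return -28; }	/* -ENOSPC stand-in */
static int seg_submit(void) { puts("never reached"); return 0; }

int main(void)
{
	int ret = seg_alloc() ?: dirty_seg() ?: seg_submit();

	/* prints -28: dirty_seg() failed, so seg_submit() never ran */
	printf("%d\n", ret);
	return 0;
}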
@@ -222,14 +222,27 @@ int scoutfs_file_fsync(struct file *file, loff_t start, loff_t end,
 
 /*
  * I think the holder that creates the most dirty item data is
- * symlinking, which can create all the entry items and a symlink target
- * item with a full 4k path.  We go a little nuts and just set it to two
- * blocks.
+ * symlinking, which can create an inode, the three dirent items with a
+ * full file name, and a symlink item with a full path.
  *
- * XXX This divides the segment size to set the hard limit on the number of
- * concurrent holders so we'll want this to be more precise.
+ * XXX Assuming the worst case here too aggressively limits the number
+ * of concurrent holders that can work without being blocked when they
+ * know they'll dirty much less.  We may want to have callers pass in
+ * their item, key, and val budgets if that's not too fragile.
+ *
+ * XXX fix to use real backref and symlink items, placeholders for now
  */
-#define MOST_DIRTY (2 * SCOUTFS_BLOCK_SIZE)
+#define HOLD_WORST_ITEMS 5
+#define HOLD_WORST_KEYS (sizeof(struct scoutfs_inode_key) +		\
+			 sizeof(struct scoutfs_dirent_key) + SCOUTFS_NAME_LEN +\
+			 sizeof(struct scoutfs_readdir_key) +		\
+			 sizeof(struct scoutfs_readdir_key) +		\
+			 sizeof(struct scoutfs_inode_key))
+#define HOLD_WORST_VALS (sizeof(struct scoutfs_inode) +			\
+			 sizeof(struct scoutfs_dirent) +		\
+			 sizeof(struct scoutfs_dirent) + SCOUTFS_NAME_LEN +	\
+			 sizeof(struct scoutfs_dirent) + SCOUTFS_NAME_LEN +	\
+			 SCOUTFS_SYMLINK_MAX_SIZE)
 
 /*
  * We're able to hold the transaction if the current dirty item bytes
@@ -239,10 +252,12 @@ int scoutfs_file_fsync(struct file *file, loff_t start, loff_t end,
 static bool hold_acquired(struct super_block *sb)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
-	long bytes;
 	int with_us;
 	int holds;
 	int before;
+	u32 items;
+	u32 keys;
+	u32 vals;
 
 	holds = atomic_read(&sbi->trans_holds);
 	for (;;) {
@@ -258,8 +273,10 @@ static bool hold_acquired(struct super_block *sb)
 
 		/* see if we all would fill the segment */
 		with_us = holds + 1;
-		bytes = (with_us * MOST_DIRTY) + scoutfs_item_dirty_bytes(sb);
-		if (bytes > SCOUTFS_SEGMENT_SIZE) {
+		items = with_us * HOLD_WORST_ITEMS;
+		keys = with_us * HOLD_WORST_KEYS;
+		vals = with_us * HOLD_WORST_VALS;
+		if (!scoutfs_item_dirty_fits_single(sb, items, keys, vals)) {
 			scoutfs_sync_fs(sb, 0);
 			return false;
 		}
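hold_acquired() now asks whether every holder's worst case budget, plus what is already dirty, would still fit in one written segment. A userspace sketch of that admission check; the segment size, per-item overhead, and budget numbers below are invented, and the real code defers the fits-in-one-segment question to the item cache:

#include <stdbool.h>
#include <stdio.h>

#define SEGMENT_SIZE	(1u << 20)	/* assumed 1MiB segment */
#define WORST_ITEMS	5u
#define WORST_KEYS	600u		/* assumed worst case key bytes */
#define WORST_VALS	4700u		/* assumed worst case value bytes */
#define ITEM_OVERHEAD	32u		/* assumed per-item header cost */

static bool hold_acquired(unsigned int holds, unsigned int dirty_bytes)
{
	unsigned int with_us = holds + 1;
	unsigned int worst = with_us * (WORST_ITEMS * ITEM_OVERHEAD +
					WORST_KEYS + WORST_VALS);

	return dirty_bytes + worst <= SEGMENT_SIZE;
}

int main(void)
{
	/* plenty of room left: the fourth holder gets in */
	printf("%d\n", hold_acquired(3, 900 * 1024));		/* 1 */
	/* nearly full segment: the holder must wait for a sync */
	printf("%d\n", hold_acquired(3, SEGMENT_SIZE - 10 * 1024)); /* 0 */
	return 0;
}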