diff --git a/kmod/src/format.h b/kmod/src/format.h index 63cc07be..7e91afa1 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -160,6 +160,9 @@ struct scoutfs_segment_block { #define SCOUTFS_ORPHAN_KEY 10 #define SCOUTFS_FREE_EXTENT_BLKNO_KEY 11 #define SCOUTFS_FREE_EXTENT_BLOCKS_KEY 12 +#define SCOUTFS_INODE_INDEX_CTIME_KEY 13 +#define SCOUTFS_INODE_INDEX_MTIME_KEY 14 +#define SCOUTFS_INODE_INDEX_SIZE_KEY 15 /* not found in the fs */ #define SCOUTFS_MAX_UNUSED_KEY 253 #define SCOUTFS_NET_ADDR_KEY 254 @@ -249,6 +252,18 @@ struct scoutfs_symlink_key { __be64 ino; } __packed; +struct scoutfs_betimespec { + __be64 sec; + __be32 nsec; +} __packed; + +struct scoutfs_inode_index_key { + __u8 type; + __be64 major; + __be32 minor; + __be64 ino; +} __packed; + /* XXX does this exist upstream somewhere? */ #define member_sizeof(TYPE, MEMBER) (sizeof(((TYPE *)0)->MEMBER)) diff --git a/kmod/src/inode.c b/kmod/src/inode.c index 506cdd74..43c6c730 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -184,6 +184,16 @@ static void set_inode_ops(struct inode *inode) mapping_set_gfp_mask(inode->i_mapping, GFP_USER); } +static void set_item_info(struct inode *inode) +{ + struct scoutfs_inode_info *si = SCOUTFS_I(inode); + + si->have_item = true; + si->item_size = i_size_read(inode); + si->item_ctime = inode->i_ctime; + si->item_mtime = inode->i_mtime; +} + static void load_inode(struct inode *inode, struct scoutfs_inode *cinode) { struct scoutfs_inode_info *ci = SCOUTFS_I(inode); @@ -203,6 +213,8 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode) ci->data_version = le64_to_cpu(cinode->data_version); ci->next_readdir_pos = le64_to_cpu(cinode->next_readdir_pos); + + set_item_info(inode); } void scoutfs_inode_init_key(struct scoutfs_key_buf *key, @@ -362,6 +374,77 @@ int scoutfs_dirty_inode_item(struct inode *inode) return ret; } +/* + * Make sure inode index items are kept in sync with the fields that are + * set in the inode items. 
This must be called any time the contents of + * the inode items are updated. + * + * This is effectively a RMW on the inode fields so the caller needs to + * lock the inode so that it's the only one working with the index items + * for a given set of fields in the inode. + * + * But it doesn't need to lock the index item keys. By locking the + * inode we've ensured that we can safely log deletion and insertion + * items in our log. The indexes are eventually consistent so we don't + * need to wrap them in locks. + * + * XXX this needs more supporting work from the rest of the + * infrastructure: + * + * - Deleting and creating the items needs to forcefully set those dirty + * items in the cache without first trying to read them from segments. + * - the reading ioctl needs to forcefully invalidate the index items + * as it walks. + * - maybe the reading ioctl needs to verify fields with inodes? + * - final inode deletion needs to invalidate the index items for + * each inode as it deletes items based on the locked inode fields. + * - make sure deletion items safely vanish w/o finding existing item + * - ... 
error handling :( + */ +static int update_index(struct inode *inode, u8 type, u64 now_major, + u32 now_minor, u64 then_major, u32 then_minor) +{ + struct scoutfs_inode_info *si = SCOUTFS_I(inode); + struct super_block *sb = inode->i_sb; + struct scoutfs_inode_index_key ins_ikey; + struct scoutfs_inode_index_key del_ikey; + struct scoutfs_key_buf ins; + struct scoutfs_key_buf del; + int ret; + int err; + + trace_printk("ino %llu have %u now %llu.%u then %llu.%u \n", + scoutfs_ino(inode), si->have_item, + now_major, now_minor, then_major, then_minor); + + if (si->have_item && now_major == then_major && now_minor == then_minor) + return 0; + + ins_ikey.type = type; + ins_ikey.major = cpu_to_be64(now_major); + ins_ikey.minor = cpu_to_be32(now_minor); + ins_ikey.ino = cpu_to_be64(scoutfs_ino(inode)); + scoutfs_key_init(&ins, &ins_ikey, sizeof(ins_ikey)); + + ret = scoutfs_item_create(sb, &ins, NULL); + if (ret || !si->have_item) + return ret; + + del_ikey.type = type; + del_ikey.major = cpu_to_be64(then_major); + del_ikey.minor = cpu_to_be32(then_minor); + del_ikey.ino = cpu_to_be64(scoutfs_ino(inode)); + scoutfs_key_init(&del, &del_ikey, sizeof(del_ikey)); + + ret = scoutfs_item_delete(sb, &del); + if (ret) { + err = scoutfs_item_delete(sb, &ins); + BUG_ON(err); + } + + return ret; +} + /* * Every time we modify the inode in memory we copy it to its inode * item. 
This lets us write out items without having to track down @@ -373,13 +456,25 @@ int scoutfs_dirty_inode_item(struct inode *inode) */ void scoutfs_update_inode_item(struct inode *inode) { + struct scoutfs_inode_info *si = SCOUTFS_I(inode); struct super_block *sb = inode->i_sb; struct scoutfs_inode_key ikey; struct scoutfs_key_buf key; struct scoutfs_inode sinode; SCOUTFS_DECLARE_KVEC(val); + int ret; int err; + ret = update_index(inode, SCOUTFS_INODE_INDEX_CTIME_KEY, + inode->i_ctime.tv_sec, inode->i_ctime.tv_nsec, + si->item_ctime.tv_sec, si->item_ctime.tv_nsec) ?: + update_index(inode, SCOUTFS_INODE_INDEX_MTIME_KEY, + inode->i_mtime.tv_sec, inode->i_mtime.tv_nsec, + si->item_mtime.tv_sec, si->item_mtime.tv_nsec) ?: + update_index(inode, SCOUTFS_INODE_INDEX_SIZE_KEY, + i_size_read(inode), 0, si->item_size, 0); + BUG_ON(ret); + store_inode(&sinode, inode); scoutfs_inode_init_key(&key, &ikey, scoutfs_ino(inode)); @@ -392,6 +487,7 @@ void scoutfs_update_inode_item(struct inode *inode) BUG_ON(err); } + set_item_info(inode); trace_scoutfs_update_inode(inode); } @@ -562,6 +658,7 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir, ci->ino = ino; ci->data_version = 0; ci->next_readdir_pos = SCOUTFS_DIRENT_FIRST_POS; + ci->have_item = false; inode->i_ino = ino; /* XXX overflow */ inode_init_owner(inode, dir, mode); diff --git a/kmod/src/inode.h b/kmod/src/inode.h index 59da8f7a..5f453996 100644 --- a/kmod/src/inode.h +++ b/kmod/src/inode.h @@ -8,6 +8,10 @@ struct scoutfs_inode_info { u64 ino; u64 data_version; u64 next_readdir_pos; + bool have_item; + u64 item_size; + struct timespec item_ctime; + struct timespec item_mtime; /* initialized once for slab object */ seqcount_t seqcount; diff --git a/kmod/src/ioctl.c b/kmod/src/ioctl.c index 48f814d0..44a24682 100644 --- a/kmod/src/ioctl.c +++ b/kmod/src/ioctl.c @@ -28,84 +28,81 @@ #include "super.h" #include "inode.h" #include "trans.h" +#include "item.h" #include "data.h" /* - * Find all the inodes 
that have had keys of a given type modified since - * a given sequence number. The user's arg struct specifies the inode - * range to search within and the sequence value to return results from. - * Different ioctls call this for different key types. - * - * When this is used for file data items the user is trying to find - * inodes whose data has changed since a given time in the past. - * - * XXX We'll need to improve the walk and search to notice when file - * data items have been truncated away. - * - * Inodes and their sequence numbers are copied out to userspace in - * inode order, not sequence order. + * Walk one of the inode index items. This is a thin ioctl wrapper + * around the core item interface. */ -static long scoutfs_ioc_inodes_since(struct file *file, unsigned long arg, - u8 type) +static long scoutfs_ioc_walk_inodes(struct file *file, unsigned long arg) { struct super_block *sb = file_inode(file)->i_sb; - struct scoutfs_ioctl_inodes_since __user *uargs = (void __user *)arg; - struct scoutfs_ioctl_inodes_since args; - struct scoutfs_ioctl_ino_seq __user *uiseq; - struct scoutfs_ioctl_ino_seq iseq; - struct scoutfs_inode_key last_ikey; - struct scoutfs_inode_key ikey; - struct scoutfs_key_buf last; + struct scoutfs_ioctl_walk_inodes __user *uwalk = (void __user *)arg; + struct scoutfs_ioctl_walk_inodes walk; + struct scoutfs_ioctl_walk_inodes_entry ent; + struct scoutfs_inode_index_key last_ikey; + struct scoutfs_inode_index_key ikey; + struct scoutfs_key_buf last_key; struct scoutfs_key_buf key; - long bytes; - u64 seq; - int ret; + int ret = 0; + u32 nr; - if (copy_from_user(&args, uargs, sizeof(args))) + if (copy_from_user(&walk, uwalk, sizeof(walk))) return -EFAULT; - uiseq = (void __user *)(unsigned long)args.buf_ptr; - if (args.buf_len < sizeof(iseq) || args.buf_len > INT_MAX) + trace_printk("index %u first %llu.%u.%llu last %llu.%u.%llu\n", + walk.index, walk.first.major, walk.first.minor, + walk.first.ino, walk.last.major, walk.last.minor, + 
walk.last.ino); + + if (walk.index == SCOUTFS_IOC_WALK_INODES_CTIME) + ikey.type = SCOUTFS_INODE_INDEX_CTIME_KEY; + else if (walk.index == SCOUTFS_IOC_WALK_INODES_MTIME) + ikey.type = SCOUTFS_INODE_INDEX_MTIME_KEY; + else if (walk.index == SCOUTFS_IOC_WALK_INODES_SIZE) + ikey.type = SCOUTFS_INODE_INDEX_SIZE_KEY; + else return -EINVAL; - scoutfs_inode_init_key(&key, &ikey, args.first_ino); - scoutfs_inode_init_key(&last, &last_ikey, args.last_ino); + ikey.major = cpu_to_be64(walk.first.major); + ikey.minor = cpu_to_be32(walk.first.minor); + ikey.ino = cpu_to_be64(walk.first.ino); + scoutfs_key_init(&key, &ikey, sizeof(ikey)); - bytes = 0; - for (;;) { + last_ikey.type = ikey.type; + last_ikey.major = cpu_to_be64(walk.last.major); + last_ikey.minor = cpu_to_be32(walk.last.minor); + last_ikey.ino = cpu_to_be64(walk.last.ino); + scoutfs_key_init(&last_key, &last_ikey, sizeof(last_ikey)); - /* XXX item cache needs to search by seq */ - seq = !!sb; - ret = WARN_ON_ONCE(-EINVAL); -// ret = scoutfs_item_since(sb, &key, &last, args.seq, &seq, NULL); + /* cap nr to the max the ioctl can return to a compat task */ + walk.nr_entries = min_t(u64, walk.nr_entries, INT_MAX); + + for (nr = 0; nr < walk.nr_entries; + nr++, walk.entries_ptr += sizeof(ent)) { + + ret = scoutfs_item_next_same(sb, &key, &last_key, NULL); if (ret < 0) { if (ret == -ENOENT) ret = 0; break; } - iseq.ino = be64_to_cpu(ikey.ino); - iseq.seq = seq; + ent.major = be64_to_cpu(ikey.major); + ent.minor = be32_to_cpu(ikey.minor); + ent.ino = be64_to_cpu(ikey.ino); - if (copy_to_user(uiseq, &iseq, sizeof(iseq))) { + if (copy_to_user((void __user *)walk.entries_ptr, &ent, + sizeof(ent))) { ret = -EFAULT; break; } - uiseq++; - bytes += sizeof(iseq); - if (bytes + sizeof(iseq) > args.buf_len) { - ret = 0; - break; - } - - last_ikey.ino = cpu_to_be64(iseq.ino + 1); + scoutfs_key_inc_cur_len(&key); } - if (bytes) - ret = bytes; - - return ret; + return nr ?: ret; } struct ino_path_cursor { @@ -419,12 +416,10 @@ out: 
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { switch (cmd) { - case SCOUTFS_IOC_INODES_SINCE: - return scoutfs_ioc_inodes_since(file, arg, SCOUTFS_INODE_KEY); + case SCOUTFS_IOC_WALK_INODES: + return scoutfs_ioc_walk_inodes(file, arg); case SCOUTFS_IOC_INO_PATH: return scoutfs_ioc_ino_path(file, arg); - case SCOUTFS_IOC_INODE_DATA_SINCE: - return WARN_ON_ONCE(-EINVAL); case SCOUTFS_IOC_DATA_VERSION: return scoutfs_ioc_data_version(file, arg); case SCOUTFS_IOC_RELEASE: diff --git a/kmod/src/ioctl.h b/kmod/src/ioctl.h index e84db63c..4d529550 100644 --- a/kmod/src/ioctl.h +++ b/kmod/src/ioctl.h @@ -6,25 +6,54 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg); /* XXX I have no idea how these are chosen. */ #define SCOUTFS_IOCTL_MAGIC 's' -struct scoutfs_ioctl_ino_seq { +struct scoutfs_ioctl_walk_inodes_entry { + __u64 major; + __u32 minor; __u64 ino; - __u64 seq; -} __packed; - -struct scoutfs_ioctl_inodes_since { - __u64 first_ino; - __u64 last_ino; - __u64 seq; - __u64 buf_ptr; - __u32 buf_len; } __packed; /* - * Adds entries to the user's buffer for each inode whose sequence - * number is greater than or equal to the given seq. + * Walk inodes in an index that is sorted by one of their fields. + * + * Each index is built from generic index items that have major and + * minor values that are set to the field being indexed. In time + * indices, for example, major is seconds and minor is nanoseconds. + * + * @first The first index entry that can be returned. + * @last The last index entry that can be returned. + * @entries_ptr Pointer to memory containing buffer for entry results. + * @nr_entries The number of entries that can fit in the buffer. + * @index Which index to walk, enumerated in _WALK_INODES_ constants. + * + * To start iterating first can be memset to 0 and last to 0xff. 
Then + * after each set of results first can be set to the last entry returned + * and then the fields can be incremented in reverse sort order (ino < + * minor < major) as each increasingly significant value wraps around to + * 0. + * + * If first is greater than last then the walk will return 0 entries. */ -#define SCOUTFS_IOC_INODES_SINCE _IOW(SCOUTFS_IOCTL_MAGIC, 1, \ - struct scoutfs_ioctl_inodes_since) +struct scoutfs_ioctl_walk_inodes { + struct scoutfs_ioctl_walk_inodes_entry first; + struct scoutfs_ioctl_walk_inodes_entry last; + __u64 entries_ptr; + __u32 nr_entries; + __u8 index; +} __packed; + +enum { + SCOUTFS_IOC_WALK_INODES_CTIME = 0, + SCOUTFS_IOC_WALK_INODES_MTIME, + SCOUTFS_IOC_WALK_INODES_SIZE, + SCOUTFS_IOC_WALK_INODES_UNKNOWN, +}; + +/* + * Adds entries to the user's buffer for each inode that is found in the + * given index between the first and last positions. + */ +#define SCOUTFS_IOC_WALK_INODES _IOW(SCOUTFS_IOCTL_MAGIC, 1, \ + struct scoutfs_ioctl_walk_inodes) /* * Fill the path buffer with the next path to the target inode. An @@ -80,9 +109,6 @@ struct scoutfs_ioctl_ino_path { #define SCOUTFS_IOC_INO_PATH _IOW(SCOUTFS_IOCTL_MAGIC, 2, \ struct scoutfs_ioctl_ino_path) -#define SCOUTFS_IOC_INODE_DATA_SINCE _IOW(SCOUTFS_IOCTL_MAGIC, 3, \ - struct scoutfs_ioctl_inodes_since) - #define SCOUTFS_IOC_DATA_VERSION _IOW(SCOUTFS_IOCTL_MAGIC, 4, __u64) struct scoutfs_ioctl_release {