From d480243c115e4eae926161cfaaa8342a0a115c1a Mon Sep 17 00:00:00 2001 From: Auke Kok Date: Thu, 11 May 2023 15:32:57 -0400 Subject: [PATCH] Support .read/write_iter callbacks in lieu of .aio_read/write The aio_read and aio_write callbacks are no longer used by newer kernels, which now use iter-based readers and writers. We can avoid implementing plain .read and .write as an iter will be generated when needed for us automatically. We add a new data_wait_check_iter() function accordingly. With these methods removed from the kernel, the el8 kernel no longer uses the extended ops wrapper struct and is much closer now to upstream. As a result, a lot of methods are moving around from inode_dir_operations to and from inode_file_operations etc, and perhaps things will look a bit more structured. We also need a slightly different data_wait_check() that accounts for the iter and offset properly. Signed-off-by: Auke Kok --- kmod/src/Makefile.kernelcompat | 23 +++++++ kmod/src/data.c | 36 +++++++++++ kmod/src/data.h | 3 + kmod/src/dir.c | 93 ++++++++++++++++++++++------ kmod/src/dir.h | 4 ++ kmod/src/file.c | 108 +++++++++++++++++++++++++++++++++ kmod/src/file.h | 5 ++ kmod/src/inode.c | 19 +++++- kmod/src/inode.h | 5 ++ kmod/src/ioctl.c | 2 + 10 files changed, 276 insertions(+), 22 deletions(-) diff --git a/kmod/src/Makefile.kernelcompat b/kmod/src/Makefile.kernelcompat index 1ea7a805..55c5a256 100644 --- a/kmod/src/Makefile.kernelcompat +++ b/kmod/src/Makefile.kernelcompat @@ -225,3 +225,26 @@ endif ifneq (,$(shell grep 'readahead.*struct readahead_control' include/linux/fs.h)) ccflags-y += -DKC_FILE_AOPS_READAHEAD endif + +# +# v4.0-rc7-1743-g8436318205b9 +# +# .aio_read and .aio_write no longer exist. All reads and writes now use the +# .read_iter and .write_iter methods, or must implement .read and .write (which +# we don't). 
+# +ifneq (,$(shell grep 'ssize_t.*aio_read' include/linux/fs.h)) +ccflags-y += -DKC_LINUX_HAVE_FOP_AIO_READ=1 +endif + +# +# rhel7 has a custom inode_operations_wrapper struct that is discarded +# entirely in favor of upstream structure since rhel8. +# +ifneq (,$(shell grep 'void.*follow_link.*struct dentry' include/linux/fs.h)) +ccflags-y += -DKC_LINUX_HAVE_RHEL_IOPS_WRAPPER=1 +endif + +ifneq (,$(shell grep 'size_t.*ki_left;' include/linux/aio.h)) +ccflags-y += -DKC_LINUX_AIO_KI_LEFT=1 +endif diff --git a/kmod/src/data.c b/kmod/src/data.c index 987f90a1..78dd74ad 100644 --- a/kmod/src/data.c +++ b/kmod/src/data.c @@ -1807,6 +1807,37 @@ int scoutfs_data_wait_check_iov(struct inode *inode, const struct iovec *iov, return ret; } +int scoutfs_data_wait_check_iter(struct inode *inode, loff_t pos, struct iov_iter *iter, + u8 sef, u8 op, struct scoutfs_data_wait *dw, + struct scoutfs_lock *lock) +{ + size_t count = iov_iter_count(iter); + size_t off = iter->iov_offset; + const struct iovec *iov; + size_t len; + int ret = 0; + + for (iov = iter->iov; count > 0; iov++) { + len = min_t(size_t, iov->iov_len - off, count); + off = 0; + if (len == 0) + continue; + + /* clamp: a truncated iter's segments can exceed count */
+ ret = scoutfs_data_wait_check(inode, pos, len, + sef, op, dw, lock); + + if (ret != 0) + break; + + + pos += len; + count -= len; + } + + return ret; +} + int scoutfs_data_wait(struct inode *inode, struct scoutfs_data_wait *dw) { DECLARE_DATA_WAIT_ROOT(inode->i_sb, rt); @@ -1909,10 +1940,15 @@ const struct address_space_operations scoutfs_file_aops = { }; const struct file_operations scoutfs_file_fops = { +#ifdef KC_LINUX_HAVE_FOP_AIO_READ .read = do_sync_read, .write = do_sync_write, .aio_read = scoutfs_file_aio_read, .aio_write = scoutfs_file_aio_write, +#else + .read_iter = scoutfs_file_read_iter, + .write_iter = scoutfs_file_write_iter, +#endif .unlocked_ioctl = scoutfs_ioctl, .fsync = scoutfs_file_fsync, .llseek = scoutfs_file_llseek, diff --git a/kmod/src/data.h b/kmod/src/data.h index a34854eb..1fbfce9b 100644 --- a/kmod/src/data.h +++ b/kmod/src/data.h @@ -65,6 +65,9 @@ int scoutfs_data_wait_check_iov(struct inode *inode, const struct iovec *iov, unsigned long nr_segs, loff_t pos, u8 sef, u8 op, struct scoutfs_data_wait *ow, struct scoutfs_lock *lock); +int scoutfs_data_wait_check_iter(struct inode *inode, loff_t pos, struct iov_iter *iter, + u8 sef, u8 op, struct scoutfs_data_wait *ow, + struct scoutfs_lock *lock); bool scoutfs_data_wait_found(struct scoutfs_data_wait *ow); int scoutfs_data_wait(struct inode *inode, struct scoutfs_data_wait *ow); diff --git a/kmod/src/dir.c b/kmod/src/dir.c index f8bf50e9..357d33c6 100644 --- a/kmod/src/dir.c +++ b/kmod/src/dir.c @@ -1059,14 +1059,14 @@ static int symlink_item_ops(struct super_block *sb, enum symlink_ops op, u64 ino } /* - * Full a buffer with the null terminated symlink, point nd at it, and - * return it so put_link can free it once the vfs is done. + * Fill a buffer with the null terminated symlink, and return it + * so callers can free it once the vfs is done. 
* * We chose to pay the runtime cost of per-call allocation and copy * overhead instead of wiring up symlinks to the page cache, storing * each small link in a full page, and later having to reclaim them. */ -static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd) +static void *scoutfs_get_link_target(struct dentry *dentry) { struct inode *inode = dentry->d_inode; struct super_block *sb = inode->i_sb; @@ -1125,32 +1125,41 @@ out: if (ret < 0) { kfree(path); path = ERR_PTR(ret); - } else { - nd_set_link(nd, path); } + scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ); return path; } +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER +static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + char *path; + + path = scoutfs_get_link_target(dentry); + if (!IS_ERR_OR_NULL(path)) + nd_set_link(nd, path); + return path; +} + static void scoutfs_put_link(struct dentry *dentry, struct nameidata *nd, void *cookie) { if (!IS_ERR_OR_NULL(cookie)) kfree(cookie); } +#else +static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done) +{ + char *path; -const struct inode_operations scoutfs_symlink_iops = { - .readlink = generic_readlink, - .follow_link = scoutfs_follow_link, - .put_link = scoutfs_put_link, - .getattr = scoutfs_getattr, - .setattr = scoutfs_setattr, - .setxattr = generic_setxattr, - .getxattr = generic_getxattr, - .listxattr = scoutfs_listxattr, - .removexattr = generic_removexattr, - .get_acl = scoutfs_get_acl, -}; + /* dentry is NULL in RCU-walk; -ECHILD makes the vfs retry in ref-walk */ + path = dentry ? scoutfs_get_link_target(dentry) : ERR_PTR(-ECHILD); + if (!IS_ERR_OR_NULL(path)) + set_delayed_call(done, kfree_link, path); + return path; +} +#endif /* * Symlink target paths can be annoyingly large. 
We store relatively @@ -1811,12 +1820,14 @@ out_unlock: return ret; } +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER static int scoutfs_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) { return scoutfs_rename_common(old_dir, old_dentry, new_dir, new_dentry, 0); } +#endif static int scoutfs_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, @@ -1886,6 +1897,37 @@ out: return ret; } +const struct inode_operations scoutfs_symlink_iops = { +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER + .readlink = generic_readlink, + .follow_link = scoutfs_follow_link, + .put_link = scoutfs_put_link, +#else + .get_link = scoutfs_get_link, +#endif + .getattr = scoutfs_getattr, + .setattr = scoutfs_setattr, +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, +#endif + .listxattr = scoutfs_listxattr, +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER + .removexattr = generic_removexattr, +#endif + .get_acl = scoutfs_get_acl, +#ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER + .tmpfile = scoutfs_tmpfile, + .rename = scoutfs_rename_common, + .symlink = scoutfs_symlink, + .unlink = scoutfs_unlink, + .link = scoutfs_link, + .mkdir = scoutfs_mkdir, + .create = scoutfs_create, + .lookup = scoutfs_lookup, +#endif +}; + const struct file_operations scoutfs_dir_fops = { .KC_FOP_READDIR = scoutfs_readdir, #ifdef KC_FMODE_KABI_ITERATE @@ -1897,9 +1939,12 @@ const struct file_operations scoutfs_dir_fops = { }; - +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER const struct inode_operations_wrapper scoutfs_dir_iops = { .ops = { +#else +const struct inode_operations scoutfs_dir_iops = { +#endif .lookup = scoutfs_lookup, .mknod = scoutfs_mknod, .create = scoutfs_create, @@ -1907,17 +1952,25 @@ const struct inode_operations_wrapper scoutfs_dir_iops = { .link = scoutfs_link, .unlink = scoutfs_unlink, .rmdir = scoutfs_unlink, - .rename = scoutfs_rename, .getattr = scoutfs_getattr, .setattr = scoutfs_setattr, 
+#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER + .rename = scoutfs_rename, .setxattr = generic_setxattr, .getxattr = generic_getxattr, - .listxattr = scoutfs_listxattr, .removexattr = generic_removexattr, +#endif + .listxattr = scoutfs_listxattr, .get_acl = scoutfs_get_acl, .symlink = scoutfs_symlink, .permission = scoutfs_permission, +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER }, +#endif .tmpfile = scoutfs_tmpfile, +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER .rename2 = scoutfs_rename2, +#else + .rename = scoutfs_rename2, +#endif }; diff --git a/kmod/src/dir.h b/kmod/src/dir.h index 9bd1f193..9985b7c3 100644 --- a/kmod/src/dir.h +++ b/kmod/src/dir.h @@ -5,7 +5,11 @@ #include "lock.h" extern const struct file_operations scoutfs_dir_fops; +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER extern const struct inode_operations_wrapper scoutfs_dir_iops; +#else +extern const struct inode_operations scoutfs_dir_iops; +#endif extern const struct inode_operations scoutfs_symlink_iops; extern const struct dentry_operations scoutfs_dentry_ops; diff --git a/kmod/src/file.c b/kmod/src/file.c index 08058592..fd31a9d0 100644 --- a/kmod/src/file.c +++ b/kmod/src/file.c @@ -29,6 +29,7 @@ #include "per_task.h" #include "omap.h" +#ifdef KC_LINUX_HAVE_FOP_AIO_READ /* * Start a high level file read. We check for offline extents in the * read region here so that we only check the extents once. 
We use the @@ -146,6 +147,113 @@ out: return ret; } +#else +ssize_t scoutfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct scoutfs_inode_info *si = SCOUTFS_I(inode); + struct super_block *sb = inode->i_sb; + struct scoutfs_lock *scoutfs_inode_lock = NULL; + SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent); + DECLARE_DATA_WAIT(dw); + int ret; + +retry: + /* protect checked extents from release */ + inode_lock(inode); + atomic_inc(&inode->i_dio_count); + inode_unlock(inode); + + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, + SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock); + if (ret) + goto out; + + if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) { + ret = scoutfs_data_wait_check_iter(inode, iocb->ki_pos, to, + SEF_OFFLINE, + SCOUTFS_IOC_DWO_READ, + &dw, scoutfs_inode_lock); + if (ret != 0) + goto out; + } else { + WARN_ON_ONCE(true); + } + + ret = generic_file_read_iter(iocb, to); + +out: + inode_dio_end(inode); + scoutfs_per_task_del(&si->pt_data_lock, &pt_ent); + scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_READ); + + if (scoutfs_data_wait_found(&dw)) { + ret = scoutfs_data_wait(inode, &dw); + if (ret == 0) + goto retry; + } + return ret; +} + +ssize_t scoutfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file_inode(file); + struct scoutfs_inode_info *si = SCOUTFS_I(inode); + struct super_block *sb = inode->i_sb; + struct scoutfs_lock *scoutfs_inode_lock = NULL; + SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent); + DECLARE_DATA_WAIT(dw); + int ret; + ssize_t written = 0; + +retry: + inode_lock(inode); + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE, + SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock); + if (ret) + goto out; + + ret = generic_write_checks(iocb, from); + if (ret <= 0) + goto out; + + ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock); + if 
(ret) + goto out; + + if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) { + /* data_version is per inode, whole file must be online */ + ret = scoutfs_data_wait_check_iter(inode, iocb->ki_pos, from, + SEF_OFFLINE, + SCOUTFS_IOC_DWO_WRITE, + &dw, scoutfs_inode_lock); + if (ret != 0) + goto out; + } + + /* XXX: remove SUID bit */ + + written = __generic_file_write_iter(iocb, from); + +out: + scoutfs_per_task_del(&si->pt_data_lock, &pt_ent); + scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_WRITE); + inode_unlock(inode); + + if (scoutfs_data_wait_found(&dw)) { + ret = scoutfs_data_wait(inode, &dw); + if (ret == 0) + goto retry; + } + /* sync what was written; ret only has the pre-write check status */ + if (written > 0) + written = generic_write_sync(iocb, written); + + return written ? written : ret; +} +#endif int scoutfs_permission(struct inode *inode, int mask) { diff --git a/kmod/src/file.h b/kmod/src/file.h index 82d86618..82829ef5 100644 --- a/kmod/src/file.h +++ b/kmod/src/file.h @@ -1,10 +1,15 @@ #ifndef _SCOUTFS_FILE_H_ #define _SCOUTFS_FILE_H_ +#ifdef KC_LINUX_HAVE_FOP_AIO_READ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos); +#else +ssize_t scoutfs_file_read_iter(struct kiocb *, struct iov_iter *); +ssize_t scoutfs_file_write_iter(struct kiocb *, struct iov_iter *); +#endif int scoutfs_permission(struct inode *inode, int mask); loff_t scoutfs_file_llseek(struct file *file, loff_t offset, int whence); diff --git a/kmod/src/inode.c b/kmod/src/inode.c index 54c9850d..540004cc 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -143,10 +143,12 @@ void scoutfs_destroy_inode(struct inode *inode) static const struct inode_operations scoutfs_file_iops = { .getattr = scoutfs_getattr, .setattr = scoutfs_setattr, +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER .setxattr = generic_setxattr, .getxattr = generic_getxattr, - 
.listxattr = scoutfs_listxattr, .removexattr = generic_removexattr, +#endif + .listxattr = scoutfs_listxattr, .get_acl = scoutfs_get_acl, .fiemap = scoutfs_data_fiemap, }; @@ -154,10 +156,12 @@ static const struct inode_operations scoutfs_file_iops = { static const struct inode_operations scoutfs_special_iops = { .getattr = scoutfs_getattr, .setattr = scoutfs_setattr, +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER .setxattr = generic_setxattr, .getxattr = generic_getxattr, - .listxattr = scoutfs_listxattr, .removexattr = generic_removexattr, +#endif + .listxattr = scoutfs_listxattr, .get_acl = scoutfs_get_acl, }; @@ -174,8 +178,12 @@ static void set_inode_ops(struct inode *inode) inode->i_fop = &scoutfs_file_fops; break; case S_IFDIR: +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER inode->i_op = &scoutfs_dir_iops.ops; inode->i_flags |= S_IOPS_WRAPPER; +#else + inode->i_op = &scoutfs_dir_iops; +#endif inode->i_fop = &scoutfs_dir_fops; break; case S_IFLNK: @@ -340,10 +348,17 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock) return ret; } +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) { struct inode *inode = dentry->d_inode; +#else +int scoutfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) +{ + struct inode *inode = d_inode(path->dentry); +#endif struct super_block *sb = inode->i_sb; struct scoutfs_lock *lock = NULL; int ret; diff --git a/kmod/src/inode.h b/kmod/src/inode.h index 86e1b9fb..607ac9f8 100644 --- a/kmod/src/inode.h +++ b/kmod/src/inode.h @@ -123,8 +123,13 @@ void scoutfs_inode_get_onoff(struct inode *inode, s64 *on, s64 *off); int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock); int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock); +#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat); +#else +int 
scoutfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags); +#endif int scoutfs_setattr(struct dentry *dentry, struct iattr *attr); int scoutfs_inode_orphan_create(struct super_block *sb, u64 ino, struct scoutfs_lock *lock, diff --git a/kmod/src/ioctl.c b/kmod/src/ioctl.c index fb8c8c46..6bf70403 100644 --- a/kmod/src/ioctl.c +++ b/kmod/src/ioctl.c @@ -480,8 +480,10 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg) /* the iocb is really only used for the file pointer :P */ init_sync_kiocb(&kiocb, file); kiocb.ki_pos = args.offset; +#ifdef KC_LINUX_AIO_KI_LEFT kiocb.ki_left = args.length; kiocb.ki_nbytes = args.length; +#endif iov.iov_base = (void __user *)(unsigned long)args.buf_ptr; iov.iov_len = args.length;