diff --git a/kmod/src/Makefile b/kmod/src/Makefile index 975ec319..d96c4967 100644 --- a/kmod/src/Makefile +++ b/kmod/src/Makefile @@ -8,6 +8,7 @@ CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include -include $(src)/Makefile.kernelcompat scoutfs-y += \ + acl.o \ avl.o \ alloc.o \ block.o \ diff --git a/kmod/src/Makefile.kernelcompat b/kmod/src/Makefile.kernelcompat index bd236c43..d7995cc5 100644 --- a/kmod/src/Makefile.kernelcompat +++ b/kmod/src/Makefile.kernelcompat @@ -34,3 +34,12 @@ endif ifneq (,$(shell grep 'FMODE_KABI_ITERATE' include/linux/fs.h)) ccflags-y += -DKC_FMODE_KABI_ITERATE endif + +# +# v4.7-rc2-23-g0d4d717f2583 +# +# Added user_ns argument to posix_acl_valid +# +ifneq (,$(shell grep 'posix_acl_valid.*user_ns,' include/linux/posix_acl.h)) +ccflags-y += -DKC_POSIX_ACL_VALID_USER_NS +endif diff --git a/kmod/src/acl.c b/kmod/src/acl.c new file mode 100644 index 00000000..11a83266 --- /dev/null +++ b/kmod/src/acl.c @@ -0,0 +1,355 @@ +/* + * Copyright (C) 2022 Versity Software, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include +#include + +#include "format.h" +#include "super.h" +#include "scoutfs_trace.h" +#include "xattr.h" +#include "acl.h" +#include "inode.h" +#include "trans.h" + +/* + * POSIX draft ACLs are stored as full xattr items with the entries + * encoded as the kernel's posix_acl_xattr_{header,entry} value structs. + * + * They're accessed and modified via user facing synthetic xattrs, iops + * calls from the kernel, during inode mode changes, and during inode + * creation. + * + * ACL access devolves into xattr access which is relatively expensive + * so we maintain the cached native form in the vfs inode. We drop the + * cache in lock invalidation which means that cached acl access must + * always be performed under cluster locking. + */ + +static int acl_xattr_name_len(int type, char **name, size_t *name_len) +{ + int ret = 0; + + switch (type) { + case ACL_TYPE_ACCESS: + *name = XATTR_NAME_POSIX_ACL_ACCESS; + if (name_len) + *name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1; + break; + case ACL_TYPE_DEFAULT: + *name = XATTR_NAME_POSIX_ACL_DEFAULT; + if (name_len) + *name_len = sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} + +struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock) +{ + struct posix_acl *acl; + char *value = NULL; + char *name; + int ret; + + if (!IS_POSIXACL(inode)) + return NULL; + + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + ret = acl_xattr_name_len(type, &name, NULL); + if (ret < 0) + return ERR_PTR(ret); + + ret = scoutfs_xattr_get_locked(inode, name, NULL, 0, lock); + if (ret > 0) { + value = kzalloc(ret, GFP_NOFS); + if (!value) + ret = -ENOMEM; + else + ret = scoutfs_xattr_get_locked(inode, name, value, ret, lock); + } + if (ret > 0) { + acl = posix_acl_from_xattr(&init_user_ns, value, ret); + } else if (ret == -ENODATA || ret == 0) { + acl = NULL; + } else { + acl = ERR_PTR(ret); + } + + /* can set null negative cache */ + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); + + kfree(value); + + return acl; +} + +struct posix_acl *scoutfs_get_acl(struct inode *inode, int type) +{ + struct super_block *sb = inode->i_sb; + struct scoutfs_lock *lock = NULL; + struct posix_acl *acl; + int ret; + + if (!IS_POSIXACL(inode)) + return NULL; + + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock); + if (ret < 0) { + acl = ERR_PTR(ret); + } else { + acl = scoutfs_get_acl_locked(inode, type, lock); + scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); + } + + return acl; +} + +/* + * The caller has acquired the locks and dirtied the inode, they'll + * update the inode item if we return 0. + */ +int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type, + struct scoutfs_lock *lock, struct list_head *ind_locks) +{ + static const struct scoutfs_xattr_prefix_tags tgs = {0,}; /* never scoutfs. prefix */ + bool set_mode = false; + char *value = NULL; + umode_t new_mode; + size_t name_len; + char *name; + int size = 0; + int ret; + + ret = acl_xattr_name_len(type, &name, &name_len); + if (ret < 0) + return ret; + + switch (type) { + case ACL_TYPE_ACCESS: + if (acl) { + ret = posix_acl_update_mode(inode, &new_mode, &acl); + if (ret < 0) + goto out; + set_mode = true; + } + break; + case ACL_TYPE_DEFAULT: + if (!S_ISDIR(inode->i_mode)) { + ret = acl ? -EINVAL : 0; + goto out; + } + break; + } + + if (acl) { + size = posix_acl_xattr_size(acl->a_count); + value = kmalloc(size, GFP_NOFS); + if (!value) { + ret = -ENOMEM; + goto out; + } + + ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); + if (ret < 0) + goto out; + } + + ret = scoutfs_xattr_set_locked(inode, name, name_len, value, size, 0, &tgs, + lock, NULL, ind_locks); + if (ret == 0 && set_mode) { + inode->i_mode = new_mode; + if (!value) { + /* can be setting an acl that only affects mode, didn't need xattr */ + inode_inc_iversion(inode); + inode->i_ctime = CURRENT_TIME; + } + } + +out: + if (!ret) + set_cached_acl(inode, type, acl); + + kfree(value); + + return ret; +} + +int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + struct super_block *sb = inode->i_sb; + struct scoutfs_lock *lock = NULL; + LIST_HEAD(ind_locks); + int ret; + + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE, SCOUTFS_LKF_REFRESH_INODE, inode, &lock) ?: + scoutfs_inode_index_lock_hold(inode, &ind_locks, false, true); + if (ret == 0) { + ret = scoutfs_dirty_inode_item(inode, lock) ?: + scoutfs_set_acl_locked(inode, acl, type, lock, &ind_locks); + if (ret == 0) + scoutfs_update_inode_item(inode, lock, &ind_locks); + + scoutfs_release_trans(sb); + scoutfs_inode_index_unlock(sb, &ind_locks); + } + + scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE); + return ret; +} + +int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size, + int type) +{ + struct posix_acl *acl; + int ret = 0; + + if (!IS_POSIXACL(dentry->d_inode)) + return -EOPNOTSUPP; + + acl = scoutfs_get_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + + ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); + posix_acl_release(acl); + + return ret; +} + +int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size, + int flags, int type) +{ + struct posix_acl *acl = NULL; + int ret; + + if (!inode_owner_or_capable(dentry->d_inode)) + return -EPERM; + + if (!IS_POSIXACL(dentry->d_inode)) + return -EOPNOTSUPP; + + if (value) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (acl) { + ret = kc_posix_acl_valid(&init_user_ns, acl); + if (ret) + goto out; + } + } + + ret = scoutfs_set_acl(dentry->d_inode, acl, type); +out: + posix_acl_release(acl); + + return ret; +} + +/* + * Apply the parent's default acl to new inodes access acl and inherit + * it as the default for new directories. The caller holds locks and a + * transaction. + */ +int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir, + struct scoutfs_lock *lock, struct scoutfs_lock *dir_lock, + struct list_head *ind_locks) +{ + struct posix_acl *acl = NULL; + int ret = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (IS_POSIXACL(dir)) { + acl = scoutfs_get_acl_locked(dir, ACL_TYPE_DEFAULT, dir_lock); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + + if (!acl) + inode->i_mode &= ~current_umask(); + } + + if (IS_POSIXACL(dir) && acl) { + if (S_ISDIR(inode->i_mode)) { + ret = scoutfs_set_acl_locked(inode, acl, ACL_TYPE_DEFAULT, + lock, ind_locks); + if (ret) + goto out; + } + ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + if (ret < 0) + return ret; + if (ret > 0) + ret = scoutfs_set_acl_locked(inode, acl, ACL_TYPE_ACCESS, + lock, ind_locks); + } else { + cache_no_acl(inode); + } +out: + posix_acl_release(acl); + return ret; +} + +/* + * Update the access ACL based on a newly set mode. If we return an + * error then the xattr wasn't changed. + * + * Annoyingly, setattr_copy has logic that transforms the final set mode + * that we want to use to update the acl. But we don't want to modify + * the other inode fields while discovering the resulting mode. We're + * relying on acl_chmod not caring about the transformation (currently + * just clears sgid). It would be better if we could get the resulting + * mode to give to acl_chmod without modifying the other inode fields. + * + * The caller has the inode mutex, a cluster lock, transaction, and will + * update the inode item if we return success. + */ +int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr, + struct scoutfs_lock *lock, struct list_head *ind_locks) +{ + struct posix_acl *acl; + int ret = 0; + + if (!IS_POSIXACL(inode) || !(attr->ia_valid & ATTR_MODE)) + return 0; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + acl = scoutfs_get_acl_locked(inode, ACL_TYPE_ACCESS, lock); + if (IS_ERR_OR_NULL(acl)) + return PTR_ERR(acl); + + ret = posix_acl_chmod(&acl, GFP_KERNEL, attr->ia_mode); + if (ret) + return ret; + + ret = scoutfs_set_acl_locked(inode, acl, ACL_TYPE_ACCESS, lock, ind_locks); + posix_acl_release(acl); + return ret; +} diff --git a/kmod/src/acl.h b/kmod/src/acl.h new file mode 100644 index 00000000..a9235eb5 --- /dev/null +++ b/kmod/src/acl.h @@ -0,0 +1,18 @@ +#ifndef _SCOUTFS_ACL_H_ +#define _SCOUTFS_ACL_H_ + +struct posix_acl *scoutfs_get_acl(struct inode *inode, int type); +struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock); +int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type, + struct scoutfs_lock *lock, struct list_head *ind_locks); +int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size, + int type); +int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size, + int flags, int type); +int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr, + struct scoutfs_lock *lock, struct list_head *ind_locks); +int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir, + struct scoutfs_lock *lock, struct scoutfs_lock *dir_lock, + struct list_head *ind_locks); +#endif diff --git a/kmod/src/dir.c b/kmod/src/dir.c index 00734909..e4b83256 100644 --- a/kmod/src/dir.c +++ b/kmod/src/dir.c @@ -32,6 +32,7 @@ #include "hash.h" #include "omap.h" #include "forest.h" +#include "acl.h" #include "counters.h" #include "scoutfs_trace.h" @@ -765,7 +766,8 @@ retry: if (ret) goto out_unlock; - ret = scoutfs_new_inode(sb, dir, mode, rdev, ino, *inode_lock, &inode); + ret = scoutfs_new_inode(sb, dir, mode, rdev, ino, *inode_lock, &inode) ?: + scoutfs_init_acl_locked(inode, dir, *inode_lock, *dir_lock, ind_locks); if (ret < 0) goto out; @@ -1242,10 +1244,11 @@ const struct inode_operations scoutfs_symlink_iops = { .put_link = scoutfs_put_link, .getattr = scoutfs_getattr, .setattr = scoutfs_setattr, - .setxattr = scoutfs_setxattr, - .getxattr = scoutfs_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, .listxattr = scoutfs_listxattr, - .removexattr = scoutfs_removexattr, + .removexattr = generic_removexattr, + .get_acl = scoutfs_get_acl, }; /* @@ -1978,10 +1981,11 @@ const struct inode_operations_wrapper scoutfs_dir_iops = { .rename = scoutfs_rename, .getattr = scoutfs_getattr, .setattr = scoutfs_setattr, - .setxattr = scoutfs_setxattr, - .getxattr = scoutfs_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, .listxattr = scoutfs_listxattr, - .removexattr = scoutfs_removexattr, + .removexattr = generic_removexattr, + .get_acl = scoutfs_get_acl, .symlink = scoutfs_symlink, .permission = scoutfs_permission, }, diff --git a/kmod/src/inode.c b/kmod/src/inode.c index 62ef3093..02456fde 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -36,6 +36,7 @@ #include "omap.h" #include "forest.h" #include "btree.h" +#include "acl.h" /* * XXX @@ -136,20 +137,22 @@ void scoutfs_destroy_inode(struct inode *inode) static const struct inode_operations scoutfs_file_iops = { .getattr = scoutfs_getattr, .setattr = scoutfs_setattr, - .setxattr = scoutfs_setxattr, - .getxattr = scoutfs_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, .listxattr = scoutfs_listxattr, - .removexattr = scoutfs_removexattr, + .removexattr = generic_removexattr, + .get_acl = scoutfs_get_acl, .fiemap = scoutfs_data_fiemap, }; static const struct inode_operations scoutfs_special_iops = { .getattr = scoutfs_getattr, .setattr = scoutfs_setattr, - .setxattr = scoutfs_setxattr, - .getxattr = scoutfs_getxattr, + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, .listxattr = scoutfs_listxattr, - .removexattr = scoutfs_removexattr, + .removexattr = generic_removexattr, + .get_acl = scoutfs_get_acl, }; /* @@ -507,10 +510,15 @@ retry: if (ret) goto out; + ret = scoutfs_acl_chmod_locked(inode, attr, lock, &ind_locks); + if (ret < 0) + goto release; + setattr_copy(inode, attr); inode_inc_iversion(inode); scoutfs_update_inode_item(inode, lock, &ind_locks); +release: scoutfs_release_trans(sb); scoutfs_inode_index_unlock(sb, &ind_locks); out: diff --git a/kmod/src/kernelcompat.h b/kmod/src/kernelcompat.h index 6899f684..12e16a10 100644 --- a/kmod/src/kernelcompat.h +++ b/kmod/src/kernelcompat.h @@ -46,4 +46,10 @@ static inline int dir_emit_dots(struct file *file, void *dirent, } #endif +#ifdef KC_POSIX_ACL_VALID_USER_NS +#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(user_ns, acl) +#else +#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(acl) +#endif + #endif diff --git a/kmod/src/lock.c b/kmod/src/lock.c index 9275e3a4..db682063 100644 --- a/kmod/src/lock.c +++ b/kmod/src/lock.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "super.h" #include "lock.h" @@ -156,6 +157,8 @@ static void invalidate_inode(struct super_block *sb, u64 ino) if (!linfo->unmounting) d_prune_aliases(inode); + forget_all_cached_acls(inode); + si->drop_invalidated = true; if (scoutfs_lock_is_covered(sb, &si->ino_lock_cov) && inode->i_nlink > 0) { iput(inode); diff --git a/kmod/src/options.c b/kmod/src/options.c index a447a931..474d2b0d 100644 --- a/kmod/src/options.c +++ b/kmod/src/options.c @@ -29,14 +29,18 @@ #include "inode.h" enum { + Opt_acl, Opt_metadev_path, + Opt_noacl, Opt_orphan_scan_delay_ms, Opt_quorum_slot_nr, Opt_err, }; static const match_table_t tokens = { + {Opt_acl, "acl"}, {Opt_metadev_path, "metadev_path=%s"}, + {Opt_noacl, "noacl"}, {Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"}, {Opt_quorum_slot_nr, "quorum_slot_nr=%s"}, {Opt_err, NULL} @@ -110,7 +114,7 @@ static void init_default_options(struct scoutfs_mount_options *opts) { memset(opts, 0, sizeof(*opts)); opts->quorum_slot_nr = -1; - opts->orphan_scan_delay_ms = DEFAULT_ORPHAN_SCAN_DELAY_MS; + opts->orphan_scan_delay_ms = -1; } /* @@ -134,12 +138,20 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m token = match_token(p, tokens, args); switch (token) { + case Opt_acl: + sb->s_flags |= MS_POSIXACL; + break; + case Opt_metadev_path: ret = parse_bdev_path(sb, &args[0], &opts->metadev_path); if (ret < 0) return ret; break; + case Opt_noacl: + sb->s_flags &= ~MS_POSIXACL; + break; + case Opt_orphan_scan_delay_ms: if (opts->orphan_scan_delay_ms != -1) { scoutfs_err(sb, "multiple orphan_scan_delay_ms options provided, only provide one."); @@ -181,6 +193,9 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m } } + if (opts->orphan_scan_delay_ms == -1) + opts->orphan_scan_delay_ms = DEFAULT_ORPHAN_SCAN_DELAY_MS; + if (!opts->metadev_path) { scoutfs_err(sb, "Required mount option \"metadev_path\" not found"); return -EINVAL; @@ -250,10 +265,15 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root) { struct super_block *sb = root->d_sb; struct scoutfs_mount_options opts; + const bool is_acl = !!(sb->s_flags & MS_POSIXACL); scoutfs_options_read(sb, &opts); + if (is_acl) + seq_puts(seq, ",acl"); seq_printf(seq, ",metadev_path=%s", opts.metadev_path); + if (!is_acl) + seq_puts(seq, ",noacl"); seq_printf(seq, ",orphan_scan_delay_ms=%u", opts.orphan_scan_delay_ms); if (opts.quorum_slot_nr >= 0) seq_printf(seq, ",quorum_slot_nr=%d", opts.quorum_slot_nr); diff --git a/kmod/src/super.c b/kmod/src/super.c index d38fcb65..0e7fb3e8 100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -47,6 +47,7 @@ #include "omap.h" #include "volopt.h" #include "fence.h" +#include "xattr.h" #include "scoutfs_trace.h" static struct dentry *scoutfs_debugfs_root; @@ -483,7 +484,8 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_op = &scoutfs_super_ops; sb->s_export_op = &scoutfs_export_ops; - sb->s_flags |= MS_I_VERSION; + sb->s_xattr = scoutfs_xattr_handlers; + sb->s_flags |= MS_I_VERSION | MS_POSIXACL; /* btree blocks use long lived bh->b_data refs */ mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS); diff --git a/kmod/src/xattr.c b/kmod/src/xattr.c index a87b69c2..abdf8e0b 100644 --- a/kmod/src/xattr.c +++ b/kmod/src/xattr.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "format.h" #include "inode.h" @@ -26,6 +27,7 @@ #include "xattr.h" #include "lock.h" #include "hash.h" +#include "acl.h" #include "scoutfs_trace.h" /* @@ -79,16 +81,6 @@ static void init_xattr_key(struct scoutfs_key *key, u64 ino, u32 name_hash, #define SCOUTFS_XATTR_PREFIX "scoutfs." #define SCOUTFS_XATTR_PREFIX_LEN (sizeof(SCOUTFS_XATTR_PREFIX) - 1) -static int unknown_prefix(const char *name) -{ - return strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) && - strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) && - strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) && - strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)&& - strncmp(name, SCOUTFS_XATTR_PREFIX, SCOUTFS_XATTR_PREFIX_LEN); -} - - #define HIDE_TAG "hide." #define SRCH_TAG "srch." #define TOTL_TAG "totl." @@ -455,22 +447,17 @@ out: * Copy the value for the given xattr name into the caller's buffer, if it * fits. Return the bytes copied or -ERANGE if it doesn't fit. */ -ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer, - size_t size) +int scoutfs_xattr_get_locked(struct inode *inode, const char *name, void *buffer, size_t size, + struct scoutfs_lock *lck) { - struct inode *inode = dentry->d_inode; struct scoutfs_inode_info *si = SCOUTFS_I(inode); struct super_block *sb = inode->i_sb; struct scoutfs_xattr *xat = NULL; - struct scoutfs_lock *lck = NULL; struct scoutfs_key key; unsigned int xat_bytes; size_t name_len; int ret; - if (unknown_prefix(name)) - return -EOPNOTSUPP; - name_len = strlen(name); if (name_len > SCOUTFS_XATTR_MAX_NAME_LEN) return -ENODATA; @@ -480,10 +467,6 @@ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer, if (!xat) return -ENOMEM; - ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lck); - if (ret) - goto out; - down_read(&si->xattr_rwsem); ret = get_next_xattr(inode, &key, xat, xat_bytes, name, name_len, 0, 0, lck); @@ -509,12 +492,27 @@ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer, ret = copy_xattr_value(sb, &key, xat, xat_bytes, buffer, size, lck); unlock: up_read(&si->xattr_rwsem); - scoutfs_unlock(sb, lck, SCOUTFS_LOCK_READ); -out: + kfree(xat); return ret; } +static int scoutfs_xattr_get(struct dentry *dentry, const char *name, void *buffer, size_t size) +{ + struct inode *inode = dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct scoutfs_lock *lock = NULL; + int ret; + + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock); + if (ret == 0) { + ret = scoutfs_xattr_get_locked(inode, name, buffer, size, lock); + scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); + } + + return ret; +} + void scoutfs_xattr_init_totl_key(struct scoutfs_key *key, u64 *name) { scoutfs_key_set_zeros(key); @@ -619,30 +617,32 @@ int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len) * cause creation to fail if the xattr already exists (_CREATE) or * doesn't already exist (_REPLACE). xattrs can have a zero length * value. + * + * The caller has acquired cluster locks, holds a transaction, and has + * dirtied the inode item so that they can update it after we modify it. + * The caller has to know the tags to acquire cluster locks before + * holding the transaction so they pass in the parsed tags, or all 0s for + * non scoutfs. prefixes. */ -static int scoutfs_xattr_set(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_len, + const void *value, size_t size, int flags, + const struct scoutfs_xattr_prefix_tags *tgs, + struct scoutfs_lock *lck, struct scoutfs_lock *totl_lock, + struct list_head *ind_locks) { - struct inode *inode = dentry->d_inode; struct scoutfs_inode_info *si = SCOUTFS_I(inode); struct super_block *sb = inode->i_sb; const u64 ino = scoutfs_ino(inode); struct scoutfs_xattr_totl_val tval = {0,}; - struct scoutfs_xattr_prefix_tags tgs; struct scoutfs_xattr *xat = NULL; - struct scoutfs_lock *lck = NULL; - struct scoutfs_lock *totl_lock = NULL; - size_t name_len = strlen(name); struct scoutfs_key totl_key; struct scoutfs_key key; bool undo_srch = false; bool undo_totl = false; - LIST_HEAD(ind_locks); u8 found_parts; unsigned int xat_bytes_totl; unsigned int xat_bytes; unsigned int val_len; - u64 ind_seq; u64 total; u64 hash = 0; u64 id = 0; @@ -651,6 +651,9 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name, trace_scoutfs_xattr_set(sb, name_len, value, size, flags); + if (WARN_ON_ONCE(tgs->totl && !totl_lock)) + return -EINVAL; + /* mirror the syscall's errors for large names and values */ if (name_len > SCOUTFS_XATTR_MAX_NAME_LEN) return -ERANGE; @@ -661,16 +664,10 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name, (flags & ~(XATTR_CREATE | XATTR_REPLACE))) return -EINVAL; - if (unknown_prefix(name)) - return -EOPNOTSUPP; - - if (scoutfs_xattr_parse_tags(name, name_len, &tgs) != 0) - return -EINVAL; - - if ((tgs.hide | tgs.srch | tgs.totl) && !capable(CAP_SYS_ADMIN)) + if ((tgs->hide | tgs->srch | tgs->totl) && !capable(CAP_SYS_ADMIN)) return -EPERM; - if (tgs.totl && ((ret = parse_totl_key(&totl_key, name, name_len)) != 0)) + if (tgs->totl && ((ret = parse_totl_key(&totl_key, name, name_len)) != 0)) return ret; /* allocate enough to always read an existing xattr's totl */ @@ -679,51 +676,44 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name, /* but store partial first item that only includes the new xattr's value */ xat_bytes = first_item_bytes(name_len, size); xat = kmalloc(xat_bytes_totl, GFP_NOFS); - if (!xat) { - ret = -ENOMEM; - goto out; - } - - ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE, - SCOUTFS_LKF_REFRESH_INODE, inode, &lck); - if (ret) - goto out; + if (!xat) + return -ENOMEM; down_write(&si->xattr_rwsem); /* find an existing xattr to delete, including possible totl value */ ret = get_next_xattr(inode, &key, xat, xat_bytes_totl, name, name_len, 0, 0, lck); if (ret < 0 && ret != -ENOENT) - goto unlock; + goto out; /* check existence constraint flags */ if (ret == -ENOENT && (flags & XATTR_REPLACE)) { ret = -ENODATA; - goto unlock; + goto out; } else if (ret >= 0 && (flags & XATTR_CREATE)) { ret = -EEXIST; - goto unlock; + goto out; } /* not an error to delete something that doesn't exist */ if (ret == -ENOENT && !value) { ret = 0; - goto unlock; + goto out; } /* s64 count delta if we create or delete */ - if (tgs.totl) + if (tgs->totl) tval.count = cpu_to_le64((u64)!!(value) - (u64)!!(ret != -ENOENT)); /* found fields in key will also be used */ found_parts = ret >= 0 ? xattr_nr_parts(xat) : 0; - if (found_parts && tgs.totl) { + if (found_parts && tgs->totl) { /* parse old totl value before we clobber xat buf */ val_len = ret - offsetof(struct scoutfs_xattr, name[xat->name_len]); ret = parse_totl_u64(&xat->name[xat->name_len], val_len, &total); if (ret < 0) - goto unlock; + goto out; le64_add_cpu(&tval.total, -total); } @@ -742,15 +732,90 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name, min(size, SCOUTFS_XATTR_MAX_PART_SIZE - offsetof(struct scoutfs_xattr, name[name_len]))); - if (tgs.totl) { + if (tgs->totl) { ret = parse_totl_u64(value, size, &total); if (ret < 0) - goto unlock; + goto out; } le64_add_cpu(&tval.total, total); } + if (tgs->srch && !(found_parts && value)) { + if (found_parts) + id = le64_to_cpu(key.skx_id); + hash = scoutfs_hash64(name, name_len); + ret = scoutfs_forest_srch_add(sb, hash, ino, id); + if (ret < 0) + goto out; + undo_srch = true; + } + + if (tgs->totl) { + ret = apply_totl_delta(sb, &totl_key, &tval, totl_lock); + if (ret < 0) + goto out; + undo_totl = true; + } + + if (found_parts && value) + ret = change_xattr_items(inode, id, xat, xat_bytes, value, size, + xattr_nr_parts(xat), found_parts, lck); + else if (found_parts) + ret = delete_xattr_items(inode, le64_to_cpu(key.skx_name_hash), + le64_to_cpu(key.skx_id), found_parts, + lck); + else + ret = create_xattr_items(inode, id, xat, xat_bytes, value, size, + xattr_nr_parts(xat), lck); + if (ret < 0) + goto out; + + /* XXX do these want i_mutex or anything? */ + inode_inc_iversion(inode); + inode->i_ctime = CURRENT_TIME; + ret = 0; + +out: + if (ret < 0 && undo_srch) { + err = scoutfs_forest_srch_add(sb, hash, ino, id); + BUG_ON(err); + } + if (ret < 0 && undo_totl) { + /* _delta() on dirty items shouldn't fail */ + tval.total = cpu_to_le64(-le64_to_cpu(tval.total)); + tval.count = cpu_to_le64(-le64_to_cpu(tval.count)); + err = apply_totl_delta(sb, &totl_key, &tval, totl_lock); + BUG_ON(err); + } + + up_write(&si->xattr_rwsem); + kfree(xat); + + return ret; +} + +static int scoutfs_xattr_set(struct dentry *dentry, const char *name, const void *value, + size_t size, int flags) +{ + struct inode *inode = dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct scoutfs_xattr_prefix_tags tgs; + struct scoutfs_lock *totl_lock = NULL; + struct scoutfs_lock *lck = NULL; + size_t name_len = strlen(name); + LIST_HEAD(ind_locks); + u64 ind_seq; + int ret; + + if (scoutfs_xattr_parse_tags(name, name_len, &tgs) != 0) + return -EINVAL; + + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE, + SCOUTFS_LKF_REFRESH_INODE, inode, &lck); + if (ret) + goto unlock; + if (tgs.totl) { ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &totl_lock); if (ret) @@ -770,80 +835,98 @@ retry: if (ret < 0) goto release; - if (tgs.srch && !(found_parts && value)) { - if (found_parts) - id = le64_to_cpu(key.skx_id); - hash = scoutfs_hash64(name, name_len); - ret = scoutfs_forest_srch_add(sb, hash, ino, id); - if (ret < 0) - goto release; - undo_srch = true; - } - - if (tgs.totl) { - ret = apply_totl_delta(sb, &totl_key, &tval, totl_lock); - if (ret < 0) - goto release; - undo_totl = true; - } - - if (found_parts && value) - ret = change_xattr_items(inode, id, xat, xat_bytes, value, size, - xattr_nr_parts(xat), found_parts, lck); - else if (found_parts) - ret = delete_xattr_items(inode, le64_to_cpu(key.skx_name_hash), - le64_to_cpu(key.skx_id), found_parts, - lck); - else - ret = create_xattr_items(inode, id, xat, xat_bytes, value, size, - xattr_nr_parts(xat), lck); - if (ret < 0) - goto release; - - /* XXX do these want i_mutex or anything? */ - inode_inc_iversion(inode); - inode->i_ctime = CURRENT_TIME; - scoutfs_update_inode_item(inode, lck, &ind_locks); - ret = 0; + ret = scoutfs_xattr_set_locked(dentry->d_inode, name, name_len, value, size, flags, &tgs, + lck, totl_lock, &ind_locks); + if (ret == 0) + scoutfs_update_inode_item(inode, lck, &ind_locks); release: - if (ret < 0 && undo_srch) { - err = scoutfs_forest_srch_add(sb, hash, ino, id); - BUG_ON(err); - } - if (ret < 0 && undo_totl) { - /* _delta() on dirty items shouldn't fail */ - tval.total = cpu_to_le64(-le64_to_cpu(tval.total)); - tval.count = cpu_to_le64(-le64_to_cpu(tval.count)); - err = apply_totl_delta(sb, &totl_key, &tval, totl_lock); - BUG_ON(err); - } - scoutfs_release_trans(sb); scoutfs_inode_index_unlock(sb, &ind_locks); unlock: - up_write(&si->xattr_rwsem); scoutfs_unlock(sb, lck, SCOUTFS_LOCK_WRITE); scoutfs_unlock(sb, totl_lock, SCOUTFS_LOCK_WRITE_ONLY); -out: - kfree(xat); return ret; } -int scoutfs_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags) +/* + * Future kernels have this amazing hack to rewind the name to get the + * skipped prefix. We're back in the stone ages without the handler + * arg, so we Just Know that this is possible. This will become a + * compat hook to either call the kernel's xattr_full_name(handler), or + * our hack to use the flags as the prefix length. + */ +static const char *full_name_hack(void *handler, const char *name, int len) { - if (size == 0) - value = ""; /* set empty value */ + return name - len; +} +static int scoutfs_xattr_get_handler(struct dentry *dentry, const char *name, + void *value, size_t size, int handler_flags) +{ + name = full_name_hack(NULL, name, handler_flags); + return scoutfs_xattr_get(dentry, name, value, size); +} + +static int scoutfs_xattr_set_handler(struct dentry *dentry, const char *name, + const void *value, size_t size, int flags, int handler_flags) +{ + name = full_name_hack(NULL, name, handler_flags); return scoutfs_xattr_set(dentry, name, value, size, flags); } -int scoutfs_removexattr(struct dentry *dentry, const char *name) -{ - return scoutfs_xattr_set(dentry, name, NULL, 0, XATTR_REPLACE); -} +static const struct xattr_handler scoutfs_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .flags = XATTR_USER_PREFIX_LEN, + .get = scoutfs_xattr_get_handler, + .set = scoutfs_xattr_set_handler, +}; + +static const struct xattr_handler scoutfs_xattr_scoutfs_handler = { + .prefix = SCOUTFS_XATTR_PREFIX, + .flags = SCOUTFS_XATTR_PREFIX_LEN, + .get = scoutfs_xattr_get_handler, + .set = scoutfs_xattr_set_handler, +}; + +static const struct xattr_handler scoutfs_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .flags = XATTR_TRUSTED_PREFIX_LEN, + .get = scoutfs_xattr_get_handler, + .set = scoutfs_xattr_set_handler, +}; + +static const struct xattr_handler scoutfs_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .flags = XATTR_SECURITY_PREFIX_LEN, + .get = scoutfs_xattr_get_handler, + .set = scoutfs_xattr_set_handler, +}; + +static const struct xattr_handler scoutfs_xattr_acl_access_handler = { + .prefix = XATTR_NAME_POSIX_ACL_ACCESS, + .flags = ACL_TYPE_ACCESS, + .get = scoutfs_acl_get_xattr, + .set = scoutfs_acl_set_xattr, +}; + +static const struct xattr_handler scoutfs_xattr_acl_default_handler = { + .prefix = XATTR_NAME_POSIX_ACL_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .get = scoutfs_acl_get_xattr, + .set = scoutfs_acl_set_xattr, +}; + +const struct xattr_handler *scoutfs_xattr_handlers[] = { + &scoutfs_xattr_user_handler, + &scoutfs_xattr_scoutfs_handler, + &scoutfs_xattr_trusted_handler, + &scoutfs_xattr_security_handler, + &scoutfs_xattr_acl_access_handler, + &scoutfs_xattr_acl_default_handler, + NULL +}; ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer, size_t size, __u32 *hash_pos, __u64 *id_pos, diff --git a/kmod/src/xattr.h b/kmod/src/xattr.h index cbc6c599..1cb14566 100644 --- a/kmod/src/xattr.h +++ b/kmod/src/xattr.h @@ -1,25 +1,29 @@ #ifndef _SCOUTFS_XATTR_H_ #define _SCOUTFS_XATTR_H_ -ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer, - size_t size); -int scoutfs_setxattr(struct dentry *dentry, const char *name, - const void *value, size_t size, int flags); -int scoutfs_removexattr(struct dentry *dentry, const char *name); -ssize_t scoutfs_listxattr(struct dentry *dentry, char *buffer, size_t size); -ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer, - size_t size, __u32 *hash_pos, __u64 *id_pos, - bool e_range, bool show_hidden); - -int scoutfs_xattr_drop(struct super_block *sb, u64 ino, - struct scoutfs_lock *lock); - struct scoutfs_xattr_prefix_tags { unsigned long hide:1, srch:1, totl:1; }; +extern const struct xattr_handler *scoutfs_xattr_handlers[]; + +int scoutfs_xattr_get_locked(struct inode *inode, const char *name, void *buffer, size_t size, + struct scoutfs_lock *lck); +int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_len, + const void *value, size_t size, int flags, + const struct scoutfs_xattr_prefix_tags *tgs, + struct scoutfs_lock *lck, struct scoutfs_lock *totl_lock, + struct list_head *ind_locks); + +ssize_t scoutfs_listxattr(struct dentry *dentry, char *buffer, size_t size); +ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer, + size_t size, __u32 *hash_pos, __u64 *id_pos, + bool e_range, bool show_hidden); +int scoutfs_xattr_drop(struct super_block *sb, u64 ino, + struct scoutfs_lock *lock); + int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len, struct scoutfs_xattr_prefix_tags *tgs); diff --git a/tests/golden/xfstests b/tests/golden/xfstests index 9a818f51..cc382847 100644 --- a/tests/golden/xfstests +++ b/tests/golden/xfstests @@ -40,6 +40,7 @@ generic/092 generic/098 generic/101 generic/104 +generic/105 generic/106 generic/107 generic/117 @@ -51,6 +52,7 @@ generic/184 generic/221 generic/228 generic/236 +generic/237 generic/245 generic/249 generic/257 @@ -63,6 +65,7 @@ generic/308 generic/309 generic/313 generic/315 +generic/319 generic/322 generic/335 generic/336 @@ -72,6 +75,7 @@ generic/342 generic/343 generic/348 generic/360 +generic/375 generic/376 generic/377 Not @@ -282,4 +286,4 @@ shared/004 shared/032 shared/051 shared/289 -Passed all 75 tests +Passed all 79 tests diff --git a/tests/tests/simple-xattr-unit.sh b/tests/tests/simple-xattr-unit.sh index beaba237..31cc5878 100644 --- a/tests/tests/simple-xattr-unit.sh +++ b/tests/tests/simple-xattr-unit.sh @@ -36,7 +36,8 @@ test_xattr_lengths() { else echo "$name=\"$val\"" > "$T_TMP.good" fi - cmp "$T_TMP.good" "$T_TMP.got" || exit 1 + cmp "$T_TMP.good" "$T_TMP.got" || \ + t_fail "cmp failed name len $name_len val len $val_len" setfattr -x $name "$FILE" } diff --git a/tests/tests/xfstests.sh b/tests/tests/xfstests.sh index 140a3819..13349368 100644 --- a/tests/tests/xfstests.sh +++ b/tests/tests/xfstests.sh @@ -65,7 +65,6 @@ generic/030 # mmap missing generic/075 # file content mismatch failures (fds, etc) generic/080 # mmap missing generic/103 # enospc causes trans commit failures -generic/105 # needs trigage: something about acls generic/108 # mount fails on failing device? generic/112 # file content mismatch failures (fds, etc) generic/120 # (can't exec 'cause no mmap) @@ -73,17 +72,14 @@ generic/126 # (can't exec 'cause no mmap) generic/141 # mmap missing generic/213 # enospc causes trans commit failures generic/215 # mmap missing -generic/237 # wrong error return from failing setfacl? generic/246 # mmap missing generic/247 # mmap missing generic/248 # mmap missing -generic/319 # utils output change? update branch? generic/321 # requires selinux enabled for '+' in ls? generic/325 # mmap missing generic/338 # BUG_ON update inode error handling generic/346 # mmap missing generic/347 # _dmthin_mount doesn't work? -generic/375 # utils output change? update branch? EOF t_restore_output diff --git a/utils/man/scoutfs.5 b/utils/man/scoutfs.5 index f6cbe193..8b20483b 100644 --- a/utils/man/scoutfs.5 +++ b/utils/man/scoutfs.5 @@ -15,12 +15,27 @@ general mount options described in the .BR mount (8) manual page. .TP +.B acl +The acl mount option enables support for POSIX Access Control Lists +as detailed in +.BR acl (5) . +Support for POSIX ACLs is the default. +.TP .B metadev_path= The metadev_path option specifies the path to the block device that contains the filesystem's metadata. .sp This option is required. .TP +.B noacl +The noacl mount option disables the default support for POSIX Access +Control Lists. Any existing system.posix_acl_default and +system.posix_acl_access extended attributes remain in inodes. They +will appear in listings from +.BR listxattr (5) +but specific retrieval or reomval operations will fail. They will be +used for enforcement again if ACL support is later enabled. +.TP .B orphan_scan_delay_ms= This option sets the average expected delay, in milliseconds, between each mount's scan of the global orphaned inode list. Jitter is added to