diff --git a/kmod/src/Makefile b/kmod/src/Makefile index 975ec319..d96c4967 100644 --- a/kmod/src/Makefile +++ b/kmod/src/Makefile @@ -8,6 +8,7 @@ CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include -include $(src)/Makefile.kernelcompat scoutfs-y += \ + acl.o \ avl.o \ alloc.o \ block.o \ diff --git a/kmod/src/acl.c b/kmod/src/acl.c new file mode 100644 index 00000000..d6ab6a3c --- /dev/null +++ b/kmod/src/acl.c @@ -0,0 +1,355 @@ +/* + * Copyright (C) 2022 Versity Software, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include +#include +#include +#include + +#include "format.h" +#include "super.h" +#include "scoutfs_trace.h" +#include "xattr.h" +#include "acl.h" +#include "inode.h" +#include "trans.h" + +/* + * POSIX draft ACLs are stored as full xattr items with the entries + * encoded as the kernel's posix_acl_xattr_{header,entry} value structs. + * + * They're accessed and modified via user facing synthetic xattrs, iops + * calls from the kernel, during inode mode changes, and during inode + * creation. + * + * ACL access devolves into xattr access which is relatively expensive + * so we maintain the cached native form in the vfs inode. We drop the + * cache in lock invalidation which means that cached acl access must + * always be performed under cluster locking. 
+ */ +/* + * Map an ACL type to the name of the xattr that stores it. On success + * *name points at the XATTR_NAME_POSIX_ACL_{ACCESS,DEFAULT} string and, + * if name_len is non-NULL, *name_len is its length without the + * terminating null. Returns 0, or -EINVAL for any other type, in which + * case neither output is written. + */ +static int acl_xattr_name_len(int type, char **name, size_t *name_len) +{ + int ret = 0; + + switch (type) { + case ACL_TYPE_ACCESS: + *name = XATTR_NAME_POSIX_ACL_ACCESS; + if (name_len) + *name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1; + break; + case ACL_TYPE_DEFAULT: + *name = XATTR_NAME_POSIX_ACL_DEFAULT; + if (name_len) + *name_len = sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1; + break; + default: + ret = -EINVAL; + break; + } + + return ret; +} +/* + * Return the inode's ACL of the given type. The copy cached in the vfs + * inode is used when there is one, otherwise the ACL xattr is read + * using the cluster lock passed in by the caller and the result is + * cached. Returns NULL when ACLs aren't enabled for the inode or when + * the xattr read returns -ENODATA or 0, and an ERR_PTR() on any other + * failure. Callers in this file drop the returned acl with + * posix_acl_release(). + */ +struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock) +{ + struct posix_acl *acl; + char *value = NULL; + char *name; + int ret; + + if (!IS_POSIXACL(inode)) + return NULL; +/* anything already cached is returned as is, only ACL_NOT_CACHED falls through to the xattr */ + acl = get_cached_acl(inode, type); + if (acl != ACL_NOT_CACHED) + return acl; + + ret = acl_xattr_name_len(type, &name, NULL); + if (ret < 0) + return ERR_PTR(ret); +/* first call passes no buffer; a positive return is used as the size of the value to allocate and read */ + ret = scoutfs_xattr_get_locked(inode, name, NULL, 0, lock); + if (ret > 0) { + value = kzalloc(ret, GFP_NOFS); + if (!value) + ret = -ENOMEM; + else + ret = scoutfs_xattr_get_locked(inode, name, value, ret, lock); + } + if (ret > 0) { + acl = posix_acl_from_xattr(&init_user_ns, value, ret); + } else if (ret == -ENODATA || ret == 0) { + acl = NULL; + } else { + acl = ERR_PTR(ret); + } + + /* can set null negative cache */ + if (!IS_ERR(acl)) + set_cached_acl(inode, type, acl); + + kfree(value); + + return acl; +} +/* + * Unlocked variant of scoutfs_get_acl_locked(): acquires the inode's + * cluster lock for reading around the lookup and drops it afterwards. + * A failure to get the lock is returned as an ERR_PTR(). + */ +struct posix_acl *scoutfs_get_acl(struct inode *inode, int type) +{ + struct super_block *sb = inode->i_sb; + struct scoutfs_lock *lock = NULL; + struct posix_acl *acl; + int ret; + + if (!IS_POSIXACL(inode)) + return NULL; + + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock); + if (ret < 0) { + acl = ERR_PTR(ret); + } else { + acl = scoutfs_get_acl_locked(inode, type, lock); + scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ); + } + + return acl; +} + +/* + * The caller has acquired the locks and dirtied the inode, they'll + * update the inode item if we return 0.
+ */ +int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type, + struct scoutfs_lock *lock, struct list_head *ind_locks) +{ + static const struct scoutfs_xattr_prefix_tags tgs = {0,}; /* never scoutfs. prefix */ + bool set_mode = false; + char *value = NULL; + umode_t new_mode; + size_t name_len; + char *name; + int size = 0; + int ret; + + ret = acl_xattr_name_len(type, &name, &name_len); + if (ret < 0) + return ret; +/* + * An access acl can change the mode: posix_acl_update_mode() fills + * new_mode and is handed &acl, so acl may differ afterwards (presumably + * NULL when the mode alone expresses it -- confirm). A default acl is + * only stored on directories. + */ + switch (type) { + case ACL_TYPE_ACCESS: + if (acl) { + ret = posix_acl_update_mode(inode, &new_mode, &acl); + if (ret < 0) + goto out; + set_mode = true; + } + break; + case ACL_TYPE_DEFAULT: +/* on a non-directory clearing is a successful no-op and setting is -EINVAL */ if (!S_ISDIR(inode->i_mode)) { + ret = acl ? -EINVAL : 0; + goto out; + } + break; + } +/* encode the acl in its xattr value form; a NULL acl leaves value NULL and size 0 */ + if (acl) { + size = posix_acl_xattr_size(acl->a_count); + value = kmalloc(size, GFP_NOFS); + if (!value) { + ret = -ENOMEM; + goto out; + } + + ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); + if (ret < 0) + goto out; + } +/* NOTE(review): a NULL value presumably removes the xattr -- confirm in scoutfs_xattr_set_locked() */ + ret = scoutfs_xattr_set_locked(inode, name, name_len, value, size, 0, &tgs, + lock, NULL, ind_locks); + if (ret == 0 && set_mode) { + inode->i_mode = new_mode; + if (!value) { + /* can be setting an acl that only affects mode, didn't need xattr */ + inode_inc_iversion(inode); + inode->i_ctime = CURRENT_TIME; + } + } +/* every path with ret == 0, including the early exits above, refreshes the cached acl */ +out: + if (!ret) + set_cached_acl(inode, type, acl); + + kfree(value); + + return ret; +} +/* + * Unlocked variant of scoutfs_set_acl_locked(): takes the inode's + * cluster lock for writing and holds the inode index locks, dirties the + * inode item, applies the acl (which may be NULL), and updates the + * inode item only if that succeeded. scoutfs_release_trans() and the + * index unlock run only when both locking steps succeeded; the cluster + * lock is always unlocked. + */ +int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) +{ + struct super_block *sb = inode->i_sb; + struct scoutfs_lock *lock = NULL; + LIST_HEAD(ind_locks); + int ret; + + ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE, SCOUTFS_LKF_REFRESH_INODE, inode, &lock) ?: + scoutfs_inode_index_lock_hold(inode, &ind_locks, false, true); + if (ret == 0) { + ret = scoutfs_dirty_inode_item(inode, lock) ?: + scoutfs_set_acl_locked(inode, acl, type, lock, &ind_locks); + if (ret == 0) + scoutfs_update_inode_item(inode, lock, &ind_locks); + + scoutfs_release_trans(sb); + scoutfs_inode_index_unlock(sb,
&ind_locks); + } + + scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE); + return ret; +} +/* + * Read an ACL in xattr value form into the caller's buffer. Returns + * -EOPNOTSUPP when ACLs aren't enabled for the inode, -ENODATA when + * the inode has no ACL of this type, and otherwise whatever + * posix_acl_to_xattr() returns for value and size. The name argument + * is not used; the ACL is chosen by type. + */ +int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size, + int type) +{ + struct posix_acl *acl; + int ret = 0; + + if (!IS_POSIXACL(dentry->d_inode)) + return -EOPNOTSUPP; + + acl = scoutfs_get_acl(dentry->d_inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + + ret = posix_acl_to_xattr(&init_user_ns, acl, value, size); + posix_acl_release(acl); + + return ret; +} +/* + * Set an ACL from its xattr value form. The caller must own the inode + * or be capable (-EPERM) and ACLs must be enabled for the inode + * (-EOPNOTSUPP). A non-NULL value is decoded and validated before it + * is applied; a NULL value hands a NULL acl to scoutfs_set_acl(). The + * name and flags arguments are not used. + */ +int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size, + int flags, int type) +{ + struct posix_acl *acl = NULL; + int ret; + + if (!inode_owner_or_capable(dentry->d_inode)) + return -EPERM; + + if (!IS_POSIXACL(dentry->d_inode)) + return -EOPNOTSUPP; + + if (value) { + acl = posix_acl_from_xattr(&init_user_ns, value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + + if (acl) { + ret = posix_acl_valid(&init_user_ns, acl); + if (ret) + goto out; + } + } + + ret = scoutfs_set_acl(dentry->d_inode, acl, type); +out: + posix_acl_release(acl); + + return ret; +} + +/* + * Apply the parent's default acl to a new inode's access acl and inherit + * it as the default for new directories. The caller holds locks and a + * transaction.
+ */ +int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir, + struct scoutfs_lock *lock, struct scoutfs_lock *dir_lock, + struct list_head *ind_locks) +{ + struct posix_acl *acl = NULL; + int ret = 0; +/* + * Symlinks get neither an inherited acl nor the umask. Other inodes + * look up the parent's default acl under dir_lock and only have the + * umask applied to their mode when there is no default acl. + */ + if (!S_ISLNK(inode->i_mode)) { + if (IS_POSIXACL(dir)) { + acl = scoutfs_get_acl_locked(dir, ACL_TYPE_DEFAULT, dir_lock); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + + if (!acl) + inode->i_mode &= ~current_umask(); + } +/* + * With a default acl to inherit: new directories store it as their own + * default, then posix_acl_create() is given &acl and &inode->i_mode and + * the resulting acl is stored as the access acl only when it returns a + * positive value. + */ + if (IS_POSIXACL(dir) && acl) { + if (S_ISDIR(inode->i_mode)) { + ret = scoutfs_set_acl_locked(inode, acl, ACL_TYPE_DEFAULT, + lock, ind_locks); + if (ret) + goto out; + } + ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode); + if (ret < 0) +/* NOTE(review): skips the release at out:, presumably posix_acl_create() already dropped acl on failure -- confirm for the target kernel */ return ret; + if (ret > 0) + ret = scoutfs_set_acl_locked(inode, acl, ACL_TYPE_ACCESS, + lock, ind_locks); + } else { +/* nothing to inherit; presumably records in the vfs inode that it has no acls -- confirm */ cache_no_acl(inode); + } +out: + posix_acl_release(acl); + return ret; +} + +/* + * Update the access ACL based on a newly set mode. If we return an + * error then the xattr wasn't changed. + * + * Annoyingly, setattr_copy has logic that transforms the final set mode + * that we want to use to update the acl. But we don't want to modify + * the other inode fields while discovering the resulting mode. We're + * relying on acl_chmod not caring about the transformation (currently + * just clears sgid). It would be better if we could get the resulting + * mode to give to acl_chmod without modifying the other inode fields. + * + * The caller has the inode mutex, a cluster lock, transaction, and will + * update the inode item if we return success.
+ */ +int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr, + struct scoutfs_lock *lock, struct list_head *ind_locks) +{ + struct posix_acl *acl; + int ret = 0; +/* nothing to do unless ACLs are enabled for the inode and the mode is being set */ + if (!IS_POSIXACL(inode) || !(attr->ia_valid & ATTR_MODE)) + return 0; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; +/* no access acl (NULL) returns 0 because PTR_ERR(NULL) is 0; lookup errors are passed through */ + acl = scoutfs_get_acl_locked(inode, ACL_TYPE_ACCESS, lock); + if (IS_ERR_OR_NULL(acl)) + return PTR_ERR(acl); +/* + * GFP_NOFS like every other allocation in this file: the caller holds a + * cluster lock and a transaction so the allocation must not recurse + * into the filesystem. NOTE(review): the early return below skips + * posix_acl_release(), presumably posix_acl_chmod() has already dropped + * acl on failure -- confirm for the target kernel. + */ + ret = posix_acl_chmod(&acl, GFP_NOFS, attr->ia_mode); + if (ret) + return ret; + + ret = scoutfs_set_acl_locked(inode, acl, ACL_TYPE_ACCESS, lock, ind_locks); + posix_acl_release(acl); + return ret; +} diff --git a/kmod/src/acl.h b/kmod/src/acl.h new file mode 100644 index 00000000..a9235eb5 --- /dev/null +++ b/kmod/src/acl.h @@ -0,0 +1,18 @@ +#ifndef _SCOUTFS_ACL_H_ +#define _SCOUTFS_ACL_H_ + +struct posix_acl *scoutfs_get_acl(struct inode *inode, int type); +struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock); +int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type); +int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type, + struct scoutfs_lock *lock, struct list_head *ind_locks); +int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size, + int type); +int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size, + int flags, int type); +int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr, + struct scoutfs_lock *lock, struct list_head *ind_locks); +int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir, + struct scoutfs_lock *lock, struct scoutfs_lock *dir_lock, + struct list_head *ind_locks); +#endif diff --git a/kmod/src/dir.c b/kmod/src/dir.c index 92b9eb29..e4b83256 100644 --- a/kmod/src/dir.c +++ b/kmod/src/dir.c @@ -32,6 +32,7 @@ #include "hash.h" #include "omap.h" #include "forest.h" +#include "acl.h" #include "counters.h" #include "scoutfs_trace.h" @@
-765,7 +766,8 @@ retry: if (ret) goto out_unlock; - ret = scoutfs_new_inode(sb, dir, mode, rdev, ino, *inode_lock, &inode); + ret = scoutfs_new_inode(sb, dir, mode, rdev, ino, *inode_lock, &inode) ?: + scoutfs_init_acl_locked(inode, dir, *inode_lock, *dir_lock, ind_locks); if (ret < 0) goto out; @@ -1246,6 +1248,7 @@ const struct inode_operations scoutfs_symlink_iops = { .getxattr = generic_getxattr, .listxattr = scoutfs_listxattr, .removexattr = generic_removexattr, + .get_acl = scoutfs_get_acl, }; /* @@ -1982,6 +1985,7 @@ const struct inode_operations_wrapper scoutfs_dir_iops = { .getxattr = generic_getxattr, .listxattr = scoutfs_listxattr, .removexattr = generic_removexattr, + .get_acl = scoutfs_get_acl, .symlink = scoutfs_symlink, .permission = scoutfs_permission, }, diff --git a/kmod/src/inode.c b/kmod/src/inode.c index 9b8beede..02456fde 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -36,6 +36,7 @@ #include "omap.h" #include "forest.h" #include "btree.h" +#include "acl.h" /* * XXX @@ -140,6 +141,7 @@ static const struct inode_operations scoutfs_file_iops = { .getxattr = generic_getxattr, .listxattr = scoutfs_listxattr, .removexattr = generic_removexattr, + .get_acl = scoutfs_get_acl, .fiemap = scoutfs_data_fiemap, }; @@ -150,6 +152,7 @@ static const struct inode_operations scoutfs_special_iops = { .getxattr = generic_getxattr, .listxattr = scoutfs_listxattr, .removexattr = generic_removexattr, + .get_acl = scoutfs_get_acl, }; /* @@ -507,10 +510,15 @@ retry: if (ret) goto out; + ret = scoutfs_acl_chmod_locked(inode, attr, lock, &ind_locks); + if (ret < 0) + goto release; + setattr_copy(inode, attr); inode_inc_iversion(inode); scoutfs_update_inode_item(inode, lock, &ind_locks); +release: scoutfs_release_trans(sb); scoutfs_inode_index_unlock(sb, &ind_locks); out: diff --git a/kmod/src/lock.c b/kmod/src/lock.c index 9275e3a4..db682063 100644 --- a/kmod/src/lock.c +++ b/kmod/src/lock.c @@ -18,6 +18,7 @@ #include #include #include +#include #include 
"super.h" #include "lock.h" @@ -156,6 +157,8 @@ static void invalidate_inode(struct super_block *sb, u64 ino) if (!linfo->unmounting) d_prune_aliases(inode); + forget_all_cached_acls(inode); + si->drop_invalidated = true; if (scoutfs_lock_is_covered(sb, &si->ino_lock_cov) && inode->i_nlink > 0) { iput(inode); diff --git a/kmod/src/options.c b/kmod/src/options.c index a447a931..9ff18c9d 100644 --- a/kmod/src/options.c +++ b/kmod/src/options.c @@ -29,14 +29,18 @@ #include "inode.h" enum { + Opt_acl, Opt_metadev_path, + Opt_noacl, Opt_orphan_scan_delay_ms, Opt_quorum_slot_nr, Opt_err, }; static const match_table_t tokens = { + {Opt_acl, "acl"}, {Opt_metadev_path, "metadev_path=%s"}, + {Opt_noacl, "noacl"}, {Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"}, {Opt_quorum_slot_nr, "quorum_slot_nr=%s"}, {Opt_err, NULL} @@ -134,12 +138,20 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m token = match_token(p, tokens, args); switch (token) { + case Opt_acl: + sb->s_flags |= MS_POSIXACL; + break; + case Opt_metadev_path: ret = parse_bdev_path(sb, &args[0], &opts->metadev_path); if (ret < 0) return ret; break; + case Opt_noacl: + sb->s_flags &= ~MS_POSIXACL; + break; + case Opt_orphan_scan_delay_ms: if (opts->orphan_scan_delay_ms != -1) { scoutfs_err(sb, "multiple orphan_scan_delay_ms options provided, only provide one."); @@ -250,10 +262,15 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root) { struct super_block *sb = root->d_sb; struct scoutfs_mount_options opts; + const bool is_acl = !!(sb->s_flags & MS_POSIXACL); scoutfs_options_read(sb, &opts); + if (is_acl) + seq_puts(seq, ",acl"); seq_printf(seq, ",metadev_path=%s", opts.metadev_path); + if (!is_acl) + seq_puts(seq, ",noacl"); seq_printf(seq, ",orphan_scan_delay_ms=%u", opts.orphan_scan_delay_ms); if (opts.quorum_slot_nr >= 0) seq_printf(seq, ",quorum_slot_nr=%d", opts.quorum_slot_nr); diff --git a/kmod/src/super.c b/kmod/src/super.c index f786d23f..0e7fb3e8 
100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -485,7 +485,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent) sb->s_op = &scoutfs_super_ops; sb->s_export_op = &scoutfs_export_ops; sb->s_xattr = scoutfs_xattr_handlers; - sb->s_flags |= MS_I_VERSION; + sb->s_flags |= MS_I_VERSION | MS_POSIXACL; /* btree blocks use long lived bh->b_data refs */ mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS); diff --git a/kmod/src/xattr.c b/kmod/src/xattr.c index 04a9dc0b..abdf8e0b 100644 --- a/kmod/src/xattr.c +++ b/kmod/src/xattr.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "format.h" #include "inode.h" @@ -26,6 +27,7 @@ #include "xattr.h" #include "lock.h" #include "hash.h" +#include "acl.h" #include "scoutfs_trace.h" /* @@ -902,11 +904,27 @@ static const struct xattr_handler scoutfs_xattr_security_handler = { .set = scoutfs_xattr_set_handler, }; +static const struct xattr_handler scoutfs_xattr_acl_access_handler = { + .prefix = XATTR_NAME_POSIX_ACL_ACCESS, + .flags = ACL_TYPE_ACCESS, + .get = scoutfs_acl_get_xattr, + .set = scoutfs_acl_set_xattr, +}; + +static const struct xattr_handler scoutfs_xattr_acl_default_handler = { + .prefix = XATTR_NAME_POSIX_ACL_DEFAULT, + .flags = ACL_TYPE_DEFAULT, + .get = scoutfs_acl_get_xattr, + .set = scoutfs_acl_set_xattr, +}; + const struct xattr_handler *scoutfs_xattr_handlers[] = { &scoutfs_xattr_user_handler, &scoutfs_xattr_scoutfs_handler, &scoutfs_xattr_trusted_handler, &scoutfs_xattr_security_handler, + &scoutfs_xattr_acl_access_handler, + &scoutfs_xattr_acl_default_handler, NULL }; diff --git a/utils/man/scoutfs.5 b/utils/man/scoutfs.5 index f6cbe193..8b20483b 100644 --- a/utils/man/scoutfs.5 +++ b/utils/man/scoutfs.5 @@ -15,12 +15,27 @@ general mount options described in the .BR mount (8) manual page. .TP +.B acl +The acl mount option enables support for POSIX Access Control Lists +as detailed in +.BR acl (5) . +Support for POSIX ACLs is the default. 
+.TP .B metadev_path= The metadev_path option specifies the path to the block device that contains the filesystem's metadata. .sp This option is required. .TP +.B noacl +The noacl mount option disables the default support for POSIX Access +Control Lists. Any existing system.posix_acl_default and +system.posix_acl_access extended attributes remain in inodes. They +will appear in listings from +.BR listxattr (2) +but specific retrieval or removal operations will fail. They will be +used for enforcement again if ACL support is later enabled. +.TP .B orphan_scan_delay_ms= This option sets the average expected delay, in milliseconds, between each mount's scan of the global orphaned inode list. Jitter is added to