mirror of
https://github.com/versity/scoutfs.git
synced 2026-06-09 21:22:36 +00:00
Compare commits
11 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 0c3085c6e8 | |||
| 97f2d1ce8d | |||
| bd14a369e3 | |||
| 448dd74663 | |||
| 57785066c0 | |||
| b23022444a | |||
| 64931c395d | |||
| 380442515e | |||
| 750e998e40 | |||
| 747a8bc53d | |||
| 77327ae713 |
@@ -1,35 +1,6 @@
|
||||
Versity ScoutFS Release Notes
|
||||
=============================
|
||||
|
||||
---
|
||||
v1.32
|
||||
\
|
||||
*June 2, 2026*
|
||||
|
||||
Fix writing POSIX ACLs over NFS mounts that export the scoutfs
|
||||
filesystem.
|
||||
|
||||
Add support for kernels in the RHEL 9.8 minor release.
|
||||
|
||||
Reduce unneeded block allocation when data\_prealloc\_contig\_only was
|
||||
set to 0. This will help achieve more efficient data space usage when
|
||||
writing small files.
|
||||
|
||||
---
|
||||
v1.31
|
||||
\
|
||||
*May 5, 2026*
|
||||
|
||||
Fix race between modifying quota rules and internal reading of the rules
|
||||
that tripped an assertion.
|
||||
|
||||
Fix a bug that could skip merging totl items under specific heavy write
|
||||
loads. This could lead to merged totl items incorrectly tracking the
|
||||
sum of all the contributing totl xattrs.
|
||||
|
||||
Fix many small low risk bugs in error paths that were found with code
|
||||
analysis and testing.
|
||||
|
||||
---
|
||||
v1.30
|
||||
\
|
||||
|
||||
@@ -13,6 +13,7 @@ scoutfs-y += \
|
||||
avl.o \
|
||||
alloc.o \
|
||||
block.o \
|
||||
bsearch_index.o \
|
||||
btree.o \
|
||||
client.o \
|
||||
counters.o \
|
||||
@@ -36,6 +37,7 @@ scoutfs-y += \
|
||||
per_task.o \
|
||||
quorum.o \
|
||||
quota.o \
|
||||
raw.o \
|
||||
recov.o \
|
||||
scoutfs_trace.o \
|
||||
server.o \
|
||||
|
||||
@@ -6,6 +6,231 @@
|
||||
|
||||
ccflags-y += -include $(src)/kernelcompat.h
|
||||
|
||||
#
|
||||
# v3.18-rc2-19-gb5ae6b15bd73
|
||||
#
|
||||
# Folds d_materialise_unique into d_splice_alias. Note reversal
|
||||
# of arguments (Also note Documentation/filesystems/porting.rst)
|
||||
#
|
||||
ifneq (,$(shell grep 'd_materialise_unique' include/linux/dcache.h))
|
||||
ccflags-y += -DKC_D_MATERIALISE_UNIQUE=1
|
||||
endif
|
||||
|
||||
#
|
||||
# RHEL extended the fop struct so to use it we have to set
|
||||
# a flag to indicate that the struct is large enough and
|
||||
# contains the pointer.
|
||||
#
|
||||
ifneq (,$(shell grep 'FMODE_KABI_ITERATE' include/linux/fs.h))
|
||||
ccflags-y += -DKC_FMODE_KABI_ITERATE
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.7-rc2-23-g0d4d717f2583
|
||||
#
|
||||
# Added user_ns argument to posix_acl_valid
|
||||
#
|
||||
ifneq (,$(shell grep 'posix_acl_valid.*user_namespace' include/linux/posix_acl.h))
|
||||
ccflags-y += -DKC_POSIX_ACL_VALID_USER_NS
|
||||
endif
|
||||
|
||||
#
|
||||
# v5.3-12296-g6d2052d188d9
|
||||
#
|
||||
# The RBCOMPUTE function is now passed an extra flag, and should return a bool
|
||||
# to indicate whether the propagated callback should stop or not.
|
||||
#
|
||||
ifneq (,$(shell grep 'static inline bool RBNAME.*_compute_max' include/linux/rbtree_augmented.h))
|
||||
ccflags-y += -DKC_RB_TREE_AUGMENTED_COMPUTE_MAX
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.13-25-g37bc15392a23
|
||||
#
|
||||
# Renames posix_acl_create to __posix_acl_create and provide some
|
||||
# new interfaces for creating ACLs
|
||||
#
|
||||
ifneq (,$(shell grep '__posix_acl_create' include/linux/posix_acl.h))
|
||||
ccflags-y += -DKC___POSIX_ACL_CREATE
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.8-rc1-29-g31051c85b5e2
|
||||
#
|
||||
# inode_change_ok() removed - replace with setattr_prepare()
|
||||
# v5.11-rc4-7-g2f221d6f7b88 removes extern attribute
|
||||
#
|
||||
ifneq (,$(shell grep 'int setattr_prepare' include/linux/fs.h))
|
||||
ccflags-y += -DKC_SETATTR_PREPARE
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.15-rc3-4-gae5e165d855d
|
||||
#
|
||||
# linux/iversion.h needs to manually be included for code that
|
||||
# manipulates this field.
|
||||
#
|
||||
ifneq (,$(shell grep -s 'define _LINUX_IVERSION_H' include/linux/iversion.h))
|
||||
ccflags-y += -DKC_NEED_LINUX_IVERSION_H=1
|
||||
endif
|
||||
|
||||
# v4.11-12447-g104b4e5139fe
|
||||
#
|
||||
# Renamed __percpu_counter_add to percpu_counter_add_batch to clarify
|
||||
# that the __ wasn't less safe, just took an extra parameter.
|
||||
#
|
||||
ifneq (,$(shell grep 'percpu_counter_add_batch' include/linux/percpu_counter.h))
|
||||
ccflags-y += -DKC_PERCPU_COUNTER_ADD_BATCH
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.11-4550-g7dea19f9ee63
|
||||
#
|
||||
# Introduced memalloc_nofs_{save,restore} preferred instead of _noio_.
|
||||
#
|
||||
ifneq (,$(shell grep 'memalloc_nofs_save' include/linux/sched/mm.h))
|
||||
ccflags-y += -DKC_MEMALLOC_NOFS_SAVE
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.7-12414-g1eff9d322a44
|
||||
#
|
||||
# Renamed bi_rw to bi_opf to force old code to catch up. We use it as a
|
||||
# single switch between old and new bio structures.
|
||||
#
|
||||
ifneq (,$(shell grep 'bi_opf' include/linux/blk_types.h))
|
||||
ccflags-y += -DKC_BIO_BI_OPF
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.12-rc2-201-g4e4cbee93d56
|
||||
#
|
||||
# Moves to bi_status BLK_STS_ API instead of having a mix of error
|
||||
# end_io args or bi_error.
|
||||
#
|
||||
ifneq (,$(shell grep 'bi_status' include/linux/blk_types.h))
|
||||
ccflags-y += -DKC_BIO_BI_STATUS
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.11-8765-ga0b02131c5fc
|
||||
#
|
||||
# Remove the old ->shrink() API, ->{scan,count}_objects is preferred.
|
||||
#
|
||||
ifneq (,$(shell grep '(*shrink)' include/linux/shrinker.h))
|
||||
ccflags-y += -DKC_SHRINKER_SHRINK
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.19-4777-g6bec00352861
|
||||
#
|
||||
# backing_dev_info is removed from address_space. Instead we need to use
|
||||
# inode_to_bdi() inline from <backing-dev.h>.
|
||||
#
|
||||
ifneq (,$(shell grep 'struct backing_dev_info.*backing_dev_info' include/linux/fs.h))
|
||||
ccflags-y += -DKC_LINUX_BACKING_DEV_INFO=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.3-9290-ge409de992e3e
|
||||
#
|
||||
# xattr handlers are now passed a struct that contains `flags`
|
||||
#
|
||||
ifneq (,$(shell grep 'int...get..const struct xattr_handler.*struct dentry.*dentry,' include/linux/xattr.h))
|
||||
ccflags-y += -DKC_XATTR_STRUCT_XATTR_HANDLER=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.16-rc1-1-g9b2c45d479d0
|
||||
#
|
||||
# kernel_getsockname() and kernel_getpeername dropped addrlen arg
|
||||
#
|
||||
ifneq (,$(shell grep 'kernel_getsockname.*,$$' include/linux/net.h))
|
||||
ccflags-y += -DKC_KERNEL_GETSOCKNAME_ADDRLEN=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.1-rc1-410-geeb1bd5c40ed
|
||||
#
|
||||
# Adds a struct net parameter to sock_create_kern
|
||||
#
|
||||
ifneq (,$(shell grep 'sock_create_kern.*struct net' include/linux/net.h))
|
||||
ccflags-y += -DKC_SOCK_CREATE_KERN_NET=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.17-rc6-7-g95582b008388
|
||||
#
|
||||
# Kernel has current_time(inode) to uniformly retreive timespec in the right unit
|
||||
#
|
||||
ifneq (,$(shell grep 'struct timespec64 current_time' include/linux/fs.h))
|
||||
ccflags-y += -DKC_CURRENT_TIME_INODE=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.9-12228-g530e9b76ae8f
|
||||
#
|
||||
# register_cpu_notifier and family were all removed and to be
|
||||
# replaced with cpuhp_* API calls.
|
||||
#
|
||||
ifneq (,$(shell grep 'define register_hotcpu_notifier' include/linux/cpu.h))
|
||||
ccflags-y += -DKC_CPU_NOTIFIER
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.14-rc8-130-gccad2365668f
|
||||
#
|
||||
# generic_file_buffered_write is removed, backport it
|
||||
#
|
||||
ifneq (,$(shell grep 'extern ssize_t generic_file_buffered_write' include/linux/fs.h))
|
||||
ccflags-y += -DKC_GENERIC_FILE_BUFFERED_WRITE=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v5.7-438-g8151b4c8bee4
|
||||
#
|
||||
# struct address_space_operations switches away from .readpages to .readahead
|
||||
#
|
||||
# RHEL has backported this feature all the way to RHEL8, as part of RHEL_KABI,
|
||||
# which means we need to detect this very precisely
|
||||
#
|
||||
ifneq (,$(shell grep 'readahead.*struct readahead_control' include/linux/fs.h))
|
||||
ccflags-y += -DKC_FILE_AOPS_READAHEAD
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.0-rc7-1743-g8436318205b9
|
||||
#
|
||||
# .aio_read and .aio_write no longer exist. All reads and writes now use the
|
||||
# .read_iter and .write_iter methods, or must implement .read and .write (which
|
||||
# we don't).
|
||||
#
|
||||
ifneq (,$(shell grep 'ssize_t.*aio_read' include/linux/fs.h))
|
||||
ccflags-y += -DKC_LINUX_HAVE_FOP_AIO_READ=1
|
||||
endif
|
||||
|
||||
#
|
||||
# rhel7 has a custom inode_operations_wrapper struct that is discarded
|
||||
# entirely in favor of upstream structure since rhel8.
|
||||
#
|
||||
ifneq (,$(shell grep 'void.*follow_link.*struct dentry' include/linux/fs.h))
|
||||
ccflags-y += -DKC_LINUX_HAVE_RHEL_IOPS_WRAPPER=1
|
||||
endif
|
||||
|
||||
ifneq (,$(shell grep 'size_t.*ki_left;' include/linux/aio.h))
|
||||
ccflags-y += -DKC_LINUX_AIO_KI_LEFT=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.4-rc4-4-g98e9cb5711c6
|
||||
#
|
||||
# Introduces a new xattr_handler .name member that can be used to match the
|
||||
# entire field, instead of just a prefix. For these kernels, we must use
|
||||
# the new .name field instead.
|
||||
ifneq (,$(shell grep 'static inline const char .xattr_prefix' include/linux/xattr.h))
|
||||
ccflags-y += -DKC_XATTR_HANDLER_NAME=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v5.19-rc4-96-g342a72a33407
|
||||
#
|
||||
@@ -109,6 +334,14 @@ ifneq (,$(shell grep 'int tcp_sock_set_keepintvl' include/linux/tcp.h))
|
||||
ccflags-y += -DKC_HAVE_TCP_SET_SOCKFN
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.16-rc3-13-ga84d1169164b
|
||||
#
|
||||
# Fixes y2038 issues with struct timeval.
|
||||
ifneq (,$(shell grep -s '^struct __kernel_old_timeval .' include/uapi/linux/time_types.h))
|
||||
ccflags-y += -DKC_KERNEL_OLD_TIMEVAL_STRUCT
|
||||
endif
|
||||
|
||||
#
|
||||
# v5.19-rc4-52-ge33c267ab70d
|
||||
#
|
||||
@@ -178,6 +411,47 @@ ifneq (,$(shell grep 'struct file.*bdev_file_open_by_path.const char.*path' incl
|
||||
ccflags-y += -DKC_BDEV_FILE_OPEN_BY_PATH
|
||||
endif
|
||||
|
||||
# v4.0-rc7-1796-gfe0f07d08ee3
|
||||
#
|
||||
# direct-io changes modify inode_dio_done to now be called inode_dio_end
|
||||
ifneq (,$(shell grep 'void inode_dio_end.struct inode' include/linux/fs.h))
|
||||
ccflags-y += -DKC_INODE_DIO_END
|
||||
endif
|
||||
|
||||
#
|
||||
# v5.0-6476-g3d3539018d2c
|
||||
#
|
||||
# page fault handlers return a bitmask vm_fault_t instead
|
||||
# Note: el8's header has a slightly modified prefix here
|
||||
ifneq (,$(shell grep 'typedef.*__bitwise unsigned.*int vm_fault_t' include/linux/mm_types.h))
|
||||
ccflags-y += -DKC_MM_VM_FAULT_T
|
||||
endif
|
||||
|
||||
# v3.19-499-gd83a08db5ba6
|
||||
#
|
||||
# .remap pages becomes obsolete
|
||||
ifneq (,$(shell grep 'int ..remap_pages..struct vm_area_struct' include/linux/mm.h))
|
||||
ccflags-y += -DKC_MM_REMAP_PAGES
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.19-4742-g503c358cf192
|
||||
#
|
||||
# list_lru_shrink_count() and list_lru_shrink_walk() introduced
|
||||
#
|
||||
ifneq (,$(shell grep 'list_lru_shrink_count.*struct list_lru' include/linux/list_lru.h))
|
||||
ccflags-y += -DKC_LIST_LRU_SHRINK_COUNT_WALK
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.19-4757-g3f97b163207c
|
||||
#
|
||||
# lru_list_walk_cb lru arg added
|
||||
#
|
||||
ifneq (,$(shell grep 'struct list_head \*item, spinlock_t \*lock, void \*cb_arg' include/linux/list_lru.h))
|
||||
ccflags-y += -DKC_LIST_LRU_WALK_CB_ITEM_LOCK
|
||||
endif
|
||||
|
||||
#
|
||||
# v6.7-rc4-153-g0a97c01cd20b
|
||||
#
|
||||
@@ -196,6 +470,15 @@ ifneq (,$(shell grep 'struct list_lru_one \*list, spinlock_t \*lock, void \*cb_a
|
||||
ccflags-y += -DKC_LIST_LRU_WALK_CB_LIST_LOCK
|
||||
endif
|
||||
|
||||
#
|
||||
# v5.1-rc4-273-ge9b98e162aa5
|
||||
#
|
||||
# introduce stack trace helpers
|
||||
#
|
||||
ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h))
|
||||
ccflags-y += -DKC_STACK_TRACE_SAVE
|
||||
endif
|
||||
|
||||
#
|
||||
# v6.1-rc1-2-g138060ba92b3
|
||||
#
|
||||
@@ -213,12 +496,3 @@ endif
|
||||
ifneq (,$(shell grep 'struct posix_acl.*get_inode_acl' include/linux/fs.h))
|
||||
ccflags-y += -DKC_GET_INODE_ACL
|
||||
endif
|
||||
|
||||
#
|
||||
# v6.15-13744-g41cb08555c41
|
||||
#
|
||||
# from_timer renamed to timer_container_of.
|
||||
#
|
||||
ifneq (,$(shell grep 'define timer_container_of' include/linux/timer.h))
|
||||
ccflags-y += -DKC_TIMER_CONTAINER_OF
|
||||
endif
|
||||
|
||||
+29
-5
@@ -16,7 +16,6 @@
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/posix_acl.h>
|
||||
#include <linux/posix_acl_xattr.h>
|
||||
#include <linux/iversion.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "super.h"
|
||||
@@ -70,6 +69,15 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
|
||||
char *name;
|
||||
int ret;
|
||||
|
||||
#ifndef KC___POSIX_ACL_CREATE
|
||||
if (!IS_POSIXACL(inode))
|
||||
return NULL;
|
||||
|
||||
acl = get_cached_acl(inode, type);
|
||||
if (acl != ACL_NOT_CACHED)
|
||||
return acl;
|
||||
#endif
|
||||
|
||||
ret = acl_xattr_name_len(type, &name, NULL);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
@@ -115,6 +123,11 @@ struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
|
||||
return ERR_PTR(-ECHILD);
|
||||
#endif
|
||||
|
||||
#ifndef KC___POSIX_ACL_CREATE
|
||||
if (!IS_POSIXACL(inode))
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
|
||||
if (ret < 0) {
|
||||
acl = ERR_PTR(ret);
|
||||
@@ -203,8 +216,7 @@ int scoutfs_set_acl(KC_VFS_NS_DEF
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
#else
|
||||
int scoutfs_set_acl(KC_VFS_NS_DEF
|
||||
struct inode *inode, struct posix_acl *acl, int type)
|
||||
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
|
||||
{
|
||||
#endif
|
||||
struct super_block *sb = inode->i_sb;
|
||||
@@ -227,11 +239,17 @@ int scoutfs_set_acl(KC_VFS_NS_DEF
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
return ret;
|
||||
}
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
int scoutfs_acl_get_xattr(const struct xattr_handler *handler, struct dentry *dentry,
|
||||
struct inode *inode, const char *name, void *value,
|
||||
size_t size)
|
||||
{
|
||||
int type = handler->flags;
|
||||
#else
|
||||
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
|
||||
int type)
|
||||
{
|
||||
#endif
|
||||
struct posix_acl *acl;
|
||||
int ret = 0;
|
||||
|
||||
@@ -254,6 +272,7 @@ int scoutfs_acl_get_xattr(const struct xattr_handler *handler, struct dentry *de
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
|
||||
KC_VFS_NS_DEF
|
||||
struct dentry *dentry,
|
||||
@@ -261,6 +280,11 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
|
||||
size_t size, int flags)
|
||||
{
|
||||
int type = handler->flags;
|
||||
#else
|
||||
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
|
||||
int flags, int type)
|
||||
{
|
||||
#endif
|
||||
struct posix_acl *acl = NULL;
|
||||
int ret;
|
||||
|
||||
@@ -276,7 +300,7 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
|
||||
return PTR_ERR(acl);
|
||||
|
||||
if (acl) {
|
||||
ret = posix_acl_valid(&init_user_ns, acl);
|
||||
ret = kc_posix_acl_valid(&init_user_ns, acl);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
@@ -285,7 +309,7 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
|
||||
#ifdef KC_SET_ACL_DENTRY
|
||||
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
|
||||
#else
|
||||
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry->d_inode, acl, type);
|
||||
ret = scoutfs_set_acl(dentry->d_inode, acl, type);
|
||||
#endif
|
||||
out:
|
||||
posix_acl_release(acl);
|
||||
|
||||
+8
-2
@@ -5,8 +5,7 @@
|
||||
int scoutfs_set_acl(KC_VFS_NS_DEF
|
||||
struct dentry *dentry, struct posix_acl *acl, int type);
|
||||
#else
|
||||
int scoutfs_set_acl(KC_VFS_NS_DEF
|
||||
struct inode *inode, struct posix_acl *acl, int type);
|
||||
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
|
||||
#endif
|
||||
#ifdef KC_GET_INODE_ACL
|
||||
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu);
|
||||
@@ -16,6 +15,7 @@ struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
|
||||
struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
|
||||
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
|
||||
struct scoutfs_lock *lock, struct list_head *ind_locks);
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
int scoutfs_acl_get_xattr(const struct xattr_handler *, struct dentry *dentry,
|
||||
struct inode *inode, const char *name, void *value,
|
||||
size_t size);
|
||||
@@ -24,6 +24,12 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *,
|
||||
struct dentry *dentry,
|
||||
struct inode *inode, const char *name, const void *value,
|
||||
size_t size, int flags);
|
||||
#else
|
||||
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
|
||||
int type);
|
||||
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
|
||||
int flags, int type);
|
||||
#endif
|
||||
int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr,
|
||||
struct scoutfs_lock *lock, struct list_head *ind_locks);
|
||||
int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir,
|
||||
|
||||
+12
-22
@@ -218,7 +218,6 @@ static void block_free_work(struct work_struct *work)
|
||||
|
||||
llist_for_each_entry_safe(bp, tmp, deleted, free_node) {
|
||||
block_free(sb, bp);
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -444,13 +443,13 @@ static void block_end_io(struct super_block *sb, blk_opf_t opf,
|
||||
wake_up(&binf->waitq);
|
||||
}
|
||||
|
||||
static void block_bio_end_io(struct bio *bio)
|
||||
static void KC_DECLARE_BIO_END_IO(block_bio_end_io, struct bio *bio)
|
||||
{
|
||||
struct block_private *bp = bio->bi_private;
|
||||
struct super_block *sb = bp->sb;
|
||||
|
||||
TRACE_BLOCK(end_io, bp);
|
||||
block_end_io(sb, bio->bi_opf, bp, blk_status_to_errno(bio->bi_status));
|
||||
block_end_io(sb, kc_bio_get_opf(bio), bp, kc_bio_get_errno(bio));
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
@@ -468,6 +467,9 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
sector_t sector;
|
||||
int ret = 0;
|
||||
|
||||
if (scoutfs_forcing_unmount(sb))
|
||||
return -ENOLINK;
|
||||
|
||||
sector = bp->bl.blkno << (SCOUTFS_BLOCK_LG_SHIFT - 9);
|
||||
|
||||
WARN_ON_ONCE(bp->bl.blkno == U64_MAX);
|
||||
@@ -478,17 +480,6 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
set_bit(BLOCK_BIT_IO_BUSY, &bp->bits);
|
||||
block_get(bp);
|
||||
|
||||
/*
|
||||
* A second thread may already be waiting on this block's completion
|
||||
* after this thread won the race to submit the block. We exit through
|
||||
* the block_end_io error path which sets BLOCK_BIT_ERROR and assures
|
||||
* that other callers in the waitq get woken up.
|
||||
*/
|
||||
if (scoutfs_forcing_unmount(sb)) {
|
||||
ret = -ENOLINK;
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
blk_start_plug(&plug);
|
||||
|
||||
for (off = 0; off < SCOUTFS_BLOCK_LG_SIZE; off += PAGE_SIZE) {
|
||||
@@ -499,7 +490,7 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
break;
|
||||
}
|
||||
|
||||
bio->bi_iter.bi_sector = sector + (off >> 9);
|
||||
kc_bio_set_sector(bio, sector + (off >> 9));
|
||||
bio->bi_end_io = block_bio_end_io;
|
||||
bio->bi_private = bp;
|
||||
|
||||
@@ -516,17 +507,16 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
BUG();
|
||||
|
||||
if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
|
||||
submit_bio(bio);
|
||||
kc_submit_bio(bio);
|
||||
bio = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (bio)
|
||||
submit_bio(bio);
|
||||
kc_submit_bio(bio);
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
end_io:
|
||||
/* let racing end_io know we're done */
|
||||
block_end_io(sb, opf, bp, ret);
|
||||
|
||||
@@ -1179,11 +1169,11 @@ struct sm_block_completion {
|
||||
int err;
|
||||
};
|
||||
|
||||
static void sm_block_bio_end_io(struct bio *bio)
|
||||
static void KC_DECLARE_BIO_END_IO(sm_block_bio_end_io, struct bio *bio)
|
||||
{
|
||||
struct sm_block_completion *sbc = bio->bi_private;
|
||||
|
||||
sbc->err = blk_status_to_errno(bio->bi_status);
|
||||
sbc->err = kc_bio_get_errno(bio);
|
||||
complete(&sbc->comp);
|
||||
bio_put(bio);
|
||||
}
|
||||
@@ -1236,7 +1226,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, blk_op
|
||||
goto out;
|
||||
}
|
||||
|
||||
bio->bi_iter.bi_sector = blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9);
|
||||
kc_bio_set_sector(bio, blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9));
|
||||
bio->bi_end_io = sm_block_bio_end_io;
|
||||
bio->bi_private = &sbc;
|
||||
bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
|
||||
@@ -1244,7 +1234,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, blk_op
|
||||
init_completion(&sbc.comp);
|
||||
sbc.err = 0;
|
||||
|
||||
submit_bio(bio);
|
||||
kc_submit_bio(bio);
|
||||
|
||||
wait_for_completion(&sbc.comp);
|
||||
ret = sbc.err;
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
#ifndef _SCOUTFS_BLOCK_H_
|
||||
#define _SCOUTFS_BLOCK_H_
|
||||
|
||||
struct scoutfs_alloc;
|
||||
|
||||
struct scoutfs_block_writer {
|
||||
spinlock_t lock;
|
||||
struct list_head dirty_list;
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/bsearch.h>
|
||||
|
||||
#include "bsearch_index.h"
|
||||
|
||||
struct bsearch_index_key {
|
||||
int (*cmp)(const void *key, const void *elt);
|
||||
/* the key has to be const, so we have to update the index through a pointer */
|
||||
void **index_elt;
|
||||
const void *key;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
static int cmp_index(const void *key, const void *elt)
|
||||
{
|
||||
const struct bsearch_index_key *bik = key;
|
||||
int cmp = bik->cmp(bik->key, elt);
|
||||
|
||||
if (cmp > 0)
|
||||
*(bik->index_elt) = (void *)elt + bik->size;
|
||||
else
|
||||
*(bik->index_elt) = (void *)elt;
|
||||
|
||||
return cmp;
|
||||
}
|
||||
|
||||
/*
|
||||
* A bsearch() wrapper that returns the index of the element of the
|
||||
* array that the key would be stored in to maintain sort order. It's
|
||||
* the first element where the existing element is greater than the key.
|
||||
* It returns the size of the array if the key is greater than the last
|
||||
* element in the array.
|
||||
*/
|
||||
size_t bsearch_index(const void *key, const void *base, size_t num, size_t size,
|
||||
int (*cmp)(const void *key, const void *elt))
|
||||
{
|
||||
void *index_elt = (void *)base;
|
||||
struct bsearch_index_key bik = {
|
||||
.cmp = cmp,
|
||||
.index_elt = &index_elt,
|
||||
.key = key,
|
||||
.size = size,
|
||||
};
|
||||
|
||||
bsearch(&bik, base, num, size, cmp_index);
|
||||
return ((unsigned long)index_elt - (unsigned long)base) / size;
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
#ifndef _SCOUTFS_BSEARCH_INDEX_H_
|
||||
#define _SCOUTFS_BSEARCH_INDEX_H_
|
||||
|
||||
size_t bsearch_index(const void *key, const void *base, size_t num, size_t size,
|
||||
int (*cmp)(const void *key, const void *elt));
|
||||
|
||||
#endif
|
||||
+20
-4
@@ -1816,6 +1816,11 @@ int scoutfs_btree_dirty(struct super_block *sb,
|
||||
* Call the users callback on all the items in the leaf that we find.
|
||||
* We also set the caller's keys for the first and last possible keys
|
||||
* that could exist in the leaf block.
|
||||
*
|
||||
* The callback can set a new key to continue reading from rather than
|
||||
* iterating over all the items. It modifies the key and returns
|
||||
* -ESRCH, which performs a new avl search. If the modified key falls
|
||||
* outside of the range of keys in the block then we return.
|
||||
*/
|
||||
int scoutfs_btree_read_items(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
@@ -1829,6 +1834,7 @@ int scoutfs_btree_read_items(struct super_block *sb,
|
||||
struct scoutfs_avl_node *next_node;
|
||||
struct scoutfs_avl_node *node;
|
||||
struct btree_walk_key_range kr;
|
||||
struct scoutfs_key cb_key;
|
||||
struct scoutfs_block *bl;
|
||||
int ret;
|
||||
|
||||
@@ -1842,22 +1848,32 @@ int scoutfs_btree_read_items(struct super_block *sb,
|
||||
if (scoutfs_key_compare(&kr.end, end) < 0)
|
||||
*end = kr.end;
|
||||
|
||||
node = scoutfs_avl_search(&bt->item_root, cmp_key_item, start, NULL,
|
||||
cb_key = *start;
|
||||
search:
|
||||
node = scoutfs_avl_search(&bt->item_root, cmp_key_item, &cb_key, NULL,
|
||||
NULL, &next_node, NULL) ?: next_node;
|
||||
while (node) {
|
||||
item = node_item(node);
|
||||
if (scoutfs_key_compare(&item->key, end) > 0)
|
||||
break;
|
||||
|
||||
ret = cb(sb, item_key(item), le64_to_cpu(item->seq), item->flags,
|
||||
cb_key = *item_key(item);
|
||||
ret = cb(sb, &cb_key, le64_to_cpu(item->seq), item->flags,
|
||||
item_val(bt, item), item_val_len(item), arg);
|
||||
if (ret < 0)
|
||||
break;
|
||||
if (ret < 0) {
|
||||
if (ret == -ESRCH) {
|
||||
if (scoutfs_key_compare(&cb_key, start) >= 0)
|
||||
goto search;
|
||||
ret = 0;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
node = scoutfs_avl_next(&bt->item_root, node);
|
||||
}
|
||||
|
||||
scoutfs_block_put(sb, bl);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
+81
-12
@@ -23,7 +23,6 @@
|
||||
#include <linux/fiemap.h>
|
||||
#include <linux/writeback.h>
|
||||
#include <linux/overflow.h>
|
||||
#include <linux/iversion.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "super.h"
|
||||
@@ -423,8 +422,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
|
||||
|
||||
mutex_lock(&datinf->mutex);
|
||||
|
||||
scoutfs_inode_get_onoff(inode, &online, &offline);
|
||||
|
||||
/* default to single allocation at the written block */
|
||||
start = iblock;
|
||||
count = 1;
|
||||
@@ -447,6 +444,7 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
|
||||
* the preallocation size to the number of online
|
||||
* blocks.
|
||||
*/
|
||||
scoutfs_inode_get_onoff(inode, &online, &offline);
|
||||
if (iblock > 1 && iblock == online) {
|
||||
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
|
||||
iblock, 1, &found);
|
||||
@@ -488,13 +486,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
|
||||
/* trim count by next extent after iblock */
|
||||
if (found.len && found.start > start && found.start < start + count)
|
||||
count = (found.start - start);
|
||||
|
||||
/*
|
||||
* Ramp the aligned region size up proportionally with
|
||||
* the file's online block count rather than jumping to
|
||||
* the full prealloc size.
|
||||
*/
|
||||
count = max_t(u64, 1, min(count, online));
|
||||
}
|
||||
|
||||
/* overall prealloc limit */
|
||||
@@ -758,6 +749,54 @@ static int scoutfs_readpage(struct file *file, struct page *page)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifndef KC_FILE_AOPS_READAHEAD
|
||||
/*
|
||||
* This is used for opportunistic read-ahead which can throw the pages
|
||||
* away if it needs to. If the caller didn't deal with offline extents
|
||||
* then we drop those pages rather than trying to wait. Whoever is
|
||||
* staging offline extents should be doing it in enormous chunks so that
|
||||
* read-ahead can ramp up within each staged region. The check for
|
||||
* offline extents is cheap when the inode has no offline extents.
|
||||
*/
|
||||
static int scoutfs_readpages(struct file *file, struct address_space *mapping,
|
||||
struct list_head *pages, unsigned nr_pages)
|
||||
{
|
||||
struct inode *inode = file->f_inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
struct page *page;
|
||||
struct page *tmp;
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
list_for_each_entry_safe(page, tmp, pages, lru) {
|
||||
ret = scoutfs_data_wait_check(inode, page_offset(page),
|
||||
PAGE_SIZE, SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_READ, NULL,
|
||||
inode_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > 0) {
|
||||
list_del(&page->lru);
|
||||
put_page(page);
|
||||
if (--nr_pages == 0) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ret = mpage_readpages(mapping, pages, nr_pages, scoutfs_get_block_read);
|
||||
out:
|
||||
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
|
||||
BUG_ON(!list_empty(pages));
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static void scoutfs_readahead(struct readahead_control *rac)
|
||||
{
|
||||
struct inode *inode = rac->file->f_inode;
|
||||
@@ -779,6 +818,7 @@ static void scoutfs_readahead(struct readahead_control *rac)
|
||||
|
||||
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
|
||||
{
|
||||
@@ -1219,7 +1259,7 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
struct data_ext_args from_args;
|
||||
struct data_ext_args to_args;
|
||||
struct scoutfs_extent ext;
|
||||
struct timespec64 cur_time;
|
||||
struct kc_timespec cur_time;
|
||||
LIST_HEAD(locks);
|
||||
bool done = false;
|
||||
loff_t from_size;
|
||||
@@ -2015,9 +2055,15 @@ int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_MM_VM_FAULT_T
|
||||
static vm_fault_t scoutfs_data_page_mkwrite(struct vm_fault *vmf)
|
||||
{
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
#else
|
||||
static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,
|
||||
struct vm_fault *vmf)
|
||||
{
|
||||
#endif
|
||||
struct page *page = vmf->page;
|
||||
struct file *file = vma->vm_file;
|
||||
struct inode *inode = file_inode(file);
|
||||
@@ -2159,9 +2205,14 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_MM_VM_FAULT_T
|
||||
static vm_fault_t scoutfs_data_filemap_fault(struct vm_fault *vmf)
|
||||
{
|
||||
struct vm_area_struct *vma = vmf->vma;
|
||||
#else
|
||||
static int scoutfs_data_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
#endif
|
||||
struct file *file = vma->vm_file;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
@@ -2196,11 +2247,15 @@ retry:
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef KC_MM_VM_FAULT_T
|
||||
ret = filemap_fault(vmf);
|
||||
#else
|
||||
ret = filemap_fault(vma, vmf);
|
||||
#endif
|
||||
|
||||
out:
|
||||
if (scoutfs_per_task_del(&si->pt_data_lock, &pt_ent))
|
||||
inode_dio_end(inode);
|
||||
kc_inode_dio_end(inode);
|
||||
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
|
||||
if (scoutfs_data_wait_found(&dw)) {
|
||||
err = scoutfs_data_wait(inode, &dw);
|
||||
@@ -2218,6 +2273,9 @@ out:
|
||||
static const struct vm_operations_struct scoutfs_data_file_vm_ops = {
|
||||
.fault = scoutfs_data_filemap_fault,
|
||||
.page_mkwrite = scoutfs_data_page_mkwrite,
|
||||
#ifdef KC_MM_REMAP_PAGES
|
||||
.remap_pages = generic_file_remap_pages,
|
||||
#endif
|
||||
};
|
||||
|
||||
static int scoutfs_file_mmap(struct file *file, struct vm_area_struct *vma)
|
||||
@@ -2235,7 +2293,11 @@ const struct address_space_operations scoutfs_file_aops = {
|
||||
#else
|
||||
.readpage = scoutfs_readpage,
|
||||
#endif
|
||||
#ifndef KC_FILE_AOPS_READAHEAD
|
||||
.readpages = scoutfs_readpages,
|
||||
#else
|
||||
.readahead = scoutfs_readahead,
|
||||
#endif
|
||||
.writepage = scoutfs_writepage,
|
||||
.writepages = scoutfs_writepages,
|
||||
.write_begin = scoutfs_write_begin,
|
||||
@@ -2243,10 +2305,17 @@ const struct address_space_operations scoutfs_file_aops = {
|
||||
};
|
||||
|
||||
const struct file_operations scoutfs_file_fops = {
|
||||
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
|
||||
.read = do_sync_read,
|
||||
.write = do_sync_write,
|
||||
.aio_read = scoutfs_file_aio_read,
|
||||
.aio_write = scoutfs_file_aio_write,
|
||||
#else
|
||||
.read_iter = scoutfs_file_read_iter,
|
||||
.write_iter = scoutfs_file_write_iter,
|
||||
.splice_read = generic_file_splice_read,
|
||||
.splice_write = iter_file_splice_write,
|
||||
#endif
|
||||
.mmap = scoutfs_file_mmap,
|
||||
.unlocked_ioctl = scoutfs_ioctl,
|
||||
.fsync = scoutfs_file_fsync,
|
||||
|
||||
+101
-8
@@ -18,7 +18,6 @@
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/namei.h>
|
||||
#include <linux/mm.h>
|
||||
#include <linux/iversion.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "file.h"
|
||||
@@ -423,7 +422,18 @@ out:
|
||||
else
|
||||
inode = scoutfs_iget(sb, ino, 0, 0);
|
||||
|
||||
return d_splice_alias(inode, dentry);
|
||||
/*
|
||||
* We can't splice dir aliases into the dcache. dir entries
|
||||
* might have changed on other nodes so our dcache could still
|
||||
* contain them, rather than having been moved in rename. For
|
||||
* dirs, we use d_materialize_unique to remove any existing
|
||||
* aliases which must be stale. Our inode numbers aren't reused
|
||||
* so inodes pointed to by entries can't change types.
|
||||
*/
|
||||
if (!IS_ERR_OR_NULL(inode) && S_ISDIR(inode->i_mode))
|
||||
return d_materialise_unique(dentry, inode);
|
||||
else
|
||||
return d_splice_alias(inode, dentry);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -952,7 +962,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
struct super_block *sb = dir->i_sb;
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct timespec64 ts = current_time(inode);
|
||||
struct kc_timespec ts = current_time(inode);
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
struct scoutfs_lock *orph_lock = NULL;
|
||||
struct scoutfs_lock *dir_lock = NULL;
|
||||
@@ -1187,6 +1197,24 @@ out:
|
||||
return path;
|
||||
}
|
||||
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
{
|
||||
char *path;
|
||||
|
||||
path = scoutfs_get_link_target(dentry);
|
||||
if (!IS_ERR_OR_NULL(path))
|
||||
nd_set_link(nd, path);
|
||||
return path;
|
||||
}
|
||||
|
||||
static void scoutfs_put_link(struct dentry *dentry, struct nameidata *nd,
|
||||
void *cookie)
|
||||
{
|
||||
if (!IS_ERR_OR_NULL(cookie))
|
||||
kfree(cookie);
|
||||
}
|
||||
#else
|
||||
static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done)
|
||||
{
|
||||
char *path;
|
||||
@@ -1197,6 +1225,7 @@ static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode,
|
||||
|
||||
return path;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Symlink target paths can be annoyingly large. We store relatively
|
||||
@@ -1606,7 +1635,7 @@ static int scoutfs_rename_common(KC_VFS_NS_DEF
|
||||
struct scoutfs_lock *orph_lock = NULL;
|
||||
struct scoutfs_dirent new_dent;
|
||||
struct scoutfs_dirent old_dent;
|
||||
struct timespec64 now;
|
||||
struct kc_timespec now;
|
||||
bool ins_new = false;
|
||||
bool del_new = false;
|
||||
bool ins_old = false;
|
||||
@@ -1618,9 +1647,6 @@ static int scoutfs_rename_common(KC_VFS_NS_DEF
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
if (flags & ~RENAME_NOREPLACE)
|
||||
return -EINVAL;
|
||||
|
||||
trace_scoutfs_rename(sb, old_dir, old_dentry, new_dir, new_dentry);
|
||||
|
||||
old_hash = dirent_name_hash(old_dentry->d_name.name,
|
||||
@@ -1866,7 +1892,36 @@ out_unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
static int scoutfs_rename(struct inode *old_dir,
|
||||
struct dentry *old_dentry, struct inode *new_dir,
|
||||
struct dentry *new_dentry)
|
||||
{
|
||||
return scoutfs_rename_common(KC_VFS_INIT_NS
|
||||
old_dir, old_dentry, new_dir, new_dentry, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int scoutfs_rename2(KC_VFS_NS_DEF
|
||||
struct inode *old_dir,
|
||||
struct dentry *old_dentry, struct inode *new_dir,
|
||||
struct dentry *new_dentry, unsigned int flags)
|
||||
{
|
||||
if (flags & ~RENAME_NOREPLACE)
|
||||
return -EINVAL;
|
||||
|
||||
return scoutfs_rename_common(KC_VFS_NS
|
||||
old_dir, old_dentry, new_dir, new_dentry, flags);
|
||||
}
|
||||
|
||||
#ifdef KC_FMODE_KABI_ITERATE
|
||||
/* we only need this to set the iterate flag for kabi :/ */
|
||||
static int scoutfs_dir_open(struct inode *inode, struct file *file)
|
||||
{
|
||||
file->f_mode |= FMODE_KABI_ITERATE;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int scoutfs_tmpfile(KC_VFS_NS_DEF
|
||||
struct inode *dir,
|
||||
@@ -1936,15 +1991,29 @@ out:
|
||||
}
|
||||
|
||||
const struct inode_operations scoutfs_symlink_iops = {
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.readlink = generic_readlink,
|
||||
.follow_link = scoutfs_follow_link,
|
||||
.put_link = scoutfs_put_link,
|
||||
#else
|
||||
.get_link = scoutfs_get_link,
|
||||
#endif
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
#ifdef KC_GET_INODE_ACL
|
||||
.get_inode_acl = scoutfs_get_acl,
|
||||
#else
|
||||
.get_acl = scoutfs_get_acl,
|
||||
#endif
|
||||
#ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.tmpfile = scoutfs_tmpfile,
|
||||
.rename = scoutfs_rename_common,
|
||||
.symlink = scoutfs_symlink,
|
||||
@@ -1953,17 +2022,26 @@ const struct inode_operations scoutfs_symlink_iops = {
|
||||
.mkdir = scoutfs_mkdir,
|
||||
.create = scoutfs_create,
|
||||
.lookup = scoutfs_lookup,
|
||||
#endif
|
||||
};
|
||||
|
||||
const struct file_operations scoutfs_dir_fops = {
|
||||
.iterate = scoutfs_readdir,
|
||||
#ifdef KC_FMODE_KABI_ITERATE
|
||||
.open = scoutfs_dir_open,
|
||||
#endif
|
||||
.unlocked_ioctl = scoutfs_ioctl,
|
||||
.fsync = scoutfs_file_fsync,
|
||||
.llseek = generic_file_llseek,
|
||||
};
|
||||
|
||||
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
const struct inode_operations_wrapper scoutfs_dir_iops = {
|
||||
.ops = {
|
||||
#else
|
||||
const struct inode_operations scoutfs_dir_iops = {
|
||||
#endif
|
||||
.lookup = scoutfs_lookup,
|
||||
.mknod = scoutfs_mknod,
|
||||
.create = scoutfs_create,
|
||||
@@ -1973,15 +2051,30 @@ const struct inode_operations scoutfs_dir_iops = {
|
||||
.rmdir = scoutfs_unlink,
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.rename = scoutfs_rename,
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
#ifdef KC_GET_INODE_ACL
|
||||
.get_inode_acl = scoutfs_get_acl,
|
||||
#else
|
||||
.get_acl = scoutfs_get_acl,
|
||||
#endif
|
||||
#ifdef KC_SET_ACL_DENTRY
|
||||
.set_acl = scoutfs_set_acl,
|
||||
#endif
|
||||
.symlink = scoutfs_symlink,
|
||||
.permission = scoutfs_permission,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
},
|
||||
#endif
|
||||
.tmpfile = scoutfs_tmpfile,
|
||||
.rename = scoutfs_rename_common,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.rename2 = scoutfs_rename2,
|
||||
#else
|
||||
.rename = scoutfs_rename2,
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -5,7 +5,11 @@
|
||||
#include "lock.h"
|
||||
|
||||
extern const struct file_operations scoutfs_dir_fops;
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
extern const struct inode_operations_wrapper scoutfs_dir_iops;
|
||||
#else
|
||||
extern const struct inode_operations scoutfs_dir_iops;
|
||||
#endif
|
||||
extern const struct inode_operations scoutfs_symlink_iops;
|
||||
|
||||
extern const struct dentry_operations scoutfs_dentry_ops;
|
||||
|
||||
+3
-2
@@ -222,7 +222,7 @@ static struct attribute *fence_attrs[] = {
|
||||
|
||||
static void fence_timeout(struct timer_list *timer)
|
||||
{
|
||||
struct pending_fence *fence = timer_container_of(fence, timer, timer);
|
||||
struct pending_fence *fence = from_timer(fence, timer, timer);
|
||||
struct super_block *sb = fence->sb;
|
||||
DECLARE_FENCE_INFO(sb, fi);
|
||||
|
||||
@@ -424,7 +424,8 @@ int scoutfs_fence_setup(struct super_block *sb)
|
||||
goto out;
|
||||
}
|
||||
|
||||
fi->wq = alloc_workqueue("scoutfs_fence", WQ_UNBOUND, 0);
|
||||
fi->wq = alloc_workqueue("scoutfs_fence",
|
||||
WQ_UNBOUND | WQ_NON_REENTRANT, 0);
|
||||
if (!fi->wq) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
|
||||
+128
@@ -30,6 +30,133 @@
|
||||
#include "omap.h"
|
||||
#include "quota.h"
|
||||
|
||||
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
|
||||
/*
|
||||
* Start a high level file read. We check for offline extents in the
|
||||
* read region here so that we only check the extents once. We use the
|
||||
* dio count to prevent releasing while we're reading after we've
|
||||
* checked the extents.
|
||||
*/
|
||||
ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *scoutfs_inode_lock = NULL;
|
||||
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
|
||||
DECLARE_DATA_WAIT(dw);
|
||||
int ret;
|
||||
|
||||
retry:
|
||||
/* protect checked extents from release */
|
||||
inode_lock(inode);
|
||||
atomic_inc(&inode->i_dio_count);
|
||||
inode_unlock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
|
||||
ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos,
|
||||
SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_READ,
|
||||
&dw, scoutfs_inode_lock);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
} else {
|
||||
WARN_ON_ONCE(true);
|
||||
}
|
||||
|
||||
ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
|
||||
|
||||
out:
|
||||
inode_dio_done(inode);
|
||||
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
|
||||
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_READ);
|
||||
|
||||
if (scoutfs_data_wait_found(&dw)) {
|
||||
ret = scoutfs_data_wait(inode, &dw);
|
||||
if (ret == 0)
|
||||
goto retry;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *scoutfs_inode_lock = NULL;
|
||||
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
|
||||
DECLARE_DATA_WAIT(dw);
|
||||
int ret;
|
||||
|
||||
if (iocb->ki_left == 0) /* Does this even happen? */
|
||||
return 0;
|
||||
|
||||
retry:
|
||||
inode_lock(inode);
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_inode_check_retention(inode);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
|
||||
/* data_version is per inode, whole file must be online */
|
||||
ret = scoutfs_data_wait_check(inode, 0, i_size_read(inode),
|
||||
SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_WRITE,
|
||||
&dw, scoutfs_inode_lock);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_quota_check_data(sb, inode);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* XXX: remove SUID bit */
|
||||
|
||||
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
|
||||
|
||||
out:
|
||||
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
|
||||
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_WRITE);
|
||||
inode_unlock(inode);
|
||||
|
||||
if (scoutfs_data_wait_found(&dw)) {
|
||||
ret = scoutfs_data_wait(inode, &dw);
|
||||
if (ret == 0)
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (ret > 0 || ret == -EIOCBQUEUED) {
|
||||
ssize_t err;
|
||||
|
||||
err = generic_write_sync(file, pos, ret);
|
||||
if (err < 0 && ret > 0)
|
||||
ret = err;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
ssize_t scoutfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
@@ -138,6 +265,7 @@ out:
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
int scoutfs_permission(KC_VFS_NS_DEF
|
||||
struct inode *inode, int mask)
|
||||
|
||||
@@ -1,8 +1,15 @@
|
||||
#ifndef _SCOUTFS_FILE_H_
|
||||
#define _SCOUTFS_FILE_H_
|
||||
|
||||
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
|
||||
ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos);
|
||||
ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos);
|
||||
#else
|
||||
ssize_t scoutfs_file_read_iter(struct kiocb *, struct iov_iter *);
|
||||
ssize_t scoutfs_file_write_iter(struct kiocb *, struct iov_iter *);
|
||||
#endif
|
||||
int scoutfs_permission(KC_VFS_NS_DEF
|
||||
struct inode *inode, int mask);
|
||||
loff_t scoutfs_file_llseek(struct file *file, loff_t offset, int whence);
|
||||
|
||||
+54
-34
@@ -114,6 +114,42 @@ static struct scoutfs_block *read_bloom_ref(struct super_block *sb, struct scout
|
||||
return bl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns >0 if there was a bloom block and all the bits were present.
|
||||
*/
|
||||
static int all_bloom_bits_present(struct super_block *sb, struct scoutfs_block_ref *ref,
|
||||
struct forest_bloom_nrs *bloom)
|
||||
{
|
||||
struct scoutfs_bloom_block *bb;
|
||||
struct scoutfs_block *bl;
|
||||
int i;
|
||||
|
||||
if (ref->blkno == 0)
|
||||
return 0;
|
||||
|
||||
bl = read_bloom_ref(sb, ref);
|
||||
if (IS_ERR(bl))
|
||||
return PTR_ERR(bl);
|
||||
|
||||
bb = bl->data;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(bloom->nrs); i++) {
|
||||
if (!test_bit_le(bloom->nrs[i], bb->bits))
|
||||
break;
|
||||
}
|
||||
|
||||
scoutfs_block_put(sb, bl);
|
||||
|
||||
/* one of the bloom bits wasn't set */
|
||||
if (i != ARRAY_SIZE(bloom->nrs)) {
|
||||
scoutfs_inc_counter(sb, forest_bloom_fail);
|
||||
return 0;
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, forest_bloom_pass);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is an unlocked iteration across all the btrees to find a hint at
|
||||
* the next key that the caller could read. It's used to find out what
|
||||
@@ -227,9 +263,13 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
|
||||
}
|
||||
|
||||
/*
|
||||
* For each forest btree whose bloom block indicates that the lock might
|
||||
* have items stored, call the caller's callback for every item in the
|
||||
* leaf block in each tree which contains the key.
|
||||
* Call the caller's callback for every item in the leaf blocks in each
|
||||
* forest btree that contain the caller's key.
|
||||
*
|
||||
* If a bloom key is provided then each log tree's bloom block is
|
||||
* checked and only trees with all the bloom key's bloom bits set will
|
||||
* be read from. When the bloom key is null all trees will be read
|
||||
* from.
|
||||
*
|
||||
* The btree iter calls clamp the caller's range to the tightest range
|
||||
* that covers all the blocks. Any keys outside of this range can't be
|
||||
@@ -248,24 +288,17 @@ int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_r
|
||||
.cb_arg = arg,
|
||||
};
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_bloom_block *bb;
|
||||
struct forest_bloom_nrs bloom;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_block *bl;
|
||||
struct scoutfs_key ltk;
|
||||
struct scoutfs_key orig_start = *start;
|
||||
struct scoutfs_key orig_end = *end;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
scoutfs_inc_counter(sb, forest_read_items);
|
||||
calc_bloom_nrs(&bloom, bloom_key);
|
||||
if (bloom_key)
|
||||
calc_bloom_nrs(&bloom, bloom_key);
|
||||
|
||||
trace_scoutfs_forest_using_roots(sb, &roots->fs_root, &roots->logs_root);
|
||||
|
||||
*start = orig_start;
|
||||
*end = orig_end;
|
||||
|
||||
/* start with fs root items */
|
||||
rid.fic |= FIC_FS_ROOT;
|
||||
ret = scoutfs_btree_read_items(sb, &roots->fs_root, key, start, end,
|
||||
@@ -292,30 +325,17 @@ int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_r
|
||||
goto out; /* including stale */
|
||||
}
|
||||
|
||||
if (lt.bloom_ref.blkno == 0)
|
||||
/* we're not expecting -ENOENT from _read_items */
|
||||
if (lt.item_root.ref.blkno == 0)
|
||||
continue;
|
||||
|
||||
bl = read_bloom_ref(sb, <.bloom_ref);
|
||||
if (IS_ERR(bl)) {
|
||||
ret = PTR_ERR(bl);
|
||||
goto out;
|
||||
if (bloom_key) {
|
||||
ret = all_bloom_bits_present(sb, <.bloom_ref, &bloom);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret == 0)
|
||||
continue;
|
||||
}
|
||||
bb = bl->data;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(bloom.nrs); i++) {
|
||||
if (!test_bit_le(bloom.nrs[i], bb->bits))
|
||||
break;
|
||||
}
|
||||
|
||||
scoutfs_block_put(sb, bl);
|
||||
|
||||
/* one of the bloom bits wasn't set */
|
||||
if (i != ARRAY_SIZE(bloom.nrs)) {
|
||||
scoutfs_inc_counter(sb, forest_bloom_fail);
|
||||
continue;
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, forest_bloom_pass);
|
||||
|
||||
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) &&
|
||||
(merge_input_seq == 0 ||
|
||||
@@ -783,7 +803,7 @@ int scoutfs_forest_setup(struct super_block *sb)
|
||||
scoutfs_forest_log_merge_worker);
|
||||
sbi->forest_info = finf;
|
||||
|
||||
finf->workq = alloc_workqueue("scoutfs_log_merge",
|
||||
finf->workq = alloc_workqueue("scoutfs_log_merge", WQ_NON_REENTRANT |
|
||||
WQ_UNBOUND | WQ_HIGHPRI, 0);
|
||||
if (!finf->workq) {
|
||||
ret = -ENOMEM;
|
||||
|
||||
+26
-2
@@ -21,7 +21,6 @@
|
||||
#include <linux/list_sort.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/buffer_head.h>
|
||||
#include <linux/iversion.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "super.h"
|
||||
@@ -144,26 +143,40 @@ void scoutfs_destroy_inode(struct inode *inode)
|
||||
static const struct inode_operations scoutfs_file_iops = {
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
#ifdef KC_GET_INODE_ACL
|
||||
.get_inode_acl = scoutfs_get_acl,
|
||||
#else
|
||||
.get_acl = scoutfs_get_acl,
|
||||
#endif
|
||||
#ifdef KC_SET_ACL_DENTRY
|
||||
.set_acl = scoutfs_set_acl,
|
||||
#endif
|
||||
.fiemap = scoutfs_data_fiemap,
|
||||
};
|
||||
|
||||
static const struct inode_operations scoutfs_special_iops = {
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
#ifdef KC_GET_INODE_ACL
|
||||
.get_inode_acl = scoutfs_get_acl,
|
||||
#else
|
||||
.get_acl = scoutfs_get_acl,
|
||||
#endif
|
||||
#ifdef KC_SET_ACL_DENTRY
|
||||
.set_acl = scoutfs_set_acl,
|
||||
#endif
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -179,7 +192,12 @@ static void set_inode_ops(struct inode *inode)
|
||||
inode->i_fop = &scoutfs_file_fops;
|
||||
break;
|
||||
case S_IFDIR:
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
inode->i_op = &scoutfs_dir_iops.ops;
|
||||
inode->i_flags |= S_IOPS_WRAPPER;
|
||||
#else
|
||||
inode->i_op = &scoutfs_dir_iops;
|
||||
#endif
|
||||
inode->i_fop = &scoutfs_dir_fops;
|
||||
break;
|
||||
case S_IFLNK:
|
||||
@@ -363,11 +381,18 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
#else
|
||||
int scoutfs_getattr(KC_VFS_NS_DEF
|
||||
const struct path *path, struct kstat *stat,
|
||||
u32 request_mask, unsigned int query_flags)
|
||||
{
|
||||
struct inode *inode = d_inode(path->dentry);
|
||||
#endif
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
int ret;
|
||||
@@ -524,7 +549,6 @@ retry:
|
||||
goto out;
|
||||
if (scoutfs_data_wait_found(&dw)) {
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
lock = NULL;
|
||||
|
||||
/* XXX callee locks instead? */
|
||||
inode_unlock(inode);
|
||||
|
||||
+6
-1
@@ -23,7 +23,7 @@ struct scoutfs_inode_info {
|
||||
u64 offline_blocks;
|
||||
u64 proj;
|
||||
u32 flags;
|
||||
struct timespec64 crtime;
|
||||
struct kc_timespec crtime;
|
||||
|
||||
/*
|
||||
* Protects per-inode extent items, most particularly readers
|
||||
@@ -131,9 +131,14 @@ int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock);
|
||||
int scoutfs_inode_check_retention(struct inode *inode);
|
||||
|
||||
int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock);
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat);
|
||||
#else
|
||||
int scoutfs_getattr(KC_VFS_NS_DEF
|
||||
const struct path *path, struct kstat *stat,
|
||||
u32 request_mask, unsigned int query_flags);
|
||||
#endif
|
||||
int scoutfs_setattr(KC_VFS_NS_DEF
|
||||
struct dentry *dentry, struct iattr *attr);
|
||||
|
||||
|
||||
+62
-31
@@ -49,6 +49,7 @@
|
||||
#include "quota.h"
|
||||
#include "scoutfs_trace.h"
|
||||
#include "util.h"
|
||||
#include "raw.h"
|
||||
|
||||
/*
|
||||
* We make inode index items coherent by locking fixed size regions of
|
||||
@@ -489,7 +490,6 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct kiocb kiocb;
|
||||
struct iovec iov;
|
||||
struct iov_iter iter;
|
||||
size_t written;
|
||||
loff_t end_size;
|
||||
loff_t isize;
|
||||
@@ -515,6 +515,10 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
/* the iocb is really only used for the file pointer :P */
|
||||
init_sync_kiocb(&kiocb, file);
|
||||
kiocb.ki_pos = args.offset;
|
||||
#ifdef KC_LINUX_AIO_KI_LEFT
|
||||
kiocb.ki_left = args.length;
|
||||
kiocb.ki_nbytes = args.length;
|
||||
#endif
|
||||
iov.iov_base = (void __user *)(unsigned long)args.buf_ptr;
|
||||
iov.iov_len = args.length;
|
||||
|
||||
@@ -556,9 +560,8 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
pos = args.offset;
|
||||
written = 0;
|
||||
do {
|
||||
iov_iter_init(&iter, WRITE, &iov, 1, args.length);
|
||||
ret = kc_generic_perform_write(&kiocb, &iter, pos);
|
||||
|
||||
ret = generic_file_buffered_write(&kiocb, &iov, 1, pos, &pos,
|
||||
args.length, written);
|
||||
BUG_ON(ret == -EIOCBQUEUED);
|
||||
if (ret > 0)
|
||||
written += ret;
|
||||
@@ -1737,39 +1740,65 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_inject_totl_delta(struct file *file, unsigned long arg)
|
||||
static long scoutfs_ioc_raw_read_meta_seq(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_inject_totl_delta __user *uitd = (void __user *)arg;
|
||||
struct scoutfs_ioctl_inject_totl_delta itd;
|
||||
struct scoutfs_xattr_totl_val tval;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_ioctl_raw_read_meta_seq __user *urms = (void __user *)arg;
|
||||
struct scoutfs_ioctl_raw_read_meta_seq rms;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(&itd, uitd, sizeof(itd)))
|
||||
return -EFAULT;
|
||||
|
||||
scoutfs_xattr_init_totl_key(&key, itd.name);
|
||||
tval.total = cpu_to_le64((u64)itd.total);
|
||||
tval.count = cpu_to_le64((u64)itd.count);
|
||||
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &lock);
|
||||
if (ret < 0)
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_hold_trans(sb, true);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
if (copy_from_user(&rms, urms, sizeof(rms))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_item_delta(sb, &key, &tval, sizeof(tval), lock);
|
||||
if (rms.results_size == 0) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
scoutfs_release_trans(sb);
|
||||
unlock:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
if (rms.results_size < sizeof(struct scoutfs_ioctl_meta_seq) ||
|
||||
rms.results_size > INT_MAX) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_raw_read_meta_seq(sb, &rms, &rms.last);
|
||||
if (ret >= 0 && copy_to_user(&urms->last, &rms.last, sizeof(rms.last)))
|
||||
ret = -EFAULT;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_raw_read_inode_info(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_raw_read_inode_info __user *urii = (void __user *)arg;
|
||||
struct scoutfs_ioctl_raw_read_inode_info rii;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(&rii, urii, sizeof(rii))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (rii.inos_count == 0 || rii.results_size > INT_MAX ||
|
||||
!IS_ALIGNED(rii.inos_ptr, __alignof__(__u64))) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_raw_read_inode_info(sb, &rii);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
@@ -1825,8 +1854,10 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
return scoutfs_ioc_read_xattr_index(file, arg);
|
||||
case SCOUTFS_IOC_PUNCH_OFFLINE:
|
||||
return scoutfs_ioc_punch_offline(file, arg);
|
||||
case SCOUTFS_IOC_INJECT_TOTL_DELTA:
|
||||
return scoutfs_ioc_inject_totl_delta(file, arg);
|
||||
case SCOUTFS_IOC_RAW_READ_META_SEQ:
|
||||
return scoutfs_ioc_raw_read_meta_seq(file, arg);
|
||||
case SCOUTFS_IOC_RAW_READ_INODE_INFO:
|
||||
return scoutfs_ioc_raw_read_inode_info(file, arg);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
||||
+160
-22
@@ -15,20 +15,6 @@
|
||||
|
||||
#define SCOUTFS_IOCTL_MAGIC 0xE8 /* arbitrarily chosen hole in ioctl-number.rst */
|
||||
|
||||
/*
|
||||
* Packed scoutfs keys rarely cross the ioctl boundary so we have a
|
||||
* translation struct.
|
||||
*/
|
||||
struct scoutfs_ioctl_key {
|
||||
__le64 _sk_first;
|
||||
__le64 _sk_second;
|
||||
__le64 _sk_third;
|
||||
__u8 _sk_fourth;
|
||||
__u8 sk_type;
|
||||
__u8 sk_zone;
|
||||
__u8 _pad[5];
|
||||
};
|
||||
|
||||
struct scoutfs_ioctl_walk_inodes_entry {
|
||||
__u64 major;
|
||||
__u64 ino;
|
||||
@@ -877,16 +863,168 @@ struct scoutfs_ioctl_punch_offline {
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 24, struct scoutfs_ioctl_punch_offline)
|
||||
|
||||
/*
|
||||
* Inject a signed (total, count) delta at the totl key @name (a, b, c
|
||||
* match the trailing dotted u64s of a totl xattr name).
|
||||
* Read meta_seq items without cluster locking.
|
||||
*
|
||||
* @start is the first meta_seq item value that could be returned.
|
||||
* {0,0} is the minimum.
|
||||
*
|
||||
* @end is the last meta_seq item value that could be returned.
|
||||
* {U64_MAX, U64_MAX} is the maximum.
|
||||
*
|
||||
* @last is only set on success from the call. It's the last meta_seq
|
||||
* item that could have been returned. This lets the caller detect that
|
||||
* the full input range wasn't explored. Another call can be made with
|
||||
* start set to just after this.
|
||||
*
|
||||
* @results_ptr is a pointer to an array of (struct
|
||||
* scoutfs_ioctl_meta_seq) elements that were found in the input range.
|
||||
*
|
||||
* @results_size is the count of elements in the results_ptr array and
|
||||
* the maximum number of results that can be returned. There must be
|
||||
* room for at least one result.
|
||||
*
|
||||
* Return existing meta_seq items starting from @start until @last.
|
||||
* Partial results can be returned and is indicated by @last being set
|
||||
* to an item before @last.
|
||||
*
|
||||
* The results are sorted first by increasing meta_seq and then by
|
||||
* increasing ino. All of the results are from one version of file
|
||||
* system metadata. This means that an inode can not be found multiple
|
||||
* times within the results of one call.
|
||||
*
|
||||
* This call ignores currently dirty transactions and reads persistent
|
||||
* items directly. A transaction can be written after this call and
|
||||
* cause meta_seq items to appear before or within the results from this
|
||||
* call.
|
||||
*
|
||||
* The number of meta_seq items stored in the results buffer is returned
|
||||
* and @last is updated. 0 items can be returned if none are found
|
||||
* within the input range.
|
||||
*
|
||||
* Unique errors:
|
||||
*
|
||||
* -EINVAL: The result count was 0 or greater than INT_MAX.
|
||||
*
|
||||
* -ESTALE: The results could not be read from one stable version of
|
||||
* file system metadata. Decrease the number of inodes requested.
|
||||
*/
|
||||
struct scoutfs_ioctl_inject_totl_delta {
|
||||
__u64 name[SCOUTFS_IOCTL_XATTR_TOTAL_NAME_NR];
|
||||
__s64 total;
|
||||
__s64 count;
|
||||
struct scoutfs_ioctl_meta_seq {
|
||||
__u64 meta_seq;
|
||||
__u64 ino;
|
||||
};
|
||||
struct scoutfs_ioctl_raw_read_meta_seq {
|
||||
struct scoutfs_ioctl_meta_seq start;
|
||||
struct scoutfs_ioctl_meta_seq end;
|
||||
struct scoutfs_ioctl_meta_seq last;
|
||||
__u64 results_ptr;
|
||||
__u32 results_size;
|
||||
__u32 _pad;
|
||||
};
|
||||
#define SCOUTFS_IOC_RAW_READ_META_SEQ \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 25, struct scoutfs_ioctl_raw_read_meta_seq)
|
||||
|
||||
|
||||
/*
|
||||
* Read inode metadata without cluster locking.
|
||||
*
|
||||
* @inos_ptr is a pointer to an aligned array of 64bit inode numbers.
|
||||
*
|
||||
* @inos_count is the number of elements in the array. The inode
|
||||
* numbers must not be zero, must strictly increase, and must not
|
||||
* contain any duplicates.
|
||||
*
|
||||
* @names_ptr is a pointer to a byte array of xattr names to return with
|
||||
* each inode. The names are identical to those used in
|
||||
* {get,set}xattr(2). The names must be null terminated and no two
|
||||
* names may be equal.
|
||||
*
|
||||
* @names_count is the number of names that will be found in the
|
||||
* names_ptr buffer.
|
||||
*
|
||||
* @results_ptr is a pointer to a buffer that will be filled by the read
|
||||
* inode info results. The result structs and payloads are not aligned.
|
||||
* Callers will almost certainly need to copy them into aligned
|
||||
* addresses before referencing their contents.
|
||||
*
|
||||
* @results_size is the number of bytes available in the results_ptr
|
||||
* buffer.
|
||||
*
|
||||
* For each inode an _INODE result will always be returned. Then a
|
||||
* _XATTR result will be returned for each xattr on the inode that
|
||||
* matches one of the given input names.
|
||||
*
|
||||
* Each call will not return partial results. -ERANGE is returned if the
|
||||
* results for the requested inodes do not fit in the results buffer.
|
||||
*
|
||||
* The info for one call is from one consistent version of the file
|
||||
* system metadata. The call can have to retry if it sees metadata
|
||||
* change during its call. -ESTALE will be returned if it was not able
|
||||
* to read all the inodes info from one metadata version. The number of
|
||||
* inodes being read can be decreased to avoid this.
|
||||
*
|
||||
* Inodes with an nlink of 0 are not returned.
|
||||
*
|
||||
* The size in bytes of filled results is returned. A non-zero return
|
||||
* will always include at least one full
|
||||
* (struct scoutfs_ioctl_raw_read_result) header.
|
||||
*
|
||||
* Unique errors:
|
||||
*
|
||||
* -EINVAL: The inode count can't be zero. The inos ptr must be aligned
|
||||
* to __u64 alignment. The results buffer size can't be larger than
|
||||
* INT_MAX. Inode numbers can't be zero, must be sorted, and can't
|
||||
* have duplicates. The xattr names must be unique, null terminated,
|
||||
* and less than 256 bytes long.
|
||||
*
|
||||
* -ERANGE: The results for the requested inodes do not fit in the
|
||||
* results buffer. Increase the buffer size (perhaps allowing for all
|
||||
* xattrs with large values) or decrease the number of inodes per call.
|
||||
*
|
||||
* -ESTALE: The results could not be read from one stable version of
|
||||
* file system metadata. Decrease the number of inodes requested.
|
||||
*
|
||||
* -EUCLEAN: Internal xattr metadata is inconsistent.
|
||||
*/
|
||||
|
||||
struct scoutfs_ioctl_raw_read_inode_info {
|
||||
__u64 inos_ptr;
|
||||
__u32 inos_count;
|
||||
__u32 names_count;
|
||||
__u64 names_ptr;
|
||||
__u64 results_ptr;
|
||||
__u32 results_size;
|
||||
__u8 _pad[4];
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_INJECT_TOTL_DELTA \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 25, struct scoutfs_ioctl_inject_totl_delta)
|
||||
/*
|
||||
* @type is one of the enums that determines the type of the following
|
||||
* result payload.
|
||||
*
|
||||
* @size is the number of bytes of result payload immediately following
|
||||
* the result struct. It does not include the size of the result struct
|
||||
* header.
|
||||
*/
|
||||
struct scoutfs_ioctl_raw_read_result {
|
||||
__u32 size;
|
||||
__u8 _pad[7];
|
||||
__u8 type;
|
||||
};
|
||||
|
||||
/*
|
||||
* The _INODE result contains an initial 64bit inode number followed by a
|
||||
* struct scoutfs_inode as defined in format.h. The size includes the
|
||||
* 8byte initial inode number. With that subtracted the size of the
|
||||
* inode struct defines its version (and so the fields it supports).
|
||||
*/
|
||||
#define SCOUTFS_IOC_RAW_READ_RESULT_INODE 1
|
||||
/*
|
||||
* The result payload contains the null terminated name and the value.
|
||||
* The value size can be found by subtracting the null terminated name
|
||||
* length from the result size.
|
||||
*/
|
||||
#define SCOUTFS_IOC_RAW_READ_RESULT_XATTR 2
|
||||
|
||||
#define SCOUTFS_IOC_RAW_READ_INODE_INFO \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 25, struct scoutfs_ioctl_raw_read_inode_info)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -79,6 +79,9 @@ struct item_cache_info {
|
||||
struct super_block *sb;
|
||||
struct item_percpu_pages __percpu *pcpu_pages;
|
||||
KC_DEFINE_SHRINKER(shrinker);
|
||||
#ifdef KC_CPU_NOTIFIER
|
||||
struct notifier_block notifier;
|
||||
#endif
|
||||
|
||||
/* often walked, but per-cpu refs are fast path */
|
||||
rwlock_t rwlock;
|
||||
@@ -2581,6 +2584,22 @@ static unsigned long item_cache_scan_objects(struct shrinker *shrink,
|
||||
return freed;
|
||||
}
|
||||
|
||||
#ifdef KC_CPU_NOTIFIER
|
||||
static int item_cpu_callback(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
struct item_cache_info *cinf = container_of(nfb,
|
||||
struct item_cache_info,
|
||||
notifier);
|
||||
struct super_block *sb = cinf->sb;
|
||||
unsigned long cpu = (unsigned long)hcpu;
|
||||
|
||||
if (action == CPU_DEAD)
|
||||
drop_pcpu_pages(sb, cinf, cpu);
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
int scoutfs_item_setup(struct super_block *sb)
|
||||
{
|
||||
@@ -2611,6 +2630,10 @@ int scoutfs_item_setup(struct super_block *sb)
|
||||
KC_INIT_SHRINKER_FUNCS(&cinf->shrinker, item_cache_count_objects,
|
||||
item_cache_scan_objects);
|
||||
KC_REGISTER_SHRINKER(&cinf->shrinker, "scoutfs-item:" SCSBF, SCSB_ARGS(sb));
|
||||
#ifdef KC_CPU_NOTIFIER
|
||||
cinf->notifier.notifier_call = item_cpu_callback;
|
||||
register_hotcpu_notifier(&cinf->notifier);
|
||||
#endif
|
||||
|
||||
sbi->item_cache_info = cinf;
|
||||
return 0;
|
||||
@@ -2628,6 +2651,9 @@ void scoutfs_item_destroy(struct super_block *sb)
|
||||
int cpu;
|
||||
|
||||
if (cinf) {
|
||||
#ifdef KC_CPU_NOTIFIER
|
||||
unregister_hotcpu_notifier(&cinf->notifier);
|
||||
#endif
|
||||
KC_UNREGISTER_SHRINKER(&cinf->shrinker);
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
|
||||
@@ -3,8 +3,119 @@
|
||||
|
||||
#include "kernelcompat.h"
|
||||
|
||||
#ifdef KC_SHRINKER_SHRINK
|
||||
#include <linux/shrinker.h>
|
||||
/*
|
||||
* If a target doesn't have that .{count,scan}_objects() interface then
|
||||
* we have a .shrink() helper that performs the shrink work in terms of
|
||||
* count/scan.
|
||||
*/
|
||||
int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc)
|
||||
{
|
||||
struct kc_shrinker_wrapper *wrapper = container_of(shrink, struct kc_shrinker_wrapper, shrink);
|
||||
unsigned long nr;
|
||||
unsigned long rc;
|
||||
|
||||
if (sc->nr_to_scan != 0) {
|
||||
rc = wrapper->scan_objects(shrink, sc);
|
||||
/* translate magic values to the equivalent for older kernels */
|
||||
if (rc == SHRINK_STOP)
|
||||
return -1;
|
||||
else if (rc == SHRINK_EMPTY)
|
||||
return 0;
|
||||
}
|
||||
|
||||
nr = wrapper->count_objects(shrink, sc);
|
||||
|
||||
return min_t(unsigned long, nr, INT_MAX);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KC_CURRENT_TIME_INODE
|
||||
struct timespec64 kc_current_time(struct inode *inode)
|
||||
{
|
||||
struct timespec64 now;
|
||||
unsigned gran;
|
||||
|
||||
getnstimeofday64(&now);
|
||||
|
||||
if (unlikely(!inode->i_sb)) {
|
||||
WARN(1, "current_time() called with uninitialized super_block in the inode");
|
||||
return now;
|
||||
}
|
||||
|
||||
gran = inode->i_sb->s_time_gran;
|
||||
|
||||
/* Avoid division in the common cases 1 ns and 1 s. */
|
||||
if (gran == 1) {
|
||||
/* nothing */
|
||||
} else if (gran == NSEC_PER_SEC) {
|
||||
now.tv_nsec = 0;
|
||||
} else if (gran > 1 && gran < NSEC_PER_SEC) {
|
||||
now.tv_nsec -= now.tv_nsec % gran;
|
||||
} else {
|
||||
WARN(1, "illegal file time granularity: %u", gran);
|
||||
}
|
||||
|
||||
return now;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
|
||||
ssize_t
|
||||
kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos, loff_t *ppos,
|
||||
size_t count, ssize_t written)
|
||||
{
|
||||
ssize_t status;
|
||||
struct iov_iter i;
|
||||
|
||||
iov_iter_init(&i, WRITE, iov, nr_segs, count);
|
||||
status = kc_generic_perform_write(iocb, &i, pos);
|
||||
|
||||
if (likely(status >= 0)) {
|
||||
written += status;
|
||||
*ppos = pos + status;
|
||||
}
|
||||
|
||||
return written ? written : status;
|
||||
}
|
||||
#endif
|
||||
|
||||
#include <linux/list_lru.h>
|
||||
|
||||
#ifdef KC_LIST_LRU_WALK_CB_ITEM_LOCK
|
||||
static enum lru_status kc_isolate(struct list_head *item, spinlock_t *lock, void *cb_arg)
|
||||
{
|
||||
struct kc_isolate_args *args = cb_arg;
|
||||
|
||||
/* isolate doesn't use list, nr_items updated in caller */
|
||||
return args->isolate(item, NULL, args->cb_arg);
|
||||
}
|
||||
|
||||
unsigned long kc_list_lru_walk(struct list_lru *lru, kc_list_lru_walk_cb_t isolate, void *cb_arg,
|
||||
unsigned long nr_to_walk)
|
||||
{
|
||||
struct kc_isolate_args args = {
|
||||
.isolate = isolate,
|
||||
.cb_arg = cb_arg,
|
||||
};
|
||||
|
||||
return list_lru_walk(lru, kc_isolate, &args, nr_to_walk);
|
||||
}
|
||||
|
||||
unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
|
||||
kc_list_lru_walk_cb_t isolate, void *cb_arg)
|
||||
{
|
||||
struct kc_isolate_args args = {
|
||||
.isolate = isolate,
|
||||
.cb_arg = cb_arg,
|
||||
};
|
||||
|
||||
return list_lru_shrink_walk(lru, sc, kc_isolate, &args);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef KC_LIST_LRU_WALK_CB_LIST_LOCK
|
||||
static enum lru_status kc_isolate(struct list_head *item, struct list_lru_one *list,
|
||||
spinlock_t *lock, void *cb_arg)
|
||||
|
||||
+282
-5
@@ -4,6 +4,146 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
/*
|
||||
* v4.15-rc3-4-gae5e165d855d
|
||||
*
|
||||
* new API for handling inode->i_version. This forces us to
|
||||
* include this API where we need. We include it here for
|
||||
* convenience instead of where it's needed.
|
||||
*/
|
||||
#ifdef KC_NEED_LINUX_IVERSION_H
|
||||
#include <linux/iversion.h>
|
||||
#else
|
||||
/*
|
||||
* Kernels before above version will need to fall back to
|
||||
* manipulating inode->i_version as previous with degraded
|
||||
* methods.
|
||||
*/
|
||||
#define inode_set_iversion_queried(inode, val) \
|
||||
do { \
|
||||
(inode)->i_version = val; \
|
||||
} while (0)
|
||||
#define inode_peek_iversion(inode) \
|
||||
({ \
|
||||
(inode)->i_version; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#ifdef KC_POSIX_ACL_VALID_USER_NS
|
||||
#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(user_ns, acl)
|
||||
#else
|
||||
#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(acl)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* v3.6-rc1-24-gdbf2576e37da
|
||||
*
|
||||
* All workqueues are now non-reentrant, and the bit flag is removed
|
||||
* shortly after its uses were removed.
|
||||
*/
|
||||
#ifndef WQ_NON_REENTRANT
|
||||
#define WQ_NON_REENTRANT 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* v3.18-rc2-19-gb5ae6b15bd73
|
||||
*
|
||||
* Folds d_materialise_unique into d_splice_alias. Note reversal
|
||||
* of arguments (Also note Documentation/filesystems/porting.rst)
|
||||
*/
|
||||
#ifndef KC_D_MATERIALISE_UNIQUE
|
||||
#define d_materialise_unique(dentry, inode) d_splice_alias(inode, dentry)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* v4.8-rc1-29-g31051c85b5e2
|
||||
*
|
||||
* fall back to inode_change_ok() if setattr_prepare() isn't available
|
||||
*/
|
||||
#ifndef KC_SETATTR_PREPARE
|
||||
#define setattr_prepare(dentry, attr) inode_change_ok(d_inode(dentry), attr)
|
||||
#endif
|
||||
|
||||
#ifndef KC___POSIX_ACL_CREATE
|
||||
#define __posix_acl_create posix_acl_create
|
||||
#define __posix_acl_chmod posix_acl_chmod
|
||||
#endif
|
||||
|
||||
#ifndef KC_PERCPU_COUNTER_ADD_BATCH
|
||||
#define percpu_counter_add_batch __percpu_counter_add
|
||||
#endif
|
||||
|
||||
#ifndef KC_MEMALLOC_NOFS_SAVE
|
||||
#define memalloc_nofs_save memalloc_noio_save
|
||||
#define memalloc_nofs_restore memalloc_noio_restore
|
||||
#endif
|
||||
|
||||
#ifdef KC_BIO_BI_OPF
|
||||
#define kc_bio_get_opf(bio) \
|
||||
({ \
|
||||
(bio)->bi_opf; \
|
||||
})
|
||||
#define kc_bio_set_opf(bio, opf) \
|
||||
do { \
|
||||
(bio)->bi_opf = opf; \
|
||||
} while (0)
|
||||
#define kc_bio_set_sector(bio, sect) \
|
||||
do { \
|
||||
(bio)->bi_iter.bi_sector = sect;\
|
||||
} while (0)
|
||||
#define kc_submit_bio(bio) submit_bio(bio)
|
||||
#else
|
||||
#define kc_bio_get_opf(bio) \
|
||||
({ \
|
||||
(bio)->bi_rw; \
|
||||
})
|
||||
#define kc_bio_set_opf(bio, opf) \
|
||||
do { \
|
||||
(bio)->bi_rw = opf; \
|
||||
} while (0)
|
||||
#define kc_bio_set_sector(bio, sect) \
|
||||
do { \
|
||||
(bio)->bi_sector = sect; \
|
||||
} while (0)
|
||||
#define kc_submit_bio(bio) \
|
||||
do { \
|
||||
submit_bio((bio)->bi_rw, bio); \
|
||||
} while (0)
|
||||
#define bio_set_dev(bio, bdev) \
|
||||
do { \
|
||||
(bio)->bi_bdev = (bdev); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#ifdef KC_BIO_BI_STATUS
|
||||
#define KC_DECLARE_BIO_END_IO(name, bio) name(bio)
|
||||
#define kc_bio_get_errno(bio) ({ blk_status_to_errno((bio)->bi_status); })
|
||||
#else
|
||||
#define KC_DECLARE_BIO_END_IO(name, bio) name(bio, int _error_arg)
|
||||
#define kc_bio_get_errno(bio) ({ (int)((void)(bio), _error_arg); })
|
||||
#endif
|
||||
|
||||
/*
|
||||
* v4.13-rc1-6-ge462ec50cb5f
|
||||
*
|
||||
* MS_* (mount) flags from <linux/mount.h> should not be used in the kernel
|
||||
* anymore from 4.x onwards. Instead, we need to use the SB_* (superblock) flags
|
||||
*/
|
||||
#ifndef SB_POSIXACL
|
||||
#define SB_POSIXACL MS_POSIXACL
|
||||
#define SB_I_VERSION MS_I_VERSION
|
||||
#endif
|
||||
|
||||
#ifndef KC_CURRENT_TIME_INODE
|
||||
struct timespec64 kc_current_time(struct inode *inode);
|
||||
#define current_time kc_current_time
|
||||
#define kc_timespec timespec
|
||||
#else
|
||||
#define kc_timespec timespec64
|
||||
#endif
|
||||
|
||||
#ifndef KC_SHRINKER_SHRINK
|
||||
|
||||
#define KC_DEFINE_SHRINKER(name) struct shrinker name
|
||||
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
|
||||
__typeof__(name) _shrink = (name); \
|
||||
@@ -20,7 +160,77 @@
|
||||
#endif /* KC_SHRINKER_NAME */
|
||||
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr))
|
||||
#define KC_SHRINKER_FN(ptr) (ptr)
|
||||
#else
|
||||
|
||||
#include <linux/shrinker.h>
|
||||
#ifndef SHRINK_STOP
|
||||
#define SHRINK_STOP (~0UL)
|
||||
#define SHRINK_EMPTY (~0UL - 1)
|
||||
#endif
|
||||
|
||||
int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc);
|
||||
struct kc_shrinker_wrapper {
|
||||
unsigned long (*count_objects)(struct shrinker *, struct shrink_control *sc);
|
||||
unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc);
|
||||
struct shrinker shrink;
|
||||
};
|
||||
|
||||
#define KC_DEFINE_SHRINKER(name) struct kc_shrinker_wrapper name;
|
||||
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
|
||||
struct kc_shrinker_wrapper *_wrap = (name); \
|
||||
_wrap->count_objects = (countfn); \
|
||||
_wrap->scan_objects = (scanfn); \
|
||||
_wrap->shrink.shrink = kc_shrink_wrapper_fn; \
|
||||
_wrap->shrink.seeks = DEFAULT_SEEKS; \
|
||||
} while (0)
|
||||
#define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(container_of(ptr, struct kc_shrinker_wrapper, shrink), type, shrinker)
|
||||
#define KC_REGISTER_SHRINKER(ptr, fmt, ...) (register_shrinker(ptr.shrink))
|
||||
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr.shrink))
|
||||
#define KC_SHRINKER_FN(ptr) (ptr.shrink)
|
||||
|
||||
#endif /* KC_SHRINKER_SHRINK */
|
||||
|
||||
#ifdef KC_KERNEL_GETSOCKNAME_ADDRLEN
|
||||
#include <linux/net.h>
|
||||
#include <linux/inet.h>
|
||||
static inline int kc_kernel_getsockname(struct socket *sock, struct sockaddr *addr)
|
||||
{
|
||||
int addrlen = sizeof(struct sockaddr_in);
|
||||
int ret = kernel_getsockname(sock, addr, &addrlen);
|
||||
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
|
||||
return -EAFNOSUPPORT;
|
||||
else if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return sizeof(struct sockaddr_in);
|
||||
}
|
||||
static inline int kc_kernel_getpeername(struct socket *sock, struct sockaddr *addr)
|
||||
{
|
||||
int addrlen = sizeof(struct sockaddr_in);
|
||||
int ret = kernel_getpeername(sock, addr, &addrlen);
|
||||
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
|
||||
return -EAFNOSUPPORT;
|
||||
else if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return sizeof(struct sockaddr_in);
|
||||
}
|
||||
#else
|
||||
#define kc_kernel_getsockname(sock, addr) kernel_getsockname(sock, addr)
|
||||
#define kc_kernel_getpeername(sock, addr) kernel_getpeername(sock, addr)
|
||||
#endif
|
||||
|
||||
#ifdef KC_SOCK_CREATE_KERN_NET
|
||||
#define kc_sock_create_kern(family, type, proto, res) sock_create_kern(&init_net, family, type, proto, res)
|
||||
#else
|
||||
#define kc_sock_create_kern sock_create_kern
|
||||
#endif
|
||||
|
||||
#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
|
||||
ssize_t kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos, loff_t *ppos,
|
||||
size_t count, ssize_t written);
|
||||
#define generic_file_buffered_write kc_generic_file_buffered_write
|
||||
#ifdef KC_GENERIC_PERFORM_WRITE_KIOCB_IOV_ITER
|
||||
static inline int kc_generic_perform_write(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
|
||||
{
|
||||
@@ -34,6 +244,7 @@ static inline int kc_generic_perform_write(struct kiocb *iocb, struct iov_iter *
|
||||
return generic_perform_write(file, iter, pos);
|
||||
}
|
||||
#endif
|
||||
#endif // KC_GENERIC_FILE_BUFFERED_WRITE
|
||||
|
||||
#ifndef KC_HAVE_BLK_OPF_T
|
||||
/* typedef __u32 __bitwise blk_opf_t; */
|
||||
@@ -77,7 +288,7 @@ static inline struct bio *kc_bio_alloc(struct block_device *bdev, unsigned short
|
||||
{
|
||||
struct bio *b = bio_alloc(gfp_mask, nr_vecs);
|
||||
if (b) {
|
||||
b->bi_opf = opf;
|
||||
kc_bio_set_opf(b, opf);
|
||||
bio_set_dev(b, bdev);
|
||||
}
|
||||
return b;
|
||||
@@ -88,6 +299,11 @@ static inline struct bio *kc_bio_alloc(struct block_device *bdev, unsigned short
|
||||
#define fiemap_prep(inode, fieinfo, start, len, flags) fiemap_check_flags(fieinfo, flags)
|
||||
#endif
|
||||
|
||||
#ifndef KC_KERNEL_OLD_TIMEVAL_STRUCT
|
||||
#define __kernel_old_timeval timeval
|
||||
#define ns_to_kernel_old_timeval(ktime) ns_to_timeval(ktime.tv64)
|
||||
#endif
|
||||
|
||||
#ifdef KC_SOCK_SET_SNDTIMEO
|
||||
#include <net/sock.h>
|
||||
static inline int kc_sock_set_sndtimeo(struct socket *sock, s64 secs)
|
||||
@@ -184,14 +400,45 @@ static inline int kc_tcp_sock_set_nodelay(struct socket *sock)
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef KC_INODE_DIO_END
|
||||
#define kc_inode_dio_end inode_dio_end
|
||||
#else
|
||||
#define kc_inode_dio_end inode_dio_done
|
||||
#endif
|
||||
|
||||
#ifndef KC_MM_VM_FAULT_T
|
||||
typedef unsigned int vm_fault_t;
|
||||
static inline vm_fault_t vmf_error(int err)
|
||||
{
|
||||
if (err == -ENOMEM)
|
||||
return VM_FAULT_OOM;
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
#endif
|
||||
|
||||
#include <linux/list_lru.h>
|
||||
|
||||
#ifndef KC_LIST_LRU_SHRINK_COUNT_WALK
|
||||
/* we don't bother with sc->{nid,memcg} (which doesn't exist in oldest kernels) */
|
||||
static inline unsigned long list_lru_shrink_count(struct list_lru *lru,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
return list_lru_count(lru);
|
||||
}
|
||||
static inline unsigned long
|
||||
list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
|
||||
list_lru_walk_cb isolate, void *cb_arg)
|
||||
{
|
||||
return list_lru_walk(lru, isolate, cb_arg, sc->nr_to_scan);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KC_LIST_LRU_ADD_OBJ
|
||||
#define list_lru_add_obj list_lru_add
|
||||
#define list_lru_del_obj list_lru_del
|
||||
#endif
|
||||
|
||||
#if defined(KC_LIST_LRU_WALK_CB_LIST_LOCK)
|
||||
#if defined(KC_LIST_LRU_WALK_CB_LIST_LOCK) || defined(KC_LIST_LRU_WALK_CB_ITEM_LOCK)
|
||||
struct list_lru_one;
|
||||
typedef enum lru_status (*kc_list_lru_walk_cb_t)(struct list_head *item, struct list_lru_one *list,
|
||||
void *cb_arg);
|
||||
@@ -207,9 +454,39 @@ unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_contro
|
||||
#define kc_list_lru_shrink_walk list_lru_shrink_walk
|
||||
#endif
|
||||
|
||||
#ifndef KC_TIMER_CONTAINER_OF
|
||||
#define timer_container_of(var, callback_timer, timer_fieldname) \
|
||||
from_timer(var, callback_timer, timer_fieldname)
|
||||
#if defined(KC_LIST_LRU_WALK_CB_ITEM_LOCK)
|
||||
/* isolate moved by hand, nr_items updated in walk as _REMOVE returned */
|
||||
static inline void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
|
||||
struct list_head *head)
|
||||
{
|
||||
list_move(item, head);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KC_STACK_TRACE_SAVE
|
||||
#include <linux/stacktrace.h>
|
||||
static inline unsigned int stack_trace_save(unsigned long *store, unsigned int size,
|
||||
unsigned int skipnr)
|
||||
{
|
||||
struct stack_trace trace = {
|
||||
.entries = store,
|
||||
.max_entries = size,
|
||||
.skip = skipnr,
|
||||
};
|
||||
|
||||
save_stack_trace(&trace);
|
||||
return trace.nr_entries;
|
||||
}
|
||||
|
||||
static inline void stack_trace_print(unsigned long *entries, unsigned int nr_entries, int spaces)
|
||||
{
|
||||
struct stack_trace trace = {
|
||||
.entries = entries,
|
||||
.nr_entries = nr_entries,
|
||||
};
|
||||
|
||||
print_stack_trace(&trace, spaces);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
+14
-9
@@ -813,7 +813,6 @@ int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
|
||||
|
||||
out:
|
||||
if (!lock) {
|
||||
kfree(ireq);
|
||||
ret = scoutfs_client_lock_response(sb, net_id, nl);
|
||||
BUG_ON(ret); /* lock server doesn't fence timed out client requests */
|
||||
}
|
||||
@@ -1094,19 +1093,24 @@ out_unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void scoutfs_lock_get_fs_item_range(u64 ino, struct scoutfs_key *start, struct scoutfs_key *end)
|
||||
{
|
||||
scoutfs_key_set_zeros(start);
|
||||
start->sk_zone = SCOUTFS_FS_ZONE;
|
||||
start->ski_ino = cpu_to_le64(ino & ~(u64)SCOUTFS_LOCK_INODE_GROUP_MASK);
|
||||
|
||||
scoutfs_key_set_ones(end);
|
||||
end->sk_zone = SCOUTFS_FS_ZONE;
|
||||
end->ski_ino = cpu_to_le64(ino | SCOUTFS_LOCK_INODE_GROUP_MASK);
|
||||
}
|
||||
|
||||
int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int flags, u64 ino,
|
||||
struct scoutfs_lock **ret_lock)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
|
||||
scoutfs_key_set_zeros(&start);
|
||||
start.sk_zone = SCOUTFS_FS_ZONE;
|
||||
start.ski_ino = cpu_to_le64(ino & ~(u64)SCOUTFS_LOCK_INODE_GROUP_MASK);
|
||||
|
||||
scoutfs_key_set_ones(&end);
|
||||
end.sk_zone = SCOUTFS_FS_ZONE;
|
||||
end.ski_ino = cpu_to_le64(ino | SCOUTFS_LOCK_INODE_GROUP_MASK);
|
||||
scoutfs_lock_get_fs_item_range(ino, &start, &end);
|
||||
|
||||
return lock_key_range(sb, mode, flags, &start, &end, ret_lock);
|
||||
}
|
||||
@@ -1693,7 +1697,8 @@ int scoutfs_lock_setup(struct super_block *sb)
|
||||
}
|
||||
|
||||
linfo->workq = alloc_workqueue("scoutfs_lock_client_work",
|
||||
WQ_UNBOUND | WQ_HIGHPRI, 0);
|
||||
WQ_NON_REENTRANT | WQ_UNBOUND |
|
||||
WQ_HIGHPRI, 0);
|
||||
if (!linfo->workq) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
|
||||
@@ -65,6 +65,7 @@ int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
|
||||
int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_key *key);
|
||||
|
||||
void scoutfs_lock_get_fs_item_range(u64 ino, struct scoutfs_key *start, struct scoutfs_key *end);
|
||||
int scoutfs_lock_inode(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct inode *inode, struct scoutfs_lock **ret_lock);
|
||||
int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int flags, u64 ino,
|
||||
|
||||
+13
-10
@@ -525,7 +525,7 @@ static int process_response(struct scoutfs_net_connection *conn,
|
||||
struct super_block *sb = conn->sb;
|
||||
struct message_send *msend;
|
||||
scoutfs_net_response_t resp_func = NULL;
|
||||
void *resp_data = NULL;
|
||||
void *resp_data;
|
||||
|
||||
spin_lock(&conn->lock);
|
||||
|
||||
@@ -804,7 +804,7 @@ static void scoutfs_net_recv_worker(struct work_struct *work)
|
||||
if (invalid_message(conn, nh)) {
|
||||
scoutfs_inc_counter(sb, net_recv_invalid_message);
|
||||
ret = -EBADMSG;
|
||||
goto out;
|
||||
break;
|
||||
}
|
||||
|
||||
data_len = le16_to_cpu(nh->data_len);
|
||||
@@ -1113,11 +1113,11 @@ static int sock_opts_and_names(struct super_block *sb,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
|
||||
ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = kernel_getpeername(sock, (struct sockaddr *)&conn->peername);
|
||||
ret = kc_kernel_getpeername(sock, (struct sockaddr *)&conn->peername);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -1218,7 +1218,7 @@ static void scoutfs_net_connect_worker(struct work_struct *work)
|
||||
|
||||
trace_scoutfs_net_connect_work_enter(sb, 0, 0);
|
||||
|
||||
ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
|
||||
ret = kc_sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -1517,7 +1517,8 @@ scoutfs_net_alloc_conn(struct super_block *sb,
|
||||
conn->ordered_proc_wlists = kmalloc_array(nr, sizeof(struct scoutfs_work_list),
|
||||
GFP_NOFS);
|
||||
conn->workq = alloc_workqueue("scoutfs_net_%s",
|
||||
WQ_UNBOUND, 0, name_suffix);
|
||||
WQ_UNBOUND | WQ_NON_REENTRANT, 0,
|
||||
name_suffix);
|
||||
}
|
||||
if (!conn || (info_size && !conn->info) || !conn->workq || !conn->ordered_proc_wlists) {
|
||||
if (conn) {
|
||||
@@ -1629,7 +1630,7 @@ int scoutfs_net_bind(struct super_block *sb,
|
||||
if (WARN_ON_ONCE(conn->sock))
|
||||
return -EINVAL;
|
||||
|
||||
ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
|
||||
ret = kc_sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -1650,7 +1651,7 @@ int scoutfs_net_bind(struct super_block *sb,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
|
||||
ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -2098,9 +2099,11 @@ int scoutfs_net_setup(struct super_block *sb)
|
||||
scoutfs_tseq_tree_init(&ninf->msg_tseq_tree, net_tseq_show_msg);
|
||||
|
||||
ninf->shutdown_workq = alloc_workqueue("scoutfs_net_shutdown",
|
||||
WQ_UNBOUND, 0);
|
||||
WQ_UNBOUND | WQ_NON_REENTRANT,
|
||||
0);
|
||||
ninf->destroy_workq = alloc_workqueue("scoutfs_net_destroy",
|
||||
WQ_UNBOUND, 0);
|
||||
WQ_UNBOUND | WQ_NON_REENTRANT,
|
||||
0);
|
||||
if (!ninf->shutdown_workq || !ninf->destroy_workq) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
|
||||
+3
-2
@@ -183,7 +183,7 @@ static int create_socket(struct super_block *sb)
|
||||
int addrlen;
|
||||
int ret;
|
||||
|
||||
ret = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
|
||||
ret = kc_sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
|
||||
if (ret) {
|
||||
scoutfs_err(sb, "quorum couldn't create udp socket: %d", ret);
|
||||
goto out;
|
||||
@@ -1332,7 +1332,8 @@ int scoutfs_quorum_setup(struct super_block *sb)
|
||||
|
||||
/* a high priority single threaded context without mem reclaim */
|
||||
qinf->workq = alloc_workqueue("scoutfs_quorum_work",
|
||||
WQ_UNBOUND | WQ_HIGHPRI, 1);
|
||||
WQ_NON_REENTRANT | WQ_UNBOUND |
|
||||
WQ_HIGHPRI, 1);
|
||||
if (!qinf->workq) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
|
||||
+10
-13
@@ -1114,7 +1114,6 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
|
||||
goto release;
|
||||
}
|
||||
|
||||
wait_event(qtinf->waitq, !ruleset_is_busy(qtinf));
|
||||
scoutfs_quota_invalidate(sb);
|
||||
ret = 0;
|
||||
|
||||
@@ -1143,17 +1142,12 @@ void scoutfs_quota_get_lock_range(struct scoutfs_key *start, struct scoutfs_key
|
||||
}
|
||||
|
||||
/*
|
||||
* Mark the cached ruleset invalid and free the previous one once readers
|
||||
* drain. Called from cluster lock invalidation and from quota rule
|
||||
* modification.
|
||||
*
|
||||
* Cluster lock invalidation runs only after the lock layer has drained
|
||||
* local READ users. Since EBUSY is set only while a reader holds READ,
|
||||
* the reader has already published by the time we run.
|
||||
*
|
||||
* Quota rule modification waits on the waitq for any in-flight reader
|
||||
* to publish before calling here, so the next check rebuilds against
|
||||
* the newly written rules rather than the reader's stale result.
|
||||
* This is called during cluster lock invalidation to indicate that the
|
||||
* ruleset is no longer protected by cluster locking and might have been
|
||||
* modified. We mark the ruleset invalid and free it once all readers
|
||||
* drain. The next check will acquire the cluster lock and read the
|
||||
* rules. Because this is called during invalidation this is serialized
|
||||
* with write holders of cluster locks so we can never see -EBUSY here.
|
||||
*/
|
||||
void scoutfs_quota_invalidate(struct super_block *sb)
|
||||
{
|
||||
@@ -1167,10 +1161,13 @@ void scoutfs_quota_invalidate(struct super_block *sb)
|
||||
|
||||
spin_lock(&qtinf->lock);
|
||||
rs = rcu_dereference_protected(qtinf->ruleset, lockdep_is_held(&qtinf->lock));
|
||||
if (rs == ERR_PTR(-ENOENT) || !IS_ERR(rs))
|
||||
if (rs != ERR_PTR(-EINVAL))
|
||||
rcu_assign_pointer(qtinf->ruleset, ERR_PTR(-EINVAL));
|
||||
spin_unlock(&qtinf->lock);
|
||||
|
||||
/* cluster locking should have prevented this */
|
||||
BUG_ON(rs == ERR_PTR(-EBUSY));
|
||||
|
||||
if (!IS_ERR(rs))
|
||||
call_rcu(&rs->rcu, free_ruleset_rcu);
|
||||
|
||||
|
||||
+744
@@ -0,0 +1,744 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include <linux/sort.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "key.h"
|
||||
#include "block.h"
|
||||
#include "inode.h"
|
||||
#include "forest.h"
|
||||
#include "client.h"
|
||||
#include "ioctl.h"
|
||||
#include "lock.h"
|
||||
#include "xattr.h"
|
||||
#include "attr_x.h"
|
||||
#include "bsearch_index.h"
|
||||
#include "raw.h"
|
||||
|
||||
struct fs_item {
|
||||
struct list_head head;
|
||||
struct scoutfs_key key;
|
||||
u64 seq;
|
||||
int val_len;
|
||||
bool deletion;
|
||||
/* val is aligned so we can deref structs in vals */
|
||||
u8 val[0] __aligned(ARCH_KMALLOC_MINALIGN);
|
||||
};
|
||||
|
||||
static int save_fs_item(struct list_head *list, struct scoutfs_key *key, u64 seq, u8 flags,
|
||||
void *val, int val_len)
|
||||
{
|
||||
struct fs_item *fsi;
|
||||
|
||||
/* max btree val len is hundreds of bytes */
|
||||
fsi = kmalloc(offsetof(struct fs_item, val[val_len]), GFP_NOFS);
|
||||
if (!fsi)
|
||||
return -ENOMEM;
|
||||
|
||||
fsi->key = *key;
|
||||
fsi->seq = seq;
|
||||
fsi->val_len = val_len;
|
||||
fsi->deletion = !!(flags & SCOUTFS_ITEM_FLAG_DELETION);
|
||||
if (val_len > 0)
|
||||
memcpy(fsi->val, val, val_len);
|
||||
list_add_tail(&fsi->head, list);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void free_fs_item(struct fs_item *fsi)
|
||||
{
|
||||
if (!list_empty(&fsi->head))
|
||||
list_del_init(&fsi->head);
|
||||
kfree(fsi);
|
||||
}
|
||||
|
||||
static void free_fs_items(struct list_head *list)
|
||||
{
|
||||
struct fs_item *fsi;
|
||||
struct fs_item *tmp;
|
||||
|
||||
list_for_each_entry_safe(fsi, tmp, list, head)
|
||||
free_fs_item(fsi);
|
||||
}
|
||||
|
||||
static struct fs_item *next_fs_item(struct list_head *list, struct fs_item *fsi)
|
||||
{
|
||||
list_for_each_entry_continue(fsi, list, head)
|
||||
return fsi;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int cmp_fs_items(void *priv, KC_LIST_CMP_CONST struct list_head *A,
|
||||
KC_LIST_CMP_CONST struct list_head *B)
|
||||
{
|
||||
KC_LIST_CMP_CONST struct fs_item *a =
|
||||
container_of(A, KC_LIST_CMP_CONST struct fs_item, head);
|
||||
KC_LIST_CMP_CONST struct fs_item *b =
|
||||
container_of(B, KC_LIST_CMP_CONST struct fs_item, head);
|
||||
|
||||
return scoutfs_key_compare(&a->key, &b->key) ?: -scoutfs_cmp(a->seq, b->seq);
|
||||
}
|
||||
|
||||
static void sort_and_remove(struct list_head *list, struct scoutfs_key *end)
|
||||
{
|
||||
struct fs_item *prev;
|
||||
struct fs_item *fsi;
|
||||
struct fs_item *tmp;
|
||||
|
||||
list_sort(NULL, list, cmp_fs_items);
|
||||
|
||||
/* start by removing any items read before end was decreased by later blocks */
|
||||
list_for_each_entry_safe_reverse(fsi, tmp, list, head) {
|
||||
if (scoutfs_key_compare(&fsi->key, end) > 0)
|
||||
free_fs_item(fsi);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
prev = NULL;
|
||||
list_for_each_entry_safe(fsi, tmp, list, head) {
|
||||
/* remove this item if it's an older version of previous item */
|
||||
if (prev && scoutfs_key_compare(&prev->key, &fsi->key) == 0) {
|
||||
free_fs_item(fsi);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* remove previous deletion item once it has removed all older versions */
|
||||
if (prev && prev->deletion)
|
||||
free_fs_item(prev);
|
||||
|
||||
/* next item might match this, record to compare */
|
||||
prev = fsi;
|
||||
}
|
||||
|
||||
/* remove the last item if it's a deletion */
|
||||
list_for_each_entry_reverse(fsi, list, head) {
|
||||
if (fsi->deletion)
|
||||
free_fs_item(fsi);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
static int save_all_items(struct super_block *sb, struct scoutfs_key *key, u64 seq, u8 flags,
|
||||
void *val, int val_len, int fic, void *arg)
|
||||
{
|
||||
struct list_head *list = arg;
|
||||
|
||||
return save_fs_item(list, key, seq, flags, val, val_len);
|
||||
}
|
||||
|
||||
/* -------------- */
|
||||
|
||||
static void ms_from_key(struct scoutfs_ioctl_meta_seq *ms, struct scoutfs_key *key)
|
||||
{
|
||||
ms->meta_seq = le64_to_cpu(key->skii_major);
|
||||
ms->ino = le64_to_cpu(key->skii_ino);
|
||||
}
|
||||
|
||||
/*
|
||||
* Increment the key's ino->meta_seq so that we don't land between items.
|
||||
*/
|
||||
static void inc_meta_seq(struct scoutfs_key *key)
|
||||
{
|
||||
le64_add_cpu(&key->skii_ino, 1);
|
||||
if (key->skii_ino == 0)
|
||||
le64_add_cpu(&key->skii_major, 1);
|
||||
}
|
||||
|
||||
int scoutfs_raw_read_meta_seq(struct super_block *sb,
|
||||
struct scoutfs_ioctl_raw_read_meta_seq *rms,
|
||||
struct scoutfs_ioctl_meta_seq *last_ret)
|
||||
{
|
||||
struct scoutfs_ioctl_meta_seq __user *ums;
|
||||
struct scoutfs_ioctl_meta_seq ms;
|
||||
struct scoutfs_net_roots roots;
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key end;
|
||||
struct fs_item *fsi;
|
||||
struct fs_item *tmp;
|
||||
LIST_HEAD(list);
|
||||
int retries;
|
||||
int copied;
|
||||
int count;
|
||||
int ret;
|
||||
|
||||
ums = (void __user *)rms->results_ptr;
|
||||
count = rms->results_size / sizeof(struct scoutfs_ioctl_meta_seq);
|
||||
retries = 10;
|
||||
copied = 0;
|
||||
|
||||
scoutfs_inode_init_index_key(&last, SCOUTFS_INODE_INDEX_META_SEQ_TYPE,
|
||||
rms->end.meta_seq, 0, rms->end.ino);
|
||||
|
||||
retry:
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
scoutfs_inode_init_index_key(&key, SCOUTFS_INODE_INDEX_META_SEQ_TYPE,
|
||||
rms->start.meta_seq, 0, rms->start.ino);
|
||||
|
||||
for (;;) {
|
||||
start = key;
|
||||
end = last;
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, 0, &key, NULL, &start, &end,
|
||||
save_all_items, &list);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sort_and_remove(&list, &end);
|
||||
|
||||
list_for_each_entry_safe(fsi, tmp, &list, head) {
|
||||
|
||||
if (copied == count) {
|
||||
/* results are full, set end to before item can't return */
|
||||
end = fsi->key;
|
||||
le64_add_cpu(&end.skii_ino, -1ULL);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ms_from_key(&ms, &fsi->key);
|
||||
if (copy_to_user(&ums[copied], &ms, sizeof(ms))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
free_fs_item(fsi);
|
||||
copied++;
|
||||
}
|
||||
|
||||
if (scoutfs_key_compare(&end, &last) >= 0) {
|
||||
end = last;
|
||||
break;
|
||||
}
|
||||
|
||||
key = end;
|
||||
inc_meta_seq(&key);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
free_fs_items(&list);
|
||||
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
|
||||
if (ret == -ESTALE && copied == 0 && retries-- > 0)
|
||||
goto retry;
|
||||
|
||||
ms_from_key(last_ret, &end);
|
||||
|
||||
return ret ?: copied;
|
||||
}
|
||||
|
||||
/* -------------- */
|
||||
|
||||
struct inode_info_context {
|
||||
size_t nr_inos;
|
||||
u64 *inos;
|
||||
|
||||
size_t nr_names;
|
||||
struct xattr_name {
|
||||
u64 hash;
|
||||
char *name;
|
||||
u8 name_len; /* no null */
|
||||
} *names;
|
||||
|
||||
struct list_head fs_items;
|
||||
};
|
||||
|
||||
static int cmp_u64(const void *A, const void *B)
|
||||
{
|
||||
const u64 *a = A;
|
||||
const u64 *b = B;
|
||||
|
||||
return scoutfs_cmp(*a, *b);
|
||||
}
|
||||
|
||||
static int cmp_name_hash(const void *A, const void *B)
|
||||
{
|
||||
const struct xattr_name *a = A;
|
||||
const struct xattr_name *b = B;
|
||||
|
||||
return scoutfs_cmp(a->hash, b->hash);
|
||||
}
|
||||
|
||||
static int cmp_name_string(const void *A, const void *B)
|
||||
{
|
||||
const struct xattr_name *a = A;
|
||||
const struct xattr_name *b = B;
|
||||
|
||||
return scoutfs_cmp(a->name_len, b->name_len) ?: memcmp(a->name, b->name, a->name_len);
|
||||
}
|
||||
|
||||
static int setup_context(struct inode_info_context *ctx,
|
||||
struct scoutfs_ioctl_raw_read_inode_info *rii)
|
||||
{
|
||||
__u64 __user *uinos = (void __user *)rii->inos_ptr;
|
||||
char __user *uname;
|
||||
long len_null;
|
||||
long len;
|
||||
int ret;
|
||||
u32 i;
|
||||
|
||||
ctx->nr_inos = rii->inos_count;
|
||||
ctx->nr_names = rii->names_count;
|
||||
INIT_LIST_HEAD(&ctx->fs_items);
|
||||
|
||||
ctx->inos = kvmalloc_array(ctx->nr_inos, sizeof(ctx->inos[0]), GFP_KERNEL);
|
||||
ctx->names = kvcalloc(ctx->nr_names, sizeof(ctx->names[0]), GFP_KERNEL);
|
||||
if (!ctx->inos || !ctx->names) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(ctx->inos, uinos, ctx->nr_inos * sizeof(ctx->inos[0]))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* inos must not be 0 and must increase and contain no duplicates */
|
||||
if (ctx->inos[0] == 0) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
for (i = 1; i < ctx->nr_inos; i++) {
|
||||
if (ctx->inos[i] <= ctx->inos[i - 1]) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
uname = (void __user *)rii->names_ptr;
|
||||
for (i = 0; i < ctx->nr_names; i++) {
|
||||
len_null = SCOUTFS_XATTR_MAX_NAME_LEN + 1;
|
||||
ret = strnlen_user(uname, len_null);
|
||||
if (ret <= 1 || ret > len_null) {
|
||||
if (ret >= 0)
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
len_null = ret;
|
||||
len = len_null - 1;
|
||||
|
||||
ctx->names[i].name_len = len;
|
||||
ctx->names[i].name = kmalloc(len_null, GFP_KERNEL);
|
||||
if (!ctx->names[i].name) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = strncpy_from_user(ctx->names[i].name, uname, len_null);
|
||||
if (ret != len) {
|
||||
if (ret >= 0)
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ctx->names[i].hash = scoutfs_xattr_name_hash(ctx->names[i].name, len);
|
||||
uname += len_null;
|
||||
}
|
||||
|
||||
/* make sure all the names differ */
|
||||
sort(ctx->names, ctx->nr_names, sizeof(ctx->names[0]), cmp_name_string, NULL);
|
||||
for (i = 1; i < ctx->nr_names; i++) {
|
||||
if (cmp_name_string(&ctx->names[i - 1], &ctx->names[i]) == 0) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* then leave them sorted by hash */
|
||||
sort(ctx->names, ctx->nr_names, sizeof(ctx->names[0]), cmp_name_hash, NULL);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void free_context(struct inode_info_context *ctx)
|
||||
{
|
||||
int i;
|
||||
|
||||
kvfree(ctx->inos);
|
||||
|
||||
if (ctx->names) {
|
||||
for (i = 0; i < ctx->nr_names; i++) {
|
||||
if (!ctx->names[i].name)
|
||||
break;
|
||||
kfree(ctx->names[i].name);
|
||||
}
|
||||
kvfree(ctx->names);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Iterate over fs items and save any that we're interested in. We want
|
||||
* inode struct items and any xattr items whose hashes collide with the
|
||||
* xattr names we're searching for.
|
||||
*
|
||||
* Our forest calls can be advancing through the key space as we see
|
||||
* slices that intersect with blocks in trees. And each forest caller
|
||||
* can be resetting the key position to the start of each forest block
|
||||
* it reads in an intersection.
|
||||
*
|
||||
* From this callback's perspective, the key can be jumping all over the
|
||||
* place. We don't have any iterative position state. For each key we
|
||||
* decide if we want to save it and then set the key to the next key we
|
||||
* want after the current key. We'll combine all the saved keys later.
|
||||
*/
|
||||
static int save_info_items(struct super_block *sb, struct scoutfs_key *key, u64 seq,
|
||||
u8 flags, void *val, int val_len, int fic, void *arg)
|
||||
{
|
||||
u64 ino = le64_to_cpu(key->_sk_first);
|
||||
struct inode_info_context *ctx = arg;
|
||||
struct xattr_name name;
|
||||
size_t name_ind;
|
||||
size_t ino_ind;
|
||||
bool hash_match;
|
||||
bool ino_match;
|
||||
int ret;
|
||||
|
||||
ino_ind = bsearch_index(&ino, ctx->inos, ctx->nr_inos, sizeof(ctx->inos[0]), cmp_u64);
|
||||
ino_match = ino_ind < ctx->nr_inos && ctx->inos[ino_ind] == ino;
|
||||
|
||||
/* jump to to next ino, could be for this key if we're before the ino struct */
|
||||
if (!ino_match || key->sk_type < SCOUTFS_INODE_TYPE)
|
||||
goto next_inode;
|
||||
|
||||
/* find our search position in xattrs */
|
||||
if (key->sk_type < SCOUTFS_XATTR_TYPE) {
|
||||
name_ind = 0;
|
||||
hash_match = false;
|
||||
|
||||
} else if (key->sk_type == SCOUTFS_XATTR_TYPE) {
|
||||
name = (struct xattr_name) { .hash = le64_to_cpu(key->skx_name_hash) };
|
||||
name_ind = bsearch_index(&name, ctx->names, ctx->nr_names, sizeof(ctx->names[0]),
|
||||
cmp_name_hash);
|
||||
hash_match = name_ind < ctx->nr_names && ctx->names[name_ind].hash == name.hash;
|
||||
} else {
|
||||
name_ind = ctx->nr_names;
|
||||
hash_match = false;
|
||||
}
|
||||
|
||||
/* save inode items for our search and all xattr items that match search hashes */
|
||||
if (key->sk_type == SCOUTFS_INODE_TYPE || hash_match) {
|
||||
ret = save_fs_item(&ctx->fs_items, key, seq, flags, val, val_len);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* let the caller continue iterating through matching xattr items */
|
||||
if (hash_match) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* jump to the next xattr */
|
||||
if (name_ind < ctx->nr_names) {
|
||||
scoutfs_xattr_init_key(key, ino, ctx->names[name_ind].hash, 0);
|
||||
ret = -ESRCH;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* no more xattrs, must be done with this ino */
|
||||
ino_ind++;
|
||||
|
||||
next_inode:
|
||||
/* now jump to next inode struct key, or we're done */
|
||||
if (ino_ind < ctx->nr_inos)
|
||||
scoutfs_inode_init_key(key, ctx->inos[ino_ind]);
|
||||
else
|
||||
scoutfs_key_set_ones(key);
|
||||
|
||||
ret = -ESRCH;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int copy_to_user_off(void __user *dst, size_t *dst_off, size_t dst_size,
|
||||
void *src, size_t copy_size)
|
||||
{
|
||||
if (copy_size == 0)
|
||||
return 0;
|
||||
if (*dst_off + copy_size > dst_size)
|
||||
return -ERANGE;
|
||||
if (copy_to_user(dst + *dst_off, src, copy_size))
|
||||
return -EFAULT;
|
||||
|
||||
*dst_off += copy_size;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int copy_result_to_user(void __user *ures, size_t *off, size_t size, u8 type,
|
||||
void *a_data, size_t a_len, void *b_data, size_t b_len,
|
||||
size_t extra_size)
|
||||
{
|
||||
struct scoutfs_ioctl_raw_read_result res;
|
||||
const size_t szof_res = sizeof(struct scoutfs_ioctl_raw_read_result);
|
||||
|
||||
memzero_explicit(&res, szof_res);
|
||||
res = (struct scoutfs_ioctl_raw_read_result) {
|
||||
.size = a_len + b_len + extra_size,
|
||||
.type = type,
|
||||
};
|
||||
|
||||
return copy_to_user_off(ures, off, size, &res, szof_res) ?:
|
||||
(a_len ? copy_to_user_off(ures, off, size, a_data, a_len) : 0) ?:
|
||||
(b_len ? copy_to_user_off(ures, off, size, b_data, b_len) : 0);
|
||||
}
|
||||
|
||||
static int copy_item_results_to_user(struct super_block *sb, struct inode_info_context *ctx,
|
||||
void __user *ures, size_t *off, size_t size,
|
||||
struct fs_item *fsi)
|
||||
{
|
||||
struct scoutfs_inode *cinode;
|
||||
struct scoutfs_xattr *xat;
|
||||
static char null = '\0';
|
||||
size_t len;
|
||||
u64 ino;
|
||||
int ret = 0;
|
||||
|
||||
if (fsi->key.sk_type == SCOUTFS_INODE_TYPE) {
|
||||
cinode = (void *)fsi->val;
|
||||
ino = le64_to_cpu(fsi->key.ski_ino);
|
||||
|
||||
ret = copy_result_to_user(ures, off, size, SCOUTFS_IOC_RAW_READ_RESULT_INODE,
|
||||
&ino, sizeof(ino), cinode, sizeof(struct scoutfs_inode),
|
||||
0);
|
||||
|
||||
} else if (fsi->key.sk_type == SCOUTFS_XATTR_TYPE) {
|
||||
if (fsi->key.skx_part == 0) {
|
||||
xat = (void *)fsi->val;
|
||||
ret = copy_result_to_user(ures, off, size,
|
||||
SCOUTFS_IOC_RAW_READ_RESULT_XATTR, xat->name,
|
||||
xat->name_len, &null, sizeof(null),
|
||||
le16_to_cpu(xat->val_len));
|
||||
if (ret == 0 && xat->val_len != 0) {
|
||||
/* then append the start of the value */
|
||||
len = fsi->val_len -
|
||||
offsetof(struct scoutfs_xattr, name[xat->name_len]);
|
||||
ret = copy_to_user_off(ures, off, size, xat->name + xat->name_len,
|
||||
len);
|
||||
}
|
||||
} else {
|
||||
/* continue appending partial values */
|
||||
ret = copy_to_user_off(ures, off, size, fsi->val, fsi->val_len);
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool ignore_zero_nlink(struct inode_info_context *ctx, struct fs_item *fsi)
|
||||
{
|
||||
struct scoutfs_inode *cinode = (void *)fsi->val;
|
||||
|
||||
return cinode->nlink == 0;
|
||||
}
|
||||
|
||||
static bool ignore_xattr_name(struct inode_info_context *ctx, struct fs_item *fsi)
|
||||
{
|
||||
struct scoutfs_xattr *xat = (void *)fsi->val;
|
||||
struct xattr_name name = {
|
||||
.hash = le64_to_cpu(fsi->key.skx_name_hash),
|
||||
.name = xat->name,
|
||||
.name_len = xat->name_len,
|
||||
};
|
||||
size_t i;
|
||||
|
||||
for (i = bsearch_index(&name, ctx->names, ctx->nr_names, sizeof(ctx->names[0]),
|
||||
cmp_name_hash);
|
||||
i < ctx->nr_names && name.hash == ctx->names[i].hash; i++) {
|
||||
if (cmp_name_string(&name, &ctx->names[i]) == 0)
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
static int copy_results_to_user(struct super_block *sb, struct inode_info_context *ctx,
|
||||
struct scoutfs_ioctl_raw_read_inode_info *rii)
|
||||
{
|
||||
void __user *ures = (void __user *)rii->results_ptr;
|
||||
struct scoutfs_xattr *xat;
|
||||
struct fs_item *next;
|
||||
struct fs_item *fsi;
|
||||
struct fs_item *tmp;
|
||||
size_t xattr_end;
|
||||
size_t off;
|
||||
__le64 in_ino;
|
||||
__le64 in_id;
|
||||
int ret;
|
||||
|
||||
in_ino = 0;
|
||||
xattr_end = 0;
|
||||
in_id = 0;
|
||||
off = 0;
|
||||
|
||||
list_for_each_entry_safe(fsi, tmp, &ctx->fs_items, head) {
|
||||
/*
|
||||
* ignore:
|
||||
* - inodes with an nlink of 0
|
||||
* - all items for an ino after the inode struct that we're ignoring
|
||||
* - first xattr parts with a name we don't need
|
||||
* - additional xattr parts when we ignored the first
|
||||
*/
|
||||
if ((fsi->key.sk_type == SCOUTFS_INODE_TYPE && ignore_zero_nlink(ctx, fsi)) ||
|
||||
(fsi->key.sk_type > SCOUTFS_INODE_TYPE && fsi->key._sk_first != in_ino) ||
|
||||
(fsi->key.sk_type == SCOUTFS_XATTR_TYPE &&
|
||||
((fsi->key.skx_part == 0 && ignore_xattr_name(ctx, fsi)) ||
|
||||
(fsi->key.skx_part > 0 && fsi->key.skx_id != in_id)))) {
|
||||
free_fs_item(fsi);
|
||||
in_ino = 0;
|
||||
in_id = 0;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* advance ino/xattr stream context state machine */
|
||||
if (fsi->key.sk_type == SCOUTFS_INODE_TYPE) {
|
||||
in_ino = fsi->key.ski_ino;
|
||||
in_id = 0;
|
||||
} else if (fsi->key.sk_type == SCOUTFS_XATTR_TYPE && fsi->key.skx_part == 0) {
|
||||
in_id = fsi->key.skx_id;
|
||||
/* save the required offset after the complete xattr */
|
||||
xat = (void *)fsi->val;
|
||||
xattr_end = off + sizeof(struct scoutfs_ioctl_raw_read_result) +
|
||||
xat->name_len + 1 + le16_to_cpu(xat->val_len);
|
||||
}
|
||||
|
||||
/* copy results, usually with header, but additional xattr parts copied raw */
|
||||
ret = copy_item_results_to_user(sb, ctx, ures, &off, rii->results_size, fsi);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* make sure we saw all xattr parts and copied the correct size */
|
||||
if (xattr_end > 0 &&
|
||||
!((next = next_fs_item(&ctx->fs_items, fsi)) &&
|
||||
next->key.sk_type == SCOUTFS_XATTR_TYPE && next->key.skx_ino == in_ino &&
|
||||
next->key.skx_id == in_id)) {
|
||||
if (off != xattr_end) {
|
||||
ret = -EUCLEAN;
|
||||
goto out;
|
||||
}
|
||||
xattr_end = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret ?: off;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the key is for an inode we're not interested in, or if its past
|
||||
* the xattr items, then advance to the next inode. This is used
|
||||
* between forest read items calls to avoid leaf blocks. The callback
|
||||
* takes care of iterating through the items for an inode across
|
||||
* multiple leaves.
|
||||
*/
|
||||
static void advance_key_ino(struct scoutfs_key *key, struct inode_info_context *ctx)
|
||||
{
|
||||
u64 ino = le64_to_cpu(key->_sk_first);
|
||||
size_t ino_ind;
|
||||
|
||||
ino_ind = bsearch_index(&ino, ctx->inos, ctx->nr_inos, sizeof(ctx->inos[0]), cmp_u64);
|
||||
if (ino_ind < ctx->nr_inos && ctx->inos[ino_ind] == ino) {
|
||||
if (key->sk_type <= SCOUTFS_XATTR_TYPE)
|
||||
return;
|
||||
else
|
||||
ino_ind++;
|
||||
}
|
||||
|
||||
if (ino_ind < ctx->nr_inos)
|
||||
scoutfs_inode_init_key(key, ctx->inos[ino_ind]);
|
||||
else
|
||||
scoutfs_key_set_ones(key);
|
||||
}
|
||||
|
||||
int scoutfs_raw_read_inode_info(struct super_block *sb,
|
||||
struct scoutfs_ioctl_raw_read_inode_info *rii)
|
||||
{
|
||||
struct inode_info_context ctx = {0, };
|
||||
struct scoutfs_net_roots roots;
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
struct scoutfs_key lock_start;
|
||||
struct scoutfs_key lock_end;
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key end;
|
||||
LIST_HEAD(list);
|
||||
int retries = 10;
|
||||
int ret;
|
||||
|
||||
ret = setup_context(&ctx, rii);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (ctx.nr_names > 0)
|
||||
scoutfs_xattr_init_key(&last, ctx.inos[ctx.nr_inos -1],
|
||||
ctx.names[ctx.nr_names - 1].hash, U64_MAX);
|
||||
else
|
||||
scoutfs_inode_init_key(&last, ctx.inos[ctx.nr_inos - 1]);
|
||||
|
||||
retry:
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
scoutfs_inode_init_key(&key, ctx.inos[0]);
|
||||
|
||||
while (scoutfs_key_compare(&key, &last) <= 0) {
|
||||
scoutfs_lock_get_fs_item_range(le64_to_cpu(key._sk_first), &lock_start, &lock_end);
|
||||
|
||||
start = key;
|
||||
end = last;
|
||||
if (scoutfs_key_compare(&lock_end, &end) < 0)
|
||||
end = lock_end;
|
||||
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, 0, &key, &lock_start,
|
||||
&start, &end, save_info_items, &ctx);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* save each sorted batch, might have partial results for an inode */
|
||||
sort_and_remove(&ctx.fs_items, &end);
|
||||
list_splice_tail_init(&ctx.fs_items, &list);
|
||||
|
||||
key = end;
|
||||
if (!scoutfs_key_is_ones(&key)) {
|
||||
scoutfs_key_inc(&key);
|
||||
advance_key_ino(&key, &ctx);
|
||||
}
|
||||
}
|
||||
|
||||
list_splice_tail_init(&list, &ctx.fs_items);
|
||||
ret = copy_results_to_user(sb, &ctx, rii);
|
||||
out:
|
||||
free_fs_items(&list);
|
||||
free_fs_items(&ctx.fs_items);
|
||||
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
|
||||
if (ret == -ESTALE && retries-- > 0)
|
||||
goto retry;
|
||||
|
||||
free_context(&ctx);
|
||||
return ret;
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
#ifndef _SCOUTFS_RAW_H_
|
||||
#define _SCOUTFS_RAW_H_
|
||||
|
||||
int scoutfs_raw_read_meta_seq(struct super_block *sb,
|
||||
struct scoutfs_ioctl_raw_read_meta_seq *rms,
|
||||
struct scoutfs_ioctl_meta_seq *last_ret);
|
||||
int scoutfs_raw_read_inode_info(struct super_block *sb,
|
||||
struct scoutfs_ioctl_raw_read_inode_info *rii);
|
||||
|
||||
#endif
|
||||
+1
-1
@@ -134,7 +134,7 @@ static int recov_finished(struct recov_info *recinf)
|
||||
|
||||
static void timer_callback(struct timer_list *timer)
|
||||
{
|
||||
struct recov_info *recinf = timer_container_of(recinf, timer, timer);
|
||||
struct recov_info *recinf = from_timer(recinf, timer, timer);
|
||||
|
||||
recinf->timeout_fn(recinf->sb);
|
||||
}
|
||||
|
||||
+3
-2
@@ -1077,7 +1077,8 @@ static int next_log_merge_range(struct super_block *sb, struct scoutfs_btree_roo
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
init_log_merge_key(&key, SCOUTFS_LOG_MERGE_RANGE_ZONE, 0, 0);
|
||||
key = *start;
|
||||
key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
|
||||
scoutfs_key_set_ones(&rng->start);
|
||||
|
||||
do {
|
||||
@@ -4751,7 +4752,7 @@ int scoutfs_server_setup(struct super_block *sb)
|
||||
INIT_DELAYED_WORK(&server->reclaim_dwork, reclaim_worker);
|
||||
|
||||
server->wq = alloc_workqueue("scoutfs_server",
|
||||
WQ_UNBOUND, 0);
|
||||
WQ_UNBOUND | WQ_NON_REENTRANT, 0);
|
||||
if (!server->wq) {
|
||||
kfree(server);
|
||||
return -ENOMEM;
|
||||
|
||||
+2
-1
@@ -2392,7 +2392,8 @@ int scoutfs_srch_setup(struct super_block *sb)
|
||||
goto out;
|
||||
|
||||
srinf->workq = alloc_workqueue("scoutfs_srch_compact",
|
||||
WQ_UNBOUND | WQ_HIGHPRI, 0);
|
||||
WQ_NON_REENTRANT | WQ_UNBOUND |
|
||||
WQ_HIGHPRI, 0);
|
||||
if (!srinf->workq) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
|
||||
@@ -46,6 +46,7 @@ static struct scoutfs_tseq_entry *tseq_rb_next(struct scoutfs_tseq_entry *ent)
|
||||
return rb_entry(node, struct scoutfs_tseq_entry, node);
|
||||
}
|
||||
|
||||
#ifdef KC_RB_TREE_AUGMENTED_COMPUTE_MAX
|
||||
static bool tseq_compute_total(struct scoutfs_tseq_entry *ent, bool exit)
|
||||
{
|
||||
loff_t total = 1 + tseq_node_total(ent->node.rb_left) +
|
||||
@@ -60,6 +61,17 @@ static bool tseq_compute_total(struct scoutfs_tseq_entry *ent, bool exit)
|
||||
|
||||
RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
|
||||
node, total, tseq_compute_total);
|
||||
#else
|
||||
|
||||
static loff_t tseq_compute_total(struct scoutfs_tseq_entry *ent)
|
||||
{
|
||||
return 1 + tseq_node_total(ent->node.rb_left) +
|
||||
tseq_node_total(ent->node.rb_right);
|
||||
}
|
||||
|
||||
RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
|
||||
node, loff_t, total, tseq_compute_total);
|
||||
#endif
|
||||
|
||||
void scoutfs_tseq_tree_init(struct scoutfs_tseq_tree *tree,
|
||||
scoutfs_tseq_show_t show)
|
||||
|
||||
+47
-14
@@ -16,7 +16,6 @@
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/crc32c.h>
|
||||
#include <linux/posix_acl.h>
|
||||
#include <linux/iversion.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "inode.h"
|
||||
@@ -48,7 +47,7 @@
|
||||
* - add acl support and call generic xattr->handlers for SYSTEM
|
||||
*/
|
||||
|
||||
static u32 xattr_name_hash(const char *name, unsigned int name_len)
|
||||
u32 scoutfs_xattr_name_hash(const char *name, unsigned int name_len)
|
||||
{
|
||||
return crc32c(U32_MAX, name, name_len);
|
||||
}
|
||||
@@ -66,8 +65,7 @@ static unsigned int xattr_nr_parts(struct scoutfs_xattr *xat)
|
||||
le16_to_cpu(xat->val_len));
|
||||
}
|
||||
|
||||
static void init_xattr_key(struct scoutfs_key *key, u64 ino, u32 name_hash,
|
||||
u64 id)
|
||||
void scoutfs_xattr_init_key(struct scoutfs_key *key, u64 ino, u32 name_hash, u64 id)
|
||||
{
|
||||
*key = (struct scoutfs_key) {
|
||||
.sk_zone = SCOUTFS_FS_ZONE,
|
||||
@@ -188,10 +186,10 @@ static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
|
||||
return -EINVAL;
|
||||
|
||||
if (name_len)
|
||||
name_hash = xattr_name_hash(name, name_len);
|
||||
name_hash = scoutfs_xattr_name_hash(name, name_len);
|
||||
|
||||
init_xattr_key(key, scoutfs_ino(inode), name_hash, id);
|
||||
init_xattr_key(&last, scoutfs_ino(inode), U32_MAX, U64_MAX);
|
||||
scoutfs_xattr_init_key(key, scoutfs_ino(inode), name_hash, id);
|
||||
scoutfs_xattr_init_key(&last, scoutfs_ino(inode), U32_MAX, U64_MAX);
|
||||
|
||||
for (;;) {
|
||||
ret = scoutfs_item_next(sb, key, &last, xat, xat_bytes, lock);
|
||||
@@ -336,8 +334,8 @@ static int create_xattr_items(struct inode *inode, u64 id, struct scoutfs_xattr
|
||||
int len;
|
||||
int i;
|
||||
|
||||
init_xattr_key(&key, scoutfs_ino(inode),
|
||||
xattr_name_hash(xat->name, xat->name_len), id);
|
||||
scoutfs_xattr_init_key(&key, scoutfs_ino(inode),
|
||||
scoutfs_xattr_name_hash(xat->name, xat->name_len), id);
|
||||
|
||||
for (i = 0; i < new_parts; i++) {
|
||||
key.skx_part = i;
|
||||
@@ -366,7 +364,7 @@ static int delete_xattr_items(struct inode *inode, u32 name_hash, u64 id,
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
init_xattr_key(&key, scoutfs_ino(inode), name_hash, id);
|
||||
scoutfs_xattr_init_key(&key, scoutfs_ino(inode), name_hash, id);
|
||||
|
||||
/* dirty additional existing old items */
|
||||
for (i = 1; i < nr_parts; i++) {
|
||||
@@ -408,8 +406,8 @@ static int change_xattr_items(struct inode *inode, u64 id,
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
init_xattr_key(&key, scoutfs_ino(inode),
|
||||
xattr_name_hash(xat->name, xat->name_len), id);
|
||||
scoutfs_xattr_init_key(&key, scoutfs_ino(inode),
|
||||
scoutfs_xattr_name_hash(xat->name, xat->name_len), id);
|
||||
|
||||
/* dirty existing old items */
|
||||
for (i = 0; i < old_parts; i++) {
|
||||
@@ -995,17 +993,38 @@ unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifndef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
/*
|
||||
* Future kernels have this amazing hack to rewind the name to get the
|
||||
* skipped prefix. We're back in the stone ages without the handler
|
||||
* arg, so we Just Know that this is possible. This will become a
|
||||
* compat hook to either call the kernel's xattr_full_name(handler), or
|
||||
* our hack to use the flags as the prefix length.
|
||||
*/
|
||||
static const char *full_name_hack(const char *name, int len)
|
||||
{
|
||||
return name - len;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int scoutfs_xattr_get_handler
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
(const struct xattr_handler *handler, struct dentry *dentry,
|
||||
struct inode *inode, const char *name, void *value,
|
||||
size_t size)
|
||||
{
|
||||
name = xattr_full_name(handler, name);
|
||||
#else
|
||||
(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size, int handler_flags)
|
||||
{
|
||||
name = full_name_hack(name, handler_flags);
|
||||
#endif
|
||||
return scoutfs_xattr_get(dentry, name, value, size);
|
||||
}
|
||||
|
||||
static int scoutfs_xattr_set_handler
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
(const struct xattr_handler *handler,
|
||||
KC_VFS_NS_DEF
|
||||
struct dentry *dentry,
|
||||
@@ -1013,6 +1032,12 @@ static int scoutfs_xattr_set_handler
|
||||
size_t size, int flags)
|
||||
{
|
||||
name = xattr_full_name(handler, name);
|
||||
#else
|
||||
(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags, int handler_flags)
|
||||
{
|
||||
name = full_name_hack(name, handler_flags);
|
||||
#endif
|
||||
return scoutfs_xattr_set(dentry, name, value, size, flags);
|
||||
}
|
||||
|
||||
@@ -1045,14 +1070,22 @@ static const struct xattr_handler scoutfs_xattr_security_handler = {
|
||||
};
|
||||
|
||||
static const struct xattr_handler scoutfs_xattr_acl_access_handler = {
|
||||
#ifdef KC_XATTR_HANDLER_NAME
|
||||
.name = XATTR_NAME_POSIX_ACL_ACCESS,
|
||||
#else
|
||||
.prefix = XATTR_NAME_POSIX_ACL_ACCESS,
|
||||
#endif
|
||||
.flags = ACL_TYPE_ACCESS,
|
||||
.get = scoutfs_acl_get_xattr,
|
||||
.set = scoutfs_acl_set_xattr,
|
||||
};
|
||||
|
||||
static const struct xattr_handler scoutfs_xattr_acl_default_handler = {
|
||||
#ifdef KC_XATTR_HANDLER_NAME
|
||||
.name = XATTR_NAME_POSIX_ACL_DEFAULT,
|
||||
#else
|
||||
.prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
|
||||
#endif
|
||||
.flags = ACL_TYPE_DEFAULT,
|
||||
.get = scoutfs_acl_get_xattr,
|
||||
.set = scoutfs_acl_set_xattr,
|
||||
@@ -1190,8 +1223,8 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
goto out;
|
||||
}
|
||||
|
||||
init_xattr_key(&key, ino, 0, 0);
|
||||
init_xattr_key(&last, ino, U32_MAX, U64_MAX);
|
||||
scoutfs_xattr_init_key(&key, ino, 0, 0);
|
||||
scoutfs_xattr_init_key(&last, ino, U32_MAX, U64_MAX);
|
||||
|
||||
for (;;) {
|
||||
ret = scoutfs_item_next(sb, &key, &last, (void *)xat, bytes,
|
||||
|
||||
@@ -10,6 +10,9 @@ struct scoutfs_xattr_prefix_tags {
|
||||
|
||||
extern const struct xattr_handler *scoutfs_xattr_handlers[];
|
||||
|
||||
u32 scoutfs_xattr_name_hash(const char *name, unsigned int name_len);
|
||||
void scoutfs_xattr_init_key(struct scoutfs_key *key, u64 ino, u32 name_hash, u64 id);
|
||||
|
||||
int scoutfs_xattr_get_locked(struct inode *inode, const char *name, void *buffer, size_t size,
|
||||
struct scoutfs_lock *lck);
|
||||
int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_len,
|
||||
|
||||
+1
-1
@@ -12,4 +12,4 @@ src/o_tmpfile_umask
|
||||
src/o_tmpfile_linkat
|
||||
src/mmap_stress
|
||||
src/mmap_validate
|
||||
src/totl-delta-inject
|
||||
src/watch_raw_inode_change
|
||||
|
||||
+1
-1
@@ -16,7 +16,7 @@ BIN := src/createmany \
|
||||
src/o_tmpfile_linkat \
|
||||
src/mmap_stress \
|
||||
src/mmap_validate \
|
||||
src/totl-delta-inject
|
||||
src/watch_raw_inode_change
|
||||
|
||||
DEPS := $(wildcard src/*.d)
|
||||
|
||||
|
||||
@@ -171,13 +171,6 @@ t_filter_dmesg()
|
||||
# orphan log trees reclaim is handled, not an error
|
||||
re="$re|scoutfs .* reclaiming orphan log trees"
|
||||
|
||||
# nfs can emit a whole range of messages we can ignore
|
||||
re="$re|Installing knfsd .*"
|
||||
re="$re|nfsd: .*"
|
||||
re="$re|NFSD: .*"
|
||||
re="$re|RPC: .*"
|
||||
re="$re|FS-Cache: .*"
|
||||
|
||||
# fencing tests force unmounts and trigger timeouts
|
||||
re="$re|scoutfs .* forcing unmount"
|
||||
re="$re|scoutfs .* reconnect timed out"
|
||||
|
||||
@@ -1,32 +0,0 @@
|
||||
== write via NFS, read both sides
|
||||
== POSIX ACL set via NFS, read both sides
|
||||
user::rw-
|
||||
user:22222:rw-
|
||||
group::r--
|
||||
mask::rw-
|
||||
other::r--
|
||||
|
||||
user::rw-
|
||||
user:22222:rw-
|
||||
group::r--
|
||||
mask::rw-
|
||||
other::r--
|
||||
|
||||
== POSIX ACL set on scoutfs, read via NFS
|
||||
user::rw-
|
||||
user:22222:rw-
|
||||
group::r--
|
||||
group:44444:r--
|
||||
mask::rw-
|
||||
other::r--
|
||||
|
||||
== default ACL inheritance via NFS
|
||||
user::rw-
|
||||
user:22222:rwx #effective:rw-
|
||||
group::r-x #effective:r--
|
||||
mask::rw-
|
||||
other::r--
|
||||
|
||||
== NFS read demand-stages a released file
|
||||
1
|
||||
== cleanup
|
||||
@@ -8,10 +8,10 @@
|
||||
/mnt/test/test/data-prealloc/file-1: extents: 32
|
||||
/mnt/test/test/data-prealloc/file-2: extents: 32
|
||||
== any writes to region prealloc get full extents
|
||||
/mnt/test/test/data-prealloc/file-1: extents: 8
|
||||
/mnt/test/test/data-prealloc/file-2: extents: 8
|
||||
/mnt/test/test/data-prealloc/file-1: extents: 8
|
||||
/mnt/test/test/data-prealloc/file-2: extents: 8
|
||||
/mnt/test/test/data-prealloc/file-1: extents: 4
|
||||
/mnt/test/test/data-prealloc/file-2: extents: 4
|
||||
/mnt/test/test/data-prealloc/file-1: extents: 4
|
||||
/mnt/test/test/data-prealloc/file-2: extents: 4
|
||||
== streaming offline writes get full extents either way
|
||||
/mnt/test/test/data-prealloc/file-1: extents: 4
|
||||
/mnt/test/test/data-prealloc/file-2: extents: 4
|
||||
@@ -20,8 +20,8 @@
|
||||
== goofy preallocation amounts work
|
||||
/mnt/test/test/data-prealloc/file-1: extents: 6
|
||||
/mnt/test/test/data-prealloc/file-2: extents: 6
|
||||
/mnt/test/test/data-prealloc/file-1: extents: 10
|
||||
/mnt/test/test/data-prealloc/file-2: extents: 10
|
||||
/mnt/test/test/data-prealloc/file-1: extents: 6
|
||||
/mnt/test/test/data-prealloc/file-2: extents: 6
|
||||
/mnt/test/test/data-prealloc/file-1: extents: 3
|
||||
/mnt/test/test/data-prealloc/file-2: extents: 3
|
||||
== block writes into region allocs hole
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
== ensuring utils and module for old versions
|
||||
== unmounting test fs and removing test module
|
||||
== testing combinations of old and new format versions
|
||||
== restoring test module and mount
|
||||
@@ -1,6 +0,0 @@
|
||||
== setup
|
||||
== concurrent quota mod and check across mounts
|
||||
== verify quota rules are consistent after race
|
||||
== verify file creation still works under quota
|
||||
file visible on mount 1
|
||||
== cleanup
|
||||
@@ -1,10 +0,0 @@
|
||||
== setup three files contributing to totl 8888.0.0
|
||||
== merge baseline into fs_root
|
||||
8888.0.0 = 42, 3
|
||||
== inject (+128, +2) unbalances totl 8888.0.0
|
||||
8888.0.0 = 170, 5
|
||||
== unlink f3 (value 32) produces a -32/-1 delta
|
||||
8888.0.0 = 138, 4
|
||||
== inject (-128, -2) restores accounting for the remaining files
|
||||
8888.0.0 = 10, 2
|
||||
== cleanup
|
||||
+1
-3
@@ -3,7 +3,6 @@ basic-block-counts.sh
|
||||
basic-bad-mounts.sh
|
||||
basic-posix-acl.sh
|
||||
basic-acl-consistency.sh
|
||||
basic-nfs.sh
|
||||
inode-items-updated.sh
|
||||
simple-inode-index.sh
|
||||
simple-staging.sh
|
||||
@@ -19,6 +18,7 @@ offline-extent-waiting.sh
|
||||
move-blocks.sh
|
||||
projects.sh
|
||||
large-fragmented-free.sh
|
||||
format-version-forward-back.sh
|
||||
enospc.sh
|
||||
mmap.sh
|
||||
srch-safe-merge-pos.sh
|
||||
@@ -29,8 +29,6 @@ totl-xattr-tag.sh
|
||||
basic-xattr-indx.sh
|
||||
quota.sh
|
||||
totl-merge-read.sh
|
||||
quota-invalidate-race.sh
|
||||
totl-delta-inject.sh
|
||||
lock-refleak.sh
|
||||
lock-shrink-consistency.sh
|
||||
lock-shrink-read-race.sh
|
||||
|
||||
@@ -1,121 +0,0 @@
|
||||
/*
|
||||
* Test helper that calls SCOUTFS_IOC_INJECT_TOTL_DELTA to seed
|
||||
* arbitrary totl deltas.
|
||||
*
|
||||
* Copyright (C) 2026 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include <inttypes.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/types.h>
|
||||
|
||||
#include "ioctl.h"
|
||||
|
||||
static void usage(const char *prog)
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Usage: %s <mountpoint> <a>.<b>.<c> <total> <count>\n",
|
||||
prog);
|
||||
exit(2);
|
||||
}
|
||||
|
||||
static int parse_s64(const char *s, int64_t *out)
|
||||
{
|
||||
char *end;
|
||||
int64_t v;
|
||||
|
||||
errno = 0;
|
||||
v = strtoll(s, &end, 0);
|
||||
if (errno || *end != '\0' || end == s)
|
||||
return -1;
|
||||
*out = v;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Parse "<a>.<b>.<c>" into abc[0..2] (skxt_a, skxt_b, skxt_c). Each
|
||||
* component must be a non-empty unsigned base-0 integer.
|
||||
*/
|
||||
static int parse_dotted_name(const char *s, uint64_t abc[3])
|
||||
{
|
||||
const char *p = s;
|
||||
char *end;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
if (*p == '\0' || *p == '.')
|
||||
return -1;
|
||||
errno = 0;
|
||||
abc[i] = strtoull(p, &end, 0);
|
||||
if (errno || end == p)
|
||||
return -1;
|
||||
|
||||
if (i < 2) {
|
||||
if (*end != '.')
|
||||
return -1;
|
||||
p = end + 1;
|
||||
} else {
|
||||
if (*end != '\0')
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct scoutfs_ioctl_inject_totl_delta itd = {{0,}};
|
||||
uint64_t abc[3];
|
||||
int64_t total, count;
|
||||
int fd;
|
||||
int ret;
|
||||
|
||||
if (argc != 5)
|
||||
usage(argv[0]);
|
||||
|
||||
if (parse_dotted_name(argv[2], abc) ||
|
||||
parse_s64(argv[3], &total) ||
|
||||
parse_s64(argv[4], &count)) {
|
||||
fprintf(stderr, "could not parse arguments\n");
|
||||
usage(argv[0]);
|
||||
}
|
||||
|
||||
itd.name[0] = abc[0];
|
||||
itd.name[1] = abc[1];
|
||||
itd.name[2] = abc[2];
|
||||
itd.total = total;
|
||||
itd.count = count;
|
||||
|
||||
fd = open(argv[1], O_RDONLY | O_DIRECTORY);
|
||||
if (fd < 0) {
|
||||
fprintf(stderr, "open(%s): %s\n", argv[1], strerror(errno));
|
||||
return 1;
|
||||
}
|
||||
|
||||
ret = ioctl(fd, SCOUTFS_IOC_INJECT_TOTL_DELTA, &itd);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr,
|
||||
"INJECT_TOTL_DELTA(%" PRIu64 ".%" PRIu64 ".%" PRIu64
|
||||
", total=%" PRId64 ", count=%" PRId64 "): %s\n",
|
||||
abc[0], abc[1], abc[2], total, count, strerror(errno));
|
||||
close(fd);
|
||||
return 1;
|
||||
}
|
||||
|
||||
close(fd);
|
||||
return 0;
|
||||
}
|
||||
@@ -0,0 +1,664 @@
|
||||
/*
|
||||
* Copyright (C) 2026 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <time.h>
|
||||
#include <linux/types.h>
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "../../utils/src/util.h"
|
||||
#include "ioctl.h"
|
||||
#include "format.h"
|
||||
|
||||
/*
|
||||
* This is a quick example of using the raw reading ioctls to get info
|
||||
* on inodes as they change. We maintain an array of meta_seq items for
|
||||
* inodes that we've seen. If we read the current meta_seq items and
|
||||
* see differences then we get inode info and update our array with what
|
||||
* we find.
|
||||
*
|
||||
* This only maintains one array and sorts it back and forth as we walk
|
||||
* the meta_seq items and then search by inode number. This will
|
||||
* eventually use far too much cpu as the number of inodes increases.
|
||||
*/
|
||||
|
||||
#define MSF "%llu.%llu"
|
||||
#define MSA(ms) (ms)->meta_seq, (ms)->ino
|
||||
#define NERRF "nerr %d (\"%s\")"
|
||||
#define NERRA(nerr) nerr, strerror(-nerr)
|
||||
|
||||
#define prerror(fmt, args...) \
|
||||
fprintf(stderr, "error: "fmt"\n", ##args)
|
||||
|
||||
#define prdebug(fmt, args...) \
|
||||
do { \
|
||||
if (opts.debug) \
|
||||
printf(fmt"\n", ##args); \
|
||||
} while (0)
|
||||
|
||||
static struct opts {
|
||||
bool debug;
|
||||
char *path;
|
||||
char *names;
|
||||
size_t names_size;
|
||||
size_t names_count;
|
||||
} opts;
|
||||
|
||||
struct stats {
|
||||
__u64 start;
|
||||
__u64 last;
|
||||
|
||||
struct per_call {
|
||||
__u64 begin;
|
||||
__u64 calls;
|
||||
__u64 time;
|
||||
__u64 inos;
|
||||
} rms, rii;
|
||||
|
||||
__u64 inodes;
|
||||
__u64 add;
|
||||
__u64 remove;
|
||||
__u64 update;
|
||||
|
||||
unsigned lines;
|
||||
} stats;
|
||||
|
||||
struct meta_seq_array {
|
||||
size_t nr;
|
||||
size_t alloc;
|
||||
struct scoutfs_ioctl_meta_seq *ms;
|
||||
};
|
||||
|
||||
#define INO_BATCH 1000
|
||||
/* *2 for gratuitous allowance for struct expansion */
|
||||
#define RESULTS_SIZE (INO_BATCH * 2 * (sizeof(struct scoutfs_ioctl_raw_read_result) + \
|
||||
sizeof(__u64) + \
|
||||
180 /* ~= sizeof(struct scoutfs_inode) */ + \
|
||||
sizeof(struct scoutfs_ioctl_inode_attr_x)))
|
||||
|
||||
#define NSEC_PER_SEC 1000000000
|
||||
|
||||
static __u64 get_ns(void)
|
||||
{
|
||||
struct timespec tp;
|
||||
int ret;
|
||||
|
||||
ret = clock_gettime(CLOCK_MONOTONIC, &tp);
|
||||
if (ret != 0) {
|
||||
ret = -errno;
|
||||
prerror("clock_gettime() error: "NERRF, NERRA(ret));
|
||||
exit(2);
|
||||
}
|
||||
|
||||
return ((__u64)tp.tv_sec * NSEC_PER_SEC) + (__u64)tp.tv_nsec;
|
||||
}
|
||||
static void begin_call(struct per_call *pc)
|
||||
{
|
||||
pc->begin = get_ns();
|
||||
}
|
||||
|
||||
static void end_call(struct per_call *pc)
|
||||
{
|
||||
pc->calls++;
|
||||
pc->time += get_ns() - pc->begin;
|
||||
}
|
||||
|
||||
static int expand_array(struct meta_seq_array *arr, size_t additional)
|
||||
{
|
||||
#define ALLOC_BATCH (1024 * 1024 / (sizeof(struct scoutfs_ioctl_meta_seq)))
|
||||
struct scoutfs_ioctl_meta_seq *ms;
|
||||
size_t expand;
|
||||
|
||||
if (arr->nr + additional <= arr->alloc)
|
||||
return 0;
|
||||
|
||||
expand = arr->alloc + ALLOC_BATCH;
|
||||
ms = reallocarray(arr->ms, expand, sizeof(arr->ms[0]));
|
||||
if (!ms) {
|
||||
prerror("allocating ms array with %zu elements failed", expand);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
arr->alloc = expand;
|
||||
arr->ms = ms;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void inc_ms(struct scoutfs_ioctl_meta_seq *ms)
|
||||
{
|
||||
if (++ms->ino == 0)
|
||||
ms->meta_seq++;
|
||||
}
|
||||
|
||||
static void set_ms(struct scoutfs_ioctl_meta_seq *ms, __u64 meta_seq, __u64 ino)
|
||||
{
|
||||
ms->meta_seq = meta_seq;
|
||||
ms->ino = ino;
|
||||
}
|
||||
|
||||
static int compar_ms_ino(const void *A, const void *B)
|
||||
{
|
||||
const struct scoutfs_ioctl_meta_seq *a = A;
|
||||
const struct scoutfs_ioctl_meta_seq *b = B;
|
||||
|
||||
return a->ino < b->ino ? -1 : a->ino > b->ino ? 1 : 0;
|
||||
}
|
||||
|
||||
static int compar_ms_meta_seq(const void *A, const void *B)
|
||||
{
|
||||
const struct scoutfs_ioctl_meta_seq *a = A;
|
||||
const struct scoutfs_ioctl_meta_seq *b = B;
|
||||
|
||||
return a->meta_seq < b->meta_seq ? -1 : a->meta_seq > b->meta_seq ? 1 :
|
||||
compar_ms_ino(A, B);
|
||||
}
|
||||
|
||||
static int compar_u64(const void *A, const void *B)
|
||||
{
|
||||
const __u64 *a = A;
|
||||
const __u64 *b = B;
|
||||
|
||||
return *a < *b ? -1 : *a > *b ? 1 : 0;
|
||||
}
|
||||
|
||||
struct bsearch_ind_key {
|
||||
int (*compar)(const void *a, const void *b);
|
||||
void *key;
|
||||
size_t size;
|
||||
void **index;
|
||||
};
|
||||
|
||||
static int bsearch_ind_compar(const void *a, const void *b)
|
||||
{
|
||||
const struct bsearch_ind_key *bik = (const void *)((unsigned long)a ^ 1);
|
||||
int cmp;
|
||||
|
||||
/* this key hack only works if compar is always called where a is key and b is &base[..] */
|
||||
assert((unsigned long)a & 1);
|
||||
assert(!((unsigned long)b & 1));
|
||||
|
||||
cmp = bik->compar(bik->key, b);
|
||||
if (cmp > 0)
|
||||
*(bik->index) = (void *)b + bik->size;
|
||||
else
|
||||
*(bik->index) = (void *)b;
|
||||
|
||||
return cmp;
|
||||
}
|
||||
|
||||
static size_t bsearch_ind(const void *key, const void *base, size_t nmemb, size_t size,
|
||||
int (*compar)(const void *a, const void *b))
|
||||
{
|
||||
void *index = (void *)base;
|
||||
struct bsearch_ind_key bik = {
|
||||
.compar = compar,
|
||||
.key = (void *)key,
|
||||
.size = size,
|
||||
.index = &index,
|
||||
};
|
||||
|
||||
bsearch((void *)(((unsigned long)&bik) | 1), base, nmemb, size, bsearch_ind_compar);
|
||||
|
||||
return (index - base) / size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate a sorted list of inode numbers for the meta_seq items that
|
||||
* differ between the results from raw_read_meta_seq and the items we
|
||||
* have saved in our array.
|
||||
*/
|
||||
static int differing_inos(__u64 *inos, struct meta_seq_array *arr,
|
||||
struct scoutfs_ioctl_meta_seq *start,
|
||||
struct scoutfs_ioctl_meta_seq *last,
|
||||
struct scoutfs_ioctl_meta_seq *ms, size_t nr)
|
||||
{
|
||||
size_t arr_last;
|
||||
size_t a;
|
||||
size_t m;
|
||||
int nr_inos;
|
||||
int cmp;
|
||||
int i;
|
||||
int n;
|
||||
|
||||
/* find where we're going to stop in arr */
|
||||
arr_last = bsearch_ind(last, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);
|
||||
if (arr_last < arr->nr && compar_ms_meta_seq(&arr->ms[arr_last], last) == 0)
|
||||
arr_last++;
|
||||
|
||||
a = bsearch_ind(start, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);
|
||||
|
||||
for (m = 0, nr_inos = 0; (a < arr_last || m < nr) && nr_inos < INO_BATCH; ) {
|
||||
|
||||
prdebug("diffing: m %zu nr %zu | a %zu arr_last %zu | nr_inos %d",
|
||||
m, nr, a, arr_last, nr_inos);
|
||||
if (a < arr_last)
|
||||
prdebug(" arr->ms[%zu] = "MSF, a, MSA(&arr->ms[a]));
|
||||
if (m < nr)
|
||||
prdebug(" ms[%zu] = "MSF, m, MSA(&ms[m]));
|
||||
|
||||
/* setup comparison to copy lesser or only */
|
||||
if (a < arr_last && m < nr)
|
||||
cmp = compar_ms_meta_seq(&arr->ms[a], &ms[m]);
|
||||
else if (a < arr_last)
|
||||
cmp = -1;
|
||||
else
|
||||
cmp = 1;
|
||||
|
||||
prdebug(" cmp %d", cmp);
|
||||
|
||||
if (cmp == 0) {
|
||||
/* ignore both when they match */
|
||||
a++;
|
||||
m++;
|
||||
} else if (cmp < 0) {
|
||||
inos[nr_inos++] = arr->ms[a++].ino;
|
||||
} else { /* cmp > 0 */
|
||||
inos[nr_inos++] = ms[m++].ino;
|
||||
}
|
||||
}
|
||||
|
||||
/* if we didn't consume all the read meta_seq then we might need to clamp last */
|
||||
if (m < nr && compar_ms_meta_seq(&ms[m], last) <= 0) {
|
||||
*last = ms[m];
|
||||
last->ino--; /* must be non-zero, can't wrap */
|
||||
}
|
||||
|
||||
/* sort and remove duplicate inode numbers */
|
||||
if (nr_inos > 0) {
|
||||
qsort(inos, nr_inos, sizeof(inos[0]), compar_u64);
|
||||
for (i = 1, n = 1; i < nr_inos; i++) {
|
||||
if (inos[i] != inos[n - 1])
|
||||
inos[n++] = inos[i];
|
||||
}
|
||||
nr_inos = n;
|
||||
}
|
||||
|
||||
return nr_inos;
|
||||
}
|
||||
|
||||
/*
|
||||
* We're not really validating the result stream. We assume that the offset currently
|
||||
* points at an inode. We fill the caller's ms with its info then iterate through
|
||||
* all its results until the next ino.
|
||||
*/
|
||||
static ssize_t read_inode_results(void *buf, size_t off, size_t size,
|
||||
struct scoutfs_ioctl_meta_seq *found)
|
||||
{
|
||||
struct scoutfs_ioctl_raw_read_result res;
|
||||
size_t len;
|
||||
__le64 ms;
|
||||
|
||||
found->ino = 0;
|
||||
|
||||
while (off < size) {
|
||||
memcpy(&res, buf + off, sizeof(res));
|
||||
prdebug("res %u %u", res.type, res.size);
|
||||
|
||||
if (res.type == SCOUTFS_IOC_RAW_READ_RESULT_INODE && found->ino != 0)
|
||||
break;
|
||||
|
||||
off += sizeof(res);
|
||||
|
||||
switch(res.type) {
|
||||
case SCOUTFS_IOC_RAW_READ_RESULT_INODE:
|
||||
memcpy(&found->ino, buf + off, sizeof(__u64));
|
||||
memcpy(&ms, buf + off + sizeof(__u64) +
|
||||
offsetof(struct scoutfs_inode, meta_seq), sizeof(__le64));
|
||||
found->meta_seq = le64_to_cpu(ms);
|
||||
prdebug("res ino %llu ms %llu", found->ino, found->meta_seq);
|
||||
break;
|
||||
|
||||
case SCOUTFS_IOC_RAW_READ_RESULT_XATTR:
|
||||
len = strlen((char *)buf + off) + 1;
|
||||
prdebug("res xattr '%s' len %d: '%.*s'",
|
||||
(char *)buf + off,
|
||||
(int)(res.size - len),
|
||||
(int)(res.size - len),
|
||||
(char *)buf + off + len);
|
||||
break;
|
||||
};
|
||||
off += res.size;
|
||||
}
|
||||
|
||||
return off;
|
||||
}
|
||||
|
||||
/*
|
||||
* inos[] contains the inode numbers that we're interested in. Get
|
||||
* their info and update our array with what we find.
|
||||
*/
|
||||
static int read_inode_info(int fd, void *buf, struct meta_seq_array *arr, __u64 *inos, int nr_inos)
|
||||
{
|
||||
struct scoutfs_ioctl_raw_read_inode_info rii;
|
||||
struct scoutfs_ioctl_meta_seq found;
|
||||
struct scoutfs_ioctl_meta_seq ms;
|
||||
ssize_t off;
|
||||
size_t size;
|
||||
size_t ind;
|
||||
size_t added;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
rii = (struct scoutfs_ioctl_raw_read_inode_info) {
|
||||
.inos_ptr = (unsigned long)inos,
|
||||
.inos_count = nr_inos,
|
||||
.names_ptr = (unsigned long)opts.names,
|
||||
.names_count = opts.names_count,
|
||||
.results_ptr = (unsigned long)buf,
|
||||
.results_size = RESULTS_SIZE,
|
||||
};
|
||||
|
||||
begin_call(&stats.rii);
|
||||
ret = ioctl(fd, SCOUTFS_IOC_RAW_READ_INODE_INFO, &rii);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
prerror("READ_INODE_INFO ioctl failed: "NERRF, NERRA(ret));
|
||||
goto out;
|
||||
}
|
||||
end_call(&stats.rii);
|
||||
|
||||
prdebug("gii ret %d", ret);
|
||||
|
||||
off = 0;
|
||||
size = ret;
|
||||
set_ms(&found, 0, 0);
|
||||
added = 0;
|
||||
i = 0;
|
||||
|
||||
/* sort by ino so we can search by ino for updates */
|
||||
qsort(arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_ino);
|
||||
|
||||
while (i < nr_inos) {
|
||||
/* find next ino */
|
||||
if (!found.ino && off < size) {
|
||||
off = read_inode_results(buf, off, size, &found);
|
||||
if (off < 0) {
|
||||
ret = off;
|
||||
goto out;
|
||||
}
|
||||
stats.rii.inos++;
|
||||
}
|
||||
|
||||
if (i < nr_inos && (!found.ino || inos[i] < found.ino)) {
|
||||
/* delete any record of inodes we didn't find */
|
||||
set_ms(&ms, UINT64_MAX, inos[i]);
|
||||
i++;
|
||||
|
||||
} else if (found.ino) {
|
||||
/* update/add arr to match the found ino */
|
||||
ms = found;
|
||||
if (i < nr_inos && inos[i] == found.ino)
|
||||
i++;
|
||||
set_ms(&found, 0, 0);
|
||||
}
|
||||
|
||||
/* find existing record */
|
||||
ind = bsearch_ind(&ms, arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_ino);
|
||||
if (ind < arr->nr && arr->ms[ind].ino == ms.ino) {
|
||||
/* update existing ino, can be marking for deletion */
|
||||
prdebug("updating arr [%zu] ino %llu ms %llu -> %llu",
|
||||
ind, ms.ino, arr->ms[ind].meta_seq, ms.meta_seq);
|
||||
arr->ms[ind].meta_seq = ms.meta_seq;
|
||||
if (ms.meta_seq == UINT64_MAX)
|
||||
stats.remove++;
|
||||
else
|
||||
stats.update++;
|
||||
|
||||
} else if (ms.meta_seq != UINT64_MAX) {
|
||||
/* append new found, maintaining existing sorting */
|
||||
arr->ms[arr->nr + added] = ms;
|
||||
prdebug("adding arr [%zu] ino %llu ms %llu",
|
||||
arr->nr + added, ms.ino, ms.meta_seq);
|
||||
added++;
|
||||
stats.add++;
|
||||
}
|
||||
}
|
||||
|
||||
/* sort by seq again for next meta seq read */
|
||||
arr->nr += added;
|
||||
qsort(arr->ms, arr->nr, sizeof(arr->ms[0]), compar_ms_meta_seq);
|
||||
|
||||
/* and trim off any deletions */
|
||||
while (arr->nr > 0 && arr->ms[arr->nr - 1].meta_seq == UINT64_MAX)
|
||||
arr->nr--;
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static double secs(u64 a_ns, u64 b_ns)
|
||||
{
|
||||
return (double)(a_ns - b_ns) / NSEC_PER_SEC;
|
||||
}
|
||||
|
||||
static double nr_per_sec(u64 nr, __u64 nsec)
|
||||
{
|
||||
if (nsec == 0)
|
||||
return 0;
|
||||
|
||||
return (double)nr / secs(nsec, 0);
|
||||
}
|
||||
|
||||
static void print_stats(void)
|
||||
{
|
||||
u64 now = get_ns();
|
||||
|
||||
if (secs(now, stats.last) < 1.0)
|
||||
return;
|
||||
|
||||
if ((stats.lines++ % 16) == 0) {
|
||||
printf("%6s | %-29s | %-23s | %-23s\n",
|
||||
"", "inodes", "meta_seq", "inode_info");
|
||||
printf("%6s | %8s %6s %6s %6s | %7s %7s %7s | %7s %7s %7s\n",
|
||||
"now",
|
||||
"total", "add", "remove", "update",
|
||||
"calls", "inos", "inos/s",
|
||||
"calls", "inos", "inos/s");
|
||||
}
|
||||
|
||||
printf("%6.3lf | %8llu %6llu %6llu %6llu | %7llu %7llu %7.0lf | %7llu %7llu %7.0lf\n",
|
||||
secs(now, stats.start),
|
||||
stats.inodes, stats.add, stats.remove, stats.update,
|
||||
stats.rms.calls, stats.rms.inos, nr_per_sec(stats.rms.inos, stats.rms.time),
|
||||
stats.rii.calls, stats.rii.inos, nr_per_sec(stats.rms.inos, stats.rii.time));
|
||||
|
||||
stats.last = now;
|
||||
|
||||
{
|
||||
struct stats save = stats;
|
||||
stats = (struct stats) {
|
||||
.start = save.start,
|
||||
.last = save.last,
|
||||
.lines = save.lines,
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
static void add_xattr(char *name)
|
||||
{
|
||||
size_t len_null;
|
||||
char *names;
|
||||
int ret;
|
||||
|
||||
len_null = strlen(name) + 1;
|
||||
names = realloc(opts.names, opts.names_size + len_null);
|
||||
if (!names) {
|
||||
ret = -errno;
|
||||
prerror("allocation of xattr names buffer failed: "NERRF, NERRA(ret));
|
||||
exit(3);
|
||||
}
|
||||
|
||||
memcpy(names + opts.names_size, name, len_null);
|
||||
|
||||
opts.names = names;
|
||||
opts.names_size += len_null;
|
||||
opts.names_count++;
|
||||
}
|
||||
|
||||
static bool parse_opts(int argc, char **argv)
|
||||
{
|
||||
bool usage = false;
|
||||
int c;
|
||||
|
||||
opts = (struct opts) {
|
||||
.debug = false,
|
||||
};
|
||||
|
||||
while ((c = getopt(argc, argv, "dp:x:")) != -1) {
|
||||
switch(c) {
|
||||
case 'd':
|
||||
opts.debug = true;
|
||||
break;
|
||||
case 'p':
|
||||
opts.path = strdup(optarg);
|
||||
break;
|
||||
case 'x':
|
||||
add_xattr(optarg);
|
||||
break;
|
||||
case '?':
|
||||
printf("Unknown option '%c'\n", optopt);
|
||||
usage = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!usage) {
|
||||
usage = true;
|
||||
if (!opts.path)
|
||||
printf("need -p path option\n");
|
||||
else
|
||||
usage = false;
|
||||
}
|
||||
|
||||
if (usage) {
|
||||
printf("\nusage:\n"
|
||||
" -d | enable verbose debugging output\n"
|
||||
" -p PATH | path to file system to watch\n"
|
||||
" -x NAME | try to read named xattr with inodes, can be many\n"
|
||||
);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct scoutfs_ioctl_raw_read_meta_seq rms = {0,};
|
||||
struct scoutfs_ioctl_meta_seq *ms;
|
||||
struct meta_seq_array arr = {0,};
|
||||
__u64 *inos = NULL;
|
||||
void *buf = NULL;
|
||||
int fd = -1;
|
||||
int nr_inos;
|
||||
int nr;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
if (!parse_opts(argc, argv))
|
||||
exit(1);
|
||||
|
||||
inos = calloc(INO_BATCH, sizeof(inos[0]));
|
||||
buf = malloc(RESULTS_SIZE);
|
||||
if (!inos || !buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
rms.results_ptr = (unsigned long)buf;
|
||||
rms.results_size = min(RESULTS_SIZE, INO_BATCH * sizeof(struct scoutfs_ioctl_meta_seq));
|
||||
|
||||
fd = open(opts.path, O_RDONLY);
|
||||
if (fd == -1) {
|
||||
perror("error");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
stats.start = get_ns();
|
||||
|
||||
for (;;) {
|
||||
set_ms(&rms.start, 0, 0);
|
||||
set_ms(&rms.end, UINT64_MAX, UINT64_MAX);
|
||||
|
||||
do {
|
||||
begin_call(&stats.rms);
|
||||
ret = ioctl(fd, SCOUTFS_IOC_RAW_READ_META_SEQ, &rms);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
prerror("READ_META_SEQ ioctl failed, "
|
||||
"start "MSF" end "MSF", "NERRF,
|
||||
MSA(&rms.start), MSA(&rms.end), NERRA(ret));
|
||||
goto out;
|
||||
}
|
||||
end_call(&stats.rms);
|
||||
stats.rms.inos += ret;
|
||||
|
||||
prdebug("RMS last "MSF" ret %d:", MSA(&rms.last), ret);
|
||||
|
||||
nr = ret;
|
||||
ms = buf;
|
||||
|
||||
if (opts.debug && nr > 0) {
|
||||
for (i = 0; i < nr; i++)
|
||||
prdebug(" [%u] "MSF"", i, MSA(&ms[i]));
|
||||
}
|
||||
|
||||
nr_inos = differing_inos(inos, &arr, &rms.start, &rms.last, ms, nr);
|
||||
|
||||
if (nr_inos > 0) {
|
||||
prdebug("diff inos %d:", nr_inos);
|
||||
for (i = 0; i < nr_inos; i++)
|
||||
prdebug(" [%u] %llu", i, inos[i]);
|
||||
|
||||
ret = expand_array(&arr, nr_inos) ?:
|
||||
read_inode_info(fd, buf, &arr, inos, nr_inos);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
stats.inodes = arr.nr;
|
||||
print_stats();
|
||||
|
||||
rms.start = rms.last;
|
||||
inc_ms(&rms.start);
|
||||
|
||||
} while (rms.last.meta_seq != UINT64_MAX || rms.last.ino != UINT64_MAX);
|
||||
|
||||
|
||||
sleep(1);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
|
||||
free(inos);
|
||||
free(buf);
|
||||
free(arr.ms);
|
||||
free(opts.names);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1,86 +0,0 @@
|
||||
#
|
||||
# Test basic scoutfs-nfs interactions:
|
||||
# - read/write
|
||||
# - stage/release and data wait
|
||||
# - nfs setacl/getacl mapping
|
||||
#
|
||||
|
||||
t_require_commands scoutfs setfacl getfacl exportfs mount.nfs umount \
|
||||
stat dd cmp systemctl
|
||||
|
||||
systemctl start nfs-server >> "$T_TMPDIR/nfs.log" 2>&1 || \
|
||||
t_skip "nfs-server not available"
|
||||
|
||||
# Keep file creation modes deterministic for the ACL golden output.
|
||||
umask 022
|
||||
|
||||
EXPORT_OPTS="rw,async,no_root_squash,no_subtree_check,fsid=42"
|
||||
NFS_MNT="$T_TMP.nfs"
|
||||
NFS_DIR="$NFS_MNT/test/basic-nfs"
|
||||
|
||||
filter() { sed "s@$T_TMPDIR@T_TMPDIR@g" | t_filter_fs; }
|
||||
gf() { getfacl -n --omit-header "$@" 2>/dev/null; }
|
||||
|
||||
teardown_nfs()
|
||||
{
|
||||
(
|
||||
umount "$NFS_MNT"
|
||||
exportfs -u "127.0.0.1:$T_M0"
|
||||
exportfs -f
|
||||
systemctl stop nfs-server
|
||||
rmdir "$NFS_MNT"
|
||||
) >> "$T_TMPDIR/nfs.log" 2>&1
|
||||
}
|
||||
trap teardown_nfs EXIT
|
||||
|
||||
exportfs -u "127.0.0.1:$T_M0" >> "$T_TMPDIR/nfs.log" 2>&1 || true
|
||||
t_quiet mkdir -p "$NFS_MNT"
|
||||
exportfs -o "$EXPORT_OPTS" "127.0.0.1:$T_M0" >> "$T_TMPDIR/nfs.log" 2>&1
|
||||
mount.nfs -o vers=3,noac,actimeo=0 "127.0.0.1:$T_M0" "$NFS_MNT" >> "$T_TMPDIR/nfs.log" 2>&1
|
||||
|
||||
test -d "$NFS_DIR" || t_fail "test dir $NFS_DIR not visible over NFS"
|
||||
|
||||
echo "== write via NFS, read both sides"
|
||||
dd if=/dev/urandom bs=4096 count=1 of="$T_TMP.data" status=none
|
||||
cp "$T_TMP.data" "$NFS_DIR/file"
|
||||
cmp "$T_TMP.data" "$T_D0/file"
|
||||
cmp "$T_TMP.data" "$NFS_DIR/file"
|
||||
|
||||
echo "== POSIX ACL set via NFS, read both sides"
|
||||
setfacl -m u:22222:rw "$NFS_DIR/file" 2>&1 | filter
|
||||
gf "$NFS_DIR/file"
|
||||
gf "$T_D0/file"
|
||||
|
||||
echo "== POSIX ACL set on scoutfs, read via NFS"
|
||||
setfacl -m g:44444:r "$T_D0/file" 2>&1 | filter
|
||||
gf "$NFS_DIR/file"
|
||||
|
||||
echo "== default ACL inheritance via NFS"
|
||||
mkdir "$NFS_DIR/d"
|
||||
setfacl -d -m u:22222:rwx "$NFS_DIR/d" 2>&1 | filter
|
||||
touch "$NFS_DIR/d/child"
|
||||
gf "$NFS_DIR/d/child"
|
||||
|
||||
echo "== NFS read demand-stages a released file"
|
||||
dd if=/dev/urandom bs=4096 count=1 of="$T_TMP.big" status=none
|
||||
cp "$T_TMP.big" "$T_D0/big"
|
||||
sync
|
||||
vers=$(scoutfs stat -s data_version "$T_D0/big")
|
||||
t_quiet scoutfs release "$T_D0/big" -V "$vers" -o 0 -l 4K
|
||||
|
||||
# NFS read against the offline file blocks in scoutfs_read waiting
|
||||
# for the data to come back online.
|
||||
cat "$NFS_DIR/big" > "$T_TMP.read" &
|
||||
read_pid=$!
|
||||
sleep 1
|
||||
scoutfs data-waiting -B 0 -I 0 -p "$T_D0" | wc -l
|
||||
|
||||
t_quiet scoutfs stage "$T_TMP.big" "$T_D0/big" -V "$vers" -o 0 -l 4096
|
||||
wait "$read_pid"
|
||||
cmp "$T_TMP.big" "$T_TMP.read"
|
||||
|
||||
echo "== cleanup"
|
||||
rm -f "$T_D0/file" "$T_D0/big"
|
||||
rm -rf "$T_D0/d"
|
||||
|
||||
t_pass
|
||||
@@ -0,0 +1,184 @@
|
||||
#
|
||||
# Test our basic ability to work with different format versions.
|
||||
#
|
||||
# The current code being tested has a range of supported format
|
||||
# versions. For each of the older supported format versions we have a
|
||||
# git hash of the commit before the next greater version was introduced.
|
||||
# We build versions of the scoutfs utility and kernel module for the
|
||||
# last commit in tree that had a lesser supported version as its max
|
||||
# supported version. We use those binaries to test forward and back
|
||||
# compat as new and old code works with a persistent volume with a given
|
||||
# format version.
|
||||
#
|
||||
|
||||
# not supported on el8 or higher
|
||||
if [ $(source /etc/os-release ; echo ${VERSION_ID:0:1}) -gt 7 ]; then
|
||||
t_skip_permitted "Unsupported OS version"
|
||||
fi
|
||||
|
||||
mount_has_format_version()
|
||||
{
|
||||
local mnt="$1"
|
||||
local vers="$2"
|
||||
local sysfs_fmt_vers="$(t_sysfs_path_from_mnt $SCR)/format_version"
|
||||
|
||||
test "$(cat $sysfs_fmt_vers)" == "$vers"
|
||||
}
|
||||
|
||||
SCR="/mnt/scoutfs.scratch"
|
||||
|
||||
MIN=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_min:"){print $2}')
|
||||
MAX=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_max:"){print $2}')
|
||||
|
||||
echo "min: $MIN max: $MAX" > "$T_TMP.log"
|
||||
|
||||
test "$MIN" -gt 0 -a "$MAX" -gt 0 -a "$MIN" -le "$MAX" || \
|
||||
t_fail "parsed bad versions, min: $MIN max: $MAX"
|
||||
|
||||
test "$MIN" == "$MAX" && \
|
||||
t_skip "only one supported format version: $MIN"
|
||||
|
||||
# prepare dir and wipe any weird old partial state
|
||||
builds="$T_RESULTS/format_version_builds"
|
||||
mkdir -p "$builds"
|
||||
|
||||
echo "== ensuring utils and module for old versions"
|
||||
declare -A commits
|
||||
commits[1]=c3c4b080
|
||||
for vers in $(seq $MIN $((MAX - 1))); do
|
||||
dir="$builds/$vers"
|
||||
platform=$(uname -rp)
|
||||
buildmark="$dir/buildmark"
|
||||
commit="${commits[$vers]}"
|
||||
|
||||
test -n "$commit" || \
|
||||
t_fail "no commit for vers $vers"
|
||||
|
||||
# have our files for this version
|
||||
test "$(cat $buildmark 2>&1)" == "$platform" && \
|
||||
continue
|
||||
|
||||
# build as one big sequence of commands that can return failure
|
||||
(
|
||||
set -o pipefail
|
||||
|
||||
rm -rf $dir &&
|
||||
mkdir -p $dir/building &&
|
||||
cd "$T_TESTS/.." &&
|
||||
git archive --format=tar "$commit" | tar -C "$dir/building" -xf - &&
|
||||
cd - &&
|
||||
find $dir &&
|
||||
make -C "$dir/building" &&
|
||||
mv $dir/building/utils/src/scoutfs $dir &&
|
||||
mv $dir/building/kmod/src/scoutfs.ko $dir &&
|
||||
rm -rf $dir/building &&
|
||||
echo "$platform" > $buildmark &&
|
||||
find $dir &&
|
||||
cat $buildmark
|
||||
) >> "$T_TMP.log" 2>&1 || t_fail "version $vers build failed"
|
||||
done
|
||||
|
||||
echo "== unmounting test fs and removing test module"
|
||||
t_quiet t_umount_all
|
||||
t_quiet rmmod scoutfs
|
||||
|
||||
echo "== testing combinations of old and new format versions"
|
||||
mkdir -p "$SCR"
|
||||
for vers in $(seq $MIN $((MAX - 1))); do
|
||||
old_scoutfs="$builds/$vers/scoutfs"
|
||||
old_module="$builds/$vers/scoutfs.ko"
|
||||
|
||||
echo "mkfs $vers" >> "$T_TMP.log"
|
||||
t_quiet $old_scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" \
|
||||
|| t_fail "mkfs $vers failed"
|
||||
|
||||
echo "mount $vers with $vers" >> "$T_TMP.log"
|
||||
t_quiet insmod $old_module
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$vers"
|
||||
|
||||
echo "creating files in $vers" >> "$T_TMP.log"
|
||||
t_quiet touch "$SCR/file-"{1,2,3}
|
||||
stat "$SCR"/file-* > "$T_TMP.stat" || \
|
||||
t_fail "stat in $vers failed"
|
||||
|
||||
echo "remounting $vers fs with $MAX" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
insmod "$T_MODULE"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$vers"
|
||||
|
||||
echo "verifying stat in $vers with $MAX" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "keep/update/del existing, create new in $vers" >> "$T_TMP.log"
|
||||
t_quiet touch "$SCR/file-2"
|
||||
t_quiet rm -f "$SCR/file-3"
|
||||
t_quiet touch "$SCR/file-4"
|
||||
stat "$SCR"/file-* > "$T_TMP.stat" || \
|
||||
t_fail "stat in $vers failed"
|
||||
|
||||
echo "remounting $vers fs with $vers" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
insmod "$old_module"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$vers"
|
||||
|
||||
echo "verifying stat in $vers with $vers" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "changing format vers to $MAX" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
t_quiet scoutfs change-format-version -F -V $MAX $T_EX_META_DEV "$T_EX_DATA_DEV"
|
||||
|
||||
echo "mount fs $MAX with old $vers should fail" >> "$T_TMP.log"
|
||||
insmod "$old_module"
|
||||
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR" >> "$T_TMP.log" 2>&1
|
||||
if [ "$?" == "0" ]; then
|
||||
umount "$SCR"
|
||||
t_fail "old code ver $vers able to mount new ver $MAX"
|
||||
fi
|
||||
|
||||
echo "remounting $MAX fs with $MAX" >> "$T_TMP.log"
|
||||
rmmod scoutfs
|
||||
insmod "$T_MODULE"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$MAX"
|
||||
|
||||
echo "verifying stat in $MAX with $MAX" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "keep/update/del existing, create new in $MAX" >> "$T_TMP.log"
|
||||
t_quiet touch "$SCR/file-2"
|
||||
t_quiet rm -f "$SCR/file-4"
|
||||
t_quiet touch "$SCR/file-5"
|
||||
stat "$SCR"/file-* > "$T_TMP.stat" || \
|
||||
t_fail "stat in $MAX failed"
|
||||
|
||||
echo "remounting $MAX fs with $MAX again" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$MAX"
|
||||
|
||||
echo "verifying stat in $MAX with $MAX again" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "done with old vers $vers" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
done
|
||||
|
||||
echo "== restoring test module and mount"
|
||||
insmod "$T_MODULE"
|
||||
t_mount_all
|
||||
|
||||
t_pass
|
||||
@@ -1,70 +0,0 @@
|
||||
#
|
||||
# Regression for the BUG_ON in scoutfs_quota_invalidate when a concurrent
|
||||
# ruleset read on one mount races with a quota rule modification.
|
||||
#
|
||||
|
||||
t_require_mounts 2
|
||||
|
||||
TEST_UID=22222
|
||||
SET_UID="--ruid=$TEST_UID --euid=$TEST_UID"
|
||||
|
||||
echo "== setup"
|
||||
mkdir -p "$T_D0/dir"
|
||||
chown --quiet $TEST_UID "$T_D0/dir"
|
||||
|
||||
# totl xattr gives quota checks something to consult
|
||||
setfattr -n scoutfs.totl.test.1.1.1 -v 1 "$T_D0/dir"
|
||||
|
||||
echo "== concurrent quota mod and check across mounts"
|
||||
|
||||
(
|
||||
for i in $(seq 1 20); do
|
||||
scoutfs quota-add -p "$T_M0" \
|
||||
-r "1 1,L,- 1,L,- $i,L,- I 999999 -" 2>/dev/null
|
||||
scoutfs quota-del -p "$T_M0" \
|
||||
-r "1 1,L,- 1,L,- $i,L,- I 999999 -" 2>/dev/null
|
||||
done
|
||||
) &
|
||||
MOD_PID=$!
|
||||
|
||||
# same mount as the mod: races local read against invalidate
|
||||
(
|
||||
for i in $(seq 1 50); do
|
||||
setpriv $SET_UID touch "$T_D0/dir/race0_$i" 2>/dev/null
|
||||
rm -f "$T_D0/dir/race0_$i"
|
||||
done
|
||||
) &
|
||||
CHECK0_PID=$!
|
||||
|
||||
# other mount: drives cross-node lock traffic
|
||||
(
|
||||
for i in $(seq 1 50); do
|
||||
setpriv $SET_UID touch "$T_D1/dir/race1_$i" 2>/dev/null
|
||||
rm -f "$T_D1/dir/race1_$i"
|
||||
done
|
||||
) &
|
||||
CHECK1_PID=$!
|
||||
|
||||
t_quiet wait $MOD_PID
|
||||
t_quiet wait $CHECK0_PID
|
||||
t_quiet wait $CHECK1_PID
|
||||
|
||||
echo "== verify quota rules are consistent after race"
|
||||
scoutfs quota-wipe -p "$T_M0"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
|
||||
echo "== verify file creation still works under quota"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 999999 -"
|
||||
sync
|
||||
echo 1 > $(t_debugfs_path)/drop_weak_item_cache
|
||||
echo 1 > $(t_debugfs_path)/drop_quota_check_cache
|
||||
setpriv $SET_UID touch "$T_D0/dir/verify_file"
|
||||
test -f "$T_D1/dir/verify_file" && echo "file visible on mount 1"
|
||||
rm -f "$T_D0/dir/verify_file"
|
||||
scoutfs quota-wipe -p "$T_M0"
|
||||
|
||||
echo "== cleanup"
|
||||
setfattr -x scoutfs.totl.test.1.1.1 "$T_D0/dir"
|
||||
rm -rf "$T_D0/dir"
|
||||
|
||||
t_pass
|
||||
@@ -1,43 +0,0 @@
|
||||
#
|
||||
# Exercise the SCOUTFS_IOC_INJECT_TOTL_DELTA ioctl that injects totl
|
||||
# deltas directly via totl-delta-inject(1).
|
||||
#
|
||||
|
||||
t_require_commands setfattr scoutfs sync rm touch totl-delta-inject
|
||||
|
||||
# force a log merge then read-xattr-totals filtered to our own keys
|
||||
read_totals()
|
||||
{
|
||||
t_force_log_merge
|
||||
sync
|
||||
echo 1 > $(t_debugfs_path)/drop_weak_item_cache
|
||||
scoutfs read-xattr-totals -p "$T_M0" | \
|
||||
grep -E '^8888\.' || true
|
||||
}
|
||||
|
||||
echo "== setup three files contributing to totl 8888.0.0"
|
||||
touch "$T_D0/f1" "$T_D0/f2" "$T_D0/f3"
|
||||
setfattr -n scoutfs.totl.inj.8888.0.0 -v 2 "$T_D0/f1"
|
||||
setfattr -n scoutfs.totl.inj.8888.0.0 -v 8 "$T_D0/f2"
|
||||
setfattr -n scoutfs.totl.inj.8888.0.0 -v 32 "$T_D0/f3"
|
||||
|
||||
echo "== merge baseline into fs_root"
|
||||
read_totals
|
||||
|
||||
echo "== inject (+128, +2) unbalances totl 8888.0.0"
|
||||
totl-delta-inject "$T_M0" 8888.0.0 128 2
|
||||
read_totals
|
||||
|
||||
echo "== unlink f3 (value 32) produces a -32/-1 delta"
|
||||
rm -f "$T_D0/f3"
|
||||
read_totals
|
||||
|
||||
echo "== inject (-128, -2) restores accounting for the remaining files"
|
||||
totl-delta-inject "$T_M0" 8888.0.0 -128 -2
|
||||
read_totals
|
||||
|
||||
echo "== cleanup"
|
||||
rm -f "$T_D0/f1" "$T_D0/f2"
|
||||
read_totals
|
||||
|
||||
t_pass
|
||||
Reference in New Issue
Block a user