Compare commits

..

1 Commits

Author SHA1 Message Date
Auke Kok e011c80452 Do not fence connections without valid greeting
There is no reason to fence any connection that hasn't sent a valid
greeting, since they haven't progressed far enough for it to make
sense. Skip the fence call for these connections and let the existing
destroy path tear them down. Any real client will reconnect.

server_notify_down() previously treated any zero rid teardown as
the listening socket going down and called stop_server(). That's
correct for the listener legitimately, but not for any conn that
never completed a greeting.

Adds a test that sends a short garbage payload to each quorum port and
verifies that no greeting-less connection remains past the reconnect
timeout (20s) and that the filesystem still works afterwards.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-06 16:44:37 -07:00
35 changed files with 1441 additions and 230 deletions
-14
View File
@@ -1,20 +1,6 @@
Versity ScoutFS Release Notes
=============================
---
v1.32
\
*June 2, 2026*
Fix writing POSIX ACLs over NFS mounts that export the scoutfs
filesystem.
Add support for kernels in the RHEL 9.8 minor release.
Reduce unneeded block allocation when data\_prealloc\_contig\_only was
set to 0. This will help achieve more efficient data space usage when
writing small files.
---
v1.31
\
+283 -9
View File
@@ -6,6 +6,231 @@
ccflags-y += -include $(src)/kernelcompat.h
#
# v3.18-rc2-19-gb5ae6b15bd73
#
# Folds d_materialise_unique into d_splice_alias. Note reversal
# of arguments (Also note Documentation/filesystems/porting.rst)
#
ifneq (,$(shell grep 'd_materialise_unique' include/linux/dcache.h))
ccflags-y += -DKC_D_MATERIALISE_UNIQUE=1
endif
#
# RHEL extended the fop struct so to use it we have to set
# a flag to indicate that the struct is large enough and
# contains the pointer.
#
ifneq (,$(shell grep 'FMODE_KABI_ITERATE' include/linux/fs.h))
ccflags-y += -DKC_FMODE_KABI_ITERATE
endif
#
# v4.7-rc2-23-g0d4d717f2583
#
# Added user_ns argument to posix_acl_valid
#
ifneq (,$(shell grep 'posix_acl_valid.*user_namespace' include/linux/posix_acl.h))
ccflags-y += -DKC_POSIX_ACL_VALID_USER_NS
endif
#
# v5.3-12296-g6d2052d188d9
#
# The RBCOMPUTE function is now passed an extra flag, and should return a bool
# to indicate whether the propagated callback should stop or not.
#
ifneq (,$(shell grep 'static inline bool RBNAME.*_compute_max' include/linux/rbtree_augmented.h))
ccflags-y += -DKC_RB_TREE_AUGMENTED_COMPUTE_MAX
endif
#
# v3.13-25-g37bc15392a23
#
# Renames posix_acl_create to __posix_acl_create and provide some
# new interfaces for creating ACLs
#
ifneq (,$(shell grep '__posix_acl_create' include/linux/posix_acl.h))
ccflags-y += -DKC___POSIX_ACL_CREATE
endif
#
# v4.8-rc1-29-g31051c85b5e2
#
# inode_change_ok() removed - replace with setattr_prepare()
# v5.11-rc4-7-g2f221d6f7b88 removes extern attribute
#
ifneq (,$(shell grep 'int setattr_prepare' include/linux/fs.h))
ccflags-y += -DKC_SETATTR_PREPARE
endif
#
# v4.15-rc3-4-gae5e165d855d
#
# linux/iversion.h needs to manually be included for code that
# manipulates this field.
#
ifneq (,$(shell grep -s 'define _LINUX_IVERSION_H' include/linux/iversion.h))
ccflags-y += -DKC_NEED_LINUX_IVERSION_H=1
endif
# v4.11-12447-g104b4e5139fe
#
# Renamed __percpu_counter_add to percpu_counter_add_batch to clarify
# that the __ wasn't less safe, just took an extra parameter.
#
ifneq (,$(shell grep 'percpu_counter_add_batch' include/linux/percpu_counter.h))
ccflags-y += -DKC_PERCPU_COUNTER_ADD_BATCH
endif
#
# v4.11-4550-g7dea19f9ee63
#
# Introduced memalloc_nofs_{save,restore} preferred instead of _noio_.
#
ifneq (,$(shell grep 'memalloc_nofs_save' include/linux/sched/mm.h))
ccflags-y += -DKC_MEMALLOC_NOFS_SAVE
endif
#
# v4.7-12414-g1eff9d322a44
#
# Renamed bi_rw to bi_opf to force old code to catch up. We use it as a
# single switch between old and new bio structures.
#
ifneq (,$(shell grep 'bi_opf' include/linux/blk_types.h))
ccflags-y += -DKC_BIO_BI_OPF
endif
#
# v4.12-rc2-201-g4e4cbee93d56
#
# Moves to bi_status BLK_STS_ API instead of having a mix of error
# end_io args or bi_error.
#
ifneq (,$(shell grep 'bi_status' include/linux/blk_types.h))
ccflags-y += -DKC_BIO_BI_STATUS
endif
#
# v3.11-8765-ga0b02131c5fc
#
# Remove the old ->shrink() API, ->{scan,count}_objects is preferred.
#
ifneq (,$(shell grep '(*shrink)' include/linux/shrinker.h))
ccflags-y += -DKC_SHRINKER_SHRINK
endif
#
# v3.19-4777-g6bec00352861
#
# backing_dev_info is removed from address_space. Instead we need to use
# inode_to_bdi() inline from <backing-dev.h>.
#
ifneq (,$(shell grep 'struct backing_dev_info.*backing_dev_info' include/linux/fs.h))
ccflags-y += -DKC_LINUX_BACKING_DEV_INFO=1
endif
#
# v4.3-9290-ge409de992e3e
#
# xattr handlers are now passed a struct that contains `flags`
#
ifneq (,$(shell grep 'int...get..const struct xattr_handler.*struct dentry.*dentry,' include/linux/xattr.h))
ccflags-y += -DKC_XATTR_STRUCT_XATTR_HANDLER=1
endif
#
# v4.16-rc1-1-g9b2c45d479d0
#
# kernel_getsockname() and kernel_getpeername dropped addrlen arg
#
ifneq (,$(shell grep 'kernel_getsockname.*,$$' include/linux/net.h))
ccflags-y += -DKC_KERNEL_GETSOCKNAME_ADDRLEN=1
endif
#
# v4.1-rc1-410-geeb1bd5c40ed
#
# Adds a struct net parameter to sock_create_kern
#
ifneq (,$(shell grep 'sock_create_kern.*struct net' include/linux/net.h))
ccflags-y += -DKC_SOCK_CREATE_KERN_NET=1
endif
#
# v4.17-rc6-7-g95582b008388
#
# Kernel has current_time(inode) to uniformly retreive timespec in the right unit
#
ifneq (,$(shell grep 'struct timespec64 current_time' include/linux/fs.h))
ccflags-y += -DKC_CURRENT_TIME_INODE=1
endif
#
# v4.9-12228-g530e9b76ae8f
#
# register_cpu_notifier and family were all removed and to be
# replaced with cpuhp_* API calls.
#
ifneq (,$(shell grep 'define register_hotcpu_notifier' include/linux/cpu.h))
ccflags-y += -DKC_CPU_NOTIFIER
endif
#
# v3.14-rc8-130-gccad2365668f
#
# generic_file_buffered_write is removed, backport it
#
ifneq (,$(shell grep 'extern ssize_t generic_file_buffered_write' include/linux/fs.h))
ccflags-y += -DKC_GENERIC_FILE_BUFFERED_WRITE=1
endif
#
# v5.7-438-g8151b4c8bee4
#
# struct address_space_operations switches away from .readpages to .readahead
#
# RHEL has backported this feature all the way to RHEL8, as part of RHEL_KABI,
# which means we need to detect this very precisely
#
ifneq (,$(shell grep 'readahead.*struct readahead_control' include/linux/fs.h))
ccflags-y += -DKC_FILE_AOPS_READAHEAD
endif
#
# v4.0-rc7-1743-g8436318205b9
#
# .aio_read and .aio_write no longer exist. All reads and writes now use the
# .read_iter and .write_iter methods, or must implement .read and .write (which
# we don't).
#
ifneq (,$(shell grep 'ssize_t.*aio_read' include/linux/fs.h))
ccflags-y += -DKC_LINUX_HAVE_FOP_AIO_READ=1
endif
#
# rhel7 has a custom inode_operations_wrapper struct that is discarded
# entirely in favor of upstream structure since rhel8.
#
ifneq (,$(shell grep 'void.*follow_link.*struct dentry' include/linux/fs.h))
ccflags-y += -DKC_LINUX_HAVE_RHEL_IOPS_WRAPPER=1
endif
ifneq (,$(shell grep 'size_t.*ki_left;' include/linux/aio.h))
ccflags-y += -DKC_LINUX_AIO_KI_LEFT=1
endif
#
# v4.4-rc4-4-g98e9cb5711c6
#
# Introduces a new xattr_handler .name member that can be used to match the
# entire field, instead of just a prefix. For these kernels, we must use
# the new .name field instead.
ifneq (,$(shell grep 'static inline const char .xattr_prefix' include/linux/xattr.h))
ccflags-y += -DKC_XATTR_HANDLER_NAME=1
endif
#
# v5.19-rc4-96-g342a72a33407
#
@@ -109,6 +334,14 @@ ifneq (,$(shell grep 'int tcp_sock_set_keepintvl' include/linux/tcp.h))
ccflags-y += -DKC_HAVE_TCP_SET_SOCKFN
endif
#
# v4.16-rc3-13-ga84d1169164b
#
# Fixes y2038 issues with struct timeval.
ifneq (,$(shell grep -s '^struct __kernel_old_timeval .' include/uapi/linux/time_types.h))
ccflags-y += -DKC_KERNEL_OLD_TIMEVAL_STRUCT
endif
#
# v5.19-rc4-52-ge33c267ab70d
#
@@ -178,6 +411,47 @@ ifneq (,$(shell grep 'struct file.*bdev_file_open_by_path.const char.*path' incl
ccflags-y += -DKC_BDEV_FILE_OPEN_BY_PATH
endif
# v4.0-rc7-1796-gfe0f07d08ee3
#
# direct-io changes modify inode_dio_done to now be called inode_dio_end
ifneq (,$(shell grep 'void inode_dio_end.struct inode' include/linux/fs.h))
ccflags-y += -DKC_INODE_DIO_END
endif
#
# v5.0-6476-g3d3539018d2c
#
# page fault handlers return a bitmask vm_fault_t instead
# Note: el8's header has a slightly modified prefix here
ifneq (,$(shell grep 'typedef.*__bitwise unsigned.*int vm_fault_t' include/linux/mm_types.h))
ccflags-y += -DKC_MM_VM_FAULT_T
endif
# v3.19-499-gd83a08db5ba6
#
# .remap pages becomes obsolete
ifneq (,$(shell grep 'int ..remap_pages..struct vm_area_struct' include/linux/mm.h))
ccflags-y += -DKC_MM_REMAP_PAGES
endif
#
# v3.19-4742-g503c358cf192
#
# list_lru_shrink_count() and list_lru_shrink_walk() introduced
#
ifneq (,$(shell grep 'list_lru_shrink_count.*struct list_lru' include/linux/list_lru.h))
ccflags-y += -DKC_LIST_LRU_SHRINK_COUNT_WALK
endif
#
# v3.19-4757-g3f97b163207c
#
# lru_list_walk_cb lru arg added
#
ifneq (,$(shell grep 'struct list_head \*item, spinlock_t \*lock, void \*cb_arg' include/linux/list_lru.h))
ccflags-y += -DKC_LIST_LRU_WALK_CB_ITEM_LOCK
endif
#
# v6.7-rc4-153-g0a97c01cd20b
#
@@ -196,6 +470,15 @@ ifneq (,$(shell grep 'struct list_lru_one \*list, spinlock_t \*lock, void \*cb_a
ccflags-y += -DKC_LIST_LRU_WALK_CB_LIST_LOCK
endif
#
# v5.1-rc4-273-ge9b98e162aa5
#
# introduce stack trace helpers
#
ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h))
ccflags-y += -DKC_STACK_TRACE_SAVE
endif
#
# v6.1-rc1-2-g138060ba92b3
#
@@ -213,12 +496,3 @@ endif
ifneq (,$(shell grep 'struct posix_acl.*get_inode_acl' include/linux/fs.h))
ccflags-y += -DKC_GET_INODE_ACL
endif
#
# v6.15-13744-g41cb08555c41
#
# from_timer renamed to timer_container_of.
#
ifneq (,$(shell grep 'define timer_container_of' include/linux/timer.h))
ccflags-y += -DKC_TIMER_CONTAINER_OF
endif
+29 -5
View File
@@ -16,7 +16,6 @@
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/iversion.h>
#include "format.h"
#include "super.h"
@@ -70,6 +69,15 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
char *name;
int ret;
#ifndef KC___POSIX_ACL_CREATE
if (!IS_POSIXACL(inode))
return NULL;
acl = get_cached_acl(inode, type);
if (acl != ACL_NOT_CACHED)
return acl;
#endif
ret = acl_xattr_name_len(type, &name, NULL);
if (ret < 0)
return ERR_PTR(ret);
@@ -115,6 +123,11 @@ struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
return ERR_PTR(-ECHILD);
#endif
#ifndef KC___POSIX_ACL_CREATE
if (!IS_POSIXACL(inode))
return NULL;
#endif
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
if (ret < 0) {
acl = ERR_PTR(ret);
@@ -203,8 +216,7 @@ int scoutfs_set_acl(KC_VFS_NS_DEF
{
struct inode *inode = dentry->d_inode;
#else
int scoutfs_set_acl(KC_VFS_NS_DEF
struct inode *inode, struct posix_acl *acl, int type)
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
#endif
struct super_block *sb = inode->i_sb;
@@ -227,11 +239,17 @@ int scoutfs_set_acl(KC_VFS_NS_DEF
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
return ret;
}
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
int scoutfs_acl_get_xattr(const struct xattr_handler *handler, struct dentry *dentry,
struct inode *inode, const char *name, void *value,
size_t size)
{
int type = handler->flags;
#else
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
int type)
{
#endif
struct posix_acl *acl;
int ret = 0;
@@ -254,6 +272,7 @@ int scoutfs_acl_get_xattr(const struct xattr_handler *handler, struct dentry *de
return ret;
}
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
KC_VFS_NS_DEF
struct dentry *dentry,
@@ -261,6 +280,11 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
size_t size, int flags)
{
int type = handler->flags;
#else
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
int flags, int type)
{
#endif
struct posix_acl *acl = NULL;
int ret;
@@ -276,7 +300,7 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
return PTR_ERR(acl);
if (acl) {
ret = posix_acl_valid(&init_user_ns, acl);
ret = kc_posix_acl_valid(&init_user_ns, acl);
if (ret)
goto out;
}
@@ -285,7 +309,7 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
#ifdef KC_SET_ACL_DENTRY
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
#else
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry->d_inode, acl, type);
ret = scoutfs_set_acl(dentry->d_inode, acl, type);
#endif
out:
posix_acl_release(acl);
+8 -2
View File
@@ -5,8 +5,7 @@
int scoutfs_set_acl(KC_VFS_NS_DEF
struct dentry *dentry, struct posix_acl *acl, int type);
#else
int scoutfs_set_acl(KC_VFS_NS_DEF
struct inode *inode, struct posix_acl *acl, int type);
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif
#ifdef KC_GET_INODE_ACL
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu);
@@ -16,6 +15,7 @@ struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
struct scoutfs_lock *lock, struct list_head *ind_locks);
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
int scoutfs_acl_get_xattr(const struct xattr_handler *, struct dentry *dentry,
struct inode *inode, const char *name, void *value,
size_t size);
@@ -24,6 +24,12 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *,
struct dentry *dentry,
struct inode *inode, const char *name, const void *value,
size_t size, int flags);
#else
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
int type);
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
int flags, int type);
#endif
int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr,
struct scoutfs_lock *lock, struct list_head *ind_locks);
int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir,
+9 -9
View File
@@ -444,13 +444,13 @@ static void block_end_io(struct super_block *sb, blk_opf_t opf,
wake_up(&binf->waitq);
}
static void block_bio_end_io(struct bio *bio)
static void KC_DECLARE_BIO_END_IO(block_bio_end_io, struct bio *bio)
{
struct block_private *bp = bio->bi_private;
struct super_block *sb = bp->sb;
TRACE_BLOCK(end_io, bp);
block_end_io(sb, bio->bi_opf, bp, blk_status_to_errno(bio->bi_status));
block_end_io(sb, kc_bio_get_opf(bio), bp, kc_bio_get_errno(bio));
bio_put(bio);
}
@@ -499,7 +499,7 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
break;
}
bio->bi_iter.bi_sector = sector + (off >> 9);
kc_bio_set_sector(bio, sector + (off >> 9));
bio->bi_end_io = block_bio_end_io;
bio->bi_private = bp;
@@ -516,13 +516,13 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
BUG();
if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
submit_bio(bio);
kc_submit_bio(bio);
bio = NULL;
}
}
if (bio)
submit_bio(bio);
kc_submit_bio(bio);
blk_finish_plug(&plug);
@@ -1179,11 +1179,11 @@ struct sm_block_completion {
int err;
};
static void sm_block_bio_end_io(struct bio *bio)
static void KC_DECLARE_BIO_END_IO(sm_block_bio_end_io, struct bio *bio)
{
struct sm_block_completion *sbc = bio->bi_private;
sbc->err = blk_status_to_errno(bio->bi_status);
sbc->err = kc_bio_get_errno(bio);
complete(&sbc->comp);
bio_put(bio);
}
@@ -1236,7 +1236,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, blk_op
goto out;
}
bio->bi_iter.bi_sector = blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9);
kc_bio_set_sector(bio, blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9));
bio->bi_end_io = sm_block_bio_end_io;
bio->bi_private = &sbc;
bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
@@ -1244,7 +1244,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, blk_op
init_completion(&sbc.comp);
sbc.err = 0;
submit_bio(bio);
kc_submit_bio(bio);
wait_for_completion(&sbc.comp);
ret = sbc.err;
+81 -12
View File
@@ -23,7 +23,6 @@
#include <linux/fiemap.h>
#include <linux/writeback.h>
#include <linux/overflow.h>
#include <linux/iversion.h>
#include "format.h"
#include "super.h"
@@ -423,8 +422,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
mutex_lock(&datinf->mutex);
scoutfs_inode_get_onoff(inode, &online, &offline);
/* default to single allocation at the written block */
start = iblock;
count = 1;
@@ -447,6 +444,7 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
* the preallocation size to the number of online
* blocks.
*/
scoutfs_inode_get_onoff(inode, &online, &offline);
if (iblock > 1 && iblock == online) {
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
iblock, 1, &found);
@@ -488,13 +486,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
/* trim count by next extent after iblock */
if (found.len && found.start > start && found.start < start + count)
count = (found.start - start);
/*
* Ramp the aligned region size up proportionally with
* the file's online block count rather than jumping to
* the full prealloc size.
*/
count = max_t(u64, 1, min(count, online));
}
/* overall prealloc limit */
@@ -758,6 +749,54 @@ static int scoutfs_readpage(struct file *file, struct page *page)
return ret;
}
#ifndef KC_FILE_AOPS_READAHEAD
/*
* This is used for opportunistic read-ahead which can throw the pages
* away if it needs to. If the caller didn't deal with offline extents
* then we drop those pages rather than trying to wait. Whoever is
* staging offline extents should be doing it in enormous chunks so that
* read-ahead can ramp up within each staged region. The check for
* offline extents is cheap when the inode has no offline extents.
*/
static int scoutfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
struct inode *inode = file->f_inode;
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *inode_lock = NULL;
struct page *page;
struct page *tmp;
int ret;
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
if (ret)
goto out;
list_for_each_entry_safe(page, tmp, pages, lru) {
ret = scoutfs_data_wait_check(inode, page_offset(page),
PAGE_SIZE, SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ, NULL,
inode_lock);
if (ret < 0)
goto out;
if (ret > 0) {
list_del(&page->lru);
put_page(page);
if (--nr_pages == 0) {
ret = 0;
goto out;
}
}
}
ret = mpage_readpages(mapping, pages, nr_pages, scoutfs_get_block_read);
out:
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
BUG_ON(!list_empty(pages));
return ret;
}
#else
static void scoutfs_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->file->f_inode;
@@ -779,6 +818,7 @@ static void scoutfs_readahead(struct readahead_control *rac)
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
}
#endif
static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
{
@@ -1219,7 +1259,7 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
struct data_ext_args from_args;
struct data_ext_args to_args;
struct scoutfs_extent ext;
struct timespec64 cur_time;
struct kc_timespec cur_time;
LIST_HEAD(locks);
bool done = false;
loff_t from_size;
@@ -2015,9 +2055,15 @@ int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock,
return ret;
}
#ifdef KC_MM_VM_FAULT_T
static vm_fault_t scoutfs_data_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
#else
static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
#endif
struct page *page = vmf->page;
struct file *file = vma->vm_file;
struct inode *inode = file_inode(file);
@@ -2159,9 +2205,14 @@ out:
return ret;
}
#ifdef KC_MM_VM_FAULT_T
static vm_fault_t scoutfs_data_filemap_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
#else
static int scoutfs_data_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
#endif
struct file *file = vma->vm_file;
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
@@ -2196,11 +2247,15 @@ retry:
}
}
#ifdef KC_MM_VM_FAULT_T
ret = filemap_fault(vmf);
#else
ret = filemap_fault(vma, vmf);
#endif
out:
if (scoutfs_per_task_del(&si->pt_data_lock, &pt_ent))
inode_dio_end(inode);
kc_inode_dio_end(inode);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
if (scoutfs_data_wait_found(&dw)) {
err = scoutfs_data_wait(inode, &dw);
@@ -2218,6 +2273,9 @@ out:
static const struct vm_operations_struct scoutfs_data_file_vm_ops = {
.fault = scoutfs_data_filemap_fault,
.page_mkwrite = scoutfs_data_page_mkwrite,
#ifdef KC_MM_REMAP_PAGES
.remap_pages = generic_file_remap_pages,
#endif
};
static int scoutfs_file_mmap(struct file *file, struct vm_area_struct *vma)
@@ -2235,7 +2293,11 @@ const struct address_space_operations scoutfs_file_aops = {
#else
.readpage = scoutfs_readpage,
#endif
#ifndef KC_FILE_AOPS_READAHEAD
.readpages = scoutfs_readpages,
#else
.readahead = scoutfs_readahead,
#endif
.writepage = scoutfs_writepage,
.writepages = scoutfs_writepages,
.write_begin = scoutfs_write_begin,
@@ -2243,10 +2305,17 @@ const struct address_space_operations scoutfs_file_aops = {
};
const struct file_operations scoutfs_file_fops = {
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
.read = do_sync_read,
.write = do_sync_write,
.aio_read = scoutfs_file_aio_read,
.aio_write = scoutfs_file_aio_write,
#else
.read_iter = scoutfs_file_read_iter,
.write_iter = scoutfs_file_write_iter,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
#endif
.mmap = scoutfs_file_mmap,
.unlocked_ioctl = scoutfs_ioctl,
.fsync = scoutfs_file_fsync,
+101 -8
View File
@@ -18,7 +18,6 @@
#include <linux/xattr.h>
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/iversion.h>
#include "format.h"
#include "file.h"
@@ -423,7 +422,18 @@ out:
else
inode = scoutfs_iget(sb, ino, 0, 0);
return d_splice_alias(inode, dentry);
/*
* We can't splice dir aliases into the dcache. dir entries
* might have changed on other nodes so our dcache could still
* contain them, rather than having been moved in rename. For
* dirs, we use d_materialize_unique to remove any existing
* aliases which must be stale. Our inode numbers aren't reused
* so inodes pointed to by entries can't change types.
*/
if (!IS_ERR_OR_NULL(inode) && S_ISDIR(inode->i_mode))
return d_materialise_unique(dentry, inode);
else
return d_splice_alias(inode, dentry);
}
/*
@@ -952,7 +962,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
{
struct super_block *sb = dir->i_sb;
struct inode *inode = dentry->d_inode;
struct timespec64 ts = current_time(inode);
struct kc_timespec ts = current_time(inode);
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_lock *orph_lock = NULL;
struct scoutfs_lock *dir_lock = NULL;
@@ -1187,6 +1197,24 @@ out:
return path;
}
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
char *path;
path = scoutfs_get_link_target(dentry);
if (!IS_ERR_OR_NULL(path))
nd_set_link(nd, path);
return path;
}
static void scoutfs_put_link(struct dentry *dentry, struct nameidata *nd,
void *cookie)
{
if (!IS_ERR_OR_NULL(cookie))
kfree(cookie);
}
#else
static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done)
{
char *path;
@@ -1197,6 +1225,7 @@ static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode,
return path;
}
#endif
/*
* Symlink target paths can be annoyingly large. We store relatively
@@ -1606,7 +1635,7 @@ static int scoutfs_rename_common(KC_VFS_NS_DEF
struct scoutfs_lock *orph_lock = NULL;
struct scoutfs_dirent new_dent;
struct scoutfs_dirent old_dent;
struct timespec64 now;
struct kc_timespec now;
bool ins_new = false;
bool del_new = false;
bool ins_old = false;
@@ -1618,9 +1647,6 @@ static int scoutfs_rename_common(KC_VFS_NS_DEF
int ret;
int err;
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
trace_scoutfs_rename(sb, old_dir, old_dentry, new_dir, new_dentry);
old_hash = dirent_name_hash(old_dentry->d_name.name,
@@ -1866,7 +1892,36 @@ out_unlock:
return ret;
}
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
static int scoutfs_rename(struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry)
{
return scoutfs_rename_common(KC_VFS_INIT_NS
old_dir, old_dentry, new_dir, new_dentry, 0);
}
#endif
static int scoutfs_rename2(KC_VFS_NS_DEF
struct inode *old_dir,
struct dentry *old_dentry, struct inode *new_dir,
struct dentry *new_dentry, unsigned int flags)
{
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
return scoutfs_rename_common(KC_VFS_NS
old_dir, old_dentry, new_dir, new_dentry, flags);
}
#ifdef KC_FMODE_KABI_ITERATE
/* we only need this to set the iterate flag for kabi :/ */
static int scoutfs_dir_open(struct inode *inode, struct file *file)
{
file->f_mode |= FMODE_KABI_ITERATE;
return 0;
}
#endif
static int scoutfs_tmpfile(KC_VFS_NS_DEF
struct inode *dir,
@@ -1936,15 +1991,29 @@ out:
}
const struct inode_operations scoutfs_symlink_iops = {
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.readlink = generic_readlink,
.follow_link = scoutfs_follow_link,
.put_link = scoutfs_put_link,
#else
.get_link = scoutfs_get_link,
#endif
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.removexattr = generic_removexattr,
#endif
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.tmpfile = scoutfs_tmpfile,
.rename = scoutfs_rename_common,
.symlink = scoutfs_symlink,
@@ -1953,17 +2022,26 @@ const struct inode_operations scoutfs_symlink_iops = {
.mkdir = scoutfs_mkdir,
.create = scoutfs_create,
.lookup = scoutfs_lookup,
#endif
};
const struct file_operations scoutfs_dir_fops = {
.iterate = scoutfs_readdir,
#ifdef KC_FMODE_KABI_ITERATE
.open = scoutfs_dir_open,
#endif
.unlocked_ioctl = scoutfs_ioctl,
.fsync = scoutfs_file_fsync,
.llseek = generic_file_llseek,
};
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
const struct inode_operations_wrapper scoutfs_dir_iops = {
.ops = {
#else
const struct inode_operations scoutfs_dir_iops = {
#endif
.lookup = scoutfs_lookup,
.mknod = scoutfs_mknod,
.create = scoutfs_create,
@@ -1973,15 +2051,30 @@ const struct inode_operations scoutfs_dir_iops = {
.rmdir = scoutfs_unlink,
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.rename = scoutfs_rename,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.symlink = scoutfs_symlink,
.permission = scoutfs_permission,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
},
#endif
.tmpfile = scoutfs_tmpfile,
.rename = scoutfs_rename_common,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.rename2 = scoutfs_rename2,
#else
.rename = scoutfs_rename2,
#endif
};
+4
View File
@@ -5,7 +5,11 @@
#include "lock.h"
extern const struct file_operations scoutfs_dir_fops;
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
extern const struct inode_operations_wrapper scoutfs_dir_iops;
#else
extern const struct inode_operations scoutfs_dir_iops;
#endif
extern const struct inode_operations scoutfs_symlink_iops;
extern const struct dentry_operations scoutfs_dentry_ops;
+3 -2
View File
@@ -222,7 +222,7 @@ static struct attribute *fence_attrs[] = {
static void fence_timeout(struct timer_list *timer)
{
struct pending_fence *fence = timer_container_of(fence, timer, timer);
struct pending_fence *fence = from_timer(fence, timer, timer);
struct super_block *sb = fence->sb;
DECLARE_FENCE_INFO(sb, fi);
@@ -424,7 +424,8 @@ int scoutfs_fence_setup(struct super_block *sb)
goto out;
}
fi->wq = alloc_workqueue("scoutfs_fence", WQ_UNBOUND, 0);
fi->wq = alloc_workqueue("scoutfs_fence",
WQ_UNBOUND | WQ_NON_REENTRANT, 0);
if (!fi->wq) {
ret = -ENOMEM;
goto out;
+128
View File
@@ -30,6 +30,133 @@
#include "omap.h"
#include "quota.h"
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
/*
* Start a high level file read. We check for offline extents in the
* read region here so that we only check the extents once. We use the
* dio count to prevent releasing while we're reading after we've
* checked the extents.
*/
ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *scoutfs_inode_lock = NULL;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
DECLARE_DATA_WAIT(dw);
int ret;
retry:
/* protect checked extents from release */
inode_lock(inode);
atomic_inc(&inode->i_dio_count);
inode_unlock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
if (ret)
goto out;
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos,
SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ,
&dw, scoutfs_inode_lock);
if (ret != 0)
goto out;
} else {
WARN_ON_ONCE(true);
}
ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
out:
inode_dio_done(inode);
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_READ);
if (scoutfs_data_wait_found(&dw)) {
ret = scoutfs_data_wait(inode, &dw);
if (ret == 0)
goto retry;
}
return ret;
}
ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *scoutfs_inode_lock = NULL;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
DECLARE_DATA_WAIT(dw);
int ret;
if (iocb->ki_left == 0) /* Does this even happen? */
return 0;
retry:
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
if (ret)
goto out;
ret = scoutfs_inode_check_retention(inode);
if (ret < 0)
goto out;
ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock);
if (ret)
goto out;
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
/* data_version is per inode, whole file must be online */
ret = scoutfs_data_wait_check(inode, 0, i_size_read(inode),
SEF_OFFLINE,
SCOUTFS_IOC_DWO_WRITE,
&dw, scoutfs_inode_lock);
if (ret != 0)
goto out;
}
ret = scoutfs_quota_check_data(sb, inode);
if (ret)
goto out;
/* XXX: remove SUID bit */
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
out:
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_WRITE);
inode_unlock(inode);
if (scoutfs_data_wait_found(&dw)) {
ret = scoutfs_data_wait(inode, &dw);
if (ret == 0)
goto retry;
}
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
err = generic_write_sync(file, pos, ret);
if (err < 0 && ret > 0)
ret = err;
}
return ret;
}
#else
ssize_t scoutfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
@@ -138,6 +265,7 @@ out:
return ret;
}
#endif
int scoutfs_permission(KC_VFS_NS_DEF
struct inode *inode, int mask)
+7
View File
@@ -1,8 +1,15 @@
#ifndef _SCOUTFS_FILE_H_
#define _SCOUTFS_FILE_H_
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
#else
ssize_t scoutfs_file_read_iter(struct kiocb *, struct iov_iter *);
ssize_t scoutfs_file_write_iter(struct kiocb *, struct iov_iter *);
#endif
int scoutfs_permission(KC_VFS_NS_DEF
struct inode *inode, int mask);
loff_t scoutfs_file_llseek(struct file *file, loff_t offset, int whence);
+1 -1
View File
@@ -783,7 +783,7 @@ int scoutfs_forest_setup(struct super_block *sb)
scoutfs_forest_log_merge_worker);
sbi->forest_info = finf;
finf->workq = alloc_workqueue("scoutfs_log_merge",
finf->workq = alloc_workqueue("scoutfs_log_merge", WQ_NON_REENTRANT |
WQ_UNBOUND | WQ_HIGHPRI, 0);
if (!finf->workq) {
ret = -ENOMEM;
+26 -1
View File
@@ -21,7 +21,6 @@
#include <linux/list_sort.h>
#include <linux/workqueue.h>
#include <linux/buffer_head.h>
#include <linux/iversion.h>
#include "format.h"
#include "super.h"
@@ -144,26 +143,40 @@ void scoutfs_destroy_inode(struct inode *inode)
static const struct inode_operations scoutfs_file_iops = {
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.fiemap = scoutfs_data_fiemap,
};
static const struct inode_operations scoutfs_special_iops = {
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
};
/*
@@ -179,7 +192,12 @@ static void set_inode_ops(struct inode *inode)
inode->i_fop = &scoutfs_file_fops;
break;
case S_IFDIR:
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
inode->i_op = &scoutfs_dir_iops.ops;
inode->i_flags |= S_IOPS_WRAPPER;
#else
inode->i_op = &scoutfs_dir_iops;
#endif
inode->i_fop = &scoutfs_dir_fops;
break;
case S_IFLNK:
@@ -363,11 +381,18 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock)
return ret;
}
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
#else
int scoutfs_getattr(KC_VFS_NS_DEF
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
#endif
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *lock = NULL;
int ret;
+6 -1
View File
@@ -23,7 +23,7 @@ struct scoutfs_inode_info {
u64 offline_blocks;
u64 proj;
u32 flags;
struct timespec64 crtime;
struct kc_timespec crtime;
/*
* Protects per-inode extent items, most particularly readers
@@ -131,9 +131,14 @@ int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock);
int scoutfs_inode_check_retention(struct inode *inode);
int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock);
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
#else
int scoutfs_getattr(KC_VFS_NS_DEF
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags);
#endif
int scoutfs_setattr(KC_VFS_NS_DEF
struct dentry *dentry, struct iattr *attr);
+6 -4
View File
@@ -489,7 +489,6 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
struct scoutfs_lock *lock = NULL;
struct kiocb kiocb;
struct iovec iov;
struct iov_iter iter;
size_t written;
loff_t end_size;
loff_t isize;
@@ -515,6 +514,10 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
/* the iocb is really only used for the file pointer :P */
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = args.offset;
#ifdef KC_LINUX_AIO_KI_LEFT
kiocb.ki_left = args.length;
kiocb.ki_nbytes = args.length;
#endif
iov.iov_base = (void __user *)(unsigned long)args.buf_ptr;
iov.iov_len = args.length;
@@ -556,9 +559,8 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
pos = args.offset;
written = 0;
do {
iov_iter_init(&iter, WRITE, &iov, 1, args.length);
ret = kc_generic_perform_write(&kiocb, &iter, pos);
ret = generic_file_buffered_write(&kiocb, &iov, 1, pos, &pos,
args.length, written);
BUG_ON(ret == -EIOCBQUEUED);
if (ret > 0)
written += ret;
+26
View File
@@ -79,6 +79,9 @@ struct item_cache_info {
struct super_block *sb;
struct item_percpu_pages __percpu *pcpu_pages;
KC_DEFINE_SHRINKER(shrinker);
#ifdef KC_CPU_NOTIFIER
struct notifier_block notifier;
#endif
/* often walked, but per-cpu refs are fast path */
rwlock_t rwlock;
@@ -2581,6 +2584,22 @@ static unsigned long item_cache_scan_objects(struct shrinker *shrink,
return freed;
}
#ifdef KC_CPU_NOTIFIER
static int item_cpu_callback(struct notifier_block *nfb,
unsigned long action, void *hcpu)
{
struct item_cache_info *cinf = container_of(nfb,
struct item_cache_info,
notifier);
struct super_block *sb = cinf->sb;
unsigned long cpu = (unsigned long)hcpu;
if (action == CPU_DEAD)
drop_pcpu_pages(sb, cinf, cpu);
return NOTIFY_OK;
}
#endif
int scoutfs_item_setup(struct super_block *sb)
{
@@ -2611,6 +2630,10 @@ int scoutfs_item_setup(struct super_block *sb)
KC_INIT_SHRINKER_FUNCS(&cinf->shrinker, item_cache_count_objects,
item_cache_scan_objects);
KC_REGISTER_SHRINKER(&cinf->shrinker, "scoutfs-item:" SCSBF, SCSB_ARGS(sb));
#ifdef KC_CPU_NOTIFIER
cinf->notifier.notifier_call = item_cpu_callback;
register_hotcpu_notifier(&cinf->notifier);
#endif
sbi->item_cache_info = cinf;
return 0;
@@ -2628,6 +2651,9 @@ void scoutfs_item_destroy(struct super_block *sb)
int cpu;
if (cinf) {
#ifdef KC_CPU_NOTIFIER
unregister_hotcpu_notifier(&cinf->notifier);
#endif
KC_UNREGISTER_SHRINKER(&cinf->shrinker);
for_each_possible_cpu(cpu)
+111
View File
@@ -3,8 +3,119 @@
#include "kernelcompat.h"
#ifdef KC_SHRINKER_SHRINK
#include <linux/shrinker.h>
/*
* If a target doesn't have that .{count,scan}_objects() interface then
* we have a .shrink() helper that performs the shrink work in terms of
* count/scan.
*/
int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc)
{
struct kc_shrinker_wrapper *wrapper = container_of(shrink, struct kc_shrinker_wrapper, shrink);
unsigned long nr;
unsigned long rc;
if (sc->nr_to_scan != 0) {
rc = wrapper->scan_objects(shrink, sc);
/* translate magic values to the equivalent for older kernels */
if (rc == SHRINK_STOP)
return -1;
else if (rc == SHRINK_EMPTY)
return 0;
}
nr = wrapper->count_objects(shrink, sc);
return min_t(unsigned long, nr, INT_MAX);
}
#endif
#ifndef KC_CURRENT_TIME_INODE
struct timespec64 kc_current_time(struct inode *inode)
{
struct timespec64 now;
unsigned gran;
getnstimeofday64(&now);
if (unlikely(!inode->i_sb)) {
WARN(1, "current_time() called with uninitialized super_block in the inode");
return now;
}
gran = inode->i_sb->s_time_gran;
/* Avoid division in the common cases 1 ns and 1 s. */
if (gran == 1) {
/* nothing */
} else if (gran == NSEC_PER_SEC) {
now.tv_nsec = 0;
} else if (gran > 1 && gran < NSEC_PER_SEC) {
now.tv_nsec -= now.tv_nsec % gran;
} else {
WARN(1, "illegal file time granularity: %u", gran);
}
return now;
}
#endif
#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
ssize_t
kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, loff_t *ppos,
size_t count, ssize_t written)
{
ssize_t status;
struct iov_iter i;
iov_iter_init(&i, WRITE, iov, nr_segs, count);
status = kc_generic_perform_write(iocb, &i, pos);
if (likely(status >= 0)) {
written += status;
*ppos = pos + status;
}
return written ? written : status;
}
#endif
#include <linux/list_lru.h>
#ifdef KC_LIST_LRU_WALK_CB_ITEM_LOCK
static enum lru_status kc_isolate(struct list_head *item, spinlock_t *lock, void *cb_arg)
{
struct kc_isolate_args *args = cb_arg;
/* isolate doesn't use list, nr_items updated in caller */
return args->isolate(item, NULL, args->cb_arg);
}
unsigned long kc_list_lru_walk(struct list_lru *lru, kc_list_lru_walk_cb_t isolate, void *cb_arg,
unsigned long nr_to_walk)
{
struct kc_isolate_args args = {
.isolate = isolate,
.cb_arg = cb_arg,
};
return list_lru_walk(lru, kc_isolate, &args, nr_to_walk);
}
unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
kc_list_lru_walk_cb_t isolate, void *cb_arg)
{
struct kc_isolate_args args = {
.isolate = isolate,
.cb_arg = cb_arg,
};
return list_lru_shrink_walk(lru, sc, kc_isolate, &args);
}
#endif
#ifdef KC_LIST_LRU_WALK_CB_LIST_LOCK
static enum lru_status kc_isolate(struct list_head *item, struct list_lru_one *list,
spinlock_t *lock, void *cb_arg)
+282 -5
View File
@@ -4,6 +4,146 @@
#include <linux/kernel.h>
#include <linux/fs.h>
/*
* v4.15-rc3-4-gae5e165d855d
*
* new API for handling inode->i_version. This forces us to
* include this API where we need. We include it here for
* convenience instead of where it's needed.
*/
#ifdef KC_NEED_LINUX_IVERSION_H
#include <linux/iversion.h>
#else
/*
* Kernels before above version will need to fall back to
* manipulating inode->i_version as previous with degraded
* methods.
*/
#define inode_set_iversion_queried(inode, val) \
do { \
(inode)->i_version = val; \
} while (0)
#define inode_peek_iversion(inode) \
({ \
(inode)->i_version; \
})
#endif
#ifdef KC_POSIX_ACL_VALID_USER_NS
#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(user_ns, acl)
#else
#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(acl)
#endif
/*
* v3.6-rc1-24-gdbf2576e37da
*
* All workqueues are now non-reentrant, and the bit flag is removed
* shortly after its uses were removed.
*/
#ifndef WQ_NON_REENTRANT
#define WQ_NON_REENTRANT 0
#endif
/*
* v3.18-rc2-19-gb5ae6b15bd73
*
* Folds d_materialise_unique into d_splice_alias. Note reversal
* of arguments (Also note Documentation/filesystems/porting.rst)
*/
#ifndef KC_D_MATERIALISE_UNIQUE
#define d_materialise_unique(dentry, inode) d_splice_alias(inode, dentry)
#endif
/*
* v4.8-rc1-29-g31051c85b5e2
*
* fall back to inode_change_ok() if setattr_prepare() isn't available
*/
#ifndef KC_SETATTR_PREPARE
#define setattr_prepare(dentry, attr) inode_change_ok(d_inode(dentry), attr)
#endif
#ifndef KC___POSIX_ACL_CREATE
#define __posix_acl_create posix_acl_create
#define __posix_acl_chmod posix_acl_chmod
#endif
#ifndef KC_PERCPU_COUNTER_ADD_BATCH
#define percpu_counter_add_batch __percpu_counter_add
#endif
#ifndef KC_MEMALLOC_NOFS_SAVE
#define memalloc_nofs_save memalloc_noio_save
#define memalloc_nofs_restore memalloc_noio_restore
#endif
#ifdef KC_BIO_BI_OPF
#define kc_bio_get_opf(bio) \
({ \
(bio)->bi_opf; \
})
#define kc_bio_set_opf(bio, opf) \
do { \
(bio)->bi_opf = opf; \
} while (0)
#define kc_bio_set_sector(bio, sect) \
do { \
(bio)->bi_iter.bi_sector = sect;\
} while (0)
#define kc_submit_bio(bio) submit_bio(bio)
#else
#define kc_bio_get_opf(bio) \
({ \
(bio)->bi_rw; \
})
#define kc_bio_set_opf(bio, opf) \
do { \
(bio)->bi_rw = opf; \
} while (0)
#define kc_bio_set_sector(bio, sect) \
do { \
(bio)->bi_sector = sect; \
} while (0)
#define kc_submit_bio(bio) \
do { \
submit_bio((bio)->bi_rw, bio); \
} while (0)
#define bio_set_dev(bio, bdev) \
do { \
(bio)->bi_bdev = (bdev); \
} while (0)
#endif
#ifdef KC_BIO_BI_STATUS
#define KC_DECLARE_BIO_END_IO(name, bio) name(bio)
#define kc_bio_get_errno(bio) ({ blk_status_to_errno((bio)->bi_status); })
#else
#define KC_DECLARE_BIO_END_IO(name, bio) name(bio, int _error_arg)
#define kc_bio_get_errno(bio) ({ (int)((void)(bio), _error_arg); })
#endif
/*
* v4.13-rc1-6-ge462ec50cb5f
*
* MS_* (mount) flags from <linux/mount.h> should not be used in the kernel
* anymore from 4.x onwards. Instead, we need to use the SB_* (superblock) flags
*/
#ifndef SB_POSIXACL
#define SB_POSIXACL MS_POSIXACL
#define SB_I_VERSION MS_I_VERSION
#endif
#ifndef KC_CURRENT_TIME_INODE
struct timespec64 kc_current_time(struct inode *inode);
#define current_time kc_current_time
#define kc_timespec timespec
#else
#define kc_timespec timespec64
#endif
#ifndef KC_SHRINKER_SHRINK
#define KC_DEFINE_SHRINKER(name) struct shrinker name
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
__typeof__(name) _shrink = (name); \
@@ -20,7 +160,77 @@
#endif /* KC_SHRINKER_NAME */
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr))
#define KC_SHRINKER_FN(ptr) (ptr)
#else
#include <linux/shrinker.h>
#ifndef SHRINK_STOP
#define SHRINK_STOP (~0UL)
#define SHRINK_EMPTY (~0UL - 1)
#endif
int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc);
struct kc_shrinker_wrapper {
unsigned long (*count_objects)(struct shrinker *, struct shrink_control *sc);
unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc);
struct shrinker shrink;
};
#define KC_DEFINE_SHRINKER(name) struct kc_shrinker_wrapper name;
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
struct kc_shrinker_wrapper *_wrap = (name); \
_wrap->count_objects = (countfn); \
_wrap->scan_objects = (scanfn); \
_wrap->shrink.shrink = kc_shrink_wrapper_fn; \
_wrap->shrink.seeks = DEFAULT_SEEKS; \
} while (0)
#define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(container_of(ptr, struct kc_shrinker_wrapper, shrink), type, shrinker)
#define KC_REGISTER_SHRINKER(ptr, fmt, ...) (register_shrinker(ptr.shrink))
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr.shrink))
#define KC_SHRINKER_FN(ptr) (ptr.shrink)
#endif /* KC_SHRINKER_SHRINK */
#ifdef KC_KERNEL_GETSOCKNAME_ADDRLEN
#include <linux/net.h>
#include <linux/inet.h>
static inline int kc_kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
int addrlen = sizeof(struct sockaddr_in);
int ret = kernel_getsockname(sock, addr, &addrlen);
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
return -EAFNOSUPPORT;
else if (ret < 0)
return ret;
return sizeof(struct sockaddr_in);
}
static inline int kc_kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
int addrlen = sizeof(struct sockaddr_in);
int ret = kernel_getpeername(sock, addr, &addrlen);
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
return -EAFNOSUPPORT;
else if (ret < 0)
return ret;
return sizeof(struct sockaddr_in);
}
#else
#define kc_kernel_getsockname(sock, addr) kernel_getsockname(sock, addr)
#define kc_kernel_getpeername(sock, addr) kernel_getpeername(sock, addr)
#endif
#ifdef KC_SOCK_CREATE_KERN_NET
#define kc_sock_create_kern(family, type, proto, res) sock_create_kern(&init_net, family, type, proto, res)
#else
#define kc_sock_create_kern sock_create_kern
#endif
#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
ssize_t kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, loff_t *ppos,
size_t count, ssize_t written);
#define generic_file_buffered_write kc_generic_file_buffered_write
#ifdef KC_GENERIC_PERFORM_WRITE_KIOCB_IOV_ITER
static inline int kc_generic_perform_write(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
@@ -34,6 +244,7 @@ static inline int kc_generic_perform_write(struct kiocb *iocb, struct iov_iter *
return generic_perform_write(file, iter, pos);
}
#endif
#endif // KC_GENERIC_FILE_BUFFERED_WRITE
#ifndef KC_HAVE_BLK_OPF_T
/* typedef __u32 __bitwise blk_opf_t; */
@@ -77,7 +288,7 @@ static inline struct bio *kc_bio_alloc(struct block_device *bdev, unsigned short
{
struct bio *b = bio_alloc(gfp_mask, nr_vecs);
if (b) {
b->bi_opf = opf;
kc_bio_set_opf(b, opf);
bio_set_dev(b, bdev);
}
return b;
@@ -88,6 +299,11 @@ static inline struct bio *kc_bio_alloc(struct block_device *bdev, unsigned short
#define fiemap_prep(inode, fieinfo, start, len, flags) fiemap_check_flags(fieinfo, flags)
#endif
#ifndef KC_KERNEL_OLD_TIMEVAL_STRUCT
#define __kernel_old_timeval timeval
#define ns_to_kernel_old_timeval(ktime) ns_to_timeval(ktime.tv64)
#endif
#ifdef KC_SOCK_SET_SNDTIMEO
#include <net/sock.h>
static inline int kc_sock_set_sndtimeo(struct socket *sock, s64 secs)
@@ -184,14 +400,45 @@ static inline int kc_tcp_sock_set_nodelay(struct socket *sock)
}
#endif
#ifdef KC_INODE_DIO_END
#define kc_inode_dio_end inode_dio_end
#else
#define kc_inode_dio_end inode_dio_done
#endif
#ifndef KC_MM_VM_FAULT_T
typedef unsigned int vm_fault_t;
static inline vm_fault_t vmf_error(int err)
{
if (err == -ENOMEM)
return VM_FAULT_OOM;
return VM_FAULT_SIGBUS;
}
#endif
#include <linux/list_lru.h>
#ifndef KC_LIST_LRU_SHRINK_COUNT_WALK
/* we don't bother with sc->{nid,memcg} (which doesn't exist in oldest kernels) */
static inline unsigned long list_lru_shrink_count(struct list_lru *lru,
struct shrink_control *sc)
{
return list_lru_count(lru);
}
static inline unsigned long
list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
list_lru_walk_cb isolate, void *cb_arg)
{
return list_lru_walk(lru, isolate, cb_arg, sc->nr_to_scan);
}
#endif
#ifndef KC_LIST_LRU_ADD_OBJ
#define list_lru_add_obj list_lru_add
#define list_lru_del_obj list_lru_del
#endif
#if defined(KC_LIST_LRU_WALK_CB_LIST_LOCK)
#if defined(KC_LIST_LRU_WALK_CB_LIST_LOCK) || defined(KC_LIST_LRU_WALK_CB_ITEM_LOCK)
struct list_lru_one;
typedef enum lru_status (*kc_list_lru_walk_cb_t)(struct list_head *item, struct list_lru_one *list,
void *cb_arg);
@@ -207,9 +454,39 @@ unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_contro
#define kc_list_lru_shrink_walk list_lru_shrink_walk
#endif
#ifndef KC_TIMER_CONTAINER_OF
#define timer_container_of(var, callback_timer, timer_fieldname) \
from_timer(var, callback_timer, timer_fieldname)
#if defined(KC_LIST_LRU_WALK_CB_ITEM_LOCK)
/* isolate moved by hand, nr_items updated in walk as _REMOVE returned */
static inline void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
struct list_head *head)
{
list_move(item, head);
}
#endif
#ifndef KC_STACK_TRACE_SAVE
#include <linux/stacktrace.h>
static inline unsigned int stack_trace_save(unsigned long *store, unsigned int size,
unsigned int skipnr)
{
struct stack_trace trace = {
.entries = store,
.max_entries = size,
.skip = skipnr,
};
save_stack_trace(&trace);
return trace.nr_entries;
}
static inline void stack_trace_print(unsigned long *entries, unsigned int nr_entries, int spaces)
{
struct stack_trace trace = {
.entries = entries,
.nr_entries = nr_entries,
};
print_stack_trace(&trace, spaces);
}
#endif
#endif
+2 -1
View File
@@ -1693,7 +1693,8 @@ int scoutfs_lock_setup(struct super_block *sb)
}
linfo->workq = alloc_workqueue("scoutfs_lock_client_work",
WQ_UNBOUND | WQ_HIGHPRI, 0);
WQ_NON_REENTRANT | WQ_UNBOUND |
WQ_HIGHPRI, 0);
if (!linfo->workq) {
ret = -ENOMEM;
goto out;
+28 -17
View File
@@ -1113,11 +1113,11 @@ static int sock_opts_and_names(struct super_block *sb,
if (ret)
goto out;
ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
if (ret < 0)
goto out;
ret = kernel_getpeername(sock, (struct sockaddr *)&conn->peername);
ret = kc_kernel_getpeername(sock, (struct sockaddr *)&conn->peername);
if (ret < 0)
goto out;
@@ -1218,7 +1218,7 @@ static void scoutfs_net_connect_worker(struct work_struct *work)
trace_scoutfs_net_connect_work_enter(sb, 0, 0);
ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
ret = kc_sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret)
goto out;
@@ -1452,15 +1452,23 @@ restart:
set_conn_fl(acc, reconn_freeing);
spin_unlock(&conn->lock);
if (!test_conn_fl(conn, shutting_down)) {
scoutfs_info(sb, "client "SIN_FMT" reconnect timed out, fencing",
SIN_ARG(&acc->last_peername));
ret = scoutfs_fence_start(sb, acc->rid,
acc->last_peername.sin_addr.s_addr,
SCOUTFS_FENCE_CLIENT_RECONNECT);
if (ret) {
scoutfs_err(sb, "client fence returned err %d, shutting down server",
ret);
scoutfs_server_stop(sb);
/*
* Connections that never completed a valid greeting
* (port scans, malformed traffic, half-open peers)
* haven't progressed far enough to warrant fencing.
* Drop them. Any real client will reconnect.
*/
if (test_conn_fl(acc, valid_greeting)) {
scoutfs_info(sb, "client "SIN_FMT" reconnect timed out, fencing",
SIN_ARG(&acc->last_peername));
ret = scoutfs_fence_start(sb, acc->rid,
acc->last_peername.sin_addr.s_addr,
SCOUTFS_FENCE_CLIENT_RECONNECT);
if (ret) {
scoutfs_err(sb, "client fence returned err %d, shutting down server",
ret);
scoutfs_server_stop(sb);
}
}
}
destroy_conn(acc);
@@ -1517,7 +1525,8 @@ scoutfs_net_alloc_conn(struct super_block *sb,
conn->ordered_proc_wlists = kmalloc_array(nr, sizeof(struct scoutfs_work_list),
GFP_NOFS);
conn->workq = alloc_workqueue("scoutfs_net_%s",
WQ_UNBOUND, 0, name_suffix);
WQ_UNBOUND | WQ_NON_REENTRANT, 0,
name_suffix);
}
if (!conn || (info_size && !conn->info) || !conn->workq || !conn->ordered_proc_wlists) {
if (conn) {
@@ -1629,7 +1638,7 @@ int scoutfs_net_bind(struct super_block *sb,
if (WARN_ON_ONCE(conn->sock))
return -EINVAL;
ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
ret = kc_sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret)
goto out;
@@ -1650,7 +1659,7 @@ int scoutfs_net_bind(struct super_block *sb,
if (ret < 0)
goto out;
ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
if (ret < 0)
goto out;
@@ -2098,9 +2107,11 @@ int scoutfs_net_setup(struct super_block *sb)
scoutfs_tseq_tree_init(&ninf->msg_tseq_tree, net_tseq_show_msg);
ninf->shutdown_workq = alloc_workqueue("scoutfs_net_shutdown",
WQ_UNBOUND, 0);
WQ_UNBOUND | WQ_NON_REENTRANT,
0);
ninf->destroy_workq = alloc_workqueue("scoutfs_net_destroy",
WQ_UNBOUND, 0);
WQ_UNBOUND | WQ_NON_REENTRANT,
0);
if (!ninf->shutdown_workq || !ninf->destroy_workq) {
ret = -ENOMEM;
goto out;
+3 -2
View File
@@ -183,7 +183,7 @@ static int create_socket(struct super_block *sb)
int addrlen;
int ret;
ret = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
ret = kc_sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (ret) {
scoutfs_err(sb, "quorum couldn't create udp socket: %d", ret);
goto out;
@@ -1332,7 +1332,8 @@ int scoutfs_quorum_setup(struct super_block *sb)
/* a high priority single threaded context without mem reclaim */
qinf->workq = alloc_workqueue("scoutfs_quorum_work",
WQ_UNBOUND | WQ_HIGHPRI, 1);
WQ_NON_REENTRANT | WQ_UNBOUND |
WQ_HIGHPRI, 1);
if (!qinf->workq) {
ret = -ENOMEM;
goto out;
+1 -1
View File
@@ -134,7 +134,7 @@ static int recov_finished(struct recov_info *recinf)
static void timer_callback(struct timer_list *timer)
{
struct recov_info *recinf = timer_container_of(recinf, timer, timer);
struct recov_info *recinf = from_timer(recinf, timer, timer);
recinf->timeout_fn(recinf->sb);
}
+3 -2
View File
@@ -4315,7 +4315,8 @@ static void server_notify_down(struct super_block *sb,
spin_unlock(&server->lock);
free_farewell_requests(sb, rid);
} else {
} else if (!conn->listening_conn) {
/* only the listener going down should stop the server */
stop_server(server);
}
}
@@ -4751,7 +4752,7 @@ int scoutfs_server_setup(struct super_block *sb)
INIT_DELAYED_WORK(&server->reclaim_dwork, reclaim_worker);
server->wq = alloc_workqueue("scoutfs_server",
WQ_UNBOUND, 0);
WQ_UNBOUND | WQ_NON_REENTRANT, 0);
if (!server->wq) {
kfree(server);
return -ENOMEM;
+2 -1
View File
@@ -2392,7 +2392,8 @@ int scoutfs_srch_setup(struct super_block *sb)
goto out;
srinf->workq = alloc_workqueue("scoutfs_srch_compact",
WQ_UNBOUND | WQ_HIGHPRI, 0);
WQ_NON_REENTRANT | WQ_UNBOUND |
WQ_HIGHPRI, 0);
if (!srinf->workq) {
ret = -ENOMEM;
goto out;
+12
View File
@@ -46,6 +46,7 @@ static struct scoutfs_tseq_entry *tseq_rb_next(struct scoutfs_tseq_entry *ent)
return rb_entry(node, struct scoutfs_tseq_entry, node);
}
#ifdef KC_RB_TREE_AUGMENTED_COMPUTE_MAX
static bool tseq_compute_total(struct scoutfs_tseq_entry *ent, bool exit)
{
loff_t total = 1 + tseq_node_total(ent->node.rb_left) +
@@ -60,6 +61,17 @@ static bool tseq_compute_total(struct scoutfs_tseq_entry *ent, bool exit)
RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
node, total, tseq_compute_total);
#else
static loff_t tseq_compute_total(struct scoutfs_tseq_entry *ent)
{
return 1 + tseq_node_total(ent->node.rb_left) +
tseq_node_total(ent->node.rb_right);
}
RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
node, loff_t, total, tseq_compute_total);
#endif
void scoutfs_tseq_tree_init(struct scoutfs_tseq_tree *tree,
scoutfs_tseq_show_t show)
+35 -1
View File
@@ -16,7 +16,6 @@
#include <linux/xattr.h>
#include <linux/crc32c.h>
#include <linux/posix_acl.h>
#include <linux/iversion.h>
#include "format.h"
#include "inode.h"
@@ -995,17 +994,38 @@ unlock:
return ret;
}
#ifndef KC_XATTR_STRUCT_XATTR_HANDLER
/*
* Future kernels have this amazing hack to rewind the name to get the
* skipped prefix. We're back in the stone ages without the handler
* arg, so we Just Know that this is possible. This will become a
* compat hook to either call the kernel's xattr_full_name(handler), or
* our hack to use the flags as the prefix length.
*/
static const char *full_name_hack(const char *name, int len)
{
return name - len;
}
#endif
static int scoutfs_xattr_get_handler
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
(const struct xattr_handler *handler, struct dentry *dentry,
struct inode *inode, const char *name, void *value,
size_t size)
{
name = xattr_full_name(handler, name);
#else
(struct dentry *dentry, const char *name,
void *value, size_t size, int handler_flags)
{
name = full_name_hack(name, handler_flags);
#endif
return scoutfs_xattr_get(dentry, name, value, size);
}
static int scoutfs_xattr_set_handler
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
(const struct xattr_handler *handler,
KC_VFS_NS_DEF
struct dentry *dentry,
@@ -1013,6 +1033,12 @@ static int scoutfs_xattr_set_handler
size_t size, int flags)
{
name = xattr_full_name(handler, name);
#else
(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags, int handler_flags)
{
name = full_name_hack(name, handler_flags);
#endif
return scoutfs_xattr_set(dentry, name, value, size, flags);
}
@@ -1045,14 +1071,22 @@ static const struct xattr_handler scoutfs_xattr_security_handler = {
};
static const struct xattr_handler scoutfs_xattr_acl_access_handler = {
#ifdef KC_XATTR_HANDLER_NAME
.name = XATTR_NAME_POSIX_ACL_ACCESS,
#else
.prefix = XATTR_NAME_POSIX_ACL_ACCESS,
#endif
.flags = ACL_TYPE_ACCESS,
.get = scoutfs_acl_get_xattr,
.set = scoutfs_acl_set_xattr,
};
static const struct xattr_handler scoutfs_xattr_acl_default_handler = {
#ifdef KC_XATTR_HANDLER_NAME
.name = XATTR_NAME_POSIX_ACL_DEFAULT,
#else
.prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
#endif
.flags = ACL_TYPE_DEFAULT,
.get = scoutfs_acl_get_xattr,
.set = scoutfs_acl_set_xattr,
-7
View File
@@ -171,13 +171,6 @@ t_filter_dmesg()
# orphan log trees reclaim is handled, not an error
re="$re|scoutfs .* reclaiming orphan log trees"
# nfs can emit a whole range of messages we can ignore
re="$re|Installing knfsd .*"
re="$re|nfsd: .*"
re="$re|NFSD: .*"
re="$re|RPC: .*"
re="$re|FS-Cache: .*"
# fencing tests force unmounts and trigger timeouts
re="$re|scoutfs .* forcing unmount"
re="$re|scoutfs .* reconnect timed out"
-32
View File
@@ -1,32 +0,0 @@
== write via NFS, read both sides
== POSIX ACL set via NFS, read both sides
user::rw-
user:22222:rw-
group::r--
mask::rw-
other::r--
user::rw-
user:22222:rw-
group::r--
mask::rw-
other::r--
== POSIX ACL set on scoutfs, read via NFS
user::rw-
user:22222:rw-
group::r--
group:44444:r--
mask::rw-
other::r--
== default ACL inheritance via NFS
user::rw-
user:22222:rwx #effective:rw-
group::r-x #effective:r--
mask::rw-
other::r--
== NFS read demand-stages a released file
1
== cleanup
+6 -6
View File
@@ -8,10 +8,10 @@
/mnt/test/test/data-prealloc/file-1: extents: 32
/mnt/test/test/data-prealloc/file-2: extents: 32
== any writes to region prealloc get full extents
/mnt/test/test/data-prealloc/file-1: extents: 8
/mnt/test/test/data-prealloc/file-2: extents: 8
/mnt/test/test/data-prealloc/file-1: extents: 8
/mnt/test/test/data-prealloc/file-2: extents: 8
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
== streaming offline writes get full extents either way
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
@@ -20,8 +20,8 @@
== goofy preallocation amounts work
/mnt/test/test/data-prealloc/file-1: extents: 6
/mnt/test/test/data-prealloc/file-2: extents: 6
/mnt/test/test/data-prealloc/file-1: extents: 10
/mnt/test/test/data-prealloc/file-2: extents: 10
/mnt/test/test/data-prealloc/file-1: extents: 6
/mnt/test/test/data-prealloc/file-2: extents: 6
/mnt/test/test/data-prealloc/file-1: extents: 3
/mnt/test/test/data-prealloc/file-2: extents: 3
== block writes into region allocs hole
+4
View File
@@ -0,0 +1,4 @@
== ensuring utils and module for old versions
== unmounting test fs and removing test module
== testing combinations of old and new format versions
== restoring test module and mount
+2
View File
@@ -0,0 +1,2 @@
== send empty payload to a quorum port
== greeting-less connections still in reconn_wait
+2 -1
View File
@@ -3,7 +3,6 @@ basic-block-counts.sh
basic-bad-mounts.sh
basic-posix-acl.sh
basic-acl-consistency.sh
basic-nfs.sh
inode-items-updated.sh
simple-inode-index.sh
simple-staging.sh
@@ -19,6 +18,7 @@ offline-extent-waiting.sh
move-blocks.sh
projects.sh
large-fragmented-free.sh
format-version-forward-back.sh
enospc.sh
mmap.sh
srch-safe-merge-pos.sh
@@ -64,4 +64,5 @@ archive-light-cycle.sh
block-stale-reads.sh
inode-deletion.sh
renameat2-noreplace.sh
portscan.sh
xfstests.sh
-86
View File
@@ -1,86 +0,0 @@
#
# Test basic scoutfs-nfs interactions:
# - read/write
# - stage/release and data wait
# - nfs setacl/getacl mapping
#
t_require_commands scoutfs setfacl getfacl exportfs mount.nfs umount \
stat dd cmp systemctl
systemctl start nfs-server >> "$T_TMPDIR/nfs.log" 2>&1 || \
t_skip "nfs-server not available"
# Keep file creation modes deterministic for the ACL golden output.
umask 022
EXPORT_OPTS="rw,async,no_root_squash,no_subtree_check,fsid=42"
NFS_MNT="$T_TMP.nfs"
NFS_DIR="$NFS_MNT/test/basic-nfs"
filter() { sed "s@$T_TMPDIR@T_TMPDIR@g" | t_filter_fs; }
gf() { getfacl -n --omit-header "$@" 2>/dev/null; }
teardown_nfs()
{
(
umount "$NFS_MNT"
exportfs -u "127.0.0.1:$T_M0"
exportfs -f
systemctl stop nfs-server
rmdir "$NFS_MNT"
) >> "$T_TMPDIR/nfs.log" 2>&1
}
trap teardown_nfs EXIT
exportfs -u "127.0.0.1:$T_M0" >> "$T_TMPDIR/nfs.log" 2>&1 || true
t_quiet mkdir -p "$NFS_MNT"
exportfs -o "$EXPORT_OPTS" "127.0.0.1:$T_M0" >> "$T_TMPDIR/nfs.log" 2>&1
mount.nfs -o vers=3,noac,actimeo=0 "127.0.0.1:$T_M0" "$NFS_MNT" >> "$T_TMPDIR/nfs.log" 2>&1
test -d "$NFS_DIR" || t_fail "test dir $NFS_DIR not visible over NFS"
echo "== write via NFS, read both sides"
dd if=/dev/urandom bs=4096 count=1 of="$T_TMP.data" status=none
cp "$T_TMP.data" "$NFS_DIR/file"
cmp "$T_TMP.data" "$T_D0/file"
cmp "$T_TMP.data" "$NFS_DIR/file"
echo "== POSIX ACL set via NFS, read both sides"
setfacl -m u:22222:rw "$NFS_DIR/file" 2>&1 | filter
gf "$NFS_DIR/file"
gf "$T_D0/file"
echo "== POSIX ACL set on scoutfs, read via NFS"
setfacl -m g:44444:r "$T_D0/file" 2>&1 | filter
gf "$NFS_DIR/file"
echo "== default ACL inheritance via NFS"
mkdir "$NFS_DIR/d"
setfacl -d -m u:22222:rwx "$NFS_DIR/d" 2>&1 | filter
touch "$NFS_DIR/d/child"
gf "$NFS_DIR/d/child"
echo "== NFS read demand-stages a released file"
dd if=/dev/urandom bs=4096 count=1 of="$T_TMP.big" status=none
cp "$T_TMP.big" "$T_D0/big"
sync
vers=$(scoutfs stat -s data_version "$T_D0/big")
t_quiet scoutfs release "$T_D0/big" -V "$vers" -o 0 -l 4K
# NFS read against the offline file blocks in scoutfs_read waiting
# for the data to come back online.
cat "$NFS_DIR/big" > "$T_TMP.read" &
read_pid=$!
sleep 1
scoutfs data-waiting -B 0 -I 0 -p "$T_D0" | wc -l
t_quiet scoutfs stage "$T_TMP.big" "$T_D0/big" -V "$vers" -o 0 -l 4096
wait "$read_pid"
cmp "$T_TMP.big" "$T_TMP.read"
echo "== cleanup"
rm -f "$T_D0/file" "$T_D0/big"
rm -rf "$T_D0/d"
t_pass
+184
View File
@@ -0,0 +1,184 @@
#
# Test our basic ability to work with different format versions.
#
# The current code being tested has a range of supported format
# versions. For each of the older supported format versions we have a
# git hash of the commit before the next greater version was introduced.
# We build versions of the scoutfs utility and kernel module for the
# last commit in tree that had a lesser supported version as its max
# supported version. We use those binaries to test forward and back
# compat as new and old code works with a persistent volume with a given
# format version.
#
# not supported on el8 or higher
if [ $(source /etc/os-release ; echo ${VERSION_ID:0:1}) -gt 7 ]; then
t_skip_permitted "Unsupported OS version"
fi
mount_has_format_version()
{
local mnt="$1"
local vers="$2"
local sysfs_fmt_vers="$(t_sysfs_path_from_mnt $SCR)/format_version"
test "$(cat $sysfs_fmt_vers)" == "$vers"
}
SCR="/mnt/scoutfs.scratch"
MIN=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_min:"){print $2}')
MAX=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_max:"){print $2}')
echo "min: $MIN max: $MAX" > "$T_TMP.log"
test "$MIN" -gt 0 -a "$MAX" -gt 0 -a "$MIN" -le "$MAX" || \
t_fail "parsed bad versions, min: $MIN max: $MAX"
test "$MIN" == "$MAX" && \
t_skip "only one supported format version: $MIN"
# prepare dir and wipe any weird old partial state
builds="$T_RESULTS/format_version_builds"
mkdir -p "$builds"
echo "== ensuring utils and module for old versions"
declare -A commits
commits[1]=c3c4b080
for vers in $(seq $MIN $((MAX - 1))); do
dir="$builds/$vers"
platform=$(uname -rp)
buildmark="$dir/buildmark"
commit="${commits[$vers]}"
test -n "$commit" || \
t_fail "no commit for vers $vers"
# have our files for this version
test "$(cat $buildmark 2>&1)" == "$platform" && \
continue
# build as one big sequence of commands that can return failure
(
set -o pipefail
rm -rf $dir &&
mkdir -p $dir/building &&
cd "$T_TESTS/.." &&
git archive --format=tar "$commit" | tar -C "$dir/building" -xf - &&
cd - &&
find $dir &&
make -C "$dir/building" &&
mv $dir/building/utils/src/scoutfs $dir &&
mv $dir/building/kmod/src/scoutfs.ko $dir &&
rm -rf $dir/building &&
echo "$platform" > $buildmark &&
find $dir &&
cat $buildmark
) >> "$T_TMP.log" 2>&1 || t_fail "version $vers build failed"
done
echo "== unmounting test fs and removing test module"
t_quiet t_umount_all
t_quiet rmmod scoutfs
echo "== testing combinations of old and new format versions"
mkdir -p "$SCR"
for vers in $(seq $MIN $((MAX - 1))); do
old_scoutfs="$builds/$vers/scoutfs"
old_module="$builds/$vers/scoutfs.ko"
echo "mkfs $vers" >> "$T_TMP.log"
t_quiet $old_scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" \
|| t_fail "mkfs $vers failed"
echo "mount $vers with $vers" >> "$T_TMP.log"
t_quiet insmod $old_module
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_quiet mount_has_format_version "$SCR" "$vers"
echo "creating files in $vers" >> "$T_TMP.log"
t_quiet touch "$SCR/file-"{1,2,3}
stat "$SCR"/file-* > "$T_TMP.stat" || \
t_fail "stat in $vers failed"
echo "remounting $vers fs with $MAX" >> "$T_TMP.log"
t_quiet umount "$SCR"
rmmod scoutfs
insmod "$T_MODULE"
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_quiet mount_has_format_version "$SCR" "$vers"
echo "verifying stat in $vers with $MAX" >> "$T_TMP.log"
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
echo "keep/update/del existing, create new in $vers" >> "$T_TMP.log"
t_quiet touch "$SCR/file-2"
t_quiet rm -f "$SCR/file-3"
t_quiet touch "$SCR/file-4"
stat "$SCR"/file-* > "$T_TMP.stat" || \
t_fail "stat in $vers failed"
echo "remounting $vers fs with $vers" >> "$T_TMP.log"
t_quiet umount "$SCR"
rmmod scoutfs
insmod "$old_module"
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_quiet mount_has_format_version "$SCR" "$vers"
echo "verifying stat in $vers with $vers" >> "$T_TMP.log"
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
echo "changing format vers to $MAX" >> "$T_TMP.log"
t_quiet umount "$SCR"
rmmod scoutfs
t_quiet scoutfs change-format-version -F -V $MAX $T_EX_META_DEV "$T_EX_DATA_DEV"
echo "mount fs $MAX with old $vers should fail" >> "$T_TMP.log"
insmod "$old_module"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR" >> "$T_TMP.log" 2>&1
if [ "$?" == "0" ]; then
umount "$SCR"
t_fail "old code ver $vers able to mount new ver $MAX"
fi
echo "remounting $MAX fs with $MAX" >> "$T_TMP.log"
rmmod scoutfs
insmod "$T_MODULE"
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_quiet mount_has_format_version "$SCR" "$MAX"
echo "verifying stat in $MAX with $MAX" >> "$T_TMP.log"
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
echo "keep/update/del existing, create new in $MAX" >> "$T_TMP.log"
t_quiet touch "$SCR/file-2"
t_quiet rm -f "$SCR/file-4"
t_quiet touch "$SCR/file-5"
stat "$SCR"/file-* > "$T_TMP.stat" || \
t_fail "stat in $MAX failed"
echo "remounting $MAX fs with $MAX again" >> "$T_TMP.log"
t_quiet umount "$SCR"
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_quiet mount_has_format_version "$SCR" "$MAX"
echo "verifying stat in $MAX with $MAX again" >> "$T_TMP.log"
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
echo "done with old vers $vers" >> "$T_TMP.log"
t_quiet umount "$SCR"
rmmod scoutfs
done
echo "== restoring test module and mount"
insmod "$T_MODULE"
t_mount_all
t_pass
+46
View File
@@ -0,0 +1,46 @@
#
# portscan tests - assure malformed packets do not cause issues
#
# Send a short garbage payload to a scoutfs server quorum port. The
# accepted connection never completes a valid greeting, so after the
# reconnect timeout the kernel must drop it silently rather than
# fence it (which would restart the server).
#
t_require_commands scoutfs grep wc seq
send_garbage()
{
local port="$1"
(
exec 3<>"/dev/tcp/127.0.0.1/$port" || exit 1
printf ' ' >&3
exec 3>&-
) 2>/dev/null
}
echo "== send empty payload to a quorum port"
slot=-1
for i in $(seq 0 $((T_QUORUM - 1))); do
if send_garbage "$((T_TEST_PORT + i))"; then
slot=$i
break
fi
done
test "$slot" -ge 0 || t_fail "no quorum port accepted"
# CLIENT_RECONNECT_TIMEOUT_MS is 20s - wait until that happens.
echo "== greeting-less connections still in reconn_wait"
for _ in $(seq 1 25); do
n=$(grep -h 'vg 0 .* rw 1' /sys/kernel/debug/scoutfs/*/connections | wc -l)
[ "$n" = 0 ] && break
sleep 1
done
test "$n" -eq 0 || t_fail "$n greeting-less conns remain in reconn_wait"
# the mount whose port we hit should be up and not disconnected now.
eval dir=\$T_D$slot
touch "$dir/portscan-after" 2>/dev/null || t_fail "fs on $dir not responsive after portscan"
t_pass