Compare commits

..

6 Commits

Author SHA1 Message Date
Auke Kok c137637007 We can't cross-mount ipv4/ipv6.
I thought this would just work between ipv4 and ipv6 based quorum
members, but it turns out it will only work one way by default. While
we could make this work (multiple sockets, special sockopts) it is
highly unlikely and very undesirable.

It feels much more robust to just disallow it explicitly and reject mixed
v4/v6 configurations outright (in mkfs, change-quorum, and mount) to avoid
this. I can't imagine this doing any good for users' fencing setups.

The test cases added validate the 2 easy userspace checks. The mount
check isn't easily testable because we disallow userspace from creating
such a failure path.

One additional test section tests the migration path from v4->v6->v4
so there's at least some test that checks that this actually works.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-14 14:43:00 -07:00
Auke Kok 2c46e37543 Account for ipv6 in kernel_get{sock,peer}name compat.
These 2 kernel wrapper functions need to be able to properly handle
the ipv6 addrlen, instead of returning -EAFNOSUPPORT(-97).

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-06 15:17:26 -07:00
Auke Kok 61ece361a7 Add IPv6 support to the kernel module.
This adds IPv6 support to the kernel module side.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-06 15:17:26 -07:00
Auke Kok afba683be6 Enable ipv6 in testing.
Instead of using 127.0.0.1, we initialize the quorum slots to ::1,
enabling all ipv6 support.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-06 15:17:26 -07:00
Auke Kok a95996e932 Add ipv6 support to scoutfs userspace utility.
This change adds ipv6 support to various scoutfs sub-commands, allowing
users to mkfs, print and change-quorum-config using ipv6 addresses, and
modifies the outputs.

Any ipv6 address/port is displayed as [::1]:5000 to comply with the
related RFCs. Input strings remain consistent as the quorum config
input value is comma-separated already, not posing any issues.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-06 15:17:26 -07:00
Auke Kok 6ff9fd39fa Don't stack alloc struct scoutfs_quorum_block_event old
The size of this thing is well over 1kb, and on several supported
distributions the compiler will error because this particular
function exceeds a 2k stack frame size, which is excessive,
even for a function that isn't called regularly.

We can allocate the thing in one go if we smartly allocate this
as an array of (an array of structs) which allows us to index
it as a 2d array as before, taking away some of the additional
complexities.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-05-06 15:17:26 -07:00
45 changed files with 1821 additions and 407 deletions
-14
View File
@@ -1,20 +1,6 @@
Versity ScoutFS Release Notes
=============================
---
v1.32
\
*June 2, 2026*
Fix writing POSIX ACLs over NFS mounts that export the scoutfs
filesystem.
Add support for kernels in the RHEL 9.8 minor release.
Reduce unneeded block allocation when data\_prealloc\_contig\_only was
set to 0. This will help achieve more efficient data space usage when
writing small files.
---
v1.31
\
+283 -9
View File
@@ -6,6 +6,231 @@
ccflags-y += -include $(src)/kernelcompat.h
#
# v3.18-rc2-19-gb5ae6b15bd73
#
# Folds d_materialise_unique into d_splice_alias. Note reversal
# of arguments (Also note Documentation/filesystems/porting.rst)
#
ifneq (,$(shell grep 'd_materialise_unique' include/linux/dcache.h))
ccflags-y += -DKC_D_MATERIALISE_UNIQUE=1
endif
#
# RHEL extended the fop struct so to use it we have to set
# a flag to indicate that the struct is large enough and
# contains the pointer.
#
ifneq (,$(shell grep 'FMODE_KABI_ITERATE' include/linux/fs.h))
ccflags-y += -DKC_FMODE_KABI_ITERATE
endif
#
# v4.7-rc2-23-g0d4d717f2583
#
# Added user_ns argument to posix_acl_valid
#
ifneq (,$(shell grep 'posix_acl_valid.*user_namespace' include/linux/posix_acl.h))
ccflags-y += -DKC_POSIX_ACL_VALID_USER_NS
endif
#
# v5.3-12296-g6d2052d188d9
#
# The RBCOMPUTE function is now passed an extra flag, and should return a bool
# to indicate whether the propagated callback should stop or not.
#
ifneq (,$(shell grep 'static inline bool RBNAME.*_compute_max' include/linux/rbtree_augmented.h))
ccflags-y += -DKC_RB_TREE_AUGMENTED_COMPUTE_MAX
endif
#
# v3.13-25-g37bc15392a23
#
# Renames posix_acl_create to __posix_acl_create and provide some
# new interfaces for creating ACLs
#
ifneq (,$(shell grep '__posix_acl_create' include/linux/posix_acl.h))
ccflags-y += -DKC___POSIX_ACL_CREATE
endif
#
# v4.8-rc1-29-g31051c85b5e2
#
# inode_change_ok() removed - replace with setattr_prepare()
# v5.11-rc4-7-g2f221d6f7b88 removes extern attribute
#
ifneq (,$(shell grep 'int setattr_prepare' include/linux/fs.h))
ccflags-y += -DKC_SETATTR_PREPARE
endif
#
# v4.15-rc3-4-gae5e165d855d
#
# linux/iversion.h needs to manually be included for code that
# manipulates this field.
#
ifneq (,$(shell grep -s 'define _LINUX_IVERSION_H' include/linux/iversion.h))
ccflags-y += -DKC_NEED_LINUX_IVERSION_H=1
endif
# v4.11-12447-g104b4e5139fe
#
# Renamed __percpu_counter_add to percpu_counter_add_batch to clarify
# that the __ wasn't less safe, just took an extra parameter.
#
ifneq (,$(shell grep 'percpu_counter_add_batch' include/linux/percpu_counter.h))
ccflags-y += -DKC_PERCPU_COUNTER_ADD_BATCH
endif
#
# v4.11-4550-g7dea19f9ee63
#
# Introduced memalloc_nofs_{save,restore} preferred instead of _noio_.
#
ifneq (,$(shell grep 'memalloc_nofs_save' include/linux/sched/mm.h))
ccflags-y += -DKC_MEMALLOC_NOFS_SAVE
endif
#
# v4.7-12414-g1eff9d322a44
#
# Renamed bi_rw to bi_opf to force old code to catch up. We use it as a
# single switch between old and new bio structures.
#
ifneq (,$(shell grep 'bi_opf' include/linux/blk_types.h))
ccflags-y += -DKC_BIO_BI_OPF
endif
#
# v4.12-rc2-201-g4e4cbee93d56
#
# Moves to bi_status BLK_STS_ API instead of having a mix of error
# end_io args or bi_error.
#
ifneq (,$(shell grep 'bi_status' include/linux/blk_types.h))
ccflags-y += -DKC_BIO_BI_STATUS
endif
#
# v3.11-8765-ga0b02131c5fc
#
# Remove the old ->shrink() API, ->{scan,count}_objects is preferred.
#
ifneq (,$(shell grep '(*shrink)' include/linux/shrinker.h))
ccflags-y += -DKC_SHRINKER_SHRINK
endif
#
# v3.19-4777-g6bec00352861
#
# backing_dev_info is removed from address_space. Instead we need to use
# inode_to_bdi() inline from <backing-dev.h>.
#
ifneq (,$(shell grep 'struct backing_dev_info.*backing_dev_info' include/linux/fs.h))
ccflags-y += -DKC_LINUX_BACKING_DEV_INFO=1
endif
#
# v4.3-9290-ge409de992e3e
#
# xattr handlers are now passed a struct that contains `flags`
#
ifneq (,$(shell grep 'int...get..const struct xattr_handler.*struct dentry.*dentry,' include/linux/xattr.h))
ccflags-y += -DKC_XATTR_STRUCT_XATTR_HANDLER=1
endif
#
# v4.16-rc1-1-g9b2c45d479d0
#
# kernel_getsockname() and kernel_getpeername() dropped the addrlen arg
#
ifneq (,$(shell grep 'kernel_getsockname.*,$$' include/linux/net.h))
ccflags-y += -DKC_KERNEL_GETSOCKNAME_ADDRLEN=1
endif
#
# v4.1-rc1-410-geeb1bd5c40ed
#
# Adds a struct net parameter to sock_create_kern
#
ifneq (,$(shell grep 'sock_create_kern.*struct net' include/linux/net.h))
ccflags-y += -DKC_SOCK_CREATE_KERN_NET=1
endif
#
# v4.17-rc6-7-g95582b008388
#
# Kernel has current_time(inode) to uniformly retrieve timespec in the right unit
#
ifneq (,$(shell grep 'struct timespec64 current_time' include/linux/fs.h))
ccflags-y += -DKC_CURRENT_TIME_INODE=1
endif
#
# v4.9-12228-g530e9b76ae8f
#
# register_cpu_notifier and family were all removed and to be
# replaced with cpuhp_* API calls.
#
ifneq (,$(shell grep 'define register_hotcpu_notifier' include/linux/cpu.h))
ccflags-y += -DKC_CPU_NOTIFIER
endif
#
# v3.14-rc8-130-gccad2365668f
#
# generic_file_buffered_write is removed, backport it
#
ifneq (,$(shell grep 'extern ssize_t generic_file_buffered_write' include/linux/fs.h))
ccflags-y += -DKC_GENERIC_FILE_BUFFERED_WRITE=1
endif
#
# v5.7-438-g8151b4c8bee4
#
# struct address_space_operations switches away from .readpages to .readahead
#
# RHEL has backported this feature all the way to RHEL8, as part of RHEL_KABI,
# which means we need to detect this very precisely
#
ifneq (,$(shell grep 'readahead.*struct readahead_control' include/linux/fs.h))
ccflags-y += -DKC_FILE_AOPS_READAHEAD
endif
#
# v4.0-rc7-1743-g8436318205b9
#
# .aio_read and .aio_write no longer exist. All reads and writes now use the
# .read_iter and .write_iter methods, or must implement .read and .write (which
# we don't).
#
ifneq (,$(shell grep 'ssize_t.*aio_read' include/linux/fs.h))
ccflags-y += -DKC_LINUX_HAVE_FOP_AIO_READ=1
endif
#
# rhel7 has a custom inode_operations_wrapper struct that is discarded
# entirely in favor of upstream structure since rhel8.
#
ifneq (,$(shell grep 'void.*follow_link.*struct dentry' include/linux/fs.h))
ccflags-y += -DKC_LINUX_HAVE_RHEL_IOPS_WRAPPER=1
endif
ifneq (,$(shell grep 'size_t.*ki_left;' include/linux/aio.h))
ccflags-y += -DKC_LINUX_AIO_KI_LEFT=1
endif
#
# v4.4-rc4-4-g98e9cb5711c6
#
# Introduces a new xattr_handler .name member that can be used to match the
# entire field, instead of just a prefix. For these kernels, we must use
# the new .name field instead.
ifneq (,$(shell grep 'static inline const char .xattr_prefix' include/linux/xattr.h))
ccflags-y += -DKC_XATTR_HANDLER_NAME=1
endif
#
# v5.19-rc4-96-g342a72a33407
#
@@ -109,6 +334,14 @@ ifneq (,$(shell grep 'int tcp_sock_set_keepintvl' include/linux/tcp.h))
ccflags-y += -DKC_HAVE_TCP_SET_SOCKFN
endif
#
# v4.16-rc3-13-ga84d1169164b
#
# Fixes y2038 issues with struct timeval.
ifneq (,$(shell grep -s '^struct __kernel_old_timeval .' include/uapi/linux/time_types.h))
ccflags-y += -DKC_KERNEL_OLD_TIMEVAL_STRUCT
endif
#
# v5.19-rc4-52-ge33c267ab70d
#
@@ -178,6 +411,47 @@ ifneq (,$(shell grep 'struct file.*bdev_file_open_by_path.const char.*path' incl
ccflags-y += -DKC_BDEV_FILE_OPEN_BY_PATH
endif
# v4.0-rc7-1796-gfe0f07d08ee3
#
# direct-io changes modify inode_dio_done to now be called inode_dio_end
ifneq (,$(shell grep 'void inode_dio_end.struct inode' include/linux/fs.h))
ccflags-y += -DKC_INODE_DIO_END
endif
#
# v5.0-6476-g3d3539018d2c
#
# page fault handlers return a bitmask vm_fault_t instead
# Note: el8's header has a slightly modified prefix here
ifneq (,$(shell grep 'typedef.*__bitwise unsigned.*int vm_fault_t' include/linux/mm_types.h))
ccflags-y += -DKC_MM_VM_FAULT_T
endif
# v3.19-499-gd83a08db5ba6
#
# .remap_pages becomes obsolete
ifneq (,$(shell grep 'int ..remap_pages..struct vm_area_struct' include/linux/mm.h))
ccflags-y += -DKC_MM_REMAP_PAGES
endif
#
# v3.19-4742-g503c358cf192
#
# list_lru_shrink_count() and list_lru_shrink_walk() introduced
#
ifneq (,$(shell grep 'list_lru_shrink_count.*struct list_lru' include/linux/list_lru.h))
ccflags-y += -DKC_LIST_LRU_SHRINK_COUNT_WALK
endif
#
# v3.19-4757-g3f97b163207c
#
# list_lru_walk_cb lru arg added
#
ifneq (,$(shell grep 'struct list_head \*item, spinlock_t \*lock, void \*cb_arg' include/linux/list_lru.h))
ccflags-y += -DKC_LIST_LRU_WALK_CB_ITEM_LOCK
endif
#
# v6.7-rc4-153-g0a97c01cd20b
#
@@ -196,6 +470,15 @@ ifneq (,$(shell grep 'struct list_lru_one \*list, spinlock_t \*lock, void \*cb_a
ccflags-y += -DKC_LIST_LRU_WALK_CB_LIST_LOCK
endif
#
# v5.1-rc4-273-ge9b98e162aa5
#
# introduce stack trace helpers
#
ifneq (,$(shell grep '^unsigned int stack_trace_save' include/linux/stacktrace.h))
ccflags-y += -DKC_STACK_TRACE_SAVE
endif
#
# v6.1-rc1-2-g138060ba92b3
#
@@ -213,12 +496,3 @@ endif
ifneq (,$(shell grep 'struct posix_acl.*get_inode_acl' include/linux/fs.h))
ccflags-y += -DKC_GET_INODE_ACL
endif
#
# v6.15-13744-g41cb08555c41
#
# from_timer renamed to timer_container_of.
#
ifneq (,$(shell grep 'define timer_container_of' include/linux/timer.h))
ccflags-y += -DKC_TIMER_CONTAINER_OF
endif
+29 -5
View File
@@ -16,7 +16,6 @@
#include <linux/xattr.h>
#include <linux/posix_acl.h>
#include <linux/posix_acl_xattr.h>
#include <linux/iversion.h>
#include "format.h"
#include "super.h"
@@ -70,6 +69,15 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
char *name;
int ret;
#ifndef KC___POSIX_ACL_CREATE
if (!IS_POSIXACL(inode))
return NULL;
acl = get_cached_acl(inode, type);
if (acl != ACL_NOT_CACHED)
return acl;
#endif
ret = acl_xattr_name_len(type, &name, NULL);
if (ret < 0)
return ERR_PTR(ret);
@@ -115,6 +123,11 @@ struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
return ERR_PTR(-ECHILD);
#endif
#ifndef KC___POSIX_ACL_CREATE
if (!IS_POSIXACL(inode))
return NULL;
#endif
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
if (ret < 0) {
acl = ERR_PTR(ret);
@@ -203,8 +216,7 @@ int scoutfs_set_acl(KC_VFS_NS_DEF
{
struct inode *inode = dentry->d_inode;
#else
int scoutfs_set_acl(KC_VFS_NS_DEF
struct inode *inode, struct posix_acl *acl, int type)
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
#endif
struct super_block *sb = inode->i_sb;
@@ -227,11 +239,17 @@ int scoutfs_set_acl(KC_VFS_NS_DEF
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
return ret;
}
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
int scoutfs_acl_get_xattr(const struct xattr_handler *handler, struct dentry *dentry,
struct inode *inode, const char *name, void *value,
size_t size)
{
int type = handler->flags;
#else
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
int type)
{
#endif
struct posix_acl *acl;
int ret = 0;
@@ -254,6 +272,7 @@ int scoutfs_acl_get_xattr(const struct xattr_handler *handler, struct dentry *de
return ret;
}
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
KC_VFS_NS_DEF
struct dentry *dentry,
@@ -261,6 +280,11 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
size_t size, int flags)
{
int type = handler->flags;
#else
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
int flags, int type)
{
#endif
struct posix_acl *acl = NULL;
int ret;
@@ -276,7 +300,7 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
return PTR_ERR(acl);
if (acl) {
ret = posix_acl_valid(&init_user_ns, acl);
ret = kc_posix_acl_valid(&init_user_ns, acl);
if (ret)
goto out;
}
@@ -285,7 +309,7 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *handler,
#ifdef KC_SET_ACL_DENTRY
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry, acl, type);
#else
ret = scoutfs_set_acl(KC_VFS_INIT_NS dentry->d_inode, acl, type);
ret = scoutfs_set_acl(dentry->d_inode, acl, type);
#endif
out:
posix_acl_release(acl);
+8 -2
View File
@@ -5,8 +5,7 @@
int scoutfs_set_acl(KC_VFS_NS_DEF
struct dentry *dentry, struct posix_acl *acl, int type);
#else
int scoutfs_set_acl(KC_VFS_NS_DEF
struct inode *inode, struct posix_acl *acl, int type);
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif
#ifdef KC_GET_INODE_ACL
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type, bool rcu);
@@ -16,6 +15,7 @@ struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
struct scoutfs_lock *lock, struct list_head *ind_locks);
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
int scoutfs_acl_get_xattr(const struct xattr_handler *, struct dentry *dentry,
struct inode *inode, const char *name, void *value,
size_t size);
@@ -24,6 +24,12 @@ int scoutfs_acl_set_xattr(const struct xattr_handler *,
struct dentry *dentry,
struct inode *inode, const char *name, const void *value,
size_t size, int flags);
#else
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
int type);
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
int flags, int type);
#endif
int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr,
struct scoutfs_lock *lock, struct list_head *ind_locks);
int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir,
+9 -9
View File
@@ -444,13 +444,13 @@ static void block_end_io(struct super_block *sb, blk_opf_t opf,
wake_up(&binf->waitq);
}
static void block_bio_end_io(struct bio *bio)
static void KC_DECLARE_BIO_END_IO(block_bio_end_io, struct bio *bio)
{
struct block_private *bp = bio->bi_private;
struct super_block *sb = bp->sb;
TRACE_BLOCK(end_io, bp);
block_end_io(sb, bio->bi_opf, bp, blk_status_to_errno(bio->bi_status));
block_end_io(sb, kc_bio_get_opf(bio), bp, kc_bio_get_errno(bio));
bio_put(bio);
}
@@ -499,7 +499,7 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
break;
}
bio->bi_iter.bi_sector = sector + (off >> 9);
kc_bio_set_sector(bio, sector + (off >> 9));
bio->bi_end_io = block_bio_end_io;
bio->bi_private = bp;
@@ -516,13 +516,13 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
BUG();
if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
submit_bio(bio);
kc_submit_bio(bio);
bio = NULL;
}
}
if (bio)
submit_bio(bio);
kc_submit_bio(bio);
blk_finish_plug(&plug);
@@ -1179,11 +1179,11 @@ struct sm_block_completion {
int err;
};
static void sm_block_bio_end_io(struct bio *bio)
static void KC_DECLARE_BIO_END_IO(sm_block_bio_end_io, struct bio *bio)
{
struct sm_block_completion *sbc = bio->bi_private;
sbc->err = blk_status_to_errno(bio->bi_status);
sbc->err = kc_bio_get_errno(bio);
complete(&sbc->comp);
bio_put(bio);
}
@@ -1236,7 +1236,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, blk_op
goto out;
}
bio->bi_iter.bi_sector = blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9);
kc_bio_set_sector(bio, blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9));
bio->bi_end_io = sm_block_bio_end_io;
bio->bi_private = &sbc;
bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
@@ -1244,7 +1244,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, blk_op
init_completion(&sbc.comp);
sbc.err = 0;
submit_bio(bio);
kc_submit_bio(bio);
wait_for_completion(&sbc.comp);
ret = sbc.err;
+1 -1
View File
@@ -479,7 +479,7 @@ static void scoutfs_client_connect_worker(struct work_struct *work)
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_mount_options opts;
struct scoutfs_net_greeting greet;
struct sockaddr_in sin;
struct sockaddr_storage sin;
bool am_quorum;
int ret;
+81 -12
View File
@@ -23,7 +23,6 @@
#include <linux/fiemap.h>
#include <linux/writeback.h>
#include <linux/overflow.h>
#include <linux/iversion.h>
#include "format.h"
#include "super.h"
@@ -423,8 +422,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
mutex_lock(&datinf->mutex);
scoutfs_inode_get_onoff(inode, &online, &offline);
/* default to single allocation at the written block */
start = iblock;
count = 1;
@@ -447,6 +444,7 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
* the preallocation size to the number of online
* blocks.
*/
scoutfs_inode_get_onoff(inode, &online, &offline);
if (iblock > 1 && iblock == online) {
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
iblock, 1, &found);
@@ -488,13 +486,6 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
/* trim count by next extent after iblock */
if (found.len && found.start > start && found.start < start + count)
count = (found.start - start);
/*
* Ramp the aligned region size up proportionally with
* the file's online block count rather than jumping to
* the full prealloc size.
*/
count = max_t(u64, 1, min(count, online));
}
/* overall prealloc limit */
@@ -758,6 +749,54 @@ static int scoutfs_readpage(struct file *file, struct page *page)
return ret;
}
#ifndef KC_FILE_AOPS_READAHEAD
/*
* This is used for opportunistic read-ahead which can throw the pages
* away if it needs to. If the caller didn't deal with offline extents
* then we drop those pages rather than trying to wait. Whoever is
* staging offline extents should be doing it in enormous chunks so that
* read-ahead can ramp up within each staged region. The check for
* offline extents is cheap when the inode has no offline extents.
*
* The inode is read-locked (with an inode refresh) for the duration of
* the call. Each page on @pages is checked with
* scoutfs_data_wait_check() for SEF_OFFLINE extents; pages for which the
* check returns positive are removed from the list and released, and
* @nr_pages is decremented to match. Whatever is left is passed to
* mpage_readpages() with scoutfs_get_block_read.
*
* Returns 0 if every page was dropped, a negative value if taking the
* lock or checking a page failed, otherwise the mpage_readpages() result.
*/
static int scoutfs_readpages(struct file *file, struct address_space *mapping,
struct list_head *pages, unsigned nr_pages)
{
struct inode *inode = file->f_inode;
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *inode_lock = NULL;
struct page *page;
struct page *tmp;
int ret;
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
if (ret)
goto out;
/* _safe iteration because pages are unlinked from the list in the loop */
list_for_each_entry_safe(page, tmp, pages, lru) {
ret = scoutfs_data_wait_check(inode, page_offset(page),
PAGE_SIZE, SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ, NULL,
inode_lock);
if (ret < 0)
goto out;
if (ret > 0) {
/* drop this page instead of waiting on it */
list_del(&page->lru);
put_page(page);
/* nothing left to read once every page has been dropped */
if (--nr_pages == 0) {
ret = 0;
goto out;
}
}
}
ret = mpage_readpages(mapping, pages, nr_pages, scoutfs_get_block_read);
out:
/* inode_lock is still NULL here if scoutfs_lock_inode() failed */
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
/*
* NOTE(review): both error paths above (lock failure and a negative
* scoutfs_data_wait_check() result) jump here without removing the
* remaining pages from the list, so this assertion looks reachable
* with a non-empty list on those paths -- confirm that is intended.
*/
BUG_ON(!list_empty(pages));
return ret;
}
#else
static void scoutfs_readahead(struct readahead_control *rac)
{
struct inode *inode = rac->file->f_inode;
@@ -779,6 +818,7 @@ static void scoutfs_readahead(struct readahead_control *rac)
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
}
#endif
static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
{
@@ -1219,7 +1259,7 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
struct data_ext_args from_args;
struct data_ext_args to_args;
struct scoutfs_extent ext;
struct timespec64 cur_time;
struct kc_timespec cur_time;
LIST_HEAD(locks);
bool done = false;
loff_t from_size;
@@ -2015,9 +2055,15 @@ int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock,
return ret;
}
#ifdef KC_MM_VM_FAULT_T
static vm_fault_t scoutfs_data_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
#else
static int scoutfs_data_page_mkwrite(struct vm_area_struct *vma,
struct vm_fault *vmf)
{
#endif
struct page *page = vmf->page;
struct file *file = vma->vm_file;
struct inode *inode = file_inode(file);
@@ -2159,9 +2205,14 @@ out:
return ret;
}
#ifdef KC_MM_VM_FAULT_T
static vm_fault_t scoutfs_data_filemap_fault(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
#else
static int scoutfs_data_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
#endif
struct file *file = vma->vm_file;
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
@@ -2196,11 +2247,15 @@ retry:
}
}
#ifdef KC_MM_VM_FAULT_T
ret = filemap_fault(vmf);
#else
ret = filemap_fault(vma, vmf);
#endif
out:
if (scoutfs_per_task_del(&si->pt_data_lock, &pt_ent))
inode_dio_end(inode);
kc_inode_dio_end(inode);
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
if (scoutfs_data_wait_found(&dw)) {
err = scoutfs_data_wait(inode, &dw);
@@ -2218,6 +2273,9 @@ out:
static const struct vm_operations_struct scoutfs_data_file_vm_ops = {
.fault = scoutfs_data_filemap_fault,
.page_mkwrite = scoutfs_data_page_mkwrite,
#ifdef KC_MM_REMAP_PAGES
.remap_pages = generic_file_remap_pages,
#endif
};
static int scoutfs_file_mmap(struct file *file, struct vm_area_struct *vma)
@@ -2235,7 +2293,11 @@ const struct address_space_operations scoutfs_file_aops = {
#else
.readpage = scoutfs_readpage,
#endif
#ifndef KC_FILE_AOPS_READAHEAD
.readpages = scoutfs_readpages,
#else
.readahead = scoutfs_readahead,
#endif
.writepage = scoutfs_writepage,
.writepages = scoutfs_writepages,
.write_begin = scoutfs_write_begin,
@@ -2243,10 +2305,17 @@ const struct address_space_operations scoutfs_file_aops = {
};
const struct file_operations scoutfs_file_fops = {
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
.read = do_sync_read,
.write = do_sync_write,
.aio_read = scoutfs_file_aio_read,
.aio_write = scoutfs_file_aio_write,
#else
.read_iter = scoutfs_file_read_iter,
.write_iter = scoutfs_file_write_iter,
.splice_read = generic_file_splice_read,
.splice_write = iter_file_splice_write,
#endif
.mmap = scoutfs_file_mmap,
.unlocked_ioctl = scoutfs_ioctl,
.fsync = scoutfs_file_fsync,
+101 -8
View File
@@ -18,7 +18,6 @@
#include <linux/xattr.h>
#include <linux/namei.h>
#include <linux/mm.h>
#include <linux/iversion.h>
#include "format.h"
#include "file.h"
@@ -423,7 +422,18 @@ out:
else
inode = scoutfs_iget(sb, ino, 0, 0);
return d_splice_alias(inode, dentry);
/*
* We can't splice dir aliases into the dcache. dir entries
* might have changed on other nodes so our dcache could still
* contain them, rather than having been moved in rename. For
* dirs, we use d_materialize_unique to remove any existing
* aliases which must be stale. Our inode numbers aren't reused
* so inodes pointed to by entries can't change types.
*/
if (!IS_ERR_OR_NULL(inode) && S_ISDIR(inode->i_mode))
return d_materialise_unique(dentry, inode);
else
return d_splice_alias(inode, dentry);
}
/*
@@ -952,7 +962,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
{
struct super_block *sb = dir->i_sb;
struct inode *inode = dentry->d_inode;
struct timespec64 ts = current_time(inode);
struct kc_timespec ts = current_time(inode);
struct scoutfs_lock *inode_lock = NULL;
struct scoutfs_lock *orph_lock = NULL;
struct scoutfs_lock *dir_lock = NULL;
@@ -1187,6 +1197,24 @@ out:
return path;
}
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
/*
 * ->follow_link for kernels that still use the nameidata interface.
 * Looks up the symlink target and, when a usable string came back,
 * registers it with the nameidata.  The result of
 * scoutfs_get_link_target() (string, NULL or error pointer) is returned
 * unchanged.
 */
static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
{
	char *target = scoutfs_get_link_target(dentry);

	/* nothing to hand to the nameidata, just pass the result along */
	if (IS_ERR_OR_NULL(target))
		return target;

	nd_set_link(nd, target);
	return target;
}
/*
 * ->put_link counterpart: frees the cookie with kfree() unless it is
 * NULL or an error pointer.
 */
static void scoutfs_put_link(struct dentry *dentry, struct nameidata *nd,
			     void *cookie)
{
	/* error pointers must never reach kfree() */
	if (IS_ERR_OR_NULL(cookie))
		return;

	kfree(cookie);
}
#else
static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done)
{
char *path;
@@ -1197,6 +1225,7 @@ static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode,
return path;
}
#endif
/*
* Symlink target paths can be annoyingly large. We store relatively
@@ -1606,7 +1635,7 @@ static int scoutfs_rename_common(KC_VFS_NS_DEF
struct scoutfs_lock *orph_lock = NULL;
struct scoutfs_dirent new_dent;
struct scoutfs_dirent old_dent;
struct timespec64 now;
struct kc_timespec now;
bool ins_new = false;
bool del_new = false;
bool ins_old = false;
@@ -1618,9 +1647,6 @@ static int scoutfs_rename_common(KC_VFS_NS_DEF
int ret;
int err;
if (flags & ~RENAME_NOREPLACE)
return -EINVAL;
trace_scoutfs_rename(sb, old_dir, old_dentry, new_dir, new_dentry);
old_hash = dirent_name_hash(old_dentry->d_name.name,
@@ -1866,7 +1892,36 @@ out_unlock:
return ret;
}
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
/*
 * Flag-less rename entry point.  Forwards straight to
 * scoutfs_rename_common() with KC_VFS_INIT_NS and a flags value of 0.
 */
static int scoutfs_rename(struct inode *old_dir,
			  struct dentry *old_dentry, struct inode *new_dir,
			  struct dentry *new_dentry)
{
	const unsigned int no_flags = 0;

	return scoutfs_rename_common(KC_VFS_INIT_NS
				     old_dir, old_dentry,
				     new_dir, new_dentry,
				     no_flags);
}
#endif
/*
 * Rename entry point that accepts flags.  Any flag bit other than
 * RENAME_NOREPLACE is rejected with -EINVAL; otherwise the request is
 * forwarded unchanged to scoutfs_rename_common().
 */
static int scoutfs_rename2(KC_VFS_NS_DEF
			   struct inode *old_dir,
			   struct dentry *old_dentry, struct inode *new_dir,
			   struct dentry *new_dentry, unsigned int flags)
{
	unsigned int unsupported = flags & ~RENAME_NOREPLACE;

	if (unsupported != 0)
		return -EINVAL;

	return scoutfs_rename_common(KC_VFS_NS
				     old_dir, old_dentry,
				     new_dir, new_dentry, flags);
}
#ifdef KC_FMODE_KABI_ITERATE
/*
 * This open method exists solely to set FMODE_KABI_ITERATE in the
 * file's mode for kabi; it does nothing else and always returns 0.
 */
static int scoutfs_dir_open(struct inode *inode, struct file *file)
{
	file->f_mode = file->f_mode | FMODE_KABI_ITERATE;

	return 0;
}
#endif
static int scoutfs_tmpfile(KC_VFS_NS_DEF
struct inode *dir,
@@ -1936,15 +1991,29 @@ out:
}
const struct inode_operations scoutfs_symlink_iops = {
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.readlink = generic_readlink,
.follow_link = scoutfs_follow_link,
.put_link = scoutfs_put_link,
#else
.get_link = scoutfs_get_link,
#endif
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.removexattr = generic_removexattr,
#endif
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.tmpfile = scoutfs_tmpfile,
.rename = scoutfs_rename_common,
.symlink = scoutfs_symlink,
@@ -1953,17 +2022,26 @@ const struct inode_operations scoutfs_symlink_iops = {
.mkdir = scoutfs_mkdir,
.create = scoutfs_create,
.lookup = scoutfs_lookup,
#endif
};
const struct file_operations scoutfs_dir_fops = {
.iterate = scoutfs_readdir,
#ifdef KC_FMODE_KABI_ITERATE
.open = scoutfs_dir_open,
#endif
.unlocked_ioctl = scoutfs_ioctl,
.fsync = scoutfs_file_fsync,
.llseek = generic_file_llseek,
};
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
const struct inode_operations_wrapper scoutfs_dir_iops = {
.ops = {
#else
const struct inode_operations scoutfs_dir_iops = {
#endif
.lookup = scoutfs_lookup,
.mknod = scoutfs_mknod,
.create = scoutfs_create,
@@ -1973,15 +2051,30 @@ const struct inode_operations scoutfs_dir_iops = {
.rmdir = scoutfs_unlink,
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.rename = scoutfs_rename,
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.symlink = scoutfs_symlink,
.permission = scoutfs_permission,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
},
#endif
.tmpfile = scoutfs_tmpfile,
.rename = scoutfs_rename_common,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.rename2 = scoutfs_rename2,
#else
.rename = scoutfs_rename2,
#endif
};
+4
View File
@@ -5,7 +5,11 @@
#include "lock.h"
extern const struct file_operations scoutfs_dir_fops;
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
extern const struct inode_operations_wrapper scoutfs_dir_iops;
#else
extern const struct inode_operations scoutfs_dir_iops;
#endif
extern const struct inode_operations scoutfs_symlink_iops;
extern const struct dentry_operations scoutfs_dentry_ops;
+16 -9
View File
@@ -25,6 +25,7 @@
#include "sysfs.h"
#include "server.h"
#include "fence.h"
#include "net.h"
/*
* Fencing ensures that a given mount can no longer write to the
@@ -79,7 +80,7 @@ struct pending_fence {
struct timer_list timer;
ktime_t start_kt;
__be32 ipv4_addr;
union scoutfs_inet_addr addr;
bool fenced;
bool error;
int reason;
@@ -171,14 +172,19 @@ static ssize_t error_store(struct kobject *kobj, struct kobj_attribute *attr, co
}
SCOUTFS_ATTR_RW(error);
static ssize_t ipv4_addr_show(struct kobject *kobj,
static ssize_t inet_addr_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
DECLARE_FENCE_FROM_KOBJ(fence, kobj);
struct sockaddr_storage sin;
return snprintf(buf, PAGE_SIZE, "%pI4", &fence->ipv4_addr);
memset(&sin, 0, sizeof(struct sockaddr_storage));
scoutfs_addr_to_sin(&sin, &fence->addr);
return snprintf(buf, PAGE_SIZE, "%pISc", SIN_ARG(&sin));
}
SCOUTFS_ATTR_RO(ipv4_addr);
SCOUTFS_ATTR_RO(inet_addr);
static ssize_t reason_show(struct kobject *kobj, struct kobj_attribute *attr,
char *buf)
@@ -212,7 +218,7 @@ static struct attribute *fence_attrs[] = {
SCOUTFS_ATTR_PTR(elapsed_secs),
SCOUTFS_ATTR_PTR(fenced),
SCOUTFS_ATTR_PTR(error),
SCOUTFS_ATTR_PTR(ipv4_addr),
SCOUTFS_ATTR_PTR(inet_addr),
SCOUTFS_ATTR_PTR(reason),
SCOUTFS_ATTR_PTR(rid),
NULL,
@@ -222,7 +228,7 @@ static struct attribute *fence_attrs[] = {
static void fence_timeout(struct timer_list *timer)
{
struct pending_fence *fence = timer_container_of(fence, timer, timer);
struct pending_fence *fence = from_timer(fence, timer, timer);
struct super_block *sb = fence->sb;
DECLARE_FENCE_INFO(sb, fi);
@@ -232,7 +238,7 @@ static void fence_timeout(struct timer_list *timer)
wake_up(&fi->waitq);
}
int scoutfs_fence_start(struct super_block *sb, u64 rid, __be32 ipv4_addr, int reason)
int scoutfs_fence_start(struct super_block *sb, u64 rid, union scoutfs_inet_addr *addr, int reason)
{
DECLARE_FENCE_INFO(sb, fi);
struct pending_fence *fence;
@@ -248,7 +254,7 @@ int scoutfs_fence_start(struct super_block *sb, u64 rid, __be32 ipv4_addr, int r
scoutfs_sysfs_init_attrs(sb, &fence->ssa);
fence->start_kt = ktime_get();
fence->ipv4_addr = ipv4_addr;
memcpy(&fence->addr, addr, sizeof(union scoutfs_inet_addr));
fence->fenced = false;
fence->error = false;
fence->reason = reason;
@@ -424,7 +430,8 @@ int scoutfs_fence_setup(struct super_block *sb)
goto out;
}
fi->wq = alloc_workqueue("scoutfs_fence", WQ_UNBOUND, 0);
fi->wq = alloc_workqueue("scoutfs_fence",
WQ_UNBOUND | WQ_NON_REENTRANT, 0);
if (!fi->wq) {
ret = -ENOMEM;
goto out;
+1 -1
View File
@@ -7,7 +7,7 @@ enum {
SCOUTFS_FENCE_QUORUM_BLOCK_LEADER,
};
int scoutfs_fence_start(struct super_block *sb, u64 rid, __be32 ipv4_addr, int reason);
int scoutfs_fence_start(struct super_block *sb, u64 rid, union scoutfs_inet_addr *addr, int reason);
int scoutfs_fence_next(struct super_block *sb, u64 *rid, int *reason, bool *error);
int scoutfs_fence_reason_pending(struct super_block *sb, int reason);
int scoutfs_fence_free(struct super_block *sb, u64 rid);
+128
View File
@@ -30,6 +30,133 @@
#include "omap.h"
#include "quota.h"
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
/*
* Start a high level file read. We check for offline extents in the
* read region here so that we only check the extents once. We use the
* dio count to prevent releasing while we're reading after we've
* checked the extents.
*
* Returns whatever generic_file_aio_read() returns, or the first error
* hit while locking, checking extents, or waiting for data. If the
* extent check recorded a data wait, everything is dropped, we wait,
* and the whole read is retried from the top when the wait returns 0.
*/
ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *scoutfs_inode_lock = NULL;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
DECLARE_DATA_WAIT(dw);
int ret;
retry:
/* protect checked extents from release */
inode_lock(inode);
atomic_inc(&inode->i_dio_count);
inode_unlock(inode);
/* the dio count taken above is dropped at out: on every path, including errors */
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
if (ret)
goto out;
/* NOTE(review): presumably true when this task newly registered the lock -- confirm against per_task.c */
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
/* any non-zero result skips the read; a pending wait is detected at out: via dw */
ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos,
SEF_OFFLINE,
SCOUTFS_IOC_DWO_READ,
&dw, scoutfs_inode_lock);
if (ret != 0)
goto out;
} else {
/* not expected for a read entry point; warn once but still perform the read */
WARN_ON_ONCE(true);
}
ret = generic_file_aio_read(iocb, iov, nr_segs, pos);
out:
/* undo in reverse: dio count, per-task entry, then the cluster lock */
inode_dio_done(inode);
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_READ);
/* wait only after everything is released; retry the read if the wait succeeded */
if (scoutfs_data_wait_found(&dw)) {
ret = scoutfs_data_wait(inode, &dw);
if (ret == 0)
goto retry;
}
return ret;
}
/*
* Start a high level file write on kernels that still have the
* aio_read/aio_write file operations.
*
* With the inode lock and a WRITE cluster lock held we check retention,
* complete any pending truncate, require the whole file to be online,
* and check quota before handing the write to
* __generic_file_aio_write(). If the online check recorded a data
* wait, everything is dropped, we wait, and the write is retried from
* the top when the wait returns 0.
*
* Returns the result of the generic write (or of generic_write_sync()
* if it fails after a successful write), or the first error hit during
* the checks.
*/
ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *scoutfs_inode_lock = NULL;
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
DECLARE_DATA_WAIT(dw);
int ret;
if (iocb->ki_left == 0) /* Does this even happen? */
return 0;
retry:
/* inode lock is held across the whole attempt and dropped at out: */
inode_lock(inode);
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
if (ret)
goto out;
ret = scoutfs_inode_check_retention(inode);
if (ret < 0)
goto out;
ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock);
if (ret)
goto out;
/* unlike the read path there is no warning when the per-task entry isn't added */
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
/* data_version is per inode, whole file must be online */
ret = scoutfs_data_wait_check(inode, 0, i_size_read(inode),
SEF_OFFLINE,
SCOUTFS_IOC_DWO_WRITE,
&dw, scoutfs_inode_lock);
if (ret != 0)
goto out;
}
ret = scoutfs_quota_check_data(sb, inode);
if (ret)
goto out;
/* XXX: remove SUID bit */
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
out:
/* release in reverse order of acquisition before any waiting or syncing */
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_WRITE);
inode_unlock(inode);
/* a recorded data wait means nothing was written; wait and retry on success */
if (scoutfs_data_wait_found(&dw)) {
ret = scoutfs_data_wait(inode, &dw);
if (ret == 0)
goto retry;
}
/* sync after the locks are dropped; a sync error only replaces a positive byte count */
if (ret > 0 || ret == -EIOCBQUEUED) {
ssize_t err;
err = generic_write_sync(file, pos, ret);
if (err < 0 && ret > 0)
ret = err;
}
return ret;
}
#else
ssize_t scoutfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
struct file *file = iocb->ki_filp;
@@ -138,6 +265,7 @@ out:
return ret;
}
#endif
int scoutfs_permission(KC_VFS_NS_DEF
struct inode *inode, int mask)
+7
View File
@@ -1,8 +1,15 @@
#ifndef _SCOUTFS_FILE_H_
#define _SCOUTFS_FILE_H_
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos);
#else
ssize_t scoutfs_file_read_iter(struct kiocb *, struct iov_iter *);
ssize_t scoutfs_file_write_iter(struct kiocb *, struct iov_iter *);
#endif
int scoutfs_permission(KC_VFS_NS_DEF
struct inode *inode, int mask);
loff_t scoutfs_file_llseek(struct file *file, loff_t offset, int whence);
+1 -1
View File
@@ -783,7 +783,7 @@ int scoutfs_forest_setup(struct super_block *sb)
scoutfs_forest_log_merge_worker);
sbi->forest_info = finf;
finf->workq = alloc_workqueue("scoutfs_log_merge",
finf->workq = alloc_workqueue("scoutfs_log_merge", WQ_NON_REENTRANT |
WQ_UNBOUND | WQ_HIGHPRI, 0);
if (!finf->workq) {
ret = -ENOMEM;
+26 -1
View File
@@ -21,7 +21,6 @@
#include <linux/list_sort.h>
#include <linux/workqueue.h>
#include <linux/buffer_head.h>
#include <linux/iversion.h>
#include "format.h"
#include "super.h"
@@ -144,26 +143,40 @@ void scoutfs_destroy_inode(struct inode *inode)
static const struct inode_operations scoutfs_file_iops = {
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
.fiemap = scoutfs_data_fiemap,
};
static const struct inode_operations scoutfs_special_iops = {
.getattr = scoutfs_getattr,
.setattr = scoutfs_setattr,
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
.setxattr = generic_setxattr,
.getxattr = generic_getxattr,
.removexattr = generic_removexattr,
#endif
.listxattr = scoutfs_listxattr,
#ifdef KC_GET_INODE_ACL
.get_inode_acl = scoutfs_get_acl,
#else
.get_acl = scoutfs_get_acl,
#endif
#ifdef KC_SET_ACL_DENTRY
.set_acl = scoutfs_set_acl,
#endif
};
/*
@@ -179,7 +192,12 @@ static void set_inode_ops(struct inode *inode)
inode->i_fop = &scoutfs_file_fops;
break;
case S_IFDIR:
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
inode->i_op = &scoutfs_dir_iops.ops;
inode->i_flags |= S_IOPS_WRAPPER;
#else
inode->i_op = &scoutfs_dir_iops;
#endif
inode->i_fop = &scoutfs_dir_fops;
break;
case S_IFLNK:
@@ -363,11 +381,18 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock)
return ret;
}
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat)
{
struct inode *inode = dentry->d_inode;
#else
int scoutfs_getattr(KC_VFS_NS_DEF
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
{
struct inode *inode = d_inode(path->dentry);
#endif
struct super_block *sb = inode->i_sb;
struct scoutfs_lock *lock = NULL;
int ret;
+6 -1
View File
@@ -23,7 +23,7 @@ struct scoutfs_inode_info {
u64 offline_blocks;
u64 proj;
u32 flags;
struct timespec64 crtime;
struct kc_timespec crtime;
/*
* Protects per-inode extent items, most particularly readers
@@ -131,9 +131,14 @@ int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock);
int scoutfs_inode_check_retention(struct inode *inode);
int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock);
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
struct kstat *stat);
#else
int scoutfs_getattr(KC_VFS_NS_DEF
const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags);
#endif
int scoutfs_setattr(KC_VFS_NS_DEF
struct dentry *dentry, struct iattr *attr);
+6 -4
View File
@@ -489,7 +489,6 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
struct scoutfs_lock *lock = NULL;
struct kiocb kiocb;
struct iovec iov;
struct iov_iter iter;
size_t written;
loff_t end_size;
loff_t isize;
@@ -515,6 +514,10 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
/* the iocb is really only used for the file pointer :P */
init_sync_kiocb(&kiocb, file);
kiocb.ki_pos = args.offset;
#ifdef KC_LINUX_AIO_KI_LEFT
kiocb.ki_left = args.length;
kiocb.ki_nbytes = args.length;
#endif
iov.iov_base = (void __user *)(unsigned long)args.buf_ptr;
iov.iov_len = args.length;
@@ -556,9 +559,8 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
pos = args.offset;
written = 0;
do {
iov_iter_init(&iter, WRITE, &iov, 1, args.length);
ret = kc_generic_perform_write(&kiocb, &iter, pos);
ret = generic_file_buffered_write(&kiocb, &iov, 1, pos, &pos,
args.length, written);
BUG_ON(ret == -EIOCBQUEUED);
if (ret > 0)
written += ret;
+26
View File
@@ -79,6 +79,9 @@ struct item_cache_info {
struct super_block *sb;
struct item_percpu_pages __percpu *pcpu_pages;
KC_DEFINE_SHRINKER(shrinker);
#ifdef KC_CPU_NOTIFIER
struct notifier_block notifier;
#endif
/* often walked, but per-cpu refs are fast path */
rwlock_t rwlock;
@@ -2581,6 +2584,22 @@ static unsigned long item_cache_scan_objects(struct shrinker *shrink,
return freed;
}
#ifdef KC_CPU_NOTIFIER
/*
 * CPU hotplug notifier: when a cpu is reported dead, drop the per-cpu
 * item pages that were held for it.  Every other action is ignored.
 * Always returns NOTIFY_OK.
 */
static int item_cpu_callback(struct notifier_block *nfb,
			     unsigned long action, void *hcpu)
{
	struct item_cache_info *cinf;

	if (action != CPU_DEAD)
		return NOTIFY_OK;

	/* the notifier block is embedded in the cache info */
	cinf = container_of(nfb, struct item_cache_info, notifier);
	drop_pcpu_pages(cinf->sb, cinf, (unsigned long)hcpu);

	return NOTIFY_OK;
}
int scoutfs_item_setup(struct super_block *sb)
{
@@ -2611,6 +2630,10 @@ int scoutfs_item_setup(struct super_block *sb)
KC_INIT_SHRINKER_FUNCS(&cinf->shrinker, item_cache_count_objects,
item_cache_scan_objects);
KC_REGISTER_SHRINKER(&cinf->shrinker, "scoutfs-item:" SCSBF, SCSB_ARGS(sb));
#ifdef KC_CPU_NOTIFIER
cinf->notifier.notifier_call = item_cpu_callback;
register_hotcpu_notifier(&cinf->notifier);
#endif
sbi->item_cache_info = cinf;
return 0;
@@ -2628,6 +2651,9 @@ void scoutfs_item_destroy(struct super_block *sb)
int cpu;
if (cinf) {
#ifdef KC_CPU_NOTIFIER
unregister_hotcpu_notifier(&cinf->notifier);
#endif
KC_UNREGISTER_SHRINKER(&cinf->shrinker);
for_each_possible_cpu(cpu)
+111
View File
@@ -3,8 +3,119 @@
#include "kernelcompat.h"
#ifdef KC_SHRINKER_SHRINK
#include <linux/shrinker.h>
/*
* If a target doesn't have that .{count,scan}_objects() interface then
* we have a .shrink() helper that performs the shrink work in terms of
* count/scan.
*/
/*
 * Legacy .shrink() entry point built from the count/scan callbacks
 * stored in the wrapper.  When asked to scan, the scan callback runs
 * first and its magic return values are mapped to what older kernels
 * expect; otherwise (or after a normal scan) the current object count
 * is returned, clamped to INT_MAX.
 */
int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc)
{
	struct kc_shrinker_wrapper *wrapper;
	unsigned long scanned;
	unsigned long count;

	wrapper = container_of(shrink, struct kc_shrinker_wrapper, shrink);

	if (sc->nr_to_scan != 0) {
		scanned = wrapper->scan_objects(shrink, sc);

		/* translate magic values to the equivalent for older kernels */
		if (scanned == SHRINK_STOP)
			return -1;
		if (scanned == SHRINK_EMPTY)
			return 0;
	}

	count = wrapper->count_objects(shrink, sc);

	return min_t(unsigned long, count, INT_MAX);
}
#endif
#ifndef KC_CURRENT_TIME_INODE
struct timespec64 kc_current_time(struct inode *inode)
{
struct timespec64 now;
unsigned gran;
getnstimeofday64(&now);
if (unlikely(!inode->i_sb)) {
WARN(1, "current_time() called with uninitialized super_block in the inode");
return now;
}
gran = inode->i_sb->s_time_gran;
/* Avoid division in the common cases 1 ns and 1 s. */
if (gran == 1) {
/* nothing */
} else if (gran == NSEC_PER_SEC) {
now.tv_nsec = 0;
} else if (gran > 1 && gran < NSEC_PER_SEC) {
now.tv_nsec -= now.tv_nsec % gran;
} else {
WARN(1, "illegal file time granularity: %u", gran);
}
return now;
}
#endif
#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
/*
 * Stand-in for the removed generic_file_buffered_write(): wrap the
 * iovec in an iov_iter and perform the write.  On success the bytes
 * written are added to @written and *@ppos is advanced past them.
 * Returns the accumulated byte count when it is non-zero, otherwise
 * the status of this write.
 */
ssize_t
kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
			       unsigned long nr_segs, loff_t pos, loff_t *ppos,
			       size_t count, ssize_t written)
{
	struct iov_iter iter;
	ssize_t nr;

	iov_iter_init(&iter, WRITE, iov, nr_segs, count);
	nr = kc_generic_perform_write(iocb, &iter, pos);

	if (nr >= 0) {
		*ppos = pos + nr;
		written += nr;
	}

	if (written)
		return written;

	return nr;
}
#endif
#include <linux/list_lru.h>
#ifdef KC_LIST_LRU_WALK_CB_ITEM_LOCK
/*
* Adapter with the walk callback signature that takes (item, lock,
* cb_arg).  cb_arg is the kc_isolate_args set up by the kc_list_lru_*
* wrappers; the wrapped callback is invoked with a NULL list argument
* and the caller's original cb_arg.  The lock argument is not used.
*/
static enum lru_status kc_isolate(struct list_head *item, spinlock_t *lock, void *cb_arg)
{
struct kc_isolate_args *args = cb_arg;
/* isolate doesn't use list, nr_items updated in caller */
return args->isolate(item, NULL, args->cb_arg);
}
/*
* list_lru_walk() wrapper: packages the caller's isolate callback and
* its argument so that kc_isolate() can forward to it, then returns
* whatever list_lru_walk() returns.
*/
unsigned long kc_list_lru_walk(struct list_lru *lru, kc_list_lru_walk_cb_t isolate, void *cb_arg,
unsigned long nr_to_walk)
{
/* lives on the stack; only referenced for the duration of the walk call below */
struct kc_isolate_args args = {
.isolate = isolate,
.cb_arg = cb_arg,
};
return list_lru_walk(lru, kc_isolate, &args, nr_to_walk);
}
/*
* list_lru_shrink_walk() wrapper: packages the caller's isolate
* callback and its argument so that kc_isolate() can forward to it,
* then returns whatever list_lru_shrink_walk() returns.
*/
unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
kc_list_lru_walk_cb_t isolate, void *cb_arg)
{
/* lives on the stack; only referenced for the duration of the walk call below */
struct kc_isolate_args args = {
.isolate = isolate,
.cb_arg = cb_arg,
};
return list_lru_shrink_walk(lru, sc, kc_isolate, &args);
}
#endif
#ifdef KC_LIST_LRU_WALK_CB_LIST_LOCK
static enum lru_status kc_isolate(struct list_head *item, struct list_lru_one *list,
spinlock_t *lock, void *cb_arg)
+286 -5
View File
@@ -4,6 +4,146 @@
#include <linux/kernel.h>
#include <linux/fs.h>
/*
* v4.15-rc3-4-gae5e165d855d
*
* new API for handling inode->i_version. This forces us to
* include this API where we need. We include it here for
* convenience instead of where it's needed.
*/
#ifdef KC_NEED_LINUX_IVERSION_H
#include <linux/iversion.h>
#else
/*
* Kernels older than the version above don't have that API, so we
* fall back to reading and writing inode->i_version directly, as
* before, without the queried/peek semantics of the newer helpers.
*/
#define inode_set_iversion_queried(inode, val) \
do { \
(inode)->i_version = val; \
} while (0)
#define inode_peek_iversion(inode) \
({ \
(inode)->i_version; \
})
#endif
#ifdef KC_POSIX_ACL_VALID_USER_NS
#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(user_ns, acl)
#else
#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(acl)
#endif
/*
* v3.6-rc1-24-gdbf2576e37da
*
* All workqueues are now non-reentrant, and the bit flag is removed
* shortly after its uses were removed.
*/
#ifndef WQ_NON_REENTRANT
#define WQ_NON_REENTRANT 0
#endif
/*
* v3.18-rc2-19-gb5ae6b15bd73
*
* Folds d_materialise_unique into d_splice_alias. Note reversal
* of arguments (Also note Documentation/filesystems/porting.rst)
*/
#ifndef KC_D_MATERIALISE_UNIQUE
#define d_materialise_unique(dentry, inode) d_splice_alias(inode, dentry)
#endif
/*
* v4.8-rc1-29-g31051c85b5e2
*
* fall back to inode_change_ok() if setattr_prepare() isn't available
*/
#ifndef KC_SETATTR_PREPARE
#define setattr_prepare(dentry, attr) inode_change_ok(d_inode(dentry), attr)
#endif
#ifndef KC___POSIX_ACL_CREATE
#define __posix_acl_create posix_acl_create
#define __posix_acl_chmod posix_acl_chmod
#endif
#ifndef KC_PERCPU_COUNTER_ADD_BATCH
#define percpu_counter_add_batch __percpu_counter_add
#endif
#ifndef KC_MEMALLOC_NOFS_SAVE
#define memalloc_nofs_save memalloc_noio_save
#define memalloc_nofs_restore memalloc_noio_restore
#endif
#ifdef KC_BIO_BI_OPF
#define kc_bio_get_opf(bio) \
({ \
(bio)->bi_opf; \
})
#define kc_bio_set_opf(bio, opf) \
do { \
(bio)->bi_opf = opf; \
} while (0)
#define kc_bio_set_sector(bio, sect) \
do { \
(bio)->bi_iter.bi_sector = sect;\
} while (0)
#define kc_submit_bio(bio) submit_bio(bio)
#else
#define kc_bio_get_opf(bio) \
({ \
(bio)->bi_rw; \
})
#define kc_bio_set_opf(bio, opf) \
do { \
(bio)->bi_rw = opf; \
} while (0)
#define kc_bio_set_sector(bio, sect) \
do { \
(bio)->bi_sector = sect; \
} while (0)
#define kc_submit_bio(bio) \
do { \
submit_bio((bio)->bi_rw, bio); \
} while (0)
#define bio_set_dev(bio, bdev) \
do { \
(bio)->bi_bdev = (bdev); \
} while (0)
#endif
#ifdef KC_BIO_BI_STATUS
#define KC_DECLARE_BIO_END_IO(name, bio) name(bio)
#define kc_bio_get_errno(bio) ({ blk_status_to_errno((bio)->bi_status); })
#else
#define KC_DECLARE_BIO_END_IO(name, bio) name(bio, int _error_arg)
#define kc_bio_get_errno(bio) ({ (int)((void)(bio), _error_arg); })
#endif
/*
* v4.13-rc1-6-ge462ec50cb5f
*
* MS_* (mount) flags from <linux/mount.h> should not be used in the kernel
* anymore from 4.x onwards. Instead, we need to use the SB_* (superblock) flags
*/
#ifndef SB_POSIXACL
#define SB_POSIXACL MS_POSIXACL
#define SB_I_VERSION MS_I_VERSION
#endif
#ifndef KC_CURRENT_TIME_INODE
struct timespec64 kc_current_time(struct inode *inode);
#define current_time kc_current_time
#define kc_timespec timespec
#else
#define kc_timespec timespec64
#endif
#ifndef KC_SHRINKER_SHRINK
#define KC_DEFINE_SHRINKER(name) struct shrinker name
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
__typeof__(name) _shrink = (name); \
@@ -20,7 +160,81 @@
#endif /* KC_SHRINKER_NAME */
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr))
#define KC_SHRINKER_FN(ptr) (ptr)
#else
#include <linux/shrinker.h>
#ifndef SHRINK_STOP
#define SHRINK_STOP (~0UL)
#define SHRINK_EMPTY (~0UL - 1)
#endif
int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc);
struct kc_shrinker_wrapper {
unsigned long (*count_objects)(struct shrinker *, struct shrink_control *sc);
unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc);
struct shrinker shrink;
};
/*
 * Declares the shrinker wrapper member/variable.  No trailing semicolon
 * here: users write KC_DEFINE_SHRINKER(name); themselves, exactly as
 * with the non-wrapper definition of this macro, so including one would
 * expand to a stray empty declaration.
 */
#define KC_DEFINE_SHRINKER(name) struct kc_shrinker_wrapper name
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
struct kc_shrinker_wrapper *_wrap = (name); \
_wrap->count_objects = (countfn); \
_wrap->scan_objects = (scanfn); \
_wrap->shrink.shrink = kc_shrink_wrapper_fn; \
_wrap->shrink.seeks = DEFAULT_SEEKS; \
} while (0)
#define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(container_of(ptr, struct kc_shrinker_wrapper, shrink), type, shrinker)
#define KC_REGISTER_SHRINKER(ptr, fmt, ...) (register_shrinker(ptr.shrink))
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr.shrink))
#define KC_SHRINKER_FN(ptr) (ptr.shrink)
#endif /* KC_SHRINKER_SHRINK */
#ifdef KC_KERNEL_GETSOCKNAME_ADDRLEN
#include <linux/net.h>
#include <linux/inet.h>
/*
 * Compat wrapper for kernels whose kernel_getsockname() reports the
 * address length through an out-parameter and returns 0 on success.
 * It presents the calling convention used by the rest of the code:
 *
 * Returns the length of the address stored in @addr on success
 * (sizeof(struct sockaddr_in) or sizeof(struct sockaddr_in6)), the
 * negative errno from kernel_getsockname() on failure, or
 * -EAFNOSUPPORT if the reported length is neither an IPv4 nor an IPv6
 * socket address.
 *
 * @addr must have room for a struct sockaddr_storage.
 */
static inline int kc_kernel_getsockname(struct socket *sock, struct sockaddr *addr)
{
	int addrlen = sizeof(struct sockaddr_storage);
	int ret;

	ret = kernel_getsockname(sock, addr, &addrlen);
	if (ret < 0)
		return ret;

	if (addrlen != sizeof(struct sockaddr_in) &&
	    addrlen != sizeof(struct sockaddr_in6))
		return -EAFNOSUPPORT;

	/* report the real length so IPv6 isn't described as sockaddr_in sized */
	return addrlen;
}
/*
 * Compat wrapper for kernels whose kernel_getpeername() reports the
 * address length through an out-parameter and returns 0 on success.
 * It presents the calling convention used by the rest of the code:
 *
 * Returns the length of the address stored in @addr on success
 * (sizeof(struct sockaddr_in) or sizeof(struct sockaddr_in6)), the
 * negative errno from kernel_getpeername() on failure, or
 * -EAFNOSUPPORT if the reported length is neither an IPv4 nor an IPv6
 * socket address.
 *
 * @addr must have room for a struct sockaddr_storage.
 */
static inline int kc_kernel_getpeername(struct socket *sock, struct sockaddr *addr)
{
	int addrlen = sizeof(struct sockaddr_storage);
	int ret;

	ret = kernel_getpeername(sock, addr, &addrlen);
	if (ret < 0)
		return ret;

	if (addrlen != sizeof(struct sockaddr_in) &&
	    addrlen != sizeof(struct sockaddr_in6))
		return -EAFNOSUPPORT;

	/* report the real length so IPv6 isn't described as sockaddr_in sized */
	return addrlen;
}
#else
#define kc_kernel_getsockname(sock, addr) kernel_getsockname(sock, addr)
#define kc_kernel_getpeername(sock, addr) kernel_getpeername(sock, addr)
#endif
#ifdef KC_SOCK_CREATE_KERN_NET
#define kc_sock_create_kern(family, type, proto, res) sock_create_kern(&init_net, family, type, proto, res)
#else
#define kc_sock_create_kern sock_create_kern
#endif
#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
ssize_t kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
unsigned long nr_segs, loff_t pos, loff_t *ppos,
size_t count, ssize_t written);
#define generic_file_buffered_write kc_generic_file_buffered_write
#ifdef KC_GENERIC_PERFORM_WRITE_KIOCB_IOV_ITER
static inline int kc_generic_perform_write(struct kiocb *iocb, struct iov_iter *iter, loff_t pos)
{
@@ -34,6 +248,7 @@ static inline int kc_generic_perform_write(struct kiocb *iocb, struct iov_iter *
return generic_perform_write(file, iter, pos);
}
#endif
#endif // KC_GENERIC_FILE_BUFFERED_WRITE
#ifndef KC_HAVE_BLK_OPF_T
/* typedef __u32 __bitwise blk_opf_t; */
@@ -77,7 +292,7 @@ static inline struct bio *kc_bio_alloc(struct block_device *bdev, unsigned short
{
struct bio *b = bio_alloc(gfp_mask, nr_vecs);
if (b) {
b->bi_opf = opf;
kc_bio_set_opf(b, opf);
bio_set_dev(b, bdev);
}
return b;
@@ -88,6 +303,11 @@ static inline struct bio *kc_bio_alloc(struct block_device *bdev, unsigned short
#define fiemap_prep(inode, fieinfo, start, len, flags) fiemap_check_flags(fieinfo, flags)
#endif
#ifndef KC_KERNEL_OLD_TIMEVAL_STRUCT
#define __kernel_old_timeval timeval
/* argument is parenthesized so any ktime_t expression can be passed, not just a plain identifier */
#define ns_to_kernel_old_timeval(ktime) ns_to_timeval((ktime).tv64)
#endif
#ifdef KC_SOCK_SET_SNDTIMEO
#include <net/sock.h>
static inline int kc_sock_set_sndtimeo(struct socket *sock, s64 secs)
@@ -184,14 +404,45 @@ static inline int kc_tcp_sock_set_nodelay(struct socket *sock)
}
#endif
#ifdef KC_INODE_DIO_END
#define kc_inode_dio_end inode_dio_end
#else
#define kc_inode_dio_end inode_dio_done
#endif
#ifndef KC_MM_VM_FAULT_T
typedef unsigned int vm_fault_t;
/*
 * Map an errno to a fault code for kernels that don't provide
 * vmf_error(): -ENOMEM becomes VM_FAULT_OOM, anything else becomes
 * VM_FAULT_SIGBUS.
 */
static inline vm_fault_t vmf_error(int err)
{
	return err == -ENOMEM ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
}
#endif
#include <linux/list_lru.h>
#ifndef KC_LIST_LRU_SHRINK_COUNT_WALK
/* we don't bother with sc->{nid,memcg} (which doesn't exist in oldest kernels) */
static inline unsigned long list_lru_shrink_count(struct list_lru *lru,
struct shrink_control *sc)
{
return list_lru_count(lru);
}
static inline unsigned long
list_lru_shrink_walk(struct list_lru *lru, struct shrink_control *sc,
list_lru_walk_cb isolate, void *cb_arg)
{
return list_lru_walk(lru, isolate, cb_arg, sc->nr_to_scan);
}
#endif
#ifndef KC_LIST_LRU_ADD_OBJ
#define list_lru_add_obj list_lru_add
#define list_lru_del_obj list_lru_del
#endif
#if defined(KC_LIST_LRU_WALK_CB_LIST_LOCK)
#if defined(KC_LIST_LRU_WALK_CB_LIST_LOCK) || defined(KC_LIST_LRU_WALK_CB_ITEM_LOCK)
struct list_lru_one;
typedef enum lru_status (*kc_list_lru_walk_cb_t)(struct list_head *item, struct list_lru_one *list,
void *cb_arg);
@@ -207,9 +458,39 @@ unsigned long kc_list_lru_shrink_walk(struct list_lru *lru, struct shrink_contro
#define kc_list_lru_shrink_walk list_lru_shrink_walk
#endif
#ifndef KC_TIMER_CONTAINER_OF
#define timer_container_of(var, callback_timer, timer_fieldname) \
from_timer(var, callback_timer, timer_fieldname)
#if defined(KC_LIST_LRU_WALK_CB_ITEM_LOCK)
/* isolate moved by hand, nr_items updated in walk as _REMOVE returned */
static inline void list_lru_isolate_move(struct list_lru_one *list, struct list_head *item,
struct list_head *head)
{
list_move(item, head);
}
#endif
#ifndef KC_STACK_TRACE_SAVE
#include <linux/stacktrace.h>
static inline unsigned int stack_trace_save(unsigned long *store, unsigned int size,
unsigned int skipnr)
{
struct stack_trace trace = {
.entries = store,
.max_entries = size,
.skip = skipnr,
};
save_stack_trace(&trace);
return trace.nr_entries;
}
static inline void stack_trace_print(unsigned long *entries, unsigned int nr_entries, int spaces)
{
struct stack_trace trace = {
.entries = entries,
.nr_entries = nr_entries,
};
print_stack_trace(&trace, spaces);
}
#endif
#endif
+2 -1
View File
@@ -1693,7 +1693,8 @@ int scoutfs_lock_setup(struct super_block *sb)
}
linfo->workq = alloc_workqueue("scoutfs_lock_client_work",
WQ_UNBOUND | WQ_HIGHPRI, 0);
WQ_NON_REENTRANT | WQ_UNBOUND |
WQ_HIGHPRI, 0);
if (!linfo->workq) {
ret = -ENOMEM;
goto out;
+34 -19
View File
@@ -1113,11 +1113,11 @@ static int sock_opts_and_names(struct super_block *sb,
if (ret)
goto out;
ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
if (ret < 0)
goto out;
ret = kernel_getpeername(sock, (struct sockaddr *)&conn->peername);
ret = kc_kernel_getpeername(sock, (struct sockaddr *)&conn->peername);
if (ret < 0)
goto out;
@@ -1218,7 +1218,8 @@ static void scoutfs_net_connect_worker(struct work_struct *work)
trace_scoutfs_net_connect_work_enter(sb, 0, 0);
ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
ret = kc_sock_create_kern(conn->connect_sin.ss_family,
SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret)
goto out;
@@ -1239,7 +1240,9 @@ static void scoutfs_net_connect_worker(struct work_struct *work)
trace_scoutfs_conn_connect_start(conn);
ret = kernel_connect(sock, (struct sockaddr *)&conn->connect_sin,
sizeof(struct sockaddr_in), 0);
conn->connect_sin.ss_family == AF_INET ?
sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6),
0);
if (ret)
goto out;
@@ -1281,6 +1284,13 @@ static bool empty_accepted_list(struct scoutfs_net_connection *conn)
return empty;
}
/*
* sockaddr_storage wraps both _in and _in6, which have _port always
* __be16 at the same offset, and we only need to test whether it's
* zero.
*/
#define sockaddr_port_is_nonzero(sin) ((sin).__data[0] || (sin).__data[1])
/*
* Safely shut down an active connection. This can be triggered by
* errors in workers or by an external call to free the connection. The
@@ -1304,7 +1314,7 @@ static void scoutfs_net_shutdown_worker(struct work_struct *work)
trace_scoutfs_conn_shutdown_start(conn);
/* connected and accepted conns print a message */
if (conn->peername.sin_port != 0)
if (sockaddr_port_is_nonzero(conn->peername))
scoutfs_info(sb, "%s "SIN_FMT" -> "SIN_FMT,
conn->listening_conn ? "server closing" :
"client disconnected",
@@ -1434,6 +1444,7 @@ static void scoutfs_net_reconn_free_worker(struct work_struct *work)
DEFINE_CONN_FROM_WORK(conn, work, reconn_free_dwork.work);
struct super_block *sb = conn->sb;
struct scoutfs_net_connection *acc;
union scoutfs_inet_addr addr;
unsigned long now = jiffies;
unsigned long deadline = 0;
bool requeue = false;
@@ -1454,8 +1465,9 @@ restart:
if (!test_conn_fl(conn, shutting_down)) {
scoutfs_info(sb, "client "SIN_FMT" reconnect timed out, fencing",
SIN_ARG(&acc->last_peername));
scoutfs_sin_to_addr(&addr, &acc->last_peername);
ret = scoutfs_fence_start(sb, acc->rid,
acc->last_peername.sin_addr.s_addr,
&addr,
SCOUTFS_FENCE_CLIENT_RECONNECT);
if (ret) {
scoutfs_err(sb, "client fence returned err %d, shutting down server",
@@ -1517,7 +1529,8 @@ scoutfs_net_alloc_conn(struct super_block *sb,
conn->ordered_proc_wlists = kmalloc_array(nr, sizeof(struct scoutfs_work_list),
GFP_NOFS);
conn->workq = alloc_workqueue("scoutfs_net_%s",
WQ_UNBOUND, 0, name_suffix);
WQ_UNBOUND | WQ_NON_REENTRANT, 0,
name_suffix);
}
if (!conn || (info_size && !conn->info) || !conn->workq || !conn->ordered_proc_wlists) {
if (conn) {
@@ -1537,9 +1550,9 @@ scoutfs_net_alloc_conn(struct super_block *sb,
conn->req_funcs = req_funcs;
spin_lock_init(&conn->lock);
init_waitqueue_head(&conn->waitq);
conn->sockname.sin_family = AF_INET;
conn->peername.sin_family = AF_INET;
conn->last_peername.sin_family = AF_INET;
conn->sockname.ss_family = AF_UNSPEC;
conn->peername.ss_family = AF_UNSPEC;
conn->last_peername.ss_family = AF_UNSPEC;
INIT_LIST_HEAD(&conn->accepted_head);
INIT_LIST_HEAD(&conn->accepted_list);
conn->next_send_seq = 1;
@@ -1618,7 +1631,7 @@ void scoutfs_net_free_conn(struct super_block *sb,
*/
int scoutfs_net_bind(struct super_block *sb,
struct scoutfs_net_connection *conn,
struct sockaddr_in *sin)
struct sockaddr_storage *sin)
{
struct socket *sock = NULL;
int addrlen;
@@ -1629,7 +1642,7 @@ int scoutfs_net_bind(struct super_block *sb,
if (WARN_ON_ONCE(conn->sock))
return -EINVAL;
ret = sock_create_kern(&init_net, AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
ret = kc_sock_create_kern(sin->ss_family, SOCK_STREAM, IPPROTO_TCP, &sock);
if (ret)
goto out;
@@ -1641,7 +1654,7 @@ int scoutfs_net_bind(struct super_block *sb,
if (ret)
goto out;
addrlen = sizeof(struct sockaddr_in);
addrlen = sin->ss_family == AF_INET ? sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
ret = kernel_bind(sock, (struct sockaddr *)sin, addrlen);
if (ret)
goto out;
@@ -1650,14 +1663,14 @@ int scoutfs_net_bind(struct super_block *sb,
if (ret < 0)
goto out;
ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
if (ret < 0)
goto out;
ret = 0;
conn->sock = sock;
*sin = conn->sockname;
sin = (struct sockaddr_storage *)&conn->sockname;
out:
if (ret < 0 && sock)
@@ -1692,7 +1705,7 @@ static bool connect_result(struct scoutfs_net_connection *conn, int *error)
done = true;
*error = 0;
} else if (test_conn_fl(conn, shutting_down) ||
conn->connect_sin.sin_family == 0) {
conn->connect_sin.ss_family == AF_UNSPEC) {
done = true;
*error = -ESHUTDOWN;
}
@@ -1713,7 +1726,7 @@ static bool connect_result(struct scoutfs_net_connection *conn, int *error)
*/
int scoutfs_net_connect(struct super_block *sb,
struct scoutfs_net_connection *conn,
struct sockaddr_in *sin, unsigned long timeout_ms)
struct sockaddr_storage *sin, unsigned long timeout_ms)
{
int ret = 0;
@@ -2098,9 +2111,11 @@ int scoutfs_net_setup(struct super_block *sb)
scoutfs_tseq_tree_init(&ninf->msg_tseq_tree, net_tseq_show_msg);
ninf->shutdown_workq = alloc_workqueue("scoutfs_net_shutdown",
WQ_UNBOUND, 0);
WQ_UNBOUND | WQ_NON_REENTRANT,
0);
ninf->destroy_workq = alloc_workqueue("scoutfs_net_destroy",
WQ_UNBOUND, 0);
WQ_UNBOUND | WQ_NON_REENTRANT,
0);
if (!ninf->shutdown_workq || !ninf->destroy_workq) {
ret = -ENOMEM;
goto out;
+38 -21
View File
@@ -49,15 +49,15 @@ struct scoutfs_net_connection {
unsigned long flags; /* CONN_FL_* bitmask */
unsigned long reconn_deadline;
struct sockaddr_in connect_sin;
struct sockaddr_storage connect_sin;
unsigned long connect_timeout_ms;
struct socket *sock;
u64 rid;
u64 greeting_id;
struct sockaddr_in sockname;
struct sockaddr_in peername;
struct sockaddr_in last_peername;
struct sockaddr_storage sockname;
struct sockaddr_storage peername;
struct sockaddr_storage last_peername;
struct list_head accepted_head;
struct scoutfs_net_connection *listening_conn;
@@ -99,27 +99,44 @@ enum conn_flags {
CONN_FL_reconn_freeing = (1UL << 6), /* waiting done, setter frees */
};
#define SIN_FMT "%pIS:%u"
#define SIN_ARG(sin) sin, be16_to_cpu((sin)->sin_port)
#define SIN_FMT "%pISpc"
#define SIN_ARG(sin) sin
static inline void scoutfs_addr_to_sin(struct sockaddr_in *sin,
static inline void scoutfs_addr_to_sin(struct sockaddr_storage *sin,
union scoutfs_inet_addr *addr)
{
BUG_ON(addr->v4.family != cpu_to_le16(SCOUTFS_AF_IPV4));
sin->sin_family = AF_INET;
sin->sin_addr.s_addr = cpu_to_be32(le32_to_cpu(addr->v4.addr));
sin->sin_port = cpu_to_be16(le16_to_cpu(addr->v4.port));
if (addr->v4.family == cpu_to_le16(SCOUTFS_AF_IPV4)) {
struct sockaddr_in *sin4 = (struct sockaddr_in *)sin;
memset(sin, 0, sizeof(struct sockaddr_storage));
sin4->sin_family = AF_INET;
sin4->sin_addr.s_addr = cpu_to_be32(le32_to_cpu(addr->v4.addr));
sin4->sin_port = cpu_to_be16(le16_to_cpu(addr->v4.port));
} else if (addr->v6.family == cpu_to_le16(SCOUTFS_AF_IPV6)) {
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sin;
memset(sin, 0, sizeof(struct sockaddr_storage));
sin6->sin6_family = AF_INET6;
memcpy(&sin6->sin6_addr.in6_u.u6_addr8, &addr->v6.addr, 16);
sin6->sin6_port = cpu_to_be16(le16_to_cpu(addr->v6.port));
} else
BUG();
}
static inline void scoutfs_sin_to_addr(union scoutfs_inet_addr *addr, struct sockaddr_in *sin)
static inline void scoutfs_sin_to_addr(union scoutfs_inet_addr *addr, struct sockaddr_storage *sin)
{
BUG_ON(sin->sin_family != AF_INET);
memset(addr, 0, sizeof(union scoutfs_inet_addr));
addr->v4.family = cpu_to_le16(SCOUTFS_AF_IPV4);
addr->v4.addr = be32_to_le32(sin->sin_addr.s_addr);
addr->v4.port = be16_to_le16(sin->sin_port);
if (sin->ss_family == AF_INET) {
struct sockaddr_in *sin4 = (struct sockaddr_in *)sin;
memset(addr, 0, sizeof(union scoutfs_inet_addr));
addr->v4.family = cpu_to_le16(SCOUTFS_AF_IPV4);
addr->v4.addr = be32_to_le32(sin4->sin_addr.s_addr);
addr->v4.port = be16_to_le16(sin4->sin_port);
} else if (sin->ss_family == AF_INET6) {
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)sin;
memset(addr, 0, sizeof(union scoutfs_inet_addr));
addr->v6.family = cpu_to_le16(SCOUTFS_AF_IPV6);
memcpy(&addr->v6.addr, &sin6->sin6_addr.in6_u.u6_addr8, 16);
addr->v6.port = be16_to_le16(sin6->sin6_port);
} else
BUG();
}
struct scoutfs_net_connection *
@@ -130,10 +147,10 @@ scoutfs_net_alloc_conn(struct super_block *sb,
u64 scoutfs_net_client_rid(struct scoutfs_net_connection *conn);
int scoutfs_net_connect(struct super_block *sb,
struct scoutfs_net_connection *conn,
struct sockaddr_in *sin, unsigned long timeout_ms);
struct sockaddr_storage *sin, unsigned long timeout_ms);
int scoutfs_net_bind(struct super_block *sb,
struct scoutfs_net_connection *conn,
struct sockaddr_in *sin);
struct sockaddr_storage *sin);
void scoutfs_net_listen(struct super_block *sb,
struct scoutfs_net_connection *conn);
int scoutfs_net_submit_request(struct super_block *sb,
+139 -44
View File
@@ -145,14 +145,26 @@ struct quorum_info {
#define DECLARE_QUORUM_INFO_KOBJ(kobj, name) \
DECLARE_QUORUM_INFO(SCOUTFS_SYSFS_ATTRS_SB(kobj), name)
static bool quorum_slot_present(struct scoutfs_quorum_config *qconf, int i)
/*
 * Returns true when quorum slot i of the config holds an IPv4 address.
 *
 * The index is used directly to address qconf->slots[], so it is
 * checked against the exclusive upper bound, the same check that
 * quorum_slot_sin() performs.
 */
static bool quorum_slot_ipv4(struct scoutfs_quorum_config *qconf, int i)
{
	BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS);

	return qconf->slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4);
}
static void quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin)
/*
 * Returns true when quorum slot i of the config holds an IPv6 address.
 *
 * The index is used directly to address qconf->slots[], so it is
 * checked against the exclusive upper bound, the same check that
 * quorum_slot_sin() performs.
 */
static bool quorum_slot_ipv6(struct scoutfs_quorum_config *qconf, int i)
{
	BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS);

	return qconf->slots[i].addr.v6.family == cpu_to_le16(SCOUTFS_AF_IPV6);
}
/* A slot counts as present when it holds either an IPv4 or an IPv6 address. */
static bool quorum_slot_present(struct scoutfs_quorum_config *qconf, int i)
{
	if (quorum_slot_ipv4(qconf, i))
		return true;

	return quorum_slot_ipv6(qconf, i);
}
static void quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_storage *sin)
{
BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS);
@@ -179,11 +191,18 @@ static int create_socket(struct super_block *sb)
{
DECLARE_QUORUM_INFO(sb, qinf);
struct socket *sock = NULL;
struct sockaddr_in sin;
struct sockaddr_storage sin;
struct scoutfs_quorum_slot slot = qinf->qconf.slots[qinf->our_quorum_slot_nr];
int addrlen;
int ret;
ret = sock_create_kern(&init_net, PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
if (le16_to_cpu(slot.addr.v4.family) == SCOUTFS_AF_IPV4)
ret = kc_sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
else if (le16_to_cpu(slot.addr.v6.family) == SCOUTFS_AF_IPV6)
ret = kc_sock_create_kern(PF_INET6, SOCK_DGRAM, IPPROTO_UDP, &sock);
else
BUG();
if (ret) {
scoutfs_err(sb, "quorum couldn't create udp socket: %d", ret);
goto out;
@@ -192,9 +211,9 @@ static int create_socket(struct super_block *sb)
/* rather fail and retry than block waiting for free */
sock->sk->sk_allocation = GFP_ATOMIC;
addrlen = (le16_to_cpu(slot.addr.v4.family) == SCOUTFS_AF_IPV4) ?
sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
quorum_slot_sin(&qinf->qconf, qinf->our_quorum_slot_nr, &sin);
addrlen = sizeof(sin);
ret = kernel_bind(sock, (struct sockaddr *)&sin, addrlen);
if (ret) {
scoutfs_err(sb, "quorum failed to bind udp socket to "SIN_FMT": %d",
@@ -241,7 +260,7 @@ static int send_msg_members(struct super_block *sb, int type, u64 term, int only
.iov_base = &qmes,
.iov_len = sizeof(qmes),
};
struct sockaddr_in sin;
struct sockaddr_storage sin;
struct msghdr mh = {
.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL,
.msg_name = &sin,
@@ -542,10 +561,11 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_c
u64 term)
{
#define NR_OLD 2
struct scoutfs_quorum_block_event old[SCOUTFS_QUORUM_MAX_SLOTS][NR_OLD] = {{{0,}}};
struct scoutfs_quorum_block_event (*old)[NR_OLD];
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_quorum_block blk;
struct sockaddr_in sin;
struct sockaddr_storage sin;
union scoutfs_inet_addr addr;
const __le64 lefsid = cpu_to_le64(sbi->fsid);
const u64 rid = sbi->rid;
bool fence_started = false;
@@ -558,13 +578,20 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_c
BUILD_BUG_ON(SCOUTFS_QUORUM_BLOCKS < SCOUTFS_QUORUM_MAX_SLOTS);
old = kmalloc(NR_OLD * SCOUTFS_QUORUM_MAX_SLOTS * sizeof(struct scoutfs_quorum_block_event), GFP_KERNEL);
if (!old) {
ret = -ENOMEM;
goto out;
}
memset(old, 0, NR_OLD * SCOUTFS_QUORUM_MAX_SLOTS * sizeof(struct scoutfs_quorum_block_event));
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
if (!quorum_slot_present(qconf, i))
continue;
ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + i, &blk, false);
if (ret < 0)
goto out;
goto out_free;
/* elected leader still running */
if (le64_to_cpu(blk.events[SCOUTFS_QUORUM_EVENT_ELECT].term) >
@@ -598,14 +625,17 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_c
scoutfs_info(sb, "fencing previous leader "SCSBF" at term %llu in slot %u with address "SIN_FMT,
SCSB_LEFR_ARGS(lefsid, fence_rid),
le64_to_cpu(old[i][j].term), i, SIN_ARG(&sin));
ret = scoutfs_fence_start(sb, le64_to_cpu(fence_rid), sin.sin_addr.s_addr,
scoutfs_sin_to_addr(&addr, &sin);
ret = scoutfs_fence_start(sb, le64_to_cpu(fence_rid), &addr,
SCOUTFS_FENCE_QUORUM_BLOCK_LEADER);
if (ret < 0)
goto out;
goto out_free;
fence_started = true;
}
}
out_free:
kfree(old);
out:
err = scoutfs_fence_wait_fenced(sb, msecs_to_jiffies(SCOUTFS_QUORUM_FENCE_TO_MS));
if (ret == 0)
@@ -708,7 +738,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
struct quorum_info *qinf = container_of(work, struct quorum_info, work);
struct scoutfs_mount_options opts;
struct super_block *sb = qinf->sb;
struct sockaddr_in unused;
struct sockaddr_storage unused;
struct quorum_host_msg msg;
struct quorum_status qst = {0,};
struct hb_recording hbr;
@@ -990,7 +1020,7 @@ out:
* leader with the greatest elected term. If we get it wrong the
* connection will timeout and the client will try again.
*/
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin)
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_storage *sin)
{
struct scoutfs_super_block *super = NULL;
struct scoutfs_quorum_block blk;
@@ -1049,7 +1079,7 @@ u8 scoutfs_quorum_votes_needed(struct super_block *sb)
return qinf->votes_needed;
}
void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin)
void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_storage *sin)
{
return quorum_slot_sin(qconf, i, sin);
}
@@ -1208,8 +1238,13 @@ static int verify_quorum_slots(struct super_block *sb, struct quorum_info *qinf,
struct scoutfs_quorum_config *qconf)
{
char slots[(SCOUTFS_QUORUM_MAX_SLOTS * 3) + 1];
struct sockaddr_in other;
struct sockaddr_in sin;
struct sockaddr_storage other;
struct sockaddr_storage sin;
struct sockaddr_in *sin4;
struct sockaddr_in *other4;
struct sockaddr_in6 *sin6;
struct sockaddr_in6 *other6;
__le16 family = cpu_to_le16(SCOUTFS_AF_NONE);
int found = 0;
int ret;
int i;
@@ -1220,35 +1255,94 @@ static int verify_quorum_slots(struct super_block *sb, struct quorum_info *qinf,
if (!quorum_slot_present(qconf, i))
continue;
scoutfs_quorum_slot_sin(qconf, i, &sin);
if (!valid_ipv4_unicast(sin.sin_addr.s_addr)) {
scoutfs_err(sb, "quorum slot #%d has invalid ipv4 unicast address: "SIN_FMT,
i, SIN_ARG(&sin));
return -EINVAL;
}
if (!valid_ipv4_port(sin.sin_port)) {
scoutfs_err(sb, "quorum slot #%d has invalid ipv4 port number:"SIN_FMT,
i, SIN_ARG(&sin));
return -EINVAL;
}
for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) {
if (!quorum_slot_present(qconf, j))
continue;
scoutfs_quorum_slot_sin(qconf, j, &other);
if (sin.sin_addr.s_addr == other.sin_addr.s_addr &&
sin.sin_port == other.sin_port) {
scoutfs_err(sb, "quorum slots #%u and #%u have the same address: "SIN_FMT,
i, j, SIN_ARG(&sin));
if (quorum_slot_ipv4(qconf, i)) {
if (family == cpu_to_le16(SCOUTFS_AF_NONE)) {
family = cpu_to_le16(SCOUTFS_AF_IPV4);
} else if (family != cpu_to_le16(SCOUTFS_AF_IPV4)) {
scoutfs_err(sb, "quorum slot #%d is IPv4 but earlier slots are IPv6; mixed IPv4/IPv6 quorum is not supported",
i);
return -EINVAL;
}
}
found++;
scoutfs_quorum_slot_sin(qconf, i, &sin);
sin4 = (struct sockaddr_in *)&sin;
if (!valid_ipv4_unicast(sin4->sin_addr.s_addr)) {
scoutfs_err(sb, "quorum slot #%d has invalid ipv4 unicast address: "SIN_FMT,
i, SIN_ARG(&sin));
return -EINVAL;
}
if (!valid_ipv4_port(sin4->sin_port)) {
scoutfs_err(sb, "quorum slot #%d has invalid ipv4 port number:"SIN_FMT,
i, SIN_ARG(&sin));
return -EINVAL;
}
for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) {
if (!quorum_slot_ipv4(qconf, j))
continue;
scoutfs_quorum_slot_sin(qconf, j, &other);
other4 = (struct sockaddr_in *)&other;
if (sin4->sin_addr.s_addr == other4->sin_addr.s_addr &&
sin4->sin_port == other4->sin_port) {
scoutfs_err(sb, "quorum slots #%u and #%u have the same address: "SIN_FMT,
i, j, SIN_ARG(&sin));
return -EINVAL;
}
}
found++;
} else if (quorum_slot_ipv6(qconf, i)) {
if (family == cpu_to_le16(SCOUTFS_AF_NONE)) {
family = cpu_to_le16(SCOUTFS_AF_IPV6);
} else if (family != cpu_to_le16(SCOUTFS_AF_IPV6)) {
scoutfs_err(sb, "quorum slot #%d is IPv6 but earlier slots are IPv4; mixed IPv4/IPv6 quorum is not supported",
i);
return -EINVAL;
}
quorum_slot_sin(qconf, i, &sin);
sin6 = (struct sockaddr_in6 *)&sin;
if ((sin6->sin6_addr.in6_u.u6_addr32[0] == 0) && (sin6->sin6_addr.in6_u.u6_addr32[1] == 0) &&
(sin6->sin6_addr.in6_u.u6_addr32[2] == 0) && (sin6->sin6_addr.in6_u.u6_addr32[3] == 0)) {
scoutfs_err(sb, "quorum slot #%d has unspecified ipv6 address:"SIN_FMT,
i, SIN_ARG(&sin));
return -EINVAL;
}
if (sin6->sin6_addr.in6_u.u6_addr8[0] == 0xff) {
scoutfs_err(sb, "quorum slot #%d has multicast ipv6 address:"SIN_FMT,
i, SIN_ARG(&sin));
return -EINVAL;
}
if (!valid_ipv4_port(sin6->sin6_port)) {
scoutfs_err(sb, "quorum slot #%d has invalid ipv6 port number:"SIN_FMT,
i, SIN_ARG(&sin));
return -EINVAL;
}
for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) {
if (!quorum_slot_ipv6(qconf, j))
continue;
quorum_slot_sin(qconf, j, &other);
other6 = (struct sockaddr_in6 *)&other;
if ((ipv6_addr_equal(&sin6->sin6_addr, &other6->sin6_addr)) &&
(sin6->sin6_port == other6->sin6_port)) {
scoutfs_err(sb, "quorum slots #%u and #%u have the same address: "SIN_FMT,
i, j, SIN_ARG(&sin));
return -EINVAL;
}
}
found++;
}
}
if (found == 0) {
@@ -1332,7 +1426,8 @@ int scoutfs_quorum_setup(struct super_block *sb)
/* a high priority single threaded context without mem reclaim */
qinf->workq = alloc_workqueue("scoutfs_quorum_work",
WQ_UNBOUND | WQ_HIGHPRI, 1);
WQ_NON_REENTRANT | WQ_UNBOUND |
WQ_HIGHPRI, 1);
if (!qinf->workq) {
ret = -ENOMEM;
goto out;
+2 -2
View File
@@ -1,11 +1,11 @@
#ifndef _SCOUTFS_QUORUM_H_
#define _SCOUTFS_QUORUM_H_
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin);
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_storage *sin);
u8 scoutfs_quorum_votes_needed(struct super_block *sb);
void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i,
struct sockaddr_in *sin);
struct sockaddr_storage *sin);
int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_config *qconf,
u64 term);
+1 -1
View File
@@ -134,7 +134,7 @@ static int recov_finished(struct recov_info *recinf)
static void timer_callback(struct timer_list *timer)
{
struct recov_info *recinf = timer_container_of(recinf, timer, timer);
struct recov_info *recinf = from_timer(recinf, timer, timer);
recinf->timeout_fn(recinf->sb);
}
+21 -19
View File
@@ -1355,35 +1355,37 @@ DEFINE_EVENT(scoutfs_lock_class, scoutfs_lock_shrink,
);
DECLARE_EVENT_CLASS(scoutfs_net_class,
TP_PROTO(struct super_block *sb, struct sockaddr_in *name,
struct sockaddr_in *peer, struct scoutfs_net_header *nh),
TP_PROTO(struct super_block *sb, struct sockaddr_storage *name,
struct sockaddr_storage *peer, struct scoutfs_net_header *nh),
TP_ARGS(sb, name, peer, nh),
TP_STRUCT__entry(
SCSB_TRACE_FIELDS
si4_trace_define(name)
si4_trace_define(peer)
__field_struct(struct sockaddr_storage, name)
__field_struct(struct sockaddr_storage, peer)
snh_trace_define(nh)
),
TP_fast_assign(
SCSB_TRACE_ASSIGN(sb);
si4_trace_assign(name, name);
si4_trace_assign(peer, peer);
memcpy(&__entry->name, name, sizeof(struct sockaddr_storage));
memcpy(&__entry->peer, peer, sizeof(struct sockaddr_storage));
snh_trace_assign(nh, nh);
),
TP_printk(SCSBF" name "SI4_FMT" peer "SI4_FMT" nh "SNH_FMT,
SCSB_TRACE_ARGS, si4_trace_args(name), si4_trace_args(peer),
TP_printk(SCSBF" name "SIN_FMT" peer "SIN_FMT" nh "SNH_FMT,
SCSB_TRACE_ARGS,
&__entry->name,
&__entry->peer,
snh_trace_args(nh))
);
DEFINE_EVENT(scoutfs_net_class, scoutfs_net_send_message,
TP_PROTO(struct super_block *sb, struct sockaddr_in *name,
struct sockaddr_in *peer, struct scoutfs_net_header *nh),
TP_PROTO(struct super_block *sb, struct sockaddr_storage *name,
struct sockaddr_storage *peer, struct scoutfs_net_header *nh),
TP_ARGS(sb, name, peer, nh)
);
DEFINE_EVENT(scoutfs_net_class, scoutfs_net_recv_message,
TP_PROTO(struct super_block *sb, struct sockaddr_in *name,
struct sockaddr_in *peer, struct scoutfs_net_header *nh),
TP_PROTO(struct super_block *sb, struct sockaddr_storage *name,
struct sockaddr_storage *peer, struct scoutfs_net_header *nh),
TP_ARGS(sb, name, peer, nh)
);
@@ -1416,8 +1418,8 @@ DECLARE_EVENT_CLASS(scoutfs_net_conn_class,
__field(void *, sock)
__field(__u64, c_rid)
__field(__u64, greeting_id)
si4_trace_define(sockname)
si4_trace_define(peername)
__field_struct(struct sockaddr_storage, sockname)
__field_struct(struct sockaddr_storage, peername)
__field(unsigned char, e_accepted_head)
__field(void *, listening_conn)
__field(unsigned char, e_accepted_list)
@@ -1435,8 +1437,8 @@ DECLARE_EVENT_CLASS(scoutfs_net_conn_class,
__entry->sock = conn->sock;
__entry->c_rid = conn->rid;
__entry->greeting_id = conn->greeting_id;
si4_trace_assign(sockname, &conn->sockname);
si4_trace_assign(peername, &conn->peername);
memcpy(&__entry->sockname, &conn->sockname, sizeof(struct sockaddr_storage));
memcpy(&__entry->peername, &conn->peername, sizeof(struct sockaddr_storage));
__entry->e_accepted_head = !!list_empty(&conn->accepted_head);
__entry->listening_conn = conn->listening_conn;
__entry->e_accepted_list = !!list_empty(&conn->accepted_list);
@@ -1446,7 +1448,7 @@ DECLARE_EVENT_CLASS(scoutfs_net_conn_class,
__entry->e_resend_queue = !!list_empty(&conn->resend_queue);
__entry->recv_seq = atomic64_read(&conn->recv_seq);
),
TP_printk(SCSBF" flags %s rc_dl %lu cto %lu sk %p rid %llu grid %llu sn "SI4_FMT" pn "SI4_FMT" eah %u lc %p eal %u nss %llu nsi %llu esq %u erq %u rs %llu",
TP_printk(SCSBF" flags %s rc_dl %lu cto %lu sk %p rid %llu grid %llu sn "SIN_FMT" pn "SIN_FMT" eah %u lc %p eal %u nss %llu nsi %llu esq %u erq %u rs %llu",
SCSB_TRACE_ARGS,
print_conn_flags(__entry->flags),
__entry->reconn_deadline,
@@ -1454,8 +1456,8 @@ DECLARE_EVENT_CLASS(scoutfs_net_conn_class,
__entry->sock,
__entry->c_rid,
__entry->greeting_id,
si4_trace_args(sockname),
si4_trace_args(peername),
&__entry->sockname,
&__entry->peername,
__entry->e_accepted_head,
__entry->listening_conn,
__entry->e_accepted_list,
+5 -5
View File
@@ -3639,7 +3639,7 @@ static bool invalid_mounted_client_item(struct scoutfs_btree_item_ref *iref)
* it's acceptable to see -EEXIST.
*/
static int insert_mounted_client(struct super_block *sb, u64 rid, u64 gr_flags,
struct sockaddr_in *sin)
struct sockaddr_storage *sin)
{
DECLARE_SERVER_INFO(sb, server);
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
@@ -4392,7 +4392,7 @@ static void fence_pending_recov_worker(struct work_struct *work)
break;
}
ret = scoutfs_fence_start(sb, rid, le32_to_be32(addr.v4.addr),
ret = scoutfs_fence_start(sb, rid, &addr,
SCOUTFS_FENCE_CLIENT_RECOVERY);
if (ret < 0) {
scoutfs_err(sb, "fence returned err %d, shutting down server", ret);
@@ -4543,7 +4543,7 @@ static void scoutfs_server_worker(struct work_struct *work)
struct scoutfs_net_connection *conn = NULL;
struct scoutfs_mount_options opts;
DECLARE_WAIT_QUEUE_HEAD(waitq);
struct sockaddr_in sin;
struct sockaddr_storage sin;
bool alloc_init = false;
u64 max_seq;
int ret;
@@ -4552,7 +4552,7 @@ static void scoutfs_server_worker(struct work_struct *work)
scoutfs_options_read(sb, &opts);
scoutfs_quorum_slot_sin(&server->qconf, opts.quorum_slot_nr, &sin);
scoutfs_info(sb, "server starting at "SIN_FMT, SIN_ARG(&sin));
scoutfs_info(sb, "server starting at "SIN_FMT, &sin);
scoutfs_block_writer_init(sb, &server->wri);
server->finalize_sent_seq = 0;
@@ -4751,7 +4751,7 @@ int scoutfs_server_setup(struct super_block *sb)
INIT_DELAYED_WORK(&server->reclaim_dwork, reclaim_worker);
server->wq = alloc_workqueue("scoutfs_server",
WQ_UNBOUND, 0);
WQ_UNBOUND | WQ_NON_REENTRANT, 0);
if (!server->wq) {
kfree(server);
return -ENOMEM;
-21
View File
@@ -1,27 +1,6 @@
#ifndef _SCOUTFS_SERVER_H_
#define _SCOUTFS_SERVER_H_
#define SI4_FMT "%u.%u.%u.%u:%u"
#define si4_trace_define(name) \
__field(__u32, name##_addr) \
__field(__u16, name##_port)
#define si4_trace_assign(name, sin) \
do { \
__typeof__(sin) _sin = (sin); \
\
__entry->name##_addr = be32_to_cpu(_sin->sin_addr.s_addr); \
__entry->name##_port = be16_to_cpu(_sin->sin_port); \
} while(0)
#define si4_trace_args(name) \
(__entry->name##_addr >> 24), \
(__entry->name##_addr >> 16) & 255, \
(__entry->name##_addr >> 8) & 255, \
__entry->name##_addr & 255, \
__entry->name##_port
#define SNH_FMT \
"seq %llu recv_seq %llu id %llu data_len %u cmd %u flags 0x%x error %u"
#define SNH_ARG(nh) \
+2 -1
View File
@@ -2392,7 +2392,8 @@ int scoutfs_srch_setup(struct super_block *sb)
goto out;
srinf->workq = alloc_workqueue("scoutfs_srch_compact",
WQ_UNBOUND | WQ_HIGHPRI, 0);
WQ_NON_REENTRANT | WQ_UNBOUND |
WQ_HIGHPRI, 0);
if (!srinf->workq) {
ret = -ENOMEM;
goto out;
+12
View File
@@ -46,6 +46,7 @@ static struct scoutfs_tseq_entry *tseq_rb_next(struct scoutfs_tseq_entry *ent)
return rb_entry(node, struct scoutfs_tseq_entry, node);
}
#ifdef KC_RB_TREE_AUGMENTED_COMPUTE_MAX
static bool tseq_compute_total(struct scoutfs_tseq_entry *ent, bool exit)
{
loff_t total = 1 + tseq_node_total(ent->node.rb_left) +
@@ -60,6 +61,17 @@ static bool tseq_compute_total(struct scoutfs_tseq_entry *ent, bool exit)
RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
node, total, tseq_compute_total);
#else
/*
 * Compute the total stored for an entry: one for the entry itself plus
 * the totals reported for its left and right child nodes.
 */
static loff_t tseq_compute_total(struct scoutfs_tseq_entry *ent)
{
	struct rb_node *left = ent->node.rb_left;
	struct rb_node *right = ent->node.rb_right;

	return 1 + tseq_node_total(left) + tseq_node_total(right);
}
RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
node, loff_t, total, tseq_compute_total);
#endif
void scoutfs_tseq_tree_init(struct scoutfs_tseq_tree *tree,
scoutfs_tseq_show_t show)
+35 -1
View File
@@ -16,7 +16,6 @@
#include <linux/xattr.h>
#include <linux/crc32c.h>
#include <linux/posix_acl.h>
#include <linux/iversion.h>
#include "format.h"
#include "inode.h"
@@ -995,17 +994,38 @@ unlock:
return ret;
}
#ifndef KC_XATTR_STRUCT_XATTR_HANDLER
/*
 * Newer kernels pass the xattr handler to the get/set methods and
 * provide xattr_full_name(handler) to rewind the name back over the
 * prefix that was skipped.  Without the handler argument we have to
 * Just Know that rewinding is possible and do it by hand: callers pass
 * the handler flags, which we treat as the length of the skipped
 * prefix.  This is meant to end up as a compat hook that picks between
 * the kernel helper and this hack.
 */
static const char *full_name_hack(const char *name, int len)
{
	const char *full = name - len;

	return full;
}
#endif
/*
 * xattr handler get method.  Both the prototype and the way the full
 * attribute name is recovered depend on KC_XATTR_STRUCT_XATTR_HANDLER:
 * when it is defined the handler is an argument and xattr_full_name()
 * is used, otherwise the handler flags are an argument and
 * full_name_hack() is used.  In both cases the resulting name is
 * passed on to scoutfs_xattr_get().
 */
static int scoutfs_xattr_get_handler
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
(const struct xattr_handler *handler, struct dentry *dentry,
struct inode *inode, const char *name, void *value,
size_t size)
{
name = xattr_full_name(handler, name);
#else
(struct dentry *dentry, const char *name,
void *value, size_t size, int handler_flags)
{
/* handler_flags is used as the length of the prefix to rewind over */
name = full_name_hack(name, handler_flags);
#endif
return scoutfs_xattr_get(dentry, name, value, size);
}
static int scoutfs_xattr_set_handler
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
(const struct xattr_handler *handler,
KC_VFS_NS_DEF
struct dentry *dentry,
@@ -1013,6 +1033,12 @@ static int scoutfs_xattr_set_handler
size_t size, int flags)
{
name = xattr_full_name(handler, name);
#else
(struct dentry *dentry, const char *name,
const void *value, size_t size, int flags, int handler_flags)
{
name = full_name_hack(name, handler_flags);
#endif
return scoutfs_xattr_set(dentry, name, value, size, flags);
}
@@ -1045,14 +1071,22 @@ static const struct xattr_handler scoutfs_xattr_security_handler = {
};
/*
 * xattr handler for the POSIX access ACL attribute.  Depending on
 * KC_XATTR_HANDLER_NAME the attribute string is stored in the handler's
 * .name or in its .prefix field.  The handler carries ACL_TYPE_ACCESS
 * in .flags and routes gets and sets to the scoutfs ACL xattr methods.
 */
static const struct xattr_handler scoutfs_xattr_acl_access_handler = {
#ifdef KC_XATTR_HANDLER_NAME
.name = XATTR_NAME_POSIX_ACL_ACCESS,
#else
.prefix = XATTR_NAME_POSIX_ACL_ACCESS,
#endif
.flags = ACL_TYPE_ACCESS,
.get = scoutfs_acl_get_xattr,
.set = scoutfs_acl_set_xattr,
};
};
static const struct xattr_handler scoutfs_xattr_acl_default_handler = {
#ifdef KC_XATTR_HANDLER_NAME
.name = XATTR_NAME_POSIX_ACL_DEFAULT,
#else
.prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
#endif
.flags = ACL_TYPE_DEFAULT,
.get = scoutfs_acl_get_xattr,
.set = scoutfs_acl_set_xattr,
-7
View File
@@ -171,13 +171,6 @@ t_filter_dmesg()
# orphan log trees reclaim is handled, not an error
re="$re|scoutfs .* reclaiming orphan log trees"
# nfs can emit a whole range of messages we can ignore
re="$re|Installing knfsd .*"
re="$re|nfsd: .*"
re="$re|NFSD: .*"
re="$re|RPC: .*"
re="$re|FS-Cache: .*"
# fencing tests force unmounts and trigger timeouts
re="$re|scoutfs .* forcing unmount"
re="$re|scoutfs .* reconnect timed out"
+7
View File
@@ -0,0 +1,7 @@
== mkfs rejects mixed v4/v6 quorum
rc: 64
== mkfs all-v4, mount three members, cross-mount signature visible
== change-quorum-config rejects mixed v4/v6 quorum
rc: 64
== switch v4 -> v6, signature survives, cross-mount write again
== switch v6 -> v4, signatures survive
-32
View File
@@ -1,32 +0,0 @@
== write via NFS, read both sides
== POSIX ACL set via NFS, read both sides
user::rw-
user:22222:rw-
group::r--
mask::rw-
other::r--
user::rw-
user:22222:rw-
group::r--
mask::rw-
other::r--
== POSIX ACL set on scoutfs, read via NFS
user::rw-
user:22222:rw-
group::r--
group:44444:r--
mask::rw-
other::r--
== default ACL inheritance via NFS
user::rw-
user:22222:rwx #effective:rw-
group::r-x #effective:r--
mask::rw-
other::r--
== NFS read demand-stages a released file
1
== cleanup
+6 -6
View File
@@ -8,10 +8,10 @@
/mnt/test/test/data-prealloc/file-1: extents: 32
/mnt/test/test/data-prealloc/file-2: extents: 32
== any writes to region prealloc get full extents
/mnt/test/test/data-prealloc/file-1: extents: 8
/mnt/test/test/data-prealloc/file-2: extents: 8
/mnt/test/test/data-prealloc/file-1: extents: 8
/mnt/test/test/data-prealloc/file-2: extents: 8
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
== streaming offline writes get full extents either way
/mnt/test/test/data-prealloc/file-1: extents: 4
/mnt/test/test/data-prealloc/file-2: extents: 4
@@ -20,8 +20,8 @@
== goofy preallocation amounts work
/mnt/test/test/data-prealloc/file-1: extents: 6
/mnt/test/test/data-prealloc/file-2: extents: 6
/mnt/test/test/data-prealloc/file-1: extents: 10
/mnt/test/test/data-prealloc/file-2: extents: 10
/mnt/test/test/data-prealloc/file-1: extents: 6
/mnt/test/test/data-prealloc/file-2: extents: 6
/mnt/test/test/data-prealloc/file-1: extents: 3
/mnt/test/test/data-prealloc/file-2: extents: 3
== block writes into region allocs hole
+4
View File
@@ -0,0 +1,4 @@
== ensuring utils and module for old versions
== unmounting test fs and removing test module
== testing combinations of old and new format versions
== restoring test module and mount
+1 -1
View File
@@ -383,7 +383,7 @@ fi
quo=""
if [ -n "$T_MKFS" ]; then
for i in $(seq -0 $((T_QUORUM - 1))); do
quo="$quo -Q $i,127.0.0.1,$((T_TEST_PORT + i))"
quo="$quo -Q $i,::1,$((T_TEST_PORT + i))"
done
msg "making new filesystem with $T_QUORUM quorum members"
+2 -1
View File
@@ -1,9 +1,9 @@
export-get-name-parent.sh
basic-block-counts.sh
basic-bad-mounts.sh
basic-inetaddr.sh
basic-posix-acl.sh
basic-acl-consistency.sh
basic-nfs.sh
inode-items-updated.sh
simple-inode-index.sh
simple-staging.sh
@@ -19,6 +19,7 @@ offline-extent-waiting.sh
move-blocks.sh
projects.sh
large-fragmented-free.sh
format-version-forward-back.sh
enospc.sh
mmap.sh
srch-safe-merge-pos.sh
+78
View File
@@ -0,0 +1,78 @@
#
# Test that mixed ipv4/6 fails through mkfs/quorum change and that
# users can migrate from ipv4 to v6 and back.
#
t_require_commands dmsetup blockdev cmp
P0=$T_SCRATCH_PORT
P1=$((T_SCRATCH_PORT + 1))
P2=$((T_SCRATCH_PORT + 2))
SIG=$T_TMP.sig
seq 1 4096 > "$SIG"
trap '
umount $T_TMPDIR/m0 $T_TMPDIR/m1 $T_TMPDIR/m2 2>/dev/null
dmsetup remove _bia_m0 _bia_m1 _bia_m2 _bia_d0 _bia_d1 _bia_d2 2>/dev/null
' EXIT
mkdir -p "$T_TMPDIR/m0" "$T_TMPDIR/m1" "$T_TMPDIR/m2"
for nv in "m0 $T_EX_META_DEV" "m1 $T_EX_META_DEV" "m2 $T_EX_META_DEV" \
"d0 $T_EX_DATA_DEV" "d1 $T_EX_DATA_DEV" "d2 $T_EX_DATA_DEV"; do
set -- $nv
t_quiet dmsetup create _bia_$1 --table "0 $(blockdev --getsz $2) linear $2 0"
done
# Mount quorum member $1 on $T_TMPDIR/m$1, using the _bia_m$1 device as
# the metadata device and _bia_d$1 as the data device.
mnt() {
	local nr=$1
	local opts="metadev_path=/dev/mapper/_bia_m$nr,quorum_slot_nr=$nr"

	mount -t scoutfs -o "$opts" "/dev/mapper/_bia_d$nr" "$T_TMPDIR/m$nr"
}
# Start the mounts of members 0, 1 and 2 in the background and wait for
# all of them to finish.
mount_all() {
	local nr

	for nr in 0 1 2; do
		mnt "$nr" &
	done
	wait
}
# Unmount the three member mounts in the background and wait for all of
# them to finish.  The paths are quoted, like every other use of
# $T_TMPDIR in this script, so a directory containing whitespace isn't
# split into multiple arguments.
umount_all() {
	umount "$T_TMPDIR/m0" &
	umount "$T_TMPDIR/m1" &
	umount "$T_TMPDIR/m2" &
	wait
}
# Check that the signature file is visible with identical contents
# through each of the three mounts.  The first mismatch calls t_fail
# with the message in $1.
verify() {
	local m

	for m in m0 m1 m2; do
		cmp -s "$SIG" "$T_TMPDIR/$m/sig" && continue
		t_fail "$1"
		return
	done
}
echo "== mkfs rejects mixed v4/v6 quorum"
t_rc scoutfs mkfs -f -Q 0,127.0.0.1,$P0 -Q 1,::1,$P1 -Q 2,127.0.0.1,$P2 /dev/mapper/_bia_m0 /dev/mapper/_bia_d0
echo "== mkfs all-v4, mount three members, cross-mount signature visible"
t_quiet scoutfs mkfs -f -Q 0,127.0.0.1,$P0 -Q 1,127.0.0.1,$P1 -Q 2,127.0.0.1,$P2 /dev/mapper/_bia_m0 /dev/mapper/_bia_d0
mount_all
cp "$SIG" "$T_TMPDIR/m0/sig"
verify "v4 initial"
umount_all
echo "== change-quorum-config rejects mixed v4/v6 quorum"
t_rc scoutfs change-quorum-config --offline -Q 0,127.0.0.1,$P0 -Q 1,::1,$P1 -Q 2,127.0.0.1,$P2 /dev/mapper/_bia_m0
echo "== switch v4 -> v6, signature survives, cross-mount write again"
t_quiet scoutfs change-quorum-config --offline -Q 0,::1,$P0 -Q 1,::1,$P1 -Q 2,::1,$P2 /dev/mapper/_bia_m0
mount_all
verify "after v4->v6"
cp "$SIG" "$T_TMPDIR/m1/sig-v6"
cmp -s "$SIG" "$T_TMPDIR/m0/sig-v6" || t_fail "v6 cross-mount write not visible on m0"
cmp -s "$SIG" "$T_TMPDIR/m2/sig-v6" || t_fail "v6 cross-mount write not visible on m2"
umount_all
echo "== switch v6 -> v4, signatures survive"
t_quiet scoutfs change-quorum-config --offline -Q 0,127.0.0.1,$P0 -Q 1,127.0.0.1,$P1 -Q 2,127.0.0.1,$P2 /dev/mapper/_bia_m0
mount_all
verify "after v6->v4"
cmp -s "$SIG" "$T_TMPDIR/m0/sig-v6" || t_fail "after v6->v4 sig-v6 lost"
umount_all
t_pass
-86
View File
@@ -1,86 +0,0 @@
#
# Test basic scoutfs-nfs interactions:
# - read/write
# - stage/release and data wait
# - nfs setacl/getacl mapping
#
t_require_commands scoutfs setfacl getfacl exportfs mount.nfs umount \
stat dd cmp systemctl
systemctl start nfs-server >> "$T_TMPDIR/nfs.log" 2>&1 || \
t_skip "nfs-server not available"
# Keep file creation modes deterministic for the ACL golden output.
umask 022
EXPORT_OPTS="rw,async,no_root_squash,no_subtree_check,fsid=42"
NFS_MNT="$T_TMP.nfs"
NFS_DIR="$NFS_MNT/test/basic-nfs"
filter() { sed "s@$T_TMPDIR@T_TMPDIR@g" | t_filter_fs; }
gf() { getfacl -n --omit-header "$@" 2>/dev/null; }
teardown_nfs()
{
(
umount "$NFS_MNT"
exportfs -u "127.0.0.1:$T_M0"
exportfs -f
systemctl stop nfs-server
rmdir "$NFS_MNT"
) >> "$T_TMPDIR/nfs.log" 2>&1
}
trap teardown_nfs EXIT
exportfs -u "127.0.0.1:$T_M0" >> "$T_TMPDIR/nfs.log" 2>&1 || true
t_quiet mkdir -p "$NFS_MNT"
exportfs -o "$EXPORT_OPTS" "127.0.0.1:$T_M0" >> "$T_TMPDIR/nfs.log" 2>&1
mount.nfs -o vers=3,noac,actimeo=0 "127.0.0.1:$T_M0" "$NFS_MNT" >> "$T_TMPDIR/nfs.log" 2>&1
test -d "$NFS_DIR" || t_fail "test dir $NFS_DIR not visible over NFS"
echo "== write via NFS, read both sides"
dd if=/dev/urandom bs=4096 count=1 of="$T_TMP.data" status=none
cp "$T_TMP.data" "$NFS_DIR/file"
cmp "$T_TMP.data" "$T_D0/file"
cmp "$T_TMP.data" "$NFS_DIR/file"
echo "== POSIX ACL set via NFS, read both sides"
setfacl -m u:22222:rw "$NFS_DIR/file" 2>&1 | filter
gf "$NFS_DIR/file"
gf "$T_D0/file"
echo "== POSIX ACL set on scoutfs, read via NFS"
setfacl -m g:44444:r "$T_D0/file" 2>&1 | filter
gf "$NFS_DIR/file"
echo "== default ACL inheritance via NFS"
mkdir "$NFS_DIR/d"
setfacl -d -m u:22222:rwx "$NFS_DIR/d" 2>&1 | filter
touch "$NFS_DIR/d/child"
gf "$NFS_DIR/d/child"
echo "== NFS read demand-stages a released file"
dd if=/dev/urandom bs=4096 count=1 of="$T_TMP.big" status=none
cp "$T_TMP.big" "$T_D0/big"
sync
vers=$(scoutfs stat -s data_version "$T_D0/big")
t_quiet scoutfs release "$T_D0/big" -V "$vers" -o 0 -l 4K
# NFS read against the offline file blocks in scoutfs_read waiting
# for the data to come back online.
cat "$NFS_DIR/big" > "$T_TMP.read" &
read_pid=$!
sleep 1
scoutfs data-waiting -B 0 -I 0 -p "$T_D0" | wc -l
t_quiet scoutfs stage "$T_TMP.big" "$T_D0/big" -V "$vers" -o 0 -l 4096
wait "$read_pid"
cmp "$T_TMP.big" "$T_TMP.read"
echo "== cleanup"
rm -f "$T_D0/file" "$T_D0/big"
rm -rf "$T_D0/d"
t_pass
+184
View File
@@ -0,0 +1,184 @@
#
# Test our basic ability to work with different format versions.
#
# The current code being tested has a range of supported format
# versions. For each of the older supported format versions we have a
# git hash of the commit before the next greater version was introduced.
# We build versions of the scoutfs utility and kernel module for the
# last commit in tree that had a lesser supported version as its max
# supported version. We use those binaries to test forward and back
# compat as new and old code works with a persistent volume with a given
# format version.
#
# not supported on el8 or higher
if [ $(source /etc/os-release ; echo ${VERSION_ID:0:1}) -gt 7 ]; then
t_skip_permitted "Unsupported OS version"
fi
mount_has_format_version()
{
local mnt="$1"
local vers="$2"
local sysfs_fmt_vers="$(t_sysfs_path_from_mnt $SCR)/format_version"
test "$(cat $sysfs_fmt_vers)" == "$vers"
}
SCR="/mnt/scoutfs.scratch"
MIN=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_min:"){print $2}')
MAX=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_max:"){print $2}')
echo "min: $MIN max: $MAX" > "$T_TMP.log"
test "$MIN" -gt 0 -a "$MAX" -gt 0 -a "$MIN" -le "$MAX" || \
t_fail "parsed bad versions, min: $MIN max: $MAX"
test "$MIN" == "$MAX" && \
t_skip "only one supported format version: $MIN"
# prepare dir and wipe any weird old partial state
builds="$T_RESULTS/format_version_builds"
mkdir -p "$builds"
echo "== ensuring utils and module for old versions"
declare -A commits
commits[1]=c3c4b080
for vers in $(seq $MIN $((MAX - 1))); do
dir="$builds/$vers"
platform=$(uname -rp)
buildmark="$dir/buildmark"
commit="${commits[$vers]}"
test -n "$commit" || \
t_fail "no commit for vers $vers"
# have our files for this version
test "$(cat $buildmark 2>&1)" == "$platform" && \
continue
# build as one big sequence of commands that can return failure
(
set -o pipefail
rm -rf $dir &&
mkdir -p $dir/building &&
cd "$T_TESTS/.." &&
git archive --format=tar "$commit" | tar -C "$dir/building" -xf - &&
cd - &&
find $dir &&
make -C "$dir/building" &&
mv $dir/building/utils/src/scoutfs $dir &&
mv $dir/building/kmod/src/scoutfs.ko $dir &&
rm -rf $dir/building &&
echo "$platform" > $buildmark &&
find $dir &&
cat $buildmark
) >> "$T_TMP.log" 2>&1 || t_fail "version $vers build failed"
done
echo "== unmounting test fs and removing test module"
t_quiet t_umount_all
t_quiet rmmod scoutfs
echo "== testing combinations of old and new format versions"
mkdir -p "$SCR"
for vers in $(seq $MIN $((MAX - 1))); do
old_scoutfs="$builds/$vers/scoutfs"
old_module="$builds/$vers/scoutfs.ko"
echo "mkfs $vers" >> "$T_TMP.log"
t_quiet $old_scoutfs mkfs -f -Q 0,127.0.0.1,$T_SCRATCH_PORT "$T_EX_META_DEV" "$T_EX_DATA_DEV" \
|| t_fail "mkfs $vers failed"
echo "mount $vers with $vers" >> "$T_TMP.log"
t_quiet insmod $old_module
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_quiet mount_has_format_version "$SCR" "$vers"
echo "creating files in $vers" >> "$T_TMP.log"
t_quiet touch "$SCR/file-"{1,2,3}
stat "$SCR"/file-* > "$T_TMP.stat" || \
t_fail "stat in $vers failed"
echo "remounting $vers fs with $MAX" >> "$T_TMP.log"
t_quiet umount "$SCR"
rmmod scoutfs
insmod "$T_MODULE"
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_quiet mount_has_format_version "$SCR" "$vers"
echo "verifying stat in $vers with $MAX" >> "$T_TMP.log"
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
echo "keep/update/del existing, create new in $vers" >> "$T_TMP.log"
t_quiet touch "$SCR/file-2"
t_quiet rm -f "$SCR/file-3"
t_quiet touch "$SCR/file-4"
stat "$SCR"/file-* > "$T_TMP.stat" || \
t_fail "stat in $vers failed"
echo "remounting $vers fs with $vers" >> "$T_TMP.log"
t_quiet umount "$SCR"
rmmod scoutfs
insmod "$old_module"
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_quiet mount_has_format_version "$SCR" "$vers"
echo "verifying stat in $vers with $vers" >> "$T_TMP.log"
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
echo "changing format vers to $MAX" >> "$T_TMP.log"
t_quiet umount "$SCR"
rmmod scoutfs
t_quiet scoutfs change-format-version -F -V $MAX $T_EX_META_DEV "$T_EX_DATA_DEV"
echo "mount fs $MAX with old $vers should fail" >> "$T_TMP.log"
insmod "$old_module"
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR" >> "$T_TMP.log" 2>&1
if [ "$?" == "0" ]; then
umount "$SCR"
t_fail "old code ver $vers able to mount new ver $MAX"
fi
echo "remounting $MAX fs with $MAX" >> "$T_TMP.log"
rmmod scoutfs
insmod "$T_MODULE"
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_quiet mount_has_format_version "$SCR" "$MAX"
echo "verifying stat in $MAX with $MAX" >> "$T_TMP.log"
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
echo "keep/update/del existing, create new in $MAX" >> "$T_TMP.log"
t_quiet touch "$SCR/file-2"
t_quiet rm -f "$SCR/file-4"
t_quiet touch "$SCR/file-5"
stat "$SCR"/file-* > "$T_TMP.stat" || \
t_fail "stat in $MAX failed"
echo "remounting $MAX fs with $MAX again" >> "$T_TMP.log"
t_quiet umount "$SCR"
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
"$T_EX_DATA_DEV" "$SCR"
t_quiet mount_has_format_version "$SCR" "$MAX"
echo "verifying stat in $MAX with $MAX again" >> "$T_TMP.log"
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
echo "done with old vers $vers" >> "$T_TMP.log"
t_quiet umount "$SCR"
rmmod scoutfs
done
echo "== restoring test module and mount"
insmod "$T_MODULE"
t_mount_all
t_pass
+22 -11
View File
@@ -160,15 +160,16 @@ int parse_timespec(char *str, struct timespec *ts)
* Parse a quorum slot specification string "NR,ADDR,PORT" into its
* component parts. We use sscanf to both parse the leading NR and
* trailing PORT integers, and to pull out the inner ADDR string which
* is then parsed to make sure that it's a valid unicast ipv4 address.
* is then parsed to make sure that it's a valid unicast ip address.
* We require that all components be specified, and sscanf will check
* this by the number of matches it returns.
*/
int parse_quorum_slot(struct scoutfs_quorum_slot *slot, char *arg)
{
#define ADDR_CHARS 45 /* max ipv6 */
char addr[ADDR_CHARS + 1] = {'\0',};
#define ADDR_CHARS 45 /* (INET6_ADDRSTRLEN - 1) */
char addr[INET6_ADDRSTRLEN] = {'\0',};
struct in_addr in;
struct in6_addr in6;
int port;
int parsed;
int nr;
@@ -206,15 +207,25 @@ int parse_quorum_slot(struct scoutfs_quorum_slot *slot, char *arg)
return -EINVAL;
}
if (inet_aton(addr, &in) == 0 || htonl(in.s_addr) == 0 ||
htonl(in.s_addr) == UINT_MAX) {
printf("invalid ipv4 address '%s' in quorum slot '%s'\n",
addr, arg);
return -EINVAL;
/*
 * Try to parse the address as ipv4 first and then as ipv6.  An
 * address that parses as neither family must be rejected here,
 * otherwise we would return the slot nr as success while leaving
 * the slot's address untouched.
 */
if (inet_pton(AF_INET, addr, &in) == 1) {
	/* s_addr is in network order; refuse 0.0.0.0 and 255.255.255.255 */
	if (htonl(in.s_addr) == 0 || htonl(in.s_addr) == UINT_MAX) {
		printf("invalid ipv4 address '%s' in quorum slot '%s'\n",
		       addr, arg);
		return -EINVAL;
	}
	slot->addr.v4.family = cpu_to_le16(SCOUTFS_AF_IPV4);
	slot->addr.v4.addr = cpu_to_le32(htonl(in.s_addr));
	slot->addr.v4.port = cpu_to_le16(port);
} else if (inet_pton(AF_INET6, addr, &in6) == 1) {
	/* refuse the unspecified address (::) and multicast addresses */
	if (IN6_IS_ADDR_UNSPECIFIED(&in6) || IN6_IS_ADDR_MULTICAST(&in6)) {
		printf("invalid ipv6 address '%s' in quorum slot '%s'\n",
		       addr, arg);
		return -EINVAL;
	}
	slot->addr.v6.family = cpu_to_le16(SCOUTFS_AF_IPV6);
	memcpy(slot->addr.v6.addr, &in6, 16);
	slot->addr.v6.port = cpu_to_le16(port);
} else {
	/* neither a valid ipv4 nor a valid ipv6 address string */
	printf("invalid ip address '%s' in quorum slot '%s'\n",
	       addr, arg);
	return -EINVAL;
}
slot->addr.v4.family = cpu_to_le16(SCOUTFS_AF_IPV4);
slot->addr.v4.addr = cpu_to_le32(htonl(in.s_addr));
slot->addr.v4.port = cpu_to_le16(port);
return nr;
}
+40 -17
View File
@@ -28,6 +28,7 @@
#include "srch.h"
#include "leaf_item_hash.h"
#include "dev.h"
#include "quorum.h"
static void print_block_header(struct scoutfs_block_header *hdr, int size)
{
@@ -400,12 +401,20 @@ static int print_mounted_client_entry(struct scoutfs_key *key, u64 seq, u8 flags
{
struct scoutfs_mounted_client_btree_val *mcv = val;
struct in_addr in;
char ip6addr[INET6_ADDRSTRLEN];
memset(&in, 0, sizeof(in));
in.s_addr = htonl(le32_to_cpu(mcv->addr.v4.addr));
if (mcv->addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4)) {
in.s_addr = htonl(le32_to_cpu(mcv->addr.v4.addr));
printf(" rid %016llx ipv4_addr %s flags 0x%x\n",
le64_to_cpu(key->skmc_rid), inet_ntoa(in), mcv->flags);
printf(" rid %016llx ipv4_addr %s flags 0x%x\n",
le64_to_cpu(key->skmc_rid), inet_ntoa(in), mcv->flags);
} else if (mcv->addr.v6.family == cpu_to_le16(SCOUTFS_AF_IPV6)) {
	/*
	 * The v6 address must be formatted with AF_INET6.  Passing
	 * AF_INET would make inet_ntop() treat the buffer as a 4 byte
	 * ipv4 address and print a bogus dotted quad.
	 */
	printf(" rid %016llx ipv6_addr %s flags 0x%x\n",
	       le64_to_cpu(key->skmc_rid),
	       inet_ntop(AF_INET6, mcv->addr.v6.addr, ip6addr, INET6_ADDRSTRLEN),
	       mcv->flags);
}
return 0;
}
@@ -891,26 +900,40 @@ static int print_btree_leaf_items(int fd, struct scoutfs_super_block *super,
static char *alloc_addr_str(union scoutfs_inet_addr *ia)
{
struct in_addr addr;
char ip6addr[INET6_ADDRSTRLEN];
char *quad;
char *str;
int len;
memset(&addr, 0, sizeof(addr));
addr.s_addr = htonl(le32_to_cpu(ia->v4.addr));
quad = inet_ntoa(addr);
if (quad == NULL)
return NULL;
if (le16_to_cpu(ia->v4.family) == SCOUTFS_AF_IPV4) {
memset(&addr, 0, sizeof(addr));
addr.s_addr = htonl(le32_to_cpu(ia->v4.addr));
quad = inet_ntoa(addr);
if (quad == NULL)
return NULL;
len = snprintf(NULL, 0, "%s:%u", quad, le16_to_cpu(ia->v4.port));
if (len < 1 || len > 22)
return NULL;
len = snprintf(NULL, 0, "%s:%u", quad, le16_to_cpu(ia->v4.port));
if (len < 1 || len > 22)
return NULL;
len++; /* null */
str = malloc(len);
if (!str)
return NULL;
len++; /* null */
str = malloc(len);
if (!str)
return NULL;
snprintf(str, len, "%s:%u", quad, le16_to_cpu(ia->v4.port));
snprintf(str, len, "%s:%u", quad, le16_to_cpu(ia->v4.port));
} else if (le16_to_cpu(ia->v6.family) == SCOUTFS_AF_IPV6) {
if (inet_ntop(AF_INET6, ia->v6.addr, ip6addr, INET6_ADDRSTRLEN) == NULL)
return NULL;
len = strlen(ip6addr) + 9; /* "[]:\0" (4) plus max strlen(u16) (5) */
str = malloc(len);
if (!str)
return NULL;
snprintf(str, len, "[%s]:%u", ip6addr, le16_to_cpu(ia->v6.port));
} else
return NULL;
return str;
}
@@ -1026,7 +1049,7 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
printf(" quorum config version %llu\n",
le64_to_cpu(super->qconf.version));
for (i = 0; i < array_size(super->qconf.slots); i++) {
if (super->qconf.slots[i].addr.v4.family != cpu_to_le16(SCOUTFS_AF_IPV4))
if (!quorum_slot_present(super, i))
continue;
addr = alloc_addr_str(&super->qconf.slots[i].addr);
+56 -29
View File
@@ -10,7 +10,8 @@
bool quorum_slot_present(struct scoutfs_super_block *super, int i)
{
return super->qconf.slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4);
return ((super->qconf.slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4)) ||
(super->qconf.slots[i].addr.v6.family == cpu_to_le16(SCOUTFS_AF_IPV6)));
}
bool valid_quorum_slots(struct scoutfs_quorum_slot *slots)
@@ -18,35 +19,57 @@ bool valid_quorum_slots(struct scoutfs_quorum_slot *slots)
struct in_addr in;
bool valid = true;
char *addr;
char ip6addr[INET6_ADDRSTRLEN];
__le16 family = cpu_to_le16(SCOUTFS_AF_NONE);
int i;
int j;
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
if (slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_NONE))
continue;
if (slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4)) {
if (family == cpu_to_le16(SCOUTFS_AF_NONE)) {
family = cpu_to_le16(SCOUTFS_AF_IPV4);
} else if (family != cpu_to_le16(SCOUTFS_AF_IPV4)) {
fprintf(stderr, "quorum slot nr %u is IPv4 but earlier slots are IPv6; mixed IPv4/IPv6 quorum is not supported\n",
i);
valid = false;
}
if (slots[i].addr.v4.family != cpu_to_le16(SCOUTFS_AF_IPV4)) {
for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) {
if (slots[i].addr.v4.addr == slots[j].addr.v4.addr &&
slots[i].addr.v4.port == slots[j].addr.v4.port) {
in.s_addr =
htonl(le32_to_cpu(slots[i].addr.v4.addr));
addr = inet_ntoa(in);
fprintf(stderr, "quorum slot nr %u and %u have the same address %s:%u\n",
i, j, addr,
le16_to_cpu(slots[i].addr.v4.port));
valid = false;
}
}
} else if (slots[i].addr.v6.family == cpu_to_le16(SCOUTFS_AF_IPV6)) {
if (family == cpu_to_le16(SCOUTFS_AF_NONE)) {
family = cpu_to_le16(SCOUTFS_AF_IPV6);
} else if (family != cpu_to_le16(SCOUTFS_AF_IPV6)) {
fprintf(stderr, "quorum slot nr %u is IPv6 but earlier slots are IPv4; mixed IPv4/IPv6 quorum is not supported\n",
i);
valid = false;
}
for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) {
if ((IN6_ARE_ADDR_EQUAL(slots[i].addr.v6.addr, slots[j].addr.v6.addr)) &&
(slots[i].addr.v6.port == slots[j].addr.v6.port)) {
fprintf(stderr, "quorum slot nr %u and %u have the same address [%s]:%u\n",
i, j,
inet_ntop(AF_INET6, slots[i].addr.v6.addr, ip6addr, INET6_ADDRSTRLEN),
le16_to_cpu(slots[i].addr.v6.port));
valid = false;
}
}
} else if (slots[i].addr.v6.family != cpu_to_le16(SCOUTFS_AF_NONE)) {
fprintf(stderr, "quorum slot nr %u has invalid family %u\n",
i, le16_to_cpu(slots[i].addr.v4.family));
valid = false;
}
for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) {
if (slots[i].addr.v4.family != cpu_to_le16(SCOUTFS_AF_IPV4))
continue;
if (slots[i].addr.v4.addr == slots[j].addr.v4.addr &&
slots[i].addr.v4.port == slots[j].addr.v4.port) {
in.s_addr =
htonl(le32_to_cpu(slots[i].addr.v4.addr));
addr = inet_ntoa(in);
fprintf(stderr, "quorum slot nr %u and %u have the same address %s:%u\n",
i, j, addr,
le16_to_cpu(slots[i].addr.v4.port));
valid = false;
}
}
}
return valid;
@@ -61,19 +84,23 @@ void print_quorum_slots(struct scoutfs_quorum_slot *slots, int nr, char *indent)
{
struct scoutfs_quorum_slot *sl;
struct in_addr in;
char ip6addr[INET6_ADDRSTRLEN];
bool first = true;
int i;
for (i = 0, sl = slots; i < SCOUTFS_QUORUM_MAX_SLOTS; i++, sl++) {
if (sl->addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4)) {
in.s_addr = htonl(le32_to_cpu(sl->addr.v4.addr));
printf("%s%u: %s:%u\n", first ? "" : indent,
i, inet_ntoa(in), le16_to_cpu(sl->addr.v4.port));
if (sl->addr.v4.family != cpu_to_le16(SCOUTFS_AF_IPV4))
continue;
in.s_addr = htonl(le32_to_cpu(sl->addr.v4.addr));
printf("%s%u: %s:%u\n", first ? "" : indent,
i, inet_ntoa(in), le16_to_cpu(sl->addr.v4.port));
first = false;
first = false;
} else if (sl->addr.v6.family == cpu_to_le16(SCOUTFS_AF_IPV6)) {
printf("%s%u: [%s]:%u\n", first ? "" : indent, i,
inet_ntop(AF_INET6, sl->addr.v6.addr, ip6addr, INET6_ADDRSTRLEN),
le16_to_cpu(sl->addr.v6.port));
first = false;
}
}
}