mirror of
https://github.com/versity/scoutfs.git
synced 2026-05-01 10:25:43 +00:00
Compare commits
126 Commits
zab/hold_c
...
zab/quot_p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
18f00a8acc | ||
|
|
40b62b5033 | ||
|
|
d4b1cd5931 | ||
|
|
555b307fe6 | ||
|
|
9859a75d4e | ||
|
|
c6ad865054 | ||
|
|
19ea848bca | ||
|
|
13151b7c66 | ||
|
|
535352fbd4 | ||
|
|
941e053eb3 | ||
|
|
12fe44c0be | ||
|
|
b06a56b1ee | ||
|
|
6817b403e5 | ||
|
|
b89139b0fe | ||
|
|
bd4dad757b | ||
|
|
b2448f461a | ||
|
|
4091e2cc55 | ||
|
|
45cd62974a | ||
|
|
5d744b78bd | ||
|
|
1328f1a2cb | ||
|
|
1f5c68cd30 | ||
|
|
965b692bdc | ||
|
|
c3c4b08038 | ||
|
|
0519830229 | ||
|
|
4d6e1a14ae | ||
|
|
fc3e061ea8 | ||
|
|
a4bc3fb27d | ||
|
|
67990a7007 | ||
|
|
ba819be8f9 | ||
|
|
1b103184ca | ||
|
|
c3890abd7b | ||
|
|
5ab38bfa48 | ||
|
|
e9ad61b444 | ||
|
|
91bbf90f71 | ||
|
|
b5630f540d | ||
|
|
90a4c82363 | ||
|
|
f654fa0fda | ||
|
|
50168a2d2a | ||
|
|
3c0616524a | ||
|
|
8d3e6883c6 | ||
|
|
8747dae61c | ||
|
|
fffcf4a9bb | ||
|
|
b552406427 | ||
|
|
d812599e6b | ||
|
|
03ab5cedb6 | ||
|
|
2b94cd6468 | ||
|
|
5507ee5351 | ||
|
|
1600a121d9 | ||
|
|
6daf24ff37 | ||
|
|
cd5d9ff3e0 | ||
|
|
d94e49eb63 | ||
|
|
1dbe408539 | ||
|
|
bf21699ad7 | ||
|
|
c7c67a173d | ||
|
|
0d10189f58 | ||
|
|
6b88f3268e | ||
|
|
4b2afa61b8 | ||
|
|
222ba2cede | ||
|
|
c7e97eeb1f | ||
|
|
21c070b42d | ||
|
|
77fbf92968 | ||
|
|
d5c699c3b4 | ||
|
|
b56b8e502c | ||
|
|
5ff372561d | ||
|
|
bdecee5e5d | ||
|
|
a9281b75fa | ||
|
|
707e1b2d59 | ||
|
|
006f429f72 | ||
|
|
d71583bcf5 | ||
|
|
bb835b948d | ||
|
|
bcdc4f5423 | ||
|
|
7ceb215c91 | ||
|
|
d4d2b0850b | ||
|
|
cf05aefe50 | ||
|
|
9f06065ce7 | ||
|
|
d2c2fece2a | ||
|
|
0e1e55d25b | ||
|
|
293cee9554 | ||
|
|
a7704e0b56 | ||
|
|
819df4be60 | ||
|
|
592e3d471f | ||
|
|
29160b0bc6 | ||
|
|
11c041d2ea | ||
|
|
46e8dfe884 | ||
|
|
a9beeaf5da | ||
|
|
205d8ebd4a | ||
|
|
e580f33f82 | ||
|
|
d480243c11 | ||
|
|
bafecbc604 | ||
|
|
65be4682e3 | ||
|
|
e88845d185 | ||
|
|
ec50e66fff | ||
|
|
0e91f9a277 | ||
|
|
69068ae2c0 | ||
|
|
016dac39bf | ||
|
|
e69cf3dec8 | ||
|
|
d6c143a639 | ||
|
|
09ae100254 | ||
|
|
50f5077863 | ||
|
|
cca4fcb788 | ||
|
|
1d150da3f0 | ||
|
|
28f03d3558 | ||
|
|
4275f6e6e5 | ||
|
|
70a5b6ffe2 | ||
|
|
b89ecd47b4 | ||
|
|
4293816764 | ||
|
|
f0de59a9a3 | ||
|
|
1f0a08eacb | ||
|
|
dac3f056a5 | ||
|
|
af868aad9b | ||
|
|
cf4df0ef9f | ||
|
|
81aa58253e | ||
|
|
c683ded0e6 | ||
|
|
f27431b3ae | ||
|
|
28c3cee995 | ||
|
|
430960ef3c | ||
|
|
7006a84d96 | ||
|
|
eafb8621da | ||
|
|
006555d42a | ||
|
|
8e458f9230 | ||
|
|
32c0dbce09 | ||
|
|
9c9ba651bd | ||
|
|
14eddb6420 | ||
|
|
597208324d | ||
|
|
8596c9ad45 | ||
|
|
8a705ea380 |
@@ -1,6 +1,57 @@
|
||||
Versity ScoutFS Release Notes
|
||||
=============================
|
||||
|
||||
---
|
||||
v1.20
|
||||
\
|
||||
*Apr 22, 2024*
|
||||
|
||||
Minor changes to packaging to better support "weak" module linking of
|
||||
the kernel module, and to including git hashes in the built package. No
|
||||
changes in runtime behaviour.
|
||||
|
||||
---
|
||||
v1.19
|
||||
\
|
||||
*Jan 30, 2024*
|
||||
|
||||
Added the log\_merge\_wait\_timeout\_ms mount option to set the timeout
|
||||
for creating log merge operations. The previous timeout, now the
|
||||
default, was too short for some systems and was resulting in consistent
|
||||
timeouts which created an excessive number of log trees waiting to be
|
||||
merged.
|
||||
|
||||
Improved performance of many in-mount server operations when there are a
|
||||
large number of log trees waiting to be merged.
|
||||
|
||||
---
|
||||
v1.18
|
||||
\
|
||||
*Nov 7, 2023*
|
||||
|
||||
Fixed a bug where background srch file compaction could stop making
|
||||
forward progress if a partial compaction operation was committed at a
|
||||
specific byte offset in a block. This would cause srch file searches to
|
||||
be progressively more expensive over time. Once this fix is running,
|
||||
background compaction will resume, bringing the cost of searches back
|
||||
down.
|
||||
|
||||
---
|
||||
v1.17
|
||||
\
|
||||
*Oct 23, 2023*
|
||||
|
||||
Add support for EL8 generation kernels.
|
||||
|
||||
---
|
||||
v1.16
|
||||
\
|
||||
*Oct 4, 2023*
|
||||
|
||||
Fix an issue where the server could hang on startup if its persistent
|
||||
allocator structures were left in a specific degraded state by the
|
||||
previously active server.
|
||||
|
||||
---
|
||||
v1.15
|
||||
\
|
||||
|
||||
@@ -12,17 +12,22 @@ else
|
||||
SP = @:
|
||||
endif
|
||||
|
||||
SCOUTFS_GIT_DESCRIBE := \
|
||||
SCOUTFS_GIT_DESCRIBE ?= \
|
||||
$(shell git describe --all --abbrev=6 --long 2>/dev/null || \
|
||||
echo no-git)
|
||||
|
||||
ESCAPED_GIT_DESCRIBE := \
|
||||
$(shell echo $(SCOUTFS_GIT_DESCRIBE) |sed -e 's/\//\\\//g')
|
||||
|
||||
RPM_GITHASH ?= $(shell git rev-parse --short HEAD)
|
||||
|
||||
SCOUTFS_ARGS := SCOUTFS_GIT_DESCRIBE=$(SCOUTFS_GIT_DESCRIBE) \
|
||||
RPM_GITHASH=$(RPM_GITHASH) \
|
||||
CONFIG_SCOUTFS_FS=m -C $(SK_KSRC) M=$(CURDIR)/src \
|
||||
EXTRA_CFLAGS="-Werror"
|
||||
|
||||
# - We use the git describe from tags to set up the RPM versioning
|
||||
RPM_VERSION := $(shell git describe --long --tags | awk -F '-' '{gsub(/^v/,""); print $$1}')
|
||||
RPM_GITHASH := $(shell git rev-parse --short HEAD)
|
||||
TARFILE = scoutfs-kmod-$(RPM_VERSION).tar
|
||||
|
||||
|
||||
@@ -31,17 +36,18 @@ TARFILE = scoutfs-kmod-$(RPM_VERSION).tar
|
||||
all: module
|
||||
|
||||
module:
|
||||
make $(SCOUTFS_ARGS)
|
||||
$(SP) make C=2 CF="-D__CHECK_ENDIAN__" $(SCOUTFS_ARGS)
|
||||
$(MAKE) $(SCOUTFS_ARGS)
|
||||
$(SP) $(MAKE) C=2 CF="-D__CHECK_ENDIAN__" $(SCOUTFS_ARGS)
|
||||
|
||||
|
||||
modules_install:
|
||||
make $(SCOUTFS_ARGS) modules_install
|
||||
$(MAKE) $(SCOUTFS_ARGS) modules_install
|
||||
|
||||
|
||||
%.spec: %.spec.in .FORCE
|
||||
sed -e 's/@@VERSION@@/$(RPM_VERSION)/g' \
|
||||
-e 's/@@GITHASH@@/$(RPM_GITHASH)/g' < $< > $@+
|
||||
-e 's/@@GITHASH@@/$(RPM_GITHASH)/g' \
|
||||
-e 's/@@GITDESCRIBE@@/$(ESCAPED_GIT_DESCRIBE)/g' < $< > $@+
|
||||
mv $@+ $@
|
||||
|
||||
|
||||
@@ -50,4 +56,4 @@ dist: scoutfs-kmod.spec
|
||||
@ tar rf $(TARFILE) --transform="s@\(.*\)@scoutfs-kmod-$(RPM_VERSION)/\1@" scoutfs-kmod.spec
|
||||
|
||||
clean:
|
||||
make $(SCOUTFS_ARGS) clean
|
||||
$(MAKE) $(SCOUTFS_ARGS) clean
|
||||
|
||||
@@ -1,18 +1,31 @@
|
||||
%define kmod_name scoutfs
|
||||
%define kmod_version @@VERSION@@
|
||||
%define kmod_git_hash @@GITHASH@@
|
||||
%define kmod_git_describe @@GITDESCRIBE@@
|
||||
%define pkg_date %(date +%%Y%%m%%d)
|
||||
|
||||
# Disable the building of the debug package(s).
|
||||
%define debug_package %{nil}
|
||||
|
||||
# take kernel version or default to uname -r
|
||||
%{!?kversion: %global kversion %(uname -r)}
|
||||
%global kernel_version %{kversion}
|
||||
|
||||
%if 0%{?el7}
|
||||
%global kernel_source() /usr/src/kernels/%{kernel_version}.$(arch)
|
||||
%global kernel_release() %{kversion}
|
||||
%endif
|
||||
%if 0%{?el8}
|
||||
%global kernel_source() /usr/src/kernels/%{kernel_version}
|
||||
%endif
|
||||
|
||||
%{!?_release: %global _release 0.%{pkg_date}git%{kmod_git_hash}}
|
||||
|
||||
%if 0%{?el7}
|
||||
Name: %{kmod_name}
|
||||
%endif
|
||||
%if 0%{?el8}
|
||||
Name: kmod-%{kmod_name}
|
||||
%endif
|
||||
Summary: %{kmod_name} kernel module
|
||||
Version: %{kmod_version}
|
||||
Release: %{_release}%{?dist}
|
||||
@@ -20,24 +33,30 @@ License: GPLv2
|
||||
Group: System/Kernel
|
||||
URL: http://scoutfs.org/
|
||||
|
||||
%if 0%{?el7}
|
||||
BuildRequires: %{kernel_module_package_buildreqs}
|
||||
BuildRequires: git
|
||||
%endif
|
||||
%if 0%{?el8}
|
||||
BuildRequires: elfutils-libelf-devel
|
||||
%endif
|
||||
BuildRequires: kernel-devel-uname-r = %{kernel_version}
|
||||
BuildRequires: git
|
||||
BuildRequires: module-init-tools
|
||||
|
||||
ExclusiveArch: x86_64
|
||||
|
||||
Source: %{kmod_name}-kmod-%{kmod_version}.tar
|
||||
|
||||
%if 0%{?el7}
|
||||
# Build only for standard kernel variant(s); for debug packages, append "debug"
|
||||
# after "default" (separated by space)
|
||||
%kernel_module_package default
|
||||
%endif
|
||||
|
||||
# Disable the building of the debug package(s).
|
||||
%define debug_package %{nil}
|
||||
|
||||
%global install_mod_dir extra/%{name}
|
||||
|
||||
%global install_mod_dir extra/%{kmod_name}
|
||||
%if 0%{?el8}
|
||||
%global flavors_to_build x86_64
|
||||
%endif
|
||||
|
||||
%description
|
||||
%{kmod_name} - kernel module
|
||||
@@ -57,7 +76,7 @@ echo "Building for kernel: %{kernel_version} flavors: '%{flavors_to_build}'"
|
||||
for flavor in %flavors_to_build; do
|
||||
rm -rf obj/$flavor
|
||||
cp -r source obj/$flavor
|
||||
make SK_KSRC=%{kernel_source $flavor} -C obj/$flavor module
|
||||
make RPM_GITHASH=%{kmod_git_hash} SCOUTFS_GIT_DESCRIBE=%{kmod_git_describe} SK_KSRC=%{kernel_source $flavor} -C obj/$flavor module
|
||||
done
|
||||
|
||||
%install
|
||||
@@ -66,7 +85,7 @@ export INSTALL_MOD_DIR=%{install_mod_dir}
|
||||
mkdir -p %{install_mod_dir}
|
||||
for flavor in %{flavors_to_build}; do
|
||||
export KSRC=%{kernel_source $flavor}
|
||||
export KVERSION=%{kernel_release $KSRC}
|
||||
export KVERSION=%{kversion}
|
||||
install -d $INSTALL_MOD_PATH/lib/modules/$KVERSION/%{install_mod_dir}
|
||||
cp $PWD/obj/$flavor/src/scoutfs.ko $INSTALL_MOD_PATH/lib/modules/$KVERSION/%{install_mod_dir}/
|
||||
done
|
||||
@@ -74,7 +93,26 @@ done
|
||||
# mark modules executable so that strip-to-file can strip them
|
||||
find %{buildroot} -type f -name \*.ko -exec %{__chmod} u+x \{\} \;
|
||||
|
||||
%if 0%{?el8}
|
||||
%files
|
||||
/lib/modules
|
||||
|
||||
%post
|
||||
echo /lib/modules/%{kversion}/%{install_mod_dir}/scoutfs.ko | weak-modules --add-modules --no-initramfs
|
||||
depmod -a
|
||||
%endif
|
||||
|
||||
%clean
|
||||
rm -rf %{buildroot}
|
||||
|
||||
%preun
|
||||
# stash our modules for postun cleanup
|
||||
SCOUTFS_RPM_NAME=$(rpm -q %{name} | grep "%{version}-%{release}")
|
||||
rpm -ql $SCOUTFS_RPM_NAME | grep '\.ko$' > /var/run/%{name}-modules-%{version}-%{release} || true
|
||||
|
||||
%postun
|
||||
if [ -x /sbin/weak-modules ]; then
|
||||
cat /var/run/%{name}-modules-%{version}-%{release} | /sbin/weak-modules --remove-modules --no-initramfs
|
||||
fi
|
||||
|
||||
rm /var/run/%{name}-modules-%{version}-%{release} || true
|
||||
|
||||
@@ -25,6 +25,7 @@ scoutfs-y += \
|
||||
inode.o \
|
||||
ioctl.o \
|
||||
item.o \
|
||||
kernelcompat.o \
|
||||
lock.o \
|
||||
lock_server.o \
|
||||
msg.o \
|
||||
@@ -33,6 +34,7 @@ scoutfs-y += \
|
||||
options.o \
|
||||
per_task.o \
|
||||
quorum.o \
|
||||
quota.o \
|
||||
recov.o \
|
||||
scoutfs_trace.o \
|
||||
server.o \
|
||||
@@ -41,10 +43,12 @@ scoutfs-y += \
|
||||
srch.o \
|
||||
super.o \
|
||||
sysfs.o \
|
||||
totl.o \
|
||||
trans.o \
|
||||
triggers.o \
|
||||
tseq.o \
|
||||
volopt.o \
|
||||
wkic.o \
|
||||
xattr.o
|
||||
|
||||
#
|
||||
|
||||
@@ -26,6 +26,16 @@ ifneq (,$(shell grep 'dir_emit_dots' include/linux/fs.h))
|
||||
ccflags-y += -DKC_DIR_EMIT_DOTS
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.18-rc2-19-gb5ae6b15bd73
|
||||
#
|
||||
# Folds d_materialise_unique into d_splice_alias. Note reversal
|
||||
# of arguments (Also note Documentation/filesystems/porting.rst)
|
||||
#
|
||||
ifneq (,$(shell grep 'd_materialise_unique' include/linux/dcache.h))
|
||||
ccflags-y += -DKC_D_MATERIALISE_UNIQUE=1
|
||||
endif
|
||||
|
||||
#
|
||||
# RHEL extended the fop struct so to use it we have to set
|
||||
# a flag to indicate that the struct is large enough and
|
||||
@@ -40,6 +50,211 @@ endif
|
||||
#
|
||||
# Added user_ns argument to posix_acl_valid
|
||||
#
|
||||
ifneq (,$(shell grep 'posix_acl_valid.*user_ns,' include/linux/posix_acl.h))
|
||||
ifneq (,$(shell grep 'posix_acl_valid.*user_namespace' include/linux/posix_acl.h))
|
||||
ccflags-y += -DKC_POSIX_ACL_VALID_USER_NS
|
||||
endif
|
||||
|
||||
#
|
||||
# v5.3-12296-g6d2052d188d9
|
||||
#
|
||||
# The RBCOMPUTE function is now passed an extra flag, and should return a bool
|
||||
# to indicate whether the propagated callback should stop or not.
|
||||
#
|
||||
ifneq (,$(shell grep 'static inline bool RBNAME.*_compute_max' include/linux/rbtree_augmented.h))
|
||||
ccflags-y += -DKC_RB_TREE_AUGMENTED_COMPUTE_MAX
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.13-25-g37bc15392a23
|
||||
#
|
||||
# Renames posix_acl_create to __posix_acl_create and provides some
|
||||
# new interfaces for creating ACLs
|
||||
#
|
||||
ifneq (,$(shell grep '__posix_acl_create' include/linux/posix_acl.h))
|
||||
ccflags-y += -DKC___POSIX_ACL_CREATE
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.8-rc1-29-g31051c85b5e2
|
||||
#
|
||||
# inode_change_ok() removed - replace with setattr_prepare()
|
||||
#
|
||||
ifneq (,$(shell grep 'extern int setattr_prepare' include/linux/fs.h))
|
||||
ccflags-y += -DKC_SETATTR_PREPARE
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.15-rc3-4-gae5e165d855d
|
||||
#
|
||||
# linux/iversion.h needs to manually be included for code that
|
||||
# manipulates this field.
|
||||
#
|
||||
ifneq (,$(shell grep -s 'define _LINUX_IVERSION_H' include/linux/iversion.h))
|
||||
ccflags-y += -DKC_NEED_LINUX_IVERSION_H=1
|
||||
endif
|
||||
|
||||
# v4.11-12447-g104b4e5139fe
|
||||
#
|
||||
# Renamed __percpu_counter_add to percpu_counter_add_batch to clarify
|
||||
# that the __ wasn't less safe, just took an extra parameter.
|
||||
#
|
||||
ifneq (,$(shell grep 'percpu_counter_add_batch' include/linux/percpu_counter.h))
|
||||
ccflags-y += -DKC_PERCPU_COUNTER_ADD_BATCH
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.11-4550-g7dea19f9ee63
|
||||
#
|
||||
# Introduced memalloc_nofs_{save,restore} preferred instead of _noio_.
|
||||
#
|
||||
ifneq (,$(shell grep 'memalloc_nofs_save' include/linux/sched/mm.h))
|
||||
ccflags-y += -DKC_MEMALLOC_NOFS_SAVE
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.7-12414-g1eff9d322a44
|
||||
#
|
||||
# Renamed bi_rw to bi_opf to force old code to catch up. We use it as a
|
||||
# single switch between old and new bio structures.
|
||||
#
|
||||
ifneq (,$(shell grep 'bi_opf' include/linux/blk_types.h))
|
||||
ccflags-y += -DKC_BIO_BI_OPF
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.12-rc2-201-g4e4cbee93d56
|
||||
#
|
||||
# Moves to bi_status BLK_STS_ API instead of having a mix of error
|
||||
# end_io args or bi_error.
|
||||
#
|
||||
ifneq (,$(shell grep 'bi_status' include/linux/blk_types.h))
|
||||
ccflags-y += -DKC_BIO_BI_STATUS
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.11-8765-ga0b02131c5fc
|
||||
#
|
||||
# Remove the old ->shrink() API, ->{scan,count}_objects is preferred.
|
||||
#
|
||||
ifneq (,$(shell grep '(*shrink)' include/linux/shrinker.h))
|
||||
ccflags-y += -DKC_SHRINKER_SHRINK
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.19-4777-g6bec00352861
|
||||
#
|
||||
# backing_dev_info is removed from address_space. Instead we need to use
|
||||
# inode_to_bdi() inline from <backing-dev.h>.
|
||||
#
|
||||
ifneq (,$(shell grep 'struct backing_dev_info.*backing_dev_info' include/linux/fs.h))
|
||||
ccflags-y += -DKC_LINUX_BACKING_DEV_INFO=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.3-9290-ge409de992e3e
|
||||
#
|
||||
# xattr handlers are now passed a struct that contains `flags`
|
||||
#
|
||||
ifneq (,$(shell grep 'int...get..const struct xattr_handler.*struct dentry.*dentry,' include/linux/xattr.h))
|
||||
ccflags-y += -DKC_XATTR_STRUCT_XATTR_HANDLER=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.16-rc1-1-g9b2c45d479d0
|
||||
#
|
||||
# kernel_getsockname() and kernel_getpeername dropped addrlen arg
|
||||
#
|
||||
ifneq (,$(shell grep 'kernel_getsockname.*,$$' include/linux/net.h))
|
||||
ccflags-y += -DKC_KERNEL_GETSOCKNAME_ADDRLEN=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.1-rc1-410-geeb1bd5c40ed
|
||||
#
|
||||
# Adds a struct net parameter to sock_create_kern
|
||||
#
|
||||
ifneq (,$(shell grep 'sock_create_kern.*struct net' include/linux/net.h))
|
||||
ccflags-y += -DKC_SOCK_CREATE_KERN_NET=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.18-rc6-1619-gc0371da6047a
|
||||
#
|
||||
# iov_iter is now part of struct msghdr
|
||||
#
|
||||
ifneq (,$(shell grep 'struct iov_iter.*msg_iter' include/linux/socket.h))
|
||||
ccflags-y += -DKC_MSGHDR_STRUCT_IOV_ITER=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.17-rc6-7-g95582b008388
|
||||
#
|
||||
# Kernel has current_time(inode) to uniformly retrieve timespec in the right unit
|
||||
#
|
||||
ifneq (,$(shell grep 'extern struct timespec64 current_time' include/linux/fs.h))
|
||||
ccflags-y += -DKC_CURRENT_TIME_INODE=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.9-12228-g530e9b76ae8f
|
||||
#
|
||||
# register_cpu_notifier and family were all removed and to be
|
||||
# replaced with cpuhp_* API calls.
|
||||
#
|
||||
ifneq (,$(shell grep 'define register_hotcpu_notifier' include/linux/cpu.h))
|
||||
ccflags-y += -DKC_CPU_NOTIFIER
|
||||
endif
|
||||
|
||||
#
|
||||
# v3.14-rc8-130-gccad2365668f
|
||||
#
|
||||
# generic_file_buffered_write is removed, backport it
|
||||
#
|
||||
ifneq (,$(shell grep 'extern ssize_t generic_file_buffered_write' include/linux/fs.h))
|
||||
ccflags-y += -DKC_GENERIC_FILE_BUFFERED_WRITE=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v5.7-438-g8151b4c8bee4
|
||||
#
|
||||
# struct address_space_operations switches away from .readpages to .readahead
|
||||
#
|
||||
# RHEL has backported this feature all the way to RHEL8, as part of RHEL_KABI,
|
||||
# which means we need to detect this very precisely
|
||||
#
|
||||
ifneq (,$(shell grep 'readahead.*struct readahead_control' include/linux/fs.h))
|
||||
ccflags-y += -DKC_FILE_AOPS_READAHEAD
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.0-rc7-1743-g8436318205b9
|
||||
#
|
||||
# .aio_read and .aio_write no longer exist. All reads and writes now use the
|
||||
# .read_iter and .write_iter methods, or must implement .read and .write (which
|
||||
# we don't).
|
||||
#
|
||||
ifneq (,$(shell grep 'ssize_t.*aio_read' include/linux/fs.h))
|
||||
ccflags-y += -DKC_LINUX_HAVE_FOP_AIO_READ=1
|
||||
endif
|
||||
|
||||
#
|
||||
# rhel7 has a custom inode_operations_wrapper struct that is discarded
|
||||
# entirely in favor of upstream structure since rhel8.
|
||||
#
|
||||
ifneq (,$(shell grep 'void.*follow_link.*struct dentry' include/linux/fs.h))
|
||||
ccflags-y += -DKC_LINUX_HAVE_RHEL_IOPS_WRAPPER=1
|
||||
endif
|
||||
|
||||
ifneq (,$(shell grep 'size_t.*ki_left;' include/linux/aio.h))
|
||||
ccflags-y += -DKC_LINUX_AIO_KI_LEFT=1
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.4-rc4-4-g98e9cb5711c6
|
||||
#
|
||||
# Introduces a new xattr_handler .name member that can be used to match the
|
||||
# entire field, instead of just a prefix. For these kernels, we must use
|
||||
# the new .name field instead.
|
||||
ifneq (,$(shell grep 'static inline const char .xattr_prefix' include/linux/xattr.h))
|
||||
ccflags-y += -DKC_XATTR_HANDLER_NAME=1
|
||||
endif
|
||||
|
||||
@@ -69,12 +69,14 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
|
||||
char *name;
|
||||
int ret;
|
||||
|
||||
#ifndef KC___POSIX_ACL_CREATE
|
||||
if (!IS_POSIXACL(inode))
|
||||
return NULL;
|
||||
|
||||
acl = get_cached_acl(inode, type);
|
||||
if (acl != ACL_NOT_CACHED)
|
||||
return acl;
|
||||
#endif
|
||||
|
||||
ret = acl_xattr_name_len(type, &name, NULL);
|
||||
if (ret < 0)
|
||||
@@ -96,9 +98,11 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
|
||||
acl = ERR_PTR(ret);
|
||||
}
|
||||
|
||||
#ifndef KC___POSIX_ACL_CREATE
|
||||
/* can set null negative cache */
|
||||
if (!IS_ERR(acl))
|
||||
set_cached_acl(inode, type, acl);
|
||||
#endif
|
||||
|
||||
kfree(value);
|
||||
|
||||
@@ -112,8 +116,10 @@ struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
|
||||
struct posix_acl *acl;
|
||||
int ret;
|
||||
|
||||
#ifndef KC___POSIX_ACL_CREATE
|
||||
if (!IS_POSIXACL(inode))
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
|
||||
if (ret < 0) {
|
||||
@@ -183,13 +189,15 @@ int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
|
||||
if (!value) {
|
||||
/* can be setting an acl that only affects mode, didn't need xattr */
|
||||
inode_inc_iversion(inode);
|
||||
inode->i_ctime = CURRENT_TIME;
|
||||
inode->i_ctime = current_time(inode);
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
#ifndef KC___POSIX_ACL_CREATE
|
||||
if (!ret)
|
||||
set_cached_acl(inode, type, acl);
|
||||
#endif
|
||||
|
||||
kfree(value);
|
||||
|
||||
@@ -218,10 +226,17 @@ int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
int scoutfs_acl_get_xattr(const struct xattr_handler *handler, struct dentry *dentry,
|
||||
struct inode *inode, const char *name, void *value,
|
||||
size_t size)
|
||||
{
|
||||
int type = handler->flags;
|
||||
#else
|
||||
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
|
||||
int type)
|
||||
{
|
||||
#endif
|
||||
struct posix_acl *acl;
|
||||
int ret = 0;
|
||||
|
||||
@@ -240,9 +255,17 @@ int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
int scoutfs_acl_set_xattr(const struct xattr_handler *handler, struct dentry *dentry,
|
||||
struct inode *inode, const char *name, const void *value,
|
||||
size_t size, int flags)
|
||||
{
|
||||
int type = handler->flags;
|
||||
#else
|
||||
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
|
||||
int flags, int type)
|
||||
{
|
||||
#endif
|
||||
struct posix_acl *acl = NULL;
|
||||
int ret;
|
||||
|
||||
@@ -301,7 +324,7 @@ int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir,
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
|
||||
ret = __posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret > 0)
|
||||
@@ -345,7 +368,7 @@ int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr,
|
||||
if (IS_ERR_OR_NULL(acl))
|
||||
return PTR_ERR(acl);
|
||||
|
||||
ret = posix_acl_chmod(&acl, GFP_KERNEL, attr->ia_mode);
|
||||
ret = __posix_acl_chmod(&acl, GFP_KERNEL, attr->ia_mode);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
|
||||
@@ -6,10 +6,19 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
|
||||
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
|
||||
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
|
||||
struct scoutfs_lock *lock, struct list_head *ind_locks);
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
int scoutfs_acl_get_xattr(const struct xattr_handler *, struct dentry *dentry,
|
||||
struct inode *inode, const char *name, void *value,
|
||||
size_t size);
|
||||
int scoutfs_acl_set_xattr(const struct xattr_handler *, struct dentry *dentry,
|
||||
struct inode *inode, const char *name, const void *value,
|
||||
size_t size, int flags);
|
||||
#else
|
||||
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
|
||||
int type);
|
||||
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
|
||||
int flags, int type);
|
||||
#endif
|
||||
int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr,
|
||||
struct scoutfs_lock *lock, struct list_head *ind_locks);
|
||||
int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir,
|
||||
|
||||
104
kmod/src/block.c
104
kmod/src/block.c
@@ -21,6 +21,7 @@
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/rhashtable.h>
|
||||
#include <linux/random.h>
|
||||
#include <linux/sched/mm.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "super.h"
|
||||
@@ -30,6 +31,7 @@
|
||||
#include "scoutfs_trace.h"
|
||||
#include "alloc.h"
|
||||
#include "triggers.h"
|
||||
#include "util.h"
|
||||
|
||||
/*
|
||||
* The scoutfs block cache manages metadata blocks that can be larger
|
||||
@@ -57,7 +59,7 @@ struct block_info {
|
||||
atomic64_t access_counter;
|
||||
struct rhashtable ht;
|
||||
wait_queue_head_t waitq;
|
||||
struct shrinker shrinker;
|
||||
KC_DEFINE_SHRINKER(shrinker);
|
||||
struct work_struct free_work;
|
||||
struct llist_head free_llist;
|
||||
};
|
||||
@@ -128,7 +130,7 @@ static __le32 block_calc_crc(struct scoutfs_block_header *hdr, u32 size)
|
||||
static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
|
||||
{
|
||||
struct block_private *bp;
|
||||
unsigned int noio_flags;
|
||||
unsigned int nofs_flags;
|
||||
|
||||
/*
|
||||
* If we had multiple blocks per page we'd need to be a little
|
||||
@@ -156,9 +158,9 @@ static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
|
||||
* spurious reclaim-on dependencies and warnings.
|
||||
*/
|
||||
lockdep_off();
|
||||
noio_flags = memalloc_noio_save();
|
||||
nofs_flags = memalloc_nofs_save();
|
||||
bp->virt = __vmalloc(SCOUTFS_BLOCK_LG_SIZE, GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
|
||||
memalloc_noio_restore(noio_flags);
|
||||
memalloc_nofs_restore(nofs_flags);
|
||||
lockdep_on();
|
||||
|
||||
if (!bp->virt) {
|
||||
@@ -436,11 +438,10 @@ static void block_remove_all(struct super_block *sb)
|
||||
* possible. Final freeing, verifying checksums, and unlinking errored
|
||||
* blocks are all done by future users of the blocks.
|
||||
*/
|
||||
static void block_end_io(struct super_block *sb, int rw,
|
||||
static void block_end_io(struct super_block *sb, unsigned int opf,
|
||||
struct block_private *bp, int err)
|
||||
{
|
||||
DECLARE_BLOCK_INFO(sb, binf);
|
||||
bool is_read = !(rw & WRITE);
|
||||
|
||||
if (err) {
|
||||
scoutfs_inc_counter(sb, block_cache_end_io_error);
|
||||
@@ -450,7 +451,7 @@ static void block_end_io(struct super_block *sb, int rw,
|
||||
if (!atomic_dec_and_test(&bp->io_count))
|
||||
return;
|
||||
|
||||
if (is_read && !test_bit(BLOCK_BIT_ERROR, &bp->bits))
|
||||
if (!op_is_write(opf) && !test_bit(BLOCK_BIT_ERROR, &bp->bits))
|
||||
set_bit(BLOCK_BIT_UPTODATE, &bp->bits);
|
||||
|
||||
clear_bit(BLOCK_BIT_IO_BUSY, &bp->bits);
|
||||
@@ -463,13 +464,13 @@ static void block_end_io(struct super_block *sb, int rw,
|
||||
wake_up(&binf->waitq);
|
||||
}
|
||||
|
||||
static void block_bio_end_io(struct bio *bio, int err)
|
||||
static void KC_DECLARE_BIO_END_IO(block_bio_end_io, struct bio *bio)
|
||||
{
|
||||
struct block_private *bp = bio->bi_private;
|
||||
struct super_block *sb = bp->sb;
|
||||
|
||||
TRACE_BLOCK(end_io, bp);
|
||||
block_end_io(sb, bio->bi_rw, bp, err);
|
||||
block_end_io(sb, kc_bio_get_opf(bio), bp, kc_bio_get_errno(bio));
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
@@ -477,7 +478,7 @@ static void block_bio_end_io(struct bio *bio, int err)
|
||||
* Kick off IO for a single block.
|
||||
*/
|
||||
static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
int rw)
|
||||
unsigned int opf)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct bio *bio = NULL;
|
||||
@@ -510,8 +511,9 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
break;
|
||||
}
|
||||
|
||||
bio->bi_sector = sector + (off >> 9);
|
||||
bio->bi_bdev = sbi->meta_bdev;
|
||||
kc_bio_set_opf(bio, opf);
|
||||
kc_bio_set_sector(bio, sector + (off >> 9));
|
||||
bio_set_dev(bio, sbi->meta_bdev);
|
||||
bio->bi_end_io = block_bio_end_io;
|
||||
bio->bi_private = bp;
|
||||
|
||||
@@ -528,18 +530,18 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
BUG();
|
||||
|
||||
if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
|
||||
submit_bio(rw, bio);
|
||||
kc_submit_bio(bio);
|
||||
bio = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (bio)
|
||||
submit_bio(rw, bio);
|
||||
kc_submit_bio(bio);
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
/* let racing end_io know we're done */
|
||||
block_end_io(sb, rw, bp, ret);
|
||||
block_end_io(sb, opf, bp, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -640,7 +642,7 @@ static struct block_private *block_read(struct super_block *sb, u64 blkno)
|
||||
|
||||
if (!test_bit(BLOCK_BIT_UPTODATE, &bp->bits) &&
|
||||
test_and_clear_bit(BLOCK_BIT_NEW, &bp->bits)) {
|
||||
ret = block_submit_bio(sb, bp, READ);
|
||||
ret = block_submit_bio(sb, bp, REQ_OP_READ);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
@@ -969,7 +971,7 @@ int scoutfs_block_writer_write(struct super_block *sb,
|
||||
/* retry previous write errors */
|
||||
clear_bit(BLOCK_BIT_ERROR, &bp->bits);
|
||||
|
||||
ret = block_submit_bio(sb, bp, WRITE);
|
||||
ret = block_submit_bio(sb, bp, REQ_OP_WRITE);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
@@ -1069,6 +1071,16 @@ u64 scoutfs_block_writer_dirty_bytes(struct super_block *sb,
|
||||
return wri->nr_dirty_blocks * SCOUTFS_BLOCK_LG_SIZE;
|
||||
}
|
||||
|
||||
static unsigned long block_count_objects(struct shrinker *shrink, struct shrink_control *sc)
|
||||
{
|
||||
struct block_info *binf = KC_SHRINKER_CONTAINER_OF(shrink, struct block_info);
|
||||
struct super_block *sb = binf->sb;
|
||||
|
||||
scoutfs_inc_counter(sb, block_cache_count_objects);
|
||||
|
||||
return shrinker_min_long(atomic_read(&binf->total_inserted));
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove a number of cached blocks that haven't been used recently.
|
||||
*
|
||||
@@ -1089,24 +1101,18 @@ u64 scoutfs_block_writer_dirty_bytes(struct super_block *sb,
|
||||
* atomically remove blocks when the only references are ours and the
|
||||
* hash table.
|
||||
*/
|
||||
static int block_shrink(struct shrinker *shrink, struct shrink_control *sc)
|
||||
static unsigned long block_scan_objects(struct shrinker *shrink, struct shrink_control *sc)
|
||||
{
|
||||
struct block_info *binf = container_of(shrink, struct block_info,
|
||||
shrinker);
|
||||
struct block_info *binf = KC_SHRINKER_CONTAINER_OF(shrink, struct block_info);
|
||||
struct super_block *sb = binf->sb;
|
||||
struct rhashtable_iter iter;
|
||||
struct block_private *bp;
|
||||
bool stop = false;
|
||||
unsigned long nr;
|
||||
unsigned long freed = 0;
|
||||
unsigned long nr = sc->nr_to_scan;
|
||||
u64 recently;
|
||||
|
||||
nr = sc->nr_to_scan;
|
||||
if (nr == 0)
|
||||
goto out;
|
||||
|
||||
scoutfs_inc_counter(sb, block_cache_shrink);
|
||||
|
||||
nr = DIV_ROUND_UP(nr, SCOUTFS_BLOCK_LG_PAGES_PER);
|
||||
scoutfs_inc_counter(sb, block_cache_scan_objects);
|
||||
|
||||
recently = accessed_recently(binf);
|
||||
rhashtable_walk_enter(&binf->ht, &iter);
|
||||
@@ -1151,6 +1157,7 @@ static int block_shrink(struct shrinker *shrink, struct shrink_control *sc)
|
||||
if (block_remove_solo(sb, bp)) {
|
||||
scoutfs_inc_counter(sb, block_cache_shrink_remove);
|
||||
TRACE_BLOCK(shrink, bp);
|
||||
freed++;
|
||||
nr--;
|
||||
}
|
||||
block_put(sb, bp);
|
||||
@@ -1159,12 +1166,11 @@ static int block_shrink(struct shrinker *shrink, struct shrink_control *sc)
|
||||
|
||||
rhashtable_walk_stop(&iter);
|
||||
rhashtable_walk_exit(&iter);
|
||||
out:
|
||||
|
||||
if (stop)
|
||||
return -1;
|
||||
return SHRINK_STOP;
|
||||
else
|
||||
return min_t(u64, INT_MAX,
|
||||
(u64)atomic_read(&binf->total_inserted) * SCOUTFS_BLOCK_LG_PAGES_PER);
|
||||
return freed;
|
||||
}
|
||||
|
||||
struct sm_block_completion {
|
||||
@@ -1172,11 +1178,11 @@ struct sm_block_completion {
|
||||
int err;
|
||||
};
|
||||
|
||||
static void sm_block_bio_end_io(struct bio *bio, int err)
|
||||
static void KC_DECLARE_BIO_END_IO(sm_block_bio_end_io, struct bio *bio)
|
||||
{
|
||||
struct sm_block_completion *sbc = bio->bi_private;
|
||||
|
||||
sbc->err = err;
|
||||
sbc->err = kc_bio_get_errno(bio);
|
||||
complete(&sbc->comp);
|
||||
bio_put(bio);
|
||||
}
|
||||
@@ -1191,9 +1197,8 @@ static void sm_block_bio_end_io(struct bio *bio, int err)
|
||||
* only layer that sees the full block buffer so we pass the calculated
|
||||
* crc to the caller for them to check in their context.
|
||||
*/
|
||||
static int sm_block_io(struct super_block *sb, struct block_device *bdev, int rw, u64 blkno,
|
||||
struct scoutfs_block_header *hdr, size_t len,
|
||||
__le32 *blk_crc)
|
||||
static int sm_block_io(struct super_block *sb, struct block_device *bdev, unsigned int opf,
|
||||
u64 blkno, struct scoutfs_block_header *hdr, size_t len, __le32 *blk_crc)
|
||||
{
|
||||
struct scoutfs_block_header *pg_hdr;
|
||||
struct sm_block_completion sbc;
|
||||
@@ -1207,7 +1212,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, int rw
|
||||
return -EIO;
|
||||
|
||||
if (WARN_ON_ONCE(len > SCOUTFS_BLOCK_SM_SIZE) ||
|
||||
WARN_ON_ONCE(!(rw & WRITE) && !blk_crc))
|
||||
WARN_ON_ONCE(!op_is_write(opf) && !blk_crc))
|
||||
return -EINVAL;
|
||||
|
||||
page = alloc_page(GFP_NOFS);
|
||||
@@ -1216,7 +1221,7 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, int rw
|
||||
|
||||
pg_hdr = page_address(page);
|
||||
|
||||
if (rw & WRITE) {
|
||||
if (op_is_write(opf)) {
|
||||
memcpy(pg_hdr, hdr, len);
|
||||
if (len < SCOUTFS_BLOCK_SM_SIZE)
|
||||
memset((char *)pg_hdr + len, 0,
|
||||
@@ -1230,8 +1235,9 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, int rw
|
||||
goto out;
|
||||
}
|
||||
|
||||
bio->bi_sector = blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9);
|
||||
bio->bi_bdev = bdev;
|
||||
kc_bio_set_opf(bio, opf | REQ_SYNC);
|
||||
kc_bio_set_sector(bio, blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9));
|
||||
bio_set_dev(bio, bdev);
|
||||
bio->bi_end_io = sm_block_bio_end_io;
|
||||
bio->bi_private = &sbc;
|
||||
bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
|
||||
@@ -1239,12 +1245,12 @@ static int sm_block_io(struct super_block *sb, struct block_device *bdev, int rw
|
||||
init_completion(&sbc.comp);
|
||||
sbc.err = 0;
|
||||
|
||||
submit_bio((rw & WRITE) ? WRITE_SYNC : READ_SYNC, bio);
|
||||
kc_submit_bio(bio);
|
||||
|
||||
wait_for_completion(&sbc.comp);
|
||||
ret = sbc.err;
|
||||
|
||||
if (ret == 0 && !(rw & WRITE)) {
|
||||
if (ret == 0 && !op_is_write(opf)) {
|
||||
memcpy(hdr, pg_hdr, len);
|
||||
*blk_crc = block_calc_crc(pg_hdr, SCOUTFS_BLOCK_SM_SIZE);
|
||||
}
|
||||
@@ -1258,14 +1264,14 @@ int scoutfs_block_read_sm(struct super_block *sb,
|
||||
struct scoutfs_block_header *hdr, size_t len,
|
||||
__le32 *blk_crc)
|
||||
{
|
||||
return sm_block_io(sb, bdev, READ, blkno, hdr, len, blk_crc);
|
||||
return sm_block_io(sb, bdev, REQ_OP_READ, blkno, hdr, len, blk_crc);
|
||||
}
|
||||
|
||||
int scoutfs_block_write_sm(struct super_block *sb,
|
||||
struct block_device *bdev, u64 blkno,
|
||||
struct scoutfs_block_header *hdr, size_t len)
|
||||
{
|
||||
return sm_block_io(sb, bdev, WRITE, blkno, hdr, len, NULL);
|
||||
return sm_block_io(sb, bdev, REQ_OP_WRITE, blkno, hdr, len, NULL);
|
||||
}
|
||||
|
||||
int scoutfs_block_setup(struct super_block *sb)
|
||||
@@ -1290,9 +1296,9 @@ int scoutfs_block_setup(struct super_block *sb)
|
||||
atomic_set(&binf->total_inserted, 0);
|
||||
atomic64_set(&binf->access_counter, 0);
|
||||
init_waitqueue_head(&binf->waitq);
|
||||
binf->shrinker.shrink = block_shrink;
|
||||
binf->shrinker.seeks = DEFAULT_SEEKS;
|
||||
register_shrinker(&binf->shrinker);
|
||||
KC_INIT_SHRINKER_FUNCS(&binf->shrinker, block_count_objects,
|
||||
block_scan_objects);
|
||||
KC_REGISTER_SHRINKER(&binf->shrinker);
|
||||
INIT_WORK(&binf->free_work, block_free_work);
|
||||
init_llist_head(&binf->free_llist);
|
||||
|
||||
@@ -1312,7 +1318,7 @@ void scoutfs_block_destroy(struct super_block *sb)
|
||||
struct block_info *binf = SCOUTFS_SB(sb)->block_info;
|
||||
|
||||
if (binf) {
|
||||
unregister_shrinker(&binf->shrinker);
|
||||
KC_UNREGISTER_SHRINKER(&binf->shrinker);
|
||||
block_remove_all(sb);
|
||||
flush_work(&binf->free_work);
|
||||
rhashtable_destroy(&binf->ht);
|
||||
|
||||
439
kmod/src/btree.c
439
kmod/src/btree.c
@@ -2029,187 +2029,253 @@ int scoutfs_btree_rebalance(struct super_block *sb,
|
||||
key, SCOUTFS_BTREE_MAX_VAL_LEN, NULL, NULL, NULL);
|
||||
}
|
||||
|
||||
struct merge_pos {
|
||||
/*
 * A window of merged items that is built up from the input roots
 * before being applied to the destination root.
 */
struct merged_range {
|
||||
/* first key of the range; reads of the inputs begin here */
struct scoutfs_key start;
|
||||
/* last key of the range; trimming frees items with greater keys */
struct scoutfs_key end;
|
||||
/* rbtree of struct merged_item, linked by their node and sorted by key */
struct rb_root root;
|
||||
/* running sum of item_len_bytes(val_len) for the items in root */
int size;
|
||||
};
|
||||
|
||||
struct merged_item {
|
||||
struct rb_node node;
|
||||
struct scoutfs_btree_root *root;
|
||||
struct scoutfs_block *bl;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_avl_node *avl;
|
||||
struct scoutfs_key *key;
|
||||
struct scoutfs_key key;
|
||||
u64 seq;
|
||||
u8 flags;
|
||||
unsigned int val_len;
|
||||
u8 *val;
|
||||
u8 val[0];
|
||||
};
|
||||
|
||||
static struct merge_pos *first_mpos(struct rb_root *root)
|
||||
static inline struct merged_item *mitem_container(struct rb_node *node)
|
||||
{
|
||||
struct rb_node *node = rb_first(root);
|
||||
if (node)
|
||||
return container_of(node, struct merge_pos, node);
|
||||
return node ? container_of(node, struct merged_item, node) : NULL;
|
||||
}
|
||||
|
||||
/* Return the item at the first node of the rbtree, NULL if the tree is empty. */
static inline struct merged_item *first_mitem(struct rb_root *root)
{
	struct rb_node *leftmost = rb_first(root);

	return mitem_container(leftmost);
}
|
||||
|
||||
/* Return the item at the last node of the rbtree, NULL if the tree is empty. */
static inline struct merged_item *last_mitem(struct rb_root *root)
{
	struct rb_node *rightmost = rb_last(root);

	return mitem_container(rightmost);
}
|
||||
|
||||
static inline struct merged_item *next_mitem(struct merged_item *mitem)
|
||||
{
|
||||
return mitem_container(mitem ? rb_next(&mitem->node) : NULL);
|
||||
}
|
||||
|
||||
static inline struct merged_item *prev_mitem(struct merged_item *mitem)
|
||||
{
|
||||
return mitem_container(mitem ? rb_prev(&mitem->node) : NULL);
|
||||
}
|
||||
|
||||
static struct merged_item *find_mitem(struct rb_root *root, struct scoutfs_key *key,
|
||||
struct rb_node **parent_ret, struct rb_node ***link_ret)
|
||||
{
|
||||
struct rb_node **node = &root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct merged_item *mitem;
|
||||
int cmp;
|
||||
|
||||
while (*node) {
|
||||
parent = *node;
|
||||
mitem = container_of(*node, struct merged_item, node);
|
||||
|
||||
cmp = scoutfs_key_compare(key, &mitem->key);
|
||||
|
||||
if (cmp < 0) {
|
||||
node = &(*node)->rb_left;
|
||||
} else if (cmp > 0) {
|
||||
node = &(*node)->rb_right;
|
||||
} else {
|
||||
*parent_ret = NULL;
|
||||
*link_ret = NULL;
|
||||
return mitem;
|
||||
}
|
||||
}
|
||||
|
||||
*parent_ret = parent;
|
||||
*link_ret = node;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct merge_pos *next_mpos(struct merge_pos *mpos)
|
||||
static void insert_mitem(struct merged_range *rng, struct merged_item *mitem,
|
||||
struct rb_node *parent, struct rb_node **link)
|
||||
{
|
||||
struct rb_node *node;
|
||||
|
||||
if (mpos && (node = rb_next(&mpos->node)))
|
||||
return container_of(node, struct merge_pos, node);
|
||||
else
|
||||
return NULL;
|
||||
rb_link_node(&mitem->node, parent, link);
|
||||
rb_insert_color(&mitem->node, &rng->root);
|
||||
rng->size += item_len_bytes(mitem->val_len);
|
||||
}
|
||||
|
||||
static void free_mpos(struct super_block *sb, struct merge_pos *mpos)
|
||||
static void replace_mitem(struct merged_range *rng, struct merged_item *victim,
|
||||
struct merged_item *new)
|
||||
{
|
||||
scoutfs_block_put(sb, mpos->bl);
|
||||
kfree(mpos);
|
||||
rb_replace_node(&victim->node, &new->node, &rng->root);
|
||||
RB_CLEAR_NODE(&victim->node);
|
||||
rng->size -= item_len_bytes(victim->val_len);
|
||||
rng->size += item_len_bytes(new->val_len);
|
||||
}
|
||||
|
||||
static void insert_mpos(struct rb_root *pos_root, struct merge_pos *ins)
|
||||
static void free_mitem(struct merged_range *rng, struct merged_item *mitem)
|
||||
{
|
||||
struct rb_node **node = &pos_root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct merge_pos *mpos;
|
||||
int cmp;
|
||||
if (IS_ERR_OR_NULL(mitem))
|
||||
return;
|
||||
|
||||
parent = NULL;
|
||||
while (*node) {
|
||||
parent = *node;
|
||||
mpos = container_of(*node, struct merge_pos, node);
|
||||
|
||||
/* sort merge items by key then newest to oldest */
|
||||
cmp = scoutfs_key_compare(ins->key, mpos->key) ?:
|
||||
-scoutfs_cmp(ins->seq, mpos->seq);
|
||||
|
||||
if (cmp < 0)
|
||||
node = &(*node)->rb_left;
|
||||
else
|
||||
node = &(*node)->rb_right;
|
||||
if (!RB_EMPTY_NODE(&mitem->node)) {
|
||||
rng->size -= item_len_bytes(mitem->val_len);
|
||||
rb_erase(&mitem->node, &rng->root);
|
||||
}
|
||||
|
||||
rb_link_node(&ins->node, parent, node);
|
||||
rb_insert_color(&ins->node, pos_root);
|
||||
kfree(mitem);
|
||||
}
|
||||
|
||||
static void trim_range_size(struct merged_range *rng, int merge_window)
|
||||
{
|
||||
struct merged_item *mitem;
|
||||
struct merged_item *tmp;
|
||||
|
||||
mitem = last_mitem(&rng->root);
|
||||
while (mitem && rng->size > merge_window) {
|
||||
|
||||
rng->end = mitem->key;
|
||||
scoutfs_key_dec(&rng->end);
|
||||
|
||||
tmp = mitem;
|
||||
mitem = prev_mitem(mitem);
|
||||
free_mitem(rng, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
static void trim_range_end(struct merged_range *rng)
|
||||
{
|
||||
struct merged_item *mitem;
|
||||
struct merged_item *tmp;
|
||||
|
||||
mitem = last_mitem(&rng->root);
|
||||
while (mitem && scoutfs_key_compare(&mitem->key, &rng->end) > 0) {
|
||||
tmp = mitem;
|
||||
mitem = prev_mitem(mitem);
|
||||
free_mitem(rng, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the next item in the merge_pos root in the caller's range and
|
||||
* insert it into the rbtree sorted by key and version so that merging
|
||||
* can find the next newest item at the front of the rbtree. We free
|
||||
* the mpos on error or if there are no more items in the range.
|
||||
* Record and combine logged items from log roots for merging with the
|
||||
* writable destination root. The caller is responsible for trimming
|
||||
* the range if it gets too large or if the key range shrinks.
|
||||
*/
|
||||
static int reset_mpos(struct super_block *sb, struct rb_root *pos_root, struct merge_pos *mpos,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end)
|
||||
static int merge_read_item(struct super_block *sb, struct scoutfs_key *key, u64 seq, u8 flags,
|
||||
void *val, int val_len, void *arg)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_avl_node *next;
|
||||
struct btree_walk_key_range kr;
|
||||
struct scoutfs_key walk_key;
|
||||
int ret = 0;
|
||||
struct merged_range *rng = arg;
|
||||
struct merged_item *mitem;
|
||||
struct merged_item *found;
|
||||
struct rb_node *parent;
|
||||
struct rb_node **link;
|
||||
int ret;
|
||||
|
||||
/* always erase before freeing or inserting */
|
||||
if (!RB_EMPTY_NODE(&mpos->node)) {
|
||||
rb_erase(&mpos->node, pos_root);
|
||||
RB_CLEAR_NODE(&mpos->node);
|
||||
}
|
||||
|
||||
/*
|
||||
* advance to next item via the avl tree. The caller's pos is
|
||||
* only ever incremented past the last key so we can use next to
|
||||
* iterate rather than using search to skip past multiple items.
|
||||
*/
|
||||
if (mpos->avl)
|
||||
mpos->avl = scoutfs_avl_next(&mpos->bt->item_root, mpos->avl);
|
||||
|
||||
/* find the next leaf with the key if we run out of items */
|
||||
walk_key = *start;
|
||||
while (!mpos->avl && !scoutfs_key_is_zeros(&walk_key)) {
|
||||
scoutfs_block_put(sb, mpos->bl);
|
||||
mpos->bl = NULL;
|
||||
ret = btree_walk(sb, NULL, NULL, mpos->root, BTW_NEXT, &walk_key,
|
||||
0, &mpos->bl, &kr, NULL);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
free_mpos(sb, mpos);
|
||||
found = find_mitem(&rng->root, key, &parent, &link);
|
||||
if (found) {
|
||||
ret = scoutfs_forest_combine_deltas(key, found->val, found->val_len, val, val_len);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > 0) {
|
||||
if (ret == SCOUTFS_DELTA_COMBINED) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_combined);
|
||||
} else if (ret == SCOUTFS_DELTA_COMBINED_NULL) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_null);
|
||||
free_mitem(rng, found);
|
||||
}
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
mpos->bt = mpos->bl->data;
|
||||
|
||||
mpos->avl = scoutfs_avl_search(&mpos->bt->item_root, cmp_key_item,
|
||||
start, NULL, NULL, &next, NULL) ?: next;
|
||||
if (mpos->avl == NULL)
|
||||
walk_key = kr.iter_next;
|
||||
if (found->seq >= seq) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* see if we're out of items within the range */
|
||||
item = node_item(mpos->avl);
|
||||
if (!item || scoutfs_key_compare(item_key(item), end) > 0) {
|
||||
free_mpos(sb, mpos);
|
||||
ret = 0;
|
||||
mitem = kmalloc(offsetof(struct merged_item, val[val_len]), GFP_NOFS);
|
||||
if (!mitem) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* insert the next item within range at its version */
|
||||
mpos->key = item_key(item);
|
||||
mpos->seq = le64_to_cpu(item->seq);
|
||||
mpos->flags = item->flags;
|
||||
mpos->val_len = item_val_len(item);
|
||||
mpos->val = item_val(mpos->bt, item);
|
||||
mitem->key = *key;
|
||||
mitem->seq = seq;
|
||||
mitem->flags = flags;
|
||||
mitem->val_len = val_len;
|
||||
if (val_len)
|
||||
memcpy(mitem->val, val, val_len);
|
||||
|
||||
if (found) {
|
||||
replace_mitem(rng, found, mitem);
|
||||
free_mitem(rng, found);
|
||||
} else {
|
||||
insert_mitem(rng, mitem, parent, link);
|
||||
}
|
||||
|
||||
insert_mpos(pos_root, mpos);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller has reset all the merge positions for all the input log
|
||||
* btree roots and wants the next logged item it should try and merge
|
||||
* with the items in the fs_root.
|
||||
* Read a range of merged items. The caller has set the key bounds of
|
||||
* the range. We read a merge window's worth of items from blocks in
|
||||
* each input btree.
|
||||
*
|
||||
* We look ahead in the logged item stream to see if we should merge any
|
||||
* older logged delta items into one result for the caller. We also
|
||||
* take this opportunity to skip and reset the mpos for any older
|
||||
* versions of the first item.
|
||||
* The caller can only use the smallest range that overlaps with all the
|
||||
* blocks that we read. We start reading from the range's start key so
|
||||
* it will always be present and we don't need to adjust it. The final
|
||||
* block we read from each input might not cover the range's end so it
|
||||
* needs to be adjusted.
|
||||
*
|
||||
* The end range can also shrink if we have to drop items because the
|
||||
* items exceeded the merge window size.
|
||||
*/
|
||||
static int next_resolved_mpos(struct super_block *sb, struct rb_root *pos_root,
|
||||
struct scoutfs_key *end, struct merge_pos **mpos_ret)
|
||||
static int read_merged_range(struct super_block *sb, struct merged_range *rng,
|
||||
struct list_head *inputs, int merge_window)
|
||||
{
|
||||
struct merge_pos *mpos;
|
||||
struct merge_pos *next;
|
||||
struct scoutfs_btree_root_head *rhead;
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
struct scoutfs_key key;
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
while ((mpos = first_mpos(pos_root)) && (next = next_mpos(mpos)) &&
|
||||
!scoutfs_key_compare(mpos->key, next->key)) {
|
||||
list_for_each_entry(rhead, inputs, head) {
|
||||
key = rng->start;
|
||||
|
||||
ret = scoutfs_forest_combine_deltas(mpos->key, mpos->val, mpos->val_len,
|
||||
next->val, next->val_len);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
/* reset advances to the next item */
|
||||
key = *mpos->key;
|
||||
scoutfs_key_inc(&key);
|
||||
|
||||
/* always skip next combined or older version */
|
||||
ret = reset_mpos(sb, pos_root, next, &key, end);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (ret == SCOUTFS_DELTA_COMBINED) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_combined);
|
||||
} else if (ret == SCOUTFS_DELTA_COMBINED_NULL) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_null);
|
||||
/* if merging resulted in no info, skip current */
|
||||
ret = reset_mpos(sb, pos_root, mpos, &key, end);
|
||||
for (i = 0; i < merge_window; i += SCOUTFS_BLOCK_LG_SIZE) {
|
||||
start = key;
|
||||
end = rng->end;
|
||||
ret = scoutfs_btree_read_items(sb, &rhead->root, &key, &start, &end,
|
||||
merge_read_item, rng);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_key_compare(&end, &rng->end) >= 0)
|
||||
break;
|
||||
|
||||
key = end;
|
||||
scoutfs_key_inc(&key);
|
||||
}
|
||||
|
||||
if (scoutfs_key_compare(&end, &rng->end) < 0) {
|
||||
rng->end = end;
|
||||
trim_range_end(rng);
|
||||
}
|
||||
|
||||
if (rng->size > merge_window)
|
||||
trim_range_size(rng, merge_window);
|
||||
}
|
||||
|
||||
*mpos_ret = mpos;
|
||||
trace_scoutfs_btree_merge_read_range(sb, &rng->start, &rng->end, rng->size);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -2226,6 +2292,13 @@ static int next_resolved_mpos(struct super_block *sb, struct rb_root *pos_root,
|
||||
* to allocators running low or needing to join/split the parent.
|
||||
* *next_ret is set to the next key which hasn't been merged so that the
|
||||
* caller can retry with a new allocator and subtree.
|
||||
*
|
||||
* The number of input roots can be immense. The merge_window specifies
|
||||
* the size of the set of merged items that we'll maintain as we iterate
|
||||
* over all the input roots. Once we've merged items into the window
|
||||
* from all the input roots the merged input items are then merged to
|
||||
* the writable destination root. It may take multiple passes of
|
||||
* windows of merged items to cover the input key range.
|
||||
*/
|
||||
int scoutfs_btree_merge(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
@@ -2235,18 +2308,16 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
struct scoutfs_key *next_ret,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct list_head *inputs,
|
||||
bool subtree, int dirty_limit, int alloc_low)
|
||||
bool subtree, int dirty_limit, int alloc_low, int merge_window)
|
||||
{
|
||||
struct scoutfs_btree_root_head *rhead;
|
||||
struct rb_root pos_root = RB_ROOT;
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_block *bl = NULL;
|
||||
struct btree_walk_key_range kr;
|
||||
struct scoutfs_avl_node *par;
|
||||
struct scoutfs_key next;
|
||||
struct merge_pos *mpos;
|
||||
struct merge_pos *tmp;
|
||||
struct merged_item *mitem;
|
||||
struct merged_item *tmp;
|
||||
struct merged_range rng;
|
||||
int walk_val_len;
|
||||
int walk_flags;
|
||||
bool is_del;
|
||||
@@ -2257,49 +2328,59 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
trace_scoutfs_btree_merge(sb, root, start, end);
|
||||
scoutfs_inc_counter(sb, btree_merge);
|
||||
|
||||
list_for_each_entry(rhead, inputs, head) {
|
||||
mpos = kzalloc(sizeof(*mpos), GFP_NOFS);
|
||||
if (!mpos) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
RB_CLEAR_NODE(&mpos->node);
|
||||
mpos->root = &rhead->root;
|
||||
|
||||
ret = reset_mpos(sb, &pos_root, mpos, start, end);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
walk_flags = BTW_DIRTY;
|
||||
if (subtree)
|
||||
walk_flags |= BTW_SUBTREE;
|
||||
walk_val_len = 0;
|
||||
|
||||
while ((ret = next_resolved_mpos(sb, &pos_root, end, &mpos)) == 0 && mpos) {
|
||||
rng.start = *start;
|
||||
rng.end = *end;
|
||||
rng.root = RB_ROOT;
|
||||
rng.size = 0;
|
||||
|
||||
ret = read_merged_range(sb, &rng, inputs, merge_window);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
for (;;) {
|
||||
/* read next window as it empties (and it is possible to read an empty range) */
|
||||
mitem = first_mitem(&rng.root);
|
||||
if (!mitem) {
|
||||
/* done if the read range hit the end */
|
||||
if (scoutfs_key_compare(&rng.end, end) >= 0)
|
||||
break;
|
||||
|
||||
/* read next batch of merged items */
|
||||
rng.start = rng.end;
|
||||
scoutfs_key_inc(&rng.start);
|
||||
rng.end = *end;
|
||||
ret = read_merged_range(sb, &rng, inputs, merge_window);
|
||||
if (ret < 0)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (scoutfs_block_writer_dirty_bytes(sb, wri) >= dirty_limit) {
|
||||
scoutfs_inc_counter(sb, btree_merge_dirty_limit);
|
||||
ret = -ERANGE;
|
||||
*next_ret = *mpos->key;
|
||||
*next_ret = mitem->key;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (scoutfs_alloc_meta_low(sb, alloc, alloc_low)) {
|
||||
scoutfs_inc_counter(sb, btree_merge_alloc_low);
|
||||
ret = -ERANGE;
|
||||
*next_ret = *mpos->key;
|
||||
*next_ret = mitem->key;
|
||||
goto out;
|
||||
}
|
||||
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
ret = btree_walk(sb, alloc, wri, root, walk_flags,
|
||||
mpos->key, walk_val_len, &bl, &kr, NULL);
|
||||
&mitem->key, walk_val_len, &bl, &kr, NULL);
|
||||
if (ret < 0) {
|
||||
if (ret == -ERANGE)
|
||||
*next_ret = *mpos->key;
|
||||
*next_ret = mitem->key;
|
||||
goto out;
|
||||
}
|
||||
bt = bl->data;
|
||||
@@ -2311,22 +2392,21 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
continue;
|
||||
}
|
||||
|
||||
while ((ret = next_resolved_mpos(sb, &pos_root, end, &mpos)) == 0 && mpos) {
|
||||
|
||||
while (mitem) {
|
||||
/* walk to new leaf if we exceed parent ref key */
|
||||
if (scoutfs_key_compare(mpos->key, &kr.end) > 0)
|
||||
if (scoutfs_key_compare(&mitem->key, &kr.end) > 0)
|
||||
break;
|
||||
|
||||
/* see if there's an existing item */
|
||||
item = leaf_item_hash_search(sb, bt, mpos->key);
|
||||
is_del = !!(mpos->flags & SCOUTFS_ITEM_FLAG_DELETION);
|
||||
item = leaf_item_hash_search(sb, bt, &mitem->key);
|
||||
is_del = !!(mitem->flags & SCOUTFS_ITEM_FLAG_DELETION);
|
||||
|
||||
/* see if we're merging delta items */
|
||||
if (item && !is_del)
|
||||
delta = scoutfs_forest_combine_deltas(mpos->key,
|
||||
delta = scoutfs_forest_combine_deltas(&mitem->key,
|
||||
item_val(bt, item),
|
||||
item_val_len(item),
|
||||
mpos->val, mpos->val_len);
|
||||
mitem->val, mitem->val_len);
|
||||
else
|
||||
delta = 0;
|
||||
if (delta < 0) {
|
||||
@@ -2338,40 +2418,38 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_null);
|
||||
}
|
||||
|
||||
trace_scoutfs_btree_merge_items(sb, mpos->root,
|
||||
mpos->key, mpos->val_len,
|
||||
trace_scoutfs_btree_merge_items(sb, &mitem->key, mitem->val_len,
|
||||
item ? root : NULL,
|
||||
item ? item_key(item) : NULL,
|
||||
item ? item_val_len(item) : 0, is_del);
|
||||
|
||||
/* rewalk and split if ins/update needs room */
|
||||
if (!is_del && !delta && !mid_free_item_room(bt, mpos->val_len)) {
|
||||
if (!is_del && !delta && !mid_free_item_room(bt, mitem->val_len)) {
|
||||
walk_flags |= BTW_INSERT;
|
||||
walk_val_len = mpos->val_len;
|
||||
walk_val_len = mitem->val_len;
|
||||
break;
|
||||
}
|
||||
|
||||
/* insert missing non-deletion merge items */
|
||||
if (!item && !is_del) {
|
||||
scoutfs_avl_search(&bt->item_root,
|
||||
cmp_key_item, mpos->key,
|
||||
scoutfs_avl_search(&bt->item_root, cmp_key_item, &mitem->key,
|
||||
&cmp, &par, NULL, NULL);
|
||||
create_item(bt, mpos->key, mpos->seq, mpos->flags,
|
||||
mpos->val, mpos->val_len, par, cmp);
|
||||
create_item(bt, &mitem->key, mitem->seq, mitem->flags,
|
||||
mitem->val, mitem->val_len, par, cmp);
|
||||
scoutfs_inc_counter(sb, btree_merge_insert);
|
||||
}
|
||||
|
||||
/* update existing items */
|
||||
if (item && !is_del && !delta) {
|
||||
item->seq = cpu_to_le64(mpos->seq);
|
||||
item->flags = mpos->flags;
|
||||
update_item_value(bt, item, mpos->val, mpos->val_len);
|
||||
item->seq = cpu_to_le64(mitem->seq);
|
||||
item->flags = mitem->flags;
|
||||
update_item_value(bt, item, mitem->val, mitem->val_len);
|
||||
scoutfs_inc_counter(sb, btree_merge_update);
|
||||
}
|
||||
|
||||
/* update combined delta item seq */
|
||||
if (delta == SCOUTFS_DELTA_COMBINED) {
|
||||
item->seq = cpu_to_le64(mpos->seq);
|
||||
item->seq = cpu_to_le64(mitem->seq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2403,21 +2481,18 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
walk_flags &= ~(BTW_INSERT | BTW_DELETE);
|
||||
walk_val_len = 0;
|
||||
|
||||
/* finished with this key, skip any older items */
|
||||
next = *mpos->key;
|
||||
scoutfs_key_inc(&next);
|
||||
ret = reset_mpos(sb, &pos_root, mpos, &next, end);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
/* finished with this merged item */
|
||||
tmp = mitem;
|
||||
mitem = next_mitem(mitem);
|
||||
free_mitem(&rng, tmp);
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
scoutfs_block_put(sb, bl);
|
||||
rbtree_postorder_for_each_entry_safe(mpos, tmp, &pos_root, node) {
|
||||
free_mpos(sb, mpos);
|
||||
}
|
||||
rbtree_postorder_for_each_entry_safe(mitem, tmp, &rng.root, node)
|
||||
free_mitem(&rng, mitem);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -119,7 +119,7 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
struct scoutfs_key *next_ret,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct list_head *input_list,
|
||||
bool subtree, int dirty_limit, int alloc_low);
|
||||
bool subtree, int dirty_limit, int alloc_low, int merge_window);
|
||||
|
||||
int scoutfs_btree_free_blocks(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
|
||||
@@ -30,6 +30,8 @@
|
||||
EXPAND_COUNTER(block_cache_free) \
|
||||
EXPAND_COUNTER(block_cache_free_work) \
|
||||
EXPAND_COUNTER(block_cache_remove_stale) \
|
||||
EXPAND_COUNTER(block_cache_count_objects) \
|
||||
EXPAND_COUNTER(block_cache_scan_objects) \
|
||||
EXPAND_COUNTER(block_cache_shrink) \
|
||||
EXPAND_COUNTER(block_cache_shrink_next) \
|
||||
EXPAND_COUNTER(block_cache_shrink_recent) \
|
||||
@@ -88,6 +90,8 @@
|
||||
EXPAND_COUNTER(forest_read_items) \
|
||||
EXPAND_COUNTER(forest_roots_next_hint) \
|
||||
EXPAND_COUNTER(forest_set_bloom_bits) \
|
||||
EXPAND_COUNTER(item_cache_count_objects) \
|
||||
EXPAND_COUNTER(item_cache_scan_objects) \
|
||||
EXPAND_COUNTER(item_clear_dirty) \
|
||||
EXPAND_COUNTER(item_create) \
|
||||
EXPAND_COUNTER(item_delete) \
|
||||
@@ -121,6 +125,7 @@
|
||||
EXPAND_COUNTER(item_update) \
|
||||
EXPAND_COUNTER(item_write_dirty) \
|
||||
EXPAND_COUNTER(lock_alloc) \
|
||||
EXPAND_COUNTER(lock_count_objects) \
|
||||
EXPAND_COUNTER(lock_free) \
|
||||
EXPAND_COUNTER(lock_grant_request) \
|
||||
EXPAND_COUNTER(lock_grant_response) \
|
||||
@@ -134,11 +139,13 @@
|
||||
EXPAND_COUNTER(lock_lock_error) \
|
||||
EXPAND_COUNTER(lock_nonblock_eagain) \
|
||||
EXPAND_COUNTER(lock_recover_request) \
|
||||
EXPAND_COUNTER(lock_scan_objects) \
|
||||
EXPAND_COUNTER(lock_shrink_attempted) \
|
||||
EXPAND_COUNTER(lock_shrink_aborted) \
|
||||
EXPAND_COUNTER(lock_shrink_work) \
|
||||
EXPAND_COUNTER(lock_unlock) \
|
||||
EXPAND_COUNTER(lock_wait) \
|
||||
EXPAND_COUNTER(log_merge_wait_timeout) \
|
||||
EXPAND_COUNTER(net_dropped_response) \
|
||||
EXPAND_COUNTER(net_send_bytes) \
|
||||
EXPAND_COUNTER(net_send_error) \
|
||||
@@ -192,10 +199,7 @@
|
||||
EXPAND_COUNTER(srch_read_stale) \
|
||||
EXPAND_COUNTER(statfs) \
|
||||
EXPAND_COUNTER(totl_read_copied) \
|
||||
EXPAND_COUNTER(totl_read_finalized) \
|
||||
EXPAND_COUNTER(totl_read_fs) \
|
||||
EXPAND_COUNTER(totl_read_item) \
|
||||
EXPAND_COUNTER(totl_read_logged) \
|
||||
EXPAND_COUNTER(trans_commit_data_alloc_low) \
|
||||
EXPAND_COUNTER(trans_commit_dirty_meta_full) \
|
||||
EXPAND_COUNTER(trans_commit_fsync) \
|
||||
@@ -232,12 +236,12 @@ struct scoutfs_counters {
|
||||
#define SCOUTFS_PCPU_COUNTER_BATCH (1 << 30)
|
||||
|
||||
#define scoutfs_inc_counter(sb, which) \
|
||||
__percpu_counter_add(&SCOUTFS_SB(sb)->counters->which, 1, \
|
||||
SCOUTFS_PCPU_COUNTER_BATCH)
|
||||
percpu_counter_add_batch(&SCOUTFS_SB(sb)->counters->which, 1, \
|
||||
SCOUTFS_PCPU_COUNTER_BATCH)
|
||||
|
||||
#define scoutfs_add_counter(sb, which, cnt) \
|
||||
__percpu_counter_add(&SCOUTFS_SB(sb)->counters->which, cnt, \
|
||||
SCOUTFS_PCPU_COUNTER_BATCH)
|
||||
percpu_counter_add_batch(&SCOUTFS_SB(sb)->counters->which, cnt, \
|
||||
SCOUTFS_PCPU_COUNTER_BATCH)
|
||||
|
||||
void __init scoutfs_init_counters(void);
|
||||
int scoutfs_setup_counters(struct super_block *sb);
|
||||
|
||||
@@ -307,7 +307,7 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
|
||||
LIST_HEAD(ind_locks);
|
||||
s64 ret = 0;
|
||||
|
||||
WARN_ON_ONCE(inode && !mutex_is_locked(&inode->i_mutex));
|
||||
WARN_ON_ONCE(inode && !inode_is_locked(inode));
|
||||
|
||||
/* clamp last to the last possible block? */
|
||||
if (last > SCOUTFS_BLOCK_SM_MAX)
|
||||
@@ -558,7 +558,7 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock,
|
||||
u64 offset;
|
||||
int ret;
|
||||
|
||||
WARN_ON_ONCE(create && !mutex_is_locked(&inode->i_mutex));
|
||||
WARN_ON_ONCE(create && !inode_is_locked(inode));
|
||||
|
||||
/* make sure caller holds a cluster lock */
|
||||
lock = scoutfs_per_task_get(&si->pt_data_lock);
|
||||
@@ -704,7 +704,7 @@ static int scoutfs_readpage(struct file *file, struct page *page)
|
||||
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
|
||||
ret = scoutfs_data_wait_check(inode, page_offset(page),
|
||||
PAGE_CACHE_SIZE, SEF_OFFLINE,
|
||||
PAGE_SIZE, SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_READ, &dw,
|
||||
inode_lock);
|
||||
if (ret != 0) {
|
||||
@@ -729,6 +729,7 @@ static int scoutfs_readpage(struct file *file, struct page *page)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifndef KC_FILE_AOPS_READAHEAD
|
||||
/*
|
||||
* This is used for opportunistic read-ahead which can throw the pages
|
||||
* away if it needs to. If the caller didn't deal with offline extents
|
||||
@@ -754,14 +755,14 @@ static int scoutfs_readpages(struct file *file, struct address_space *mapping,
|
||||
|
||||
list_for_each_entry_safe(page, tmp, pages, lru) {
|
||||
ret = scoutfs_data_wait_check(inode, page_offset(page),
|
||||
PAGE_CACHE_SIZE, SEF_OFFLINE,
|
||||
PAGE_SIZE, SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_READ, NULL,
|
||||
inode_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (ret > 0) {
|
||||
list_del(&page->lru);
|
||||
page_cache_release(page);
|
||||
put_page(page);
|
||||
if (--nr_pages == 0) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
@@ -775,6 +776,29 @@ out:
|
||||
BUG_ON(!list_empty(pages));
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
static void scoutfs_readahead(struct readahead_control *rac)
|
||||
{
|
||||
struct inode *inode = rac->file->f_inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
|
||||
if (ret)
|
||||
return;
|
||||
|
||||
ret = scoutfs_data_wait_check(inode, readahead_pos(rac),
|
||||
readahead_length(rac), SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_READ, NULL,
|
||||
inode_lock);
|
||||
if (ret == 0)
|
||||
mpage_readahead(rac, scoutfs_get_block_read);
|
||||
|
||||
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int scoutfs_writepage(struct page *page, struct writeback_control *wbc)
|
||||
{
|
||||
@@ -1057,7 +1081,7 @@ long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
|
||||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
inode_lock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
@@ -1080,6 +1104,10 @@ long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
|
||||
|
||||
while(iblock <= last) {
|
||||
|
||||
ret = scoutfs_quota_check_data(sb, inode);
|
||||
if (ret)
|
||||
goto out_extent;
|
||||
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false, true);
|
||||
if (ret)
|
||||
goto out_extent;
|
||||
@@ -1118,7 +1146,7 @@ out_extent:
|
||||
up_write(&si->extent_sem);
|
||||
out_mutex:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
inode_unlock(inode);
|
||||
|
||||
out:
|
||||
trace_scoutfs_data_fallocate(sb, ino, mode, offset, len, ret);
|
||||
@@ -1221,7 +1249,7 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
struct data_ext_args from_args;
|
||||
struct data_ext_args to_args;
|
||||
struct scoutfs_extent ext;
|
||||
struct timespec cur_time;
|
||||
struct kc_timespec cur_time;
|
||||
LIST_HEAD(locks);
|
||||
bool done = false;
|
||||
loff_t from_size;
|
||||
@@ -1442,7 +1470,7 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
up_write(&from_si->extent_sem);
|
||||
up_write(&to_si->extent_sem);
|
||||
|
||||
cur_time = CURRENT_TIME;
|
||||
cur_time = current_time(from);
|
||||
if (!is_stage) {
|
||||
to->i_ctime = to->i_mtime = cur_time;
|
||||
inode_inc_iversion(to);
|
||||
@@ -1529,7 +1557,7 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
inode_lock(inode);
|
||||
down_read(&si->extent_sem);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
|
||||
@@ -1583,7 +1611,7 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
unlock:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
|
||||
up_read(&si->extent_sem);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
inode_unlock(inode);
|
||||
|
||||
out:
|
||||
if (ret == 1)
|
||||
@@ -1873,7 +1901,11 @@ int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock,
|
||||
|
||||
const struct address_space_operations scoutfs_file_aops = {
|
||||
.readpage = scoutfs_readpage,
|
||||
#ifndef KC_FILE_AOPS_READAHEAD
|
||||
.readpages = scoutfs_readpages,
|
||||
#else
|
||||
.readahead = scoutfs_readahead,
|
||||
#endif
|
||||
.writepage = scoutfs_writepage,
|
||||
.writepages = scoutfs_writepages,
|
||||
.write_begin = scoutfs_write_begin,
|
||||
@@ -1881,10 +1913,15 @@ const struct address_space_operations scoutfs_file_aops = {
|
||||
};
|
||||
|
||||
const struct file_operations scoutfs_file_fops = {
|
||||
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
|
||||
.read = do_sync_read,
|
||||
.write = do_sync_write,
|
||||
.aio_read = scoutfs_file_aio_read,
|
||||
.aio_write = scoutfs_file_aio_write,
|
||||
#else
|
||||
.read_iter = scoutfs_file_read_iter,
|
||||
.write_iter = scoutfs_file_write_iter,
|
||||
#endif
|
||||
.unlocked_ioctl = scoutfs_ioctl,
|
||||
.fsync = scoutfs_file_fsync,
|
||||
.llseek = scoutfs_file_llseek,
|
||||
|
||||
116
kmod/src/dir.c
116
kmod/src/dir.c
@@ -34,6 +34,7 @@
|
||||
#include "forest.h"
|
||||
#include "acl.h"
|
||||
#include "counters.h"
|
||||
#include "quota.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
@@ -272,7 +273,7 @@ static void set_dentry_fsdata(struct dentry *dentry, struct scoutfs_lock *lock)
|
||||
|
||||
static bool test_dentry_fsdata(struct dentry *dentry, u64 refresh)
|
||||
{
|
||||
u64 fsd = (unsigned long)ACCESS_ONCE(dentry->d_fsdata);
|
||||
u64 fsd = (unsigned long)READ_ONCE(dentry->d_fsdata);
|
||||
|
||||
return fsd == refresh;
|
||||
}
|
||||
@@ -651,6 +652,10 @@ static struct inode *lock_hold_create(struct inode *dir, struct dentry *dentry,
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
ret = scoutfs_quota_check_inode(sb, dir);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
if (orph_lock) {
|
||||
ret = scoutfs_lock_orphan(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, ino, orph_lock);
|
||||
if (ret < 0)
|
||||
@@ -672,6 +677,8 @@ retry:
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
scoutfs_inode_set_proj(inode, scoutfs_inode_get_proj(dir));
|
||||
|
||||
ret = scoutfs_dirty_inode_item(dir, *dir_lock);
|
||||
out:
|
||||
if (ret)
|
||||
@@ -735,7 +742,7 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
set_dentry_fsdata(dentry, dir_lock);
|
||||
|
||||
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
dir->i_mtime = dir->i_ctime = current_time(inode);
|
||||
inode->i_mtime = inode->i_atime = inode->i_ctime = dir->i_mtime;
|
||||
si->crtime = inode->i_mtime;
|
||||
inode_inc_iversion(dir);
|
||||
@@ -859,7 +866,7 @@ retry:
|
||||
set_dentry_fsdata(dentry, dir_lock);
|
||||
|
||||
i_size_write(dir, dir_size);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
dir->i_mtime = dir->i_ctime = current_time(inode);
|
||||
inode->i_ctime = dir->i_mtime;
|
||||
inc_nlink(inode);
|
||||
inode_inc_iversion(dir);
|
||||
@@ -900,7 +907,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
{
|
||||
struct super_block *sb = dir->i_sb;
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct timespec ts = current_kernel_time();
|
||||
struct kc_timespec ts = current_time(inode);
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
struct scoutfs_lock *orph_lock = NULL;
|
||||
struct scoutfs_lock *dir_lock = NULL;
|
||||
@@ -1059,14 +1066,14 @@ static int symlink_item_ops(struct super_block *sb, enum symlink_ops op, u64 ino
|
||||
}
|
||||
|
||||
/*
|
||||
* Full a buffer with the null terminated symlink, point nd at it, and
|
||||
* return it so put_link can free it once the vfs is done.
|
||||
* Fill a buffer with the null terminated symlink, and return it
|
||||
* so callers can free it once the vfs is done.
|
||||
*
|
||||
* We chose to pay the runtime cost of per-call allocation and copy
|
||||
* overhead instead of wiring up symlinks to the page cache, storing
|
||||
* each small link in a full page, and later having to reclaim them.
|
||||
*/
|
||||
static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
static void *scoutfs_get_link_target(struct dentry *dentry)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
@@ -1125,32 +1132,41 @@ out:
|
||||
if (ret < 0) {
|
||||
kfree(path);
|
||||
path = ERR_PTR(ret);
|
||||
} else {
|
||||
nd_set_link(nd, path);
|
||||
}
|
||||
|
||||
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
|
||||
return path;
|
||||
}
|
||||
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
|
||||
{
|
||||
char *path;
|
||||
|
||||
path = scoutfs_get_link_target(dentry);
|
||||
if (!IS_ERR_OR_NULL(path))
|
||||
nd_set_link(nd, path);
|
||||
return path;
|
||||
}
|
||||
|
||||
static void scoutfs_put_link(struct dentry *dentry, struct nameidata *nd,
|
||||
void *cookie)
|
||||
{
|
||||
if (!IS_ERR_OR_NULL(cookie))
|
||||
kfree(cookie);
|
||||
}
|
||||
#else
|
||||
static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done)
|
||||
{
|
||||
char *path;
|
||||
|
||||
const struct inode_operations scoutfs_symlink_iops = {
|
||||
.readlink = generic_readlink,
|
||||
.follow_link = scoutfs_follow_link,
|
||||
.put_link = scoutfs_put_link,
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
};
|
||||
path = scoutfs_get_link_target(dentry);
|
||||
if (!IS_ERR_OR_NULL(path))
|
||||
set_delayed_call(done, kfree_link, path);
|
||||
|
||||
return path;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Symlink target paths can be annoyingly large. We store relatively
|
||||
@@ -1204,7 +1220,7 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
set_dentry_fsdata(dentry, dir_lock);
|
||||
|
||||
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
dir->i_mtime = dir->i_ctime = current_time(inode);
|
||||
inode_inc_iversion(dir);
|
||||
|
||||
inode->i_ctime = dir->i_mtime;
|
||||
@@ -1558,7 +1574,7 @@ static int scoutfs_rename_common(struct inode *old_dir,
|
||||
struct scoutfs_lock *orph_lock = NULL;
|
||||
struct scoutfs_dirent new_dent;
|
||||
struct scoutfs_dirent old_dent;
|
||||
struct timespec now;
|
||||
struct kc_timespec now;
|
||||
bool ins_new = false;
|
||||
bool del_new = false;
|
||||
bool ins_old = false;
|
||||
@@ -1724,7 +1740,7 @@ retry:
|
||||
inc_nlink(new_dir);
|
||||
}
|
||||
|
||||
now = CURRENT_TIME;
|
||||
now = current_time(old_inode);
|
||||
old_dir->i_ctime = now;
|
||||
old_dir->i_mtime = now;
|
||||
if (new_dir != old_dir) {
|
||||
@@ -1811,12 +1827,14 @@ out_unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
static int scoutfs_rename(struct inode *old_dir,
|
||||
struct dentry *old_dentry, struct inode *new_dir,
|
||||
struct dentry *new_dentry)
|
||||
{
|
||||
return scoutfs_rename_common(old_dir, old_dentry, new_dir, new_dentry, 0);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int scoutfs_rename2(struct inode *old_dir,
|
||||
struct dentry *old_dentry, struct inode *new_dir,
|
||||
@@ -1861,7 +1879,7 @@ static int scoutfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mod
|
||||
if (ret < 0)
|
||||
goto out; /* XXX returning error but items created */
|
||||
|
||||
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
|
||||
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
|
||||
si->crtime = inode->i_mtime;
|
||||
insert_inode_hash(inode);
|
||||
ihold(inode); /* need to update inode modifications in d_tmpfile */
|
||||
@@ -1886,6 +1904,37 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
const struct inode_operations scoutfs_symlink_iops = {
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.readlink = generic_readlink,
|
||||
.follow_link = scoutfs_follow_link,
|
||||
.put_link = scoutfs_put_link,
|
||||
#else
|
||||
.get_link = scoutfs_get_link,
|
||||
#endif
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.get_acl = scoutfs_get_acl,
|
||||
#ifndef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.tmpfile = scoutfs_tmpfile,
|
||||
.rename = scoutfs_rename_common,
|
||||
.symlink = scoutfs_symlink,
|
||||
.unlink = scoutfs_unlink,
|
||||
.link = scoutfs_link,
|
||||
.mkdir = scoutfs_mkdir,
|
||||
.create = scoutfs_create,
|
||||
.lookup = scoutfs_lookup,
|
||||
#endif
|
||||
};
|
||||
|
||||
const struct file_operations scoutfs_dir_fops = {
|
||||
.KC_FOP_READDIR = scoutfs_readdir,
|
||||
#ifdef KC_FMODE_KABI_ITERATE
|
||||
@@ -1897,9 +1946,12 @@ const struct file_operations scoutfs_dir_fops = {
|
||||
};
|
||||
|
||||
|
||||
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
const struct inode_operations_wrapper scoutfs_dir_iops = {
|
||||
.ops = {
|
||||
#else
|
||||
const struct inode_operations scoutfs_dir_iops = {
|
||||
#endif
|
||||
.lookup = scoutfs_lookup,
|
||||
.mknod = scoutfs_mknod,
|
||||
.create = scoutfs_create,
|
||||
@@ -1907,17 +1959,25 @@ const struct inode_operations_wrapper scoutfs_dir_iops = {
|
||||
.link = scoutfs_link,
|
||||
.unlink = scoutfs_unlink,
|
||||
.rmdir = scoutfs_unlink,
|
||||
.rename = scoutfs_rename,
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.rename = scoutfs_rename,
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
.symlink = scoutfs_symlink,
|
||||
.permission = scoutfs_permission,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
},
|
||||
#endif
|
||||
.tmpfile = scoutfs_tmpfile,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.rename2 = scoutfs_rename2,
|
||||
#else
|
||||
.rename = scoutfs_rename2,
|
||||
#endif
|
||||
};
|
||||
|
||||
@@ -5,7 +5,11 @@
|
||||
#include "lock.h"
|
||||
|
||||
extern const struct file_operations scoutfs_dir_fops;
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
extern const struct inode_operations_wrapper scoutfs_dir_iops;
|
||||
#else
|
||||
extern const struct inode_operations scoutfs_dir_iops;
|
||||
#endif
|
||||
extern const struct inode_operations scoutfs_symlink_iops;
|
||||
|
||||
extern const struct dentry_operations scoutfs_dentry_ops;
|
||||
|
||||
142
kmod/src/file.c
142
kmod/src/file.c
@@ -28,7 +28,9 @@
|
||||
#include "inode.h"
|
||||
#include "per_task.h"
|
||||
#include "omap.h"
|
||||
#include "quota.h"
|
||||
|
||||
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
|
||||
/*
|
||||
* Start a high level file read. We check for offline extents in the
|
||||
* read region here so that we only check the extents once. We use the
|
||||
@@ -42,27 +44,27 @@ ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
struct scoutfs_lock *scoutfs_inode_lock = NULL;
|
||||
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
|
||||
DECLARE_DATA_WAIT(dw);
|
||||
int ret;
|
||||
|
||||
retry:
|
||||
/* protect checked extents from release */
|
||||
mutex_lock(&inode->i_mutex);
|
||||
inode_lock(inode);
|
||||
atomic_inc(&inode->i_dio_count);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
inode_unlock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
|
||||
ret = scoutfs_data_wait_check_iov(inode, iov, nr_segs, pos,
|
||||
SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_READ,
|
||||
&dw, inode_lock);
|
||||
&dw, scoutfs_inode_lock);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
} else {
|
||||
@@ -74,7 +76,7 @@ retry:
|
||||
out:
|
||||
inode_dio_done(inode);
|
||||
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
|
||||
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
|
||||
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_READ);
|
||||
|
||||
if (scoutfs_data_wait_found(&dw)) {
|
||||
ret = scoutfs_data_wait(inode, &dw);
|
||||
@@ -92,7 +94,7 @@ ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
struct scoutfs_lock *scoutfs_inode_lock = NULL;
|
||||
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
|
||||
DECLARE_DATA_WAIT(dw);
|
||||
int ret;
|
||||
@@ -101,34 +103,38 @@ ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
return 0;
|
||||
|
||||
retry:
|
||||
mutex_lock(&inode->i_mutex);
|
||||
inode_lock(inode);
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_complete_truncate(inode, inode_lock);
|
||||
ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, inode_lock)) {
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
|
||||
/* data_version is per inode, whole file must be online */
|
||||
ret = scoutfs_data_wait_check(inode, 0, i_size_read(inode),
|
||||
SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_WRITE,
|
||||
&dw, inode_lock);
|
||||
&dw, scoutfs_inode_lock);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_quota_check_data(sb, inode);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* XXX: remove SUID bit */
|
||||
|
||||
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
|
||||
|
||||
out:
|
||||
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
|
||||
scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_WRITE);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_WRITE);
|
||||
inode_unlock(inode);
|
||||
|
||||
if (scoutfs_data_wait_found(&dw)) {
|
||||
ret = scoutfs_data_wait(inode, &dw);
|
||||
@@ -146,6 +152,112 @@ out:
|
||||
|
||||
return ret;
|
||||
}
|
||||
#else
|
||||
ssize_t scoutfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *scoutfs_inode_lock = NULL;
|
||||
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
|
||||
DECLARE_DATA_WAIT(dw);
|
||||
int ret;
|
||||
|
||||
retry:
|
||||
/* protect checked extents from release */
|
||||
inode_lock(inode);
|
||||
atomic_inc(&inode->i_dio_count);
|
||||
inode_unlock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
|
||||
ret = scoutfs_data_wait_check(inode, iocb->ki_pos, iov_iter_count(to), SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_READ, &dw, scoutfs_inode_lock);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
} else {
|
||||
WARN_ON_ONCE(true);
|
||||
}
|
||||
|
||||
ret = generic_file_read_iter(iocb, to);
|
||||
|
||||
out:
|
||||
inode_dio_end(inode);
|
||||
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
|
||||
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_READ);
|
||||
|
||||
if (scoutfs_data_wait_found(&dw)) {
|
||||
ret = scoutfs_data_wait(inode, &dw);
|
||||
if (ret == 0)
|
||||
goto retry;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
ssize_t scoutfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *scoutfs_inode_lock = NULL;
|
||||
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
|
||||
DECLARE_DATA_WAIT(dw);
|
||||
ssize_t ret;
|
||||
|
||||
retry:
|
||||
inode_lock(inode);
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = generic_write_checks(iocb, from);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_quota_check_data(sb, inode);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
|
||||
/* data_version is per inode, whole file must be online */
|
||||
ret = scoutfs_data_wait_check(inode, 0, i_size_read(inode), SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_WRITE, &dw, scoutfs_inode_lock);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* XXX: remove SUID bit */
|
||||
|
||||
ret = __generic_file_write_iter(iocb, from);
|
||||
|
||||
out:
|
||||
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
|
||||
scoutfs_unlock(sb, scoutfs_inode_lock, SCOUTFS_LOCK_WRITE);
|
||||
inode_unlock(inode);
|
||||
|
||||
if (scoutfs_data_wait_found(&dw)) {
|
||||
ret = scoutfs_data_wait(inode, &dw);
|
||||
if (ret == 0)
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (ret > 0)
|
||||
ret = generic_write_sync(iocb, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
int scoutfs_permission(struct inode *inode, int mask)
|
||||
{
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
#ifndef _SCOUTFS_FILE_H_
|
||||
#define _SCOUTFS_FILE_H_
|
||||
|
||||
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
|
||||
ssize_t scoutfs_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos);
|
||||
ssize_t scoutfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos);
|
||||
#else
|
||||
ssize_t scoutfs_file_read_iter(struct kiocb *, struct iov_iter *);
|
||||
ssize_t scoutfs_file_write_iter(struct kiocb *, struct iov_iter *);
|
||||
#endif
|
||||
int scoutfs_permission(struct inode *inode, int mask);
|
||||
loff_t scoutfs_file_llseek(struct file *file, loff_t offset, int whence);
|
||||
|
||||
|
||||
@@ -238,19 +238,16 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
|
||||
* We return -ESTALE if we hit stale blocks to give the caller a chance
|
||||
* to reset their state and retry with a newer version of the btrees.
|
||||
*/
|
||||
int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg)
|
||||
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
|
||||
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg)
|
||||
{
|
||||
struct forest_read_items_data rid = {
|
||||
.cb = cb,
|
||||
.cb_arg = arg,
|
||||
};
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_net_roots roots;
|
||||
struct scoutfs_bloom_block *bb;
|
||||
struct forest_bloom_nrs bloom;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
@@ -264,18 +261,14 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
scoutfs_inc_counter(sb, forest_read_items);
|
||||
calc_bloom_nrs(&bloom, bloom_key);
|
||||
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
trace_scoutfs_forest_using_roots(sb, &roots.fs_root, &roots.logs_root);
|
||||
trace_scoutfs_forest_using_roots(sb, &roots->fs_root, &roots->logs_root);
|
||||
|
||||
*start = orig_start;
|
||||
*end = orig_end;
|
||||
|
||||
/* start with fs root items */
|
||||
rid.fic |= FIC_FS_ROOT;
|
||||
ret = scoutfs_btree_read_items(sb, &roots.fs_root, key, start, end,
|
||||
ret = scoutfs_btree_read_items(sb, &roots->fs_root, key, start, end,
|
||||
forest_read_items, &rid);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
@@ -283,7 +276,7 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
|
||||
scoutfs_key_init_log_trees(&ltk, 0, 0);
|
||||
for (;; scoutfs_key_inc(&ltk)) {
|
||||
ret = scoutfs_btree_next(sb, &roots.logs_root, &ltk, &iref);
|
||||
ret = scoutfs_btree_next(sb, &roots->logs_root, &ltk, &iref);
|
||||
if (ret == 0) {
|
||||
if (iref.val_len == sizeof(lt)) {
|
||||
ltk = *iref.key;
|
||||
@@ -340,6 +333,23 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg)
|
||||
{
|
||||
struct scoutfs_net_roots roots;
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret == 0)
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, key, bloom_key, start, end,
|
||||
cb, arg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the items are deltas then combine the src with the destination
|
||||
* value and store the result in the destination.
|
||||
@@ -721,7 +731,8 @@ static void scoutfs_forest_log_merge_worker(struct work_struct *work)
|
||||
ret = scoutfs_btree_merge(sb, &alloc, &wri, &req.start, &req.end,
|
||||
&next, &comp.root, &inputs,
|
||||
!!(req.flags & cpu_to_le64(SCOUTFS_LOG_MERGE_REQUEST_SUBTREE)),
|
||||
SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT, 10);
|
||||
SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT, 10,
|
||||
(2 * 1024 * 1024));
|
||||
if (ret == -ERANGE) {
|
||||
comp.remain = next;
|
||||
le64_add_cpu(&comp.flags, SCOUTFS_LOG_MERGE_COMP_REMAIN);
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
struct scoutfs_alloc;
|
||||
struct scoutfs_block_writer;
|
||||
struct scoutfs_block;
|
||||
struct scoutfs_lock;
|
||||
|
||||
#include "btree.h"
|
||||
|
||||
@@ -23,6 +24,10 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
|
||||
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_set_bloom_bits(struct super_block *sb,
|
||||
struct scoutfs_lock *lock);
|
||||
void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq);
|
||||
|
||||
@@ -5,11 +5,16 @@
|
||||
* The format version defines the format of structures on devices,
|
||||
* structures that are communicated over the wire, and the protocol
|
||||
* behind the structures.
|
||||
*
|
||||
* Builds can have unique pre-release formats that are incompatible with
|
||||
* every other build. This lets people experiment with formats without
|
||||
* accidentally corrupting data with release builds.
|
||||
*/
|
||||
#define SCOUTFS_FORMAT_VERSION_MIN 1
|
||||
#define SCOUTFS_FORMAT_VERSION_MIN 0x8cf3b46619eb9975ULL
|
||||
#define SCOUTFS_FORMAT_VERSION_MIN_STR __stringify(SCOUTFS_FORMAT_VERSION_MIN)
|
||||
#define SCOUTFS_FORMAT_VERSION_MAX 1
|
||||
#define SCOUTFS_FORMAT_VERSION_MAX 0x8cf3b46619eb9975ULL
|
||||
#define SCOUTFS_FORMAT_VERSION_MAX_STR __stringify(SCOUTFS_FORMAT_VERSION_MAX)
|
||||
#define SCOUTFS_FORMAT_VER_PREREL 0x8000000000000000ULL
|
||||
|
||||
/* statfs(2) f_type */
|
||||
#define SCOUTFS_SUPER_MAGIC 0x554f4353 /* "SCOU" */
|
||||
@@ -175,11 +180,20 @@ struct scoutfs_key {
|
||||
#define sko_rid _sk_first
|
||||
#define sko_ino _sk_second
|
||||
|
||||
/* quota rules */
|
||||
#define skqr_hash _sk_second
|
||||
#define skqr_coll_nr _sk_third
|
||||
|
||||
/* xattr totl */
|
||||
#define skxt_a _sk_first
|
||||
#define skxt_b _sk_second
|
||||
#define skxt_c _sk_third
|
||||
|
||||
/* xattr index */
|
||||
#define skxi_a _sk_first
|
||||
#define skxi_b _sk_second
|
||||
#define skxi_ino _sk_third
|
||||
|
||||
/* inode */
|
||||
#define ski_ino _sk_first
|
||||
|
||||
@@ -585,7 +599,9 @@ struct scoutfs_log_merge_freeing {
|
||||
*/
|
||||
#define SCOUTFS_INODE_INDEX_ZONE 4
|
||||
#define SCOUTFS_ORPHAN_ZONE 8
|
||||
#define SCOUTFS_QUOTA_ZONE 10
|
||||
#define SCOUTFS_XATTR_TOTL_ZONE 12
|
||||
#define SCOUTFS_XATTR_INDX_ZONE 14
|
||||
#define SCOUTFS_FS_ZONE 16
|
||||
#define SCOUTFS_LOCK_ZONE 20
|
||||
/* Items only stored in server btrees */
|
||||
@@ -608,6 +624,9 @@ struct scoutfs_log_merge_freeing {
|
||||
/* orphan zone, redundant type used for clarity */
|
||||
#define SCOUTFS_ORPHAN_TYPE 4
|
||||
|
||||
/* quota zone */
|
||||
#define SCOUTFS_QUOTA_RULE_TYPE 4
|
||||
|
||||
/* fs zone */
|
||||
#define SCOUTFS_INODE_TYPE 4
|
||||
#define SCOUTFS_XATTR_TYPE 8
|
||||
@@ -661,6 +680,34 @@ struct scoutfs_xattr_totl_val {
|
||||
__le64 count;
|
||||
};
|
||||
|
||||
#define SQ_RF_TOTL_COUNT (1 << 0)
|
||||
#define SQ_RF__UNKNOWN (~((1 << 1) - 1))
|
||||
|
||||
#define SQ_NS_LITERAL 0
|
||||
#define SQ_NS_PROJ 1
|
||||
#define SQ_NS_UID 2
|
||||
#define SQ_NS_GID 3
|
||||
#define SQ_NS__NR 4
|
||||
#define SQ_NS__NR_SELECT (SQ_NS__NR - 1) /* !literal */
|
||||
|
||||
#define SQ_NF_SELECT (1 << 0)
|
||||
#define SQ_NF__UNKNOWN (~((1 << 1) - 1))
|
||||
|
||||
#define SQ_OP_INODE 0
|
||||
#define SQ_OP_DATA 1
|
||||
#define SQ_OP__NR 2
|
||||
|
||||
struct scoutfs_quota_rule_val {
|
||||
__le64 name_val[3];
|
||||
__le64 limit;
|
||||
__u8 prio;
|
||||
__u8 op;
|
||||
__u8 rule_flags;
|
||||
__u8 name_source[3];
|
||||
__u8 name_flags[3];
|
||||
__u8 _pad[7];
|
||||
};
|
||||
|
||||
/* XXX does this exist upstream somewhere? */
|
||||
#define member_sizeof(TYPE, MEMBER) (sizeof(((TYPE *)0)->MEMBER))
|
||||
|
||||
@@ -849,6 +896,7 @@ struct scoutfs_inode {
|
||||
__le64 next_readdir_pos;
|
||||
__le64 next_xattr_id;
|
||||
__le64 version;
|
||||
__le64 proj;
|
||||
__le32 nlink;
|
||||
__le32 uid;
|
||||
__le32 gid;
|
||||
|
||||
@@ -143,10 +143,12 @@ void scoutfs_destroy_inode(struct inode *inode)
|
||||
static const struct inode_operations scoutfs_file_iops = {
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
.fiemap = scoutfs_data_fiemap,
|
||||
};
|
||||
@@ -154,10 +156,12 @@ static const struct inode_operations scoutfs_file_iops = {
|
||||
static const struct inode_operations scoutfs_special_iops = {
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
#endif
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
};
|
||||
|
||||
@@ -174,8 +178,12 @@ static void set_inode_ops(struct inode *inode)
|
||||
inode->i_fop = &scoutfs_file_fops;
|
||||
break;
|
||||
case S_IFDIR:
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
inode->i_op = &scoutfs_dir_iops.ops;
|
||||
inode->i_flags |= S_IOPS_WRAPPER;
|
||||
#else
|
||||
inode->i_op = &scoutfs_dir_iops;
|
||||
#endif
|
||||
inode->i_fop = &scoutfs_dir_fops;
|
||||
break;
|
||||
case S_IFLNK:
|
||||
@@ -247,7 +255,7 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
i_size_write(inode, le64_to_cpu(cinode->size));
|
||||
inode->i_version = le64_to_cpu(cinode->version);
|
||||
inode_set_iversion_queried(inode, le64_to_cpu(cinode->version));
|
||||
set_nlink(inode, le32_to_cpu(cinode->nlink));
|
||||
i_uid_write(inode, le32_to_cpu(cinode->uid));
|
||||
i_gid_write(inode, le32_to_cpu(cinode->gid));
|
||||
@@ -267,6 +275,7 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
si->offline_blocks = le64_to_cpu(cinode->offline_blocks);
|
||||
si->next_readdir_pos = le64_to_cpu(cinode->next_readdir_pos);
|
||||
si->next_xattr_id = le64_to_cpu(cinode->next_xattr_id);
|
||||
si->proj = le64_to_cpu(cinode->proj);
|
||||
si->flags = le32_to_cpu(cinode->flags);
|
||||
si->crtime.tv_sec = le64_to_cpu(cinode->crtime.sec);
|
||||
si->crtime.tv_nsec = le32_to_cpu(cinode->crtime.nsec);
|
||||
@@ -340,10 +349,17 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock)
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
#else
|
||||
int scoutfs_getattr(const struct path *path, struct kstat *stat,
|
||||
u32 request_mask, unsigned int query_flags)
|
||||
{
|
||||
struct inode *inode = d_inode(path->dentry);
|
||||
#endif
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
int ret;
|
||||
@@ -384,7 +400,7 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
|
||||
scoutfs_inode_inc_data_version(inode);
|
||||
|
||||
truncate_setsize(inode, new_size);
|
||||
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
|
||||
inode->i_ctime = inode->i_mtime = current_time(inode);
|
||||
if (truncate)
|
||||
si->flags |= SCOUTFS_INO_FLAG_TRUNCATE;
|
||||
scoutfs_inode_set_data_seq(inode);
|
||||
@@ -467,8 +483,7 @@ retry:
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = inode_change_ok(inode, attr);
|
||||
ret = setattr_prepare(dentry, attr);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -496,9 +511,9 @@ retry:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
|
||||
/* XXX callee locks instead? */
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
inode_unlock(inode);
|
||||
ret = scoutfs_data_wait(inode, &dw);
|
||||
mutex_lock(&inode->i_mutex);
|
||||
inode_lock(inode);
|
||||
|
||||
if (ret == 0)
|
||||
goto retry;
|
||||
@@ -680,6 +695,31 @@ void scoutfs_inode_get_onoff(struct inode *inode, s64 *on, s64 *off)
|
||||
} while (read_seqcount_retry(&si->seqcount, seq));
|
||||
}
|
||||
|
||||
u64 scoutfs_inode_get_proj(struct inode *inode)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
unsigned int seq;
|
||||
u64 proj;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&si->seqcount);
|
||||
proj = si->proj;
|
||||
} while (read_seqcount_retry(&si->seqcount, seq));
|
||||
|
||||
return proj;
|
||||
}
|
||||
|
||||
/*
 * Store a new project ID in the in-memory inode info.  The store is
 * made inside a seqcount write section, with preemption disabled around
 * it, so that it pairs with the seqcount read loop in
 * scoutfs_inode_get_proj().
 */
void scoutfs_inode_set_proj(struct inode *inode, u64 proj)
{
	struct scoutfs_inode_info *sinf = SCOUTFS_I(inode);

	preempt_disable();
	write_seqcount_begin(&sinf->seqcount);

	sinf->proj = proj;

	write_seqcount_end(&sinf->seqcount);
	preempt_enable();
}
|
||||
|
||||
static int scoutfs_iget_test(struct inode *inode, void *arg)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
@@ -750,7 +790,7 @@ struct inode *scoutfs_iget(struct super_block *sb, u64 ino, int lkf, int igf)
|
||||
/* XXX ensure refresh, instead clear in drop_inode? */
|
||||
si = SCOUTFS_I(inode);
|
||||
atomic64_set(&si->last_refreshed, 0);
|
||||
inode->i_version = 0;
|
||||
inode_set_iversion_queried(inode, 0);
|
||||
}
|
||||
|
||||
ret = scoutfs_inode_refresh(inode, lock);
|
||||
@@ -798,7 +838,7 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
|
||||
scoutfs_inode_get_onoff(inode, &online_blocks, &offline_blocks);
|
||||
|
||||
cinode->size = cpu_to_le64(i_size_read(inode));
|
||||
cinode->version = cpu_to_le64(inode->i_version);
|
||||
cinode->version = cpu_to_le64(inode_peek_iversion(inode));
|
||||
cinode->nlink = cpu_to_le32(inode->i_nlink);
|
||||
cinode->uid = cpu_to_le32(i_uid_read(inode));
|
||||
cinode->gid = cpu_to_le32(i_gid_read(inode));
|
||||
@@ -821,6 +861,7 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
|
||||
cinode->offline_blocks = cpu_to_le64(offline_blocks);
|
||||
cinode->next_readdir_pos = cpu_to_le64(si->next_readdir_pos);
|
||||
cinode->next_xattr_id = cpu_to_le64(si->next_xattr_id);
|
||||
cinode->proj = cpu_to_le64(si->proj);
|
||||
cinode->flags = cpu_to_le32(si->flags);
|
||||
cinode->crtime.sec = cpu_to_le64(si->crtime.tv_sec);
|
||||
cinode->crtime.nsec = cpu_to_le32(si->crtime.tv_nsec);
|
||||
@@ -1464,6 +1505,7 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
|
||||
si->offline_blocks = 0;
|
||||
si->next_readdir_pos = SCOUTFS_DIRENT_FIRST_POS;
|
||||
si->next_xattr_id = 0;
|
||||
si->proj = 0;
|
||||
si->have_item = false;
|
||||
atomic64_set(&si->last_refreshed, lock->refresh_gen);
|
||||
scoutfs_lock_add_coverage(sb, lock, &si->ino_lock_cov);
|
||||
@@ -1475,7 +1517,7 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
|
||||
inode->i_ino = ino; /* XXX overflow */
|
||||
inode_init_owner(inode, dir, mode);
|
||||
inode_set_bytes(inode, 0);
|
||||
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
|
||||
inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
|
||||
inode->i_rdev = rdev;
|
||||
set_inode_ops(inode);
|
||||
|
||||
|
||||
@@ -21,8 +21,9 @@ struct scoutfs_inode_info {
|
||||
u64 data_version;
|
||||
u64 online_blocks;
|
||||
u64 offline_blocks;
|
||||
u64 proj;
|
||||
u32 flags;
|
||||
struct timespec crtime;
|
||||
struct kc_timespec crtime;
|
||||
|
||||
/*
|
||||
* Protects per-inode extent items, most particularly readers
|
||||
@@ -120,11 +121,19 @@ u64 scoutfs_inode_meta_seq(struct inode *inode);
|
||||
u64 scoutfs_inode_data_seq(struct inode *inode);
|
||||
u64 scoutfs_inode_data_version(struct inode *inode);
|
||||
void scoutfs_inode_get_onoff(struct inode *inode, s64 *on, s64 *off);
|
||||
u64 scoutfs_inode_get_proj(struct inode *inode);
|
||||
void scoutfs_inode_set_proj(struct inode *inode, u64 proj);
|
||||
|
||||
int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock);
|
||||
|
||||
int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock);
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat);
|
||||
#else
|
||||
int scoutfs_getattr(const struct path *path, struct kstat *stat,
|
||||
u32 request_mask, unsigned int query_flags);
|
||||
#endif
|
||||
int scoutfs_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
|
||||
int scoutfs_inode_orphan_create(struct super_block *sb, u64 ino, struct scoutfs_lock *lock,
|
||||
|
||||
562
kmod/src/ioctl.c
562
kmod/src/ioctl.c
@@ -22,6 +22,7 @@
|
||||
#include <linux/sched.h>
|
||||
#include <linux/aio.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include <linux/backing-dev.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "key.h"
|
||||
@@ -41,6 +42,9 @@
|
||||
#include "alloc.h"
|
||||
#include "server.h"
|
||||
#include "counters.h"
|
||||
#include "totl.h"
|
||||
#include "wkic.h"
|
||||
#include "quota.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
@@ -302,7 +306,7 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
inode_lock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
@@ -351,7 +355,7 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
|
||||
|
||||
out:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
inode_unlock(inode);
|
||||
mnt_drop_write_file(file);
|
||||
|
||||
trace_scoutfs_ioc_release_ret(sb, scoutfs_ino(inode), ret);
|
||||
@@ -393,7 +397,7 @@ static long scoutfs_ioc_data_wait_err(struct file *file, unsigned long arg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
inode_lock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
@@ -411,7 +415,7 @@ static long scoutfs_ioc_data_wait_err(struct file *file, unsigned long arg)
|
||||
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
|
||||
unlock:
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
inode_unlock(inode);
|
||||
iput(inode);
|
||||
out:
|
||||
return ret;
|
||||
@@ -448,7 +452,6 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct address_space *mapping = inode->i_mapping;
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
|
||||
struct scoutfs_ioctl_stage args;
|
||||
@@ -480,8 +483,10 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
/* the iocb is really only used for the file pointer :P */
|
||||
init_sync_kiocb(&kiocb, file);
|
||||
kiocb.ki_pos = args.offset;
|
||||
#ifdef KC_LINUX_AIO_KI_LEFT
|
||||
kiocb.ki_left = args.length;
|
||||
kiocb.ki_nbytes = args.length;
|
||||
#endif
|
||||
iov.iov_base = (void __user *)(unsigned long)args.buf_ptr;
|
||||
iov.iov_len = args.length;
|
||||
|
||||
@@ -489,7 +494,7 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
inode_lock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
@@ -516,7 +521,7 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
}
|
||||
|
||||
si->staging = true;
|
||||
current->backing_dev_info = mapping->backing_dev_info;
|
||||
current->backing_dev_info = inode_to_bdi(inode);
|
||||
|
||||
pos = args.offset;
|
||||
written = 0;
|
||||
@@ -533,7 +538,7 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
out:
|
||||
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
inode_unlock(inode);
|
||||
mnt_drop_write_file(file);
|
||||
|
||||
trace_scoutfs_ioc_stage_ret(sb, scoutfs_ino(inode), ret);
|
||||
@@ -652,7 +657,7 @@ static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
mutex_lock(&inode->i_mutex);
|
||||
inode_lock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
@@ -696,7 +701,7 @@ static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
|
||||
unlock:
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
mutex_unlock(&inode->i_mutex);
|
||||
inode_unlock(inode);
|
||||
mnt_drop_write_file(file);
|
||||
out:
|
||||
|
||||
@@ -1033,124 +1038,32 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct xattr_total_entry {
|
||||
struct rb_node node;
|
||||
struct scoutfs_ioctl_xattr_total xt;
|
||||
u64 fs_seq;
|
||||
u64 fs_total;
|
||||
u64 fs_count;
|
||||
u64 fin_seq;
|
||||
u64 fin_total;
|
||||
s64 fin_count;
|
||||
u64 log_seq;
|
||||
u64 log_total;
|
||||
s64 log_count;
|
||||
struct read_xattr_total_iter_cb_args {
|
||||
struct scoutfs_ioctl_xattr_total *xt;
|
||||
unsigned int copied;
|
||||
unsigned int total;
|
||||
};
|
||||
|
||||
static int cmp_xt_entry_name(const struct xattr_total_entry *a,
|
||||
const struct xattr_total_entry *b)
|
||||
|
||||
{
|
||||
return scoutfs_cmp_u64s(a->xt.name[0], b->xt.name[0]) ?:
|
||||
scoutfs_cmp_u64s(a->xt.name[1], b->xt.name[1]) ?:
|
||||
scoutfs_cmp_u64s(a->xt.name[2], b->xt.name[2]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record the contribution of the three classes of logged items we can
|
||||
* see: the item in the fs_root, items from finalized log btrees, and
|
||||
* items from active log btrees. Once we have the full set the caller
|
||||
* can decide which of the items contribute to the total it sends to the
|
||||
* user.
|
||||
* This is called under an RCU read lock so it can't copy to userspace.
|
||||
*/
|
||||
static int read_xattr_total_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
u64 seq, u8 flags, void *val, int val_len, int fic, void *arg)
|
||||
static int read_xattr_total_iter_cb(struct scoutfs_key *key, void *val, unsigned int val_len,
|
||||
void *cb_arg)
|
||||
{
|
||||
struct read_xattr_total_iter_cb_args *cba = cb_arg;
|
||||
struct scoutfs_xattr_totl_val *tval = val;
|
||||
struct xattr_total_entry *ent;
|
||||
struct xattr_total_entry rd;
|
||||
struct rb_root *root = arg;
|
||||
struct rb_node *parent;
|
||||
struct rb_node **node;
|
||||
int cmp;
|
||||
struct scoutfs_ioctl_xattr_total *xt = &cba->xt[cba->copied];
|
||||
|
||||
rd.xt.name[0] = le64_to_cpu(key->skxt_a);
|
||||
rd.xt.name[1] = le64_to_cpu(key->skxt_b);
|
||||
rd.xt.name[2] = le64_to_cpu(key->skxt_c);
|
||||
xt->name[0] = le64_to_cpu(key->skxt_a);
|
||||
xt->name[1] = le64_to_cpu(key->skxt_b);
|
||||
xt->name[2] = le64_to_cpu(key->skxt_c);
|
||||
xt->total = le64_to_cpu(tval->total);
|
||||
xt->count = le64_to_cpu(tval->count);
|
||||
|
||||
/* find entry matching name */
|
||||
node = &root->rb_node;
|
||||
parent = NULL;
|
||||
cmp = -1;
|
||||
while (*node) {
|
||||
parent = *node;
|
||||
ent = container_of(*node, struct xattr_total_entry, node);
|
||||
|
||||
/* sort merge items by key then newest to oldest */
|
||||
cmp = cmp_xt_entry_name(&rd, ent);
|
||||
if (cmp < 0)
|
||||
node = &(*node)->rb_left;
|
||||
else if (cmp > 0)
|
||||
node = &(*node)->rb_right;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/* allocate and insert new node if we need to */
|
||||
if (cmp != 0) {
|
||||
ent = kzalloc(sizeof(*ent), GFP_KERNEL);
|
||||
if (!ent)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(&ent->xt.name, &rd.xt.name, sizeof(ent->xt.name));
|
||||
|
||||
rb_link_node(&ent->node, parent, node);
|
||||
rb_insert_color(&ent->node, root);
|
||||
}
|
||||
|
||||
if (fic & FIC_FS_ROOT) {
|
||||
ent->fs_seq = seq;
|
||||
ent->fs_total = le64_to_cpu(tval->total);
|
||||
ent->fs_count = le64_to_cpu(tval->count);
|
||||
} else if (fic & FIC_FINALIZED) {
|
||||
ent->fin_seq = seq;
|
||||
ent->fin_total += le64_to_cpu(tval->total);
|
||||
ent->fin_count += le64_to_cpu(tval->count);
|
||||
} else {
|
||||
ent->log_seq = seq;
|
||||
ent->log_total += le64_to_cpu(tval->total);
|
||||
ent->log_count += le64_to_cpu(tval->count);
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, totl_read_item);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* these are always _safe, node stores next */
|
||||
#define for_each_xt_ent(ent, node, root) \
|
||||
for (node = rb_first(root); \
|
||||
node && (ent = rb_entry(node, struct xattr_total_entry, node), \
|
||||
node = rb_next(node), 1); )
|
||||
|
||||
#define for_each_xt_ent_reverse(ent, node, root) \
|
||||
for (node = rb_last(root); \
|
||||
node && (ent = rb_entry(node, struct xattr_total_entry, node), \
|
||||
node = rb_prev(node), 1); )
|
||||
|
||||
static void free_xt_ent(struct rb_root *root, struct xattr_total_entry *ent)
|
||||
{
|
||||
rb_erase(&ent->node, root);
|
||||
kfree(ent);
|
||||
}
|
||||
|
||||
static void free_all_xt_ents(struct rb_root *root)
|
||||
{
|
||||
struct xattr_total_entry *ent;
|
||||
struct rb_node *node;
|
||||
|
||||
for_each_xt_ent(ent, node, root)
|
||||
free_xt_ent(root, ent);
|
||||
if (++cba->copied < cba->total)
|
||||
return -EAGAIN;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1160,30 +1073,6 @@ static void free_all_xt_ents(struct rb_root *root)
|
||||
* have been committed. It doesn't use locking to force commits and
|
||||
* block writers so it can be a little bit out of date with respect to
|
||||
* dirty xattrs in memory across the system.
|
||||
*
|
||||
* Our reader has to be careful because the log btree merging code can
|
||||
* write partial results to the fs_root. This means that a reader can
|
||||
* see both cases where new finalized logs should be applied to the old
|
||||
* fs items and where old finalized logs have already been applied to
|
||||
* the partially merged fs items. Currently active logged items are
|
||||
* always applied on top of all cases.
|
||||
*
|
||||
* These cases are differentiated with a combination of sequence numbers
|
||||
* in items, the count of contributing xattrs, and a flag
|
||||
* differentiating finalized and active logged items. This lets us
|
||||
* recognize all cases, including when finalized logs were merged and
|
||||
* deleted the fs item.
|
||||
*
|
||||
* We're allocating a tracking struct for each totl name we see while
|
||||
* traversing the item btrees. The forest reader is providing the items
|
||||
* it finds in leaf blocks that contain the search key. In the worst
|
||||
* case all of these blocks are full and none of the items overlap. At
|
||||
* most, figure order a thousand names per mount. But in practice many
|
||||
* of these factors fall away: leaf blocks aren't fill, leaf items
|
||||
* overlap, there aren't finalized log btrees, and not all mounts are
|
||||
* actively changing totals. We're much more likely to only read a
|
||||
* leaf block's worth of totals that have been long since merged into
|
||||
* the fs_root.
|
||||
*/
|
||||
static long scoutfs_ioc_read_xattr_totals(struct file *file, unsigned long arg)
|
||||
{
|
||||
@@ -1191,14 +1080,13 @@ static long scoutfs_ioc_read_xattr_totals(struct file *file, unsigned long arg)
|
||||
struct scoutfs_ioctl_read_xattr_totals __user *urxt = (void __user *)arg;
|
||||
struct scoutfs_ioctl_read_xattr_totals rxt;
|
||||
struct scoutfs_ioctl_xattr_total __user *uxt;
|
||||
struct xattr_total_entry *ent;
|
||||
struct read_xattr_total_iter_cb_args cba = {NULL, };
|
||||
struct scoutfs_key range_start;
|
||||
struct scoutfs_key range_end;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key bloom_key;
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
struct rb_root root = RB_ROOT;
|
||||
struct rb_node *node;
|
||||
int count = 0;
|
||||
unsigned int copied = 0;
|
||||
unsigned int total;
|
||||
unsigned int ready;
|
||||
int ret;
|
||||
|
||||
if (!(file->f_mode & FMODE_READ)) {
|
||||
@@ -1211,6 +1099,13 @@ static long scoutfs_ioc_read_xattr_totals(struct file *file, unsigned long arg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
cba.xt = (void *)__get_free_page(GFP_KERNEL);
|
||||
if (!cba.xt) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
cba.total = PAGE_SIZE / sizeof(struct scoutfs_ioctl_xattr_total);
|
||||
|
||||
if (copy_from_user(&rxt, urxt, sizeof(rxt))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
@@ -1223,101 +1118,40 @@ static long scoutfs_ioc_read_xattr_totals(struct file *file, unsigned long arg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
scoutfs_key_set_zeros(&bloom_key);
|
||||
bloom_key.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
scoutfs_xattr_init_totl_key(&start, rxt.pos_name);
|
||||
total = div_u64(min_t(u64, rxt.totals_bytes, INT_MAX),
|
||||
sizeof(struct scoutfs_ioctl_xattr_total));
|
||||
|
||||
while (rxt.totals_bytes >= sizeof(struct scoutfs_ioctl_xattr_total)) {
|
||||
scoutfs_totl_set_range(&range_start, &range_end);
|
||||
scoutfs_xattr_init_totl_key(&key, rxt.pos_name);
|
||||
|
||||
scoutfs_key_set_ones(&end);
|
||||
end.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
if (scoutfs_key_compare(&start, &end) > 0)
|
||||
while (copied < total) {
|
||||
cba.copied = 0;
|
||||
ret = scoutfs_wkic_iterate(sb, &key, &range_end, &range_start, &range_end,
|
||||
read_xattr_total_iter_cb, &cba);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (cba.copied == 0)
|
||||
break;
|
||||
|
||||
key = start;
|
||||
ret = scoutfs_forest_read_items(sb, &key, &bloom_key, &start, &end,
|
||||
read_xattr_total_item, &root);
|
||||
if (ret < 0) {
|
||||
if (ret == -ESTALE) {
|
||||
free_all_xt_ents(&root);
|
||||
continue;
|
||||
}
|
||||
ready = min(total - copied, cba.copied);
|
||||
|
||||
if (copy_to_user(&uxt[copied], cba.xt, ready * sizeof(cba.xt[0]))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (RB_EMPTY_ROOT(&root))
|
||||
break;
|
||||
|
||||
/* trim totals that fall outside of the consistent range */
|
||||
for_each_xt_ent(ent, node, &root) {
|
||||
scoutfs_xattr_init_totl_key(&key, ent->xt.name);
|
||||
if (scoutfs_key_compare(&key, &start) < 0) {
|
||||
free_xt_ent(&root, ent);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for_each_xt_ent_reverse(ent, node, &root) {
|
||||
scoutfs_xattr_init_totl_key(&key, ent->xt.name);
|
||||
if (scoutfs_key_compare(&key, &end) > 0) {
|
||||
free_xt_ent(&root, ent);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* copy resulting unique non-zero totals to userspace */
|
||||
for_each_xt_ent(ent, node, &root) {
|
||||
if (rxt.totals_bytes < sizeof(ent->xt))
|
||||
break;
|
||||
|
||||
/* start with the fs item if we have it */
|
||||
if (ent->fs_seq != 0) {
|
||||
ent->xt.total = ent->fs_total;
|
||||
ent->xt.count = ent->fs_count;
|
||||
scoutfs_inc_counter(sb, totl_read_fs);
|
||||
}
|
||||
|
||||
/* apply finalized logs if they're newer or creating */
|
||||
if (((ent->fs_seq != 0) && (ent->fin_seq > ent->fs_seq)) ||
|
||||
((ent->fs_seq == 0) && (ent->fin_count > 0))) {
|
||||
ent->xt.total += ent->fin_total;
|
||||
ent->xt.count += ent->fin_count;
|
||||
scoutfs_inc_counter(sb, totl_read_finalized);
|
||||
}
|
||||
|
||||
/* always apply active logs which must be newer than fs and finalized */
|
||||
if (ent->log_seq > 0) {
|
||||
ent->xt.total += ent->log_total;
|
||||
ent->xt.count += ent->log_count;
|
||||
scoutfs_inc_counter(sb, totl_read_logged);
|
||||
}
|
||||
|
||||
if (ent->xt.total != 0 || ent->xt.count != 0) {
|
||||
if (copy_to_user(uxt, &ent->xt, sizeof(ent->xt))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
uxt++;
|
||||
rxt.totals_bytes -= sizeof(ent->xt);
|
||||
count++;
|
||||
scoutfs_inc_counter(sb, totl_read_copied);
|
||||
}
|
||||
|
||||
free_xt_ent(&root, ent);
|
||||
}
|
||||
|
||||
/* continue after the last possible key read */
|
||||
start = end;
|
||||
scoutfs_key_inc(&start);
|
||||
scoutfs_xattr_init_totl_key(&key, cba.xt[ready - 1].name);
|
||||
scoutfs_key_inc(&key);
|
||||
copied += ready;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
free_all_xt_ents(&root);
|
||||
if (cba.xt)
|
||||
free_page((long)cba.xt);
|
||||
|
||||
return ret ?: count;
|
||||
return ret ?: copied;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_get_allocated_inos(struct file *file, unsigned long arg)
|
||||
@@ -1502,6 +1336,254 @@ out:
|
||||
return nr ?: ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_get_project_id(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
u64 __user *uproj = (void __user *)arg;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
u64 proj;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_DAC_READ_SEARCH))
|
||||
return -EPERM;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
if (ret == 0) {
|
||||
proj = scoutfs_inode_get_proj(inode);
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
|
||||
}
|
||||
|
||||
if (ret == 0 && __put_user(proj, uproj))
|
||||
ret = -EFAULT;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_set_project_id(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
u64 __user *uproj = (void __user *)arg;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 proj;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (get_user(proj, uproj))
|
||||
return -EFAULT;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (scoutfs_inode_get_proj(inode) == proj) {
|
||||
ret = 0;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false, false);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
ret = scoutfs_dirty_inode_item(inode, lock);
|
||||
if (ret < 0)
|
||||
goto out_release;
|
||||
|
||||
scoutfs_inode_set_proj(inode, proj);
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
|
||||
ret = 0;
|
||||
out_release:
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
out_unlock:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_get_quota_rules(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_get_quota_rules __user *ugqr = (void __user *)arg;
|
||||
struct scoutfs_ioctl_get_quota_rules gqr;
|
||||
struct scoutfs_ioctl_quota_rule __user *uirules;
|
||||
struct scoutfs_ioctl_quota_rule *irules;
|
||||
struct page *page = NULL;
|
||||
int copied = 0;
|
||||
int nr;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(&gqr, ugqr, sizeof(gqr)))
|
||||
return -EFAULT;
|
||||
|
||||
if (gqr.rules_nr == 0)
|
||||
return 0;
|
||||
|
||||
uirules = (void __user *)gqr.rules_ptr;
|
||||
/* limit rules copied per call */
|
||||
gqr.rules_nr = min_t(u64, gqr.rules_nr, INT_MAX);
|
||||
|
||||
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
irules = page_address(page);
|
||||
|
||||
while (copied < gqr.rules_nr) {
|
||||
nr = min_t(u64, gqr.rules_nr - copied,
|
||||
PAGE_SIZE / sizeof(struct scoutfs_ioctl_quota_rule));
|
||||
ret = scoutfs_quota_get_rules(sb, gqr.iterator, page_address(page), nr);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
|
||||
if (copy_to_user(&uirules[copied], irules, ret * sizeof(irules[0]))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
copied += ret;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (page)
|
||||
__free_page(page);
|
||||
|
||||
if (ret == 0 && copy_to_user(ugqr->iterator, gqr.iterator, sizeof(gqr.iterator)))
|
||||
ret = -EFAULT;
|
||||
|
||||
return ret ?: copied;
|
||||
}
|
||||
|
||||
/*
 * Shared handler for the add and delete quota rule ioctls.  The rule is
 * copied in from the ioctl argument and handed to
 * scoutfs_quota_mod_rule() with is_add saying which of the two is being
 * done.
 *
 * The caller must have CAP_SYS_ADMIN.  Returns -EPERM, -EFAULT, or
 * whatever scoutfs_quota_mod_rule() returns.
 */
static long scoutfs_ioc_mod_quota_rule(struct file *file, unsigned long arg, bool is_add)
{
	struct scoutfs_ioctl_quota_rule __user *urule = (void __user *)arg;
	struct scoutfs_ioctl_quota_rule rule;
	struct super_block *sb;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&rule, urule, sizeof(rule)))
		return -EFAULT;

	sb = file_inode(file)->i_sb;

	return scoutfs_quota_mod_rule(sb, is_add, &rule);
}
|
||||
|
||||
struct read_index_buf {
|
||||
int nr;
|
||||
int size;
|
||||
struct scoutfs_ioctl_xattr_index_entry ents[0];
|
||||
};
|
||||
|
||||
#define READ_INDEX_BUF_MAX_ENTS \
|
||||
((PAGE_SIZE - sizeof(struct read_index_buf)) / \
|
||||
sizeof(struct scoutfs_ioctl_xattr_index_entry))
|
||||
|
||||
static int read_index_cb(struct scoutfs_key *key, void *val, unsigned int val_len, void *cb_arg)
|
||||
{
|
||||
struct read_index_buf *rib = cb_arg;
|
||||
struct scoutfs_ioctl_xattr_index_entry *ent = &rib->ents[rib->nr];
|
||||
|
||||
if (val_len != 0)
|
||||
return -EIO;
|
||||
|
||||
ent->a = le64_to_cpu(key->skxi_a);
|
||||
ent->b = le64_to_cpu(key->skxi_b);
|
||||
ent->ino = le64_to_cpu(key->skxi_ino);
|
||||
|
||||
if (++rib->nr == rib->size)
|
||||
return rib->nr;
|
||||
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_read_xattr_index(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_read_xattr_index __user *urxi = (void __user *)arg;
|
||||
struct scoutfs_ioctl_xattr_index_entry __user *uents;
|
||||
struct scoutfs_ioctl_xattr_index_entry *ent;
|
||||
struct scoutfs_ioctl_read_xattr_index rxi;
|
||||
struct read_index_buf *rib;
|
||||
struct page *page = NULL;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
int copied = 0;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(&rxi, urxi, sizeof(rxi))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
uents = (void __user *)rxi.entries_ptr;
|
||||
rxi.entries_nr = min_t(u64, rxi.entries_nr, INT_MAX);
|
||||
|
||||
page = alloc_page(GFP_KERNEL);
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
rib = page_address(page);
|
||||
|
||||
scoutfs_xattr_init_indx_key(&first, rxi.first.a, rxi.first.b, rxi.first.ino);
|
||||
scoutfs_xattr_init_indx_key(&last, rxi.last.a, rxi.last.b, rxi.last.ino);
|
||||
scoutfs_xattr_indx_get_range(&start, &end);
|
||||
|
||||
if (scoutfs_key_compare(&first, &last) > 0) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (copied < rxi.entries_nr) {
|
||||
rib->nr = 0;
|
||||
rib->size = min_t(u64, rxi.entries_nr - copied, READ_INDEX_BUF_MAX_ENTS);
|
||||
ret = scoutfs_wkic_iterate(sb, &first, &last, &start, &end,
|
||||
read_index_cb, rib);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (rib->nr == 0)
|
||||
break;
|
||||
|
||||
if (copy_to_user(&uents[copied], rib->ents, rib->nr * sizeof(rib->ents[0]))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
copied += rib->nr;
|
||||
|
||||
ent = &rib->ents[rib->nr - 1];
|
||||
scoutfs_xattr_init_indx_key(&first, ent->a, ent->b, ent->ino);
|
||||
scoutfs_key_inc(&first);
|
||||
}
|
||||
|
||||
ret = copied;
|
||||
|
||||
out:
|
||||
if (page)
|
||||
__free_page(page);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
switch (cmd) {
|
||||
@@ -1539,6 +1621,18 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
return scoutfs_ioc_get_allocated_inos(file, arg);
|
||||
case SCOUTFS_IOC_GET_REFERRING_ENTRIES:
|
||||
return scoutfs_ioc_get_referring_entries(file, arg);
|
||||
case SCOUTFS_IOC_GET_PROJECT_ID:
|
||||
return scoutfs_ioc_get_project_id(file, arg);
|
||||
case SCOUTFS_IOC_SET_PROJECT_ID:
|
||||
return scoutfs_ioc_set_project_id(file, arg);
|
||||
case SCOUTFS_IOC_GET_QUOTA_RULES:
|
||||
return scoutfs_ioc_get_quota_rules(file, arg);
|
||||
case SCOUTFS_IOC_ADD_QUOTA_RULE:
|
||||
return scoutfs_ioc_mod_quota_rule(file, arg, true);
|
||||
case SCOUTFS_IOC_DEL_QUOTA_RULE:
|
||||
return scoutfs_ioc_mod_quota_rule(file, arg, false);
|
||||
case SCOUTFS_IOC_READ_XATTR_INDEX:
|
||||
return scoutfs_ioc_read_xattr_index(file, arg);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
||||
@@ -673,4 +673,100 @@ struct scoutfs_ioctl_dirent {
|
||||
#define SCOUTFS_IOC_GET_REFERRING_ENTRIES \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 17, struct scoutfs_ioctl_get_referring_entries)
|
||||
|
||||
#define SCOUTFS_IOC_GET_PROJECT_ID \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 18, __u64)
|
||||
#define SCOUTFS_IOC_SET_PROJECT_ID \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 19, __u64)
|
||||
|
||||
/*
|
||||
* (These fields are documented in the order that they're displayed by
|
||||
* the scoutfs cli utility which matches the sort order of the rules.)
|
||||
*
|
||||
* @prio: The priority of the rule. Rules are sorted by their fields
|
||||
* with prio at the highest magnitude. When multiple rules match the
|
||||
* rule with the highest sort order is enforced. The priority field
|
||||
* lets rules override the default field sort order.
|
||||
*
|
||||
* @name_val[3]: The three 64bit values that make up the name of the
|
||||
* totl xattr whose total will be checked against the rule's limit to
|
||||
* see if the quota rule has been exceeded. The behavior of the values
|
||||
* can be changed by their corresponding name_source and name_flags.
|
||||
*
|
||||
* @name_source[3]: The SQ_NS_ enums that control where the value comes
|
||||
* from. _LITERAL uses the value from name_val. Inode attribute
|
||||
* sources (_PROJ, _UID, _GID) are taken from the inode of the operation
|
||||
* that is being checked against the rule.
|
||||
*
|
||||
* @name_flags[3]: The SQ_NF_ enums that alter the name values. _SELECT
|
||||
* makes the rule only match if the inode attribute of the operation
|
||||
* matches the attribute value stored in name_val. This lets rules
|
||||
* match a specific value of an attribute rather than mapping all
|
||||
* attribute values to totl names.
|
||||
*
|
||||
* @op: The SQ_OP_ enums which specify the operation that can't exceed
|
||||
* the rule's limit. _INODE checks inode creation and the inode
|
||||
* attributes are taken from the inode that would be created. _DATA
|
||||
* checks file data block allocation and the inode fields come from the
|
||||
* inode that is allocating the blocks.
|
||||
*
|
||||
* @limit: The 64bit value that is checked against the totl value
|
||||
* described by the rule. If the totl value is greater than or equal to
|
||||
* this value of the matching rule then the operation will return
|
||||
* -EDQUOT.
|
||||
*
|
||||
* @rule_flags: SQ_RF_TOTL_COUNT indicates that the rule's limit should
|
||||
* be checked against the number of xattrs contributing to a totl value
|
||||
* instead of the sum of the xattrs.
|
||||
*/
|
||||
struct scoutfs_ioctl_quota_rule {
|
||||
__u64 name_val[3];	/* three 64bit values forming the totl xattr name */
|
||||
__u64 limit;		/* operation gets -EDQUOT once the totl value is >= limit */
|
||||
__u8 prio;		/* highest magnitude sort field; overrides default field order */
|
||||
__u8 op;		/* SQ_OP_ enum: which operation the rule constrains */
|
||||
__u8 rule_flags;	/* SQ_RF_ flags, e.g. SQ_RF_TOTL_COUNT */
|
||||
__u8 name_source[3];	/* SQ_NS_ enum per name_val element: where the value comes from */
|
||||
__u8 name_flags[3];	/* SQ_NF_ flags per name_val element, e.g. _SELECT */
|
||||
__u8 _pad[7];		/* explicit padding: 41 bytes of fields + 7 = 48 byte struct */
|
||||
};
|
||||
|
||||
/* Argument struct named by the SCOUTFS_IOC_GET_QUOTA_RULES ioctl definition. */
struct scoutfs_ioctl_get_quota_rules {
|
||||
__u64 iterator[2];	/* NOTE(review): presumably opaque iteration state carried between calls -- confirm */
|
||||
__u64 rules_ptr;	/* NOTE(review): presumably a user pointer to an array of struct scoutfs_ioctl_quota_rule -- confirm */
|
||||
__u64 rules_nr;		/* NOTE(review): presumably the number of elements at rules_ptr -- confirm */
|
||||
};
|
||||
|
||||
/*
|
||||
* Rules are uniquely identified by their non-padded fields. Addition will fail
|
||||
* with -EEXIST if the specified rule already exists and deletion must find a rule
|
||||
* with all matching fields to delete.
|
||||
*/
|
||||
#define SCOUTFS_IOC_GET_QUOTA_RULES \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 20, struct scoutfs_ioctl_get_quota_rules)
|
||||
#define SCOUTFS_IOC_ADD_QUOTA_RULE \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 21, struct scoutfs_ioctl_quota_rule)
|
||||
#define SCOUTFS_IOC_DEL_QUOTA_RULE \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 22, struct scoutfs_ioctl_quota_rule)
|
||||
|
||||
/*
|
||||
* Inodes can be indexed in a global key space at a position determined
|
||||
* by a single scoutfs.hide.indx xattr per inode. The xattr sets the
|
||||
* two index position values, with a being higher significance.
|
||||
*/
|
||||
struct scoutfs_ioctl_xattr_index_entry {
|
||||
__u64 a;	/* index position value with the higher significance */
|
||||
__u64 b;	/* index position value with the lower significance */
|
||||
__u64 ino;	/* NOTE(review): presumably the number of the indexed inode -- confirm */
|
||||
};
|
||||
|
||||
/* Argument struct named by the SCOUTFS_IOC_READ_XATTR_INDEX ioctl definition. */
struct scoutfs_ioctl_read_xattr_index {
|
||||
__u64 flags;	/* NOTE(review): valid flag bits are not visible here -- confirm */
|
||||
struct scoutfs_ioctl_xattr_index_entry first;	/* NOTE(review): presumably the start of the range to read -- confirm inclusivity */
|
||||
struct scoutfs_ioctl_xattr_index_entry last;	/* NOTE(review): presumably the end of the range to read -- confirm inclusivity */
|
||||
__u64 entries_ptr;	/* NOTE(review): presumably a user pointer to an array of index entries -- confirm */
|
||||
__u64 entries_nr;	/* NOTE(review): presumably the number of elements at entries_ptr -- confirm */
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_READ_XATTR_INDEX \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 23, struct scoutfs_ioctl_read_xattr_index)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -24,9 +24,11 @@
|
||||
#include "item.h"
|
||||
#include "forest.h"
|
||||
#include "block.h"
|
||||
#include "msg.h"
|
||||
#include "trans.h"
|
||||
#include "counters.h"
|
||||
#include "scoutfs_trace.h"
|
||||
#include "util.h"
|
||||
|
||||
/*
|
||||
* The item cache maintains a consistent view of items that are read
|
||||
@@ -76,8 +78,10 @@ struct item_cache_info {
|
||||
/* almost always read, barely written */
|
||||
struct super_block *sb;
|
||||
struct item_percpu_pages __percpu *pcpu_pages;
|
||||
struct shrinker shrinker;
|
||||
KC_DEFINE_SHRINKER(shrinker);
|
||||
#ifdef KC_CPU_NOTIFIER
|
||||
struct notifier_block notifier;
|
||||
#endif
|
||||
|
||||
/* often walked, but per-cpu refs are fast path */
|
||||
rwlock_t rwlock;
|
||||
@@ -1667,13 +1671,24 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lock_safe(struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
static int lock_safe(struct super_block *sb, struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
int mode)
|
||||
{
|
||||
if (WARN_ON_ONCE(!scoutfs_lock_protected(lock, key, mode)))
|
||||
bool prot = scoutfs_lock_protected(lock, key, mode);
|
||||
|
||||
if (!prot) {
|
||||
static bool once = false;
|
||||
if (!once) {
|
||||
scoutfs_err(sb, "lock (start "SK_FMT" end "SK_FMT" mode 0x%x) does not protect operation (key "SK_FMT" mode 0x%x)",
|
||||
SK_ARG(&lock->start), SK_ARG(&lock->end), lock->mode,
|
||||
SK_ARG(key), mode);
|
||||
dump_stack();
|
||||
once = true;
|
||||
}
|
||||
return -EINVAL;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int optional_lock_mode_match(struct scoutfs_lock *lock, int mode)
|
||||
@@ -1715,7 +1730,7 @@ int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_lookup);
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_READ)))
|
||||
if ((ret = lock_safe(sb, lock, key, SCOUTFS_LOCK_READ)))
|
||||
goto out;
|
||||
|
||||
ret = get_cached_page(sb, cinf, lock, key, false, false, 0, &pg);
|
||||
@@ -1790,7 +1805,7 @@ int scoutfs_item_next(struct super_block *sb, struct scoutfs_key *key,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_READ)))
|
||||
if ((ret = lock_safe(sb, lock, key, SCOUTFS_LOCK_READ)))
|
||||
goto out;
|
||||
|
||||
pos = *key;
|
||||
@@ -1871,7 +1886,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_dirty);
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_WRITE)))
|
||||
if ((ret = lock_safe(sb, lock, key, SCOUTFS_LOCK_WRITE)))
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_forest_set_bloom_bits(sb, lock);
|
||||
@@ -1917,7 +1932,7 @@ static int item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_create);
|
||||
|
||||
if ((ret = lock_safe(lock, key, mode)) ||
|
||||
if ((ret = lock_safe(sb, lock, key, mode)) ||
|
||||
(ret = optional_lock_mode_match(primary, SCOUTFS_LOCK_WRITE)))
|
||||
goto out;
|
||||
|
||||
@@ -1960,7 +1975,7 @@ int scoutfs_item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
{
|
||||
return item_create(sb, key, val, val_len, lock, NULL,
|
||||
SCOUTFS_LOCK_READ, false);
|
||||
SCOUTFS_LOCK_WRITE, false);
|
||||
}
|
||||
|
||||
int scoutfs_item_create_force(struct super_block *sb, struct scoutfs_key *key,
|
||||
@@ -1991,7 +2006,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_update);
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_WRITE)))
|
||||
if ((ret = lock_safe(sb, lock, key, SCOUTFS_LOCK_WRITE)))
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_forest_set_bloom_bits(sb, lock);
|
||||
@@ -2059,7 +2074,7 @@ int scoutfs_item_delta(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_delta);
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_WRITE_ONLY)))
|
||||
if ((ret = lock_safe(sb, lock, key, SCOUTFS_LOCK_WRITE_ONLY)))
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_forest_set_bloom_bits(sb, lock);
|
||||
@@ -2132,7 +2147,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_delete);
|
||||
|
||||
if ((ret = lock_safe(lock, key, mode)) ||
|
||||
if ((ret = lock_safe(sb, lock, key, mode)) ||
|
||||
(ret = optional_lock_mode_match(primary, SCOUTFS_LOCK_WRITE)))
|
||||
goto out;
|
||||
|
||||
@@ -2277,7 +2292,7 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
list_add(&page->list, &pages);
|
||||
list_add(&page->lru, &pages);
|
||||
|
||||
first = NULL;
|
||||
prev = &first;
|
||||
@@ -2290,7 +2305,7 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
list_add(&second->list, &pages);
|
||||
list_add(&second->lru, &pages);
|
||||
}
|
||||
|
||||
/* read lock next sorted page, we're only dirty_list user */
|
||||
@@ -2347,8 +2362,8 @@ int scoutfs_item_write_dirty(struct super_block *sb)
|
||||
/* write all the dirty items into log btree blocks */
|
||||
ret = scoutfs_forest_insert_list(sb, first);
|
||||
out:
|
||||
list_for_each_entry_safe(page, second, &pages, list) {
|
||||
list_del_init(&page->list);
|
||||
list_for_each_entry_safe(page, second, &pages, lru) {
|
||||
list_del_init(&page->lru);
|
||||
__free_page(page);
|
||||
}
|
||||
|
||||
@@ -2530,27 +2545,35 @@ retry:
|
||||
put_pg(sb, right);
|
||||
}
|
||||
|
||||
static unsigned long item_cache_count_objects(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct item_cache_info *cinf = KC_SHRINKER_CONTAINER_OF(shrink, struct item_cache_info);
|
||||
struct super_block *sb = cinf->sb;
|
||||
|
||||
scoutfs_inc_counter(sb, item_cache_count_objects);
|
||||
|
||||
return shrinker_min_long(cinf->lru_pages);
|
||||
}
|
||||
|
||||
/*
|
||||
* Shrink the size of the item cache. We're operating against the fast
|
||||
* path lock ordering and we skip pages if we can't acquire locks. We
|
||||
* can run into dirty pages or pages with items that weren't visible to
|
||||
* the earliest active reader which must be skipped.
|
||||
*/
|
||||
static int item_lru_shrink(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
static unsigned long item_cache_scan_objects(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct item_cache_info *cinf = container_of(shrink,
|
||||
struct item_cache_info,
|
||||
shrinker);
|
||||
struct item_cache_info *cinf = KC_SHRINKER_CONTAINER_OF(shrink, struct item_cache_info);
|
||||
struct super_block *sb = cinf->sb;
|
||||
struct cached_page *tmp;
|
||||
struct cached_page *pg;
|
||||
unsigned long freed = 0;
|
||||
u64 first_reader_seq;
|
||||
int nr;
|
||||
int nr = sc->nr_to_scan;
|
||||
|
||||
if (sc->nr_to_scan == 0)
|
||||
goto out;
|
||||
nr = sc->nr_to_scan;
|
||||
scoutfs_inc_counter(sb, item_cache_scan_objects);
|
||||
|
||||
/* can't invalidate pages with items that weren't visible to first reader */
|
||||
first_reader_seq = first_active_reader_seq(cinf);
|
||||
@@ -2582,6 +2605,7 @@ static int item_lru_shrink(struct shrinker *shrink,
|
||||
rbtree_erase(&pg->node, &cinf->pg_root);
|
||||
invalidate_pcpu_page(pg);
|
||||
write_unlock(&pg->rwlock);
|
||||
freed++;
|
||||
|
||||
put_pg(sb, pg);
|
||||
|
||||
@@ -2591,10 +2615,11 @@ static int item_lru_shrink(struct shrinker *shrink,
|
||||
|
||||
write_unlock(&cinf->rwlock);
|
||||
spin_unlock(&cinf->lru_lock);
|
||||
out:
|
||||
return min_t(unsigned long, cinf->lru_pages, INT_MAX);
|
||||
|
||||
return freed;
|
||||
}
|
||||
|
||||
#ifdef KC_CPU_NOTIFIER
|
||||
static int item_cpu_callback(struct notifier_block *nfb,
|
||||
unsigned long action, void *hcpu)
|
||||
{
|
||||
@@ -2609,6 +2634,7 @@ static int item_cpu_callback(struct notifier_block *nfb,
|
||||
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
#endif
|
||||
|
||||
int scoutfs_item_setup(struct super_block *sb)
|
||||
{
|
||||
@@ -2638,11 +2664,13 @@ int scoutfs_item_setup(struct super_block *sb)
|
||||
for_each_possible_cpu(cpu)
|
||||
init_pcpu_pages(cinf, cpu);
|
||||
|
||||
cinf->shrinker.shrink = item_lru_shrink;
|
||||
cinf->shrinker.seeks = DEFAULT_SEEKS;
|
||||
register_shrinker(&cinf->shrinker);
|
||||
KC_INIT_SHRINKER_FUNCS(&cinf->shrinker, item_cache_count_objects,
|
||||
item_cache_scan_objects);
|
||||
KC_REGISTER_SHRINKER(&cinf->shrinker);
|
||||
#ifdef KC_CPU_NOTIFIER
|
||||
cinf->notifier.notifier_call = item_cpu_callback;
|
||||
register_hotcpu_notifier(&cinf->notifier);
|
||||
#endif
|
||||
|
||||
sbi->item_cache_info = cinf;
|
||||
return 0;
|
||||
@@ -2662,8 +2690,10 @@ void scoutfs_item_destroy(struct super_block *sb)
|
||||
if (cinf) {
|
||||
BUG_ON(!list_empty(&cinf->active_list));
|
||||
|
||||
#ifdef KC_CPU_NOTIFIER
|
||||
unregister_hotcpu_notifier(&cinf->notifier);
|
||||
unregister_shrinker(&cinf->shrinker);
|
||||
#endif
|
||||
KC_UNREGISTER_SHRINKER(&cinf->shrinker);
|
||||
|
||||
for_each_possible_cpu(cpu)
|
||||
drop_pcpu_pages(sb, cinf, cpu);
|
||||
|
||||
84
kmod/src/kernelcompat.c
Normal file
84
kmod/src/kernelcompat.c
Normal file
@@ -0,0 +1,84 @@
|
||||
|
||||
#include <linux/uio.h>
|
||||
|
||||
#include "kernelcompat.h"
|
||||
|
||||
#ifdef KC_SHRINKER_SHRINK
|
||||
#include <linux/shrinker.h>
|
||||
/*
|
||||
* If a target doesn't have the .{count,scan}_objects() interface then
|
||||
* we have a .shrink() helper that performs the shrink work in terms of
|
||||
* count/scan.
|
||||
*/
|
||||
int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc)
|
||||
{
|
||||
struct kc_shrinker_wrapper *wrapper = container_of(shrink, struct kc_shrinker_wrapper, shrink);
|
||||
unsigned long nr;
|
||||
unsigned long rc;
|
||||
|
||||
if (sc->nr_to_scan != 0) {
|
||||
rc = wrapper->scan_objects(shrink, sc);
|
||||
/* translate magic values to the equivalent for older kernels */
|
||||
if (rc == SHRINK_STOP)
|
||||
return -1;
|
||||
else if (rc == SHRINK_EMPTY)
|
||||
return 0;
|
||||
}
|
||||
|
||||
nr = wrapper->count_objects(shrink, sc);
|
||||
|
||||
return min_t(unsigned long, nr, INT_MAX);
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KC_CURRENT_TIME_INODE
|
||||
struct timespec64 kc_current_time(struct inode *inode)
|
||||
{
|
||||
struct timespec64 now;
|
||||
unsigned gran;
|
||||
|
||||
getnstimeofday64(&now);
|
||||
|
||||
if (unlikely(!inode->i_sb)) {
|
||||
WARN(1, "current_time() called with uninitialized super_block in the inode");
|
||||
return now;
|
||||
}
|
||||
|
||||
gran = inode->i_sb->s_time_gran;
|
||||
|
||||
/* Avoid division in the common cases 1 ns and 1 s. */
|
||||
if (gran == 1) {
|
||||
/* nothing */
|
||||
} else if (gran == NSEC_PER_SEC) {
|
||||
now.tv_nsec = 0;
|
||||
} else if (gran > 1 && gran < NSEC_PER_SEC) {
|
||||
now.tv_nsec -= now.tv_nsec % gran;
|
||||
} else {
|
||||
WARN(1, "illegal file time granularity: %u", gran);
|
||||
}
|
||||
|
||||
return now;
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
|
||||
ssize_t
|
||||
kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos, loff_t *ppos,
|
||||
size_t count, ssize_t written)
|
||||
{
|
||||
struct file *file = iocb->ki_filp;
|
||||
ssize_t status;
|
||||
struct iov_iter i;
|
||||
|
||||
iov_iter_init(&i, WRITE, iov, nr_segs, count);
|
||||
status = generic_perform_write(file, &i, pos);
|
||||
|
||||
if (likely(status >= 0)) {
|
||||
written += status;
|
||||
*ppos = pos + status;
|
||||
}
|
||||
|
||||
return written ? written : status;
|
||||
}
|
||||
#endif
|
||||
@@ -1,8 +1,35 @@
|
||||
#ifndef _SCOUTFS_KERNELCOMPAT_H_
|
||||
#define _SCOUTFS_KERNELCOMPAT_H_
|
||||
|
||||
#ifndef KC_ITERATE_DIR_CONTEXT
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
/*
|
||||
* v4.15-rc3-4-gae5e165d855d
|
||||
*
|
||||
* new API for handling inode->i_version. This forces us to
|
||||
* include this API where we need. We include it here for
|
||||
* convenience instead of where it's needed.
|
||||
*/
|
||||
#ifdef KC_NEED_LINUX_IVERSION_H
|
||||
#include <linux/iversion.h>
|
||||
#else
|
||||
/*
|
||||
* Kernels before above version will need to fall back to
|
||||
* manipulating inode->i_version as previous with degraded
|
||||
* methods.
|
||||
*/
|
||||
#define inode_set_iversion_queried(inode, val) \
|
||||
do { \
|
||||
(inode)->i_version = val; \
|
||||
} while (0)
|
||||
#define inode_peek_iversion(inode) \
|
||||
({ \
|
||||
(inode)->i_version; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#ifndef KC_ITERATE_DIR_CONTEXT
|
||||
typedef filldir_t kc_readdir_ctx_t;
|
||||
#define KC_DECLARE_READDIR(name, file, dirent, ctx) name(file, dirent, ctx)
|
||||
#define KC_FOP_READDIR readdir
|
||||
@@ -52,4 +79,198 @@ static inline int dir_emit_dots(struct file *file, void *dirent,
|
||||
#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(acl)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* v3.6-rc1-24-gdbf2576e37da
|
||||
*
|
||||
* All workqueues are now non-reentrant, and the bit flag is removed
|
||||
* shortly after its uses were removed.
|
||||
*/
|
||||
#ifndef WQ_NON_REENTRANT
|
||||
#define WQ_NON_REENTRANT 0
|
||||
#endif
|
||||
|
||||
/*
|
||||
* v3.18-rc2-19-gb5ae6b15bd73
|
||||
*
|
||||
* Folds d_materialise_unique into d_splice_alias. Note reversal
|
||||
* of arguments (Also note Documentation/filesystems/porting.rst)
|
||||
*/
|
||||
#ifndef KC_D_MATERIALISE_UNIQUE
|
||||
#define d_materialise_unique(dentry, inode) d_splice_alias(inode, dentry)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* v4.8-rc1-29-g31051c85b5e2
|
||||
*
|
||||
* fall back to inode_change_ok() if setattr_prepare() isn't available
|
||||
*/
|
||||
#ifndef KC_SETATTR_PREPARE
|
||||
#define setattr_prepare(dentry, attr) inode_change_ok(d_inode(dentry), attr)
|
||||
#endif
|
||||
|
||||
#ifndef KC___POSIX_ACL_CREATE
|
||||
#define __posix_acl_create posix_acl_create
|
||||
#define __posix_acl_chmod posix_acl_chmod
|
||||
#endif
|
||||
|
||||
#ifndef KC_PERCPU_COUNTER_ADD_BATCH
|
||||
#define percpu_counter_add_batch __percpu_counter_add
|
||||
#endif
|
||||
|
||||
#ifndef KC_MEMALLOC_NOFS_SAVE
|
||||
#define memalloc_nofs_save memalloc_noio_save
|
||||
#define memalloc_nofs_restore memalloc_noio_restore
|
||||
#endif
|
||||
|
||||
#ifdef KC_BIO_BI_OPF
|
||||
#define kc_bio_get_opf(bio) \
|
||||
({ \
|
||||
(bio)->bi_opf; \
|
||||
})
|
||||
#define kc_bio_set_opf(bio, opf) \
|
||||
do { \
|
||||
(bio)->bi_opf = opf; \
|
||||
} while (0)
|
||||
#define kc_bio_set_sector(bio, sect) \
|
||||
do { \
|
||||
(bio)->bi_iter.bi_sector = sect;\
|
||||
} while (0)
|
||||
#define kc_submit_bio(bio) submit_bio(bio)
|
||||
#else
|
||||
#define kc_bio_get_opf(bio) \
|
||||
({ \
|
||||
(bio)->bi_rw; \
|
||||
})
|
||||
#define kc_bio_set_opf(bio, opf) \
|
||||
do { \
|
||||
(bio)->bi_rw = opf; \
|
||||
} while (0)
|
||||
#define kc_bio_set_sector(bio, sect) \
|
||||
do { \
|
||||
(bio)->bi_sector = sect; \
|
||||
} while (0)
|
||||
#define kc_submit_bio(bio) \
|
||||
do { \
|
||||
submit_bio((bio)->bi_rw, bio); \
|
||||
} while (0)
|
||||
#define bio_set_dev(bio, bdev) \
|
||||
do { \
|
||||
(bio)->bi_bdev = (bdev); \
|
||||
} while (0)
|
||||
#endif
|
||||
|
||||
#ifdef KC_BIO_BI_STATUS
|
||||
#define KC_DECLARE_BIO_END_IO(name, bio) name(bio)
|
||||
#define kc_bio_get_errno(bio) ({ blk_status_to_errno((bio)->bi_status); })
|
||||
#else
|
||||
#define KC_DECLARE_BIO_END_IO(name, bio) name(bio, int _error_arg)
|
||||
#define kc_bio_get_errno(bio) ({ (int)((void)(bio), _error_arg); })
|
||||
#endif
|
||||
|
||||
/*
|
||||
* v4.13-rc1-6-ge462ec50cb5f
|
||||
*
|
||||
* MS_* (mount) flags from <linux/mount.h> should not be used in the kernel
|
||||
* anymore from 4.x onwards. Instead, we need to use the SB_* (superblock) flags
|
||||
*/
|
||||
#ifndef SB_POSIXACL
|
||||
#define SB_POSIXACL MS_POSIXACL
|
||||
#define SB_I_VERSION MS_I_VERSION
|
||||
#endif
|
||||
|
||||
#ifndef KC_CURRENT_TIME_INODE
|
||||
struct timespec64 kc_current_time(struct inode *inode);
|
||||
#define current_time kc_current_time
|
||||
#define kc_timespec timespec
|
||||
#else
|
||||
#define kc_timespec timespec64
|
||||
#endif
|
||||
|
||||
#ifndef KC_SHRINKER_SHRINK
|
||||
|
||||
#define KC_DEFINE_SHRINKER(name) struct shrinker name
|
||||
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
|
||||
__typeof__(name) _shrink = (name); \
|
||||
_shrink->count_objects = (countfn); \
|
||||
_shrink->scan_objects = (scanfn); \
|
||||
_shrink->seeks = DEFAULT_SEEKS; \
|
||||
} while (0)
|
||||
|
||||
#define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(ptr, type, shrinker)
|
||||
#define KC_REGISTER_SHRINKER(ptr) (register_shrinker(ptr))
|
||||
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr))
|
||||
#define KC_SHRINKER_FN(ptr) (ptr)
|
||||
#else
|
||||
|
||||
#include <linux/shrinker.h>
|
||||
#ifndef SHRINK_STOP
|
||||
#define SHRINK_STOP (~0UL)
|
||||
#define SHRINK_EMPTY (~0UL - 1)
|
||||
#endif
|
||||
|
||||
int kc_shrink_wrapper_fn(struct shrinker *shrink, struct shrink_control *sc);
|
||||
struct kc_shrinker_wrapper {
|
||||
unsigned long (*count_objects)(struct shrinker *, struct shrink_control *sc);
|
||||
unsigned long (*scan_objects)(struct shrinker *, struct shrink_control *sc);
|
||||
struct shrinker shrink;
|
||||
};
|
||||
|
||||
#define KC_DEFINE_SHRINKER(name) struct kc_shrinker_wrapper name;
|
||||
#define KC_INIT_SHRINKER_FUNCS(name, countfn, scanfn) do { \
|
||||
struct kc_shrinker_wrapper *_wrap = (name); \
|
||||
_wrap->count_objects = (countfn); \
|
||||
_wrap->scan_objects = (scanfn); \
|
||||
_wrap->shrink.shrink = kc_shrink_wrapper_fn; \
|
||||
_wrap->shrink.seeks = DEFAULT_SEEKS; \
|
||||
} while (0)
|
||||
#define KC_SHRINKER_CONTAINER_OF(ptr, type) container_of(container_of(ptr, struct kc_shrinker_wrapper, shrink), type, shrinker)
|
||||
#define KC_REGISTER_SHRINKER(ptr) (register_shrinker(ptr.shrink))
|
||||
#define KC_UNREGISTER_SHRINKER(ptr) (unregister_shrinker(ptr.shrink))
|
||||
#define KC_SHRINKER_FN(ptr) (ptr.shrink)
|
||||
|
||||
#endif /* KC_SHRINKER_SHRINK */
|
||||
|
||||
#ifdef KC_KERNEL_GETSOCKNAME_ADDRLEN
|
||||
#include <linux/net.h>
|
||||
#include <linux/inet.h>
|
||||
static inline int kc_kernel_getsockname(struct socket *sock, struct sockaddr *addr)
|
||||
{
|
||||
int addrlen = sizeof(struct sockaddr_in);
|
||||
int ret = kernel_getsockname(sock, addr, &addrlen);
|
||||
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
|
||||
return -EAFNOSUPPORT;
|
||||
else if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return sizeof(struct sockaddr_in);
|
||||
}
|
||||
static inline int kc_kernel_getpeername(struct socket *sock, struct sockaddr *addr)
|
||||
{
|
||||
int addrlen = sizeof(struct sockaddr_in);
|
||||
int ret = kernel_getpeername(sock, addr, &addrlen);
|
||||
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
|
||||
return -EAFNOSUPPORT;
|
||||
else if (ret < 0)
|
||||
return ret;
|
||||
|
||||
return sizeof(struct sockaddr_in);
|
||||
}
|
||||
#else
|
||||
#define kc_kernel_getsockname(sock, addr) kernel_getsockname(sock, addr)
|
||||
#define kc_kernel_getpeername(sock, addr) kernel_getpeername(sock, addr)
|
||||
#endif
|
||||
|
||||
#ifdef KC_SOCK_CREATE_KERN_NET
|
||||
#define kc_sock_create_kern(family, type, proto, res) sock_create_kern(&init_net, family, type, proto, res)
|
||||
#else
|
||||
#define kc_sock_create_kern sock_create_kern
|
||||
#endif
|
||||
|
||||
#ifndef KC_GENERIC_FILE_BUFFERED_WRITE
|
||||
ssize_t kc_generic_file_buffered_write(struct kiocb *iocb, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos, loff_t *ppos,
|
||||
size_t count, ssize_t written);
|
||||
#define generic_file_buffered_write kc_generic_file_buffered_write
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -12,7 +12,6 @@
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/preempt_mask.h> /* a rhel shed.h needed preempt_offset? */
|
||||
#include <linux/sched.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/mm.h>
|
||||
@@ -36,6 +35,9 @@
|
||||
#include "xattr.h"
|
||||
#include "item.h"
|
||||
#include "omap.h"
|
||||
#include "util.h"
|
||||
#include "totl.h"
|
||||
#include "quota.h"
|
||||
|
||||
/*
|
||||
* scoutfs uses a lock service to manage item cache consistency between
|
||||
@@ -77,7 +79,7 @@ struct lock_info {
|
||||
bool unmounting;
|
||||
struct rb_root lock_tree;
|
||||
struct rb_root lock_range_tree;
|
||||
struct shrinker shrinker;
|
||||
KC_DEFINE_SHRINKER(shrinker);
|
||||
struct list_head lru_list;
|
||||
unsigned long long lru_nr;
|
||||
struct workqueue_struct *workq;
|
||||
@@ -185,6 +187,9 @@ static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (lock->start.sk_zone == SCOUTFS_QUOTA_ZONE && !lock_mode_can_read(mode))
|
||||
scoutfs_quota_invalidate(sb);
|
||||
|
||||
/* have to invalidate if we're not in the only usable case */
|
||||
if (!(prev == SCOUTFS_LOCK_WRITE && mode == SCOUTFS_LOCK_READ)) {
|
||||
retry:
|
||||
@@ -1244,10 +1249,29 @@ int scoutfs_lock_xattr_totl(struct super_block *sb, enum scoutfs_lock_mode mode,
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
|
||||
scoutfs_key_set_zeros(&start);
|
||||
start.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
scoutfs_key_set_ones(&end);
|
||||
end.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
scoutfs_totl_set_range(&start, &end);
|
||||
|
||||
return lock_key_range(sb, mode, flags, &start, &end, lock);
|
||||
}
|
||||
|
||||
int scoutfs_lock_xattr_indx(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
|
||||
scoutfs_xattr_indx_get_range(&start, &end);
|
||||
|
||||
return lock_key_range(sb, mode, flags, &start, &end, lock);
|
||||
}
|
||||
|
||||
int scoutfs_lock_quota(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
|
||||
scoutfs_quota_get_lock_range(&start, &end);
|
||||
|
||||
return lock_key_range(sb, mode, flags, &start, &end, lock);
|
||||
}
|
||||
@@ -1346,7 +1370,7 @@ void scoutfs_lock_del_coverage(struct super_block *sb,
|
||||
bool scoutfs_lock_protected(struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
enum scoutfs_lock_mode mode)
|
||||
{
|
||||
signed char lock_mode = ACCESS_ONCE(lock->mode);
|
||||
signed char lock_mode = READ_ONCE(lock->mode);
|
||||
|
||||
return lock_modes_match(lock_mode, mode) &&
|
||||
scoutfs_key_compare_ranges(key, key,
|
||||
@@ -1401,6 +1425,17 @@ static void lock_shrink_worker(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned long lock_count_objects(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct lock_info *linfo = KC_SHRINKER_CONTAINER_OF(shrink, struct lock_info);
|
||||
struct super_block *sb = linfo->sb;
|
||||
|
||||
scoutfs_inc_counter(sb, lock_count_objects);
|
||||
|
||||
return shrinker_min_long(linfo->lru_nr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Start the shrinking process for locks on the lru. If a lock is on
|
||||
* the lru then it can't have any active users. We don't want to block
|
||||
@@ -1413,21 +1448,18 @@ static void lock_shrink_worker(struct work_struct *work)
|
||||
* mode which will prevent the lock from being freed when the null
|
||||
* response arrives.
|
||||
*/
|
||||
static int scoutfs_lock_shrink(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
static unsigned long lock_scan_objects(struct shrinker *shrink,
|
||||
struct shrink_control *sc)
|
||||
{
|
||||
struct lock_info *linfo = container_of(shrink, struct lock_info,
|
||||
shrinker);
|
||||
struct lock_info *linfo = KC_SHRINKER_CONTAINER_OF(shrink, struct lock_info);
|
||||
struct super_block *sb = linfo->sb;
|
||||
struct scoutfs_lock *lock;
|
||||
struct scoutfs_lock *tmp;
|
||||
unsigned long nr;
|
||||
unsigned long freed = 0;
|
||||
unsigned long nr = sc->nr_to_scan;
|
||||
bool added = false;
|
||||
int ret;
|
||||
|
||||
nr = sc->nr_to_scan;
|
||||
if (nr == 0)
|
||||
goto out;
|
||||
scoutfs_inc_counter(sb, lock_scan_objects);
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
|
||||
@@ -1445,6 +1477,7 @@ restart:
|
||||
lock->request_pending = 1;
|
||||
list_add_tail(&lock->shrink_head, &linfo->shrink_list);
|
||||
added = true;
|
||||
freed++;
|
||||
|
||||
scoutfs_inc_counter(sb, lock_shrink_attempted);
|
||||
trace_scoutfs_lock_shrink(sb, lock);
|
||||
@@ -1459,10 +1492,8 @@ restart:
|
||||
if (added)
|
||||
queue_work(linfo->workq, &linfo->shrink_work);
|
||||
|
||||
out:
|
||||
ret = min_t(unsigned long, linfo->lru_nr, INT_MAX);
|
||||
trace_scoutfs_lock_shrink_exit(sb, sc->nr_to_scan, ret);
|
||||
return ret;
|
||||
trace_scoutfs_lock_shrink_exit(sb, sc->nr_to_scan, freed);
|
||||
return freed;
|
||||
}
|
||||
|
||||
void scoutfs_free_unused_locks(struct super_block *sb)
|
||||
@@ -1473,7 +1504,7 @@ void scoutfs_free_unused_locks(struct super_block *sb)
|
||||
.nr_to_scan = INT_MAX,
|
||||
};
|
||||
|
||||
linfo->shrinker.shrink(&linfo->shrinker, &sc);
|
||||
lock_scan_objects(KC_SHRINKER_FN(&linfo->shrinker), &sc);
|
||||
}
|
||||
|
||||
static void lock_tseq_show(struct seq_file *m, struct scoutfs_tseq_entry *ent)
|
||||
@@ -1580,7 +1611,7 @@ void scoutfs_lock_shutdown(struct super_block *sb)
|
||||
trace_scoutfs_lock_shutdown(sb, linfo);
|
||||
|
||||
/* stop the shrinker from queueing work */
|
||||
unregister_shrinker(&linfo->shrinker);
|
||||
KC_UNREGISTER_SHRINKER(&linfo->shrinker);
|
||||
flush_work(&linfo->shrink_work);
|
||||
|
||||
/* cause current and future lock calls to return errors */
|
||||
@@ -1699,9 +1730,9 @@ int scoutfs_lock_setup(struct super_block *sb)
|
||||
spin_lock_init(&linfo->lock);
|
||||
linfo->lock_tree = RB_ROOT;
|
||||
linfo->lock_range_tree = RB_ROOT;
|
||||
linfo->shrinker.shrink = scoutfs_lock_shrink;
|
||||
linfo->shrinker.seeks = DEFAULT_SEEKS;
|
||||
register_shrinker(&linfo->shrinker);
|
||||
KC_INIT_SHRINKER_FUNCS(&linfo->shrinker, lock_count_objects,
|
||||
lock_scan_objects);
|
||||
KC_REGISTER_SHRINKER(&linfo->shrinker);
|
||||
INIT_LIST_HEAD(&linfo->lru_list);
|
||||
INIT_WORK(&linfo->inv_work, lock_invalidate_worker);
|
||||
INIT_LIST_HEAD(&linfo->inv_list);
|
||||
|
||||
@@ -86,6 +86,10 @@ int scoutfs_lock_orphan(struct super_block *sb, enum scoutfs_lock_mode mode, int
|
||||
u64 ino, struct scoutfs_lock **lock);
|
||||
int scoutfs_lock_xattr_totl(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock);
|
||||
int scoutfs_lock_xattr_indx(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock);
|
||||
int scoutfs_lock_quota(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock);
|
||||
void scoutfs_unlock(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
enum scoutfs_lock_mode mode);
|
||||
|
||||
|
||||
@@ -549,12 +549,16 @@ static int recvmsg_full(struct socket *sock, void *buf, unsigned len)
|
||||
|
||||
while (len) {
|
||||
memset(&msg, 0, sizeof(msg));
|
||||
msg.msg_iov = (struct iovec *)&kv;
|
||||
msg.msg_iovlen = 1;
|
||||
msg.msg_flags = MSG_NOSIGNAL;
|
||||
kv.iov_base = buf;
|
||||
kv.iov_len = len;
|
||||
|
||||
#ifndef KC_MSGHDR_STRUCT_IOV_ITER
|
||||
msg.msg_iov = (struct iovec *)&kv;
|
||||
msg.msg_iovlen = 1;
|
||||
#else
|
||||
iov_iter_init(&msg.msg_iter, READ, (struct iovec *)&kv, len, 1);
|
||||
#endif
|
||||
ret = kernel_recvmsg(sock, &msg, &kv, 1, len, msg.msg_flags);
|
||||
if (ret <= 0)
|
||||
return -ECONNABORTED;
|
||||
@@ -707,12 +711,16 @@ static int sendmsg_full(struct socket *sock, void *buf, unsigned len)
|
||||
|
||||
while (len) {
|
||||
memset(&msg, 0, sizeof(msg));
|
||||
msg.msg_iov = (struct iovec *)&kv;
|
||||
msg.msg_iovlen = 1;
|
||||
msg.msg_flags = MSG_NOSIGNAL;
|
||||
kv.iov_base = buf;
|
||||
kv.iov_len = len;
|
||||
|
||||
#ifndef KC_MSGHDR_STRUCT_IOV_ITER
|
||||
msg.msg_iov = (struct iovec *)&kv;
|
||||
msg.msg_iovlen = 1;
|
||||
#else
|
||||
iov_iter_init(&msg.msg_iter, WRITE, (struct iovec *)&kv, len, 1);
|
||||
#endif
|
||||
ret = kernel_sendmsg(sock, &msg, &kv, 1, len);
|
||||
if (ret <= 0)
|
||||
return -ECONNABORTED;
|
||||
@@ -897,7 +905,6 @@ static int sock_opts_and_names(struct scoutfs_net_connection *conn,
|
||||
struct socket *sock)
|
||||
{
|
||||
struct timeval tv;
|
||||
int addrlen;
|
||||
int optval;
|
||||
int ret;
|
||||
|
||||
@@ -947,23 +954,18 @@ static int sock_opts_and_names(struct scoutfs_net_connection *conn,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
addrlen = sizeof(struct sockaddr_in);
|
||||
ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname,
|
||||
&addrlen);
|
||||
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
|
||||
ret = -EAFNOSUPPORT;
|
||||
if (ret)
|
||||
ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
addrlen = sizeof(struct sockaddr_in);
|
||||
ret = kernel_getpeername(sock, (struct sockaddr *)&conn->peername,
|
||||
&addrlen);
|
||||
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
|
||||
ret = -EAFNOSUPPORT;
|
||||
if (ret)
|
||||
ret = kc_kernel_getpeername(sock, (struct sockaddr *)&conn->peername);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = 0;
|
||||
|
||||
conn->last_peername = conn->peername;
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
@@ -1052,7 +1054,7 @@ static void scoutfs_net_connect_worker(struct work_struct *work)
|
||||
|
||||
trace_scoutfs_net_connect_work_enter(sb, 0, 0);
|
||||
|
||||
ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
|
||||
ret = kc_sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -1453,7 +1455,7 @@ int scoutfs_net_bind(struct super_block *sb,
|
||||
if (WARN_ON_ONCE(conn->sock))
|
||||
return -EINVAL;
|
||||
|
||||
ret = sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
|
||||
ret = kc_sock_create_kern(AF_INET, SOCK_STREAM, IPPROTO_TCP, &sock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -1471,20 +1473,18 @@ int scoutfs_net_bind(struct super_block *sb,
|
||||
goto out;
|
||||
|
||||
ret = kernel_listen(sock, 255);
|
||||
if (ret)
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
addrlen = sizeof(struct sockaddr_in);
|
||||
ret = kernel_getsockname(sock, (struct sockaddr *)&conn->sockname,
|
||||
&addrlen);
|
||||
if (ret == 0 && addrlen != sizeof(struct sockaddr_in))
|
||||
ret = -EAFNOSUPPORT;
|
||||
if (ret)
|
||||
ret = kc_kernel_getsockname(sock, (struct sockaddr *)&conn->sockname);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = 0;
|
||||
|
||||
conn->sock = sock;
|
||||
*sin = conn->sockname;
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
if (ret < 0 && sock)
|
||||
sock_release(sock);
|
||||
|
||||
@@ -33,6 +33,7 @@ enum {
|
||||
Opt_acl,
|
||||
Opt_data_prealloc_blocks,
|
||||
Opt_data_prealloc_contig_only,
|
||||
Opt_log_merge_wait_timeout_ms,
|
||||
Opt_metadev_path,
|
||||
Opt_noacl,
|
||||
Opt_orphan_scan_delay_ms,
|
||||
@@ -45,6 +46,7 @@ static const match_table_t tokens = {
|
||||
{Opt_acl, "acl"},
|
||||
{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
|
||||
{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
|
||||
{Opt_log_merge_wait_timeout_ms, "log_merge_wait_timeout_ms=%s"},
|
||||
{Opt_metadev_path, "metadev_path=%s"},
|
||||
{Opt_noacl, "noacl"},
|
||||
{Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"},
|
||||
@@ -113,6 +115,10 @@ static void free_options(struct scoutfs_mount_options *opts)
|
||||
kfree(opts->metadev_path);
|
||||
}
|
||||
|
||||
#define MIN_LOG_MERGE_WAIT_TIMEOUT_MS 100UL
|
||||
#define DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS 500
|
||||
#define MAX_LOG_MERGE_WAIT_TIMEOUT_MS (60 * MSEC_PER_SEC)
|
||||
|
||||
#define MIN_ORPHAN_SCAN_DELAY_MS 100UL
|
||||
#define DEFAULT_ORPHAN_SCAN_DELAY_MS (10 * MSEC_PER_SEC)
|
||||
#define MAX_ORPHAN_SCAN_DELAY_MS (60 * MSEC_PER_SEC)
|
||||
@@ -126,11 +132,27 @@ static void init_default_options(struct scoutfs_mount_options *opts)
|
||||
|
||||
opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS;
|
||||
opts->data_prealloc_contig_only = 1;
|
||||
opts->log_merge_wait_timeout_ms = DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS;
|
||||
opts->orphan_scan_delay_ms = -1;
|
||||
opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
|
||||
opts->quorum_slot_nr = -1;
|
||||
}
|
||||
|
||||
static int verify_log_merge_wait_timeout_ms(struct super_block *sb, int ret, int val)
|
||||
{
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "failed to parse log_merge_wait_timeout_ms value");
|
||||
return -EINVAL;
|
||||
}
|
||||
if (val < MIN_LOG_MERGE_WAIT_TIMEOUT_MS || val > MAX_LOG_MERGE_WAIT_TIMEOUT_MS) {
|
||||
scoutfs_err(sb, "invalid log_merge_wait_timeout_ms value %d, must be between %lu and %lu",
|
||||
val, MIN_LOG_MERGE_WAIT_TIMEOUT_MS, MAX_LOG_MERGE_WAIT_TIMEOUT_MS);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int verify_quorum_heartbeat_timeout_ms(struct super_block *sb, int ret, u64 val)
|
||||
{
|
||||
if (ret < 0) {
|
||||
@@ -169,7 +191,7 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
|
||||
switch (token) {
|
||||
|
||||
case Opt_acl:
|
||||
sb->s_flags |= MS_POSIXACL;
|
||||
sb->s_flags |= SB_POSIXACL;
|
||||
break;
|
||||
|
||||
case Opt_data_prealloc_blocks:
|
||||
@@ -196,6 +218,14 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
|
||||
opts->data_prealloc_contig_only = nr;
|
||||
break;
|
||||
|
||||
case Opt_log_merge_wait_timeout_ms:
|
||||
ret = match_int(args, &nr);
|
||||
ret = verify_log_merge_wait_timeout_ms(sb, ret, nr);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
opts->log_merge_wait_timeout_ms = nr;
|
||||
break;
|
||||
|
||||
case Opt_metadev_path:
|
||||
ret = parse_bdev_path(sb, &args[0], &opts->metadev_path);
|
||||
if (ret < 0)
|
||||
@@ -203,7 +233,7 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
|
||||
break;
|
||||
|
||||
case Opt_noacl:
|
||||
sb->s_flags &= ~MS_POSIXACL;
|
||||
sb->s_flags &= ~SB_POSIXACL;
|
||||
break;
|
||||
|
||||
case Opt_orphan_scan_delay_ms:
|
||||
@@ -327,7 +357,7 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
|
||||
{
|
||||
struct super_block *sb = root->d_sb;
|
||||
struct scoutfs_mount_options opts;
|
||||
const bool is_acl = !!(sb->s_flags & MS_POSIXACL);
|
||||
const bool is_acl = !!(sb->s_flags & SB_POSIXACL);
|
||||
|
||||
scoutfs_options_read(sb, &opts);
|
||||
|
||||
@@ -422,6 +452,43 @@ static ssize_t data_prealloc_contig_only_store(struct kobject *kobj, struct kobj
|
||||
}
|
||||
SCOUTFS_ATTR_RW(data_prealloc_contig_only);
|
||||
|
||||
static ssize_t log_merge_wait_timeout_ms_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
struct scoutfs_mount_options opts;
|
||||
|
||||
scoutfs_options_read(sb, &opts);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u", opts.log_merge_wait_timeout_ms);
|
||||
}
|
||||
/*
 * sysfs store handler for log_merge_wait_timeout_ms.
 *
 * Copies the written bytes into a bounded, null terminated buffer,
 * parses them as an int, and validates the result with
 * verify_log_merge_wait_timeout_ms().  On success the option is updated
 * under the options seqlock and @count is returned; otherwise the
 * negative error from validation is returned.
 */
static ssize_t log_merge_wait_timeout_ms_store(struct kobject *kobj, struct kobj_attribute *attr,
					       const char *buf, size_t count)
{
	struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
	DECLARE_OPTIONS_INFO(sb, optinf);
	char nullterm[30]; /* more than enough for octal -U64_MAX */
	int val = 0; /* defined value to hand to the verifier if parsing fails */
	int len;
	int ret;

	/* the input isn't guaranteed to be terminated, truncate to our buffer */
	len = min(count, sizeof(nullterm) - 1);
	memcpy(nullterm, buf, len);
	nullterm[len] = '\0';

	/* the verifier checks the parse result before looking at the value */
	ret = kstrtoint(nullterm, 0, &val);
	ret = verify_log_merge_wait_timeout_ms(sb, ret, val);
	if (ret == 0) {
		write_seqlock(&optinf->seqlock);
		optinf->opts.log_merge_wait_timeout_ms = val;
		write_sequnlock(&optinf->seqlock);
		ret = count;
	}

	return ret;
}
|
||||
SCOUTFS_ATTR_RW(log_merge_wait_timeout_ms);
|
||||
|
||||
static ssize_t metadev_path_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
@@ -525,6 +592,7 @@ SCOUTFS_ATTR_RO(quorum_slot_nr);
|
||||
static struct attribute *options_attrs[] = {
|
||||
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
|
||||
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
|
||||
SCOUTFS_ATTR_PTR(log_merge_wait_timeout_ms),
|
||||
SCOUTFS_ATTR_PTR(metadev_path),
|
||||
SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),
|
||||
SCOUTFS_ATTR_PTR(quorum_heartbeat_timeout_ms),
|
||||
|
||||
@@ -8,6 +8,7 @@
|
||||
struct scoutfs_mount_options {
|
||||
u64 data_prealloc_blocks;
|
||||
bool data_prealloc_contig_only;
|
||||
unsigned int log_merge_wait_timeout_ms;
|
||||
char *metadev_path;
|
||||
unsigned int orphan_scan_delay_ms;
|
||||
int quorum_slot_nr;
|
||||
|
||||
@@ -183,7 +183,7 @@ static int create_socket(struct super_block *sb)
|
||||
int addrlen;
|
||||
int ret;
|
||||
|
||||
ret = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
|
||||
ret = kc_sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock);
|
||||
if (ret) {
|
||||
scoutfs_err(sb, "quorum couldn't create udp socket: %d", ret);
|
||||
goto out;
|
||||
@@ -243,8 +243,10 @@ static int send_msg_members(struct super_block *sb, int type, u64 term, int only
|
||||
};
|
||||
struct sockaddr_in sin;
|
||||
struct msghdr mh = {
|
||||
#ifndef KC_MSGHDR_STRUCT_IOV_ITER
|
||||
.msg_iov = (struct iovec *)&kv,
|
||||
.msg_iovlen = 1,
|
||||
#endif
|
||||
.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL,
|
||||
.msg_name = &sin,
|
||||
.msg_namelen = sizeof(sin),
|
||||
@@ -266,6 +268,9 @@ static int send_msg_members(struct super_block *sb, int type, u64 term, int only
|
||||
|
||||
scoutfs_quorum_slot_sin(&qinf->qconf, i, &sin);
|
||||
now = ktime_get();
|
||||
#ifdef KC_MSGHDR_STRUCT_IOV_ITER
|
||||
iov_iter_init(&mh.msg_iter, WRITE, (struct iovec *)&kv, sizeof(qmes), 1);
|
||||
#endif
|
||||
ret = kernel_sendmsg(qinf->sock, &mh, &kv, 1, kv.iov_len);
|
||||
if (ret != kv.iov_len)
|
||||
failed++;
|
||||
@@ -308,8 +313,10 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
|
||||
.iov_len = sizeof(struct scoutfs_quorum_message),
|
||||
};
|
||||
struct msghdr mh = {
|
||||
#ifndef KC_MSGHDR_STRUCT_IOV_ITER
|
||||
.msg_iov = (struct iovec *)&kv,
|
||||
.msg_iovlen = 1,
|
||||
#endif
|
||||
.msg_flags = MSG_NOSIGNAL,
|
||||
};
|
||||
|
||||
@@ -331,6 +338,9 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifdef KC_MSGHDR_STRUCT_IOV_ITER
|
||||
iov_iter_init(&mh.msg_iter, READ, (struct iovec *)&kv, sizeof(struct scoutfs_quorum_message), 1);
|
||||
#endif
|
||||
ret = kernel_recvmsg(qinf->sock, &mh, &kv, 1, kv.iov_len, mh.msg_flags);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
@@ -719,11 +729,13 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
struct sockaddr_in unused;
|
||||
struct quorum_host_msg msg;
|
||||
struct quorum_status qst = {0,};
|
||||
struct hb_recording hbr = {{0,},};
|
||||
struct hb_recording hbr;
|
||||
bool record_hb;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
memset(&hbr, 0, sizeof(struct hb_recording));
|
||||
|
||||
/* recording votes from slots as native single word bitmap */
|
||||
BUILD_BUG_ON(SCOUTFS_QUORUM_MAX_SLOTS > BITS_PER_LONG);
|
||||
|
||||
@@ -771,8 +783,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
msg.type = SCOUTFS_QUORUM_MSG_INVALID;
|
||||
|
||||
trace_scoutfs_quorum_loop(sb, qst.role, qst.term, qst.vote_for,
|
||||
qst.vote_bits,
|
||||
ktime_to_timespec64(qst.timeout));
|
||||
qst.vote_bits, ktime_to_ns(qst.timeout));
|
||||
|
||||
/* receiving greater terms resets term, becomes follower */
|
||||
if (msg.type != SCOUTFS_QUORUM_MSG_INVALID &&
|
||||
|
||||
1238
kmod/src/quota.c
Normal file
1238
kmod/src/quota.c
Normal file
File diff suppressed because it is too large
Load Diff
48
kmod/src/quota.h
Normal file
48
kmod/src/quota.h
Normal file
@@ -0,0 +1,48 @@
|
||||
#ifndef _SCOUTFS_QUOTA_H_
|
||||
#define _SCOUTFS_QUOTA_H_
|
||||
|
||||
#include "ioctl.h"
|
||||
|
||||
/*
|
||||
* Each rule's name can be in the ruleset's rbtree associated with the
|
||||
* source attr that it selects. This lets checks only test rules that
|
||||
* the inputs could match. The 'i' field indicates which name is in the
|
||||
* tree so we can find the containing rule.
|
||||
*
|
||||
* This is mostly private to quota.c but we expose it for tracing.
|
||||
*/
|
||||
struct squota_rule {
|
||||
u64 limit;
|
||||
u8 prio;
|
||||
u8 op;
|
||||
u8 rule_flags;
|
||||
struct squota_rule_name {
|
||||
struct rb_node node;
|
||||
u64 val;
|
||||
u8 source;
|
||||
u8 flags;
|
||||
u8 i;
|
||||
} names[3];
|
||||
};
|
||||
|
||||
/* private to quota.c, only here for tracing */
|
||||
struct squota_input {
|
||||
u64 attrs[SQ_NS__NR_SELECT];
|
||||
u8 op;
|
||||
};
|
||||
|
||||
int scoutfs_quota_check_inode(struct super_block *sb, struct inode *dir);
|
||||
int scoutfs_quota_check_data(struct super_block *sb, struct inode *inode);
|
||||
|
||||
int scoutfs_quota_get_rules(struct super_block *sb, u64 *iterator,
|
||||
struct scoutfs_ioctl_quota_rule *irules, int nr);
|
||||
int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
|
||||
struct scoutfs_ioctl_quota_rule *irule);
|
||||
|
||||
void scoutfs_quota_get_lock_range(struct scoutfs_key *start, struct scoutfs_key *end);
|
||||
void scoutfs_quota_invalidate(struct super_block *sb);
|
||||
|
||||
int scoutfs_quota_setup(struct super_block *sb);
|
||||
void scoutfs_quota_destroy(struct super_block *sb);
|
||||
|
||||
#endif
|
||||
@@ -37,6 +37,10 @@
|
||||
#include "net.h"
|
||||
#include "data.h"
|
||||
#include "ext.h"
|
||||
#include "quota.h"
|
||||
|
||||
#include "trace/quota.h"
|
||||
#include "trace/wkic.h"
|
||||
|
||||
struct lock_info;
|
||||
|
||||
@@ -439,6 +443,7 @@ DECLARE_EVENT_CLASS(scoutfs_trans_hold_release_class,
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->journal_info = (unsigned long)journal_info;
|
||||
__entry->holders = holders;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" journal_info 0x%0lx holders %d ret %d",
|
||||
@@ -1746,21 +1751,41 @@ TRACE_EVENT(scoutfs_btree_merge,
|
||||
sk_trace_args(end))
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_btree_merge_read_range,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
int size),
|
||||
|
||||
TP_ARGS(sb, start, end, size),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
sk_trace_define(start)
|
||||
sk_trace_define(end)
|
||||
__field(int, size)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
sk_trace_assign(start, start);
|
||||
sk_trace_assign(end, end);
|
||||
__entry->size = size;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" start "SK_FMT" end "SK_FMT" size %d",
|
||||
SCSB_TRACE_ARGS, sk_trace_args(start), sk_trace_args(end), __entry->size)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_btree_merge_items,
|
||||
TP_PROTO(struct super_block *sb,
|
||||
struct scoutfs_btree_root *m_root,
|
||||
struct scoutfs_key *m_key, int m_val_len,
|
||||
struct scoutfs_btree_root *f_root,
|
||||
struct scoutfs_key *f_key, int f_val_len,
|
||||
int is_del),
|
||||
|
||||
TP_ARGS(sb, m_root, m_key, m_val_len, f_root, f_key, f_val_len, is_del),
|
||||
TP_ARGS(sb, m_key, m_val_len, f_root, f_key, f_val_len, is_del),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, m_root_blkno)
|
||||
__field(__u64, m_root_seq)
|
||||
__field(__u8, m_root_height)
|
||||
sk_trace_define(m_key)
|
||||
__field(int, m_val_len)
|
||||
__field(__u64, f_root_blkno)
|
||||
@@ -1773,10 +1798,6 @@ TRACE_EVENT(scoutfs_btree_merge_items,
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->m_root_blkno = m_root ?
|
||||
le64_to_cpu(m_root->ref.blkno) : 0;
|
||||
__entry->m_root_seq = m_root ? le64_to_cpu(m_root->ref.seq) : 0;
|
||||
__entry->m_root_height = m_root ? m_root->height : 0;
|
||||
sk_trace_assign(m_key, m_key);
|
||||
__entry->m_val_len = m_val_len;
|
||||
__entry->f_root_blkno = f_root ?
|
||||
@@ -1788,11 +1809,9 @@ TRACE_EVENT(scoutfs_btree_merge_items,
|
||||
__entry->is_del = !!is_del;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" merge item root blkno %llu seq %llu height %u key "SK_FMT" val_len %d, fs item root blkno %llu seq %llu height %u key "SK_FMT" val_len %d, is_del %d",
|
||||
SCSB_TRACE_ARGS, __entry->m_root_blkno, __entry->m_root_seq,
|
||||
__entry->m_root_height, sk_trace_args(m_key),
|
||||
__entry->m_val_len, __entry->f_root_blkno,
|
||||
__entry->f_root_seq, __entry->f_root_height,
|
||||
TP_printk(SCSBF" merge item key "SK_FMT" val_len %d, fs item root blkno %llu seq %llu height %u key "SK_FMT" val_len %d, is_del %d",
|
||||
SCSB_TRACE_ARGS, sk_trace_args(m_key), __entry->m_val_len,
|
||||
__entry->f_root_blkno, __entry->f_root_seq, __entry->f_root_height,
|
||||
sk_trace_args(f_key), __entry->f_val_len, __entry->is_del)
|
||||
);
|
||||
|
||||
@@ -2024,9 +2043,9 @@ DEFINE_EVENT(scoutfs_quorum_message_class, scoutfs_quorum_recv_message,
|
||||
|
||||
TRACE_EVENT(scoutfs_quorum_loop,
|
||||
TP_PROTO(struct super_block *sb, int role, u64 term, int vote_for,
|
||||
unsigned long vote_bits, struct timespec64 timeout),
|
||||
unsigned long vote_bits, unsigned long long nsecs),
|
||||
|
||||
TP_ARGS(sb, role, term, vote_for, vote_bits, timeout),
|
||||
TP_ARGS(sb, role, term, vote_for, vote_bits, nsecs),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
@@ -2035,8 +2054,7 @@ TRACE_EVENT(scoutfs_quorum_loop,
|
||||
__field(int, vote_for)
|
||||
__field(unsigned long, vote_bits)
|
||||
__field(unsigned long, vote_count)
|
||||
__field(unsigned long long, timeout_sec)
|
||||
__field(int, timeout_nsec)
|
||||
__field(unsigned long long, nsecs)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@@ -2046,14 +2064,13 @@ TRACE_EVENT(scoutfs_quorum_loop,
|
||||
__entry->vote_for = vote_for;
|
||||
__entry->vote_bits = vote_bits;
|
||||
__entry->vote_count = hweight_long(vote_bits);
|
||||
__entry->timeout_sec = timeout.tv_sec;
|
||||
__entry->timeout_nsec = timeout.tv_nsec;
|
||||
__entry->nsecs = nsecs;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" term %llu role %d vote_for %d vote_bits 0x%lx vote_count %lu timeout %llu.%u",
|
||||
TP_printk(SCSBF" term %llu role %d vote_for %d vote_bits 0x%lx vote_count %lu timeout %llu",
|
||||
SCSB_TRACE_ARGS, __entry->term, __entry->role,
|
||||
__entry->vote_for, __entry->vote_bits, __entry->vote_count,
|
||||
__entry->timeout_sec, __entry->timeout_nsec)
|
||||
__entry->nsecs)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_trans_seq_last,
|
||||
@@ -2077,6 +2094,71 @@ TRACE_EVENT(scoutfs_trans_seq_last,
|
||||
SCSB_TRACE_ARGS, __entry->s_rid, __entry->trans_seq)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_server_finalize_items,
|
||||
TP_PROTO(struct super_block *sb, u64 rid, u64 item_rid, u64 item_nr, u64 item_flags,
|
||||
u64 item_get_trans_seq),
|
||||
|
||||
TP_ARGS(sb, rid, item_rid, item_nr, item_flags, item_get_trans_seq),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, c_rid)
|
||||
__field(__u64, item_rid)
|
||||
__field(__u64, item_nr)
|
||||
__field(__u64, item_flags)
|
||||
__field(__u64, item_get_trans_seq)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->c_rid = rid;
|
||||
__entry->item_rid = item_rid;
|
||||
__entry->item_nr = item_nr;
|
||||
__entry->item_flags = item_flags;
|
||||
__entry->item_get_trans_seq = item_get_trans_seq;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" rid %016llx item_rid %016llx item_nr %llu item_flags 0x%llx item_get_trans_seq %llu",
|
||||
SCSB_TRACE_ARGS, __entry->c_rid, __entry->item_rid, __entry->item_nr,
|
||||
__entry->item_flags, __entry->item_get_trans_seq)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_server_finalize_decision,
|
||||
TP_PROTO(struct super_block *sb, u64 rid, bool saw_finalized, bool others_active,
|
||||
bool ours_visible, bool finalize_ours, unsigned int delay_ms,
|
||||
u64 finalize_sent_seq),
|
||||
|
||||
TP_ARGS(sb, rid, saw_finalized, others_active, ours_visible, finalize_ours, delay_ms,
|
||||
finalize_sent_seq),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, c_rid)
|
||||
__field(bool, saw_finalized)
|
||||
__field(bool, others_active)
|
||||
__field(bool, ours_visible)
|
||||
__field(bool, finalize_ours)
|
||||
__field(unsigned int, delay_ms)
|
||||
__field(__u64, finalize_sent_seq)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->c_rid = rid;
|
||||
__entry->saw_finalized = saw_finalized;
|
||||
__entry->others_active = others_active;
|
||||
__entry->ours_visible = ours_visible;
|
||||
__entry->finalize_ours = finalize_ours;
|
||||
__entry->delay_ms = delay_ms;
|
||||
__entry->finalize_sent_seq = finalize_sent_seq;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" rid %016llx saw_finalized %u others_active %u ours_visible %u finalize_ours %u delay_ms %u finalize_sent_seq %llu",
|
||||
SCSB_TRACE_ARGS, __entry->c_rid, __entry->saw_finalized, __entry->others_active,
|
||||
__entry->ours_visible, __entry->finalize_ours, __entry->delay_ms,
|
||||
__entry->finalize_sent_seq)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_get_log_merge_status,
|
||||
TP_PROTO(struct super_block *sb, u64 rid, struct scoutfs_key *next_range_key,
|
||||
u64 nr_requests, u64 nr_complete, u64 seq),
|
||||
@@ -2801,6 +2883,81 @@ TRACE_EVENT(scoutfs_omap_should_delete,
|
||||
SCSB_TRACE_ARGS, __entry->ino, __entry->nlink, __entry->ret)
|
||||
);
|
||||
|
||||
#define SSCF_FMT "[bo %llu bs %llu es %llu]"
|
||||
#define SSCF_FIELDS(pref) \
|
||||
__field(__u64, pref##_blkno) \
|
||||
__field(__u64, pref##_blocks) \
|
||||
__field(__u64, pref##_entries)
|
||||
#define SSCF_ASSIGN(pref, sfl) \
|
||||
__entry->pref##_blkno = le64_to_cpu((sfl)->ref.blkno); \
|
||||
__entry->pref##_blocks = le64_to_cpu((sfl)->blocks); \
|
||||
__entry->pref##_entries = le64_to_cpu((sfl)->entries);
|
||||
#define SSCF_ENTRY_ARGS(pref) \
|
||||
__entry->pref##_blkno, \
|
||||
__entry->pref##_blocks, \
|
||||
__entry->pref##_entries
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_srch_compact_class,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_srch_compact *sc),
|
||||
|
||||
TP_ARGS(sb, sc),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, id)
|
||||
__field(__u8, nr)
|
||||
__field(__u8, flags)
|
||||
SSCF_FIELDS(out)
|
||||
__field(__u64, in0_blk)
|
||||
__field(__u64, in0_pos)
|
||||
SSCF_FIELDS(in0)
|
||||
__field(__u64, in1_blk)
|
||||
__field(__u64, in1_pos)
|
||||
SSCF_FIELDS(in1)
|
||||
__field(__u64, in2_blk)
|
||||
__field(__u64, in2_pos)
|
||||
SSCF_FIELDS(in2)
|
||||
__field(__u64, in3_blk)
|
||||
__field(__u64, in3_pos)
|
||||
SSCF_FIELDS(in3)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->id = le64_to_cpu(sc->id);
|
||||
__entry->nr = sc->nr;
|
||||
__entry->flags = sc->flags;
|
||||
SSCF_ASSIGN(out, &sc->out)
|
||||
__entry->in0_blk = le64_to_cpu(sc->in[0].blk);
|
||||
__entry->in0_pos = le64_to_cpu(sc->in[0].pos);
|
||||
SSCF_ASSIGN(in0, &sc->in[0].sfl)
|
||||
__entry->in1_blk = le64_to_cpu(sc->in[0].blk);
|
||||
__entry->in1_pos = le64_to_cpu(sc->in[0].pos);
|
||||
SSCF_ASSIGN(in1, &sc->in[1].sfl)
|
||||
__entry->in2_blk = le64_to_cpu(sc->in[0].blk);
|
||||
__entry->in2_pos = le64_to_cpu(sc->in[0].pos);
|
||||
SSCF_ASSIGN(in2, &sc->in[2].sfl)
|
||||
__entry->in3_blk = le64_to_cpu(sc->in[0].blk);
|
||||
__entry->in3_pos = le64_to_cpu(sc->in[0].pos);
|
||||
SSCF_ASSIGN(in3, &sc->in[3].sfl)
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" id %llu nr %u flags 0x%x out "SSCF_FMT" in0 b %llu p %llu "SSCF_FMT" in1 b %llu p %llu "SSCF_FMT" in2 b %llu p %llu "SSCF_FMT" in3 b %llu p %llu "SSCF_FMT,
|
||||
SCSB_TRACE_ARGS, __entry->id, __entry->nr, __entry->flags, SSCF_ENTRY_ARGS(out),
|
||||
__entry->in0_blk, __entry->in0_pos, SSCF_ENTRY_ARGS(in0),
|
||||
__entry->in1_blk, __entry->in1_pos, SSCF_ENTRY_ARGS(in1),
|
||||
__entry->in2_blk, __entry->in2_pos, SSCF_ENTRY_ARGS(in2),
|
||||
__entry->in3_blk, __entry->in3_pos, SSCF_ENTRY_ARGS(in3))
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_srch_compact_class, scoutfs_srch_compact_client_send,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_srch_compact *sc),
|
||||
TP_ARGS(sb, sc)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_srch_compact_class, scoutfs_srch_compact_client_recv,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_srch_compact *sc),
|
||||
TP_ARGS(sb, sc)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_SCOUTFS_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
||||
@@ -91,6 +91,7 @@ do { \
|
||||
struct server_info {
|
||||
struct super_block *sb;
|
||||
spinlock_t lock;
|
||||
seqlock_t seqlock;
|
||||
wait_queue_head_t waitq;
|
||||
|
||||
struct workqueue_struct *wq;
|
||||
@@ -132,11 +133,9 @@ struct server_info {
|
||||
struct mutex mounted_clients_mutex;
|
||||
|
||||
/* stable super stored from commits, given in locks and rpcs */
|
||||
seqcount_t stable_seqcount;
|
||||
struct scoutfs_super_block stable_super;
|
||||
|
||||
/* serializing get and set of volume options */
|
||||
seqcount_t volopt_seqcount;
|
||||
struct mutex volopt_mutex;
|
||||
struct scoutfs_volume_options volopt;
|
||||
|
||||
@@ -149,6 +148,8 @@ struct server_info {
|
||||
struct scoutfs_quorum_config qconf;
|
||||
/* a running server maintains a private dirty super */
|
||||
struct scoutfs_super_block dirty_super;
|
||||
|
||||
u64 finalize_sent_seq;
|
||||
};
|
||||
|
||||
#define DECLARE_SERVER_INFO(sb, name) \
|
||||
@@ -182,7 +183,7 @@ static bool get_volopt_val(struct server_info *server, int nr, u64 *val)
|
||||
unsigned seq;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&server->volopt_seqcount);
|
||||
seq = read_seqbegin(&server->seqlock);
|
||||
if ((le64_to_cpu(server->volopt.set_bits) & bit)) {
|
||||
is_set = true;
|
||||
*val = le64_to_cpup(opt);
|
||||
@@ -190,7 +191,7 @@ static bool get_volopt_val(struct server_info *server, int nr, u64 *val)
|
||||
is_set = false;
|
||||
*val = 0;
|
||||
};
|
||||
} while (read_seqcount_retry(&server->volopt_seqcount, seq));
|
||||
} while (read_seqretry(&server->seqlock, seq));
|
||||
|
||||
return is_set;
|
||||
}
|
||||
@@ -414,6 +415,27 @@ static void server_hold_commit(struct super_block *sb, struct commit_hold *hold)
|
||||
wait_event(cusers->waitq, hold_commit(sb, server, cusers, hold));
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the higher of the avail or freed used by the active commit
|
||||
* since this holder joined the commit. This is *not* the amount used
|
||||
* by the holder, we don't track per-holder alloc use.
|
||||
*/
|
||||
static u32 server_hold_alloc_used_since(struct super_block *sb, struct commit_hold *hold)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
u32 avail_used;
|
||||
u32 freed_used;
|
||||
u32 avail_now;
|
||||
u32 freed_now;
|
||||
|
||||
scoutfs_alloc_meta_remaining(&server->alloc, &avail_now, &freed_now);
|
||||
|
||||
avail_used = hold->avail - avail_now;
|
||||
freed_used = hold->freed - freed_now;
|
||||
|
||||
return max(avail_used, freed_used);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called while holding the commit and returns once the commit
|
||||
* is successfully written. Many holders can all wait for all holders
|
||||
@@ -506,7 +528,7 @@ static void get_stable(struct super_block *sb, struct scoutfs_super_block *super
|
||||
unsigned int seq;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&server->stable_seqcount);
|
||||
seq = read_seqbegin(&server->seqlock);
|
||||
if (super)
|
||||
*super = server->stable_super;
|
||||
if (roots) {
|
||||
@@ -514,7 +536,7 @@ static void get_stable(struct super_block *sb, struct scoutfs_super_block *super
|
||||
roots->logs_root = server->stable_super.logs_root;
|
||||
roots->srch_root = server->stable_super.srch_root;
|
||||
}
|
||||
} while (read_seqcount_retry(&server->stable_seqcount, seq));
|
||||
} while (read_seqretry(&server->seqlock, seq));
|
||||
}
|
||||
|
||||
u64 scoutfs_server_seq(struct super_block *sb)
|
||||
@@ -548,11 +570,9 @@ void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq)
|
||||
|
||||
static void set_stable_super(struct server_info *server, struct scoutfs_super_block *super)
|
||||
{
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&server->stable_seqcount);
|
||||
write_seqlock(&server->seqlock);
|
||||
server->stable_super = *super;
|
||||
write_seqcount_end(&server->stable_seqcount);
|
||||
preempt_enable();
|
||||
write_sequnlock(&server->seqlock);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -941,22 +961,24 @@ static int find_log_trees_item(struct super_block *sb,
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the next log_trees item from the key. Fills the caller's log_trees and sets
|
||||
* the key past the returned log_trees for iteration. Returns 0 when done, > 0 for each
|
||||
* item, and -errno on fatal errors.
|
||||
* Find the log_trees item with the greatest nr for each rid. Fills the
|
||||
* caller's log_trees and sets the key before the returned log_trees for
|
||||
* the next iteration. Returns 0 when done, > 0 for each item, and
|
||||
* -errno on fatal errors.
|
||||
*/
|
||||
static int for_each_lt(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key, struct scoutfs_log_trees *lt)
|
||||
static int for_each_rid_last_lt(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key, struct scoutfs_log_trees *lt)
|
||||
{
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_btree_next(sb, root, key, &iref);
|
||||
ret = scoutfs_btree_prev(sb, root, key, &iref);
|
||||
if (ret == 0) {
|
||||
if (iref.val_len == sizeof(struct scoutfs_log_trees)) {
|
||||
memcpy(lt, iref.val, iref.val_len);
|
||||
*key = *iref.key;
|
||||
scoutfs_key_inc(key);
|
||||
key->sklt_nr = 0;
|
||||
scoutfs_key_dec(key);
|
||||
ret = 1;
|
||||
} else {
|
||||
ret = -EIO;
|
||||
@@ -1051,21 +1073,13 @@ static int next_log_merge_item(struct super_block *sb,
|
||||
* abandoned log btree finalized. If it takes too long each client has
|
||||
* a change to make forward progress before being asked to commit again.
|
||||
*
|
||||
* We're waiting on heavy state that is protected by mutexes and
|
||||
* transaction machinery. It's tricky to recreate that state for
|
||||
* lightweight condition tests that don't change task state. Instead of
|
||||
* trying to get that right, particularly as we unwind after success or
|
||||
* after timeouts, waiters use an unsatisfying poll. Short enough to
|
||||
* not add terrible latency, given how heavy and infrequent this already
|
||||
* is, and long enough to not melt the cpu. This could be tuned if it
|
||||
* becomes a problem.
|
||||
*
|
||||
* This can end up finalizing a new empty log btree if a new mount
|
||||
* happens to arrive at just the right time. That's fine, merging will
|
||||
* ignore and tear down the empty input.
|
||||
*/
|
||||
#define FINALIZE_POLL_MS (11)
|
||||
#define FINALIZE_TIMEOUT_MS (MSEC_PER_SEC / 2)
|
||||
#define FINALIZE_POLL_MIN_DELAY_MS 5U
|
||||
#define FINALIZE_POLL_MAX_DELAY_MS 100U
|
||||
#define FINALIZE_POLL_DELAY_GROWTH_PCT 150U
|
||||
static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_log_trees *lt,
|
||||
u64 rid, struct commit_hold *hold)
|
||||
{
|
||||
@@ -1073,8 +1087,10 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_log_merge_status stat;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
struct scoutfs_mount_options opts;
|
||||
struct scoutfs_log_trees each_lt;
|
||||
struct scoutfs_log_trees fin;
|
||||
unsigned int delay_ms;
|
||||
unsigned long timeo;
|
||||
bool saw_finalized;
|
||||
bool others_active;
|
||||
@@ -1082,10 +1098,14 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
bool ours_visible;
|
||||
struct scoutfs_key key;
|
||||
char *err_str = NULL;
|
||||
ktime_t start;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
timeo = jiffies + msecs_to_jiffies(FINALIZE_TIMEOUT_MS);
|
||||
scoutfs_options_read(sb, &opts);
|
||||
timeo = jiffies + msecs_to_jiffies(opts.log_merge_wait_timeout_ms);
|
||||
delay_ms = FINALIZE_POLL_MIN_DELAY_MS;
|
||||
start = ktime_get_raw();
|
||||
|
||||
for (;;) {
|
||||
/* nothing to do if there's already a merge in flight */
|
||||
@@ -1102,8 +1122,13 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
saw_finalized = false;
|
||||
others_active = false;
|
||||
ours_visible = false;
|
||||
scoutfs_key_init_log_trees(&key, 0, 0);
|
||||
while ((ret = for_each_lt(sb, &super->logs_root, &key, &each_lt)) > 0) {
|
||||
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
|
||||
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &each_lt)) > 0) {
|
||||
|
||||
trace_scoutfs_server_finalize_items(sb, rid, le64_to_cpu(each_lt.rid),
|
||||
le64_to_cpu(each_lt.nr),
|
||||
le64_to_cpu(each_lt.flags),
|
||||
le64_to_cpu(each_lt.get_trans_seq));
|
||||
|
||||
if ((le64_to_cpu(each_lt.flags) & SCOUTFS_LOG_TREES_FINALIZED))
|
||||
saw_finalized = true;
|
||||
@@ -1128,6 +1153,10 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
finalize_ours = (lt->item_root.height > 2) ||
|
||||
(le32_to_cpu(lt->meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW);
|
||||
|
||||
trace_scoutfs_server_finalize_decision(sb, rid, saw_finalized, others_active,
|
||||
ours_visible, finalize_ours, delay_ms,
|
||||
server->finalize_sent_seq);
|
||||
|
||||
/* done if we're not finalizing and there's no finalized */
|
||||
if (!finalize_ours && !saw_finalized) {
|
||||
ret = 0;
|
||||
@@ -1135,12 +1164,13 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
}
|
||||
|
||||
/* send sync requests soon to give time to commit */
|
||||
scoutfs_key_init_log_trees(&key, 0, 0);
|
||||
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
|
||||
while (others_active &&
|
||||
(ret = for_each_lt(sb, &super->logs_root, &key, &each_lt)) > 0) {
|
||||
(ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &each_lt)) > 0) {
|
||||
|
||||
if ((le64_to_cpu(each_lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) ||
|
||||
(le64_to_cpu(each_lt.rid) == rid))
|
||||
(le64_to_cpu(each_lt.rid) == rid) ||
|
||||
(le64_to_cpu(each_lt.get_trans_seq) <= server->finalize_sent_seq))
|
||||
continue;
|
||||
|
||||
ret = scoutfs_net_submit_request_node(sb, server->conn,
|
||||
@@ -1160,6 +1190,8 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
break;
|
||||
}
|
||||
|
||||
server->finalize_sent_seq = scoutfs_server_seq(sb);
|
||||
|
||||
/* Finalize ours if it's visible to others */
|
||||
if (ours_visible) {
|
||||
fin = *lt;
|
||||
@@ -1197,13 +1229,16 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
if (ret < 0)
|
||||
err_str = "applying commit before waiting for finalized";
|
||||
|
||||
msleep(FINALIZE_POLL_MS);
|
||||
msleep(delay_ms);
|
||||
delay_ms = min(delay_ms * FINALIZE_POLL_DELAY_GROWTH_PCT / 100,
|
||||
FINALIZE_POLL_MAX_DELAY_MS);
|
||||
|
||||
server_hold_commit(sb, hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
|
||||
/* done if we timed out */
|
||||
if (time_after(jiffies, timeo)) {
|
||||
scoutfs_inc_counter(sb, log_merge_wait_timeout);
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
@@ -1786,43 +1821,29 @@ out:
|
||||
* Give the caller the last seq before outstanding client commits. All
|
||||
* seqs up to and including this are stable, new client transactions can
|
||||
* only have greater seqs.
|
||||
*
|
||||
* For each rid, only its greatest log trees nr can be an open commit.
|
||||
* We look at the last log_trees item for each client rid and record its
|
||||
* trans seq if it hasn't been committed.
|
||||
*/
|
||||
static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret)
|
||||
{
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_log_trees *lt;
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_key key;
|
||||
u64 last_seq = 0;
|
||||
int ret;
|
||||
|
||||
last_seq = scoutfs_server_seq(sb) - 1;
|
||||
scoutfs_key_init_log_trees(&key, 0, 0);
|
||||
|
||||
mutex_lock(&server->logs_mutex);
|
||||
|
||||
for (;; scoutfs_key_inc(&key)) {
|
||||
ret = scoutfs_btree_next(sb, &super->logs_root, &key, &iref);
|
||||
if (ret == 0) {
|
||||
if (iref.val_len == sizeof(*lt)) {
|
||||
lt = iref.val;
|
||||
if ((le64_to_cpu(lt->get_trans_seq) >
|
||||
le64_to_cpu(lt->commit_trans_seq)) &&
|
||||
le64_to_cpu(lt->get_trans_seq) <= last_seq) {
|
||||
last_seq = le64_to_cpu(lt->get_trans_seq) - 1;
|
||||
}
|
||||
key = *iref.key;
|
||||
} else {
|
||||
ret = -EIO;
|
||||
}
|
||||
scoutfs_btree_put_iref(&iref);
|
||||
}
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
|
||||
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, &lt)) > 0) {
|
||||
if ((le64_to_cpu(lt.get_trans_seq) > le64_to_cpu(lt.commit_trans_seq)) &&
|
||||
le64_to_cpu(lt.get_trans_seq) <= last_seq) {
|
||||
last_seq = le64_to_cpu(lt.get_trans_seq) - 1;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1969,9 +1990,7 @@ static int server_srch_get_compact(struct super_block *sb,
|
||||
ret = scoutfs_srch_get_compact(sb, &server->alloc, &server->wri,
|
||||
&super->srch_root, rid, sc);
|
||||
mutex_unlock(&server->srch_mutex);
|
||||
if (ret == 0 && sc->nr == 0)
|
||||
ret = -ENOENT;
|
||||
if (ret < 0)
|
||||
if (ret < 0 || (ret == 0 && sc->nr == 0))
|
||||
goto apply;
|
||||
|
||||
mutex_lock(&server->alloc_mutex);
|
||||
@@ -2476,9 +2495,11 @@ static void server_log_merge_free_work(struct work_struct *work)
|
||||
|
||||
while (!server_is_stopping(server)) {
|
||||
|
||||
server_hold_commit(sb, &hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
commit = true;
|
||||
if (!commit) {
|
||||
server_hold_commit(sb, &hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
commit = true;
|
||||
}
|
||||
|
||||
ret = next_log_merge_item(sb, &super->log_merge,
|
||||
SCOUTFS_LOG_MERGE_FREEING_ZONE,
|
||||
@@ -2525,12 +2546,14 @@ static void server_log_merge_free_work(struct work_struct *work)
|
||||
/* freed blocks are in allocator, we *have* to update fr */
|
||||
BUG_ON(ret < 0);
|
||||
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
commit = false;
|
||||
if (ret < 0) {
|
||||
err_str = "looping commit del/upd freeing item";
|
||||
break;
|
||||
if (server_hold_alloc_used_since(sb, &hold) >= COMMIT_HOLD_ALLOC_BUDGET / 2) {
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
commit = false;
|
||||
if (ret < 0) {
|
||||
err_str = "looping commit del/upd freeing item";
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3073,9 +3096,9 @@ static int server_get_volopt(struct super_block *sb, struct scoutfs_net_connecti
|
||||
}
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&server->volopt_seqcount);
|
||||
seq = read_seqbegin(&server->seqlock);
|
||||
volopt = server->volopt;
|
||||
} while (read_seqcount_retry(&server->volopt_seqcount, seq));
|
||||
} while (read_seqretry(&server->seqlock, seq));
|
||||
|
||||
out:
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, &volopt, sizeof(volopt));
|
||||
@@ -3144,12 +3167,12 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti
|
||||
apply:
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
|
||||
write_seqcount_begin(&server->volopt_seqcount);
|
||||
write_seqlock(&server->seqlock);
|
||||
if (ret == 0)
|
||||
server->volopt = super->volopt;
|
||||
else
|
||||
super->volopt = server->volopt;
|
||||
write_seqcount_end(&server->volopt_seqcount);
|
||||
write_sequnlock(&server->seqlock);
|
||||
|
||||
mutex_unlock(&server->volopt_mutex);
|
||||
out:
|
||||
@@ -3192,12 +3215,12 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec
|
||||
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
|
||||
write_seqcount_begin(&server->volopt_seqcount);
|
||||
write_seqlock(&server->seqlock);
|
||||
if (ret == 0)
|
||||
server->volopt = super->volopt;
|
||||
else
|
||||
super->volopt = server->volopt;
|
||||
write_seqcount_end(&server->volopt_seqcount);
|
||||
write_sequnlock(&server->seqlock);
|
||||
|
||||
mutex_unlock(&server->volopt_mutex);
|
||||
out:
|
||||
@@ -4303,6 +4326,7 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
scoutfs_info(sb, "server starting at "SIN_FMT, SIN_ARG(&sin));
|
||||
|
||||
scoutfs_block_writer_init(sb, &server->wri);
|
||||
server->finalize_sent_seq = 0;
|
||||
|
||||
/* first make sure no other servers are still running */
|
||||
ret = scoutfs_quorum_fence_leaders(sb, &server->qconf, server->term);
|
||||
@@ -4336,9 +4360,9 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
}
|
||||
|
||||
/* update volume options early, possibly for use during startup */
|
||||
write_seqcount_begin(&server->volopt_seqcount);
|
||||
write_seqlock(&server->seqlock);
|
||||
server->volopt = super->volopt;
|
||||
write_seqcount_end(&server->volopt_seqcount);
|
||||
write_sequnlock(&server->seqlock);
|
||||
|
||||
atomic64_set(&server->seq_atomic, le64_to_cpu(super->seq));
|
||||
set_stable_super(server, super);
|
||||
@@ -4464,7 +4488,7 @@ void scoutfs_server_stop_wait(struct super_block *sb)
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
stop_server(server);
|
||||
flush_work_sync(&server->work);
|
||||
flush_work(&server->work);
|
||||
}
|
||||
|
||||
int scoutfs_server_setup(struct super_block *sb)
|
||||
@@ -4478,6 +4502,7 @@ int scoutfs_server_setup(struct super_block *sb)
|
||||
|
||||
server->sb = sb;
|
||||
spin_lock_init(&server->lock);
|
||||
seqlock_init(&server->seqlock);
|
||||
init_waitqueue_head(&server->waitq);
|
||||
INIT_WORK(&server->work, scoutfs_server_worker);
|
||||
server->status = SERVER_DOWN;
|
||||
@@ -4492,8 +4517,6 @@ int scoutfs_server_setup(struct super_block *sb)
|
||||
INIT_WORK(&server->log_merge_free_work, server_log_merge_free_work);
|
||||
mutex_init(&server->srch_mutex);
|
||||
mutex_init(&server->mounted_clients_mutex);
|
||||
seqcount_init(&server->stable_seqcount);
|
||||
seqcount_init(&server->volopt_seqcount);
|
||||
mutex_init(&server->volopt_mutex);
|
||||
INIT_WORK(&server->fence_pending_recov_work, fence_pending_recov_worker);
|
||||
INIT_DELAYED_WORK(&server->reclaim_dwork, reclaim_worker);
|
||||
|
||||
211
kmod/src/srch.c
211
kmod/src/srch.c
@@ -30,6 +30,9 @@
|
||||
#include "client.h"
|
||||
#include "counters.h"
|
||||
#include "scoutfs_trace.h"
|
||||
#include "triggers.h"
|
||||
#include "sysfs.h"
|
||||
#include "msg.h"
|
||||
|
||||
/*
|
||||
* This srch subsystem gives us a way to find inodes that have a given
|
||||
@@ -68,10 +71,14 @@ struct srch_info {
|
||||
atomic_t shutdown;
|
||||
struct workqueue_struct *workq;
|
||||
struct delayed_work compact_dwork;
|
||||
struct scoutfs_sysfs_attrs ssa;
|
||||
atomic_t compact_delay_ms;
|
||||
};
|
||||
|
||||
#define DECLARE_SRCH_INFO(sb, name) \
|
||||
struct srch_info *name = SCOUTFS_SB(sb)->srch_info
|
||||
#define DECLARE_SRCH_INFO_KOBJ(kobj, name) \
|
||||
DECLARE_SRCH_INFO(SCOUTFS_SYSFS_ATTRS_SB(kobj), name)
|
||||
|
||||
#define SRE_FMT "%016llx.%llu.%llu"
|
||||
#define SRE_ARG(sre) \
|
||||
@@ -520,6 +527,95 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Padded entries are encoded in pairs after an existing entry. All of
|
||||
* the pairs cancel each other out by all readers (the second encoding
|
||||
* looks like deletion) so they aren't visible to the first/last bounds of
|
||||
* the block or file.
|
||||
*/
|
||||
static int append_padded_entry(struct scoutfs_srch_file *sfl, u64 blk,
|
||||
struct scoutfs_srch_block *srb, struct scoutfs_srch_entry *sre)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = encode_entry(srb->entries + le32_to_cpu(srb->entry_bytes),
|
||||
sre, &srb->tail);
|
||||
if (ret > 0) {
|
||||
srb->tail = *sre;
|
||||
le32_add_cpu(&srb->entry_nr, 1);
|
||||
le32_add_cpu(&srb->entry_bytes, ret);
|
||||
le64_add_cpu(&sfl->entries, 1);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called by a testing trigger to create a very specific case of
|
||||
* encoded entry offsets. We want the last entry in the block to start
|
||||
* precisely at the _SAFE_BYTES offset.
|
||||
*
|
||||
* This is called when there is a single existing entry in the block.
|
||||
* We have the entire block to work with. We encode pairs of matching
|
||||
* entries. This hides them from readers (both searches and merging) as
|
||||
* they're interpreted as creation and deletion and are deleted. We use
|
||||
* the existing hash value of the first entry in the block but then set
|
||||
* the inode to an impossibly large number so it doesn't interfere with
|
||||
* anything.
|
||||
*
|
||||
* To hit the specific offset we very carefully manage the amount of
|
||||
* bytes of change between fields in the entry. We know that if we
|
||||
* change all the byte of the ino and id we end up with a 20 byte
|
||||
* (2+8+8,2) encoding of the pair of entries. To have the last entry
|
||||
* start at the _SAFE_POS offset we know that the final 20 byte pair
|
||||
* encoding needs to end at 2 bytes (second entry encoding) after the
|
||||
* _SAFE_POS offset.
|
||||
*
|
||||
* So as we encode pairs we watch the delta of our current offset from
|
||||
* that desired final offset of 2 past _SAFE_POS. If we're a multiple
|
||||
* of 20 away then we encode the full 20 byte pairs. If we're not, then
|
||||
* we drop a byte to encode 19 bytes. That'll slowly change the offset
|
||||
* to be a multiple of 20 again while encoding large entries.
|
||||
*/
|
||||
static void pad_entries_at_safe(struct scoutfs_srch_file *sfl, u64 blk,
|
||||
struct scoutfs_srch_block *srb)
|
||||
{
|
||||
struct scoutfs_srch_entry sre;
|
||||
u32 target;
|
||||
s32 diff;
|
||||
u64 hash;
|
||||
u64 ino;
|
||||
u64 id;
|
||||
int ret;
|
||||
|
||||
hash = le64_to_cpu(srb->tail.hash);
|
||||
ino = le64_to_cpu(srb->tail.ino) | (1ULL << 62);
|
||||
id = le64_to_cpu(srb->tail.id);
|
||||
|
||||
target = SCOUTFS_SRCH_BLOCK_SAFE_BYTES + 2;
|
||||
|
||||
while ((diff = target - le32_to_cpu(srb->entry_bytes)) > 0) {
|
||||
ino ^= 1ULL << (7 * 8);
|
||||
if (diff % 20 == 0) {
|
||||
id ^= 1ULL << (7 * 8);
|
||||
} else {
|
||||
id ^= 1ULL << (6 * 8);
|
||||
}
|
||||
|
||||
sre.hash = cpu_to_le64(hash);
|
||||
sre.ino = cpu_to_le64(ino);
|
||||
sre.id = cpu_to_le64(id);
|
||||
|
||||
ret = append_padded_entry(sfl, blk, srb, &sre);
|
||||
if (ret == 0)
|
||||
ret = append_padded_entry(sfl, blk, srb, &sre);
|
||||
BUG_ON(ret != 0);
|
||||
|
||||
diff = target - le32_to_cpu(srb->entry_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller is dropping an ino/id because the tracking rbtree is full.
|
||||
* This loses information so we can't return any entries at or after the
|
||||
@@ -987,6 +1083,9 @@ int scoutfs_srch_rotate_log(struct super_block *sb,
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
if (sfl->ref.blkno && !force && scoutfs_trigger(sb, SRCH_FORCE_LOG_ROTATE))
|
||||
force = true;
|
||||
|
||||
if (sfl->ref.blkno == 0 ||
|
||||
(!force && le64_to_cpu(sfl->blocks) < SCOUTFS_SRCH_LOG_BLOCK_LIMIT))
|
||||
return 0;
|
||||
@@ -1462,7 +1561,7 @@ static int kway_merge(struct super_block *sb,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_srch_file *sfl,
|
||||
kway_get_t kway_get, kway_advance_t kway_adv,
|
||||
void **args, int nr)
|
||||
void **args, int nr, bool logs_input)
|
||||
{
|
||||
DECLARE_SRCH_INFO(sb, srinf);
|
||||
struct scoutfs_srch_block *srb = NULL;
|
||||
@@ -1567,6 +1666,15 @@ static int kway_merge(struct super_block *sb,
|
||||
blk++;
|
||||
}
|
||||
|
||||
/* end sorted block on _SAFE offset for testing */
|
||||
if (bl && le32_to_cpu(srb->entry_nr) == 1 && logs_input &&
|
||||
scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
|
||||
pad_entries_at_safe(sfl, blk, srb);
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
blk++;
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, srch_compact_entry);
|
||||
|
||||
} else {
|
||||
@@ -1609,6 +1717,8 @@ static int kway_merge(struct super_block *sb,
|
||||
empty++;
|
||||
ret = 0;
|
||||
} else if (ret < 0) {
|
||||
if (ret == -ENOANO) /* just testing trigger */
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1747,7 +1857,7 @@ static int compact_logs(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
page->private = 0;
|
||||
list_add_tail(&page->list, &pages);
|
||||
list_add_tail(&page->lru, &pages);
|
||||
nr_pages++;
|
||||
scoutfs_inc_counter(sb, srch_compact_log_page);
|
||||
}
|
||||
@@ -1800,7 +1910,7 @@ static int compact_logs(struct super_block *sb,
|
||||
|
||||
/* sort page entries and reset private for _next */
|
||||
i = 0;
|
||||
list_for_each_entry(page, &pages, list) {
|
||||
list_for_each_entry(page, &pages, lru) {
|
||||
args[i++] = page;
|
||||
|
||||
if (atomic_read(&srinf->shutdown)) {
|
||||
@@ -1816,12 +1926,12 @@ static int compact_logs(struct super_block *sb,
|
||||
}
|
||||
|
||||
ret = kway_merge(sb, alloc, wri, &sc->out, kway_get_page, kway_adv_page,
|
||||
args, nr_pages);
|
||||
args, nr_pages, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* make sure we finished all the pages */
|
||||
list_for_each_entry(page, &pages, list) {
|
||||
list_for_each_entry(page, &pages, lru) {
|
||||
sre = page_priv_sre(page);
|
||||
if (page->private < SRES_PER_PAGE && sre->ino != 0) {
|
||||
ret = -ENOSPC;
|
||||
@@ -1834,8 +1944,8 @@ static int compact_logs(struct super_block *sb,
|
||||
out:
|
||||
scoutfs_block_put(sb, bl);
|
||||
vfree(args);
|
||||
list_for_each_entry_safe(page, tmp, &pages, list) {
|
||||
list_del(&page->list);
|
||||
list_for_each_entry_safe(page, tmp, &pages, lru) {
|
||||
list_del(&page->lru);
|
||||
__free_page(page);
|
||||
}
|
||||
|
||||
@@ -1874,12 +1984,18 @@ static int kway_get_reader(struct super_block *sb,
|
||||
srb = rdr->bl->data;
|
||||
|
||||
if (rdr->pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
|
||||
rdr->skip >= SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
|
||||
rdr->skip > SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
|
||||
rdr->skip >= le32_to_cpu(srb->entry_bytes)) {
|
||||
/* XXX inconsistency */
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (rdr->decoded_bytes == 0 && rdr->pos == SCOUTFS_SRCH_BLOCK_SAFE_BYTES &&
|
||||
scoutfs_trigger(sb, SRCH_MERGE_STOP_SAFE)) {
|
||||
/* only used in testing */
|
||||
return -ENOANO;
|
||||
}
|
||||
|
||||
/* decode entry, possibly skipping start of the block */
|
||||
while (rdr->decoded_bytes == 0 || rdr->pos < rdr->skip) {
|
||||
ret = decode_entry(srb->entries + rdr->pos,
|
||||
@@ -1969,7 +2085,7 @@ static int compact_sorted(struct super_block *sb,
|
||||
}
|
||||
|
||||
ret = kway_merge(sb, alloc, wri, &sc->out, kway_get_reader,
|
||||
kway_adv_reader, args, nr);
|
||||
kway_adv_reader, args, nr, false);
|
||||
|
||||
sc->flags |= SCOUTFS_SRCH_COMPACT_FLAG_DONE;
|
||||
for (i = 0; i < nr; i++) {
|
||||
@@ -2098,8 +2214,15 @@ static int delete_files(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* wait 10s between compact attempts on error, immediate after success */
|
||||
#define SRCH_COMPACT_DELAY_MS (10 * MSEC_PER_SEC)
|
||||
static void queue_compact_work(struct srch_info *srinf, bool immediate)
|
||||
{
|
||||
unsigned long delay;
|
||||
|
||||
if (!atomic_read(&srinf->shutdown)) {
|
||||
delay = immediate ? 0 : msecs_to_jiffies(atomic_read(&srinf->compact_delay_ms));
|
||||
queue_delayed_work(srinf->workq, &srinf->compact_dwork, delay);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Get a compaction operation from the server, sort the entries from the
|
||||
@@ -2127,7 +2250,6 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
|
||||
struct super_block *sb = srinf->sb;
|
||||
struct scoutfs_block_writer wri;
|
||||
struct scoutfs_alloc alloc;
|
||||
unsigned long delay;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
@@ -2140,6 +2262,8 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
|
||||
scoutfs_block_writer_init(sb, &wri);
|
||||
|
||||
ret = scoutfs_client_srch_get_compact(sb, sc);
|
||||
if (ret >= 0)
|
||||
trace_scoutfs_srch_compact_client_recv(sb, sc);
|
||||
if (ret < 0 || sc->nr == 0)
|
||||
goto out;
|
||||
|
||||
@@ -2168,6 +2292,7 @@ commit:
|
||||
sc->meta_freed = alloc.freed;
|
||||
sc->flags |= ret < 0 ? SCOUTFS_SRCH_COMPACT_FLAG_ERROR : 0;
|
||||
|
||||
trace_scoutfs_srch_compact_client_send(sb, sc);
|
||||
err = scoutfs_client_srch_commit_compact(sb, sc);
|
||||
if (err < 0 && ret == 0)
|
||||
ret = err;
|
||||
@@ -2178,14 +2303,56 @@ out:
|
||||
scoutfs_inc_counter(sb, srch_compact_error);
|
||||
|
||||
scoutfs_block_writer_forget_all(sb, &wri);
|
||||
if (!atomic_read(&srinf->shutdown)) {
|
||||
delay = ret == 0 ? 0 : msecs_to_jiffies(SRCH_COMPACT_DELAY_MS);
|
||||
queue_delayed_work(srinf->workq, &srinf->compact_dwork, delay);
|
||||
}
|
||||
queue_compact_work(srinf, sc->nr > 0 && ret == 0);
|
||||
|
||||
kfree(sc);
|
||||
}
|
||||
|
||||
static ssize_t compact_delay_ms_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
DECLARE_SRCH_INFO_KOBJ(kobj, srinf);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u", atomic_read(&srinf->compact_delay_ms));
|
||||
}
|
||||
|
||||
#define MIN_COMPACT_DELAY_MS MSEC_PER_SEC
|
||||
#define DEF_COMPACT_DELAY_MS (10 * MSEC_PER_SEC)
|
||||
#define MAX_COMPACT_DELAY_MS (60 * MSEC_PER_SEC)
|
||||
|
||||
static ssize_t compact_delay_ms_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
DECLARE_SRCH_INFO(sb, srinf);
|
||||
char nullterm[30]; /* more than enough for octal -U64_MAX */
|
||||
u64 val;
|
||||
int len;
|
||||
int ret;
|
||||
|
||||
len = min(count, sizeof(nullterm) - 1);
|
||||
memcpy(nullterm, buf, len);
|
||||
nullterm[len] = '\0';
|
||||
|
||||
ret = kstrtoll(nullterm, 0, &val);
|
||||
if (ret < 0 || val < MIN_COMPACT_DELAY_MS || val > MAX_COMPACT_DELAY_MS) {
|
||||
scoutfs_err(sb, "invalid compact_delay_ms value, must be between %lu and %lu",
|
||||
MIN_COMPACT_DELAY_MS, MAX_COMPACT_DELAY_MS);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
atomic_set(&srinf->compact_delay_ms, val);
|
||||
cancel_delayed_work(&srinf->compact_dwork);
|
||||
queue_compact_work(srinf, false);
|
||||
|
||||
return count;
|
||||
}
|
||||
SCOUTFS_ATTR_RW(compact_delay_ms);
|
||||
|
||||
static struct attribute *srch_attrs[] = {
|
||||
SCOUTFS_ATTR_PTR(compact_delay_ms),
|
||||
NULL,
|
||||
};
|
||||
|
||||
void scoutfs_srch_destroy(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
@@ -2202,6 +2369,8 @@ void scoutfs_srch_destroy(struct super_block *sb)
|
||||
destroy_workqueue(srinf->workq);
|
||||
}
|
||||
|
||||
scoutfs_sysfs_destroy_attrs(sb, &srinf->ssa);
|
||||
|
||||
kfree(srinf);
|
||||
sbi->srch_info = NULL;
|
||||
}
|
||||
@@ -2219,8 +2388,15 @@ int scoutfs_srch_setup(struct super_block *sb)
|
||||
srinf->sb = sb;
|
||||
atomic_set(&srinf->shutdown, 0);
|
||||
INIT_DELAYED_WORK(&srinf->compact_dwork, scoutfs_srch_compact_worker);
|
||||
scoutfs_sysfs_init_attrs(sb, &srinf->ssa);
|
||||
atomic_set(&srinf->compact_delay_ms, DEF_COMPACT_DELAY_MS);
|
||||
|
||||
sbi->srch_info = srinf;
|
||||
|
||||
ret = scoutfs_sysfs_create_attrs(sb, &srinf->ssa, srch_attrs, "srch");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
srinf->workq = alloc_workqueue("scoutfs_srch_compact",
|
||||
WQ_NON_REENTRANT | WQ_UNBOUND |
|
||||
WQ_HIGHPRI, 0);
|
||||
@@ -2229,8 +2405,7 @@ int scoutfs_srch_setup(struct super_block *sb)
|
||||
goto out;
|
||||
}
|
||||
|
||||
queue_delayed_work(srinf->workq, &srinf->compact_dwork,
|
||||
msecs_to_jiffies(SRCH_COMPACT_DELAY_MS));
|
||||
queue_compact_work(srinf, false);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/magic.h>
|
||||
@@ -48,6 +49,8 @@
|
||||
#include "volopt.h"
|
||||
#include "fence.h"
|
||||
#include "xattr.h"
|
||||
#include "wkic.h"
|
||||
#include "quota.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
static struct dentry *scoutfs_debugfs_root;
|
||||
@@ -178,7 +181,7 @@ static void scoutfs_put_super(struct super_block *sb)
|
||||
/*
|
||||
* Wait for invalidation and iput to finish with any lingering
|
||||
* inode references that escaped the evict_inodes in
|
||||
* generic_shutdown_super. MS_ACTIVE is clear so final iput
|
||||
* generic_shutdown_super. SB_ACTIVE is clear so final iput
|
||||
* will always evict.
|
||||
*/
|
||||
scoutfs_lock_flush_invalidate(sb);
|
||||
@@ -193,7 +196,9 @@ static void scoutfs_put_super(struct super_block *sb)
|
||||
scoutfs_shutdown_trans(sb);
|
||||
scoutfs_volopt_destroy(sb);
|
||||
scoutfs_client_destroy(sb);
|
||||
scoutfs_quota_destroy(sb);
|
||||
scoutfs_inode_destroy(sb);
|
||||
scoutfs_wkic_destroy(sb);
|
||||
scoutfs_item_destroy(sb);
|
||||
scoutfs_forest_destroy(sb);
|
||||
scoutfs_data_destroy(sb);
|
||||
@@ -325,7 +330,7 @@ static int scoutfs_read_super_from_bdev(struct super_block *sb,
|
||||
|
||||
if (le64_to_cpu(super->fmt_vers) < SCOUTFS_FORMAT_VERSION_MIN ||
|
||||
le64_to_cpu(super->fmt_vers) > SCOUTFS_FORMAT_VERSION_MAX) {
|
||||
scoutfs_err(sb, "super block has format version %llu outside of supported version range %u-%u",
|
||||
scoutfs_err(sb, "super block has format version %llu outside of supported version range %llu-%llu",
|
||||
le64_to_cpu(super->fmt_vers), SCOUTFS_FORMAT_VERSION_MIN,
|
||||
SCOUTFS_FORMAT_VERSION_MAX);
|
||||
ret = -EINVAL;
|
||||
@@ -485,7 +490,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
sb->s_d_op = &scoutfs_dentry_ops;
|
||||
sb->s_export_op = &scoutfs_export_ops;
|
||||
sb->s_xattr = scoutfs_xattr_handlers;
|
||||
sb->s_flags |= MS_I_VERSION | MS_POSIXACL;
|
||||
sb->s_flags |= SB_I_VERSION | SB_POSIXACL;
|
||||
sb->s_time_gran = 1;
|
||||
|
||||
/* btree blocks use long lived bh->b_data refs */
|
||||
@@ -543,7 +548,9 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
scoutfs_block_setup(sb) ?:
|
||||
scoutfs_forest_setup(sb) ?:
|
||||
scoutfs_item_setup(sb) ?:
|
||||
scoutfs_wkic_setup(sb) ?:
|
||||
scoutfs_inode_setup(sb) ?:
|
||||
scoutfs_quota_setup(sb) ?:
|
||||
scoutfs_data_setup(sb) ?:
|
||||
scoutfs_setup_trans(sb) ?:
|
||||
scoutfs_omap_setup(sb) ?:
|
||||
@@ -662,6 +669,10 @@ static int __init scoutfs_module_init(void)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (SCOUTFS_FORMAT_VERSION_MIN & SCOUTFS_FORMAT_VER_PREREL) {
|
||||
printk(KERN_INFO "scoutfs module using incompatible pre-release format version 0x%016llx. This module can only mount volumes with this version, and volumes with this version will be incompatible with all other release builds.", SCOUTFS_FORMAT_VERSION_MIN);
|
||||
}
|
||||
|
||||
scoutfs_debugfs_root = debugfs_create_dir("scoutfs", NULL);
|
||||
if (!scoutfs_debugfs_root) {
|
||||
ret = -ENOMEM;
|
||||
@@ -674,14 +685,14 @@ out:
|
||||
teardown_module();
|
||||
return ret;
|
||||
}
|
||||
module_init(scoutfs_module_init)
|
||||
module_init(scoutfs_module_init);
|
||||
|
||||
static void __exit scoutfs_module_exit(void)
|
||||
{
|
||||
unregister_filesystem(&scoutfs_fs_type);
|
||||
teardown_module();
|
||||
}
|
||||
module_exit(scoutfs_module_exit)
|
||||
module_exit(scoutfs_module_exit);
|
||||
|
||||
MODULE_AUTHOR("Zach Brown <zab@versity.com>");
|
||||
MODULE_LICENSE("GPL");
|
||||
|
||||
@@ -30,6 +30,8 @@ struct recov_info;
|
||||
struct omap_info;
|
||||
struct volopt_info;
|
||||
struct fence_info;
|
||||
struct wkic_info;
|
||||
struct squota_info;
|
||||
|
||||
struct scoutfs_sb_info {
|
||||
struct super_block *sb;
|
||||
@@ -55,6 +57,8 @@ struct scoutfs_sb_info {
|
||||
struct omap_info *omap_info;
|
||||
struct volopt_info *volopt_info;
|
||||
struct item_cache_info *item_cache_info;
|
||||
struct wkic_info *wkic_info;
|
||||
struct squota_info *squota_info;
|
||||
struct fence_info *fence_info;
|
||||
|
||||
/* tracks tasks waiting for data extents */
|
||||
|
||||
90
kmod/src/totl.c
Normal file
90
kmod/src/totl.c
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "forest.h"
|
||||
#include "totl.h"
|
||||
|
||||
void scoutfs_totl_set_range(struct scoutfs_key *start, struct scoutfs_key *end)
|
||||
{
|
||||
scoutfs_key_set_zeros(start);
|
||||
start->sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
scoutfs_key_set_ones(end);
|
||||
end->sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
}
|
||||
|
||||
void scoutfs_totl_merge_init(struct scoutfs_totl_merging *merg)
|
||||
{
|
||||
memset(merg, 0, sizeof(struct scoutfs_totl_merging));
|
||||
}
|
||||
|
||||
void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
|
||||
u64 seq, u8 flags, void *val, int val_len, int fic)
|
||||
{
|
||||
struct scoutfs_xattr_totl_val *tval = val;
|
||||
|
||||
if (fic & FIC_FS_ROOT) {
|
||||
merg->fs_seq = seq;
|
||||
merg->fs_total = le64_to_cpu(tval->total);
|
||||
merg->fs_count = le64_to_cpu(tval->count);
|
||||
} else if (fic & FIC_FINALIZED) {
|
||||
merg->fin_seq = seq;
|
||||
merg->fin_total += le64_to_cpu(tval->total);
|
||||
merg->fin_count += le64_to_cpu(tval->count);
|
||||
} else {
|
||||
merg->log_seq = seq;
|
||||
merg->log_total += le64_to_cpu(tval->total);
|
||||
merg->log_count += le64_to_cpu(tval->count);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* .totl. item merging has to be careful because the log btree merging
|
||||
* code can write partial results to the fs_root. This means that a
|
||||
* reader can see both cases where new finalized logs should be applied
|
||||
* to the old fs items and where old finalized logs have already been
|
||||
* applied to the partially merged fs items. Currently active logged
|
||||
* items are always applied on top of all cases.
|
||||
*
|
||||
* These cases are differentiated with a combination of sequence numbers
|
||||
* in items, the count of contributing xattrs, and a flag
|
||||
* differentiating finalized and active logged items. This lets us
|
||||
* recognize all cases, including when finalized logs were merged and
|
||||
* deleted the fs item.
|
||||
*/
|
||||
void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total, __u64 *count)
|
||||
{
|
||||
*total = 0;
|
||||
*count = 0;
|
||||
|
||||
/* start with the fs item if we have it */
|
||||
if (merg->fs_seq != 0) {
|
||||
*total = merg->fs_total;
|
||||
*count = merg->fs_count;
|
||||
}
|
||||
|
||||
/* apply finalized logs if they're newer or creating */
|
||||
if (((merg->fs_seq != 0) && (merg->fin_seq > merg->fs_seq)) ||
|
||||
((merg->fs_seq == 0) && (merg->fin_count > 0))) {
|
||||
*total += merg->fin_total;
|
||||
*count += merg->fin_count;
|
||||
}
|
||||
|
||||
/* always apply active logs which must be newer than fs and finalized */
|
||||
if (merg->log_seq > 0) {
|
||||
*total += merg->log_total;
|
||||
*count += merg->log_count;
|
||||
}
|
||||
}
|
||||
24
kmod/src/totl.h
Normal file
24
kmod/src/totl.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef _SCOUTFS_TOTL_H_
|
||||
#define _SCOUTFS_TOTL_H_
|
||||
|
||||
#include "key.h"
|
||||
|
||||
struct scoutfs_totl_merging {
|
||||
u64 fs_seq;
|
||||
u64 fs_total;
|
||||
u64 fs_count;
|
||||
u64 fin_seq;
|
||||
u64 fin_total;
|
||||
s64 fin_count;
|
||||
u64 log_seq;
|
||||
u64 log_total;
|
||||
s64 log_count;
|
||||
};
|
||||
|
||||
void scoutfs_totl_set_range(struct scoutfs_key *start, struct scoutfs_key *end);
|
||||
void scoutfs_totl_merge_init(struct scoutfs_totl_merging *merg);
|
||||
void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
|
||||
u64 seq, u8 flags, void *val, int val_len, int fic);
|
||||
void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total, __u64 *count);
|
||||
|
||||
#endif
|
||||
143
kmod/src/trace/quota.h
Normal file
143
kmod/src/trace/quota.h
Normal file
@@ -0,0 +1,143 @@
|
||||
|
||||
/*
|
||||
* Tracing squota_input
|
||||
*/
|
||||
#define SQI_FMT "[%u %llu %llu %llu]"
|
||||
|
||||
#define SQI_ARGS(i) \
|
||||
(i)->op, (i)->attrs[0], (i)->attrs[1], (i)->attrs[2]
|
||||
|
||||
#define SQI_FIELDS(pref) \
|
||||
__array(__u64, pref##_attrs, SQ_NS__NR_SELECT) \
|
||||
__field(__u8, pref##_op)
|
||||
|
||||
#define SQI_ASSIGN(pref, i) \
|
||||
__entry->pref##_attrs[0] = (i)->attrs[0]; \
|
||||
__entry->pref##_attrs[1] = (i)->attrs[1]; \
|
||||
__entry->pref##_attrs[2] = (i)->attrs[2]; \
|
||||
__entry->pref##_op = (i)->op;
|
||||
|
||||
#define SQI_ENTRY_ARGS(pref) \
|
||||
__entry->pref##_op, __entry->pref##_attrs[0], \
|
||||
__entry->pref##_attrs[1], __entry->pref##_attrs[2]
|
||||
|
||||
/*
|
||||
* Tracing squota_rule
|
||||
*/
|
||||
#define SQR_FMT "[%u %llu,%u,%x %llu,%u,%x %llu,%u,%x %u %llu]"
|
||||
|
||||
/*
 * Expands to the argument list that SQR_FMT expects, read directly from
 * the rule pointed to by r: prio, then a val/source/flags triple for each
 * of the three names, then op and limit.
 *
 * The final line deliberately has no trailing backslash so that the
 * macro ends here and can't absorb whatever line follows it.
 *
 * NOTE(review): this reads name_val[]/name_source[]/name_flags[] members
 * while SQR_ASSIGN reads names[].val/.source/.flags -- confirm which
 * struct layout callers of this macro are expected to pass.
 */
#define SQR_ARGS(r)								\
	(r)->prio,								\
	(r)->name_val[0], (r)->name_source[0], (r)->name_flags[0],		\
	(r)->name_val[1], (r)->name_source[1], (r)->name_flags[1],		\
	(r)->name_val[2], (r)->name_source[2], (r)->name_flags[2],		\
	(r)->op, (r)->limit
|
||||
|
||||
#define SQR_FIELDS(pref) \
|
||||
__array(__u64, pref##_name_val, 3) \
|
||||
__field(__u64, pref##_limit) \
|
||||
__array(__u8, pref##_name_source, 3) \
|
||||
__array(__u8, pref##_name_flags, 3) \
|
||||
__field(__u8, pref##_prio) \
|
||||
__field(__u8, pref##_op)
|
||||
|
||||
#define SQR_ASSIGN(pref, r) \
|
||||
__entry->pref##_name_val[0] = (r)->names[0].val; \
|
||||
__entry->pref##_name_val[1] = (r)->names[1].val; \
|
||||
__entry->pref##_name_val[2] = (r)->names[2].val; \
|
||||
__entry->pref##_limit = (r)->limit; \
|
||||
__entry->pref##_name_source[0] = (r)->names[0].source; \
|
||||
__entry->pref##_name_source[1] = (r)->names[1].source; \
|
||||
__entry->pref##_name_source[2] = (r)->names[2].source; \
|
||||
__entry->pref##_name_flags[0] = (r)->names[0].flags; \
|
||||
__entry->pref##_name_flags[1] = (r)->names[1].flags; \
|
||||
__entry->pref##_name_flags[2] = (r)->names[2].flags; \
|
||||
__entry->pref##_prio = (r)->prio; \
|
||||
__entry->pref##_op = (r)->op;
|
||||
|
||||
#define SQR_ENTRY_ARGS(pref) \
|
||||
__entry->pref##_prio, __entry->pref##_name_val[0], \
|
||||
__entry->pref##_name_source[0], __entry->pref##_name_flags[0], \
|
||||
__entry->pref##_name_val[1], __entry->pref##_name_source[1], \
|
||||
__entry->pref##_name_flags[1], __entry->pref##_name_val[2], \
|
||||
__entry->pref##_name_source[2], __entry->pref##_name_flags[2], \
|
||||
__entry->pref##_op, __entry->pref##_limit
|
||||
|
||||
TRACE_EVENT(scoutfs_quota_check,
|
||||
TP_PROTO(struct super_block *sb, long rs_ptr, struct squota_input *inp, int ret),
|
||||
|
||||
TP_ARGS(sb, rs_ptr, inp, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(long, rs_ptr)
|
||||
SQI_FIELDS(i)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->rs_ptr = rs_ptr;
|
||||
SQI_ASSIGN(i, inp);
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" rs_ptr %ld ret %d inp "SQI_FMT,
|
||||
SCSB_TRACE_ARGS, __entry->rs_ptr, __entry->ret, SQI_ENTRY_ARGS(i))
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_quota_rule_op_class,
|
||||
TP_PROTO(struct super_block *sb, struct squota_rule *rule, int ret),
|
||||
|
||||
TP_ARGS(sb, rule, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
SQR_FIELDS(r)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
SQR_ASSIGN(r, rule);
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" "SQR_FMT" ret %d",
|
||||
SCSB_TRACE_ARGS, SQR_ENTRY_ARGS(r), __entry->ret)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_quota_rule_op_class, scoutfs_quota_add_rule,
|
||||
TP_PROTO(struct super_block *sb, struct squota_rule *rule, int ret),
|
||||
TP_ARGS(sb, rule, ret)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_quota_rule_op_class, scoutfs_quota_del_rule,
|
||||
TP_PROTO(struct super_block *sb, struct squota_rule *rule, int ret),
|
||||
TP_ARGS(sb, rule, ret)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_quota_totl_check,
|
||||
TP_PROTO(struct super_block *sb, struct squota_input *inp, struct scoutfs_key *key,
|
||||
u64 limit, int ret),
|
||||
|
||||
TP_ARGS(sb, inp, key, limit, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
SQI_FIELDS(i)
|
||||
sk_trace_define(k)
|
||||
__field(__u64, limit)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
SQI_ASSIGN(i, inp);
|
||||
sk_trace_assign(k, key);
|
||||
__entry->limit = limit;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" inp "SQI_FMT" key "SK_FMT" limit %llu ret %d",
|
||||
SCSB_TRACE_ARGS, SQI_ENTRY_ARGS(i), sk_trace_args(k), __entry->limit,
|
||||
__entry->ret)
|
||||
);
|
||||
112
kmod/src/trace/wkic.h
Normal file
112
kmod/src/trace/wkic.h
Normal file
@@ -0,0 +1,112 @@
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_wkic_wpage_class,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(void *, ptr)
|
||||
__field(int, which)
|
||||
__field(bool, n0l)
|
||||
__field(bool, n1l)
|
||||
sk_trace_define(start)
|
||||
sk_trace_define(end)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->ptr = ptr;
|
||||
__entry->which = which;
|
||||
__entry->n0l = n0l;
|
||||
__entry->n1l = n1l;
|
||||
sk_trace_assign(start, start);
|
||||
sk_trace_assign(end, end);
|
||||
__entry->which = which;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" ptr %p wh %d nl %u,%u start "SK_FMT " end "SK_FMT, SCSB_TRACE_ARGS,
|
||||
__entry->ptr, __entry->which, __entry->n0l, __entry->n1l,
|
||||
sk_trace_args(start), sk_trace_args(end))
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_alloced,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_freeing,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_found,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_trimmed,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_erased,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_inserting,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_inserted,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_shrinking,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_dropping,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_replaying,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_filled,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_wkic_read_items,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, struct scoutfs_key *start,
|
||||
struct scoutfs_key *end),
|
||||
|
||||
TP_ARGS(sb, key, start, end),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
sk_trace_define(key)
|
||||
sk_trace_define(start)
|
||||
sk_trace_define(end)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
sk_trace_assign(key, start);
|
||||
sk_trace_assign(start, start);
|
||||
sk_trace_assign(end, end);
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" key "SK_FMT" start "SK_FMT " end "SK_FMT, SCSB_TRACE_ARGS,
|
||||
sk_trace_args(key), sk_trace_args(start), sk_trace_args(end))
|
||||
);
|
||||
@@ -39,6 +39,9 @@ struct scoutfs_triggers {
|
||||
|
||||
static char *names[] = {
|
||||
[SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE] = "block_remove_stale",
|
||||
[SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE] = "srch_compact_logs_pad_safe",
|
||||
[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
|
||||
[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
|
||||
[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
|
||||
};
|
||||
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
|
||||
enum scoutfs_trigger {
|
||||
SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE,
|
||||
SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE,
|
||||
SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
|
||||
SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
|
||||
SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
|
||||
SCOUTFS_TRIGGER_NR,
|
||||
};
|
||||
|
||||
@@ -46,6 +46,23 @@ static struct scoutfs_tseq_entry *tseq_rb_next(struct scoutfs_tseq_entry *ent)
|
||||
return rb_entry(node, struct scoutfs_tseq_entry, node);
|
||||
}
|
||||
|
||||
#ifdef KC_RB_TREE_AUGMENTED_COMPUTE_MAX
|
||||
static bool tseq_compute_total(struct scoutfs_tseq_entry *ent, bool exit)
|
||||
{
|
||||
loff_t total = 1 + tseq_node_total(ent->node.rb_left) +
|
||||
tseq_node_total(ent->node.rb_right);
|
||||
|
||||
if (exit && ent->total == total)
|
||||
return true;
|
||||
|
||||
ent->total = total;
|
||||
return false;
|
||||
}
|
||||
|
||||
RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
|
||||
node, total, tseq_compute_total);
|
||||
#else
|
||||
|
||||
static loff_t tseq_compute_total(struct scoutfs_tseq_entry *ent)
|
||||
{
|
||||
return 1 + tseq_node_total(ent->node.rb_left) +
|
||||
@@ -53,7 +70,8 @@ static loff_t tseq_compute_total(struct scoutfs_tseq_entry *ent)
|
||||
}
|
||||
|
||||
RB_DECLARE_CALLBACKS(static, tseq_rb_callbacks, struct scoutfs_tseq_entry,
|
||||
node, loff_t, total, tseq_compute_total)
|
||||
node, loff_t, total, tseq_compute_total);
|
||||
#endif
|
||||
|
||||
void scoutfs_tseq_tree_init(struct scoutfs_tseq_tree *tree,
|
||||
scoutfs_tseq_show_t show)
|
||||
|
||||
@@ -17,4 +17,15 @@ static inline void down_write_two(struct rw_semaphore *a,
|
||||
down_write_nested(b, SINGLE_DEPTH_NESTING);
|
||||
}
|
||||
|
||||
/*
|
||||
* When returning shrinker counts from scan_objects, we should steer
|
||||
* clear of the magic SHRINK_STOP and SHRINK_EMPTY values, which are near
|
||||
* ~0UL values. Hence, we cap count to LONG_MAX, which is arbitrarily high
|
||||
* enough to avoid it.
|
||||
*/
|
||||
static inline long shrinker_min_long(long count)
|
||||
{
|
||||
return min(count, LONG_MAX);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
1155
kmod/src/wkic.c
Normal file
1155
kmod/src/wkic.c
Normal file
File diff suppressed because it is too large
Load Diff
19
kmod/src/wkic.h
Normal file
19
kmod/src/wkic.h
Normal file
@@ -0,0 +1,19 @@
|
||||
#ifndef _SCOUTFS_WKIC_H_
|
||||
#define _SCOUTFS_WKIC_H_
|
||||
|
||||
#include "format.h"
|
||||
|
||||
typedef int (*wkic_iter_cb_t)(struct scoutfs_key *key, void *val, unsigned int val_len,
|
||||
void *cb_arg);
|
||||
|
||||
int scoutfs_wkic_iterate(struct super_block *sb, struct scoutfs_key *key, struct scoutfs_key *last,
|
||||
struct scoutfs_key *range_start, struct scoutfs_key *range_end,
|
||||
wkic_iter_cb_t cb, void *cb_arg);
|
||||
int scoutfs_wkic_iterate_stable(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_key *last, struct scoutfs_key *range_start,
|
||||
struct scoutfs_key *range_end, wkic_iter_cb_t cb, void *cb_arg);
|
||||
|
||||
int scoutfs_wkic_setup(struct super_block *sb);
|
||||
void scoutfs_wkic_destroy(struct super_block *sb);
|
||||
|
||||
#endif
|
||||
240
kmod/src/xattr.c
240
kmod/src/xattr.c
@@ -82,6 +82,7 @@ static void init_xattr_key(struct scoutfs_key *key, u64 ino, u32 name_hash,
|
||||
#define SCOUTFS_XATTR_PREFIX_LEN (sizeof(SCOUTFS_XATTR_PREFIX) - 1)
|
||||
|
||||
#define HIDE_TAG "hide."
|
||||
#define INDX_TAG "indx."
|
||||
#define SRCH_TAG "srch."
|
||||
#define TOTL_TAG "totl."
|
||||
#define TAG_LEN (sizeof(HIDE_TAG) - 1)
|
||||
@@ -103,6 +104,9 @@ int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
if (!strncmp(name, HIDE_TAG, TAG_LEN)) {
|
||||
if (++tgs->hide == 0)
|
||||
return -EINVAL;
|
||||
} else if (!strncmp(name, INDX_TAG, TAG_LEN)) {
|
||||
if (++tgs->indx == 0)
|
||||
return -EINVAL;
|
||||
} else if (!strncmp(name, SRCH_TAG, TAG_LEN)) {
|
||||
if (++tgs->srch == 0)
|
||||
return -EINVAL;
|
||||
@@ -540,47 +544,57 @@ static int parse_totl_u64(const char *s, int len, u64 *res)
|
||||
}
|
||||
|
||||
/*
|
||||
* non-destructive relatively quick parse of the last 3 dotted u64s that
|
||||
* make up the name of the xattr total. -EINVAL is returned if there
|
||||
* are anything but 3 valid u64 encodings between single dots at the end
|
||||
* of the name.
|
||||
* non-destructive relatively quick parse of final dotted u64s in an
|
||||
* xattr name. If the required number of values are found then we
|
||||
* return the number of bytes in the name that are not the final dotted
|
||||
* u64s with their dots. -EINVAL is returned if we didn't find the
|
||||
* required number of values.
|
||||
*/
|
||||
static int parse_totl_key(struct scoutfs_key *key, const char *name, int name_len)
|
||||
static int parse_dotted_u64s(u64 *u64s, int nr, const char *name, int name_len)
|
||||
{
|
||||
u64 tot_name[3];
|
||||
int end = name_len;
|
||||
int nr = 0;
|
||||
int len;
|
||||
int ret;
|
||||
int i;
|
||||
int u;
|
||||
|
||||
/* parse name elements in reverse order from end of xattr name string */
|
||||
for (i = name_len - 1; i >= 0 && nr < ARRAY_SIZE(tot_name); i--) {
|
||||
for (u = nr - 1, i = name_len - 1; u >= 0 && i >= 0; i--) {
|
||||
if (name[i] != '.')
|
||||
continue;
|
||||
|
||||
len = end - (i + 1);
|
||||
ret = parse_totl_u64(&name[i + 1], len, &tot_name[nr]);
|
||||
ret = parse_totl_u64(&name[i + 1], len, &u64s[u]);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
end = i;
|
||||
nr++;
|
||||
u--;
|
||||
}
|
||||
|
||||
if (nr == ARRAY_SIZE(tot_name)) {
|
||||
/* swap to account for parsing in reverse */
|
||||
swap(tot_name[0], tot_name[2]);
|
||||
scoutfs_xattr_init_totl_key(key, tot_name);
|
||||
ret = 0;
|
||||
} else {
|
||||
if (u == -1)
|
||||
ret = end;
|
||||
else
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int parse_totl_key(struct scoutfs_key *key, const char *name, int name_len)
|
||||
{
|
||||
u64 u64s[3];
|
||||
int ret;
|
||||
|
||||
ret = parse_dotted_u64s(u64s, ARRAY_SIZE(u64s), name, name_len);
|
||||
if (ret >= 0) {
|
||||
scoutfs_xattr_init_totl_key(key, u64s);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int apply_totl_delta(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_xattr_totl_val *tval, struct scoutfs_lock *lock)
|
||||
{
|
||||
@@ -607,6 +621,47 @@ int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len)
|
||||
return SCOUTFS_DELTA_COMBINED;
|
||||
}
|
||||
|
||||
void scoutfs_xattr_indx_get_range(struct scoutfs_key *start, struct scoutfs_key *end)
|
||||
{
|
||||
scoutfs_key_set_zeros(start);
|
||||
start->sk_zone = SCOUTFS_XATTR_INDX_ZONE;
|
||||
scoutfs_key_set_ones(end);
|
||||
end->sk_zone = SCOUTFS_XATTR_INDX_ZONE;
|
||||
}
|
||||
|
||||
void scoutfs_xattr_init_indx_key(struct scoutfs_key *key, u64 a, u64 b, u64 ino)
|
||||
{
|
||||
scoutfs_key_set_zeros(key);
|
||||
key->sk_zone = SCOUTFS_XATTR_INDX_ZONE;
|
||||
key->skxi_a = cpu_to_le64(a);
|
||||
key->skxi_b = cpu_to_le64(b);
|
||||
key->skxi_ino = cpu_to_le64(ino);
|
||||
}
|
||||
|
||||
/*
|
||||
* indx keys have a restricted name so that there can only be one xattr
|
||||
* that places an inode at a given position.  This lets us emit index
|
||||
* items under CW cluster locks without reading to see if they exist or
|
||||
* not.
|
||||
*/
|
||||
#define REQUIRED_INDEX_PREFIX "scoutfs.hide.indx"
|
||||
|
||||
static int parse_indx_key(struct scoutfs_key *key, const char *name, int name_len, u64 ino)
|
||||
{
|
||||
u64 u64s[2];
|
||||
int ret;
|
||||
|
||||
ret = parse_dotted_u64s(u64s, ARRAY_SIZE(u64s), name, name_len);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (!xattr_names_equal(name, ret, REQUIRED_INDEX_PREFIX, sizeof(REQUIRED_INDEX_PREFIX) - 1))
|
||||
return -EINVAL;
|
||||
|
||||
scoutfs_xattr_init_indx_key(key, u64s[0], u64s[1], ino);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The confusing swiss army knife of creating, modifying, and deleting
|
||||
* xattrs.
|
||||
@@ -627,7 +682,7 @@ int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len)
|
||||
int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_len,
|
||||
const void *value, size_t size, int flags,
|
||||
const struct scoutfs_xattr_prefix_tags *tgs,
|
||||
struct scoutfs_lock *lck, struct scoutfs_lock *totl_lock,
|
||||
struct scoutfs_lock *lck, struct scoutfs_lock *tag_lock,
|
||||
struct list_head *ind_locks)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
@@ -635,10 +690,11 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct scoutfs_xattr_totl_val tval = {0,};
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_key totl_key;
|
||||
struct scoutfs_key tag_key;
|
||||
struct scoutfs_key key;
|
||||
bool undo_srch = false;
|
||||
bool undo_totl = false;
|
||||
bool undo_indx = false;
|
||||
u8 found_parts;
|
||||
unsigned int xat_bytes_totl;
|
||||
unsigned int xat_bytes;
|
||||
@@ -651,7 +707,8 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
|
||||
trace_scoutfs_xattr_set(sb, name_len, value, size, flags);
|
||||
|
||||
if (WARN_ON_ONCE(tgs->totl && !totl_lock))
|
||||
if (WARN_ON_ONCE(tgs->totl && tgs->indx) ||
|
||||
WARN_ON_ONCE((tgs->totl | tgs->indx) && !tag_lock))
|
||||
return -EINVAL;
|
||||
|
||||
/* mirror the syscall's errors for large names and values */
|
||||
@@ -664,10 +721,13 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
(flags & ~(XATTR_CREATE | XATTR_REPLACE)))
|
||||
return -EINVAL;
|
||||
|
||||
if ((tgs->hide | tgs->srch | tgs->totl) && !capable(CAP_SYS_ADMIN))
|
||||
if ((tgs->hide | tgs->indx | tgs->srch | tgs->totl) && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (tgs->totl && ((ret = parse_totl_key(&totl_key, name, name_len)) != 0))
|
||||
if (tgs->totl && ((ret = parse_totl_key(&tag_key, name, name_len)) != 0))
|
||||
return ret;
|
||||
|
||||
if (tgs->indx && ((ret = parse_indx_key(&tag_key, name, name_len, ino)) != 0))
|
||||
return ret;
|
||||
|
||||
/* allocate enough to always read an existing xattr's totl */
|
||||
@@ -718,6 +778,23 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
le64_add_cpu(&tval.total, -total);
|
||||
}
|
||||
|
||||
/*
|
||||
* indx xattrs don't have a value. After returning an error for
|
||||
* non-zero val length or short circuiting modifying with the
|
||||
* same 0 length, all we're left with is creating or deleting
|
||||
* the xattr.
|
||||
*/
|
||||
if (tgs->indx) {
|
||||
if (size != 0) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (found_parts && value) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* prepare the xattr header, name, and start of value in first item */
|
||||
if (value) {
|
||||
if (found_parts)
|
||||
@@ -741,6 +818,16 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
le64_add_cpu(&tval.total, total);
|
||||
}
|
||||
|
||||
if (tgs->indx) {
|
||||
if (value)
|
||||
ret = scoutfs_item_create_force(sb, &tag_key, NULL, 0, tag_lock, NULL);
|
||||
else
|
||||
ret = scoutfs_item_delete_force(sb, &tag_key, tag_lock, NULL);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
undo_indx = true;
|
||||
}
|
||||
|
||||
if (tgs->srch && !(found_parts && value)) {
|
||||
if (found_parts)
|
||||
id = le64_to_cpu(key.skx_id);
|
||||
@@ -752,7 +839,7 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
}
|
||||
|
||||
if (tgs->totl) {
|
||||
ret = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
ret = apply_totl_delta(sb, &tag_key, &tval, tag_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
undo_totl = true;
|
||||
@@ -773,10 +860,17 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
|
||||
/* XXX do these want i_mutex or anything? */
|
||||
inode_inc_iversion(inode);
|
||||
inode->i_ctime = CURRENT_TIME;
|
||||
inode->i_ctime = current_time(inode);
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
if (ret < 0 && undo_indx) {
|
||||
if (value)
|
||||
err = scoutfs_item_delete_force(sb, &tag_key, tag_lock, NULL);
|
||||
else
|
||||
err = scoutfs_item_create_force(sb, &tag_key, NULL, 0, tag_lock, NULL);
|
||||
BUG_ON(err); /* inconsistent */
|
||||
}
|
||||
if (ret < 0 && undo_srch) {
|
||||
err = scoutfs_forest_srch_add(sb, hash, ino, id);
|
||||
BUG_ON(err);
|
||||
@@ -785,7 +879,7 @@ out:
|
||||
/* _delta() on dirty items shouldn't fail */
|
||||
tval.total = cpu_to_le64(-le64_to_cpu(tval.total));
|
||||
tval.count = cpu_to_le64(-le64_to_cpu(tval.count));
|
||||
err = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
err = apply_totl_delta(sb, &tag_key, &tval, tag_lock);
|
||||
BUG_ON(err);
|
||||
}
|
||||
|
||||
@@ -801,7 +895,7 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name, const void
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_lock *totl_lock = NULL;
|
||||
struct scoutfs_lock *tag_lock = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
size_t name_len = strlen(name);
|
||||
LIST_HEAD(ind_locks);
|
||||
@@ -816,8 +910,11 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name, const void
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
if (tgs.totl) {
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &totl_lock);
|
||||
if (tgs.totl || tgs.indx) {
|
||||
if (tgs.totl)
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &tag_lock);
|
||||
else
|
||||
ret = scoutfs_lock_xattr_indx(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &tag_lock);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
}
|
||||
@@ -836,7 +933,7 @@ retry:
|
||||
goto release;
|
||||
|
||||
ret = scoutfs_xattr_set_locked(dentry->d_inode, name, name_len, value, size, flags, &tgs,
|
||||
lck, totl_lock, &ind_locks);
|
||||
lck, tag_lock, &ind_locks);
|
||||
if (ret == 0)
|
||||
scoutfs_update_inode_item(inode, lck, &ind_locks);
|
||||
|
||||
@@ -845,11 +942,12 @@ release:
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
unlock:
|
||||
scoutfs_unlock(sb, lck, SCOUTFS_LOCK_WRITE);
|
||||
scoutfs_unlock(sb, totl_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
scoutfs_unlock(sb, tag_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
#ifndef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
/*
|
||||
* Future kernels have this amazing hack to rewind the name to get the
|
||||
* skipped prefix. We're back in the stone ages without the handler
|
||||
@@ -857,22 +955,41 @@ unlock:
|
||||
* compat hook to either call the kernel's xattr_full_name(handler), or
|
||||
* our hack to use the flags as the prefix length.
|
||||
*/
|
||||
static const char *full_name_hack(void *handler, const char *name, int len)
|
||||
static const char *full_name_hack(const char *name, int len)
|
||||
{
|
||||
return name - len;
|
||||
}
|
||||
#endif
|
||||
|
||||
static int scoutfs_xattr_get_handler(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size, int handler_flags)
|
||||
static int scoutfs_xattr_get_handler
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
(const struct xattr_handler *handler, struct dentry *dentry,
|
||||
struct inode *inode, const char *name, void *value,
|
||||
size_t size)
|
||||
{
|
||||
name = full_name_hack(NULL, name, handler_flags);
|
||||
name = xattr_full_name(handler, name);
|
||||
#else
|
||||
(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size, int handler_flags)
|
||||
{
|
||||
name = full_name_hack(name, handler_flags);
|
||||
#endif
|
||||
return scoutfs_xattr_get(dentry, name, value, size);
|
||||
}
|
||||
|
||||
static int scoutfs_xattr_set_handler(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags, int handler_flags)
|
||||
static int scoutfs_xattr_set_handler
|
||||
#ifdef KC_XATTR_STRUCT_XATTR_HANDLER
|
||||
(const struct xattr_handler *handler, struct dentry *dentry,
|
||||
struct inode *inode, const char *name, const void *value,
|
||||
size_t size, int flags)
|
||||
{
|
||||
name = full_name_hack(NULL, name, handler_flags);
|
||||
name = xattr_full_name(handler, name);
|
||||
#else
|
||||
(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags, int handler_flags)
|
||||
{
|
||||
name = full_name_hack(name, handler_flags);
|
||||
#endif
|
||||
return scoutfs_xattr_set(dentry, name, value, size, flags);
|
||||
}
|
||||
|
||||
@@ -905,14 +1022,22 @@ static const struct xattr_handler scoutfs_xattr_security_handler = {
|
||||
};
|
||||
|
||||
static const struct xattr_handler scoutfs_xattr_acl_access_handler = {
|
||||
#ifdef KC_XATTR_HANDLER_NAME
|
||||
.name = XATTR_NAME_POSIX_ACL_ACCESS,
|
||||
#else
|
||||
.prefix = XATTR_NAME_POSIX_ACL_ACCESS,
|
||||
#endif
|
||||
.flags = ACL_TYPE_ACCESS,
|
||||
.get = scoutfs_acl_get_xattr,
|
||||
.set = scoutfs_acl_set_xattr,
|
||||
};
|
||||
|
||||
static const struct xattr_handler scoutfs_xattr_acl_default_handler = {
|
||||
#ifdef KC_XATTR_HANDLER_NAME
|
||||
.name = XATTR_NAME_POSIX_ACL_DEFAULT,
|
||||
#else
|
||||
.prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
|
||||
#endif
|
||||
.flags = ACL_TYPE_DEFAULT,
|
||||
.get = scoutfs_acl_get_xattr,
|
||||
.set = scoutfs_acl_set_xattr,
|
||||
@@ -1027,14 +1152,15 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
{
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *totl_lock = NULL;
|
||||
struct scoutfs_lock *tag_lock = NULL;
|
||||
struct scoutfs_xattr_totl_val tval;
|
||||
struct scoutfs_key totl_key;
|
||||
struct scoutfs_key tag_key;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
bool release = false;
|
||||
unsigned int bytes;
|
||||
unsigned int val_len;
|
||||
u8 locked_zone = 0;
|
||||
void *value;
|
||||
u64 total;
|
||||
u64 hash;
|
||||
@@ -1080,16 +1206,36 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = parse_totl_key(&totl_key, xat->name, xat->name_len) ?:
|
||||
ret = parse_totl_key(&tag_key, xat->name, xat->name_len) ?:
|
||||
parse_totl_u64(value, val_len, &total);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tgs.totl && totl_lock == NULL) {
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &totl_lock);
|
||||
if (tgs.indx) {
|
||||
ret = parse_indx_key(&tag_key, xat->name, xat->name_len, ino);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((tgs.totl || tgs.indx) && locked_zone != tag_key.sk_zone) {
|
||||
if (tag_lock) {
|
||||
if (release) {
|
||||
scoutfs_release_trans(sb);
|
||||
release = false;
|
||||
}
|
||||
scoutfs_unlock(sb, tag_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
tag_lock = NULL;
|
||||
}
|
||||
if (tgs.totl)
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0,
|
||||
&tag_lock);
|
||||
else
|
||||
ret = scoutfs_lock_xattr_indx(sb, SCOUTFS_LOCK_WRITE_ONLY, 0,
|
||||
&tag_lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
locked_zone = tag_key.sk_zone;
|
||||
}
|
||||
|
||||
ret = scoutfs_hold_trans(sb, false);
|
||||
@@ -1112,11 +1258,17 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
if (tgs.totl) {
|
||||
tval.total = cpu_to_le64(-total);
|
||||
tval.count = cpu_to_le64(-1LL);
|
||||
ret = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
ret = apply_totl_delta(sb, &tag_key, &tval, tag_lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tgs.indx) {
|
||||
ret = scoutfs_item_delete_force(sb, &tag_key, tag_lock, NULL);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
scoutfs_release_trans(sb);
|
||||
release = false;
|
||||
|
||||
@@ -1125,7 +1277,7 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
|
||||
if (release)
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_unlock(sb, totl_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
scoutfs_unlock(sb, tag_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
kfree(xat);
|
||||
out:
|
||||
return ret;
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
struct scoutfs_xattr_prefix_tags {
|
||||
unsigned long hide:1,
|
||||
indx:1,
|
||||
srch:1,
|
||||
totl:1;
|
||||
};
|
||||
@@ -30,4 +31,7 @@ int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
void scoutfs_xattr_init_totl_key(struct scoutfs_key *key, u64 *name);
|
||||
int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len);
|
||||
|
||||
void scoutfs_xattr_indx_get_range(struct scoutfs_key *start, struct scoutfs_key *end);
|
||||
void scoutfs_xattr_init_indx_key(struct scoutfs_key *key, u64 a, u64 b, u64 ino);
|
||||
|
||||
#endif
|
||||
|
||||
1
tests/.gitignore
vendored
1
tests/.gitignore
vendored
@@ -9,3 +9,4 @@ src/find_xattrs
|
||||
src/stage_tmpfile
|
||||
src/create_xattr_loop
|
||||
src/o_tmpfile_umask
|
||||
src/o_tmpfile_linkat
|
||||
|
||||
@@ -12,7 +12,8 @@ BIN := src/createmany \
|
||||
src/find_xattrs \
|
||||
src/create_xattr_loop \
|
||||
src/fragmented_data_extents \
|
||||
src/o_tmpfile_umask
|
||||
src/o_tmpfile_umask \
|
||||
src/o_tmpfile_linkat
|
||||
|
||||
DEPS := $(wildcard src/*.d)
|
||||
|
||||
|
||||
@@ -25,8 +25,9 @@ All options can be seen by running with -h.
|
||||
This script is built to test multi-node systems on one host by using
|
||||
different mounts of the same devices. The script creates a fake block
|
||||
device in front of each fs block device for each mount that will be
|
||||
tested. Currently it will create free loop devices and will mount on
|
||||
/mnt/test.[0-9].
|
||||
tested. It will create predictable device mapper devices and mount
|
||||
them on /mnt/test.N. These static device names and mount paths limit
|
||||
the script to a single execution per host.
|
||||
|
||||
All tests will be run by default. Particular tests can be included or
|
||||
excluded by providing test name regular expressions with the -I and -E
|
||||
@@ -104,8 +105,8 @@ used during the test.
|
||||
|
||||
| Variable | Description | Origin | Example |
|
||||
| ---------------- | ------------------- | --------------- | ----------------- |
|
||||
| T\_MB[0-9] | per-mount meta bdev | created per run | /dev/loop0 |
|
||||
| T\_DB[0-9] | per-mount data bdev | created per run | /dev/loop1 |
|
||||
| T\_MB[0-9] | per-mount meta bdev | created per run | /dev/mapper/\_scoutfs\_test\_meta\_[0-9] |
|
||||
| T\_DB[0-9] | per-mount data bdev | created per run | /dev/mapper/\_scoutfs\_test\_data\_[0-9] |
|
||||
| T\_D[0-9] | per-mount test dir | made for test | /mnt/test.[0-9]/t |
|
||||
| T\_META\_DEVICE | main FS meta bdev | -M | /dev/vda |
|
||||
| T\_DATA\_DEVICE | main FS data bdev | -D | /dev/vdb |
|
||||
|
||||
@@ -35,7 +35,7 @@ t_fail()
|
||||
t_quiet()
|
||||
{
|
||||
echo "# $*" >> "$T_TMPDIR/quiet.log"
|
||||
"$@" > "$T_TMPDIR/quiet.log" 2>&1 || \
|
||||
"$@" >> "$T_TMPDIR/quiet.log" 2>&1 || \
|
||||
t_fail "quiet command failed"
|
||||
}
|
||||
|
||||
|
||||
@@ -6,6 +6,61 @@ t_filter_fs()
|
||||
-e 's@Device: [a-fA-F0-9]*h/[0-9]*d@Device: 0h/0d@g'
|
||||
}
|
||||
|
||||
#
|
||||
# We can hit a spurious kasan warning that was fixed upstream:
|
||||
#
|
||||
# e504e74cc3a2 x86/unwind/orc: Disable KASAN checking in the ORC unwinder, part 2
|
||||
#
|
||||
# KASAN can get mad when the unwinder doesn't find ORC metadata and
|
||||
# wanders up without using frames and hits the KASAN stack red zones.
|
||||
# We can ignore these messages.
|
||||
#
|
||||
# They're bracketed by:
|
||||
# [ 2687.690127] ==================================================================
|
||||
# [ 2687.691366] BUG: KASAN: stack-out-of-bounds in get_reg+0x1bc/0x230
|
||||
# ...
|
||||
# [ 2687.706220] ==================================================================
|
||||
# [ 2687.707284] Disabling lock debugging due to kernel taint
|
||||
#
|
||||
# That final lock debugging message may not be included.
|
||||
#
|
||||
ignore_harmless_unwind_kasan_stack_oob()
|
||||
{
|
||||
awk '
|
||||
BEGIN {
|
||||
in_soob = 0
|
||||
soob_nr = 0
|
||||
}
|
||||
( !in_soob && $0 ~ /==================================================================/ ) {
|
||||
in_soob = 1
|
||||
soob_nr = NR
|
||||
saved = $0
|
||||
}
|
||||
( in_soob == 1 && NR == (soob_nr + 1) ) {
|
||||
if (match($0, /KASAN: stack-out-of-bounds in get_reg/) != 0) {
|
||||
in_soob = 2
|
||||
} else {
|
||||
in_soob = 0
|
||||
print saved
|
||||
}
|
||||
saved=""
|
||||
}
|
||||
( in_soob == 2 && $0 ~ /==================================================================/ ) {
|
||||
in_soob = 3
|
||||
soob_nr = NR
|
||||
}
|
||||
( in_soob == 3 && NR > soob_nr && $0 !~ /Disabling lock debugging/ ) {
|
||||
in_soob = 0
|
||||
}
|
||||
( !in_soob ) { print $0 }
|
||||
END {
|
||||
if (saved) {
|
||||
print saved
|
||||
}
|
||||
}
|
||||
'
|
||||
}
|
||||
|
||||
#
|
||||
# Filter out expected messages. Putting messages here implies that
|
||||
# tests aren't relying on messages to discover failures.. they're
|
||||
@@ -85,5 +140,15 @@ t_filter_dmesg()
|
||||
re="$re|scoutfs .* error.*server failed to bind to.*"
|
||||
re="$re|scoutfs .* critical transaction commit failure.*"
|
||||
|
||||
egrep -v "($re)"
|
||||
# change-devices causes loop device resizing
|
||||
re="$re|loop: module loaded"
|
||||
re="$re|loop[0-9].* detected capacity change from.*"
|
||||
|
||||
# ignore systemd-journal rotating
|
||||
re="$re|systemd-journald.*"
|
||||
|
||||
re="$re|incompatible pre-release format version"
|
||||
|
||||
egrep -v "($re)" | \
|
||||
ignore_harmless_unwind_kasan_stack_oob
|
||||
}
|
||||
|
||||
@@ -265,6 +265,15 @@ t_trigger_get() {
|
||||
cat "$(t_trigger_path "$nr")/$which"
|
||||
}
|
||||
|
||||
# Write the value in $3 to the trigger file named $1 found under the
# path that t_trigger_path reports for mount number $2.
t_trigger_set() {
	local trigger="$1"
	local mount_nr="$2"
	local value="$3"
	local dir=$(t_trigger_path "$mount_nr")

	echo "$value" > "$dir/$trigger"
}
|
||||
|
||||
t_trigger_show() {
|
||||
local which="$1"
|
||||
local string="$2"
|
||||
@@ -276,9 +285,8 @@ t_trigger_show() {
|
||||
t_trigger_arm_silent() {
|
||||
local which="$1"
|
||||
local nr="$2"
|
||||
local path=$(t_trigger_path "$nr")
|
||||
|
||||
echo 1 > "$path/$which"
|
||||
t_trigger_set "$which" "$nr" 1
|
||||
}
|
||||
|
||||
t_trigger_arm() {
|
||||
|
||||
@@ -47,7 +47,7 @@ four
|
||||
--- dir within dir
|
||||
--- overwrite file
|
||||
--- can't overwrite non-empty dir
|
||||
mv: cannot move ‘/mnt/test/test/basic-posix-consistency/dir/c/clobber’ to ‘/mnt/test/test/basic-posix-consistency/dir/a/dir’: Directory not empty
|
||||
mv: cannot move '/mnt/test/test/basic-posix-consistency/dir/c/clobber' to '/mnt/test/test/basic-posix-consistency/dir/a/dir': Directory not empty
|
||||
--- can overwrite empty dir
|
||||
--- can rename into root
|
||||
== path resoluion
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
== measure initial createmany
|
||||
== measure initial createmany
|
||||
== measure two concurrent createmany runs
|
||||
== cleanup
|
||||
|
||||
@@ -17,7 +17,7 @@ ino not found in dseq index
|
||||
mount 0 contents after mount 1 rm: contents
|
||||
ino found in dseq index
|
||||
ino found in dseq index
|
||||
stat: cannot stat ‘/mnt/test/test/inode-deletion/file’: No such file or directory
|
||||
stat: cannot stat '/mnt/test/test/inode-deletion/file': No such file or directory
|
||||
ino not found in dseq index
|
||||
ino not found in dseq index
|
||||
== lots of deletions use one open map
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
== setting longer hung task timeout
|
||||
== creating fragmented extents
|
||||
== unlink file with moved extents to free extents per block
|
||||
== cleanup
|
||||
|
||||
@@ -20,10 +20,10 @@ offline waiting should now have two known entries:
|
||||
data_wait_err found 2 waiters.
|
||||
offline waiting should now have 0 known entries:
|
||||
0
|
||||
dd: error reading ‘/mnt/test/test/offline-extent-waiting/dir/file’: Input/output error
|
||||
dd: error reading '/mnt/test/test/offline-extent-waiting/dir/file': Input/output error
|
||||
0+0 records in
|
||||
0+0 records out
|
||||
dd: error reading ‘/mnt/test/test/offline-extent-waiting/dir/file’: Input/output error
|
||||
dd: error reading '/mnt/test/test/offline-extent-waiting/dir/file': Input/output error
|
||||
0+0 records in
|
||||
0+0 records out
|
||||
offline waiting should be empty again:
|
||||
|
||||
22
tests/golden/projects
Normal file
22
tests/golden/projects
Normal file
@@ -0,0 +1,22 @@
|
||||
== default new files don't have project
|
||||
0
|
||||
== set new project on files and dirs
|
||||
/mnt/test.0/test/projects/file: 1
|
||||
/mnt/test.0/test/projects/dir: 1
|
||||
== can use interesting IDs
|
||||
2147483647
|
||||
2147483648
|
||||
4294967295
|
||||
9223372036854775807
|
||||
9223372036854775808
|
||||
18446744073709551615
|
||||
== created files and dirs inherit project id
|
||||
/mnt/test.0/test/projects/dir/file: 1
|
||||
/mnt/test.0/test/projects/dir/sub: 1
|
||||
== inheritance continues
|
||||
1
|
||||
== clearing project id stops inheritance
|
||||
/mnt/test.0/test/projects/dir/another-file: 0
|
||||
/mnt/test.0/test/projects/dir/another-sub: 0
|
||||
== o_tmpfile creations inherit dir
|
||||
1
|
||||
40
tests/golden/quota
Normal file
40
tests/golden/quota
Normal file
@@ -0,0 +1,40 @@
|
||||
== prepare dir with write perm for test ids
|
||||
== test assumes starting with no rules, empty list
|
||||
== add rule
|
||||
7 13,L,- 15,L,- 17,L,- I 33 -
|
||||
== list is empty again after delete
|
||||
== can change limits without deleting
|
||||
1 1,L,- 1,L,- 1,L,- I 100 -
|
||||
1 1,L,- 1,L,- 1,L,- I 101 -
|
||||
1 1,L,- 1,L,- 1,L,- I 99 -
|
||||
== wipe and restore rules in bulk
|
||||
7 15,L,- 0,L,- 0,L,- I 33 -
|
||||
7 14,L,- 0,L,- 0,L,- I 33 -
|
||||
7 13,L,- 0,L,- 0,L,- I 33 -
|
||||
7 12,L,- 0,L,- 0,L,- I 33 -
|
||||
7 11,L,- 0,L,- 0,L,- I 33 -
|
||||
7 10,L,- 0,L,- 0,L,- I 33 -
|
||||
7 15,L,- 0,L,- 0,L,- I 33 -
|
||||
7 14,L,- 0,L,- 0,L,- I 33 -
|
||||
7 13,L,- 0,L,- 0,L,- I 33 -
|
||||
7 12,L,- 0,L,- 0,L,- I 33 -
|
||||
7 11,L,- 0,L,- 0,L,- I 33 -
|
||||
7 10,L,- 0,L,- 0,L,- I 33 -
|
||||
== default rule prevents file creation
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
== decreasing totl allows file creation again
|
||||
== attr selecting rules prevent creation
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
== multi attr selecting doesn't prevent partial
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
== op differentiates
|
||||
== higher priority rule applies
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
== data rules with total and count prevent write and fallocate
|
||||
dd: error writing '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
fallocate: fallocate failed: Disk quota exceeded
|
||||
dd: error writing '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
fallocate: fallocate failed: Disk quota exceeded
|
||||
== added rules work after bulk restore
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
37
tests/golden/srch-safe-merge-pos
Normal file
37
tests/golden/srch-safe-merge-pos
Normal file
@@ -0,0 +1,37 @@
|
||||
== initialize per-mount values
|
||||
== arm compaction triggers
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_merge_stop_safe armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_merge_stop_safe armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_merge_stop_safe armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_merge_stop_safe armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_merge_stop_safe armed: 1
|
||||
== compact more often
|
||||
== create padded sorted inputs by forcing log rotation
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
== compaction of padded should stop at safe
|
||||
== verify no compaction errors
|
||||
== cleanup
|
||||
@@ -241,7 +241,6 @@ generic/312
|
||||
generic/314
|
||||
generic/316
|
||||
generic/317
|
||||
generic/318
|
||||
generic/324
|
||||
generic/326
|
||||
generic/327
|
||||
|
||||
@@ -1,5 +1,8 @@
|
||||
#!/usr/bin/bash
|
||||
|
||||
# Force system tools to use ASCII quotes
|
||||
export LC_ALL=C
|
||||
|
||||
#
|
||||
# XXX
|
||||
# - could have helper functions for waiting for pids
|
||||
@@ -323,16 +326,10 @@ unmount_all() {
|
||||
cmd wait $p
|
||||
done
|
||||
|
||||
# delete all temp meta devices
|
||||
for dev in $(losetup --associated "$T_META_DEVICE" | cut -d : -f 1); do
|
||||
if [ -e "$dev" ]; then
|
||||
cmd losetup -d "$dev"
|
||||
fi
|
||||
done
|
||||
# delete all temp data devices
|
||||
for dev in $(losetup --associated "$T_DATA_DEVICE" | cut -d : -f 1); do
|
||||
if [ -e "$dev" ]; then
|
||||
cmd losetup -d "$dev"
|
||||
# delete all temp devices
|
||||
for dev in /dev/mapper/_scoutfs_test_*; do
|
||||
if [ -b "$dev" ]; then
|
||||
cmd dmsetup remove $dev
|
||||
fi
|
||||
done
|
||||
}
|
||||
@@ -431,6 +428,12 @@ $T_UTILS/fenced/scoutfs-fenced > "$T_FENCED_LOG" 2>&1 &
|
||||
fenced_pid=$!
|
||||
fenced_log "started fenced pid $fenced_pid in the background"
|
||||
|
||||
# setup dm tables
|
||||
echo "0 $(blockdev --getsz $T_META_DEVICE) linear $T_META_DEVICE 0" > \
|
||||
$T_RESULTS/dmtable.meta
|
||||
echo "0 $(blockdev --getsz $T_DATA_DEVICE) linear $T_DATA_DEVICE 0" > \
|
||||
$T_RESULTS/dmtable.data
|
||||
|
||||
#
|
||||
# mount concurrently so that a quorum is present to elect the leader and
|
||||
# start a server.
|
||||
@@ -439,10 +442,13 @@ msg "mounting $T_NR_MOUNTS mounts on meta $T_META_DEVICE data $T_DATA_DEVICE"
|
||||
pids=""
|
||||
for i in $(seq 0 $((T_NR_MOUNTS - 1))); do
|
||||
|
||||
meta_dev=$(losetup --find --show $T_META_DEVICE)
|
||||
test -b "$meta_dev" || die "failed to create temp device $meta_dev"
|
||||
data_dev=$(losetup --find --show $T_DATA_DEVICE)
|
||||
test -b "$data_dev" || die "failed to create temp device $data_dev"
|
||||
name="_scoutfs_test_meta_$i"
|
||||
cmd dmsetup create "$name" --table "$(cat $T_RESULTS/dmtable.meta)"
|
||||
meta_dev="/dev/mapper/$name"
|
||||
|
||||
name="_scoutfs_test_data_$i"
|
||||
cmd dmsetup create "$name" --table "$(cat $T_RESULTS/dmtable.data)"
|
||||
data_dev="/dev/mapper/$name"
|
||||
|
||||
dir="/mnt/test.$i"
|
||||
test -d "$dir" || cmd mkdir -p "$dir"
|
||||
|
||||
@@ -12,11 +12,14 @@ data-prealloc.sh
|
||||
setattr_more.sh
|
||||
offline-extent-waiting.sh
|
||||
move-blocks.sh
|
||||
projects.sh
|
||||
large-fragmented-free.sh
|
||||
enospc.sh
|
||||
srch-safe-merge-pos.sh
|
||||
srch-basic-functionality.sh
|
||||
simple-xattr-unit.sh
|
||||
totl-xattr-tag.sh
|
||||
quota.sh
|
||||
lock-refleak.sh
|
||||
lock-shrink-consistency.sh
|
||||
lock-pr-cw-conflict.sh
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
@@ -35,10 +36,10 @@ struct opts {
|
||||
unsigned int dry_run:1,
|
||||
ls_output:1,
|
||||
quiet:1,
|
||||
user_xattr:1,
|
||||
same_srch_xattr:1,
|
||||
group_srch_xattr:1,
|
||||
unique_srch_xattr:1;
|
||||
xattr_set:1,
|
||||
xattr_file:1,
|
||||
xattr_group:1;
|
||||
char *xattr_name;
|
||||
};
|
||||
|
||||
struct stats {
|
||||
@@ -149,12 +150,31 @@ static void free_dir(struct dir *dir)
|
||||
free(dir);
|
||||
}
|
||||
|
||||
/*
 * Append formatted output to buf starting at byte offset off, never
 * writing past sz total bytes, and return the offset at which the next
 * append should start.
 *
 * A return value >= sz means that the output didn't fit (or that
 * formatting failed) and the contents of buf shouldn't be used.  Once
 * that has happened later calls do nothing and keep returning sz, so
 * callers can chain appends and only check the final offset.
 */
static size_t snprintf_off(void *buf, size_t sz, size_t off, const char *fmt, ...)
{
	/* arithmetic on void * is a GNU extension, index through char * */
	char *dst = buf;
	va_list ap;
	int ret;

	if (off >= sz)
		return sz;

	va_start(ap, fmt);
	ret = vsnprintf(dst + off, sz - off, fmt, ap);
	va_end(ap);

	/* only negative returns are errors, empty output is a valid append */
	if (ret < 0)
		return sz;

	/* truncated output has ret >= sz - off, which lands at or past sz */
	return off + (size_t)ret;
}
|
||||
|
||||
static void create_dir(struct dir *dir, struct opts *opts,
|
||||
struct stats *stats)
|
||||
{
|
||||
struct str_list *s;
|
||||
char name[100];
|
||||
char name[256]; /* max len and null term */
|
||||
char val = 'v';
|
||||
size_t off;
|
||||
int rc;
|
||||
int i;
|
||||
|
||||
@@ -175,29 +195,21 @@ static void create_dir(struct dir *dir, struct opts *opts,
|
||||
rc = mknod(s->str, S_IFREG | 0644, 0);
|
||||
error_exit(rc, "mknod %s failed"ERRF, s->str, ERRA);
|
||||
|
||||
rc = 0;
|
||||
if (rc == 0 && opts->user_xattr) {
|
||||
strcpy(name, "user.scoutfs_bcp");
|
||||
rc = setxattr(s->str, name, &val, 1, 0);
|
||||
}
|
||||
if (rc == 0 && opts->same_srch_xattr) {
|
||||
strcpy(name, "scoutfs.srch.scoutfs_bcp");
|
||||
rc = setxattr(s->str, name, &val, 1, 0);
|
||||
}
|
||||
if (rc == 0 && opts->group_srch_xattr) {
|
||||
snprintf(name, sizeof(name),
|
||||
"scoutfs.srch.scoutfs_bcp.group.%lu",
|
||||
stats->files / 10000);
|
||||
rc = setxattr(s->str, name, &val, 1, 0);
|
||||
}
|
||||
if (rc == 0 && opts->unique_srch_xattr) {
|
||||
snprintf(name, sizeof(name),
|
||||
"scoutfs.srch.scoutfs_bcp.unique.%lu",
|
||||
stats->files);
|
||||
if (opts->xattr_set) {
|
||||
off = snprintf_off(name, sizeof(name), 0, "%s", opts->xattr_name);
|
||||
if (opts->xattr_file)
|
||||
off = snprintf_off(name, sizeof(name), off,
|
||||
"-f-%lu", stats->files);
|
||||
if (opts->xattr_group)
|
||||
off = snprintf_off(name, sizeof(name), off,
|
||||
"-g-%lu", stats->files / 10000);
|
||||
|
||||
error_exit(off >= sizeof(name), "xattr name longer than 255 bytes");
|
||||
|
||||
rc = setxattr(s->str, name, &val, 1, 0);
|
||||
error_exit(rc, "setxattr %s %s failed"ERRF, s->str, name, ERRA);
|
||||
}
|
||||
|
||||
error_exit(rc, "setxattr %s %s failed"ERRF, s->str, name, ERRA);
|
||||
|
||||
stats->files++;
|
||||
rate_banner(opts, stats);
|
||||
@@ -365,11 +377,10 @@ static void usage(void)
|
||||
" -d DIR | create all files in DIR top level directory\n"
|
||||
" -n | dry run, only parse, don't create any files\n"
|
||||
" -q | quiet, don't regularly print rates\n"
|
||||
" -F | append \"-f-NR\" file nr to xattr name, requires -X\n"
|
||||
" -G | append \"-g-NR\" file nr/10000 to xattr name, requires -X\n"
|
||||
" -L | parse ls output; only reg, skip meta, paths at ./\n"
|
||||
" -X | set the same user. xattr name in all files\n"
|
||||
" -S | set the same .srch. xattr name in all files\n"
|
||||
" -G | set a .srch. xattr name shared by groups of files\n"
|
||||
" -U | set a unique .srch. xattr name in all files\n");
|
||||
" -X NAM | set named xattr in all files\n");
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
@@ -386,7 +397,7 @@ int main(int argc, char **argv)
|
||||
|
||||
memset(&opts, 0, sizeof(opts));
|
||||
|
||||
while ((c = getopt(argc, argv, "d:nqLXSGU")) != -1) {
|
||||
while ((c = getopt(argc, argv, "d:nqFGLX:")) != -1) {
|
||||
switch(c) {
|
||||
case 'd':
|
||||
top_dir = strdup(optarg);
|
||||
@@ -397,20 +408,19 @@ int main(int argc, char **argv)
|
||||
case 'q':
|
||||
opts.quiet = 1;
|
||||
break;
|
||||
case 'F':
|
||||
opts.xattr_file = 1;
|
||||
break;
|
||||
case 'G':
|
||||
opts.xattr_group = 1;
|
||||
break;
|
||||
case 'L':
|
||||
opts.ls_output = 1;
|
||||
break;
|
||||
case 'X':
|
||||
opts.user_xattr = 1;
|
||||
break;
|
||||
case 'S':
|
||||
opts.same_srch_xattr = 1;
|
||||
break;
|
||||
case 'G':
|
||||
opts.group_srch_xattr = 1;
|
||||
break;
|
||||
case 'U':
|
||||
opts.unique_srch_xattr = 1;
|
||||
opts.xattr_set = 1;
|
||||
opts.xattr_name = strdup(optarg);
|
||||
error_exit(!opts.xattr_name, "error allocating xattr name");
|
||||
break;
|
||||
case '?':
|
||||
printf("Unknown option '%c'\n", optopt);
|
||||
@@ -419,6 +429,11 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
error_exit(opts.xattr_file && !opts.xattr_set,
|
||||
"must specify xattr -X when appending file nr with -F");
|
||||
error_exit(opts.xattr_group && !opts.xattr_set,
|
||||
"must specify xattr -X when appending file nr with -G");
|
||||
|
||||
if (!opts.dry_run) {
|
||||
error_exit(!top_dir,
|
||||
"must specify top level directory with -d");
|
||||
|
||||
@@ -48,7 +48,7 @@ struct our_handle {
|
||||
static void exit_usage(void)
|
||||
{
|
||||
printf(" -h/-? output this usage message and exit\n"
|
||||
" -e keep trying on enoent, consider success an error\n"
|
||||
" -e keep trying on enoent and estale, consider success an error\n"
|
||||
" -i <num> 64bit inode number for handle open, can be multiple\n"
|
||||
" -m <string> scoutfs mount path string for ioctl fd\n"
|
||||
" -n <string> optional xattr name string, defaults to \""DEFAULT_NAME"\"\n"
|
||||
@@ -149,7 +149,7 @@ int main(int argc, char **argv)
|
||||
|
||||
fd = open_by_handle_at(mntfd, &handle.handle, O_RDWR);
|
||||
if (fd == -1) {
|
||||
if (!enoent_success_err || errno != ENOENT) {
|
||||
if (!enoent_success_err || ( errno != ENOENT && errno != ESTALE )) {
|
||||
perror("open_by_handle_at");
|
||||
return 1;
|
||||
}
|
||||
|
||||
71
tests/src/o_tmpfile_linkat.c
Normal file
71
tests/src/o_tmpfile_linkat.c
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
|
||||
/*
 * Open an unnamed O_TMPFILE file in dir and then give it a name at
 * lpath by linking its /proc/self/fd entry.  A failure of either step
 * is reported with perror() and exits the process with status 1.
 */
static void linkat_tmpfile(char *dir, char *lpath)
{
	char fd_path[PATH_MAX];
	int tmp_fd = open(dir, O_RDWR | O_TMPFILE, 0777);

	if (tmp_fd < 0) {
		perror("open(O_TMPFILE)");
		exit(1);
	}

	snprintf(fd_path, sizeof(fd_path), "/proc/self/fd/%d", tmp_fd);

	/* follow the /proc symlink so the link targets the open file itself */
	if (linkat(AT_FDCWD, fd_path, AT_FDCWD, lpath, AT_SYMLINK_FOLLOW) < 0) {
		perror("linkat");
		exit(1);
	}

	close(tmp_fd);
}
|
||||
|
||||
/*
 * Create a visible file from an O_TMPFILE open followed by linkat so
 * that tests can inspect what the O_TMPFILE creation path produced.
 *
 * Takes the directory to open in and the path to link the file at,
 * prints a usage line and returns 1 if either is missing.
 */
int main(int argc, char **argv)
{
	if (argc < 3) {
		printf("%s <open_dir> <linkat_path>\n", argv[0]);
		return 1;
	}

	linkat_tmpfile(argv[1], argv[2]);

	return 0;
}
|
||||
@@ -11,8 +11,13 @@ FILE="$T_D0/file"
|
||||
# final block as we truncated past it.
|
||||
#
|
||||
echo "== truncate writes zeroed partial end of file block"
|
||||
yes | dd of="$FILE" bs=8K count=1 status=none
|
||||
yes | dd of="$FILE" bs=8K count=1 status=none iflag=fullblock
|
||||
sync
|
||||
|
||||
# not passing iflag=fullblock causes the file occasionally to just be
|
||||
# 4K, so just to be safe we should at least check size once
|
||||
test `stat --printf="%s\n" "$FILE"` -eq 8192 || t_fail "test file incorrect start size"
|
||||
|
||||
truncate -s 6K "$FILE"
|
||||
truncate -s 12K "$FILE"
|
||||
echo 3 > /proc/sys/vm/drop_caches
|
||||
|
||||
@@ -7,9 +7,11 @@ t_require_mounts 2
|
||||
|
||||
COUNT=50000
|
||||
|
||||
# Prep dirs for test. Each mount needs to make their own parent dir for
|
||||
# the createmany run, otherwise both dirs will end up in the same inode
|
||||
# group, causing updates to bounce that lock around.
|
||||
#
|
||||
# Prep dirs for test. We have per-directory inode number allocators so
|
||||
# by putting each createmany in a per-mount dir they get their own inode
|
||||
# number region and cluster locks.
|
||||
#
|
||||
echo "== measure initial createmany"
|
||||
mkdir -p $T_D0/dir/0
|
||||
mkdir $T_D1/dir/1
|
||||
@@ -17,18 +19,20 @@ mkdir $T_D1/dir/1
|
||||
echo "== measure initial createmany"
|
||||
START=$SECONDS
|
||||
createmany -o "$T_D0/file_" $COUNT >> $T_TMP.full
|
||||
sync
|
||||
SINGLE=$((SECONDS - START))
|
||||
echo single $SINGLE >> $T_TMP.full
|
||||
|
||||
echo "== measure two concurrent createmany runs"
|
||||
START=$SECONDS
|
||||
createmany -o $T_D0/dir/0/file $COUNT > /dev/null &
|
||||
(cd $T_D0/dir/0; createmany -o ./file_ $COUNT > /dev/null) &
|
||||
pids="$!"
|
||||
createmany -o $T_D1/dir/1/file $COUNT > /dev/null &
|
||||
(cd $T_D1/dir/1; createmany -o ./file_ $COUNT > /dev/null) &
|
||||
pids="$pids $!"
|
||||
for p in $pids; do
|
||||
wait $p
|
||||
done
|
||||
sync
|
||||
BOTH=$((SECONDS - START))
|
||||
echo both $BOTH >> $T_TMP.full
|
||||
|
||||
@@ -41,7 +45,10 @@ echo both $BOTH >> $T_TMP.full
|
||||
# synchronized operation.
|
||||
FACTOR=200
|
||||
if [ "$BOTH" -gt $(($SINGLE*$FACTOR)) ]; then
|
||||
echo "both createmany took $BOTH sec, more than $FACTOR x single $SINGLE sec"
|
||||
t_fail "both createmany took $BOTH sec, more than $FACTOR x single $SINGLE sec"
|
||||
fi
|
||||
|
||||
echo "== cleanup"
|
||||
find $T_D0/dir -delete
|
||||
|
||||
t_pass
|
||||
|
||||
@@ -95,7 +95,7 @@ print_logical_extents()
|
||||
}
|
||||
print $2, $6, flags
|
||||
}
|
||||
'
|
||||
' | sed 's/last,eof/eof/'
|
||||
}
|
||||
|
||||
t_save_all_sysfs_mount_options data_prealloc_blocks
|
||||
|
||||
@@ -7,14 +7,11 @@ t_require_mounts 2
|
||||
|
||||
#
|
||||
# Make sure that all mounts can read the results of a write from each
|
||||
# mount. And make sure that the greatest of all the written seqs is
|
||||
# visible after the writes were commited by remote reads.
|
||||
# mount.
|
||||
#
|
||||
check_read_write()
|
||||
{
|
||||
local expected
|
||||
local greatest=0
|
||||
local seq
|
||||
local path
|
||||
local saw
|
||||
local w
|
||||
@@ -25,11 +22,6 @@ check_read_write()
|
||||
eval path="\$T_D${w}/written"
|
||||
echo "$expected" > "$path"
|
||||
|
||||
seq=$(scoutfs stat -s meta_seq $path)
|
||||
if [ "$seq" -gt "$greatest" ]; then
|
||||
greatest=$seq
|
||||
fi
|
||||
|
||||
for r in $(t_fs_nrs); do
|
||||
eval path="\$T_D${r}/written"
|
||||
saw=$(cat "$path")
|
||||
@@ -38,11 +30,6 @@ check_read_write()
|
||||
fi
|
||||
done
|
||||
done
|
||||
|
||||
seq=$(scoutfs statfs -s committed_seq -p $T_D0)
|
||||
if [ "$seq" -lt "$greatest" ]; then
|
||||
echo "committed_seq $seq less than greatest $greatest"
|
||||
fi
|
||||
}
|
||||
|
||||
# verify that fenced ran our testing fence script
|
||||
|
||||
@@ -72,7 +72,7 @@ check_ino_index "$ino" "$dseq" "$T_M0"
|
||||
check_ino_index "$ino" "$dseq" "$T_M1"
|
||||
exec {FD}>&- # close
|
||||
# we know that revalidating will unhash the remote dentry
|
||||
stat "$T_D0/file" 2>&1 | t_filter_fs
|
||||
stat "$T_D0/file" 2>&1 | sed 's/cannot statx/cannot stat/' | t_filter_fs
|
||||
check_ino_index "$ino" "$dseq" "$T_M0"
|
||||
check_ino_index "$ino" "$dseq" "$T_M1"
|
||||
|
||||
|
||||
@@ -10,6 +10,30 @@ EXTENTS_PER_BTREE_BLOCK=600
|
||||
EXTENTS_PER_LIST_BLOCK=8192
|
||||
FREED_EXTENTS=$((EXTENTS_PER_BTREE_BLOCK * EXTENTS_PER_LIST_BLOCK))
|
||||
|
||||
#
|
||||
# This test specifically creates a pathologically sparse file that will
|
||||
# be as expensive as possible to free. This is usually fine on
|
||||
# dedicated or reasonable hardware, but trying to run this in
|
||||
# virtualized debug kernels can take a very long time. This test is
|
||||
# about making sure that the server doesn't fail, not that the platform
|
||||
# can handle the scale of work that our btree formats happen to require
|
||||
# while execution is bogged down with use-after-free memory reference
|
||||
# tracking. So we give the test a lot more breathing room before
|
||||
# deciding that it's hung.
|
||||
#
|
||||
echo "== setting longer hung task timeout"
|
||||
if [ -w /proc/sys/kernel/hung_task_timeout_secs ]; then
|
||||
secs=$(cat /proc/sys/kernel/hung_task_timeout_secs)
|
||||
test "$secs" -gt 0 || \
|
||||
t_fail "confusing value '$secs' from /proc/sys/kernel/hung_task_timeout_secs"
|
||||
restore_hung_task_timeout()
|
||||
{
|
||||
echo "$secs" > /proc/sys/kernel/hung_task_timeout_secs
|
||||
}
|
||||
trap restore_hung_task_timeout EXIT
|
||||
echo "$((secs * 5))" > /proc/sys/kernel/hung_task_timeout_secs
|
||||
fi
|
||||
|
||||
echo "== creating fragmented extents"
|
||||
fragmented_data_extents $FREED_EXTENTS $EXTENTS_PER_BTREE_BLOCK "$T_D0/alloc" "$T_D0/move"
|
||||
|
||||
|
||||
40
tests/tests/projects.sh
Normal file
40
tests/tests/projects.sh
Normal file
@@ -0,0 +1,40 @@
|
||||
|
||||
echo "== default new files don't have project"
|
||||
touch "$T_D0/file"
|
||||
scoutfs project-id -g "$T_D0/file"
|
||||
|
||||
echo "== set new project on files and dirs"
|
||||
mkdir "$T_D0/dir"
|
||||
scoutfs project-id -s 1 "$T_D0/file" "$T_D0/dir"
|
||||
scoutfs project-id -g "$T_D0/file" "$T_D0/dir"
|
||||
|
||||
echo "== can use interesting IDs"
|
||||
touch "$T_D0/ids"
|
||||
for id in 0x7FFFFFFF 0x80000000 0xFFFFFFFF \
|
||||
0x7FFFFFFFFFFFFFFF 0x8000000000000000 0xFFFFFFFFFFFFFFFF; do
|
||||
scoutfs project-id -s $id "$T_D0/ids"
|
||||
scoutfs project-id -g "$T_D0/ids"
|
||||
done
|
||||
|
||||
echo "== created files and dirs inherit project id"
|
||||
touch "$T_D0/dir/file"
|
||||
mkdir "$T_D0/dir/sub"
|
||||
scoutfs project-id -g "$T_D0/dir/file" "$T_D0/dir/sub"
|
||||
|
||||
echo "== inheritance continues"
|
||||
mkdir "$T_D0/dir/sub/more"
|
||||
scoutfs project-id -g "$T_D0/dir/sub/more"
|
||||
|
||||
# .. just inherits 0 :)
|
||||
echo "== clearing project id stops inheritance"
|
||||
scoutfs project-id -s 0 "$T_D0/dir"
|
||||
touch "$T_D0/dir/another-file"
|
||||
mkdir "$T_D0/dir/another-sub"
|
||||
scoutfs project-id -g "$T_D0/dir/another-file" "$T_D0/dir/another-sub"
|
||||
|
||||
echo "== o_tmpfile creations inherit dir"
|
||||
scoutfs project-id -s 1 "$T_D0/dir"
|
||||
o_tmpfile_linkat "$T_D0/dir" "$T_D0/dir/tmpfile"
|
||||
scoutfs project-id -g "$T_D0/dir/tmpfile"
|
||||
|
||||
t_pass
|
||||
150
tests/tests/quota.sh
Normal file
150
tests/tests/quota.sh
Normal file
@@ -0,0 +1,150 @@
|
||||
|
||||
TEST_UID=22222
|
||||
TEST_GID=44444
|
||||
|
||||
# sys_setreuid() sets fs[ug]id to e[ug]id
|
||||
SET_UID="--ruid=$TEST_UID --euid=$TEST_UID"
|
||||
SET_GID="--rgid=$TEST_GID --egid=$TEST_GID --clear-groups"
|
||||
|
||||
FILE="$T_D0/dir/file"
|
||||
|
||||
# Run sync, then write 1 to the drop_weak_item_cache and
# drop_quota_check_cache files under the debugfs path.
sync_and_drop()
{
	sync
	# quote the substituted path so it can't be word split or globbed
	echo 1 > "$(t_debugfs_path)/drop_weak_item_cache"
	echo 1 > "$(t_debugfs_path)/drop_quota_check_cache"
}
|
||||
|
||||
# Clean up between test cases: remove the test file, wipe all quota
# rules, and remove every scoutfs.totl. xattr that getfattr lists on
# $T_D0.
reset_all()
{
	rm -f "$FILE"
	scoutfs quota-wipe -p "$T_M0"
	# -I already runs the command once per input line and is mutually
	# exclusive with -n, so only -I is given.  The dots are escaped so
	# that only literal "scoutfs.totl." prefixes are selected.
	getfattr --absolute-names -d -m - "$T_D0" | \
		grep '^scoutfs\.totl\.' | \
		cut -d '=' -f 1 | \
		xargs -I'{}' setfattr -x '{}' "$T_D0"
}
|
||||
|
||||
echo "== prepare dir with write perm for test ids"
|
||||
mkdir "$T_D0/dir"
|
||||
chown --quiet $TEST_UID "$T_D0/dir"
|
||||
chgrp --quiet $TEST_GID "$T_D0/dir"
|
||||
|
||||
echo "== test assumes starting with no rules, empty list"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
|
||||
echo "== add rule"
|
||||
scoutfs quota-add -p "$T_M0" -r "7 13,L,- 15,L,- 17,L,- I 33 -"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
|
||||
echo "== list is empty again after delete"
|
||||
scoutfs quota-del -p "$T_M0" -r "7 13,L,- 15,L,- 17,L,- I 33 -"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
|
||||
echo "== can change limits without deleting"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 100 -"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 101 -"
|
||||
scoutfs quota-del -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 100 -"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 99 -"
|
||||
scoutfs quota-del -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 101 -"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
scoutfs quota-del -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 99 -"
|
||||
reset_all
|
||||
|
||||
echo "== wipe and restore rules in bulk"
|
||||
for a in $(seq 10 15); do
|
||||
scoutfs quota-add -p "$T_M0" -r "7 $a,L,- 0,L,- 0,L,- I 33 -"
|
||||
done
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
scoutfs quota-list -p "$T_M0" > "$T_TMP.list"
|
||||
scoutfs quota-wipe -p "$T_M0"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
scoutfs quota-restore -p "$T_M0" < "$T_TMP.list"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
reset_all
|
||||
|
||||
echo "== default rule prevents file creation"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.1.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== decreasing totl allows file creation again"
|
||||
setfattr -x scoutfs.totl.test.1.1.1 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE"
|
||||
reset_all
|
||||
|
||||
echo "== attr selecting rules prevent creation"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S 1,L,- 1,L,- I 1 -"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_GID,G,S 1,L,- 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.1.1 -v 2 "$T_D0"
|
||||
setfattr -n scoutfs.totl.test.$TEST_GID.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE" 2>&1 | t_filter_fs
|
||||
setpriv $SET_GID touch "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== multi attr selecting doesn't prevent partial"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S $TEST_GID,G,S 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.$TEST_GID.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE"
|
||||
rm -f "$FILE"
|
||||
setpriv $SET_GID touch "$FILE"
|
||||
rm -f "$FILE"
|
||||
setpriv $SET_UID $SET_GID touch "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== op differentiates"
|
||||
# inode ops succeed in presence of data rule
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S 1,L,- 1,L,- D 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
# data ops succeed in presence of inode rule
|
||||
touch "$FILE"
|
||||
chown --quiet $TEST_UID "$FILE"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S 1,L,- 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID fallocate -l 4096 "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== higher priority rule applies"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S 1,L,- 1,L,- I 1000 -"
|
||||
scoutfs quota-add -p "$T_M0" -r "2 $TEST_UID,U,S 1,L,- 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== data rules with total and count prevent write and fallocate"
|
||||
touch "$FILE"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- D 1 -"
|
||||
setfattr -n scoutfs.totl.test.1.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
dd if=/dev/zero of="$FILE" bs=4096 count=1 conv=notrunc status=none 2>&1 | t_filter_fs
|
||||
fallocate -l 4096 "$FILE" 2>&1 | t_filter_fs
|
||||
scoutfs quota-del -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- D 1 -"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- D 0 C"
|
||||
sync_and_drop
|
||||
dd if=/dev/zero of="$FILE" bs=4096 count=1 conv=notrunc status=none 2>&1 | t_filter_fs
|
||||
fallocate -l 4096 "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== added rules work after bulk restore"
|
||||
seq -f " 1 %.0f,U,S 1,L,- 1,L,- I 1 -" 9000050000 -1 9000000000 > "$T_TMP.lots"
|
||||
scoutfs quota-restore -p "$T_M0" < "$T_TMP.lots"
|
||||
scoutfs quota-list -p "$T_M0" > "$T_TMP.list"
|
||||
diff -u "$T_TMP.lots" "$T_TMP.list"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S 1,L,- 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
t_pass
|
||||
@@ -2,6 +2,8 @@
|
||||
# Some basic tests of online resizing metadata and data devices.
|
||||
#
|
||||
|
||||
t_require_commands bc
|
||||
|
||||
statfs_total() {
|
||||
local single="total_$1_blocks"
|
||||
local mnt="$2"
|
||||
|
||||
@@ -55,10 +55,17 @@ scoutfs setattr -t 67305985.999999999 -V 1 -s 1 "$FILE" 2>&1 | t_filter_fs
|
||||
TZ=GMT stat -c "%z" "$FILE"
|
||||
rm "$FILE"
|
||||
|
||||
#
|
||||
# With e2fsprogs-v1.42.10-10-g29758d2f, the output of filefrag 'flags' changes
|
||||
# significantly. First, the _LAST flag is now output. Second, the 'unknown'
|
||||
# flag is now printed out as 'unknown_loc'. To compensate for this, we check
|
||||
# and replace the "correct" output for new versions here with the expected
|
||||
# value.
|
||||
#
|
||||
echo "== large offline extents are created"
|
||||
touch "$FILE"
|
||||
scoutfs setattr -V 1 -o -s $((10007 * 4096)) "$FILE" 2>&1 | t_filter_fs
|
||||
filefrag -v -b4096 "$FILE" 2>&1 | t_filter_fs
|
||||
filefrag -v -b4096 "$FILE" 2>&1 | sed 's/last,unknown_loc,eof$/unknown,eof/' | t_filter_fs
|
||||
rm "$FILE"
|
||||
|
||||
# had a bug where we were creating extents that were too long
|
||||
|
||||
@@ -27,15 +27,9 @@ test_xattr_lengths() {
|
||||
echo "key len $name_len val len $val_len" >> "$T_TMP.log"
|
||||
setfattr -n $name -v \"$val\" "$FILE"
|
||||
|
||||
# grep has trouble with enormous args? so we dump the
|
||||
# name=value to a file and compare with a known good file
|
||||
getfattr -d --absolute-names "$FILE" | grep "$name" > "$T_TMP.got"
|
||||
getfattr -d --only-values --absolute-names "$FILE" -n "$name" > "$T_TMP.got"
|
||||
echo -n "$val" > "$T_TMP.good"
|
||||
|
||||
if [ $val_len == 0 ]; then
|
||||
echo "$name" > "$T_TMP.good"
|
||||
else
|
||||
echo "$name=\"$val\"" > "$T_TMP.good"
|
||||
fi
|
||||
cmp "$T_TMP.good" "$T_TMP.got" || \
|
||||
t_fail "cmp failed name len $name_len val len $val_len"
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ LOG=340000
|
||||
LIM=1000000
|
||||
|
||||
SEQF="%.20g"
|
||||
SXA="scoutfs.srch.test-srch-basic-functionality"
|
||||
|
||||
t_require_commands touch rm setfattr scoutfs find_xattrs
|
||||
|
||||
@@ -27,20 +28,20 @@ diff_srch_find()
|
||||
|
||||
echo "== create new xattrs"
|
||||
touch "$T_D0/"{create,update}
|
||||
setfattr -n scoutfs.srch.test -v 1 "$T_D0/"{create,update} 2>&1 | t_filter_fs
|
||||
diff_srch_find scoutfs.srch.test
|
||||
setfattr -n $SXA -v 1 "$T_D0/"{create,update} 2>&1 | t_filter_fs
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== update existing xattr"
|
||||
setfattr -n scoutfs.srch.test -v 2 "$T_D0/update" 2>&1 | t_filter_fs
|
||||
diff_srch_find scoutfs.srch.test
|
||||
setfattr -n $SXA -v 2 "$T_D0/update" 2>&1 | t_filter_fs
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== remove an xattr"
|
||||
setfattr -x scoutfs.srch.test "$T_D0/create" 2>&1 | t_filter_fs
|
||||
diff_srch_find scoutfs.srch.test
|
||||
setfattr -x $SXA "$T_D0/create" 2>&1 | t_filter_fs
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== remove xattr with files"
|
||||
rm -f "$T_D0/"{create,update}
|
||||
diff_srch_find scoutfs.srch.test
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== trigger small log merges by rotating single block with unmount"
|
||||
sv=$(t_server_nr)
|
||||
@@ -56,7 +57,7 @@ while [ "$i" -lt "8" ]; do
|
||||
|
||||
eval path="\$T_D${nr}/single-block-$i"
|
||||
touch "$path"
|
||||
setfattr -n scoutfs.srch.single-block-logs -v $i "$path"
|
||||
setfattr -n $SXA -v $i "$path"
|
||||
t_umount $nr
|
||||
t_mount $nr
|
||||
|
||||
@@ -65,51 +66,51 @@ while [ "$i" -lt "8" ]; do
|
||||
done
|
||||
# wait for srch compaction worker delay
|
||||
sleep 10
|
||||
rm -rf "$T_D0/single-block-*"
|
||||
find "$T_D0" -type f -name 'single-block-*' -delete
|
||||
|
||||
echo "== create entries in current log"
|
||||
DIR="$T_D0/dir"
|
||||
NR=$((LOG / 4))
|
||||
mkdir -p "$DIR"
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -S -d "$DIR" > /dev/null
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -X $SXA -d "$DIR" > /dev/null
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== delete small fraction"
|
||||
seq -f "$DIR/f-$SEQF" 1 7 $NR | xargs setfattr -x scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "$DIR/f-$SEQF" 1 7 $NR | xargs setfattr -x $SXA
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== remove files"
|
||||
rm -rf "$DIR"
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== create entries that exceed one log"
|
||||
NR=$((LOG * 3 / 2))
|
||||
mkdir -p "$DIR"
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -S -d "$DIR" > /dev/null
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -X $SXA -d "$DIR" > /dev/null
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== delete fractions in phases"
|
||||
for i in $(seq 1 3); do
|
||||
seq -f "$DIR/f-$SEQF" $i 3 $NR | xargs setfattr -x scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "$DIR/f-$SEQF" $i 3 $NR | xargs setfattr -x $SXA
|
||||
diff_srch_find $SXA
|
||||
done
|
||||
|
||||
echo "== remove files"
|
||||
rm -rf "$DIR"
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== create entries for exceed search entry limit"
|
||||
NR=$((LIM * 3 / 2))
|
||||
mkdir -p "$DIR"
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -S -d "$DIR" > /dev/null
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -X $SXA -d "$DIR" > /dev/null
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== delete half"
|
||||
seq -f "$DIR/f-$SEQF" 1 2 $NR | xargs setfattr -x scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "$DIR/f-$SEQF" 1 2 $NR | xargs setfattr -x $SXA
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== entirely remove third batch"
|
||||
rm -rf "$DIR"
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find $SXA
|
||||
|
||||
t_pass
|
||||
|
||||
90
tests/tests/srch-safe-merge-pos.sh
Normal file
90
tests/tests/srch-safe-merge-pos.sh
Normal file
@@ -0,0 +1,90 @@
|
||||
#
|
||||
# There was a bug where srch file compaction could get stuck if a
|
||||
# partial compaction finished at the specific _SAFE_BYTES offset in a
|
||||
# block. Resuming from that position would return an error and
|
||||
# compaction would stop making forward progress.
|
||||
#
|
||||
# We use triggers to pad the output of log compaction to end on the safe
|
||||
# offset and then cause compaction of those padded inputs to stop at the
|
||||
# safe offset. Continuation will either succeed or return errors.
|
||||
#
|
||||
|
||||
# forcing rotation, so just a few
|
||||
NR=10
|
||||
SEQF="%.20g"
|
||||
COMPACT_NR=4
|
||||
|
||||
echo "== initialize per-mount values"
|
||||
declare -a err
|
||||
declare -a compact_delay
|
||||
for nr in $(t_fs_nrs); do
|
||||
err[$nr]=$(t_counter srch_compact_error $nr)
|
||||
compact_delay[$nr]=$(cat $(t_sysfs_path $nr)/srch/compact_delay_ms)
|
||||
done
|
||||
# Write the compact_delay_ms value that was saved for each mount back
# to that mount's srch/compact_delay_ms sysfs file.
restore_compact_delay()
{
	# keep the loop variable from clobbering the script's global nr
	local nr

	for nr in $(t_fs_nrs); do
		echo "${compact_delay[$nr]}" > "$(t_sysfs_path $nr)/srch/compact_delay_ms"
	done
}
|
||||
trap restore_compact_delay EXIT
|
||||
|
||||
echo "== arm compaction triggers"
|
||||
for nr in $(t_fs_nrs); do
|
||||
t_trigger_arm srch_compact_logs_pad_safe $nr
|
||||
t_trigger_arm srch_merge_stop_safe $nr
|
||||
done
|
||||
|
||||
echo "== compact more often"
|
||||
for nr in $(t_fs_nrs); do
|
||||
echo 1000 > $(t_sysfs_path $nr)/srch/compact_delay_ms
|
||||
done
|
||||
|
||||
echo "== create padded sorted inputs by forcing log rotation"
|
||||
sv=$(t_server_nr)
|
||||
for i in $(seq 1 $COMPACT_NR); do
|
||||
for j in $(seq 1 $COMPACT_NR); do
|
||||
t_trigger_arm srch_force_log_rotate $sv
|
||||
|
||||
seq -f "f-$i-$j-$SEQF" 1 10 | \
|
||||
bulk_create_paths -X "scoutfs.srch.t-srch-safe-merge-pos" -d "$T_D0" > \
|
||||
/dev/null
|
||||
sync
|
||||
|
||||
test "$(t_trigger_get srch_force_log_rotate $sv)" == "0" || \
|
||||
t_fail "srch_force_log_rotate didn't trigger"
|
||||
done
|
||||
|
||||
padded=0
|
||||
while test $padded == 0 && sleep .5; do
|
||||
for nr in $(t_fs_nrs); do
|
||||
if [ "$(t_trigger_get srch_compact_logs_pad_safe $nr)" == "0" ]; then
|
||||
t_trigger_arm srch_compact_logs_pad_safe $nr
|
||||
padded=1
|
||||
break
|
||||
fi
|
||||
test "$(t_counter srch_compact_error $nr)" == "${err[$nr]}" || \
|
||||
t_fail "srch_compact_error counter increased on mount $nr"
|
||||
done
|
||||
done
|
||||
done
|
||||
|
||||
echo "== compaction of padded should stop at safe"
|
||||
sleep 2
|
||||
for nr in $(t_fs_nrs); do
|
||||
if [ "$(t_trigger_get srch_merge_stop_safe $nr)" == "0" ]; then
|
||||
break
|
||||
fi
|
||||
done
|
||||
|
||||
echo "== verify no compaction errors"
|
||||
sleep 2
|
||||
for nr in $(t_fs_nrs); do
|
||||
test "$(t_counter srch_compact_error $nr)" == "${err[$nr]}" || \
|
||||
t_fail "srch_compact_error counter increased on mount $nr"
|
||||
done
|
||||
|
||||
echo "== cleanup"
|
||||
find "$T_D0" -type f -name 'f-*' -delete
|
||||
|
||||
t_pass
|
||||
@@ -3,6 +3,7 @@ t_require_commands touch rm setfattr scoutfs find_xattrs
|
||||
read_xattr_totals()
|
||||
{
|
||||
sync
|
||||
echo 1 > $(t_debugfs_path)/drop_weak_item_cache
|
||||
scoutfs read-xattr-totals -p "$T_M0"
|
||||
}
|
||||
|
||||
@@ -112,7 +113,6 @@ for phase in create update remove; do
|
||||
echo "$k.0.0 = ${totals[$k]}, ${counts[$k]}"
|
||||
done ) | grep -v "= 0, 0$" | sort -n >> $T_TMP.check_arr
|
||||
|
||||
sync
|
||||
read_xattr_totals | sort -n >> $T_TMP.check_read
|
||||
|
||||
diff -u $T_TMP.check_arr $T_TMP.check_read || \
|
||||
|
||||
@@ -75,6 +75,7 @@ generic/215 # mmap missing
|
||||
generic/246 # mmap missing
|
||||
generic/247 # mmap missing
|
||||
generic/248 # mmap missing
|
||||
generic/318 # can't support user namespaces until v5.11
|
||||
generic/321 # requires selinux enabled for '+' in ls?
|
||||
generic/325 # mmap missing
|
||||
generic/338 # BUG_ON update inode error handling
|
||||
|
||||
@@ -55,6 +55,19 @@ with initial sparse regions (perhaps by multiple threads writing to
|
||||
different regions) and wasted space isn't an issue (perhaps because the
|
||||
file population contains few small files).
|
||||
.TP
|
||||
.B log_merge_wait_timeout_ms=<number>
|
||||
This option sets the amount of time, in milliseconds, that log merge
|
||||
creation can wait before timing out. This setting is per-mount, only
|
||||
changes the behavior of that mount, and only affects the server when it
|
||||
is running in that mount.
|
||||
.sp
|
||||
This determines how long it may take for mounts to synchronize
|
||||
committing their log trees to create a log merge operation. Setting it
|
||||
too high can create long latencies in the event that a mount takes a
|
||||
long time to commit its log. Setting it too low can result in the
|
||||
creation of excessive numbers of log trees that are never merged. The
|
||||
default is 500 and it can not be less than 100 nor greater than 60000.
|
||||
.TP
|
||||
.B metadev_path=<device>
|
||||
The metadev_path option specifies the path to the block device that
|
||||
contains the filesystem's metadata.
|
||||
|
||||
@@ -61,7 +61,7 @@ install -m 644 -D fenced/scoutfs-fenced.conf.example $RPM_BUILD_ROOT%{_sysconfdi
|
||||
%files
|
||||
%defattr(644,root,root,755)
|
||||
%{_mandir}/man*/scoutfs*.gz
|
||||
%{_unitdir}/scoutfs-fenced.service
|
||||
/%{_unitdir}/scoutfs-fenced.service
|
||||
%{_sysconfdir}/scoutfs
|
||||
%defattr(755,root,root,755)
|
||||
%{_sbindir}/scoutfs
|
||||
|
||||
@@ -96,7 +96,7 @@ static int do_change_fmt_vers(struct change_fmt_vers_args *args)
|
||||
|
||||
if (le64_to_cpu(meta_super->fmt_vers) < SCOUTFS_FORMAT_VERSION_MIN ||
|
||||
le64_to_cpu(meta_super->fmt_vers) > SCOUTFS_FORMAT_VERSION_MAX) {
|
||||
fprintf(stderr, "meta super block has format version %llu outside of supported version range %u-%u",
|
||||
fprintf(stderr, "meta super block has format version %llu outside of supported version range %llu-%llu",
|
||||
le64_to_cpu(meta_super->fmt_vers), SCOUTFS_FORMAT_VERSION_MIN,
|
||||
SCOUTFS_FORMAT_VERSION_MAX);
|
||||
ret = -EINVAL;
|
||||
@@ -105,7 +105,7 @@ static int do_change_fmt_vers(struct change_fmt_vers_args *args)
|
||||
|
||||
if (le64_to_cpu(data_super->fmt_vers) < SCOUTFS_FORMAT_VERSION_MIN ||
|
||||
le64_to_cpu(data_super->fmt_vers) > SCOUTFS_FORMAT_VERSION_MAX) {
|
||||
fprintf(stderr, "data super block has format version %llu outside of supported version range %u-%u",
|
||||
fprintf(stderr, "data super block has format version %llu outside of supported version range %llu-%llu",
|
||||
le64_to_cpu(data_super->fmt_vers), SCOUTFS_FORMAT_VERSION_MIN,
|
||||
SCOUTFS_FORMAT_VERSION_MAX);
|
||||
ret = -EINVAL;
|
||||
@@ -186,7 +186,7 @@ static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
return ret;
|
||||
if (args->fmt_vers < SCOUTFS_FORMAT_VERSION_MIN ||
|
||||
args->fmt_vers > SCOUTFS_FORMAT_VERSION_MAX)
|
||||
argp_error(state, "format-version %llu is outside supported range of %u-%u",
|
||||
argp_error(state, "format-version %llu is outside supported range of %llu-%llu",
|
||||
args->fmt_vers, SCOUTFS_FORMAT_VERSION_MIN,
|
||||
SCOUTFS_FORMAT_VERSION_MAX);
|
||||
break;
|
||||
|
||||
@@ -70,7 +70,7 @@ static void usage(void)
|
||||
fprintf(stderr, "Selected fs defaults to current working directory.\n");
|
||||
fprintf(stderr, "See <command> --help for more details.\n");
|
||||
|
||||
fprintf(stderr, "\nSupported format version: %u-%u\n",
|
||||
fprintf(stderr, "\nSupported format version: %llu-%llu\n",
|
||||
SCOUTFS_FORMAT_VERSION_MIN, SCOUTFS_FORMAT_VERSION_MAX);
|
||||
|
||||
fprintf(stderr, "\nCore admin:\n");
|
||||
|
||||
@@ -386,6 +386,10 @@ static int do_mkfs(struct mkfs_args *args)
|
||||
print_quorum_slots(super->qconf.slots, array_size(super->qconf.slots),
|
||||
" ");
|
||||
|
||||
if (SCOUTFS_FORMAT_VERSION_MIN & SCOUTFS_FORMAT_VER_PREREL)
|
||||
printf("This volume was created with the incompatible pre-release format version 0x%016llx. This volume will only be mountable by pre-release builds with this specific matching format version.\n",
|
||||
SCOUTFS_FORMAT_VERSION_MIN);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (super)
|
||||
@@ -456,7 +460,7 @@ static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
return ret;
|
||||
if (args->fmt_vers < SCOUTFS_FORMAT_VERSION_MIN ||
|
||||
args->fmt_vers > SCOUTFS_FORMAT_VERSION_MAX)
|
||||
argp_error(state, "format-version %llu is outside supported range of %u-%u",
|
||||
argp_error(state, "format-version %llu is outside supported range of %llu-%llu",
|
||||
args->fmt_vers, SCOUTFS_FORMAT_VERSION_MIN,
|
||||
SCOUTFS_FORMAT_VERSION_MAX);
|
||||
break;
|
||||
|
||||
@@ -49,7 +49,7 @@ static void print_inode(struct scoutfs_key *key, void *val, int val_len)
|
||||
{
|
||||
struct scoutfs_inode *inode = val;
|
||||
|
||||
printf(" inode: ino %llu size %llu version %llu nlink %u\n"
|
||||
printf(" inode: ino %llu size %llu version %llu proj %llu nlink %u\n"
|
||||
" uid %u gid %u mode 0%o rdev 0x%x flags 0x%x\n"
|
||||
" next_readdir_pos %llu meta_seq %llu data_seq %llu data_version %llu\n"
|
||||
" atime %llu.%08u ctime %llu.%08u\n"
|
||||
@@ -57,6 +57,7 @@ static void print_inode(struct scoutfs_key *key, void *val, int val_len)
|
||||
le64_to_cpu(key->ski_ino),
|
||||
le64_to_cpu(inode->size),
|
||||
le64_to_cpu(inode->version),
|
||||
le64_to_cpu(inode->proj),
|
||||
le32_to_cpu(inode->nlink), le32_to_cpu(inode->uid),
|
||||
le32_to_cpu(inode->gid), le32_to_cpu(inode->mode),
|
||||
le32_to_cpu(inode->rdev),
|
||||
@@ -79,6 +80,24 @@ static void print_orphan(struct scoutfs_key *key, void *val, int val_len)
|
||||
}
|
||||
|
||||
|
||||
#define SQR_FMT "[%u %llu,%u,%x %llu,%u,%x %llu,%u,%x %u %llu %x]"
|
||||
|
||||
#define SQR_ARGS(r) \
|
||||
(r)->prio, \
|
||||
le64_to_cpu((r)->name_val[0]), (r)->name_source[0], (r)->name_flags[0], \
|
||||
le64_to_cpu((r)->name_val[1]), (r)->name_source[1], (r)->name_flags[1], \
|
||||
le64_to_cpu((r)->name_val[2]), (r)->name_source[2], (r)->name_flags[2], \
|
||||
(r)->op, le64_to_cpu((r)->limit), (r)->rule_flags
|
||||
|
||||
static void print_quota(struct scoutfs_key *key, void *val, int val_len)
|
||||
{
|
||||
struct scoutfs_quota_rule_val *rv = val;
|
||||
|
||||
printf(" quota rule: hash 0x%016llx coll_nr %llu\n"
|
||||
" "SQR_FMT"\n",
|
||||
le64_to_cpu(key->skqr_hash), le64_to_cpu(key->skqr_coll_nr), SQR_ARGS(rv));
|
||||
}
|
||||
|
||||
static void print_xattr_totl(struct scoutfs_key *key, void *val, int val_len)
|
||||
{
|
||||
struct scoutfs_xattr_totl_val *tval = val;
|
||||
@@ -89,6 +108,13 @@ static void print_xattr_totl(struct scoutfs_key *key, void *val, int val_len)
|
||||
le64_to_cpu(tval->count));
|
||||
}
|
||||
|
||||
static void print_xattr_indx(struct scoutfs_key *key, void *val, int val_len)
|
||||
{
|
||||
printf(" xattr indx: a %llu b %llu ino %llu",
|
||||
le64_to_cpu(key->skxi_a), le64_to_cpu(key->skxi_b),
|
||||
le64_to_cpu(key->skxi_ino));
|
||||
}
|
||||
|
||||
static u8 *global_printable_name(u8 *name, int name_len)
|
||||
{
|
||||
static u8 name_buf[SCOUTFS_NAME_LEN + 1];
|
||||
@@ -177,9 +203,15 @@ static print_func_t find_printer(u8 zone, u8 type)
|
||||
return print_orphan;
|
||||
}
|
||||
|
||||
if (zone == SCOUTFS_QUOTA_ZONE)
|
||||
return print_quota;
|
||||
|
||||
if (zone == SCOUTFS_XATTR_TOTL_ZONE)
|
||||
return print_xattr_totl;
|
||||
|
||||
if (zone == SCOUTFS_XATTR_INDX_ZONE)
|
||||
return print_xattr_indx;
|
||||
|
||||
if (zone == SCOUTFS_FS_ZONE) {
|
||||
switch(type) {
|
||||
case SCOUTFS_INODE_TYPE: return print_inode;
|
||||
|
||||
155
utils/src/projects.c
Normal file
155
utils/src/projects.c
Normal file
@@ -0,0 +1,155 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <argp.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "parse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "ioctl.h"
|
||||
#include "cmd.h"
|
||||
#include "list.h"
|
||||
|
||||
struct str_head {
|
||||
struct list_head head;
|
||||
char str[0];
|
||||
};
|
||||
|
||||
struct proj_args {
|
||||
struct list_head paths;
|
||||
char *which;
|
||||
u64 proj;
|
||||
unsigned int cmd;
|
||||
bool have_proj;
|
||||
};
|
||||
|
||||
static bool single_entry(struct list_head *list)
|
||||
{
|
||||
return list->next->next == list;
|
||||
}
|
||||
|
||||
static int do_proj(struct proj_args *args)
|
||||
{
|
||||
struct str_head *shead;
|
||||
int fd = -1;
|
||||
int ret;
|
||||
|
||||
list_for_each_entry(shead, &args->paths, head) {
|
||||
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
fd = get_path(shead->str, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
ret = fd;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ioctl(fd, args->cmd, &args->proj);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "%s project ioctl failed: %s (%d)\n",
|
||||
args->which, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (args->cmd == SCOUTFS_IOC_GET_PROJECT_ID) {
|
||||
if (single_entry(&args->paths))
|
||||
printf("%llu\n", args->proj);
|
||||
else
|
||||
printf("%s: %llu\n", shead->str, args->proj);
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool add_strdup_head(struct list_head *list, char *str)
|
||||
{
|
||||
struct str_head *shead;
|
||||
size_t bytes;
|
||||
|
||||
bytes = strlen(str) + 1;
|
||||
shead = malloc(offsetof(struct str_head, str[bytes]));
|
||||
if (!shead)
|
||||
return false;
|
||||
|
||||
memcpy(shead->str, str, bytes);
|
||||
list_add_tail(&shead->head, list);
|
||||
return true;
|
||||
}
|
||||
|
||||
static int parse_proj_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct proj_args *args = state->input;
|
||||
int ret;
|
||||
|
||||
switch (key) {
|
||||
case 'g':
|
||||
args->cmd = SCOUTFS_IOC_GET_PROJECT_ID;
|
||||
args->which = "get";
|
||||
break;
|
||||
case 's':
|
||||
ret = parse_u64(arg, &args->proj);
|
||||
if (ret)
|
||||
argp_error(state, "error parsing project ID");
|
||||
args->cmd = SCOUTFS_IOC_SET_PROJECT_ID;
|
||||
args->which = "set";
|
||||
break;
|
||||
case ARGP_KEY_ARG:
|
||||
if (!add_strdup_head(&args->paths, arg))
|
||||
argp_error(state, "error allocating memory for path");
|
||||
break;
|
||||
case ARGP_KEY_FINI:
|
||||
if (!args->cmd)
|
||||
argp_error(state, "must specify either -g (get) or -s (set)");
|
||||
if (list_empty(&args->paths))
|
||||
argp_error(state, "must final path arguments");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct argp_option proj_opts[] = {
|
||||
{ "get", 'g', NULL, 0, "Get and print existing project ID from inodes"},
|
||||
{ "set", 's', "ID", 0, "Set unsigned 64bit project ID on inodes (0 clears)"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static struct argp proj_argp = {
|
||||
proj_opts,
|
||||
parse_proj_opt,
|
||||
"",
|
||||
"Manipulate Project ID on inodes"
|
||||
};
|
||||
|
||||
static int proj_cmd(int argc, char **argv)
|
||||
{
|
||||
struct proj_args args = {
|
||||
.paths = LIST_HEAD_INIT(args.paths),
|
||||
.have_proj = false,
|
||||
};
|
||||
|
||||
return argp_parse(&proj_argp, argc, argv, 0, NULL, &args) ?:
|
||||
do_proj(&args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) proj_ctor(void)
|
||||
{
|
||||
cmd_register_argp("project-id", &proj_argp, GROUP_CORE, proj_cmd);
|
||||
}
|
||||
547
utils/src/quota.c
Normal file
547
utils/src/quota.c
Normal file
@@ -0,0 +1,547 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <argp.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "parse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "ioctl.h"
|
||||
#include "cmd.h"
|
||||
#include "util.h"
|
||||
#include "key.h"
|
||||
|
||||
static char opc[] = {
|
||||
[SQ_OP_DATA] = 'D',
|
||||
[SQ_OP_INODE] = 'I',
|
||||
};
|
||||
|
||||
static char nsc[] = {
|
||||
[SQ_NS_LITERAL] = 'L',
|
||||
[SQ_NS_PROJ] = 'P',
|
||||
[SQ_NS_UID] = 'U',
|
||||
[SQ_NS_GID] = 'G',
|
||||
};
|
||||
|
||||
static void printf_rule(struct scoutfs_ioctl_quota_rule *irule)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* priority: [0-9]+ */
|
||||
printf("%3u ", irule->prio);
|
||||
|
||||
/* totl name: ([0-9]+,[LPUG-]+,[S-]+){3} */
|
||||
for (i = 0; i < array_size(irule->name_val); i++) {
|
||||
|
||||
printf("%llu,%c,%c ",
|
||||
irule->name_val[i],
|
||||
nsc[irule->name_source[i]],
|
||||
(irule->name_flags[i] & SQ_NF_SELECT) ? 'S' : '-');
|
||||
}
|
||||
|
||||
/* op: [ID], limit: [0-9]+, flags [C-] */
|
||||
printf("%c %llu %c\n",
|
||||
opc[irule->op], irule->limit, (irule->rule_flags & SQ_RF_TOTL_COUNT) ? 'C' : '-');
|
||||
}
|
||||
|
||||
static int parse_rule(struct scoutfs_ioctl_quota_rule *irule, char *str)
|
||||
{
|
||||
char ns[3];
|
||||
char nf[3];
|
||||
char rf;
|
||||
char op;
|
||||
int ret;
|
||||
int i;
|
||||
int j;
|
||||
|
||||
memset(irule, 0, sizeof(struct scoutfs_ioctl_quota_rule));
|
||||
|
||||
ret = sscanf(str, " %hhu %llu,%c,%c %llu,%c,%c %llu,%c,%c %c %llu %c",
|
||||
&irule->prio, &irule->name_val[0], &ns[0], &nf[0], &irule->name_val[1],
|
||||
&ns[1], &nf[1], &irule->name_val[2], &ns[2], &nf[2], &op, &irule->limit,
|
||||
&rf);
|
||||
if (ret != 13) {
|
||||
printf("invalid rule, missing fields: %s\n", str);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < array_size(irule->name_val); i++) {
|
||||
irule->name_source[i] = SQ_NS__NR;
|
||||
|
||||
for (j = 0; j < array_size(nsc); j++) {
|
||||
if (ns[i] == nsc[j]) {
|
||||
irule->name_source[i] = j;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (irule->name_source[i] == SQ_NS__NR) {
|
||||
printf("invalid name source '%c' in name #%u in rule:\n\t%s\n",
|
||||
ns[i], i + 1, str);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
irule->name_flags[i] = nf[i] == '-' ? 0 :
|
||||
nf[i] == 'S' ? SQ_NF_SELECT :
|
||||
SQ_NF__UNKNOWN;
|
||||
if (irule->name_flags[i] == SQ_NF__UNKNOWN) {
|
||||
printf("invalid name flags '%c' in name #%u in rule:\n\t%s\n",
|
||||
nf[i], i + 1, str);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
irule->op = SQ_NS__NR;
|
||||
for (i = 0; i < array_size(opc); i++) {
|
||||
if (op == opc[i]) {
|
||||
irule->op = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (irule->op == SQ_NS__NR) {
|
||||
printf("invalid op '%c' in rule:\n\t%s\n", op, str);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
irule->rule_flags = rf == '-' ? 0 : rf == 'C' ? SQ_RF_TOTL_COUNT : SQ_RF__UNKNOWN;
|
||||
if (irule->rule_flags == SQ_RF__UNKNOWN) {
|
||||
printf("invalid rule flags '%c' in rule:\n\t%s\n", rf, str);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------- */
|
||||
|
||||
struct mod_args {
|
||||
char *path;
|
||||
char *rule_str;
|
||||
bool is_add;
|
||||
};
|
||||
|
||||
static int do_mod(struct mod_args *args)
|
||||
{
|
||||
struct scoutfs_ioctl_quota_rule irule;
|
||||
unsigned int cmd;
|
||||
int fd = -1;
|
||||
int ret;
|
||||
|
||||
memset(&irule, 0, sizeof(irule));
|
||||
|
||||
ret = parse_rule(&irule, args->rule_str);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
fd = get_path(args->path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
cmd = args->is_add ? SCOUTFS_IOC_ADD_QUOTA_RULE : SCOUTFS_IOC_DEL_QUOTA_RULE;
|
||||
ret = ioctl(fd, cmd, &irule);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "MOD_QUOTA_RULE ioctl failed: %s (%d)\n",
|
||||
strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int parse_mod_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct mod_args *args = state->input;
|
||||
|
||||
switch (key) {
|
||||
case 'p':
|
||||
args->path = strdup_or_error(state, arg);
|
||||
break;
|
||||
case 'r':
|
||||
args->rule_str = strdup_or_error(state, arg);
|
||||
break;
|
||||
case ARGP_KEY_FINI:
|
||||
if (!args->path)
|
||||
argp_error(state, "must provide file path");
|
||||
if (!args->rule_str)
|
||||
argp_error(state, "must provide rule string");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct argp_option add_options[] = {
|
||||
{ "path", 'p', "PATH", 0, "Path to ScoutFS filesystem"},
|
||||
{ "rule", 'r', "RULE_STRING", 0, "Rule string"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static struct argp add_argp = {
|
||||
add_options,
|
||||
parse_mod_opt,
|
||||
"",
|
||||
"Add quota rule"
|
||||
};
|
||||
|
||||
static int add_cmd(int argc, char **argv)
|
||||
{
|
||||
struct mod_args args = { .is_add = true, };
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&add_argp, argc, argv, 0, NULL, &args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_mod(&args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) add_ctor(void)
|
||||
{
|
||||
cmd_register_argp("quota-add", &add_argp, GROUP_CORE, add_cmd);
|
||||
}
|
||||
|
||||
static struct argp_option del_options[] = {
|
||||
{ "path", 'p', "PATH", 0, "Path to ScoutFS filesystem"},
|
||||
{ "rule", 'r', "RULE_STRING", 0, "Rule string"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static struct argp del_argp = {
|
||||
del_options,
|
||||
parse_mod_opt,
|
||||
"",
|
||||
"Delete quota rule"
|
||||
};
|
||||
|
||||
static int del_cmd(int argc, char **argv)
|
||||
{
|
||||
struct mod_args args = { .is_add = false };
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&del_argp, argc, argv, 0, NULL, &args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_mod(&args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) del_ctor(void)
|
||||
{
|
||||
cmd_register_argp("quota-del", &del_argp, GROUP_CORE, del_cmd);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------- */
|
||||
|
||||
/* Command line arguments shared by the commands built on do_bulk(). */
struct bulk_args {
	char *path;	/* -p: path handed to get_path() */
	bool unsorted;	/* -U: output rules as they are read instead of sorting */
};
|
||||
|
||||
/*
 * Input callback used by do_bulk(): store up to nr rules starting at
 * irules.  do_bulk() treats a positive return as the number of rules
 * stored, 0 as the end of input, and a negative return as an error.
 */
typedef int (*bulk_in_fn)(int fd, struct scoutfs_ioctl_quota_rule *irules,
			  size_t nr, void *in_args);

/*
 * Output callback used by do_bulk(): consume a single rule.  A negative
 * return makes do_bulk() stop and return that value.
 */
typedef int (*bulk_out_fn)(int fd, struct scoutfs_ioctl_quota_rule *irule,
			   void *out_args);
|
||||
|
||||
static int cmp_irules(const struct scoutfs_ioctl_quota_rule *a,
|
||||
const struct scoutfs_ioctl_quota_rule *b)
|
||||
{
|
||||
return scoutfs_cmp(a->prio, b->prio) ?:
|
||||
scoutfs_cmp(a->name_val[0], b->name_val[0]) ?:
|
||||
scoutfs_cmp(a->name_source[0], b->name_source[0]) ?:
|
||||
scoutfs_cmp(a->name_flags[0], b->name_flags[0]) ?:
|
||||
scoutfs_cmp(a->name_val[1], b->name_val[1]) ?:
|
||||
scoutfs_cmp(a->name_source[1], b->name_source[1]) ?:
|
||||
scoutfs_cmp(a->name_flags[1], b->name_flags[1]) ?:
|
||||
scoutfs_cmp(a->name_val[2], b->name_val[2]) ?:
|
||||
scoutfs_cmp(a->name_source[2], b->name_source[2]) ?:
|
||||
scoutfs_cmp(a->name_flags[2], b->name_flags[2]) ?:
|
||||
scoutfs_cmp(a->op, b->op) ?:
|
||||
scoutfs_cmp(a->limit, b->limit) ?:
|
||||
scoutfs_cmp(a->rule_flags, b->rule_flags);
|
||||
}
|
||||
|
||||
/*
 * qsort() comparison callback for arrays of quota rules.  The result
 * of cmp_irules() is negated, so rules that cmp_irules() considers
 * greater are sorted toward the front of the array.
 */
static int compar_irules(const void *a, const void *b)
{
	const struct scoutfs_ioctl_quota_rule *left = a;
	const struct scoutfs_ioctl_quota_rule *right = b;
	int cmp = cmp_irules(left, right);

	return -cmp;
}
|
||||
|
||||
static int do_bulk(struct bulk_args *args, bulk_in_fn in_fn, void *in_args,
|
||||
bulk_out_fn out_fn, void *out_args)
|
||||
{
|
||||
struct scoutfs_ioctl_quota_rule *irules = NULL;
|
||||
size_t alloced = 0;
|
||||
size_t nr = 0;
|
||||
size_t batch;
|
||||
size_t i;
|
||||
int fd = -1;
|
||||
int ret;
|
||||
|
||||
fd = get_path(args->path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
for (;;) {
|
||||
if (nr == alloced) {
|
||||
alloced += 1024;
|
||||
irules = realloc(irules, alloced * sizeof(irules[0]));
|
||||
if (!irules) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "memory allocation failed: %s (%d)\n",
|
||||
strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = in_fn(fd, &irules[nr], alloced - nr, in_args);
|
||||
if (ret == 0)
|
||||
break;
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
batch = ret;
|
||||
|
||||
if (args->unsorted) {
|
||||
for (i = 0; i < batch; i++) {
|
||||
ret = out_fn(fd, &irules[nr + i], out_args);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
nr += batch;
|
||||
}
|
||||
}
|
||||
|
||||
if (!args->unsorted) {
|
||||
qsort(irules, nr, sizeof(irules[0]), compar_irules);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
ret = out_fn(fd, &irules[i], out_args);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
if (irules)
|
||||
free(irules);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------- */
|
||||
|
||||
/* maintain iterator in gqr between calls */
|
||||
static int get_ioctl_in_fn(int fd, struct scoutfs_ioctl_quota_rule *irules, size_t nr,
|
||||
void *in_args)
|
||||
{
|
||||
struct scoutfs_ioctl_get_quota_rules *gqr = in_args;
|
||||
int ret;
|
||||
|
||||
gqr->rules_ptr = (intptr_t)irules;
|
||||
gqr->rules_nr = nr;
|
||||
|
||||
ret = ioctl(fd, SCOUTFS_IOC_GET_QUOTA_RULES, gqr);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "GET_QUOTA_RULES ioctl failed: %s (%d)\n",
|
||||
strerror(errno), errno);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int parse_stdin_in_fn(int fd, struct scoutfs_ioctl_quota_rule *irules, size_t nr,
|
||||
void *in_args)
|
||||
{
|
||||
char *line = NULL;
|
||||
size_t size;
|
||||
int ret;
|
||||
|
||||
ret = getline(&line, &size, stdin);
|
||||
if (ret < 0) {
|
||||
if (errno == ENOENT)
|
||||
return 0;
|
||||
|
||||
ret = -errno;
|
||||
fprintf(stderr, "error reading rules: %s (%d)\n",
|
||||
strerror(errno), errno);
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = parse_rule(&irules[0], line);
|
||||
if (ret == 0)
|
||||
ret = 1;
|
||||
|
||||
free(line);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Arguments handed to mod_ioctl_out_fn() through its out_args pointer. */
struct mod_ioctl_args {
	unsigned int cmd;	/* ioctl command issued for each rule */
	char *which;		/* verb used in the failure message */
};
|
||||
|
||||
static int mod_ioctl_out_fn(int fd, struct scoutfs_ioctl_quota_rule *irule, void *out_args)
|
||||
{
|
||||
struct mod_ioctl_args *args = out_args;
|
||||
int ret;
|
||||
|
||||
ret = ioctl(fd, args->cmd, irule);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
printf("Failed to %s following rule:\n ", args->which);
|
||||
printf_rule(irule);
|
||||
fprintf(stderr, "Error: %s (%d)\n", strerror(-ret), -ret);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * bulk_out_fn that prints the rule with printf_rule().  The fd and
 * out_args arguments are unused.  Always returns 0.
 */
static int print_out_fn(int fd, struct scoutfs_ioctl_quota_rule *irule, void *out_args)
{
	(void)fd;
	(void)out_args;

	printf_rule(irule);

	return 0;
}
|
||||
|
||||
/* ---------------------------------------------- */
|
||||
|
||||
static int parse_bulk_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct bulk_args *args = state->input;
|
||||
|
||||
switch (key) {
|
||||
case 'p':
|
||||
args->path = strdup_or_error(state, arg);
|
||||
break;
|
||||
case 'U':
|
||||
args->unsorted = true;
|
||||
break;
|
||||
case ARGP_KEY_FINI:
|
||||
if (!args->path)
|
||||
argp_error(state, "must provide file path");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct argp_option bulk_options[] = {
|
||||
{ "path", 'p', "PATH", 0, "Path to ScoutFS filesystem"},
|
||||
{ "unsorted", 'U', NULL, 0, "Process rules in unsorted filesystem storage order"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static struct argp list_argp = {
|
||||
bulk_options,
|
||||
parse_bulk_opt,
|
||||
"",
|
||||
"List quota rules"
|
||||
};
|
||||
|
||||
static int list_cmd(int argc, char **argv)
|
||||
{
|
||||
struct scoutfs_ioctl_get_quota_rules gqr = {{0,}};
|
||||
struct bulk_args args = {NULL};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&list_argp, argc, argv, 0, NULL, &args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_bulk(&args, get_ioctl_in_fn, &gqr, print_out_fn, NULL);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) list_ctor(void)
|
||||
{
|
||||
cmd_register_argp("quota-list", &list_argp, GROUP_CORE, list_cmd);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------- */
|
||||
|
||||
static struct argp wipe_argp = {
|
||||
bulk_options,
|
||||
parse_bulk_opt,
|
||||
"",
|
||||
"Delete all quota rules"
|
||||
};
|
||||
|
||||
static int wipe_cmd(int argc, char **argv)
|
||||
{
|
||||
struct bulk_args args = {NULL};
|
||||
struct scoutfs_ioctl_get_quota_rules gqr = {{0,}};
|
||||
struct mod_ioctl_args out_args = {
|
||||
.cmd = SCOUTFS_IOC_DEL_QUOTA_RULE,
|
||||
.which = "delete",
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&wipe_argp, argc, argv, 0, NULL, &args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_bulk(&args, get_ioctl_in_fn, &gqr, mod_ioctl_out_fn, &out_args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) wipe_ctor(void)
|
||||
{
|
||||
cmd_register_argp("quota-wipe", &wipe_argp, GROUP_CORE, wipe_cmd);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------- */
|
||||
|
||||
static struct argp restore_argp = {
|
||||
bulk_options,
|
||||
parse_bulk_opt,
|
||||
"",
|
||||
"Restore quota rules from list output on stdin"
|
||||
};
|
||||
|
||||
static int restore_cmd(int argc, char **argv)
|
||||
{
|
||||
struct bulk_args args = {NULL};
|
||||
struct mod_ioctl_args out_args = {
|
||||
.cmd = SCOUTFS_IOC_ADD_QUOTA_RULE,
|
||||
.which = "add",
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&restore_argp, argc, argv, 0, NULL, &args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_bulk(&args, parse_stdin_in_fn, NULL, mod_ioctl_out_fn, &out_args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) restore_ctor(void)
|
||||
{
|
||||
cmd_register_argp("quota-restore", &restore_argp, GROUP_CORE, restore_cmd);
|
||||
}
|
||||
176
utils/src/read_xattr_index.c
Normal file
176
utils/src/read_xattr_index.c
Normal file
@@ -0,0 +1,176 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <argp.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "parse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "ioctl.h"
|
||||
#include "cmd.h"
|
||||
#include "cmp.h"
|
||||
|
||||
#define ENTF "%llu.%llu.%llu"
|
||||
#define ENTA(e) (e)->a, (e)->b, (e)->ino
|
||||
|
||||
/* Command line arguments of the read-xattr-index command. */
struct xattr_args {
	char *path;		/* -p: path handed to get_path() */
	char *first_entry;	/* first positional argument, NULL if absent */
	char *last_entry;	/* second positional argument, NULL if absent */
};
|
||||
|
||||
static int compare_entries(struct scoutfs_ioctl_xattr_index_entry *a,
|
||||
struct scoutfs_ioctl_xattr_index_entry *b)
|
||||
{
|
||||
return scoutfs_cmp(a->a, b->a) ?: scoutfs_cmp(a->b, b->b) ?: scoutfs_cmp(a->ino, b->ino);
|
||||
}
|
||||
|
||||
static int parse_entry(struct scoutfs_ioctl_xattr_index_entry *ent, char *str)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = sscanf(str, "%lli.%lli.%lli", &ent->a, &ent->b, &ent->ino);
|
||||
if (ret != 3) {
|
||||
fprintf(stderr, "bad index position entry argument '%s', it must be "
|
||||
"in the form \"a.b.ino\" where each value can be prefixed by "
|
||||
"'0' for octal or '0x' for hex\n", str);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define NR_ENTRIES 1024
|
||||
|
||||
static int do_read_xattr_index(struct xattr_args *args)
|
||||
{
|
||||
struct scoutfs_ioctl_read_xattr_index rxi;
|
||||
struct scoutfs_ioctl_xattr_index_entry *ents;
|
||||
struct scoutfs_ioctl_xattr_index_entry *ent;
|
||||
int fd = -1;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
ents = calloc(NR_ENTRIES, sizeof(struct scoutfs_ioctl_xattr_index_entry));
|
||||
if (!ents) {
|
||||
fprintf(stderr, "xattr index entry allocation failed\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
fd = get_path(args->path, O_RDONLY);
|
||||
if (fd < 0)
|
||||
return fd;
|
||||
|
||||
memset(&rxi, 0, sizeof(rxi));
|
||||
memset(&rxi.last, 0xff, sizeof(rxi.last));
|
||||
rxi.entries_ptr = (unsigned long)ents;
|
||||
rxi.entries_nr = NR_ENTRIES;
|
||||
|
||||
ret = 0;
|
||||
if (args->first_entry)
|
||||
ret = parse_entry(&rxi.first, args->first_entry);
|
||||
if (args->last_entry)
|
||||
ret = parse_entry(&rxi.last, args->last_entry);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (compare_entries(&rxi.first, &rxi.last) > 0) {
|
||||
fprintf(stderr, "first index position "ENTF" must be less than last index position "ENTF"\n",
|
||||
ENTA(&rxi.first), ENTA(&rxi.last));
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
ret = ioctl(fd, SCOUTFS_IOC_READ_XATTR_INDEX, &rxi);
|
||||
if (ret == 0)
|
||||
break;
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "read_xattr_index ioctl failed: "
|
||||
"%s (%d)\n", strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
ent = &ents[i];
|
||||
printf("%llu.%llu = %llu\n",
|
||||
ent->a, ent->b, ent->ino);
|
||||
}
|
||||
|
||||
rxi.first = *ent;
|
||||
|
||||
if ((++rxi.first.ino == 0 && ++rxi.first.b == 0 && ++rxi.first.a == 0) ||
|
||||
compare_entries(&rxi.first, &rxi.last) > 0)
|
||||
break;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
free(ents);
|
||||
|
||||
return ret;
|
||||
};
|
||||
|
||||
static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct xattr_args *args = state->input;
|
||||
|
||||
switch (key) {
|
||||
case 'p':
|
||||
args->path = strdup_or_error(state, arg);
|
||||
break;
|
||||
case ARGP_KEY_ARG:
|
||||
if (!args->first_entry)
|
||||
args->first_entry = strdup_or_error(state, arg);
|
||||
else if (!args->last_entry)
|
||||
args->last_entry = strdup_or_error(state, arg);
|
||||
else
|
||||
argp_error(state, "more than two entry arguments given");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct argp_option options[] = {
|
||||
{ "path", 'p', "PATH", 0, "Path to ScoutFS filesystem"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static struct argp argp = {
|
||||
options,
|
||||
parse_opt,
|
||||
"FIRST-ENTRY LAST-ENTRY",
|
||||
"Search and print inode numbers indexed by their .indx. xattrs"
|
||||
};
|
||||
|
||||
static int read_xattr_index_cmd(int argc, char **argv)
|
||||
{
|
||||
|
||||
struct xattr_args xattr_args = {NULL};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&argp, argc, argv, 0, NULL, &xattr_args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_read_xattr_index(&xattr_args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) read_xattr_index_ctor(void)
|
||||
{
|
||||
cmd_register_argp("read-xattr-index", &argp, GROUP_INFO, read_xattr_index_cmd);
|
||||
}
|
||||
Reference in New Issue
Block a user