mirror of
https://github.com/versity/scoutfs.git
synced 2026-05-01 02:15:44 +00:00
Compare commits
450 Commits
bcrl/mmap
...
zab/disabl
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6ad18769cb | ||
|
|
49d82fcaaf | ||
|
|
e4e12c1968 | ||
|
|
15fd2ccc02 | ||
|
|
eea95357d3 | ||
|
|
9842c5d13e | ||
|
|
ade539217e | ||
|
|
5a90234c94 | ||
|
|
f81e4cb98a | ||
|
|
1fc706bf3f | ||
|
|
e9c3aa6501 | ||
|
|
d39268bbc1 | ||
|
|
35ed1a2438 | ||
|
|
32e7978a6e | ||
|
|
8123b8fc35 | ||
|
|
da5911c311 | ||
|
|
098fc420be | ||
|
|
7a96537210 | ||
|
|
0607dfdac8 | ||
|
|
0354bb64c5 | ||
|
|
631801c45c | ||
|
|
47a1ac92f7 | ||
|
|
004f693af3 | ||
|
|
f271a5d140 | ||
|
|
355eac79d2 | ||
|
|
d8b4e94854 | ||
|
|
bed33c7ffd | ||
|
|
b370730029 | ||
|
|
d64dd89ead | ||
|
|
8d81196e01 | ||
|
|
d731c1577e | ||
|
|
a421bb0884 | ||
|
|
773eb129ed | ||
|
|
eb3981c103 | ||
|
|
3139d3ea68 | ||
|
|
4da3d47601 | ||
|
|
aa1b1fa34f | ||
|
|
8fcc9095e6 | ||
|
|
299062a456 | ||
|
|
7cac1e7136 | ||
|
|
454dbebf59 | ||
|
|
2c5871c253 | ||
|
|
64a698aa93 | ||
|
|
d48b447e75 | ||
|
|
5241bba7f6 | ||
|
|
e0a2175c2e | ||
|
|
f2cd1003f6 | ||
|
|
97c6cc559e | ||
|
|
7c54c86c38 | ||
|
|
e1ba508301 | ||
|
|
f35154eb19 | ||
|
|
7befc61482 | ||
|
|
1383ca1a8d | ||
|
|
6b5ddf2b3a | ||
|
|
d025122fdd | ||
|
|
706fe9a30e | ||
|
|
0f17ecb9e3 | ||
|
|
fc003a5038 | ||
|
|
10df01eb7a | ||
|
|
68b8e4098d | ||
|
|
5701184324 | ||
|
|
a3035582d3 | ||
|
|
9e47a32257 | ||
|
|
b4592554af | ||
|
|
1e0f8ee27a | ||
|
|
511cb04330 | ||
|
|
807ae11ee9 | ||
|
|
7ca3672a67 | ||
|
|
eb22425bad | ||
|
|
e386b900ee | ||
|
|
6415814f92 | ||
|
|
86cf3ec4ab | ||
|
|
aa6e210ac7 | ||
|
|
e648063baa | ||
|
|
bc09012836 | ||
|
|
cf78e92eaf | ||
|
|
19f5c1d7bf | ||
|
|
bb0ed34786 | ||
|
|
14530471c4 | ||
|
|
88aefc381a | ||
|
|
8982750266 | ||
|
|
e2dfffcab9 | ||
|
|
f0ddf5ff04 | ||
|
|
18aee0ebbd | ||
|
|
c35f1ff324 | ||
|
|
6770a31683 | ||
|
|
9395360324 | ||
|
|
7c5823ad12 | ||
|
|
560c91a0e4 | ||
|
|
4647a6ccb2 | ||
|
|
1bef610416 | ||
|
|
9375b9d3b7 | ||
|
|
ae286bf837 | ||
|
|
a5d9ac5514 | ||
|
|
7b2310442b | ||
|
|
cf278f5fa0 | ||
|
|
73333af364 | ||
|
|
9a647a98f1 | ||
|
|
2f3d1c395e | ||
|
|
222e5f1b9d | ||
|
|
30668c1cdd | ||
|
|
84bb170e3a | ||
|
|
320c411678 | ||
|
|
c08f818b64 | ||
|
|
0e5fb021a2 | ||
|
|
b40f53633f | ||
|
|
aed9f66410 | ||
|
|
09256fdf15 | ||
|
|
8f72d16609 | ||
|
|
08eb75c508 | ||
|
|
9f151fde92 | ||
|
|
f46ab548a4 | ||
|
|
ff532eba75 | ||
|
|
736d9d7df8 | ||
|
|
45e2209123 | ||
|
|
9cf2a6ced0 | ||
|
|
66c6331131 | ||
|
|
42bf0980b6 | ||
|
|
e6228ead73 | ||
|
|
13438c8f5d | ||
|
|
d9d9b65f14 | ||
|
|
5e1c8586cc | ||
|
|
68d7a2e2cb | ||
|
|
87cb971630 | ||
|
|
dc47ec65e4 | ||
|
|
dbea353b92 | ||
|
|
5701182665 | ||
|
|
6fea9f90c4 | ||
|
|
e78ba2b427 | ||
|
|
8bd6646d9a | ||
|
|
6b1dd980f0 | ||
|
|
ea7c41d876 | ||
|
|
669e7f733b | ||
|
|
4bd86d1a00 | ||
|
|
b424208555 | ||
|
|
4ca0b3ff74 | ||
|
|
838e293413 | ||
|
|
a19e151277 | ||
|
|
36c426d555 | ||
|
|
fddfde62e6 | ||
|
|
e6385784f5 | ||
|
|
23711f05f6 | ||
|
|
d87e2e0166 | ||
|
|
2e7053497e | ||
|
|
735c2c6905 | ||
|
|
a848477e64 | ||
|
|
b094b18618 | ||
|
|
7a3749d591 | ||
|
|
d589881855 | ||
|
|
2073a672a0 | ||
|
|
33374d8fe6 | ||
|
|
3d790b24d5 | ||
|
|
fb66372988 | ||
|
|
8bf4c078df | ||
|
|
27bc0ef095 | ||
|
|
c4663ea1a1 | ||
|
|
e347ca3606 | ||
|
|
005cf99f42 | ||
|
|
c61175e796 | ||
|
|
e60f4e7082 | ||
|
|
8f946aa478 | ||
|
|
b605407c29 | ||
|
|
84d6904de8 | ||
|
|
85a27b2198 | ||
|
|
c1229644da | ||
|
|
a65eccd0f5 | ||
|
|
e2a919492d | ||
|
|
1e2dc6c1df | ||
|
|
f04a636229 | ||
|
|
e82cce36d9 | ||
|
|
e85fc5b1a7 | ||
|
|
c0fdd37e5a | ||
|
|
35d1ad1422 | ||
|
|
9bb32b8003 | ||
|
|
5f0dbc5f85 | ||
|
|
ffc1e5aa86 | ||
|
|
39993d8b5f | ||
|
|
b86a1bebbb | ||
|
|
aa84f7c601 | ||
|
|
ac2d465b66 | ||
|
|
4e546b2e7c | ||
|
|
b28acdf904 | ||
|
|
ae97ffd6fc | ||
|
|
12067e99ab | ||
|
|
6bacd95aea | ||
|
|
45e594396f | ||
|
|
b1757a061e | ||
|
|
1a994137f4 | ||
|
|
57af2bd34b | ||
|
|
9e975dffe1 | ||
|
|
d440056e6f | ||
|
|
d1e62a43c9 | ||
|
|
289caeb353 | ||
|
|
ba879b977a | ||
|
|
5c6b263d97 | ||
|
|
ca6b7f1e6d | ||
|
|
55dde87bb1 | ||
|
|
f4db553c28 | ||
|
|
4b9c02ba32 | ||
|
|
6356440073 | ||
|
|
9658412d09 | ||
|
|
57c7caf348 | ||
|
|
f8bf1718a0 | ||
|
|
c415cab1e9 | ||
|
|
f8e1812288 | ||
|
|
cca83b1758 | ||
|
|
8c114ddb87 | ||
|
|
ab271f4682 | ||
|
|
42e7fbb4f7 | ||
|
|
f48112e2a7 | ||
|
|
07ba053021 | ||
|
|
69e5f5ae5f | ||
|
|
2980edac53 | ||
|
|
f9ff25db23 | ||
|
|
0a47e8f936 | ||
|
|
3a82090ab1 | ||
|
|
4d0b78f5cb | ||
|
|
e6ae397d12 | ||
|
|
e5f5ee2679 | ||
|
|
8fe683dab8 | ||
|
|
6d7b8233c6 | ||
|
|
26ccaca80b | ||
|
|
ca8abeebb1 | ||
|
|
b7943c5412 | ||
|
|
304dbbbafa | ||
|
|
177af7f746 | ||
|
|
99bc710f03 | ||
|
|
ac0e58839d | ||
|
|
efd9763355 | ||
|
|
f59336085d | ||
|
|
ad99636af8 | ||
|
|
f9df3ada6c | ||
|
|
22716c0389 | ||
|
|
d2d32c8776 | ||
|
|
492afae552 | ||
|
|
8cf6f73744 | ||
|
|
74f85ff93d | ||
|
|
79e235af6e | ||
|
|
0a8faf3e94 | ||
|
|
63cccfa582 | ||
|
|
e44fb23064 | ||
|
|
247e22f56f | ||
|
|
3c7d1f3935 | ||
|
|
91c64dfa2d | ||
|
|
53f29d3f2a | ||
|
|
3308bf8d8c | ||
|
|
cce20dbeb6 | ||
|
|
503011b777 | ||
|
|
ec782fff8d | ||
|
|
6b66e583f2 | ||
|
|
ff436db49b | ||
|
|
34c3d903d9 | ||
|
|
794277053f | ||
|
|
4c225c2061 | ||
|
|
1ce084fcd9 | ||
|
|
7dc3d7d732 | ||
|
|
89fcb207a7 | ||
|
|
3ce6061907 | ||
|
|
12b00d0058 | ||
|
|
920fca752c | ||
|
|
e0a49c46a7 | ||
|
|
c87a9f3a07 | ||
|
|
3776c18c66 | ||
|
|
0fee134133 | ||
|
|
4326a95b9b | ||
|
|
a471c7716e | ||
|
|
70efa2f905 | ||
|
|
7cd8738add | ||
|
|
3670a5b80d | ||
|
|
fc15b816b0 | ||
|
|
2dc611a433 | ||
|
|
2b966fd45c | ||
|
|
3981f944dd | ||
|
|
b9bd7d1293 | ||
|
|
adadd51815 | ||
|
|
9a087be46c | ||
|
|
8597fd0bfc | ||
|
|
674224d454 | ||
|
|
8d505668fe | ||
|
|
da185b214b | ||
|
|
336a6a155d | ||
|
|
ffe15c2d82 | ||
|
|
57da5fae4c | ||
|
|
77bd0c20ab | ||
|
|
514418421c | ||
|
|
a9b46213b3 | ||
|
|
841fbc1b66 | ||
|
|
3c9eeeb2ef | ||
|
|
587760edb3 | ||
|
|
3d64c46fcd | ||
|
|
4c611474e8 | ||
|
|
64bdda717c | ||
|
|
dd117593da | ||
|
|
02d2edb467 | ||
|
|
ea969a5dde | ||
|
|
f59dfe8b73 | ||
|
|
266b6d8bdd | ||
|
|
92f22358a7 | ||
|
|
bbfa71361f | ||
|
|
bf014a4c57 | ||
|
|
078d2f6073 | ||
|
|
7abf5c1e2b | ||
|
|
c3ad8282a3 | ||
|
|
f368686b89 | ||
|
|
ea2ec838ec | ||
|
|
51a48fbbb6 | ||
|
|
b96feaa5b0 | ||
|
|
35b5f1f9c5 | ||
|
|
0a62ffbc2f | ||
|
|
445ac62172 | ||
|
|
3ab93baa55 | ||
|
|
59739e0057 | ||
|
|
cfc8cb8800 | ||
|
|
98d06c7a6b | ||
|
|
35e4ab92f0 | ||
|
|
f649edd65d | ||
|
|
37d5aae4d2 | ||
|
|
f275020baa | ||
|
|
8e6c18a0fa | ||
|
|
837310e8e6 | ||
|
|
65ce5c6ad5 | ||
|
|
0770cc8c57 | ||
|
|
787555158a | ||
|
|
8119a56c92 | ||
|
|
ac1065014b | ||
|
|
02204c36fc | ||
|
|
2527b4906e | ||
|
|
d796fbf15e | ||
|
|
e68a999ed5 | ||
|
|
7d674fa4bf | ||
|
|
7c30294e1b | ||
|
|
33fa14b730 | ||
|
|
3ecc099589 | ||
|
|
7df8b87128 | ||
|
|
0876fb31c6 | ||
|
|
80e0c4bd56 | ||
|
|
0acab247e3 | ||
|
|
4ab22d8f09 | ||
|
|
b3d11925c7 | ||
|
|
34fc095392 | ||
|
|
362fc0ab62 | ||
|
|
f02944bd73 | ||
|
|
589e9d10b9 | ||
|
|
288a752f42 | ||
|
|
affdaddc15 | ||
|
|
2c89ff3a07 | ||
|
|
7684e7fcf6 | ||
|
|
cf291e2483 | ||
|
|
7bbe49fde2 | ||
|
|
6c37e3dee0 | ||
|
|
d78649e065 | ||
|
|
6ae8e9743f | ||
|
|
c6eaccbf90 | ||
|
|
51ae302d81 | ||
|
|
228c5d8b4b | ||
|
|
08aaa5b430 | ||
|
|
9fc99a8c31 | ||
|
|
9c00602051 | ||
|
|
a9cb464d49 | ||
|
|
4585d57153 | ||
|
|
1c9a407059 | ||
|
|
e09a216762 | ||
|
|
bd54995599 | ||
|
|
f86ce74ffd | ||
|
|
a147239022 | ||
|
|
2e2ee3b2f1 | ||
|
|
77d0268cb2 | ||
|
|
13b2d9bb88 | ||
|
|
02993a2dd7 | ||
|
|
16da3c182a | ||
|
|
acda5a3bf1 | ||
|
|
44f8551fb6 | ||
|
|
52291b2c75 | ||
|
|
38c8a4901f | ||
|
|
c4f2563cc1 | ||
|
|
e81c256a22 | ||
|
|
34c62824e5 | ||
|
|
26a4266964 | ||
|
|
c2b47d84c1 | ||
|
|
484b34057a | ||
|
|
7c4bc528c6 | ||
|
|
c3b6dd0763 | ||
|
|
19b674cb38 | ||
|
|
7cd70ab2bb | ||
|
|
818e149643 | ||
|
|
eb4baa88f5 | ||
|
|
c96b833a36 | ||
|
|
9d3fe27929 | ||
|
|
5fcf70b53e | ||
|
|
41e3ca0f41 | ||
|
|
ec702b9bb3 | ||
|
|
c3f122a5f1 | ||
|
|
932b0776d1 | ||
|
|
22140c93d1 | ||
|
|
c2cfb0227f | ||
|
|
f1d8955303 | ||
|
|
fb16af7b7d | ||
|
|
cd0d045c93 | ||
|
|
40b9f19ec4 | ||
|
|
871db60fb2 | ||
|
|
a901db2ff7 | ||
|
|
b436772376 | ||
|
|
e6222223c2 | ||
|
|
0dff7f55a6 | ||
|
|
4ccb80a8ec | ||
|
|
86ffdf24a2 | ||
|
|
a89f6c10b1 | ||
|
|
2f91a9a735 | ||
|
|
c17a7036ed | ||
|
|
43619a245d | ||
|
|
be4a137479 | ||
|
|
25e3b03d94 | ||
|
|
0af40547b5 | ||
|
|
6a97aa3c9a | ||
|
|
4b86256904 | ||
|
|
99167f6d66 | ||
|
|
c48e08a378 | ||
|
|
1cacc50de0 | ||
|
|
fc37ece26b | ||
|
|
54044508fa | ||
|
|
a069bdd945 | ||
|
|
d774e5308b | ||
|
|
54867b0f9c | ||
|
|
29c1f529f1 | ||
|
|
67ad29508d | ||
|
|
77c673f984 | ||
|
|
1235f04c4a | ||
|
|
56077b61a1 | ||
|
|
c4fcf40097 | ||
|
|
544fd1ba9a | ||
|
|
af2975111a | ||
|
|
7ea78502c8 | ||
|
|
10cf83ffc5 | ||
|
|
339c719e4e | ||
|
|
e1c1c50ead | ||
|
|
e0e6179156 | ||
|
|
ddf5ef1017 | ||
|
|
502783e1bc | ||
|
|
463f5e5a07 | ||
|
|
d0429e1c88 | ||
|
|
8471134328 | ||
|
|
f3de3b1817 | ||
|
|
a0a3ef9675 | ||
|
|
e59d0af199 | ||
|
|
d8f76cb893 | ||
|
|
906c0186bc | ||
|
|
e9baa4559b | ||
|
|
de1bf39614 | ||
|
|
a7b8f955fe | ||
|
|
2c2f090168 |
0
.gitignore
vendored
Normal file
0
.gitignore
vendored
Normal file
17
Makefile
Normal file
17
Makefile
Normal file
@@ -0,0 +1,17 @@
|
||||
#
|
||||
# Typically development is done in each subdir, but we have a tiny
|
||||
# makefile here to make it easy to run simple targets across all the
|
||||
# subdirs.
|
||||
#
|
||||
|
||||
SUBDIRS := kmod utils tests
|
||||
NOTTESTS := kmod utils
|
||||
|
||||
all clean: $(SUBDIRS) FORCE
|
||||
dist: $(NOTTESTS) FORCE
|
||||
|
||||
$(SUBDIRS): FORCE
|
||||
$(MAKE) -C $@ $(MAKECMDGOALS)
|
||||
|
||||
all:
|
||||
FORCE:
|
||||
@@ -6,7 +6,7 @@ from the ground up to support large archival systems.
|
||||
Its key differentiating features are:
|
||||
|
||||
- Integrated consistent indexing accelerates archival maintenance operations
|
||||
- Log-structured commits allow nodes to write concurrently without contention
|
||||
- Commit logs allow nodes to write concurrently without contention
|
||||
|
||||
It meets best of breed expectations:
|
||||
|
||||
@@ -31,15 +31,9 @@ functionality hasn't been implemented. It's appropriate for early
|
||||
adopters and interested developers, not for production use.
|
||||
|
||||
In that vein, expect significant incompatible changes to both the format
|
||||
of network messages and persistent structures. To avoid mistakes the
|
||||
implementation currently calculates a hash of the format and ioctl
|
||||
header files in the source tree. The kernel module will refuse to mount
|
||||
a volume created by userspace utilities with a mismatched hash, and it
|
||||
will refuse to connect to a remote node with a mismatched hash. This
|
||||
means having to unmount, mkfs, and remount everything across many
|
||||
functional changes. Once the format is nailed down we'll wire up
|
||||
forward and back compat machinery and remove this temporary safety
|
||||
measure.
|
||||
of network messages and persistent structures. Since the format hash-checking
|
||||
has now been removed in preparation for release, if there is any doubt, mkfs
|
||||
is strongly recommended.
|
||||
|
||||
The current kernel module is developed against the RHEL/CentOS 7.x
|
||||
kernel to minimize the friction of developing and testing with partners'
|
||||
@@ -62,17 +56,17 @@ help on the mailing list.**
|
||||
The requirements for running scoutfs on a small cluster are:
|
||||
|
||||
1. One or more nodes running x86-64 CentOS/RHEL 7.4 (or 7.3)
|
||||
2. Access to a single shared block device
|
||||
2. Access to two shared block devices
|
||||
3. IPv4 connectivity between the nodes
|
||||
|
||||
The steps for getting scoutfs mounted and operational are:
|
||||
|
||||
1. Get the kernel module running on the nodes
|
||||
2. Make a new filesystem on the device with the userspace utilities
|
||||
3. Mount the device on all the nodes
|
||||
2. Make a new filesystem on the devices with the userspace utilities
|
||||
3. Mount the devices on all the nodes
|
||||
|
||||
In this example we run all of these commands on three nodes. The block
|
||||
device name is the same on all the nodes.
|
||||
In this example we run all of these commands on three nodes. The names
|
||||
of the block devices are the same on all the nodes.
|
||||
|
||||
1. Get the Kernel Module and Userspace Binaries
|
||||
|
||||
@@ -87,14 +81,11 @@ device name is the same on all the nodes.
|
||||
|
||||
```shell
|
||||
yum install kernel-devel
|
||||
git clone git@github.com:versity/scoutfs-kmod-dev.git
|
||||
make -C scoutfs-kmod-dev module
|
||||
git clone git@github.com:versity/scoutfs.git
|
||||
make -C scoutfs
|
||||
modprobe libcrc32c
|
||||
insmod scoutfs-kmod-dev/src/scoutfs.ko
|
||||
|
||||
git clone git@github.com:versity/scoutfs-utils-dev.git
|
||||
make -C scoutfs-utils-dev
|
||||
alias scoutfs=$PWD/scoutfs-utils-dev/src/scoutfs
|
||||
insmod scoutfs/kmod/src/scoutfs.ko
|
||||
alias scoutfs=$PWD/scoutfs/utils/src/scoutfs
|
||||
```
|
||||
|
||||
2. Make a New Filesystem (**destroys contents, no questions asked**)
|
||||
@@ -103,7 +94,7 @@ device name is the same on all the nodes.
|
||||
quorum for the system to function.
|
||||
|
||||
```shell
|
||||
scoutfs mkfs -Q 2 /dev/shared_block_device
|
||||
scoutfs mkfs -Q 2 /dev/meta_dev /dev/data_dev
|
||||
```
|
||||
|
||||
3. Mount the Filesystem
|
||||
@@ -114,7 +105,7 @@ device name is the same on all the nodes.
|
||||
|
||||
```shell
|
||||
mkdir /mnt/scoutfs
|
||||
mount -t scoutfs -o server_addr=$NODE_ADDR /dev/shared_block_device /mnt/scoutfs
|
||||
mount -t scoutfs -o server_addr=$NODE_ADDR,metadev_path=/dev/meta_dev /dev/data_dev /mnt/scoutfs
|
||||
```
|
||||
|
||||
4. For Kicks, Observe the Metadata Change Index
|
||||
@@ -16,11 +16,7 @@ SCOUTFS_GIT_DESCRIBE := \
|
||||
$(shell git describe --all --abbrev=6 --long 2>/dev/null || \
|
||||
echo no-git)
|
||||
|
||||
SCOUTFS_FORMAT_HASH := \
|
||||
$(shell cat src/format.h src/ioctl.h | md5sum | cut -b1-16)
|
||||
|
||||
SCOUTFS_ARGS := SCOUTFS_GIT_DESCRIBE=$(SCOUTFS_GIT_DESCRIBE) \
|
||||
SCOUTFS_FORMAT_HASH=$(SCOUTFS_FORMAT_HASH) \
|
||||
CONFIG_SCOUTFS_FS=m -C $(SK_KSRC) M=$(CURDIR)/src \
|
||||
EXTRA_CFLAGS="-Werror"
|
||||
|
||||
@@ -51,7 +47,7 @@ modules_install:
|
||||
|
||||
dist: scoutfs-kmod.spec
|
||||
git archive --format=tar --prefix scoutfs-kmod-$(RPM_VERSION)/ HEAD^{tree} > $(TARFILE)
|
||||
@ tar rf $(TARFILE) --transform="s@\(.*\)@scoutfs-$(RPM_VERSION)/\1@" scoutfs-kmod.spec
|
||||
@ tar rf $(TARFILE) --transform="s@\(.*\)@scoutfs-kmod-$(RPM_VERSION)/\1@" scoutfs-kmod.spec
|
||||
|
||||
clean:
|
||||
make $(SCOUTFS_ARGS) clean
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
obj-$(CONFIG_SCOUTFS_FS) := scoutfs.o
|
||||
|
||||
CFLAGS_super.o = -DSCOUTFS_GIT_DESCRIBE=\"$(SCOUTFS_GIT_DESCRIBE)\" \
|
||||
-DSCOUTFS_FORMAT_HASH=0x$(SCOUTFS_FORMAT_HASH)LLU
|
||||
CFLAGS_super.o = -DSCOUTFS_GIT_DESCRIBE=\"$(SCOUTFS_GIT_DESCRIBE)\"
|
||||
|
||||
CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include
|
||||
|
||||
@@ -9,6 +8,8 @@ CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include
|
||||
-include $(src)/Makefile.kernelcompat
|
||||
|
||||
scoutfs-y += \
|
||||
avl.o \
|
||||
alloc.o \
|
||||
block.o \
|
||||
btree.o \
|
||||
client.o \
|
||||
@@ -16,10 +17,12 @@ scoutfs-y += \
|
||||
data.o \
|
||||
dir.o \
|
||||
export.o \
|
||||
ext.o \
|
||||
file.o \
|
||||
forest.o \
|
||||
inode.o \
|
||||
ioctl.o \
|
||||
item.o \
|
||||
lock.o \
|
||||
lock_server.o \
|
||||
msg.o \
|
||||
@@ -27,10 +30,11 @@ scoutfs-y += \
|
||||
options.o \
|
||||
per_task.o \
|
||||
quorum.o \
|
||||
radix.o \
|
||||
scoutfs_trace.o \
|
||||
server.o \
|
||||
sort_priv.o \
|
||||
spbm.o \
|
||||
srch.o \
|
||||
super.o \
|
||||
sysfs.o \
|
||||
trans.o \
|
||||
@@ -50,5 +54,9 @@ $(src)/check_exported_types:
|
||||
echo "no raw types in exported headers, preface with __"; \
|
||||
exit 1; \
|
||||
fi
|
||||
@if egrep '\<__packed\>' $(src)/format.h $(src)/ioctl.h; then \
|
||||
echo "no __packed allowed in exported headers"; \
|
||||
exit 1; \
|
||||
fi
|
||||
|
||||
extra-y += check_exported_types
|
||||
|
||||
1300
kmod/src/alloc.c
Normal file
1300
kmod/src/alloc.c
Normal file
File diff suppressed because it is too large
Load Diff
155
kmod/src/alloc.h
Normal file
155
kmod/src/alloc.h
Normal file
@@ -0,0 +1,155 @@
|
||||
#ifndef _SCOUTFS_ALLOC_H_
|
||||
#define _SCOUTFS_ALLOC_H_
|
||||
|
||||
#include "ext.h"
|
||||
|
||||
/*
|
||||
* These are implementation-specific metrics, they don't need to be
|
||||
* consistent across implementations. They should probably be run-time
|
||||
* knobs.
|
||||
*/
|
||||
|
||||
/*
|
||||
* The largest extent that we'll try to allocate with fallocate. We're
|
||||
* trying not to completely consume a transactions data allocation all
|
||||
* at once. This is only allocation granularity, repeated allocations
|
||||
* can produce large contiguous extents.
|
||||
*/
|
||||
#define SCOUTFS_FALLOCATE_ALLOC_LIMIT \
|
||||
(128ULL * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
/*
|
||||
* The largest aligned region that we'll try to allocate at the end of
|
||||
* the file as it's extended. This is also limited to the current file
|
||||
* size so we can only waste at most twice the total file size when
|
||||
* files are less than this. We try to keep this around the point of
|
||||
* diminishing returns in streaming performance of common data devices
|
||||
* to limit waste.
|
||||
*/
|
||||
#define SCOUTFS_DATA_EXTEND_PREALLOC_LIMIT \
|
||||
(8ULL * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
/*
|
||||
* Small data allocations are satisfied by cached extents stored in
|
||||
* the run-time alloc struct to minimize item operations for small
|
||||
* block allocations. Large allocations come directly from btree
|
||||
* extent items, and this defines the threshold beetwen them.
|
||||
*/
|
||||
#define SCOUTFS_ALLOC_DATA_LG_THRESH \
|
||||
(8ULL * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
/*
|
||||
* Fill client alloc roots to the target when they fall below the lo
|
||||
* threshold.
|
||||
*
|
||||
* We're giving the client the most available meta blocks we can so that
|
||||
* it has the freedom to build large transactions before worrying that
|
||||
* it might run out of meta allocs during commits.
|
||||
*/
|
||||
#define SCOUTFS_SERVER_META_FILL_TARGET \
|
||||
SCOUTFS_ALLOC_LIST_MAX_BLOCKS
|
||||
#define SCOUTFS_SERVER_META_FILL_LO \
|
||||
(SCOUTFS_ALLOC_LIST_MAX_BLOCKS / 2)
|
||||
#define SCOUTFS_SERVER_DATA_FILL_TARGET \
|
||||
(4ULL * 1024 * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
#define SCOUTFS_SERVER_DATA_FILL_LO \
|
||||
(1ULL * 1024 * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
/*
|
||||
* Each of the server meta_alloc roots will try to keep a minimum amount
|
||||
* of free blocks. The server will swap roots when its current avail
|
||||
* falls below the threshold while the freed root is still above it. It
|
||||
* must have room for all the largest allocation attempted in a
|
||||
* transaction on the server.
|
||||
*/
|
||||
#define SCOUTFS_SERVER_META_ALLOC_MIN \
|
||||
(SCOUTFS_SERVER_META_FILL_TARGET * 2)
|
||||
|
||||
/*
|
||||
* A run-time use of a pair of persistent avail/freed roots as a
|
||||
* metadata allocator. It has the machinery needed to lock and avoid
|
||||
* recursion when dirtying the list blocks that are used during the
|
||||
* transaction.
|
||||
*/
|
||||
struct scoutfs_alloc {
|
||||
spinlock_t lock;
|
||||
struct mutex mutex;
|
||||
struct scoutfs_block *dirty_avail_bl;
|
||||
struct scoutfs_block *dirty_freed_bl;
|
||||
struct scoutfs_alloc_list_head avail;
|
||||
struct scoutfs_alloc_list_head freed;
|
||||
};
|
||||
|
||||
/*
|
||||
* A run-time data allocator. We have a cached extent in memory that is
|
||||
* a lot cheaper to work with than the extent items, and we have a
|
||||
* consistent record of the total_len that can be sampled outside of the
|
||||
* usual heavy serialization of the extent modifications.
|
||||
*/
|
||||
struct scoutfs_data_alloc {
|
||||
struct scoutfs_alloc_root root;
|
||||
struct scoutfs_extent cached;
|
||||
atomic64_t total_len;
|
||||
};
|
||||
|
||||
void scoutfs_alloc_init(struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_alloc_list_head *avail,
|
||||
struct scoutfs_alloc_list_head *freed);
|
||||
int scoutfs_alloc_prepare_commit(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri);
|
||||
|
||||
int scoutfs_alloc_meta(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, u64 *blkno);
|
||||
int scoutfs_free_meta(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, u64 blkno);
|
||||
|
||||
void scoutfs_dalloc_init(struct scoutfs_data_alloc *dalloc,
|
||||
struct scoutfs_alloc_root *data_avail);
|
||||
void scoutfs_dalloc_get_root(struct scoutfs_data_alloc *dalloc,
|
||||
struct scoutfs_alloc_root *data_avail);
|
||||
u64 scoutfs_dalloc_total_len(struct scoutfs_data_alloc *dalloc);
|
||||
int scoutfs_dalloc_return_cached(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_data_alloc *dalloc);
|
||||
int scoutfs_alloc_data(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_data_alloc *dalloc, u64 count,
|
||||
u64 *blkno_ret, u64 *count_ret);
|
||||
int scoutfs_free_data(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_alloc_root *root, u64 blkno, u64 count);
|
||||
|
||||
int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_alloc_root *dst,
|
||||
struct scoutfs_alloc_root *src, u64 total);
|
||||
|
||||
int scoutfs_alloc_fill_list(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_alloc_list_head *lhead,
|
||||
struct scoutfs_alloc_root *root,
|
||||
u64 lo, u64 target);
|
||||
int scoutfs_alloc_empty_list(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_alloc_root *root,
|
||||
struct scoutfs_alloc_list_head *lhead);
|
||||
int scoutfs_alloc_splice_list(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_alloc_list_head *dst,
|
||||
struct scoutfs_alloc_list_head *src);
|
||||
|
||||
bool scoutfs_alloc_meta_low(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc, u32 nr);
|
||||
|
||||
typedef int (*scoutfs_alloc_foreach_cb_t)(struct super_block *sb, void *arg,
|
||||
int owner, u64 id,
|
||||
bool meta, bool avail, u64 blocks);
|
||||
int scoutfs_alloc_foreach(struct super_block *sb,
|
||||
scoutfs_alloc_foreach_cb_t cb, void *arg);
|
||||
|
||||
#endif
|
||||
405
kmod/src/avl.c
Normal file
405
kmod/src/avl.c
Normal file
@@ -0,0 +1,405 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "avl.h"
|
||||
|
||||
/*
|
||||
* We use a simple avl to index items in btree blocks. The interface
|
||||
* looks a bit like the kernel rbtree interface in that the caller
|
||||
* manages locking and storage for the nodes. Node references are
|
||||
* stored as byte offsets from the root so that the implementation
|
||||
* doesn't have to know anything about the caller's container.
|
||||
*
|
||||
* We store the full height in each node, rather than just 2 bits for
|
||||
* the balance, so that we can use the extra redundancy to verify the
|
||||
* integrity of the tree.
|
||||
*/
|
||||
|
||||
static struct scoutfs_avl_node *node_ptr(struct scoutfs_avl_root *root,
|
||||
__le16 off)
|
||||
{
|
||||
return off ? (void *)root + le16_to_cpu(off) : NULL;
|
||||
}
|
||||
|
||||
static __le16 node_off(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *node)
|
||||
{
|
||||
return node ? cpu_to_le16((void *)node - (void *)root) : 0;
|
||||
}
|
||||
|
||||
static __u8 node_height(struct scoutfs_avl_node *node)
|
||||
{
|
||||
return node ? node->height : 0;
|
||||
}
|
||||
|
||||
struct scoutfs_avl_node *
|
||||
scoutfs_avl_search(struct scoutfs_avl_root *root,
|
||||
scoutfs_avl_compare_t compare, void *arg, int *cmp_ret,
|
||||
struct scoutfs_avl_node **par,
|
||||
struct scoutfs_avl_node **next,
|
||||
struct scoutfs_avl_node **prev)
|
||||
{
|
||||
struct scoutfs_avl_node *node = node_ptr(root, root->node);
|
||||
int cmp;
|
||||
|
||||
if (cmp_ret)
|
||||
*cmp_ret = -1;
|
||||
if (par)
|
||||
*par = NULL;
|
||||
if (next)
|
||||
*next = NULL;
|
||||
if (prev)
|
||||
*prev = NULL;
|
||||
|
||||
while (node) {
|
||||
cmp = compare(arg, node);
|
||||
if (par)
|
||||
*par = node;
|
||||
if (cmp_ret)
|
||||
*cmp_ret = cmp;
|
||||
if (cmp < 0) {
|
||||
if (next)
|
||||
*next = node;
|
||||
node = node_ptr(root, node->left);
|
||||
} else if (cmp > 0) {
|
||||
if (prev)
|
||||
*prev = node;
|
||||
node = node_ptr(root, node->right);
|
||||
} else {
|
||||
return node;
|
||||
}
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct scoutfs_avl_node *scoutfs_avl_first(struct scoutfs_avl_root *root)
|
||||
{
|
||||
struct scoutfs_avl_node *node = node_ptr(root, root->node);
|
||||
|
||||
while (node && node->left)
|
||||
node = node_ptr(root, node->left);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
struct scoutfs_avl_node *scoutfs_avl_last(struct scoutfs_avl_root *root)
|
||||
{
|
||||
struct scoutfs_avl_node *node = node_ptr(root, root->node);
|
||||
|
||||
while (node && node->right)
|
||||
node = node_ptr(root, node->right);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
struct scoutfs_avl_node *scoutfs_avl_next(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *node)
|
||||
{
|
||||
struct scoutfs_avl_node *parent;
|
||||
|
||||
if (node->right) {
|
||||
node = node_ptr(root, node->right);
|
||||
while (node->left)
|
||||
node = node_ptr(root, node->left);
|
||||
return node;
|
||||
}
|
||||
|
||||
while ((parent = node_ptr(root, node->parent)) &&
|
||||
node == node_ptr(root, parent->right))
|
||||
node = parent;
|
||||
|
||||
return parent;
|
||||
}
|
||||
|
||||
struct scoutfs_avl_node *scoutfs_avl_prev(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *node)
|
||||
{
|
||||
struct scoutfs_avl_node *parent;
|
||||
|
||||
if (node->left) {
|
||||
node = node_ptr(root, node->left);
|
||||
while (node->right)
|
||||
node = node_ptr(root, node->right);
|
||||
return node;
|
||||
}
|
||||
|
||||
while ((parent = node_ptr(root, node->parent)) &&
|
||||
node == node_ptr(root, parent->left))
|
||||
node = parent;
|
||||
|
||||
return parent;
|
||||
}
|
||||
|
||||
static void set_parent_left_right(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *parent,
|
||||
struct scoutfs_avl_node *old,
|
||||
struct scoutfs_avl_node *new)
|
||||
{
|
||||
__le16 *off;
|
||||
|
||||
if (parent == NULL)
|
||||
off = &root->node;
|
||||
else if (parent->left == node_off(root, old))
|
||||
off = &parent->left;
|
||||
else
|
||||
off = &parent->right;
|
||||
|
||||
*off = node_off(root, new);
|
||||
}
|
||||
|
||||
static void set_height(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *node)
|
||||
{
|
||||
struct scoutfs_avl_node *left = node_ptr(root, node->left);
|
||||
struct scoutfs_avl_node *right = node_ptr(root, node->right);
|
||||
|
||||
node->height = 1 + max(node_height(left), node_height(right));
|
||||
}
|
||||
|
||||
static int node_balance(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *node)
|
||||
{
|
||||
if (node == NULL)
|
||||
return 0;
|
||||
|
||||
return (int)node_height(node_ptr(root, node->right)) -
|
||||
(int)node_height(node_ptr(root, node->left));
|
||||
}
|
||||
|
||||
/*
|
||||
* d b
|
||||
* / \ rotate right -> / \
|
||||
* b e a d
|
||||
* / \ <- rotate left / \
|
||||
* a c c e
|
||||
*
|
||||
* The rotate functions are always called with the higher node as the
|
||||
* earlier argument. Links to a and e are constant. We have to update
|
||||
* the forward and back refs between parents and nodes for the three links
|
||||
* along root->[db]->[bd]->c.
|
||||
*/
|
||||
static void rotate_right(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *d)
|
||||
{
|
||||
struct scoutfs_avl_node *gpa = node_ptr(root, d->parent);
|
||||
struct scoutfs_avl_node *b = node_ptr(root, d->left);
|
||||
struct scoutfs_avl_node *c = node_ptr(root, b->right);
|
||||
|
||||
set_parent_left_right(root, gpa, d, b);
|
||||
b->parent = node_off(root, gpa);
|
||||
|
||||
b->right = node_off(root, d);
|
||||
d->parent = node_off(root, b);
|
||||
|
||||
d->left = node_off(root, c);
|
||||
if (c)
|
||||
c->parent = node_off(root, d);
|
||||
|
||||
set_height(root, d);
|
||||
set_height(root, b);
|
||||
}
|
||||
|
||||
static void rotate_left(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *b)
|
||||
{
|
||||
struct scoutfs_avl_node *gpa = node_ptr(root, b->parent);
|
||||
struct scoutfs_avl_node *d = node_ptr(root, b->right);
|
||||
struct scoutfs_avl_node *c = node_ptr(root, d->left);
|
||||
|
||||
set_parent_left_right(root, gpa, b, d);
|
||||
d->parent = node_off(root, gpa);
|
||||
|
||||
d->left = node_off(root, b);
|
||||
b->parent = node_off(root, d);
|
||||
|
||||
b->right = node_off(root, c);
|
||||
if (c)
|
||||
c->parent = node_off(root, b);
|
||||
|
||||
set_height(root, b);
|
||||
set_height(root, d);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check the balance factor for the given node and perform rotations if
|
||||
* its two child subtrees are too far out of balance. Return either the
|
||||
* node again or the root of the newly balanced subtree.
|
||||
*/
|
||||
static struct scoutfs_avl_node *
|
||||
rotate_imbalance(struct scoutfs_avl_root *root, struct scoutfs_avl_node *node)
|
||||
{
|
||||
int bal = node_balance(root, node);
|
||||
struct scoutfs_avl_node *child;
|
||||
|
||||
if (bal >= -1 && bal <= 1)
|
||||
return node;
|
||||
|
||||
if (bal > 0) {
|
||||
/* turn right-left case into right-right */
|
||||
child = node_ptr(root, node->right);
|
||||
if (node_balance(root, child) < 0)
|
||||
rotate_right(root, child);
|
||||
/* rotate left to address right-right */
|
||||
rotate_left(root, node);
|
||||
|
||||
} else {
|
||||
/* or do the mirror for the left- cases */
|
||||
child = node_ptr(root, node->left);
|
||||
if (node_balance(root, child) > 0)
|
||||
rotate_left(root, child);
|
||||
rotate_right(root, node);
|
||||
}
|
||||
|
||||
return node_ptr(root, node->parent);
|
||||
}
|
||||
|
||||
void scoutfs_avl_insert(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *parent,
|
||||
struct scoutfs_avl_node *node, int cmp)
|
||||
{
|
||||
node->parent = 0;
|
||||
node->left = 0;
|
||||
node->right = 0;
|
||||
set_height(root, node);
|
||||
memset(node->__pad, 0, sizeof(node->__pad));
|
||||
|
||||
if (parent == NULL) {
|
||||
root->node = node_off(root, node);
|
||||
node->parent = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
if (cmp < 0)
|
||||
parent->left = node_off(root, node);
|
||||
else
|
||||
parent->right = node_off(root, node);
|
||||
node->parent = node_off(root, parent);
|
||||
|
||||
while (parent) {
|
||||
set_height(root, parent);
|
||||
parent = rotate_imbalance(root, parent);
|
||||
parent = node_ptr(root, parent->parent);
|
||||
}
|
||||
}
|
||||
|
||||
static struct scoutfs_avl_node *avl_successor(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *node)
|
||||
{
|
||||
node = node_ptr(root, node->right);
|
||||
while (node->left)
|
||||
node = node_ptr(root, node->left);
|
||||
|
||||
return node;
|
||||
}
|
||||
|
||||
/*
 * Find a node's in-order successor and then swap the positions of the
 * two nodes with each other in the tree.  This is only tricky because
 * the successor can be a direct child of the node and if we weren't
 * careful we'd be modifying each of the nodes through the pointers
 * between them, so the successor's old parent and right child are
 * snapshotted up front.
 */
static void swap_with_successor(struct scoutfs_avl_root *root,
				struct scoutfs_avl_node *node)
{
	struct scoutfs_avl_node *succ = avl_successor(root, node);
	/* snapshot succ's links before they're overwritten below */
	struct scoutfs_avl_node *succ_par = node_ptr(root, succ->parent);
	struct scoutfs_avl_node *succ_right = node_ptr(root, succ->right);
	struct scoutfs_avl_node *parent;
	struct scoutfs_avl_node *left;
	struct scoutfs_avl_node *right;

	/* Link old node's parent and left child with the successor */
	succ->parent = node->parent;
	parent = node_ptr(root, succ->parent);
	set_parent_left_right(root, parent, node, succ);
	/* succ can't have a left child of its own, it's the left-most */
	succ->left = node->left;
	left = node_ptr(root, succ->left);
	if (left)
		left->parent = node_off(root, succ);

	/*
	 * Link the old node's right with successor and the old
	 * successor's parent with the node, they could have pointed to
	 * each other.
	 */
	if (succ_par == node) {
		/* successor was node's direct right child */
		succ->right = node_off(root, node);
		node->parent = node_off(root, succ);
	} else {
		succ->right = node->right;
		right = node_ptr(root, succ->right);
		if (right)
			right->parent = node_off(root, succ);
		set_parent_left_right(root, succ_par, succ, node);
		node->parent = node_off(root, succ_par);
	}

	/* Link the old successor's right with the node, it can't have left */
	node->right = node_off(root, succ_right);
	if (succ_right)
		succ_right->parent = node_off(root, node);
	node->left = 0;

	/* heights describe tree positions, which the nodes exchanged */
	swap(node->height, succ->height);
}
|
||||
|
||||
void scoutfs_avl_delete(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *node)
|
||||
{
|
||||
struct scoutfs_avl_node *parent;
|
||||
struct scoutfs_avl_node *child;
|
||||
|
||||
if (node->left && node->right)
|
||||
swap_with_successor(root, node);
|
||||
|
||||
parent = node_ptr(root, node->parent);
|
||||
child = node_ptr(root, node->left ?: node->right);
|
||||
|
||||
set_parent_left_right(root, parent, node, child);
|
||||
if (child)
|
||||
child->parent = node->parent;
|
||||
|
||||
while (parent) {
|
||||
set_height(root, parent);
|
||||
parent = rotate_imbalance(root, parent);
|
||||
parent = node_ptr(root, parent->parent);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Move the contents of a node to a new node location in memory. The
|
||||
* logical position of the node in the tree does not change.
|
||||
*/
|
||||
void scoutfs_avl_relocate(struct scoutfs_avl_root *root,
|
||||
struct scoutfs_avl_node *to,
|
||||
struct scoutfs_avl_node *from)
|
||||
{
|
||||
struct scoutfs_avl_node *parent = node_ptr(root, from->parent);
|
||||
struct scoutfs_avl_node *left = node_ptr(root, from->left);
|
||||
struct scoutfs_avl_node *right = node_ptr(root, from->right);
|
||||
|
||||
set_parent_left_right(root, parent, from, to);
|
||||
to->parent = from->parent;
|
||||
to->left = from->left;
|
||||
if (left)
|
||||
left->parent = node_off(root, to);
|
||||
to->right = from->right;
|
||||
if (right)
|
||||
right->parent = node_off(root, to);
|
||||
to->height = from->height;
|
||||
}
|
||||
30
kmod/src/avl.h
Normal file
30
kmod/src/avl.h
Normal file
@@ -0,0 +1,30 @@
|
||||
#ifndef _SCOUTFS_AVL_H_
#define _SCOUTFS_AVL_H_

#include "format.h"

/*
 * Comparison callback used by search: returns <0, 0, or >0 as the
 * caller's key (in @arg) sorts before, equal to, or after @node's key.
 */
typedef int (*scoutfs_avl_compare_t)(void *arg,
				     struct scoutfs_avl_node *node);

/*
 * Search for a key; also fills in the would-be parent and the logical
 * next/prev neighbours so callers can insert or iterate on a miss.
 */
struct scoutfs_avl_node *
scoutfs_avl_search(struct scoutfs_avl_root *root,
		   scoutfs_avl_compare_t compare, void *arg, int *cmp_ret,
		   struct scoutfs_avl_node **par,
		   struct scoutfs_avl_node **next,
		   struct scoutfs_avl_node **prev);
/* in-order traversal */
struct scoutfs_avl_node *scoutfs_avl_first(struct scoutfs_avl_root *root);
struct scoutfs_avl_node *scoutfs_avl_last(struct scoutfs_avl_root *root);
struct scoutfs_avl_node *scoutfs_avl_next(struct scoutfs_avl_root *root,
					  struct scoutfs_avl_node *node);
struct scoutfs_avl_node *scoutfs_avl_prev(struct scoutfs_avl_root *root,
					  struct scoutfs_avl_node *node);
/* insert under the parent found by a prior search; cmp orders the link */
void scoutfs_avl_insert(struct scoutfs_avl_root *root,
			struct scoutfs_avl_node *parent,
			struct scoutfs_avl_node *node, int cmp);
void scoutfs_avl_delete(struct scoutfs_avl_root *root,
			struct scoutfs_avl_node *node);
/* move a node's contents to a new memory location, links follow */
void scoutfs_avl_relocate(struct scoutfs_avl_root *root,
			  struct scoutfs_avl_node *to,
			  struct scoutfs_avl_node *from);

#endif
|
||||
220
kmod/src/block.c
220
kmod/src/block.c
@@ -19,7 +19,6 @@
|
||||
#include <linux/sched.h>
|
||||
#include <linux/bio.h>
|
||||
#include <linux/blkdev.h>
|
||||
#include <linux/rbtree.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "super.h"
|
||||
@@ -46,7 +45,7 @@
|
||||
struct block_info {
|
||||
struct super_block *sb;
|
||||
spinlock_t lock;
|
||||
struct rb_root root;
|
||||
struct radix_tree_root radix;
|
||||
struct list_head lru_list;
|
||||
u64 lru_nr;
|
||||
u64 lru_move_counter;
|
||||
@@ -59,22 +58,20 @@ struct block_info {
|
||||
#define DECLARE_BLOCK_INFO(sb, name) \
|
||||
struct block_info *name = SCOUTFS_SB(sb)->block_info
|
||||
|
||||
enum {
|
||||
enum block_status_bits {
|
||||
BLOCK_BIT_UPTODATE = 0, /* contents consistent with media */
|
||||
BLOCK_BIT_NEW, /* newly allocated, contents undefined */
|
||||
BLOCK_BIT_DIRTY, /* dirty, writer will write */
|
||||
BLOCK_BIT_IO_BUSY, /* bios are in flight */
|
||||
BLOCK_BIT_ERROR, /* saw IO error */
|
||||
BLOCK_BIT_DELETED, /* has been deleted from rbtree */
|
||||
BLOCK_BIT_DELETED, /* has been deleted from radix tree */
|
||||
BLOCK_BIT_PAGE_ALLOC, /* page (possibly high order) allocation */
|
||||
BLOCK_BIT_VIRT, /* mapped virt allocation */
|
||||
BLOCK_BIT_CRC_VALID, /* crc has been verified */
|
||||
BLOCK_BIT_VISITED, /* used by callers to track blocks */
|
||||
};
|
||||
|
||||
struct block_private {
|
||||
struct scoutfs_block bl;
|
||||
struct rb_node node;
|
||||
struct super_block *sb;
|
||||
atomic_t refcount;
|
||||
union {
|
||||
@@ -108,18 +105,18 @@ do { \
|
||||
* be refactored away.
|
||||
*/
|
||||
|
||||
__le32 scoutfs_block_calc_crc(struct scoutfs_block_header *hdr)
|
||||
__le32 scoutfs_block_calc_crc(struct scoutfs_block_header *hdr, u32 size)
|
||||
{
|
||||
int off = offsetof(struct scoutfs_block_header, crc) +
|
||||
FIELD_SIZEOF(struct scoutfs_block_header, crc);
|
||||
u32 calc = crc32c(~0, (char *)hdr + off, SCOUTFS_BLOCK_SIZE - off);
|
||||
u32 calc = crc32c(~0, (char *)hdr + off, size - off);
|
||||
|
||||
return cpu_to_le32(calc);
|
||||
}
|
||||
|
||||
bool scoutfs_block_valid_crc(struct scoutfs_block_header *hdr)
|
||||
bool scoutfs_block_valid_crc(struct scoutfs_block_header *hdr, u32 size)
|
||||
{
|
||||
return hdr->crc == scoutfs_block_calc_crc(hdr);
|
||||
return hdr->crc == scoutfs_block_calc_crc(hdr, size);
|
||||
}
|
||||
|
||||
bool scoutfs_block_valid_ref(struct super_block *sb,
|
||||
@@ -132,22 +129,6 @@ bool scoutfs_block_valid_ref(struct super_block *sb,
|
||||
hdr->blkno == blkno;
|
||||
}
|
||||
|
||||
bool scoutfs_block_tas_visited(struct super_block *sb,
|
||||
struct scoutfs_block *bl)
|
||||
{
|
||||
struct block_private *bp = BLOCK_PRIVATE(bl);
|
||||
|
||||
return test_bit(BLOCK_BIT_VISITED, &bp->bits) != 0;
|
||||
}
|
||||
|
||||
void scoutfs_block_clear_visited(struct super_block *sb,
|
||||
struct scoutfs_block *bl)
|
||||
{
|
||||
struct block_private *bp = BLOCK_PRIVATE(bl);
|
||||
|
||||
clear_bit(BLOCK_BIT_VISITED, &bp->bits);
|
||||
}
|
||||
|
||||
static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
|
||||
{
|
||||
struct block_private *bp;
|
||||
@@ -157,19 +138,20 @@ static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
|
||||
* more careful with a partial page allocator when allocating
|
||||
* blocks and would make the lru per-page instead of per-block.
|
||||
*/
|
||||
BUILD_BUG_ON(PAGE_SIZE > SCOUTFS_BLOCK_SIZE);
|
||||
BUILD_BUG_ON(PAGE_SIZE > SCOUTFS_BLOCK_LG_SIZE);
|
||||
|
||||
bp = kzalloc(sizeof(struct block_private), GFP_NOFS);
|
||||
if (!bp)
|
||||
goto out;
|
||||
|
||||
bp->page = alloc_pages(GFP_NOFS, SCOUTFS_BLOCK_PAGE_ORDER);
|
||||
bp->page = alloc_pages(GFP_NOFS | __GFP_NOWARN,
|
||||
SCOUTFS_BLOCK_LG_PAGE_ORDER);
|
||||
if (bp->page) {
|
||||
scoutfs_inc_counter(sb, block_cache_alloc_page_order);
|
||||
set_bit(BLOCK_BIT_PAGE_ALLOC, &bp->bits);
|
||||
bp->bl.data = page_address(bp->page);
|
||||
} else {
|
||||
bp->virt = __vmalloc(SCOUTFS_BLOCK_SIZE,
|
||||
bp->virt = __vmalloc(SCOUTFS_BLOCK_LG_SIZE,
|
||||
GFP_NOFS | __GFP_HIGHMEM, PAGE_KERNEL);
|
||||
if (!bp->virt) {
|
||||
kfree(bp);
|
||||
@@ -183,7 +165,6 @@ static struct block_private *block_alloc(struct super_block *sb, u64 blkno)
|
||||
}
|
||||
|
||||
bp->bl.blkno = blkno;
|
||||
RB_CLEAR_NODE(&bp->node);
|
||||
bp->sb = sb;
|
||||
atomic_set(&bp->refcount, 1);
|
||||
INIT_LIST_HEAD(&bp->lru_entry);
|
||||
@@ -206,7 +187,7 @@ static void block_free(struct super_block *sb, struct block_private *bp)
|
||||
TRACE_BLOCK(free, bp);
|
||||
|
||||
if (test_bit(BLOCK_BIT_PAGE_ALLOC, &bp->bits))
|
||||
__free_pages(bp->page, SCOUTFS_BLOCK_PAGE_ORDER);
|
||||
__free_pages(bp->page, SCOUTFS_BLOCK_LG_PAGE_ORDER);
|
||||
else if (test_bit(BLOCK_BIT_VIRT, &bp->bits))
|
||||
vfree(bp->virt);
|
||||
else
|
||||
@@ -253,39 +234,9 @@ static void block_put(struct super_block *sb, struct block_private *bp)
|
||||
}
|
||||
}
|
||||
|
||||
static struct block_private *walk_block_rbtree(struct rb_root *root,
|
||||
u64 blkno,
|
||||
struct block_private *ins)
|
||||
{
|
||||
struct rb_node **node = &root->rb_node;
|
||||
struct rb_node *parent = NULL;
|
||||
struct block_private *bp;
|
||||
int cmp;
|
||||
|
||||
while (*node) {
|
||||
parent = *node;
|
||||
bp = container_of(*node, struct block_private, node);
|
||||
|
||||
cmp = scoutfs_cmp_u64s(bp->bl.blkno, blkno);
|
||||
if (cmp == 0)
|
||||
return bp;
|
||||
else if (cmp < 0)
|
||||
node = &(*node)->rb_left;
|
||||
else
|
||||
node = &(*node)->rb_right;
|
||||
}
|
||||
|
||||
if (ins) {
|
||||
rb_link_node(&ins->node, parent, node);
|
||||
rb_insert_color(&ins->node, root);
|
||||
return ins;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a new block into the cache. The caller holds the lock.
|
||||
* Add a new block into the cache. The caller holds the lock and has
|
||||
* preloaded the radix.
|
||||
*/
|
||||
static void block_insert(struct super_block *sb, struct block_private *bp,
|
||||
u64 blkno)
|
||||
@@ -294,10 +245,9 @@ static void block_insert(struct super_block *sb, struct block_private *bp,
|
||||
|
||||
assert_spin_locked(&binf->lock);
|
||||
BUG_ON(!list_empty(&bp->lru_entry));
|
||||
BUG_ON(!RB_EMPTY_NODE(&bp->node));
|
||||
|
||||
atomic_inc(&bp->refcount);
|
||||
walk_block_rbtree(&binf->root, blkno, bp);
|
||||
radix_tree_insert(&binf->radix, blkno, bp);
|
||||
list_add_tail(&bp->lru_entry, &binf->lru_list);
|
||||
bp->lru_moved = ++binf->lru_move_counter;
|
||||
binf->lru_nr++;
|
||||
@@ -345,10 +295,11 @@ static void block_remove(struct super_block *sb, struct block_private *bp)
|
||||
{
|
||||
DECLARE_BLOCK_INFO(sb, binf);
|
||||
|
||||
assert_spin_locked(&binf->lock);
|
||||
|
||||
if (!test_and_set_bit(BLOCK_BIT_DELETED, &bp->bits)) {
|
||||
BUG_ON(list_empty(&bp->lru_entry));
|
||||
rb_erase(&bp->node, &binf->root);
|
||||
RB_CLEAR_NODE(&bp->node);
|
||||
radix_tree_delete(&binf->radix, bp->bl.blkno);
|
||||
list_del_init(&bp->lru_entry);
|
||||
binf->lru_nr--;
|
||||
block_put(sb, bp);
|
||||
@@ -368,18 +319,19 @@ static void block_remove_all(struct super_block *sb)
|
||||
{
|
||||
DECLARE_BLOCK_INFO(sb, binf);
|
||||
struct block_private *bp;
|
||||
struct rb_node *node;
|
||||
|
||||
for (node = rb_first(&binf->root); node; ) {
|
||||
bp = container_of(node, struct block_private, node);
|
||||
node = rb_next(node);
|
||||
spin_lock(&binf->lock);
|
||||
|
||||
while (radix_tree_gang_lookup(&binf->radix, (void **)&bp, 0, 1) == 1) {
|
||||
wait_event(binf->waitq, !io_busy(bp));
|
||||
block_remove(sb, bp);
|
||||
}
|
||||
|
||||
spin_unlock(&binf->lock);
|
||||
|
||||
WARN_ON_ONCE(!list_empty(&binf->lru_list));
|
||||
WARN_ON_ONCE(binf->lru_nr != 0);
|
||||
WARN_ON_ONCE(!RB_EMPTY_ROOT(&binf->root));
|
||||
WARN_ON_ONCE(binf->radix.rnode != NULL);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -434,6 +386,7 @@ static void block_bio_end_io(struct bio *bio, int err)
|
||||
static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
int rw)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct bio *bio = NULL;
|
||||
struct blk_plug plug;
|
||||
struct page *page;
|
||||
@@ -441,7 +394,7 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
sector_t sector;
|
||||
int ret = 0;
|
||||
|
||||
sector = bp->bl.blkno << (SCOUTFS_BLOCK_SHIFT - 9);
|
||||
sector = bp->bl.blkno << (SCOUTFS_BLOCK_LG_SHIFT - 9);
|
||||
|
||||
WARN_ON_ONCE(bp->bl.blkno == U64_MAX);
|
||||
WARN_ON_ONCE(sector == U64_MAX || sector == 0);
|
||||
@@ -453,16 +406,16 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
|
||||
blk_start_plug(&plug);
|
||||
|
||||
for (off = 0; off < SCOUTFS_BLOCK_SIZE; off += PAGE_SIZE) {
|
||||
for (off = 0; off < SCOUTFS_BLOCK_LG_SIZE; off += PAGE_SIZE) {
|
||||
if (!bio) {
|
||||
bio = bio_alloc(GFP_NOFS, SCOUTFS_PAGES_PER_BLOCK);
|
||||
bio = bio_alloc(GFP_NOFS, SCOUTFS_BLOCK_LG_PAGES_PER);
|
||||
if (!bio) {
|
||||
ret = -ENOMEM;
|
||||
break;
|
||||
}
|
||||
|
||||
bio->bi_sector = sector + (off >> 9);
|
||||
bio->bi_bdev = sb->s_bdev;
|
||||
bio->bi_bdev = sbi->meta_bdev;
|
||||
bio->bi_end_io = block_bio_end_io;
|
||||
bio->bi_private = bp;
|
||||
|
||||
@@ -497,8 +450,8 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
|
||||
/*
|
||||
* Return a reference to a cached block in the system, allocating a new
|
||||
* block if one isn't found in the rbtree. Its contents are undefined
|
||||
* if it's newly allocated.
|
||||
* block if one isn't found in the radix. Its contents are undefined if
|
||||
* it's newly allocated.
|
||||
*/
|
||||
static struct block_private *block_get(struct super_block *sb, u64 blkno)
|
||||
{
|
||||
@@ -507,11 +460,11 @@ static struct block_private *block_get(struct super_block *sb, u64 blkno)
|
||||
struct block_private *bp;
|
||||
int ret;
|
||||
|
||||
spin_lock(&binf->lock);
|
||||
bp = walk_block_rbtree(&binf->root, blkno, NULL);
|
||||
rcu_read_lock();
|
||||
bp = radix_tree_lookup(&binf->radix, blkno);
|
||||
if (bp)
|
||||
atomic_inc(&bp->refcount);
|
||||
spin_unlock(&binf->lock);
|
||||
rcu_read_unlock();
|
||||
|
||||
/* drop failed reads that interrupted waiters abandoned */
|
||||
if (bp && (test_bit(BLOCK_BIT_ERROR, &bp->bits) &&
|
||||
@@ -530,15 +483,20 @@ static struct block_private *block_get(struct super_block *sb, u64 blkno)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* could refactor to insert in one walk */
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* could use slot instead of lookup/insert */
|
||||
spin_lock(&binf->lock);
|
||||
found = walk_block_rbtree(&binf->root, blkno, NULL);
|
||||
found = radix_tree_lookup(&binf->radix, blkno);
|
||||
if (found) {
|
||||
atomic_inc(&found->refcount);
|
||||
} else {
|
||||
block_insert(sb, bp, blkno);
|
||||
}
|
||||
spin_unlock(&binf->lock);
|
||||
radix_tree_preload_end();
|
||||
|
||||
if (found) {
|
||||
block_put(sb, bp);
|
||||
@@ -634,6 +592,7 @@ void scoutfs_block_invalidate(struct super_block *sb, struct scoutfs_block *bl)
|
||||
}
|
||||
}
|
||||
|
||||
/* This is only used for large metadata blocks */
|
||||
bool scoutfs_block_consistent_ref(struct super_block *sb,
|
||||
struct scoutfs_block *bl,
|
||||
__le64 seq, __le64 blkno, u32 magic)
|
||||
@@ -643,7 +602,8 @@ bool scoutfs_block_consistent_ref(struct super_block *sb,
|
||||
struct scoutfs_block_header *hdr = bl->data;
|
||||
|
||||
if (!test_bit(BLOCK_BIT_CRC_VALID, &bp->bits)) {
|
||||
if (hdr->crc != scoutfs_block_calc_crc(hdr))
|
||||
if (hdr->crc !=
|
||||
scoutfs_block_calc_crc(hdr, SCOUTFS_BLOCK_LG_SIZE))
|
||||
return false;
|
||||
set_bit(BLOCK_BIT_CRC_VALID, &bp->bits);
|
||||
}
|
||||
@@ -722,7 +682,7 @@ int scoutfs_block_writer_write(struct super_block *sb,
|
||||
/* checksum everything to reduce time between io submission merging */
|
||||
list_for_each_entry(bp, &wri->dirty_list, dirty_entry) {
|
||||
hdr = bp->bl.data;
|
||||
hdr->crc = scoutfs_block_calc_crc(hdr);
|
||||
hdr->crc = scoutfs_block_calc_crc(hdr, SCOUTFS_BLOCK_LG_SIZE);
|
||||
}
|
||||
|
||||
blk_start_plug(&plug);
|
||||
@@ -810,44 +770,6 @@ void scoutfs_block_writer_forget(struct super_block *sb,
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Change a cached block's location. We're careful to only change its
|
||||
* position in the rbtree. If we find another block existing at the new
|
||||
* location then we remove it from the cache and forget it if it was
|
||||
* dirty.
|
||||
*/
|
||||
void scoutfs_block_move(struct super_block *sb,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_block *bl, u64 blkno)
|
||||
{
|
||||
DECLARE_BLOCK_INFO(sb, binf);
|
||||
struct block_private *bp = BLOCK_PRIVATE(bl);
|
||||
struct block_private *existing = NULL;
|
||||
|
||||
spin_lock(&binf->lock);
|
||||
|
||||
existing = walk_block_rbtree(&binf->root, blkno, NULL);
|
||||
if (existing) {
|
||||
/* only nesting of binf and wri locks */
|
||||
if (test_bit(BLOCK_BIT_DIRTY, &bp->bits)) {
|
||||
spin_lock(&wri->lock);
|
||||
if (test_bit(BLOCK_BIT_DIRTY, &bp->bits))
|
||||
block_forget(sb, wri, bp);
|
||||
spin_unlock(&wri->lock);
|
||||
}
|
||||
block_remove(sb, existing);
|
||||
}
|
||||
|
||||
rb_erase(&bp->node, &binf->root);
|
||||
RB_CLEAR_NODE(&bp->node);
|
||||
bp->bl.blkno = blkno;
|
||||
walk_block_rbtree(&binf->root, blkno, bp);
|
||||
|
||||
TRACE_BLOCK(move, bp);
|
||||
|
||||
spin_unlock(&binf->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller has ensured that no more dirtying will take place. This
|
||||
* helps the caller avoid doing a bunch of work before calling into the
|
||||
@@ -866,7 +788,7 @@ bool scoutfs_block_writer_has_dirty(struct super_block *sb,
|
||||
u64 scoutfs_block_writer_dirty_bytes(struct super_block *sb,
|
||||
struct scoutfs_block_writer *wri)
|
||||
{
|
||||
return wri->nr_dirty_blocks * SCOUTFS_BLOCK_SIZE;
|
||||
return wri->nr_dirty_blocks * SCOUTFS_BLOCK_LG_SIZE;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -916,12 +838,9 @@ static int block_shrink(struct shrinker *shrink, struct shrink_control *sc)
|
||||
spin_unlock(&binf->lock);
|
||||
|
||||
out:
|
||||
return min_t(u64, binf->lru_nr * SCOUTFS_PAGES_PER_BLOCK, INT_MAX);
|
||||
return min_t(u64, binf->lru_nr * SCOUTFS_BLOCK_LG_PAGES_PER, INT_MAX);
|
||||
}
|
||||
|
||||
#define SCOUTFS_SM_BLOCK_SHIFT 12
|
||||
#define SCOUTFS_SM_BLOCK_SIZE (1 << SCOUTFS_SM_BLOCK_SHIFT)
|
||||
|
||||
struct sm_block_completion {
|
||||
struct completion comp;
|
||||
int err;
|
||||
@@ -946,7 +865,7 @@ static void sm_block_bio_end_io(struct bio *bio, int err)
|
||||
* only layer that sees the full block buffer so we pass the calculated
|
||||
* crc to the caller for them to check in their context.
|
||||
*/
|
||||
static int sm_block_io(struct super_block *sb, int rw, u64 blkno,
|
||||
static int sm_block_io(struct block_device *bdev, int rw, u64 blkno,
|
||||
struct scoutfs_block_header *hdr, size_t len,
|
||||
__le32 *blk_crc)
|
||||
{
|
||||
@@ -956,11 +875,9 @@ static int sm_block_io(struct super_block *sb, int rw, u64 blkno,
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
|
||||
BUILD_BUG_ON(PAGE_SIZE < SCOUTFS_SM_BLOCK_SIZE);
|
||||
/* block calc crc is assuming block size, they'll be different later */
|
||||
BUILD_BUG_ON(SCOUTFS_SM_BLOCK_SIZE != SCOUTFS_BLOCK_SIZE);
|
||||
BUILD_BUG_ON(PAGE_SIZE < SCOUTFS_BLOCK_SM_SIZE);
|
||||
|
||||
if (WARN_ON_ONCE(len > SCOUTFS_SM_BLOCK_SIZE) ||
|
||||
if (WARN_ON_ONCE(len > SCOUTFS_BLOCK_SM_SIZE) ||
|
||||
WARN_ON_ONCE(!(rw & WRITE) && !blk_crc))
|
||||
return -EINVAL;
|
||||
|
||||
@@ -972,10 +889,11 @@ static int sm_block_io(struct super_block *sb, int rw, u64 blkno,
|
||||
|
||||
if (rw & WRITE) {
|
||||
memcpy(pg_hdr, hdr, len);
|
||||
if (len < SCOUTFS_SM_BLOCK_SIZE)
|
||||
if (len < SCOUTFS_BLOCK_SM_SIZE)
|
||||
memset((char *)pg_hdr + len, 0,
|
||||
SCOUTFS_SM_BLOCK_SIZE - len);
|
||||
pg_hdr->crc = scoutfs_block_calc_crc(pg_hdr);
|
||||
SCOUTFS_BLOCK_SM_SIZE - len);
|
||||
pg_hdr->crc = scoutfs_block_calc_crc(pg_hdr,
|
||||
SCOUTFS_BLOCK_SM_SIZE);
|
||||
}
|
||||
|
||||
bio = bio_alloc(GFP_NOFS, 1);
|
||||
@@ -984,11 +902,11 @@ static int sm_block_io(struct super_block *sb, int rw, u64 blkno,
|
||||
goto out;
|
||||
}
|
||||
|
||||
bio->bi_sector = blkno << (SCOUTFS_SM_BLOCK_SHIFT - 9);
|
||||
bio->bi_bdev = sb->s_bdev;
|
||||
bio->bi_sector = blkno << (SCOUTFS_BLOCK_SM_SHIFT - 9);
|
||||
bio->bi_bdev = bdev;
|
||||
bio->bi_end_io = sm_block_bio_end_io;
|
||||
bio->bi_private = &sbc;
|
||||
bio_add_page(bio, page, SCOUTFS_SM_BLOCK_SIZE, 0);
|
||||
bio_add_page(bio, page, SCOUTFS_BLOCK_SM_SIZE, 0);
|
||||
|
||||
init_completion(&sbc.comp);
|
||||
sbc.err = 0;
|
||||
@@ -1000,32 +918,44 @@ static int sm_block_io(struct super_block *sb, int rw, u64 blkno,
|
||||
|
||||
if (ret == 0 && !(rw & WRITE)) {
|
||||
memcpy(hdr, pg_hdr, len);
|
||||
*blk_crc = scoutfs_block_calc_crc(pg_hdr);
|
||||
*blk_crc = scoutfs_block_calc_crc(pg_hdr,
|
||||
SCOUTFS_BLOCK_SM_SIZE);
|
||||
}
|
||||
out:
|
||||
__free_page(page);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_block_read_sm(struct super_block *sb, u64 blkno,
|
||||
int scoutfs_block_read_sm(struct super_block *sb,
|
||||
struct block_device *bdev, u64 blkno,
|
||||
struct scoutfs_block_header *hdr, size_t len,
|
||||
__le32 *blk_crc)
|
||||
{
|
||||
return sm_block_io(sb, READ, blkno, hdr, len, blk_crc);
|
||||
return sm_block_io(bdev, READ, blkno, hdr, len, blk_crc);
|
||||
}
|
||||
|
||||
int scoutfs_block_write_sm(struct super_block *sb, u64 blkno,
|
||||
int scoutfs_block_write_sm(struct super_block *sb,
|
||||
struct block_device *bdev, u64 blkno,
|
||||
struct scoutfs_block_header *hdr, size_t len)
|
||||
{
|
||||
return sm_block_io(sb, WRITE, blkno, hdr, len, NULL);
|
||||
return sm_block_io(bdev, WRITE, blkno, hdr, len, NULL);
|
||||
}
|
||||
|
||||
int scoutfs_block_setup(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct block_info *binf;
|
||||
loff_t size;
|
||||
int ret;
|
||||
|
||||
/* we store blknos in longs in the radix */
|
||||
size = i_size_read(sb->s_bdev->bd_inode);
|
||||
if ((size >> SCOUTFS_BLOCK_LG_SHIFT) >= LONG_MAX) {
|
||||
scoutfs_err(sb, "Cant reference all blocks in %llu byte device with %u bit long radix tree indexes",
|
||||
size, BITS_PER_LONG);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
binf = kzalloc(sizeof(struct block_info), GFP_KERNEL);
|
||||
if (!binf) {
|
||||
ret = -ENOMEM;
|
||||
@@ -1034,7 +964,7 @@ int scoutfs_block_setup(struct super_block *sb)
|
||||
|
||||
binf->sb = sb;
|
||||
spin_lock_init(&binf->lock);
|
||||
binf->root = RB_ROOT;
|
||||
INIT_RADIX_TREE(&binf->radix, GFP_ATOMIC); /* insertion preloads */
|
||||
INIT_LIST_HEAD(&binf->lru_list);
|
||||
init_waitqueue_head(&binf->waitq);
|
||||
binf->shrinker.shrink = block_shrink;
|
||||
|
||||
@@ -10,17 +10,14 @@ struct scoutfs_block_writer {
|
||||
struct scoutfs_block {
|
||||
u64 blkno;
|
||||
void *data;
|
||||
void *priv;
|
||||
};
|
||||
|
||||
__le32 scoutfs_block_calc_crc(struct scoutfs_block_header *hdr);
|
||||
bool scoutfs_block_valid_crc(struct scoutfs_block_header *hdr);
|
||||
__le32 scoutfs_block_calc_crc(struct scoutfs_block_header *hdr, u32 size);
|
||||
bool scoutfs_block_valid_crc(struct scoutfs_block_header *hdr, u32 size);
|
||||
bool scoutfs_block_valid_ref(struct super_block *sb,
|
||||
struct scoutfs_block_header *hdr,
|
||||
__le64 seq, __le64 blkno);
|
||||
bool scoutfs_block_tas_visited(struct super_block *sb,
|
||||
struct scoutfs_block *bl);
|
||||
void scoutfs_block_clear_visited(struct super_block *sb,
|
||||
struct scoutfs_block *bl);
|
||||
|
||||
struct scoutfs_block *scoutfs_block_create(struct super_block *sb, u64 blkno);
|
||||
struct scoutfs_block *scoutfs_block_read(struct super_block *sb, u64 blkno);
|
||||
@@ -44,18 +41,17 @@ void scoutfs_block_writer_forget_all(struct super_block *sb,
|
||||
void scoutfs_block_writer_forget(struct super_block *sb,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_block *bl);
|
||||
void scoutfs_block_move(struct super_block *sb,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_block *bl, u64 blkno);
|
||||
bool scoutfs_block_writer_has_dirty(struct super_block *sb,
|
||||
struct scoutfs_block_writer *wri);
|
||||
u64 scoutfs_block_writer_dirty_bytes(struct super_block *sb,
|
||||
struct scoutfs_block_writer *wri);
|
||||
|
||||
int scoutfs_block_read_sm(struct super_block *sb, u64 blkno,
|
||||
int scoutfs_block_read_sm(struct super_block *sb,
|
||||
struct block_device *bdev, u64 blkno,
|
||||
struct scoutfs_block_header *hdr, size_t len,
|
||||
__le32 *blk_crc);
|
||||
int scoutfs_block_write_sm(struct super_block *sb, u64 blkno,
|
||||
int scoutfs_block_write_sm(struct super_block *sb,
|
||||
struct block_device *bdev, u64 blkno,
|
||||
struct scoutfs_block_header *hdr, size_t len);
|
||||
|
||||
int scoutfs_block_setup(struct super_block *sb);
|
||||
|
||||
1566
kmod/src/btree.c
1566
kmod/src/btree.c
File diff suppressed because it is too large
Load Diff
@@ -3,15 +3,14 @@
|
||||
|
||||
#include <linux/uio.h>
|
||||
|
||||
struct scoutfs_radix_allocator;
|
||||
struct scoutfs_alloc;
|
||||
struct scoutfs_block_writer;
|
||||
struct scoutfs_block;
|
||||
|
||||
struct scoutfs_btree_item_ref {
|
||||
struct super_block *sb;
|
||||
struct scoutfs_block *bl;
|
||||
void *key;
|
||||
unsigned key_len;
|
||||
struct scoutfs_key *key;
|
||||
void *val;
|
||||
unsigned val_len;
|
||||
};
|
||||
@@ -19,50 +18,69 @@ struct scoutfs_btree_item_ref {
|
||||
#define SCOUTFS_BTREE_ITEM_REF(name) \
|
||||
struct scoutfs_btree_item_ref name = {NULL,}
|
||||
|
||||
/* caller gives an item to the callback */
|
||||
typedef int (*scoutfs_btree_item_cb)(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
void *val, int val_len, void *arg);
|
||||
|
||||
int scoutfs_btree_lookup(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
void *key, unsigned key_len,
|
||||
/* simple singly-linked list of items */
|
||||
struct scoutfs_btree_item_list {
|
||||
struct scoutfs_btree_item_list *next;
|
||||
struct scoutfs_key key;
|
||||
int val_len;
|
||||
u8 val[0];
|
||||
};
|
||||
|
||||
int scoutfs_btree_lookup(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_item_ref *iref);
|
||||
int scoutfs_btree_insert(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
void *key, unsigned key_len,
|
||||
struct scoutfs_key *key,
|
||||
void *val, unsigned val_len);
|
||||
int scoutfs_btree_update(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
void *key, unsigned key_len,
|
||||
struct scoutfs_key *key,
|
||||
void *val, unsigned val_len);
|
||||
int scoutfs_btree_force(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
void *key, unsigned key_len,
|
||||
struct scoutfs_key *key,
|
||||
void *val, unsigned val_len);
|
||||
int scoutfs_btree_delete(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
void *key, unsigned key_len);
|
||||
struct scoutfs_key *key);
|
||||
int scoutfs_btree_next(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
void *key, unsigned key_len,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_item_ref *iref);
|
||||
int scoutfs_btree_after(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
void *key, unsigned key_len,
|
||||
struct scoutfs_btree_item_ref *iref);
|
||||
int scoutfs_btree_prev(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
void *key, unsigned key_len,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_item_ref *iref);
|
||||
int scoutfs_btree_before(struct super_block *sb, struct scoutfs_btree_root *root,
|
||||
void *key, unsigned key_len,
|
||||
struct scoutfs_btree_item_ref *iref);
|
||||
int scoutfs_btree_dirty(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
void *key, unsigned key_len);
|
||||
struct scoutfs_key *key);
|
||||
|
||||
int scoutfs_btree_read_items(struct super_block *sb,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_btree_item_cb cb, void *arg);
|
||||
int scoutfs_btree_insert_list(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_btree_item_list *lst);
|
||||
|
||||
void scoutfs_btree_put_iref(struct scoutfs_btree_item_ref *iref);
|
||||
|
||||
|
||||
@@ -108,19 +108,27 @@ int scoutfs_client_commit_log_trees(struct super_block *sb,
|
||||
lt, sizeof(*lt), NULL, 0);
|
||||
}
|
||||
|
||||
int scoutfs_client_get_roots(struct super_block *sb,
|
||||
struct scoutfs_net_roots *roots)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_ROOTS,
|
||||
NULL, 0, roots, sizeof(*roots));
|
||||
}
|
||||
|
||||
int scoutfs_client_advance_seq(struct super_block *sb, u64 *seq)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
__le64 before = cpu_to_le64p(seq);
|
||||
__le64 after;
|
||||
__le64 leseq;
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_ADVANCE_SEQ,
|
||||
&before, sizeof(before),
|
||||
&after, sizeof(after));
|
||||
NULL, 0, &leseq, sizeof(leseq));
|
||||
if (ret == 0)
|
||||
*seq = le64_to_cpu(after);
|
||||
*seq = le64_to_cpu(leseq);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -140,24 +148,13 @@ int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq)
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_client_statfs(struct super_block *sb,
|
||||
struct scoutfs_net_statfs *nstatfs)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_STATFS, NULL, 0,
|
||||
nstatfs,
|
||||
sizeof(struct scoutfs_net_statfs));
|
||||
}
|
||||
|
||||
/* process an incoming grant response from the server */
|
||||
static int client_lock_response(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
void *resp, unsigned int resp_len,
|
||||
int error, void *data)
|
||||
{
|
||||
if (resp_len != sizeof(struct scoutfs_net_lock))
|
||||
if (resp_len != sizeof(struct scoutfs_net_lock_grant_response))
|
||||
return -EINVAL;
|
||||
|
||||
/* XXX error? */
|
||||
@@ -200,6 +197,28 @@ int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
|
||||
net_id, 0, nlr, bytes);
|
||||
}
|
||||
|
||||
/* Find srch files that need to be compacted. */
|
||||
int scoutfs_client_srch_get_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *sc)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
|
||||
NULL, 0, sc, sizeof(*sc));
|
||||
}
|
||||
|
||||
/* Commit the result of a srch file compaction. */
|
||||
int scoutfs_client_srch_commit_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *res)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
|
||||
res, sizeof(*res), NULL, 0);
|
||||
}
|
||||
|
||||
/* The client is receiving a invalidation request from the server */
|
||||
static int client_lock(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn, u8 cmd, u64 id,
|
||||
@@ -261,10 +280,10 @@ static int client_greeting(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (gr->format_hash != super->format_hash) {
|
||||
if (gr->version != super->version) {
|
||||
scoutfs_warn(sb, "server sent format 0x%llx, client has 0x%llx",
|
||||
le64_to_cpu(gr->format_hash),
|
||||
le64_to_cpu(super->format_hash));
|
||||
le64_to_cpu(gr->version),
|
||||
le64_to_cpu(super->version));
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@@ -373,7 +392,7 @@ static void scoutfs_client_connect_worker(struct work_struct *work)
|
||||
|
||||
/* send a greeting to verify endpoints of each connection */
|
||||
greet.fsid = super->hdr.fsid;
|
||||
greet.format_hash = super->format_hash;
|
||||
greet.version = super->version;
|
||||
greet.server_term = cpu_to_le64(client->server_term);
|
||||
greet.unmount_barrier = cpu_to_le64(client->greeting_umb);
|
||||
greet.rid = cpu_to_le64(sbi->rid);
|
||||
|
||||
@@ -7,17 +7,21 @@ int scoutfs_client_get_log_trees(struct super_block *sb,
|
||||
struct scoutfs_log_trees *lt);
|
||||
int scoutfs_client_commit_log_trees(struct super_block *sb,
|
||||
struct scoutfs_log_trees *lt);
|
||||
int scoutfs_client_get_roots(struct super_block *sb,
|
||||
struct scoutfs_net_roots *roots);
|
||||
u64 *scoutfs_client_bulk_alloc(struct super_block *sb);
|
||||
int scoutfs_client_advance_seq(struct super_block *sb, u64 *seq);
|
||||
int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq);
|
||||
int scoutfs_client_statfs(struct super_block *sb,
|
||||
struct scoutfs_net_statfs *nstatfs);
|
||||
int scoutfs_client_lock_request(struct super_block *sb,
|
||||
struct scoutfs_net_lock *nl);
|
||||
int scoutfs_client_lock_response(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_net_lock *nl);
|
||||
int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_net_lock_recover *nlr);
|
||||
int scoutfs_client_srch_get_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *sc);
|
||||
int scoutfs_client_srch_commit_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *res);
|
||||
|
||||
int scoutfs_client_setup(struct super_block *sb);
|
||||
void scoutfs_client_destroy(struct super_block *sb);
|
||||
|
||||
319
kmod/src/count.h
319
kmod/src/count.h
@@ -1,319 +0,0 @@
|
||||
#ifndef _SCOUTFS_COUNT_H_
|
||||
#define _SCOUTFS_COUNT_H_
|
||||
|
||||
/*
|
||||
* Our estimate of the space consumed while dirtying items is based on
|
||||
* the number of items and the size of their values.
|
||||
*
|
||||
* The estimate is still a read-only input to entering the transaction.
|
||||
* We'd like to use it as a clean rhs arg to hold_trans. We define SIC_
|
||||
* functions which return the count struct. This lets us have a single
|
||||
* arg and avoid bugs in initializing and passing in struct pointers
|
||||
* from callers. The internal __count functions are used compose an
|
||||
* estimate out of the sets of items it manipulates. We program in much
|
||||
* clearer C instead of in the preprocessor.
|
||||
*
|
||||
* Compilers are able to collapse the inlines into constants for the
|
||||
* constant estimates.
|
||||
*/
|
||||
|
||||
struct scoutfs_item_count {
|
||||
signed items;
|
||||
signed vals;
|
||||
};
|
||||
|
||||
/* The caller knows exactly what they're doing. */
|
||||
static inline const struct scoutfs_item_count SIC_EXACT(signed items,
|
||||
signed vals)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {
|
||||
.items = items,
|
||||
.vals = vals,
|
||||
};
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocating an inode creates a new set of indexed items.
|
||||
*/
|
||||
static inline void __count_alloc_inode(struct scoutfs_item_count *cnt)
|
||||
{
|
||||
const int nr_indices = SCOUTFS_INODE_INDEX_NR;
|
||||
|
||||
cnt->items += 1 + nr_indices;
|
||||
cnt->vals += sizeof(struct scoutfs_inode);
|
||||
}
|
||||
|
||||
/*
|
||||
* Dirtying an inode dirties the inode item and can delete and create
|
||||
* the full set of indexed items.
|
||||
*/
|
||||
static inline void __count_dirty_inode(struct scoutfs_item_count *cnt)
|
||||
{
|
||||
const int nr_indices = 2 * SCOUTFS_INODE_INDEX_NR;
|
||||
|
||||
cnt->items += 1 + nr_indices;
|
||||
cnt->vals += sizeof(struct scoutfs_inode);
|
||||
}
|
||||
|
||||
static inline const struct scoutfs_item_count SIC_ALLOC_INODE(void)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
|
||||
__count_alloc_inode(&cnt);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static inline const struct scoutfs_item_count SIC_DIRTY_INODE(void)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
|
||||
__count_dirty_inode(&cnt);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Directory entries are stored in three items.
|
||||
*/
|
||||
static inline void __count_dirents(struct scoutfs_item_count *cnt,
|
||||
unsigned name_len)
|
||||
{
|
||||
cnt->items += 3;
|
||||
cnt->vals += 3 * offsetof(struct scoutfs_dirent, name[name_len]);
|
||||
}
|
||||
|
||||
static inline void __count_sym_target(struct scoutfs_item_count *cnt,
|
||||
unsigned size)
|
||||
{
|
||||
unsigned nr = DIV_ROUND_UP(size, SCOUTFS_MAX_VAL_SIZE);
|
||||
|
||||
cnt->items += nr;
|
||||
cnt->vals += size;
|
||||
}
|
||||
|
||||
static inline void __count_orphan(struct scoutfs_item_count *cnt)
|
||||
{
|
||||
|
||||
cnt->items += 1;
|
||||
}
|
||||
|
||||
static inline void __count_mknod(struct scoutfs_item_count *cnt,
|
||||
unsigned name_len)
|
||||
{
|
||||
__count_alloc_inode(cnt);
|
||||
__count_dirents(cnt, name_len);
|
||||
__count_dirty_inode(cnt);
|
||||
}
|
||||
|
||||
static inline const struct scoutfs_item_count SIC_MKNOD(unsigned name_len)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
|
||||
__count_mknod(&cnt, name_len);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Dropping the inode deletes all its items. Potentially enormous numbers
|
||||
* of items (data mapping, xattrs) are deleted in their own transactions.
|
||||
*/
|
||||
static inline const struct scoutfs_item_count SIC_DROP_INODE(int mode,
|
||||
u64 size)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
|
||||
if (S_ISLNK(mode))
|
||||
__count_sym_target(&cnt, size);
|
||||
__count_dirty_inode(&cnt);
|
||||
__count_orphan(&cnt);
|
||||
|
||||
cnt.vals = 0;
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static inline const struct scoutfs_item_count SIC_LINK(unsigned name_len)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
|
||||
__count_dirents(&cnt, name_len);
|
||||
__count_dirty_inode(&cnt);
|
||||
__count_dirty_inode(&cnt);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Unlink can add orphan items.
|
||||
*/
|
||||
static inline const struct scoutfs_item_count SIC_UNLINK(unsigned name_len)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
|
||||
__count_dirents(&cnt, name_len);
|
||||
__count_dirty_inode(&cnt);
|
||||
__count_dirty_inode(&cnt);
|
||||
__count_orphan(&cnt);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
static inline const struct scoutfs_item_count SIC_SYMLINK(unsigned name_len,
|
||||
unsigned size)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
|
||||
__count_mknod(&cnt, name_len);
|
||||
__count_sym_target(&cnt, size);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* This assumes the worst case of a rename between directories that
|
||||
* unlinks an existing target. That'll be worse than the common case
|
||||
* by a few hundred bytes.
|
||||
*/
|
||||
static inline const struct scoutfs_item_count SIC_RENAME(unsigned old_len,
|
||||
unsigned new_len)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
|
||||
/* dirty dirs and inodes */
|
||||
__count_dirty_inode(&cnt);
|
||||
__count_dirty_inode(&cnt);
|
||||
__count_dirty_inode(&cnt);
|
||||
__count_dirty_inode(&cnt);
|
||||
|
||||
/* unlink old and new, link new */
|
||||
__count_dirents(&cnt, old_len);
|
||||
__count_dirents(&cnt, new_len);
|
||||
__count_dirents(&cnt, new_len);
|
||||
|
||||
/* orphan the existing target */
|
||||
__count_orphan(&cnt);
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Creating an xattr results in a dirty set of items with values that
|
||||
* store the xattr header, name, and value. There's always at least one
|
||||
* item with the header and name. Any previously existing items are
|
||||
* deleted which dirties their key but removes their value. The two
|
||||
* sets of items are indexed by different ids so their items don't
|
||||
* overlap. If the xattr name is indexed then we modify one xattr index
|
||||
* item.
|
||||
*/
|
||||
static inline const struct scoutfs_item_count SIC_XATTR_SET(unsigned old_parts,
|
||||
bool creating,
|
||||
unsigned name_len,
|
||||
unsigned size,
|
||||
bool indexed)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
unsigned int new_parts;
|
||||
|
||||
__count_dirty_inode(&cnt);
|
||||
|
||||
if (old_parts)
|
||||
cnt.items += old_parts;
|
||||
if (indexed)
|
||||
cnt.items++;
|
||||
|
||||
if (creating) {
|
||||
new_parts = SCOUTFS_XATTR_NR_PARTS(name_len, size);
|
||||
|
||||
cnt.items += new_parts;
|
||||
cnt.vals += sizeof(struct scoutfs_xattr) + name_len + size;
|
||||
}
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* write_begin can have to allocate all the blocks in the page and can
|
||||
* have to add a big allocation from the server to do so:
|
||||
* - merge added free extents from the server
|
||||
* - remove a free extent per block
|
||||
* - remove an offline extent for every other block
|
||||
* - add a file extent per block
|
||||
*/
|
||||
static inline const struct scoutfs_item_count SIC_WRITE_BEGIN(void)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
unsigned nr_free = (1 + SCOUTFS_BLOCKS_PER_PAGE) * 3;
|
||||
unsigned nr_file = (DIV_ROUND_UP(SCOUTFS_BLOCKS_PER_PAGE, 2) +
|
||||
SCOUTFS_BLOCKS_PER_PAGE) * 3;
|
||||
|
||||
__count_dirty_inode(&cnt);
|
||||
|
||||
cnt.items += nr_free + nr_file;
|
||||
cnt.vals += nr_file;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Truncating an extent can:
|
||||
* - delete existing file extent,
|
||||
* - create two surrounding file extents,
|
||||
* - add an offline file extent,
|
||||
* - delete two existing free extents
|
||||
* - create a merged free extent
|
||||
*/
|
||||
static inline const struct scoutfs_item_count
|
||||
SIC_TRUNC_EXTENT(struct inode *inode)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
unsigned int nr_file = 1 + 2 + 1;
|
||||
unsigned int nr_free = (2 + 1) * 2;
|
||||
|
||||
if (inode)
|
||||
__count_dirty_inode(&cnt);
|
||||
|
||||
cnt.items += nr_file + nr_free;
|
||||
cnt.vals += nr_file;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fallocating an extent can, at most:
|
||||
* - allocate from the server: delete two free and insert merged
|
||||
* - free an allocated extent: delete one and create two split
|
||||
* - remove an unallocated file extent: delete one and create two split
|
||||
* - add an fallocated flie extent: delete two and inset one merged
|
||||
*/
|
||||
static inline const struct scoutfs_item_count SIC_FALLOCATE_ONE(void)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
unsigned int nr_free = ((1 + 2) * 2) * 2;
|
||||
unsigned int nr_file = (1 + 2) * 2;
|
||||
|
||||
__count_dirty_inode(&cnt);
|
||||
|
||||
cnt.items += nr_free + nr_file;
|
||||
cnt.vals += nr_file;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
/*
|
||||
* ioc_setattr_more can dirty the inode and add a single offline extent.
|
||||
*/
|
||||
static inline const struct scoutfs_item_count SIC_SETATTR_MORE(void)
|
||||
{
|
||||
struct scoutfs_item_count cnt = {0,};
|
||||
|
||||
__count_dirty_inode(&cnt);
|
||||
|
||||
cnt.items++;
|
||||
|
||||
return cnt;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -12,6 +12,15 @@
|
||||
* other places by this macro. Don't forget to update LAST_COUNTER.
|
||||
*/
|
||||
#define EXPAND_EACH_COUNTER \
|
||||
EXPAND_COUNTER(alloc_alloc_data) \
|
||||
EXPAND_COUNTER(alloc_alloc_meta) \
|
||||
EXPAND_COUNTER(alloc_free_data) \
|
||||
EXPAND_COUNTER(alloc_free_meta) \
|
||||
EXPAND_COUNTER(alloc_list_avail_lo) \
|
||||
EXPAND_COUNTER(alloc_list_freed_hi) \
|
||||
EXPAND_COUNTER(alloc_move) \
|
||||
EXPAND_COUNTER(alloc_moved_extent) \
|
||||
EXPAND_COUNTER(alloc_stale_cached_list_block) \
|
||||
EXPAND_COUNTER(block_cache_access) \
|
||||
EXPAND_COUNTER(block_cache_alloc_failure) \
|
||||
EXPAND_COUNTER(block_cache_alloc_page_order) \
|
||||
@@ -22,8 +31,23 @@
|
||||
EXPAND_COUNTER(block_cache_invalidate) \
|
||||
EXPAND_COUNTER(block_cache_lru_move) \
|
||||
EXPAND_COUNTER(block_cache_shrink) \
|
||||
EXPAND_COUNTER(btree_compact_values) \
|
||||
EXPAND_COUNTER(btree_compact_values_enomem) \
|
||||
EXPAND_COUNTER(btree_delete) \
|
||||
EXPAND_COUNTER(btree_dirty) \
|
||||
EXPAND_COUNTER(btree_force) \
|
||||
EXPAND_COUNTER(btree_join) \
|
||||
EXPAND_COUNTER(btree_insert) \
|
||||
EXPAND_COUNTER(btree_leaf_item_hash_search) \
|
||||
EXPAND_COUNTER(btree_lookup) \
|
||||
EXPAND_COUNTER(btree_next) \
|
||||
EXPAND_COUNTER(btree_prev) \
|
||||
EXPAND_COUNTER(btree_read_error) \
|
||||
EXPAND_COUNTER(btree_split) \
|
||||
EXPAND_COUNTER(btree_stale_read) \
|
||||
EXPAND_COUNTER(btree_update) \
|
||||
EXPAND_COUNTER(btree_walk) \
|
||||
EXPAND_COUNTER(btree_walk_restart) \
|
||||
EXPAND_COUNTER(client_farewell_error) \
|
||||
EXPAND_COUNTER(corrupt_btree_block_level) \
|
||||
EXPAND_COUNTER(corrupt_btree_no_child_ref) \
|
||||
@@ -34,6 +58,8 @@
|
||||
EXPAND_COUNTER(corrupt_symlink_inode_size) \
|
||||
EXPAND_COUNTER(corrupt_symlink_missing_item) \
|
||||
EXPAND_COUNTER(corrupt_symlink_not_null_term) \
|
||||
EXPAND_COUNTER(data_fallocate_enobufs_retry) \
|
||||
EXPAND_COUNTER(data_write_begin_enobufs_retry) \
|
||||
EXPAND_COUNTER(dentry_revalidate_error) \
|
||||
EXPAND_COUNTER(dentry_revalidate_invalid) \
|
||||
EXPAND_COUNTER(dentry_revalidate_locked) \
|
||||
@@ -42,25 +68,65 @@
|
||||
EXPAND_COUNTER(dentry_revalidate_root) \
|
||||
EXPAND_COUNTER(dentry_revalidate_valid) \
|
||||
EXPAND_COUNTER(dir_backref_excessive_retries) \
|
||||
EXPAND_COUNTER(ext_op_insert) \
|
||||
EXPAND_COUNTER(ext_op_next) \
|
||||
EXPAND_COUNTER(ext_op_remove) \
|
||||
EXPAND_COUNTER(forest_bloom_fail) \
|
||||
EXPAND_COUNTER(forest_bloom_pass) \
|
||||
EXPAND_COUNTER(forest_read_items) \
|
||||
EXPAND_COUNTER(forest_roots_next_hint) \
|
||||
EXPAND_COUNTER(forest_set_bloom_bits) \
|
||||
EXPAND_COUNTER(item_clear_dirty) \
|
||||
EXPAND_COUNTER(item_create) \
|
||||
EXPAND_COUNTER(item_delete) \
|
||||
EXPAND_COUNTER(item_dirty) \
|
||||
EXPAND_COUNTER(item_invalidate) \
|
||||
EXPAND_COUNTER(item_invalidate_page) \
|
||||
EXPAND_COUNTER(item_lookup) \
|
||||
EXPAND_COUNTER(item_mark_dirty) \
|
||||
EXPAND_COUNTER(item_next) \
|
||||
EXPAND_COUNTER(item_page_accessed) \
|
||||
EXPAND_COUNTER(item_page_alloc) \
|
||||
EXPAND_COUNTER(item_page_clear_dirty) \
|
||||
EXPAND_COUNTER(item_page_compact) \
|
||||
EXPAND_COUNTER(item_page_free) \
|
||||
EXPAND_COUNTER(item_page_lru_add) \
|
||||
EXPAND_COUNTER(item_page_lru_remove) \
|
||||
EXPAND_COUNTER(item_page_mark_dirty) \
|
||||
EXPAND_COUNTER(item_page_rbtree_walk) \
|
||||
EXPAND_COUNTER(item_page_split) \
|
||||
EXPAND_COUNTER(item_pcpu_add_replaced) \
|
||||
EXPAND_COUNTER(item_pcpu_page_hit) \
|
||||
EXPAND_COUNTER(item_pcpu_page_miss) \
|
||||
EXPAND_COUNTER(item_pcpu_page_miss_keys) \
|
||||
EXPAND_COUNTER(item_read_pages_split) \
|
||||
EXPAND_COUNTER(item_shrink_page) \
|
||||
EXPAND_COUNTER(item_shrink_page_dirty) \
|
||||
EXPAND_COUNTER(item_shrink_page_reader) \
|
||||
EXPAND_COUNTER(item_shrink_page_trylock) \
|
||||
EXPAND_COUNTER(item_update) \
|
||||
EXPAND_COUNTER(item_write_dirty) \
|
||||
EXPAND_COUNTER(lock_alloc) \
|
||||
EXPAND_COUNTER(lock_free) \
|
||||
EXPAND_COUNTER(lock_grace_elapsed) \
|
||||
EXPAND_COUNTER(lock_grace_extended) \
|
||||
EXPAND_COUNTER(lock_grace_set) \
|
||||
EXPAND_COUNTER(lock_grace_wait) \
|
||||
EXPAND_COUNTER(lock_grant_request) \
|
||||
EXPAND_COUNTER(lock_grant_response) \
|
||||
EXPAND_COUNTER(lock_invalidate_commit) \
|
||||
EXPAND_COUNTER(lock_grant_work) \
|
||||
EXPAND_COUNTER(lock_invalidate_coverage) \
|
||||
EXPAND_COUNTER(lock_invalidate_inode) \
|
||||
EXPAND_COUNTER(lock_invalidate_request) \
|
||||
EXPAND_COUNTER(lock_invalidate_response) \
|
||||
EXPAND_COUNTER(lock_invalidate_sync) \
|
||||
EXPAND_COUNTER(lock_invalidate_work) \
|
||||
EXPAND_COUNTER(lock_lock) \
|
||||
EXPAND_COUNTER(lock_lock_error) \
|
||||
EXPAND_COUNTER(lock_nonblock_eagain) \
|
||||
EXPAND_COUNTER(lock_recover_request) \
|
||||
EXPAND_COUNTER(lock_shrink_queued) \
|
||||
EXPAND_COUNTER(lock_shrink_request_aborted) \
|
||||
EXPAND_COUNTER(lock_shrink_attempted) \
|
||||
EXPAND_COUNTER(lock_shrink_aborted) \
|
||||
EXPAND_COUNTER(lock_shrink_work) \
|
||||
EXPAND_COUNTER(lock_unlock) \
|
||||
EXPAND_COUNTER(lock_wait) \
|
||||
EXPAND_COUNTER(net_dropped_response) \
|
||||
@@ -85,17 +151,37 @@
|
||||
EXPAND_COUNTER(quorum_write_block) \
|
||||
EXPAND_COUNTER(quorum_write_block_error) \
|
||||
EXPAND_COUNTER(quorum_fenced) \
|
||||
EXPAND_COUNTER(radix_enospc_data) \
|
||||
EXPAND_COUNTER(radix_enospc_paths) \
|
||||
EXPAND_COUNTER(radix_enospc_synth) \
|
||||
EXPAND_COUNTER(server_commit_hold) \
|
||||
EXPAND_COUNTER(server_commit_queue) \
|
||||
EXPAND_COUNTER(server_commit_worker) \
|
||||
EXPAND_COUNTER(srch_add_entry) \
|
||||
EXPAND_COUNTER(srch_compact_dirty_block) \
|
||||
EXPAND_COUNTER(srch_compact_entry) \
|
||||
EXPAND_COUNTER(srch_compact_flush) \
|
||||
EXPAND_COUNTER(srch_compact_log_page) \
|
||||
EXPAND_COUNTER(srch_compact_removed_entry) \
|
||||
EXPAND_COUNTER(srch_inconsistent_ref) \
|
||||
EXPAND_COUNTER(srch_rotate_log) \
|
||||
EXPAND_COUNTER(srch_search_log) \
|
||||
EXPAND_COUNTER(srch_search_log_block) \
|
||||
EXPAND_COUNTER(srch_search_retry_empty) \
|
||||
EXPAND_COUNTER(srch_search_sorted) \
|
||||
EXPAND_COUNTER(srch_search_sorted_block) \
|
||||
EXPAND_COUNTER(srch_search_stale_eio) \
|
||||
EXPAND_COUNTER(srch_search_stale_retry) \
|
||||
EXPAND_COUNTER(srch_search_xattrs) \
|
||||
EXPAND_COUNTER(srch_read_stale) \
|
||||
EXPAND_COUNTER(statfs) \
|
||||
EXPAND_COUNTER(trans_commit_data_alloc_low) \
|
||||
EXPAND_COUNTER(trans_commit_dirty_meta_full) \
|
||||
EXPAND_COUNTER(trans_commit_fsync) \
|
||||
EXPAND_COUNTER(trans_commit_full) \
|
||||
EXPAND_COUNTER(trans_commit_meta_alloc_low) \
|
||||
EXPAND_COUNTER(trans_commit_sync_fs) \
|
||||
EXPAND_COUNTER(trans_commit_timer)
|
||||
EXPAND_COUNTER(trans_commit_timer) \
|
||||
EXPAND_COUNTER(trans_commit_written)
|
||||
|
||||
#define FIRST_COUNTER block_cache_access
|
||||
#define LAST_COUNTER trans_commit_timer
|
||||
#define FIRST_COUNTER alloc_alloc_data
|
||||
#define LAST_COUNTER trans_commit_written
|
||||
|
||||
#undef EXPAND_COUNTER
|
||||
#define EXPAND_COUNTER(which) struct percpu_counter which;
|
||||
@@ -113,11 +199,21 @@ struct scoutfs_counters {
|
||||
pcpu <= &SCOUTFS_SB(sb)->counters->LAST_COUNTER; \
|
||||
pcpu++)
|
||||
|
||||
#define scoutfs_inc_counter(sb, which) \
|
||||
percpu_counter_inc(&SCOUTFS_SB(sb)->counters->which)
|
||||
/*
|
||||
* We always read with _sum, we have no use for the shared count and
|
||||
* certainly don't want to pay the cost of a shared lock to update it.
|
||||
* The default batch of 32 make counter increments show up significantly
|
||||
* in profiles.
|
||||
*/
|
||||
#define SCOUTFS_PCPU_COUNTER_BATCH (1 << 30)
|
||||
|
||||
#define scoutfs_add_counter(sb, which, cnt) \
|
||||
percpu_counter_add(&SCOUTFS_SB(sb)->counters->which, cnt)
|
||||
#define scoutfs_inc_counter(sb, which) \
|
||||
__percpu_counter_add(&SCOUTFS_SB(sb)->counters->which, 1, \
|
||||
SCOUTFS_PCPU_COUNTER_BATCH)
|
||||
|
||||
#define scoutfs_add_counter(sb, which, cnt) \
|
||||
__percpu_counter_add(&SCOUTFS_SB(sb)->counters->which, cnt, \
|
||||
SCOUTFS_PCPU_COUNTER_BATCH)
|
||||
|
||||
void __init scoutfs_init_counters(void);
|
||||
int scoutfs_setup_counters(struct super_block *sb);
|
||||
|
||||
1741
kmod/src/data.c
1741
kmod/src/data.c
File diff suppressed because it is too large
Load Diff
@@ -47,7 +47,7 @@ struct scoutfs_traced_extent {
|
||||
|
||||
extern const struct address_space_operations scoutfs_file_aops;
|
||||
extern const struct file_operations scoutfs_file_fops;
|
||||
struct scoutfs_radix_allocator;
|
||||
struct scoutfs_alloc;
|
||||
struct scoutfs_block_writer;
|
||||
|
||||
int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
|
||||
@@ -58,6 +58,8 @@ int scoutfs_data_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
|
||||
long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len);
|
||||
int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
u64 byte_len, struct inode *to, u64 to_off);
|
||||
|
||||
int scoutfs_data_wait_check(struct inode *inode, loff_t pos, loff_t len,
|
||||
u8 sef, u8 op, struct scoutfs_data_wait *ow,
|
||||
@@ -77,11 +79,12 @@ int scoutfs_data_waiting(struct super_block *sb, u64 ino, u64 iblock,
|
||||
unsigned int nr);
|
||||
|
||||
void scoutfs_data_init_btrees(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_log_trees *lt);
|
||||
void scoutfs_data_get_btrees(struct super_block *sb,
|
||||
struct scoutfs_log_trees *lt);
|
||||
int scoutfs_data_prepare_commit(struct super_block *sb);
|
||||
u64 scoutfs_data_alloc_free_bytes(struct super_block *sb);
|
||||
|
||||
int scoutfs_data_setup(struct super_block *sb);
|
||||
|
||||
142
kmod/src/dir.c
142
kmod/src/dir.c
@@ -13,7 +13,6 @@
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/crc32c.h>
|
||||
#include <linux/uio.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/namei.h>
|
||||
@@ -28,9 +27,9 @@
|
||||
#include "super.h"
|
||||
#include "trans.h"
|
||||
#include "xattr.h"
|
||||
#include "kvec.h"
|
||||
#include "forest.h"
|
||||
#include "item.h"
|
||||
#include "lock.h"
|
||||
#include "hash.h"
|
||||
#include "counters.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
@@ -79,7 +78,7 @@ static unsigned int mode_to_type(umode_t mode)
|
||||
#undef S_SHIFT
|
||||
}
|
||||
|
||||
static unsigned int dentry_type(unsigned int type)
|
||||
static unsigned int dentry_type(enum scoutfs_dentry_type type)
|
||||
{
|
||||
static unsigned char types[] = {
|
||||
[SCOUTFS_DT_FIFO] = DT_FIFO,
|
||||
@@ -213,12 +212,44 @@ static struct scoutfs_dirent *alloc_dirent(unsigned int name_len)
|
||||
return kmalloc(dirent_bytes(name_len), GFP_NOFS);
|
||||
}
|
||||
|
||||
/*
|
||||
* Test a bit number as though an array of bytes is a large len-bit
|
||||
* big-endian value. nr 0 is the LSB of the final byte, nr (len - 1) is
|
||||
* the MSB of the first byte.
|
||||
*/
|
||||
static int test_be_bytes_bit(int nr, const char *bytes, int len)
|
||||
{
|
||||
return bytes[(len - 1 - nr) >> 3] & (1 << (nr & 7));
|
||||
}
|
||||
|
||||
/*
|
||||
* Generate a 32bit "fingerprint" of the name by extracting 32 evenly
|
||||
* distributed bits from the name. The intent is to have the sort order
|
||||
* of the fingerprints reflect the memcmp() sort order of the names
|
||||
* while mapping large names down to small fs keys.
|
||||
*
|
||||
* Names that are smaller than 32bits are biased towards the high bits
|
||||
* of the fingerprint so that most significant bits of the fingerprints
|
||||
* consistently reflect the initial characters of the names.
|
||||
*/
|
||||
static u32 dirent_name_fingerprint(const char *name, unsigned int name_len)
|
||||
{
|
||||
int name_bits = name_len * 8;
|
||||
int skip = max(name_bits / 32, 1);
|
||||
u32 fp = 0;
|
||||
int f;
|
||||
int n;
|
||||
|
||||
for (f = 31, n = name_bits - 1; f >= 0 && n >= 0; f--, n -= skip)
|
||||
fp |= !!test_be_bytes_bit(n, name, name_bits) << f;
|
||||
|
||||
return fp;
|
||||
}
|
||||
|
||||
static u64 dirent_name_hash(const char *name, unsigned int name_len)
|
||||
{
|
||||
unsigned int half = (name_len + 1) / 2;
|
||||
|
||||
return crc32c(~0, name, half) |
|
||||
((u64)crc32c(~0, name + name_len - half, half) << 32);
|
||||
return scoutfs_hash32(name, name_len) |
|
||||
((u64)dirent_name_fingerprint(name, name_len) << 32);
|
||||
}
|
||||
|
||||
static u64 dirent_names_equal(const char *a_name, unsigned int a_len,
|
||||
@@ -239,7 +270,6 @@ static int lookup_dirent(struct super_block *sb, u64 dir_ino, const char *name,
|
||||
struct scoutfs_key last_key;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_dirent *dent = NULL;
|
||||
struct kvec val;
|
||||
int ret;
|
||||
|
||||
dent = alloc_dirent(SCOUTFS_NAME_LEN);
|
||||
@@ -250,10 +280,10 @@ static int lookup_dirent(struct super_block *sb, u64 dir_ino, const char *name,
|
||||
|
||||
init_dirent_key(&key, SCOUTFS_DIRENT_TYPE, dir_ino, hash, 0);
|
||||
init_dirent_key(&last_key, SCOUTFS_DIRENT_TYPE, dir_ino, hash, U64_MAX);
|
||||
kvec_init(&val, dent, dirent_bytes(SCOUTFS_NAME_LEN));
|
||||
|
||||
for (;;) {
|
||||
ret = scoutfs_forest_next(sb, &key, &last_key, &val, lock);
|
||||
ret = scoutfs_item_next(sb, &key, &last_key, dent,
|
||||
dirent_bytes(SCOUTFS_NAME_LEN), lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
@@ -433,7 +463,18 @@ out:
|
||||
else
|
||||
inode = scoutfs_iget(sb, ino);
|
||||
|
||||
return d_splice_alias(inode, dentry);
|
||||
/*
|
||||
* We can't splice dir aliases into the dcache. dir entries
|
||||
* might have changed on other nodes so our dcache could still
|
||||
* contain them, rather than having been moved in rename. For
|
||||
* dirs, we use d_materialize_unique to remove any existing
|
||||
* aliases which must be stale. Our inode numbers aren't reused
|
||||
* so inodes pointed to by entries can't change types.
|
||||
*/
|
||||
if (!IS_ERR_OR_NULL(inode) && S_ISDIR(inode->i_mode))
|
||||
return d_materialise_unique(dentry, inode);
|
||||
else
|
||||
return d_splice_alias(inode, dentry);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -452,7 +493,6 @@ static int KC_DECLARE_READDIR(scoutfs_readdir, struct file *file,
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key last_key;
|
||||
struct scoutfs_lock *dir_lock;
|
||||
struct kvec val;
|
||||
int name_len;
|
||||
u64 pos;
|
||||
int ret;
|
||||
@@ -468,7 +508,6 @@ static int KC_DECLARE_READDIR(scoutfs_readdir, struct file *file,
|
||||
|
||||
init_dirent_key(&last_key, SCOUTFS_READDIR_TYPE, scoutfs_ino(inode),
|
||||
SCOUTFS_DIRENT_LAST_POS, 0);
|
||||
kvec_init(&val, dent, dirent_bytes(SCOUTFS_NAME_LEN));
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &dir_lock);
|
||||
if (ret)
|
||||
@@ -478,7 +517,9 @@ static int KC_DECLARE_READDIR(scoutfs_readdir, struct file *file,
|
||||
init_dirent_key(&key, SCOUTFS_READDIR_TYPE, scoutfs_ino(inode),
|
||||
kc_readdir_pos(file, ctx), 0);
|
||||
|
||||
ret = scoutfs_forest_next(sb, &key, &last_key, &val, dir_lock);
|
||||
ret = scoutfs_item_next(sb, &key, &last_key, dent,
|
||||
dirent_bytes(SCOUTFS_NAME_LEN),
|
||||
dir_lock);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
@@ -535,7 +576,6 @@ static int add_entry_items(struct super_block *sb, u64 dir_ino, u64 hash,
|
||||
struct scoutfs_dirent *dent;
|
||||
bool del_ent = false;
|
||||
bool del_rdir = false;
|
||||
struct kvec val;
|
||||
int ret;
|
||||
|
||||
dent = alloc_dirent(name_len);
|
||||
@@ -554,25 +594,27 @@ static int add_entry_items(struct super_block *sb, u64 dir_ino, u64 hash,
|
||||
init_dirent_key(&ent_key, SCOUTFS_DIRENT_TYPE, dir_ino, hash, pos);
|
||||
init_dirent_key(&rdir_key, SCOUTFS_READDIR_TYPE, dir_ino, pos, 0);
|
||||
init_dirent_key(&lb_key, SCOUTFS_LINK_BACKREF_TYPE, ino, dir_ino, pos);
|
||||
kvec_init(&val, dent, dirent_bytes(name_len));
|
||||
|
||||
ret = scoutfs_forest_create(sb, &ent_key, &val, dir_lock);
|
||||
ret = scoutfs_item_create(sb, &ent_key, dent, dirent_bytes(name_len),
|
||||
dir_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
del_ent = true;
|
||||
|
||||
ret = scoutfs_forest_create(sb, &rdir_key, &val, dir_lock);
|
||||
ret = scoutfs_item_create(sb, &rdir_key, dent, dirent_bytes(name_len),
|
||||
dir_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
del_rdir = true;
|
||||
|
||||
ret = scoutfs_forest_create(sb, &lb_key, &val, inode_lock);
|
||||
ret = scoutfs_item_create(sb, &lb_key, dent, dirent_bytes(name_len),
|
||||
inode_lock);
|
||||
out:
|
||||
if (ret < 0) {
|
||||
if (del_ent)
|
||||
scoutfs_forest_delete_dirty(sb, &ent_key);
|
||||
scoutfs_item_delete(sb, &ent_key, dir_lock);
|
||||
if (del_rdir)
|
||||
scoutfs_forest_delete_dirty(sb, &rdir_key);
|
||||
scoutfs_item_delete(sb, &rdir_key, dir_lock);
|
||||
}
|
||||
|
||||
kfree(dent);
|
||||
@@ -594,23 +636,20 @@ static int del_entry_items(struct super_block *sb, u64 dir_ino, u64 hash,
|
||||
struct scoutfs_key rdir_key;
|
||||
struct scoutfs_key ent_key;
|
||||
struct scoutfs_key lb_key;
|
||||
LIST_HEAD(dir_saved);
|
||||
LIST_HEAD(inode_saved);
|
||||
int ret;
|
||||
|
||||
init_dirent_key(&ent_key, SCOUTFS_DIRENT_TYPE, dir_ino, hash, pos);
|
||||
init_dirent_key(&rdir_key, SCOUTFS_READDIR_TYPE, dir_ino, pos, 0);
|
||||
init_dirent_key(&lb_key, SCOUTFS_LINK_BACKREF_TYPE, ino, dir_ino, pos);
|
||||
|
||||
ret = scoutfs_forest_delete_save(sb, &ent_key, &dir_saved, dir_lock) ?:
|
||||
scoutfs_forest_delete_save(sb, &rdir_key, &dir_saved, dir_lock) ?:
|
||||
scoutfs_forest_delete_save(sb, &lb_key, &inode_saved, inode_lock);
|
||||
if (ret < 0) {
|
||||
scoutfs_forest_restore(sb, &dir_saved, dir_lock);
|
||||
scoutfs_forest_restore(sb, &inode_saved, inode_lock);
|
||||
} else {
|
||||
scoutfs_forest_free_batch(sb, &dir_saved);
|
||||
scoutfs_forest_free_batch(sb, &inode_saved);
|
||||
ret = scoutfs_item_dirty(sb, &ent_key, dir_lock) ?:
|
||||
scoutfs_item_dirty(sb, &rdir_key, dir_lock) ?:
|
||||
scoutfs_item_dirty(sb, &lb_key, inode_lock);
|
||||
if (ret == 0) {
|
||||
ret = scoutfs_item_delete(sb, &ent_key, dir_lock) ?:
|
||||
scoutfs_item_delete(sb, &rdir_key, dir_lock) ?:
|
||||
scoutfs_item_delete(sb, &lb_key, inode_lock);
|
||||
BUG_ON(ret); /* _dirty should have guaranteed success */
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -627,7 +666,6 @@ static int del_entry_items(struct super_block *sb, u64 dir_ino, u64 hash,
|
||||
*/
|
||||
static struct inode *lock_hold_create(struct inode *dir, struct dentry *dentry,
|
||||
umode_t mode, dev_t rdev,
|
||||
const struct scoutfs_item_count cnt,
|
||||
struct scoutfs_lock **dir_lock,
|
||||
struct scoutfs_lock **inode_lock,
|
||||
struct list_head *ind_locks)
|
||||
@@ -642,7 +680,7 @@ static struct inode *lock_hold_create(struct inode *dir, struct dentry *dentry,
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
ret = scoutfs_alloc_ino(dir, &ino);
|
||||
ret = scoutfs_alloc_ino(sb, S_ISDIR(mode), &ino);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
@@ -666,7 +704,7 @@ retry:
|
||||
ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
|
||||
scoutfs_inode_index_prepare(sb, ind_locks, dir, true) ?:
|
||||
scoutfs_inode_index_prepare_ino(sb, ind_locks, ino, mode) ?:
|
||||
scoutfs_inode_index_try_lock_hold(sb, ind_locks, ind_seq, cnt);
|
||||
scoutfs_inode_index_try_lock_hold(sb, ind_locks, ind_seq);
|
||||
if (ret > 0)
|
||||
goto retry;
|
||||
if (ret)
|
||||
@@ -713,7 +751,6 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
|
||||
hash = dirent_name_hash(dentry->d_name.name, dentry->d_name.len);
|
||||
inode = lock_hold_create(dir, dentry, mode, rdev,
|
||||
SIC_MKNOD(dentry->d_name.len),
|
||||
&dir_lock, &inode_lock, &ind_locks);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
@@ -808,8 +845,7 @@ retry:
|
||||
ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
|
||||
scoutfs_inode_index_prepare(sb, &ind_locks, dir, false) ?:
|
||||
scoutfs_inode_index_prepare(sb, &ind_locks, inode, false) ?:
|
||||
scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq,
|
||||
SIC_LINK(dentry->d_name.len));
|
||||
scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq);
|
||||
if (ret > 0)
|
||||
goto retry;
|
||||
if (ret)
|
||||
@@ -890,8 +926,7 @@ retry:
|
||||
ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
|
||||
scoutfs_inode_index_prepare(sb, &ind_locks, dir, false) ?:
|
||||
scoutfs_inode_index_prepare(sb, &ind_locks, inode, false) ?:
|
||||
scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq,
|
||||
SIC_UNLINK(dentry->d_name.len));
|
||||
scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq);
|
||||
if (ret > 0)
|
||||
goto retry;
|
||||
if (ret)
|
||||
@@ -960,17 +995,16 @@ static void init_symlink_key(struct scoutfs_key *key, u64 ino, u8 nr)
|
||||
* The target name can be null for deletion when val isn't used. Size
|
||||
* still has to be provided to determine the number of items.
|
||||
*/
|
||||
enum {
|
||||
enum symlink_ops {
|
||||
SYM_CREATE = 0,
|
||||
SYM_LOOKUP,
|
||||
SYM_DELETE,
|
||||
};
|
||||
static int symlink_item_ops(struct super_block *sb, int op, u64 ino,
|
||||
static int symlink_item_ops(struct super_block *sb, enum symlink_ops op, u64 ino,
|
||||
struct scoutfs_lock *lock, const char *target,
|
||||
size_t size)
|
||||
{
|
||||
struct scoutfs_key key;
|
||||
struct kvec val;
|
||||
unsigned bytes;
|
||||
unsigned nr;
|
||||
int ret;
|
||||
@@ -985,14 +1019,16 @@ static int symlink_item_ops(struct super_block *sb, int op, u64 ino,
|
||||
|
||||
init_symlink_key(&key, ino, i);
|
||||
bytes = min_t(u64, size, SCOUTFS_MAX_VAL_SIZE);
|
||||
kvec_init(&val, (void *)target, bytes);
|
||||
|
||||
if (op == SYM_CREATE)
|
||||
ret = scoutfs_forest_create(sb, &key, &val, lock);
|
||||
ret = scoutfs_item_create(sb, &key, (void *)target,
|
||||
bytes, lock);
|
||||
else if (op == SYM_LOOKUP)
|
||||
ret = scoutfs_forest_lookup_exact(sb, &key, &val, lock);
|
||||
ret = scoutfs_item_lookup_exact(sb, &key,
|
||||
(void *)target, bytes,
|
||||
lock);
|
||||
else if (op == SYM_DELETE)
|
||||
ret = scoutfs_forest_delete(sb, &key, lock);
|
||||
ret = scoutfs_item_delete(sb, &key, lock);
|
||||
if (ret)
|
||||
break;
|
||||
|
||||
@@ -1125,7 +1161,6 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
return ret;
|
||||
|
||||
inode = lock_hold_create(dir, dentry, S_IFLNK|S_IRWXUGO, 0,
|
||||
SIC_SYMLINK(dentry->d_name.len, name_len),
|
||||
&dir_lock, &inode_lock, &ind_locks);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
@@ -1207,7 +1242,6 @@ int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
|
||||
struct scoutfs_key last_key;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct kvec val;
|
||||
int len;
|
||||
int ret;
|
||||
|
||||
@@ -1223,13 +1257,13 @@ int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
|
||||
init_dirent_key(&key, SCOUTFS_LINK_BACKREF_TYPE, ino, dir_ino, dir_pos);
|
||||
init_dirent_key(&last_key, SCOUTFS_LINK_BACKREF_TYPE, ino, U64_MAX,
|
||||
U64_MAX);
|
||||
kvec_init(&val, &ent->dent, dirent_bytes(SCOUTFS_NAME_LEN));
|
||||
|
||||
ret = scoutfs_lock_ino(sb, SCOUTFS_LOCK_READ, 0, ino, &lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_forest_next(sb, &key, &last_key, &val, lock);
|
||||
ret = scoutfs_item_next(sb, &key, &last_key, &ent->dent,
|
||||
dirent_bytes(SCOUTFS_NAME_LEN), lock);
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
|
||||
lock = NULL;
|
||||
if (ret < 0)
|
||||
@@ -1558,9 +1592,7 @@ retry:
|
||||
scoutfs_inode_index_prepare(sb, &ind_locks, new_dir, false)) ?:
|
||||
(new_inode == NULL ? 0 :
|
||||
scoutfs_inode_index_prepare(sb, &ind_locks, new_inode, false)) ?:
|
||||
scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq,
|
||||
SIC_RENAME(old_dentry->d_name.len,
|
||||
new_dentry->d_name.len));
|
||||
scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq);
|
||||
if (ret > 0)
|
||||
goto retry;
|
||||
if (ret)
|
||||
|
||||
394
kmod/src/ext.c
Normal file
394
kmod/src/ext.c
Normal file
@@ -0,0 +1,394 @@
|
||||
/*
|
||||
* Copyright (C) 2020 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
#include "ext.h"
|
||||
#include "counters.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
* Extents are used to track free block regions and to map logical file
|
||||
* regions to device blocks. Extents can be split and merged as
|
||||
* they're modified. These helpers implement all the fiddly extent
|
||||
* manipulations. Callers provide callbacks which implement the actual
|
||||
* storage of extents in either the item cache or btree items.
|
||||
*/
|
||||
|
||||
static void ext_zero(struct scoutfs_extent *ext)
|
||||
{
|
||||
memset(ext, 0, sizeof(struct scoutfs_extent));
|
||||
}
|
||||
|
||||
static bool ext_overlap(struct scoutfs_extent *ext, u64 start, u64 len)
|
||||
{
|
||||
u64 e_end = ext->start + ext->len - 1;
|
||||
u64 end = start + len - 1;
|
||||
|
||||
return !(e_end < start || ext->start > end);
|
||||
}
|
||||
|
||||
static bool ext_inside(u64 start, u64 len, struct scoutfs_extent *out)
|
||||
{
|
||||
u64 in_end = start + len - 1;
|
||||
u64 out_end = out->start + out->len - 1;
|
||||
|
||||
return out->start <= start && out_end >= in_end;
|
||||
}
|
||||
|
||||
/* we only translate mappings when they exist */
|
||||
static inline u64 ext_map_add(u64 map, u64 diff)
|
||||
{
|
||||
return map ? map + diff : 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Extents can merge if they're logically contiguous, both don't have
|
||||
* mappings or have mappings which are also contiguous, and have
|
||||
* matching flags.
|
||||
*/
|
||||
bool scoutfs_ext_can_merge(struct scoutfs_extent *left,
|
||||
struct scoutfs_extent *right)
|
||||
{
|
||||
return (left->start + left->len == right->start) &&
|
||||
((!left->map && !right->map) ||
|
||||
(left->map + left->len == right->map)) &&
|
||||
(left->flags == right->flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* Split an existing extent in to left and right extents by removing
|
||||
* an interior range. The split extents are all zeros if the range
|
||||
* extends to their end of the extent.
|
||||
*/
|
||||
static void ext_split(struct scoutfs_extent *ext, u64 start, u64 len,
|
||||
struct scoutfs_extent *left,
|
||||
struct scoutfs_extent *right)
|
||||
{
|
||||
if (ext->start < start) {
|
||||
left->start = ext->start;
|
||||
left->len = start - ext->start;
|
||||
left->map = ext->map;
|
||||
left->flags = ext->flags;
|
||||
} else {
|
||||
ext_zero(left);
|
||||
}
|
||||
|
||||
if (ext->start + ext->len > start + len) {
|
||||
right->start = start + len;
|
||||
right->len = ext->start + ext->len - right->start;
|
||||
right->map = ext_map_add(ext->map, right->start - ext->start);
|
||||
right->flags = ext->flags;
|
||||
} else {
|
||||
ext_zero(right);
|
||||
}
|
||||
}
|
||||
|
||||
#define op_call(sb, ops, arg, which, args...) \
|
||||
({ \
|
||||
int _ret; \
|
||||
_ret = ops->which(sb, arg, ##args); \
|
||||
scoutfs_inc_counter(sb, ext_op_##which); \
|
||||
trace_scoutfs_ext_op_##which(sb, ##args, _ret); \
|
||||
_ret; \
|
||||
})
|
||||
|
||||
struct extent_changes {
|
||||
struct scoutfs_extent exts[4];
|
||||
bool ins[4];
|
||||
u8 nr;
|
||||
};
|
||||
|
||||
static void add_change(struct extent_changes *chg,
|
||||
struct scoutfs_extent *ext, bool ins)
|
||||
{
|
||||
BUILD_BUG_ON(ARRAY_SIZE(chg->ins) != ARRAY_SIZE(chg->exts));
|
||||
|
||||
if (ext->len) {
|
||||
BUG_ON(chg->nr == ARRAY_SIZE(chg->exts));
|
||||
chg->exts[chg->nr] = *ext;
|
||||
chg->ins[chg->nr] = !!ins;
|
||||
chg->nr++;
|
||||
}
|
||||
}
|
||||
|
||||
static int apply_changes(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, struct extent_changes *chg)
|
||||
{
|
||||
int ret = 0;
|
||||
int err;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < chg->nr; i++) {
|
||||
if (chg->ins[i])
|
||||
ret = op_call(sb, ops, arg, insert, chg->exts[i].start,
|
||||
chg->exts[i].len, chg->exts[i].map,
|
||||
chg->exts[i].flags);
|
||||
else
|
||||
ret = op_call(sb, ops, arg, remove, chg->exts[i].start,
|
||||
chg->exts[i].len, chg->exts[i].map,
|
||||
chg->exts[i].flags);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
while (ret < 0 && --i >= 0) {
|
||||
if (chg->ins[i])
|
||||
err = op_call(sb, ops, arg, remove, chg->exts[i].start,
|
||||
chg->exts[i].len, chg->exts[i].map,
|
||||
chg->exts[i].flags);
|
||||
else
|
||||
err = op_call(sb, ops, arg, insert, chg->exts[i].start,
|
||||
chg->exts[i].len, chg->exts[i].map,
|
||||
chg->exts[i].flags);
|
||||
BUG_ON(err); /* inconsistent */
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_ext_next(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, u64 start, u64 len, struct scoutfs_extent *ext)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = op_call(sb, ops, arg, next, start, len, ext);
|
||||
trace_scoutfs_ext_next(sb, start, len, ext, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert the given extent. EINVAL is returned if there's already an existing
|
||||
* overlapping extent. This can merge with its neighbours.
|
||||
*/
|
||||
int scoutfs_ext_insert(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, u64 start, u64 len, u64 map, u8 flags)
|
||||
{
|
||||
struct extent_changes chg = { .nr = 0 };
|
||||
struct scoutfs_extent found;
|
||||
struct scoutfs_extent ins;
|
||||
int ret;
|
||||
|
||||
ins.start = start;
|
||||
ins.len = len;
|
||||
ins.map = map;
|
||||
ins.flags = flags;
|
||||
|
||||
/* find right neighbour and check for overlap */
|
||||
ret = op_call(sb, ops, arg, next, start, 1, &found);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
|
||||
/* inserting extent must not overlap */
|
||||
if (found.len && ext_overlap(&ins, found.start, found.len)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* merge with right if we can */
|
||||
if (found.len && scoutfs_ext_can_merge(&ins, &found)) {
|
||||
ins.len += found.len;
|
||||
add_change(&chg, &found, false);
|
||||
}
|
||||
|
||||
/* see if we can merge with a left neighbour */
|
||||
if (start > 0) {
|
||||
ret = op_call(sb, ops, arg, next, start - 1, 1, &found);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
|
||||
if (ret == 0 && scoutfs_ext_can_merge(&found, &ins)) {
|
||||
ins.start = found.start;
|
||||
ins.map = found.map;
|
||||
ins.len += found.len;
|
||||
add_change(&chg, &found, false);
|
||||
}
|
||||
}
|
||||
|
||||
add_change(&chg, &ins, true);
|
||||
ret = apply_changes(sb, ops, arg, &chg);
|
||||
out:
|
||||
trace_scoutfs_ext_insert(sb, start, len, map, flags, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove the given extent. The extent to remove must be found entirely
|
||||
* in an existing extent. If the existing extent is larger then we leave
|
||||
* behind the remaining extent. The existing extent can be split.
|
||||
*/
|
||||
int scoutfs_ext_remove(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, u64 start, u64 len)
|
||||
{
|
||||
struct extent_changes chg = { .nr = 0 };
|
||||
struct scoutfs_extent found;
|
||||
struct scoutfs_extent left;
|
||||
struct scoutfs_extent right;
|
||||
int ret;
|
||||
|
||||
ret = op_call(sb, ops, arg, next, start, 1, &found);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* removed extent must be entirely within found */
|
||||
if (!ext_inside(start, len, &found)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ext_split(&found, start, len, &left, &right);
|
||||
|
||||
add_change(&chg, &found, false);
|
||||
add_change(&chg, &left, true);
|
||||
add_change(&chg, &right, true);
|
||||
|
||||
ret = apply_changes(sb, ops, arg, &chg);
|
||||
out:
|
||||
trace_scoutfs_ext_remove(sb, start, len, 0, 0, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find and remove the next extent, removing only a portion if the
|
||||
* extent is larger than the count. Returns ENOENT if it didn't
|
||||
* find any extents.
|
||||
*
|
||||
* This does not search for merge candidates so it's safe to call with
|
||||
* extents indexed by length.
|
||||
*/
|
||||
int scoutfs_ext_alloc(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, u64 start, u64 len, u64 count,
|
||||
struct scoutfs_extent *ext)
|
||||
{
|
||||
struct extent_changes chg = { .nr = 0 };
|
||||
struct scoutfs_extent found;
|
||||
struct scoutfs_extent ins;
|
||||
int ret;
|
||||
|
||||
ret = op_call(sb, ops, arg, next, start, len, &found);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
add_change(&chg, &found, false);
|
||||
|
||||
if (found.len > count) {
|
||||
ins.start = found.start + count;
|
||||
ins.len = found.len - count;
|
||||
ins.map = ext_map_add(found.map, count);
|
||||
ins.flags = found.flags;
|
||||
|
||||
add_change(&chg, &ins, true);
|
||||
}
|
||||
|
||||
ret = apply_changes(sb, ops, arg, &chg);
|
||||
out:
|
||||
if (ret == 0) {
|
||||
ext->start = found.start;
|
||||
ext->len = min(found.len, count);
|
||||
ext->map = found.map;
|
||||
ext->flags = found.flags;
|
||||
} else {
|
||||
ext_zero(ext);
|
||||
}
|
||||
|
||||
trace_scoutfs_ext_alloc(sb, start, len, count, ext, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set the map and flags for an extent region, with the magical property
|
||||
* that extents with map and flags set to 0 are removed.
|
||||
*
|
||||
* If we're modifying an existing extent then the modification must be
|
||||
* fully inside the existing extent. The modification can leave edges
|
||||
* of the extent which need to be inserted. If the modification extends
|
||||
* to the end of the existing extent then we need to check for adjacent
|
||||
* neighbouring extents which might now be able to be merged.
|
||||
*
|
||||
* Inserting a new extent is like the case of modifying the entire
|
||||
* existing extent. We need to check neighbours of the inserted extent
|
||||
* to see if they can be merged.
|
||||
*/
|
||||
int scoutfs_ext_set(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, u64 start, u64 len, u64 map, u8 flags)
|
||||
{
|
||||
struct extent_changes chg = { .nr = 0 };
|
||||
struct scoutfs_extent found;
|
||||
struct scoutfs_extent left;
|
||||
struct scoutfs_extent right;
|
||||
struct scoutfs_extent set;
|
||||
int ret;
|
||||
|
||||
set.start = start;
|
||||
set.len = len;
|
||||
set.map = map;
|
||||
set.flags = flags;
|
||||
|
||||
/* find extent to remove */
|
||||
ret = op_call(sb, ops, arg, next, start, 1, &found);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
|
||||
if (ret == 0 && ext_overlap(&found, start, len)) {
|
||||
/* set extent must be entirely within found */
|
||||
if (!ext_inside(start, len, &found)) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
add_change(&chg, &found, false);
|
||||
ext_split(&found, start, len, &left, &right);
|
||||
} else {
|
||||
ext_zero(&found);
|
||||
ext_zero(&left);
|
||||
ext_zero(&right);
|
||||
}
|
||||
|
||||
if (left.len) {
|
||||
/* inserting split left, won't merge */
|
||||
add_change(&chg, &left, true);
|
||||
} else if (start > 0) {
|
||||
ret = op_call(sb, ops, arg, next, start - 1, 1, &left);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
else if (ret == 0 && scoutfs_ext_can_merge(&left, &set)) {
|
||||
/* remove found left, merging */
|
||||
set.start = left.start;
|
||||
set.map = left.map;
|
||||
set.len += left.len;
|
||||
add_change(&chg, &left, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (right.len) {
|
||||
/* inserting split right, won't merge */
|
||||
add_change(&chg, &right, true);
|
||||
} else {
|
||||
ret = op_call(sb, ops, arg, next, start + len, 1, &right);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
else if (ret == 0 && scoutfs_ext_can_merge(&set, &right)) {
|
||||
/* remove found right, merging */
|
||||
set.len += right.len;
|
||||
add_change(&chg, &right, false);
|
||||
}
|
||||
}
|
||||
|
||||
if (set.flags || set.map)
|
||||
add_change(&chg, &set, true);
|
||||
|
||||
ret = apply_changes(sb, ops, arg, &chg);
|
||||
out:
|
||||
trace_scoutfs_ext_set(sb, start, len, map, flags, ret);
|
||||
return ret;
|
||||
}
|
||||
35
kmod/src/ext.h
Normal file
35
kmod/src/ext.h
Normal file
@@ -0,0 +1,35 @@
|
||||
#ifndef _SCOUTFS_EXT_H_
|
||||
#define _SCOUTFS_EXT_H_
|
||||
|
||||
struct scoutfs_extent {
|
||||
u64 start;
|
||||
u64 len;
|
||||
u64 map;
|
||||
u8 flags;
|
||||
};
|
||||
|
||||
struct scoutfs_ext_ops {
|
||||
int (*next)(struct super_block *sb, void *arg,
|
||||
u64 start, u64 len, struct scoutfs_extent *ext);
|
||||
int (*insert)(struct super_block *sb, void *arg,
|
||||
u64 start, u64 len, u64 map, u8 flags);
|
||||
int (*remove)(struct super_block *sb, void *arg, u64 start, u64 len,
|
||||
u64 map, u8 flags);
|
||||
};
|
||||
|
||||
bool scoutfs_ext_can_merge(struct scoutfs_extent *left,
|
||||
struct scoutfs_extent *right);
|
||||
|
||||
int scoutfs_ext_next(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, u64 start, u64 len, struct scoutfs_extent *ext);
|
||||
int scoutfs_ext_insert(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, u64 start, u64 len, u64 map, u8 flags);
|
||||
int scoutfs_ext_remove(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, u64 start, u64 len);
|
||||
int scoutfs_ext_alloc(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, u64 start, u64 len, u64 limit,
|
||||
struct scoutfs_extent *ext);
|
||||
int scoutfs_ext_set(struct super_block *sb, struct scoutfs_ext_ops *ops,
|
||||
void *arg, u64 start, u64 len, u64 map, u8 flags);
|
||||
|
||||
#endif
|
||||
1539
kmod/src/forest.c
1539
kmod/src/forest.c
File diff suppressed because it is too large
Load Diff
@@ -1,54 +1,43 @@
|
||||
#ifndef _SCOUTFS_FOREST_H_
|
||||
#define _SCOUTFS_FOREST_H_
|
||||
|
||||
struct scoutfs_radix_allocator;
|
||||
struct scoutfs_alloc;
|
||||
struct scoutfs_block_writer;
|
||||
struct scoutfs_block;
|
||||
|
||||
#include "btree.h"
|
||||
|
||||
/* caller gives an item to the callback */
|
||||
typedef int (*scoutfs_forest_item_cb)(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_log_item_value *liv,
|
||||
void *val, int val_len, void *arg);
|
||||
|
||||
int scoutfs_forest_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct kvec *val, struct scoutfs_lock *lock);
|
||||
int scoutfs_forest_lookup_exact(struct super_block *sb,
|
||||
struct scoutfs_key *key, struct kvec *val,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_forest_next(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_key *last, struct kvec *val,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_forest_next_hint(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_key *next);
|
||||
int scoutfs_forest_prev(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_key *first, struct kvec *val,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_forest_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct kvec *val, struct scoutfs_lock *lock);
|
||||
int scoutfs_forest_create_force(struct super_block *sb,
|
||||
struct scoutfs_key *key, struct kvec *val,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_forest_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct kvec *val, struct scoutfs_lock *lock);
|
||||
int scoutfs_forest_delete_dirty(struct super_block *sb,
|
||||
struct scoutfs_key *key);
|
||||
int scoutfs_forest_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_forest_delete_force(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_forest_delete_save(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct list_head *list,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_forest_restore(struct super_block *sb, struct list_head *list,
|
||||
struct scoutfs_lock *lock);
|
||||
void scoutfs_forest_free_batch(struct super_block *sb, struct list_head *list);
|
||||
int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_lock *lock,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_set_bloom_bits(struct super_block *sb,
|
||||
struct scoutfs_lock *lock);
|
||||
void scoutfs_forest_set_max_vers(struct super_block *sb, u64 max_vers);
|
||||
int scoutfs_forest_get_max_vers(struct super_block *sb,
|
||||
struct scoutfs_super_block *super,
|
||||
u64 *vers);
|
||||
int scoutfs_forest_insert_list(struct super_block *sb,
|
||||
struct scoutfs_btree_item_list *lst);
|
||||
int scoutfs_forest_srch_add(struct super_block *sb, u64 hash, u64 ino, u64 id);
|
||||
|
||||
void scoutfs_forest_init_btrees(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_log_trees *lt);
|
||||
void scoutfs_forest_get_btrees(struct super_block *sb,
|
||||
struct scoutfs_log_trees *lt);
|
||||
|
||||
void scoutfs_forest_clear_lock(struct super_block *sb,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
int scoutfs_forest_setup(struct super_block *sb);
|
||||
void scoutfs_forest_destroy(struct super_block *sb);
|
||||
|
||||
|
||||
@@ -1,6 +1,9 @@
|
||||
#ifndef _SCOUTFS_FORMAT_H_
|
||||
#define _SCOUTFS_FORMAT_H_
|
||||
|
||||
#define SCOUTFS_INTEROP_VERSION 0ULL
|
||||
#define SCOUTFS_INTEROP_VERSION_STR __stringify(0)
|
||||
|
||||
/* statfs(2) f_type */
|
||||
#define SCOUTFS_SUPER_MAGIC 0x554f4353 /* "SCOU" */
|
||||
|
||||
@@ -8,27 +11,47 @@
|
||||
#define SCOUTFS_BLOCK_MAGIC_SUPER 0x103c428b
|
||||
#define SCOUTFS_BLOCK_MAGIC_BTREE 0xe597f96d
|
||||
#define SCOUTFS_BLOCK_MAGIC_BLOOM 0x31995604
|
||||
#define SCOUTFS_BLOCK_MAGIC_RADIX 0xebeb5e65
|
||||
#define SCOUTFS_BLOCK_MAGIC_SRCH_BLOCK 0x897e4a7d
|
||||
#define SCOUTFS_BLOCK_MAGIC_SRCH_PARENT 0xb23a2a05
|
||||
#define SCOUTFS_BLOCK_MAGIC_ALLOC_LIST 0x8a93ac83
|
||||
|
||||
/*
|
||||
* The super block and btree blocks are fixed 4k.
|
||||
* The super block, quorum block, and file data allocation granularity
|
||||
* use the smaller 4KB block.
|
||||
*/
|
||||
#define SCOUTFS_BLOCK_SHIFT 12
|
||||
#define SCOUTFS_BLOCK_SIZE (1 << SCOUTFS_BLOCK_SHIFT)
|
||||
#define SCOUTFS_BLOCK_MASK (SCOUTFS_BLOCK_SIZE - 1)
|
||||
#define SCOUTFS_BLOCKS_PER_PAGE (PAGE_SIZE / SCOUTFS_BLOCK_SIZE)
|
||||
#define SCOUTFS_BLOCK_SECTOR_SHIFT (SCOUTFS_BLOCK_SHIFT - 9)
|
||||
#define SCOUTFS_BLOCK_SECTORS (1 << SCOUTFS_BLOCK_SECTOR_SHIFT)
|
||||
#define SCOUTFS_BLOCK_MAX (U64_MAX >> SCOUTFS_BLOCK_SHIFT)
|
||||
#define SCOUTFS_BLOCK_SM_SHIFT 12
|
||||
#define SCOUTFS_BLOCK_SM_SIZE (1 << SCOUTFS_BLOCK_SM_SHIFT)
|
||||
#define SCOUTFS_BLOCK_SM_MASK (SCOUTFS_BLOCK_SM_SIZE - 1)
|
||||
#define SCOUTFS_BLOCK_SM_PER_PAGE (PAGE_SIZE / SCOUTFS_BLOCK_SM_SIZE)
|
||||
#define SCOUTFS_BLOCK_SM_SECTOR_SHIFT (SCOUTFS_BLOCK_SM_SHIFT - 9)
|
||||
#define SCOUTFS_BLOCK_SM_SECTORS (1 << SCOUTFS_BLOCK_SM_SECTOR_SHIFT)
|
||||
#define SCOUTFS_BLOCK_SM_MAX (U64_MAX >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
#define SCOUTFS_BLOCK_SM_PAGES_PER (SCOUTFS_BLOCK_SM_SIZE / PAGE_SIZE)
|
||||
#define SCOUTFS_BLOCK_SM_PAGE_ORDER (SCOUTFS_BLOCK_SM_SHIFT - PAGE_SHIFT)
|
||||
|
||||
/*
|
||||
* The radix and btree structures, and the forest bloom block, use the
|
||||
* larger 64KB metadata block size.
|
||||
*/
|
||||
#define SCOUTFS_BLOCK_LG_SHIFT 16
|
||||
#define SCOUTFS_BLOCK_LG_SIZE (1 << SCOUTFS_BLOCK_LG_SHIFT)
|
||||
#define SCOUTFS_BLOCK_LG_MASK (SCOUTFS_BLOCK_LG_SIZE - 1)
|
||||
#define SCOUTFS_BLOCK_LG_PER_PAGE (PAGE_SIZE / SCOUTFS_BLOCK_LG_SIZE)
|
||||
#define SCOUTFS_BLOCK_LG_SECTOR_SHIFT (SCOUTFS_BLOCK_LG_SHIFT - 9)
|
||||
#define SCOUTFS_BLOCK_LG_SECTORS (1 << SCOUTFS_BLOCK_LG_SECTOR_SHIFT)
|
||||
#define SCOUTFS_BLOCK_LG_MAX (U64_MAX >> SCOUTFS_BLOCK_LG_SHIFT)
|
||||
#define SCOUTFS_BLOCK_LG_PAGES_PER (SCOUTFS_BLOCK_LG_SIZE / PAGE_SIZE)
|
||||
#define SCOUTFS_BLOCK_LG_PAGE_ORDER (SCOUTFS_BLOCK_LG_SHIFT - PAGE_SHIFT)
|
||||
|
||||
#define SCOUTFS_BLOCK_SM_LG_SHIFT (SCOUTFS_BLOCK_LG_SHIFT - \
|
||||
SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
#define SCOUTFS_PAGES_PER_BLOCK (SCOUTFS_BLOCK_SIZE / PAGE_SIZE)
|
||||
#define SCOUTFS_BLOCK_PAGE_ORDER (SCOUTFS_BLOCK_SHIFT - PAGE_SHIFT)
|
||||
|
||||
/*
|
||||
* The super block leaves some room before the first block for platform
|
||||
* structures like boot loaders.
|
||||
*/
|
||||
#define SCOUTFS_SUPER_BLKNO ((64ULL * 1024) >> SCOUTFS_BLOCK_SHIFT)
|
||||
#define SCOUTFS_SUPER_BLKNO ((64ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
/*
|
||||
* A reasonably large region of aligned quorum blocks follow the super
|
||||
@@ -38,8 +61,14 @@
|
||||
* mounts that have a reasonable probability of not overwriting each
|
||||
* other's random block locations.
|
||||
*/
|
||||
#define SCOUTFS_QUORUM_BLKNO ((256ULL * 1024) >> SCOUTFS_BLOCK_SHIFT)
|
||||
#define SCOUTFS_QUORUM_BLOCKS ((256ULL * 1024) >> SCOUTFS_BLOCK_SHIFT)
|
||||
#define SCOUTFS_QUORUM_BLKNO ((256ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
#define SCOUTFS_QUORUM_BLOCKS ((256ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
/*
|
||||
* Start data on the data device aligned as well.
|
||||
*/
|
||||
#define SCOUTFS_DATA_DEV_START_BLKNO ((256ULL * 1024) >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
|
||||
#define SCOUTFS_UNIQUE_NAME_MAX_BYTES 64 /* includes null */
|
||||
|
||||
@@ -49,18 +78,15 @@
|
||||
struct scoutfs_timespec {
|
||||
__le64 sec;
|
||||
__le32 nsec;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_betimespec {
|
||||
__be64 sec;
|
||||
__be32 nsec;
|
||||
} __packed;
|
||||
__u8 __pad[4];
|
||||
};
|
||||
|
||||
/* XXX ipv6 */
|
||||
struct scoutfs_inet_addr {
|
||||
__le32 addr;
|
||||
__le16 port;
|
||||
} __packed;
|
||||
__u8 __pad[2];
|
||||
};
|
||||
|
||||
/*
|
||||
* This header is stored at the start of btree blocks and the super
|
||||
@@ -73,7 +99,7 @@ struct scoutfs_block_header {
|
||||
__le64 fsid;
|
||||
__le64 seq;
|
||||
__le64 blkno;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/*
|
||||
* scoutfs identifies all file system metadata items by a small key
|
||||
@@ -89,23 +115,19 @@ struct scoutfs_block_header {
|
||||
* increment them, subtract them from each other, etc.
|
||||
*/
|
||||
struct scoutfs_key {
|
||||
__u8 sk_zone;
|
||||
__le64 _sk_first;
|
||||
__u8 sk_type;
|
||||
__le64 _sk_second;
|
||||
__le64 _sk_third;
|
||||
__u8 _sk_fourth;
|
||||
}__packed;
|
||||
__u8 sk_zone;
|
||||
__u8 sk_type;
|
||||
__u8 __pad[5];
|
||||
};
|
||||
|
||||
/* inode index */
|
||||
#define skii_major _sk_second
|
||||
#define skii_ino _sk_third
|
||||
|
||||
/* xattr index */
|
||||
#define skxi_hash _sk_first
|
||||
#define skxi_ino _sk_second
|
||||
#define skxi_id _sk_third
|
||||
|
||||
/* node orphan inode */
|
||||
#define sko_rid _sk_first
|
||||
#define sko_ino _sk_second
|
||||
@@ -128,85 +150,46 @@ struct scoutfs_key {
|
||||
#define sks_ino _sk_first
|
||||
#define sks_nr _sk_second
|
||||
|
||||
/* packed extents */
|
||||
#define skpe_ino _sk_first
|
||||
#define skpe_base _sk_second
|
||||
#define skpe_part _sk_fourth
|
||||
/* data extents */
|
||||
#define skdx_ino _sk_first
|
||||
#define skdx_end _sk_second
|
||||
#define skdx_len _sk_third
|
||||
|
||||
struct scoutfs_radix_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
__le32 sm_first;
|
||||
__le32 lg_first;
|
||||
union {
|
||||
struct scoutfs_radix_ref {
|
||||
__le64 blkno;
|
||||
__le64 seq;
|
||||
__le64 sm_total;
|
||||
__le64 lg_total;
|
||||
} __packed refs[0];
|
||||
__le64 bits[0];
|
||||
} __packed;
|
||||
} __packed;
|
||||
/* log trees */
|
||||
#define sklt_rid _sk_first
|
||||
#define sklt_nr _sk_second
|
||||
|
||||
struct scoutfs_radix_root {
|
||||
/* lock clients */
|
||||
#define sklc_rid _sk_first
|
||||
|
||||
/* seqs */
|
||||
#define skts_trans_seq _sk_first
|
||||
#define skts_rid _sk_second
|
||||
|
||||
/* mounted clients */
|
||||
#define skmc_rid _sk_first
|
||||
|
||||
/* free extents by blkno */
|
||||
#define skfb_end _sk_second
|
||||
#define skfb_len _sk_third
|
||||
/* free extents by len */
|
||||
#define skfl_neglen _sk_second
|
||||
#define skfl_blkno _sk_third
|
||||
|
||||
struct scoutfs_avl_root {
|
||||
__le16 node;
|
||||
};
|
||||
|
||||
struct scoutfs_avl_node {
|
||||
__le16 parent;
|
||||
__le16 left;
|
||||
__le16 right;
|
||||
__u8 height;
|
||||
__le64 next_find_bit;
|
||||
struct scoutfs_radix_ref ref;
|
||||
} __packed;
|
||||
__u8 __pad[1];
|
||||
};
|
||||
|
||||
#define SCOUTFS_RADIX_REFS \
|
||||
((SCOUTFS_BLOCK_SIZE - offsetof(struct scoutfs_radix_block, refs[0])) /\
|
||||
sizeof(struct scoutfs_radix_ref))
|
||||
|
||||
/* 8 meg regions with 4k data blocks */
|
||||
#define SCOUTFS_RADIX_LG_SHIFT 11
|
||||
#define SCOUTFS_RADIX_LG_BITS (1 << SCOUTFS_RADIX_LG_SHIFT)
|
||||
#define SCOUTFS_RADIX_LG_MASK (SCOUTFS_RADIX_LG_BITS - 1)
|
||||
|
||||
/* round block bits down to a multiple of large ranges */
|
||||
#define SCOUTFS_RADIX_BITS \
|
||||
(((SCOUTFS_BLOCK_SIZE - \
|
||||
offsetof(struct scoutfs_radix_block, bits[0])) * 8) & \
|
||||
~(__u64)SCOUTFS_RADIX_LG_MASK)
|
||||
#define SCOUTFS_RADIX_BITS_BYTES (SCOUTFS_RADIX_BITS / 8)
|
||||
|
||||
/*
|
||||
* The btree still uses memcmp() to compare keys. We should fix that
|
||||
* before too long.
|
||||
*/
|
||||
struct scoutfs_key_be {
|
||||
__u8 sk_zone;
|
||||
__be64 _sk_first;
|
||||
__u8 sk_type;
|
||||
__be64 _sk_second;
|
||||
__be64 _sk_third;
|
||||
__u8 _sk_fourth;
|
||||
}__packed;
|
||||
|
||||
/* chose reasonable max key lens that have room for some u64s */
|
||||
#define SCOUTFS_BTREE_MAX_KEY_LEN 40
|
||||
/* when we split we want to have multiple items on each side */
|
||||
#define SCOUTFS_BTREE_MAX_VAL_LEN (SCOUTFS_BLOCK_SIZE / 8)
|
||||
|
||||
/*
|
||||
* The min number of free bytes we must leave in a parent as we descend
|
||||
* to modify. This leaves enough free bytes to insert a possibly maximal
|
||||
* sized key as a seperator for a child block. Fewer bytes then this
|
||||
* and split/merge might try to insert a max child item in the parent
|
||||
* that wouldn't fit.
|
||||
*/
|
||||
#define SCOUTFS_BTREE_PARENT_MIN_FREE_BYTES \
|
||||
(sizeof(struct scoutfs_btree_item_header) + \
|
||||
sizeof(struct scoutfs_btree_item) + SCOUTFS_BTREE_MAX_KEY_LEN +\
|
||||
sizeof(struct scoutfs_btree_ref))
|
||||
|
||||
/*
|
||||
* When debugging we can tune the splitting and merging thresholds to
|
||||
* create much larger trees by having blocks with many fewer items. We
|
||||
* implement this by pretending the blocks are tiny. They're still
|
||||
* large enough for a handful of items.
|
||||
*/
|
||||
#define SCOUTFS_BTREE_TINY_BLOCK_SIZE 512
|
||||
#define SCOUTFS_BTREE_MAX_VAL_LEN 896
|
||||
|
||||
/*
|
||||
* A 4EB test image measured a worst case height of 17. This is plenty
|
||||
@@ -217,7 +200,7 @@ struct scoutfs_key_be {
|
||||
struct scoutfs_btree_ref {
|
||||
__le64 blkno;
|
||||
__le64 seq;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/*
|
||||
* A height of X means that the first block read will have level X-1 and
|
||||
@@ -226,91 +209,230 @@ struct scoutfs_btree_ref {
|
||||
struct scoutfs_btree_root {
|
||||
struct scoutfs_btree_ref ref;
|
||||
__u8 height;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_btree_item_header {
|
||||
__le32 off;
|
||||
} __packed;
|
||||
__u8 __pad[7];
|
||||
};
|
||||
|
||||
struct scoutfs_btree_item {
|
||||
__le16 key_len;
|
||||
struct scoutfs_avl_node node;
|
||||
struct scoutfs_key key;
|
||||
__le16 val_off;
|
||||
__le16 val_len;
|
||||
__u8 data[0];
|
||||
} __packed;
|
||||
__u8 __pad[4];
|
||||
};
|
||||
|
||||
struct scoutfs_btree_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
__le32 free_end;
|
||||
__le32 nr_items;
|
||||
struct scoutfs_avl_root item_root;
|
||||
__le16 nr_items;
|
||||
__le16 total_item_bytes;
|
||||
__le16 mid_free_len;
|
||||
__u8 level;
|
||||
struct scoutfs_btree_item_header item_hdrs[0];
|
||||
} __packed;
|
||||
__u8 __pad[7];
|
||||
struct scoutfs_btree_item items[0];
|
||||
/* leaf blocks have a fixed size item offset hash table at the end */
|
||||
};
|
||||
|
||||
#define SCOUTFS_BTREE_VALUE_ALIGN 8
|
||||
|
||||
/*
|
||||
* The lock server keeps a persistent record of connected clients so that
|
||||
* server failover knows who to wait for before resuming operations.
|
||||
* Try to aim for a 75% load in a leaf full of items with no value.
|
||||
* We'll almost never see this because most items have values and most
|
||||
* blocks aren't full.
|
||||
*/
|
||||
struct scoutfs_lock_client_btree_key {
|
||||
__be64 rid;
|
||||
} __packed;
|
||||
#define SCOUTFS_BTREE_LEAF_ITEM_HASH_NR_UNALIGNED \
|
||||
((SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_btree_block)) / \
|
||||
(sizeof(struct scoutfs_btree_item) + (sizeof(__le16))) * 100 / 75)
|
||||
#define SCOUTFS_BTREE_LEAF_ITEM_HASH_NR \
|
||||
(round_up(SCOUTFS_BTREE_LEAF_ITEM_HASH_NR_UNALIGNED, \
|
||||
SCOUTFS_BTREE_VALUE_ALIGN))
|
||||
#define SCOUTFS_BTREE_LEAF_ITEM_HASH_BYTES \
|
||||
(SCOUTFS_BTREE_LEAF_ITEM_HASH_NR * sizeof(__le16))
|
||||
|
||||
struct scoutfs_alloc_list_ref {
|
||||
__le64 blkno;
|
||||
__le64 seq;
|
||||
};
|
||||
|
||||
/*
|
||||
* The server tracks transaction sequence numbers that clients have
|
||||
* open. This limits results that can be returned from the seq indices.
|
||||
* first_nr tracks the nr of the first block in the list and is used for
|
||||
* allocation sizing. total_nr is the sum of the nr of all the blocks in
|
||||
* the list and is used for calculating total free block counts.
|
||||
*/
|
||||
struct scoutfs_trans_seq_btree_key {
|
||||
__be64 trans_seq;
|
||||
__be64 rid;
|
||||
} __packed;
|
||||
struct scoutfs_alloc_list_head {
|
||||
struct scoutfs_alloc_list_ref ref;
|
||||
__le64 total_nr;
|
||||
__le32 first_nr;
|
||||
__u8 __pad[4];
|
||||
};
|
||||
|
||||
/*
|
||||
* The server keeps a persistent record of mounted clients.
|
||||
* While the main allocator uses extent items in btree blocks, metadata
|
||||
* allocations for a single transaction are recorded in arrays in
|
||||
* blocks. This limits the number of allocations and frees needed to
|
||||
* cow and modify the structure. The blocks can be stored in a list
|
||||
* which lets us create a persistent log of pending frees that are
|
||||
* generated as we cow btree blocks to insert freed extents.
|
||||
*
|
||||
* The array floats in the block so that both adding and removing blknos
|
||||
* only modifies an index.
|
||||
*/
|
||||
struct scoutfs_mounted_client_btree_key {
|
||||
__be64 rid;
|
||||
} __packed;
|
||||
struct scoutfs_alloc_list_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
struct scoutfs_alloc_list_ref next;
|
||||
__le32 start;
|
||||
__le32 nr;
|
||||
__le64 blknos[0]; /* naturally aligned for sorting */
|
||||
};
|
||||
|
||||
#define SCOUTFS_ALLOC_LIST_MAX_BLOCKS \
|
||||
((SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_alloc_list_block)) / \
|
||||
(member_sizeof(struct scoutfs_alloc_list_block, blknos[0])))
|
||||
|
||||
/*
|
||||
* These can safely be initialized to all-zeros.
|
||||
*/
|
||||
struct scoutfs_alloc_root {
|
||||
__le64 total_len;
|
||||
struct scoutfs_btree_root root;
|
||||
};
|
||||
|
||||
/* types of allocators, exposed to alloc_detail ioctl */
|
||||
#define SCOUTFS_ALLOC_OWNER_NONE 0
|
||||
#define SCOUTFS_ALLOC_OWNER_SERVER 1
|
||||
#define SCOUTFS_ALLOC_OWNER_MOUNT 2
|
||||
#define SCOUTFS_ALLOC_OWNER_SRCH 3
|
||||
|
||||
struct scoutfs_mounted_client_btree_val {
|
||||
__u8 flags;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
#define SCOUTFS_MOUNTED_CLIENT_VOTER (1 << 0)
|
||||
|
||||
/*
|
||||
* srch files are a contiguous run of blocks with compressed entries
|
||||
* described by a dense parent radix. The files can be stored in
|
||||
* log_tree items when the files contain unsorted entries written by
|
||||
* mounts during their transactions. Sorted files of increasing size
|
||||
* are kept in a btree off the super for searching and further
|
||||
* compacting.
|
||||
*/
|
||||
struct scoutfs_srch_entry {
|
||||
__le64 hash;
|
||||
__le64 ino;
|
||||
__le64 id;
|
||||
};
|
||||
|
||||
#define SCOUTFS_SRCH_ENTRY_MAX_BYTES (2 + (sizeof(__u64) * 3))
|
||||
|
||||
struct scoutfs_srch_ref {
|
||||
__le64 blkno;
|
||||
__le64 seq;
|
||||
};
|
||||
|
||||
struct scoutfs_srch_file {
|
||||
struct scoutfs_srch_entry first;
|
||||
struct scoutfs_srch_entry last;
|
||||
struct scoutfs_srch_ref ref;
|
||||
__le64 blocks;
|
||||
__le64 entries;
|
||||
__u8 height;
|
||||
__u8 __pad[7];
|
||||
};
|
||||
|
||||
struct scoutfs_srch_parent {
|
||||
struct scoutfs_block_header hdr;
|
||||
struct scoutfs_srch_ref refs[0];
|
||||
};
|
||||
|
||||
#define SCOUTFS_SRCH_PARENT_REFS \
|
||||
((SCOUTFS_BLOCK_LG_SIZE - \
|
||||
offsetof(struct scoutfs_srch_parent, refs)) / \
|
||||
sizeof(struct scoutfs_srch_ref))
|
||||
|
||||
struct scoutfs_srch_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
struct scoutfs_srch_entry first;
|
||||
struct scoutfs_srch_entry last;
|
||||
struct scoutfs_srch_entry tail;
|
||||
__le32 entry_nr;
|
||||
__le32 entry_bytes;
|
||||
__u8 entries[0];
|
||||
};
|
||||
|
||||
/*
|
||||
* Decoding loads final small deltas with full __u64 loads. Rather than
|
||||
* check the size before each load we stop coding entries past the point
|
||||
* where a full size entry could overflow the block. A final entry can
|
||||
* start at this byte count and consume the rest of the block, though
|
||||
* its unlikely.
|
||||
*/
|
||||
#define SCOUTFS_SRCH_BLOCK_SAFE_BYTES \
|
||||
(SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_srch_block) - \
|
||||
SCOUTFS_SRCH_ENTRY_MAX_BYTES)
|
||||
|
||||
#define SCOUTFS_SRCH_LOG_BLOCK_LIMIT (1024 * 1024 / SCOUTFS_BLOCK_LG_SIZE)
|
||||
#define SCOUTFS_SRCH_COMPACT_ORDER 2
|
||||
#define SCOUTFS_SRCH_COMPACT_NR (1 << SCOUTFS_SRCH_COMPACT_ORDER)
|
||||
|
||||
/*
|
||||
* A persistent record of a srch file compaction operation in progress.
|
||||
*
|
||||
* When compacting log files blk and pos aren't used. When compacting
|
||||
* sorted files blk is the logical block number and pos is the byte
|
||||
* offset of the next entry. When deleting files pos is the height of
|
||||
* the level that we're deleting, and blk is the logical block offset of
|
||||
* the next parent ref array index to descend through.
|
||||
*/
|
||||
struct scoutfs_srch_compact {
|
||||
struct scoutfs_alloc_list_head meta_avail;
|
||||
struct scoutfs_alloc_list_head meta_freed;
|
||||
__le64 id;
|
||||
__u8 nr;
|
||||
__u8 flags;
|
||||
__u8 __pad[6];
|
||||
struct scoutfs_srch_file out;
|
||||
struct scoutfs_srch_compact_input {
|
||||
struct scoutfs_srch_file sfl;
|
||||
__le64 blk;
|
||||
__le64 pos;
|
||||
} in[SCOUTFS_SRCH_COMPACT_NR];
|
||||
};
|
||||
|
||||
/* server -> client: combine input log file entries into output file */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_LOG (1 << 0)
|
||||
/* server -> client: combine input sorted file entries into output file */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_SORTED (1 << 1)
|
||||
/* server -> client: delete input files */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_DELETE (1 << 2)
|
||||
/* client -> server: compaction phase (LOG,SORTED,DELETE) done */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_DONE (1 << 4)
|
||||
/* client -> server: compaction failed */
|
||||
#define SCOUTFS_SRCH_COMPACT_FLAG_ERROR (1 << 5)
|
||||
|
||||
/*
|
||||
* XXX I imagine we should rename these now that they've evolved to track
|
||||
* all the btrees that clients use during a transaction. It's not just
|
||||
* about item logs, it's about clients making changes to trees.
|
||||
*/
|
||||
struct scoutfs_log_trees {
|
||||
struct scoutfs_radix_root meta_avail;
|
||||
struct scoutfs_radix_root meta_freed;
|
||||
struct scoutfs_alloc_list_head meta_avail;
|
||||
struct scoutfs_alloc_list_head meta_freed;
|
||||
struct scoutfs_btree_root item_root;
|
||||
struct scoutfs_btree_ref bloom_ref;
|
||||
struct scoutfs_radix_root data_avail;
|
||||
struct scoutfs_radix_root data_freed;
|
||||
struct scoutfs_alloc_root data_avail;
|
||||
struct scoutfs_alloc_root data_freed;
|
||||
struct scoutfs_srch_file srch_file;
|
||||
__le64 max_item_vers;
|
||||
__le64 rid;
|
||||
__le64 nr;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_log_trees_key {
|
||||
__be64 rid;
|
||||
__be64 nr;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_log_trees_val {
|
||||
struct scoutfs_radix_root meta_avail;
|
||||
struct scoutfs_radix_root meta_freed;
|
||||
struct scoutfs_btree_root item_root;
|
||||
struct scoutfs_btree_ref bloom_ref;
|
||||
struct scoutfs_radix_root data_avail;
|
||||
struct scoutfs_radix_root data_freed;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
struct scoutfs_log_item_value {
|
||||
__le64 vers;
|
||||
__u8 flags;
|
||||
__u8 __pad[7];
|
||||
__u8 data[0];
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/*
|
||||
* FS items are limited by the max btree value length with the log item
|
||||
@@ -325,7 +447,7 @@ struct scoutfs_bloom_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
__le64 total_set;
|
||||
__le64 bits[0];
|
||||
} __packed;
|
||||
};
|
||||
|
||||
/*
|
||||
* Item log trees are accompanied by a block of bits that make up a
|
||||
@@ -334,30 +456,33 @@ struct scoutfs_bloom_block {
|
||||
* before the bloom filters fill up and start returning excessive false
|
||||
* positives.
|
||||
*/
|
||||
#define SCOUTFS_FOREST_BLOOM_NRS 7
|
||||
#define SCOUTFS_FOREST_BLOOM_NRS 3
|
||||
#define SCOUTFS_FOREST_BLOOM_BITS \
|
||||
(((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_bloom_block)) / \
|
||||
member_sizeof(struct scoutfs_bloom_block, bits[0])) * \
|
||||
member_sizeof(struct scoutfs_bloom_block, bits[0]) * 8) \
|
||||
(((SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_bloom_block)) / \
|
||||
member_sizeof(struct scoutfs_bloom_block, bits[0])) * \
|
||||
member_sizeof(struct scoutfs_bloom_block, bits[0]) * 8)
|
||||
#define SCOUTFS_FOREST_BLOOM_FUNC_BITS (SCOUTFS_BLOCK_LG_SHIFT + 3)
|
||||
|
||||
/*
|
||||
* Keys are first sorted by major key zones.
|
||||
*/
|
||||
#define SCOUTFS_INODE_INDEX_ZONE 1
|
||||
#define SCOUTFS_XATTR_INDEX_ZONE 2
|
||||
#define SCOUTFS_RID_ZONE 3
|
||||
#define SCOUTFS_FS_ZONE 4
|
||||
#define SCOUTFS_LOCK_ZONE 5
|
||||
#define SCOUTFS_MAX_ZONE 8 /* power of 2 is efficient */
|
||||
#define SCOUTFS_RID_ZONE 2
|
||||
#define SCOUTFS_FS_ZONE 3
|
||||
#define SCOUTFS_LOCK_ZONE 4
|
||||
/* Items only stored in server btrees */
|
||||
#define SCOUTFS_LOG_TREES_ZONE 6
|
||||
#define SCOUTFS_LOCK_CLIENTS_ZONE 7
|
||||
#define SCOUTFS_TRANS_SEQ_ZONE 8
|
||||
#define SCOUTFS_MOUNTED_CLIENT_ZONE 9
|
||||
#define SCOUTFS_SRCH_ZONE 10
|
||||
#define SCOUTFS_FREE_EXTENT_ZONE 11
|
||||
|
||||
/* inode index zone */
|
||||
#define SCOUTFS_INODE_INDEX_META_SEQ_TYPE 1
|
||||
#define SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE 2
|
||||
#define SCOUTFS_INODE_INDEX_NR 3 /* don't forget to update */
|
||||
|
||||
/* xattr index zone */
|
||||
#define SCOUTFS_XATTR_INDEX_NAME_TYPE 1
|
||||
|
||||
/* rid zone (also used in server alloc btree) */
|
||||
#define SCOUTFS_ORPHAN_TYPE 1
|
||||
|
||||
@@ -368,44 +493,27 @@ struct scoutfs_bloom_block {
|
||||
#define SCOUTFS_READDIR_TYPE 4
|
||||
#define SCOUTFS_LINK_BACKREF_TYPE 5
|
||||
#define SCOUTFS_SYMLINK_TYPE 6
|
||||
#define SCOUTFS_PACKED_EXTENT_TYPE 7
|
||||
#define SCOUTFS_DATA_EXTENT_TYPE 7
|
||||
|
||||
/* lock zone, only ever found in lock ranges, never in persistent items */
|
||||
#define SCOUTFS_RENAME_TYPE 1
|
||||
|
||||
#define SCOUTFS_MAX_TYPE 8 /* power of 2 is efficient */
|
||||
/* srch zone, only in server btrees */
|
||||
#define SCOUTFS_SRCH_LOG_TYPE 1
|
||||
#define SCOUTFS_SRCH_BLOCKS_TYPE 2
|
||||
#define SCOUTFS_SRCH_PENDING_TYPE 3
|
||||
#define SCOUTFS_SRCH_BUSY_TYPE 4
|
||||
|
||||
/* free extents in allocator btrees in client and server, by blkno or len */
|
||||
#define SCOUTFS_FREE_EXTENT_BLKNO_TYPE 1
|
||||
#define SCOUTFS_FREE_EXTENT_LEN_TYPE 2
|
||||
|
||||
/*
|
||||
* The extents that map blocks in a fixed-size logical region of a file
|
||||
* are packed and stored in item values. The packed extents are
|
||||
* contiguous so the starting logical block is implicit from the length
|
||||
* of previous extents. Sparse regions are represented by 0 flags and
|
||||
* blkno. The blkno of a packed extent is encoded as the zigzag (lsb is
|
||||
* sign bit) difference from the last blkno of the previous extent.
|
||||
* This guarantees that non-sparse extents must have a blkno delta of at
|
||||
* least -1/1. High zero byte aren't stored.
|
||||
*/
|
||||
struct scoutfs_packed_extent {
|
||||
__le16 count;
|
||||
#if defined(__LITTLE_ENDIAN_BITFIELD)
|
||||
__u8 diff_bytes:4,
|
||||
flags:3,
|
||||
final:1;
|
||||
#elif defined(__BIG_ENDIAN_BITFIELD)
|
||||
__u8 final:1,
|
||||
flags:3,
|
||||
diff_bytes:4;
|
||||
#else
|
||||
#error "no {BIG,LITTLE}_ENDIAN_BITFIELD defined?"
|
||||
#endif
|
||||
__u8 le_blkno_diff[0];
|
||||
} __packed;
|
||||
|
||||
#define SCOUTFS_PACKEXT_BLOCKS (8 * 1024 * 1024 / SCOUTFS_BLOCK_SIZE)
|
||||
#define SCOUTFS_PACKEXT_BASE_SHIFT (ilog2(SCOUTFS_PACKEXT_BLOCKS))
|
||||
#define SCOUTFS_PACKEXT_BASE_MASK (~((__u64)SCOUTFS_PACKEXT_BLOCKS - 1))
|
||||
#define SCOUTFS_PACKEXT_MAX_BYTES SCOUTFS_MAX_VAL_SIZE
|
||||
/* file data extents have start and len in key */
|
||||
struct scoutfs_data_extent_val {
|
||||
__le64 blkno;
|
||||
__u8 flags;
|
||||
__u8 __pad[7];
|
||||
};
|
||||
|
||||
#define SEF_OFFLINE (1 << 0)
|
||||
#define SEF_UNWRITTEN (1 << 1)
|
||||
@@ -417,10 +525,11 @@ struct scoutfs_packed_extent {
|
||||
* part item and overflow into the values of the rest of the part items.
|
||||
*/
|
||||
struct scoutfs_xattr {
|
||||
__u8 name_len;
|
||||
__le16 val_len;
|
||||
__u8 name_len;
|
||||
__u8 __pad[5];
|
||||
__u8 name[0];
|
||||
} __packed;
|
||||
};
|
||||
|
||||
|
||||
/* XXX does this exist upstream somewhere? */
|
||||
@@ -460,47 +569,51 @@ struct scoutfs_quorum_block {
|
||||
__le64 vote_for_rid;
|
||||
__le32 crc;
|
||||
__u8 log_nr;
|
||||
__u8 __pad[3];
|
||||
struct scoutfs_quorum_log {
|
||||
__le64 term;
|
||||
__le64 rid;
|
||||
struct scoutfs_inet_addr addr;
|
||||
} __packed log[0];
|
||||
} __packed;
|
||||
} log[0];
|
||||
};
|
||||
|
||||
#define SCOUTFS_QUORUM_LOG_MAX \
|
||||
((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_quorum_block)) / \
|
||||
#define SCOUTFS_QUORUM_LOG_MAX \
|
||||
((SCOUTFS_BLOCK_SM_SIZE - sizeof(struct scoutfs_quorum_block)) / \
|
||||
sizeof(struct scoutfs_quorum_log))
|
||||
|
||||
#define SCOUTFS_FLAG_IS_META_BDEV 0x01
|
||||
|
||||
struct scoutfs_super_block {
|
||||
struct scoutfs_block_header hdr;
|
||||
__le64 id;
|
||||
__le64 format_hash;
|
||||
__le64 version;
|
||||
__le64 flags;
|
||||
__u8 uuid[SCOUTFS_UUID_BYTES];
|
||||
__le64 next_ino;
|
||||
__le64 next_trans_seq;
|
||||
__le64 total_meta_blocks; /* both static and dynamic */
|
||||
__le64 first_meta_blkno; /* first dynamically allocated */
|
||||
__le64 last_meta_blkno;
|
||||
__le64 free_meta_blocks;
|
||||
__le64 total_data_blocks;
|
||||
__le64 first_data_blkno;
|
||||
__le64 last_data_blkno;
|
||||
__le64 free_data_blocks;
|
||||
__le64 quorum_fenced_term;
|
||||
__le64 quorum_server_term;
|
||||
__le64 unmount_barrier;
|
||||
__u8 quorum_count;
|
||||
__u8 __pad[7];
|
||||
struct scoutfs_inet_addr server_addr;
|
||||
struct scoutfs_radix_root core_meta_avail;
|
||||
struct scoutfs_radix_root core_meta_freed;
|
||||
struct scoutfs_radix_root core_data_avail;
|
||||
struct scoutfs_radix_root core_data_freed;
|
||||
struct scoutfs_alloc_root meta_alloc[2];
|
||||
struct scoutfs_alloc_root data_alloc;
|
||||
struct scoutfs_alloc_list_head server_meta_avail[2];
|
||||
struct scoutfs_alloc_list_head server_meta_freed[2];
|
||||
struct scoutfs_btree_root fs_root;
|
||||
struct scoutfs_btree_root logs_root;
|
||||
struct scoutfs_btree_root lock_clients;
|
||||
struct scoutfs_btree_root trans_seqs;
|
||||
struct scoutfs_btree_root mounted_clients;
|
||||
} __packed;
|
||||
struct scoutfs_btree_root srch_root;
|
||||
};
|
||||
|
||||
#define SCOUTFS_ROOT_INO 1
|
||||
|
||||
@@ -549,7 +662,7 @@ struct scoutfs_inode {
|
||||
struct scoutfs_timespec atime;
|
||||
struct scoutfs_timespec ctime;
|
||||
struct scoutfs_timespec mtime;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
#define SCOUTFS_INO_FLAG_TRUNCATE 0x1
|
||||
|
||||
@@ -571,8 +684,9 @@ struct scoutfs_dirent {
|
||||
__le64 hash;
|
||||
__le64 pos;
|
||||
__u8 type;
|
||||
__u8 __pad[7];
|
||||
__u8 name[0];
|
||||
} __packed;
|
||||
};
|
||||
|
||||
#define SCOUTFS_NAME_LEN 255
|
||||
|
||||
@@ -584,7 +698,7 @@ struct scoutfs_dirent {
|
||||
/* getdents returns next pos with an entry, no entry at (f_pos)~0 */
|
||||
#define SCOUTFS_DIRENT_LAST_POS (U64_MAX - 1)
|
||||
|
||||
enum {
|
||||
enum scoutfs_dentry_type {
|
||||
SCOUTFS_DT_FIFO = 0,
|
||||
SCOUTFS_DT_CHR,
|
||||
SCOUTFS_DT_DIR,
|
||||
@@ -635,12 +749,12 @@ enum {
|
||||
*/
|
||||
struct scoutfs_net_greeting {
|
||||
__le64 fsid;
|
||||
__le64 format_hash;
|
||||
__le64 version;
|
||||
__le64 server_term;
|
||||
__le64 unmount_barrier;
|
||||
__le64 rid;
|
||||
__le64 flags;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
#define SCOUTFS_NET_GREETING_FLAG_FAREWELL (1 << 0)
|
||||
#define SCOUTFS_NET_GREETING_FLAG_VOTER (1 << 1)
|
||||
@@ -675,22 +789,25 @@ struct scoutfs_net_header {
|
||||
__u8 cmd;
|
||||
__u8 flags;
|
||||
__u8 error;
|
||||
__u8 __pad[3];
|
||||
__u8 data[0];
|
||||
} __packed;
|
||||
};
|
||||
|
||||
#define SCOUTFS_NET_FLAG_RESPONSE (1 << 0)
|
||||
#define SCOUTFS_NET_FLAGS_UNKNOWN (U8_MAX << 1)
|
||||
|
||||
enum {
|
||||
enum scoutfs_net_cmd {
|
||||
SCOUTFS_NET_CMD_GREETING = 0,
|
||||
SCOUTFS_NET_CMD_ALLOC_INODES,
|
||||
SCOUTFS_NET_CMD_GET_LOG_TREES,
|
||||
SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
|
||||
SCOUTFS_NET_CMD_GET_ROOTS,
|
||||
SCOUTFS_NET_CMD_ADVANCE_SEQ,
|
||||
SCOUTFS_NET_CMD_GET_LAST_SEQ,
|
||||
SCOUTFS_NET_CMD_STATFS,
|
||||
SCOUTFS_NET_CMD_LOCK,
|
||||
SCOUTFS_NET_CMD_LOCK_RECOVER,
|
||||
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
|
||||
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
|
||||
SCOUTFS_NET_CMD_FAREWELL,
|
||||
SCOUTFS_NET_CMD_UNKNOWN,
|
||||
};
|
||||
@@ -709,7 +826,7 @@ enum {
|
||||
|
||||
#undef EXPAND_NET_ERRNO
|
||||
#define EXPAND_NET_ERRNO(which) SCOUTFS_NET_ERR_##which,
|
||||
enum {
|
||||
enum scoutfs_net_errors {
|
||||
SCOUTFS_NET_ERR_NONE = 0,
|
||||
EXPAND_EACH_NET_ERRNO
|
||||
SCOUTFS_NET_ERR_UNKNOWN,
|
||||
@@ -725,33 +842,39 @@ enum {
|
||||
struct scoutfs_net_inode_alloc {
|
||||
__le64 ino;
|
||||
__le64 nr;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
struct scoutfs_net_statfs {
|
||||
__le64 total_blocks; /* total blocks in device */
|
||||
__le64 next_ino; /* next unused inode number */
|
||||
__le64 bfree; /* free blocks */
|
||||
__u8 uuid[SCOUTFS_UUID_BYTES]; /* logical volume uuid */
|
||||
} __packed;
|
||||
struct scoutfs_net_roots {
|
||||
struct scoutfs_btree_root fs_root;
|
||||
struct scoutfs_btree_root logs_root;
|
||||
struct scoutfs_btree_root srch_root;
|
||||
};
|
||||
|
||||
struct scoutfs_net_lock {
|
||||
struct scoutfs_key key;
|
||||
__le64 write_version;
|
||||
__u8 old_mode;
|
||||
__u8 new_mode;
|
||||
} __packed;
|
||||
__u8 __pad[6];
|
||||
};
|
||||
|
||||
struct scoutfs_net_lock_grant_response {
|
||||
struct scoutfs_net_lock nl;
|
||||
struct scoutfs_net_roots roots;
|
||||
};
|
||||
|
||||
struct scoutfs_net_lock_recover {
|
||||
__le16 nr;
|
||||
__u8 __pad[6];
|
||||
struct scoutfs_net_lock locks[0];
|
||||
} __packed;
|
||||
};
|
||||
|
||||
#define SCOUTFS_NET_LOCK_MAX_RECOVER_NR \
|
||||
((SCOUTFS_NET_MAX_DATA_LEN - sizeof(struct scoutfs_net_lock_recover)) /\
|
||||
sizeof(struct scoutfs_net_lock))
|
||||
|
||||
/* some enums for tracing */
|
||||
enum {
|
||||
enum scoutfs_lock_trace {
|
||||
SLT_CLIENT,
|
||||
SLT_SERVER,
|
||||
SLT_GRANT,
|
||||
@@ -772,7 +895,7 @@ enum {
|
||||
*
|
||||
* The null mode provides no access and is used to destroy locks.
|
||||
*/
|
||||
enum {
|
||||
enum scoutfs_lock_mode {
|
||||
SCOUTFS_LOCK_NULL = 0,
|
||||
SCOUTFS_LOCK_READ,
|
||||
SCOUTFS_LOCK_WRITE,
|
||||
@@ -787,7 +910,7 @@ enum {
|
||||
struct scoutfs_fid {
|
||||
__le64 ino;
|
||||
__le64 parent_ino;
|
||||
} __packed;
|
||||
};
|
||||
|
||||
#define FILEID_SCOUTFS 0x81
|
||||
#define FILEID_SCOUTFS_WITH_PARENT 0x82
|
||||
@@ -795,7 +918,7 @@ struct scoutfs_fid {
|
||||
/*
|
||||
* Identifiers for sources of corruption that can generate messages.
|
||||
*/
|
||||
enum {
|
||||
enum scoutfs_corruption_sources {
|
||||
SC_DIRENT_NAME_LEN = 0,
|
||||
SC_DIRENT_BACKREF_NAME_LEN,
|
||||
SC_DIRENT_READDIR_NAME_LEN,
|
||||
|
||||
@@ -1,15 +1,49 @@
|
||||
#ifndef _SCOUTFS_HASH_H_
|
||||
#define _SCOUTFS_HASH_H_
|
||||
|
||||
#include <linux/crc32c.h>
|
||||
/*
|
||||
* We're using FNV1a for now. It's fine. Ish.
|
||||
*
|
||||
* The longer term plan is xxh3 but it looks like it'll take just a bit
|
||||
* more time to be declared stable and then it needs to be ported to the
|
||||
* kernel.
|
||||
*
|
||||
* - https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
|
||||
* - https://github.com/Cyan4973/xxHash/releases/tag/v0.7.4
|
||||
*/
|
||||
|
||||
static inline u32 fnv1a32(const void *data, unsigned int len)
|
||||
{
|
||||
u32 hash = 0x811c9dc5;
|
||||
|
||||
while (len--) {
|
||||
hash ^= *(u8 *)(data++);
|
||||
hash *= 0x01000193;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
static inline u64 fnv1a64(const void *data, unsigned int len)
|
||||
{
|
||||
u64 hash = 0xcbf29ce484222325ULL;
|
||||
|
||||
while (len--) {
|
||||
hash ^= *(u8 *)(data++);
|
||||
hash *= 0x100000001b3ULL;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
static inline u32 scoutfs_hash32(const void *data, unsigned int len)
|
||||
{
|
||||
return fnv1a32(data, len);
|
||||
}
|
||||
|
||||
/* XXX replace with xxhash */
|
||||
static inline u64 scoutfs_hash64(const void *data, unsigned int len)
|
||||
{
|
||||
unsigned int half = (len + 1) / 2;
|
||||
|
||||
return crc32c(~0, data, half) |
|
||||
((u64)crc32c(~0, data + len - half, half) << 32);
|
||||
return fnv1a64(data, len);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
199
kmod/src/inode.c
199
kmod/src/inode.c
@@ -30,8 +30,7 @@
|
||||
#include "xattr.h"
|
||||
#include "trans.h"
|
||||
#include "msg.h"
|
||||
#include "kvec.h"
|
||||
#include "forest.h"
|
||||
#include "item.h"
|
||||
#include "client.h"
|
||||
#include "cmp.h"
|
||||
|
||||
@@ -47,9 +46,17 @@
|
||||
* - describe data locking size problems
|
||||
*/
|
||||
|
||||
struct inode_allocator {
|
||||
spinlock_t lock;
|
||||
u64 ino;
|
||||
u64 nr;
|
||||
};
|
||||
|
||||
struct inode_sb_info {
|
||||
spinlock_t writeback_lock;
|
||||
struct rb_root writeback_inodes;
|
||||
struct inode_allocator dir_ino_alloc;
|
||||
struct inode_allocator ino_alloc;
|
||||
};
|
||||
|
||||
#define DECLARE_INODE_SB_INFO(sb, name) \
|
||||
@@ -64,30 +71,30 @@ static struct kmem_cache *scoutfs_inode_cachep;
|
||||
*/
|
||||
static void scoutfs_inode_ctor(void *obj)
|
||||
{
|
||||
struct scoutfs_inode_info *ci = obj;
|
||||
struct scoutfs_inode_info *si = obj;
|
||||
|
||||
mutex_init(&ci->item_mutex);
|
||||
seqcount_init(&ci->seqcount);
|
||||
ci->staging = false;
|
||||
scoutfs_per_task_init(&ci->pt_data_lock);
|
||||
atomic64_set(&ci->data_waitq.changed, 0);
|
||||
init_waitqueue_head(&ci->data_waitq.waitq);
|
||||
init_rwsem(&ci->xattr_rwsem);
|
||||
RB_CLEAR_NODE(&ci->writeback_node);
|
||||
spin_lock_init(&ci->ino_alloc.lock);
|
||||
init_rwsem(&si->extent_sem);
|
||||
mutex_init(&si->item_mutex);
|
||||
seqcount_init(&si->seqcount);
|
||||
si->staging = false;
|
||||
scoutfs_per_task_init(&si->pt_data_lock);
|
||||
atomic64_set(&si->data_waitq.changed, 0);
|
||||
init_waitqueue_head(&si->data_waitq.waitq);
|
||||
init_rwsem(&si->xattr_rwsem);
|
||||
RB_CLEAR_NODE(&si->writeback_node);
|
||||
|
||||
inode_init_once(&ci->inode);
|
||||
inode_init_once(&si->inode);
|
||||
}
|
||||
|
||||
struct inode *scoutfs_alloc_inode(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_inode_info *ci;
|
||||
struct scoutfs_inode_info *si;
|
||||
|
||||
ci = kmem_cache_alloc(scoutfs_inode_cachep, GFP_NOFS);
|
||||
if (!ci)
|
||||
si = kmem_cache_alloc(scoutfs_inode_cachep, GFP_NOFS);
|
||||
if (!si)
|
||||
return NULL;
|
||||
|
||||
return &ci->inode;
|
||||
return &si->inode;
|
||||
}
|
||||
|
||||
static void scoutfs_i_callback(struct rcu_head *head)
|
||||
@@ -215,7 +222,7 @@ static void set_item_info(struct scoutfs_inode_info *si,
|
||||
|
||||
static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
{
|
||||
struct scoutfs_inode_info *ci = SCOUTFS_I(inode);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
i_size_write(inode, le64_to_cpu(cinode->size));
|
||||
set_nlink(inode, le32_to_cpu(cinode->nlink));
|
||||
@@ -230,23 +237,23 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
inode->i_ctime.tv_sec = le64_to_cpu(cinode->ctime.sec);
|
||||
inode->i_ctime.tv_nsec = le32_to_cpu(cinode->ctime.nsec);
|
||||
|
||||
ci->meta_seq = le64_to_cpu(cinode->meta_seq);
|
||||
ci->data_seq = le64_to_cpu(cinode->data_seq);
|
||||
ci->data_version = le64_to_cpu(cinode->data_version);
|
||||
ci->online_blocks = le64_to_cpu(cinode->online_blocks);
|
||||
ci->offline_blocks = le64_to_cpu(cinode->offline_blocks);
|
||||
ci->next_readdir_pos = le64_to_cpu(cinode->next_readdir_pos);
|
||||
ci->next_xattr_id = le64_to_cpu(cinode->next_xattr_id);
|
||||
ci->flags = le32_to_cpu(cinode->flags);
|
||||
si->meta_seq = le64_to_cpu(cinode->meta_seq);
|
||||
si->data_seq = le64_to_cpu(cinode->data_seq);
|
||||
si->data_version = le64_to_cpu(cinode->data_version);
|
||||
si->online_blocks = le64_to_cpu(cinode->online_blocks);
|
||||
si->offline_blocks = le64_to_cpu(cinode->offline_blocks);
|
||||
si->next_readdir_pos = le64_to_cpu(cinode->next_readdir_pos);
|
||||
si->next_xattr_id = le64_to_cpu(cinode->next_xattr_id);
|
||||
si->flags = le32_to_cpu(cinode->flags);
|
||||
|
||||
/*
|
||||
* i_blocks is initialized from online and offline and is then
|
||||
* maintained as blocks come and go.
|
||||
*/
|
||||
inode->i_blocks = (ci->online_blocks + ci->offline_blocks)
|
||||
<< SCOUTFS_BLOCK_SECTOR_SHIFT;
|
||||
inode->i_blocks = (si->online_blocks + si->offline_blocks)
|
||||
<< SCOUTFS_BLOCK_SM_SECTOR_SHIFT;
|
||||
|
||||
set_item_info(ci, cinode);
|
||||
set_item_info(si, cinode);
|
||||
}
|
||||
|
||||
static void init_inode_key(struct scoutfs_key *key, u64 ino)
|
||||
@@ -276,7 +283,6 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock,
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_inode sinode;
|
||||
struct kvec val;
|
||||
const u64 refresh_gen = lock->refresh_gen;
|
||||
int ret;
|
||||
|
||||
@@ -292,11 +298,11 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock,
|
||||
return 0;
|
||||
|
||||
init_inode_key(&key, scoutfs_ino(inode));
|
||||
kvec_init(&val, &sinode, sizeof(sinode));
|
||||
|
||||
mutex_lock(&si->item_mutex);
|
||||
if (atomic64_read(&si->last_refreshed) < refresh_gen) {
|
||||
ret = scoutfs_forest_lookup_exact(sb, &key, &val, lock);
|
||||
ret = scoutfs_item_lookup_exact(sb, &key, &sinode,
|
||||
sizeof(sinode), lock);
|
||||
if (ret == 0) {
|
||||
load_inode(inode, &sinode);
|
||||
atomic64_set(&si->last_refreshed, refresh_gen);
|
||||
@@ -329,7 +335,7 @@ int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
|
||||
u64 new_size, bool truncate)
|
||||
{
|
||||
struct scoutfs_inode_info *ci = SCOUTFS_I(inode);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
LIST_HEAD(ind_locks);
|
||||
int ret;
|
||||
@@ -337,8 +343,7 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
|
||||
if (!S_ISREG(inode->i_mode))
|
||||
return 0;
|
||||
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true,
|
||||
SIC_DIRTY_INODE());
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
@@ -348,7 +353,7 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
|
||||
truncate_setsize(inode, new_size);
|
||||
inode->i_ctime = inode->i_mtime = CURRENT_TIME;
|
||||
if (truncate)
|
||||
ci->flags |= SCOUTFS_INO_FLAG_TRUNCATE;
|
||||
si->flags |= SCOUTFS_INO_FLAG_TRUNCATE;
|
||||
scoutfs_inode_set_data_seq(inode);
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
|
||||
@@ -360,17 +365,16 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
|
||||
|
||||
static int clear_truncate_flag(struct inode *inode, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_inode_info *ci = SCOUTFS_I(inode);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
LIST_HEAD(ind_locks);
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false,
|
||||
SIC_DIRTY_INODE());
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ci->flags &= ~SCOUTFS_INO_FLAG_TRUNCATE;
|
||||
si->flags &= ~SCOUTFS_INO_FLAG_TRUNCATE;
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
|
||||
scoutfs_release_trans(sb);
|
||||
@@ -381,16 +385,17 @@ static int clear_truncate_flag(struct inode *inode, struct scoutfs_lock *lock)
|
||||
|
||||
int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_inode_info *ci = SCOUTFS_I(inode);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
u64 start;
|
||||
int ret, err;
|
||||
|
||||
trace_scoutfs_complete_truncate(inode, ci->flags);
|
||||
trace_scoutfs_complete_truncate(inode, si->flags);
|
||||
|
||||
if (!(ci->flags & SCOUTFS_INO_FLAG_TRUNCATE))
|
||||
if (!(si->flags & SCOUTFS_INO_FLAG_TRUNCATE))
|
||||
return 0;
|
||||
|
||||
start = (i_size_read(inode) + SCOUTFS_BLOCK_SIZE - 1) >> SCOUTFS_BLOCK_SHIFT;
|
||||
start = (i_size_read(inode) + SCOUTFS_BLOCK_SM_SIZE - 1) >>
|
||||
SCOUTFS_BLOCK_SM_SHIFT;
|
||||
ret = scoutfs_data_truncate_items(inode->i_sb, inode,
|
||||
scoutfs_ino(inode), start, ~0ULL,
|
||||
false, lock);
|
||||
@@ -480,8 +485,7 @@ retry:
|
||||
}
|
||||
}
|
||||
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false,
|
||||
SIC_DIRTY_INODE());
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -573,7 +577,7 @@ void scoutfs_inode_add_onoff(struct inode *inode, s64 on, s64 off)
|
||||
si->online_blocks += on;
|
||||
si->offline_blocks += off;
|
||||
/* XXX not sure if this is right */
|
||||
inode->i_blocks += (on + off) * SCOUTFS_BLOCK_SECTORS;
|
||||
inode->i_blocks += (on + off) * SCOUTFS_BLOCK_SM_SECTORS;
|
||||
|
||||
trace_scoutfs_online_offline_blocks(inode, on, off,
|
||||
si->online_blocks,
|
||||
@@ -637,19 +641,19 @@ void scoutfs_inode_get_onoff(struct inode *inode, s64 *on, s64 *off)
|
||||
|
||||
static int scoutfs_iget_test(struct inode *inode, void *arg)
|
||||
{
|
||||
struct scoutfs_inode_info *ci = SCOUTFS_I(inode);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
u64 *ino = arg;
|
||||
|
||||
return ci->ino == *ino;
|
||||
return si->ino == *ino;
|
||||
}
|
||||
|
||||
static int scoutfs_iget_set(struct inode *inode, void *arg)
|
||||
{
|
||||
struct scoutfs_inode_info *ci = SCOUTFS_I(inode);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
u64 *ino = arg;
|
||||
|
||||
inode->i_ino = *ino;
|
||||
ci->ino = *ino;
|
||||
si->ino = *ino;
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -681,8 +685,6 @@ struct inode *scoutfs_iget(struct super_block *sb, u64 ino)
|
||||
/* XXX ensure refresh, instead clear in drop_inode? */
|
||||
si = SCOUTFS_I(inode);
|
||||
atomic64_set(&si->last_refreshed, 0);
|
||||
si->ino_alloc.ino = 0;
|
||||
si->ino_alloc.nr = 0;
|
||||
|
||||
ret = scoutfs_inode_refresh(inode, lock, 0);
|
||||
if (ret) {
|
||||
@@ -701,7 +703,7 @@ out:
|
||||
|
||||
static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
|
||||
{
|
||||
struct scoutfs_inode_info *ci = SCOUTFS_I(inode);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
u64 online_blocks;
|
||||
u64 offline_blocks;
|
||||
|
||||
@@ -715,19 +717,22 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
|
||||
cinode->rdev = cpu_to_le32(inode->i_rdev);
|
||||
cinode->atime.sec = cpu_to_le64(inode->i_atime.tv_sec);
|
||||
cinode->atime.nsec = cpu_to_le32(inode->i_atime.tv_nsec);
|
||||
memset(cinode->atime.__pad, 0, sizeof(cinode->atime.__pad));
|
||||
cinode->ctime.sec = cpu_to_le64(inode->i_ctime.tv_sec);
|
||||
cinode->ctime.nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
|
||||
memset(cinode->ctime.__pad, 0, sizeof(cinode->ctime.__pad));
|
||||
cinode->mtime.sec = cpu_to_le64(inode->i_mtime.tv_sec);
|
||||
cinode->mtime.nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
|
||||
memset(cinode->mtime.__pad, 0, sizeof(cinode->mtime.__pad));
|
||||
|
||||
cinode->meta_seq = cpu_to_le64(scoutfs_inode_meta_seq(inode));
|
||||
cinode->data_seq = cpu_to_le64(scoutfs_inode_data_seq(inode));
|
||||
cinode->data_version = cpu_to_le64(scoutfs_inode_data_version(inode));
|
||||
cinode->online_blocks = cpu_to_le64(online_blocks);
|
||||
cinode->offline_blocks = cpu_to_le64(offline_blocks);
|
||||
cinode->next_readdir_pos = cpu_to_le64(ci->next_readdir_pos);
|
||||
cinode->next_xattr_id = cpu_to_le64(ci->next_xattr_id);
|
||||
cinode->flags = cpu_to_le32(ci->flags);
|
||||
cinode->next_readdir_pos = cpu_to_le64(si->next_readdir_pos);
|
||||
cinode->next_xattr_id = cpu_to_le64(si->next_xattr_id);
|
||||
cinode->flags = cpu_to_le32(si->flags);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -753,15 +758,13 @@ int scoutfs_dirty_inode_item(struct inode *inode, struct scoutfs_lock *lock)
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
struct kvec val;
|
||||
int ret;
|
||||
|
||||
store_inode(&sinode, inode);
|
||||
kvec_init(&val, &sinode, sizeof(sinode));
|
||||
|
||||
init_inode_key(&key, scoutfs_ino(inode));
|
||||
|
||||
ret = scoutfs_forest_update(sb, &key, &val, lock);
|
||||
ret = scoutfs_item_update(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
if (!ret)
|
||||
trace_scoutfs_dirty_inode(inode);
|
||||
return ret;
|
||||
@@ -893,7 +896,7 @@ static int update_index_items(struct super_block *sb,
|
||||
scoutfs_inode_init_index_key(&ins, type, major, minor, ino);
|
||||
|
||||
ins_lock = find_index_lock(lock_list, type, major, minor, ino);
|
||||
ret = scoutfs_forest_create_force(sb, &ins, NULL, ins_lock);
|
||||
ret = scoutfs_item_create_force(sb, &ins, NULL, 0, ins_lock);
|
||||
if (ret || !will_del_index(si, type, major, minor))
|
||||
return ret;
|
||||
|
||||
@@ -905,9 +908,9 @@ static int update_index_items(struct super_block *sb,
|
||||
|
||||
del_lock = find_index_lock(lock_list, type, si->item_majors[type],
|
||||
si->item_minors[type], ino);
|
||||
ret = scoutfs_forest_delete_force(sb, &del, del_lock);
|
||||
ret = scoutfs_item_delete_force(sb, &del, del_lock);
|
||||
if (ret) {
|
||||
err = scoutfs_forest_delete(sb, &ins, ins_lock);
|
||||
err = scoutfs_item_delete(sb, &ins, ins_lock);
|
||||
BUG_ON(err);
|
||||
}
|
||||
|
||||
@@ -966,7 +969,6 @@ void scoutfs_update_inode_item(struct inode *inode, struct scoutfs_lock *lock,
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_inode sinode;
|
||||
struct kvec val;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
@@ -982,9 +984,8 @@ void scoutfs_update_inode_item(struct inode *inode, struct scoutfs_lock *lock,
|
||||
BUG_ON(ret);
|
||||
|
||||
init_inode_key(&key, ino);
|
||||
kvec_init(&val, &sinode, sizeof(sinode));
|
||||
|
||||
err = scoutfs_forest_update(sb, &key, &val, lock);
|
||||
err = scoutfs_item_update(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
if (err) {
|
||||
scoutfs_err(sb, "inode %llu update err %d", ino, err);
|
||||
BUG_ON(err);
|
||||
@@ -1185,8 +1186,7 @@ int scoutfs_inode_index_start(struct super_block *sb, u64 *seq)
|
||||
* Returns > 0 if the seq changed and the locks should be retried.
|
||||
*/
|
||||
int scoutfs_inode_index_try_lock_hold(struct super_block *sb,
|
||||
struct list_head *list, u64 seq,
|
||||
const struct scoutfs_item_count cnt)
|
||||
struct list_head *list, u64 seq)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct index_lock *ind_lock;
|
||||
@@ -1202,7 +1202,7 @@ int scoutfs_inode_index_try_lock_hold(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_hold_trans(sb, cnt);
|
||||
ret = scoutfs_hold_trans(sb);
|
||||
if (ret == 0 && seq != sbi->trans_seq) {
|
||||
scoutfs_release_trans(sb);
|
||||
ret = 1;
|
||||
@@ -1216,8 +1216,7 @@ out:
|
||||
}
|
||||
|
||||
int scoutfs_inode_index_lock_hold(struct inode *inode, struct list_head *list,
|
||||
bool set_data_seq,
|
||||
const struct scoutfs_item_count cnt)
|
||||
bool set_data_seq)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
int ret;
|
||||
@@ -1227,7 +1226,7 @@ int scoutfs_inode_index_lock_hold(struct inode *inode, struct list_head *list,
|
||||
ret = scoutfs_inode_index_start(sb, &seq) ?:
|
||||
scoutfs_inode_index_prepare(sb, list, inode,
|
||||
set_data_seq) ?:
|
||||
scoutfs_inode_index_try_lock_hold(sb, list, seq, cnt);
|
||||
scoutfs_inode_index_try_lock_hold(sb, list, seq);
|
||||
} while (ret > 0);
|
||||
|
||||
return ret;
|
||||
@@ -1259,7 +1258,7 @@ static int remove_index(struct super_block *sb, u64 ino, u8 type, u64 major,
|
||||
scoutfs_inode_init_index_key(&key, type, major, minor, ino);
|
||||
|
||||
lock = find_index_lock(ind_locks, type, major, minor, ino);
|
||||
ret = scoutfs_forest_delete_force(sb, &key, lock);
|
||||
ret = scoutfs_item_delete_force(sb, &key, lock);
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
return ret;
|
||||
@@ -1321,14 +1320,16 @@ u64 scoutfs_last_ino(struct super_block *sb)
|
||||
* minimize that loss while still being large enough for typical
|
||||
* directory file counts.
|
||||
*/
|
||||
int scoutfs_alloc_ino(struct inode *parent, u64 *ino_ret)
|
||||
int scoutfs_alloc_ino(struct super_block *sb, bool is_dir, u64 *ino_ret)
|
||||
{
|
||||
struct scoutfs_inode_allocator *ia = &SCOUTFS_I(parent)->ino_alloc;
|
||||
struct super_block *sb = parent->i_sb;
|
||||
DECLARE_INODE_SB_INFO(sb, inf);
|
||||
struct inode_allocator *ia;
|
||||
u64 ino;
|
||||
u64 nr;
|
||||
int ret;
|
||||
|
||||
ia = is_dir ? &inf->dir_ino_alloc : &inf->ino_alloc;
|
||||
|
||||
spin_lock(&ia->lock);
|
||||
|
||||
if (ia->nr == 0) {
|
||||
@@ -1363,29 +1364,26 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
|
||||
umode_t mode, dev_t rdev, u64 ino,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_inode_info *ci;
|
||||
struct scoutfs_inode_info *si;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_inode sinode;
|
||||
struct inode *inode;
|
||||
struct kvec val;
|
||||
int ret;
|
||||
|
||||
inode = new_inode(sb);
|
||||
if (!inode)
|
||||
return ERR_PTR(-ENOMEM);
|
||||
|
||||
ci = SCOUTFS_I(inode);
|
||||
ci->ino = ino;
|
||||
ci->data_version = 0;
|
||||
ci->online_blocks = 0;
|
||||
ci->offline_blocks = 0;
|
||||
ci->next_readdir_pos = SCOUTFS_DIRENT_FIRST_POS;
|
||||
ci->next_xattr_id = 0;
|
||||
ci->have_item = false;
|
||||
atomic64_set(&ci->last_refreshed, lock->refresh_gen);
|
||||
ci->flags = 0;
|
||||
ci->ino_alloc.ino = 0;
|
||||
ci->ino_alloc.nr = 0;
|
||||
si = SCOUTFS_I(inode);
|
||||
si->ino = ino;
|
||||
si->data_version = 0;
|
||||
si->online_blocks = 0;
|
||||
si->offline_blocks = 0;
|
||||
si->next_readdir_pos = SCOUTFS_DIRENT_FIRST_POS;
|
||||
si->next_xattr_id = 0;
|
||||
si->have_item = false;
|
||||
atomic64_set(&si->last_refreshed, lock->refresh_gen);
|
||||
si->flags = 0;
|
||||
|
||||
scoutfs_inode_set_meta_seq(inode);
|
||||
scoutfs_inode_set_data_seq(inode);
|
||||
@@ -1399,9 +1397,8 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
|
||||
|
||||
store_inode(&sinode, inode);
|
||||
init_inode_key(&key, scoutfs_ino(inode));
|
||||
kvec_init(&val, &sinode, sizeof(sinode));
|
||||
|
||||
ret = scoutfs_forest_create(sb, &key, &val, lock);
|
||||
ret = scoutfs_item_create(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
if (ret) {
|
||||
iput(inode);
|
||||
return ERR_PTR(ret);
|
||||
@@ -1429,7 +1426,7 @@ static int remove_orphan_item(struct super_block *sb, u64 ino)
|
||||
|
||||
init_orphan_key(&key, sbi->rid, ino);
|
||||
|
||||
ret = scoutfs_forest_delete(sb, &key, lock);
|
||||
ret = scoutfs_item_delete(sb, &key, lock);
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
|
||||
@@ -1451,7 +1448,6 @@ static int delete_inode_items(struct super_block *sb, u64 ino)
|
||||
struct scoutfs_key key;
|
||||
LIST_HEAD(ind_locks);
|
||||
bool release = false;
|
||||
struct kvec val;
|
||||
umode_t mode;
|
||||
u64 ind_seq;
|
||||
u64 size;
|
||||
@@ -1462,9 +1458,9 @@ static int delete_inode_items(struct super_block *sb, u64 ino)
|
||||
return ret;
|
||||
|
||||
init_inode_key(&key, ino);
|
||||
kvec_init(&val, &sinode, sizeof(sinode));
|
||||
|
||||
ret = scoutfs_forest_lookup_exact(sb, &key, &val, lock);
|
||||
ret = scoutfs_item_lookup_exact(sb, &key, &sinode, sizeof(sinode),
|
||||
lock);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
@@ -1498,8 +1494,7 @@ static int delete_inode_items(struct super_block *sb, u64 ino)
|
||||
retry:
|
||||
ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
|
||||
prepare_index_deletion(sb, &ind_locks, ino, mode, &sinode) ?:
|
||||
scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq,
|
||||
SIC_DROP_INODE(mode, size));
|
||||
scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq);
|
||||
if (ret > 0)
|
||||
goto retry;
|
||||
if (ret)
|
||||
@@ -1517,7 +1512,7 @@ retry:
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_forest_delete(sb, &key, lock);
|
||||
ret = scoutfs_item_delete(sb, &key, lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -1586,7 +1581,7 @@ int scoutfs_scan_orphans(struct super_block *sb)
|
||||
init_orphan_key(&last, sbi->rid, ~0ULL);
|
||||
|
||||
while (1) {
|
||||
ret = scoutfs_forest_next(sb, &key, &last, NULL, lock);
|
||||
ret = scoutfs_item_next(sb, &key, &last, NULL, 0, lock);
|
||||
if (ret == -ENOENT) /* No more orphan items */
|
||||
break;
|
||||
if (ret < 0)
|
||||
@@ -1620,7 +1615,7 @@ int scoutfs_orphan_inode(struct inode *inode)
|
||||
|
||||
init_orphan_key(&key, sbi->rid, scoutfs_ino(inode));
|
||||
|
||||
ret = scoutfs_forest_create(sb, &key, NULL, lock);
|
||||
ret = scoutfs_item_create(sb, &key, NULL, 0, lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1724,6 +1719,8 @@ int scoutfs_inode_setup(struct super_block *sb)
|
||||
|
||||
spin_lock_init(&inf->writeback_lock);
|
||||
inf->writeback_inodes = RB_ROOT;
|
||||
spin_lock_init(&inf->dir_ino_alloc.lock);
|
||||
spin_lock_init(&inf->ino_alloc.lock);
|
||||
|
||||
sbi->inode_sb_info = inf;
|
||||
|
||||
|
||||
@@ -4,18 +4,11 @@
|
||||
#include "key.h"
|
||||
#include "lock.h"
|
||||
#include "per_task.h"
|
||||
#include "count.h"
|
||||
#include "format.h"
|
||||
#include "data.h"
|
||||
|
||||
struct scoutfs_lock;
|
||||
|
||||
struct scoutfs_inode_allocator {
|
||||
spinlock_t lock;
|
||||
u64 ino;
|
||||
u64 nr;
|
||||
};
|
||||
|
||||
struct scoutfs_inode_info {
|
||||
/* read or initialized for each inode instance */
|
||||
u64 ino;
|
||||
@@ -28,6 +21,14 @@ struct scoutfs_inode_info {
|
||||
u64 offline_blocks;
|
||||
u32 flags;
|
||||
|
||||
/*
|
||||
* Protects per-inode extent items, most particularly readers
|
||||
* who want to serialize writers without holding i_mutex. (only
|
||||
* used in data.c, it's the only place that understands file
|
||||
* extent items)
|
||||
*/
|
||||
struct rw_semaphore extent_sem;
|
||||
|
||||
/*
|
||||
* The in-memory item info caches the current index item values
|
||||
* so that we can decide to update them with comparisons instead
|
||||
@@ -42,9 +43,6 @@ struct scoutfs_inode_info {
|
||||
/* updated at on each new lock acquisition */
|
||||
atomic64_t last_refreshed;
|
||||
|
||||
/* reset for every new inode instance */
|
||||
struct scoutfs_inode_allocator ino_alloc;
|
||||
|
||||
/* initialized once for slab object */
|
||||
seqcount_t seqcount;
|
||||
bool staging; /* holder of i_mutex is staging */
|
||||
@@ -84,18 +82,16 @@ int scoutfs_inode_index_prepare_ino(struct super_block *sb,
|
||||
struct list_head *list, u64 ino,
|
||||
umode_t mode);
|
||||
int scoutfs_inode_index_try_lock_hold(struct super_block *sb,
|
||||
struct list_head *list, u64 seq,
|
||||
const struct scoutfs_item_count cnt);
|
||||
struct list_head *list, u64 seq);
|
||||
int scoutfs_inode_index_lock_hold(struct inode *inode, struct list_head *list,
|
||||
bool set_data_seq,
|
||||
const struct scoutfs_item_count cnt);
|
||||
bool set_data_seq);
|
||||
void scoutfs_inode_index_unlock(struct super_block *sb, struct list_head *list);
|
||||
|
||||
int scoutfs_dirty_inode_item(struct inode *inode, struct scoutfs_lock *lock);
|
||||
void scoutfs_update_inode_item(struct inode *inode, struct scoutfs_lock *lock,
|
||||
struct list_head *ind_locks);
|
||||
|
||||
int scoutfs_alloc_ino(struct inode *parent, u64 *ino);
|
||||
int scoutfs_alloc_ino(struct super_block *sb, bool is_dir, u64 *ino_ret);
|
||||
struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
|
||||
umode_t mode, dev_t rdev, u64 ino,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
259
kmod/src/ioctl.c
259
kmod/src/ioctl.c
@@ -12,6 +12,7 @@
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/uaccess.h>
|
||||
#include <linux/compiler.h>
|
||||
#include <linux/uio.h>
|
||||
@@ -27,6 +28,7 @@
|
||||
#include "ioctl.h"
|
||||
#include "super.h"
|
||||
#include "inode.h"
|
||||
#include "item.h"
|
||||
#include "forest.h"
|
||||
#include "data.h"
|
||||
#include "client.h"
|
||||
@@ -34,6 +36,8 @@
|
||||
#include "trans.h"
|
||||
#include "xattr.h"
|
||||
#include "hash.h"
|
||||
#include "srch.h"
|
||||
#include "alloc.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
@@ -109,7 +113,7 @@ static long scoutfs_ioc_walk_inodes(struct file *file, unsigned long arg)
|
||||
|
||||
for (nr = 0; nr < walk.nr_entries; ) {
|
||||
|
||||
ret = scoutfs_forest_next(sb, &key, &last_key, NULL, lock);
|
||||
ret = scoutfs_item_next(sb, &key, &last_key, NULL, 0, lock);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
break;
|
||||
|
||||
@@ -271,8 +275,8 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_ioctl_release args;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
loff_t start;
|
||||
loff_t end_inc;
|
||||
u64 sblock;
|
||||
u64 eblock;
|
||||
u64 online;
|
||||
u64 offline;
|
||||
u64 isize;
|
||||
@@ -283,9 +287,11 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
|
||||
|
||||
trace_scoutfs_ioc_release(sb, scoutfs_ino(inode), &args);
|
||||
|
||||
if (args.count == 0)
|
||||
if (args.length == 0)
|
||||
return 0;
|
||||
if ((args.block + args.count) < args.block)
|
||||
if (((args.offset + args.length) < args.offset) ||
|
||||
(args.offset & SCOUTFS_BLOCK_SM_MASK) ||
|
||||
(args.length & SCOUTFS_BLOCK_SM_MASK))
|
||||
return -EINVAL;
|
||||
|
||||
|
||||
@@ -318,23 +324,24 @@ static long scoutfs_ioc_release(struct file *file, unsigned long arg)
|
||||
inode_dio_wait(inode);
|
||||
|
||||
/* drop all clean and dirty cached blocks in the range */
|
||||
start = args.block << SCOUTFS_BLOCK_SHIFT;
|
||||
end_inc = ((args.block + args.count) << SCOUTFS_BLOCK_SHIFT) - 1;
|
||||
truncate_inode_pages_range(&inode->i_data, start, end_inc);
|
||||
truncate_inode_pages_range(&inode->i_data, args.offset,
|
||||
args.offset + args.length - 1);
|
||||
|
||||
sblock = args.offset >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
eblock = (args.offset + args.length - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
ret = scoutfs_data_truncate_items(sb, inode, scoutfs_ino(inode),
|
||||
args.block,
|
||||
args.block + args.count - 1, true,
|
||||
sblock,
|
||||
eblock, true,
|
||||
lock);
|
||||
if (ret == 0) {
|
||||
scoutfs_inode_get_onoff(inode, &online, &offline);
|
||||
isize = i_size_read(inode);
|
||||
if (online == 0 && isize) {
|
||||
start = (isize + SCOUTFS_BLOCK_SIZE - 1)
|
||||
>> SCOUTFS_BLOCK_SHIFT;
|
||||
sblock = (isize + SCOUTFS_BLOCK_SM_SIZE - 1)
|
||||
>> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
ret = scoutfs_data_truncate_items(sb, inode,
|
||||
scoutfs_ino(inode),
|
||||
start, U64_MAX,
|
||||
sblock, U64_MAX,
|
||||
false, lock);
|
||||
}
|
||||
}
|
||||
@@ -371,8 +378,8 @@ static long scoutfs_ioc_data_wait_err(struct file *file, unsigned long arg)
|
||||
|
||||
trace_scoutfs_ioc_data_wait_err(sb, &args);
|
||||
|
||||
sblock = args.offset >> SCOUTFS_BLOCK_SHIFT;
|
||||
eblock = (args.offset + args.count - 1) >> SCOUTFS_BLOCK_SHIFT;
|
||||
sblock = args.offset >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
eblock = (args.offset + args.count - 1) >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
|
||||
if (sblock > eblock)
|
||||
return -EINVAL;
|
||||
@@ -456,23 +463,24 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
|
||||
trace_scoutfs_ioc_stage(sb, scoutfs_ino(inode), &args);
|
||||
|
||||
end_size = args.offset + args.count;
|
||||
end_size = args.offset + args.length;
|
||||
|
||||
/* verify arg constraints that aren't dependent on file */
|
||||
if (args.count < 0 || (end_size < args.offset) ||
|
||||
args.offset & SCOUTFS_BLOCK_MASK)
|
||||
if (args.length < 0 || (end_size < args.offset) ||
|
||||
args.offset & SCOUTFS_BLOCK_SM_MASK) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (args.count == 0)
|
||||
if (args.length == 0)
|
||||
return 0;
|
||||
|
||||
/* the iocb is really only used for the file pointer :P */
|
||||
init_sync_kiocb(&kiocb, file);
|
||||
kiocb.ki_pos = args.offset;
|
||||
kiocb.ki_left = args.count;
|
||||
kiocb.ki_nbytes = args.count;
|
||||
kiocb.ki_left = args.length;
|
||||
kiocb.ki_nbytes = args.length;
|
||||
iov.iov_base = (void __user *)(unsigned long)args.buf_ptr;
|
||||
iov.iov_len = args.count;
|
||||
iov.iov_len = args.length;
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret)
|
||||
@@ -494,7 +502,7 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
(file->f_flags & (O_APPEND | O_DIRECT | O_DSYNC)) ||
|
||||
IS_SYNC(file->f_mapping->host) ||
|
||||
(end_size > isize) ||
|
||||
((end_size & SCOUTFS_BLOCK_MASK) && (end_size != isize))) {
|
||||
((end_size & SCOUTFS_BLOCK_SM_MASK) && (end_size != isize))) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@@ -511,11 +519,11 @@ static long scoutfs_ioc_stage(struct file *file, unsigned long arg)
|
||||
written = 0;
|
||||
do {
|
||||
ret = generic_file_buffered_write(&kiocb, &iov, 1, pos, &pos,
|
||||
args.count, written);
|
||||
args.length, written);
|
||||
BUG_ON(ret == -EIOCBQUEUED);
|
||||
if (ret > 0)
|
||||
written += ret;
|
||||
} while (ret > 0 && written < args.count);
|
||||
} while (ret > 0 && written < args.length);
|
||||
|
||||
si->staging = false;
|
||||
current->backing_dev_info = NULL;
|
||||
@@ -666,8 +674,7 @@ static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
|
||||
|
||||
/* setting only so we don't see 0 data seq with nonzero data_version */
|
||||
set_data_seq = sm.data_version != 0 ? true : false;
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, set_data_seq,
|
||||
SIC_SETATTR_MORE());
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, set_data_seq);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
@@ -759,18 +766,20 @@ out:
|
||||
* but we don't check that the callers xattr name contains the tag and
|
||||
* search for it regardless.
|
||||
*/
|
||||
static long scoutfs_ioc_find_xattrs(struct file *file, unsigned long arg)
|
||||
static long scoutfs_ioc_search_xattrs(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_find_xattrs __user *ufx = (void __user *)arg;
|
||||
struct scoutfs_ioctl_find_xattrs fx;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_ioctl_search_xattrs __user *usx = (void __user *)arg;
|
||||
struct scoutfs_ioctl_search_xattrs sx;
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_srch_rb_root sroot;
|
||||
struct scoutfs_srch_rb_node *snode;
|
||||
u64 __user *uinos;
|
||||
struct rb_node *node;
|
||||
char *name = NULL;
|
||||
int total = 0;
|
||||
u64 hash;
|
||||
u64 ino;
|
||||
bool done = false;
|
||||
u64 prev_ino = 0;
|
||||
u64 total = 0;
|
||||
int ret;
|
||||
|
||||
if (!(file->f_mode & FMODE_READ)) {
|
||||
@@ -783,67 +792,73 @@ static long scoutfs_ioc_find_xattrs(struct file *file, unsigned long arg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(&fx, ufx, sizeof(fx))) {
|
||||
if (copy_from_user(&sx, usx, sizeof(sx))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
uinos = (u64 __user *)sx.inodes_ptr;
|
||||
|
||||
if (fx.name_bytes > SCOUTFS_XATTR_MAX_NAME_LEN) {
|
||||
if (sx.name_bytes > SCOUTFS_XATTR_MAX_NAME_LEN) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
name = kmalloc(fx.name_bytes, GFP_KERNEL);
|
||||
if (sx.nr_inodes == 0 || sx.last_ino < sx.next_ino) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
name = kmalloc(sx.name_bytes, GFP_KERNEL);
|
||||
if (!name) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(name, (void __user *)fx.name_ptr, fx.name_bytes)) {
|
||||
if (copy_from_user(name, (void __user *)sx.name_ptr, sx.name_bytes)) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
hash = scoutfs_hash64(name, fx.name_bytes);
|
||||
scoutfs_xattr_index_key(&key, hash, fx.next_ino, 0);
|
||||
scoutfs_xattr_index_key(&last, hash, U64_MAX, U64_MAX);
|
||||
ino = 0;
|
||||
if (scoutfs_xattr_parse_tags(name, sx.name_bytes, &tgs) < 0 ||
|
||||
!tgs.srch) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_lock_xattr_index(sb, SCOUTFS_LOCK_READ, 0, hash, &lock);
|
||||
ret = scoutfs_srch_search_xattrs(sb, &sroot,
|
||||
scoutfs_hash64(name, sx.name_bytes),
|
||||
sx.next_ino, sx.last_ino, &done);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
while (fx.nr_inodes) {
|
||||
prev_ino = 0;
|
||||
scoutfs_srch_foreach_rb_node(snode, node, &sroot) {
|
||||
if (prev_ino == snode->ino)
|
||||
continue;
|
||||
|
||||
ret = scoutfs_forest_next(sb, &key, &last, NULL, lock);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
if (put_user(snode->ino, uinos + total)) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
prev_ino = snode->ino;
|
||||
|
||||
/* xattrs hashes can collide and add multiple entries */
|
||||
if (le64_to_cpu(key.skxi_ino) != ino) {
|
||||
ino = le64_to_cpu(key.skxi_ino);
|
||||
if (put_user(ino, (u64 __user *)fx.inodes_ptr)) {
|
||||
ret = -EFAULT;
|
||||
break;
|
||||
}
|
||||
|
||||
fx.inodes_ptr += sizeof(u64);
|
||||
fx.nr_inodes--;
|
||||
total++;
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
scoutfs_key_inc(&key);
|
||||
if (++total == sx.nr_inodes)
|
||||
break;
|
||||
}
|
||||
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
|
||||
sx.output_flags = 0;
|
||||
if (done && total == sroot.nr)
|
||||
sx.output_flags |= SCOUTFS_SEARCH_XATTRS_OFLAG_END;
|
||||
|
||||
if (put_user(sx.output_flags, &usx->output_flags))
|
||||
ret = -EFAULT;
|
||||
else
|
||||
ret = 0;
|
||||
|
||||
scoutfs_srch_destroy_rb_root(&sroot);
|
||||
|
||||
out:
|
||||
kfree(name);
|
||||
|
||||
return ret ?: total;
|
||||
}
|
||||
|
||||
@@ -853,6 +868,7 @@ static long scoutfs_ioc_statfs_more(struct file *file, unsigned long arg)
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_ioctl_statfs_more sfm;
|
||||
int ret;
|
||||
|
||||
if (get_user(sfm.valid_bytes, (__u64 __user *)arg))
|
||||
return -EFAULT;
|
||||
@@ -861,6 +877,12 @@ static long scoutfs_ioc_statfs_more(struct file *file, unsigned long arg)
|
||||
sizeof(struct scoutfs_ioctl_statfs_more));
|
||||
sfm.fsid = le64_to_cpu(super->hdr.fsid);
|
||||
sfm.rid = sbi->rid;
|
||||
sfm.total_meta_blocks = le64_to_cpu(super->total_meta_blocks);
|
||||
sfm.total_data_blocks = le64_to_cpu(super->total_data_blocks);
|
||||
|
||||
ret = scoutfs_client_get_last_seq(sb, &sfm.committed_seq);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
if (copy_to_user((void __user *)arg, &sfm, sfm.valid_bytes))
|
||||
return -EFAULT;
|
||||
@@ -868,6 +890,101 @@ static long scoutfs_ioc_statfs_more(struct file *file, unsigned long arg)
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct copy_alloc_detail_args {
|
||||
struct scoutfs_ioctl_alloc_detail_entry __user *uade;
|
||||
u64 nr;
|
||||
u64 copied;
|
||||
};
|
||||
|
||||
static int copy_alloc_detail_to_user(struct super_block *sb, void *arg,
|
||||
int owner, u64 id, bool meta, bool avail,
|
||||
u64 blocks)
|
||||
{
|
||||
struct copy_alloc_detail_args *args = arg;
|
||||
struct scoutfs_ioctl_alloc_detail_entry ade;
|
||||
|
||||
if (args->copied == args->nr)
|
||||
return -EOVERFLOW;
|
||||
|
||||
ade.blocks = blocks;
|
||||
ade.id = id;
|
||||
ade.meta = !!meta;
|
||||
ade.avail = !!avail;
|
||||
|
||||
if (copy_to_user(&args->uade[args->copied], &ade, sizeof(ade)))
|
||||
return -EFAULT;
|
||||
|
||||
args->copied++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_alloc_detail(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_alloc_detail __user *uad = (void __user *)arg;
|
||||
struct scoutfs_ioctl_alloc_detail ad;
|
||||
struct copy_alloc_detail_args args;
|
||||
|
||||
if (copy_from_user(&ad, uad, sizeof(ad)))
|
||||
return -EFAULT;
|
||||
|
||||
args.uade = (struct scoutfs_ioctl_alloc_detail_entry __user *)
|
||||
(uintptr_t)ad.entries_ptr;
|
||||
args.nr = ad.entries_nr;
|
||||
args.copied = 0;
|
||||
|
||||
return scoutfs_alloc_foreach(sb, copy_alloc_detail_to_user, &args) ?:
|
||||
args.copied;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_move_blocks(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *to = file_inode(file);
|
||||
struct super_block *sb = to->i_sb;
|
||||
struct scoutfs_ioctl_move_blocks __user *umb = (void __user *)arg;
|
||||
struct scoutfs_ioctl_move_blocks mb;
|
||||
struct file *from_file;
|
||||
struct inode *from;
|
||||
int ret;
|
||||
|
||||
if (copy_from_user(&mb, umb, sizeof(mb)))
|
||||
return -EFAULT;
|
||||
|
||||
if (mb.len == 0)
|
||||
return 0;
|
||||
|
||||
if (mb.from_off + mb.len < mb.from_off ||
|
||||
mb.to_off + mb.len < mb.to_off)
|
||||
return -EOVERFLOW;
|
||||
|
||||
from_file = fget(mb.from_fd);
|
||||
if (!from_file)
|
||||
return -EBADF;
|
||||
from = file_inode(from_file);
|
||||
|
||||
if (from == to) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (from->i_sb != sb) {
|
||||
ret = -EXDEV;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_data_move_blocks(from, mb.from_off, mb.len,
|
||||
to, mb.to_off);
|
||||
mnt_drop_write_file(file);
|
||||
out:
|
||||
fput(from_file);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
switch (cmd) {
|
||||
@@ -887,12 +1004,16 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
return scoutfs_ioc_setattr_more(file, arg);
|
||||
case SCOUTFS_IOC_LISTXATTR_HIDDEN:
|
||||
return scoutfs_ioc_listxattr_hidden(file, arg);
|
||||
case SCOUTFS_IOC_FIND_XATTRS:
|
||||
return scoutfs_ioc_find_xattrs(file, arg);
|
||||
case SCOUTFS_IOC_SEARCH_XATTRS:
|
||||
return scoutfs_ioc_search_xattrs(file, arg);
|
||||
case SCOUTFS_IOC_STATFS_MORE:
|
||||
return scoutfs_ioc_statfs_more(file, arg);
|
||||
case SCOUTFS_IOC_DATA_WAIT_ERR:
|
||||
return scoutfs_ioc_data_wait_err(file, arg);
|
||||
case SCOUTFS_IOC_ALLOC_DETAIL:
|
||||
return scoutfs_ioc_alloc_detail(file, arg);
|
||||
case SCOUTFS_IOC_MOVE_BLOCKS:
|
||||
return scoutfs_ioc_move_blocks(file, arg);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
||||
140
kmod/src/ioctl.h
140
kmod/src/ioctl.h
@@ -78,7 +78,7 @@ struct scoutfs_ioctl_walk_inodes {
|
||||
__u8 _pad[11]; /* padded to align walk_inodes_entry total size */
|
||||
};
|
||||
|
||||
enum {
|
||||
enum scoutfs_ino_walk_seq_type {
|
||||
SCOUTFS_IOC_WALK_INODES_META_SEQ = 0,
|
||||
SCOUTFS_IOC_WALK_INODES_DATA_SEQ,
|
||||
SCOUTFS_IOC_WALK_INODES_UNKNOWN,
|
||||
@@ -176,8 +176,8 @@ struct scoutfs_ioctl_ino_path_result {
|
||||
* an offline record is left behind to trigger demand staging if the
|
||||
* file is read.
|
||||
*
|
||||
* The starting block offset and number of blocks to release are in
|
||||
* units 4KB blocks.
|
||||
* The starting file offset and number of bytes to release must be in
|
||||
* multiples of 4KB.
|
||||
*
|
||||
* The specified range can extend past i_size and can straddle sparse
|
||||
* regions or blocks that are already offline. The only change it makes
|
||||
@@ -193,8 +193,8 @@ struct scoutfs_ioctl_ino_path_result {
|
||||
* presentation of the data in the file.
|
||||
*/
|
||||
struct scoutfs_ioctl_release {
|
||||
__u64 block;
|
||||
__u64 count;
|
||||
__u64 offset;
|
||||
__u64 length;
|
||||
__u64 data_version;
|
||||
};
|
||||
|
||||
@@ -205,7 +205,7 @@ struct scoutfs_ioctl_stage {
|
||||
__u64 data_version;
|
||||
__u64 buf_ptr;
|
||||
__u64 offset;
|
||||
__s32 count;
|
||||
__s32 length;
|
||||
__u32 _pad;
|
||||
};
|
||||
|
||||
@@ -296,34 +296,57 @@ struct scoutfs_ioctl_listxattr_hidden {
|
||||
|
||||
/*
|
||||
* Return the inode numbers of inodes which might contain the given
|
||||
* named xattr. The inode may not have a set xattr with that name, the
|
||||
* caller must check the returned inodes to see if they match.
|
||||
* xattr. The inode may not have a set xattr with that name, the caller
|
||||
* must check the returned inodes to see if they match.
|
||||
*
|
||||
* @next_ino: The next inode number that could be returned. Initialized
|
||||
* to 0 when first searching and set to one past the last inode number
|
||||
* returned to continue searching.
|
||||
* @name_ptr: The address of the name of the xattr to search for. It does
|
||||
* not need to be null terminated.
|
||||
* @inodes_ptr: The address of the array of uint64_t inode numbers in which
|
||||
* to store inode numbers that may contain the xattr. EFAULT may be returned
|
||||
* if this address is not naturally aligned.
|
||||
* @name_bytes: The number of non-null bytes found in the name at name_ptr.
|
||||
* @last_ino: The last inode number that could be returned. U64_MAX to
|
||||
* find all inodes.
|
||||
* @name_ptr: The address of the name of the xattr to search for. It is
|
||||
* not null terminated.
|
||||
* @inodes_ptr: The address of the array of uint64_t inode numbers in
|
||||
* which to store inode numbers that may contain the xattr. EFAULT may
|
||||
* be returned if this address is not naturally aligned.
|
||||
* @output_flags: Set as success is returned. If an error is returned
|
||||
* then this field is undefined and should not be read.
|
||||
* @nr_inodes: The number of elements in the array found at inodes_ptr.
|
||||
* @name_bytes: The number of non-null bytes found in the name at
|
||||
* name_ptr.
|
||||
*
|
||||
* This requires the CAP_SYS_ADMIN capability and will return -EPERM if
|
||||
* it's not granted.
|
||||
*
|
||||
* The number of inode numbers stored in the inodes_ptr array is
|
||||
* returned. If nr_inodes is 0 or last_ino is less than next_ino then 0
|
||||
* will be immediately returned.
|
||||
*
|
||||
* Partial progress can be returned if an error is hit or if nr_inodes
|
||||
* was larger than the internal limit on the number of inodes returned
|
||||
* in a search pass. The _END output flag is set if all the results
|
||||
* including last_ino were searched in this pass.
|
||||
*
|
||||
* It's valuable to provide a large inodes array so that all the results
|
||||
* can be found in one search pass and _END can be set. There are
|
||||
* significant constant costs for performing each search pass.
|
||||
*/
|
||||
struct scoutfs_ioctl_find_xattrs {
|
||||
struct scoutfs_ioctl_search_xattrs {
|
||||
__u64 next_ino;
|
||||
__u64 last_ino;
|
||||
__u64 name_ptr;
|
||||
__u64 inodes_ptr;
|
||||
__u64 output_flags;
|
||||
__u64 nr_inodes;
|
||||
__u16 name_bytes;
|
||||
__u16 nr_inodes;
|
||||
__u8 _pad[4];
|
||||
__u8 _pad[6];
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_FIND_XATTRS _IOR(SCOUTFS_IOCTL_MAGIC, 9, \
|
||||
struct scoutfs_ioctl_find_xattrs)
|
||||
/* set in output_flags if returned inodes reached last_ino */
|
||||
#define SCOUTFS_SEARCH_XATTRS_OFLAG_END (1ULL << 0)
|
||||
|
||||
#define SCOUTFS_IOC_SEARCH_XATTRS _IOR(SCOUTFS_IOCTL_MAGIC, 9, \
|
||||
struct scoutfs_ioctl_search_xattrs)
|
||||
|
||||
/*
|
||||
* Give the user information about the filesystem.
|
||||
@@ -335,13 +358,20 @@ struct scoutfs_ioctl_find_xattrs {
|
||||
* field is set if all of its bytes are within the valid_bytes that the
|
||||
* kernel set on return.
|
||||
*
|
||||
* @committed_seq: All seqs up to and including this seq have been
|
||||
* committed. Can be compared with meta_seq and data_seq from inodes in
|
||||
* stat_more to discover if changes have been committed to disk.
|
||||
*
|
||||
* New fields are only added to the end of the struct.
|
||||
*/
|
||||
struct scoutfs_ioctl_statfs_more {
|
||||
__u64 valid_bytes;
|
||||
__u64 fsid;
|
||||
__u64 rid;
|
||||
} __packed;
|
||||
__u64 committed_seq;
|
||||
__u64 total_meta_blocks;
|
||||
__u64 total_data_blocks;
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_STATFS_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 10, \
|
||||
struct scoutfs_ioctl_statfs_more)
|
||||
@@ -364,4 +394,74 @@ struct scoutfs_ioctl_data_wait_err {
|
||||
#define SCOUTFS_IOC_DATA_WAIT_ERR _IOR(SCOUTFS_IOCTL_MAGIC, 11, \
|
||||
struct scoutfs_ioctl_data_wait_err)
|
||||
|
||||
|
||||
struct scoutfs_ioctl_alloc_detail {
|
||||
__u64 entries_ptr;
|
||||
__u64 entries_nr;
|
||||
};
|
||||
|
||||
struct scoutfs_ioctl_alloc_detail_entry {
|
||||
__u64 id;
|
||||
__u64 blocks;
|
||||
__u8 type;
|
||||
__u8 meta:1,
|
||||
avail:1;
|
||||
__u8 __bit_pad:6;
|
||||
__u8 __pad[6];
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_ALLOC_DETAIL _IOR(SCOUTFS_IOCTL_MAGIC, 12, \
|
||||
struct scoutfs_ioctl_alloc_detail)
|
||||
|
||||
/*
|
||||
* Move extents from one regular file to another at a different offset,
|
||||
* on the same file system.
|
||||
*
|
||||
* from_fd specifies the source file and the ioctl is called on the
|
||||
* destination file. Both files must have write access. from_off
|
||||
* specifies the byte offset in the source, to_off is the byte offset in
|
||||
* the destination, and len is the number of bytes in the region to
|
||||
* move. All of the offsets and lengths must be in multiples of 4KB,
|
||||
* except in the case where the from_off + len ends at the i_size of the
|
||||
* source file.
|
||||
*
|
||||
* This interface only moves extents which are block granular, it does
|
||||
* not perform RMW of sub-block byte extents and it does not overwrite
|
||||
* existing extents in the destination. It will split extents in the
|
||||
* source.
|
||||
*
|
||||
* Only extents within i_size on the source are moved. The destination
|
||||
* i_size will be updated if extents are moved beyond its current
|
||||
* i_size. The i_size update will maintain final partial blocks in the
|
||||
* source.
|
||||
*
|
||||
* It will return an error if either of the files have offline extents.
|
||||
* It will return 0 when all of the extents in the source region have
|
||||
* been moved to the destination. Moving extents updates the ctime,
|
||||
* mtime, meta_seq, data_seq, and data_version fields of both the source
|
||||
* and destination inodes. If an error is returned then partial
|
||||
* progress may have been made and inode fields may have been updated.
|
||||
*
|
||||
* Errors specific to this interface include:
|
||||
*
|
||||
* EINVAL: from_off, len, or to_off aren't a multiple of 4KB; the source
|
||||
* and destination files are the same inode; either the source or
|
||||
* destination is not a regular file; the destination file has
|
||||
* an existing overlapping extent.
|
||||
* EOVERFLOW: either from_off + len or to_off + len exceeded 64bits.
|
||||
* EBADF: from_fd isn't a valid open file descriptor.
|
||||
* EXDEV: the source and destination files are in different filesystems.
|
||||
* EISDIR: either the source or destination is a directory.
|
||||
* ENODATA: either the source or destination file have offline extents.
|
||||
*/
|
||||
struct scoutfs_ioctl_move_blocks {
|
||||
__u64 from_fd;
|
||||
__u64 from_off;
|
||||
__u64 len;
|
||||
__u64 to_off;
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_MOVE_BLOCKS _IOR(SCOUTFS_IOCTL_MAGIC, 13, \
|
||||
struct scoutfs_ioctl_move_blocks)
|
||||
|
||||
#endif
|
||||
|
||||
2543
kmod/src/item.c
Normal file
2543
kmod/src/item.c
Normal file
File diff suppressed because it is too large
Load Diff
39
kmod/src/item.h
Normal file
39
kmod/src/item.h
Normal file
@@ -0,0 +1,39 @@
|
||||
#ifndef _SCOUTFS_ITEM_H_
|
||||
#define _SCOUTFS_ITEM_H_
|
||||
|
||||
int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_lookup_exact(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_item_next(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_key *last, void *val, int val_len,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_create_force(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_item_delete_force(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
u64 scoutfs_item_dirty_pages(struct super_block *sb);
|
||||
int scoutfs_item_write_dirty(struct super_block *sb);
|
||||
int scoutfs_item_write_done(struct super_block *sb);
|
||||
bool scoutfs_item_range_cached(struct super_block *sb,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end, bool *dirty);
|
||||
void scoutfs_item_invalidate(struct super_block *sb, struct scoutfs_key *start,
|
||||
struct scoutfs_key *end);
|
||||
|
||||
int scoutfs_item_setup(struct super_block *sb);
|
||||
void scoutfs_item_destroy(struct super_block *sb);
|
||||
|
||||
#endif
|
||||
@@ -78,6 +78,14 @@ static inline void scoutfs_key_set_zeros(struct scoutfs_key *key)
|
||||
key->_sk_second = 0;
|
||||
key->_sk_third = 0;
|
||||
key->_sk_fourth = 0;
|
||||
memset(key->__pad, 0, sizeof(key->__pad));
|
||||
}
|
||||
|
||||
static inline bool scoutfs_key_is_zeros(struct scoutfs_key *key)
|
||||
{
|
||||
return key->sk_zone == 0 && key->_sk_first == 0 && key->sk_type == 0 &&
|
||||
key->_sk_second == 0 && key->_sk_third == 0 &&
|
||||
key->_sk_fourth == 0;
|
||||
}
|
||||
|
||||
static inline void scoutfs_key_copy_or_zeros(struct scoutfs_key *dst,
|
||||
@@ -97,6 +105,7 @@ static inline void scoutfs_key_set_ones(struct scoutfs_key *key)
|
||||
key->_sk_second = cpu_to_le64(U64_MAX);
|
||||
key->_sk_third = cpu_to_le64(U64_MAX);
|
||||
key->_sk_fourth = U8_MAX;
|
||||
memset(key->__pad, 0, sizeof(key->__pad));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -179,29 +188,19 @@ static inline void scoutfs_key_dec(struct scoutfs_key *key)
|
||||
key->sk_zone--;
|
||||
}
|
||||
|
||||
static inline void scoutfs_key_to_be(struct scoutfs_key_be *be,
|
||||
struct scoutfs_key *key)
|
||||
{
|
||||
BUILD_BUG_ON(sizeof(struct scoutfs_key_be) !=
|
||||
sizeof(struct scoutfs_key));
|
||||
/*
|
||||
* Some key types are used by multiple subsystems and shouldn't have
|
||||
* duplicate private key init functions.
|
||||
*/
|
||||
|
||||
be->sk_zone = key->sk_zone;
|
||||
be->_sk_first = le64_to_be64(key->_sk_first);
|
||||
be->sk_type = key->sk_type;
|
||||
be->_sk_second = le64_to_be64(key->_sk_second);
|
||||
be->_sk_third = le64_to_be64(key->_sk_third);
|
||||
be->_sk_fourth = key->_sk_fourth;
|
||||
}
|
||||
|
||||
static inline void scoutfs_key_from_be(struct scoutfs_key *key,
|
||||
struct scoutfs_key_be *be)
|
||||
static inline void scoutfs_key_init_log_trees(struct scoutfs_key *key,
|
||||
u64 rid, u64 nr)
|
||||
{
|
||||
key->sk_zone = be->sk_zone;
|
||||
key->_sk_first = be64_to_le64(be->_sk_first);
|
||||
key->sk_type = be->sk_type;
|
||||
key->_sk_second = be64_to_le64(be->_sk_second);
|
||||
key->_sk_third = be64_to_le64(be->_sk_third);
|
||||
key->_sk_fourth = be->_sk_fourth;
|
||||
*key = (struct scoutfs_key) {
|
||||
.sk_zone = SCOUTFS_LOG_TREES_ZONE,
|
||||
.sklt_rid = cpu_to_le64(rid),
|
||||
.sklt_nr = cpu_to_le64(nr),
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,12 +0,0 @@
|
||||
#ifndef _SCOUTFS_KVEC_H_
|
||||
#define _SCOUTFS_KVEC_H_
|
||||
|
||||
#include <linux/uio.h>
|
||||
|
||||
static inline void kvec_init(struct kvec *kv, void *base, size_t len)
|
||||
{
|
||||
kv->iov_base = base;
|
||||
kv->iov_len = len;
|
||||
}
|
||||
|
||||
#endif
|
||||
554
kmod/src/lock.c
554
kmod/src/lock.c
@@ -21,7 +21,6 @@
|
||||
|
||||
#include "super.h"
|
||||
#include "lock.h"
|
||||
#include "forest.h"
|
||||
#include "scoutfs_trace.h"
|
||||
#include "msg.h"
|
||||
#include "cmp.h"
|
||||
@@ -34,6 +33,7 @@
|
||||
#include "client.h"
|
||||
#include "data.h"
|
||||
#include "xattr.h"
|
||||
#include "item.h"
|
||||
|
||||
/*
|
||||
* scoutfs uses a lock service to manage item cache consistency between
|
||||
@@ -65,7 +65,7 @@
|
||||
* relative to that lock state we resend.
|
||||
*/
|
||||
|
||||
#define GRACE_PERIOD_KT ms_to_ktime(2)
|
||||
#define GRACE_PERIOD_KT ms_to_ktime(10)
|
||||
|
||||
/*
|
||||
* allocated per-super, freed on unmount.
|
||||
@@ -80,6 +80,12 @@ struct lock_info {
|
||||
struct list_head lru_list;
|
||||
unsigned long long lru_nr;
|
||||
struct workqueue_struct *workq;
|
||||
struct work_struct grant_work;
|
||||
struct list_head grant_list;
|
||||
struct delayed_work inv_dwork;
|
||||
struct list_head inv_list;
|
||||
struct work_struct shrink_work;
|
||||
struct list_head shrink_list;
|
||||
atomic64_t next_refresh_gen;
|
||||
struct dentry *tseq_dentry;
|
||||
struct scoutfs_tseq_tree tseq_tree;
|
||||
@@ -88,19 +94,17 @@ struct lock_info {
|
||||
#define DECLARE_LOCK_INFO(sb, name) \
|
||||
struct lock_info *name = SCOUTFS_SB(sb)->lock_info
|
||||
|
||||
static void scoutfs_lock_shrink_worker(struct work_struct *work);
|
||||
|
||||
static bool lock_mode_invalid(int mode)
|
||||
static bool lock_mode_invalid(enum scoutfs_lock_mode mode)
|
||||
{
|
||||
return (unsigned)mode >= SCOUTFS_LOCK_INVALID;
|
||||
}
|
||||
|
||||
static bool lock_mode_can_read(int mode)
|
||||
static bool lock_mode_can_read(enum scoutfs_lock_mode mode)
|
||||
{
|
||||
return mode == SCOUTFS_LOCK_READ || mode == SCOUTFS_LOCK_WRITE;
|
||||
}
|
||||
|
||||
static bool lock_mode_can_write(int mode)
|
||||
static bool lock_mode_can_write(enum scoutfs_lock_mode mode)
|
||||
{
|
||||
return mode == SCOUTFS_LOCK_WRITE || mode == SCOUTFS_LOCK_WRITE_ONLY;
|
||||
}
|
||||
@@ -143,7 +147,7 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
|
||||
* leave cached items behind in the case of invalidating to a read lock.
|
||||
*/
|
||||
static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
int prev, int mode)
|
||||
enum scoutfs_lock_mode prev, enum scoutfs_lock_mode mode)
|
||||
{
|
||||
struct scoutfs_lock_coverage *cov;
|
||||
struct scoutfs_lock_coverage *tmp;
|
||||
@@ -156,15 +160,13 @@ static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
BUG_ON(!(prev == SCOUTFS_LOCK_WRITE && mode == SCOUTFS_LOCK_READ) &&
|
||||
mode != SCOUTFS_LOCK_NULL);
|
||||
|
||||
/* any transition from a mode allowed to dirty items has to write */
|
||||
if (lock_mode_can_write(prev) && scoutfs_trans_has_dirty(sb)) {
|
||||
/* sync when a write lock could have dirtied the current transaction */
|
||||
if (lock_mode_can_write(prev) &&
|
||||
(lock->dirty_trans_seq == scoutfs_trans_sample_seq(sb))) {
|
||||
scoutfs_inc_counter(sb, lock_invalidate_sync);
|
||||
ret = scoutfs_trans_sync(sb, 1);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret > 0) {
|
||||
scoutfs_add_counter(sb, lock_invalidate_commit, ret);
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
|
||||
/* have to invalidate if we're not in the only usable case */
|
||||
@@ -193,6 +195,8 @@ retry:
|
||||
ino++;
|
||||
}
|
||||
}
|
||||
|
||||
scoutfs_item_invalidate(sb, &lock->start, &lock->end);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -220,9 +224,11 @@ static void lock_free(struct lock_info *linfo, struct scoutfs_lock *lock)
|
||||
BUG_ON(!RB_EMPTY_NODE(&lock->node));
|
||||
BUG_ON(!RB_EMPTY_NODE(&lock->range_node));
|
||||
BUG_ON(!list_empty(&lock->lru_head));
|
||||
BUG_ON(!list_empty(&lock->grant_head));
|
||||
BUG_ON(!list_empty(&lock->inv_head));
|
||||
BUG_ON(!list_empty(&lock->shrink_head));
|
||||
BUG_ON(!list_empty(&lock->cov_list));
|
||||
|
||||
scoutfs_forest_clear_lock(sb, lock);
|
||||
kfree(lock);
|
||||
}
|
||||
|
||||
@@ -245,7 +251,9 @@ static struct scoutfs_lock *lock_alloc(struct super_block *sb,
|
||||
RB_CLEAR_NODE(&lock->node);
|
||||
RB_CLEAR_NODE(&lock->range_node);
|
||||
INIT_LIST_HEAD(&lock->lru_head);
|
||||
|
||||
INIT_LIST_HEAD(&lock->grant_head);
|
||||
INIT_LIST_HEAD(&lock->inv_head);
|
||||
INIT_LIST_HEAD(&lock->shrink_head);
|
||||
spin_lock_init(&lock->cov_list_lock);
|
||||
INIT_LIST_HEAD(&lock->cov_list);
|
||||
|
||||
@@ -253,21 +261,22 @@ static struct scoutfs_lock *lock_alloc(struct super_block *sb,
|
||||
lock->end = *end;
|
||||
lock->sb = sb;
|
||||
init_waitqueue_head(&lock->waitq);
|
||||
INIT_WORK(&lock->shrink_work, scoutfs_lock_shrink_worker);
|
||||
lock->mode = SCOUTFS_LOCK_NULL;
|
||||
|
||||
atomic64_set(&lock->forest_bloom_nr, 0);
|
||||
|
||||
trace_scoutfs_lock_alloc(sb, lock);
|
||||
|
||||
return lock;
|
||||
}
|
||||
|
||||
static void lock_inc_count(unsigned int *counts, int mode)
|
||||
static void lock_inc_count(unsigned int *counts, enum scoutfs_lock_mode mode)
|
||||
{
|
||||
BUG_ON(mode < 0 || mode >= SCOUTFS_LOCK_NR_MODES);
|
||||
counts[mode]++;
|
||||
}
|
||||
|
||||
static void lock_dec_count(unsigned int *counts, int mode)
|
||||
static void lock_dec_count(unsigned int *counts, enum scoutfs_lock_mode mode)
|
||||
{
|
||||
BUG_ON(mode < 0 || mode >= SCOUTFS_LOCK_NR_MODES);
|
||||
counts[mode]--;
|
||||
@@ -279,7 +288,7 @@ static void lock_dec_count(unsigned int *counts, int mode)
|
||||
*/
|
||||
static bool lock_counts_match(int granted, unsigned int *counts)
|
||||
{
|
||||
int mode;
|
||||
enum scoutfs_lock_mode mode;
|
||||
|
||||
for (mode = 0; mode < SCOUTFS_LOCK_NR_MODES; mode++) {
|
||||
if (counts[mode] && !lock_modes_match(granted, mode))
|
||||
@@ -296,7 +305,7 @@ static bool lock_counts_match(int granted, unsigned int *counts)
|
||||
*/
|
||||
static bool lock_count_match_exists(int desired, unsigned int *counts)
|
||||
{
|
||||
int mode;
|
||||
enum scoutfs_lock_mode mode;
|
||||
|
||||
for (mode = 0; mode < SCOUTFS_LOCK_NR_MODES; mode++) {
|
||||
if (counts[mode] && lock_modes_match(desired, mode))
|
||||
@@ -312,7 +321,7 @@ static bool lock_count_match_exists(int desired, unsigned int *counts)
|
||||
*/
|
||||
static bool lock_idle(struct scoutfs_lock *lock)
|
||||
{
|
||||
int mode;
|
||||
enum scoutfs_lock_mode mode;
|
||||
|
||||
if (lock->request_pending || lock->invalidate_pending)
|
||||
return false;
|
||||
@@ -540,11 +549,80 @@ static void extend_grace(struct super_block *sb, struct scoutfs_lock *lock)
|
||||
lock->grace_deadline = ktime_add(now, GRACE_PERIOD_KT);
|
||||
}
|
||||
|
||||
static void queue_grant_work(struct lock_info *linfo)
|
||||
{
|
||||
assert_spin_locked(&linfo->lock);
|
||||
|
||||
if (!list_empty(&linfo->grant_list) && !linfo->shutdown)
|
||||
queue_work(linfo->workq, &linfo->grant_work);
|
||||
}
|
||||
|
||||
/*
|
||||
* The client is receiving a lock response message from the server.
|
||||
* This can be reordered with incoming invlidation requests from the
|
||||
* server so we have to be careful to only set the new mode once the old
|
||||
* mode matches.
|
||||
* We immediately queue work on the assumption that the caller might
|
||||
* have made a change (set a lock mode) which can let one of the
|
||||
* invalidating locks make forward progress, even if other locks are
|
||||
* waiting for their grace period to elapse. It's a trade-off between
|
||||
* invalidation latency and burning cpu repeatedly finding that locks
|
||||
* are still in their grace period.
|
||||
*/
|
||||
static void queue_inv_work(struct lock_info *linfo)
|
||||
{
|
||||
assert_spin_locked(&linfo->lock);
|
||||
|
||||
if (!list_empty(&linfo->inv_list) && !linfo->shutdown)
|
||||
mod_delayed_work(linfo->workq, &linfo->inv_dwork, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* The given lock is processing a received a grant response. Trigger a
|
||||
* bug if the cache is inconsistent.
|
||||
*
|
||||
* We only have two modes that can create dirty items. We can't have
|
||||
* dirty items when transitioning from write_only to write because the
|
||||
* writer can't trust the cached items in the cache for reading. And we
|
||||
* don't currently transition directly from write to write_only, we
|
||||
* first go through null. So if we have dirty items as we're granted a
|
||||
* mode it's always incorrect.
|
||||
*
|
||||
* And we can't have cached items that we're going to use for reading if
|
||||
* the previous mode didn't allow reading.
|
||||
*
|
||||
* Inconsistencies have come from all sorts of bugs: invalidation missed
|
||||
* items, the cache was populated outside of locking coverage, lock
|
||||
* holders performed the wrong item operations under their lock,
|
||||
* overlapping locks, out of order granting or invalidating, etc.
|
||||
*/
|
||||
static void bug_on_inconsistent_grant_cache(struct super_block *sb,
|
||||
struct scoutfs_lock *lock,
|
||||
int old_mode, int new_mode)
|
||||
{
|
||||
bool cached;
|
||||
bool dirty;
|
||||
|
||||
cached = scoutfs_item_range_cached(sb, &lock->start, &lock->end,
|
||||
&dirty);
|
||||
if (dirty ||
|
||||
(cached && (!lock_mode_can_read(old_mode) ||
|
||||
!lock_mode_can_read(new_mode)))) {
|
||||
scoutfs_err(sb, "granted lock item cache inconsistency, cached %u dirty %u old_mode %d new_mode %d: start "SK_FMT" end "SK_FMT" refresh_gen %llu mode %u waiters: rd %u wr %u wo %u users: rd %u wr %u wo %u",
|
||||
cached, dirty, old_mode, new_mode, SK_ARG(&lock->start),
|
||||
SK_ARG(&lock->end), lock->refresh_gen, lock->mode,
|
||||
lock->waiters[SCOUTFS_LOCK_READ],
|
||||
lock->waiters[SCOUTFS_LOCK_WRITE],
|
||||
lock->waiters[SCOUTFS_LOCK_WRITE_ONLY],
|
||||
lock->users[SCOUTFS_LOCK_READ],
|
||||
lock->users[SCOUTFS_LOCK_WRITE],
|
||||
lock->users[SCOUTFS_LOCK_WRITE_ONLY]);
|
||||
BUG();
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Each lock has received a grant response message from the server.
|
||||
*
|
||||
* Grant responses can be reordered with incoming invalidation requests
|
||||
* from the server so we have to be careful to only set the new mode
|
||||
* once the old mode matches.
|
||||
*
|
||||
* We extend the grace period as we grant the lock if there is a waiting
|
||||
* locker who can use the lock. This stops invalidation from pulling
|
||||
@@ -555,9 +633,65 @@ static void extend_grace(struct super_block *sb, struct scoutfs_lock *lock)
|
||||
* against the invalidation. In that case they'd extend the grace
|
||||
* period anyway as they unlock.
|
||||
*/
|
||||
int scoutfs_lock_grant_response(struct super_block *sb,
|
||||
struct scoutfs_net_lock *nl)
|
||||
static void lock_grant_worker(struct work_struct *work)
|
||||
{
|
||||
struct lock_info *linfo = container_of(work, struct lock_info,
|
||||
grant_work);
|
||||
struct super_block *sb = linfo->sb;
|
||||
struct scoutfs_net_lock_grant_response *gr;
|
||||
struct scoutfs_net_lock *nl;
|
||||
struct scoutfs_lock *lock;
|
||||
struct scoutfs_lock *tmp;
|
||||
|
||||
scoutfs_inc_counter(sb, lock_grant_work);
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
|
||||
list_for_each_entry_safe(lock, tmp, &linfo->grant_list, grant_head) {
|
||||
gr = &lock->grant_resp;
|
||||
nl = &lock->grant_resp.nl;
|
||||
|
||||
/* wait for reordered invalidation to finish */
|
||||
if (lock->mode != nl->old_mode)
|
||||
continue;
|
||||
|
||||
bug_on_inconsistent_grant_cache(sb, lock, nl->old_mode,
|
||||
nl->new_mode);
|
||||
|
||||
if (!lock_mode_can_read(nl->old_mode) &&
|
||||
lock_mode_can_read(nl->new_mode)) {
|
||||
lock->refresh_gen =
|
||||
atomic64_inc_return(&linfo->next_refresh_gen);
|
||||
}
|
||||
|
||||
lock->request_pending = 0;
|
||||
lock->mode = nl->new_mode;
|
||||
lock->write_version = le64_to_cpu(nl->write_version);
|
||||
lock->roots = gr->roots;
|
||||
|
||||
if (lock_count_match_exists(nl->new_mode, lock->waiters))
|
||||
extend_grace(sb, lock);
|
||||
|
||||
trace_scoutfs_lock_granted(sb, lock);
|
||||
list_del_init(&lock->grant_head);
|
||||
wake_up(&lock->waitq);
|
||||
put_lock(linfo, lock);
|
||||
}
|
||||
|
||||
/* invalidations might be waiting for our reordered grant */
|
||||
queue_inv_work(linfo);
|
||||
spin_unlock(&linfo->lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The client is receiving a grant response message from the server. We
|
||||
* find the lock, record the response, and add it to the list for grant
|
||||
* work to process.
|
||||
*/
|
||||
int scoutfs_lock_grant_response(struct super_block *sb,
|
||||
struct scoutfs_net_lock_grant_response *gr)
|
||||
{
|
||||
struct scoutfs_net_lock *nl = &gr->nl;
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
struct scoutfs_lock *lock;
|
||||
|
||||
@@ -568,34 +702,12 @@ int scoutfs_lock_grant_response(struct super_block *sb,
|
||||
/* lock must already be busy with request_pending */
|
||||
lock = lock_lookup(sb, &nl->key, NULL);
|
||||
BUG_ON(!lock);
|
||||
trace_scoutfs_lock_grant_response(sb, lock);
|
||||
BUG_ON(!lock->request_pending);
|
||||
|
||||
trace_scoutfs_lock_grant_response(sb, lock);
|
||||
|
||||
/* resolve unlikely work reordering with invalidation request */
|
||||
while (lock->mode != nl->old_mode) {
|
||||
spin_unlock(&linfo->lock);
|
||||
/* implicit read barrier from waitq locks */
|
||||
wait_event(lock->waitq, lock->mode == nl->old_mode);
|
||||
spin_lock(&linfo->lock);
|
||||
}
|
||||
|
||||
if (!lock_mode_can_read(nl->old_mode) &&
|
||||
lock_mode_can_read(nl->new_mode)) {
|
||||
lock->refresh_gen =
|
||||
atomic64_inc_return(&linfo->next_refresh_gen);
|
||||
}
|
||||
|
||||
lock->request_pending = 0;
|
||||
lock->mode = nl->new_mode;
|
||||
lock->write_version = le64_to_cpu(nl->write_version);
|
||||
|
||||
if (lock_count_match_exists(nl->new_mode, lock->waiters))
|
||||
extend_grace(sb, lock);
|
||||
|
||||
trace_scoutfs_lock_granted(sb, lock);
|
||||
wake_up(&lock->waitq);
|
||||
put_lock(linfo, lock);
|
||||
lock->grant_resp = *gr;
|
||||
list_add_tail(&lock->grant_head, &linfo->grant_list);
|
||||
queue_grant_work(linfo);
|
||||
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
@@ -603,34 +715,9 @@ int scoutfs_lock_grant_response(struct super_block *sb,
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidation waits until the old mode indicates that we've resolved
|
||||
* unlikely races with reordered grant responses from the server and
|
||||
* until the new mode satisfies active users.
|
||||
*
|
||||
* Once it's safe to proceed we set the lock mode here under the lock to
|
||||
* prevent additional users of the old mode while we're invalidating.
|
||||
*/
|
||||
static bool lock_invalidate_safe(struct lock_info *linfo,
|
||||
struct scoutfs_lock *lock,
|
||||
int old_mode, int new_mode)
|
||||
{
|
||||
bool safe;
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
safe = (lock->mode == old_mode) &&
|
||||
lock_counts_match(new_mode, lock->users);
|
||||
if (safe)
|
||||
lock->mode = new_mode;
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
return safe;
|
||||
}
|
||||
|
||||
/*
|
||||
* The client is receiving a lock invalidation request from the server
|
||||
* Each lock has received a lock invalidation request from the server
|
||||
* which specifies a new mode for the lock. The server will only send
|
||||
* one invalidation request at a time. This is executing in a blocking
|
||||
* net receive work context.
|
||||
* one invalidation request at a time for each lock.
|
||||
*
|
||||
* This is an unsolicited request from the server so it can arrive at
|
||||
* any time after we make the server aware of the lock by initially
|
||||
@@ -647,70 +734,134 @@ static bool lock_invalidate_safe(struct lock_info *linfo,
|
||||
* invalidate once the lock mode matches what the server told us to
|
||||
* invalidate.
|
||||
*
|
||||
* We delay invalidation processing until a grace period has elapsed since
|
||||
* the last unlock. The intent is to let users do a reasonable batch of
|
||||
* work before dropping the lock. Continuous unlocking can continuously
|
||||
* extend the deadline.
|
||||
* We delay invalidation processing until a grace period has elapsed
|
||||
* since the last unlock. The intent is to let users do a reasonable
|
||||
* batch of work before dropping the lock. Continuous unlocking can
|
||||
* continuously extend the deadline.
|
||||
*
|
||||
* Before we start invalidating the lock we set the lock to the new
|
||||
* mode, preventing further incompatible users of the old mode from
|
||||
* using the lock while we're invalidating.
|
||||
*
|
||||
* This does a lot of serialized inode invalidation in one context and
|
||||
* performs a lot of repeated calls to sync. It would be nice to get
|
||||
* some concurrent inode invalidation and to more carefully only call
|
||||
* sync when needed.
|
||||
*/
|
||||
static void lock_invalidate_worker(struct work_struct *work)
|
||||
{
|
||||
struct lock_info *linfo = container_of(work, struct lock_info,
|
||||
inv_dwork.work);
|
||||
struct super_block *sb = linfo->sb;
|
||||
struct scoutfs_net_lock *nl;
|
||||
struct scoutfs_lock *lock;
|
||||
struct scoutfs_lock *tmp;
|
||||
unsigned long delay = MAX_JIFFY_OFFSET;
|
||||
ktime_t now = ktime_get();
|
||||
ktime_t deadline;
|
||||
LIST_HEAD(ready);
|
||||
u64 net_id;
|
||||
int ret;
|
||||
|
||||
scoutfs_inc_counter(sb, lock_invalidate_work);
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
|
||||
list_for_each_entry_safe(lock, tmp, &linfo->inv_list, inv_head) {
|
||||
nl = &lock->inv_nl;
|
||||
|
||||
/* wait for reordered grant to finish */
|
||||
if (lock->mode != nl->old_mode)
|
||||
continue;
|
||||
|
||||
/* wait until incompatible holders unlock */
|
||||
if (!lock_counts_match(nl->new_mode, lock->users))
|
||||
continue;
|
||||
|
||||
/* skip if grace hasn't elapsed, record earliest */
|
||||
deadline = lock->grace_deadline;
|
||||
if (!linfo->shutdown && ktime_before(now, deadline)) {
|
||||
delay = min(delay,
|
||||
nsecs_to_jiffies(ktime_to_ns(
|
||||
ktime_sub(deadline, now))));
|
||||
scoutfs_inc_counter(linfo->sb, lock_grace_wait);
|
||||
continue;
|
||||
}
|
||||
/* set the new mode, no incompatible users during inval */
|
||||
lock->mode = nl->new_mode;
|
||||
|
||||
/* move everyone that's ready to our private list */
|
||||
list_move_tail(&lock->inv_head, &ready);
|
||||
}
|
||||
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
if (list_empty(&ready))
|
||||
goto out;
|
||||
|
||||
/* invalidate once the lock is read */
|
||||
list_for_each_entry(lock, &ready, inv_head) {
|
||||
nl = &lock->inv_nl;
|
||||
net_id = lock->inv_net_id;
|
||||
|
||||
ret = lock_invalidate(sb, lock, nl->old_mode, nl->new_mode);
|
||||
BUG_ON(ret);
|
||||
|
||||
/* respond with the key and modes from the request */
|
||||
ret = scoutfs_client_lock_response(sb, net_id, nl);
|
||||
BUG_ON(ret);
|
||||
|
||||
scoutfs_inc_counter(sb, lock_invalidate_response);
|
||||
}
|
||||
|
||||
/* and finish all the invalidated locks */
|
||||
spin_lock(&linfo->lock);
|
||||
|
||||
list_for_each_entry_safe(lock, tmp, &ready, inv_head) {
|
||||
list_del_init(&lock->inv_head);
|
||||
|
||||
lock->invalidate_pending = 0;
|
||||
trace_scoutfs_lock_invalidated(sb, lock);
|
||||
wake_up(&lock->waitq);
|
||||
put_lock(linfo, lock);
|
||||
}
|
||||
|
||||
/* grant might have been waiting for invalidate request */
|
||||
queue_grant_work(linfo);
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
out:
|
||||
/* queue delayed work if invalidations waiting on grace deadline */
|
||||
if (delay != MAX_JIFFY_OFFSET)
|
||||
queue_delayed_work(linfo->workq, &linfo->inv_dwork, delay);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record an incoming invalidate request from the server and add its lock
|
||||
* to the list for processing.
|
||||
*
|
||||
* This is trusting the server and will crash if it's sent bad requests :/
|
||||
*/
|
||||
int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_net_lock *nl)
|
||||
{
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
struct scoutfs_lock *lock;
|
||||
ktime_t deadline;
|
||||
bool grace_waited = false;
|
||||
int ret;
|
||||
|
||||
scoutfs_inc_counter(sb, lock_invalidate_request);
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
lock = get_lock(sb, &nl->key);
|
||||
if (lock) {
|
||||
BUG_ON(lock->invalidate_pending); /* XXX trusting server :/ */
|
||||
lock->invalidate_pending = 1;
|
||||
deadline = lock->grace_deadline;
|
||||
trace_scoutfs_lock_invalidate_request(sb, lock);
|
||||
}
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
BUG_ON(!lock);
|
||||
|
||||
/* wait for a grace period after the most recent unlock */
|
||||
while (ktime_before(ktime_get(), deadline)) {
|
||||
grace_waited = true;
|
||||
scoutfs_inc_counter(linfo->sb, lock_grace_wait);
|
||||
set_current_state(TASK_UNINTERRUPTIBLE);
|
||||
schedule_hrtimeout(&deadline, HRTIMER_MODE_ABS);
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
deadline = lock->grace_deadline;
|
||||
spin_unlock(&linfo->lock);
|
||||
if (lock) {
|
||||
BUG_ON(lock->invalidate_pending);
|
||||
lock->invalidate_pending = 1;
|
||||
lock->inv_nl = *nl;
|
||||
lock->inv_net_id = net_id;
|
||||
list_add_tail(&lock->inv_head, &linfo->inv_list);
|
||||
trace_scoutfs_lock_invalidate_request(sb, lock);
|
||||
queue_inv_work(linfo);
|
||||
}
|
||||
|
||||
if (grace_waited)
|
||||
scoutfs_inc_counter(linfo->sb, lock_grace_elapsed);
|
||||
|
||||
/* sets the lock mode to prevent use of old mode during invalidate */
|
||||
wait_event(lock->waitq, lock_invalidate_safe(linfo, lock, nl->old_mode,
|
||||
nl->new_mode));
|
||||
|
||||
ret = lock_invalidate(sb, lock, nl->old_mode, nl->new_mode);
|
||||
BUG_ON(ret);
|
||||
|
||||
/* respond with the key and modes from the request */
|
||||
ret = scoutfs_client_lock_response(sb, net_id, nl);
|
||||
BUG_ON(ret);
|
||||
|
||||
scoutfs_inc_counter(sb, lock_invalidate_response);
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
|
||||
lock->invalidate_pending = 0;
|
||||
|
||||
trace_scoutfs_lock_invalidated(sb, lock);
|
||||
wake_up(&lock->waitq);
|
||||
put_lock(linfo, lock);
|
||||
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
return 0;
|
||||
@@ -749,6 +900,7 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
|
||||
for (i = 0; lock && i < SCOUTFS_NET_LOCK_MAX_RECOVER_NR; i++) {
|
||||
|
||||
nlr->locks[i].key = lock->start;
|
||||
nlr->locks[i].write_version = cpu_to_le64(lock->write_version);
|
||||
nlr->locks[i].old_mode = lock->mode;
|
||||
nlr->locks[i].new_mode = lock->mode;
|
||||
|
||||
@@ -769,7 +921,7 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
|
||||
}
|
||||
|
||||
static bool lock_wait_cond(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
int mode)
|
||||
enum scoutfs_lock_mode mode)
|
||||
{
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
bool wake;
|
||||
@@ -803,7 +955,7 @@ static bool lock_flags_invalid(int flags)
|
||||
* won't process our request until it receives our invalidation
|
||||
* response.
|
||||
*/
|
||||
static int lock_key_range(struct super_block *sb, int mode, int flags,
|
||||
static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
struct scoutfs_lock **ret_lock)
|
||||
{
|
||||
@@ -911,7 +1063,7 @@ out_unlock:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_lock_ino(struct super_block *sb, int mode, int flags, u64 ino,
|
||||
int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int flags, u64 ino,
|
||||
struct scoutfs_lock **ret_lock)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
@@ -936,7 +1088,7 @@ int scoutfs_lock_ino(struct super_block *sb, int mode, int flags, u64 ino,
|
||||
* is incremented as new locks are acquired and then indicates that an
|
||||
* old inode with a smaller refresh_gen needs to be refreshed.
|
||||
*/
|
||||
int scoutfs_lock_inode(struct super_block *sb, int mode, int flags,
|
||||
int scoutfs_lock_inode(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct inode *inode, struct scoutfs_lock **lock)
|
||||
{
|
||||
int ret;
|
||||
@@ -999,7 +1151,7 @@ static void swap_arg(void *A, void *B, int size)
|
||||
*
|
||||
* (pretty great collision with d_lock() here)
|
||||
*/
|
||||
int scoutfs_lock_inodes(struct super_block *sb, int mode, int flags,
|
||||
int scoutfs_lock_inodes(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct inode *a, struct scoutfs_lock **a_lock,
|
||||
struct inode *b, struct scoutfs_lock **b_lock,
|
||||
struct inode *c, struct scoutfs_lock **c_lock,
|
||||
@@ -1047,7 +1199,7 @@ int scoutfs_lock_inodes(struct super_block *sb, int mode, int flags,
|
||||
/*
|
||||
* The rename lock is magical because it's global.
|
||||
*/
|
||||
int scoutfs_lock_rename(struct super_block *sb, int mode, int flags,
|
||||
int scoutfs_lock_rename(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock)
|
||||
{
|
||||
struct scoutfs_key key = {
|
||||
@@ -1094,7 +1246,7 @@ void scoutfs_lock_get_index_item_range(u8 type, u64 major, u64 ino,
|
||||
* Lock the given index item. We use the index masks to calculate the
|
||||
* start and end key values that are covered by the lock.
|
||||
*/
|
||||
int scoutfs_lock_inode_index(struct super_block *sb, int mode,
|
||||
int scoutfs_lock_inode_index(struct super_block *sb, enum scoutfs_lock_mode mode,
|
||||
u8 type, u64 major, u64 ino,
|
||||
struct scoutfs_lock **ret_lock)
|
||||
{
|
||||
@@ -1106,24 +1258,6 @@ int scoutfs_lock_inode_index(struct super_block *sb, int mode,
|
||||
return lock_key_range(sb, mode, 0, &start, &end, ret_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Today we lock a hash value entirely. If we went to finer grained ino
|
||||
* locking as well we'd need to check the manifest to find the next
|
||||
* possible ino to lock so that we didn't try to iterate over all of
|
||||
* them.
|
||||
*/
|
||||
int scoutfs_lock_xattr_index(struct super_block *sb, int mode, int flags,
|
||||
u64 hash, struct scoutfs_lock **ret_lock)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
|
||||
scoutfs_xattr_index_key(&start, hash, 0, 0);
|
||||
scoutfs_xattr_index_key(&end, hash, U64_MAX, U64_MAX);
|
||||
|
||||
return lock_key_range(sb, mode, flags, &start, &end, ret_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The rid lock protects a mount's private persistent items in the rid
|
||||
* zone. It's held for the duration of the mount. It lets the mount
|
||||
@@ -1135,7 +1269,7 @@ int scoutfs_lock_xattr_index(struct super_block *sb, int mode, int flags,
|
||||
* able to. Maybe we have a bunch free and they're trying to allocate
|
||||
* and are getting ENOSPC.
|
||||
*/
|
||||
int scoutfs_lock_rid(struct super_block *sb, int mode, int flags,
|
||||
int scoutfs_lock_rid(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
u64 rid, struct scoutfs_lock **lock)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
@@ -1156,7 +1290,7 @@ int scoutfs_lock_rid(struct super_block *sb, int mode, int flags,
|
||||
* As we unlock we always extend the grace period to give the caller
|
||||
* another pass at the lock before its invalidated.
|
||||
*/
|
||||
void scoutfs_unlock(struct super_block *sb, struct scoutfs_lock *lock, int mode)
|
||||
void scoutfs_unlock(struct super_block *sb, struct scoutfs_lock *lock, enum scoutfs_lock_mode mode)
|
||||
{
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
|
||||
@@ -1169,9 +1303,12 @@ void scoutfs_unlock(struct super_block *sb, struct scoutfs_lock *lock, int mode)
|
||||
|
||||
lock_dec_count(lock->users, mode);
|
||||
extend_grace(sb, lock);
|
||||
if (lock_mode_can_write(mode))
|
||||
lock->dirty_trans_seq = scoutfs_trans_sample_seq(sb);
|
||||
|
||||
trace_scoutfs_lock_unlock(sb, lock);
|
||||
wake_up(&lock->waitq);
|
||||
queue_inv_work(linfo);
|
||||
put_lock(linfo, lock);
|
||||
|
||||
spin_unlock(&linfo->lock);
|
||||
@@ -1246,7 +1383,7 @@ void scoutfs_lock_del_coverage(struct super_block *sb,
|
||||
* the mode and keys from changing.
|
||||
*/
|
||||
bool scoutfs_lock_protected(struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
int mode)
|
||||
enum scoutfs_lock_mode mode)
|
||||
{
|
||||
signed char lock_mode = ACCESS_ONCE(lock->mode);
|
||||
|
||||
@@ -1256,38 +1393,50 @@ bool scoutfs_lock_protected(struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
}
|
||||
|
||||
/*
|
||||
* The shrink callback got the lock, marked it request_pending, and
|
||||
* handed it off to us. We kick off a null request and the lock will
|
||||
* be freed by the response once all users drain. If this races with
|
||||
* The shrink callback got the lock, marked it request_pending, and put
|
||||
* it on the shrink list. We send a null request and the lock will be
|
||||
* freed by the response once all users drain. If this races with
|
||||
* invalidation then the server will only send the grant response once
|
||||
* the invalidation is finished.
|
||||
*/
|
||||
static void scoutfs_lock_shrink_worker(struct work_struct *work)
|
||||
static void lock_shrink_worker(struct work_struct *work)
|
||||
{
|
||||
struct scoutfs_lock *lock = container_of(work, struct scoutfs_lock,
|
||||
shrink_work);
|
||||
struct super_block *sb = lock->sb;
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
struct lock_info *linfo = container_of(work, struct lock_info,
|
||||
shrink_work);
|
||||
struct super_block *sb = linfo->sb;
|
||||
struct scoutfs_net_lock nl;
|
||||
struct scoutfs_lock *lock;
|
||||
struct scoutfs_lock *tmp;
|
||||
LIST_HEAD(list);
|
||||
int ret;
|
||||
|
||||
/* unlocked lock access, but should be stable since we queued */
|
||||
nl.key = lock->start;
|
||||
nl.old_mode = lock->mode;
|
||||
nl.new_mode = SCOUTFS_LOCK_NULL;
|
||||
scoutfs_inc_counter(sb, lock_shrink_work);
|
||||
|
||||
ret = scoutfs_client_lock_request(sb, &nl);
|
||||
if (ret) {
|
||||
/* oh well, not freeing */
|
||||
scoutfs_inc_counter(sb, lock_shrink_request_aborted);
|
||||
spin_lock(&linfo->lock);
|
||||
list_splice_init(&linfo->shrink_list, &list);
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
list_for_each_entry_safe(lock, tmp, &list, shrink_head) {
|
||||
list_del_init(&lock->shrink_head);
|
||||
|
||||
lock->request_pending = 0;
|
||||
wake_up(&lock->waitq);
|
||||
put_lock(linfo, lock);
|
||||
/* unlocked lock access, but should be stable since we queued */
|
||||
nl.key = lock->start;
|
||||
nl.old_mode = lock->mode;
|
||||
nl.new_mode = SCOUTFS_LOCK_NULL;
|
||||
|
||||
spin_unlock(&linfo->lock);
|
||||
ret = scoutfs_client_lock_request(sb, &nl);
|
||||
if (ret) {
|
||||
/* oh well, not freeing */
|
||||
scoutfs_inc_counter(sb, lock_shrink_aborted);
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
|
||||
lock->request_pending = 0;
|
||||
wake_up(&lock->waitq);
|
||||
put_lock(linfo, lock);
|
||||
|
||||
spin_unlock(&linfo->lock);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1312,6 +1461,7 @@ static int scoutfs_lock_shrink(struct shrinker *shrink,
|
||||
struct scoutfs_lock *lock;
|
||||
struct scoutfs_lock *tmp;
|
||||
unsigned long nr;
|
||||
bool added = false;
|
||||
int ret;
|
||||
|
||||
nr = sc->nr_to_scan;
|
||||
@@ -1325,15 +1475,17 @@ restart:
|
||||
|
||||
BUG_ON(!lock_idle(lock));
|
||||
BUG_ON(lock->mode == SCOUTFS_LOCK_NULL);
|
||||
BUG_ON(!list_empty(&lock->shrink_head));
|
||||
|
||||
if (nr-- == 0)
|
||||
if (linfo->shutdown || nr-- == 0)
|
||||
break;
|
||||
|
||||
__lock_del_lru(linfo, lock);
|
||||
lock->request_pending = 1;
|
||||
queue_work(linfo->workq, &lock->shrink_work);
|
||||
list_add_tail(&lock->shrink_head, &linfo->shrink_list);
|
||||
added = true;
|
||||
|
||||
scoutfs_inc_counter(sb, lock_shrink_queued);
|
||||
scoutfs_inc_counter(sb, lock_shrink_attempted);
|
||||
trace_scoutfs_lock_shrink(sb, lock);
|
||||
|
||||
/* could have bazillions of idle locks */
|
||||
@@ -1343,6 +1495,9 @@ restart:
|
||||
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
if (added)
|
||||
queue_work(linfo->workq, &linfo->shrink_work);
|
||||
|
||||
out:
|
||||
ret = min_t(unsigned long, linfo->lru_nr, INT_MAX);
|
||||
trace_scoutfs_lock_shrink_exit(sb, sc->nr_to_scan, ret);
|
||||
@@ -1377,10 +1532,15 @@ static void lock_tseq_show(struct seq_file *m, struct scoutfs_tseq_entry *ent)
|
||||
}
|
||||
|
||||
/*
|
||||
* We're going to be destroying the locks soon. We shouldn't have any
|
||||
* normal task holders that would have prevented unmount. We can have
|
||||
* internal threads blocked in locks. We force all currently blocked
|
||||
* and future lock calls to return -ESHUTDOWN.
|
||||
* The caller is going to be calling _destroy soon and, critically, is
|
||||
* about to shutdown networking before calling us so that we don't get
|
||||
* any callbacks while we're destroying. We have to ensure that we
|
||||
* won't call networking after this returns.
|
||||
*
|
||||
* Internal fs threads can be using locking, and locking can have async
|
||||
* work pending. We use ->shutdown to force callers to return
|
||||
* -ESHUTDOWN and to prevent the future queueing of work that could call
|
||||
* networking. Locks whose work is stopped will be torn down by _destroy.
|
||||
*/
|
||||
void scoutfs_lock_shutdown(struct super_block *sb)
|
||||
{
|
||||
@@ -1402,6 +1562,10 @@ void scoutfs_lock_shutdown(struct super_block *sb)
|
||||
}
|
||||
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
flush_work(&linfo->grant_work);
|
||||
flush_delayed_work(&linfo->inv_dwork);
|
||||
flush_work(&linfo->shrink_work);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1422,7 +1586,7 @@ void scoutfs_lock_destroy(struct super_block *sb)
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
struct scoutfs_lock *lock;
|
||||
struct rb_node *node;
|
||||
int mode;
|
||||
enum scoutfs_lock_mode mode;
|
||||
|
||||
if (!linfo)
|
||||
return;
|
||||
@@ -1474,6 +1638,12 @@ void scoutfs_lock_destroy(struct super_block *sb)
|
||||
lock->request_pending = 0;
|
||||
if (!list_empty(&lock->lru_head))
|
||||
__lock_del_lru(linfo, lock);
|
||||
if (!list_empty(&lock->grant_head))
|
||||
list_del_init(&lock->grant_head);
|
||||
if (!list_empty(&lock->inv_head))
|
||||
list_del_init(&lock->inv_head);
|
||||
if (!list_empty(&lock->shrink_head))
|
||||
list_del_init(&lock->shrink_head);
|
||||
lock_remove(linfo, lock);
|
||||
lock_free(linfo, lock);
|
||||
}
|
||||
@@ -1501,6 +1671,12 @@ int scoutfs_lock_setup(struct super_block *sb)
|
||||
linfo->shrinker.seeks = DEFAULT_SEEKS;
|
||||
register_shrinker(&linfo->shrinker);
|
||||
INIT_LIST_HEAD(&linfo->lru_list);
|
||||
INIT_WORK(&linfo->grant_work, lock_grant_worker);
|
||||
INIT_LIST_HEAD(&linfo->grant_list);
|
||||
INIT_DELAYED_WORK(&linfo->inv_dwork, lock_invalidate_worker);
|
||||
INIT_LIST_HEAD(&linfo->inv_list);
|
||||
INIT_WORK(&linfo->shrink_work, lock_shrink_worker);
|
||||
INIT_LIST_HEAD(&linfo->shrink_list);
|
||||
atomic64_set(&linfo->next_refresh_gen, 0);
|
||||
scoutfs_tseq_tree_init(&linfo->tseq_tree, lock_tseq_show);
|
||||
|
||||
|
||||
@@ -22,24 +22,32 @@ struct scoutfs_lock {
|
||||
struct rb_node range_node;
|
||||
u64 refresh_gen;
|
||||
u64 write_version;
|
||||
u64 dirty_trans_seq;
|
||||
struct scoutfs_net_roots roots;
|
||||
struct list_head lru_head;
|
||||
wait_queue_head_t waitq;
|
||||
struct work_struct shrink_work;
|
||||
ktime_t grace_deadline;
|
||||
unsigned long request_pending:1,
|
||||
invalidate_pending:1;
|
||||
|
||||
struct list_head grant_head;
|
||||
struct scoutfs_net_lock_grant_response grant_resp;
|
||||
struct list_head inv_head;
|
||||
struct scoutfs_net_lock inv_nl;
|
||||
u64 inv_net_id;
|
||||
struct list_head shrink_head;
|
||||
|
||||
spinlock_t cov_list_lock;
|
||||
struct list_head cov_list;
|
||||
|
||||
int mode;
|
||||
enum scoutfs_lock_mode mode;
|
||||
unsigned int waiters[SCOUTFS_LOCK_NR_MODES];
|
||||
unsigned int users[SCOUTFS_LOCK_NR_MODES];
|
||||
|
||||
struct scoutfs_tseq_entry tseq_entry;
|
||||
|
||||
/* the forest btree code stores data per lock */
|
||||
struct forest_lock_private *forest_private;
|
||||
/* the forest tracks which log tree last saw bloom bit updates */
|
||||
atomic64_t forest_bloom_nr;
|
||||
};
|
||||
|
||||
struct scoutfs_lock_coverage {
|
||||
@@ -49,35 +57,33 @@ struct scoutfs_lock_coverage {
|
||||
};
|
||||
|
||||
int scoutfs_lock_grant_response(struct super_block *sb,
|
||||
struct scoutfs_net_lock *nl);
|
||||
struct scoutfs_net_lock_grant_response *gr);
|
||||
int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_net_lock *nl);
|
||||
int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_key *key);
|
||||
|
||||
int scoutfs_lock_inode(struct super_block *sb, int mode, int flags,
|
||||
int scoutfs_lock_inode(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct inode *inode, struct scoutfs_lock **ret_lock);
|
||||
int scoutfs_lock_ino(struct super_block *sb, int mode, int flags, u64 ino,
|
||||
int scoutfs_lock_ino(struct super_block *sb, enum scoutfs_lock_mode mode, int flags, u64 ino,
|
||||
struct scoutfs_lock **ret_lock);
|
||||
void scoutfs_lock_get_index_item_range(u8 type, u64 major, u64 ino,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end);
|
||||
int scoutfs_lock_inode_index(struct super_block *sb, int mode,
|
||||
int scoutfs_lock_inode_index(struct super_block *sb, enum scoutfs_lock_mode mode,
|
||||
u8 type, u64 major, u64 ino,
|
||||
struct scoutfs_lock **ret_lock);
|
||||
int scoutfs_lock_xattr_index(struct super_block *sb, int mode, int flags,
|
||||
u64 hash, struct scoutfs_lock **ret_lock);
|
||||
int scoutfs_lock_inodes(struct super_block *sb, int mode, int flags,
|
||||
int scoutfs_lock_inodes(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct inode *a, struct scoutfs_lock **a_lock,
|
||||
struct inode *b, struct scoutfs_lock **b_lock,
|
||||
struct inode *c, struct scoutfs_lock **c_lock,
|
||||
struct inode *d, struct scoutfs_lock **D_lock);
|
||||
int scoutfs_lock_rename(struct super_block *sb, int mode, int flags,
|
||||
int scoutfs_lock_rename(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock);
|
||||
int scoutfs_lock_rid(struct super_block *sb, int mode, int flags,
|
||||
int scoutfs_lock_rid(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
u64 rid, struct scoutfs_lock **lock);
|
||||
void scoutfs_unlock(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
int level);
|
||||
enum scoutfs_lock_mode mode);
|
||||
|
||||
void scoutfs_lock_init_coverage(struct scoutfs_lock_coverage *cov);
|
||||
void scoutfs_lock_add_coverage(struct super_block *sb,
|
||||
@@ -88,7 +94,7 @@ bool scoutfs_lock_is_covered(struct super_block *sb,
|
||||
void scoutfs_lock_del_coverage(struct super_block *sb,
|
||||
struct scoutfs_lock_coverage *cov);
|
||||
bool scoutfs_lock_protected(struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
int mode);
|
||||
enum scoutfs_lock_mode mode);
|
||||
|
||||
void scoutfs_free_unused_locks(struct super_block *sb, unsigned long nr);
|
||||
|
||||
|
||||
@@ -20,7 +20,6 @@
|
||||
#include "tseq.h"
|
||||
#include "spbm.h"
|
||||
#include "block.h"
|
||||
#include "radix.h"
|
||||
#include "btree.h"
|
||||
#include "msg.h"
|
||||
#include "scoutfs_trace.h"
|
||||
@@ -87,8 +86,10 @@ struct lock_server_info {
|
||||
struct scoutfs_tseq_tree tseq_tree;
|
||||
struct dentry *tseq_dentry;
|
||||
|
||||
struct scoutfs_radix_allocator *alloc;
|
||||
struct scoutfs_alloc *alloc;
|
||||
struct scoutfs_block_writer *wri;
|
||||
|
||||
atomic64_t write_version;
|
||||
};
|
||||
|
||||
#define DECLARE_LOCK_SERVER_INFO(sb, name) \
|
||||
@@ -117,12 +118,6 @@ struct server_lock_node {
|
||||
struct list_head invalidated;
|
||||
};
|
||||
|
||||
enum {
|
||||
CLE_GRANTED,
|
||||
CLE_REQUESTED,
|
||||
CLE_INVALIDATED,
|
||||
};
|
||||
|
||||
/*
|
||||
* Interactions with the client are tracked with these little mode
|
||||
* wrappers.
|
||||
@@ -489,12 +484,12 @@ static int process_waiting_requests(struct super_block *sb,
|
||||
struct server_lock_node *snode)
|
||||
{
|
||||
DECLARE_LOCK_SERVER_INFO(sb, inf);
|
||||
struct scoutfs_net_lock_grant_response gres;
|
||||
struct scoutfs_net_lock nl;
|
||||
struct client_lock_entry *req;
|
||||
struct client_lock_entry *req_tmp;
|
||||
struct client_lock_entry *gr;
|
||||
struct client_lock_entry *gr_tmp;
|
||||
static atomic64_t write_version = ATOMIC64_INIT(0);
|
||||
u64 wv;
|
||||
int ret;
|
||||
|
||||
@@ -548,12 +543,15 @@ static int process_waiting_requests(struct super_block *sb,
|
||||
|
||||
if (nl.new_mode == SCOUTFS_LOCK_WRITE ||
|
||||
nl.new_mode == SCOUTFS_LOCK_WRITE_ONLY) {
|
||||
wv = atomic64_inc_return(&write_version);
|
||||
wv = atomic64_inc_return(&inf->write_version);
|
||||
nl.write_version = cpu_to_le64(wv);
|
||||
}
|
||||
|
||||
gres.nl = nl;
|
||||
scoutfs_server_get_roots(sb, &gres.roots);
|
||||
|
||||
ret = scoutfs_server_lock_response(sb, req->rid,
|
||||
req->net_id, &nl);
|
||||
req->net_id, &gres);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -575,6 +573,14 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void init_lock_clients_key(struct scoutfs_key *key, u64 rid)
|
||||
{
|
||||
*key = (struct scoutfs_key) {
|
||||
.sk_zone = SCOUTFS_LOCK_CLIENTS_ZONE,
|
||||
.sklc_rid = cpu_to_le64(rid),
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* The server received a greeting from a client for the first time. If
|
||||
* the client had already talked to the server then we must find an
|
||||
@@ -589,23 +595,22 @@ int scoutfs_lock_server_greeting(struct super_block *sb, u64 rid,
|
||||
{
|
||||
DECLARE_LOCK_SERVER_INFO(sb, inf);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_lock_client_btree_key cbk;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
cbk.rid = cpu_to_be64(rid);
|
||||
init_lock_clients_key(&key, rid);
|
||||
|
||||
mutex_lock(&inf->mutex);
|
||||
if (should_exist) {
|
||||
ret = scoutfs_btree_lookup(sb, &super->lock_clients,
|
||||
&cbk, sizeof(cbk), &iref);
|
||||
ret = scoutfs_btree_lookup(sb, &super->lock_clients, &key,
|
||||
&iref);
|
||||
if (ret == 0)
|
||||
scoutfs_btree_put_iref(&iref);
|
||||
} else {
|
||||
ret = scoutfs_btree_insert(sb, inf->alloc, inf->wri,
|
||||
&super->lock_clients,
|
||||
&cbk, sizeof(cbk), NULL, 0);
|
||||
&key, NULL, 0);
|
||||
}
|
||||
mutex_unlock(&inf->mutex);
|
||||
|
||||
@@ -664,6 +669,14 @@ static int finished_recovery(struct super_block *sb, u64 rid, bool cancel)
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void set_max_write_version(struct lock_server_info *inf, u64 new)
|
||||
{
|
||||
u64 old;
|
||||
|
||||
while (new > (old = atomic64_read(&inf->write_version)) &&
|
||||
(atomic64_cmpxchg(&inf->write_version, old, new) != old));
|
||||
}
|
||||
|
||||
/*
|
||||
* We sent a lock recover request to the client when we received its
|
||||
* greeting while in recovery. Here we instantiate all the locks it
|
||||
@@ -727,6 +740,10 @@ int scoutfs_lock_server_recover_response(struct super_block *sb, u64 rid,
|
||||
scoutfs_tseq_add(&inf->tseq_tree, &clent->tseq_entry);
|
||||
|
||||
put_server_lock(inf, snode);
|
||||
|
||||
/* make sure next write lock is greater than all recovered */
|
||||
set_max_write_version(inf,
|
||||
le64_to_cpu(nlr->locks[i].write_version));
|
||||
}
|
||||
|
||||
/* send request for next batch of keys */
|
||||
@@ -738,15 +755,12 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int get_rid_and_put_ref(struct scoutfs_btree_item_ref *iref,
|
||||
u64 *rid)
|
||||
static int get_rid_and_put_ref(struct scoutfs_btree_item_ref *iref, u64 *rid)
|
||||
{
|
||||
struct scoutfs_lock_client_btree_key *cbk;
|
||||
int ret;
|
||||
|
||||
if (iref->key_len == sizeof(*cbk) && iref->val_len == 0) {
|
||||
cbk = iref->key;
|
||||
*rid = be64_to_cpu(cbk->rid);
|
||||
if (iref->val_len == 0) {
|
||||
*rid = le64_to_cpu(iref->key->sklc_rid);
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = -EIO;
|
||||
@@ -767,8 +781,8 @@ static void scoutfs_lock_server_recovery_timeout(struct work_struct *work)
|
||||
recovery_dwork.work);
|
||||
struct super_block *sb = inf->sb;
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_lock_client_btree_key cbk;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
bool timed_out;
|
||||
u64 rid;
|
||||
int ret;
|
||||
@@ -779,9 +793,8 @@ static void scoutfs_lock_server_recovery_timeout(struct work_struct *work)
|
||||
|
||||
/* we enter recovery if there are any client records */
|
||||
for (rid = 0; ; rid++) {
|
||||
cbk.rid = cpu_to_be64(rid);
|
||||
ret = scoutfs_btree_next(sb, &super->lock_clients,
|
||||
&cbk, sizeof(cbk), &iref);
|
||||
init_lock_clients_key(&key, rid);
|
||||
ret = scoutfs_btree_next(sb, &super->lock_clients, &key, &iref);
|
||||
if (ret == -ENOENT) {
|
||||
ret = 0;
|
||||
break;
|
||||
@@ -806,10 +819,9 @@ static void scoutfs_lock_server_recovery_timeout(struct work_struct *work)
|
||||
scoutfs_err(sb, "client rid %016llx lock recovery timed out",
|
||||
rid);
|
||||
|
||||
cbk.rid = cpu_to_be64(rid);
|
||||
init_lock_clients_key(&key, rid);
|
||||
ret = scoutfs_btree_delete(sb, inf->alloc, inf->wri,
|
||||
&super->lock_clients,
|
||||
&cbk, sizeof(cbk));
|
||||
&super->lock_clients, &key);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
@@ -838,7 +850,6 @@ int scoutfs_lock_server_farewell(struct super_block *sb, u64 rid)
|
||||
{
|
||||
DECLARE_LOCK_SERVER_INFO(sb, inf);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_lock_client_btree_key cli;
|
||||
struct client_lock_entry *clent;
|
||||
struct client_lock_entry *tmp;
|
||||
struct server_lock_node *snode;
|
||||
@@ -847,10 +858,10 @@ int scoutfs_lock_server_farewell(struct super_block *sb, u64 rid)
|
||||
bool freed;
|
||||
int ret = 0;
|
||||
|
||||
cli.rid = cpu_to_be64(rid);
|
||||
mutex_lock(&inf->mutex);
|
||||
init_lock_clients_key(&key, rid);
|
||||
ret = scoutfs_btree_delete(sb, inf->alloc, inf->wri,
|
||||
&super->lock_clients, &cli, sizeof(cli));
|
||||
&super->lock_clients, &key);
|
||||
mutex_unlock(&inf->mutex);
|
||||
if (ret == -ENOENT) {
|
||||
ret = 0;
|
||||
@@ -951,14 +962,14 @@ static void lock_server_tseq_show(struct seq_file *m,
|
||||
* we time them out.
|
||||
*/
|
||||
int scoutfs_lock_server_setup(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_block_writer *wri)
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, u64 max_vers)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct lock_server_info *inf;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_lock_client_btree_key cbk;
|
||||
struct scoutfs_key key;
|
||||
unsigned int nr;
|
||||
u64 rid;
|
||||
int ret;
|
||||
@@ -977,6 +988,7 @@ int scoutfs_lock_server_setup(struct super_block *sb,
|
||||
scoutfs_tseq_tree_init(&inf->tseq_tree, lock_server_tseq_show);
|
||||
inf->alloc = alloc;
|
||||
inf->wri = wri;
|
||||
atomic64_set(&inf->write_version, max_vers); /* inc_return gives +1 */
|
||||
|
||||
inf->tseq_dentry = scoutfs_tseq_create("server_locks", sbi->debug_root,
|
||||
&inf->tseq_tree);
|
||||
@@ -990,9 +1002,8 @@ int scoutfs_lock_server_setup(struct super_block *sb,
|
||||
/* we enter recovery if there are any client records */
|
||||
nr = 0;
|
||||
for (rid = 0; ; rid++) {
|
||||
cbk.rid = cpu_to_be64(rid);
|
||||
ret = scoutfs_btree_next(sb, &super->lock_clients,
|
||||
&cbk, sizeof(cbk), &iref);
|
||||
init_lock_clients_key(&key, rid);
|
||||
ret = scoutfs_btree_next(sb, &super->lock_clients, &key, &iref);
|
||||
if (ret == -ENOENT)
|
||||
break;
|
||||
if (ret == 0)
|
||||
|
||||
@@ -12,8 +12,8 @@ int scoutfs_lock_server_response(struct super_block *sb, u64 rid,
|
||||
int scoutfs_lock_server_farewell(struct super_block *sb, u64 rid);
|
||||
|
||||
int scoutfs_lock_server_setup(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_block_writer *wri);
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, u64 max_vers);
|
||||
void scoutfs_lock_server_destroy(struct super_block *sb);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -100,7 +100,7 @@ do { \
|
||||
} while (0)
|
||||
|
||||
/* listening and their accepting sockets have a fixed locking order */
|
||||
enum {
|
||||
enum spin_lock_subtype {
|
||||
CONN_LOCK_LISTENER,
|
||||
CONN_LOCK_ACCEPTED,
|
||||
};
|
||||
@@ -369,6 +369,7 @@ static int submit_send(struct super_block *sb,
|
||||
msend->nh.cmd = cmd;
|
||||
msend->nh.flags = flags;
|
||||
msend->nh.error = net_err;
|
||||
memset(msend->nh.__pad, 0, sizeof(msend->nh.__pad));
|
||||
msend->nh.data_len = cpu_to_le16(data_len);
|
||||
if (data_len)
|
||||
memcpy(msend->nh.data, data, data_len);
|
||||
|
||||
@@ -76,7 +76,7 @@ struct scoutfs_net_connection {
|
||||
void *info;
|
||||
};
|
||||
|
||||
enum {
|
||||
enum conn_flags {
|
||||
CONN_FL_valid_greeting = (1UL << 0), /* other commands can proceed */
|
||||
CONN_FL_established = (1UL << 1), /* added sends queue send work */
|
||||
CONN_FL_shutting_down = (1UL << 2), /* shutdown work was queued */
|
||||
@@ -102,6 +102,7 @@ static inline void scoutfs_addr_from_sin(struct scoutfs_inet_addr *addr,
|
||||
{
|
||||
addr->addr = be32_to_le32(sin->sin_addr.s_addr);
|
||||
addr->port = be16_to_le16(sin->sin_port);
|
||||
memset(addr->__pad, 0, sizeof(addr->__pad));
|
||||
}
|
||||
|
||||
struct scoutfs_net_connection *
|
||||
|
||||
@@ -16,6 +16,7 @@
|
||||
#include <linux/types.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/debugfs.h>
|
||||
#include <linux/namei.h>
|
||||
|
||||
#include <linux/parser.h>
|
||||
#include <linux/inet.h>
|
||||
@@ -28,24 +29,16 @@
|
||||
|
||||
static const match_table_t tokens = {
|
||||
{Opt_server_addr, "server_addr=%s"},
|
||||
{Opt_metadev_path, "metadev_path=%s"},
|
||||
{Opt_err, NULL}
|
||||
};
|
||||
|
||||
struct options_sb_info {
|
||||
struct dentry *debugfs_dir;
|
||||
u32 btree_force_tiny_blocks;
|
||||
};
|
||||
|
||||
u32 scoutfs_option_u32(struct super_block *sb, int token)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct options_sb_info *osi = sbi->options;
|
||||
|
||||
switch(token) {
|
||||
case Opt_btree_force_tiny_blocks:
|
||||
return osi->btree_force_tiny_blocks;
|
||||
}
|
||||
|
||||
WARN_ON_ONCE(1);
|
||||
return 0;
|
||||
}
|
||||
@@ -90,6 +83,52 @@ static int parse_ipv4(struct super_block *sb, char *str,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int parse_bdev_path(struct super_block *sb, substring_t *substr,
|
||||
char **bdev_path_ret)
|
||||
{
|
||||
char *bdev_path;
|
||||
struct inode *bdev_inode;
|
||||
struct path path;
|
||||
bool got_path = false;
|
||||
int ret;
|
||||
|
||||
bdev_path = match_strdup(substr);
|
||||
if (!bdev_path) {
|
||||
scoutfs_err(sb, "bdev string dup failed");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = kern_path(bdev_path, LOOKUP_FOLLOW, &path);
|
||||
if (ret) {
|
||||
scoutfs_err(sb, "path %s not found for bdev: error %d",
|
||||
bdev_path, ret);
|
||||
goto out;
|
||||
}
|
||||
got_path = true;
|
||||
|
||||
bdev_inode = d_inode(path.dentry);
|
||||
if (!S_ISBLK(bdev_inode->i_mode)) {
|
||||
scoutfs_err(sb, "path %s for bdev is not a block device",
|
||||
bdev_path);
|
||||
ret = -ENOTBLK;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
if (got_path) {
|
||||
path_put(&path);
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
kfree(bdev_path);
|
||||
} else {
|
||||
*bdev_path_ret = bdev_path;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_parse_options(struct super_block *sb, char *options,
|
||||
struct mount_options *parsed)
|
||||
{
|
||||
@@ -115,6 +154,13 @@ int scoutfs_parse_options(struct super_block *sb, char *options,
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
break;
|
||||
case Opt_metadev_path:
|
||||
|
||||
ret = parse_bdev_path(sb, &args[0],
|
||||
&parsed->metadev_path);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
break;
|
||||
default:
|
||||
scoutfs_err(sb, "Unknown or malformed option, \"%s\"",
|
||||
p);
|
||||
@@ -122,6 +168,11 @@ int scoutfs_parse_options(struct super_block *sb, char *options,
|
||||
}
|
||||
}
|
||||
|
||||
if (!parsed->metadev_path) {
|
||||
scoutfs_err(sb, "Required mount option \"metadev_path\" not found");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -143,13 +194,6 @@ int scoutfs_options_setup(struct super_block *sb)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!debugfs_create_bool("btree_force_tiny_blocks", 0644,
|
||||
osi->debugfs_dir,
|
||||
&osi->btree_force_tiny_blocks)) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret)
|
||||
|
||||
@@ -5,18 +5,15 @@
|
||||
#include <linux/in.h>
|
||||
#include "format.h"
|
||||
|
||||
enum {
|
||||
/*
|
||||
* For debugging we can quickly create huge trees by limiting
|
||||
* the number of items in each block as though the blocks were tiny.
|
||||
*/
|
||||
Opt_btree_force_tiny_blocks,
|
||||
enum scoutfs_mount_options {
|
||||
Opt_server_addr,
|
||||
Opt_metadev_path,
|
||||
Opt_err,
|
||||
};
|
||||
|
||||
struct mount_options {
|
||||
struct sockaddr_in server_addr;
|
||||
char *metadev_path;
|
||||
};
|
||||
|
||||
int scoutfs_parse_options(struct super_block *sb, char *options,
|
||||
|
||||
@@ -112,12 +112,13 @@ static ktime_t random_to(u32 lo, u32 hi)
|
||||
/*
|
||||
* The caller is about to read all the quorum blocks. We invalidate any
|
||||
* cached blocks and issue one large contiguous read to repopulate the
|
||||
* cache. The caller then uses normal sb_bread to read each block. I'm
|
||||
* cache. The caller then uses normal __bread to read each block. I'm
|
||||
* not a huge fan of the plug but I couldn't get the individual
|
||||
* readahead requests merged without it.
|
||||
*/
|
||||
static void readahead_quorum_blocks(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct buffer_head *bh;
|
||||
struct blk_plug plug;
|
||||
int i;
|
||||
@@ -125,7 +126,8 @@ static void readahead_quorum_blocks(struct super_block *sb)
|
||||
blk_start_plug(&plug);
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) {
|
||||
bh = sb_getblk(sb, SCOUTFS_QUORUM_BLKNO + i);
|
||||
bh = __getblk(sbi->meta_bdev, SCOUTFS_QUORUM_BLKNO + i,
|
||||
SCOUTFS_BLOCK_SM_SIZE);
|
||||
if (!bh)
|
||||
continue;
|
||||
|
||||
@@ -144,7 +146,7 @@ struct quorum_block_head {
|
||||
struct list_head head;
|
||||
union {
|
||||
struct scoutfs_quorum_block blk;
|
||||
u8 bytes[SCOUTFS_BLOCK_SIZE];
|
||||
u8 bytes[SCOUTFS_BLOCK_SM_SIZE];
|
||||
};
|
||||
};
|
||||
|
||||
@@ -184,13 +186,13 @@ static size_t quorum_block_bytes(struct scoutfs_quorum_block *blk)
|
||||
static bool invalid_quorum_block(struct buffer_head *bh,
|
||||
struct scoutfs_quorum_block *blk)
|
||||
{
|
||||
return bh->b_size != SCOUTFS_BLOCK_SIZE ||
|
||||
sizeof(struct scoutfs_quorum_block) > SCOUTFS_BLOCK_SIZE ||
|
||||
return bh->b_size != SCOUTFS_BLOCK_SM_SIZE ||
|
||||
sizeof(struct scoutfs_quorum_block) > SCOUTFS_BLOCK_SM_SIZE ||
|
||||
quorum_block_crc(blk) != blk->crc ||
|
||||
le64_to_cpu(blk->blkno) != bh->b_blocknr ||
|
||||
blk->term == 0 ||
|
||||
blk->log_nr > SCOUTFS_QUORUM_LOG_MAX ||
|
||||
quorum_block_bytes(blk) > SCOUTFS_BLOCK_SIZE;
|
||||
quorum_block_bytes(blk) > SCOUTFS_BLOCK_SM_SIZE;
|
||||
}
|
||||
|
||||
/* true if a is stale and should be ignored */
|
||||
@@ -215,6 +217,7 @@ static bool stale_quorum_block(struct scoutfs_quorum_block *a,
|
||||
static int read_quorum_blocks(struct super_block *sb, struct list_head *blocks)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_quorum_block *blk;
|
||||
struct quorum_block_head *qbh;
|
||||
struct quorum_block_head *tmp;
|
||||
@@ -227,7 +230,8 @@ static int read_quorum_blocks(struct super_block *sb, struct list_head *blocks)
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) {
|
||||
brelse(bh);
|
||||
bh = sb_bread(sb, SCOUTFS_QUORUM_BLKNO + i);
|
||||
bh = __bread(sbi->meta_bdev, SCOUTFS_QUORUM_BLKNO + i,
|
||||
SCOUTFS_BLOCK_SM_SIZE);
|
||||
if (!bh) {
|
||||
scoutfs_inc_counter(sb, quorum_read_block_error);
|
||||
ret = -EIO;
|
||||
@@ -291,23 +295,25 @@ static int write_quorum_block(struct super_block *sb,
|
||||
struct scoutfs_quorum_block *our_blk)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_quorum_block *blk;
|
||||
struct buffer_head *bh = NULL;
|
||||
size_t size;
|
||||
int ret;
|
||||
|
||||
BUILD_BUG_ON(sizeof(struct scoutfs_quorum_block) > SCOUTFS_BLOCK_SIZE);
|
||||
BUILD_BUG_ON(sizeof(struct scoutfs_quorum_block) >
|
||||
SCOUTFS_BLOCK_SM_SIZE);
|
||||
|
||||
bh = sb_getblk(sb, SCOUTFS_QUORUM_BLKNO +
|
||||
prandom_u32_max(SCOUTFS_QUORUM_BLOCKS));
|
||||
bh = __getblk(sbi->meta_bdev, SCOUTFS_QUORUM_BLKNO +
|
||||
prandom_u32_max(SCOUTFS_QUORUM_BLOCKS),
|
||||
SCOUTFS_BLOCK_SM_SIZE);
|
||||
if (bh == NULL) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
size = quorum_block_bytes(our_blk);
|
||||
if (WARN_ON_ONCE(size > SCOUTFS_BLOCK_SIZE ||
|
||||
size > bh->b_size)) {
|
||||
if (WARN_ON_ONCE(size > SCOUTFS_BLOCK_SM_SIZE || size > bh->b_size)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
@@ -530,7 +536,7 @@ int scoutfs_quorum_election(struct super_block *sb, ktime_t timeout_abs,
|
||||
trace_scoutfs_quorum_election(sb, prev_term);
|
||||
|
||||
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
our_blk = kmalloc(SCOUTFS_BLOCK_SIZE, GFP_NOFS);
|
||||
our_blk = kmalloc(SCOUTFS_BLOCK_SM_SIZE, GFP_NOFS);
|
||||
if (!super || !our_blk) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
@@ -548,7 +554,7 @@ int scoutfs_quorum_election(struct super_block *sb, ktime_t timeout_abs,
|
||||
SCOUTFS_QUORUM_TERM_HI_MS);
|
||||
|
||||
for (;;) {
|
||||
memset(our_blk, 0, SCOUTFS_BLOCK_SIZE);
|
||||
memset(our_blk, 0, SCOUTFS_BLOCK_SM_SIZE);
|
||||
|
||||
scoutfs_inc_counter(sb, quorum_cycle);
|
||||
|
||||
|
||||
1546
kmod/src/radix.c
1546
kmod/src/radix.c
File diff suppressed because it is too large
Load Diff
@@ -1,45 +0,0 @@
|
||||
#ifndef _SCOUTFS_RADIX_H_
|
||||
#define _SCOUTFS_RADIX_H_
|
||||
|
||||
#include "per_task.h"
|
||||
|
||||
struct scoutfs_block_writer;
|
||||
|
||||
struct scoutfs_radix_allocator {
|
||||
struct mutex mutex;
|
||||
struct scoutfs_radix_root avail;
|
||||
struct scoutfs_radix_root freed;
|
||||
};
|
||||
|
||||
int scoutfs_radix_alloc(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_block_writer *wri, u64 *blkno);
|
||||
int scoutfs_radix_alloc_data(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_radix_root *root,
|
||||
int count, u64 *blkno_ret, int *count_ret);
|
||||
int scoutfs_radix_free(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_block_writer *wri, u64 blkno);
|
||||
int scoutfs_radix_free_data(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_radix_root *root,
|
||||
u64 blkno, int count);
|
||||
int scoutfs_radix_merge(struct super_block *sb,
|
||||
struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_radix_root *dst,
|
||||
struct scoutfs_radix_root *src,
|
||||
struct scoutfs_radix_root *inp, bool meta, u64 count);
|
||||
void scoutfs_radix_init_alloc(struct scoutfs_radix_allocator *alloc,
|
||||
struct scoutfs_radix_root *avail,
|
||||
struct scoutfs_radix_root *freed);
|
||||
void scoutfs_radix_root_init(struct super_block *sb,
|
||||
struct scoutfs_radix_root *root, bool meta);
|
||||
u64 scoutfs_radix_root_free_bytes(struct super_block *sb,
|
||||
struct scoutfs_radix_root *root);
|
||||
u64 scoutfs_radix_bit_leaf_nr(u64 bit);
|
||||
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@@ -58,10 +58,12 @@ do { \
|
||||
|
||||
int scoutfs_server_lock_request(struct super_block *sb, u64 rid,
|
||||
struct scoutfs_net_lock *nl);
|
||||
int scoutfs_server_lock_response(struct super_block *sb, u64 rid,
|
||||
u64 id, struct scoutfs_net_lock *nl);
|
||||
int scoutfs_server_lock_response(struct super_block *sb, u64 rid, u64 id,
|
||||
struct scoutfs_net_lock_grant_response *gr);
|
||||
int scoutfs_server_lock_recover_request(struct super_block *sb, u64 rid,
|
||||
struct scoutfs_key *key);
|
||||
void scoutfs_server_get_roots(struct super_block *sb,
|
||||
struct scoutfs_net_roots *roots);
|
||||
int scoutfs_server_hold_commit(struct super_block *sb);
|
||||
int scoutfs_server_apply_commit(struct super_block *sb, int err);
|
||||
|
||||
|
||||
71
kmod/src/sort_priv.c
Normal file
71
kmod/src/sort_priv.c
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* A copy of sort() from upstream with a priv argument that's passed
|
||||
* to comparison, like list_sort().
|
||||
*/
|
||||
|
||||
/* ------------------------ */
|
||||
|
||||
/*
|
||||
* A fast, small, non-recursive O(nlog n) sort for the Linux kernel
|
||||
*
|
||||
* Jan 23 2005 Matt Mackall <mpm@selenic.com>
|
||||
*/
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/module.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/slab.h>
|
||||
#include "sort_priv.h"
|
||||
|
||||
/**
|
||||
* sort - sort an array of elements
|
||||
* @priv: caller's pointer to pass to comparison and swap functions
|
||||
* @base: pointer to data to sort
|
||||
* @num: number of elements
|
||||
* @size: size of each element
|
||||
* @cmp_func: pointer to comparison function
|
||||
* @swap_func: pointer to swap function or NULL
|
||||
*
|
||||
* This function does a heapsort on the given array. You may provide a
|
||||
* swap_func function optimized to your element type.
|
||||
*
|
||||
* Sorting time is O(n log n) both on average and worst-case. While
|
||||
* qsort is about 20% faster on average, it suffers from exploitable
|
||||
* O(n*n) worst-case behavior and extra memory requirements that make
|
||||
* it less suitable for kernel use.
|
||||
*/
|
||||
|
||||
void sort_priv(void *priv, void *base, size_t num, size_t size,
|
||||
int (*cmp_func)(void *priv, const void *, const void *),
|
||||
void (*swap_func)(void *priv, void *, void *, int size))
|
||||
{
|
||||
/* pre-scale counters for performance */
|
||||
int i = (num/2 - 1) * size, n = num * size, c, r;
|
||||
|
||||
/* heapify */
|
||||
for ( ; i >= 0; i -= size) {
|
||||
for (r = i; r * 2 + size < n; r = c) {
|
||||
c = r * 2 + size;
|
||||
if (c < n - size &&
|
||||
cmp_func(priv, base + c, base + c + size) < 0)
|
||||
c += size;
|
||||
if (cmp_func(priv, base + r, base + c) >= 0)
|
||||
break;
|
||||
swap_func(priv, base + r, base + c, size);
|
||||
}
|
||||
}
|
||||
|
||||
/* sort */
|
||||
for (i = n - size; i > 0; i -= size) {
|
||||
swap_func(priv, base, base + i, size);
|
||||
for (r = 0; r * 2 + size < i; r = c) {
|
||||
c = r * 2 + size;
|
||||
if (c < i - size &&
|
||||
cmp_func(priv, base + c, base + c + size) < 0)
|
||||
c += size;
|
||||
if (cmp_func(priv, base + r, base + c) >= 0)
|
||||
break;
|
||||
swap_func(priv, base + r, base + c, size);
|
||||
}
|
||||
}
|
||||
}
|
||||
8
kmod/src/sort_priv.h
Normal file
8
kmod/src/sort_priv.h
Normal file
@@ -0,0 +1,8 @@
|
||||
#ifndef _SCOUTFS_SORT_PRIV_H_
|
||||
#define _SCOUTFS_SORT_PRIV_H_
|
||||
|
||||
void sort_priv(void *priv, void *base, size_t num, size_t size,
|
||||
int (*cmp_func)(void *priv, const void *, const void *),
|
||||
void (*swap_func)(void *priv, void *, void *, int size));
|
||||
|
||||
#endif
|
||||
@@ -47,9 +47,9 @@ bool scoutfs_spbm_empty(struct scoutfs_spbm *spbm)
|
||||
return RB_EMPTY_ROOT(&spbm->root);
|
||||
}
|
||||
|
||||
enum {
|
||||
enum spbm_flags {
|
||||
/* if a node isn't found then return an allocated new node */
|
||||
SPBM_FIND_ALLOC = 0x1,
|
||||
SPBM_FIND_ALLOC = (1 << 0),
|
||||
};
|
||||
static struct spbm_node *find_node(struct scoutfs_spbm *spbm, u64 index,
|
||||
int flags)
|
||||
|
||||
2328
kmod/src/srch.c
Normal file
2328
kmod/src/srch.c
Normal file
File diff suppressed because it is too large
Load Diff
68
kmod/src/srch.h
Normal file
68
kmod/src/srch.h
Normal file
@@ -0,0 +1,68 @@
|
||||
#ifndef _SCOUTFS_SRCH_H_
|
||||
#define _SCOUTFS_SRCH_H_
|
||||
|
||||
struct scoutfs_block;
|
||||
|
||||
struct scoutfs_srch_rb_root {
|
||||
struct rb_root root;
|
||||
struct rb_node *last;
|
||||
unsigned long nr;
|
||||
};
|
||||
|
||||
struct scoutfs_srch_rb_node {
|
||||
struct rb_node node;
|
||||
u64 ino;
|
||||
u64 id;
|
||||
};
|
||||
|
||||
#define scoutfs_srch_foreach_rb_node(snode, node, sroot) \
|
||||
for (node = rb_first(&(sroot)->root); \
|
||||
node && (snode = container_of(node, struct scoutfs_srch_rb_node, \
|
||||
node), 1); \
|
||||
node = rb_next(node))
|
||||
|
||||
int scoutfs_srch_add(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_srch_file *sfl,
|
||||
struct scoutfs_block **bl_ret,
|
||||
u64 hash, u64 ino, u64 id);
|
||||
|
||||
void scoutfs_srch_destroy_rb_root(struct scoutfs_srch_rb_root *sroot);
|
||||
int scoutfs_srch_search_xattrs(struct super_block *sb,
|
||||
struct scoutfs_srch_rb_root *sroot,
|
||||
u64 hash, u64 ino, u64 last_ino, bool *done);
|
||||
|
||||
int scoutfs_srch_rotate_log(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
struct scoutfs_srch_file *sfl);
|
||||
int scoutfs_srch_get_compact(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root,
|
||||
u64 rid, struct scoutfs_srch_compact *sc);
|
||||
int scoutfs_srch_update_compact(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root, u64 rid,
|
||||
struct scoutfs_srch_compact *sc);
|
||||
int scoutfs_srch_commit_compact(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root, u64 rid,
|
||||
struct scoutfs_srch_compact *res,
|
||||
struct scoutfs_alloc_list_head *av,
|
||||
struct scoutfs_alloc_list_head *fr);
|
||||
int scoutfs_srch_cancel_compact(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_btree_root *root, u64 rid,
|
||||
struct scoutfs_alloc_list_head *av,
|
||||
struct scoutfs_alloc_list_head *fr);
|
||||
|
||||
void scoutfs_srch_destroy(struct super_block *sb);
|
||||
int scoutfs_srch_setup(struct super_block *sb);
|
||||
|
||||
#endif
|
||||
274
kmod/src/super.c
274
kmod/src/super.c
@@ -41,6 +41,9 @@
|
||||
#include "sysfs.h"
|
||||
#include "quorum.h"
|
||||
#include "forest.h"
|
||||
#include "srch.h"
|
||||
#include "item.h"
|
||||
#include "alloc.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
static struct dentry *scoutfs_debugfs_root;
|
||||
@@ -76,11 +79,30 @@ retry:
|
||||
return cpu_to_le64(ret);
|
||||
}
|
||||
|
||||
struct statfs_free_blocks {
|
||||
u64 meta;
|
||||
u64 data;
|
||||
};
|
||||
|
||||
static int count_free_blocks(struct super_block *sb, void *arg, int owner,
|
||||
u64 id, bool meta, bool avail, u64 blocks)
|
||||
{
|
||||
struct statfs_free_blocks *sfb = arg;
|
||||
|
||||
if (meta)
|
||||
sfb->meta += blocks;
|
||||
else
|
||||
sfb->data += blocks;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ask the server for the current statfs fields. The message is very
|
||||
* cheap so we're not worrying about spinning in statfs flooding the
|
||||
* server with requests. We can add a cache and stale results if that
|
||||
* becomes a problem.
|
||||
* Build the free block counts by having alloc read all the persistent
|
||||
* blocks which contain allocators and calling us for each of them.
|
||||
* Only the super block reads aren't cached so repeatedly calling statfs
|
||||
* is like repeated O_DIRECT IO. We can add a cache and stale results
|
||||
* if that IO becomes a problem.
|
||||
*
|
||||
* We fake the number of free inodes value by assuming that we can fill
|
||||
* free blocks with a certain number of inodes. We then the number of
|
||||
@@ -93,30 +115,50 @@ retry:
|
||||
static int scoutfs_statfs(struct dentry *dentry, struct kstatfs *kst)
|
||||
{
|
||||
struct super_block *sb = dentry->d_inode->i_sb;
|
||||
struct scoutfs_net_statfs nstatfs;
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct statfs_free_blocks sfb = {0,};
|
||||
__le32 uuid[4];
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_client_statfs(sb, &nstatfs);
|
||||
if (ret)
|
||||
return ret;
|
||||
scoutfs_inc_counter(sb, statfs);
|
||||
|
||||
kst->f_bfree = le64_to_cpu(nstatfs.bfree);
|
||||
super = kzalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
if (!super) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_alloc_foreach(sb, count_free_blocks, &sfb);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
kst->f_bfree = (sfb.meta << SCOUTFS_BLOCK_SM_LG_SHIFT) + sfb.data;
|
||||
kst->f_type = SCOUTFS_SUPER_MAGIC;
|
||||
kst->f_bsize = SCOUTFS_BLOCK_SIZE;
|
||||
kst->f_blocks = le64_to_cpu(nstatfs.total_blocks);
|
||||
kst->f_bsize = SCOUTFS_BLOCK_SM_SIZE;
|
||||
kst->f_blocks = (le64_to_cpu(super->total_meta_blocks) <<
|
||||
SCOUTFS_BLOCK_SM_LG_SHIFT) +
|
||||
le64_to_cpu(super->total_data_blocks);
|
||||
kst->f_bavail = kst->f_bfree;
|
||||
|
||||
kst->f_ffree = kst->f_bfree * 16;
|
||||
kst->f_files = kst->f_ffree + le64_to_cpu(nstatfs.next_ino);
|
||||
/* arbitrarily assume ~1K / empty file */
|
||||
kst->f_ffree = sfb.meta * (SCOUTFS_BLOCK_LG_SIZE / 1024);
|
||||
kst->f_files = kst->f_ffree + le64_to_cpu(super->next_ino);
|
||||
|
||||
BUILD_BUG_ON(sizeof(uuid) != sizeof(nstatfs.uuid));
|
||||
memcpy(uuid, &nstatfs, sizeof(uuid));
|
||||
BUILD_BUG_ON(sizeof(uuid) != sizeof(super->uuid));
|
||||
memcpy(uuid, super->uuid, sizeof(uuid));
|
||||
kst->f_fsid.val[0] = le32_to_cpu(uuid[0]) ^ le32_to_cpu(uuid[1]);
|
||||
kst->f_fsid.val[1] = le32_to_cpu(uuid[2]) ^ le32_to_cpu(uuid[3]);
|
||||
kst->f_namelen = SCOUTFS_NAME_LEN;
|
||||
kst->f_frsize = SCOUTFS_BLOCK_SIZE;
|
||||
kst->f_frsize = SCOUTFS_BLOCK_SM_SIZE;
|
||||
|
||||
/* the vfs fills f_flags */
|
||||
ret = 0;
|
||||
out:
|
||||
kfree(super);
|
||||
|
||||
/*
|
||||
* We don't take cluster locks in statfs which makes it a very
|
||||
@@ -126,7 +168,7 @@ static int scoutfs_statfs(struct dentry *dentry, struct kstatfs *kst)
|
||||
if (scoutfs_trigger(sb, STATFS_LOCK_PURGE))
|
||||
scoutfs_free_unused_locks(sb, -1UL);
|
||||
|
||||
return 0;
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int scoutfs_show_options(struct seq_file *seq, struct dentry *root)
|
||||
@@ -135,10 +177,21 @@ static int scoutfs_show_options(struct seq_file *seq, struct dentry *root)
|
||||
struct mount_options *opts = &SCOUTFS_SB(sb)->opts;
|
||||
|
||||
seq_printf(seq, ",server_addr="SIN_FMT, SIN_ARG(&opts->server_addr));
|
||||
seq_printf(seq, ",metadev_path=%s", opts->metadev_path);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t metadev_path_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
struct mount_options *opts = &SCOUTFS_SB(sb)->opts;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%s", opts->metadev_path);
|
||||
}
|
||||
SCOUTFS_ATTR_RO(metadev_path);
|
||||
|
||||
static ssize_t server_addr_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
@@ -151,6 +204,7 @@ static ssize_t server_addr_show(struct kobject *kobj,
|
||||
SCOUTFS_ATTR_RO(server_addr);
|
||||
|
||||
static struct attribute *mount_options_attrs[] = {
|
||||
SCOUTFS_ATTR_PTR(metadev_path),
|
||||
SCOUTFS_ATTR_PTR(server_addr),
|
||||
NULL,
|
||||
};
|
||||
@@ -163,6 +217,20 @@ static int scoutfs_sync_fs(struct super_block *sb, int wait)
|
||||
return scoutfs_trans_sync(sb, wait);
|
||||
}
|
||||
|
||||
/*
|
||||
* Data dev is closed by generic code, but we have to explicitly close the meta
|
||||
* dev.
|
||||
*/
|
||||
static void scoutfs_metadev_close(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
if (sbi->meta_bdev) {
|
||||
blkdev_put(sbi->meta_bdev, SCOUTFS_META_BDEV_MODE);
|
||||
sbi->meta_bdev = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* This destroys all the state that's built up in the sb info during
|
||||
* mount. It's called by us on errors during mount if we haven't set
|
||||
@@ -178,6 +246,7 @@ static void scoutfs_put_super(struct super_block *sb)
|
||||
sbi->shutdown = true;
|
||||
|
||||
scoutfs_data_destroy(sb);
|
||||
scoutfs_srch_destroy(sb);
|
||||
|
||||
scoutfs_unlock(sb, sbi->rid_lock, SCOUTFS_LOCK_WRITE);
|
||||
sbi->rid_lock = NULL;
|
||||
@@ -185,6 +254,7 @@ static void scoutfs_put_super(struct super_block *sb)
|
||||
scoutfs_shutdown_trans(sb);
|
||||
scoutfs_client_destroy(sb);
|
||||
scoutfs_inode_destroy(sb);
|
||||
scoutfs_item_destroy(sb);
|
||||
scoutfs_forest_destroy(sb);
|
||||
|
||||
/* the server locks the listen address and compacts */
|
||||
@@ -203,6 +273,9 @@ static void scoutfs_put_super(struct super_block *sb)
|
||||
debugfs_remove(sbi->debug_root);
|
||||
scoutfs_destroy_counters(sb);
|
||||
scoutfs_destroy_sysfs(sb);
|
||||
scoutfs_metadev_close(sb);
|
||||
|
||||
kfree(sbi->opts.metadev_path);
|
||||
kfree(sbi);
|
||||
|
||||
sb->s_fs_info = NULL;
|
||||
@@ -227,30 +300,33 @@ static const struct super_operations scoutfs_super_ops = {
|
||||
int scoutfs_write_super(struct super_block *sb,
|
||||
struct scoutfs_super_block *super)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
le64_add_cpu(&super->hdr.seq, 1);
|
||||
|
||||
return scoutfs_block_write_sm(sb, SCOUTFS_SUPER_BLKNO, &super->hdr,
|
||||
return scoutfs_block_write_sm(sb, sbi->meta_bdev, SCOUTFS_SUPER_BLKNO,
|
||||
&super->hdr,
|
||||
sizeof(struct scoutfs_super_block));
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the super block. If it's valid store it in the caller's super
|
||||
* struct.
|
||||
* Read super, specifying bdev.
|
||||
*/
|
||||
int scoutfs_read_super(struct super_block *sb,
|
||||
struct scoutfs_super_block *super_res)
|
||||
static int scoutfs_read_super_from_bdev(struct super_block *sb,
|
||||
struct block_device *bdev,
|
||||
struct scoutfs_super_block *super_res)
|
||||
{
|
||||
struct scoutfs_super_block *super;
|
||||
__le32 calc;
|
||||
u64 blkno;
|
||||
int ret;
|
||||
|
||||
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
if (!super)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = scoutfs_block_read_sm(sb, SCOUTFS_SUPER_BLKNO, &super->hdr,
|
||||
sizeof(struct scoutfs_super_block),
|
||||
&calc);
|
||||
ret = scoutfs_block_read_sm(sb, bdev, SCOUTFS_SUPER_BLKNO, &super->hdr,
|
||||
sizeof(struct scoutfs_super_block), &calc);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -276,10 +352,10 @@ int scoutfs_read_super(struct super_block *sb,
|
||||
}
|
||||
|
||||
|
||||
if (super->format_hash != cpu_to_le64(SCOUTFS_FORMAT_HASH)) {
|
||||
scoutfs_err(sb, "super block has invalid format hash 0x%llx, expected 0x%llx",
|
||||
le64_to_cpu(super->format_hash),
|
||||
SCOUTFS_FORMAT_HASH);
|
||||
if (super->version != cpu_to_le64(SCOUTFS_INTEROP_VERSION)) {
|
||||
scoutfs_err(sb, "super block has invalid version %llu, expected %llu",
|
||||
le64_to_cpu(super->version),
|
||||
SCOUTFS_INTEROP_VERSION);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@@ -294,13 +370,61 @@ int scoutfs_read_super(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
*super_res = *super;
|
||||
ret = 0;
|
||||
blkno = (SCOUTFS_QUORUM_BLKNO + SCOUTFS_QUORUM_BLOCKS) >>
|
||||
SCOUTFS_BLOCK_SM_LG_SHIFT;
|
||||
if (le64_to_cpu(super->first_meta_blkno) < blkno) {
|
||||
scoutfs_err(sb, "super block first meta blkno %llu is within quorum blocks",
|
||||
le64_to_cpu(super->first_meta_blkno));
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (le64_to_cpu(super->first_meta_blkno) >
|
||||
le64_to_cpu(super->last_meta_blkno)) {
|
||||
scoutfs_err(sb, "super block first meta blkno %llu is greater than last meta blkno %llu",
|
||||
le64_to_cpu(super->first_meta_blkno),
|
||||
le64_to_cpu(super->last_meta_blkno));
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (le64_to_cpu(super->first_data_blkno) >
|
||||
le64_to_cpu(super->last_data_blkno)) {
|
||||
scoutfs_err(sb, "super block first data blkno %llu is greater than last data blkno %llu",
|
||||
le64_to_cpu(super->first_data_blkno),
|
||||
le64_to_cpu(super->last_data_blkno));
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
blkno = (i_size_read(sb->s_bdev->bd_inode) >>
|
||||
SCOUTFS_BLOCK_SM_SHIFT) - 1;
|
||||
if (le64_to_cpu(super->last_data_blkno) > blkno) {
|
||||
scoutfs_err(sb, "super block last data blkno %llu is outsite device size last blkno %llu",
|
||||
le64_to_cpu(super->last_data_blkno), blkno);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
if (ret == 0)
|
||||
*super_res = *super;
|
||||
kfree(super);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read the super block from meta dev.
|
||||
*/
|
||||
int scoutfs_read_super(struct super_block *sb,
|
||||
struct scoutfs_super_block *super_res)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
return scoutfs_read_super_from_bdev(sb, sbi->meta_bdev, super_res);
|
||||
}
|
||||
|
||||
/*
|
||||
* This needs to be setup after reading the super because it uses the
|
||||
* fsid found in the super block.
|
||||
@@ -337,10 +461,66 @@ static int assign_random_id(struct scoutfs_sb_info *sbi)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure superblock copies in metadata and data block devices are valid, and
|
||||
* fill in in-memory superblock if so.
|
||||
*/
|
||||
static int scoutfs_read_supers(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_super_block *meta_super = NULL;
|
||||
struct scoutfs_super_block *data_super = NULL;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
int ret = 0;
|
||||
|
||||
meta_super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
data_super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
if (!meta_super || !data_super) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_read_super_from_bdev(sb, sbi->meta_bdev, meta_super);
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "could not get meta_super: error %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_read_super_from_bdev(sb, sb->s_bdev, data_super);
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "could not get data_super: error %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!SCOUTFS_IS_META_BDEV(meta_super)) {
|
||||
scoutfs_err(sb, "meta_super META flag not set");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (SCOUTFS_IS_META_BDEV(data_super)) {
|
||||
scoutfs_err(sb, "data_super META flag set");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (memcmp(meta_super->uuid, data_super->uuid, SCOUTFS_UUID_BYTES)) {
|
||||
scoutfs_err(sb, "superblock UUID mismatch");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
sbi->super = *meta_super;
|
||||
out:
|
||||
kfree(meta_super);
|
||||
kfree(data_super);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi;
|
||||
struct mount_options opts;
|
||||
struct block_device *meta_bdev;
|
||||
struct inode *inode;
|
||||
int ret;
|
||||
|
||||
@@ -379,14 +559,31 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
|
||||
sbi->opts = opts;
|
||||
|
||||
ret = sb_set_blocksize(sb, SCOUTFS_BLOCK_SIZE);
|
||||
if (ret != SCOUTFS_BLOCK_SIZE) {
|
||||
ret = sb_set_blocksize(sb, SCOUTFS_BLOCK_SM_SIZE);
|
||||
if (ret != SCOUTFS_BLOCK_SM_SIZE) {
|
||||
scoutfs_err(sb, "failed to set blocksize, returned %d", ret);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_read_super(sb, &SCOUTFS_SB(sb)->super) ?:
|
||||
meta_bdev =
|
||||
blkdev_get_by_path(sbi->opts.metadev_path,
|
||||
SCOUTFS_META_BDEV_MODE, sb);
|
||||
if (IS_ERR(meta_bdev)) {
|
||||
scoutfs_err(sb, "could not open metadev: error %ld",
|
||||
PTR_ERR(meta_bdev));
|
||||
ret = PTR_ERR(meta_bdev);
|
||||
goto out;
|
||||
}
|
||||
sbi->meta_bdev = meta_bdev;
|
||||
ret = set_blocksize(sbi->meta_bdev, SCOUTFS_BLOCK_SM_SIZE);
|
||||
if (ret != 0) {
|
||||
scoutfs_err(sb, "failed to set metadev blocksize, returned %d",
|
||||
ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_read_supers(sb) ?:
|
||||
scoutfs_debugfs_setup(sb) ?:
|
||||
scoutfs_setup_sysfs(sb) ?:
|
||||
scoutfs_setup_counters(sb) ?:
|
||||
@@ -396,6 +593,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
scoutfs_setup_triggers(sb) ?:
|
||||
scoutfs_block_setup(sb) ?:
|
||||
scoutfs_forest_setup(sb) ?:
|
||||
scoutfs_item_setup(sb) ?:
|
||||
scoutfs_inode_setup(sb) ?:
|
||||
scoutfs_data_setup(sb) ?:
|
||||
scoutfs_setup_trans(sb) ?:
|
||||
@@ -406,7 +604,8 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
scoutfs_client_setup(sb) ?:
|
||||
scoutfs_lock_rid(sb, SCOUTFS_LOCK_WRITE, 0, sbi->rid,
|
||||
&sbi->rid_lock) ?:
|
||||
scoutfs_trans_get_log_trees(sb);
|
||||
scoutfs_trans_get_log_trees(sb) ?:
|
||||
scoutfs_srch_setup(sb);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -483,6 +682,10 @@ static int __init scoutfs_module_init(void)
|
||||
".section .note.git_describe,\"a\"\n"
|
||||
".string \""SCOUTFS_GIT_DESCRIBE"\\n\"\n"
|
||||
".previous\n");
|
||||
__asm__ __volatile__ (
|
||||
".section .note.scoutfs_interop_version,\"a\"\n"
|
||||
".string \""SCOUTFS_INTEROP_VERSION_STR"\\n\"\n"
|
||||
".previous\n");
|
||||
|
||||
scoutfs_init_counters();
|
||||
|
||||
@@ -515,3 +718,4 @@ module_exit(scoutfs_module_exit)
|
||||
MODULE_AUTHOR("Zach Brown <zab@versity.com>");
|
||||
MODULE_LICENSE("GPL");
|
||||
MODULE_INFO(git_describe, SCOUTFS_GIT_DESCRIBE);
|
||||
MODULE_INFO(scoutfs_interop_version, SCOUTFS_INTEROP_VERSION_STR);
|
||||
|
||||
@@ -25,6 +25,7 @@ struct options_sb_info;
|
||||
struct net_info;
|
||||
struct block_info;
|
||||
struct forest_info;
|
||||
struct srch_info;
|
||||
|
||||
struct scoutfs_sb_info {
|
||||
struct super_block *sb;
|
||||
@@ -35,6 +36,8 @@ struct scoutfs_sb_info {
|
||||
|
||||
struct scoutfs_super_block super;
|
||||
|
||||
struct block_device *meta_bdev;
|
||||
|
||||
spinlock_t next_ino_lock;
|
||||
|
||||
struct data_info *data_info;
|
||||
@@ -44,6 +47,8 @@ struct scoutfs_sb_info {
|
||||
struct quorum_info *quorum_info;
|
||||
struct block_info *block_info;
|
||||
struct forest_info *forest_info;
|
||||
struct srch_info *srch_info;
|
||||
struct item_cache_info *item_cache_info;
|
||||
|
||||
wait_queue_head_t trans_hold_wq;
|
||||
struct task_struct *trans_task;
|
||||
@@ -91,6 +96,13 @@ static inline bool SCOUTFS_HAS_SBI(struct super_block *sb)
|
||||
return (sb != NULL) && (SCOUTFS_SB(sb) != NULL);
|
||||
}
|
||||
|
||||
static inline bool SCOUTFS_IS_META_BDEV(struct scoutfs_super_block *super_block)
|
||||
{
|
||||
return !!(le64_to_cpu(super_block->flags) & SCOUTFS_FLAG_IS_META_BDEV);
|
||||
}
|
||||
|
||||
#define SCOUTFS_META_BDEV_MODE (FMODE_READ | FMODE_WRITE | FMODE_EXCL)
|
||||
|
||||
/*
|
||||
* A small string embedded in messages that's used to identify a
|
||||
* specific mount. It's the three most significant bytes of the fsid
|
||||
|
||||
193
kmod/src/trans.c
193
kmod/src/trans.c
@@ -25,8 +25,10 @@
|
||||
#include "counters.h"
|
||||
#include "client.h"
|
||||
#include "inode.h"
|
||||
#include "radix.h"
|
||||
#include "alloc.h"
|
||||
#include "block.h"
|
||||
#include "msg.h"
|
||||
#include "item.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
@@ -58,13 +60,11 @@
|
||||
*/
|
||||
struct trans_info {
|
||||
spinlock_t lock;
|
||||
unsigned reserved_items;
|
||||
unsigned reserved_vals;
|
||||
unsigned holders;
|
||||
bool writing;
|
||||
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_radix_allocator alloc;
|
||||
struct scoutfs_alloc alloc;
|
||||
struct scoutfs_block_writer wri;
|
||||
};
|
||||
|
||||
@@ -110,8 +110,7 @@ int scoutfs_trans_get_log_trees(struct super_block *sb)
|
||||
ret = scoutfs_client_get_log_trees(sb, <);
|
||||
if (ret == 0) {
|
||||
tri->lt = lt;
|
||||
scoutfs_radix_init_alloc(&tri->alloc, <.meta_avail,
|
||||
<.meta_freed);
|
||||
scoutfs_alloc_init(&tri->alloc, <.meta_avail, <.meta_freed);
|
||||
scoutfs_block_writer_init(sb, &tri->wri);
|
||||
|
||||
scoutfs_forest_init_btrees(sb, &tri->alloc, &tri->wri, <);
|
||||
@@ -126,6 +125,7 @@ bool scoutfs_trans_has_dirty(struct super_block *sb)
|
||||
|
||||
return scoutfs_block_writer_has_dirty(sb, &tri->wri);
|
||||
}
|
||||
|
||||
/*
|
||||
* This work func is responsible for writing out all the dirty blocks
|
||||
* that make up the current dirty transaction. It prevents writers from
|
||||
@@ -156,6 +156,8 @@ void scoutfs_trans_write_func(struct work_struct *work)
|
||||
trans_write_work.work);
|
||||
struct super_block *sb = sbi->sb;
|
||||
DECLARE_TRANS_INFO(sb, tri);
|
||||
u64 trans_seq = sbi->trans_seq;
|
||||
char *s = NULL;
|
||||
int ret = 0;
|
||||
|
||||
sbi->trans_task = current;
|
||||
@@ -165,37 +167,49 @@ void scoutfs_trans_write_func(struct work_struct *work)
|
||||
trace_scoutfs_trans_write_func(sb,
|
||||
scoutfs_block_writer_dirty_bytes(sb, &tri->wri));
|
||||
|
||||
if (scoutfs_block_writer_has_dirty(sb, &tri->wri)) {
|
||||
if (sbi->trans_deadline_expired)
|
||||
scoutfs_inc_counter(sb, trans_commit_timer);
|
||||
|
||||
ret = scoutfs_inode_walk_writeback(sb, true) ?:
|
||||
scoutfs_block_writer_write(sb, &tri->wri) ?:
|
||||
scoutfs_inode_walk_writeback(sb, false) ?:
|
||||
commit_btrees(sb) ?:
|
||||
scoutfs_client_advance_seq(sb, &sbi->trans_seq) ?:
|
||||
scoutfs_trans_get_log_trees(sb);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
} else if (sbi->trans_deadline_expired) {
|
||||
/*
|
||||
* If we're not writing data then we only advance the
|
||||
* seq at the sync deadline interval. This keeps idle
|
||||
* mounts from pinning a seq and stopping readers of the
|
||||
* seq indices but doesn't send a message for every sync
|
||||
* syscall.
|
||||
*/
|
||||
ret = scoutfs_client_advance_seq(sb, &sbi->trans_seq);
|
||||
if (!scoutfs_block_writer_has_dirty(sb, &tri->wri) &&
|
||||
!scoutfs_item_dirty_pages(sb)) {
|
||||
if (sbi->trans_deadline_expired) {
|
||||
/*
|
||||
* If we're not writing data then we only advance the
|
||||
* seq at the sync deadline interval. This keeps idle
|
||||
* mounts from pinning a seq and stopping readers of the
|
||||
* seq indices but doesn't send a message for every sync
|
||||
* syscall.
|
||||
*/
|
||||
ret = scoutfs_client_advance_seq(sb, &trans_seq);
|
||||
if (ret < 0)
|
||||
s = "clean advance seq";
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
if (sbi->trans_deadline_expired)
|
||||
scoutfs_inc_counter(sb, trans_commit_timer);
|
||||
|
||||
scoutfs_inc_counter(sb, trans_commit_written);
|
||||
|
||||
/* XXX this all needs serious work for dealing with errors */
|
||||
WARN_ON_ONCE(ret);
|
||||
ret = (s = "data submit", scoutfs_inode_walk_writeback(sb, true)) ?:
|
||||
(s = "item dirty", scoutfs_item_write_dirty(sb)) ?:
|
||||
(s = "data prepare", scoutfs_data_prepare_commit(sb)) ?:
|
||||
(s = "alloc prepare", scoutfs_alloc_prepare_commit(sb,
|
||||
&tri->alloc, &tri->wri)) ?:
|
||||
(s = "meta write", scoutfs_block_writer_write(sb, &tri->wri)) ?:
|
||||
(s = "data wait", scoutfs_inode_walk_writeback(sb, false)) ?:
|
||||
(s = "commit log trees", commit_btrees(sb)) ?:
|
||||
scoutfs_item_write_done(sb) ?:
|
||||
(s = "advance seq", scoutfs_client_advance_seq(sb, &trans_seq)) ?:
|
||||
(s = "get log trees", scoutfs_trans_get_log_trees(sb));
|
||||
out:
|
||||
if (ret < 0)
|
||||
scoutfs_err(sb, "critical transaction commit failure: %s, %d",
|
||||
s, ret);
|
||||
|
||||
spin_lock(&sbi->trans_write_lock);
|
||||
sbi->trans_write_count++;
|
||||
sbi->trans_write_ret = ret;
|
||||
sbi->trans_seq = trans_seq;
|
||||
spin_unlock(&sbi->trans_write_lock);
|
||||
wake_up(&sbi->trans_write_wq);
|
||||
|
||||
@@ -302,12 +316,11 @@ void scoutfs_trans_restart_sync_deadline(struct super_block *sb)
|
||||
* Including nested holds avoids having to deal with writing out partial
|
||||
* transactions while a caller still holds the transaction.
|
||||
*/
|
||||
|
||||
#define SCOUTFS_RESERVATION_MAGIC 0xd57cd13b
|
||||
struct scoutfs_reservation {
|
||||
unsigned magic;
|
||||
unsigned holders;
|
||||
struct scoutfs_item_count reserved;
|
||||
struct scoutfs_item_count actual;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -324,22 +337,16 @@ struct scoutfs_reservation {
|
||||
* delaying or prematurely forcing commits.
|
||||
*/
|
||||
static bool acquired_hold(struct super_block *sb,
|
||||
struct scoutfs_reservation *rsv,
|
||||
const struct scoutfs_item_count *cnt)
|
||||
struct scoutfs_reservation *rsv)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
DECLARE_TRANS_INFO(sb, tri);
|
||||
bool acquired = false;
|
||||
unsigned items;
|
||||
unsigned vals;
|
||||
|
||||
spin_lock(&tri->lock);
|
||||
|
||||
trace_scoutfs_trans_acquired_hold(sb, cnt, rsv, rsv->holders,
|
||||
&rsv->reserved, &rsv->actual,
|
||||
tri->holders, tri->writing,
|
||||
tri->reserved_items,
|
||||
tri->reserved_vals);
|
||||
trace_scoutfs_trans_acquired_hold(sb, rsv, rsv->holders,
|
||||
tri->holders, tri->writing);
|
||||
|
||||
/* use a caller's existing reservation */
|
||||
if (rsv->holders)
|
||||
@@ -349,14 +356,31 @@ static bool acquired_hold(struct super_block *sb,
|
||||
if (tri->writing)
|
||||
goto out;
|
||||
|
||||
/* see if we can reserve space for our item count */
|
||||
items = tri->reserved_items + cnt->items;
|
||||
vals = tri->reserved_vals + cnt->vals;
|
||||
/*
|
||||
* In theory each dirty item page could be straddling two full
|
||||
* blocks, requiring 4 allocations for each item cache page.
|
||||
* That's much too conservative, typically many dirty item cache
|
||||
* pages that are near each other all land in one block. This
|
||||
* rough estimate is still so far beyond what typically happens
|
||||
* that it accounts for having to dirty parent blocks and
|
||||
* whatever dirtying is done during the transaction hold.
|
||||
*/
|
||||
if (scoutfs_alloc_meta_low(sb, &tri->alloc,
|
||||
scoutfs_item_dirty_pages(sb) * 2)) {
|
||||
scoutfs_inc_counter(sb, trans_commit_dirty_meta_full);
|
||||
queue_trans_work(sbi);
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* XXX arbitrarily limit to 8 meg transactions */
|
||||
if (scoutfs_block_writer_dirty_bytes(sb, &tri->wri) >=
|
||||
(8 * 1024 * 1024)) {
|
||||
scoutfs_inc_counter(sb, trans_commit_full);
|
||||
/*
|
||||
* Extent modifications can use meta allocators without creating
|
||||
* dirty items so we have to check the meta alloc specifically.
|
||||
* The size of the client's avail and freed roots are bound so
|
||||
* we're unlikely to need very many block allocations per
|
||||
* transaction hold. XXX This should be more precisely tuned.
|
||||
*/
|
||||
if (scoutfs_alloc_meta_low(sb, &tri->alloc, 16)) {
|
||||
scoutfs_inc_counter(sb, trans_commit_meta_alloc_low);
|
||||
queue_trans_work(sbi);
|
||||
goto out;
|
||||
}
|
||||
@@ -368,12 +392,6 @@ static bool acquired_hold(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
tri->reserved_items = items;
|
||||
tri->reserved_vals = vals;
|
||||
|
||||
rsv->reserved.items = cnt->items;
|
||||
rsv->reserved.vals = cnt->vals;
|
||||
|
||||
hold:
|
||||
rsv->holders++;
|
||||
tri->holders++;
|
||||
@@ -386,20 +404,12 @@ out:
|
||||
return acquired;
|
||||
}
|
||||
|
||||
int scoutfs_hold_trans(struct super_block *sb,
|
||||
const struct scoutfs_item_count cnt)
|
||||
int scoutfs_hold_trans(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_reservation *rsv;
|
||||
int ret;
|
||||
|
||||
/*
|
||||
* Caller shouldn't provide garbage counts, nor counts that
|
||||
* can't fit in segments by themselves.
|
||||
*/
|
||||
if (WARN_ON_ONCE(cnt.items <= 0 || cnt.vals < 0))
|
||||
return -EINVAL;
|
||||
|
||||
if (current == sbi->trans_task)
|
||||
return 0;
|
||||
|
||||
@@ -416,7 +426,7 @@ int scoutfs_hold_trans(struct super_block *sb,
|
||||
BUG_ON(rsv->magic != SCOUTFS_RESERVATION_MAGIC);
|
||||
|
||||
ret = wait_event_interruptible(sbi->trans_hold_wq,
|
||||
acquired_hold(sb, rsv, &cnt));
|
||||
acquired_hold(sb, rsv));
|
||||
if (ret && rsv->holders == 0) {
|
||||
current->journal_info = NULL;
|
||||
kfree(rsv);
|
||||
@@ -436,38 +446,6 @@ bool scoutfs_trans_held(void)
|
||||
return rsv && rsv->magic == SCOUTFS_RESERVATION_MAGIC;
|
||||
}
|
||||
|
||||
/*
|
||||
* Record a transaction holder's individual contribution to the dirty
|
||||
* items in the current transaction. We're making sure that the
|
||||
* reservation matches the possible item manipulations while they hold
|
||||
* the reservation.
|
||||
*
|
||||
* It is possible and legitimate for an individual contribution to be
|
||||
* negative if they delete dirty items. The item cache makes sure that
|
||||
* the total dirty item count doesn't fall below zero.
|
||||
*/
|
||||
void scoutfs_trans_track_item(struct super_block *sb, signed items,
|
||||
signed vals)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_reservation *rsv = current->journal_info;
|
||||
|
||||
if (current == sbi->trans_task)
|
||||
return;
|
||||
|
||||
BUG_ON(!rsv || rsv->magic != SCOUTFS_RESERVATION_MAGIC);
|
||||
|
||||
rsv->actual.items += items;
|
||||
rsv->actual.vals += vals;
|
||||
|
||||
trace_scoutfs_trans_track_item(sb, items, vals, rsv->actual.items,
|
||||
rsv->actual.vals, rsv->reserved.items,
|
||||
rsv->reserved.vals);
|
||||
|
||||
WARN_ON_ONCE(rsv->actual.items > rsv->reserved.items);
|
||||
WARN_ON_ONCE(rsv->actual.vals > rsv->reserved.vals);
|
||||
}
|
||||
|
||||
/*
|
||||
* As we drop the last hold in the reservation we try and wake other
|
||||
* hold attempts that were waiting for space. As we drop the last trans
|
||||
@@ -489,16 +467,12 @@ void scoutfs_release_trans(struct super_block *sb)
|
||||
|
||||
spin_lock(&tri->lock);
|
||||
|
||||
trace_scoutfs_release_trans(sb, rsv, rsv->holders, &rsv->reserved,
|
||||
&rsv->actual, tri->holders, tri->writing,
|
||||
tri->reserved_items, tri->reserved_vals);
|
||||
trace_scoutfs_release_trans(sb, rsv, rsv->holders, tri->holders, tri->writing);
|
||||
|
||||
BUG_ON(rsv->holders <= 0);
|
||||
BUG_ON(tri->holders <= 0);
|
||||
|
||||
if (--rsv->holders == 0) {
|
||||
tri->reserved_items -= rsv->reserved.items;
|
||||
tri->reserved_vals -= rsv->reserved.vals;
|
||||
current->journal_info = NULL;
|
||||
kfree(rsv);
|
||||
wake = true;
|
||||
@@ -513,6 +487,23 @@ void scoutfs_release_trans(struct super_block *sb)
|
||||
wake_up(&sbi->trans_hold_wq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return the current transaction sequence. Whether this is racing with
|
||||
* the transaction write thread is entirely dependent on the caller's
|
||||
* context.
|
||||
*/
|
||||
u64 scoutfs_trans_sample_seq(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
u64 ret;
|
||||
|
||||
spin_lock(&sbi->trans_write_lock);
|
||||
ret = sbi->trans_seq;
|
||||
spin_unlock(&sbi->trans_write_lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_setup_trans(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
@@ -6,20 +6,16 @@
|
||||
/* the client will force commits if data allocators get too low */
|
||||
#define SCOUTFS_TRANS_DATA_ALLOC_LWM (256ULL * 1024 * 1024)
|
||||
|
||||
#include "count.h"
|
||||
|
||||
void scoutfs_trans_write_func(struct work_struct *work);
|
||||
int scoutfs_trans_sync(struct super_block *sb, int wait);
|
||||
int scoutfs_file_fsync(struct file *file, loff_t start, loff_t end,
|
||||
int datasync);
|
||||
void scoutfs_trans_restart_sync_deadline(struct super_block *sb);
|
||||
|
||||
int scoutfs_hold_trans(struct super_block *sb,
|
||||
const struct scoutfs_item_count cnt);
|
||||
int scoutfs_hold_trans(struct super_block *sb);
|
||||
bool scoutfs_trans_held(void);
|
||||
void scoutfs_release_trans(struct super_block *sb);
|
||||
void scoutfs_trans_track_item(struct super_block *sb, signed items,
|
||||
signed vals);
|
||||
u64 scoutfs_trans_sample_seq(struct super_block *sb);
|
||||
|
||||
int scoutfs_trans_get_log_trees(struct super_block *sb);
|
||||
bool scoutfs_trans_has_dirty(struct super_block *sb);
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
#ifndef _SCOUTFS_TRIGGERS_H_
|
||||
#define _SCOUTFS_TRIGGERS_H_
|
||||
|
||||
enum {
|
||||
enum scoutfs_trigger {
|
||||
SCOUTFS_TRIGGER_BTREE_STALE_READ,
|
||||
SCOUTFS_TRIGGER_BTREE_ADVANCE_RING_HALF,
|
||||
SCOUTFS_TRIGGER_HARD_STALE_ERROR,
|
||||
|
||||
20
kmod/src/util.h
Normal file
20
kmod/src/util.h
Normal file
@@ -0,0 +1,20 @@
|
||||
#ifndef _SCOUTFS_UTIL_H_
|
||||
#define _SCOUTFS_UTIL_H_
|
||||
|
||||
/*
|
||||
* Little utility helpers that probably belong upstream.
|
||||
*/
|
||||
|
||||
static inline void down_write_two(struct rw_semaphore *a,
|
||||
struct rw_semaphore *b)
|
||||
{
|
||||
BUG_ON(a == b);
|
||||
|
||||
if (a > b)
|
||||
swap(a, b);
|
||||
|
||||
down_write(a);
|
||||
down_write_nested(b, SINGLE_DEPTH_NESTING);
|
||||
}
|
||||
|
||||
#endif
|
||||
273
kmod/src/xattr.c
273
kmod/src/xattr.c
@@ -20,7 +20,7 @@
|
||||
#include "inode.h"
|
||||
#include "key.h"
|
||||
#include "super.h"
|
||||
#include "kvec.h"
|
||||
#include "item.h"
|
||||
#include "forest.h"
|
||||
#include "trans.h"
|
||||
#include "xattr.h"
|
||||
@@ -94,21 +94,17 @@ static int unknown_prefix(const char *name)
|
||||
strncmp(name, SCOUTFS_XATTR_PREFIX, SCOUTFS_XATTR_PREFIX_LEN);
|
||||
}
|
||||
|
||||
struct prefix_tags {
|
||||
unsigned long hide:1,
|
||||
indx:1;
|
||||
};
|
||||
|
||||
#define HIDE_TAG "hide."
|
||||
#define INDX_TAG "indx."
|
||||
#define SRCH_TAG "srch."
|
||||
#define TAG_LEN (sizeof(HIDE_TAG) - 1)
|
||||
|
||||
static int parse_tags(const char *name, unsigned int name_len,
|
||||
struct prefix_tags *tgs)
|
||||
int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
struct scoutfs_xattr_prefix_tags *tgs)
|
||||
{
|
||||
bool found;
|
||||
|
||||
memset(tgs, 0, sizeof(struct prefix_tags));
|
||||
memset(tgs, 0, sizeof(struct scoutfs_xattr_prefix_tags));
|
||||
|
||||
if ((name_len < (SCOUTFS_XATTR_PREFIX_LEN + TAG_LEN + 1)) ||
|
||||
strncmp(name, SCOUTFS_XATTR_PREFIX, SCOUTFS_XATTR_PREFIX_LEN))
|
||||
@@ -120,8 +116,8 @@ static int parse_tags(const char *name, unsigned int name_len,
|
||||
if (!strncmp(name, HIDE_TAG, TAG_LEN)) {
|
||||
if (++tgs->hide == 0)
|
||||
return -EINVAL;
|
||||
} else if (!strncmp(name, INDX_TAG, TAG_LEN)) {
|
||||
if (++tgs->indx == 0)
|
||||
} else if (!strncmp(name, SRCH_TAG, TAG_LEN)) {
|
||||
if (++tgs->srch == 0)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
/* only reason to use scoutfs. is tags */
|
||||
@@ -136,17 +132,6 @@ static int parse_tags(const char *name, unsigned int name_len,
|
||||
return 0;
|
||||
}
|
||||
|
||||
void scoutfs_xattr_index_key(struct scoutfs_key *key,
|
||||
u64 hash, u64 ino, u64 id)
|
||||
{
|
||||
scoutfs_key_set_zeros(key);
|
||||
key->sk_zone = SCOUTFS_XATTR_INDEX_ZONE;
|
||||
key->skxi_hash = cpu_to_le64(hash);
|
||||
key->sk_type = SCOUTFS_XATTR_INDEX_NAME_TYPE;
|
||||
key->skxi_ino = cpu_to_le64(ino);
|
||||
key->skxi_id = cpu_to_le64(id);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the next xattr and copy the key, xattr header, and as much of
|
||||
* the name and value into the callers buffer as we can. Returns the
|
||||
@@ -171,7 +156,6 @@ static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_key last;
|
||||
struct kvec val;
|
||||
u8 last_part;
|
||||
int total;
|
||||
u8 part;
|
||||
@@ -194,8 +178,9 @@ static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
|
||||
|
||||
for (;;) {
|
||||
key->skx_part = part;
|
||||
kvec_init(&val, (void *)xat + total, bytes - total);
|
||||
ret = scoutfs_forest_next(sb, key, &last, &val, lock);
|
||||
ret = scoutfs_item_next(sb, key, &last,
|
||||
(void *)xat + total, bytes - total,
|
||||
lock);
|
||||
if (ret < 0) {
|
||||
/* XXX corruption, ran out of parts */
|
||||
if (ret == -ENOENT && part > 0)
|
||||
@@ -271,7 +256,6 @@ static int create_xattr_items(struct inode *inode, u64 id,
|
||||
struct scoutfs_key key;
|
||||
unsigned int part_bytes;
|
||||
unsigned int total;
|
||||
struct kvec val;
|
||||
int ret;
|
||||
|
||||
init_xattr_key(&key, scoutfs_ino(inode),
|
||||
@@ -282,12 +266,13 @@ static int create_xattr_items(struct inode *inode, u64 id,
|
||||
while (total < bytes) {
|
||||
part_bytes = min_t(unsigned int, bytes - total,
|
||||
SCOUTFS_XATTR_MAX_PART_SIZE);
|
||||
kvec_init(&val, (void *)xat + total, part_bytes);
|
||||
|
||||
ret = scoutfs_forest_create(sb, &key, &val, lock);
|
||||
ret = scoutfs_item_create(sb, &key,
|
||||
(void *)xat + total, part_bytes,
|
||||
lock);
|
||||
if (ret) {
|
||||
while (key.skx_part-- > 0)
|
||||
scoutfs_forest_delete_dirty(sb, &key);
|
||||
scoutfs_item_delete(sb, &key, lock);
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -299,24 +284,114 @@ static int create_xattr_items(struct inode *inode, u64 id,
|
||||
}
|
||||
|
||||
/*
|
||||
* Delete and save the items that make up the given xattr. If this
|
||||
* returns an error then the deleted and saved items are left on the
|
||||
* list for the caller to restore.
|
||||
* Delete the items that make up the given xattr. If this returns an
|
||||
* error then no items have been deleted.
|
||||
*/
|
||||
static int delete_xattr_items(struct inode *inode, u32 name_hash, u64 id,
|
||||
u8 nr_parts, struct list_head *list,
|
||||
struct scoutfs_lock *lock)
|
||||
u8 nr_parts, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
init_xattr_key(&key, scoutfs_ino(inode), name_hash, id);
|
||||
|
||||
do {
|
||||
ret = scoutfs_forest_delete_save(sb, &key, list, lock);
|
||||
} while (ret == 0 && ++key.skx_part < nr_parts);
|
||||
/* dirty additional existing old items */
|
||||
for (i = 1; i < nr_parts; i++) {
|
||||
key.skx_part = i;
|
||||
ret = scoutfs_item_dirty(sb, &key, lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (i = 0; i < nr_parts; i++) {
|
||||
key.skx_part = i;
|
||||
ret = scoutfs_item_delete(sb, &key, lock);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller needs to overwrite existing old xattr items with new
|
||||
* items. We carefully stage the changes so that we can always unwind
|
||||
* to the original items if we return an error. Both items have at
|
||||
* least one part. Either the old or new can have more parts. We dirty
|
||||
* and create first because we can always unwind those. We delete last
|
||||
* after dirtying so that it can't fail and we don't have to restore the
|
||||
* deleted items.
|
||||
*/
|
||||
static int change_xattr_items(struct inode *inode, u64 id,
|
||||
struct scoutfs_xattr *new_xat,
|
||||
unsigned int new_bytes, u8 new_parts,
|
||||
u8 old_parts, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_key key;
|
||||
int last_created = -1;
|
||||
int bytes;
|
||||
int off;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
init_xattr_key(&key, scoutfs_ino(inode),
|
||||
xattr_name_hash(new_xat->name, new_xat->name_len), id);
|
||||
|
||||
/* dirty existing old items */
|
||||
for (i = 0; i < old_parts; i++) {
|
||||
key.skx_part = i;
|
||||
ret = scoutfs_item_dirty(sb, &key, lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* create any new items past the old */
|
||||
for (i = old_parts; i < new_parts; i++) {
|
||||
off = i * SCOUTFS_XATTR_MAX_PART_SIZE;
|
||||
bytes = min_t(unsigned int, new_bytes - off,
|
||||
SCOUTFS_XATTR_MAX_PART_SIZE);
|
||||
|
||||
key.skx_part = i;
|
||||
ret = scoutfs_item_create(sb, &key, (void *)new_xat + off,
|
||||
bytes, lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
last_created = i;
|
||||
}
|
||||
|
||||
/* update dirtied overlapping existing items, last partial first */
|
||||
for (i = old_parts - 1; i >= 0; i--) {
|
||||
off = i * SCOUTFS_XATTR_MAX_PART_SIZE;
|
||||
bytes = min_t(unsigned int, new_bytes - off,
|
||||
SCOUTFS_XATTR_MAX_PART_SIZE);
|
||||
|
||||
key.skx_part = i;
|
||||
ret = scoutfs_item_update(sb, &key, (void *)new_xat + off,
|
||||
bytes, lock);
|
||||
/* only last partial can fail, then we unwind created */
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* delete any dirtied old items past new */
|
||||
for (i = new_parts; i < old_parts; i++) {
|
||||
key.skx_part = i;
|
||||
scoutfs_item_delete(sb, &key, lock);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret < 0) {
|
||||
/* delete any newly created items */
|
||||
for (i = old_parts; i <= last_created; i++) {
|
||||
key.skx_part = i;
|
||||
scoutfs_item_delete(sb, &key, lock);
|
||||
}
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -346,7 +421,7 @@ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer,
|
||||
|
||||
/* only need enough for caller's name and value sizes */
|
||||
bytes = sizeof(struct scoutfs_xattr) + name_len + size;
|
||||
xat = kmalloc(bytes, GFP_NOFS);
|
||||
xat = __vmalloc(bytes, GFP_NOFS, PAGE_KERNEL);
|
||||
if (!xat)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -389,7 +464,7 @@ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer,
|
||||
ret = le16_to_cpu(xat->val_len);
|
||||
memcpy(buffer, &xat->name[xat->name_len], ret);
|
||||
out:
|
||||
kfree(xat);
|
||||
vfree(xat);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -411,20 +486,17 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *indx_lock = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
size_t name_len = strlen(name);
|
||||
struct scoutfs_key indx_key;
|
||||
struct scoutfs_key key;
|
||||
struct prefix_tags tgs;
|
||||
bool undo_indx = false;
|
||||
bool undo_srch = false;
|
||||
LIST_HEAD(ind_locks);
|
||||
LIST_HEAD(saved);
|
||||
u8 found_parts;
|
||||
unsigned int bytes;
|
||||
u64 ind_seq;
|
||||
u64 hash;
|
||||
u64 hash = 0;
|
||||
u64 id = 0;
|
||||
int ret;
|
||||
int err;
|
||||
@@ -444,14 +516,14 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
if (unknown_prefix(name))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (parse_tags(name, name_len, &tgs) != 0)
|
||||
if (scoutfs_xattr_parse_tags(name, name_len, &tgs) != 0)
|
||||
return -EINVAL;
|
||||
|
||||
if ((tgs.hide || tgs.indx) && !capable(CAP_SYS_ADMIN))
|
||||
if ((tgs.hide || tgs.srch) && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
bytes = sizeof(struct scoutfs_xattr) + name_len + size;
|
||||
xat = kmalloc(bytes, GFP_NOFS);
|
||||
xat = __vmalloc(bytes, GFP_NOFS, PAGE_KERNEL);
|
||||
if (!xat) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
@@ -491,29 +563,21 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
|
||||
/* prepare our xattr */
|
||||
if (value) {
|
||||
id = si->next_xattr_id++;
|
||||
if (found_parts)
|
||||
id = le64_to_cpu(key.skx_id);
|
||||
else
|
||||
id = si->next_xattr_id++;
|
||||
xat->name_len = name_len;
|
||||
xat->val_len = cpu_to_le16(size);
|
||||
memset(xat->__pad, 0, sizeof(xat->__pad));
|
||||
memcpy(xat->name, name, name_len);
|
||||
memcpy(&xat->name[xat->name_len], value, size);
|
||||
}
|
||||
|
||||
if (tgs.indx && !(found_parts && value)) {
|
||||
hash = scoutfs_hash64(name, name_len);
|
||||
ret = scoutfs_lock_xattr_index(sb, SCOUTFS_LOCK_WRITE_ONLY, 0,
|
||||
hash, &indx_lock);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
retry:
|
||||
ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
|
||||
scoutfs_inode_index_prepare(sb, &ind_locks, inode, false) ?:
|
||||
scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq,
|
||||
SIC_XATTR_SET(found_parts,
|
||||
value != NULL,
|
||||
name_len, size,
|
||||
tgs.indx));
|
||||
scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq);
|
||||
if (ret > 0)
|
||||
goto retry;
|
||||
if (ret)
|
||||
@@ -523,34 +587,27 @@ retry:
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
|
||||
if (tgs.indx && !(found_parts && value)) {
|
||||
if (tgs.srch && !(found_parts && value)) {
|
||||
if (found_parts)
|
||||
id = le64_to_cpu(key.skx_id);
|
||||
hash = scoutfs_hash64(name, name_len);
|
||||
scoutfs_xattr_index_key(&indx_key, hash, ino, id);
|
||||
if (value)
|
||||
ret = scoutfs_forest_create_force(sb, &indx_key, NULL,
|
||||
indx_lock);
|
||||
else
|
||||
ret = scoutfs_forest_delete_force(sb, &indx_key,
|
||||
indx_lock);
|
||||
ret = scoutfs_forest_srch_add(sb, hash, ino, id);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
undo_indx = true;
|
||||
undo_srch = true;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
if (found_parts)
|
||||
if (found_parts && value)
|
||||
ret = change_xattr_items(inode, id, xat, bytes,
|
||||
xattr_nr_parts(xat), found_parts, lck);
|
||||
else if (found_parts)
|
||||
ret = delete_xattr_items(inode, le64_to_cpu(key.skx_name_hash),
|
||||
le64_to_cpu(key.skx_id), found_parts,
|
||||
&saved, lck);
|
||||
if (value && ret == 0)
|
||||
lck);
|
||||
else
|
||||
ret = create_xattr_items(inode, id, xat, bytes, lck);
|
||||
if (ret < 0) {
|
||||
scoutfs_forest_restore(sb, &saved, lck);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
}
|
||||
scoutfs_forest_free_batch(sb, &saved);
|
||||
|
||||
/* XXX do these want i_mutex or anything? */
|
||||
inode_inc_iversion(inode);
|
||||
@@ -559,13 +616,8 @@ retry:
|
||||
ret = 0;
|
||||
|
||||
release:
|
||||
if (ret < 0 && undo_indx) {
|
||||
if (value)
|
||||
err = scoutfs_forest_delete_force(sb, &indx_key,
|
||||
indx_lock);
|
||||
else
|
||||
err = scoutfs_forest_create_force(sb, &indx_key, NULL,
|
||||
indx_lock);
|
||||
if (ret < 0 && undo_srch) {
|
||||
err = scoutfs_forest_srch_add(sb, hash, ino, id);
|
||||
BUG_ON(err);
|
||||
}
|
||||
|
||||
@@ -573,10 +625,9 @@ release:
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
unlock:
|
||||
up_write(&si->xattr_rwsem);
|
||||
scoutfs_unlock(sb, indx_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
scoutfs_unlock(sb, lck, SCOUTFS_LOCK_WRITE);
|
||||
out:
|
||||
kfree(xat);
|
||||
vfree(xat);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -601,10 +652,10 @@ ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer,
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
struct scoutfs_key key;
|
||||
struct prefix_tags tgs;
|
||||
unsigned int bytes;
|
||||
ssize_t total = 0;
|
||||
u32 name_hash = 0;
|
||||
@@ -640,8 +691,8 @@ ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer,
|
||||
break;
|
||||
}
|
||||
|
||||
is_hidden = parse_tags(xat->name, xat->name_len, &tgs) == 0 &&
|
||||
tgs.hide;
|
||||
is_hidden = scoutfs_xattr_parse_tags(xat->name, xat->name_len,
|
||||
&tgs) == 0 && tgs.hide;
|
||||
|
||||
if (show_hidden == is_hidden) {
|
||||
if (size) {
|
||||
@@ -693,15 +744,12 @@ ssize_t scoutfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
|
||||
int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_lock *indx_lock = NULL;
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_key indx_key;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
struct prefix_tags tgs;
|
||||
bool release = false;
|
||||
unsigned int bytes;
|
||||
struct kvec val;
|
||||
u64 hash;
|
||||
int ret;
|
||||
|
||||
@@ -717,8 +765,8 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
init_xattr_key(&last, ino, U32_MAX, U64_MAX);
|
||||
|
||||
for (;;) {
|
||||
kvec_init(&val, (void *)xat, bytes);
|
||||
ret = scoutfs_forest_next(sb, &key, &last, &val, lock);
|
||||
ret = scoutfs_item_next(sb, &key, &last, (void *)xat, bytes,
|
||||
lock);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
@@ -726,32 +774,23 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
}
|
||||
|
||||
if (key.skx_part != 0 ||
|
||||
parse_tags(xat->name, xat->name_len, &tgs) != 0)
|
||||
scoutfs_xattr_parse_tags(xat->name, xat->name_len,
|
||||
&tgs) != 0)
|
||||
memset(&tgs, 0, sizeof(tgs));
|
||||
|
||||
if (tgs.indx) {
|
||||
hash = scoutfs_hash64(xat->name, xat->name_len);
|
||||
scoutfs_xattr_index_key(&indx_key, hash, ino,
|
||||
le64_to_cpu(key.skx_id));
|
||||
ret = scoutfs_lock_xattr_index(sb,
|
||||
SCOUTFS_LOCK_WRITE_ONLY,
|
||||
0, hash, &indx_lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
ret = scoutfs_hold_trans(sb, SIC_EXACT(2, 0));
|
||||
ret = scoutfs_hold_trans(sb);
|
||||
if (ret < 0)
|
||||
break;
|
||||
release = true;
|
||||
|
||||
ret = scoutfs_forest_delete(sb, &key, lock);
|
||||
ret = scoutfs_item_delete(sb, &key, lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (tgs.indx) {
|
||||
ret = scoutfs_forest_delete_force(sb, &indx_key,
|
||||
indx_lock);
|
||||
if (tgs.srch) {
|
||||
hash = scoutfs_hash64(xat->name, xat->name_len);
|
||||
ret = scoutfs_forest_srch_add(sb, hash, ino,
|
||||
le64_to_cpu(key.skx_id));
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
@@ -759,15 +798,11 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
scoutfs_release_trans(sb);
|
||||
release = false;
|
||||
|
||||
scoutfs_unlock(sb, indx_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
indx_lock = NULL;
|
||||
|
||||
/* don't need to inc, next won't see deleted item */
|
||||
}
|
||||
|
||||
if (release)
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_unlock(sb, indx_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
kfree(xat);
|
||||
out:
|
||||
return ret;
|
||||
|
||||
@@ -14,7 +14,12 @@ ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer,
|
||||
int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
void scoutfs_xattr_index_key(struct scoutfs_key *key,
|
||||
u64 hash, u64 ino, u64 id);
|
||||
struct scoutfs_xattr_prefix_tags {
|
||||
unsigned long hide:1,
|
||||
srch:1;
|
||||
};
|
||||
|
||||
int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
struct scoutfs_xattr_prefix_tags *tgs);
|
||||
|
||||
#endif
|
||||
|
||||
6
tests/.gitignore
vendored
Normal file
6
tests/.gitignore
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
src/*.d
|
||||
src/createmany
|
||||
src/dumb_setxattr
|
||||
src/handle_cat
|
||||
src/bulk_create_paths
|
||||
src/find_xattrs
|
||||
49
tests/Makefile
Normal file
49
tests/Makefile
Normal file
@@ -0,0 +1,49 @@
|
||||
CFLAGS := -Wall -O2 -Werror -D_FILE_OFFSET_BITS=64 -fno-strict-aliasing
|
||||
SHELL := /usr/bin/bash
|
||||
|
||||
# each binary command is built from a single .c file
|
||||
BIN := src/createmany \
|
||||
src/dumb_setxattr \
|
||||
src/handle_cat \
|
||||
src/bulk_create_paths \
|
||||
src/find_xattrs
|
||||
|
||||
DEPS := $(wildcard src/*.d)
|
||||
|
||||
all: $(BIN)
|
||||
|
||||
ifneq ($(DEPS),)
|
||||
-include $(DEPS)
|
||||
endif
|
||||
|
||||
$(BIN): %: %.c Makefile
|
||||
gcc $(CFLAGS) -MD -MP -MF $*.d $< -o $@
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
@rm -f $(BIN) $(DEPS)
|
||||
|
||||
#
|
||||
# Make sure we only have all three items needed for each test: entry in
|
||||
# sequence, test script in tests/, and output in golden/.
|
||||
#
|
||||
.PHONY: check-test-files
|
||||
check-test-files:
|
||||
@for t in $$(grep -v "^#" sequence); do \
|
||||
test -e "tests/$$t" || \
|
||||
echo "no test for list entry: $$t"; \
|
||||
t=$${t%%.sh}; \
|
||||
test -e "golden/$$t" || \
|
||||
echo "no output for list entry: $$t"; \
|
||||
done; \
|
||||
for t in golden/*; do \
|
||||
t=$$(basename "$$t"); \
|
||||
grep -q "^$$t.sh$$" sequence || \
|
||||
echo "output not in list: $$t"; \
|
||||
done; \
|
||||
for t in tests/*; do \
|
||||
t=$$(basename "$$t"); \
|
||||
test "$$t" == "list" && continue; \
|
||||
grep -q "^$$t$$" sequence || \
|
||||
echo "test not in list: $$t"; \
|
||||
done
|
||||
123
tests/README.md
Normal file
123
tests/README.md
Normal file
@@ -0,0 +1,123 @@
|
||||
|
||||
This test suite exercises multi-node scoutfs by using multiple mounts on
|
||||
one host to simulate multiple nodes across a network.
|
||||
|
||||
It also contains a light test wrapper that executes xfstests on one of
|
||||
the test mounts.
|
||||
|
||||
## Invoking Tests
|
||||
|
||||
The basic test invocation has to specify the devices for the fs the
|
||||
number of mounts to test, whether to create a new fs and insert the
|
||||
built module, and where to put the results.
|
||||
|
||||
# bash ./run-tests.sh \
|
||||
-M /dev/vda \
|
||||
-D /dev/vdb \
|
||||
-i \
|
||||
-m \
|
||||
-n 3 \
|
||||
-q 2 \
|
||||
-r ./results
|
||||
|
||||
All options can be seen by running with -h.
|
||||
|
||||
This script is built to test multi-node systems on one host by using
|
||||
different mounts of the same devices. The script creates a fake block
|
||||
device in front of each fs block device for each mount that will be
|
||||
tested. Currently it will create free loop devices and will mount on
|
||||
/mnt/test.[0-9].
|
||||
|
||||
All tests will be run by default. Particular tests can be included or
|
||||
excluded by providing test name regular expressions with the -I and -E
|
||||
options. The definitive list of tests and the order in which they'll be
|
||||
run is found in the sequence file.
|
||||
|
||||
## xfstests
|
||||
|
||||
The last test that is run checks out, builds, and runs xfstests. It
|
||||
needs -X and -x options for the xfstests git repo and branch. It also
|
||||
needs spare devices on which to make scratch scoutfs volumes. The test
|
||||
verifies that the expected set of xfstests tests ran and passed.
|
||||
|
||||
-f /dev/vdc \
|
||||
-e /dev/vdd \
|
||||
-X $HOME/git/scoutfs-xfstests \
|
||||
-x scoutfs \
|
||||
|
||||
An xfstests repo that knows about scoutfs is only required to sprinkle
|
||||
the scoutfs cases throughout the xfstests harness.
|
||||
|
||||
## Individual Test Invocation
|
||||
|
||||
Each test is run in a new bash invocation. A set of directories in the
|
||||
test volume and in the results path are created for the test. Each
|
||||
test's working directory isn't managed.
|
||||
|
||||
Test output, temp files, and dmesg snapshots are all put in a tmp/ dir
|
||||
in the results/ dir. Per-test dirs are only destroyed before each test
|
||||
invocation.
|
||||
|
||||
The harness will check for unexpected output in dmesg after each
|
||||
individual test.
|
||||
|
||||
Each test that fails will have its results appened to the fail.log file
|
||||
in the results/ directory. The details of the failure can be examined
|
||||
in the directories for each test in results/output/ and results/tmp/.
|
||||
|
||||
## Writing tests
|
||||
|
||||
Tests have access to a set of t\_ prefixed bash functions that are found
|
||||
in files in funcs/.
|
||||
|
||||
Tests complete by calling t\_ functions which indicate the result of the
|
||||
test and can return a message. If the tests passes then its output is
|
||||
compared with known good output. If the output doesn't match then the
|
||||
test fails. The t\_ completion functions return specific status codes so
|
||||
that returning without calling one can be detected.
|
||||
|
||||
The golden output has to be consistent across test platforms so there
|
||||
are a number of filter functions which strip out local details from
|
||||
command output. t\_filter\_fs is by far the most used which canonicalizes
|
||||
fs mount paths and block device details.
|
||||
|
||||
Tests can be relatively loose about checking errors. If commands
|
||||
produce output in failure cases then the test will fail without having
|
||||
to specifically test for errors on every command execution. Care should
|
||||
be taken to make sure that blowing through a bunch of commands with no
|
||||
error checking doesn't produce catastrophic results. Usually tests are
|
||||
simple and it's fine.
|
||||
|
||||
A bare sync will sync all the mounted filesystems and ensure that
|
||||
no mounts have dirty data. sync -f can be used to sync just a specific
|
||||
filesystem, though it doesn't exist on all platforms.
|
||||
|
||||
The harness doesn't currently ensure that all mounts are restored after
|
||||
each test invocation. It probably should. Currently it's the
|
||||
responsibility of the test to restore any mounts it alters and there are
|
||||
t\_ functions to mount all configured mount points.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
Tests have a number of exported environment variables that are commonly
|
||||
used during the test.
|
||||
|
||||
| Variable | Description | Origin | Example |
|
||||
| ---------------- | ------------------- | --------------- | ----------------- |
|
||||
| T\_MB[0-9] | per-mount meta bdev | created per run | /dev/loop0 |
|
||||
| T\_DB[0-9] | per-mount data bdev | created per run | /dev/loop1 |
|
||||
| T\_D[0-9] | per-mount test dir | made for test | /mnt/test.[0-9]/t |
|
||||
| T\_META\_DEVICE | main FS meta bdev | -M | /dev/vda |
|
||||
| T\_DATA\_DEVICE | main FS data bdev | -D | /dev/vdb |
|
||||
| T\_EX\_META\_DEV | scratch meta bdev | -f | /dev/vdd |
|
||||
| T\_EX\_DATA\_DEV | scratch meta bdev | -e | /dev/vdc |
|
||||
| T\_M[0-9] | mount paths | mounted per run | /mnt/test.[0-9]/ |
|
||||
| T\_NR\_MOUNTS | number of mounts | -n | 3 |
|
||||
| T\_O[0-9] | mount options | created per run | -o server\_addr= |
|
||||
| T\_QUORUM | quorum count | -q | 2 |
|
||||
| T\_TMP | per-test tmp prefix | made for test | results/tmp/t/tmp |
|
||||
| T\_TMPDIR | per-test tmp dir dir | made for test | results/tmp/t |
|
||||
|
||||
There are also a number of variables that are set in response to options
|
||||
and are exported but their use is rare so they aren't included here.
|
||||
|
||||
58
tests/funcs/exec.sh
Normal file
58
tests/funcs/exec.sh
Normal file
@@ -0,0 +1,58 @@
|
||||
|
||||
t_status_msg()
|
||||
{
|
||||
echo "$*" > "$T_TMPDIR/status.msg"
|
||||
}
|
||||
|
||||
export T_PASS_STATUS=100
|
||||
export T_SKIP_STATUS=101
|
||||
export T_FAIL_STATUS=102
|
||||
export T_FIRST_STATUS="$T_PASS_STATUS"
|
||||
export T_LAST_STATUS="$T_FAIL_STATUS"
|
||||
|
||||
t_pass()
|
||||
{
|
||||
exit $T_PASS_STATUS
|
||||
}
|
||||
|
||||
t_skip()
|
||||
{
|
||||
t_status_msg "$@"
|
||||
exit $T_SKIP_STATUS
|
||||
}
|
||||
|
||||
t_fail()
|
||||
{
|
||||
t_status_msg "$@"
|
||||
exit $T_FAIL_STATUS
|
||||
}
|
||||
|
||||
#
|
||||
# Quietly run a command during a test. If it succeeds then we have a
|
||||
# log of its execution but its output isn't included in the test's
|
||||
# compared output. If it fails then the test fails.
|
||||
#
|
||||
t_quiet()
|
||||
{
|
||||
echo "# $*" >> "$T_TMPDIR/quiet.log"
|
||||
"$@" > "$T_TMPDIR/quiet.log" 2>&1 || \
|
||||
t_fail "quiet command failed"
|
||||
}
|
||||
|
||||
#
|
||||
# redirect test output back to the output of the invoking script intead
|
||||
# of the compared output.
|
||||
#
|
||||
t_restore_output()
|
||||
{
|
||||
exec >&6 2>&1
|
||||
}
|
||||
|
||||
#
|
||||
# redirect a command's output back to the compared output after the
|
||||
# test has restored its output
|
||||
#
|
||||
t_compare_output()
|
||||
{
|
||||
"$@" >&7 2>&1
|
||||
}
|
||||
66
tests/funcs/filter.sh
Normal file
66
tests/funcs/filter.sh
Normal file
@@ -0,0 +1,66 @@
|
||||
|
||||
# filter out device ids and mount paths
|
||||
t_filter_fs()
|
||||
{
|
||||
sed -e 's@mnt/test\.[0-9]*@mnt/test@g' \
|
||||
-e 's@Device: [a-fA-F0-7]*h/[0-9]*d@Device: 0h/0d@g'
|
||||
}
|
||||
|
||||
#
|
||||
# Filter out expected messages. Putting messages here implies that
|
||||
# tests aren't relying on messages to discover failures.. they're
|
||||
# directly testing the result of whatever it is that's generating the
|
||||
# message.
|
||||
#
|
||||
t_filter_dmesg()
|
||||
{
|
||||
local re
|
||||
|
||||
# the kernel can just be noisy
|
||||
re=" used greatest stack depth: "
|
||||
|
||||
# mkfs/mount checks partition tables
|
||||
re="$re|unknown partition table"
|
||||
|
||||
# dm swizzling
|
||||
re="$re|device doesn't appear to be in the dev hash table"
|
||||
re="$re|device-mapper:.*uevent:.*version"
|
||||
re="$re|device-mapper:.*ioctl:.*initialised"
|
||||
|
||||
# some tests try invalid devices
|
||||
re="$re|scoutfs .* error reading super block"
|
||||
re="$re| EXT4-fs (.*): get root inode failed"
|
||||
re="$re| EXT4-fs (.*): mount failed"
|
||||
re="$re| EXT4-fs (.*): no journal found"
|
||||
re="$re| EXT4-fs (.*): VFS: Can't find ext4 filesystem"
|
||||
|
||||
# dropping caches is fine
|
||||
re="$re| drop_caches: "
|
||||
|
||||
# mount and unmount spew a bunch
|
||||
re="$re|scoutfs.*client connected"
|
||||
re="$re|scoutfs.*client disconnected"
|
||||
re="$re|scoutfs.*server setting up"
|
||||
re="$re|scoutfs.*server ready"
|
||||
re="$re|scoutfs.*server accepted"
|
||||
re="$re|scoutfs.*server closing"
|
||||
re="$re|scoutfs.*server shutting down"
|
||||
re="$re|scoutfs.*server stopped"
|
||||
|
||||
# xfstests records test execution in desg
|
||||
re="$re| run fstests "
|
||||
|
||||
# tests that drop unmount io triggers fencing
|
||||
re="$re|scoutfs .* error: fencing "
|
||||
re="$re|scoutfs .*: waiting for .* lock clients"
|
||||
re="$re|scoutfs .*: all lock clients recovered"
|
||||
re="$re|scoutfs .* error: client rid.*lock recovery timed out"
|
||||
|
||||
# some tests mount w/o options
|
||||
re="$re|scoutfs .* error: Required mount option \"metadev_path\" not found"
|
||||
|
||||
# in debugging kernels we can slow things down a bit
|
||||
re="$re|hrtimer: interrupt took .*"
|
||||
|
||||
egrep -v "($re)"
|
||||
}
|
||||
231
tests/funcs/fs.sh
Normal file
231
tests/funcs/fs.sh
Normal file
@@ -0,0 +1,231 @@
|
||||
|
||||
#
|
||||
# Make all previously dirty items in memory in all mounts synced and
|
||||
# visible in the inode seq indexes. We have to force a sync on every
|
||||
# node by dirtying data as that's the only way to guarantee advancing
|
||||
# the sequence number on each node which limits index visibility. Some
|
||||
# distros don't have sync -f so we dirty our mounts then sync
|
||||
# everything.
|
||||
#
|
||||
t_sync_seq_index()
|
||||
{
|
||||
local m
|
||||
|
||||
for m in $T_MS; do
|
||||
t_quiet touch $m
|
||||
done
|
||||
t_quiet sync
|
||||
}
|
||||
|
||||
#
|
||||
# Output the "f.$fsid.r.$rid" identifier string for the given mount
|
||||
# number, 0 is used by default if none is specified.
|
||||
#
|
||||
t_ident()
|
||||
{
|
||||
local nr="${1:-0}"
|
||||
local mnt="$(eval echo \$T_M$nr)"
|
||||
local fsid
|
||||
local rid
|
||||
|
||||
fsid=$(scoutfs statfs -s fsid -p "$mnt")
|
||||
rid=$(scoutfs statfs -s rid -p "$mnt")
|
||||
|
||||
echo "f.${fsid:0:6}.r.${rid:0:6}"
|
||||
}
|
||||
|
||||
#
|
||||
# Output the mount's sysfs path, defaulting to mount 0 if none is
|
||||
# specified.
|
||||
#
|
||||
t_sysfs_path()
|
||||
{
|
||||
local nr="$1"
|
||||
|
||||
echo "/sys/fs/scoutfs/$(t_ident $nr)"
|
||||
}
|
||||
|
||||
#
|
||||
# Output the mount's debugfs path, defaulting to mount 0 if none is
|
||||
# specified.
|
||||
#
|
||||
t_debugfs_path()
|
||||
{
|
||||
local nr="$1"
|
||||
|
||||
echo "/sys/kernel/debug/scoutfs/$(t_ident $nr)"
|
||||
}
|
||||
|
||||
#
|
||||
# output all the configured test nrs for iteration
|
||||
#
|
||||
t_fs_nrs()
|
||||
{
|
||||
seq 0 $((T_NR_MOUNTS - 1))
|
||||
}
|
||||
|
||||
#
|
||||
# Output the mount nr of the current server. This takes no steps to
|
||||
# ensure that the server doesn't shut down and have some other mount
|
||||
# take over.
|
||||
#
|
||||
t_server_nr()
|
||||
{
|
||||
for i in $(t_fs_nrs); do
|
||||
if [ "$(cat $(t_sysfs_path $i)/quorum/is_leader)" == "1" ]; then
|
||||
echo $i
|
||||
return
|
||||
fi
|
||||
done
|
||||
|
||||
t_fail "t_server_nr didn't find a server"
|
||||
}
|
||||
|
||||
#
|
||||
# Output the mount nr of the first client that we find. There can be
|
||||
# no clients if there's only one mount who has to be the server. This
|
||||
# takes no steps to ensure that the client doesn't become a server at
|
||||
# any point.
|
||||
#
|
||||
t_first_client_nr()
|
||||
{
|
||||
for i in $(t_fs_nrs); do
|
||||
if [ "$(cat $(t_sysfs_path $i)/quorum/is_leader)" == "0" ]; then
|
||||
echo $i
|
||||
return
|
||||
fi
|
||||
done
|
||||
|
||||
t_fail "t_first_client_nr didn't find any clients"
|
||||
}
|
||||
|
||||
t_mount()
|
||||
{
|
||||
local nr="$1"
|
||||
|
||||
test "$nr" -lt "$T_NR_MOUNTS" || \
|
||||
t_fail "fs nr $nr invalid"
|
||||
|
||||
eval t_quiet mount -t scoutfs \$T_O$nr \$T_DB$nr \$T_M$nr
|
||||
}
|
||||
|
||||
t_umount()
|
||||
{
|
||||
local nr="$1"
|
||||
|
||||
test "$nr" -lt "$T_NR_MOUNTS" || \
|
||||
t_fail "fs nr $nr invalid"
|
||||
|
||||
eval t_quiet umount \$T_DB$i
|
||||
}
|
||||
|
||||
#
|
||||
# Attempt to mount all the configured mounts, assuming that they're
|
||||
# not already mounted.
|
||||
#
|
||||
t_mount_all()
|
||||
{
|
||||
local pids=""
|
||||
local p
|
||||
|
||||
for i in $(t_fs_nrs); do
|
||||
t_mount $i &
|
||||
p="$!"
|
||||
pids="$pids $!"
|
||||
done
|
||||
for p in $pids; do
|
||||
t_quiet wait $p
|
||||
done
|
||||
}
|
||||
|
||||
#
|
||||
# Attempt to unmount all the configured mounts, assuming that they're
|
||||
# all mounted.
|
||||
#
|
||||
t_umount_all()
|
||||
{
|
||||
local pids=""
|
||||
local p
|
||||
|
||||
for i in $(t_fs_nrs); do
|
||||
t_umount $i &
|
||||
p="$!"
|
||||
pids="$pids $!"
|
||||
done
|
||||
for p in $pids; do
|
||||
t_quiet wait $p
|
||||
done
|
||||
}
|
||||
|
||||
t_remount_all()
|
||||
{
|
||||
t_quiet t_umount_all || t_fail "umounting all failed"
|
||||
t_quiet t_mount_all || t_fail "mounting all failed"
|
||||
}
|
||||
|
||||
t_reinsert_remount_all()
|
||||
{
|
||||
t_quiet t_umount_all || t_fail "umounting all failed"
|
||||
|
||||
t_quiet rmmod scoutfs || \
|
||||
t_fail "rmmod scoutfs failed"
|
||||
t_quiet insmod "$T_KMOD/src/scoutfs.ko" ||
|
||||
t_fail "insmod scoutfs failed"
|
||||
|
||||
t_quiet t_mount_all || t_fail "mounting all failed"
|
||||
}
|
||||
|
||||
t_trigger_path() {
|
||||
local nr="$1"
|
||||
|
||||
echo "/sys/kernel/debug/scoutfs/$(t_ident $nr)/trigger"
|
||||
}
|
||||
|
||||
t_trigger_get() {
|
||||
local which="$1"
|
||||
local nr="$2"
|
||||
|
||||
cat "$(t_trigger_path "$nr")/$which"
|
||||
}
|
||||
|
||||
t_trigger_show() {
|
||||
local which="$1"
|
||||
local string="$2"
|
||||
local nr="$3"
|
||||
|
||||
echo "trigger $which $string: $(t_trigger_get $which $nr)"
|
||||
}
|
||||
|
||||
t_trigger_arm() {
|
||||
local which="$1"
|
||||
local nr="$2"
|
||||
local path=$(t_trigger_path "$nr")
|
||||
|
||||
echo 1 > "$path/$which"
|
||||
t_trigger_show $which armed $nr
|
||||
}
|
||||
|
||||
#
|
||||
# output the value of the given counter for the given mount, defaulting
|
||||
# to mount 0 if a mount isn't specified.
|
||||
#
|
||||
t_counter() {
|
||||
local which="$1"
|
||||
local nr="$2"
|
||||
|
||||
cat "$(t_sysfs_path $nr)/counters/$which"
|
||||
}
|
||||
|
||||
#
|
||||
# output the value of the given counter for the given mount, defaulting
|
||||
# to mount 0 if a mount isn't specified.
|
||||
#
|
||||
t_counter_diff() {
|
||||
local which="$1"
|
||||
local old="$2"
|
||||
local nr="$3"
|
||||
local new
|
||||
|
||||
new="$(t_counter $which $nr)"
|
||||
echo "counter $which diff $((new - old))"
|
||||
}
|
||||
40
tests/funcs/require.sh
Normal file
40
tests/funcs/require.sh
Normal file
@@ -0,0 +1,40 @@
|
||||
|
||||
#
|
||||
# Make sure that all the base command arguments are found in the path.
|
||||
# This isn't strictly necessary as the test will naturally fail if the
|
||||
# command isn't found, but it's nice to fail fast and clearly
|
||||
# communicate why.
|
||||
#
|
||||
t_require_commands() {
|
||||
local c
|
||||
|
||||
for c in "$@"; do
|
||||
which "$c" >/dev/null 2>&1 || \
|
||||
t_fail "command $c not found in path"
|
||||
done
|
||||
}
|
||||
|
||||
#
|
||||
# make sure that we have at least this many mounts
|
||||
#
|
||||
t_require_mounts() {
|
||||
local req="$1"
|
||||
|
||||
test "$T_NR_MOUNTS" -ge "$req" || \
|
||||
t_skip "$req mounts required, only have $T_NR_MOUNTS"
|
||||
}
|
||||
|
||||
#
|
||||
# Require that the meta device be at least the size string argument, as
|
||||
# parsed by numfmt using single char base 2 suffixes (iec).. 64G, etc.
|
||||
#
|
||||
t_require_meta_size() {
|
||||
local dev="$T_META_DEVICE"
|
||||
local req_iec="$1"
|
||||
local req_bytes=$(numfmt --from=iec --to=none $req_iec)
|
||||
local dev_bytes=$(blockdev --getsize64 $dev)
|
||||
local dev_iec=$(numfmt --from=auto --to=iec $dev_bytes)
|
||||
|
||||
test "$dev_bytes" -ge "$req_bytes" || \
|
||||
t_skip "$dev must be at least $req_iec, is $dev_iec"
|
||||
}
|
||||
36
tests/golden/archive-light-cycle
Normal file
36
tests/golden/archive-light-cycle
Normal file
@@ -0,0 +1,36 @@
|
||||
== calculate number of files
|
||||
== create per mount dirs
|
||||
== generate phase scripts
|
||||
== round 1: create
|
||||
== round 1: online
|
||||
== round 1: verify
|
||||
== round 1: release
|
||||
== round 1: offline
|
||||
== round 1: stage
|
||||
== round 1: online
|
||||
== round 1: verify
|
||||
== round 1: release
|
||||
== round 1: offline
|
||||
== round 1: unlink
|
||||
== round 2: create
|
||||
== round 2: online
|
||||
== round 2: verify
|
||||
== round 2: release
|
||||
== round 2: offline
|
||||
== round 2: stage
|
||||
== round 2: online
|
||||
== round 2: verify
|
||||
== round 2: release
|
||||
== round 2: offline
|
||||
== round 2: unlink
|
||||
== round 3: create
|
||||
== round 3: online
|
||||
== round 3: verify
|
||||
== round 3: release
|
||||
== round 3: offline
|
||||
== round 3: stage
|
||||
== round 3: online
|
||||
== round 3: verify
|
||||
== round 3: release
|
||||
== round 3: offline
|
||||
== round 3: unlink
|
||||
53
tests/golden/basic-block-counts
Normal file
53
tests/golden/basic-block-counts
Normal file
@@ -0,0 +1,53 @@
|
||||
== single block write
|
||||
online: 1
|
||||
offline: 0
|
||||
st_blocks: 8
|
||||
== single block overwrite
|
||||
online: 1
|
||||
offline: 0
|
||||
st_blocks: 8
|
||||
== append
|
||||
online: 2
|
||||
offline: 0
|
||||
st_blocks: 16
|
||||
== release
|
||||
online: 0
|
||||
offline: 2
|
||||
st_blocks: 16
|
||||
== duplicate release
|
||||
online: 0
|
||||
offline: 2
|
||||
st_blocks: 16
|
||||
== duplicate release past i_size
|
||||
online: 0
|
||||
offline: 2
|
||||
st_blocks: 16
|
||||
== stage
|
||||
online: 2
|
||||
offline: 0
|
||||
st_blocks: 16
|
||||
== duplicate stage
|
||||
online: 2
|
||||
offline: 0
|
||||
st_blocks: 16
|
||||
== larger file
|
||||
online: 256
|
||||
offline: 0
|
||||
st_blocks: 2048
|
||||
== partial truncate
|
||||
online: 128
|
||||
offline: 0
|
||||
st_blocks: 1024
|
||||
== single sparse block
|
||||
online: 1
|
||||
offline: 0
|
||||
st_blocks: 8
|
||||
== empty file
|
||||
online: 0
|
||||
offline: 0
|
||||
st_blocks: 0
|
||||
== non-regular file
|
||||
online: 0
|
||||
offline: 0
|
||||
st_blocks: 0
|
||||
== cleanup
|
||||
55
tests/golden/basic-posix-consistency
Normal file
55
tests/golden/basic-posix-consistency
Normal file
@@ -0,0 +1,55 @@
|
||||
== root inode updates flow back and forth
|
||||
== stat of created file matches
|
||||
== written file contents match
|
||||
== overwritten file contents match
|
||||
== appended file contents match
|
||||
== fiemap matches after racey appends
|
||||
== unlinked file isn't found
|
||||
== symlink targets match
|
||||
/mnt/test/test/basic-posix-consistency/file.targ
|
||||
/mnt/test/test/basic-posix-consistency/file.targ
|
||||
/mnt/test/test/basic-posix-consistency/file.targ2
|
||||
/mnt/test/test/basic-posix-consistency/file.targ2
|
||||
== new xattrs are visible
|
||||
# file: /mnt/test/test/basic-posix-consistency/file
|
||||
user.xat="1"
|
||||
|
||||
# file: /mnt/test/test/basic-posix-consistency/file
|
||||
user.xat="1"
|
||||
|
||||
== modified xattrs are updated
|
||||
# file: /mnt/test/test/basic-posix-consistency/file
|
||||
user.xat="2"
|
||||
|
||||
# file: /mnt/test/test/basic-posix-consistency/file
|
||||
user.xat="2"
|
||||
|
||||
== deleted xattrs
|
||||
/mnt/test/test/basic-posix-consistency/file: user.xat: No such attribute
|
||||
/mnt/test/test/basic-posix-consistency/file: user.xat: No such attribute
|
||||
== readdir after modification
|
||||
one
|
||||
two
|
||||
three
|
||||
four
|
||||
one
|
||||
two
|
||||
three
|
||||
four
|
||||
two
|
||||
four
|
||||
two
|
||||
four
|
||||
== can delete empty dir
|
||||
== some easy rename cases
|
||||
--- file between dirs
|
||||
--- file within dir
|
||||
--- dir within dir
|
||||
--- overwrite file
|
||||
--- can't overwrite non-empty dir
|
||||
mv: cannot move ‘/mnt/test/test/basic-posix-consistency/dir/c/clobber’ to ‘/mnt/test/test/basic-posix-consistency/dir/a/dir’: Directory not empty
|
||||
--- can overwrite empty dir
|
||||
== path resoluion
|
||||
== inode indexes match after syncing existing
|
||||
== inode indexes match after copying and syncing
|
||||
== inode indexes match after removing and syncing
|
||||
0
tests/golden/createmany-large-names
Normal file
0
tests/golden/createmany-large-names
Normal file
4
tests/golden/createmany-parallel
Normal file
4
tests/golden/createmany-parallel
Normal file
@@ -0,0 +1,4 @@
|
||||
Run createmany in /mnt/test/test/createmany-parallel/0
|
||||
Run createmany in /mnt/test/test/createmany-parallel/1
|
||||
Run createmany in /mnt/test/test/createmany-parallel/2
|
||||
Run createmany in /mnt/test/test/createmany-parallel/3
|
||||
3
tests/golden/createmany-parallel-mounts
Normal file
3
tests/golden/createmany-parallel-mounts
Normal file
@@ -0,0 +1,3 @@
|
||||
== measure initial createmany
|
||||
== measure initial createmany
|
||||
== measure two concurrent createmany runs
|
||||
2
tests/golden/createmany-rename-large-dir
Normal file
2
tests/golden/createmany-rename-large-dir
Normal file
@@ -0,0 +1,2 @@
|
||||
== create large directory with 1220608 files
|
||||
== randomly renaming 5000 files
|
||||
2
tests/golden/cross-mount-data-free
Normal file
2
tests/golden/cross-mount-data-free
Normal file
@@ -0,0 +1,2 @@
|
||||
== repeated cross-mount alloc+free, totalling 2x free
|
||||
== remove empty test file
|
||||
10
tests/golden/dirent-consistency
Normal file
10
tests/golden/dirent-consistency
Normal file
@@ -0,0 +1,10 @@
|
||||
== create per node dirs
|
||||
== touch files on each node
|
||||
== recreate the files
|
||||
== turn the files into directories
|
||||
== rename parent dirs
|
||||
== rename parent dirs back
|
||||
== create some hard links
|
||||
== recreate one of the hard links
|
||||
== delete the remaining hard link
|
||||
== race to blow everything away
|
||||
0
tests/golden/export-get-name-parent
Normal file
0
tests/golden/export-get-name-parent
Normal file
4
tests/golden/inode-items-updated
Normal file
4
tests/golden/inode-items-updated
Normal file
@@ -0,0 +1,4 @@
|
||||
== create files and sync
|
||||
== modify files
|
||||
== mount and unmount
|
||||
== verify files
|
||||
4
tests/golden/lock-conflicting-batch-commit
Normal file
4
tests/golden/lock-conflicting-batch-commit
Normal file
@@ -0,0 +1,4 @@
|
||||
== create per mount files
|
||||
== time independent modification
|
||||
== time concurrent independent modification
|
||||
== time concurrent conflicting modification
|
||||
2
tests/golden/lock-ex-race-processes
Normal file
2
tests/golden/lock-ex-race-processes
Normal file
@@ -0,0 +1,2 @@
|
||||
=== setup files ===
|
||||
=== ping-pong xattr ops ===
|
||||
1
tests/golden/lock-pr-cw-conflict
Normal file
1
tests/golden/lock-pr-cw-conflict
Normal file
@@ -0,0 +1 @@
|
||||
== race writing and index walking
|
||||
3
tests/golden/lock-refleak
Normal file
3
tests/golden/lock-refleak
Normal file
@@ -0,0 +1,3 @@
|
||||
== make test dir
|
||||
== do enough stuff to make lock leaks visible
|
||||
== make sure nothing has leaked
|
||||
2
tests/golden/lock-revoke-getcwd
Normal file
2
tests/golden/lock-revoke-getcwd
Normal file
@@ -0,0 +1,2 @@
|
||||
=== getcwd after lock revocation
|
||||
trigger statfs_lock_purge armed: 1
|
||||
15
tests/golden/lock-shrink-consistency
Normal file
15
tests/golden/lock-shrink-consistency
Normal file
@@ -0,0 +1,15 @@
|
||||
=== setup test file ===
|
||||
# file: /mnt/test/test/lock-shrink-consistency/dir/file
|
||||
user.test="aaa"
|
||||
|
||||
=== commit dirty trans and revoke lock ===
|
||||
trigger statfs_lock_purge armed: 1
|
||||
trigger statfs_lock_purge after it fired: 0
|
||||
=== change xattr on other mount ===
|
||||
# file: /mnt/test/test/lock-shrink-consistency/dir/file
|
||||
user.test="bbb"
|
||||
|
||||
=== verify new xattr under new lock on first mount ===
|
||||
# file: /mnt/test/test/lock-shrink-consistency/dir/file
|
||||
user.test="bbb"
|
||||
|
||||
3
tests/golden/mount-unmount-race
Normal file
3
tests/golden/mount-unmount-race
Normal file
@@ -0,0 +1,3 @@
|
||||
== create per mount files
|
||||
== 30s of racing random mount/umount
|
||||
== mounting any unmounted
|
||||
33
tests/golden/move-blocks
Normal file
33
tests/golden/move-blocks
Normal file
@@ -0,0 +1,33 @@
|
||||
== build test files
|
||||
== wrapped offsets should fail
|
||||
ioctl failed on '/mnt/test/test/move-blocks/to': Value too large for defined data type (75)
|
||||
scoutfs: move-blocks failed: Value too large for defined data type (75)
|
||||
ioctl failed on '/mnt/test/test/move-blocks/to': Value too large for defined data type (75)
|
||||
scoutfs: move-blocks failed: Value too large for defined data type (75)
|
||||
== specifying same file fails
|
||||
ioctl failed on '/mnt/test/test/move-blocks/hardlink': Invalid argument (22)
|
||||
scoutfs: move-blocks failed: Invalid argument (22)
|
||||
== specifying files in other file systems fails
|
||||
ioctl failed on '/mnt/test/test/move-blocks/to': Invalid cross-device link (18)
|
||||
scoutfs: move-blocks failed: Invalid cross-device link (18)
|
||||
== offsets must be multiples of 4KB
|
||||
ioctl failed on '/mnt/test/test/move-blocks/to': Invalid argument (22)
|
||||
scoutfs: move-blocks failed: Invalid argument (22)
|
||||
ioctl failed on '/mnt/test/test/move-blocks/to': Invalid argument (22)
|
||||
scoutfs: move-blocks failed: Invalid argument (22)
|
||||
ioctl failed on '/mnt/test/test/move-blocks/to': Invalid argument (22)
|
||||
scoutfs: move-blocks failed: Invalid argument (22)
|
||||
== can't move onto existing extent
|
||||
ioctl failed on '/mnt/test/test/move-blocks/to': Invalid argument (22)
|
||||
scoutfs: move-blocks failed: Invalid argument (22)
|
||||
== can't move between files with offline extents
|
||||
ioctl failed on '/mnt/test/test/move-blocks/to': No data available (61)
|
||||
scoutfs: move-blocks failed: No data available (61)
|
||||
ioctl failed on '/mnt/test/test/move-blocks/to': No data available (61)
|
||||
scoutfs: move-blocks failed: No data available (61)
|
||||
== basic moves work
|
||||
== moving final partial block sets partial i_size
|
||||
123
|
||||
== moving updates inode fields
|
||||
== moving blocks backwards works
|
||||
== combine many files into one
|
||||
56
tests/golden/offline-extent-waiting
Normal file
56
tests/golden/offline-extent-waiting
Normal file
@@ -0,0 +1,56 @@
|
||||
== create files
|
||||
== waiter shows up in ioctl
|
||||
offline waiting should be empty:
|
||||
0
|
||||
offline waiting should now have one known entry:
|
||||
== multiple waiters on same block listed once
|
||||
offline waiting still has one known entry:
|
||||
== different blocks show up
|
||||
offline waiting now has two known entries:
|
||||
== staging wakes everyone
|
||||
offline waiting should be empty again:
|
||||
0
|
||||
== interruption does no harm
|
||||
offline waiting should now have one known entry:
|
||||
offline waiting should be empty again:
|
||||
0
|
||||
== EIO injection for waiting readers works
|
||||
offline waiting should now have two known entries:
|
||||
2
|
||||
data_wait_err found 2 waiters.
|
||||
offline waiting should now have 0 known entries:
|
||||
0
|
||||
dd: error reading ‘/mnt/test/test/offline-extent-waiting/dir/file’: Input/output error
|
||||
0+0 records in
|
||||
0+0 records out
|
||||
dd: error reading ‘/mnt/test/test/offline-extent-waiting/dir/file’: Input/output error
|
||||
0+0 records in
|
||||
0+0 records out
|
||||
offline waiting should be empty again:
|
||||
0
|
||||
== readahead while offline does no harm
|
||||
== waiting on interesting blocks works
|
||||
offline waiting is empty at block 0
|
||||
0
|
||||
offline waiting is empty at block 1
|
||||
0
|
||||
offline waiting is empty at block 128
|
||||
0
|
||||
offline waiting is empty at block 129
|
||||
0
|
||||
offline waiting is empty at block 254
|
||||
0
|
||||
offline waiting is empty at block 255
|
||||
0
|
||||
== contents match when staging blocks forward
|
||||
== contents match when staging blocks backwards
|
||||
== truncate to same size doesn't wait
|
||||
offline wating should be empty:
|
||||
0
|
||||
== truncating does wait
|
||||
truncate should be waiting for first block:
|
||||
trunate should no longer be waiting:
|
||||
0
|
||||
== writing waits
|
||||
should be waiting for write
|
||||
== cleanup
|
||||
4
tests/golden/persistent-item-vers
Normal file
4
tests/golden/persistent-item-vers
Normal file
@@ -0,0 +1,4 @@
|
||||
== advance lock version by creating unrelated files
|
||||
== create before file version
|
||||
== verify before version, touch after version
|
||||
== verify after version
|
||||
31
tests/golden/setattr_more
Normal file
31
tests/golden/setattr_more
Normal file
@@ -0,0 +1,31 @@
|
||||
== 0 data_version arg fails
|
||||
setattr: data version must not be 0
|
||||
Try `setattr --help' or `setattr --usage' for more information.
|
||||
== args must specify size and offline
|
||||
setattr: must provide size if using --offline option
|
||||
Try `setattr --help' or `setattr --usage' for more information.
|
||||
== only works on regular files
|
||||
failed to open '/mnt/test/test/setattr_more/dir': Is a directory (21)
|
||||
scoutfs: setattr failed: Is a directory (21)
|
||||
setattr_more ioctl failed on '/mnt/test/test/setattr_more/char': Inappropriate ioctl for device (25)
|
||||
scoutfs: setattr failed: Inappropriate ioctl for device (25)
|
||||
== non-zero file size fails
|
||||
setattr_more ioctl failed on '/mnt/test/test/setattr_more/file': Invalid argument (22)
|
||||
scoutfs: setattr failed: Invalid argument (22)
|
||||
== non-zero file data_version fails
|
||||
setattr_more ioctl failed on '/mnt/test/test/setattr_more/file': Invalid argument (22)
|
||||
scoutfs: setattr failed: Invalid argument (22)
|
||||
== large size is set
|
||||
578437695752307201
|
||||
== large data_version is set
|
||||
578437695752307201
|
||||
== large ctime is set
|
||||
1972-02-19 00:06:25.999999999 +0000
|
||||
== large offline extents are created
|
||||
Filesystem type is: 554f4353
|
||||
File size of /mnt/test/test/setattr_more/file is 40988672 (10007 blocks of 4096 bytes)
|
||||
ext: logical_offset: physical_offset: length: expected: flags:
|
||||
0: 0.. 10006: 0.. 10006: 10007: unknown,eof
|
||||
/mnt/test/test/setattr_more/file: 1 extent found
|
||||
== correct offline extent length
|
||||
976563
|
||||
1
tests/golden/setup-error-teardown
Normal file
1
tests/golden/setup-error-teardown
Normal file
@@ -0,0 +1 @@
|
||||
== interrupt waiting mount
|
||||
9
tests/golden/simple-inode-index
Normal file
9
tests/golden/simple-inode-index
Normal file
@@ -0,0 +1,9 @@
|
||||
== dirs shouldn't appear in data_seq queries
|
||||
== two created files are present and come after each other
|
||||
found first
|
||||
found second
|
||||
== unlinked entries must not be present
|
||||
== dirty inodes can not be present
|
||||
== changing metadata must increase meta seq
|
||||
== changing contents must increase data seq
|
||||
== make sure dirtying doesn't livelock walk
|
||||
146
tests/golden/simple-release-extents
Normal file
146
tests/golden/simple-release-extents
Normal file
@@ -0,0 +1,146 @@
|
||||
== simple whole file multi-block releasing
|
||||
== release last block that straddles i_size
|
||||
== release entire file past i_size
|
||||
== releasing offline extents is fine
|
||||
== 0 count is fine
|
||||
== release past i_size is fine
|
||||
== wrapped blocks fails
|
||||
release ioctl failed: Invalid argument (22)
|
||||
scoutfs: release failed: Invalid argument (22)
|
||||
== releasing non-file fails
|
||||
ioctl failed: Inappropriate ioctl for device (25)
|
||||
release: must provide file version --data-version
|
||||
Try `release --help' or `release --usage' for more information.
|
||||
== releasing a non-scoutfs file fails
|
||||
ioctl failed: Inappropriate ioctl for device (25)
|
||||
release: must provide file version --data-version
|
||||
Try `release --help' or `release --usage' for more information.
|
||||
== releasing bad version fails
|
||||
release: must provide file version --data-version
|
||||
Try `release --help' or `release --usage' for more information.
|
||||
== verify small release merging
|
||||
0 0 0: (0 0 1) (1 101 4)
|
||||
0 0 1: (0 0 2) (2 102 3)
|
||||
0 0 2: (0 0 1) (1 101 1) (2 0 1) (3 103 2)
|
||||
0 0 3: (0 0 1) (1 101 2) (3 0 1) (4 104 1)
|
||||
0 0 4: (0 0 1) (1 101 3) (4 0 1)
|
||||
0 1 0: (0 0 2) (2 102 3)
|
||||
0 1 1: (0 0 2) (2 102 3)
|
||||
0 1 2: (0 0 3) (3 103 2)
|
||||
0 1 3: (0 0 2) (2 102 1) (3 0 1) (4 104 1)
|
||||
0 1 4: (0 0 2) (2 102 2) (4 0 1)
|
||||
0 2 0: (0 0 1) (1 101 1) (2 0 1) (3 103 2)
|
||||
0 2 1: (0 0 3) (3 103 2)
|
||||
0 2 2: (0 0 1) (1 101 1) (2 0 1) (3 103 2)
|
||||
0 2 3: (0 0 1) (1 101 1) (2 0 2) (4 104 1)
|
||||
0 2 4: (0 0 1) (1 101 1) (2 0 1) (3 103 1) (4 0 1)
|
||||
0 3 0: (0 0 1) (1 101 2) (3 0 1) (4 104 1)
|
||||
0 3 1: (0 0 2) (2 102 1) (3 0 1) (4 104 1)
|
||||
0 3 2: (0 0 1) (1 101 1) (2 0 2) (4 104 1)
|
||||
0 3 3: (0 0 1) (1 101 2) (3 0 1) (4 104 1)
|
||||
0 3 4: (0 0 1) (1 101 2) (3 0 2)
|
||||
0 4 0: (0 0 1) (1 101 3) (4 0 1)
|
||||
0 4 1: (0 0 2) (2 102 2) (4 0 1)
|
||||
0 4 2: (0 0 1) (1 101 1) (2 0 1) (3 103 1) (4 0 1)
|
||||
0 4 3: (0 0 1) (1 101 2) (3 0 2)
|
||||
0 4 4: (0 0 1) (1 101 3) (4 0 1)
|
||||
1 0 0: (0 0 2) (2 102 3)
|
||||
1 0 1: (0 0 2) (2 102 3)
|
||||
1 0 2: (0 0 3) (3 103 2)
|
||||
1 0 3: (0 0 2) (2 102 1) (3 0 1) (4 104 1)
|
||||
1 0 4: (0 0 2) (2 102 2) (4 0 1)
|
||||
1 1 0: (0 0 2) (2 102 3)
|
||||
1 1 1: (0 100 1) (1 0 1) (2 102 3)
|
||||
1 1 2: (0 100 1) (1 0 2) (3 103 2)
|
||||
1 1 3: (0 100 1) (1 0 1) (2 102 1) (3 0 1) (4 104 1)
|
||||
1 1 4: (0 100 1) (1 0 1) (2 102 2) (4 0 1)
|
||||
1 2 0: (0 0 3) (3 103 2)
|
||||
1 2 1: (0 100 1) (1 0 2) (3 103 2)
|
||||
1 2 2: (0 100 1) (1 0 2) (3 103 2)
|
||||
1 2 3: (0 100 1) (1 0 3) (4 104 1)
|
||||
1 2 4: (0 100 1) (1 0 2) (3 103 1) (4 0 1)
|
||||
1 3 0: (0 0 2) (2 102 1) (3 0 1) (4 104 1)
|
||||
1 3 1: (0 100 1) (1 0 1) (2 102 1) (3 0 1) (4 104 1)
|
||||
1 3 2: (0 100 1) (1 0 3) (4 104 1)
|
||||
1 3 3: (0 100 1) (1 0 1) (2 102 1) (3 0 1) (4 104 1)
|
||||
1 3 4: (0 100 1) (1 0 1) (2 102 1) (3 0 2)
|
||||
1 4 0: (0 0 2) (2 102 2) (4 0 1)
|
||||
1 4 1: (0 100 1) (1 0 1) (2 102 2) (4 0 1)
|
||||
1 4 2: (0 100 1) (1 0 2) (3 103 1) (4 0 1)
|
||||
1 4 3: (0 100 1) (1 0 1) (2 102 1) (3 0 2)
|
||||
1 4 4: (0 100 1) (1 0 1) (2 102 2) (4 0 1)
|
||||
2 0 0: (0 0 1) (1 101 1) (2 0 1) (3 103 2)
|
||||
2 0 1: (0 0 3) (3 103 2)
|
||||
2 0 2: (0 0 1) (1 101 1) (2 0 1) (3 103 2)
|
||||
2 0 3: (0 0 1) (1 101 1) (2 0 2) (4 104 1)
|
||||
2 0 4: (0 0 1) (1 101 1) (2 0 1) (3 103 1) (4 0 1)
|
||||
2 1 0: (0 0 3) (3 103 2)
|
||||
2 1 1: (0 100 1) (1 0 2) (3 103 2)
|
||||
2 1 2: (0 100 1) (1 0 2) (3 103 2)
|
||||
2 1 3: (0 100 1) (1 0 3) (4 104 1)
|
||||
2 1 4: (0 100 1) (1 0 2) (3 103 1) (4 0 1)
|
||||
2 2 0: (0 0 1) (1 101 1) (2 0 1) (3 103 2)
|
||||
2 2 1: (0 100 1) (1 0 2) (3 103 2)
|
||||
2 2 2: (0 100 2) (2 0 1) (3 103 2)
|
||||
2 2 3: (0 100 2) (2 0 2) (4 104 1)
|
||||
2 2 4: (0 100 2) (2 0 1) (3 103 1) (4 0 1)
|
||||
2 3 0: (0 0 1) (1 101 1) (2 0 2) (4 104 1)
|
||||
2 3 1: (0 100 1) (1 0 3) (4 104 1)
|
||||
2 3 2: (0 100 2) (2 0 2) (4 104 1)
|
||||
2 3 3: (0 100 2) (2 0 2) (4 104 1)
|
||||
2 3 4: (0 100 2) (2 0 3)
|
||||
2 4 0: (0 0 1) (1 101 1) (2 0 1) (3 103 1) (4 0 1)
|
||||
2 4 1: (0 100 1) (1 0 2) (3 103 1) (4 0 1)
|
||||
2 4 2: (0 100 2) (2 0 1) (3 103 1) (4 0 1)
|
||||
2 4 3: (0 100 2) (2 0 3)
|
||||
2 4 4: (0 100 2) (2 0 1) (3 103 1) (4 0 1)
|
||||
3 0 0: (0 0 1) (1 101 2) (3 0 1) (4 104 1)
|
||||
3 0 1: (0 0 2) (2 102 1) (3 0 1) (4 104 1)
|
||||
3 0 2: (0 0 1) (1 101 1) (2 0 2) (4 104 1)
|
||||
3 0 3: (0 0 1) (1 101 2) (3 0 1) (4 104 1)
|
||||
3 0 4: (0 0 1) (1 101 2) (3 0 2)
|
||||
3 1 0: (0 0 2) (2 102 1) (3 0 1) (4 104 1)
|
||||
3 1 1: (0 100 1) (1 0 1) (2 102 1) (3 0 1) (4 104 1)
|
||||
3 1 2: (0 100 1) (1 0 3) (4 104 1)
|
||||
3 1 3: (0 100 1) (1 0 1) (2 102 1) (3 0 1) (4 104 1)
|
||||
3 1 4: (0 100 1) (1 0 1) (2 102 1) (3 0 2)
|
||||
3 2 0: (0 0 1) (1 101 1) (2 0 2) (4 104 1)
|
||||
3 2 1: (0 100 1) (1 0 3) (4 104 1)
|
||||
3 2 2: (0 100 2) (2 0 2) (4 104 1)
|
||||
3 2 3: (0 100 2) (2 0 2) (4 104 1)
|
||||
3 2 4: (0 100 2) (2 0 3)
|
||||
3 3 0: (0 0 1) (1 101 2) (3 0 1) (4 104 1)
|
||||
3 3 1: (0 100 1) (1 0 1) (2 102 1) (3 0 1) (4 104 1)
|
||||
3 3 2: (0 100 2) (2 0 2) (4 104 1)
|
||||
3 3 3: (0 100 3) (3 0 1) (4 104 1)
|
||||
3 3 4: (0 100 3) (3 0 2)
|
||||
3 4 0: (0 0 1) (1 101 2) (3 0 2)
|
||||
3 4 1: (0 100 1) (1 0 1) (2 102 1) (3 0 2)
|
||||
3 4 2: (0 100 2) (2 0 3)
|
||||
3 4 3: (0 100 3) (3 0 2)
|
||||
3 4 4: (0 100 3) (3 0 2)
|
||||
4 0 0: (0 0 1) (1 101 3) (4 0 1)
|
||||
4 0 1: (0 0 2) (2 102 2) (4 0 1)
|
||||
4 0 2: (0 0 1) (1 101 1) (2 0 1) (3 103 1) (4 0 1)
|
||||
4 0 3: (0 0 1) (1 101 2) (3 0 2)
|
||||
4 0 4: (0 0 1) (1 101 3) (4 0 1)
|
||||
4 1 0: (0 0 2) (2 102 2) (4 0 1)
|
||||
4 1 1: (0 100 1) (1 0 1) (2 102 2) (4 0 1)
|
||||
4 1 2: (0 100 1) (1 0 2) (3 103 1) (4 0 1)
|
||||
4 1 3: (0 100 1) (1 0 1) (2 102 1) (3 0 2)
|
||||
4 1 4: (0 100 1) (1 0 1) (2 102 2) (4 0 1)
|
||||
4 2 0: (0 0 1) (1 101 1) (2 0 1) (3 103 1) (4 0 1)
|
||||
4 2 1: (0 100 1) (1 0 2) (3 103 1) (4 0 1)
|
||||
4 2 2: (0 100 2) (2 0 1) (3 103 1) (4 0 1)
|
||||
4 2 3: (0 100 2) (2 0 3)
|
||||
4 2 4: (0 100 2) (2 0 1) (3 103 1) (4 0 1)
|
||||
4 3 0: (0 0 1) (1 101 2) (3 0 2)
|
||||
4 3 1: (0 100 1) (1 0 1) (2 102 1) (3 0 2)
|
||||
4 3 2: (0 100 2) (2 0 3)
|
||||
4 3 3: (0 100 3) (3 0 2)
|
||||
4 3 4: (0 100 3) (3 0 2)
|
||||
4 4 0: (0 0 1) (1 101 3) (4 0 1)
|
||||
4 4 1: (0 100 1) (1 0 1) (2 102 2) (4 0 1)
|
||||
4 4 2: (0 100 2) (2 0 1) (3 103 1) (4 0 1)
|
||||
4 4 3: (0 100 3) (3 0 2)
|
||||
4 4 4: (0 100 4) (4 0 1)
|
||||
23
tests/golden/simple-staging
Normal file
23
tests/golden/simple-staging
Normal file
@@ -0,0 +1,23 @@
|
||||
== create/release/stage single block file
|
||||
== create/release/stage larger file
|
||||
== multiple release,drop_cache,stage cycles
|
||||
== release+stage shouldn't change stat, data seq or vers
|
||||
== stage does change meta_seq
|
||||
== can't use stage to extend online file
|
||||
stage: must provide file version with --data-version
|
||||
Try `stage --help' or `stage --usage' for more information.
|
||||
== wrapped region fails
|
||||
stage returned -1, not 4096: error Invalid argument (22)
|
||||
scoutfs: stage failed: Input/output error (5)
|
||||
== non-block aligned offset fails
|
||||
stage returned -1, not 4095: error Invalid argument (22)
|
||||
scoutfs: stage failed: Input/output error (5)
|
||||
== non-block aligned len within block fails
|
||||
stage returned -1, not 1024: error Invalid argument (22)
|
||||
scoutfs: stage failed: Input/output error (5)
|
||||
== partial final block that writes to i_size does work
|
||||
== zero length stage doesn't bring blocks online
|
||||
== stage of non-regular file fails
|
||||
ioctl failed: Inappropriate ioctl for device (25)
|
||||
stage: must provide file version with --data-version
|
||||
Try `stage --help' or `stage --usage' for more information.
|
||||
18
tests/golden/simple-xattr-unit
Normal file
18
tests/golden/simple-xattr-unit
Normal file
@@ -0,0 +1,18 @@
|
||||
=== XATTR_ flag combinations
|
||||
dumb_setxattr -p /mnt/test/test/simple-xattr-unit/file -n user.test -v val -c -r
|
||||
returned -1 errno 22 (Invalid argument)
|
||||
dumb_setxattr -p /mnt/test/test/simple-xattr-unit/file -n user.test -v val -r
|
||||
returned -1 errno 61 (No data available)
|
||||
dumb_setxattr -p /mnt/test/test/simple-xattr-unit/file -n user.test -v val -c
|
||||
returned 0
|
||||
dumb_setxattr -p /mnt/test/test/simple-xattr-unit/file -n user.test -v val -c
|
||||
returned -1 errno 17 (File exists)
|
||||
dumb_setxattr -p /mnt/test/test/simple-xattr-unit/file -n user.test -v val -r
|
||||
returned 0
|
||||
=== bad lengths
|
||||
setfattr: /mnt/test/test/simple-xattr-unit/file: Operation not supported
|
||||
setfattr: /mnt/test/test/simple-xattr-unit/file: Numerical result out of range
|
||||
setfattr: /mnt/test/test/simple-xattr-unit/file: Numerical result out of range
|
||||
setfattr: /mnt/test/test/simple-xattr-unit/file: Argument list too long
|
||||
=== good length boundaries
|
||||
=== 500 random lengths
|
||||
13
tests/golden/srch-basic-functionality
Normal file
13
tests/golden/srch-basic-functionality
Normal file
@@ -0,0 +1,13 @@
|
||||
== create new xattrs
|
||||
== update existing xattr
|
||||
== remove an xattr
|
||||
== remove xattr with files
|
||||
== create entries in current log
|
||||
== delete small fraction
|
||||
== remove files
|
||||
== create entries that exceed one log
|
||||
== delete fractions in phases
|
||||
== remove files
|
||||
== create entries for exceed search entry limit
|
||||
== delete half
|
||||
== entirely remove third batch
|
||||
0
tests/golden/stage-multi-part
Normal file
0
tests/golden/stage-multi-part
Normal file
2
tests/golden/stage-release-race-alloc
Normal file
2
tests/golden/stage-release-race-alloc
Normal file
@@ -0,0 +1,2 @@
|
||||
== create initial files
|
||||
== race stage and release
|
||||
11
tests/golden/stale-btree-read
Normal file
11
tests/golden/stale-btree-read
Normal file
@@ -0,0 +1,11 @@
|
||||
== create file for xattr ping pong
|
||||
# file: /mnt/test/test/stale-btree-read/file
|
||||
user.xat="initial"
|
||||
|
||||
== retry btree block read
|
||||
trigger btree_stale_read armed: 1
|
||||
# file: /mnt/test/test/stale-btree-read/file
|
||||
user.xat="btree"
|
||||
|
||||
trigger btree_stale_read after: 0
|
||||
counter btree_stale_read diff 1
|
||||
281
tests/golden/xfstests
Normal file
281
tests/golden/xfstests
Normal file
@@ -0,0 +1,281 @@
|
||||
Ran:
|
||||
generic/001
|
||||
generic/002
|
||||
generic/005
|
||||
generic/006
|
||||
generic/007
|
||||
generic/011
|
||||
generic/013
|
||||
generic/014
|
||||
generic/020
|
||||
generic/028
|
||||
generic/032
|
||||
generic/034
|
||||
generic/035
|
||||
generic/037
|
||||
generic/039
|
||||
generic/040
|
||||
generic/041
|
||||
generic/053
|
||||
generic/056
|
||||
generic/057
|
||||
generic/062
|
||||
generic/065
|
||||
generic/066
|
||||
generic/067
|
||||
generic/069
|
||||
generic/070
|
||||
generic/071
|
||||
generic/073
|
||||
generic/076
|
||||
generic/084
|
||||
generic/086
|
||||
generic/087
|
||||
generic/088
|
||||
generic/090
|
||||
generic/092
|
||||
generic/098
|
||||
generic/101
|
||||
generic/104
|
||||
generic/106
|
||||
generic/107
|
||||
generic/117
|
||||
generic/124
|
||||
generic/129
|
||||
generic/131
|
||||
generic/169
|
||||
generic/184
|
||||
generic/221
|
||||
generic/228
|
||||
generic/236
|
||||
generic/245
|
||||
generic/249
|
||||
generic/257
|
||||
generic/258
|
||||
generic/286
|
||||
generic/294
|
||||
generic/306
|
||||
generic/307
|
||||
generic/308
|
||||
generic/309
|
||||
generic/313
|
||||
generic/315
|
||||
generic/322
|
||||
generic/335
|
||||
generic/336
|
||||
generic/337
|
||||
generic/341
|
||||
generic/342
|
||||
generic/343
|
||||
generic/348
|
||||
generic/360
|
||||
generic/376
|
||||
generic/377
|
||||
Not
|
||||
run:
|
||||
generic/004
|
||||
generic/008
|
||||
generic/009
|
||||
generic/012
|
||||
generic/015
|
||||
generic/016
|
||||
generic/018
|
||||
generic/021
|
||||
generic/022
|
||||
generic/026
|
||||
generic/031
|
||||
generic/033
|
||||
generic/050
|
||||
generic/052
|
||||
generic/058
|
||||
generic/059
|
||||
generic/060
|
||||
generic/061
|
||||
generic/063
|
||||
generic/064
|
||||
generic/079
|
||||
generic/081
|
||||
generic/082
|
||||
generic/091
|
||||
generic/094
|
||||
generic/096
|
||||
generic/110
|
||||
generic/111
|
||||
generic/113
|
||||
generic/114
|
||||
generic/115
|
||||
generic/116
|
||||
generic/118
|
||||
generic/119
|
||||
generic/121
|
||||
generic/122
|
||||
generic/123
|
||||
generic/128
|
||||
generic/130
|
||||
generic/134
|
||||
generic/135
|
||||
generic/136
|
||||
generic/138
|
||||
generic/139
|
||||
generic/140
|
||||
generic/142
|
||||
generic/143
|
||||
generic/144
|
||||
generic/145
|
||||
generic/146
|
||||
generic/147
|
||||
generic/148
|
||||
generic/149
|
||||
generic/150
|
||||
generic/151
|
||||
generic/152
|
||||
generic/153
|
||||
generic/154
|
||||
generic/155
|
||||
generic/156
|
||||
generic/157
|
||||
generic/158
|
||||
generic/159
|
||||
generic/160
|
||||
generic/161
|
||||
generic/162
|
||||
generic/163
|
||||
generic/171
|
||||
generic/172
|
||||
generic/173
|
||||
generic/174
|
||||
generic/177
|
||||
generic/178
|
||||
generic/179
|
||||
generic/180
|
||||
generic/181
|
||||
generic/182
|
||||
generic/183
|
||||
generic/185
|
||||
generic/188
|
||||
generic/189
|
||||
generic/190
|
||||
generic/191
|
||||
generic/193
|
||||
generic/194
|
||||
generic/195
|
||||
generic/196
|
||||
generic/197
|
||||
generic/198
|
||||
generic/199
|
||||
generic/200
|
||||
generic/201
|
||||
generic/202
|
||||
generic/203
|
||||
generic/205
|
||||
generic/206
|
||||
generic/207
|
||||
generic/210
|
||||
generic/211
|
||||
generic/212
|
||||
generic/214
|
||||
generic/216
|
||||
generic/217
|
||||
generic/218
|
||||
generic/219
|
||||
generic/220
|
||||
generic/222
|
||||
generic/223
|
||||
generic/225
|
||||
generic/227
|
||||
generic/229
|
||||
generic/230
|
||||
generic/235
|
||||
generic/238
|
||||
generic/240
|
||||
generic/244
|
||||
generic/250
|
||||
generic/252
|
||||
generic/253
|
||||
generic/254
|
||||
generic/255
|
||||
generic/256
|
||||
generic/259
|
||||
generic/260
|
||||
generic/261
|
||||
generic/262
|
||||
generic/263
|
||||
generic/264
|
||||
generic/265
|
||||
generic/266
|
||||
generic/267
|
||||
generic/268
|
||||
generic/271
|
||||
generic/272
|
||||
generic/276
|
||||
generic/277
|
||||
generic/278
|
||||
generic/279
|
||||
generic/281
|
||||
generic/282
|
||||
generic/283
|
||||
generic/284
|
||||
generic/287
|
||||
generic/288
|
||||
generic/289
|
||||
generic/290
|
||||
generic/291
|
||||
generic/292
|
||||
generic/293
|
||||
generic/295
|
||||
generic/296
|
||||
generic/301
|
||||
generic/302
|
||||
generic/303
|
||||
generic/304
|
||||
generic/305
|
||||
generic/312
|
||||
generic/314
|
||||
generic/316
|
||||
generic/317
|
||||
generic/318
|
||||
generic/324
|
||||
generic/326
|
||||
generic/327
|
||||
generic/328
|
||||
generic/329
|
||||
generic/330
|
||||
generic/331
|
||||
generic/332
|
||||
generic/353
|
||||
generic/355
|
||||
generic/356
|
||||
generic/357
|
||||
generic/358
|
||||
generic/359
|
||||
generic/361
|
||||
generic/362
|
||||
generic/363
|
||||
generic/364
|
||||
generic/365
|
||||
generic/366
|
||||
generic/367
|
||||
generic/368
|
||||
generic/369
|
||||
generic/370
|
||||
generic/371
|
||||
generic/372
|
||||
generic/373
|
||||
generic/374
|
||||
generic/378
|
||||
generic/379
|
||||
generic/380
|
||||
generic/381
|
||||
generic/382
|
||||
generic/383
|
||||
generic/384
|
||||
generic/385
|
||||
generic/386
|
||||
shared/001
|
||||
shared/002
|
||||
shared/003
|
||||
shared/004
|
||||
shared/032
|
||||
shared/051
|
||||
shared/289
|
||||
Passed all 72 tests
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user