mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-11 14:10:26 +00:00
Compare commits
66 Commits
zab/check
...
greg/per-k
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
c6ed82f19c | ||
|
|
c122c6823f | ||
|
|
dfa6a8c78b | ||
|
|
b66e52f3f8 | ||
|
|
fb93d82b1e | ||
|
|
49acbb4415 | ||
|
|
7b039a1d18 | ||
|
|
ccd65b9a61 | ||
|
|
aeb1dbc5f5 | ||
|
|
e20d3ae1e8 | ||
|
|
3228749957 | ||
|
|
db445ce517 | ||
|
|
bb5d98730b | ||
|
|
cb0838a0ef | ||
|
|
7eaed848ed | ||
|
|
267c1cc2d5 | ||
|
|
c6b92329b3 | ||
|
|
91e7f051cf | ||
|
|
7645f04363 | ||
|
|
8c06302984 | ||
|
|
1bc83e9e2d | ||
|
|
38c6d66ffc | ||
|
|
6a17dc335f | ||
|
|
e0bb6ca481 | ||
|
|
38e6f11ee4 | ||
|
|
442980f1c9 | ||
|
|
82c2d0b1d0 | ||
|
|
4a8240748e | ||
|
|
60ca950f42 | ||
|
|
9c45e8b7ef | ||
|
|
ee9e8c3e1a | ||
|
|
5f156b7a36 | ||
|
|
3a51ca369b | ||
|
|
460f3ce503 | ||
|
|
fb5331a1d9 | ||
|
|
5a53e7144d | ||
|
|
a23877b150 | ||
|
|
5ccdf3c9f0 | ||
|
|
270726a6ea | ||
|
|
de304628ea | ||
|
|
6a99ca9ede | ||
|
|
0521bd0e6b | ||
|
|
361491846d | ||
|
|
9ba4271c26 | ||
|
|
90cfaf17d1 | ||
|
|
6931cb7b0e | ||
|
|
7d4db05445 | ||
|
|
7b71250072 | ||
|
|
8e37be279c | ||
|
|
d6642da44d | ||
|
|
4b87045447 | ||
|
|
3f773a8594 | ||
|
|
c385eea9a1 | ||
|
|
c296bc1959 | ||
|
|
3052feac29 | ||
|
|
1fa0d7727c | ||
|
|
2af6f47c8b | ||
|
|
6db69b7a4f | ||
|
|
8ca1f1994d | ||
|
|
48716461e4 | ||
|
|
965b692bdc | ||
|
|
c3c4b08038 | ||
|
|
0519830229 | ||
|
|
4d6e1a14ae | ||
|
|
fc3e061ea8 | ||
|
|
a4bc3fb27d |
@@ -1,6 +1,36 @@
|
||||
Versity ScoutFS Release Notes
|
||||
=============================
|
||||
|
||||
---
|
||||
v1.21
|
||||
\
|
||||
*Jul 1, 2024*
|
||||
|
||||
This release adds features that rely on incompatible changes to
|
||||
the structure of the file system. The process of advancing the format version
|
||||
to enable these features is described in scoutfs(5).
|
||||
|
||||
Added the ".indx." extended attribute tag which can be used to determine
|
||||
the sorting of files in a global index.
|
||||
|
||||
Added ScoutFS quotas which let rules define file size and count limits
|
||||
in terms of ".totl." extended attribute totals.
|
||||
|
||||
Added the project ID file attribute which is inherited from parent
|
||||
directories on creation. ScoutFS quota rules can reference project IDs.
|
||||
|
||||
Added a retention attribute for files which prevents modification once
|
||||
enabled.
|
||||
|
||||
---
|
||||
v1.20
|
||||
\
|
||||
*Apr 22, 2024*
|
||||
|
||||
Minor changes to packaging to better support "weak" module linking of
|
||||
the kernel module, and to include git hashes in the built package. No
|
||||
changes in runtime behaviour.
|
||||
|
||||
---
|
||||
v1.19
|
||||
\
|
||||
|
||||
@@ -12,17 +12,22 @@ else
|
||||
SP = @:
|
||||
endif
|
||||
|
||||
SCOUTFS_GIT_DESCRIBE := \
|
||||
SCOUTFS_GIT_DESCRIBE ?= \
|
||||
$(shell git describe --all --abbrev=6 --long 2>/dev/null || \
|
||||
echo no-git)
|
||||
|
||||
ESCAPED_GIT_DESCRIBE := \
|
||||
$(shell echo $(SCOUTFS_GIT_DESCRIBE) |sed -e 's/\//\\\//g')
|
||||
|
||||
RPM_GITHASH ?= $(shell git rev-parse --short HEAD)
|
||||
|
||||
SCOUTFS_ARGS := SCOUTFS_GIT_DESCRIBE=$(SCOUTFS_GIT_DESCRIBE) \
|
||||
RPM_GITHASH=$(RPM_GITHASH) \
|
||||
CONFIG_SCOUTFS_FS=m -C $(SK_KSRC) M=$(CURDIR)/src \
|
||||
EXTRA_CFLAGS="-Werror"
|
||||
|
||||
# - We use the git describe from tags to set up the RPM versioning
|
||||
RPM_VERSION := $(shell git describe --long --tags | awk -F '-' '{gsub(/^v/,""); print $$1}')
|
||||
RPM_GITHASH := $(shell git rev-parse --short HEAD)
|
||||
TARFILE = scoutfs-kmod-$(RPM_VERSION).tar
|
||||
|
||||
|
||||
@@ -41,7 +46,8 @@ modules_install:
|
||||
|
||||
%.spec: %.spec.in .FORCE
|
||||
sed -e 's/@@VERSION@@/$(RPM_VERSION)/g' \
|
||||
-e 's/@@GITHASH@@/$(RPM_GITHASH)/g' < $< > $@+
|
||||
-e 's/@@GITHASH@@/$(RPM_GITHASH)/g' \
|
||||
-e 's/@@GITDESCRIBE@@/$(ESCAPED_GIT_DESCRIBE)/g' < $< > $@+
|
||||
mv $@+ $@
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
%define kmod_name scoutfs
|
||||
%define kmod_version @@VERSION@@
|
||||
%define kmod_git_hash @@GITHASH@@
|
||||
%define kmod_git_describe @@GITDESCRIBE@@
|
||||
%define pkg_date %(date +%%Y%%m%%d)
|
||||
|
||||
# Disable the building of the debug package(s).
|
||||
@@ -9,6 +10,7 @@
|
||||
# take kernel version or default to uname -r
|
||||
%{!?kversion: %global kversion %(uname -r)}
|
||||
%global kernel_version %{kversion}
|
||||
%global sanitized_kversion %(echo %{kernel_version} | sed -e 's/-/_/g' -e 's/\.el.*//')
|
||||
|
||||
%if 0%{?el7}
|
||||
%global kernel_source() /usr/src/kernels/%{kernel_version}.$(arch)
|
||||
@@ -27,7 +29,7 @@ Name: kmod-%{kmod_name}
|
||||
%endif
|
||||
Summary: %{kmod_name} kernel module
|
||||
Version: %{kmod_version}
|
||||
Release: %{_release}%{?dist}
|
||||
Release: %{_release}+%{sanitized_kversion}%{?dist}
|
||||
License: GPLv2
|
||||
Group: System/Kernel
|
||||
URL: http://scoutfs.org/
|
||||
@@ -75,7 +77,7 @@ echo "Building for kernel: %{kernel_version} flavors: '%{flavors_to_build}'"
|
||||
for flavor in %flavors_to_build; do
|
||||
rm -rf obj/$flavor
|
||||
cp -r source obj/$flavor
|
||||
make SK_KSRC=%{kernel_source $flavor} -C obj/$flavor module
|
||||
make RPM_GITHASH=%{kmod_git_hash} SCOUTFS_GIT_DESCRIBE=%{kmod_git_describe} SK_KSRC=%{kernel_source $flavor} -C obj/$flavor module
|
||||
done
|
||||
|
||||
%install
|
||||
@@ -97,10 +99,21 @@ find %{buildroot} -type f -name \*.ko -exec %{__chmod} u+x \{\} \;
|
||||
/lib/modules
|
||||
|
||||
%post
|
||||
weak-modules --add-kernel --no-initramfs
|
||||
echo /lib/modules/%{kversion}/%{install_mod_dir}/scoutfs.ko | weak-modules --add-modules --no-initramfs
|
||||
depmod -a
|
||||
%endif
|
||||
|
||||
%clean
|
||||
rm -rf %{buildroot}
|
||||
|
||||
%preun
|
||||
# stash our modules for postun cleanup
|
||||
SCOUTFS_RPM_NAME=$(rpm -q %{name} | grep "%{version}-%{release}")
|
||||
rpm -ql $SCOUTFS_RPM_NAME | grep '\.ko$' > /var/run/%{name}-modules-%{version}-%{release} || true
|
||||
|
||||
%postun
|
||||
if [ -x /sbin/weak-modules ]; then
|
||||
cat /var/run/%{name}-modules-%{version}-%{release} | /sbin/weak-modules --remove-modules --no-initramfs
|
||||
fi
|
||||
|
||||
rm /var/run/%{name}-modules-%{version}-%{release} || true
|
||||
|
||||
@@ -9,6 +9,7 @@ CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include
|
||||
|
||||
scoutfs-y += \
|
||||
acl.o \
|
||||
attr_x.o \
|
||||
avl.o \
|
||||
alloc.o \
|
||||
block.o \
|
||||
@@ -34,6 +35,7 @@ scoutfs-y += \
|
||||
options.o \
|
||||
per_task.o \
|
||||
quorum.o \
|
||||
quota.o \
|
||||
recov.o \
|
||||
scoutfs_trace.o \
|
||||
server.o \
|
||||
@@ -42,10 +44,12 @@ scoutfs-y += \
|
||||
srch.o \
|
||||
super.o \
|
||||
sysfs.o \
|
||||
totl.o \
|
||||
trans.o \
|
||||
triggers.o \
|
||||
tseq.o \
|
||||
volopt.o \
|
||||
wkic.o \
|
||||
xattr.o
|
||||
|
||||
#
|
||||
|
||||
@@ -98,11 +98,9 @@ struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct s
|
||||
acl = ERR_PTR(ret);
|
||||
}
|
||||
|
||||
#ifndef KC___POSIX_ACL_CREATE
|
||||
/* can set null negative cache */
|
||||
if (!IS_ERR(acl))
|
||||
set_cached_acl(inode, type, acl);
|
||||
#endif
|
||||
|
||||
kfree(value);
|
||||
|
||||
@@ -194,10 +192,8 @@ int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
|
||||
}
|
||||
|
||||
out:
|
||||
#ifndef KC___POSIX_ACL_CREATE
|
||||
if (!ret)
|
||||
set_cached_acl(inode, type, acl);
|
||||
#endif
|
||||
|
||||
kfree(value);
|
||||
|
||||
|
||||
252
kmod/src/attr_x.c
Normal file
252
kmod/src/attr_x.c
Normal file
@@ -0,0 +1,252 @@
|
||||
/*
|
||||
* Copyright (C) 2024 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "super.h"
|
||||
#include "inode.h"
|
||||
#include "ioctl.h"
|
||||
#include "lock.h"
|
||||
#include "trans.h"
|
||||
#include "attr_x.h"
|
||||
|
||||
static int validate_attr_x_input(struct super_block *sb, struct scoutfs_ioctl_inode_attr_x *iax)
|
||||
{
|
||||
int ret;
|
||||
|
||||
if ((iax->x_mask & SCOUTFS_IOC_IAX__UNKNOWN) ||
|
||||
(iax->x_flags & SCOUTFS_IOC_IAX_F__UNKNOWN))
|
||||
return -EINVAL;
|
||||
|
||||
if ((iax->x_mask & SCOUTFS_IOC_IAX_RETENTION) &&
|
||||
(ret = scoutfs_fmt_vers_unsupported(sb, SCOUTFS_FORMAT_VERSION_FEAT_RETENTION)))
|
||||
return ret;
|
||||
|
||||
if ((iax->x_mask & SCOUTFS_IOC_IAX_PROJECT_ID) &&
|
||||
(ret = scoutfs_fmt_vers_unsupported(sb, SCOUTFS_FORMAT_VERSION_FEAT_PROJECT_ID)))
|
||||
return ret;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Conditionally fill one field of the response.
 *
 * When 'bit' is set in the request's x_mask, 'val' is stored in 'field'
 * and the running size is raised, if it was smaller, to the offset of
 * the end of that field within struct scoutfs_ioctl_inode_attr_x.  The
 * expression evaluates to the resulting size; when the bit is clear the
 * incoming size is handed back unchanged and 'val' is not evaluated.
 */
#define fill_attr(size, iax, bit, field, val)						\
({											\
	__typeof__(iax) _fa_req = (iax);						\
	__typeof__(size) _fa_len = (size);						\
											\
	if (_fa_req->x_mask & (bit)) {							\
		_fa_req->field = (val);							\
		_fa_len = max(_fa_len,							\
			      offsetof(struct scoutfs_ioctl_inode_attr_x, field) +	\
			      sizeof_field(struct scoutfs_ioctl_inode_attr_x, field));	\
	}										\
											\
	_fa_len;									\
})
|
||||
|
||||
/*
|
||||
* Returns -errno on error, or >= number of bytes filled by the
|
||||
* response. 0 can be returned if no attributes are requested in the
|
||||
* input x_mask.
|
||||
*/
|
||||
int scoutfs_get_attr_x(struct inode *inode, struct scoutfs_ioctl_inode_attr_x *iax)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
size_t size = 0;
|
||||
u64 offline;
|
||||
u64 online;
|
||||
u64 bits;
|
||||
int ret;
|
||||
|
||||
if (iax->x_mask == 0) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = validate_attr_x_input(sb, iax);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
inode_lock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_META_SEQ,
|
||||
meta_seq, scoutfs_inode_meta_seq(inode));
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_DATA_SEQ,
|
||||
data_seq, scoutfs_inode_data_seq(inode));
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_DATA_VERSION,
|
||||
data_version, scoutfs_inode_data_version(inode));
|
||||
if (iax->x_mask & (SCOUTFS_IOC_IAX_ONLINE_BLOCKS | SCOUTFS_IOC_IAX_OFFLINE_BLOCKS)) {
|
||||
scoutfs_inode_get_onoff(inode, &online, &offline);
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_ONLINE_BLOCKS,
|
||||
online_blocks, online);
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_OFFLINE_BLOCKS,
|
||||
offline_blocks, offline);
|
||||
}
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CTIME, ctime_sec, inode->i_ctime.tv_sec);
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CTIME, ctime_nsec, inode->i_ctime.tv_nsec);
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CRTIME, crtime_sec, si->crtime.tv_sec);
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_CRTIME, crtime_nsec, si->crtime.tv_nsec);
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_SIZE, size, i_size_read(inode));
|
||||
if (iax->x_mask & SCOUTFS_IOC_IAX__BITS) {
|
||||
bits = 0;
|
||||
if ((iax->x_mask & SCOUTFS_IOC_IAX_RETENTION) &&
|
||||
(scoutfs_inode_get_flags(inode) & SCOUTFS_INO_FLAG_RETENTION))
|
||||
bits |= SCOUTFS_IOC_IAX_B_RETENTION;
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX__BITS, bits, bits);
|
||||
}
|
||||
size = fill_attr(size, iax, SCOUTFS_IOC_IAX_PROJECT_ID,
|
||||
project_id, scoutfs_inode_get_proj(inode));
|
||||
|
||||
ret = size;
|
||||
unlock:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
|
||||
inode_unlock(inode);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool valid_attr_changes(struct inode *inode, struct scoutfs_ioctl_inode_attr_x *iax)
|
||||
{
|
||||
/* provided data_version must be non-zero */
|
||||
if ((iax->x_mask & SCOUTFS_IOC_IAX_DATA_VERSION) && (iax->data_version == 0))
|
||||
return false;
|
||||
|
||||
/* can only set size or data version in new regular files */
|
||||
if (((iax->x_mask & SCOUTFS_IOC_IAX_SIZE) ||
|
||||
(iax->x_mask & SCOUTFS_IOC_IAX_DATA_VERSION)) &&
|
||||
(!S_ISREG(inode->i_mode) || scoutfs_inode_data_version(inode) != 0))
|
||||
return false;
|
||||
|
||||
/* must provide non-zero data_version with non-zero size */
|
||||
if (((iax->x_mask & SCOUTFS_IOC_IAX_SIZE) && (iax->size > 0)) &&
|
||||
(!(iax->x_mask & SCOUTFS_IOC_IAX_DATA_VERSION) || (iax->data_version == 0)))
|
||||
return false;
|
||||
|
||||
/* must provide non-zero size when setting offline extents to that size */
|
||||
if ((iax->x_flags & SCOUTFS_IOC_IAX_F_SIZE_OFFLINE) &&
|
||||
(!(iax->x_mask & SCOUTFS_IOC_IAX_SIZE) || (iax->size == 0)))
|
||||
return false;
|
||||
|
||||
/* the retention bit only applies to regular files */
|
||||
if ((iax->x_mask & SCOUTFS_IOC_IAX_RETENTION) && !S_ISREG(inode->i_mode))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int scoutfs_set_attr_x(struct inode *inode, struct scoutfs_ioctl_inode_attr_x *iax)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
LIST_HEAD(ind_locks);
|
||||
bool set_data_seq;
|
||||
int ret;
|
||||
|
||||
/* initially all setting is root only, could loosen with finer grained checks */
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (iax->x_mask == 0) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = validate_attr_x_input(sb, iax);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
inode_lock(inode);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE, SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
/* check for errors before making any changes */
|
||||
if (!valid_attr_changes(inode, iax)) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* retention prevents modification unless also clearing retention */
|
||||
ret = scoutfs_inode_check_retention(inode);
|
||||
if (ret < 0 && !((iax->x_mask & SCOUTFS_IOC_IAX_RETENTION) &&
|
||||
!(iax->bits & SCOUTFS_IOC_IAX_B_RETENTION)))
|
||||
goto unlock;
|
||||
|
||||
/* setting only so we don't see 0 data seq with nonzero data_version */
|
||||
if ((iax->x_mask & SCOUTFS_IOC_IAX_DATA_VERSION) && (iax->data_version > 0))
|
||||
set_data_seq = true;
|
||||
else
|
||||
set_data_seq = false;
|
||||
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, set_data_seq, true);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = scoutfs_dirty_inode_item(inode, lock);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
|
||||
/* creating offline extent first, it might fail */
|
||||
if (iax->x_flags & SCOUTFS_IOC_IAX_F_SIZE_OFFLINE) {
|
||||
ret = scoutfs_data_init_offline_extent(inode, iax->size, lock);
|
||||
if (ret)
|
||||
goto release;
|
||||
}
|
||||
|
||||
/* make all changes once they're all checked and will succeed */
|
||||
if (iax->x_mask & SCOUTFS_IOC_IAX_DATA_VERSION)
|
||||
scoutfs_inode_set_data_version(inode, iax->data_version);
|
||||
if (iax->x_mask & SCOUTFS_IOC_IAX_SIZE)
|
||||
i_size_write(inode, iax->size);
|
||||
if (iax->x_mask & SCOUTFS_IOC_IAX_CTIME) {
|
||||
inode->i_ctime.tv_sec = iax->ctime_sec;
|
||||
inode->i_ctime.tv_nsec = iax->ctime_nsec;
|
||||
}
|
||||
if (iax->x_mask & SCOUTFS_IOC_IAX_CRTIME) {
|
||||
si->crtime.tv_sec = iax->crtime_sec;
|
||||
si->crtime.tv_nsec = iax->crtime_nsec;
|
||||
}
|
||||
if (iax->x_mask & SCOUTFS_IOC_IAX_RETENTION) {
|
||||
scoutfs_inode_set_flags(inode, ~SCOUTFS_INO_FLAG_RETENTION,
|
||||
(iax->bits & SCOUTFS_IOC_IAX_B_RETENTION) ?
|
||||
SCOUTFS_INO_FLAG_RETENTION : 0);
|
||||
}
|
||||
if (iax->x_mask & SCOUTFS_IOC_IAX_PROJECT_ID)
|
||||
scoutfs_inode_set_proj(inode, iax->project_id);
|
||||
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
ret = 0;
|
||||
release:
|
||||
scoutfs_release_trans(sb);
|
||||
unlock:
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
inode_unlock(inode);
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
11
kmod/src/attr_x.h
Normal file
11
kmod/src/attr_x.h
Normal file
@@ -0,0 +1,11 @@
|
||||
#ifndef _SCOUTFS_ATTR_X_H_
|
||||
#define _SCOUTFS_ATTR_X_H_
|
||||
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include "ioctl.h"
|
||||
|
||||
int scoutfs_get_attr_x(struct inode *inode, struct scoutfs_ioctl_inode_attr_x *iax);
|
||||
int scoutfs_set_attr_x(struct inode *inode, struct scoutfs_ioctl_inode_attr_x *iax);
|
||||
|
||||
#endif
|
||||
@@ -683,6 +683,7 @@ int scoutfs_block_read_ref(struct super_block *sb, struct scoutfs_block_ref *ref
|
||||
struct scoutfs_block_header *hdr;
|
||||
struct block_private *bp = NULL;
|
||||
bool retried = false;
|
||||
__le32 crc = 0;
|
||||
int ret;
|
||||
|
||||
retry:
|
||||
@@ -695,7 +696,9 @@ retry:
|
||||
|
||||
/* corrupted writes might be a sign of a stale reference */
|
||||
if (!test_bit(BLOCK_BIT_CRC_VALID, &bp->bits)) {
|
||||
if (hdr->crc != block_calc_crc(hdr, SCOUTFS_BLOCK_LG_SIZE)) {
|
||||
crc = block_calc_crc(hdr, SCOUTFS_BLOCK_LG_SIZE);
|
||||
if (hdr->crc != crc) {
|
||||
trace_scoutfs_block_stale(sb, ref, hdr, magic, le32_to_cpu(crc));
|
||||
ret = -ESTALE;
|
||||
goto out;
|
||||
}
|
||||
@@ -705,6 +708,7 @@ retry:
|
||||
|
||||
if (hdr->magic != cpu_to_le32(magic) || hdr->fsid != cpu_to_le64(sbi->fsid) ||
|
||||
hdr->seq != ref->seq || hdr->blkno != ref->blkno) {
|
||||
trace_scoutfs_block_stale(sb, ref, hdr, magic, 0);
|
||||
ret = -ESTALE;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -162,6 +162,8 @@
|
||||
EXPAND_COUNTER(orphan_scan_error) \
|
||||
EXPAND_COUNTER(orphan_scan_item) \
|
||||
EXPAND_COUNTER(orphan_scan_omap_set) \
|
||||
EXPAND_COUNTER(quota_info_count_objects) \
|
||||
EXPAND_COUNTER(quota_info_scan_objects) \
|
||||
EXPAND_COUNTER(quorum_candidate_server_stopping) \
|
||||
EXPAND_COUNTER(quorum_elected) \
|
||||
EXPAND_COUNTER(quorum_fence_error) \
|
||||
@@ -199,20 +201,19 @@
|
||||
EXPAND_COUNTER(srch_read_stale) \
|
||||
EXPAND_COUNTER(statfs) \
|
||||
EXPAND_COUNTER(totl_read_copied) \
|
||||
EXPAND_COUNTER(totl_read_finalized) \
|
||||
EXPAND_COUNTER(totl_read_fs) \
|
||||
EXPAND_COUNTER(totl_read_item) \
|
||||
EXPAND_COUNTER(totl_read_logged) \
|
||||
EXPAND_COUNTER(trans_commit_data_alloc_low) \
|
||||
EXPAND_COUNTER(trans_commit_dirty_meta_full) \
|
||||
EXPAND_COUNTER(trans_commit_fsync) \
|
||||
EXPAND_COUNTER(trans_commit_meta_alloc_low) \
|
||||
EXPAND_COUNTER(trans_commit_sync_fs) \
|
||||
EXPAND_COUNTER(trans_commit_timer) \
|
||||
EXPAND_COUNTER(trans_commit_written)
|
||||
EXPAND_COUNTER(trans_commit_written) \
|
||||
EXPAND_COUNTER(wkic_count_objects) \
|
||||
EXPAND_COUNTER(wkic_scan_objects)
|
||||
|
||||
#define FIRST_COUNTER alloc_alloc_data
|
||||
#define LAST_COUNTER trans_commit_written
|
||||
#define LAST_COUNTER wkic_scan_objects
|
||||
|
||||
#undef EXPAND_COUNTER
|
||||
#define EXPAND_COUNTER(which) struct percpu_counter which;
|
||||
|
||||
@@ -586,6 +586,12 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (create && !si->staging) {
|
||||
ret = scoutfs_inode_check_retention(inode);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* convert unwritten to written, could be staging */
|
||||
if (create && ext.map && (ext.flags & SEF_UNWRITTEN)) {
|
||||
un.start = iblock;
|
||||
@@ -1104,6 +1110,10 @@ long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
|
||||
|
||||
while(iblock <= last) {
|
||||
|
||||
ret = scoutfs_quota_check_data(sb, inode);
|
||||
if (ret)
|
||||
goto out_extent;
|
||||
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false, true);
|
||||
if (ret)
|
||||
goto out_extent;
|
||||
@@ -1155,9 +1165,9 @@ out:
|
||||
* on regular files with no data extents. It's used to restore a file
|
||||
* with an offline extent which can then trigger staging.
|
||||
*
|
||||
* The caller has taken care of locking the inode. We're updating the
|
||||
* inode offline count as we create the offline extent so we take care
|
||||
* of the index locking, updating, and transaction.
|
||||
* The caller must take care of cluster locking, transactions, inode
|
||||
* updates, and index updates (so that they can atomically make this
|
||||
* change along with other metadata changes).
|
||||
*/
|
||||
int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
|
||||
struct scoutfs_lock *lock)
|
||||
@@ -1171,7 +1181,6 @@ int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
|
||||
.lock = lock,
|
||||
};
|
||||
const u64 count = DIV_ROUND_UP(size, SCOUTFS_BLOCK_SM_SIZE);
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 on;
|
||||
u64 off;
|
||||
int ret;
|
||||
@@ -1184,28 +1193,10 @@ int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* we're updating meta_seq with offline block count */
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_dirty_inode_item(inode, lock);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
|
||||
down_write(&si->extent_sem);
|
||||
ret = scoutfs_ext_insert(sb, &data_ext_ops, &args,
|
||||
0, count, 0, SEF_OFFLINE);
|
||||
up_write(&si->extent_sem);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
|
||||
unlock:
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
@@ -1273,6 +1264,9 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (!is_stage && (ret = scoutfs_inode_check_retention(to)))
|
||||
goto out;
|
||||
|
||||
if ((from_off & SCOUTFS_BLOCK_SM_MASK) ||
|
||||
(to_off & SCOUTFS_BLOCK_SM_MASK) ||
|
||||
((byte_len & SCOUTFS_BLOCK_SM_MASK) &&
|
||||
@@ -1807,37 +1801,6 @@ int scoutfs_data_wait_check_iov(struct inode *inode, const struct iovec *iov,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_data_wait_check_iter(struct inode *inode, loff_t pos, struct iov_iter *iter,
|
||||
u8 sef, u8 op, struct scoutfs_data_wait *dw,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
size_t count = iov_iter_count(iter);
|
||||
size_t off = iter->iov_offset;
|
||||
const struct iovec *iov;
|
||||
size_t len;
|
||||
int ret = 0;
|
||||
|
||||
for (iov = iter->iov; count > 0; iov++) {
|
||||
len = iov->iov_len - off;
|
||||
if (len == 0)
|
||||
continue;
|
||||
|
||||
/* aren't we waiting on too much data here ? */
|
||||
ret = scoutfs_data_wait_check(inode, pos, len,
|
||||
sef, op, dw, lock);
|
||||
|
||||
if (ret != 0)
|
||||
break;
|
||||
|
||||
|
||||
pos += len;
|
||||
count -= len;
|
||||
off = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_data_wait(struct inode *inode, struct scoutfs_data_wait *dw)
|
||||
{
|
||||
DECLARE_DATA_WAIT_ROOT(inode->i_sb, rt);
|
||||
|
||||
@@ -65,9 +65,6 @@ int scoutfs_data_wait_check_iov(struct inode *inode, const struct iovec *iov,
|
||||
unsigned long nr_segs, loff_t pos, u8 sef,
|
||||
u8 op, struct scoutfs_data_wait *ow,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_data_wait_check_iter(struct inode *inode, loff_t pos, struct iov_iter *iter,
|
||||
u8 sef, u8 op, struct scoutfs_data_wait *ow,
|
||||
struct scoutfs_lock *lock);
|
||||
bool scoutfs_data_wait_found(struct scoutfs_data_wait *ow);
|
||||
int scoutfs_data_wait(struct inode *inode,
|
||||
struct scoutfs_data_wait *ow);
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "forest.h"
|
||||
#include "acl.h"
|
||||
#include "counters.h"
|
||||
#include "quota.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
@@ -651,6 +652,10 @@ static struct inode *lock_hold_create(struct inode *dir, struct dentry *dentry,
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
ret = scoutfs_quota_check_inode(sb, dir);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
if (orph_lock) {
|
||||
ret = scoutfs_lock_orphan(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, ino, orph_lock);
|
||||
if (ret < 0)
|
||||
@@ -672,6 +677,8 @@ retry:
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
scoutfs_inode_set_proj(inode, scoutfs_inode_get_proj(dir));
|
||||
|
||||
ret = scoutfs_dirty_inode_item(dir, *dir_lock);
|
||||
out:
|
||||
if (ret)
|
||||
@@ -926,12 +933,16 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = scoutfs_inode_check_retention(inode);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
|
||||
hash = dirent_name_hash(dentry->d_name.name, dentry->d_name.len);
|
||||
|
||||
ret = lookup_dirent(sb, scoutfs_ino(dir), dentry->d_name.name, dentry->d_name.len, hash,
|
||||
&dent, dir_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
goto unlock;
|
||||
|
||||
if (should_orphan(inode)) {
|
||||
ret = scoutfs_lock_orphan(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, scoutfs_ino(inode),
|
||||
@@ -1632,6 +1643,10 @@ static int scoutfs_rename_common(struct inode *old_dir,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if ((old_inode && (ret = scoutfs_inode_check_retention(old_inode))) ||
|
||||
(new_inode && (ret = scoutfs_inode_check_retention(new_inode))))
|
||||
goto out_unlock;
|
||||
|
||||
if (should_orphan(new_inode)) {
|
||||
ret = scoutfs_lock_orphan(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, scoutfs_ino(new_inode),
|
||||
&orph_lock);
|
||||
|
||||
@@ -28,6 +28,7 @@
|
||||
#include "inode.h"
|
||||
#include "per_task.h"
|
||||
#include "omap.h"
|
||||
#include "quota.h"
|
||||
|
||||
#ifdef KC_LINUX_HAVE_FOP_AIO_READ
|
||||
/*
|
||||
@@ -108,6 +109,10 @@ retry:
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_inode_check_retention(inode);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -122,6 +127,10 @@ retry:
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_quota_check_data(sb, inode);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
/* XXX: remove SUID bit */
|
||||
|
||||
ret = __generic_file_aio_write(iocb, iov, nr_segs, &iocb->ki_pos);
|
||||
@@ -171,10 +180,8 @@ retry:
|
||||
goto out;
|
||||
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
|
||||
ret = scoutfs_data_wait_check_iter(inode, iocb->ki_pos, to,
|
||||
SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_READ,
|
||||
&dw, scoutfs_inode_lock);
|
||||
ret = scoutfs_data_wait_check(inode, iocb->ki_pos, iov_iter_count(to), SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_READ, &dw, scoutfs_inode_lock);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
} else {
|
||||
@@ -205,8 +212,7 @@ ssize_t scoutfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
|
||||
struct scoutfs_lock *scoutfs_inode_lock = NULL;
|
||||
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
|
||||
DECLARE_DATA_WAIT(dw);
|
||||
int ret;
|
||||
int written;
|
||||
ssize_t ret;
|
||||
|
||||
retry:
|
||||
inode_lock(inode);
|
||||
@@ -219,23 +225,29 @@ retry:
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_inode_check_retention(inode);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_complete_truncate(inode, scoutfs_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_quota_check_data(sb, inode);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_per_task_add_excl(&si->pt_data_lock, &pt_ent, scoutfs_inode_lock)) {
|
||||
/* data_version is per inode, whole file must be online */
|
||||
ret = scoutfs_data_wait_check_iter(inode, iocb->ki_pos, from,
|
||||
SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_WRITE,
|
||||
&dw, scoutfs_inode_lock);
|
||||
ret = scoutfs_data_wait_check(inode, 0, i_size_read(inode), SEF_OFFLINE,
|
||||
SCOUTFS_IOC_DWO_WRITE, &dw, scoutfs_inode_lock);
|
||||
if (ret != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* XXX: remove SUID bit */
|
||||
|
||||
written = __generic_file_write_iter(iocb, from);
|
||||
ret = __generic_file_write_iter(iocb, from);
|
||||
|
||||
out:
|
||||
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
|
||||
@@ -248,10 +260,10 @@ out:
|
||||
goto retry;
|
||||
}
|
||||
|
||||
if (ret > 0 || ret == -EIOCBQUEUED)
|
||||
ret = generic_write_sync(iocb, written);
|
||||
if (ret > 0)
|
||||
ret = generic_write_sync(iocb, ret);
|
||||
|
||||
return written ? written : ret;
|
||||
return ret;
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -238,19 +238,16 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
|
||||
* We return -ESTALE if we hit stale blocks to give the caller a chance
|
||||
* to reset their state and retry with a newer version of the btrees.
|
||||
*/
|
||||
int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg)
|
||||
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
|
||||
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg)
|
||||
{
|
||||
struct forest_read_items_data rid = {
|
||||
.cb = cb,
|
||||
.cb_arg = arg,
|
||||
};
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_net_roots roots;
|
||||
struct scoutfs_bloom_block *bb;
|
||||
struct forest_bloom_nrs bloom;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
@@ -264,18 +261,14 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
scoutfs_inc_counter(sb, forest_read_items);
|
||||
calc_bloom_nrs(&bloom, bloom_key);
|
||||
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
trace_scoutfs_forest_using_roots(sb, &roots.fs_root, &roots.logs_root);
|
||||
trace_scoutfs_forest_using_roots(sb, &roots->fs_root, &roots->logs_root);
|
||||
|
||||
*start = orig_start;
|
||||
*end = orig_end;
|
||||
|
||||
/* start with fs root items */
|
||||
rid.fic |= FIC_FS_ROOT;
|
||||
ret = scoutfs_btree_read_items(sb, &roots.fs_root, key, start, end,
|
||||
ret = scoutfs_btree_read_items(sb, &roots->fs_root, key, start, end,
|
||||
forest_read_items, &rid);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
@@ -283,7 +276,7 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
|
||||
scoutfs_key_init_log_trees(&ltk, 0, 0);
|
||||
for (;; scoutfs_key_inc(&ltk)) {
|
||||
ret = scoutfs_btree_next(sb, &roots.logs_root, &ltk, &iref);
|
||||
ret = scoutfs_btree_next(sb, &roots->logs_root, &ltk, &iref);
|
||||
if (ret == 0) {
|
||||
if (iref.val_len == sizeof(lt)) {
|
||||
ltk = *iref.key;
|
||||
@@ -340,6 +333,23 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg)
|
||||
{
|
||||
struct scoutfs_net_roots roots;
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret == 0)
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, key, bloom_key, start, end,
|
||||
cb, arg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the items are deltas then combine the src with the destination
|
||||
* value and store the result in the destination.
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
struct scoutfs_alloc;
|
||||
struct scoutfs_block_writer;
|
||||
struct scoutfs_block;
|
||||
struct scoutfs_lock;
|
||||
|
||||
#include "btree.h"
|
||||
|
||||
@@ -23,6 +24,10 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_key *start,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
|
||||
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_set_bloom_bits(struct super_block *sb,
|
||||
struct scoutfs_lock *lock);
|
||||
void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq);
|
||||
|
||||
@@ -8,9 +8,14 @@
|
||||
*/
|
||||
#define SCOUTFS_FORMAT_VERSION_MIN 1
|
||||
#define SCOUTFS_FORMAT_VERSION_MIN_STR __stringify(SCOUTFS_FORMAT_VERSION_MIN)
|
||||
#define SCOUTFS_FORMAT_VERSION_MAX 1
|
||||
#define SCOUTFS_FORMAT_VERSION_MAX 2
|
||||
#define SCOUTFS_FORMAT_VERSION_MAX_STR __stringify(SCOUTFS_FORMAT_VERSION_MAX)
|
||||
|
||||
#define SCOUTFS_FORMAT_VERSION_FEAT_RETENTION 2
|
||||
#define SCOUTFS_FORMAT_VERSION_FEAT_PROJECT_ID 2
|
||||
#define SCOUTFS_FORMAT_VERSION_FEAT_QUOTA 2
|
||||
#define SCOUTFS_FORMAT_VERSION_FEAT_INDX_TAG 2
|
||||
|
||||
/* statfs(2) f_type */
|
||||
#define SCOUTFS_SUPER_MAGIC 0x554f4353 /* "SCOU" */
|
||||
|
||||
@@ -175,6 +180,10 @@ struct scoutfs_key {
|
||||
#define sko_rid _sk_first
|
||||
#define sko_ino _sk_second
|
||||
|
||||
/* quota rules */
|
||||
#define skqr_hash _sk_second
|
||||
#define skqr_coll_nr _sk_third
|
||||
|
||||
/* xattr totl */
|
||||
#define skxt_a _sk_first
|
||||
#define skxt_b _sk_second
|
||||
@@ -585,7 +594,9 @@ struct scoutfs_log_merge_freeing {
|
||||
*/
|
||||
#define SCOUTFS_INODE_INDEX_ZONE 4
|
||||
#define SCOUTFS_ORPHAN_ZONE 8
|
||||
#define SCOUTFS_QUOTA_ZONE 10
|
||||
#define SCOUTFS_XATTR_TOTL_ZONE 12
|
||||
#define SCOUTFS_XATTR_INDX_ZONE 14
|
||||
#define SCOUTFS_FS_ZONE 16
|
||||
#define SCOUTFS_LOCK_ZONE 20
|
||||
/* Items only stored in server btrees */
|
||||
@@ -608,6 +619,9 @@ struct scoutfs_log_merge_freeing {
|
||||
/* orphan zone, redundant type used for clarity */
|
||||
#define SCOUTFS_ORPHAN_TYPE 4
|
||||
|
||||
/* quota zone */
|
||||
#define SCOUTFS_QUOTA_RULE_TYPE 4
|
||||
|
||||
/* fs zone */
|
||||
#define SCOUTFS_INODE_TYPE 4
|
||||
#define SCOUTFS_XATTR_TYPE 8
|
||||
@@ -661,6 +675,34 @@ struct scoutfs_xattr_totl_val {
|
||||
__le64 count;
|
||||
};
|
||||
|
||||
#define SQ_RF_TOTL_COUNT (1 << 0)
|
||||
#define SQ_RF__UNKNOWN (~((1 << 1) - 1))
|
||||
|
||||
#define SQ_NS_LITERAL 0
|
||||
#define SQ_NS_PROJ 1
|
||||
#define SQ_NS_UID 2
|
||||
#define SQ_NS_GID 3
|
||||
#define SQ_NS__NR 4
|
||||
#define SQ_NS__NR_SELECT (SQ_NS__NR - 1) /* !literal */
|
||||
|
||||
#define SQ_NF_SELECT (1 << 0)
|
||||
#define SQ_NF__UNKNOWN (~((1 << 1) - 1))
|
||||
|
||||
#define SQ_OP_INODE 0
|
||||
#define SQ_OP_DATA 1
|
||||
#define SQ_OP__NR 2
|
||||
|
||||
struct scoutfs_quota_rule_val {
|
||||
__le64 name_val[3];
|
||||
__le64 limit;
|
||||
__u8 prio;
|
||||
__u8 op;
|
||||
__u8 rule_flags;
|
||||
__u8 name_source[3];
|
||||
__u8 name_flags[3];
|
||||
__u8 _pad[7];
|
||||
};
|
||||
|
||||
/* XXX does this exist upstream somewhere? */
|
||||
#define member_sizeof(TYPE, MEMBER) (sizeof(((TYPE *)0)->MEMBER))
|
||||
|
||||
@@ -859,9 +901,38 @@ struct scoutfs_inode {
|
||||
struct scoutfs_timespec ctime;
|
||||
struct scoutfs_timespec mtime;
|
||||
struct scoutfs_timespec crtime;
|
||||
__le64 proj;
|
||||
};
|
||||
|
||||
#define SCOUTFS_INO_FLAG_TRUNCATE 0x1
|
||||
#define SCOUTFS_INODE_FMT_V1_BYTES offsetof(struct scoutfs_inode, proj)
|
||||
|
||||
/*
|
||||
* There are so few versions that we don't mind doing this work inline
|
||||
* so that both utils and kernel can share these. Mounting has already
|
||||
* checked that the format version is within the supported min and max,
|
||||
* so these functions only deal with size variance within that band.
|
||||
*/
|
||||
/* Returns the native written inode size for the given format version; any version other than 1 gets the full struct size */
|
||||
static inline int scoutfs_inode_vers_bytes(__u64 fmt_vers)
|
||||
{
|
||||
if (fmt_vers == 1)
|
||||
return SCOUTFS_INODE_FMT_V1_BYTES;
|
||||
else
|
||||
return sizeof(struct scoutfs_inode);
|
||||
}
|
||||
/*
|
||||
* Returns true if bytes is a valid inode size to read from the given
|
||||
* version. The given version must be at least the version that
|
||||
* introduced the size.
|
||||
*/
|
||||
static inline int scoutfs_inode_valid_vers_bytes(__u64 fmt_vers, int bytes)
|
||||
{
|
||||
return (bytes == sizeof(struct scoutfs_inode) && fmt_vers == SCOUTFS_FORMAT_VERSION_MAX) ||
|
||||
(bytes == SCOUTFS_INODE_FMT_V1_BYTES);
|
||||
}
|
||||
|
||||
#define SCOUTFS_INO_FLAG_TRUNCATE 0x1
|
||||
#define SCOUTFS_INO_FLAG_RETENTION 0x2
|
||||
|
||||
#define SCOUTFS_ROOT_INO 1
|
||||
|
||||
|
||||
172
kmod/src/inode.c
172
kmod/src/inode.c
@@ -91,7 +91,7 @@ static void scoutfs_inode_ctor(void *obj)
|
||||
|
||||
init_rwsem(&si->extent_sem);
|
||||
mutex_init(&si->item_mutex);
|
||||
seqcount_init(&si->seqcount);
|
||||
seqlock_init(&si->seqlock);
|
||||
si->staging = false;
|
||||
scoutfs_per_task_init(&si->pt_data_lock);
|
||||
atomic64_set(&si->data_waitq.changed, 0);
|
||||
@@ -250,7 +250,7 @@ static void set_item_info(struct scoutfs_inode_info *si,
|
||||
set_item_major(si, SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE, sinode->data_seq);
|
||||
}
|
||||
|
||||
static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
static void load_inode(struct inode *inode, struct scoutfs_inode *cinode, int inode_bytes)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
@@ -278,6 +278,7 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
si->flags = le32_to_cpu(cinode->flags);
|
||||
si->crtime.tv_sec = le64_to_cpu(cinode->crtime.sec);
|
||||
si->crtime.tv_nsec = le32_to_cpu(cinode->crtime.nsec);
|
||||
si->proj = le64_to_cpu(cinode->proj);
|
||||
|
||||
/*
|
||||
* i_blocks is initialized from online and offline and is then
|
||||
@@ -298,6 +299,24 @@ void scoutfs_inode_init_key(struct scoutfs_key *key, u64 ino)
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* Read an inode item into the caller's buffer and return the size that
|
||||
* we read. Returns errors if the inode size is unsupported or doesn't
|
||||
* make sense for the format version.
|
||||
*/
|
||||
static int lookup_inode_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_inode *sinode, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_item_lookup_smaller_zero(sb, key, sinode, sizeof(struct scoutfs_inode), lock);
|
||||
if (ret >= 0 && !scoutfs_inode_valid_vers_bytes(sbi->fmt_vers, ret))
|
||||
return -EIO;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Refresh the vfs inode fields if the lock indicates that the current
|
||||
* contents could be stale.
|
||||
@@ -333,12 +352,12 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock)
|
||||
|
||||
mutex_lock(&si->item_mutex);
|
||||
if (atomic64_read(&si->last_refreshed) < refresh_gen) {
|
||||
ret = scoutfs_item_lookup_exact(sb, &key, &sinode,
|
||||
sizeof(sinode), lock);
|
||||
if (ret == 0) {
|
||||
load_inode(inode, &sinode);
|
||||
ret = lookup_inode_item(sb, &key, &sinode, lock);
|
||||
if (ret > 0) {
|
||||
load_inode(inode, &sinode, ret);
|
||||
atomic64_set(&si->last_refreshed, refresh_gen);
|
||||
scoutfs_lock_add_coverage(sb, lock, &si->ino_lock_cov);
|
||||
ret = 0;
|
||||
}
|
||||
} else {
|
||||
ret = 0;
|
||||
@@ -486,6 +505,10 @@ retry:
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_inode_check_retention(inode);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
attr_size = (attr->ia_valid & ATTR_SIZE) ? attr->ia_size :
|
||||
i_size_read(inode);
|
||||
|
||||
@@ -566,11 +589,9 @@ static void set_trans_seq(struct inode *inode, u64 *seq)
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
if (*seq != sbi->trans_seq) {
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&si->seqcount);
|
||||
write_seqlock(&si->seqlock);
|
||||
*seq = sbi->trans_seq;
|
||||
write_seqcount_end(&si->seqcount);
|
||||
preempt_enable();
|
||||
write_sequnlock(&si->seqlock);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -592,22 +613,18 @@ void scoutfs_inode_inc_data_version(struct inode *inode)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&si->seqcount);
|
||||
write_seqlock(&si->seqlock);
|
||||
si->data_version++;
|
||||
write_seqcount_end(&si->seqcount);
|
||||
preempt_enable();
|
||||
write_sequnlock(&si->seqlock);
|
||||
}
|
||||
|
||||
void scoutfs_inode_set_data_version(struct inode *inode, u64 data_version)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&si->seqcount);
|
||||
write_seqlock(&si->seqlock);
|
||||
si->data_version = data_version;
|
||||
write_seqcount_end(&si->seqcount);
|
||||
preempt_enable();
|
||||
write_sequnlock(&si->seqlock);
|
||||
}
|
||||
|
||||
void scoutfs_inode_add_onoff(struct inode *inode, s64 on, s64 off)
|
||||
@@ -616,8 +633,7 @@ void scoutfs_inode_add_onoff(struct inode *inode, s64 on, s64 off)
|
||||
|
||||
if (inode && (on || off)) {
|
||||
si = SCOUTFS_I(inode);
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&si->seqcount);
|
||||
write_seqlock(&si->seqlock);
|
||||
|
||||
/* inode and extents out of sync, bad callers */
|
||||
if (((s64)si->online_blocks + on < 0) ||
|
||||
@@ -638,8 +654,7 @@ void scoutfs_inode_add_onoff(struct inode *inode, s64 on, s64 off)
|
||||
si->online_blocks,
|
||||
si->offline_blocks);
|
||||
|
||||
write_seqcount_end(&si->seqcount);
|
||||
preempt_enable();
|
||||
write_sequnlock(&si->seqlock);
|
||||
}
|
||||
|
||||
/* any time offline extents decreased we try and wake waiters */
|
||||
@@ -647,16 +662,16 @@ void scoutfs_inode_add_onoff(struct inode *inode, s64 on, s64 off)
|
||||
scoutfs_data_wait_changed(inode);
|
||||
}
|
||||
|
||||
static u64 read_seqcount_u64(struct inode *inode, u64 *val)
|
||||
static u64 read_seqlock_u64(struct inode *inode, u64 *val)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
unsigned int seq;
|
||||
unsigned seq;
|
||||
u64 v;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&si->seqcount);
|
||||
seq = read_seqbegin(&si->seqlock);
|
||||
v = *val;
|
||||
} while (read_seqcount_retry(&si->seqcount, seq));
|
||||
} while (read_seqretry(&si->seqlock, seq));
|
||||
|
||||
return v;
|
||||
}
|
||||
@@ -665,33 +680,82 @@ u64 scoutfs_inode_meta_seq(struct inode *inode)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
return read_seqcount_u64(inode, &si->meta_seq);
|
||||
return read_seqlock_u64(inode, &si->meta_seq);
|
||||
}
|
||||
|
||||
u64 scoutfs_inode_data_seq(struct inode *inode)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
return read_seqcount_u64(inode, &si->data_seq);
|
||||
return read_seqlock_u64(inode, &si->data_seq);
|
||||
}
|
||||
|
||||
u64 scoutfs_inode_data_version(struct inode *inode)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
return read_seqcount_u64(inode, &si->data_version);
|
||||
return read_seqlock_u64(inode, &si->data_version);
|
||||
}
|
||||
|
||||
void scoutfs_inode_get_onoff(struct inode *inode, s64 *on, s64 *off)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
unsigned int seq;
|
||||
unsigned seq;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&si->seqcount);
|
||||
seq = read_seqbegin(&si->seqlock);
|
||||
*on = SCOUTFS_I(inode)->online_blocks;
|
||||
*off = SCOUTFS_I(inode)->offline_blocks;
|
||||
} while (read_seqcount_retry(&si->seqcount, seq));
|
||||
} while (read_seqretry(&si->seqlock, seq));
|
||||
}
|
||||
|
||||
/*
|
||||
* Get our private scoutfs inode flags, not the vfs i_flags.
|
||||
*/
|
||||
u32 scoutfs_inode_get_flags(struct inode *inode)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
unsigned seq;
|
||||
u32 flags;
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&si->seqlock);
|
||||
flags = si->flags;
|
||||
} while (read_seqretry(&si->seqlock, seq));
|
||||
|
||||
return flags;
|
||||
}
|
||||
|
||||
void scoutfs_inode_set_flags(struct inode *inode, u32 and, u32 or)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
write_seqlock(&si->seqlock);
|
||||
si->flags = (si->flags & and) | or;
|
||||
write_sequnlock(&si->seqlock);
|
||||
}
|
||||
|
||||
u64 scoutfs_inode_get_proj(struct inode *inode)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
unsigned seq;
|
||||
u64 proj;
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&si->seqlock);
|
||||
proj = si->proj;
|
||||
} while (read_seqretry(&si->seqlock, seq));
|
||||
|
||||
return proj;
|
||||
}
|
||||
|
||||
void scoutfs_inode_set_proj(struct inode *inode, u64 proj)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
write_seqlock(&si->seqlock);
|
||||
si->proj = proj;
|
||||
write_sequnlock(&si->seqlock);
|
||||
}
|
||||
|
||||
static int scoutfs_iget_test(struct inode *inode, void *arg)
|
||||
@@ -803,7 +867,7 @@ out:
|
||||
return inode;
|
||||
}
|
||||
|
||||
static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
|
||||
static void store_inode(struct scoutfs_inode *cinode, struct inode *inode, int inode_bytes)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
u64 online_blocks;
|
||||
@@ -839,6 +903,7 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
|
||||
cinode->crtime.sec = cpu_to_le64(si->crtime.tv_sec);
|
||||
cinode->crtime.nsec = cpu_to_le32(si->crtime.tv_nsec);
|
||||
memset(cinode->crtime.__pad, 0, sizeof(cinode->crtime.__pad));
|
||||
cinode->proj = cpu_to_le64(si->proj);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -862,15 +927,18 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
|
||||
int scoutfs_dirty_inode_item(struct inode *inode, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
int inode_bytes;
|
||||
int ret;
|
||||
|
||||
store_inode(&sinode, inode);
|
||||
inode_bytes = scoutfs_inode_vers_bytes(sbi->fmt_vers);
|
||||
store_inode(&sinode, inode, inode_bytes);
|
||||
|
||||
scoutfs_inode_init_key(&key, scoutfs_ino(inode));
|
||||
|
||||
ret = scoutfs_item_update(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
ret = scoutfs_item_update(sb, &key, &sinode, inode_bytes, lock);
|
||||
if (!ret)
|
||||
trace_scoutfs_dirty_inode(inode);
|
||||
return ret;
|
||||
@@ -1072,9 +1140,11 @@ void scoutfs_update_inode_item(struct inode *inode, struct scoutfs_lock *lock,
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
int inode_bytes;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
@@ -1083,15 +1153,17 @@ void scoutfs_update_inode_item(struct inode *inode, struct scoutfs_lock *lock,
|
||||
/* set the meta version once per trans for any inode updates */
|
||||
scoutfs_inode_set_meta_seq(inode);
|
||||
|
||||
inode_bytes = scoutfs_inode_vers_bytes(sbi->fmt_vers);
|
||||
|
||||
/* only race with other inode field stores once */
|
||||
store_inode(&sinode, inode);
|
||||
store_inode(&sinode, inode, inode_bytes);
|
||||
|
||||
ret = update_indices(sb, si, ino, inode->i_mode, &sinode, lock_list, lock);
|
||||
BUG_ON(ret);
|
||||
|
||||
scoutfs_inode_init_key(&key, ino);
|
||||
|
||||
err = scoutfs_item_update(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
err = scoutfs_item_update(sb, &key, &sinode, inode_bytes, lock);
|
||||
if (err) {
|
||||
scoutfs_err(sb, "inode %llu update err %d", ino, err);
|
||||
BUG_ON(err);
|
||||
@@ -1459,10 +1531,12 @@ out:
|
||||
int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, dev_t rdev,
|
||||
u64 ino, struct scoutfs_lock *lock, struct inode **inode_ret)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_inode_info *si;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
struct inode *inode;
|
||||
int inode_bytes;
|
||||
int ret;
|
||||
|
||||
inode = new_inode(sb);
|
||||
@@ -1478,6 +1552,7 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
|
||||
si->offline_blocks = 0;
|
||||
si->next_readdir_pos = SCOUTFS_DIRENT_FIRST_POS;
|
||||
si->next_xattr_id = 0;
|
||||
si->proj = 0;
|
||||
si->have_item = false;
|
||||
atomic64_set(&si->last_refreshed, lock->refresh_gen);
|
||||
scoutfs_lock_add_coverage(sb, lock, &si->ino_lock_cov);
|
||||
@@ -1493,14 +1568,16 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
|
||||
inode->i_rdev = rdev;
|
||||
set_inode_ops(inode);
|
||||
|
||||
store_inode(&sinode, inode);
|
||||
inode_bytes = scoutfs_inode_vers_bytes(sbi->fmt_vers);
|
||||
|
||||
store_inode(&sinode, inode, inode_bytes);
|
||||
scoutfs_inode_init_key(&key, scoutfs_ino(inode));
|
||||
|
||||
ret = scoutfs_omap_set(sb, ino);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_item_create(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
ret = scoutfs_item_create(sb, &key, &sinode, inode_bytes, lock);
|
||||
if (ret < 0)
|
||||
scoutfs_omap_clear(sb, ino);
|
||||
out:
|
||||
@@ -1754,7 +1831,7 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
|
||||
}
|
||||
|
||||
scoutfs_inode_init_key(&key, ino);
|
||||
ret = scoutfs_item_lookup_exact(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
ret = lookup_inode_item(sb, &key, &sinode, lock);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
@@ -2143,6 +2220,17 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return an error if the inode has the retention flag set and can not
|
||||
* be modified. This mimics the errno returned by the vfs when an
|
||||
* inode's immutable flag is set. The flag won't be set on older format
|
||||
* versions so we don't check the mounted format version here.
|
||||
*/
|
||||
int scoutfs_inode_check_retention(struct inode *inode)
|
||||
{
|
||||
return (scoutfs_inode_get_flags(inode) & SCOUTFS_INO_FLAG_RETENTION) ? -EPERM : 0;
|
||||
}
|
||||
|
||||
int scoutfs_inode_setup(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
@@ -21,6 +21,7 @@ struct scoutfs_inode_info {
|
||||
u64 data_version;
|
||||
u64 online_blocks;
|
||||
u64 offline_blocks;
|
||||
u64 proj;
|
||||
u32 flags;
|
||||
struct kc_timespec crtime;
|
||||
|
||||
@@ -47,7 +48,7 @@ struct scoutfs_inode_info {
|
||||
atomic64_t last_refreshed;
|
||||
|
||||
/* initialized once for slab object */
|
||||
seqcount_t seqcount;
|
||||
seqlock_t seqlock;
|
||||
bool staging; /* holder of i_mutex is staging */
|
||||
struct scoutfs_per_task pt_data_lock;
|
||||
struct scoutfs_data_waitq data_waitq;
|
||||
@@ -120,8 +121,15 @@ u64 scoutfs_inode_meta_seq(struct inode *inode);
|
||||
u64 scoutfs_inode_data_seq(struct inode *inode);
|
||||
u64 scoutfs_inode_data_version(struct inode *inode);
|
||||
void scoutfs_inode_get_onoff(struct inode *inode, s64 *on, s64 *off);
|
||||
u32 scoutfs_inode_get_flags(struct inode *inode);
|
||||
void scoutfs_inode_set_flags(struct inode *inode, u32 and, u32 or);
|
||||
u64 scoutfs_inode_get_proj(struct inode *inode);
|
||||
void scoutfs_inode_set_proj(struct inode *inode, u64 proj);
|
||||
|
||||
int scoutfs_complete_truncate(struct inode *inode, struct scoutfs_lock *lock);
|
||||
|
||||
int scoutfs_inode_check_retention(struct inode *inode);
|
||||
|
||||
int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock);
|
||||
#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
|
||||
int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
|
||||
681
kmod/src/ioctl.c
681
kmod/src/ioctl.c
@@ -42,6 +42,10 @@
|
||||
#include "alloc.h"
|
||||
#include "server.h"
|
||||
#include "counters.h"
|
||||
#include "attr_x.h"
|
||||
#include "totl.h"
|
||||
#include "wkic.h"
|
||||
#include "quota.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
@@ -545,20 +549,41 @@ out:
|
||||
static long scoutfs_ioc_stat_more(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct scoutfs_ioctl_stat_more stm;
|
||||
struct scoutfs_ioctl_inode_attr_x *iax = NULL;
|
||||
struct scoutfs_ioctl_stat_more *stm = NULL;
|
||||
int ret;
|
||||
|
||||
stm.meta_seq = scoutfs_inode_meta_seq(inode);
|
||||
stm.data_seq = scoutfs_inode_data_seq(inode);
|
||||
stm.data_version = scoutfs_inode_data_version(inode);
|
||||
scoutfs_inode_get_onoff(inode, &stm.online_blocks, &stm.offline_blocks);
|
||||
stm.crtime_sec = si->crtime.tv_sec;
|
||||
stm.crtime_nsec = si->crtime.tv_nsec;
|
||||
iax = kmalloc(sizeof(struct scoutfs_ioctl_inode_attr_x), GFP_KERNEL);
|
||||
stm = kmalloc(sizeof(struct scoutfs_ioctl_stat_more), GFP_KERNEL);
|
||||
if (!iax || !stm) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_to_user((void __user *)arg, &stm, sizeof(stm)))
|
||||
return -EFAULT;
|
||||
iax->x_mask = SCOUTFS_IOC_IAX_META_SEQ | SCOUTFS_IOC_IAX_DATA_SEQ |
|
||||
SCOUTFS_IOC_IAX_DATA_VERSION | SCOUTFS_IOC_IAX_ONLINE_BLOCKS |
|
||||
SCOUTFS_IOC_IAX_OFFLINE_BLOCKS | SCOUTFS_IOC_IAX_CRTIME;
|
||||
iax->x_flags = 0;
|
||||
ret = scoutfs_get_attr_x(inode, iax);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
return 0;
|
||||
stm->meta_seq = iax->meta_seq;
|
||||
stm->data_seq = iax->data_seq;
|
||||
stm->data_version = iax->data_version;
|
||||
stm->online_blocks = iax->online_blocks;
|
||||
stm->offline_blocks = iax->offline_blocks;
|
||||
stm->crtime_sec = iax->crtime_sec;
|
||||
stm->crtime_nsec = iax->crtime_nsec;
|
||||
|
||||
if (copy_to_user((void __user *)arg, stm, sizeof(struct scoutfs_ioctl_stat_more)))
|
||||
ret = -EFAULT;
|
||||
else
|
||||
ret = 0;
|
||||
out:
|
||||
kfree(iax);
|
||||
kfree(stm);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool inc_wrapped(u64 *ino, u64 *iblock)
|
||||
@@ -615,24 +640,19 @@ static long scoutfs_ioc_data_waiting(struct file *file, unsigned long arg)
|
||||
* This is used when restoring files, it lets the caller set all the
|
||||
* inode attributes which are otherwise unreachable. Changing the file
|
||||
* size can only be done for regular files with a data_version of 0.
|
||||
*
|
||||
* We unconditionally fill the iax attributes from the sm set and let
|
||||
* set_attr_x check them.
|
||||
*/
|
||||
static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file->f_inode;
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_ioctl_setattr_more __user *usm = (void __user *)arg;
|
||||
struct scoutfs_ioctl_inode_attr_x *iax = NULL;
|
||||
struct scoutfs_ioctl_setattr_more sm;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
LIST_HEAD(ind_locks);
|
||||
bool set_data_seq;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!(file->f_mode & FMODE_WRITE)) {
|
||||
ret = -EBADF;
|
||||
goto out;
|
||||
@@ -643,65 +663,38 @@ static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((sm.i_size > 0 && sm.data_version == 0) ||
|
||||
((sm.flags & SCOUTFS_IOC_SETATTR_MORE_OFFLINE) && !sm.i_size) ||
|
||||
(sm.flags & SCOUTFS_IOC_SETATTR_MORE_UNKNOWN)) {
|
||||
if (sm.flags & SCOUTFS_IOC_SETATTR_MORE_UNKNOWN) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
iax = kzalloc(sizeof(struct scoutfs_ioctl_inode_attr_x), GFP_KERNEL);
|
||||
if (!iax) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
iax->x_mask = SCOUTFS_IOC_IAX_DATA_VERSION | SCOUTFS_IOC_IAX_CTIME |
|
||||
SCOUTFS_IOC_IAX_CRTIME | SCOUTFS_IOC_IAX_SIZE;
|
||||
iax->data_version = sm.data_version;
|
||||
iax->ctime_sec = sm.ctime_sec;
|
||||
iax->ctime_nsec = sm.ctime_nsec;
|
||||
iax->crtime_sec = sm.crtime_sec;
|
||||
iax->crtime_nsec = sm.crtime_nsec;
|
||||
iax->size = sm.i_size;
|
||||
|
||||
if (sm.flags & SCOUTFS_IOC_SETATTR_MORE_OFFLINE)
|
||||
iax->x_flags |= SCOUTFS_IOC_IAX_F_SIZE_OFFLINE;
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret)
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
inode_lock(inode);
|
||||
ret = scoutfs_set_attr_x(inode, iax);
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lock);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
/* can only change size/dv on untouched regular files */
|
||||
if ((sm.i_size != 0 || sm.data_version != 0) &&
|
||||
((!S_ISREG(inode->i_mode) ||
|
||||
scoutfs_inode_data_version(inode) != 0))) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* create offline extents in potentially many transactions */
|
||||
if (sm.flags & SCOUTFS_IOC_SETATTR_MORE_OFFLINE) {
|
||||
ret = scoutfs_data_init_offline_extent(inode, sm.i_size, lock);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* setting only so we don't see 0 data seq with nonzero data_version */
|
||||
set_data_seq = sm.data_version != 0 ? true : false;
|
||||
ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, set_data_seq, false);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
if (sm.data_version)
|
||||
scoutfs_inode_set_data_version(inode, sm.data_version);
|
||||
if (sm.i_size)
|
||||
i_size_write(inode, sm.i_size);
|
||||
inode->i_ctime.tv_sec = sm.ctime_sec;
|
||||
inode->i_ctime.tv_nsec = sm.ctime_nsec;
|
||||
si->crtime.tv_sec = sm.crtime_sec;
|
||||
si->crtime.tv_nsec = sm.crtime_nsec;
|
||||
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
ret = 0;
|
||||
|
||||
scoutfs_release_trans(sb);
|
||||
unlock:
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
inode_unlock(inode);
|
||||
mnt_drop_write_file(file);
|
||||
out:
|
||||
|
||||
kfree(iax);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1035,124 +1028,32 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct xattr_total_entry {
|
||||
struct rb_node node;
|
||||
struct scoutfs_ioctl_xattr_total xt;
|
||||
u64 fs_seq;
|
||||
u64 fs_total;
|
||||
u64 fs_count;
|
||||
u64 fin_seq;
|
||||
u64 fin_total;
|
||||
s64 fin_count;
|
||||
u64 log_seq;
|
||||
u64 log_total;
|
||||
s64 log_count;
|
||||
struct read_xattr_total_iter_cb_args {
|
||||
struct scoutfs_ioctl_xattr_total *xt;
|
||||
unsigned int copied;
|
||||
unsigned int total;
|
||||
};
|
||||
|
||||
static int cmp_xt_entry_name(const struct xattr_total_entry *a,
|
||||
const struct xattr_total_entry *b)
|
||||
|
||||
{
|
||||
return scoutfs_cmp_u64s(a->xt.name[0], b->xt.name[0]) ?:
|
||||
scoutfs_cmp_u64s(a->xt.name[1], b->xt.name[1]) ?:
|
||||
scoutfs_cmp_u64s(a->xt.name[2], b->xt.name[2]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Record the contribution of the three classes of logged items we can
|
||||
* see: the item in the fs_root, items from finalized log btrees, and
|
||||
* items from active log btrees. Once we have the full set the caller
|
||||
* can decide which of the items contribute to the total it sends to the
|
||||
* user.
|
||||
* This is called under an RCU read lock so it can't copy to userspace.
|
||||
*/
|
||||
static int read_xattr_total_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
u64 seq, u8 flags, void *val, int val_len, int fic, void *arg)
|
||||
static int read_xattr_total_iter_cb(struct scoutfs_key *key, void *val, unsigned int val_len,
|
||||
void *cb_arg)
|
||||
{
|
||||
struct read_xattr_total_iter_cb_args *cba = cb_arg;
|
||||
struct scoutfs_xattr_totl_val *tval = val;
|
||||
struct xattr_total_entry *ent;
|
||||
struct xattr_total_entry rd;
|
||||
struct rb_root *root = arg;
|
||||
struct rb_node *parent;
|
||||
struct rb_node **node;
|
||||
int cmp;
|
||||
struct scoutfs_ioctl_xattr_total *xt = &cba->xt[cba->copied];
|
||||
|
||||
rd.xt.name[0] = le64_to_cpu(key->skxt_a);
|
||||
rd.xt.name[1] = le64_to_cpu(key->skxt_b);
|
||||
rd.xt.name[2] = le64_to_cpu(key->skxt_c);
|
||||
xt->name[0] = le64_to_cpu(key->skxt_a);
|
||||
xt->name[1] = le64_to_cpu(key->skxt_b);
|
||||
xt->name[2] = le64_to_cpu(key->skxt_c);
|
||||
xt->total = le64_to_cpu(tval->total);
|
||||
xt->count = le64_to_cpu(tval->count);
|
||||
|
||||
/* find entry matching name */
|
||||
node = &root->rb_node;
|
||||
parent = NULL;
|
||||
cmp = -1;
|
||||
while (*node) {
|
||||
parent = *node;
|
||||
ent = container_of(*node, struct xattr_total_entry, node);
|
||||
|
||||
/* sort merge items by key then newest to oldest */
|
||||
cmp = cmp_xt_entry_name(&rd, ent);
|
||||
if (cmp < 0)
|
||||
node = &(*node)->rb_left;
|
||||
else if (cmp > 0)
|
||||
node = &(*node)->rb_right;
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
/* allocate and insert new node if we need to */
|
||||
if (cmp != 0) {
|
||||
ent = kzalloc(sizeof(*ent), GFP_KERNEL);
|
||||
if (!ent)
|
||||
return -ENOMEM;
|
||||
|
||||
memcpy(&ent->xt.name, &rd.xt.name, sizeof(ent->xt.name));
|
||||
|
||||
rb_link_node(&ent->node, parent, node);
|
||||
rb_insert_color(&ent->node, root);
|
||||
}
|
||||
|
||||
if (fic & FIC_FS_ROOT) {
|
||||
ent->fs_seq = seq;
|
||||
ent->fs_total = le64_to_cpu(tval->total);
|
||||
ent->fs_count = le64_to_cpu(tval->count);
|
||||
} else if (fic & FIC_FINALIZED) {
|
||||
ent->fin_seq = seq;
|
||||
ent->fin_total += le64_to_cpu(tval->total);
|
||||
ent->fin_count += le64_to_cpu(tval->count);
|
||||
} else {
|
||||
ent->log_seq = seq;
|
||||
ent->log_total += le64_to_cpu(tval->total);
|
||||
ent->log_count += le64_to_cpu(tval->count);
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, totl_read_item);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* these are always _safe, node stores next */
|
||||
#define for_each_xt_ent(ent, node, root) \
|
||||
for (node = rb_first(root); \
|
||||
node && (ent = rb_entry(node, struct xattr_total_entry, node), \
|
||||
node = rb_next(node), 1); )
|
||||
|
||||
#define for_each_xt_ent_reverse(ent, node, root) \
|
||||
for (node = rb_last(root); \
|
||||
node && (ent = rb_entry(node, struct xattr_total_entry, node), \
|
||||
node = rb_prev(node), 1); )
|
||||
|
||||
static void free_xt_ent(struct rb_root *root, struct xattr_total_entry *ent)
|
||||
{
|
||||
rb_erase(&ent->node, root);
|
||||
kfree(ent);
|
||||
}
|
||||
|
||||
static void free_all_xt_ents(struct rb_root *root)
|
||||
{
|
||||
struct xattr_total_entry *ent;
|
||||
struct rb_node *node;
|
||||
|
||||
for_each_xt_ent(ent, node, root)
|
||||
free_xt_ent(root, ent);
|
||||
if (++cba->copied < cba->total)
|
||||
return -EAGAIN;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1162,30 +1063,6 @@ static void free_all_xt_ents(struct rb_root *root)
|
||||
* have been committed. It doesn't use locking to force commits and
|
||||
* block writers so it can be a little bit out of date with respect to
|
||||
* dirty xattrs in memory across the system.
|
||||
*
|
||||
* Our reader has to be careful because the log btree merging code can
|
||||
* write partial results to the fs_root. This means that a reader can
|
||||
* see both cases where new finalized logs should be applied to the old
|
||||
* fs items and where old finalized logs have already been applied to
|
||||
* the partially merged fs items. Currently active logged items are
|
||||
* always applied on top of all cases.
|
||||
*
|
||||
* These cases are differentiated with a combination of sequence numbers
|
||||
* in items, the count of contributing xattrs, and a flag
|
||||
* differentiating finalized and active logged items. This lets us
|
||||
* recognize all cases, including when finalized logs were merged and
|
||||
* deleted the fs item.
|
||||
*
|
||||
* We're allocating a tracking struct for each totl name we see while
|
||||
* traversing the item btrees. The forest reader is providing the items
|
||||
* it finds in leaf blocks that contain the search key. In the worst
|
||||
* case all of these blocks are full and none of the items overlap. At
|
||||
* most, figure order a thousand names per mount. But in practice many
|
||||
* of these factors fall away: leaf blocks aren't full, leaf items
|
||||
* overlap, there aren't finalized log btrees, and not all mounts are
|
||||
* actively changing totals. We're much more likely to only read a
|
||||
* leaf block's worth of totals that have been long since merged into
|
||||
* the fs_root.
|
||||
*/
|
||||
static long scoutfs_ioc_read_xattr_totals(struct file *file, unsigned long arg)
|
||||
{
|
||||
@@ -1193,14 +1070,13 @@ static long scoutfs_ioc_read_xattr_totals(struct file *file, unsigned long arg)
|
||||
struct scoutfs_ioctl_read_xattr_totals __user *urxt = (void __user *)arg;
|
||||
struct scoutfs_ioctl_read_xattr_totals rxt;
|
||||
struct scoutfs_ioctl_xattr_total __user *uxt;
|
||||
struct xattr_total_entry *ent;
|
||||
struct read_xattr_total_iter_cb_args cba = {NULL, };
|
||||
struct scoutfs_key range_start;
|
||||
struct scoutfs_key range_end;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_key bloom_key;
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
struct rb_root root = RB_ROOT;
|
||||
struct rb_node *node;
|
||||
int count = 0;
|
||||
unsigned int copied = 0;
|
||||
unsigned int total;
|
||||
unsigned int ready;
|
||||
int ret;
|
||||
|
||||
if (!(file->f_mode & FMODE_READ)) {
|
||||
@@ -1213,6 +1089,13 @@ static long scoutfs_ioc_read_xattr_totals(struct file *file, unsigned long arg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
cba.xt = (void *)__get_free_page(GFP_KERNEL);
|
||||
if (!cba.xt) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
cba.total = PAGE_SIZE / sizeof(struct scoutfs_ioctl_xattr_total);
|
||||
|
||||
if (copy_from_user(&rxt, urxt, sizeof(rxt))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
@@ -1225,101 +1108,40 @@ static long scoutfs_ioc_read_xattr_totals(struct file *file, unsigned long arg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
scoutfs_key_set_zeros(&bloom_key);
|
||||
bloom_key.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
scoutfs_xattr_init_totl_key(&start, rxt.pos_name);
|
||||
total = div_u64(min_t(u64, rxt.totals_bytes, INT_MAX),
|
||||
sizeof(struct scoutfs_ioctl_xattr_total));
|
||||
|
||||
while (rxt.totals_bytes >= sizeof(struct scoutfs_ioctl_xattr_total)) {
|
||||
scoutfs_totl_set_range(&range_start, &range_end);
|
||||
scoutfs_xattr_init_totl_key(&key, rxt.pos_name);
|
||||
|
||||
scoutfs_key_set_ones(&end);
|
||||
end.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
if (scoutfs_key_compare(&start, &end) > 0)
|
||||
while (copied < total) {
|
||||
cba.copied = 0;
|
||||
ret = scoutfs_wkic_iterate(sb, &key, &range_end, &range_start, &range_end,
|
||||
read_xattr_total_iter_cb, &cba);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (cba.copied == 0)
|
||||
break;
|
||||
|
||||
key = start;
|
||||
ret = scoutfs_forest_read_items(sb, &key, &bloom_key, &start, &end,
|
||||
read_xattr_total_item, &root);
|
||||
if (ret < 0) {
|
||||
if (ret == -ESTALE) {
|
||||
free_all_xt_ents(&root);
|
||||
continue;
|
||||
}
|
||||
ready = min(total - copied, cba.copied);
|
||||
|
||||
if (copy_to_user(&uxt[copied], cba.xt, ready * sizeof(cba.xt[0]))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (RB_EMPTY_ROOT(&root))
|
||||
break;
|
||||
|
||||
/* trim totals that fall outside of the consistent range */
|
||||
for_each_xt_ent(ent, node, &root) {
|
||||
scoutfs_xattr_init_totl_key(&key, ent->xt.name);
|
||||
if (scoutfs_key_compare(&key, &start) < 0) {
|
||||
free_xt_ent(&root, ent);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
for_each_xt_ent_reverse(ent, node, &root) {
|
||||
scoutfs_xattr_init_totl_key(&key, ent->xt.name);
|
||||
if (scoutfs_key_compare(&key, &end) > 0) {
|
||||
free_xt_ent(&root, ent);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* copy resulting unique non-zero totals to userspace */
|
||||
for_each_xt_ent(ent, node, &root) {
|
||||
if (rxt.totals_bytes < sizeof(ent->xt))
|
||||
break;
|
||||
|
||||
/* start with the fs item if we have it */
|
||||
if (ent->fs_seq != 0) {
|
||||
ent->xt.total = ent->fs_total;
|
||||
ent->xt.count = ent->fs_count;
|
||||
scoutfs_inc_counter(sb, totl_read_fs);
|
||||
}
|
||||
|
||||
/* apply finalized logs if they're newer or creating */
|
||||
if (((ent->fs_seq != 0) && (ent->fin_seq > ent->fs_seq)) ||
|
||||
((ent->fs_seq == 0) && (ent->fin_count > 0))) {
|
||||
ent->xt.total += ent->fin_total;
|
||||
ent->xt.count += ent->fin_count;
|
||||
scoutfs_inc_counter(sb, totl_read_finalized);
|
||||
}
|
||||
|
||||
/* always apply active logs which must be newer than fs and finalized */
|
||||
if (ent->log_seq > 0) {
|
||||
ent->xt.total += ent->log_total;
|
||||
ent->xt.count += ent->log_count;
|
||||
scoutfs_inc_counter(sb, totl_read_logged);
|
||||
}
|
||||
|
||||
if (ent->xt.total != 0 || ent->xt.count != 0) {
|
||||
if (copy_to_user(uxt, &ent->xt, sizeof(ent->xt))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
uxt++;
|
||||
rxt.totals_bytes -= sizeof(ent->xt);
|
||||
count++;
|
||||
scoutfs_inc_counter(sb, totl_read_copied);
|
||||
}
|
||||
|
||||
free_xt_ent(&root, ent);
|
||||
}
|
||||
|
||||
/* continue after the last possible key read */
|
||||
start = end;
|
||||
scoutfs_key_inc(&start);
|
||||
scoutfs_xattr_init_totl_key(&key, cba.xt[ready - 1].name);
|
||||
scoutfs_key_inc(&key);
|
||||
copied += ready;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
free_all_xt_ents(&root);
|
||||
if (cba.xt)
|
||||
free_page((long)cba.xt);
|
||||
|
||||
return ret ?: count;
|
||||
return ret ?: copied;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_get_allocated_inos(struct file *file, unsigned long arg)
|
||||
@@ -1504,6 +1326,265 @@ out:
|
||||
return nr ?: ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_get_attr_x(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_ioctl_inode_attr_x __user *uiax = (void __user *)arg;
|
||||
struct scoutfs_ioctl_inode_attr_x *iax = NULL;
|
||||
int ret;
|
||||
|
||||
iax = kmalloc(sizeof(struct scoutfs_ioctl_inode_attr_x), GFP_KERNEL);
|
||||
if (!iax) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = get_user(iax->x_mask, &uiax->x_mask) ?:
|
||||
get_user(iax->x_flags, &uiax->x_flags);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_get_attr_x(inode, iax);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* only copy results after dropping cluster locks (could fault) */
|
||||
if (ret > 0 && copy_to_user(uiax, iax, ret) != 0)
|
||||
ret = -EFAULT;
|
||||
else
|
||||
ret = 0;
|
||||
out:
|
||||
kfree(iax);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_set_attr_x(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct inode *inode = file_inode(file);
|
||||
struct scoutfs_ioctl_inode_attr_x __user *uiax = (void __user *)arg;
|
||||
struct scoutfs_ioctl_inode_attr_x *iax = NULL;
|
||||
int ret;
|
||||
|
||||
iax = kmalloc(sizeof(struct scoutfs_ioctl_inode_attr_x), GFP_KERNEL);
|
||||
if (!iax) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(iax, uiax, sizeof(struct scoutfs_ioctl_inode_attr_x))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = mnt_want_write_file(file);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_set_attr_x(inode, iax);
|
||||
|
||||
mnt_drop_write_file(file);
|
||||
out:
|
||||
kfree(iax);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_get_quota_rules(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_get_quota_rules __user *ugqr = (void __user *)arg;
|
||||
struct scoutfs_ioctl_get_quota_rules gqr;
|
||||
struct scoutfs_ioctl_quota_rule __user *uirules;
|
||||
struct scoutfs_ioctl_quota_rule *irules;
|
||||
struct page *page = NULL;
|
||||
int copied = 0;
|
||||
int nr;
|
||||
int ret;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (copy_from_user(&gqr, ugqr, sizeof(gqr)))
|
||||
return -EFAULT;
|
||||
|
||||
if (gqr.rules_nr == 0)
|
||||
return 0;
|
||||
|
||||
uirules = (void __user *)gqr.rules_ptr;
|
||||
/* limit rules copied per call */
|
||||
gqr.rules_nr = min_t(u64, gqr.rules_nr, INT_MAX);
|
||||
|
||||
page = alloc_page(GFP_KERNEL | __GFP_ZERO);
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
irules = page_address(page);
|
||||
|
||||
while (copied < gqr.rules_nr) {
|
||||
nr = min_t(u64, gqr.rules_nr - copied,
|
||||
PAGE_SIZE / sizeof(struct scoutfs_ioctl_quota_rule));
|
||||
ret = scoutfs_quota_get_rules(sb, gqr.iterator, page_address(page), nr);
|
||||
if (ret <= 0)
|
||||
goto out;
|
||||
|
||||
if (copy_to_user(&uirules[copied], irules, ret * sizeof(irules[0]))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
copied += ret;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (page)
|
||||
__free_page(page);
|
||||
|
||||
if (ret == 0 && copy_to_user(ugqr->iterator, gqr.iterator, sizeof(gqr.iterator)))
|
||||
ret = -EFAULT;
|
||||
|
||||
return ret ?: copied;
|
||||
}
|
||||
|
||||
/*
 * Shared handler for the add and delete quota rule ioctls.  The rule is
 * copied in from userspace and passed to scoutfs_quota_mod_rule() along
 * with is_add, which is true for addition and false for deletion.
 *
 * Requires CAP_SYS_ADMIN.  Returns -EPERM, -EFAULT, or the result of
 * scoutfs_quota_mod_rule().
 */
static long scoutfs_ioc_mod_quota_rule(struct file *file, unsigned long arg, bool is_add)
{
	struct scoutfs_ioctl_quota_rule __user *uirule = (void __user *)arg;
	struct scoutfs_ioctl_quota_rule irule;
	struct super_block *sb;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (copy_from_user(&irule, uirule, sizeof(irule)) != 0)
		return -EFAULT;

	sb = file_inode(file)->i_sb;

	return scoutfs_quota_mod_rule(sb, is_add, &irule);
}
|
||||
|
||||
struct read_index_buf {
|
||||
int nr;
|
||||
int size;
|
||||
struct scoutfs_ioctl_xattr_index_entry ents[0];
|
||||
};
|
||||
|
||||
#define READ_INDEX_BUF_MAX_ENTS \
|
||||
((PAGE_SIZE - sizeof(struct read_index_buf)) / \
|
||||
sizeof(struct scoutfs_ioctl_xattr_index_entry))
|
||||
|
||||
/*
|
||||
* This doesn't filter out duplicates, the caller filters them out to
|
||||
* catch duplicates between iteration calls.
|
||||
*/
|
||||
static int read_index_cb(struct scoutfs_key *key, void *val, unsigned int val_len, void *cb_arg)
|
||||
{
|
||||
struct read_index_buf *rib = cb_arg;
|
||||
struct scoutfs_ioctl_xattr_index_entry *ent = &rib->ents[rib->nr];
|
||||
u64 xid;
|
||||
|
||||
if (val_len != 0)
|
||||
return -EIO;
|
||||
|
||||
/* discard the xid, they're not exposed to ioctl callers */
|
||||
scoutfs_xattr_get_indx_key(key, &ent->major, &ent->minor, &ent->ino, &xid);
|
||||
|
||||
if (++rib->nr == rib->size)
|
||||
return rib->nr;
|
||||
|
||||
return -EAGAIN;
|
||||
}
|
||||
|
||||
static long scoutfs_ioc_read_xattr_index(struct file *file, unsigned long arg)
|
||||
{
|
||||
struct super_block *sb = file_inode(file)->i_sb;
|
||||
struct scoutfs_ioctl_read_xattr_index __user *urxi = (void __user *)arg;
|
||||
struct scoutfs_ioctl_xattr_index_entry __user *uents;
|
||||
struct scoutfs_ioctl_xattr_index_entry *ent;
|
||||
struct scoutfs_ioctl_xattr_index_entry prev;
|
||||
struct scoutfs_ioctl_read_xattr_index rxi;
|
||||
struct read_index_buf *rib;
|
||||
struct page *page = NULL;
|
||||
struct scoutfs_key first;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
int copied = 0;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
if (!capable(CAP_SYS_ADMIN)) {
|
||||
ret = -EPERM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (copy_from_user(&rxi, urxi, sizeof(rxi))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
uents = (void __user *)rxi.entries_ptr;
|
||||
rxi.entries_nr = min_t(u64, rxi.entries_nr, INT_MAX);
|
||||
|
||||
page = alloc_page(GFP_KERNEL);
|
||||
if (!page) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
rib = page_address(page);
|
||||
|
||||
scoutfs_xattr_init_indx_key(&first, rxi.first.major, rxi.first.minor, rxi.first.ino, 0);
|
||||
scoutfs_xattr_init_indx_key(&last, rxi.last.major, rxi.last.minor, rxi.last.ino, U64_MAX);
|
||||
scoutfs_xattr_indx_get_range(&start, &end);
|
||||
|
||||
if (scoutfs_key_compare(&first, &last) > 0) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* 0 ino doesn't exist, can't ever match entry to return */
|
||||
memset(&prev, 0, sizeof(prev));
|
||||
|
||||
while (copied < rxi.entries_nr) {
|
||||
rib->nr = 0;
|
||||
rib->size = min_t(u64, rxi.entries_nr - copied, READ_INDEX_BUF_MAX_ENTS);
|
||||
ret = scoutfs_wkic_iterate(sb, &first, &last, &start, &end,
|
||||
read_index_cb, rib);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
if (rib->nr == 0)
|
||||
break;
|
||||
|
||||
/*
|
||||
* Copy entries to userspace, skipping duplicate entries
|
||||
* that can result from multiple xattrs indexing an
|
||||
* inode at the same position and which can span
|
||||
* multiple cache iterations. (Comparing in order of
|
||||
* most likely to change to fail fast.)
|
||||
*/
|
||||
for (i = 0, ent = rib->ents; i < rib->nr; i++, ent++) {
|
||||
if (ent->ino == prev.ino && ent->minor == prev.minor &&
|
||||
ent->major == prev.major)
|
||||
continue;
|
||||
|
||||
if (copy_to_user(&uents[copied], ent, sizeof(*ent))) {
|
||||
ret = -EFAULT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
prev = *ent;
|
||||
copied++;
|
||||
}
|
||||
|
||||
scoutfs_xattr_init_indx_key(&first, prev.major, prev.minor, prev.ino, U64_MAX);
|
||||
scoutfs_key_inc(&first);
|
||||
}
|
||||
|
||||
ret = copied;
|
||||
out:
|
||||
if (page)
|
||||
__free_page(page);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
{
|
||||
switch (cmd) {
|
||||
@@ -1541,6 +1622,18 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||
return scoutfs_ioc_get_allocated_inos(file, arg);
|
||||
case SCOUTFS_IOC_GET_REFERRING_ENTRIES:
|
||||
return scoutfs_ioc_get_referring_entries(file, arg);
|
||||
case SCOUTFS_IOC_GET_ATTR_X:
|
||||
return scoutfs_ioc_get_attr_x(file, arg);
|
||||
case SCOUTFS_IOC_SET_ATTR_X:
|
||||
return scoutfs_ioc_set_attr_x(file, arg);
|
||||
case SCOUTFS_IOC_GET_QUOTA_RULES:
|
||||
return scoutfs_ioc_get_quota_rules(file, arg);
|
||||
case SCOUTFS_IOC_ADD_QUOTA_RULE:
|
||||
return scoutfs_ioc_mod_quota_rule(file, arg, true);
|
||||
case SCOUTFS_IOC_DEL_QUOTA_RULE:
|
||||
return scoutfs_ioc_mod_quota_rule(file, arg, false);
|
||||
case SCOUTFS_IOC_READ_XATTR_INDEX:
|
||||
return scoutfs_ioc_read_xattr_index(file, arg);
|
||||
}
|
||||
|
||||
return -ENOTTY;
|
||||
|
||||
170
kmod/src/ioctl.h
170
kmod/src/ioctl.h
@@ -673,4 +673,174 @@ struct scoutfs_ioctl_dirent {
|
||||
#define SCOUTFS_IOC_GET_REFERRING_ENTRIES \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 17, struct scoutfs_ioctl_get_referring_entries)
|
||||
|
||||
/*
 * Argument struct shared by the SCOUTFS_IOC_GET_ATTR_X and
 * SCOUTFS_IOC_SET_ATTR_X ioctls.  x_mask carries the SCOUTFS_IOC_IAX_
 * bits that name the attributes to get or set, x_flags carries the
 * SCOUTFS_IOC_IAX_F_ behavioral flags, and bits carries the
 * SCOUTFS_IOC_IAX_B_ single-bit values.
 */
struct scoutfs_ioctl_inode_attr_x {
|
||||
__u64 x_mask;
|
||||
__u64 x_flags;
|
||||
__u64 meta_seq;
|
||||
__u64 data_seq;
|
||||
__u64 data_version;
|
||||
__u64 online_blocks;
|
||||
__u64 offline_blocks;
|
||||
__u64 ctime_sec;
|
||||
/* the two 32bit nsec fields sit together so the 64bit fields after them stay 8-byte aligned */
__u32 ctime_nsec;
|
||||
__u32 crtime_nsec;
|
||||
__u64 crtime_sec;
|
||||
__u64 size;
|
||||
/* SCOUTFS_IOC_IAX_B_ values */
__u64 bits;
|
||||
__u64 project_id;
|
||||
};
|
||||
|
||||
/*
|
||||
* Behavioral flags set in the x_flags field. These flags don't
|
||||
* necessarily correspond to specific attributes, but instead change the
|
||||
* behaviour of a _get_ or _set_ operation.
|
||||
*
|
||||
* @SCOUTFS_IOC_IAX_F_SIZE_OFFLINE: When setting i_size, also create
|
||||
* extents which are marked offline for the region of the file from
|
||||
* offset 0 to the new set size. This can only be set when setting the
|
||||
* size and has no effect if setting the size fails.
|
||||
*/
|
||||
#define SCOUTFS_IOC_IAX_F_SIZE_OFFLINE (1ULL << 0)
|
||||
#define SCOUTFS_IOC_IAX_F__UNKNOWN (U64_MAX << 1)
|
||||
|
||||
/*
|
||||
* Single-bit values stored in the @bits field. These indicate whether
|
||||
* the bit is set, or not. The main _IAX_ bits set in the mask indicate
|
||||
* whether this value bit is populated by _get or stored by _set.
|
||||
*/
|
||||
#define SCOUTFS_IOC_IAX_B_RETENTION (1ULL << 0)
|
||||
|
||||
/*
|
||||
* x_mask bits which indicate which attributes of the inode to populate
|
||||
* on return for _get or to set on the inode for _set. Each mask bit
|
||||
* corresponds to the matching named field in the attr_x struct passed
|
||||
* to the _get_ and _set_ calls.
|
||||
*
|
||||
* Each field can have different permissions or other attribute
|
||||
* requirements which can cause calls to fail. If _set_ fails then no
|
||||
* other attribute changes will have been made by the same call.
|
||||
*
|
||||
* @SCOUTFS_IOC_IAX_RETENTION: Mark a file for retention. When marked,
|
||||
* no modification can be made to the file other than changing extended
|
||||
* attributes outside the "user." prefix and clearing the retention
|
||||
* mark. This can only be set on regular files and requires root (the
|
||||
* CAP_SYS_ADMIN capability). Other attributes can be set with a
|
||||
* set_attr_x call on a retention inode as long as that call also
|
||||
* successfully clears the retention mark.
|
||||
*/
|
||||
#define SCOUTFS_IOC_IAX_META_SEQ (1ULL << 0)
|
||||
#define SCOUTFS_IOC_IAX_DATA_SEQ (1ULL << 1)
|
||||
#define SCOUTFS_IOC_IAX_DATA_VERSION (1ULL << 2)
|
||||
#define SCOUTFS_IOC_IAX_ONLINE_BLOCKS (1ULL << 3)
|
||||
#define SCOUTFS_IOC_IAX_OFFLINE_BLOCKS (1ULL << 4)
|
||||
#define SCOUTFS_IOC_IAX_CTIME (1ULL << 5)
|
||||
#define SCOUTFS_IOC_IAX_CRTIME (1ULL << 6)
|
||||
#define SCOUTFS_IOC_IAX_SIZE (1ULL << 7)
|
||||
#define SCOUTFS_IOC_IAX_RETENTION (1ULL << 8)
|
||||
#define SCOUTFS_IOC_IAX_PROJECT_ID (1ULL << 9)
|
||||
|
||||
/* single bit attributes that are packed in the bits field as _B_ */
|
||||
#define SCOUTFS_IOC_IAX__BITS (SCOUTFS_IOC_IAX_RETENTION)
|
||||
/* inverse of all the bits we understand */
|
||||
#define SCOUTFS_IOC_IAX__UNKNOWN (U64_MAX << 10)
|
||||
|
||||
#define SCOUTFS_IOC_GET_ATTR_X \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 18, struct scoutfs_ioctl_inode_attr_x)
|
||||
|
||||
#define SCOUTFS_IOC_SET_ATTR_X \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 19, struct scoutfs_ioctl_inode_attr_x)
|
||||
|
||||
/*
|
||||
* (These fields are documented in the order that they're displayed by
|
||||
* the scoutfs cli utility which matches the sort order of the rules.)
|
||||
*
|
||||
* @prio: The priority of the rule. Rules are sorted by their fields
|
||||
* with prio at the highest magnitude. When multiple rules match the
|
||||
* rule with the highest sort order is enforced. The priority field
|
||||
* lets rules override the default field sort order.
|
||||
*
|
||||
* @name_val[3]: The three 64bit values that make up the name of the
|
||||
* totl xattr whose total will be checked against the rule's limit to
|
||||
* see if the quota rule has been exceeded. The behavior of the values
|
||||
* can be changed by their corresponding name_source and name_flags.
|
||||
*
|
||||
* @name_source[3]: The SQ_NS_ enums that control where the value comes
|
||||
* from. _LITERAL uses the value from name_val. Inode attribute
|
||||
* sources (_PROJ, _UID, _GID) are taken from the inode of the operation
|
||||
* that is being checked against the rule.
|
||||
*
|
||||
* @name_flags[3]: The SQ_NF_ enums that alter the name values. _SELECT
|
||||
* makes the rule only match if the inode attribute of the operation
|
||||
* matches the attribute value stored in name_val. This lets rules
|
||||
* match a specific value of an attribute rather than mapping all
|
||||
* attribute values to totl names.
|
||||
*
|
||||
* @op: The SQ_OP_ enums which specify the operation that can't exceed
|
||||
* the rule's limit. _INODE checks inode creation and the inode
|
||||
* attributes are taken from the inode that would be created. _DATA
|
||||
* checks file data block allocation and the inode fields come from the
|
||||
* inode that is allocating the blocks.
|
||||
*
|
||||
* @limit: The 64bit value that is checked against the totl value
|
||||
* described by the rule. If the totl value is greater than or equal to
|
||||
* this value of the matching rule then the operation will return
|
||||
* -EDQUOT.
|
||||
*
|
||||
* @rule_flags: SQ_RF_TOTL_COUNT indicates that the rule's limit should
|
||||
* be checked against the number of xattrs contributing to a totl value
|
||||
* instead of the sum of the xattrs.
|
||||
*/
|
||||
struct scoutfs_ioctl_quota_rule {
|
||||
__u64 name_val[3];
|
||||
__u64 limit;
|
||||
__u8 prio;
|
||||
__u8 op;
|
||||
__u8 rule_flags;
|
||||
/* name_source[i] and name_flags[i] apply to name_val[i] */
__u8 name_source[3];
|
||||
__u8 name_flags[3];
|
||||
/* explicit padding: 41 bytes of fields + 7 = 48 bytes, a multiple of 8 */
__u8 _pad[7];
|
||||
};
|
||||
|
||||
/*
 * Argument for SCOUTFS_IOC_GET_QUOTA_RULES.
 *
 * @iterator: position state handed to the kernel's rule reader on each
 * call and written back to this struct when the ioctl succeeds, so the
 * caller can pass the same struct again to continue.
 *
 * @rules_ptr: userspace address of an array of struct
 * scoutfs_ioctl_quota_rule that receives the rules.
 *
 * @rules_nr: the number of rules the array can hold.  A value of 0
 * returns 0 immediately and larger values are limited to INT_MAX per
 * call.
 */
struct scoutfs_ioctl_get_quota_rules {
|
||||
__u64 iterator[2];
|
||||
__u64 rules_ptr;
|
||||
__u64 rules_nr;
|
||||
};
|
||||
|
||||
/*
|
||||
* Rules are uniquely identified by their non-padded fields. Addition will fail
|
||||
* with -EEXIST if the specified rule already exists and deletion must find a rule
|
||||
* with all matching fields to delete.
|
||||
*/
|
||||
#define SCOUTFS_IOC_GET_QUOTA_RULES \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 20, struct scoutfs_ioctl_get_quota_rules)
|
||||
#define SCOUTFS_IOC_ADD_QUOTA_RULE \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 21, struct scoutfs_ioctl_quota_rule)
|
||||
#define SCOUTFS_IOC_DEL_QUOTA_RULE \
|
||||
_IOW(SCOUTFS_IOCTL_MAGIC, 22, struct scoutfs_ioctl_quota_rule)
|
||||
|
||||
/*
|
||||
* Inodes can be indexed in a global key space at a position determined
|
||||
* by a .indx. tagged xattr. The xattr name specifies the two index
|
||||
* position values, with major having the more significant comparison
|
||||
* order.
|
||||
*/
|
||||
struct scoutfs_ioctl_xattr_index_entry {
|
||||
/* fields are not declared in comparison order: major is the most significant */
__u64 minor;
|
||||
__u64 ino;
|
||||
__u8 major;
|
||||
/* explicit padding: 8 + 8 + 1 + 7 = 24 bytes, a multiple of 8 */
__u8 _pad[7];
|
||||
};
|
||||
|
||||
/*
 * Argument for SCOUTFS_IOC_READ_XATTR_INDEX.
 *
 * @flags: NOTE(review): the ioctl handler does not appear to reference
 * this field; presumably reserved -- confirm.
 *
 * @first, @last: the index positions bounding the entries to return.
 * The call fails with -EINVAL if first sorts after last.
 *
 * @entries_ptr: userspace address of an array of struct
 * scoutfs_ioctl_xattr_index_entry that receives the entries.
 *
 * @entries_nr: the number of entries the array can hold, limited to
 * INT_MAX per call.
 */
struct scoutfs_ioctl_read_xattr_index {
|
||||
__u64 flags;
|
||||
struct scoutfs_ioctl_xattr_index_entry first;
|
||||
struct scoutfs_ioctl_xattr_index_entry last;
|
||||
__u64 entries_ptr;
|
||||
__u64 entries_nr;
|
||||
};
|
||||
|
||||
#define SCOUTFS_IOC_READ_XATTR_INDEX \
|
||||
_IOR(SCOUTFS_IOCTL_MAGIC, 23, struct scoutfs_ioctl_read_xattr_index)
|
||||
|
||||
#endif
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "item.h"
|
||||
#include "forest.h"
|
||||
#include "block.h"
|
||||
#include "msg.h"
|
||||
#include "trans.h"
|
||||
#include "counters.h"
|
||||
#include "scoutfs_trace.h"
|
||||
@@ -1670,13 +1671,24 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lock_safe(struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
static int lock_safe(struct super_block *sb, struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
int mode)
|
||||
{
|
||||
if (WARN_ON_ONCE(!scoutfs_lock_protected(lock, key, mode)))
|
||||
bool prot = scoutfs_lock_protected(lock, key, mode);
|
||||
|
||||
if (!prot) {
|
||||
static bool once = false;
|
||||
if (!once) {
|
||||
scoutfs_err(sb, "lock (start "SK_FMT" end "SK_FMT" mode 0x%x) does not protect operation (key "SK_FMT" mode 0x%x)",
|
||||
SK_ARG(&lock->start), SK_ARG(&lock->end), lock->mode,
|
||||
SK_ARG(key), mode);
|
||||
dump_stack();
|
||||
once = true;
|
||||
}
|
||||
return -EINVAL;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int optional_lock_mode_match(struct scoutfs_lock *lock, int mode)
|
||||
@@ -1708,8 +1720,8 @@ static int copy_val(void *dst, int dst_len, void *src, int src_len)
|
||||
* The amount of bytes copied is returned which can be 0 or truncated if
|
||||
* the caller's buffer isn't big enough.
|
||||
*/
|
||||
int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
static int item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, int len_limit, struct scoutfs_lock *lock)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct cached_item *item;
|
||||
@@ -1718,7 +1730,7 @@ int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_lookup);
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_READ)))
|
||||
if ((ret = lock_safe(sb, lock, key, SCOUTFS_LOCK_READ)))
|
||||
goto out;
|
||||
|
||||
ret = get_cached_page(sb, cinf, lock, key, false, false, 0, &pg);
|
||||
@@ -1729,6 +1741,8 @@ int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
item = item_rbtree_walk(&pg->item_root, key, NULL, NULL, NULL);
|
||||
if (!item || item->deletion)
|
||||
ret = -ENOENT;
|
||||
else if (len_limit > 0 && item->val_len > len_limit)
|
||||
ret = -EIO;
|
||||
else
|
||||
ret = copy_val(val, val_len, item->val, item->val_len);
|
||||
|
||||
@@ -1737,13 +1751,38 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Public lookup entry point: forwards to item_lookup() with a length
 * limit of 0, which item_lookup() treats as "no limit" on the size of
 * the item's value.
 */
int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
			void *val, int val_len, struct scoutfs_lock *lock)
{
	int ret;

	ret = item_lookup(sb, key, val, val_len, 0, lock);

	return ret;
}
|
||||
|
||||
/*
 * Look up an item whose value may be shorter than the caller's buffer.
 * The buffer length is also passed as the lookup's length limit, so an
 * item with a larger value fails with -EIO.  When the value is shorter
 * than the buffer the remaining tail of the buffer is zeroed.
 *
 * Returns the number of value bytes copied, which is 0 for an item
 * with no value, or a negative errno from the lookup.
 */
int scoutfs_item_lookup_smaller_zero(struct super_block *sb, struct scoutfs_key *key,
				     void *val, int val_len, struct scoutfs_lock *lock)
{
	int copied = item_lookup(sb, key, val, val_len, val_len, lock);

	/* errors and completely filled buffers have no tail to clear */
	if (copied < 0 || copied >= val_len)
		return copied;

	memset(val + copied, 0, val_len - copied);

	return copied;
}
|
||||
|
||||
int scoutfs_item_lookup_exact(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_item_lookup(sb, key, val, val_len, lock);
|
||||
ret = item_lookup(sb, key, val, val_len, 0, lock);
|
||||
if (ret == val_len)
|
||||
ret = 0;
|
||||
else if (ret >= 0)
|
||||
@@ -1793,7 +1832,7 @@ int scoutfs_item_next(struct super_block *sb, struct scoutfs_key *key,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_READ)))
|
||||
if ((ret = lock_safe(sb, lock, key, SCOUTFS_LOCK_READ)))
|
||||
goto out;
|
||||
|
||||
pos = *key;
|
||||
@@ -1874,7 +1913,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_dirty);
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_WRITE)))
|
||||
if ((ret = lock_safe(sb, lock, key, SCOUTFS_LOCK_WRITE)))
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_forest_set_bloom_bits(sb, lock);
|
||||
@@ -1920,7 +1959,7 @@ static int item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_create);
|
||||
|
||||
if ((ret = lock_safe(lock, key, mode)) ||
|
||||
if ((ret = lock_safe(sb, lock, key, mode)) ||
|
||||
(ret = optional_lock_mode_match(primary, SCOUTFS_LOCK_WRITE)))
|
||||
goto out;
|
||||
|
||||
@@ -1963,7 +2002,7 @@ int scoutfs_item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
{
|
||||
return item_create(sb, key, val, val_len, lock, NULL,
|
||||
SCOUTFS_LOCK_READ, false);
|
||||
SCOUTFS_LOCK_WRITE, false);
|
||||
}
|
||||
|
||||
int scoutfs_item_create_force(struct super_block *sb, struct scoutfs_key *key,
|
||||
@@ -1994,7 +2033,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_update);
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_WRITE)))
|
||||
if ((ret = lock_safe(sb, lock, key, SCOUTFS_LOCK_WRITE)))
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_forest_set_bloom_bits(sb, lock);
|
||||
@@ -2062,7 +2101,7 @@ int scoutfs_item_delta(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_delta);
|
||||
|
||||
if ((ret = lock_safe(lock, key, SCOUTFS_LOCK_WRITE_ONLY)))
|
||||
if ((ret = lock_safe(sb, lock, key, SCOUTFS_LOCK_WRITE_ONLY)))
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_forest_set_bloom_bits(sb, lock);
|
||||
@@ -2135,7 +2174,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_delete);
|
||||
|
||||
if ((ret = lock_safe(lock, key, mode)) ||
|
||||
if ((ret = lock_safe(sb, lock, key, mode)) ||
|
||||
(ret = optional_lock_mode_match(primary, SCOUTFS_LOCK_WRITE)))
|
||||
goto out;
|
||||
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
|
||||
int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_lookup_smaller_zero(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_lookup_exact(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
@@ -36,6 +36,8 @@
|
||||
#include "item.h"
|
||||
#include "omap.h"
|
||||
#include "util.h"
|
||||
#include "totl.h"
|
||||
#include "quota.h"
|
||||
|
||||
/*
|
||||
* scoutfs uses a lock service to manage item cache consistency between
|
||||
@@ -185,6 +187,9 @@ static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (lock->start.sk_zone == SCOUTFS_QUOTA_ZONE && !lock_mode_can_read(mode))
|
||||
scoutfs_quota_invalidate(sb);
|
||||
|
||||
/* have to invalidate if we're not in the only usable case */
|
||||
if (!(prev == SCOUTFS_LOCK_WRITE && mode == SCOUTFS_LOCK_READ)) {
|
||||
retry:
|
||||
@@ -1244,10 +1249,29 @@ int scoutfs_lock_xattr_totl(struct super_block *sb, enum scoutfs_lock_mode mode,
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
|
||||
scoutfs_key_set_zeros(&start);
|
||||
start.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
scoutfs_key_set_ones(&end);
|
||||
end.sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
scoutfs_totl_set_range(&start, &end);
|
||||
|
||||
return lock_key_range(sb, mode, flags, &start, &end, lock);
|
||||
}
|
||||
|
||||
int scoutfs_lock_xattr_indx(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
|
||||
scoutfs_xattr_indx_get_range(&start, &end);
|
||||
|
||||
return lock_key_range(sb, mode, flags, &start, &end, lock);
|
||||
}
|
||||
|
||||
int scoutfs_lock_quota(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
struct scoutfs_key end;
|
||||
|
||||
scoutfs_quota_get_lock_range(&start, &end);
|
||||
|
||||
return lock_key_range(sb, mode, flags, &start, &end, lock);
|
||||
}
|
||||
|
||||
@@ -86,6 +86,10 @@ int scoutfs_lock_orphan(struct super_block *sb, enum scoutfs_lock_mode mode, int
|
||||
u64 ino, struct scoutfs_lock **lock);
|
||||
int scoutfs_lock_xattr_totl(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock);
|
||||
int scoutfs_lock_xattr_indx(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock);
|
||||
int scoutfs_lock_quota(struct super_block *sb, enum scoutfs_lock_mode mode, int flags,
|
||||
struct scoutfs_lock **lock);
|
||||
void scoutfs_unlock(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
enum scoutfs_lock_mode mode);
|
||||
|
||||
|
||||
1266
kmod/src/quota.c
Normal file
1266
kmod/src/quota.c
Normal file
File diff suppressed because it is too large
Load Diff
48
kmod/src/quota.h
Normal file
48
kmod/src/quota.h
Normal file
@@ -0,0 +1,48 @@
|
||||
#ifndef _SCOUTFS_QUOTA_H_
|
||||
#define _SCOUTFS_QUOTA_H_
|
||||
|
||||
#include "ioctl.h"
|
||||
|
||||
/*
|
||||
* Each rule's name can be in the ruleset's rbtree associated with the
|
||||
* source attr that it selects. This lets checks only test rules that
|
||||
* the inputs could match. The 'i' field indicates which name is in the
|
||||
* tree so we can find the containing rule.
|
||||
*
|
||||
* This is mostly private to quota.c but we expose it for tracing.
|
||||
*/
|
||||
struct squota_rule {
|
||||
u64 limit;
|
||||
u8 prio;
|
||||
u8 op;
|
||||
u8 rule_flags;
|
||||
struct squota_rule_name {
|
||||
struct rb_node node;
|
||||
u64 val;
|
||||
u8 source;
|
||||
u8 flags;
|
||||
u8 i;
|
||||
} names[3];
|
||||
};
|
||||
|
||||
/* private to quota.c, only here for tracing */
|
||||
struct squota_input {
|
||||
u64 attrs[SQ_NS__NR_SELECT];
|
||||
u8 op;
|
||||
};
|
||||
|
||||
int scoutfs_quota_check_inode(struct super_block *sb, struct inode *dir);
|
||||
int scoutfs_quota_check_data(struct super_block *sb, struct inode *inode);
|
||||
|
||||
int scoutfs_quota_get_rules(struct super_block *sb, u64 *iterator,
|
||||
struct scoutfs_ioctl_quota_rule *irules, int nr);
|
||||
int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
|
||||
struct scoutfs_ioctl_quota_rule *irule);
|
||||
|
||||
void scoutfs_quota_get_lock_range(struct scoutfs_key *start, struct scoutfs_key *end);
|
||||
void scoutfs_quota_invalidate(struct super_block *sb);
|
||||
|
||||
int scoutfs_quota_setup(struct super_block *sb);
|
||||
void scoutfs_quota_destroy(struct super_block *sb);
|
||||
|
||||
#endif
|
||||
@@ -37,6 +37,10 @@
|
||||
#include "net.h"
|
||||
#include "data.h"
|
||||
#include "ext.h"
|
||||
#include "quota.h"
|
||||
|
||||
#include "trace/quota.h"
|
||||
#include "trace/wkic.h"
|
||||
|
||||
struct lock_info;
|
||||
|
||||
@@ -2395,6 +2399,44 @@ TRACE_EVENT(scoutfs_block_dirty_ref,
|
||||
__entry->block_blkno, __entry->block_seq)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_block_stale,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_block_ref *ref,
|
||||
struct scoutfs_block_header *hdr, u32 magic, u32 crc),
|
||||
|
||||
TP_ARGS(sb, ref, hdr, magic, crc),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, ref_blkno)
|
||||
__field(__u64, ref_seq)
|
||||
__field(__u32, hdr_crc)
|
||||
__field(__u32, hdr_magic)
|
||||
__field(__u64, hdr_fsid)
|
||||
__field(__u64, hdr_seq)
|
||||
__field(__u64, hdr_blkno)
|
||||
__field(__u32, magic)
|
||||
__field(__u32, crc)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->ref_blkno = le64_to_cpu(ref->blkno);
|
||||
__entry->ref_seq = le64_to_cpu(ref->seq);
|
||||
__entry->hdr_crc = le32_to_cpu(hdr->crc);
|
||||
__entry->hdr_magic = le32_to_cpu(hdr->magic);
|
||||
__entry->hdr_fsid = le64_to_cpu(hdr->fsid);
|
||||
__entry->hdr_seq = le64_to_cpu(hdr->seq);
|
||||
__entry->hdr_blkno = le64_to_cpu(hdr->blkno);
|
||||
__entry->magic = magic;
|
||||
__entry->crc = crc;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" ref_blkno %llu ref_seq %016llx hdr_crc %08x hdr_magic %08x hdr_fsid %016llx hdr_seq %016llx hdr_blkno %llu magic %08x crc %08x",
|
||||
SCSB_TRACE_ARGS, __entry->ref_blkno, __entry->ref_seq, __entry->hdr_crc,
|
||||
__entry->hdr_magic, __entry->hdr_fsid, __entry->hdr_seq, __entry->hdr_blkno,
|
||||
__entry->magic, __entry->crc)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_block_class,
|
||||
TP_PROTO(struct super_block *sb, void *bp, u64 blkno, int refcount, int io_count,
|
||||
unsigned long bits, __u64 accessed),
|
||||
|
||||
@@ -49,6 +49,8 @@
|
||||
#include "volopt.h"
|
||||
#include "fence.h"
|
||||
#include "xattr.h"
|
||||
#include "wkic.h"
|
||||
#include "quota.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
static struct dentry *scoutfs_debugfs_root;
|
||||
@@ -194,7 +196,9 @@ static void scoutfs_put_super(struct super_block *sb)
|
||||
scoutfs_shutdown_trans(sb);
|
||||
scoutfs_volopt_destroy(sb);
|
||||
scoutfs_client_destroy(sb);
|
||||
scoutfs_quota_destroy(sb);
|
||||
scoutfs_inode_destroy(sb);
|
||||
scoutfs_wkic_destroy(sb);
|
||||
scoutfs_item_destroy(sb);
|
||||
scoutfs_forest_destroy(sb);
|
||||
scoutfs_data_destroy(sb);
|
||||
@@ -544,7 +548,9 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
scoutfs_block_setup(sb) ?:
|
||||
scoutfs_forest_setup(sb) ?:
|
||||
scoutfs_item_setup(sb) ?:
|
||||
scoutfs_wkic_setup(sb) ?:
|
||||
scoutfs_inode_setup(sb) ?:
|
||||
scoutfs_quota_setup(sb) ?:
|
||||
scoutfs_data_setup(sb) ?:
|
||||
scoutfs_setup_trans(sb) ?:
|
||||
scoutfs_omap_setup(sb) ?:
|
||||
|
||||
@@ -30,6 +30,8 @@ struct recov_info;
|
||||
struct omap_info;
|
||||
struct volopt_info;
|
||||
struct fence_info;
|
||||
struct wkic_info;
|
||||
struct squota_info;
|
||||
|
||||
struct scoutfs_sb_info {
|
||||
struct super_block *sb;
|
||||
@@ -55,6 +57,8 @@ struct scoutfs_sb_info {
|
||||
struct omap_info *omap_info;
|
||||
struct volopt_info *volopt_info;
|
||||
struct item_cache_info *item_cache_info;
|
||||
struct wkic_info *wkic_info;
|
||||
struct squota_info *squota_info;
|
||||
struct fence_info *fence_info;
|
||||
|
||||
/* tracks tasks waiting for data extents */
|
||||
@@ -156,4 +160,17 @@ int scoutfs_write_super(struct super_block *sb,
|
||||
/* to keep this out of the ioctl.h public interface definition */
|
||||
long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
|
||||
|
||||
/*
|
||||
* Returns 0 when supported, non-zero -errno when unsupported.
|
||||
*/
|
||||
static inline int scoutfs_fmt_vers_unsupported(struct super_block *sb, u64 vers)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
if (sbi && (sbi->fmt_vers < vers))
|
||||
return -EOPNOTSUPP;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
90
kmod/src/totl.c
Normal file
90
kmod/src/totl.c
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/string.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "forest.h"
|
||||
#include "totl.h"
|
||||
|
||||
void scoutfs_totl_set_range(struct scoutfs_key *start, struct scoutfs_key *end)
|
||||
{
|
||||
scoutfs_key_set_zeros(start);
|
||||
start->sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
scoutfs_key_set_ones(end);
|
||||
end->sk_zone = SCOUTFS_XATTR_TOTL_ZONE;
|
||||
}
|
||||
|
||||
void scoutfs_totl_merge_init(struct scoutfs_totl_merging *merg)
|
||||
{
|
||||
memset(merg, 0, sizeof(struct scoutfs_totl_merging));
|
||||
}
|
||||
|
||||
void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
|
||||
u64 seq, u8 flags, void *val, int val_len, int fic)
|
||||
{
|
||||
struct scoutfs_xattr_totl_val *tval = val;
|
||||
|
||||
if (fic & FIC_FS_ROOT) {
|
||||
merg->fs_seq = seq;
|
||||
merg->fs_total = le64_to_cpu(tval->total);
|
||||
merg->fs_count = le64_to_cpu(tval->count);
|
||||
} else if (fic & FIC_FINALIZED) {
|
||||
merg->fin_seq = seq;
|
||||
merg->fin_total += le64_to_cpu(tval->total);
|
||||
merg->fin_count += le64_to_cpu(tval->count);
|
||||
} else {
|
||||
merg->log_seq = seq;
|
||||
merg->log_total += le64_to_cpu(tval->total);
|
||||
merg->log_count += le64_to_cpu(tval->count);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* .totl. item merging has to be careful because the log btree merging
|
||||
* code can write partial results to the fs_root. This means that a
|
||||
* reader can see both cases where new finalized logs should be applied
|
||||
* to the old fs items and where old finalized logs have already been
|
||||
* applied to the partially merged fs items. Currently active logged
|
||||
* items are always applied on top of all cases.
|
||||
*
|
||||
* These cases are differentiated with a combination of sequence numbers
|
||||
* in items, the count of contributing xattrs, and a flag
|
||||
* differentiating finalized and active logged items. This lets us
|
||||
* recognize all cases, including when finalized logs were merged and
|
||||
* deleted the fs item.
|
||||
*/
|
||||
void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total, __u64 *count)
|
||||
{
|
||||
*total = 0;
|
||||
*count = 0;
|
||||
|
||||
/* start with the fs item if we have it */
|
||||
if (merg->fs_seq != 0) {
|
||||
*total = merg->fs_total;
|
||||
*count = merg->fs_count;
|
||||
}
|
||||
|
||||
/* apply finalized logs if they're newer or creating */
|
||||
if (((merg->fs_seq != 0) && (merg->fin_seq > merg->fs_seq)) ||
|
||||
((merg->fs_seq == 0) && (merg->fin_count > 0))) {
|
||||
*total += merg->fin_total;
|
||||
*count += merg->fin_count;
|
||||
}
|
||||
|
||||
/* always apply active logs which must be newer than fs and finalized */
|
||||
if (merg->log_seq > 0) {
|
||||
*total += merg->log_total;
|
||||
*count += merg->log_count;
|
||||
}
|
||||
}
|
||||
24
kmod/src/totl.h
Normal file
24
kmod/src/totl.h
Normal file
@@ -0,0 +1,24 @@
|
||||
#ifndef _SCOUTFS_TOTL_H_
|
||||
#define _SCOUTFS_TOTL_H_
|
||||
|
||||
#include "key.h"
|
||||
|
||||
struct scoutfs_totl_merging {
|
||||
u64 fs_seq;
|
||||
u64 fs_total;
|
||||
u64 fs_count;
|
||||
u64 fin_seq;
|
||||
u64 fin_total;
|
||||
s64 fin_count;
|
||||
u64 log_seq;
|
||||
u64 log_total;
|
||||
s64 log_count;
|
||||
};
|
||||
|
||||
void scoutfs_totl_set_range(struct scoutfs_key *start, struct scoutfs_key *end);
|
||||
void scoutfs_totl_merge_init(struct scoutfs_totl_merging *merg);
|
||||
void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
|
||||
u64 seq, u8 flags, void *val, int val_len, int fic);
|
||||
void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total, __u64 *count);
|
||||
|
||||
#endif
|
||||
143
kmod/src/trace/quota.h
Normal file
143
kmod/src/trace/quota.h
Normal file
@@ -0,0 +1,143 @@
|
||||
|
||||
/*
 * Tracing squota_input
 *
 * SQI_FMT and SQI_ARGS print straight from a struct squota_input
 * pointer.  SQI_FIELDS, SQI_ASSIGN, and SQI_ENTRY_ARGS store a copy in
 * a trace entry under the given field prefix and print from that copy.
 * The output is the op followed by the first three attrs.
 */
#define SQI_FMT "[%u %llu %llu %llu]"

#define SQI_ARGS(i)							\
	(i)->op, (i)->attrs[0], (i)->attrs[1], (i)->attrs[2]

#define SQI_FIELDS(pref)						\
	__array(__u64, pref##_attrs, SQ_NS__NR_SELECT)			\
	__field(__u8, pref##_op)

#define SQI_ASSIGN(pref, i)						\
	__entry->pref##_attrs[0] = (i)->attrs[0];			\
	__entry->pref##_attrs[1] = (i)->attrs[1];			\
	__entry->pref##_attrs[2] = (i)->attrs[2];			\
	__entry->pref##_op = (i)->op;

#define SQI_ENTRY_ARGS(pref)						\
	__entry->pref##_op,						\
	__entry->pref##_attrs[0],					\
	__entry->pref##_attrs[1],					\
	__entry->pref##_attrs[2]
|
||||
|
||||
/*
 * Tracing squota_rule
 *
 * SQR_FMT prints the prio, then val,source,flags for each of the three
 * names, then the op and limit.  SQR_ARGS supplies those arguments
 * straight from a struct squota_rule pointer.  SQR_FIELDS, SQR_ASSIGN,
 * and SQR_ENTRY_ARGS store a copy of the rule in a trace entry under
 * the given field prefix and print from that copy.
 */
#define SQR_FMT "[%u %llu,%u,%x %llu,%u,%x %llu,%u,%x %u %llu]"

/* the per-name fields live in the rule's names[] array */
#define SQR_ARGS(r)								\
	(r)->prio,								\
	(r)->names[0].val, (r)->names[0].source, (r)->names[0].flags,		\
	(r)->names[1].val, (r)->names[1].source, (r)->names[1].flags,		\
	(r)->names[2].val, (r)->names[2].source, (r)->names[2].flags,		\
	(r)->op, (r)->limit

#define SQR_FIELDS(pref)							\
	__array(__u64, pref##_name_val, 3)					\
	__field(__u64, pref##_limit)						\
	__array(__u8, pref##_name_source, 3)					\
	__array(__u8, pref##_name_flags, 3)					\
	__field(__u8, pref##_prio)						\
	__field(__u8, pref##_op)

#define SQR_ASSIGN(pref, r)							\
	__entry->pref##_name_val[0] = (r)->names[0].val;			\
	__entry->pref##_name_val[1] = (r)->names[1].val;			\
	__entry->pref##_name_val[2] = (r)->names[2].val;			\
	__entry->pref##_limit = (r)->limit;					\
	__entry->pref##_name_source[0] = (r)->names[0].source;			\
	__entry->pref##_name_source[1] = (r)->names[1].source;			\
	__entry->pref##_name_source[2] = (r)->names[2].source;			\
	__entry->pref##_name_flags[0] = (r)->names[0].flags;			\
	__entry->pref##_name_flags[1] = (r)->names[1].flags;			\
	__entry->pref##_name_flags[2] = (r)->names[2].flags;			\
	__entry->pref##_prio = (r)->prio;					\
	__entry->pref##_op = (r)->op;

#define SQR_ENTRY_ARGS(pref)							\
	__entry->pref##_prio, __entry->pref##_name_val[0],			\
	__entry->pref##_name_source[0], __entry->pref##_name_flags[0],		\
	__entry->pref##_name_val[1], __entry->pref##_name_source[1],		\
	__entry->pref##_name_flags[1], __entry->pref##_name_val[2],		\
	__entry->pref##_name_source[2], __entry->pref##_name_flags[2],		\
	__entry->pref##_op, __entry->pref##_limit
|
||||
|
||||
TRACE_EVENT(scoutfs_quota_check,
|
||||
TP_PROTO(struct super_block *sb, long rs_ptr, struct squota_input *inp, int ret),
|
||||
|
||||
TP_ARGS(sb, rs_ptr, inp, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(long, rs_ptr)
|
||||
SQI_FIELDS(i)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->rs_ptr = rs_ptr;
|
||||
SQI_ASSIGN(i, inp);
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" rs_ptr %ld ret %d inp "SQI_FMT,
|
||||
SCSB_TRACE_ARGS, __entry->rs_ptr, __entry->ret, SQI_ENTRY_ARGS(i))
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_quota_rule_op_class,
|
||||
TP_PROTO(struct super_block *sb, struct squota_rule *rule, int ret),
|
||||
|
||||
TP_ARGS(sb, rule, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
SQR_FIELDS(r)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
SQR_ASSIGN(r, rule);
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" "SQR_FMT" ret %d",
|
||||
SCSB_TRACE_ARGS, SQR_ENTRY_ARGS(r), __entry->ret)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_quota_rule_op_class, scoutfs_quota_add_rule,
|
||||
TP_PROTO(struct super_block *sb, struct squota_rule *rule, int ret),
|
||||
TP_ARGS(sb, rule, ret)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_quota_rule_op_class, scoutfs_quota_del_rule,
|
||||
TP_PROTO(struct super_block *sb, struct squota_rule *rule, int ret),
|
||||
TP_ARGS(sb, rule, ret)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_quota_totl_check,
|
||||
TP_PROTO(struct super_block *sb, struct squota_input *inp, struct scoutfs_key *key,
|
||||
u64 limit, int ret),
|
||||
|
||||
TP_ARGS(sb, inp, key, limit, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
SQI_FIELDS(i)
|
||||
sk_trace_define(k)
|
||||
__field(__u64, limit)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
SQI_ASSIGN(i, inp);
|
||||
sk_trace_assign(k, key);
|
||||
__entry->limit = limit;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" inp "SQI_FMT" key "SK_FMT" limit %llu ret %d",
|
||||
SCSB_TRACE_ARGS, SQI_ENTRY_ARGS(i), sk_trace_args(k), __entry->limit,
|
||||
__entry->ret)
|
||||
);
|
||||
112
kmod/src/trace/wkic.h
Normal file
112
kmod/src/trace/wkic.h
Normal file
@@ -0,0 +1,112 @@
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_wkic_wpage_class,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(void *, ptr)
|
||||
__field(int, which)
|
||||
__field(bool, n0l)
|
||||
__field(bool, n1l)
|
||||
sk_trace_define(start)
|
||||
sk_trace_define(end)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->ptr = ptr;
|
||||
__entry->which = which;
|
||||
__entry->n0l = n0l;
|
||||
__entry->n1l = n1l;
|
||||
sk_trace_assign(start, start);
|
||||
sk_trace_assign(end, end);
|
||||
__entry->which = which;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" ptr %p wh %d nl %u,%u start "SK_FMT " end "SK_FMT, SCSB_TRACE_ARGS,
|
||||
__entry->ptr, __entry->which, __entry->n0l, __entry->n1l,
|
||||
sk_trace_args(start), sk_trace_args(end))
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_alloced,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_freeing,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_found,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_trimmed,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_erased,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_inserting,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_inserted,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_shrinking,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_dropping,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_replaying,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_wkic_wpage_class, scoutfs_wkic_wpage_filled,
|
||||
TP_PROTO(struct super_block *sb, void *ptr, int which, bool n0l, bool n1l,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end),
|
||||
TP_ARGS(sb, ptr, which, n0l, n1l, start, end)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_wkic_read_items,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, struct scoutfs_key *start,
|
||||
struct scoutfs_key *end),
|
||||
|
||||
TP_ARGS(sb, key, start, end),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
sk_trace_define(key)
|
||||
sk_trace_define(start)
|
||||
sk_trace_define(end)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
sk_trace_assign(key, start);
|
||||
sk_trace_assign(start, start);
|
||||
sk_trace_assign(end, end);
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" key "SK_FMT" start "SK_FMT " end "SK_FMT, SCSB_TRACE_ARGS,
|
||||
sk_trace_args(key), sk_trace_args(start), sk_trace_args(end))
|
||||
);
|
||||
1160
kmod/src/wkic.c
Normal file
1160
kmod/src/wkic.c
Normal file
File diff suppressed because it is too large
Load Diff
19
kmod/src/wkic.h
Normal file
19
kmod/src/wkic.h
Normal file
@@ -0,0 +1,19 @@
|
||||
#ifndef _SCOUTFS_WKIC_H_
|
||||
#define _SCOUTFS_WKIC_H_
|
||||
|
||||
#include "format.h"
|
||||
|
||||
typedef int (*wkic_iter_cb_t)(struct scoutfs_key *key, void *val, unsigned int val_len,
|
||||
void *cb_arg);
|
||||
|
||||
int scoutfs_wkic_iterate(struct super_block *sb, struct scoutfs_key *key, struct scoutfs_key *last,
|
||||
struct scoutfs_key *range_start, struct scoutfs_key *range_end,
|
||||
wkic_iter_cb_t cb, void *cb_arg);
|
||||
int scoutfs_wkic_iterate_stable(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_key *last, struct scoutfs_key *range_start,
|
||||
struct scoutfs_key *range_end, wkic_iter_cb_t cb, void *cb_arg);
|
||||
|
||||
int scoutfs_wkic_setup(struct super_block *sb);
|
||||
void scoutfs_wkic_destroy(struct super_block *sb);
|
||||
|
||||
#endif
|
||||
251
kmod/src/xattr.c
251
kmod/src/xattr.c
@@ -81,7 +81,20 @@ static void init_xattr_key(struct scoutfs_key *key, u64 ino, u32 name_hash,
|
||||
#define SCOUTFS_XATTR_PREFIX "scoutfs."
|
||||
#define SCOUTFS_XATTR_PREFIX_LEN (sizeof(SCOUTFS_XATTR_PREFIX) - 1)
|
||||
|
||||
/*
|
||||
* We could have hidden the logic that needs this in a user-prefix
|
||||
* specific .set handler, but I wanted to make sure that we always
|
||||
* applied that logic from any call chains to _xattr_set. The
|
||||
* additional strcmp isn't so expensive given all the rest of the work
|
||||
* we're doing in here.
|
||||
*/
|
||||
static inline bool is_user(const char *name)
|
||||
{
|
||||
return !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
|
||||
}
|
||||
|
||||
#define HIDE_TAG "hide."
|
||||
#define INDX_TAG "indx."
|
||||
#define SRCH_TAG "srch."
|
||||
#define TOTL_TAG "totl."
|
||||
#define TAG_LEN (sizeof(HIDE_TAG) - 1)
|
||||
@@ -103,6 +116,9 @@ int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
if (!strncmp(name, HIDE_TAG, TAG_LEN)) {
|
||||
if (++tgs->hide == 0)
|
||||
return -EINVAL;
|
||||
} else if (!strncmp(name, INDX_TAG, TAG_LEN)) {
|
||||
if (++tgs->indx == 0)
|
||||
return -EINVAL;
|
||||
} else if (!strncmp(name, SRCH_TAG, TAG_LEN)) {
|
||||
if (++tgs->srch == 0)
|
||||
return -EINVAL;
|
||||
@@ -540,47 +556,57 @@ static int parse_totl_u64(const char *s, int len, u64 *res)
|
||||
}
|
||||
|
||||
/*
|
||||
* non-destructive relatively quick parse of the last 3 dotted u64s that
|
||||
* make up the name of the xattr total. -EINVAL is returned if there
|
||||
* are anything but 3 valid u64 encodings between single dots at the end
|
||||
* of the name.
|
||||
* non-destructive relatively quick parse of final dotted u64s in an
|
||||
* xattr name. If the required number of values are found then we
|
||||
* return the number of bytes in the name that are not the final dotted
|
||||
* u64s with their dots. -EINVAL is returned if we didn't find the
|
||||
* required number of values.
|
||||
*/
|
||||
static int parse_totl_key(struct scoutfs_key *key, const char *name, int name_len)
|
||||
static int parse_dotted_u64s(u64 *u64s, int nr, const char *name, int name_len)
|
||||
{
|
||||
u64 tot_name[3];
|
||||
int end = name_len;
|
||||
int nr = 0;
|
||||
int len;
|
||||
int ret;
|
||||
int i;
|
||||
int u;
|
||||
|
||||
/* parse name elements in reverse order from end of xattr name string */
|
||||
for (i = name_len - 1; i >= 0 && nr < ARRAY_SIZE(tot_name); i--) {
|
||||
for (u = nr - 1, i = name_len - 1; u >= 0 && i >= 0; i--) {
|
||||
if (name[i] != '.')
|
||||
continue;
|
||||
|
||||
len = end - (i + 1);
|
||||
ret = parse_totl_u64(&name[i + 1], len, &tot_name[nr]);
|
||||
ret = parse_totl_u64(&name[i + 1], len, &u64s[u]);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
end = i;
|
||||
nr++;
|
||||
u--;
|
||||
}
|
||||
|
||||
if (nr == ARRAY_SIZE(tot_name)) {
|
||||
/* swap to account for parsing in reverse */
|
||||
swap(tot_name[0], tot_name[2]);
|
||||
scoutfs_xattr_init_totl_key(key, tot_name);
|
||||
ret = 0;
|
||||
} else {
|
||||
if (u == -1)
|
||||
ret = end;
|
||||
else
|
||||
ret = -EINVAL;
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int parse_totl_key(struct scoutfs_key *key, const char *name, int name_len)
|
||||
{
|
||||
u64 u64s[3];
|
||||
int ret;
|
||||
|
||||
ret = parse_dotted_u64s(u64s, ARRAY_SIZE(u64s), name, name_len);
|
||||
if (ret >= 0) {
|
||||
scoutfs_xattr_init_totl_key(key, u64s);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int apply_totl_delta(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_xattr_totl_val *tval, struct scoutfs_lock *lock)
|
||||
{
|
||||
@@ -607,6 +633,72 @@ int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len)
|
||||
return SCOUTFS_DELTA_COMBINED;
|
||||
}
|
||||
|
||||
void scoutfs_xattr_indx_get_range(struct scoutfs_key *start, struct scoutfs_key *end)
|
||||
{
|
||||
scoutfs_key_set_zeros(start);
|
||||
start->sk_zone = SCOUTFS_XATTR_INDX_ZONE;
|
||||
scoutfs_key_set_ones(end);
|
||||
end->sk_zone = SCOUTFS_XATTR_INDX_ZONE;
|
||||
}
|
||||
|
||||
/*
|
||||
* .indx. keys are a bit funny because we're iterating over index keys
|
||||
* by major:minor:inode:xattr_id. That doesn't map nicely to the
|
||||
* comparison precedence of the key fields. We have to mess around a
|
||||
* little bit to get the major into the most significant key bits and
|
||||
* the low bits of xattr id into the least significant key bits.
|
||||
*/
|
||||
void scoutfs_xattr_init_indx_key(struct scoutfs_key *key, u8 major, u64 minor, u64 ino, u64 xid)
|
||||
{
|
||||
scoutfs_key_set_zeros(key);
|
||||
key->sk_zone = SCOUTFS_XATTR_INDX_ZONE;
|
||||
|
||||
key->_sk_first = cpu_to_le64(((u64)major << 56) | (minor >> 8));
|
||||
key->_sk_second = cpu_to_le64((minor << 56) | (ino >> 8));
|
||||
key->_sk_third = cpu_to_le64((ino << 56) | (xid >> 8));
|
||||
key->_sk_fourth = xid & 0xff;
|
||||
}
|
||||
|
||||
/*
 * Unpack the values that scoutfs_xattr_init_indx_key() packed into the
 * key.  Each value is reassembled from the low bits of one key field
 * and the top byte of the next.
 */
void scoutfs_xattr_get_indx_key(struct scoutfs_key *key, u8 *major, u64 *minor, u64 *ino, u64 *xid)
{
	u64 first = le64_to_cpu(key->_sk_first);
	u64 second = le64_to_cpu(key->_sk_second);
	u64 third = le64_to_cpu(key->_sk_third);

	*major = first >> 56;
	*minor = (first << 8) | (second >> 56);
	*ino = (second << 8) | (third >> 56);
	*xid = (third << 8) | key->_sk_fourth;
}
|
||||
|
||||
/*
 * Replace only the xattr id in an existing .indx. key.  The id
 * straddles two key fields so the key is unpacked and then rebuilt
 * with the same major, minor, and inode and the new id.
 */
void scoutfs_xattr_set_indx_key_xid(struct scoutfs_key *key, u64 xid)
{
	u64 old_xid;
	u64 minor;
	u64 ino;
	u8 major;

	/* the old id is read out only to be discarded */
	scoutfs_xattr_get_indx_key(key, &major, &minor, &ino, &old_xid);
	scoutfs_xattr_init_indx_key(key, major, minor, ino, xid);
}
|
||||
|
||||
/*
|
||||
* This initial parsing of the name doesn't yet have access to an xattr
|
||||
* id to put in the key. That's added later as the existing xattr is
|
||||
* found or a new xattr's id is allocated.
|
||||
*/
|
||||
static int parse_indx_key(struct scoutfs_key *key, const char *name, int name_len, u64 ino)
|
||||
{
|
||||
u64 u64s[2];
|
||||
int ret;
|
||||
|
||||
ret = parse_dotted_u64s(u64s, ARRAY_SIZE(u64s), name, name_len);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (u64s[0] > U8_MAX)
|
||||
return -EINVAL;
|
||||
|
||||
scoutfs_xattr_init_indx_key(key, u64s[0], u64s[1], ino, 0);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The confusing swiss army knife of creating, modifying, and deleting
|
||||
* xattrs.
|
||||
@@ -627,7 +719,7 @@ int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len)
|
||||
int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_len,
|
||||
const void *value, size_t size, int flags,
|
||||
const struct scoutfs_xattr_prefix_tags *tgs,
|
||||
struct scoutfs_lock *lck, struct scoutfs_lock *totl_lock,
|
||||
struct scoutfs_lock *lck, struct scoutfs_lock *tag_lock,
|
||||
struct list_head *ind_locks)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
@@ -635,10 +727,11 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct scoutfs_xattr_totl_val tval = {0,};
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_key totl_key;
|
||||
struct scoutfs_key tag_key;
|
||||
struct scoutfs_key key;
|
||||
bool undo_srch = false;
|
||||
bool undo_totl = false;
|
||||
bool undo_indx = false;
|
||||
u8 found_parts;
|
||||
unsigned int xat_bytes_totl;
|
||||
unsigned int xat_bytes;
|
||||
@@ -651,7 +744,8 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
|
||||
trace_scoutfs_xattr_set(sb, name_len, value, size, flags);
|
||||
|
||||
if (WARN_ON_ONCE(tgs->totl && !totl_lock))
|
||||
if (WARN_ON_ONCE(tgs->totl && tgs->indx) ||
|
||||
WARN_ON_ONCE((tgs->totl | tgs->indx) && !tag_lock))
|
||||
return -EINVAL;
|
||||
|
||||
/* mirror the syscall's errors for large names and values */
|
||||
@@ -664,10 +758,22 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
(flags & ~(XATTR_CREATE | XATTR_REPLACE)))
|
||||
return -EINVAL;
|
||||
|
||||
if ((tgs->hide | tgs->srch | tgs->totl) && !capable(CAP_SYS_ADMIN))
|
||||
if ((tgs->hide | tgs->indx | tgs->srch | tgs->totl) && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (tgs->totl && ((ret = parse_totl_key(&totl_key, name, name_len)) != 0))
|
||||
if (tgs->totl && ((ret = parse_totl_key(&tag_key, name, name_len)) != 0))
|
||||
return ret;
|
||||
|
||||
if (tgs->indx &&
|
||||
(ret = scoutfs_fmt_vers_unsupported(sb, SCOUTFS_FORMAT_VERSION_FEAT_INDX_TAG)))
|
||||
return ret;
|
||||
|
||||
if (tgs->indx && ((ret = parse_indx_key(&tag_key, name, name_len, ino)) != 0))
|
||||
return ret;
|
||||
|
||||
/* retention blocks user. xattr modification, all else allowed */
|
||||
ret = scoutfs_inode_check_retention(inode);
|
||||
if (ret < 0 && is_user(name))
|
||||
return ret;
|
||||
|
||||
/* allocate enough to always read an existing xattr's totl */
|
||||
@@ -708,6 +814,12 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
/* found fields in key will also be used */
|
||||
found_parts = ret >= 0 ? xattr_nr_parts(xat) : 0;
|
||||
|
||||
/* use existing xattr's id or allocate new when creating */
|
||||
if (found_parts)
|
||||
id = le64_to_cpu(key.skx_id);
|
||||
else if (value)
|
||||
id = si->next_xattr_id++;
|
||||
|
||||
if (found_parts && tgs->totl) {
|
||||
/* parse old totl value before we clobber xat buf */
|
||||
val_len = ret - offsetof(struct scoutfs_xattr, name[xat->name_len]);
|
||||
@@ -718,12 +830,25 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
le64_add_cpu(&tval.total, -total);
|
||||
}
|
||||
|
||||
/*
|
||||
* indx xattrs don't have a value. After returning an error for
|
||||
* non-zero val length or short circuiting modifying with the
|
||||
* same 0 length, all we're left with is creating or deleting
|
||||
* the xattr.
|
||||
*/
|
||||
if (tgs->indx) {
|
||||
if (size != 0) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
if (found_parts && value) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
/* prepare the xattr header, name, and start of value in first item */
|
||||
if (value) {
|
||||
if (found_parts)
|
||||
id = le64_to_cpu(key.skx_id);
|
||||
else
|
||||
id = si->next_xattr_id++;
|
||||
xat->name_len = name_len;
|
||||
xat->val_len = cpu_to_le16(size);
|
||||
memset(xat->__pad, 0, sizeof(xat->__pad));
|
||||
@@ -741,9 +866,18 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
le64_add_cpu(&tval.total, total);
|
||||
}
|
||||
|
||||
if (tgs->indx) {
|
||||
scoutfs_xattr_set_indx_key_xid(&tag_key, id);
|
||||
if (value)
|
||||
ret = scoutfs_item_create_force(sb, &tag_key, NULL, 0, tag_lock, NULL);
|
||||
else
|
||||
ret = scoutfs_item_delete_force(sb, &tag_key, tag_lock, NULL);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
undo_indx = true;
|
||||
}
|
||||
|
||||
if (tgs->srch && !(found_parts && value)) {
|
||||
if (found_parts)
|
||||
id = le64_to_cpu(key.skx_id);
|
||||
hash = scoutfs_hash64(name, name_len);
|
||||
ret = scoutfs_forest_srch_add(sb, hash, ino, id);
|
||||
if (ret < 0)
|
||||
@@ -752,7 +886,7 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
}
|
||||
|
||||
if (tgs->totl) {
|
||||
ret = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
ret = apply_totl_delta(sb, &tag_key, &tval, tag_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
undo_totl = true;
|
||||
@@ -777,6 +911,13 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
if (ret < 0 && undo_indx) {
|
||||
if (value)
|
||||
err = scoutfs_item_delete_force(sb, &tag_key, tag_lock, NULL);
|
||||
else
|
||||
err = scoutfs_item_create_force(sb, &tag_key, NULL, 0, tag_lock, NULL);
|
||||
BUG_ON(err); /* inconsistent */
|
||||
}
|
||||
if (ret < 0 && undo_srch) {
|
||||
err = scoutfs_forest_srch_add(sb, hash, ino, id);
|
||||
BUG_ON(err);
|
||||
@@ -785,7 +926,7 @@ out:
|
||||
/* _delta() on dirty items shouldn't fail */
|
||||
tval.total = cpu_to_le64(-le64_to_cpu(tval.total));
|
||||
tval.count = cpu_to_le64(-le64_to_cpu(tval.count));
|
||||
err = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
err = apply_totl_delta(sb, &tag_key, &tval, tag_lock);
|
||||
BUG_ON(err);
|
||||
}
|
||||
|
||||
@@ -801,7 +942,7 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name, const void
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_lock *totl_lock = NULL;
|
||||
struct scoutfs_lock *tag_lock = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
size_t name_len = strlen(name);
|
||||
LIST_HEAD(ind_locks);
|
||||
@@ -816,8 +957,11 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name, const void
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
if (tgs.totl) {
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &totl_lock);
|
||||
if (tgs.totl || tgs.indx) {
|
||||
if (tgs.totl)
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &tag_lock);
|
||||
else
|
||||
ret = scoutfs_lock_xattr_indx(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &tag_lock);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
}
|
||||
@@ -836,7 +980,7 @@ retry:
|
||||
goto release;
|
||||
|
||||
ret = scoutfs_xattr_set_locked(dentry->d_inode, name, name_len, value, size, flags, &tgs,
|
||||
lck, totl_lock, &ind_locks);
|
||||
lck, tag_lock, &ind_locks);
|
||||
if (ret == 0)
|
||||
scoutfs_update_inode_item(inode, lck, &ind_locks);
|
||||
|
||||
@@ -845,7 +989,7 @@ release:
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
unlock:
|
||||
scoutfs_unlock(sb, lck, SCOUTFS_LOCK_WRITE);
|
||||
scoutfs_unlock(sb, totl_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
scoutfs_unlock(sb, tag_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1055,14 +1199,15 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
{
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *totl_lock = NULL;
|
||||
struct scoutfs_lock *tag_lock = NULL;
|
||||
struct scoutfs_xattr_totl_val tval;
|
||||
struct scoutfs_key totl_key;
|
||||
struct scoutfs_key tag_key;
|
||||
struct scoutfs_key last;
|
||||
struct scoutfs_key key;
|
||||
bool release = false;
|
||||
unsigned int bytes;
|
||||
unsigned int val_len;
|
||||
u8 locked_zone = 0;
|
||||
void *value;
|
||||
u64 total;
|
||||
u64 hash;
|
||||
@@ -1108,16 +1253,32 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = parse_totl_key(&totl_key, xat->name, xat->name_len) ?:
|
||||
ret = parse_totl_key(&tag_key, xat->name, xat->name_len) ?:
|
||||
parse_totl_u64(value, val_len, &total);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tgs.totl && totl_lock == NULL) {
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &totl_lock);
|
||||
if (tgs.indx) {
|
||||
ret = parse_indx_key(&tag_key, xat->name, xat->name_len, ino);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((tgs.totl || tgs.indx) && locked_zone != tag_key.sk_zone) {
|
||||
if (tag_lock) {
|
||||
scoutfs_unlock(sb, tag_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
tag_lock = NULL;
|
||||
}
|
||||
if (tgs.totl)
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0,
|
||||
&tag_lock);
|
||||
else
|
||||
ret = scoutfs_lock_xattr_indx(sb, SCOUTFS_LOCK_WRITE_ONLY, 0,
|
||||
&tag_lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
locked_zone = tag_key.sk_zone;
|
||||
}
|
||||
|
||||
ret = scoutfs_hold_trans(sb, false);
|
||||
@@ -1140,7 +1301,13 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
if (tgs.totl) {
|
||||
tval.total = cpu_to_le64(-total);
|
||||
tval.count = cpu_to_le64(-1LL);
|
||||
ret = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
ret = apply_totl_delta(sb, &tag_key, &tval, tag_lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if (tgs.indx) {
|
||||
ret = scoutfs_item_delete_force(sb, &tag_key, tag_lock, NULL);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
@@ -1153,7 +1320,7 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
|
||||
if (release)
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_unlock(sb, totl_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
scoutfs_unlock(sb, tag_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
kfree(xat);
|
||||
out:
|
||||
return ret;
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
|
||||
struct scoutfs_xattr_prefix_tags {
|
||||
unsigned long hide:1,
|
||||
indx:1,
|
||||
srch:1,
|
||||
totl:1;
|
||||
};
|
||||
@@ -30,4 +31,9 @@ int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
void scoutfs_xattr_init_totl_key(struct scoutfs_key *key, u64 *name);
|
||||
int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len);
|
||||
|
||||
void scoutfs_xattr_indx_get_range(struct scoutfs_key *start, struct scoutfs_key *end);
|
||||
void scoutfs_xattr_init_indx_key(struct scoutfs_key *key, u8 major, u64 minor, u64 ino, u64 xid);
|
||||
void scoutfs_xattr_get_indx_key(struct scoutfs_key *key, u8 *major, u64 *minor, u64 *ino, u64 *xid);
|
||||
void scoutfs_xattr_set_indx_key_xid(struct scoutfs_key *key, u64 xid);
|
||||
|
||||
#endif
|
||||
|
||||
1
tests/.gitignore
vendored
1
tests/.gitignore
vendored
@@ -9,3 +9,4 @@ src/find_xattrs
|
||||
src/stage_tmpfile
|
||||
src/create_xattr_loop
|
||||
src/o_tmpfile_umask
|
||||
src/o_tmpfile_linkat
|
||||
|
||||
@@ -12,7 +12,8 @@ BIN := src/createmany \
|
||||
src/find_xattrs \
|
||||
src/create_xattr_loop \
|
||||
src/fragmented_data_extents \
|
||||
src/o_tmpfile_umask
|
||||
src/o_tmpfile_umask \
|
||||
src/o_tmpfile_linkat
|
||||
|
||||
DEPS := $(wildcard src/*.d)
|
||||
|
||||
|
||||
@@ -113,6 +113,7 @@ used during the test.
|
||||
| T\_EX\_META\_DEV | scratch meta bdev | -f | /dev/vdd |
|
||||
| T\_EX\_DATA\_DEV | scratch data bdev | -e | /dev/vdc |
|
||||
| T\_M[0-9] | mount paths | mounted per run | /mnt/test.[0-9]/ |
|
||||
| T\_MODULE | built kernel module | created per run | ../kmod/src/..ko |
|
||||
| T\_NR\_MOUNTS | number of mounts | -n | 3 |
|
||||
| T\_O[0-9] | mount options | created per run | -o server\_addr= |
|
||||
| T\_QUORUM | quorum count | -q | 2 |
|
||||
|
||||
@@ -147,6 +147,10 @@ t_filter_dmesg()
|
||||
# ignore systemd-journal rotating
|
||||
re="$re|systemd-journald.*"
|
||||
|
||||
# format vers back/compat tries bad mounts
|
||||
re="$re|scoutfs .* error.*outside of supported version.*"
|
||||
re="$re|scoutfs .* error.*could not get .*super.*"
|
||||
|
||||
egrep -v "($re)" | \
|
||||
ignore_harmless_unwind_kasan_stack_oob
|
||||
}
|
||||
|
||||
@@ -29,13 +29,12 @@ t_mount_rid()
|
||||
}
|
||||
|
||||
#
|
||||
# Output the "f.$fsid.r.$rid" identifier string for the given mount
|
||||
# number, 0 is used by default if none is specified.
|
||||
# Output the "f.$fsid.r.$rid" identifier string for the given path
|
||||
# in a mounted scoutfs volume.
|
||||
#
|
||||
t_ident()
|
||||
t_ident_from_mnt()
|
||||
{
|
||||
local nr="${1:-0}"
|
||||
local mnt="$(eval echo \$T_M$nr)"
|
||||
local mnt="$1"
|
||||
local fsid
|
||||
local rid
|
||||
|
||||
@@ -45,6 +44,38 @@ t_ident()
|
||||
echo "f.${fsid:0:6}.r.${rid:0:6}"
|
||||
}
|
||||
|
||||
#
|
||||
# Output the "f.$fsid.r.$rid" identifier string for the given mount
|
||||
# number, 0 is used by default if none is specified.
|
||||
#
|
||||
t_ident()
{
	# Mount number to look up; mount 0 is used when none is given.
	local nr="${1:-0}"
	# Name of the T_M<nr> variable that holds that mount's path.
	local var="T_M$nr"

	# Indirect expansion reads the variable without eval, so the mount
	# path is not re-parsed (word-split or glob-expanded) by the shell.
	t_ident_from_mnt "${!var}"
}
|
||||
|
||||
#
|
||||
# Output the sysfs path for the given "f.$fsid.r.$rid" identifier string.
|
||||
#
|
||||
t_sysfs_path_from_ident()
{
	# $1 is the ident string; it names the directory under
	# /sys/fs/scoutfs that is printed.
	printf '%s\n' "/sys/fs/scoutfs/$1"
}
|
||||
|
||||
#
|
||||
# Output the sysfs path for a path in a mounted fs.
|
||||
#
|
||||
t_sysfs_path_from_mnt()
{
	# Path inside a mounted scoutfs volume.
	local mnt="$1"

	# Quote both the path and the resulting ident so that neither is
	# word-split or glob-expanded on its way to the helpers.
	t_sysfs_path_from_ident "$(t_ident_from_mnt "$mnt")"
}
|
||||
|
||||
#
|
||||
# Output the mount's sysfs path, defaulting to mount 0 if none is
|
||||
# specified.
|
||||
@@ -53,7 +84,7 @@ t_sysfs_path()
|
||||
{
|
||||
local nr="$1"
|
||||
|
||||
echo "/sys/fs/scoutfs/$(t_ident $nr)"
|
||||
t_sysfs_path_from_ident $(t_ident $nr)
|
||||
}
|
||||
|
||||
#
|
||||
|
||||
157
tests/golden/basic-posix-acl
Normal file
157
tests/golden/basic-posix-acl
Normal file
@@ -0,0 +1,157 @@
|
||||
== setup test directory
|
||||
== getfacl
|
||||
directory drwxr-xr-x 0 0 0 '.'
|
||||
# file: .
|
||||
# owner: root
|
||||
# group: root
|
||||
user::rwx
|
||||
group::r-x
|
||||
other::r-x
|
||||
|
||||
== basic non-acl access through permissions
|
||||
directory drwxr-xr-x 0 44444 0 'dir-testuid'
|
||||
touch: cannot touch 'dir-testuid/file-group-write': Permission denied
|
||||
touch: cannot touch 'symlinkdir-testuid/symlink-file-group-write': Permission denied
|
||||
regular empty file -rw-r--r-- 22222 44444 0 'dir-testuid/file-group-write'
|
||||
regular empty file -rw-r--r-- 22222 44444 0 'symlinkdir-testuid/symlink-file-group-write'
|
||||
== basic acl access
|
||||
directory drwxr-xr-x 0 0 0 'dir-root'
|
||||
touch: cannot touch 'dir-root/file-group-write': Permission denied
|
||||
touch: cannot touch 'symlinkdir-root/file-group-write': Permission denied
|
||||
# file: dir-root
|
||||
# owner: root
|
||||
# group: root
|
||||
user::rwx
|
||||
user:22222:rwx
|
||||
group::r-x
|
||||
mask::rwx
|
||||
other::r-x
|
||||
|
||||
regular empty file -rw-r--r-- 22222 0 0 'dir-root/file-group-write'
|
||||
regular empty file -rw-r--r-- 22222 0 0 'symlinkdir-root/file-group-write'
|
||||
== directory exec
|
||||
Success
|
||||
Success
|
||||
# file: dir-root
|
||||
# owner: root
|
||||
# group: root
|
||||
user::rwx
|
||||
user:22222:rw-
|
||||
group::r-x
|
||||
mask::rwx
|
||||
other::r-x
|
||||
|
||||
bash: line 0: cd: dir-root: Permission denied
|
||||
Failed
|
||||
bash: line 0: cd: symlinkdir-root: Permission denied
|
||||
Failed
|
||||
# file: dir-root
|
||||
# owner: root
|
||||
# group: root
|
||||
user::rwx
|
||||
user:22222:rw-
|
||||
group::r-x
|
||||
group:44444:rwx
|
||||
mask::rwx
|
||||
other::r-x
|
||||
|
||||
Success
|
||||
Success
|
||||
== get/set attr
|
||||
regular empty file -rw-r--r-- 0 0 0 'file-root'
|
||||
setfattr: file-root: Permission denied
|
||||
# file: file-root
|
||||
# owner: root
|
||||
# group: root
|
||||
user::rw-
|
||||
user:22222:rw-
|
||||
group::r--
|
||||
mask::rw-
|
||||
other::r--
|
||||
|
||||
# file: file-root
|
||||
user.test2="Success"
|
||||
|
||||
# file: file-root
|
||||
# owner: root
|
||||
# group: root
|
||||
user::rw-
|
||||
group::r--
|
||||
mask::r--
|
||||
other::r--
|
||||
|
||||
setfattr: file-root: Permission denied
|
||||
# file: file-root
|
||||
user.test2="Success"
|
||||
|
||||
# file: file-root
|
||||
# owner: root
|
||||
# group: root
|
||||
user::rw-
|
||||
group::r--
|
||||
group:44444:rw-
|
||||
mask::rw-
|
||||
other::r--
|
||||
|
||||
# file: file-root
|
||||
user.test2="Success"
|
||||
user.test4="Success"
|
||||
|
||||
== inheritance / default acl
|
||||
directory drwxr-xr-x 0 0 0 'dir-root2'
|
||||
mkdir: cannot create directory 'dir-root2/dir': Permission denied
|
||||
touch: cannot touch 'dir-root2/dir/file': No such file or directory
|
||||
# file: dir-root2
|
||||
# owner: root
|
||||
# group: root
|
||||
user::rwx
|
||||
group::r-x
|
||||
other::r-x
|
||||
default:user::rwx
|
||||
default:user:22222:rwx
|
||||
default:group::r-x
|
||||
default:mask::rwx
|
||||
default:other::r-x
|
||||
|
||||
mkdir: cannot create directory 'dir-root2/dir': Permission denied
|
||||
touch: cannot touch 'dir-root2/dir/file': No such file or directory
|
||||
# file: dir-root2
|
||||
# owner: root
|
||||
# group: root
|
||||
user::rwx
|
||||
user:22222:rwx
|
||||
group::r-x
|
||||
mask::rwx
|
||||
other::r-x
|
||||
default:user::rwx
|
||||
default:user:22222:rwx
|
||||
default:group::r-x
|
||||
default:mask::rwx
|
||||
default:other::r-x
|
||||
|
||||
directory drwxrwxr-x 22222 0 4 'dir-root2/dir'
|
||||
# file: dir-root2/dir
|
||||
# owner: 22222
|
||||
# group: root
|
||||
user::rwx
|
||||
user:22222:rwx
|
||||
group::r-x
|
||||
mask::rwx
|
||||
other::r-x
|
||||
default:user::rwx
|
||||
default:user:22222:rwx
|
||||
default:group::r-x
|
||||
default:mask::rwx
|
||||
default:other::r-x
|
||||
|
||||
regular empty file -rw-rw-r-- 22222 0 0 'dir-root2/dir/file'
|
||||
# file: dir-root2/dir/file
|
||||
# owner: 22222
|
||||
# group: root
|
||||
user::rw-
|
||||
user:22222:rwx #effective:rw-
|
||||
group::r-x #effective:r--
|
||||
mask::rw-
|
||||
other::r--
|
||||
|
||||
== cleanup
|
||||
@@ -56,3 +56,4 @@ mv: cannot move '/mnt/test/test/basic-posix-consistency/dir/c/clobber' to '/mnt/
|
||||
== inode indexes match after removing and syncing
|
||||
== concurrent creates make one file
|
||||
one-file
|
||||
== cleanup
|
||||
|
||||
@@ -25,3 +25,4 @@ rc: 0
|
||||
equal_prepared
|
||||
large_prepared
|
||||
resized larger test rc: 0
|
||||
== cleanup
|
||||
|
||||
4
tests/golden/format-version-forward-back
Normal file
4
tests/golden/format-version-forward-back
Normal file
@@ -0,0 +1,4 @@
|
||||
== ensuring utils and module for old versions
|
||||
== unmounting test fs and removing test module
|
||||
== testing combinations of old and new format versions
|
||||
== restoring test module and mount
|
||||
24
tests/golden/projects
Normal file
24
tests/golden/projects
Normal file
@@ -0,0 +1,24 @@
|
||||
== default new files don't have project
|
||||
0
|
||||
== set new project on files and dirs
|
||||
8675309
|
||||
8675309
|
||||
== non-root can see id
|
||||
8675309
|
||||
== can use IDs around long width limits
|
||||
2147483647
|
||||
2147483648
|
||||
4294967295
|
||||
9223372036854775807
|
||||
9223372036854775808
|
||||
18446744073709551615
|
||||
== created files and dirs inherit project id
|
||||
8675309
|
||||
8675309
|
||||
== inheritance continues
|
||||
8675309
|
||||
== clearing project id stops inheritance
|
||||
0
|
||||
0
|
||||
== o_tmpfile creations inherit dir
|
||||
8675309
|
||||
41
tests/golden/quota
Normal file
41
tests/golden/quota
Normal file
@@ -0,0 +1,41 @@
|
||||
== prepare dir with write perm for test ids
|
||||
== test assumes starting with no rules, empty list
|
||||
== add rule
|
||||
7 13,L,- 15,L,- 17,L,- I 33 -
|
||||
== list is empty again after delete
|
||||
== can change limits without deleting
|
||||
1 1,L,- 1,L,- 1,L,- I 100 -
|
||||
1 1,L,- 1,L,- 1,L,- I 101 -
|
||||
1 1,L,- 1,L,- 1,L,- I 99 -
|
||||
== wipe and restore rules in bulk
|
||||
7 15,L,- 0,L,- 0,L,- I 33 -
|
||||
7 14,L,- 0,L,- 0,L,- I 33 -
|
||||
7 13,L,- 0,L,- 0,L,- I 33 -
|
||||
7 12,L,- 0,L,- 0,L,- I 33 -
|
||||
7 11,L,- 0,L,- 0,L,- I 33 -
|
||||
7 10,L,- 0,L,- 0,L,- I 33 -
|
||||
7 15,L,- 0,L,- 0,L,- I 33 -
|
||||
7 14,L,- 0,L,- 0,L,- I 33 -
|
||||
7 13,L,- 0,L,- 0,L,- I 33 -
|
||||
7 12,L,- 0,L,- 0,L,- I 33 -
|
||||
7 11,L,- 0,L,- 0,L,- I 33 -
|
||||
7 10,L,- 0,L,- 0,L,- I 33 -
|
||||
== default rule prevents file creation
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
== decreasing totl allows file creation again
|
||||
== attr selecting rules prevent creation
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
== multi attr selecting doesn't prevent partial
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
== op differentiates
|
||||
== higher priority rule applies
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
== data rules with total and count prevent write and fallocate
|
||||
dd: error writing '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
fallocate: fallocate failed: Disk quota exceeded
|
||||
dd: error writing '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
fallocate: fallocate failed: Disk quota exceeded
|
||||
== added rules work after bulk restore
|
||||
touch: cannot touch '/mnt/test/test/quota/dir/file': Disk quota exceeded
|
||||
== cleanup
|
||||
28
tests/golden/retention-basic
Normal file
28
tests/golden/retention-basic
Normal file
@@ -0,0 +1,28 @@
|
||||
== setting retention on dir fails
|
||||
attr_x ioctl failed on '/mnt/test/test/retention-basic': Invalid argument (22)
|
||||
scoutfs: set-attr-x failed: Invalid argument (22)
|
||||
== set retention
|
||||
== get-attr-x shows retention
|
||||
1
|
||||
== unpriv can't clear retention
|
||||
attr_x ioctl failed on '/mnt/test/test/retention-basic/file-1': Operation not permitted (1)
|
||||
scoutfs: set-attr-x failed: Operation not permitted (1)
|
||||
== can set hidden scoutfs xattr in retention
|
||||
== setting user. xattr fails in retention
|
||||
setfattr: /mnt/test/test/retention-basic/file-1: Operation not permitted
|
||||
== file deletion fails in retention
|
||||
rm: cannot remove '/mnt/test/test/retention-basic/file-1': Operation not permitted
|
||||
== file rename fails in retention
|
||||
mv: cannot move '/mnt/test/test/retention-basic/file-1' to '/mnt/test/test/retention-basic/file-2': Operation not permitted
|
||||
== file write fails in retention
|
||||
date: write error: Operation not permitted
|
||||
== file truncate fails in retention
|
||||
truncate: failed to truncate '/mnt/test/test/retention-basic/file-1' at 0 bytes: Operation not permitted
|
||||
== setattr fails in retention
|
||||
touch: setting times of '/mnt/test/test/retention-basic/file-1': Operation not permitted
|
||||
== clear retention
|
||||
== file write
|
||||
== file rename
|
||||
== setattr
|
||||
== xattr deletion
|
||||
== cleanup
|
||||
@@ -1,2 +1,3 @@
|
||||
== create initial files
|
||||
== race stage and release
|
||||
== cleanup
|
||||
|
||||
@@ -73,6 +73,7 @@ $(basename $0) options:
|
||||
-t | Enabled trace events that match the given glob argument.
|
||||
| Multiple options enable multiple globbed events.
|
||||
-T <nr> | Multiply the original trace buffer size by nr during the run.
|
||||
-V <nr> | Set mkfs device format version.
|
||||
-X | xfstests git repo. Used by tests/xfstests.sh.
|
||||
-x | xfstests git branch to checkout and track.
|
||||
-y | xfstests ./check additional args
|
||||
@@ -176,6 +177,11 @@ while true; do
|
||||
T_TRACE_MULT="$2"
|
||||
shift
|
||||
;;
|
||||
-V)
|
||||
test -n "$2" || die "-V must have a format version argument"
|
||||
T_MKFS_FORMAT_VERSION="-V $2"
|
||||
shift
|
||||
;;
|
||||
-X)
|
||||
test -n "$2" || die "-X requires xfstests git repo dir argument"
|
||||
T_XFSTESTS_REPO="$2"
|
||||
@@ -344,7 +350,7 @@ if [ -n "$T_MKFS" ]; then
|
||||
done
|
||||
|
||||
msg "making new filesystem with $T_QUORUM quorum members"
|
||||
cmd scoutfs mkfs -f $quo $T_DATA_ALLOC_ZONE_BLOCKS \
|
||||
cmd scoutfs mkfs -f $quo $T_DATA_ALLOC_ZONE_BLOCKS $T_MKFS_FORMAT_VERSION \
|
||||
"$T_META_DEVICE" "$T_DATA_DEVICE"
|
||||
fi
|
||||
|
||||
@@ -352,7 +358,8 @@ if [ -n "$T_INSMOD" ]; then
|
||||
msg "removing and reinserting scoutfs module"
|
||||
test -e /sys/module/scoutfs && cmd rmmod scoutfs
|
||||
cmd modprobe libcrc32c
|
||||
cmd insmod "$T_KMOD/src/scoutfs.ko"
|
||||
T_MODULE="$T_KMOD/src/scoutfs.ko"
|
||||
cmd insmod "$T_MODULE"
|
||||
fi
|
||||
|
||||
if [ -n "$T_TRACE_MULT" ]; then
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
export-get-name-parent.sh
|
||||
basic-block-counts.sh
|
||||
basic-bad-mounts.sh
|
||||
basic-posix-acl.sh
|
||||
inode-items-updated.sh
|
||||
simple-inode-index.sh
|
||||
simple-staging.sh
|
||||
@@ -12,12 +13,16 @@ data-prealloc.sh
|
||||
setattr_more.sh
|
||||
offline-extent-waiting.sh
|
||||
move-blocks.sh
|
||||
projects.sh
|
||||
large-fragmented-free.sh
|
||||
format-version-forward-back.sh
|
||||
enospc.sh
|
||||
srch-safe-merge-pos.sh
|
||||
srch-basic-functionality.sh
|
||||
simple-xattr-unit.sh
|
||||
retention-basic.sh
|
||||
totl-xattr-tag.sh
|
||||
quota.sh
|
||||
lock-refleak.sh
|
||||
lock-shrink-consistency.sh
|
||||
lock-pr-cw-conflict.sh
|
||||
|
||||
71
tests/src/o_tmpfile_linkat.c
Normal file
71
tests/src/o_tmpfile_linkat.c
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (C) 2023 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
|
||||
/*
 * Open an unnamed O_TMPFILE file in dir and give it a name at lpath by
 * linking its /proc/self/fd/ entry.  A failing open() or linkat() is
 * reported with perror() and the process exits with status 1.
 */
static void linkat_tmpfile(char *dir, char *lpath)
{
	char fd_path[PATH_MAX];
	int tmp_fd = open(dir, O_RDWR | O_TMPFILE, 0777);

	if (tmp_fd < 0) {
		perror("open(O_TMPFILE)");
		exit(1);
	}

	/* the new link is made through the fd's /proc symlink */
	snprintf(fd_path, sizeof(fd_path), "/proc/self/fd/%d", tmp_fd);

	if (linkat(AT_FDCWD, fd_path, AT_FDCWD, lpath, AT_SYMLINK_FOLLOW) < 0) {
		perror("linkat");
		exit(1);
	}

	close(tmp_fd);
}
|
||||
|
||||
/*
|
||||
* Use O_TMPFILE and linkat to create a new visible file, used to test
|
||||
* the O_TMPFILE creation path by inspecting the created file.
|
||||
*/
|
||||
int main(int argc, char **argv)
{
	/* both the directory to open and the path to link are required */
	if (argc < 3) {
		printf("%s <open_dir> <linkat_path>\n", argv[0]);
		return 1;
	}

	/* argv[1] is the directory to open, argv[2] the new link's path */
	linkat_tmpfile(argv[1], argv[2]);

	return 0;
}
|
||||
110
tests/tests/basic-posix-acl.sh
Normal file
110
tests/tests/basic-posix-acl.sh
Normal file
@@ -0,0 +1,110 @@
|
||||
|
||||
#
|
||||
# test basic POSIX acl functionality.
|
||||
#
|
||||
|
||||
t_require_commands stat rm touch mkdir getfacl setfacl id sudo
|
||||
t_require_mounts 2
|
||||
|
||||
# from quota.sh
|
||||
TEST_UID=22222
|
||||
TEST_GID=44444
|
||||
|
||||
# sys_setre[ug]id() sets fs[ug]id to e[ug]id
|
||||
SET_UID="--ruid=$TEST_UID --euid=$TEST_UID"
|
||||
SET_GID="--rgid=$TEST_GID --egid=$TEST_GID --clear-groups"
|
||||
|
||||
# helper to avoid capturing dates from ls output
|
||||
L() {
	# Print type, mode, uid, gid, size and quoted name for each path.
	# "$@" is quoted so that paths containing whitespace or glob
	# characters reach stat as single, unmodified arguments.
	stat -c "%F %A %u %g %s %N" "$@"
}
|
||||
|
||||
echo "== setup test directory"
|
||||
cd "$T_D0"
|
||||
|
||||
echo "== getfacl"
|
||||
L .
|
||||
getfacl .
|
||||
|
||||
echo "== basic non-acl access through permissions"
|
||||
rm -rf dir-testuid
|
||||
mkdir dir-testuid
|
||||
ln -sf dir-testuid symlinkdir-testuid
|
||||
chown root:44444 dir-testuid
|
||||
L dir-testuid
|
||||
setpriv $SET_UID $SET_GID touch dir-testuid/file-group-write
|
||||
setpriv $SET_UID $SET_GID touch symlinkdir-testuid/symlink-file-group-write
|
||||
chmod g+w dir-testuid
|
||||
setpriv $SET_UID $SET_GID touch dir-testuid/file-group-write
|
||||
setpriv $SET_UID $SET_GID touch symlinkdir-testuid/symlink-file-group-write
|
||||
L dir-testuid/file-group-write
|
||||
L symlinkdir-testuid/symlink-file-group-write
|
||||
|
||||
echo "== basic acl access"
|
||||
rm -rf dir-root
|
||||
mkdir dir-root
|
||||
ln -sf dir-root symlinkdir-root
|
||||
L dir-root
|
||||
setpriv $SET_UID touch dir-root/file-group-write
|
||||
setpriv $SET_UID touch symlinkdir-root/file-group-write
|
||||
setfacl -m u:22222:rwx dir-root
|
||||
getfacl dir-root
|
||||
setpriv $SET_UID touch dir-root/file-group-write
|
||||
setpriv $SET_UID touch symlinkdir-root/file-group-write
|
||||
L dir-root/file-group-write
|
||||
L symlinkdir-root/file-group-write
|
||||
|
||||
echo "== directory exec"
|
||||
setpriv $SET_UID bash -c "cd dir-root && echo Success"
|
||||
setpriv $SET_UID bash -c "cd symlinkdir-root && echo Success"
|
||||
setfacl -m u:22222:rw dir-root
|
||||
getfacl dir-root
|
||||
setpriv $SET_UID bash -c "cd dir-root || echo Failed"
|
||||
setpriv $SET_UID bash -c "cd symlinkdir-root || echo Failed"
|
||||
setfacl -m g:44444:rwx dir-root
|
||||
getfacl dir-root
|
||||
setpriv $SET_GID bash -c "cd dir-root && echo Success"
|
||||
setpriv $SET_GID bash -c "cd symlinkdir-root && echo Success"
|
||||
|
||||
echo "== get/set attr"
|
||||
rm -rf file-root
|
||||
touch file-root
|
||||
L file-root
|
||||
setpriv $SET_UID getfattr -d file-root
|
||||
setpriv $SET_UID setfattr -n "user.test1" -v "Success" file-root
|
||||
setpriv $SET_UID getfattr -d file-root
|
||||
setfacl -m u:22222:rw file-root
|
||||
getfacl file-root
|
||||
setpriv $SET_UID setfattr -n "user.test2" -v "Success" file-root
|
||||
setpriv $SET_UID getfattr -d file-root
|
||||
setfacl -x u:22222 file-root
|
||||
getfacl file-root
|
||||
setpriv $SET_UID setfattr -n "user.test3" -v "Success" file-root
|
||||
setpriv $SET_UID getfattr -d file-root
|
||||
setfacl -m g:44444:rw file-root
|
||||
getfacl file-root
|
||||
setpriv $SET_GID setfattr -n "user.test4" -v "Success" file-root
|
||||
setpriv $SET_GID getfattr -d file-root
|
||||
|
||||
echo "== inheritance / default acl"
|
||||
rm -rf dir-root2
|
||||
mkdir dir-root2
|
||||
L dir-root2
|
||||
setpriv $SET_UID mkdir dir-root2/dir
|
||||
setpriv $SET_UID touch dir-root2/dir/file
|
||||
setfacl -m d:u:22222:rwx dir-root2
|
||||
getfacl dir-root2
|
||||
setpriv $SET_UID mkdir dir-root2/dir
|
||||
setpriv $SET_UID touch dir-root2/dir/file
|
||||
setfacl -m u:22222:rwx dir-root2
|
||||
getfacl dir-root2
|
||||
setpriv $SET_UID mkdir dir-root2/dir
|
||||
setpriv $SET_UID touch dir-root2/dir/file
|
||||
L dir-root2/dir
|
||||
getfacl dir-root2/dir
|
||||
L dir-root2/dir/file
|
||||
getfacl dir-root2/dir/file
|
||||
|
||||
echo "== cleanup"
|
||||
|
||||
t_pass
|
||||
@@ -210,4 +210,7 @@ done
|
||||
wait
|
||||
ls "$T_D0/concurrent"
|
||||
|
||||
echo "== cleanup"
|
||||
rm -f "$T_TMP.0" "$T_TMP.1"
|
||||
|
||||
t_pass
|
||||
|
||||
@@ -73,4 +73,7 @@ test "$large_tot" -gt "$equal_tot" ; echo "resized larger test rc: $?"
|
||||
umount "$SCR"
|
||||
losetup -d "$scr_loop"
|
||||
|
||||
echo "== cleanup"
|
||||
rm -f "$T_TMP.small" "$T_TMP.equal" "$T_TMP.large"
|
||||
|
||||
t_pass
|
||||
|
||||
@@ -28,7 +28,7 @@ while [ "$SECONDS" -lt "$END" ]; do
|
||||
for i in $(t_fs_nrs); do
|
||||
if [ "$i" -ge "$quorum_nr" ]; then
|
||||
t_umount $i &
|
||||
echo "umount $i pid $pid quo $quorum_nr" \
|
||||
echo "umount $i rid $rid quo $quorum_nr" \
|
||||
>> $T_TMP.log
|
||||
mounted[$i]=0
|
||||
fi
|
||||
@@ -53,6 +53,9 @@ while [ "$SECONDS" -lt "$END" ]; do
|
||||
|
||||
for i in "${lock_arr[@]}"; do
|
||||
if [[ ! " ${rid_arr[*]} " =~ " $i " ]]; then
|
||||
echo -e "RID($i) exists" >> $T_TMP.log
|
||||
echo -e "rid_arr:\n${rid_arr[@]}" >> $T_TMP.log
|
||||
echo -e "lock_arr:\n${lock_arr[@]}" >> $T_TMP.log
|
||||
t_fail "RID($i): exists when not mounted"
|
||||
fi
|
||||
done
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
# Test clustered parallel createmany
|
||||
#
|
||||
|
||||
t_require_commands mkdir createmany
|
||||
t_require_commands mkdir createmany bc
|
||||
t_require_mounts 2
|
||||
|
||||
COUNT=50000
|
||||
@@ -17,14 +17,14 @@ mkdir -p $T_D0/dir/0
|
||||
mkdir $T_D1/dir/1
|
||||
|
||||
echo "== measure initial createmany"
|
||||
START=$SECONDS
|
||||
START=$(date +%s.%N)
|
||||
createmany -o "$T_D0/file_" $COUNT >> $T_TMP.full
|
||||
sync
|
||||
SINGLE=$((SECONDS - START))
|
||||
echo single $SINGLE >> $T_TMP.full
|
||||
END=$(date +%s.%N)
|
||||
SINGLE=$(echo "$END - $START" | bc)
|
||||
|
||||
echo "== measure two concurrent createmany runs"
|
||||
START=$SECONDS
|
||||
START=$(date +%s.%N)
|
||||
(cd $T_D0/dir/0; createmany -o ./file_ $COUNT > /dev/null) &
|
||||
pids="$!"
|
||||
(cd $T_D1/dir/1; createmany -o ./file_ $COUNT > /dev/null) &
|
||||
@@ -33,7 +33,9 @@ for p in $pids; do
|
||||
wait $p
|
||||
done
|
||||
sync
|
||||
BOTH=$((SECONDS - START))
|
||||
END=$(date +%s.%N)
|
||||
BOTH=$(echo "$END - $START" | bc)
|
||||
|
||||
echo both $BOTH >> $T_TMP.full
|
||||
|
||||
# Multi node still adds significant overhead, even with our CW locks
|
||||
@@ -44,7 +46,7 @@ echo both $BOTH >> $T_TMP.full
|
||||
# exceed this factor should the CW locked items go back to fully
|
||||
# synchronized operation.
|
||||
FACTOR=200
|
||||
if [ "$BOTH" -gt $(($SINGLE*$FACTOR)) ]; then
|
||||
if [ $(echo "$BOTH > ( $SINGLE * $FACTOR )" | bc) == "1" ]; then
|
||||
t_fail "both createmany took $BOTH sec, more than $FACTOR x single $SINGLE sec"
|
||||
fi
|
||||
|
||||
|
||||
179
tests/tests/format-version-forward-back.sh
Normal file
179
tests/tests/format-version-forward-back.sh
Normal file
@@ -0,0 +1,179 @@
|
||||
#
|
||||
# Test our basic ability to work with different format versions.
|
||||
#
|
||||
# The current code being tested has a range of supported format
|
||||
# versions. For each of the older supported format versions we have a
|
||||
# git hash of the commit before the next greater version was introduced.
|
||||
# We build versions of the scoutfs utility and kernel module for the
|
||||
# last commit in tree that had a lesser supported version as its max
|
||||
# supported version. We use those binaries to test forward and back
|
||||
# compat as new and old code works with a persistent volume with a given
|
||||
# format version.
|
||||
#
|
||||
|
||||
#
# Succeed only if the mount at the given mount point reports the
# expected format version in its sysfs format_version file.
#
#   $1 - mount point whose sysfs directory is looked up
#   $2 - format version string the mount is expected to report
#
# The exit status is that of the final string comparison: 0 when the
# contents of the format_version file equal $2, non-zero otherwise.
#
mount_has_format_version()
{
	local mnt="$1"
	local vers="$2"
	local sysfs_fmt_vers

	# Use the mount point we were handed instead of reaching for the
	# global scratch path, so the helper checks the mount the caller
	# named.  (t_sysfs_path_from_mnt is a harness helper; presumably it
	# prints the sysfs dir for a mount point -- confirm in the harness.)
	sysfs_fmt_vers="$(t_sysfs_path_from_mnt "$mnt")/format_version"

	test "$(cat "$sysfs_fmt_vers")" == "$vers"
}
|
||||
|
||||
SCR="/mnt/scoutfs.scratch"
|
||||
|
||||
MIN=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_min:"){print $2}')
|
||||
MAX=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_max:"){print $2}')
|
||||
|
||||
echo "min: $MIN max: $MAX" > "$T_TMP.log"
|
||||
|
||||
test "$MIN" -gt 0 -a "$MAX" -gt 0 -a "$MIN" -le "$MAX" || \
|
||||
t_fail "parsed bad versions, min: $MIN max: $MAX"
|
||||
|
||||
test "$MIN" == "$MAX" && \
|
||||
t_skip "only one supported format version: $MIN"
|
||||
|
||||
# prepare dir and wipe any weird old partial state
|
||||
builds="$T_RESULTS/format_version_builds"
|
||||
mkdir -p "$builds"
|
||||
|
||||
echo "== ensuring utils and module for old versions"
|
||||
declare -A commits
|
||||
commits[1]=c3c4b080
|
||||
for vers in $(seq $MIN $((MAX - 1))); do
|
||||
dir="$builds/$vers"
|
||||
platform=$(uname -rp)
|
||||
buildmark="$dir/buildmark"
|
||||
commit="${commits[$vers]}"
|
||||
|
||||
test -n "$commit" || \
|
||||
t_fail "no commit for vers $vers"
|
||||
|
||||
# have our files for this version
|
||||
test "$(cat $buildmark 2>&1)" == "$platform" && \
|
||||
continue
|
||||
|
||||
# build as one big sequence of commands that can return failure
|
||||
(
|
||||
set -o pipefail
|
||||
|
||||
rm -rf $dir &&
|
||||
mkdir -p $dir/building &&
|
||||
cd "$T_TESTS/.." &&
|
||||
git archive --format=tar "$commit" | tar -C "$dir/building" -xf - &&
|
||||
cd - &&
|
||||
find $dir &&
|
||||
make -C "$dir/building" &&
|
||||
mv $dir/building/utils/src/scoutfs $dir &&
|
||||
mv $dir/building/kmod/src/scoutfs.ko $dir &&
|
||||
rm -rf $dir/building &&
|
||||
echo "$platform" > $buildmark &&
|
||||
find $dir &&
|
||||
cat $buildmark
|
||||
) >> "$T_TMP.log" 2>&1 || t_fail "version $vers build failed"
|
||||
done
|
||||
|
||||
echo "== unmounting test fs and removing test module"
|
||||
t_quiet t_umount_all
|
||||
t_quiet rmmod scoutfs
|
||||
|
||||
echo "== testing combinations of old and new format versions"
|
||||
mkdir -p "$SCR"
|
||||
for vers in $(seq $MIN $((MAX - 1))); do
|
||||
old_scoutfs="$builds/$vers/scoutfs"
|
||||
old_module="$builds/$vers/scoutfs.ko"
|
||||
|
||||
echo "mkfs $vers" >> "$T_TMP.log"
|
||||
t_quiet $old_scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV" \
|
||||
|| t_fail "mkfs $vers failed"
|
||||
|
||||
echo "mount $vers with $vers" >> "$T_TMP.log"
|
||||
t_quiet insmod $old_module
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$vers"
|
||||
|
||||
echo "creating files in $vers" >> "$T_TMP.log"
|
||||
t_quiet touch "$SCR/file-"{1,2,3}
|
||||
stat "$SCR"/file-* > "$T_TMP.stat" || \
|
||||
t_fail "stat in $vers failed"
|
||||
|
||||
echo "remounting $vers fs with $MAX" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
insmod "$T_MODULE"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$vers"
|
||||
|
||||
echo "verifying stat in $vers with $MAX" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "keep/update/del existing, create new in $vers" >> "$T_TMP.log"
|
||||
t_quiet touch "$SCR/file-2"
|
||||
t_quiet rm -f "$SCR/file-3"
|
||||
t_quiet touch "$SCR/file-4"
|
||||
stat "$SCR"/file-* > "$T_TMP.stat" || \
|
||||
t_fail "stat in $vers failed"
|
||||
|
||||
echo "remounting $vers fs with $vers" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
insmod "$old_module"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$vers"
|
||||
|
||||
echo "verifying stat in $vers with $vers" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "changing format vers to $MAX" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
t_quiet scoutfs change-format-version -F -V $MAX $T_EX_META_DEV "$T_EX_DATA_DEV"
|
||||
|
||||
echo "mount fs $MAX with old $vers should fail" >> "$T_TMP.log"
|
||||
insmod "$old_module"
|
||||
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR" >> "$T_TMP.log" 2>&1
|
||||
if [ "$?" == "0" ]; then
|
||||
umount "$SCR"
|
||||
t_fail "old code ver $vers able to mount new ver $MAX"
|
||||
fi
|
||||
|
||||
echo "remounting $MAX fs with $MAX" >> "$T_TMP.log"
|
||||
rmmod scoutfs
|
||||
insmod "$T_MODULE"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$MAX"
|
||||
|
||||
echo "verifying stat in $MAX with $MAX" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "keep/update/del existing, create new in $MAX" >> "$T_TMP.log"
|
||||
t_quiet touch "$SCR/file-2"
|
||||
t_quiet rm -f "$SCR/file-4"
|
||||
t_quiet touch "$SCR/file-5"
|
||||
stat "$SCR"/file-* > "$T_TMP.stat" || \
|
||||
t_fail "stat in $MAX failed"
|
||||
|
||||
echo "remounting $MAX fs with $MAX again" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$MAX"
|
||||
|
||||
echo "verifying stat in $MAX with $MAX again" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "done with old vers $vers" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
done
|
||||
|
||||
echo "== restoring test module and mount"
|
||||
insmod "$T_MODULE"
|
||||
t_mount_all
|
||||
|
||||
t_pass
|
||||
52
tests/tests/projects.sh
Normal file
52
tests/tests/projects.sh
Normal file
@@ -0,0 +1,52 @@
|
||||
|
||||
# notable id to recognize in output
|
||||
ID=8675309
|
||||
|
||||
echo "== default new files don't have project"
|
||||
touch "$T_D0/file"
|
||||
scoutfs get-attr-x -p "$T_D0/file"
|
||||
|
||||
echo "== set new project on files and dirs"
|
||||
mkdir "$T_D0/dir"
|
||||
scoutfs set-attr-x -p $ID "$T_D0/file"
|
||||
scoutfs set-attr-x -p $ID "$T_D0/dir"
|
||||
scoutfs get-attr-x -p "$T_D0/file"
|
||||
scoutfs get-attr-x -p "$T_D0/dir"
|
||||
|
||||
echo "== non-root can see id"
|
||||
chmod 644 "$T_D0/file"
|
||||
setpriv --ruid=12345 --euid=12345 scoutfs get-attr-x -p "$T_D0/file"
|
||||
|
||||
echo "== can use IDs around long width limits"
|
||||
touch "$T_D0/ids"
|
||||
for id in 0x7FFFFFFF 0x80000000 0xFFFFFFFF \
|
||||
0x7FFFFFFFFFFFFFFF 0x8000000000000000 0xFFFFFFFFFFFFFFFF; do
|
||||
scoutfs set-attr-x -p $id "$T_D0/ids"
|
||||
scoutfs get-attr-x -p "$T_D0/ids"
|
||||
done
|
||||
|
||||
echo "== created files and dirs inherit project id"
|
||||
touch "$T_D0/dir/file"
|
||||
mkdir "$T_D0/dir/sub"
|
||||
scoutfs get-attr-x -p "$T_D0/dir/file"
|
||||
scoutfs get-attr-x -p "$T_D0/dir/sub"
|
||||
|
||||
echo "== inheritance continues"
|
||||
mkdir "$T_D0/dir/sub/more"
|
||||
scoutfs get-attr-x -p "$T_D0/dir/sub/more"
|
||||
|
||||
# .. just inherits 0 :)
|
||||
echo "== clearing project id stops inheritance"
|
||||
scoutfs set-attr-x -p 0 "$T_D0/dir"
|
||||
touch "$T_D0/dir/another-file"
|
||||
mkdir "$T_D0/dir/another-sub"
|
||||
scoutfs get-attr-x -p "$T_D0/dir/another-file"
|
||||
scoutfs get-attr-x -p "$T_D0/dir/another-sub"
|
||||
|
||||
echo "== o_tmpfile creations inherit dir"
|
||||
scoutfs set-attr-x -p $ID "$T_D0/dir"
|
||||
o_tmpfile_linkat "$T_D0/dir" "$T_D0/dir/tmpfile"
|
||||
scoutfs get-attr-x -p "$T_D0/dir/tmpfile"
|
||||
|
||||
|
||||
t_pass
|
||||
153
tests/tests/quota.sh
Normal file
153
tests/tests/quota.sh
Normal file
@@ -0,0 +1,153 @@
|
||||
|
||||
TEST_UID=22222
|
||||
TEST_GID=44444
|
||||
|
||||
# sys_setreuid() set fs[uid] to e[ug]id
|
||||
SET_UID="--ruid=$TEST_UID --euid=$TEST_UID"
|
||||
SET_GID="--rgid=$TEST_GID --egid=$TEST_GID --clear-groups"
|
||||
|
||||
FILE="$T_D0/dir/file"
|
||||
|
||||
#
# Run sync and then write 1 to the drop_weak_item_cache and
# drop_quota_check_cache files under the harness debugfs path, in that
# order.
#
sync_and_drop()
{
	local knob

	sync

	for knob in drop_weak_item_cache drop_quota_check_cache; do
		echo 1 > $(t_debugfs_path)/$knob
	done
}
|
||||
|
||||
#
# Return to a clean slate between test cases: remove the test file,
# wipe the quota rules on the mount, and delete each xattr on $T_D0
# whose dumped name matches the scoutfs.totl. pattern.
#
reset_all()
{
	rm -f "$FILE"
	scoutfs quota-wipe -p "$T_M0"

	# dump all xattrs as name=value lines, keep the totl names, strip
	# the values, and remove them one at a time
	getfattr --absolute-names -d -m - "$T_D0" |
		grep "^scoutfs.totl." |
		cut -d '=' -f 1 |
		xargs -n 1 -I'{}' setfattr -x '{}' "$T_D0"
}
|
||||
|
||||
echo "== prepare dir with write perm for test ids"
|
||||
mkdir "$T_D0/dir"
|
||||
chown --quiet $TEST_UID "$T_D0/dir"
|
||||
chgrp --quiet $TEST_GID "$T_D0/dir"
|
||||
|
||||
echo "== test assumes starting with no rules, empty list"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
|
||||
echo "== add rule"
|
||||
scoutfs quota-add -p "$T_M0" -r "7 13,L,- 15,L,- 17,L,- I 33 -"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
|
||||
echo "== list is empty again after delete"
|
||||
scoutfs quota-del -p "$T_M0" -r "7 13,L,- 15,L,- 17,L,- I 33 -"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
|
||||
echo "== can change limits without deleting"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 100 -"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 101 -"
|
||||
scoutfs quota-del -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 100 -"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 99 -"
|
||||
scoutfs quota-del -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 101 -"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
scoutfs quota-del -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 99 -"
|
||||
reset_all
|
||||
|
||||
echo "== wipe and restore rules in bulk"
|
||||
for a in $(seq 10 15); do
|
||||
scoutfs quota-add -p "$T_M0" -r "7 $a,L,- 0,L,- 0,L,- I 33 -"
|
||||
done
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
scoutfs quota-list -p "$T_M0" > "$T_TMP.list"
|
||||
scoutfs quota-wipe -p "$T_M0"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
scoutfs quota-restore -p "$T_M0" < "$T_TMP.list"
|
||||
scoutfs quota-list -p "$T_M0"
|
||||
reset_all
|
||||
|
||||
echo "== default rule prevents file creation"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.1.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== decreasing totl allows file creation again"
|
||||
setfattr -x scoutfs.totl.test.1.1.1 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE"
|
||||
reset_all
|
||||
|
||||
echo "== attr selecting rules prevent creation"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S 1,L,- 1,L,- I 1 -"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_GID,G,S 1,L,- 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.1.1 -v 2 "$T_D0"
|
||||
setfattr -n scoutfs.totl.test.$TEST_GID.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE" 2>&1 | t_filter_fs
|
||||
setpriv $SET_GID touch "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== multi attr selecting doesn't prevent partial"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S $TEST_GID,G,S 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.$TEST_GID.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE"
|
||||
rm -f "$FILE"
|
||||
setpriv $SET_GID touch "$FILE"
|
||||
rm -f "$FILE"
|
||||
setpriv $SET_UID $SET_GID touch "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== op differentiates"
|
||||
# inode ops succeed in presence of data rule
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S 1,L,- 1,L,- D 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
# data ops succeed in presence of inode rule
|
||||
touch "$FILE"
|
||||
chown --quiet $TEST_UID "$FILE"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S 1,L,- 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID fallocate -l 4096 "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== higher priority rule applies"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S 1,L,- 1,L,- I 1000 -"
|
||||
scoutfs quota-add -p "$T_M0" -r "2 $TEST_UID,U,S 1,L,- 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== data rules with total and count prevent write and fallocate"
|
||||
touch "$FILE"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- D 1 -"
|
||||
setfattr -n scoutfs.totl.test.1.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
dd if=/dev/zero of="$FILE" bs=4096 count=1 conv=notrunc status=none 2>&1 | t_filter_fs
|
||||
fallocate -l 4096 "$FILE" 2>&1 | t_filter_fs
|
||||
scoutfs quota-del -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- D 1 -"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 1,L,- 1,L,- 1,L,- D 0 C"
|
||||
sync_and_drop
|
||||
dd if=/dev/zero of="$FILE" bs=4096 count=1 conv=notrunc status=none 2>&1 | t_filter_fs
|
||||
fallocate -l 4096 "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== added rules work after bulk restore"
|
||||
seq -f " 1 %.0f,U,S 1,L,- 1,L,- I 1 -" 9000050000 -1 9000000000 > "$T_TMP.lots"
|
||||
scoutfs quota-restore -p "$T_M0" < "$T_TMP.lots"
|
||||
scoutfs quota-list -p "$T_M0" > "$T_TMP.list"
|
||||
diff -u "$T_TMP.lots" "$T_TMP.list"
|
||||
scoutfs quota-add -p "$T_M0" -r "1 $TEST_UID,U,S 1,L,- 1,L,- I 1 -"
|
||||
setfattr -n scoutfs.totl.test.$TEST_UID.1.1 -v 2 "$T_D0"
|
||||
sync_and_drop
|
||||
setpriv $SET_UID touch "$FILE" 2>&1 | t_filter_fs
|
||||
reset_all
|
||||
|
||||
echo "== cleanup"
|
||||
rm -f "$T_TMP.lots" "$T_TMP.list"
|
||||
|
||||
t_pass
|
||||
57
tests/tests/retention-basic.sh
Normal file
57
tests/tests/retention-basic.sh
Normal file
@@ -0,0 +1,57 @@
|
||||
t_require_commands scoutfs touch rm setfattr
|
||||
|
||||
touch "$T_D0/file-1"
|
||||
|
||||
echo "== setting retention on dir fails"
|
||||
scoutfs set-attr-x -t 1 "$T_D0" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== set retention"
|
||||
scoutfs set-attr-x -t 1 "$T_D0/file-1"
|
||||
|
||||
echo "== get-attr-x shows retention"
|
||||
scoutfs get-attr-x -t "$T_D0/file-1"
|
||||
|
||||
echo "== unpriv can't clear retention"
|
||||
setpriv --ruid=12345 --euid=12345 scoutfs set-attr-x -t 0 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== can set hidden scoutfs xattr in retention"
|
||||
setfattr -n scoutfs.hide.srch.retention_test -v val "$T_D0/file-1"
|
||||
|
||||
echo "== setting user. xattr fails in retention"
|
||||
setfattr -n user.retention_test -v val "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== file deletion fails in retention"
|
||||
rm -f "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== file rename fails in retention"
|
||||
mv $T_D0/file-1 $T_D0/file-2 2>&1 | t_filter_fs
|
||||
|
||||
echo "== file write fails in retention"
|
||||
date >> $T_D0/file-1
|
||||
|
||||
echo "== file truncate fails in retention"
|
||||
truncate -s 0 $T_D0/file-1 2>&1 | t_filter_fs
|
||||
|
||||
echo "== setattr fails in retention"
|
||||
touch $T_D0/file-1 2>&1 | t_filter_fs
|
||||
|
||||
echo "== clear retention"
|
||||
scoutfs set-attr-x -t 0 "$T_D0/file-1"
|
||||
|
||||
echo "== file write"
|
||||
date >> $T_D0/file-1
|
||||
|
||||
echo "== file rename"
|
||||
mv $T_D0/file-1 $T_D0/file-2
|
||||
mv $T_D0/file-2 $T_D0/file-1
|
||||
|
||||
echo "== setattr"
|
||||
touch $T_D0/file-1 2>&1 | t_filter_fs
|
||||
|
||||
echo "== xattr deletion"
|
||||
setfattr -x scoutfs.hide.srch.retention_test "$T_D0/file-1"
|
||||
|
||||
echo "== cleanup"
|
||||
rm -f "$T_D0/file-1"
|
||||
|
||||
t_pass
|
||||
@@ -62,4 +62,7 @@ for r in $(seq 1 1000); do
|
||||
done
|
||||
done
|
||||
|
||||
echo "== cleanup"
|
||||
rm -f "$T_TMP.log"
|
||||
|
||||
t_pass
|
||||
|
||||
@@ -3,6 +3,7 @@ t_require_commands touch rm setfattr scoutfs find_xattrs
|
||||
read_xattr_totals()
|
||||
{
|
||||
sync
|
||||
echo 1 > $(t_debugfs_path)/drop_weak_item_cache
|
||||
scoutfs read-xattr-totals -p "$T_M0"
|
||||
}
|
||||
|
||||
@@ -112,7 +113,6 @@ for phase in create update remove; do
|
||||
echo "$k.0.0 = ${totals[$k]}, ${counts[$k]}"
|
||||
done ) | grep -v "= 0, 0$" | sort -n >> $T_TMP.check_arr
|
||||
|
||||
sync
|
||||
read_xattr_totals | sort -n >> $T_TMP.check_read
|
||||
|
||||
diff -u $T_TMP.check_arr $T_TMP.check_read || \
|
||||
|
||||
@@ -7,7 +7,7 @@ FMTIOC_H := format.h ioctl.h
|
||||
FMTIOC_KMOD := $(addprefix ../kmod/src/,$(FMTIOC_H))
|
||||
|
||||
CFLAGS := -Wall -O2 -Werror -D_FILE_OFFSET_BITS=64 -g -msse4.2 \
|
||||
-I src/ -fno-strict-aliasing \
|
||||
-fno-strict-aliasing \
|
||||
-DSCOUTFS_FORMAT_HASH=0x$(SCOUTFS_FORMAT_HASH)LLU
|
||||
|
||||
ifneq ($(wildcard $(firstword $(FMTIOC_KMOD))),)
|
||||
@@ -15,9 +15,8 @@ CFLAGS += -I../kmod/src
|
||||
endif
|
||||
|
||||
BIN := src/scoutfs
|
||||
OBJ_DIRS := src src/check
|
||||
OBJ := $(foreach dir,$(OBJ_DIRS),$(patsubst %.c,%.o,$(wildcard $(dir)/*.c)))
|
||||
DEPS := $(foreach dir,$(OBJ_DIRS),$(wildcard $(dir)/*.d))
|
||||
OBJ := $(patsubst %.c,%.o,$(wildcard src/*.c))
|
||||
DEPS := $(wildcard */*.d)
|
||||
|
||||
all: $(BIN)
|
||||
|
||||
|
||||
@@ -270,6 +270,21 @@ metadata that is bound to a specific volume and should not be
|
||||
transferred with the file by tools that read extended attributes, like
|
||||
.BR tar(1) .
|
||||
.TP
|
||||
.B .indx.
|
||||
Attributes with the .indx. tag add the inode containing the attribute to
|
||||
a filesystem-wide index. The name of the extended attribute must end
|
||||
with strings representing two values separated by dots. The first value
|
||||
is an unsigned 8bit value and the second is an unsigned 64bit value.
|
||||
These attributes can only be modified with root privileges and the
|
||||
attributes can not have a value.
|
||||
.sp
|
||||
The inodes in the index are stored in increasing sort order of the
|
||||
values, with the first u8 value being most significant. Inodes can be
|
||||
at many positions as tracked by many extended attributes, and their
|
||||
position follows the creation, renaming, or deletion of the attributes.
|
||||
The index can be read with the read-xattr-index command which uses the
|
||||
underlying READ_XATTR_INDEX ioctl.
|
||||
.TP
|
||||
.B .srch.
|
||||
Attributes with the .srch. tag are indexed so that they can be
|
||||
found by the
|
||||
@@ -295,6 +310,36 @@ with the
|
||||
ioctl.
|
||||
.RE
|
||||
|
||||
.SH FILE RETENTION MODE
|
||||
A file can be set to retention mode by setting the
|
||||
.IB RETENTION
|
||||
attribute with the
|
||||
.IB SET_ATTR_X
|
||||
ioctl. This flag can only be set on regular files and requires root
|
||||
permission (the
|
||||
.IB CAP_SYS_ADMIN
|
||||
capability).
|
||||
.sp
|
||||
Once in retention mode all modifications of the file will fail. The
|
||||
only exceptions are that system extended attributes (all those without
|
||||
the "user." prefix) may be modified. The retention bit may be cleared
|
||||
with sufficient privileges to remove the retention restrictions on
|
||||
other modifications.
|
||||
.RE
|
||||
|
||||
.SH PROJECT IDs
|
||||
All inodes have a project ID attribute that can be set via the
|
||||
SET_ATTR_X ioctl and displayed with the GET_ATTR_X ioctl. Project IDs
|
||||
are an unsigned 64bit value and the value of 0 is reserved to indicate
|
||||
that no project ID is assigned. If a project ID is set on a directory
|
||||
then all inodes created with it as the initial parent inherit that ID,
|
||||
for all file types. This includes files initially unlinked from the
|
||||
namespace when created with O_TMPFILE. Project IDs are only
|
||||
automatically inherited from the parent dir on initial creation.
|
||||
They're not changed as directory entry links to the inode are created
|
||||
or renamed.
|
||||
.RE
|
||||
|
||||
.SH FORMAT VERSION
|
||||
The format version defines the layout and use of structures stored on
|
||||
devices and passed over the network. The version is incremented for
|
||||
@@ -373,6 +418,19 @@ The version that a mount is using is shown in the
|
||||
file in the mount's sysfs directory, typically
|
||||
.I /sys/fs/scoutfs/f.FSID.r.RID/
|
||||
.RE
|
||||
.sp
|
||||
The defined format versions are:
|
||||
.RS
|
||||
.TP
|
||||
.sp
|
||||
.B 1
|
||||
Initial format version.
|
||||
.TP
|
||||
.B 2
|
||||
Added retention mode by setting the retention attribute. Added the
|
||||
project ID inode attribute. Added quota rules and enforcement. Added
|
||||
the .indx. extended attribute tag.
|
||||
.RE
|
||||
|
||||
.SH CORRUPTION DETECTION
|
||||
A
|
||||
|
||||
@@ -209,6 +209,16 @@ A path within a ScoutFS filesystem.
|
||||
.RE
|
||||
.PD
|
||||
|
||||
.TP
|
||||
.BI "get-attr-x FILE"
|
||||
.sp
|
||||
Display ScoutFS-specific attributes from a file. If no options are
|
||||
given then all the attributes that the command supports will be
|
||||
displayed. If attributes are specified with options then only those
|
||||
attributes are displayed. If only one attribute is specified then it
|
||||
will not have a label prefix in the display output. The --help option
|
||||
will list the attributes that the command supports. The file system may
|
||||
support a different set of attributes.
|
||||
.TP
|
||||
.BI "get-referring-entries [-p|--path PATH] INO"
|
||||
.sp
|
||||
@@ -506,6 +516,15 @@ A path within a ScoutFS filesystem.
|
||||
.RE
|
||||
.PD
|
||||
|
||||
.TP
|
||||
.BI "set-attr-x FILE"
|
||||
.sp
|
||||
Set ScoutFS-specific attributes on a file. Only the attributes that are
|
||||
specified by options will be set. The --help option will list the
|
||||
attributes that the command understands. The file system may support a
|
||||
different set of attributes.
|
||||
.PD
|
||||
|
||||
.TP
|
||||
.BI "setattr FILE [-d, --data-version=VERSION [-s, --size=SIZE [-o, --offline]]] [-t, --ctime=TIMESPEC]"
|
||||
.sp
|
||||
|
||||
304
utils/src/attr_x.c
Normal file
304
utils/src/attr_x.c
Normal file
@@ -0,0 +1,304 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <argp.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "ioctl.h"
|
||||
#include "parse.h"
|
||||
#include "cmd.h"
|
||||
|
||||
struct attr_x_args {
|
||||
bool set;
|
||||
char *filename;
|
||||
struct scoutfs_ioctl_inode_attr_x iax;
|
||||
};
|
||||
|
||||
/*
 * Print one attribute if its SCOUTFS_IOC_IAX_<name> bit is set in the
 * x_mask of the struct that iax points to.  When more than one bit is
 * set in the mask the value is prefixed with "label: " so that the
 * lines can be told apart; a single requested attribute is printed
 * bare.
 *
 * The iax argument is parenthesized so that any pointer expression can
 * be passed.  The declaration of x_mask isn't visible from this file;
 * __builtin_popcountll() is used so that every bit is counted whatever
 * the field's width (up to 64 bits), where __builtin_popcount() would
 * only see an unsigned int's worth.
 */
#define pr(iax, name, label, fmt, args...)				\
do {									\
	if (((iax)->x_mask & SCOUTFS_IOC_IAX_##name)) {			\
		if (__builtin_popcountll((iax)->x_mask) > 1)		\
			printf(label ": " fmt "\n", ##args);		\
		else							\
			printf(fmt "\n", ##args);			\
	}								\
} while (0)

/*
 * Print a boolean attribute: 1 if the SCOUTFS_IOC_IAX_B_<name> bit is
 * set in the bits field, 0 otherwise.  Subject to the same x_mask test
 * and labelling as pr().
 */
#define prb(iax, name, label)						\
	pr(iax, name, label, "%u", !!((iax)->bits & SCOUTFS_IOC_IAX_B_##name))
|
||||
|
||||
static int do_attr_x(struct attr_x_args *args)
|
||||
{
|
||||
struct scoutfs_ioctl_inode_attr_x *iax = &args->iax;
|
||||
int fd = -1;
|
||||
int ret;
|
||||
int op;
|
||||
|
||||
if (args->set) {
|
||||
/* nothing to do if not setting */
|
||||
if (iax->x_mask == 0)
|
||||
return 0;
|
||||
op = SCOUTFS_IOC_SET_ATTR_X;
|
||||
} else {
|
||||
/* get all known if none specified */
|
||||
if (iax->x_mask == 0)
|
||||
iax->x_mask = ~SCOUTFS_IOC_IAX__UNKNOWN;
|
||||
op = SCOUTFS_IOC_GET_ATTR_X;
|
||||
}
|
||||
|
||||
fd = open(args->filename, O_RDONLY);
|
||||
if (fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open '%s': %s (%d)\n",
|
||||
args->filename, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = ioctl(fd, op, iax);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "attr_x ioctl failed on '%s': "
|
||||
"%s (%d)\n", args->filename, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!args->set) {
|
||||
pr(iax, META_SEQ, "meta_seq", "%llu", iax->meta_seq);
|
||||
pr(iax, DATA_SEQ, "data_seq", "%llu", iax->data_seq);
|
||||
pr(iax, DATA_VERSION, "data_version", "%llu", iax->data_version);
|
||||
pr(iax, ONLINE_BLOCKS, "online_blocks", "%llu", iax->online_blocks);
|
||||
pr(iax, OFFLINE_BLOCKS, "offline_blocks", "%llu", iax->offline_blocks);
|
||||
pr(iax, CTIME, "ctime", "%llu.%u", iax->ctime_sec, iax->ctime_nsec);
|
||||
pr(iax, CRTIME, "crtime", "%llu.%u", iax->crtime_sec, iax->crtime_nsec);
|
||||
pr(iax, SIZE, "size", "%llu", iax->size);
|
||||
prb(iax, RETENTION, "retention");
|
||||
pr(iax, PROJECT_ID, "project_id", "%llu", iax->project_id);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (fd >= 0)
|
||||
close(fd);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called for both get and set. The get calls won't have
|
||||
* arguments and are only setting the mask. The set calls parse the
|
||||
* value to set. We could have defaults by making set option arguments
|
||||
* optional, like setting the current time for timestamps, but that
|
||||
* hasn't been needed.
|
||||
*
|
||||
* Option value parsing places no constraints on the attributes or
|
||||
* values themselves once parsed. This lets us use the set command to
|
||||
* test the kernel's testing for invalid attribute combinations and
|
||||
* values.
|
||||
*/
|
||||
static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct attr_x_args *args = state->input;
|
||||
struct timespec ts;
|
||||
int ret;
|
||||
u64 x;
|
||||
|
||||
switch (key) {
|
||||
case 'm':
|
||||
args->iax.x_mask |= SCOUTFS_IOC_IAX_META_SEQ;
|
||||
if (arg) {
|
||||
ret = parse_u64(arg, &args->iax.meta_seq);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 'd':
|
||||
args->iax.x_mask |= SCOUTFS_IOC_IAX_DATA_SEQ;
|
||||
if (arg) {
|
||||
ret = parse_u64(arg, &args->iax.data_seq);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 'v':
|
||||
args->iax.x_mask |= SCOUTFS_IOC_IAX_DATA_VERSION;
|
||||
if (arg) {
|
||||
ret = parse_u64(arg, &args->iax.data_version);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (args->iax.data_version == 0)
|
||||
argp_error(state, "data version must not be 0");
|
||||
}
|
||||
break;
|
||||
case 'n':
|
||||
args->iax.x_mask |= SCOUTFS_IOC_IAX_ONLINE_BLOCKS;
|
||||
if (arg) {
|
||||
ret = parse_u64(arg, &args->iax.online_blocks);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 'f':
|
||||
args->iax.x_mask |= SCOUTFS_IOC_IAX_OFFLINE_BLOCKS;
|
||||
if (arg) {
|
||||
ret = parse_u64(arg, &args->iax.offline_blocks);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 'c':
|
||||
args->iax.x_mask |= SCOUTFS_IOC_IAX_CTIME;
|
||||
if (arg) {
|
||||
ret = parse_timespec(arg, &ts);
|
||||
if (ret)
|
||||
return ret;
|
||||
args->iax.ctime_sec = ts.tv_sec;
|
||||
args->iax.ctime_nsec = ts.tv_nsec;
|
||||
}
|
||||
break;
|
||||
case 'r':
|
||||
args->iax.x_mask |= SCOUTFS_IOC_IAX_CRTIME;
|
||||
if (arg) {
|
||||
ret = parse_timespec(arg, &ts);
|
||||
if (ret)
|
||||
return ret;
|
||||
args->iax.crtime_sec = ts.tv_sec;
|
||||
args->iax.crtime_nsec = ts.tv_nsec;
|
||||
}
|
||||
break;
|
||||
case 's':
|
||||
args->iax.x_mask |= SCOUTFS_IOC_IAX_SIZE;
|
||||
if (arg) {
|
||||
ret = parse_u64(arg, &args->iax.size);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case 't':
|
||||
args->iax.x_mask |= SCOUTFS_IOC_IAX_RETENTION;
|
||||
if (arg) {
|
||||
ret = parse_u64(arg, &x);
|
||||
if (ret)
|
||||
return ret;
|
||||
if (x)
|
||||
args->iax.bits |= SCOUTFS_IOC_IAX_B_RETENTION;
|
||||
}
|
||||
break;
|
||||
case 'p':
|
||||
args->iax.x_mask |= SCOUTFS_IOC_IAX_PROJECT_ID;
|
||||
if (arg) {
|
||||
ret = parse_u64(arg, &args->iax.project_id);
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
break;
|
||||
case ARGP_KEY_ARG:
|
||||
if (!args->filename)
|
||||
args->filename = strdup_or_error(state, arg);
|
||||
else
|
||||
argp_error(state, "more than one argument given");
|
||||
break;
|
||||
case ARGP_KEY_FINI:
|
||||
if (!args->filename)
|
||||
argp_error(state, "no filename given");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The get options are derived from these by copying the struct and
|
||||
* modifying fields.
|
||||
*/
|
||||
static struct argp_option set_options[] = {
|
||||
{ "meta_seq", 'm', "SEQ", 0, "Inode Metadata change index sequence number"},
|
||||
{ "data_seq", 'd', "SEQ", 0, "File Data change index sequence number"},
|
||||
{ "data_version", 'v', "VERSION", 0, "File Data contents version"},
|
||||
{ "online_blocks", 'n', "COUNT", 0, "Online data block count"},
|
||||
{ "offline_blocks", 'f', "COUNT", 0, "Offline data block count"},
|
||||
{ "ctime", 'c', "SECS.NSECS", 0, "Inode change time (posix ctime)"},
|
||||
{ "crtime", 'r', "SECS.NSECS", 0, "ScoutFS creation time"},
|
||||
{ "size", 's', "SIZE", 0, "Inode i_size field"},
|
||||
{ "retention", 't', "0|1", 0, "Retention flag"},
|
||||
{ "project_id", 'p', "PROJECT_ID", 0, "Project ID"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static struct argp get_argp = {
|
||||
NULL, /* dynamically built */
|
||||
parse_opt,
|
||||
"FILE",
|
||||
"get extensible file attributes"
|
||||
};
|
||||
|
||||
static int get_attr_x_cmd(int argc, char **argv)
|
||||
{
|
||||
struct attr_x_args args = {0,};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&get_argp, argc, argv, 0, NULL, &args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_attr_x(&args);
|
||||
}
|
||||
|
||||
/*
|
||||
* The get options match the set options but don't take argument
|
||||
* values to set.
|
||||
*/
|
||||
static void build_get_options(void)
|
||||
{
|
||||
struct argp_option **opts = (struct argp_option **)&get_argp.options;
|
||||
int i;
|
||||
|
||||
*opts = calloc(array_size(set_options), sizeof(set_options[0]));
|
||||
assert(*opts);
|
||||
|
||||
memcpy(*opts, set_options, array_size(set_options) * sizeof(set_options[0]));
|
||||
|
||||
for (i = 0; i < array_size(set_options) - 1; i++)
|
||||
(*opts)[i].arg = NULL;
|
||||
}
|
||||
|
||||
/*
 * Constructor function: builds the get option table and then passes the
 * "get-attr-x" name, its argp description, and its handler to
 * cmd_register_argp().  The table has to be built first because
 * get_argp's options pointer is NULL until build_get_options() runs.
 */
static void __attribute__((constructor)) get_ctor(void)
|
||||
{
|
||||
build_get_options();
|
||||
|
||||
cmd_register_argp("get-attr-x", &get_argp, GROUP_AGENT, get_attr_x_cmd);
|
||||
}
|
||||
|
||||
static struct argp set_argp = {
|
||||
set_options,
|
||||
parse_opt,
|
||||
"FILE",
|
||||
"Set extensible file attributes"
|
||||
};
|
||||
|
||||
static int set_attr_x_cmd(int argc, char **argv)
|
||||
{
|
||||
struct attr_x_args args = {.set = true,};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&set_argp, argc, argv, 0, NULL, &args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_attr_x(&args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) set_ctor(void)
|
||||
{
|
||||
cmd_register_argp("set-attr-x", &set_argp, GROUP_AGENT, set_attr_x_cmd);
|
||||
}
|
||||
@@ -10,11 +10,6 @@
|
||||
* Just a quick simple native bitmap.
|
||||
*/
|
||||
|
||||
/* Return 1 if bit nr is set in the array of longs at bits, 0 if not. */
int test_bit(unsigned long *bits, u64 nr)
{
	unsigned long word = bits[nr / BITS_PER_LONG];
	unsigned long mask = 1UL << (nr & (BITS_PER_LONG - 1));

	return (word & mask) != 0;
}
|
||||
|
||||
void set_bit(unsigned long *bits, u64 nr)
|
||||
{
|
||||
bits[nr / BITS_PER_LONG] |= 1UL << (nr & (BITS_PER_LONG - 1));
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
#ifndef _BITMAP_H_
|
||||
#define _BITMAP_H_
|
||||
|
||||
int test_bit(unsigned long *bits, u64 nr);
|
||||
void set_bit(unsigned long *bits, u64 nr);
|
||||
void clear_bit(unsigned long *bits, u64 nr);
|
||||
u64 find_next_set_bit(unsigned long *start, u64 from, u64 total);
|
||||
|
||||
@@ -94,6 +94,18 @@ static int do_change_fmt_vers(struct change_fmt_vers_args *args)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((le64_to_cpu(meta_super->flags) & SCOUTFS_FLAG_IS_META_BDEV) == 0) {
|
||||
printf("device argument #1 is not a meta device (swap arguments?)\n");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((le64_to_cpu(data_super->flags) & SCOUTFS_FLAG_IS_META_BDEV) != 0) {
|
||||
printf("device argument #2 is not a data device (swap arguments?)\n");
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (le64_to_cpu(meta_super->fmt_vers) < SCOUTFS_FORMAT_VERSION_MIN ||
|
||||
le64_to_cpu(meta_super->fmt_vers) > SCOUTFS_FORMAT_VERSION_MAX) {
|
||||
fprintf(stderr, "meta super block has format version %llu outside of supported version range %u-%u",
|
||||
@@ -119,6 +131,16 @@ static int do_change_fmt_vers(struct change_fmt_vers_args *args)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (le64_to_cpu(meta_super->fmt_vers) > args->fmt_vers ||
|
||||
le64_to_cpu(data_super->fmt_vers) > args->fmt_vers) {
|
||||
ret = -EPERM;
|
||||
printf("Downgrade of Meta Format Version: %llu and Data Format Version: %llu to Format Version: %llu is not allowed\n",
|
||||
le64_to_cpu(meta_super->fmt_vers),
|
||||
le64_to_cpu(data_super->fmt_vers),
|
||||
args->fmt_vers);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (le64_to_cpu(meta_super->fmt_vers) != args->fmt_vers) {
|
||||
meta_super->fmt_vers = cpu_to_le64(args->fmt_vers);
|
||||
|
||||
|
||||
@@ -1,159 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "bitmap.h"
|
||||
#include "key.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "block.h"
|
||||
#include "btree.h"
|
||||
#include "extent.h"
|
||||
#include "iter.h"
|
||||
#include "sns.h"
|
||||
|
||||
/*
|
||||
* We check the list blocks serially.
|
||||
*
|
||||
* XXX:
|
||||
* - compare ref seqs
|
||||
* - detect cycles?
|
||||
*/
|
||||
int alloc_list_meta_iter(struct scoutfs_alloc_list_head *lhead, extent_cb_t cb, void *cb_arg)
{
	struct scoutfs_alloc_list_block *lblk;
	struct scoutfs_block_ref ref;
	struct block *blk = NULL;
	u64 blkno;
	int ret;

	/* follow the chain of next refs until a zero blkno ends the list */
	for (ref = lhead->ref; ref.blkno; ) {
		blkno = le64_to_cpu(ref.blkno);

		/* report the list block itself as a single-block extent */
		ret = cb(blkno, 1, cb_arg);
		if (ret < 0)
			return xlate_iter_errno(ret);

		ret = block_get(&blk, blkno, 0);
		if (ret < 0)
			return ret;

		lblk = block_buf(blk);
		/* XXX verify block */
		/* XXX sort? maybe */

		/* copy the next ref out before dropping the block */
		ref = lblk->next;

		block_put(&blk);
	}

	return 0;
}
|
||||
|
||||
/* Iterate over the alloc root's btree with btree_meta_iter(). */
int alloc_root_meta_iter(struct scoutfs_alloc_root *root, extent_cb_t cb, void *cb_arg)
{
	int ret = btree_meta_iter(&root->root, cb, cb_arg);

	return ret;
}
|
||||
|
||||
/*
 * Walk the chain of alloc list blocks starting at the list head and
 * call the callback with a single-block extent for each blkno stored
 * in each list block.  A negative callback return stops the walk and
 * is passed through xlate_iter_errno() before being returned.
 */
int alloc_list_extent_iter(struct scoutfs_alloc_list_head *lhead, extent_cb_t cb, void *cb_arg)
{
	struct scoutfs_alloc_list_block *lblk;
	struct scoutfs_block_ref ref = lhead->ref;
	struct block *blk = NULL;
	u64 blkno;
	int ret;
	int i;

	while (ref.blkno) {
		blkno = le64_to_cpu(ref.blkno);

		ret = block_get(&blk, blkno, 0);
		if (ret < 0)
			return ret;

		sns_push("alloc_list_block", blkno, 0);

		lblk = block_buf(blk);
		/* XXX verify block */
		/* XXX sort? maybe */

		/* the stored blknos are the nr entries starting at index start */
		ret = 0;
		for (i = 0; ret >= 0 && i < le32_to_cpu(lblk->nr); i++) {
			blkno = le64_to_cpu(lblk->blknos[le32_to_cpu(lblk->start) + i]);
			ret = cb(blkno, 1, cb_arg);
		}

		/* always drop the block and pop before acting on an error */
		ref = lblk->next;
		block_put(&blk);
		sns_pop();

		if (ret < 0)
			return xlate_iter_errno(ret);
	}

	return 0;
}
|
||||
|
||||
static bool valid_free_extent_key(struct scoutfs_key *key)
|
||||
{
|
||||
return (key->sk_zone == SCOUTFS_FREE_EXTENT_BLKNO_ZONE ||
|
||||
key->sk_zone == SCOUTFS_FREE_EXTENT_ORDER_ZONE) &&
|
||||
(!key->_sk_fourth && !key->sk_type &&
|
||||
(key->sk_zone == SCOUTFS_FREE_EXTENT_ORDER_ZONE || !key->_sk_third));
|
||||
}
|
||||
|
||||
/*
 * btree item callback that turns a BLKNO zone free extent item into a
 * (start, len) call to the extent callback in cb_arg.  Items with a
 * value or an invalid key return -EIO and the first ORDER zone key
 * returns -ECHECK_ITER_DONE.
 */
static int free_item_cb(struct scoutfs_key *key, void *val, u16 val_len, void *cb_arg)
{
	struct extent_cb_arg_t *ecba = cb_arg;
	u64 start;
	u64 len;

	/* XXX not sure these eios are what we want */
	if (val_len != 0 || !valid_free_extent_key(key))
		return -EIO;

	if (key->sk_zone == SCOUTFS_FREE_EXTENT_ORDER_ZONE)
		return -ECHECK_ITER_DONE;

	/* the key stores the last block and the length of the extent */
	len = le64_to_cpu(key->skfb_len);
	start = le64_to_cpu(key->skfb_end) - len + 1;

	return ecba->cb(start, len, ecba->cb_arg);
}
|
||||
|
||||
/*
|
||||
* Call the callback with each of the primary BLKNO free extents stored
|
||||
 * in items in the given alloc root.  It doesn't visit the secondary
|
||||
* ORDER extents.
|
||||
*/
|
||||
int alloc_root_extent_iter(struct scoutfs_alloc_root *root, extent_cb_t cb, void *cb_arg)
{
	/* bundle the caller's callback so free_item_cb() can invoke it */
	struct extent_cb_arg_t wrapped = {
		.cb = cb,
		.cb_arg = cb_arg,
	};

	return btree_item_iter(&root->root, free_item_cb, &wrapped);
}
|
||||
@@ -1,12 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_ALLOC_H
|
||||
#define _SCOUTFS_UTILS_CHECK_ALLOC_H
|
||||
|
||||
#include "extent.h"
|
||||
|
||||
int alloc_list_meta_iter(struct scoutfs_alloc_list_head *lhead, extent_cb_t cb, void *cb_arg);
|
||||
int alloc_root_meta_iter(struct scoutfs_alloc_root *root, extent_cb_t cb, void *cb_arg);
|
||||
|
||||
int alloc_list_extent_iter(struct scoutfs_alloc_list_head *lhead, extent_cb_t cb, void *cb_arg);
|
||||
int alloc_root_extent_iter(struct scoutfs_alloc_root *root, extent_cb_t cb, void *cb_arg);
|
||||
|
||||
#endif
|
||||
@@ -1,564 +0,0 @@
|
||||
#define _ISOC11_SOURCE /* aligned_alloc */
|
||||
#define _DEFAULT_SOURCE /* syscall() */
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <sys/syscall.h>
|
||||
#include <linux/aio_abi.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "list.h"
|
||||
#include "cmp.h"
|
||||
#include "hash.h"
|
||||
|
||||
#include "block.h"
|
||||
#include "debug.h"
|
||||
#include "eno.h"
|
||||
|
||||
static struct block_data {
|
||||
struct list_head *hash_lists;
|
||||
size_t hash_nr;
|
||||
|
||||
struct list_head active_head;
|
||||
struct list_head inactive_head;
|
||||
struct list_head dirty_list;
|
||||
size_t nr_active;
|
||||
size_t nr_inactive;
|
||||
size_t nr_dirty;
|
||||
|
||||
int meta_fd;
|
||||
size_t max_cached;
|
||||
size_t nr_events;
|
||||
|
||||
aio_context_t ctx;
|
||||
struct iocb *iocbs;
|
||||
struct iocb **iocbps;
|
||||
struct io_event *events;
|
||||
} global_bdat;
|
||||
|
||||
struct block {
|
||||
struct list_head hash_head;
|
||||
struct list_head lru_head;
|
||||
struct list_head dirty_head;
|
||||
struct list_head submit_head;
|
||||
unsigned long refcount;
|
||||
unsigned long uptodate:1,
|
||||
active:1;
|
||||
u64 blkno;
|
||||
void *buf;
|
||||
size_t size;
|
||||
};
|
||||
|
||||
#define BLK_FMT \
|
||||
"blkno %llu rc %ld d %u a %u"
|
||||
#define BLK_ARG(blk) \
|
||||
(blk)->blkno, (blk)->refcount, !list_empty(&(blk)->dirty_head), blk->active
|
||||
#define debug_blk(blk, fmt, args...) \
|
||||
debug(fmt " " BLK_FMT, ##args, BLK_ARG(blk))
|
||||
|
||||
/*
|
||||
 * This just allocates and initializes the block.  The caller is
|
||||
* responsible for putting it on the appropriate initial lists and
|
||||
* managing refcounts.
|
||||
*/
|
||||
static struct block *alloc_block(struct block_data *bdat, u64 blkno, size_t size)
{
	struct block *blk = calloc(1, sizeof(struct block));

	if (!blk)
		return NULL;

	blk->buf = aligned_alloc(4096, size); /* XXX static alignment :/ */
	if (!blk->buf) {
		free(blk);
		return NULL;
	}

	/* every list head starts out empty, the caller links the block */
	INIT_LIST_HEAD(&blk->hash_head);
	INIT_LIST_HEAD(&blk->lru_head);
	INIT_LIST_HEAD(&blk->dirty_head);
	INIT_LIST_HEAD(&blk->submit_head);
	blk->blkno = blkno;
	blk->size = size;

	return blk;
}
|
||||
|
||||
static void free_block(struct block_data *bdat, struct block *blk)
|
||||
{
|
||||
debug_blk(blk, "free");
|
||||
|
||||
if (!list_empty(&blk->lru_head)) {
|
||||
if (blk->active)
|
||||
bdat->nr_active--;
|
||||
else
|
||||
bdat->nr_inactive--;
|
||||
list_del(&blk->lru_head);
|
||||
}
|
||||
|
||||
if (!list_empty(&blk->dirty_head)) {
|
||||
bdat->nr_dirty--;
|
||||
list_del(&blk->dirty_head);
|
||||
}
|
||||
|
||||
if (!list_empty(&blk->hash_head))
|
||||
list_del(&blk->hash_head);
|
||||
|
||||
if (!list_empty(&blk->submit_head))
|
||||
list_del(&blk->submit_head);
|
||||
|
||||
free(blk->buf);
|
||||
free(blk);
|
||||
}
|
||||
|
||||
static bool blk_is_dirty(struct block *blk)
|
||||
{
|
||||
return !list_empty(&blk->dirty_head);
|
||||
}
|
||||
|
||||
/*
|
||||
* Rebalance the cache.
|
||||
*
|
||||
* First we shrink the cache to limit it to max_cached blocks.
|
||||
* Logically, we walk from oldest to newest in the inactive list and
|
||||
* then in the active list. Since these lists are physically one
|
||||
* list_head list we achieve this with a reverse walk starting from the
|
||||
* active head.
|
||||
*
|
||||
 * Then we rebalance the size of the two lists.  The constraint is that
|
||||
* we don't let the active list grow larger than the inactive list. We
|
||||
* move blocks from the oldest tail of the active list to the newest
|
||||
* head of the inactive list.
|
||||
*
|
||||
* <- [active head] <-> [ .. active list .. ] <-> [inactive head] <-> [ .. inactive list .. ] ->
|
||||
*/
|
||||
static void rebalance_cache(struct block_data *bdat)
|
||||
{
|
||||
struct block *blk;
|
||||
struct block *blk_;
|
||||
|
||||
list_for_each_entry_safe_reverse(blk, blk_, &bdat->active_head, lru_head) {
|
||||
if ((bdat->nr_active + bdat->nr_inactive) < bdat->max_cached)
|
||||
break;
|
||||
|
||||
if (&blk->lru_head == &bdat->inactive_head || blk->refcount > 0 ||
|
||||
blk_is_dirty(blk))
|
||||
continue;
|
||||
|
||||
free_block(bdat, blk);
|
||||
}
|
||||
|
||||
list_for_each_entry_safe_reverse(blk, blk_, &bdat->inactive_head, lru_head) {
|
||||
if (bdat->nr_active <= bdat->nr_inactive || &blk->lru_head == &bdat->active_head)
|
||||
break;
|
||||
|
||||
list_move(&blk->lru_head, &bdat->inactive_head);
|
||||
blk->active = 0;
|
||||
bdat->nr_active--;
|
||||
bdat->nr_inactive++;
|
||||
}
|
||||
}
|
||||
|
||||
static void make_active(struct block_data *bdat, struct block *blk)
|
||||
{
|
||||
if (!blk->active) {
|
||||
if (!list_empty(&blk->lru_head)) {
|
||||
list_move(&blk->lru_head, &bdat->active_head);
|
||||
bdat->nr_inactive--;
|
||||
} else {
|
||||
list_add(&blk->lru_head, &bdat->active_head);
|
||||
}
|
||||
|
||||
blk->active = 1;
|
||||
bdat->nr_active++;
|
||||
}
|
||||
}
|
||||
|
||||
/* qsort comparison of two iocb pointers by the iocbs' file offsets. */
static int compar_iocbp(const void *A, const void *B)
{
	struct iocb * const *left = A;
	struct iocb * const *right = B;

	return scoutfs_cmp((*left)->aio_offset, (*right)->aio_offset);
}
|
||||
|
||||
static int submit_and_wait(struct block_data *bdat, struct list_head *list)
|
||||
{
|
||||
struct io_event *event;
|
||||
struct iocb *iocb;
|
||||
struct block *blk;
|
||||
int ret;
|
||||
int err;
|
||||
int nr;
|
||||
int i;
|
||||
|
||||
err = 0;
|
||||
nr = 0;
|
||||
list_for_each_entry(blk, list, submit_head) {
|
||||
iocb = &bdat->iocbs[nr];
|
||||
bdat->iocbps[nr] = iocb;
|
||||
|
||||
memset(iocb, 0, sizeof(struct iocb));
|
||||
|
||||
iocb->aio_data = (intptr_t)blk;
|
||||
iocb->aio_lio_opcode = blk_is_dirty(blk) ? IOCB_CMD_PWRITE : IOCB_CMD_PREAD;
|
||||
iocb->aio_fildes = bdat->meta_fd;
|
||||
iocb->aio_buf = (intptr_t)blk->buf;
|
||||
iocb->aio_nbytes = blk->size;
|
||||
iocb->aio_offset = blk->blkno * blk->size;
|
||||
|
||||
nr++;
|
||||
|
||||
debug_blk(blk, "submit");
|
||||
|
||||
if ((nr < bdat->nr_events) && blk->submit_head.next != list)
|
||||
continue;
|
||||
|
||||
qsort(bdat->iocbps, nr, sizeof(bdat->iocbps[0]), compar_iocbp);
|
||||
|
||||
ret = syscall(__NR_io_submit, bdat->ctx, nr, bdat->iocbps);
|
||||
if (ret != nr) {
|
||||
if (ret >= 0)
|
||||
errno = EIO;
|
||||
ret = -errno;
|
||||
printf("fatal system error submitting async IO: "ENO_FMT"\n",
|
||||
ENO_ARG(-ret));
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = syscall(__NR_io_getevents, bdat->ctx, nr, nr, bdat->events, NULL);
|
||||
if (ret != nr) {
|
||||
if (ret >= 0)
|
||||
errno = EIO;
|
||||
ret = -errno;
|
||||
printf("fatal system error getting IO events: "ENO_FMT"\n",
|
||||
ENO_ARG(-ret));
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
for (i = 0; i < nr; i++) {
|
||||
event = &bdat->events[i];
|
||||
iocb = (struct iocb *)(intptr_t)event->obj;
|
||||
blk = (struct block *)(intptr_t)event->data;
|
||||
|
||||
debug_blk(blk, "complete res %lld", (long long)event->res);
|
||||
|
||||
if (event->res >= 0 && event->res != blk->size)
|
||||
event->res = -EIO;
|
||||
|
||||
/* io errors are fatal */
|
||||
if (event->res < 0) {
|
||||
ret = event->res;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (iocb->aio_lio_opcode == IOCB_CMD_PREAD) {
|
||||
blk->uptodate = 1;
|
||||
} else {
|
||||
list_del_init(&blk->dirty_head);
|
||||
bdat->nr_dirty--;
|
||||
}
|
||||
}
|
||||
nr = 0;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret ?: err;
|
||||
}
|
||||
|
||||
static void inc_refcount(struct block *blk)
|
||||
{
|
||||
blk->refcount++;
|
||||
}
|
||||
|
||||
void block_put(struct block **blkp)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
struct block *blk = *blkp;
|
||||
|
||||
if (blk) {
|
||||
blk->refcount--;
|
||||
*blkp = NULL;
|
||||
|
||||
rebalance_cache(bdat);
|
||||
}
|
||||
}
|
||||
|
||||
/* Return the hash list that blocks with the given blkno are stored on. */
static struct list_head *hash_bucket(struct block_data *bdat, u64 blkno)
{
	u32 hashed = scoutfs_hash32(&blkno, sizeof(blkno));
	size_t idx = hashed % bdat->hash_nr;

	return &bdat->hash_lists[idx];
}
|
||||
|
||||
/*
 * Find the cached block with the given blkno and size, or allocate a
 * new one and add it to the hash and the inactive list.  BF_SM in bf
 * selects the small block size, otherwise the large size is used.
 *
 * Returns the block with its refcount incremented, or NULL if a new
 * block couldn't be allocated.
 */
static struct block *get_or_alloc(struct block_data *bdat, u64 blkno, int bf)
{
	struct list_head *bucket = hash_bucket(bdat, blkno);
	struct block *search;
	struct block *blk;
	size_t size;

	size = (bf & BF_SM) ? SCOUTFS_BLOCK_SM_SIZE : SCOUTFS_BLOCK_LG_SIZE;

	blk = NULL;
	list_for_each_entry(search, bucket, hash_head) {
		/* the block numbers must be equal, not merely both non-zero */
		if (search->blkno == blkno && search->size == size) {
			blk = search;
			break;
		}
	}

	if (!blk) {
		blk = alloc_block(bdat, blkno, size);
		if (blk) {
			list_add(&blk->hash_head, bucket);
			list_add(&blk->lru_head, &bdat->inactive_head);
			bdat->nr_inactive++;
		}
	}
	if (blk)
		inc_refcount(blk);

	return blk;
}
|
||||
|
||||
/*
|
||||
* Get a block.
|
||||
*
|
||||
* The caller holds a refcount to the block while it's in use that
|
||||
* prevents it from being removed from the cache. It must be dropped
|
||||
* with block_put();
|
||||
*/
|
||||
/*
 * Return a referenced block in *blk_ret.  The bf flags control how the
 * contents are prepared: BF_ZERO zeroes the buffer, BF_OVERWRITE skips
 * reading, and BF_DIRTY adds the block to the dirty list.  A block
 * that isn't uptodate after the flags are applied is read
 * synchronously.
 *
 * Returns 0 on success.  On failure a negative errno is returned, the
 * reference is dropped, and *blk_ret is set to NULL.
 */
int block_get(struct block **blk_ret, u64 blkno, int bf)
{
	struct block_data *bdat = &global_bdat;
	struct block *blk;
	LIST_HEAD(list);
	int ret;

	blk = get_or_alloc(bdat, blkno, bf);
	if (!blk) {
		ret = -ENOMEM;
		goto out;
	}

	if ((bf & BF_ZERO)) {
		memset(blk->buf, 0, blk->size);
		blk->uptodate = 1;
	}

	if (bf & BF_OVERWRITE)
		blk->uptodate = 1;

	if (!blk->uptodate) {
		/* read just this block through a private one-entry list */
		list_add(&blk->submit_head, &list);
		ret = submit_and_wait(bdat, &list);
		list_del_init(&blk->submit_head);
		if (ret < 0)
			goto out;
	}

	if ((bf & BF_DIRTY) && !blk_is_dirty(blk)) {
		/* list_add_tail() takes the new entry first, then the list head */
		list_add_tail(&blk->dirty_head, &bdat->dirty_list);
		bdat->nr_dirty++;
	}

	make_active(bdat, blk);

	rebalance_cache(bdat);
	ret = 0;
out:
	/* block_put() tolerates a NULL blk and sets blk to NULL */
	if (ret < 0)
		block_put(&blk);
	*blk_ret = blk;
	return ret;
}
|
||||
|
||||
void *block_buf(struct block *blk)
|
||||
{
|
||||
return blk->buf;
|
||||
}
|
||||
|
||||
size_t block_size(struct block *blk)
|
||||
{
|
||||
return blk->size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Drop the block from the cache, regardless of if it was free or not.
|
||||
* This is used to avoid writing blocks which were dirtied but then
|
||||
* later freed.
|
||||
*
|
||||
* The block is immediately freed and can't be referenced after this
|
||||
* returns.
|
||||
*/
|
||||
void block_drop(struct block **blkp)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
|
||||
free_block(bdat, *blkp);
|
||||
*blkp = NULL;
|
||||
rebalance_cache(bdat);
|
||||
}
|
||||
|
||||
/*
|
||||
* This doesn't quite work for mixing large and small blocks, but that's
|
||||
* fine, we never do that.
|
||||
*/
|
||||
static int compar_u64(const void *A, const void *B)
|
||||
{
|
||||
u64 a = *((u64 *)A);
|
||||
u64 b = *((u64 *)B);
|
||||
|
||||
return scoutfs_cmp(a, b);
|
||||
}
|
||||
|
||||
/*
|
||||
* This read-ahead is synchronous and errors are ignored. If any of the
|
||||
* blknos aren't present in the cache then we issue concurrent reads for
|
||||
* them and wait. Any existing cached blocks will be left as is.
|
||||
*
|
||||
* We might be trying to read a lot more than the number of events so we
|
||||
* sort the caller's blknos before iterating over them rather than
|
||||
* relying on submission sorting the blocks in each submitted set.
|
||||
*/
|
||||
/*
 * Sort the caller's blknos in place, read every block that isn't
 * already uptodate in the cache, and then drop all the references that
 * were taken.  Read errors are ignored.
 */
void block_readahead(u64 *blknos, size_t nr)
{
	struct block_data *bdat = &global_bdat;
	struct block *blk;
	struct block *blk_;
	LIST_HEAD(list);
	size_t i;

	if (nr == 0)
		return;

	qsort(blknos, nr, sizeof(blknos[0]), compar_u64);

	for (i = 0; i < nr; i++) {
		blk = get_or_alloc(bdat, blknos[i], 0);
		if (blk) {
			/*
			 * Only queue a block once.  A duplicate blkno finds
			 * the block that was just queued and adding its
			 * submit_head again would corrupt the list, so the
			 * extra reference is dropped instead.
			 */
			if (!blk->uptodate && list_empty(&blk->submit_head))
				list_add_tail(&blk->submit_head, &list);
			else
				block_put(&blk);
		}
	}

	(void)submit_and_wait(bdat, &list);

	list_for_each_entry_safe(blk, blk_, &list, submit_head) {
		list_del_init(&blk->submit_head);
		block_put(&blk);
	}

	rebalance_cache(bdat);
}
|
||||
|
||||
/*
|
||||
* The caller's block changes form a consistent transaction. If the amount of dirty
|
||||
* blocks is large enough we issue a write.
|
||||
*/
|
||||
int block_try_commit(bool force)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
struct block *blk;
|
||||
struct block *blk_;
|
||||
LIST_HEAD(list);
|
||||
int ret;
|
||||
|
||||
if (!force && bdat->nr_dirty < bdat->nr_events)
|
||||
return 0;
|
||||
|
||||
list_for_each_entry(blk, &bdat->dirty_list, dirty_head) {
|
||||
list_add_tail(&blk->submit_head, &list);
|
||||
inc_refcount(blk);
|
||||
}
|
||||
|
||||
ret = submit_and_wait(bdat, &list);
|
||||
|
||||
list_for_each_entry_safe(blk, blk_, &list, submit_head) {
|
||||
list_del_init(&blk->submit_head);
|
||||
block_put(&blk);
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
printf("error writing dirty transaction blocks\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = block_get(&blk, SCOUTFS_SUPER_BLKNO, BF_SM | BF_OVERWRITE | BF_DIRTY);
|
||||
if (ret == 0) {
|
||||
list_add(&blk->submit_head, &list);
|
||||
ret = submit_and_wait(bdat, &list);
|
||||
list_del_init(&blk->submit_head);
|
||||
block_put(&blk);
|
||||
} else {
|
||||
ret = -ENOMEM;
|
||||
}
|
||||
if (ret < 0)
|
||||
printf("error writing super block to commit transaction\n");
|
||||
|
||||
out:
|
||||
rebalance_cache(bdat);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int block_setup(int meta_fd, size_t max_cached_bytes, size_t max_dirty_bytes)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
size_t i;
|
||||
int ret;
|
||||
|
||||
bdat->max_cached = DIV_ROUND_UP(max_cached_bytes, SCOUTFS_BLOCK_LG_SIZE);
|
||||
bdat->hash_nr = bdat->max_cached / 4;
|
||||
bdat->nr_events = DIV_ROUND_UP(max_dirty_bytes, SCOUTFS_BLOCK_LG_SIZE);
|
||||
|
||||
bdat->iocbs = calloc(bdat->nr_events, sizeof(bdat->iocbs[0]));
|
||||
bdat->iocbps = calloc(bdat->nr_events, sizeof(bdat->iocbps[0]));
|
||||
bdat->events = calloc(bdat->nr_events, sizeof(bdat->events[0]));
|
||||
bdat->hash_lists = calloc(bdat->hash_nr, sizeof(bdat->hash_lists[0]));
|
||||
if (!bdat->iocbs || !bdat->iocbps || !bdat->events || !bdat->hash_lists) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&bdat->active_head);
|
||||
INIT_LIST_HEAD(&bdat->inactive_head);
|
||||
INIT_LIST_HEAD(&bdat->dirty_list);
|
||||
bdat->meta_fd = meta_fd;
|
||||
list_add(&bdat->inactive_head, &bdat->active_head);
|
||||
|
||||
for (i = 0; i < bdat->hash_nr; i++)
|
||||
INIT_LIST_HEAD(&bdat->hash_lists[i]);
|
||||
|
||||
ret = syscall(__NR_io_setup, bdat->nr_events, &bdat->ctx);
|
||||
|
||||
out:
|
||||
if (ret < 0) {
|
||||
free(bdat->iocbs);
|
||||
free(bdat->iocbps);
|
||||
free(bdat->events);
|
||||
free(bdat->hash_lists);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void block_shutdown(void)
|
||||
{
|
||||
struct block_data *bdat = &global_bdat;
|
||||
|
||||
syscall(SYS_io_destroy, bdat->ctx);
|
||||
|
||||
free(bdat->iocbs);
|
||||
free(bdat->iocbps);
|
||||
free(bdat->events);
|
||||
free(bdat->hash_lists);
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_BLOCK_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_BLOCK_H_
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
struct block;
|
||||
|
||||
#include "sparse.h"
|
||||
|
||||
/* block flags passed to block_get() */
|
||||
enum {
|
||||
BF_ZERO = (1 << 0), /* zero contents buf as block is returned */
|
||||
BF_DIRTY = (1 << 1), /* block will be written with transaction */
|
||||
BF_SM = (1 << 2), /* small 4k block instead of large 64k block */
|
||||
BF_OVERWRITE = (1 << 3), /* caller will overwrite contents, don't read */
|
||||
};
|
||||
|
||||
int block_get(struct block **blk_ret, u64 blkno, int bf);
|
||||
void block_put(struct block **blkp);
|
||||
|
||||
void *block_buf(struct block *blk);
|
||||
size_t block_size(struct block *blk);
|
||||
void block_drop(struct block **blkp);
|
||||
|
||||
void block_readahead(u64 *blknos, size_t nr);
|
||||
int block_try_commit(bool force);
|
||||
|
||||
int block_setup(int meta_fd, size_t max_cached_bytes, size_t max_dirty_bytes);
|
||||
void block_shutdown(void);
|
||||
|
||||
#endif
|
||||
@@ -1,209 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "key.h"
|
||||
#include "avl.h"
|
||||
|
||||
#include "block.h"
|
||||
#include "btree.h"
|
||||
#include "extent.h"
|
||||
#include "iter.h"
|
||||
#include "sns.h"
|
||||
#include "meta.h"
|
||||
#include "problem.h"
|
||||
|
||||
static inline void *item_val(struct scoutfs_btree_block *bt, struct scoutfs_btree_item *item)
|
||||
{
|
||||
return (void *)bt + le16_to_cpu(item->val_off);
|
||||
}
|
||||
|
||||
static void readahead_refs(struct scoutfs_btree_block *bt)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_avl_node *node;
|
||||
struct scoutfs_block_ref *ref;
|
||||
u64 *blknos;
|
||||
u64 blkno;
|
||||
u16 valid = 0;
|
||||
u16 nr = le16_to_cpu(bt->nr_items);
|
||||
int i;
|
||||
|
||||
blknos = calloc(nr, sizeof(blknos[0]));
|
||||
if (!blknos)
|
||||
return;
|
||||
|
||||
node = avl_first(&bt->item_root);
|
||||
|
||||
for (i = 0; i < nr; i++) {
|
||||
item = container_of(node, struct scoutfs_btree_item, node);
|
||||
ref = item_val(bt, item);
|
||||
blkno = le64_to_cpu(ref->blkno);
|
||||
|
||||
if (valid_meta_blkno(blkno))
|
||||
blknos[valid++] = blkno;
|
||||
|
||||
node = avl_next(&bt->item_root, &item->node);
|
||||
}
|
||||
|
||||
if (valid > 0)
|
||||
block_readahead(blknos, valid);
|
||||
free(blknos);
|
||||
}
|
||||
|
||||
/*
|
||||
* Call the callback on the referenced block. Then if the block
|
||||
 * contains references, read it and recurse into all its references.
|
||||
*/
|
||||
/*
 * Report the referenced block to the callback as a single-block
 * extent.  A level 0 block is only reported.  Any other block is read
 * and each of its items' values is treated as a ref to a block one
 * level down and recursed into.
 *
 * Returns 0 on success or for a zero blkno ref.  A negative callback
 * return is passed through xlate_iter_errno() and returned.  A block
 * whose level doesn't match the expected level is reported as a
 * problem and returns -EINVAL.
 */
static int btree_ref_meta_iter(struct scoutfs_block_ref *ref, unsigned level, extent_cb_t cb,
			       void *cb_arg)
{
	struct scoutfs_btree_item *item;
	struct scoutfs_btree_block *bt;
	struct scoutfs_avl_node *node;
	struct block *blk = NULL;
	u64 blkno;
	int ret;
	int i;

	blkno = le64_to_cpu(ref->blkno);
	if (!blkno)
		return 0;

	ret = cb(blkno, 1, cb_arg);
	if (ret < 0) {
		/* return the translated error instead of discarding it */
		return xlate_iter_errno(ret);
	}

	if (level == 0)
		return 0;

	ret = block_get(&blk, blkno, 0);
	if (ret < 0)
		return ret;

	sns_push("btree_parent", blkno, 0);

	bt = block_buf(blk);

	/* XXX integrate verification with block cache */
	if (bt->level != level) {
		problem(PB_BTREE_BLOCK_BAD_LEVEL, "expected %u level %u", level, bt->level);
		ret = -EINVAL;
		goto out;
	}

	/* read-ahead last level of parents */
	if (level == 2)
		readahead_refs(bt);

	node = avl_first(&bt->item_root);

	for (i = 0; i < le16_to_cpu(bt->nr_items); i++) {
		item = container_of(node, struct scoutfs_btree_item, node);
		ref = item_val(bt, item);

		ret = btree_ref_meta_iter(ref, level - 1, cb, cb_arg);
		if (ret < 0)
			goto out;

		node = avl_next(&bt->item_root, &item->node);
	}

	ret = 0;
out:
	block_put(&blk);
	sns_pop();

	return ret;
}
|
||||
|
||||
/*
 * Iterate over the blocks of the btree, starting from the root ref at
 * level height - 1.  A root with zero height returns 0 without calling
 * the callback.
 */
int btree_meta_iter(struct scoutfs_btree_root *root, extent_cb_t cb, void *cb_arg)
{
	/* XXX check root */
	return root->height == 0 ? 0 :
	       btree_ref_meta_iter(&root->ref, root->height - 1, cb, cb_arg);
}
|
||||
|
||||
static int btree_ref_item_iter(struct scoutfs_block_ref *ref, unsigned level,
|
||||
btree_item_cb_t cb, void *cb_arg)
|
||||
{
|
||||
struct scoutfs_btree_item *item;
|
||||
struct scoutfs_btree_block *bt;
|
||||
struct scoutfs_avl_node *node;
|
||||
struct block *blk = NULL;
|
||||
u64 blkno;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
blkno = le64_to_cpu(ref->blkno);
|
||||
if (!blkno)
|
||||
return 0;
|
||||
|
||||
ret = block_get(&blk, blkno, 0);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (level)
|
||||
sns_push("btree_parent", blkno, 0);
|
||||
else
|
||||
sns_push("btree_leaf", blkno, 0);
|
||||
|
||||
bt = block_buf(blk);
|
||||
|
||||
/* XXX integrate verification with block cache */
|
||||
if (bt->level != level) {
|
||||
problem(PB_BTREE_BLOCK_BAD_LEVEL, "expected %u level %u", level, bt->level);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* read-ahead leaves that contain items */
|
||||
if (level == 1)
|
||||
readahead_refs(bt);
|
||||
|
||||
node = avl_first(&bt->item_root);
|
||||
|
||||
for (i = 0; i < le16_to_cpu(bt->nr_items); i++) {
|
||||
item = container_of(node, struct scoutfs_btree_item, node);
|
||||
|
||||
if (level) {
|
||||
ref = item_val(bt, item);
|
||||
ret = btree_ref_item_iter(ref, level - 1, cb, cb_arg);
|
||||
} else {
|
||||
ret = cb(&item->key, item_val(bt, item),
|
||||
le16_to_cpu(item->val_len), cb_arg);
|
||||
debug("free item key "SK_FMT" ret %d", SK_ARG(&item->key), ret);
|
||||
}
|
||||
if (ret < 0) {
|
||||
ret = xlate_iter_errno(ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
node = avl_next(&bt->item_root, &item->node);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
block_put(&blk);
|
||||
sns_pop();
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Call the callback for the items in the btree, starting from the root
 * ref at level height - 1.  A root with zero height returns 0 without
 * calling the callback.
 */
int btree_item_iter(struct scoutfs_btree_root *root, btree_item_cb_t cb, void *cb_arg)
{
	/* XXX check root */
	return root->height == 0 ? 0 :
	       btree_ref_item_iter(&root->ref, root->height - 1, cb, cb_arg);
}
|
||||
@@ -1,14 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_BTREE_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_BTREE_H_
|
||||
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
|
||||
#include "extent.h"
|
||||
|
||||
typedef int (*btree_item_cb_t)(struct scoutfs_key *key, void *val, u16 val_len, void *cb_arg);
|
||||
|
||||
int btree_meta_iter(struct scoutfs_btree_root *root, extent_cb_t cb, void *cb_arg);
|
||||
int btree_item_iter(struct scoutfs_btree_root *root, btree_item_cb_t cb, void *cb_arg);
|
||||
|
||||
#endif
|
||||
@@ -1,149 +0,0 @@
|
||||
#define _GNU_SOURCE /* O_DIRECT */
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <argp.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "parse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "ioctl.h"
|
||||
#include "cmd.h"
|
||||
#include "dev.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "block.h"
|
||||
#include "debug.h"
|
||||
#include "meta.h"
|
||||
#include "super.h"
|
||||
|
||||
struct check_args {
|
||||
char *meta_device;
|
||||
char *data_device;
|
||||
char *debug_path;
|
||||
};
|
||||
|
||||
static int do_check(struct check_args *args)
|
||||
{
|
||||
int debug_fd = -1;
|
||||
int meta_fd = -1;
|
||||
int data_fd = -1;
|
||||
int ret;
|
||||
|
||||
if (args->debug_path) {
|
||||
debug_fd = open(args->debug_path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
|
||||
if (debug_fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "error opening debug output file '%s': %s (%d)\n",
|
||||
args->debug_path, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
debug_enable(debug_fd);
|
||||
}
|
||||
|
||||
meta_fd = open(args->meta_device, O_DIRECT | O_RDWR | O_EXCL);
|
||||
if (meta_fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open meta device '%s': %s (%d)\n",
|
||||
args->meta_device, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
data_fd = open(args->data_device, O_DIRECT | O_RDWR | O_EXCL);
|
||||
if (data_fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open data device '%s': %s (%d)\n",
|
||||
args->data_device, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = block_setup(meta_fd, 128 * 1024 * 1024, 32 * 1024 * 1024);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = check_supers() ?:
|
||||
check_meta_alloc();
|
||||
out:
|
||||
/* and tear it all down */
|
||||
block_shutdown();
|
||||
super_shutdown();
|
||||
debug_disable();
|
||||
|
||||
if (meta_fd >= 0)
|
||||
close(meta_fd);
|
||||
if (data_fd >= 0)
|
||||
close(data_fd);
|
||||
if (debug_fd >= 0)
|
||||
close(debug_fd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct check_args *args = state->input;
|
||||
|
||||
switch (key) {
|
||||
case 'd':
|
||||
args->debug_path = strdup_or_error(state, arg);
|
||||
break;
|
||||
case 'e':
|
||||
case ARGP_KEY_ARG:
|
||||
if (!args->meta_device)
|
||||
args->meta_device = strdup_or_error(state, arg);
|
||||
else if (!args->data_device)
|
||||
args->data_device = strdup_or_error(state, arg);
|
||||
else
|
||||
argp_error(state, "more than two device arguments given");
|
||||
break;
|
||||
case ARGP_KEY_FINI:
|
||||
if (!args->meta_device)
|
||||
argp_error(state, "no metadata device argument given");
|
||||
if (!args->data_device)
|
||||
argp_error(state, "no data device argument given");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct argp_option options[] = {
|
||||
{ "debug", 'd', "FILE_PATH", 0, "Path to debug output file, will be created or truncated"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static struct argp argp = {
|
||||
options,
|
||||
parse_opt,
|
||||
"META-DEVICE DATA-DEVICE",
|
||||
"Check filesystem consistency"
|
||||
};
|
||||
|
||||
static int check_cmd(int argc, char **argv)
|
||||
{
|
||||
struct check_args check_args = {NULL};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&argp, argc, argv, 0, NULL, &check_args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_check(&check_args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) check_ctor(void)
|
||||
{
|
||||
cmd_register_argp("check", &argp, GROUP_CORE, check_cmd);
|
||||
}
|
||||
@@ -1,16 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "debug.h"
|
||||
|
||||
int debug_fd = -1;
|
||||
|
||||
void debug_enable(int fd)
|
||||
{
|
||||
debug_fd = fd;
|
||||
}
|
||||
|
||||
void debug_disable(void)
|
||||
{
|
||||
if (debug_fd >= 0)
|
||||
debug_fd = -1;
|
||||
}
|
||||
@@ -1,17 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_DEBUG_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_DEBUG_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define debug(fmt, args...) \
|
||||
do { \
|
||||
if (debug_fd >= 0) \
|
||||
dprintf(debug_fd, fmt"\n", ##args); \
|
||||
} while (0)
|
||||
|
||||
extern int debug_fd;
|
||||
|
||||
void debug_enable(int fd);
|
||||
void debug_disable(void);
|
||||
|
||||
#endif
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_ENO_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_ENO_H_
|
||||
|
||||
#include <errno.h>
|
||||
|
||||
#define ENO_FMT "%d (%s)"
|
||||
#define ENO_ARG(eno) eno, strerror(eno)
|
||||
|
||||
#endif
|
||||
@@ -1,312 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "lk_rbtree_wrapper.h"
|
||||
|
||||
#include "debug.h"
|
||||
#include "extent.h"
|
||||
|
||||
/*
|
||||
* In-memory extent management in rbtree nodes.
|
||||
*/
|
||||
|
||||
/*
 * True if the half-open ranges [a_start, a_start + a_len) and
 * [b_start, b_start + b_len) share at least one block.
 */
bool extents_overlap(u64 a_start, u64 a_len, u64 b_start, u64 b_len)
{
	u64 a_end = a_start + a_len;
	u64 b_end = b_start + b_len;

	/* they overlap when each one starts before the other ends */
	return a_end > b_start && b_end > a_start;
}
|
||||
|
||||
/* True if the range [start, start + len) lies entirely inside the extent. */
static int ext_contains(struct extent_node *ext, u64 start, u64 len)
{
	u64 ext_end = ext->start + ext->len;
	u64 end = start + len;

	return ext->start <= start && ext_end >= end;
}
|
||||
|
||||
/*
|
||||
* True if the given extent is bisected by the given range; there's
|
||||
* leftover containing extents on both the left and right sides of the
|
||||
* range in the extent.
|
||||
*/
|
||||
static int ext_bisected(struct extent_node *ext, u64 start, u64 len)
|
||||
{
|
||||
return ext->start < start && ext->start + ext->len > start + len;
|
||||
}
|
||||
|
||||
static struct extent_node *ext_from_rbnode(struct rb_node *rbnode)
|
||||
{
|
||||
return rbnode ? container_of(rbnode, struct extent_node, rbnode) : NULL;
|
||||
}
|
||||
|
||||
static struct extent_node *next_ext(struct extent_node *ext)
|
||||
{
|
||||
return ext ? ext_from_rbnode(rb_next(&ext->rbnode)) : NULL;
|
||||
}
|
||||
|
||||
static struct extent_node *prev_ext(struct extent_node *ext)
|
||||
{
|
||||
return ext ? ext_from_rbnode(rb_prev(&ext->rbnode)) : NULL;
|
||||
}
|
||||
|
||||
struct walk_results {
|
||||
unsigned bisect_to_leaf:1;
|
||||
struct extent_node *found;
|
||||
struct extent_node *next;
|
||||
struct rb_node *parent;
|
||||
struct rb_node **node;
|
||||
};
|
||||
|
||||
/*
 * Descend the tree looking for an extent that overlaps the range
 * [start, start + len) and record what was seen in wlk:
 *
 *  found  - an overlapping extent, if any was visited
 *  next   - the last extent from which the walk descended left
 *  parent - the last node visited
 *  node   - the link at which the walk stopped
 *
 * The caller sets wlk->bisect_to_leaf beforehand.  When it is set and
 * the overlapping extent is bisected by the range, the walk keeps
 * descending to the right so that parent/node describe where the right
 * remainder of the bisected extent can be linked in.
 */
static void walk_extents(struct extent_root *root, u64 start, u64 len, struct walk_results *wlk)
{
	struct rb_node **link = &root->rbroot.rb_node;
	struct extent_node *ext;
	u64 end = start + len;

	wlk->found = NULL;
	wlk->next = NULL;
	wlk->parent = NULL;

	while (*link) {
		wlk->parent = *link;
		ext = ext_from_rbnode(*link);

		/* range is entirely before this extent */
		if (end <= ext->start) {
			link = &ext->rbnode.rb_left;
			wlk->next = ext;
			continue;
		}

		/* range is entirely after this extent */
		if (start >= ext->start + ext->len) {
			link = &ext->rbnode.rb_right;
			continue;
		}

		wlk->found = ext;
		if (!wlk->bisect_to_leaf || !ext_bisected(ext, start, len))
			break;

		/* walk right so we can insert greater right from bisection */
		link = &ext->rbnode.rb_right;
	}

	wlk->node = link;
}
|
||||
|
||||
/*
|
||||
* Return an extent that overlaps with the given range.
|
||||
*/
|
||||
int extent_lookup(struct extent_root *root, u64 start, u64 len, struct extent_node *found)
|
||||
{
|
||||
struct walk_results wlk = { 0, };
|
||||
int ret;
|
||||
|
||||
walk_extents(root, start, len, &wlk);
|
||||
if (wlk.found) {
|
||||
memset(found, 0, sizeof(struct extent_node));
|
||||
found->start = wlk.found->start;
|
||||
found->len = wlk.found->len;
|
||||
ret = 0;
|
||||
} else {
|
||||
ret = -ENOENT;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Callers can iterate through direct node references and are entirely
|
||||
* responsible for consistency when doing so.
|
||||
*/
|
||||
struct extent_node *extent_first(struct extent_root *root)
|
||||
{
|
||||
struct walk_results wlk = { 0, };
|
||||
|
||||
walk_extents(root, 0, 1, &wlk);
|
||||
|
||||
return wlk.found ?: wlk.next;
|
||||
}
|
||||
|
||||
/* Public iterator step: the extent after ext, or NULL at the end or for NULL. */
struct extent_node *extent_next(struct extent_node *ext)
{
	struct extent_node *following = next_ext(ext);

	return following;
}
|
||||
|
||||
/* Public iterator step: the extent before ext, or NULL at the start or for NULL. */
struct extent_node *extent_prev(struct extent_node *ext)
{
	struct extent_node *preceding = prev_ext(ext);

	return preceding;
}
|
||||
|
||||
/*
|
||||
* Insert a new extent into the tree. We can extend existing nodes,
|
||||
* merge with neighbours, or remove existing extents entirely if we
|
||||
* insert a range that fully spans existing nodes.
|
||||
*/
|
||||
static int walk_insert(struct extent_root *root, u64 start, u64 len, int found_err)
|
||||
{
|
||||
struct walk_results wlk = { 0, };
|
||||
struct extent_node *ext;
|
||||
struct extent_node *nei;
|
||||
int ret;
|
||||
|
||||
walk_extents(root, start, len, &wlk);
|
||||
|
||||
ext = wlk.found;
|
||||
if (ext && found_err) {
|
||||
ret = found_err;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!ext) {
|
||||
ext = malloc(sizeof(struct extent_node));
|
||||
if (!ext) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ext->start = start;
|
||||
ext->len = len;
|
||||
|
||||
rb_link_node(&ext->rbnode, wlk.parent, wlk.node);
|
||||
rb_insert_color(&ext->rbnode, &root->rbroot);
|
||||
}
|
||||
|
||||
/* start by expanding an existing extent if our range is larger */
|
||||
if (start < ext->start) {
|
||||
ext->len += ext->start - start;
|
||||
ext->start = start;
|
||||
}
|
||||
if (ext->start + ext->len < start + len)
|
||||
ext->len += (start + len) - (ext->start + ext->len);
|
||||
|
||||
/* drop any fully spanned neighbors, possibly merging with a final adjacent one */
|
||||
|
||||
while ((nei = prev_ext(ext))) {
|
||||
if (nei->start + nei->len < ext->start)
|
||||
break;
|
||||
|
||||
if (nei->start < ext->start) {
|
||||
ext->len += ext->start - nei->start;
|
||||
ext->start = nei->start;
|
||||
}
|
||||
|
||||
rb_erase(&nei->rbnode, &root->rbroot);
|
||||
free(nei);
|
||||
}
|
||||
|
||||
while ((nei = next_ext(ext))) {
|
||||
if (ext->start + ext->len < nei->start)
|
||||
break;
|
||||
|
||||
if (ext->start + ext->len < nei->start + nei->len)
|
||||
ext->len += (nei->start + nei->len) - (ext->start + ext->len);
|
||||
|
||||
rb_erase(&nei->rbnode, &root->rbroot);
|
||||
free(nei);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
debug("start %llu len %llu ret %d", start, len, ret);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert a new extent. The specified extent must not overlap with any
|
||||
* existing extents or -EEXIST is returned.
|
||||
*/
|
||||
int extent_insert_new(struct extent_root *root, u64 start, u64 len)
|
||||
{
|
||||
return walk_insert(root, start, len, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert an extent, extending any existing extents that may overlap.
|
||||
*/
|
||||
int extent_insert_extend(struct extent_root *root, u64 start, u64 len)
|
||||
{
|
||||
return walk_insert(root, start, len, false);
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove the specified extent from an existing node. The given extent must be fully
|
||||
* contained in a single node or -ENOENT is returned.
|
||||
*/
|
||||
int extent_remove(struct extent_root *root, u64 start, u64 len)
|
||||
{
|
||||
struct extent_node *ext;
|
||||
struct extent_node *ins;
|
||||
struct walk_results wlk = {
|
||||
.bisect_to_leaf = 1,
|
||||
};
|
||||
int ret;
|
||||
|
||||
walk_extents(root, start, len, &wlk);
|
||||
|
||||
if (!(ext = wlk.found) || !ext_contains(ext, start, len)) {
|
||||
ret = -ENOENT;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (ext_bisected(ext, start, len)) {
|
||||
debug("found bisected start %llu len %llu", ext->start, ext->len);
|
||||
ins = malloc(sizeof(struct extent_node));
|
||||
if (!ins) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ins->start = start + len;
|
||||
ins->len = (ext->start + ext->len) - ins->start;
|
||||
|
||||
rb_link_node(&ins->rbnode, wlk.parent, wlk.node);
|
||||
rb_insert_color(&ins->rbnode, &root->rbroot);
|
||||
}
|
||||
|
||||
if (start > ext->start) {
|
||||
ext->len = start - ext->start;
|
||||
} else if (len < ext->len) {
|
||||
ext->start += len;
|
||||
ext->len -= len;
|
||||
} else {
|
||||
rb_erase(&ext->rbnode, &root->rbroot);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
debug("start %llu len %llu ret %d", start, len, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void extent_root_init(struct extent_root *root)
|
||||
{
|
||||
root->rbroot = RB_ROOT;
|
||||
root->total = 0;
|
||||
}
|
||||
|
||||
void extent_root_free(struct extent_root *root)
|
||||
{
|
||||
struct extent_node *ext;
|
||||
struct rb_node *node;
|
||||
struct rb_node *tmp;
|
||||
|
||||
for (node = rb_first(&root->rbroot); node && ((tmp = rb_next(node)), 1); node = tmp) {
|
||||
ext = rb_entry(node, struct extent_node, rbnode);
|
||||
rb_erase(&ext->rbnode, &root->rbroot);
|
||||
free(ext);
|
||||
}
|
||||
}
|
||||
|
||||
void extent_root_print(struct extent_root *root)
|
||||
{
|
||||
struct extent_node *ext;
|
||||
struct rb_node *node;
|
||||
struct rb_node *tmp;
|
||||
|
||||
for (node = rb_first(&root->rbroot); node && ((tmp = rb_next(node)), 1); node = tmp) {
|
||||
ext = rb_entry(node, struct extent_node, rbnode);
|
||||
debug(" start %llu len %llu", ext->start, ext->len);
|
||||
}
|
||||
}
|
||||
@@ -1,38 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_EXTENT_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_EXTENT_H_
|
||||
|
||||
#include "lk_rbtree_wrapper.h"
|
||||
|
||||
struct extent_root {
|
||||
struct rb_root rbroot;
|
||||
u64 total;
|
||||
};
|
||||
|
||||
struct extent_node {
|
||||
struct rb_node rbnode;
|
||||
u64 start;
|
||||
u64 len;
|
||||
};
|
||||
|
||||
typedef int (*extent_cb_t)(u64 start, u64 len, void *arg);
|
||||
|
||||
struct extent_cb_arg_t {
|
||||
extent_cb_t cb;
|
||||
void *cb_arg;
|
||||
};
|
||||
|
||||
bool extents_overlap(u64 a_start, u64 a_len, u64 b_start, u64 b_len);
|
||||
|
||||
int extent_lookup(struct extent_root *root, u64 start, u64 len, struct extent_node *found);
|
||||
struct extent_node *extent_first(struct extent_root *root);
|
||||
struct extent_node *extent_next(struct extent_node *ext);
|
||||
struct extent_node *extent_prev(struct extent_node *ext);
|
||||
int extent_insert_new(struct extent_root *root, u64 start, u64 len);
|
||||
int extent_insert_extend(struct extent_root *root, u64 start, u64 len);
|
||||
int extent_remove(struct extent_root *root, u64 start, u64 len);
|
||||
|
||||
void extent_root_init(struct extent_root *root);
|
||||
void extent_root_free(struct extent_root *root);
|
||||
void extent_root_print(struct extent_root *root);
|
||||
|
||||
#endif
|
||||
@@ -1,540 +0,0 @@
|
||||
#define _GNU_SOURCE /* O_DIRECT */
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
#include <argp.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "bitmap.h"
|
||||
#include "parse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "crc.h"
|
||||
#include "cmd.h"
|
||||
#include "dev.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "block.h"
|
||||
#include "btree.h"
|
||||
#include "log_trees.h"
|
||||
#include "super.h"
|
||||
|
||||
/* huh. */
|
||||
#define OFF_MAX (off_t)((u64)((off_t)~0ULL) >> 1)
|
||||
|
||||
#define SCOUTFS_META_IMAGE_HEADER_MAGIC 0x8aee00d098fa60c5ULL
|
||||
#define SCOUTFS_META_IMAGE_BLOCK_HEADER_MAGIC 0x70bd5e9269effd86ULL
|
||||
|
||||
struct scoutfs_meta_image_header {
|
||||
__le64 magic;
|
||||
__le64 total_bytes;
|
||||
__le32 version;
|
||||
} __packed;
|
||||
|
||||
struct scoutfs_meta_image_block_header {
|
||||
__le64 magic;
|
||||
__le64 offset;
|
||||
__le32 size;
|
||||
__le32 crc;
|
||||
} __packed;
|
||||
|
||||
struct image_args {
|
||||
char *meta_device;
|
||||
bool is_read;
|
||||
bool show_header;
|
||||
u64 ra_window;
|
||||
};
|
||||
|
||||
struct block_bitmaps {
|
||||
unsigned long *bits;
|
||||
u64 size;
|
||||
u64 count;
|
||||
};
|
||||
|
||||
#define errf(fmt, args...) \
|
||||
dprintf(STDERR_FILENO, fmt, ##args)
|
||||
|
||||
/*
 * Extent callback that records a single referenced block in the
 * bitmap, counting each block the first time it is seen.
 *
 * Only single block references are accepted.  Block numbers come from
 * structures read off the device, so a reference at or past the end of
 * the bitmap is rejected rather than being allowed to write outside
 * the allocation.
 *
 * Returns 0 on success or -EINVAL for a multi-block extent or an out
 * of range block.
 */
static int set_meta_bit(u64 start, u64 len, void *arg)
{
	struct block_bitmaps *bm = arg;

	if (len != 1)
		return -EINVAL;

	if (start >= bm->size)
		return -EINVAL;

	if (!test_bit(bm->bits, start)) {
		set_bit(bm->bits, start);
		bm->count++;
	}

	return 0;
}
|
||||
|
||||
static int get_ref_bits(struct block_bitmaps *bm)
|
||||
{
|
||||
struct scoutfs_super_block *super = global_super;
|
||||
int ret;
|
||||
u64 i;
|
||||
|
||||
/*
|
||||
* There are almost no small blocks we need to read, so we read
|
||||
* them as the large blocks that contain them to simplify the
|
||||
* block reading process.
|
||||
*/
|
||||
set_meta_bit(SCOUTFS_SUPER_BLKNO >> SCOUTFS_BLOCK_SM_LG_SHIFT, 1, bm);
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++)
|
||||
set_meta_bit((SCOUTFS_QUORUM_BLKNO + i) >> SCOUTFS_BLOCK_SM_LG_SHIFT, 1, bm);
|
||||
|
||||
ret = alloc_root_meta_iter(&super->meta_alloc[0], set_meta_bit, bm) ?:
|
||||
alloc_root_meta_iter(&super->meta_alloc[1], set_meta_bit, bm) ?:
|
||||
alloc_root_meta_iter(&super->data_alloc, set_meta_bit, bm) ?:
|
||||
alloc_list_meta_iter(&super->server_meta_avail[0], set_meta_bit, bm) ?:
|
||||
alloc_list_meta_iter(&super->server_meta_avail[1], set_meta_bit, bm) ?:
|
||||
alloc_list_meta_iter(&super->server_meta_freed[0], set_meta_bit, bm) ?:
|
||||
alloc_list_meta_iter(&super->server_meta_freed[1], set_meta_bit, bm) ?:
|
||||
btree_meta_iter(&super->fs_root, set_meta_bit, bm) ?:
|
||||
btree_meta_iter(&super->logs_root, set_meta_bit, bm) ?:
|
||||
btree_meta_iter(&super->log_merge, set_meta_bit, bm) ?:
|
||||
btree_meta_iter(&super->mounted_clients, set_meta_bit, bm) ?:
|
||||
btree_meta_iter(&super->srch_root, set_meta_bit, bm) ?:
|
||||
log_trees_meta_iter(set_meta_bit, bm);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Note that this temporarily modifies the header that it's given.
|
||||
*/
|
||||
static __le32 calc_crc(struct scoutfs_meta_image_block_header *bh, void *buf, size_t size)
|
||||
{
|
||||
__le32 saved = bh->crc;
|
||||
u32 crc = ~0;
|
||||
|
||||
bh->crc = 0;
|
||||
crc = crc32c(crc, bh, sizeof(*bh));
|
||||
crc = crc32c(crc, buf, size);
|
||||
bh->crc = saved;
|
||||
|
||||
return cpu_to_le32(crc);
|
||||
}
|
||||
|
||||
static void printf_header(struct scoutfs_meta_image_header *hdr)
|
||||
{
|
||||
errf("magic: 0x%016llx\n"
|
||||
"total_bytes: %llu\n"
|
||||
"version: %u\n",
|
||||
le64_to_cpu(hdr->magic),
|
||||
le64_to_cpu(hdr->total_bytes),
|
||||
le32_to_cpu(hdr->version));
|
||||
}
|
||||
|
||||
typedef ssize_t (*rw_func_t)(int fd, void *buf, size_t count, off_t offset);
|
||||
|
||||
/* rw_func_t wrapper around read(); the offset argument is ignored. */
static inline ssize_t rw_read(int fd, void *buf, size_t count, off_t offset)
{
	(void)offset;

	return read(fd, buf, count);
}
|
||||
|
||||
/* rw_func_t wrapper around pread() at the given offset. */
static inline ssize_t rw_pread(int fd, void *buf, size_t count, off_t offset)
{
	ssize_t nread = pread(fd, buf, count, offset);

	return nread;
}
|
||||
|
||||
/* rw_func_t wrapper around write(); the offset argument is ignored. */
static inline ssize_t rw_write(int fd, void *buf, size_t count, off_t offset)
{
	(void)offset;

	return write(fd, buf, count);
}
|
||||
|
||||
/* rw_func_t wrapper around pwrite() at the given offset. */
static inline ssize_t rw_pwrite(int fd, void *buf, size_t count, off_t offset)
{
	ssize_t nwritten = pwrite(fd, buf, count, offset);

	return nwritten;
}
|
||||
|
||||
/*
 * Keep calling the given read or write wrapper until all count bytes
 * have been transferred.
 *
 * func   - one of the rw_*() wrappers
 * tot    - optional running byte counter that is advanced by each
 *          successful transfer, may be NULL
 * offset - starting file offset for the positional wrappers; it is
 *          advanced along with the buffer after a short transfer so
 *          that a retry continues where the previous call stopped
 *          instead of repeating the same region
 *
 * Returns 0 once everything has been transferred, -errno if the
 * wrapper fails, or -EIO if it returns 0 bytes or more than was asked
 * for.
 */
static int rw_full_count(rw_func_t func, u64 *tot, int fd, void *buf, size_t count, off_t offset)
{
	char *pos = buf;
	ssize_t sret;

	while (count > 0) {
		sret = func(fd, pos, count, offset);
		if (sret < 0)
			return -errno;
		if (sret == 0 || (size_t)sret > count)
			return -EIO;

		if (tot)
			*tot += sret;
		pos += sret;
		offset += sret;
		count -= sret;
	}

	return 0;
}
|
||||
|
||||
static int read_image(struct image_args *args, int fd, struct block_bitmaps *bm)
|
||||
{
|
||||
struct scoutfs_meta_image_block_header bh;
|
||||
struct scoutfs_meta_image_header hdr;
|
||||
u64 opening;
|
||||
void *buf;
|
||||
off_t off;
|
||||
u64 bit;
|
||||
u64 ra;
|
||||
int ret;
|
||||
|
||||
buf = malloc(SCOUTFS_BLOCK_LG_SIZE);
|
||||
if (!buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
hdr.magic = cpu_to_le64(SCOUTFS_META_IMAGE_HEADER_MAGIC);
|
||||
hdr.total_bytes = cpu_to_le64(sizeof(hdr) +
|
||||
(bm->count * (SCOUTFS_BLOCK_LG_SIZE + sizeof(bh))));
|
||||
hdr.version = cpu_to_le32(1);
|
||||
|
||||
if (args->show_header) {
|
||||
printf_header(&hdr);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = rw_full_count(rw_write, NULL, STDOUT_FILENO, &hdr, sizeof(hdr), 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
opening = args->ra_window;
|
||||
ra = 0;
|
||||
bit = 0;
|
||||
|
||||
for (bit = 0; (bit = find_next_set_bit(bm->bits, bit, bm->size)) < bm->size; bit++) {
|
||||
|
||||
/* readahead to open the full window, then a block at a time */
|
||||
do {
|
||||
ra = find_next_set_bit(bm->bits, ra, bm->size);
|
||||
if (ra < bm->size) {
|
||||
off = ra << SCOUTFS_BLOCK_LG_SHIFT;
|
||||
posix_fadvise(fd, off, SCOUTFS_BLOCK_LG_SIZE, POSIX_FADV_WILLNEED);
|
||||
ra++;
|
||||
if (opening)
|
||||
opening -= min(opening, SCOUTFS_BLOCK_LG_SIZE);
|
||||
}
|
||||
} while (opening > 0);
|
||||
|
||||
off = bit << SCOUTFS_BLOCK_LG_SHIFT;
|
||||
ret = rw_full_count(rw_pread, NULL, fd, buf, SCOUTFS_BLOCK_LG_SIZE, off);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* Might as well try to drop the pages we've used to
|
||||
* reduce memory pressure on our read-ahead pages that
|
||||
* are waiting.
|
||||
*/
|
||||
posix_fadvise(fd, off, SCOUTFS_BLOCK_LG_SIZE, POSIX_FADV_DONTNEED);
|
||||
|
||||
bh.magic = SCOUTFS_META_IMAGE_BLOCK_HEADER_MAGIC;
|
||||
bh.offset = cpu_to_le64(off);
|
||||
bh.size = cpu_to_le32(SCOUTFS_BLOCK_LG_SIZE);
|
||||
bh.crc = calc_crc(&bh, buf, SCOUTFS_BLOCK_LG_SIZE);
|
||||
|
||||
ret = rw_full_count(rw_write, NULL, STDOUT_FILENO, &bh, sizeof(bh), 0) ?:
|
||||
rw_full_count(rw_write, NULL, STDOUT_FILENO, buf, SCOUTFS_BLOCK_LG_SIZE, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
free(buf);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int invalid_header(struct scoutfs_meta_image_header *hdr)
|
||||
{
|
||||
if (le64_to_cpu(hdr->magic) != SCOUTFS_META_IMAGE_HEADER_MAGIC) {
|
||||
errf("bad image header magic 0x%016llx (!= expected %016llx)\n",
|
||||
le64_to_cpu(hdr->magic), SCOUTFS_META_IMAGE_HEADER_MAGIC);
|
||||
|
||||
} else if (le32_to_cpu(hdr->version) != 1) {
|
||||
errf("unknown image header version %u\n", le32_to_cpu(hdr->version));
|
||||
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
/*
|
||||
* Doesn't catch offset+size overflowing, presumes pwrite() will return
|
||||
* an error.
|
||||
*/
|
||||
static int invalid_block_header(struct scoutfs_meta_image_block_header *bh)
|
||||
{
|
||||
if (le64_to_cpu(bh->magic) != SCOUTFS_META_IMAGE_BLOCK_HEADER_MAGIC) {
|
||||
errf("bad block header magic 0x%016llx (!= expected %016llx)\n",
|
||||
le64_to_cpu(bh->magic), SCOUTFS_META_IMAGE_BLOCK_HEADER_MAGIC);
|
||||
|
||||
} else if (le32_to_cpu(bh->size) == 0) {
|
||||
errf("invalid block header size %u\n", le32_to_cpu(bh->size));
|
||||
|
||||
} else if (le32_to_cpu(bh->size) > SIZE_MAX) {
|
||||
errf("block header size %u too large for size_t (> %zu)\n",
|
||||
le32_to_cpu(bh->size), (size_t)SIZE_MAX);
|
||||
|
||||
} else if (le64_to_cpu(bh->offset) > OFF_MAX) {
|
||||
errf("block header offset %llu too large for off_t (> %llu)\n",
|
||||
le64_to_cpu(bh->offset), (u64)OFF_MAX);
|
||||
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
static int write_image(struct image_args *args, int fd, struct block_bitmaps *bm)
|
||||
{
|
||||
struct scoutfs_meta_image_block_header bh;
|
||||
struct scoutfs_meta_image_header hdr;
|
||||
size_t writeback_batch = (2 * 1024 * 1024);
|
||||
size_t buf_size;
|
||||
size_t dirty;
|
||||
size_t size;
|
||||
off_t first;
|
||||
off_t last;
|
||||
off_t off;
|
||||
__le32 calc;
|
||||
void *buf;
|
||||
u64 tot;
|
||||
int ret;
|
||||
|
||||
tot = 0;
|
||||
|
||||
ret = rw_full_count(rw_read, &tot, STDIN_FILENO, &hdr, sizeof(hdr), 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (args->show_header) {
|
||||
printf_header(&hdr);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = invalid_header(&hdr);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
dirty = 0;
|
||||
first = OFF_MAX;
|
||||
last = 0;
|
||||
buf = NULL;
|
||||
buf_size = 0;
|
||||
|
||||
while (tot < le64_to_cpu(hdr.total_bytes)) {
|
||||
|
||||
ret = rw_full_count(rw_read, &tot, STDIN_FILENO, &bh, sizeof(bh), 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = invalid_block_header(&bh);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
size = le32_to_cpu(bh.size);
|
||||
if (buf_size < size) {
|
||||
buf = realloc(buf, size);
|
||||
if (!buf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
buf_size = size;
|
||||
}
|
||||
|
||||
ret = rw_full_count(rw_read, &tot, STDIN_FILENO, buf, size, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
calc = calc_crc(&bh, buf, size);
|
||||
if (calc != bh.crc) {
|
||||
errf("crc err");
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
off = le64_to_cpu(bh.offset);
|
||||
|
||||
ret = rw_full_count(rw_pwrite, NULL, fd, buf, size, off);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
dirty += size;
|
||||
first = min(first, off);
|
||||
last = max(last, off);
|
||||
if (dirty >= writeback_batch) {
|
||||
posix_fadvise(fd, first, last, POSIX_FADV_DONTNEED);
|
||||
dirty = 0;
|
||||
first = OFF_MAX;
|
||||
last = 0;
|
||||
}
|
||||
}
|
||||
|
||||
ret = fsync(fd);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int do_image(struct image_args *args)
|
||||
{
|
||||
struct block_bitmaps bm = { .bits = NULL };
|
||||
int meta_fd = -1;
|
||||
u64 dev_size;
|
||||
mode_t mode;
|
||||
int ret;
|
||||
|
||||
mode = args->is_read ? O_RDONLY : O_RDWR;
|
||||
|
||||
meta_fd = open(args->meta_device, mode);
|
||||
if (meta_fd < 0) {
|
||||
ret = -errno;
|
||||
errf("failed to open meta device '%s': %s (%d)\n",
|
||||
args->meta_device, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (args->is_read) {
|
||||
ret = flush_device(meta_fd);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = get_device_size(args->meta_device, meta_fd, &dev_size);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
bm.size = DIV_ROUND_UP(dev_size, SCOUTFS_BLOCK_LG_SIZE);
|
||||
bm.bits = calloc(1, round_up(bm.size, BITS_PER_LONG) / 8);
|
||||
if (!bm.bits) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = block_setup(meta_fd, 128 * 1024 * 1024, 32 * 1024 * 1024) ?:
|
||||
check_supers() ?:
|
||||
get_ref_bits(&bm) ?:
|
||||
read_image(args, meta_fd, &bm);
|
||||
block_shutdown();
|
||||
} else {
|
||||
ret = write_image(args, meta_fd, &bm);
|
||||
}
|
||||
out:
|
||||
free(bm.bits);
|
||||
|
||||
if (meta_fd >= 0)
|
||||
close(meta_fd);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
{
|
||||
struct image_args *args = state->input;
|
||||
int ret;
|
||||
|
||||
switch (key) {
|
||||
case 'h':
|
||||
args->show_header = true;
|
||||
break;
|
||||
case 'r':
|
||||
ret = parse_u64(arg, &args->ra_window);
|
||||
if (ret)
|
||||
argp_error(state, "readahead winddoe parse error");
|
||||
break;
|
||||
case ARGP_KEY_ARG:
|
||||
if (!args->meta_device)
|
||||
args->meta_device = strdup_or_error(state, arg);
|
||||
else
|
||||
argp_error(state, "more than two device arguments given");
|
||||
break;
|
||||
case ARGP_KEY_FINI:
|
||||
if (!args->meta_device)
|
||||
argp_error(state, "no metadata device argument given");
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Options shared by the read and write image commands.  The
 * read-ahead value is consumed in bytes: read_image() subtracts a
 * large block's size from the window for each block it hints.
 */
static struct argp_option options[] = {
	{ "show-header", 'h', NULL, 0, "Print image header and exit without processing stream" },
	{ "readahead", 'r', "NR", 0, "Maintain read-ahead window of NR bytes" },
	{ NULL }
};
|
||||
|
||||
static struct argp read_image_argp = {
|
||||
options,
|
||||
parse_opt,
|
||||
"META-DEVICE",
|
||||
"Read metadata image stream from metadata device file"
|
||||
};
|
||||
|
||||
#define DEFAULT_RA_WINDOW (512 * 1024)
|
||||
|
||||
static int read_image_cmd(int argc, char **argv)
|
||||
{
|
||||
struct image_args image_args = {
|
||||
.is_read = true,
|
||||
.ra_window = DEFAULT_RA_WINDOW,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&read_image_argp, argc, argv, 0, NULL, &image_args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_image(&image_args);
|
||||
}
|
||||
|
||||
static struct argp write_image_argp = {
|
||||
options,
|
||||
parse_opt,
|
||||
"META-DEVICE",
|
||||
"Write metadata image stream to metadata device file"
|
||||
};
|
||||
|
||||
static int write_image_cmd(int argc, char **argv)
|
||||
{
|
||||
struct image_args image_args = {
|
||||
.is_read = false,
|
||||
.ra_window = DEFAULT_RA_WINDOW,
|
||||
};
|
||||
int ret;
|
||||
|
||||
ret = argp_parse(&write_image_argp, argc, argv, 0, NULL, &image_args);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
return do_image(&image_args);
|
||||
}
|
||||
|
||||
static void __attribute__((constructor)) image_ctor(void)
|
||||
{
|
||||
cmd_register_argp("read-metadata-image", &read_image_argp, GROUP_CORE, read_image_cmd);
|
||||
cmd_register_argp("write-metadata-image", &write_image_argp, GROUP_CORE, write_image_cmd);
|
||||
}
|
||||
@@ -1,15 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_ITER_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_ITER_H_
|
||||
|
||||
/*
|
||||
* Callbacks can return a weird -errno that we'll never use to indicate
|
||||
* that iteration can stop and return 0 for success.
|
||||
*/
|
||||
#define ECHECK_ITER_DONE EL2HLT
|
||||
|
||||
static inline int xlate_iter_errno(int ret)
|
||||
{
|
||||
return ret == -ECHECK_ITER_DONE ? 0 : ret;
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -1,98 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "key.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "btree.h"
|
||||
#include "debug.h"
|
||||
#include "extent.h"
|
||||
#include "iter.h"
|
||||
#include "sns.h"
|
||||
#include "log_trees.h"
|
||||
#include "super.h"
|
||||
|
||||
struct iter_args {
|
||||
extent_cb_t cb;
|
||||
void *cb_arg;
|
||||
};
|
||||
|
||||
/*
 * btree item callback for one log_trees item.  Calls the caller's
 * extent callback for every metadata block referenced by the item's
 * allocator lists, item btree, bloom block, and data allocator roots.
 *
 * The value is rejected with -EIO unless it is exactly the size of a
 * log_trees struct, because the fields below are read straight out of
 * it.
 *
 * Returns 0 on success or the first negative error from an iterator or
 * the callback.
 */
static int lt_meta_iter(struct scoutfs_key *key, void *val, u16 val_len, void *cb_arg)
{
	struct iter_args *ia = cb_arg;
	struct scoutfs_log_trees *trees = val;
	int ret;

	if (val_len != sizeof(struct scoutfs_log_trees))
		return -EIO;

	sns_push("log_trees", le64_to_cpu(trees->rid), le64_to_cpu(trees->nr));

	debug("lt rid 0x%16llx nr %llu", le64_to_cpu(trees->rid), le64_to_cpu(trees->nr));

	sns_push("meta_avail", 0, 0);
	ret = alloc_list_meta_iter(&trees->meta_avail, ia->cb, ia->cb_arg);
	sns_pop();
	if (ret < 0)
		goto out;

	sns_push("meta_freed", 0, 0);
	ret = alloc_list_meta_iter(&trees->meta_freed, ia->cb, ia->cb_arg);
	sns_pop();
	if (ret < 0)
		goto out;

	sns_push("item_root", 0, 0);
	ret = btree_meta_iter(&trees->item_root, ia->cb, ia->cb_arg);
	sns_pop();
	if (ret < 0)
		goto out;

	/* the bloom block is optional and is a single direct reference */
	if (trees->bloom_ref.blkno) {
		sns_push("bloom_ref", 0, 0);
		ret = ia->cb(le64_to_cpu(trees->bloom_ref.blkno), 1, ia->cb_arg);
		sns_pop();
		if (ret < 0) {
			ret = xlate_iter_errno(ret);
			goto out;
		}
	}

	sns_push("data_avail", 0, 0);
	ret = alloc_root_meta_iter(&trees->data_avail, ia->cb, ia->cb_arg);
	sns_pop();
	if (ret < 0)
		goto out;

	sns_push("data_freed", 0, 0);
	ret = alloc_root_meta_iter(&trees->data_freed, ia->cb, ia->cb_arg);
	sns_pop();
	if (ret < 0)
		goto out;

	ret = 0;
out:
	/* pairs with the log_trees push at the top */
	sns_pop();

	return ret;
}
|
||||
|
||||
/*
|
||||
* Call the callers callback with the extent of all the metadata block references contained
|
||||
* in log btrees. We walk the logs_root btree items and walk all the metadata structures
|
||||
* they reference.
|
||||
*/
|
||||
int log_trees_meta_iter(extent_cb_t cb, void *cb_arg)
|
||||
{
|
||||
struct scoutfs_super_block *super = global_super;
|
||||
struct iter_args ia = { .cb = cb, .cb_arg = cb_arg };
|
||||
|
||||
return btree_item_iter(&super->logs_root, lt_meta_iter, &ia);
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_LOG_TREES_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_LOG_TREES_H_
|
||||
|
||||
#include "extent.h"
|
||||
|
||||
int log_trees_meta_iter(extent_cb_t cb, void *cb_arg);
|
||||
|
||||
#endif
|
||||
@@ -1,367 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <sys/mman.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
#include "bitmap.h"
|
||||
#include "key.h"
|
||||
|
||||
#include "alloc.h"
|
||||
#include "btree.h"
|
||||
#include "debug.h"
|
||||
#include "extent.h"
|
||||
#include "sns.h"
|
||||
#include "log_trees.h"
|
||||
#include "meta.h"
|
||||
#include "problem.h"
|
||||
#include "super.h"
|
||||
|
||||
/* State shared by the metadata allocator checks in this file. */
static struct meta_data {
	/* filled by get_meta_refs() through insert_meta_ref() */
	struct extent_root meta_refed;
	/* filled by get_meta_free() through insert_meta_free() */
	struct extent_root meta_free;
	struct {
		/* bumped for each successful insert_meta_ref() insertion */
		u64 ref_blocks;
		/* bumped for each successful insert_meta_free() insertion */
		u64 free_extents;
		/* printed by get_meta_free() as the free metadata block count */
		u64 free_blocks;
	} stats;
} global_mdat;
|
||||
|
||||
/*
 * Return true when blkno is at or after SCOUTFS_META_DEV_START_BLKNO
 * and before the total_meta_blocks count stored in the global super.
 */
bool valid_meta_blkno(u64 blkno)
{
	if (blkno < SCOUTFS_META_DEV_START_BLKNO)
		return false;

	return blkno < le64_to_cpu(global_super->total_meta_blocks);
}
|
||||
|
||||
/*
 * Check that the extent [start, start + len) is non-empty and sits
 * entirely between SCOUTFS_META_DEV_START_BLKNO and the super's
 * total_meta_blocks.  An invalid extent is recorded as a
 * PB_META_EXTENT_INVALID problem.  Returns true when the extent is valid.
 */
static bool valid_meta_extent(u64 start, u64 len)
{
	const u64 tot = le64_to_cpu(global_super->total_meta_blocks);
	bool valid = false;

	/*
	 * Once start < tot is known, tot - start can't wrap, so comparing
	 * len against the remaining space covers the end-of-device and
	 * start + len wrapping cases in one test.
	 */
	if (len > 0 && start >= SCOUTFS_META_DEV_START_BLKNO && start < tot)
		valid = len <= tot - start;

	debug("start %llu len %llu valid %u", start, len, !!valid);

	if (!valid)
		problem(PB_META_EXTENT_INVALID, "start %llu len %llu", start, len);

	return valid;
}
|
||||
|
||||
/*
|
||||
* Track references to individual metadata blocks. This uses the extent
|
||||
* callback type but is only ever called for single block references.
|
||||
* Any reference to a block that has already been referenced is
|
||||
* considered invalid and is ignored. Later repair will resolve
|
||||
* duplicate references.
|
||||
*/
|
||||
static int insert_meta_ref(u64 start, u64 len, void *arg)
|
||||
{
|
||||
struct meta_data *mdat = &global_mdat;
|
||||
struct extent_root *root = arg;
|
||||
int ret = 0;
|
||||
|
||||
/* this is tracking single metadata block references */
|
||||
if (len != 1) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (valid_meta_blkno(start)) {
|
||||
ret = extent_insert_new(root, start, len);
|
||||
if (ret == 0)
|
||||
mdat->stats.ref_blocks++;
|
||||
else if (ret == -EEXIST)
|
||||
problem(PB_META_REF_OVERLAPS_EXISTING, "blkno %llu", start);
|
||||
}
|
||||
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Extent callback that records a free metadata extent in the extent
 * root passed as arg.
 *
 * Extents that fail valid_meta_extent() are skipped (that helper
 * records the problem) and 0 is returned.  Otherwise the result of
 * extent_insert_new() is returned: 0 updates the free extent and free
 * block statistics, and -EEXIST records a
 * PB_META_FREE_OVERLAPS_EXISTING problem.
 */
static int insert_meta_free(u64 start, u64 len, void *arg)
{
	struct meta_data *mdat = &global_mdat;
	struct extent_root *root = arg;
	int ret = 0;

	if (valid_meta_extent(start, len)) {
		ret = extent_insert_new(root, start, len);
		if (ret == 0) {
			mdat->stats.free_extents++;
			/* every block in the extent is free, not just one */
			mdat->stats.free_blocks += len;
		} else if (ret == -EEXIST) {
			problem(PB_META_FREE_OVERLAPS_EXISTING,
				"start %llu len %llu", start, len);
		}
	}

	return ret;
}
|
||||
|
||||
/*
|
||||
* Walk all metadata references in the system. This walk doesn't need
|
||||
* to read metadata that doesn't contain any metadata references so it
|
||||
* can skip the bulk of metadata blocks. This gives us the set of
|
||||
* referenced metadata blocks which we can then use to repair metadata
|
||||
* allocator structures.
|
||||
*/
|
||||
static int get_meta_refs(void)
|
||||
{
|
||||
struct meta_data *mdat = &global_mdat;
|
||||
struct scoutfs_super_block *super = global_super;
|
||||
int ret;
|
||||
|
||||
extent_root_init(&mdat->meta_refed);
|
||||
|
||||
/* XXX record reserved blocks around super as referenced */
|
||||
|
||||
sns_push("meta_alloc", 0, 0);
|
||||
ret = alloc_root_meta_iter(&super->meta_alloc[0], insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("meta_alloc", 1, 0);
|
||||
ret = alloc_root_meta_iter(&super->meta_alloc[1], insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("data_alloc", 1, 0);
|
||||
ret = alloc_root_meta_iter(&super->data_alloc, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_avail", 0, 0);
|
||||
ret = alloc_list_meta_iter(&super->server_meta_avail[0],
|
||||
insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_avail", 1, 0);
|
||||
ret = alloc_list_meta_iter(&super->server_meta_avail[1],
|
||||
insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_freed", 0, 0);
|
||||
ret = alloc_list_meta_iter(&super->server_meta_freed[0],
|
||||
insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_freed", 1, 0);
|
||||
ret = alloc_list_meta_iter(&super->server_meta_freed[1],
|
||||
insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("fs_root", 0, 0);
|
||||
ret = btree_meta_iter(&super->fs_root, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("logs_root", 0, 0);
|
||||
ret = btree_meta_iter(&super->logs_root, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("log_merge", 0, 0);
|
||||
ret = btree_meta_iter(&super->log_merge, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("mounted_clients", 0, 0);
|
||||
ret = btree_meta_iter(&super->mounted_clients, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("srch_root", 0, 0);
|
||||
ret = btree_meta_iter(&super->srch_root, insert_meta_ref, &mdat->meta_refed);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = log_trees_meta_iter(insert_meta_ref, &mdat->meta_refed);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
printf("found %llu referenced metadata blocks\n", mdat->stats.ref_blocks);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int get_meta_free(void)
|
||||
{
|
||||
struct meta_data *mdat = &global_mdat;
|
||||
struct scoutfs_super_block *super = global_super;
|
||||
int ret;
|
||||
|
||||
extent_root_init(&mdat->meta_free);
|
||||
|
||||
sns_push("meta_alloc", 0, 0);
|
||||
ret = alloc_root_extent_iter(&super->meta_alloc[0], insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("meta_alloc", 1, 0);
|
||||
ret = alloc_root_extent_iter(&super->meta_alloc[1], insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_avail", 0, 0);
|
||||
ret = alloc_list_extent_iter(&super->server_meta_avail[0],
|
||||
insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_avail", 1, 0);
|
||||
ret = alloc_list_extent_iter(&super->server_meta_avail[1],
|
||||
insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_freed", 0, 0);
|
||||
ret = alloc_list_extent_iter(&super->server_meta_freed[0],
|
||||
insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
sns_push("server_meta_freed", 1, 0);
|
||||
ret = alloc_list_extent_iter(&super->server_meta_freed[1],
|
||||
insert_meta_free, &mdat->meta_free);
|
||||
sns_pop();
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
printf("found %llu free metadata blocks in %llu extents\n",
|
||||
mdat->stats.free_blocks, mdat->stats.free_extents);
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* All the space between referenced blocks must be recorded in the free
|
||||
* extents. The free extent walk didn't check that the extents
|
||||
* overlapped with references, we do that here. Remember that metadata
|
||||
* block references were merged into extents here, the refed extents
|
||||
* aren't necessarily all a single block.
|
||||
*/
|
||||
static int compare_refs_and_free(void)
|
||||
{
|
||||
struct meta_data *mdat = &global_mdat;
|
||||
struct extent_node *ref;
|
||||
struct extent_node *free;
|
||||
struct extent_node *next;
|
||||
struct extent_node *prev;
|
||||
u64 expect;
|
||||
u64 start;
|
||||
u64 end;
|
||||
|
||||
expect = 0;
|
||||
ref = extent_first(&mdat->meta_refed);
|
||||
free = extent_first(&mdat->meta_free);
|
||||
while (ref || free) {
|
||||
|
||||
printf("exp %llu ref %llu.%llu free %llu.%llu\n",
|
||||
expect, ref ? ref->start : 0, ref ? ref->len : 0,
|
||||
free ? free->start : 0, free ? free->len : 0);
|
||||
|
||||
/* referenced marked free, remove ref from free and continue from same point */
|
||||
if (ref && free && extents_overlap(ref->start, ref->len, free->start, free->len)) {
|
||||
printf("ref extent %llu.%llu overlaps free %llu %llu\n",
|
||||
ref->start, ref->len, free->start, free->len);
|
||||
|
||||
start = max(ref->start, free->start);
|
||||
end = min(ref->start + ref->len, free->start + free->len);
|
||||
|
||||
prev = extent_prev(free);
|
||||
|
||||
extent_remove(&mdat->meta_free, start, end - start);
|
||||
|
||||
if (prev)
|
||||
free = extent_next(prev);
|
||||
else
|
||||
free = extent_first(&mdat->meta_free);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* see which extent starts earlier */
|
||||
if (!free || (ref && ref->start <= free->start))
|
||||
next = ref;
|
||||
else
|
||||
next = free;
|
||||
|
||||
/* untracked region before next extent */
|
||||
if (expect < next->start) {
|
||||
printf("missing free extent %llu.%llu\n", expect, next->start - expect);
|
||||
expect = next->start;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
/* didn't overlap, advance past next extent */
|
||||
expect = next->start + next->len;
|
||||
if (next == ref)
|
||||
ref = extent_next(ref);
|
||||
else
|
||||
free = extent_next(free);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Check the metadata allocators: collect the referenced blocks, then
 * the free extents, then compare the two sets.  The steps run in that
 * order and stop at the first one that returns a negative value, which
 * is returned to the caller.  Returns 0 when all three complete.
 */
int check_meta_alloc(void)
{
	int ret;

	ret = get_meta_refs();
	if (ret >= 0)
		ret = get_meta_free();
	if (ret >= 0)
		ret = compare_refs_and_free();

	return ret < 0 ? ret : 0;
}
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_META_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_META_H_
|
||||
|
||||
bool valid_meta_blkno(u64 blkno);
|
||||
|
||||
int check_meta_alloc(void);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
#include <string.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#include "util.h"
|
||||
#include "padding.h"
|
||||
|
||||
/*
 * Return true if all sz bytes at data are zero, false at the first
 * chunk that contains a non-zero byte.  A zero sz is trivially true
 * and never dereferences data.
 *
 * The buffer is compared against a small static block of zeros one
 * chunk at a time.  The cursor is a byte pointer because arithmetic on
 * a void pointer is a compiler extension rather than standard C.
 */
bool padding_is_zeros(const void *data, size_t sz)
{
	static const char zeros[32];
	const unsigned char *pos = data;
	size_t chunk;

	while (sz > 0) {
		/* full chunks first, then whatever remains */
		chunk = sz < sizeof(zeros) ? sz : sizeof(zeros);

		if (memcmp(pos, zeros, chunk) != 0)
			return false;

		pos += chunk;
		sz -= chunk;
	}

	return true;
}
|
||||
@@ -1,6 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_PADDING_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_PADDING_H_
|
||||
|
||||
bool padding_is_zeros(const void *data, size_t sz);
|
||||
|
||||
#endif
|
||||
@@ -1,23 +0,0 @@
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "problem.h"
|
||||
|
||||
#if 0
|
||||
#define PROB_STR(pb) [pb] = #pb
|
||||
static char *prob_strs[] = {
|
||||
PROB_STR(PB_META_EXTENT_INVALID),
|
||||
PROB_STR(PB_META_EXTENT_OVERLAPS_EXISTING),
|
||||
};
|
||||
#endif
|
||||
|
||||
static struct problem_data {
|
||||
uint64_t counts[PB__NR];
|
||||
} global_pdat;
|
||||
|
||||
void problem_record(prob_t pb)
|
||||
{
|
||||
struct problem_data *pdat = &global_pdat;
|
||||
|
||||
pdat->counts[pb]++;
|
||||
}
|
||||
@@ -1,23 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_PROBLEM_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_PROBLEM_H_
|
||||
|
||||
#include "debug.h"
|
||||
#include "sns.h"
|
||||
|
||||
typedef enum {
|
||||
PB_META_EXTENT_INVALID,
|
||||
PB_META_REF_OVERLAPS_EXISTING,
|
||||
PB_META_FREE_OVERLAPS_EXISTING,
|
||||
PB_BTREE_BLOCK_BAD_LEVEL,
|
||||
PB__NR,
|
||||
} prob_t;
|
||||
|
||||
#define problem(pb, fmt, ...) \
|
||||
do { \
|
||||
debug("problem found: "#pb": %s: "fmt, sns_str(), __VA_ARGS__); \
|
||||
problem_record(pb); \
|
||||
} while (0)
|
||||
|
||||
void problem_record(prob_t pb);
|
||||
|
||||
#endif
|
||||
@@ -1,118 +0,0 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sns.h"
|
||||
|
||||
/*
|
||||
* This "str num stack" is used to describe our location in metadata at
|
||||
* any given time.
|
||||
*
|
||||
* As we descend into structures we push a string describing them,
|
||||
* perhaps with associated numbers. Pushing and popping is very cheap
|
||||
* and only rarely do we format the stack into a string, as an arbitrary
|
||||
* example:
|
||||
* super.fs_root.btree_parent:1231.btree_leaf:3231
|
||||
*/
|
||||
|
||||
#define SNS_MAX_DEPTH 1000

/*
 * Size the formatted string for the worst case that sns_str() can
 * emit.  Each entry can produce a '.' separator, up to SNS_MAX_STR_LEN
 * string bytes, and then ':' followed by up to 16 hex digits for each
 * of its two numbers.  One final byte holds the terminating null.
 */
#define SNS_STR_SIZE (SNS_MAX_DEPTH * (1 + SNS_MAX_STR_LEN + 2 * (1 + 16)) + 1)
|
||||
|
||||
/* The global location stack and the buffer it is formatted into. */
static struct sns_data {
	/* number of entries currently on the stack */
	unsigned int depth;

	struct sns_entry {
		/* pointer given to _sns_push(), stored without copying */
		char *str;
		/* number of bytes of str that sns_str() emits */
		size_t len;
		/* numbers that sns_str() only formats when they are non-zero */
		u64 a;
		u64 b;
	} ents[SNS_MAX_DEPTH];

	/* sns_str() formats the stack into this buffer and returns it */
	char str[SNS_STR_SIZE];

} global_lsdat;
|
||||
|
||||
/*
 * Push an entry onto the location stack.  The string pointer is stored
 * as given, not copied.  A push made while the stack already holds
 * SNS_MAX_DEPTH entries is dropped without any indication.
 */
void _sns_push(char *str, size_t len, u64 a, u64 b)
{
	struct sns_entry *ent;

	if (global_lsdat.depth >= SNS_MAX_DEPTH)
		return;

	ent = &global_lsdat.ents[global_lsdat.depth];
	ent->str = str;
	ent->len = len;
	ent->a = a;
	ent->b = b;

	global_lsdat.depth++;
}
|
||||
|
||||
void sns_pop(void)
|
||||
{
|
||||
struct sns_data *lsdat = &global_lsdat;
|
||||
|
||||
if (lsdat->depth > 0)
|
||||
lsdat->depth--;
|
||||
}
|
||||
|
||||
/*
 * Copy len bytes of str to pos and return the position just past the
 * copied bytes.  No null terminator is written.
 */
static char *append_str(char *pos, char *str, size_t len)
{
	char *copied = memcpy(pos, str, len);

	return copied + len;
}
|
||||
|
||||
/*
|
||||
* This is not called for x = 0 so we don't need to emit an initial 0.
|
||||
* We could by using do {} while instead of while {}.
|
||||
*/
|
||||
static char *append_u64x(char *pos, u64 x)
|
||||
{
|
||||
static char hex[] = "0123456789abcdef";
|
||||
|
||||
while (x) {
|
||||
*pos++ = hex[x & 0xf];
|
||||
x >>= 4;
|
||||
}
|
||||
|
||||
return pos;
|
||||
}
|
||||
|
||||
/* Store c at pos and return the position of the following byte. */
static char *append_char(char *pos, char c)
{
	pos[0] = c;

	return pos + 1;
}
|
||||
|
||||
/*
|
||||
* Return a pointer to a null terminated string that describes the
|
||||
* current location stack. The string buffer is global.
|
||||
*/
|
||||
char *sns_str(void)
|
||||
{
|
||||
struct sns_data *lsdat = &global_lsdat;
|
||||
struct sns_entry *ent;
|
||||
char *pos;
|
||||
int i;
|
||||
|
||||
pos = lsdat->str;
|
||||
for (i = 0; i < lsdat->depth; i++) {
|
||||
ent = &lsdat->ents[i];
|
||||
|
||||
if (i)
|
||||
pos = append_char(pos, '.');
|
||||
|
||||
pos = append_str(pos, ent->str, ent->len);
|
||||
|
||||
if (ent->a) {
|
||||
pos = append_char(pos, ':');
|
||||
pos = append_u64x(pos, ent->a);
|
||||
}
|
||||
|
||||
if (ent->b) {
|
||||
pos = append_char(pos, ':');
|
||||
pos = append_u64x(pos, ent->b);
|
||||
}
|
||||
}
|
||||
|
||||
*pos = '\0';
|
||||
|
||||
return lsdat->str;
|
||||
}
|
||||
@@ -1,20 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_SNS_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_SNS_H_
|
||||
|
||||
#include <assert.h>
|
||||
|
||||
#include "sparse.h"
|
||||
|
||||
#define SNS_MAX_STR_LEN 20
|
||||
|
||||
#define sns_push(str, a, b) \
|
||||
do { \
|
||||
build_assert(sizeof(str) - 1 <= SNS_MAX_STR_LEN); \
|
||||
_sns_push((str), sizeof(str) - 1, a, b); \
|
||||
} while (0)
|
||||
|
||||
void _sns_push(char *str, size_t len, u64 a, u64 b);
|
||||
void sns_pop(void);
|
||||
char *sns_str(void);
|
||||
|
||||
#endif
|
||||
@@ -1,57 +0,0 @@
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include "sparse.h"
|
||||
#include "util.h"
|
||||
#include "format.h"
|
||||
|
||||
#include "block.h"
|
||||
#include "super.h"
|
||||
|
||||
/*
|
||||
* After we check the super blocks we provide a global buffer to track
|
||||
* the current super block. It is referenced to get static information
|
||||
* about the system and is also modified and written as part of
|
||||
* transactions.
|
||||
*/
|
||||
struct scoutfs_super_block *global_super;
|
||||
|
||||
/*
|
||||
* After checking the supers we save a copy of it in a global buffer that's used by
|
||||
* other modules to track the current super. It can be modified and written during commits.
|
||||
*/
|
||||
int check_supers(void)
|
||||
{
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct block *blk = NULL;
|
||||
int ret;
|
||||
|
||||
global_super = malloc(sizeof(struct scoutfs_super_block));
|
||||
if (!global_super) {
|
||||
printf("error allocating super block buffer\n");
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = block_get(&blk, SCOUTFS_SUPER_BLKNO, BF_SM);
|
||||
if (ret < 0) {
|
||||
printf("error reading super block\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
super = block_buf(blk);
|
||||
|
||||
memcpy(global_super, super, sizeof(struct scoutfs_super_block));
|
||||
ret = 0;
|
||||
out:
|
||||
block_put(&blk);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void super_shutdown(void)
|
||||
{
|
||||
free(global_super);
|
||||
}
|
||||
@@ -1,9 +0,0 @@
|
||||
#ifndef _SCOUTFS_UTILS_CHECK_SUPER_H_
|
||||
#define _SCOUTFS_UTILS_CHECK_SUPER_H_
|
||||
|
||||
extern struct scoutfs_super_block *global_super;
|
||||
|
||||
int check_supers(void);
|
||||
void super_shutdown(void);
|
||||
|
||||
#endif
|
||||
@@ -141,4 +141,13 @@ static inline void scoutfs_key_dec(struct scoutfs_key *key)
|
||||
key->sk_zone--;
|
||||
}
|
||||
|
||||
/*
 * Unpack the fields of an indx key into the caller's variables.
 *
 * The values are read across the key's three 64bit fields and final
 * _sk_fourth field: major is the top 8 bits of the first field, and
 * each of minor, ino and xid is the low 56 bits of one field shifted
 * up by 8 and ORed with the top 8 bits of the next field (or with
 * _sk_fourth for xid).
 */
static inline void scoutfs_xattr_get_indx_key(struct scoutfs_key *key, u8 *major, u64 *minor,
					      u64 *ino, u64 *xid)
{
	u64 first = le64_to_cpu(key->_sk_first);
	u64 second = le64_to_cpu(key->_sk_second);
	u64 third = le64_to_cpu(key->_sk_third);

	*major = first >> 56;
	*minor = (first << 8) | (second >> 56);
	*ino = (second << 8) | (third >> 56);
	*xid = (third << 8) | key->_sk_fourth;
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -156,16 +156,6 @@ static inline void list_move_tail(struct list_head *list,
|
||||
list_add_tail(list, head);
|
||||
}
|
||||
|
||||
/**
|
||||
* list_is_head - tests whether @list is the list @head
|
||||
* @list: the entry to test
|
||||
* @head: the head of the list
|
||||
*/
|
||||
static inline int list_is_head(const struct list_head *list, const struct list_head *head)
|
||||
{
|
||||
return list == head;
|
||||
}
|
||||
|
||||
/**
|
||||
* list_empty - tests whether a list is empty
|
||||
* @head: the list to test.
|
||||
@@ -252,15 +242,6 @@ static inline void list_splice_init(struct list_head *list,
|
||||
for (pos = (head)->next, n = pos->next; pos != (head); \
|
||||
pos = n, n = pos->next)
|
||||
|
||||
/**
|
||||
* list_entry_is_head - test if the entry points to the head of the list
|
||||
* @pos: the type * to cursor
|
||||
* @head: the head for your list.
|
||||
* @member: the name of the list_head within the struct.
|
||||
*/
|
||||
#define list_entry_is_head(pos, head, member) \
|
||||
(&pos->member == (head))
|
||||
|
||||
/**
|
||||
* list_for_each_entry - iterate over list of given type
|
||||
* @pos: the type * to use as a loop counter.
|
||||
@@ -326,28 +307,4 @@ static inline void list_splice_init(struct list_head *list,
|
||||
#define list_next_entry(pos, member) \
|
||||
list_entry((pos)->member.next, typeof(*(pos)), member)
|
||||
|
||||
/**
|
||||
* list_prev_entry - get the prev element in list
|
||||
* @pos: the type * to cursor
|
||||
* @member: the name of the list_head within the struct.
|
||||
*/
|
||||
#define list_prev_entry(pos, member) \
|
||||
list_entry((pos)->member.prev, typeof(*(pos)), member)
|
||||
|
||||
/**
|
||||
* list_for_each_entry_safe_reverse - iterate backwards over list safe against removal
|
||||
* @pos: the type * to use as a loop cursor.
|
||||
* @n: another type * to use as temporary storage
|
||||
* @head: the head for your list.
|
||||
* @member: the name of the list_head within the struct.
|
||||
*
|
||||
* Iterate backwards over list of given type, safe against removal
|
||||
* of list entry.
|
||||
*/
|
||||
#define list_for_each_entry_safe_reverse(pos, n, head, member) \
|
||||
for (pos = list_last_entry(head, typeof(*pos), member), \
|
||||
n = list_prev_entry(pos, member); \
|
||||
!list_entry_is_head(pos, head, member); \
|
||||
pos = n, n = list_prev_entry(n, member))
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,24 +0,0 @@
|
||||
#ifndef _LK_RBTREE_WRAPPER_H_
|
||||
#define _LK_RBTREE_WRAPPER_H_
|
||||
|
||||
/*
|
||||
* We're using this lame hack to build and use the kernel's rbtree in
|
||||
* userspace. We drop the kernel's rbtree*[ch] implementation in and
|
||||
* use them with this wrapper. We only have to remove the kernel
|
||||
* includes from the imported files.
|
||||
*/
|
||||
|
||||
#include <stdbool.h>
|
||||
#include "util.h"
|
||||
|
||||
#define rcu_assign_pointer(a, b) do { a = b; } while (0)
|
||||
#define READ_ONCE(a) ({ a; })
|
||||
#define WRITE_ONCE(a, b) do { a = b; } while (0)
|
||||
#define unlikely(a) ({ a; })
|
||||
#define EXPORT_SYMBOL(a) /* nop */
|
||||
|
||||
#include "rbtree_types.h"
|
||||
#include "rbtree.h"
|
||||
#include "rbtree_augmented.h"
|
||||
|
||||
#endif
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user