mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-09 13:23:14 +00:00
Compare commits
1 Commits
v1.11
...
ben/fence_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b898c89c11 |
117
ReleaseNotes.md
117
ReleaseNotes.md
@@ -1,123 +1,6 @@
|
||||
Versity ScoutFS Release Notes
|
||||
=============================
|
||||
|
||||
---
|
||||
v1.11
|
||||
\
|
||||
*Feb 2, 2023*
|
||||
|
||||
Fixed a free extent processing error that could prevent mount from
|
||||
proceeding when free data extents were sufficiently fragmented. It now
|
||||
properly handles very fragmented free extent maps.
|
||||
|
||||
Fixed a statfs server processing race that could return spurious errors
|
||||
and shut down the server. With the race closed statfs processing is
|
||||
reliable.
|
||||
|
||||
Fixed a rare livelock in the move\_blocks ioctl. With the right
|
||||
relationship between ioctl arguments and eventual file extent items the
|
||||
core loop in the move\_blocks ioctl could get stuck looping on an extent
|
||||
item and never return. The loop exit conditions were fixed and the loop
|
||||
will always advance through all extents.
|
||||
|
||||
Changed the 'print' scoutfs commands to flush the block cache for the
|
||||
devices. It was inconvenient to expect cache flushing to be a separate
|
||||
step to ensure consistency with remote node writes.
|
||||
|
||||
---
|
||||
v1.10
|
||||
\
|
||||
*Dec 7, 2022*
|
||||
|
||||
Fixed a potential directory entry cache management deadlock that could
|
||||
occur when many nodes performed heavy metadata write loads across shared
|
||||
directories and their child subdirectories. The deadlock could halt
|
||||
invalidation progress on a node which could then stop use of locks that
|
||||
needed invalidation on that node which would result in almost all tasks
|
||||
hanging on those locks that would never make progress.
|
||||
|
||||
Fixed a circumstance where metadata change sequence index item
|
||||
modification could leave behind old stale metadata sequence items. The
|
||||
duplication case required concurrent metadata updates across mounts with
|
||||
particular open transaction patterns so the duplicate items are rare.
|
||||
They resulted in a small amount of additional load when walking change
|
||||
indexes but had no effect on correctness.
|
||||
|
||||
Fixed a rare case where sparse file extension might not write partial
|
||||
blocks of zeros which was found in testing. This required using
|
||||
truncate to extend files past file sizes that end in partial blocks
|
||||
along with the right transaction commit and memory reclaim patterns.
|
||||
This never affected regular non-sparse files nor files prepopulated with
|
||||
fallocate.
|
||||
|
||||
---
|
||||
v1.9
|
||||
\
|
||||
*Oct 29, 2022*
|
||||
|
||||
Fix VFS cached directory entry consistency verification that could cause
|
||||
spurious "no such file or directory" (ENOENT) errors from rename over
|
||||
NFS under certain conditions. The problem was only ever with the
|
||||
consistency of in-memory cached dentry objects, persistent data was
|
||||
correct and eventual eviction of the bad cached objects would stop
|
||||
generating the errors.
|
||||
|
||||
---
|
||||
v1.8
|
||||
\
|
||||
*Oct 18, 2022*
|
||||
|
||||
Add support for Linux POSIX Access Control Lists, as described in
|
||||
acl(5). Mount options are added to enable ("acl") and disable ("noacl")
|
||||
support. The default is to support ACLs. ACLs are stored in the
|
||||
existing extended attribute scheme so adding support does not require
|
||||
a format change.
|
||||
|
||||
Add options to control data extent preallocation. The default behavior
|
||||
does not change. The options can relax the limits on preallocation
|
||||
which will then trigger under more write patterns and increase the risk
|
||||
of preallocated space which is never used. The options are described in
|
||||
scoutfs(5).
|
||||
|
||||
---
|
||||
v1.7
|
||||
\
|
||||
*Aug 26, 2022*
|
||||
|
||||
* **Fixed possible persistent errors moving freed data extents**
|
||||
\
|
||||
Fixed a case where the server could hit persistent errors trying to
|
||||
move a client's freed extents in one commit. The client had to free
|
||||
a large number of extents that occupied distant positions in the
|
||||
global free extent btree. Very large fragmented files could cause
|
||||
this. The server now moves the freed extents in multiple commits and
|
||||
can always ensure forward progress.
|
||||
|
||||
* **Fixed possible persistent errors from freed duplicate extents**
|
||||
\
|
||||
Background orphan deletion wasn't properly synchronizing with
|
||||
foreground tasks deleting very large files. If a deletion took long
|
||||
enough then background deletion could also attempt to delete inode items
|
||||
while the deletion was making progress. This could create duplicate
|
||||
deletions of data extent items which causes the server to abort when
|
||||
it later discovers the duplicate extents as it merges free lists.
|
||||
|
||||
---
|
||||
v1.6
|
||||
\
|
||||
*Jul 7, 2022*
|
||||
|
||||
* **Fix memory leaks in rare corner cases**
|
||||
\
|
||||
Analysis tools found a few corner cases that leaked small structures,
|
||||
generally around error handling or startup and shutdown.
|
||||
|
||||
* **Add --skip-likely-huge scoutfs print command option**
|
||||
\
|
||||
Add an option to scoutfs print to reduce the size of the output
|
||||
so that it can be used to see system-wide metadata without being
|
||||
overwhelmed by file-level details.
|
||||
|
||||
---
|
||||
v1.5
|
||||
\
|
||||
|
||||
@@ -8,7 +8,6 @@ CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include
|
||||
-include $(src)/Makefile.kernelcompat
|
||||
|
||||
scoutfs-y += \
|
||||
acl.o \
|
||||
avl.o \
|
||||
alloc.o \
|
||||
block.o \
|
||||
|
||||
@@ -34,12 +34,3 @@ endif
|
||||
ifneq (,$(shell grep 'FMODE_KABI_ITERATE' include/linux/fs.h))
|
||||
ccflags-y += -DKC_FMODE_KABI_ITERATE
|
||||
endif
|
||||
|
||||
#
|
||||
# v4.7-rc2-23-g0d4d717f2583
|
||||
#
|
||||
# Added user_ns argument to posix_acl_valid
|
||||
#
|
||||
ifneq (,$(shell grep 'posix_acl_valid.*user_ns,' include/linux/posix_acl.h))
|
||||
ccflags-y += -DKC_POSIX_ACL_VALID_USER_NS
|
||||
endif
|
||||
|
||||
355
kmod/src/acl.c
355
kmod/src/acl.c
@@ -1,355 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
#include <linux/kernel.h>
|
||||
#include <linux/fs.h>
|
||||
#include <linux/slab.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/posix_acl.h>
|
||||
#include <linux/posix_acl_xattr.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "super.h"
|
||||
#include "scoutfs_trace.h"
|
||||
#include "xattr.h"
|
||||
#include "acl.h"
|
||||
#include "inode.h"
|
||||
#include "trans.h"
|
||||
|
||||
/*
|
||||
* POSIX draft ACLs are stored as full xattr items with the entries
|
||||
* encoded as the kernel's posix_acl_xattr_{header,entry} value structs.
|
||||
*
|
||||
* They're accessed and modified via user facing synthetic xattrs, iops
|
||||
* calls from the kernel, during inode mode changes, and during inode
|
||||
* creation.
|
||||
*
|
||||
* ACL access devolves into xattr access which is relatively expensive
|
||||
* so we maintain the cached native form in the vfs inode. We drop the
|
||||
* cache in lock invalidation which means that cached acl access must
|
||||
* always be performed under cluster locking.
|
||||
*/
|
||||
|
||||
static int acl_xattr_name_len(int type, char **name, size_t *name_len)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
switch (type) {
|
||||
case ACL_TYPE_ACCESS:
|
||||
*name = XATTR_NAME_POSIX_ACL_ACCESS;
|
||||
if (name_len)
|
||||
*name_len = sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1;
|
||||
break;
|
||||
case ACL_TYPE_DEFAULT:
|
||||
*name = XATTR_NAME_POSIX_ACL_DEFAULT;
|
||||
if (name_len)
|
||||
*name_len = sizeof(XATTR_NAME_POSIX_ACL_DEFAULT) - 1;
|
||||
break;
|
||||
default:
|
||||
ret = -EINVAL;
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct posix_acl *acl;
|
||||
char *value = NULL;
|
||||
char *name;
|
||||
int ret;
|
||||
|
||||
if (!IS_POSIXACL(inode))
|
||||
return NULL;
|
||||
|
||||
acl = get_cached_acl(inode, type);
|
||||
if (acl != ACL_NOT_CACHED)
|
||||
return acl;
|
||||
|
||||
ret = acl_xattr_name_len(type, &name, NULL);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
ret = scoutfs_xattr_get_locked(inode, name, NULL, 0, lock);
|
||||
if (ret > 0) {
|
||||
value = kzalloc(ret, GFP_NOFS);
|
||||
if (!value)
|
||||
ret = -ENOMEM;
|
||||
else
|
||||
ret = scoutfs_xattr_get_locked(inode, name, value, ret, lock);
|
||||
}
|
||||
if (ret > 0) {
|
||||
acl = posix_acl_from_xattr(&init_user_ns, value, ret);
|
||||
} else if (ret == -ENODATA || ret == 0) {
|
||||
acl = NULL;
|
||||
} else {
|
||||
acl = ERR_PTR(ret);
|
||||
}
|
||||
|
||||
/* can set null negative cache */
|
||||
if (!IS_ERR(acl))
|
||||
set_cached_acl(inode, type, acl);
|
||||
|
||||
kfree(value);
|
||||
|
||||
return acl;
|
||||
}
|
||||
|
||||
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct posix_acl *acl;
|
||||
int ret;
|
||||
|
||||
if (!IS_POSIXACL(inode))
|
||||
return NULL;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
|
||||
if (ret < 0) {
|
||||
acl = ERR_PTR(ret);
|
||||
} else {
|
||||
acl = scoutfs_get_acl_locked(inode, type, lock);
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
|
||||
}
|
||||
|
||||
return acl;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller has acquired the locks and dirtied the inode, they'll
|
||||
* update the inode item if we return 0.
|
||||
*/
|
||||
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
|
||||
struct scoutfs_lock *lock, struct list_head *ind_locks)
|
||||
{
|
||||
static const struct scoutfs_xattr_prefix_tags tgs = {0,}; /* never scoutfs. prefix */
|
||||
bool set_mode = false;
|
||||
char *value = NULL;
|
||||
umode_t new_mode;
|
||||
size_t name_len;
|
||||
char *name;
|
||||
int size = 0;
|
||||
int ret;
|
||||
|
||||
ret = acl_xattr_name_len(type, &name, &name_len);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
switch (type) {
|
||||
case ACL_TYPE_ACCESS:
|
||||
if (acl) {
|
||||
ret = posix_acl_update_mode(inode, &new_mode, &acl);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
set_mode = true;
|
||||
}
|
||||
break;
|
||||
case ACL_TYPE_DEFAULT:
|
||||
if (!S_ISDIR(inode->i_mode)) {
|
||||
ret = acl ? -EINVAL : 0;
|
||||
goto out;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (acl) {
|
||||
size = posix_acl_xattr_size(acl->a_count);
|
||||
value = kmalloc(size, GFP_NOFS);
|
||||
if (!value) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_xattr_set_locked(inode, name, name_len, value, size, 0, &tgs,
|
||||
lock, NULL, ind_locks);
|
||||
if (ret == 0 && set_mode) {
|
||||
inode->i_mode = new_mode;
|
||||
if (!value) {
|
||||
/* can be setting an acl that only affects mode, didn't need xattr */
|
||||
inode_inc_iversion(inode);
|
||||
inode->i_ctime = CURRENT_TIME;
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
if (!ret)
|
||||
set_cached_acl(inode, type, acl);
|
||||
|
||||
kfree(value);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
LIST_HEAD(ind_locks);
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE, SCOUTFS_LKF_REFRESH_INODE, inode, &lock) ?:
|
||||
scoutfs_inode_index_lock_hold(inode, &ind_locks, false, true);
|
||||
if (ret == 0) {
|
||||
ret = scoutfs_dirty_inode_item(inode, lock) ?:
|
||||
scoutfs_set_acl_locked(inode, acl, type, lock, &ind_locks);
|
||||
if (ret == 0)
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
}
|
||||
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
|
||||
int type)
|
||||
{
|
||||
struct posix_acl *acl;
|
||||
int ret = 0;
|
||||
|
||||
if (!IS_POSIXACL(dentry->d_inode))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
acl = scoutfs_get_acl(dentry->d_inode, type);
|
||||
if (IS_ERR(acl))
|
||||
return PTR_ERR(acl);
|
||||
if (acl == NULL)
|
||||
return -ENODATA;
|
||||
|
||||
ret = posix_acl_to_xattr(&init_user_ns, acl, value, size);
|
||||
posix_acl_release(acl);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
|
||||
int flags, int type)
|
||||
{
|
||||
struct posix_acl *acl = NULL;
|
||||
int ret;
|
||||
|
||||
if (!inode_owner_or_capable(dentry->d_inode))
|
||||
return -EPERM;
|
||||
|
||||
if (!IS_POSIXACL(dentry->d_inode))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (value) {
|
||||
acl = posix_acl_from_xattr(&init_user_ns, value, size);
|
||||
if (IS_ERR(acl))
|
||||
return PTR_ERR(acl);
|
||||
|
||||
if (acl) {
|
||||
ret = kc_posix_acl_valid(&init_user_ns, acl);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = scoutfs_set_acl(dentry->d_inode, acl, type);
|
||||
out:
|
||||
posix_acl_release(acl);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Apply the parent's default acl to the new inode's access acl and inherit
|
||||
* it as the default for new directories. The caller holds locks and a
|
||||
* transaction.
|
||||
*/
|
||||
int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir,
|
||||
struct scoutfs_lock *lock, struct scoutfs_lock *dir_lock,
|
||||
struct list_head *ind_locks)
|
||||
{
|
||||
struct posix_acl *acl = NULL;
|
||||
int ret = 0;
|
||||
|
||||
if (!S_ISLNK(inode->i_mode)) {
|
||||
if (IS_POSIXACL(dir)) {
|
||||
acl = scoutfs_get_acl_locked(dir, ACL_TYPE_DEFAULT, dir_lock);
|
||||
if (IS_ERR(acl))
|
||||
return PTR_ERR(acl);
|
||||
}
|
||||
|
||||
if (!acl)
|
||||
inode->i_mode &= ~current_umask();
|
||||
}
|
||||
|
||||
if (IS_POSIXACL(dir) && acl) {
|
||||
if (S_ISDIR(inode->i_mode)) {
|
||||
ret = scoutfs_set_acl_locked(inode, acl, ACL_TYPE_DEFAULT,
|
||||
lock, ind_locks);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
ret = posix_acl_create(&acl, GFP_NOFS, &inode->i_mode);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (ret > 0)
|
||||
ret = scoutfs_set_acl_locked(inode, acl, ACL_TYPE_ACCESS,
|
||||
lock, ind_locks);
|
||||
} else {
|
||||
cache_no_acl(inode);
|
||||
}
|
||||
out:
|
||||
posix_acl_release(acl);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Update the access ACL based on a newly set mode. If we return an
|
||||
* error then the xattr wasn't changed.
|
||||
*
|
||||
* Annoyingly, setattr_copy has logic that transforms the final set mode
|
||||
* that we want to use to update the acl. But we don't want to modify
|
||||
* the other inode fields while discovering the resulting mode. We're
|
||||
* relying on acl_chmod not caring about the transformation (currently
|
||||
* just clears sgid). It would be better if we could get the resulting
|
||||
* mode to give to acl_chmod without modifying the other inode fields.
|
||||
*
|
||||
* The caller has the inode mutex, a cluster lock, transaction, and will
|
||||
* update the inode item if we return success.
|
||||
*/
|
||||
int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr,
|
||||
struct scoutfs_lock *lock, struct list_head *ind_locks)
|
||||
{
|
||||
struct posix_acl *acl;
|
||||
int ret = 0;
|
||||
|
||||
if (!IS_POSIXACL(inode) || !(attr->ia_valid & ATTR_MODE))
|
||||
return 0;
|
||||
|
||||
if (S_ISLNK(inode->i_mode))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
acl = scoutfs_get_acl_locked(inode, ACL_TYPE_ACCESS, lock);
|
||||
if (IS_ERR_OR_NULL(acl))
|
||||
return PTR_ERR(acl);
|
||||
|
||||
ret = posix_acl_chmod(&acl, GFP_KERNEL, attr->ia_mode);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = scoutfs_set_acl_locked(inode, acl, ACL_TYPE_ACCESS, lock, ind_locks);
|
||||
posix_acl_release(acl);
|
||||
return ret;
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
#ifndef _SCOUTFS_ACL_H_
|
||||
#define _SCOUTFS_ACL_H_
|
||||
|
||||
struct posix_acl *scoutfs_get_acl(struct inode *inode, int type);
|
||||
struct posix_acl *scoutfs_get_acl_locked(struct inode *inode, int type, struct scoutfs_lock *lock);
|
||||
int scoutfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
|
||||
int scoutfs_set_acl_locked(struct inode *inode, struct posix_acl *acl, int type,
|
||||
struct scoutfs_lock *lock, struct list_head *ind_locks);
|
||||
int scoutfs_acl_get_xattr(struct dentry *dentry, const char *name, void *value, size_t size,
|
||||
int type);
|
||||
int scoutfs_acl_set_xattr(struct dentry *dentry, const char *name, const void *value, size_t size,
|
||||
int flags, int type);
|
||||
int scoutfs_acl_chmod_locked(struct inode *inode, struct iattr *attr,
|
||||
struct scoutfs_lock *lock, struct list_head *ind_locks);
|
||||
int scoutfs_init_acl_locked(struct inode *inode, struct inode *dir,
|
||||
struct scoutfs_lock *lock, struct scoutfs_lock *dir_lock,
|
||||
struct list_head *ind_locks);
|
||||
#endif
|
||||
@@ -892,11 +892,12 @@ static int find_zone_extent(struct super_block *sb, struct scoutfs_alloc_root *r
|
||||
* -ENOENT is returned if we run out of extents in the source tree
|
||||
* before moving the total.
|
||||
*
|
||||
* If meta_budget is non-zero then -EINPROGRESS can be returned if the
|
||||
* caller's budget is consumed in the allocator during this call
|
||||
* (though not necessarily by us, we don't have per-thread tracking of
|
||||
* allocator consumption :/). The call can still have made progress and
|
||||
* caller is expected to commit the dirty trees and examine the resulting
|
||||
* If meta_reserved is non-zero then -EINPROGRESS can be returned if the
|
||||
* current meta allocator's avail blocks or room for freed blocks would
|
||||
* have fallen under the reserved amount. The trees could have been
|
||||
* successfully dirtied in this case but the number of blocks moved is
|
||||
* not returned. The caller is expected to deal with the partial
|
||||
* progress by committing the dirty trees and examining the resulting
|
||||
* modified trees to see if they need to continue moving extents.
|
||||
*
|
||||
* The caller can specify that extents in the source tree should first
|
||||
@@ -913,7 +914,7 @@ int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_alloc_root *dst,
|
||||
struct scoutfs_alloc_root *src, u64 total,
|
||||
__le64 *exclusive, __le64 *vacant, u64 zone_blocks, u64 meta_budget)
|
||||
__le64 *exclusive, __le64 *vacant, u64 zone_blocks, u64 meta_reserved)
|
||||
{
|
||||
struct alloc_ext_args args = {
|
||||
.alloc = alloc,
|
||||
@@ -921,8 +922,6 @@ int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
};
|
||||
struct scoutfs_extent found;
|
||||
struct scoutfs_extent ext;
|
||||
u32 avail_start = 0;
|
||||
u32 freed_start = 0;
|
||||
u64 moved = 0;
|
||||
u64 count;
|
||||
int ret = 0;
|
||||
@@ -933,9 +932,6 @@ int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
vacant = NULL;
|
||||
}
|
||||
|
||||
if (meta_budget != 0)
|
||||
scoutfs_alloc_meta_remaining(alloc, &avail_start, &freed_start);
|
||||
|
||||
while (moved < total) {
|
||||
count = total - moved;
|
||||
|
||||
@@ -968,24 +964,14 @@ int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
if (meta_budget != 0 &&
|
||||
scoutfs_alloc_meta_low_since(alloc, avail_start, freed_start, meta_budget,
|
||||
extent_mod_blocks(src->root.height) +
|
||||
extent_mod_blocks(dst->root.height))) {
|
||||
if (meta_reserved != 0 &&
|
||||
scoutfs_alloc_meta_low(sb, alloc, meta_reserved +
|
||||
extent_mod_blocks(src->root.height) +
|
||||
extent_mod_blocks(dst->root.height))) {
|
||||
ret = -EINPROGRESS;
|
||||
break;
|
||||
}
|
||||
|
||||
/* return partial if the server alloc can't dirty any more */
|
||||
if (scoutfs_alloc_meta_low(sb, alloc, 50 + extent_mod_blocks(src->root.height) +
|
||||
extent_mod_blocks(dst->root.height))) {
|
||||
if (WARN_ON_ONCE(!moved))
|
||||
ret = -ENOSPC;
|
||||
else
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* searching set start/len, finish initializing alloced extent */
|
||||
ext.map = found.map ? ext.start - found.start + found.map : 0;
|
||||
ext.flags = found.flags;
|
||||
@@ -1365,27 +1351,6 @@ void scoutfs_alloc_meta_remaining(struct scoutfs_alloc *alloc, u32 *avail_total,
|
||||
} while (read_seqretry(&alloc->seqlock, seq));
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns true if the caller's consumption of nr from either avail or
|
||||
* freed would end up exceeding their budget relative to the starting
|
||||
* remaining snapshot they took.
|
||||
*/
|
||||
bool scoutfs_alloc_meta_low_since(struct scoutfs_alloc *alloc, u32 avail_start, u32 freed_start,
|
||||
u32 budget, u32 nr)
|
||||
{
|
||||
u32 avail_use;
|
||||
u32 freed_use;
|
||||
u32 avail;
|
||||
u32 freed;
|
||||
|
||||
scoutfs_alloc_meta_remaining(alloc, &avail, &freed);
|
||||
|
||||
avail_use = avail_start - avail;
|
||||
freed_use = freed_start - freed;
|
||||
|
||||
return ((avail_use + nr) > budget) || ((freed_use + nr) > budget);
|
||||
}
|
||||
|
||||
bool scoutfs_alloc_test_flag(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc, u32 flag)
|
||||
{
|
||||
@@ -1582,10 +1547,12 @@ out:
|
||||
* call the caller's callback. This assumes that the super it's reading
|
||||
* could be stale and will retry if it encounters stale blocks.
|
||||
*/
|
||||
int scoutfs_alloc_foreach(struct super_block *sb, scoutfs_alloc_foreach_cb_t cb, void *arg)
|
||||
int scoutfs_alloc_foreach(struct super_block *sb,
|
||||
scoutfs_alloc_foreach_cb_t cb, void *arg)
|
||||
{
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
struct scoutfs_block_ref stale_refs[2] = {{0,}};
|
||||
struct scoutfs_block_ref refs[2] = {{0,}};
|
||||
int ret;
|
||||
|
||||
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
@@ -1594,18 +1561,26 @@ int scoutfs_alloc_foreach(struct super_block *sb, scoutfs_alloc_foreach_cb_t cb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
do {
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
retry:
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_alloc_foreach_super(sb, super, cb, arg);
|
||||
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &super->logs_root.ref,
|
||||
&super->srch_root.ref);
|
||||
} while (ret == -ESTALE);
|
||||
refs[0] = super->logs_root.ref;
|
||||
refs[1] = super->srch_root.ref;
|
||||
|
||||
ret = scoutfs_alloc_foreach_super(sb, super, cb, arg);
|
||||
out:
|
||||
if (ret == -ESTALE) {
|
||||
if (memcmp(&stale_refs, &refs, sizeof(refs)) == 0) {
|
||||
ret = -EIO;
|
||||
} else {
|
||||
BUILD_BUG_ON(sizeof(stale_refs) != sizeof(refs));
|
||||
memcpy(stale_refs, refs, sizeof(stale_refs));
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
kfree(super);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -19,11 +19,14 @@
|
||||
(128ULL * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
/*
|
||||
* The default size that we'll try to preallocate. This is trying to
|
||||
* hit the limit of large efficient device writes while minimizing
|
||||
* wasted preallocation that is never used.
|
||||
* The largest aligned region that we'll try to allocate at the end of
|
||||
* the file as it's extended. This is also limited to the current file
|
||||
* size so we can only waste at most twice the total file size when
|
||||
* files are less than this. We try to keep this around the point of
|
||||
* diminishing returns in streaming performance of common data devices
|
||||
* to limit waste.
|
||||
*/
|
||||
#define SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS \
|
||||
#define SCOUTFS_DATA_EXTEND_PREALLOC_LIMIT \
|
||||
(8ULL * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
|
||||
|
||||
/*
|
||||
@@ -128,7 +131,7 @@ int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_alloc_root *dst,
|
||||
struct scoutfs_alloc_root *src, u64 total,
|
||||
__le64 *exclusive, __le64 *vacant, u64 zone_blocks, u64 meta_budget);
|
||||
__le64 *exclusive, __le64 *vacant, u64 zone_blocks, u64 meta_reserved);
|
||||
int scoutfs_alloc_insert(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri, struct scoutfs_alloc_root *root,
|
||||
u64 start, u64 len);
|
||||
@@ -156,8 +159,6 @@ int scoutfs_alloc_splice_list(struct super_block *sb,
|
||||
bool scoutfs_alloc_meta_low(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc, u32 nr);
|
||||
void scoutfs_alloc_meta_remaining(struct scoutfs_alloc *alloc, u32 *avail_total, u32 *freed_space);
|
||||
bool scoutfs_alloc_meta_low_since(struct scoutfs_alloc *alloc, u32 avail_start, u32 freed_start,
|
||||
u32 budget, u32 nr);
|
||||
bool scoutfs_alloc_test_flag(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc, u32 flag);
|
||||
|
||||
|
||||
@@ -677,7 +677,7 @@ out:
|
||||
int scoutfs_block_read_ref(struct super_block *sb, struct scoutfs_block_ref *ref, u32 magic,
|
||||
struct scoutfs_block **bl_ret)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_block_header *hdr;
|
||||
struct block_private *bp = NULL;
|
||||
bool retried = false;
|
||||
@@ -701,7 +701,7 @@ retry:
|
||||
set_bit(BLOCK_BIT_CRC_VALID, &bp->bits);
|
||||
}
|
||||
|
||||
if (hdr->magic != cpu_to_le32(magic) || hdr->fsid != cpu_to_le64(sbi->fsid) ||
|
||||
if (hdr->magic != cpu_to_le32(magic) || hdr->fsid != super->hdr.fsid ||
|
||||
hdr->seq != ref->seq || hdr->blkno != ref->blkno) {
|
||||
ret = -ESTALE;
|
||||
goto out;
|
||||
@@ -728,36 +728,6 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool stale_refs_match(struct scoutfs_block_ref *caller, struct scoutfs_block_ref *saved)
|
||||
{
|
||||
return !caller || (caller->blkno == saved->blkno && caller->seq == saved->seq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if a read of a reference that gave ESTALE should be retried or
|
||||
* should generate a hard error. If this is the second time we got
|
||||
* ESTALE from the same refs then we return EIO and the caller should
|
||||
* stop. As long as we keep seeing different refs we'll return ESTALE
|
||||
* and the caller can keep trying.
|
||||
*/
|
||||
int scoutfs_block_check_stale(struct super_block *sb, int ret,
|
||||
struct scoutfs_block_saved_refs *saved,
|
||||
struct scoutfs_block_ref *a, struct scoutfs_block_ref *b)
|
||||
{
|
||||
if (ret == -ESTALE) {
|
||||
if (stale_refs_match(a, &saved->refs[0]) && stale_refs_match(b, &saved->refs[1])){
|
||||
ret = -EIO;
|
||||
} else {
|
||||
if (a)
|
||||
saved->refs[0] = *a;
|
||||
if (b)
|
||||
saved->refs[1] = *b;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void scoutfs_block_put(struct super_block *sb, struct scoutfs_block *bl)
|
||||
{
|
||||
if (!IS_ERR_OR_NULL(bl))
|
||||
@@ -827,7 +797,7 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
u32 magic, struct scoutfs_block **bl_ret,
|
||||
u64 dirty_blkno, u64 *ref_blkno)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_block *cow_bl = NULL;
|
||||
struct scoutfs_block *bl = NULL;
|
||||
struct block_private *exist_bp = NULL;
|
||||
@@ -895,7 +865,7 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
|
||||
hdr = bl->data;
|
||||
hdr->magic = cpu_to_le32(magic);
|
||||
hdr->fsid = cpu_to_le64(sbi->fsid);
|
||||
hdr->fsid = super->hdr.fsid;
|
||||
hdr->blkno = cpu_to_le64(bl->blkno);
|
||||
prandom_bytes(&hdr->seq, sizeof(hdr->seq));
|
||||
|
||||
|
||||
@@ -13,17 +13,6 @@ struct scoutfs_block {
|
||||
void *priv;
|
||||
};
|
||||
|
||||
struct scoutfs_block_saved_refs {
|
||||
struct scoutfs_block_ref refs[2];
|
||||
};
|
||||
|
||||
#define DECLARE_SAVED_REFS(name) \
|
||||
struct scoutfs_block_saved_refs name = {{{0,}}}
|
||||
|
||||
int scoutfs_block_check_stale(struct super_block *sb, int ret,
|
||||
struct scoutfs_block_saved_refs *saved,
|
||||
struct scoutfs_block_ref *a, struct scoutfs_block_ref *b);
|
||||
|
||||
int scoutfs_block_read_ref(struct super_block *sb, struct scoutfs_block_ref *ref, u32 magic,
|
||||
struct scoutfs_block **bl_ret);
|
||||
void scoutfs_block_put(struct super_block *sb, struct scoutfs_block *bl);
|
||||
|
||||
@@ -356,6 +356,7 @@ static int client_greeting(struct super_block *sb,
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct client_info *client = sbi->client_info;
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_net_greeting *gr = resp;
|
||||
bool new_server;
|
||||
int ret;
|
||||
@@ -370,9 +371,9 @@ static int client_greeting(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (gr->fsid != cpu_to_le64(sbi->fsid)) {
|
||||
if (gr->fsid != super->hdr.fsid) {
|
||||
scoutfs_warn(sb, "server greeting response fsid 0x%llx did not match client fsid 0x%llx",
|
||||
le64_to_cpu(gr->fsid), sbi->fsid);
|
||||
le64_to_cpu(gr->fsid), le64_to_cpu(super->hdr.fsid));
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@@ -475,6 +476,7 @@ static void scoutfs_client_connect_worker(struct work_struct *work)
|
||||
connect_dwork.work);
|
||||
struct super_block *sb = client->sb;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_mount_options opts;
|
||||
struct scoutfs_net_greeting greet;
|
||||
struct sockaddr_in sin;
|
||||
@@ -506,7 +508,7 @@ static void scoutfs_client_connect_worker(struct work_struct *work)
|
||||
goto out;
|
||||
|
||||
/* send a greeting to verify endpoints of each connection */
|
||||
greet.fsid = cpu_to_le64(sbi->fsid);
|
||||
greet.fsid = super->hdr.fsid;
|
||||
greet.fmt_vers = cpu_to_le64(sbi->fmt_vers);
|
||||
greet.server_term = cpu_to_le64(client->server_term);
|
||||
greet.rid = cpu_to_le64(sbi->rid);
|
||||
|
||||
@@ -75,6 +75,8 @@
|
||||
EXPAND_COUNTER(data_write_begin_enobufs_retry) \
|
||||
EXPAND_COUNTER(dentry_revalidate_error) \
|
||||
EXPAND_COUNTER(dentry_revalidate_invalid) \
|
||||
EXPAND_COUNTER(dentry_revalidate_locked) \
|
||||
EXPAND_COUNTER(dentry_revalidate_orphan) \
|
||||
EXPAND_COUNTER(dentry_revalidate_rcu) \
|
||||
EXPAND_COUNTER(dentry_revalidate_root) \
|
||||
EXPAND_COUNTER(dentry_revalidate_valid) \
|
||||
@@ -187,6 +189,8 @@
|
||||
EXPAND_COUNTER(srch_search_retry_empty) \
|
||||
EXPAND_COUNTER(srch_search_sorted) \
|
||||
EXPAND_COUNTER(srch_search_sorted_block) \
|
||||
EXPAND_COUNTER(srch_search_stale_eio) \
|
||||
EXPAND_COUNTER(srch_search_stale_retry) \
|
||||
EXPAND_COUNTER(srch_search_xattrs) \
|
||||
EXPAND_COUNTER(srch_read_stale) \
|
||||
EXPAND_COUNTER(statfs) \
|
||||
|
||||
169
kmod/src/data.c
169
kmod/src/data.c
@@ -366,27 +366,27 @@ static inline u64 ext_last(struct scoutfs_extent *ext)
|
||||
|
||||
/*
|
||||
* The caller is writing to a logical iblock that doesn't have an
|
||||
* allocated extent. The caller has searched for an extent containing
|
||||
* iblock. If it already existed then it must be unallocated and
|
||||
* offline.
|
||||
* allocated extent.
|
||||
*
|
||||
* We implement two preallocation strategies. Typically we only
|
||||
* preallocate for simple streaming writes and limit preallocation while
|
||||
* the file is small. The largest efficient allocation size is
|
||||
* typically large enough that it would be unreasonable to allocate that
|
||||
* much for all small files.
|
||||
* We always allocate an extent starting at the logical iblock. The
|
||||
* caller has searched for an extent containing iblock. If it already
|
||||
* existed then it must be unallocated and offline.
|
||||
*
|
||||
* Optionally, we can simply preallocate large empty aligned regions.
|
||||
* This can waste a lot of space for small or sparse files but is
|
||||
* reasonable when a file population is known to be large and dense but
|
||||
* known to be written with non-streaming write patterns.
|
||||
* Preallocation is used if we're strictly contiguously extending
|
||||
* writes. That is, if the logical block offset equals the number of
|
||||
* online blocks. We try to preallocate the number of blocks existing
|
||||
* so that small files don't waste inordinate amounts of space and large
|
||||
* files will eventually see large extents. This only works for
|
||||
* contiguous single stream writes or stages of files from the first
|
||||
* block. It doesn't work for concurrent stages, releasing behind
|
||||
* staging, sparse files, multi-node writes, etc. fallocate() is always
|
||||
* a better tool to use.
|
||||
*/
|
||||
static int alloc_block(struct super_block *sb, struct inode *inode,
|
||||
struct scoutfs_extent *ext, u64 iblock,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
DECLARE_DATA_INFO(sb, datinf);
|
||||
struct scoutfs_mount_options opts;
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct data_ext_args args = {
|
||||
.ino = ino,
|
||||
@@ -394,22 +394,17 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
|
||||
.lock = lock,
|
||||
};
|
||||
struct scoutfs_extent found;
|
||||
struct scoutfs_extent pre = {0,};
|
||||
bool undo_pre = false;
|
||||
struct scoutfs_extent pre;
|
||||
u64 blkno = 0;
|
||||
u64 online;
|
||||
u64 offline;
|
||||
u8 flags;
|
||||
u64 start;
|
||||
u64 count;
|
||||
u64 rem;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
trace_scoutfs_data_alloc_block_enter(sb, ino, iblock, ext);
|
||||
|
||||
scoutfs_options_read(sb, &opts);
|
||||
|
||||
/* can only allocate over existing unallocated offline extent */
|
||||
if (WARN_ON_ONCE(ext->len &&
|
||||
!(iblock >= ext->start && iblock <= ext_last(ext) &&
|
||||
@@ -418,106 +413,66 @@ static int alloc_block(struct super_block *sb, struct inode *inode,
|
||||
|
||||
mutex_lock(&datinf->mutex);
|
||||
|
||||
/* default to single allocation at the written block */
|
||||
start = iblock;
|
||||
count = 1;
|
||||
/* copy existing flags for preallocated regions */
|
||||
flags = ext->len ? ext->flags : 0;
|
||||
scoutfs_inode_get_onoff(inode, &online, &offline);
|
||||
|
||||
if (ext->len) {
|
||||
/*
|
||||
* Assume that offline writers are going to be writing
|
||||
* all the offline extents and try to preallocate the
|
||||
* rest of the unwritten extent.
|
||||
*/
|
||||
/* limit preallocation to remaining existing (offline) extent */
|
||||
count = ext->len - (iblock - ext->start);
|
||||
|
||||
} else if (opts.data_prealloc_contig_only) {
|
||||
/*
|
||||
* Only preallocate when a quick test of the online
|
||||
* block counts looks like we're a simple streaming
|
||||
* write. Try to write until the next extent but limit
|
||||
* the preallocation size to the number of online
|
||||
* blocks.
|
||||
*/
|
||||
scoutfs_inode_get_onoff(inode, &online, &offline);
|
||||
if (iblock > 1 && iblock == online) {
|
||||
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
|
||||
iblock, 1, &found);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
if (found.len && found.start > iblock)
|
||||
count = found.start - iblock;
|
||||
else
|
||||
count = opts.data_prealloc_blocks;
|
||||
|
||||
count = min(iblock, count);
|
||||
}
|
||||
|
||||
flags = ext->flags;
|
||||
} else {
|
||||
/*
|
||||
* Preallocation of aligned regions only preallocates if
|
||||
* the aligned region contains no extents at all. This
|
||||
* could be fooled by offline sparse extents but we
|
||||
* don't want to iterate over all offline extents in the
|
||||
* aligned region.
|
||||
*/
|
||||
div64_u64_rem(iblock, opts.data_prealloc_blocks, &rem);
|
||||
start = iblock - rem;
|
||||
count = opts.data_prealloc_blocks;
|
||||
ret = scoutfs_ext_next(sb, &data_ext_ops, &args, start, 1, &found);
|
||||
/* otherwise alloc to next extent */
|
||||
ret = scoutfs_ext_next(sb, &data_ext_ops, &args,
|
||||
iblock, 1, &found);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
if (found.len && found.start < start + count)
|
||||
count = 1;
|
||||
if (found.len && found.start > iblock)
|
||||
count = found.start - iblock;
|
||||
else
|
||||
count = SCOUTFS_DATA_EXTEND_PREALLOC_LIMIT;
|
||||
flags = 0;
|
||||
}
|
||||
|
||||
/* overall prealloc limit */
|
||||
count = min_t(u64, count, opts.data_prealloc_blocks);
|
||||
count = min_t(u64, count, SCOUTFS_DATA_EXTEND_PREALLOC_LIMIT);
|
||||
|
||||
/* only strictly contiguous extending writes will try to preallocate */
|
||||
if (iblock > 1 && iblock == online)
|
||||
count = min(iblock, count);
|
||||
else
|
||||
count = 1;
|
||||
|
||||
ret = scoutfs_alloc_data(sb, datinf->alloc, datinf->wri,
|
||||
&datinf->dalloc, count, &blkno, &count);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/*
|
||||
* An aligned prealloc attempt that gets a smaller extent can
|
||||
* fail to cover iblock, make sure that it does. This is a
|
||||
* pathological case so we don't try to move the window past
|
||||
* iblock. Just enough to cover it, which we know is safe.
|
||||
*/
|
||||
if (start + count <= iblock)
|
||||
start += (iblock - (start + count) + 1);
|
||||
ret = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock, 1, blkno, 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (count > 1) {
|
||||
pre.start = start;
|
||||
pre.len = count;
|
||||
pre.map = blkno;
|
||||
pre.start = iblock + 1;
|
||||
pre.len = count - 1;
|
||||
pre.map = blkno + 1;
|
||||
pre.flags = flags | SEF_UNWRITTEN;
|
||||
ret = scoutfs_ext_set(sb, &data_ext_ops, &args, pre.start,
|
||||
pre.len, pre.map, pre.flags);
|
||||
if (ret < 0)
|
||||
if (ret < 0) {
|
||||
err = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock,
|
||||
1, 0, flags);
|
||||
BUG_ON(err); /* couldn't restore original */
|
||||
goto out;
|
||||
undo_pre = true;
|
||||
}
|
||||
}
|
||||
|
||||
ret = scoutfs_ext_set(sb, &data_ext_ops, &args, iblock, 1, blkno + (iblock - start), 0);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* tell the caller we have a single block, could check next? */
|
||||
ext->start = iblock;
|
||||
ext->len = 1;
|
||||
ext->map = blkno + (iblock - start);
|
||||
ext->map = blkno;
|
||||
ext->flags = 0;
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret < 0 && blkno > 0) {
|
||||
if (undo_pre) {
|
||||
err = scoutfs_ext_set(sb, &data_ext_ops, &args,
|
||||
pre.start, pre.len, 0, flags);
|
||||
BUG_ON(err); /* leaked preallocated extent */
|
||||
}
|
||||
err = scoutfs_free_data(sb, datinf->alloc, datinf->wri,
|
||||
&datinf->data_freed, blkno, count);
|
||||
BUG_ON(err); /* leaked free blocks */
|
||||
@@ -631,8 +586,8 @@ static int scoutfs_get_block_read(struct inode *inode, sector_t iblock,
|
||||
return ret;
|
||||
}
|
||||
|
||||
int scoutfs_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh,
|
||||
int create)
|
||||
static int scoutfs_get_block_write(struct inode *inode, sector_t iblock,
|
||||
struct buffer_head *bh, int create)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
int ret;
|
||||
@@ -1192,9 +1147,9 @@ static void truncate_inode_pages_extent(struct inode *inode, u64 start, u64 len)
|
||||
* explained above the move_blocks ioctl argument structure definition.
|
||||
*
|
||||
* The caller has processed the ioctl args and performed the most basic
|
||||
* argument sanity and inode checks, but we perform more detailed inode
|
||||
* checks once we have the inode lock and refreshed inodes. Our job is
|
||||
* to safely lock the two files and move the extents.
|
||||
* inode checks, but we perform more detailed inode checks once we have
|
||||
* the inode lock and refreshed inodes. Our job is to safely lock the
|
||||
* two files and move the extents.
|
||||
*/
|
||||
#define MOVE_DATA_EXTENTS_PER_HOLD 16
|
||||
int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
@@ -1254,15 +1209,6 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
count = (byte_len + SCOUTFS_BLOCK_SM_MASK) >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
to_iblock = to_off >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
|
||||
/* only move extent blocks inside i_size, careful not to wrap */
|
||||
from_size = i_size_read(from);
|
||||
if (from_off >= from_size) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
if (from_off + byte_len > from_size)
|
||||
count = ((from_size - from_off) + SCOUTFS_BLOCK_SM_MASK) >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
|
||||
if (S_ISDIR(from->i_mode) || S_ISDIR(to->i_mode)) {
|
||||
ret = -EISDIR;
|
||||
goto out;
|
||||
@@ -1338,8 +1284,9 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
break;
|
||||
}
|
||||
|
||||
/* done if next extent starts after moving region */
|
||||
if (ext.start >= from_iblock + count) {
|
||||
/* only move extents within count and i_size */
|
||||
if (ext.start >= from_iblock + count ||
|
||||
ext.start >= i_size_read(from)) {
|
||||
done = true;
|
||||
ret = 0;
|
||||
break;
|
||||
@@ -1347,14 +1294,12 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
|
||||
from_start = max(ext.start, from_iblock);
|
||||
map = ext.map + (from_start - ext.start);
|
||||
len = min(from_iblock + count, ext.start + ext.len) - from_start;
|
||||
to_start = to_iblock + (from_start - from_iblock);
|
||||
len = min3(from_iblock + count,
|
||||
round_up((u64)i_size_read(from),
|
||||
SCOUTFS_BLOCK_SM_SIZE),
|
||||
ext.start + ext.len) - from_start;
|
||||
|
||||
/* we'd get stuck, shouldn't happen */
|
||||
if (WARN_ON_ONCE(len == 0)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
to_start = to_iblock + (from_start - from_iblock);
|
||||
|
||||
if (is_stage) {
|
||||
ret = scoutfs_ext_next(sb, &data_ext_ops, &to_args,
|
||||
|
||||
@@ -43,9 +43,6 @@ extern const struct file_operations scoutfs_file_fops;
|
||||
struct scoutfs_alloc;
|
||||
struct scoutfs_block_writer;
|
||||
|
||||
int scoutfs_get_block_write(struct inode *inode, sector_t iblock, struct buffer_head *bh,
|
||||
int create);
|
||||
|
||||
int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
|
||||
u64 ino, u64 iblock, u64 last, bool offline,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
413
kmod/src/dir.c
413
kmod/src/dir.c
@@ -32,7 +32,6 @@
|
||||
#include "hash.h"
|
||||
#include "omap.h"
|
||||
#include "forest.h"
|
||||
#include "acl.h"
|
||||
#include "counters.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
@@ -60,6 +59,8 @@
|
||||
* All the entries have a dirent struct with the full name in their
|
||||
* value. The dirent struct contains the name hash and readdir position
|
||||
* so that any item use can reference all the items for a given entry.
|
||||
* This is important for deleting all the items given a dentry that was
|
||||
* populated by lookup.
|
||||
*/
|
||||
|
||||
static unsigned int mode_to_type(umode_t mode)
|
||||
@@ -98,12 +99,100 @@ static unsigned int dentry_type(enum scoutfs_dentry_type type)
|
||||
return DT_UNKNOWN;
|
||||
}
|
||||
|
||||
/*
|
||||
* @lock_cov: tells revalidation that the dentry is still locked and valid.
|
||||
*
|
||||
* @pos, @hash: lets us remove items on final unlink without having to
|
||||
* look them up.
|
||||
*/
|
||||
struct dentry_info {
|
||||
struct scoutfs_lock_coverage lock_cov;
|
||||
u64 hash;
|
||||
u64 pos;
|
||||
};
|
||||
|
||||
static struct kmem_cache *dentry_info_cache;
|
||||
|
||||
static void scoutfs_d_release(struct dentry *dentry)
|
||||
{
|
||||
struct super_block *sb = dentry->d_sb;
|
||||
struct dentry_info *di = dentry->d_fsdata;
|
||||
|
||||
if (di) {
|
||||
scoutfs_lock_del_coverage(sb, &di->lock_cov);
|
||||
kmem_cache_free(dentry_info_cache, di);
|
||||
dentry->d_fsdata = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int scoutfs_d_revalidate(struct dentry *dentry, unsigned int flags);
|
||||
|
||||
const struct dentry_operations scoutfs_dentry_ops = {
|
||||
static const struct dentry_operations scoutfs_dentry_ops = {
|
||||
.d_release = scoutfs_d_release,
|
||||
.d_revalidate = scoutfs_d_revalidate,
|
||||
};
|
||||
|
||||
static int alloc_dentry_info(struct dentry *dentry)
|
||||
{
|
||||
struct dentry_info *di;
|
||||
|
||||
smp_rmb();
|
||||
if (dentry->d_op == &scoutfs_dentry_ops)
|
||||
return 0;
|
||||
|
||||
di = kmem_cache_zalloc(dentry_info_cache, GFP_NOFS);
|
||||
if (!di)
|
||||
return -ENOMEM;
|
||||
|
||||
scoutfs_lock_init_coverage(&di->lock_cov);
|
||||
|
||||
spin_lock(&dentry->d_lock);
|
||||
if (!dentry->d_fsdata) {
|
||||
dentry->d_fsdata = di;
|
||||
smp_wmb();
|
||||
d_set_d_op(dentry, &scoutfs_dentry_ops);
|
||||
}
|
||||
spin_unlock(&dentry->d_lock);
|
||||
|
||||
if (di != dentry->d_fsdata)
|
||||
kmem_cache_free(dentry_info_cache, di);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void update_dentry_info(struct super_block *sb, struct dentry *dentry,
|
||||
u64 hash, u64 pos, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct dentry_info *di = dentry->d_fsdata;
|
||||
|
||||
if (WARN_ON_ONCE(di == NULL))
|
||||
return;
|
||||
|
||||
scoutfs_lock_add_coverage(sb, lock, &di->lock_cov);
|
||||
di->hash = hash;
|
||||
di->pos = pos;
|
||||
}
|
||||
|
||||
static u64 dentry_info_hash(struct dentry *dentry)
|
||||
{
|
||||
struct dentry_info *di = dentry->d_fsdata;
|
||||
|
||||
if (WARN_ON_ONCE(di == NULL))
|
||||
return 0;
|
||||
|
||||
return di->hash;
|
||||
}
|
||||
|
||||
static u64 dentry_info_pos(struct dentry *dentry)
|
||||
{
|
||||
struct dentry_info *di = dentry->d_fsdata;
|
||||
|
||||
if (WARN_ON_ONCE(di == NULL))
|
||||
return 0;
|
||||
|
||||
return di->pos;
|
||||
}
|
||||
|
||||
static void init_dirent_key(struct scoutfs_key *key, u8 type, u64 ino,
|
||||
u64 major, u64 minor)
|
||||
{
|
||||
@@ -228,105 +317,62 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int lookup_dentry_dirent(struct super_block *sb, u64 dir_ino, struct dentry *dentry,
|
||||
struct scoutfs_dirent *dent_ret,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
return lookup_dirent(sb, dir_ino, dentry->d_name.name, dentry->d_name.len,
|
||||
dirent_name_hash(dentry->d_name.name, dentry->d_name.len),
|
||||
dent_ret, lock);
|
||||
}
|
||||
|
||||
static u64 dentry_parent_ino(struct dentry *dentry)
|
||||
{
|
||||
struct dentry *parent = NULL;
|
||||
struct inode *dir;
|
||||
u64 dir_ino = 0;
|
||||
|
||||
if ((parent = dget_parent(dentry)) && (dir = parent->d_inode))
|
||||
dir_ino = scoutfs_ino(dir);
|
||||
|
||||
dput(parent);
|
||||
return dir_ino;
|
||||
}
|
||||
|
||||
/* negative dentries return 0, our root ino is non-zero (1) */
|
||||
static u64 dentry_ino(struct dentry *dentry)
|
||||
{
|
||||
return dentry->d_inode ? scoutfs_ino(dentry->d_inode) : 0;
|
||||
}
|
||||
|
||||
static void set_dentry_fsdata(struct dentry *dentry, struct scoutfs_lock *lock)
|
||||
{
|
||||
void *now = (void *)(unsigned long)lock->refresh_gen;
|
||||
void *was;
|
||||
|
||||
/* didn't want to alloc :/ */
|
||||
BUILD_BUG_ON(sizeof(dentry->d_fsdata) != sizeof(u64));
|
||||
BUILD_BUG_ON(sizeof(dentry->d_fsdata) != sizeof(long));
|
||||
|
||||
do {
|
||||
was = dentry->d_fsdata;
|
||||
} while (cmpxchg(&dentry->d_fsdata, was, now) != was);
|
||||
}
|
||||
|
||||
static bool test_dentry_fsdata(struct dentry *dentry, u64 refresh)
|
||||
{
|
||||
u64 fsd = (unsigned long)ACCESS_ONCE(dentry->d_fsdata);
|
||||
|
||||
return fsd == refresh;
|
||||
}
|
||||
|
||||
/*
|
||||
* Validate an operation caller's input dentry argument. If the fsdata
|
||||
* is valid then the underlying dirent items couldn't have changed and
|
||||
* we return 0. If fsdata is no longer protected by a lock or its
|
||||
* fields don't match then we check the dirent item. If the dirent item
|
||||
* doesn't match what the caller expected given their dentry fields then
|
||||
* we return an error.
|
||||
* Verify that the caller's dentry still precisely matches our dirent
|
||||
* items.
|
||||
*
|
||||
* The caller has a dentry that the vfs revalidated before they acquired
|
||||
* their locks. If the dentry is still covered by a lock we immediately
|
||||
* return 0. If not, we check items and return -ENOENT if a positive
|
||||
* dentry no longer matches the items or -EEXIST if a negative entry's
|
||||
* name now has an item.
|
||||
*/
|
||||
static int validate_dentry(struct super_block *sb, u64 dir_ino, struct dentry *dentry,
|
||||
struct scoutfs_lock *lock)
|
||||
static int verify_entry(struct super_block *sb, u64 dir_ino, struct dentry *dentry,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
u64 ino = dentry_ino(dentry);
|
||||
struct dentry_info *di = dentry->d_fsdata;
|
||||
struct scoutfs_dirent dent = {0,};
|
||||
const char *name;
|
||||
u64 dentry_ino;
|
||||
int name_len;
|
||||
u64 hash;
|
||||
int ret;
|
||||
|
||||
if (test_dentry_fsdata(dentry, lock->refresh_gen)) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
if (scoutfs_lock_is_covered(sb, &di->lock_cov))
|
||||
return 0;
|
||||
|
||||
ret = lookup_dentry_dirent(sb, dir_ino, dentry, &dent, lock);
|
||||
dentry_ino = dentry->d_inode ? scoutfs_ino(dentry->d_inode) : 0;
|
||||
name = dentry->d_name.name;
|
||||
name_len = dentry->d_name.len;
|
||||
hash = dirent_name_hash(name, name_len);
|
||||
|
||||
ret = lookup_dirent(sb, dir_ino, name, name_len, hash, &dent, lock);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
/* use negative zeroed dent when lookup gave -ENOENT */
|
||||
if (!ino && dent.ino) {
|
||||
/* caller expected negative but there was a dirent */
|
||||
ret = -EEXIST;
|
||||
} else if (ino && !dent.ino) {
|
||||
/* caller expected positive but there was no dirent */
|
||||
ret = -ENOENT;
|
||||
} else if (ino != le64_to_cpu(dent.ino)) {
|
||||
/* name linked to different inode than caller's */
|
||||
ret = -ESTALE;
|
||||
if (dentry_ino != le64_to_cpu(dent.ino) || di->hash != le64_to_cpu(dent.hash) ||
|
||||
di->pos != le64_to_cpu(dent.pos)) {
|
||||
if (dentry_ino)
|
||||
ret = -ENOENT;
|
||||
else
|
||||
ret = -EEXIST;
|
||||
} else {
|
||||
/* dirent ino matches dentry ino */
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
out:
|
||||
trace_scoutfs_validate_dentry(sb, dentry, dir_ino, ino, le64_to_cpu(dent.ino),
|
||||
lock->refresh_gen, ret);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int scoutfs_d_revalidate(struct dentry *dentry, unsigned int flags)
|
||||
{
|
||||
struct super_block *sb = dentry->d_sb;
|
||||
u64 dir_ino = dentry_parent_ino(dentry);
|
||||
struct dentry_info *di = dentry->d_fsdata;
|
||||
struct dentry *parent = dget_parent(dentry);
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct scoutfs_dirent dent;
|
||||
bool is_covered = false;
|
||||
struct inode *dir;
|
||||
u64 dentry_ino;
|
||||
int ret;
|
||||
|
||||
/* don't think this happens but we can find out */
|
||||
@@ -348,7 +394,47 @@ static int scoutfs_d_revalidate(struct dentry *dentry, unsigned int flags)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (test_dentry_fsdata(dentry, scoutfs_lock_ino_refresh_gen(sb, dir_ino))) {
|
||||
if (WARN_ON_ONCE(di == NULL)) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
is_covered = scoutfs_lock_is_covered(sb, &di->lock_cov);
|
||||
if (is_covered) {
|
||||
scoutfs_inc_counter(sb, dentry_revalidate_locked);
|
||||
ret = 1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (!parent || !parent->d_inode) {
|
||||
scoutfs_inc_counter(sb, dentry_revalidate_orphan);
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
dir = parent->d_inode;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, dir, &lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = lookup_dirent(sb, scoutfs_ino(dir),
|
||||
dentry->d_name.name, dentry->d_name.len,
|
||||
dirent_name_hash(dentry->d_name.name,
|
||||
dentry->d_name.len),
|
||||
&dent, lock);
|
||||
if (ret == -ENOENT) {
|
||||
dent.ino = 0;
|
||||
dent.hash = 0;
|
||||
dent.pos = 0;
|
||||
} else if (ret < 0) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
dentry_ino = dentry->d_inode ? scoutfs_ino(dentry->d_inode) : 0;
|
||||
|
||||
if ((dentry_ino == le64_to_cpu(dent.ino))) {
|
||||
update_dentry_info(sb, dentry, le64_to_cpu(dent.hash),
|
||||
le64_to_cpu(dent.pos), lock);
|
||||
scoutfs_inc_counter(sb, dentry_revalidate_valid);
|
||||
ret = 1;
|
||||
} else {
|
||||
@@ -357,7 +443,10 @@ static int scoutfs_d_revalidate(struct dentry *dentry, unsigned int flags)
|
||||
}
|
||||
|
||||
out:
|
||||
trace_scoutfs_d_revalidate(sb, dentry, flags, dir_ino, ret);
|
||||
trace_scoutfs_d_revalidate(sb, dentry, flags, parent, is_covered, ret);
|
||||
|
||||
dput(parent);
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
|
||||
|
||||
if (ret < 0 && ret != -ECHILD)
|
||||
scoutfs_inc_counter(sb, dentry_revalidate_error);
|
||||
@@ -394,6 +483,10 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry,
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = alloc_dentry_info(dentry);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, dir, &dir_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -407,7 +500,8 @@ static struct dentry *scoutfs_lookup(struct inode *dir, struct dentry *dentry,
|
||||
ino = le64_to_cpu(dent.ino);
|
||||
}
|
||||
if (ret == 0)
|
||||
set_dentry_fsdata(dentry, dir_lock);
|
||||
update_dentry_info(sb, dentry, le64_to_cpu(dent.hash),
|
||||
le64_to_cpu(dent.pos), dir_lock);
|
||||
|
||||
scoutfs_unlock(sb, dir_lock, SCOUTFS_LOCK_READ);
|
||||
|
||||
@@ -631,6 +725,10 @@ static struct inode *lock_hold_create(struct inode *dir, struct dentry *dentry,
|
||||
int ret = 0;
|
||||
u64 ino;
|
||||
|
||||
ret = alloc_dentry_info(dentry);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
ret = scoutfs_alloc_ino(sb, S_ISDIR(mode), &ino);
|
||||
if (ret)
|
||||
return ERR_PTR(ret);
|
||||
@@ -667,8 +765,7 @@ retry:
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
ret = scoutfs_new_inode(sb, dir, mode, rdev, ino, *inode_lock, &inode) ?:
|
||||
scoutfs_init_acl_locked(inode, dir, *inode_lock, *dir_lock, ind_locks);
|
||||
ret = scoutfs_new_inode(sb, dir, mode, rdev, ino, *inode_lock, &inode);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -719,7 +816,7 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
return PTR_ERR(inode);
|
||||
si = SCOUTFS_I(inode);
|
||||
|
||||
ret = validate_dentry(sb, scoutfs_ino(dir), dentry, dir_lock);
|
||||
ret = verify_entry(sb, scoutfs_ino(dir), dentry, dir_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -732,7 +829,7 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
set_dentry_fsdata(dentry, dir_lock);
|
||||
update_dentry_info(sb, dentry, hash, pos, dir_lock);
|
||||
|
||||
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
@@ -806,15 +903,19 @@ static int scoutfs_link(struct dentry *old_dentry,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = validate_dentry(sb, scoutfs_ino(dir), dentry, dir_lock);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
|
||||
if (inode->i_nlink >= SCOUTFS_LINK_MAX) {
|
||||
ret = -EMLINK;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
ret = alloc_dentry_info(dentry);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
ret = verify_entry(sb, scoutfs_ino(dir), dentry, dir_lock);
|
||||
if (ret < 0)
|
||||
goto out_unlock;
|
||||
|
||||
dir_size = i_size_read(dir) + dentry->d_name.len;
|
||||
|
||||
if (inode->i_nlink == 0) {
|
||||
@@ -840,7 +941,7 @@ retry:
|
||||
goto out;
|
||||
|
||||
if (del_orphan) {
|
||||
ret = scoutfs_inode_orphan_delete(sb, scoutfs_ino(inode), orph_lock, inode_lock);
|
||||
ret = scoutfs_inode_orphan_delete(sb, scoutfs_ino(inode), orph_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
@@ -852,11 +953,11 @@ retry:
|
||||
scoutfs_ino(inode), inode->i_mode, dir_lock,
|
||||
inode_lock);
|
||||
if (ret) {
|
||||
err = scoutfs_inode_orphan_create(sb, scoutfs_ino(inode), orph_lock, inode_lock);
|
||||
err = scoutfs_inode_orphan_create(sb, scoutfs_ino(inode), orph_lock);
|
||||
WARN_ON_ONCE(err); /* no orphan, might not scan and delete after crash */
|
||||
goto out;
|
||||
}
|
||||
set_dentry_fsdata(dentry, dir_lock);
|
||||
update_dentry_info(sb, dentry, hash, pos, dir_lock);
|
||||
|
||||
i_size_write(dir, dir_size);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
@@ -904,11 +1005,9 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
struct scoutfs_lock *inode_lock = NULL;
|
||||
struct scoutfs_lock *orph_lock = NULL;
|
||||
struct scoutfs_lock *dir_lock = NULL;
|
||||
struct scoutfs_dirent dent;
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 ind_seq;
|
||||
u64 hash;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
|
||||
ret = scoutfs_lock_inodes(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE,
|
||||
@@ -917,7 +1016,11 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
ret = validate_dentry(sb, scoutfs_ino(dir), dentry, dir_lock);
|
||||
ret = alloc_dentry_info(dentry);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
ret = verify_entry(sb, scoutfs_ino(dir), dentry, dir_lock);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
|
||||
@@ -926,13 +1029,6 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
hash = dirent_name_hash(dentry->d_name.name, dentry->d_name.len);
|
||||
|
||||
ret = lookup_dirent(sb, scoutfs_ino(dir), dentry->d_name.name, dentry->d_name.len, hash,
|
||||
&dent, dir_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (should_orphan(inode)) {
|
||||
ret = scoutfs_lock_orphan(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, scoutfs_ino(inode),
|
||||
&orph_lock);
|
||||
@@ -951,20 +1047,21 @@ retry:
|
||||
goto unlock;
|
||||
|
||||
if (should_orphan(inode)) {
|
||||
ret = scoutfs_inode_orphan_create(sb, scoutfs_ino(inode), orph_lock, inode_lock);
|
||||
ret = scoutfs_inode_orphan_create(sb, scoutfs_ino(inode), orph_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = del_entry_items(sb, scoutfs_ino(dir), le64_to_cpu(dent.hash), le64_to_cpu(dent.pos),
|
||||
scoutfs_ino(inode), dir_lock, inode_lock);
|
||||
ret = del_entry_items(sb, scoutfs_ino(dir), dentry_info_hash(dentry),
|
||||
dentry_info_pos(dentry), scoutfs_ino(inode),
|
||||
dir_lock, inode_lock);
|
||||
if (ret) {
|
||||
ret = scoutfs_inode_orphan_delete(sb, scoutfs_ino(inode), orph_lock, inode_lock);
|
||||
ret = scoutfs_inode_orphan_delete(sb, scoutfs_ino(inode), orph_lock);
|
||||
WARN_ON_ONCE(ret); /* should have been dirty */
|
||||
goto out;
|
||||
}
|
||||
|
||||
set_dentry_fsdata(dentry, dir_lock);
|
||||
update_dentry_info(sb, dentry, 0, 0, dir_lock);
|
||||
|
||||
dir->i_ctime = ts;
|
||||
dir->i_mtime = ts;
|
||||
@@ -1145,11 +1242,10 @@ const struct inode_operations scoutfs_symlink_iops = {
|
||||
.put_link = scoutfs_put_link,
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.setxattr = scoutfs_setxattr,
|
||||
.getxattr = scoutfs_getxattr,
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
.removexattr = scoutfs_removexattr,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -1177,13 +1273,17 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
name_len > PATH_MAX || name_len > SCOUTFS_SYMLINK_MAX_SIZE)
|
||||
return -ENAMETOOLONG;
|
||||
|
||||
ret = alloc_dentry_info(dentry);
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
inode = lock_hold_create(dir, dentry, S_IFLNK|S_IRWXUGO, 0,
|
||||
&dir_lock, &inode_lock, NULL, &ind_locks);
|
||||
if (IS_ERR(inode))
|
||||
return PTR_ERR(inode);
|
||||
si = SCOUTFS_I(inode);
|
||||
|
||||
ret = validate_dentry(sb, scoutfs_ino(dir), dentry, dir_lock);
|
||||
ret = verify_entry(sb, scoutfs_ino(dir), dentry, dir_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -1201,7 +1301,7 @@ static int scoutfs_symlink(struct inode *dir, struct dentry *dentry,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
set_dentry_fsdata(dentry, dir_lock);
|
||||
update_dentry_info(sb, dentry, hash, pos, dir_lock);
|
||||
|
||||
i_size_write(dir, i_size_read(dir) + dentry->d_name.len);
|
||||
dir->i_mtime = dir->i_ctime = CURRENT_TIME;
|
||||
@@ -1531,8 +1631,6 @@ static int scoutfs_rename_common(struct inode *old_dir,
|
||||
struct scoutfs_lock *old_inode_lock = NULL;
|
||||
struct scoutfs_lock *new_inode_lock = NULL;
|
||||
struct scoutfs_lock *orph_lock = NULL;
|
||||
struct scoutfs_dirent new_dent;
|
||||
struct scoutfs_dirent old_dent;
|
||||
struct timespec now;
|
||||
bool ins_new = false;
|
||||
bool del_new = false;
|
||||
@@ -1580,18 +1678,19 @@ static int scoutfs_rename_common(struct inode *old_dir,
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
/* make sure that the entries assumed by the argument still exist */
|
||||
ret = validate_dentry(sb, scoutfs_ino(old_dir), old_dentry, old_dir_lock) ?:
|
||||
validate_dentry(sb, scoutfs_ino(new_dir), new_dentry, new_dir_lock);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
/* test dir i_size now that it's refreshed */
|
||||
if (new_inode && S_ISDIR(new_inode->i_mode) && i_size_read(new_inode)) {
|
||||
ret = -ENOTEMPTY;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
/* make sure that the entries assumed by the argument still exist */
|
||||
ret = alloc_dentry_info(old_dentry) ?:
|
||||
alloc_dentry_info(new_dentry) ?:
|
||||
verify_entry(sb, scoutfs_ino(old_dir), old_dentry, old_dir_lock) ?:
|
||||
verify_entry(sb, scoutfs_ino(new_dir), new_dentry, new_dir_lock);
|
||||
if (ret)
|
||||
goto out_unlock;
|
||||
|
||||
if ((flags & RENAME_NOREPLACE) && (new_inode != NULL)) {
|
||||
ret = -EEXIST;
|
||||
@@ -1634,12 +1733,10 @@ retry:
|
||||
|
||||
/* remove the new entry if it exists */
|
||||
if (new_inode) {
|
||||
ret = lookup_dirent(sb, scoutfs_ino(new_dir), new_dentry->d_name.name,
|
||||
new_dentry->d_name.len, new_hash, &new_dent, new_dir_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = del_entry_items(sb, scoutfs_ino(new_dir), le64_to_cpu(new_dent.hash),
|
||||
le64_to_cpu(new_dent.pos), scoutfs_ino(new_inode),
|
||||
ret = del_entry_items(sb, scoutfs_ino(new_dir),
|
||||
dentry_info_hash(new_dentry),
|
||||
dentry_info_pos(new_dentry),
|
||||
scoutfs_ino(new_inode),
|
||||
new_dir_lock, new_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
@@ -1655,22 +1752,18 @@ retry:
|
||||
goto out;
|
||||
del_new = true;
|
||||
|
||||
ret = lookup_dirent(sb, scoutfs_ino(old_dir), old_dentry->d_name.name,
|
||||
old_dentry->d_name.len, old_hash, &old_dent, old_dir_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* remove the old entry */
|
||||
ret = del_entry_items(sb, scoutfs_ino(old_dir), le64_to_cpu(old_dent.hash),
|
||||
le64_to_cpu(old_dent.pos), scoutfs_ino(old_inode),
|
||||
ret = del_entry_items(sb, scoutfs_ino(old_dir),
|
||||
dentry_info_hash(old_dentry),
|
||||
dentry_info_pos(old_dentry),
|
||||
scoutfs_ino(old_inode),
|
||||
old_dir_lock, old_inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
ins_old = true;
|
||||
|
||||
if (should_orphan(new_inode)) {
|
||||
ret = scoutfs_inode_orphan_create(sb, scoutfs_ino(new_inode), orph_lock,
|
||||
new_inode_lock);
|
||||
ret = scoutfs_inode_orphan_create(sb, scoutfs_ino(new_inode), orph_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
@@ -1678,7 +1771,7 @@ retry:
|
||||
/* won't fail from here on out, update all the vfs structs */
|
||||
|
||||
/* the caller will use d_move to move the old_dentry into place */
|
||||
set_dentry_fsdata(old_dentry, new_dir_lock);
|
||||
update_dentry_info(sb, old_dentry, new_hash, new_pos, new_dir_lock);
|
||||
|
||||
i_size_write(old_dir, i_size_read(old_dir) - old_dentry->d_name.len);
|
||||
if (!new_inode)
|
||||
@@ -1743,8 +1836,8 @@ out:
|
||||
err = 0;
|
||||
if (ins_old)
|
||||
err = add_entry_items(sb, scoutfs_ino(old_dir),
|
||||
le64_to_cpu(old_dent.hash),
|
||||
le64_to_cpu(old_dent.pos),
|
||||
dentry_info_hash(old_dentry),
|
||||
dentry_info_pos(old_dentry),
|
||||
old_dentry->d_name.name,
|
||||
old_dentry->d_name.len,
|
||||
scoutfs_ino(old_inode),
|
||||
@@ -1760,8 +1853,8 @@ out:
|
||||
|
||||
if (ins_new && err == 0)
|
||||
err = add_entry_items(sb, scoutfs_ino(new_dir),
|
||||
le64_to_cpu(new_dent.hash),
|
||||
le64_to_cpu(new_dent.pos),
|
||||
dentry_info_hash(new_dentry),
|
||||
dentry_info_pos(new_dentry),
|
||||
new_dentry->d_name.name,
|
||||
new_dentry->d_name.len,
|
||||
scoutfs_ino(new_inode),
|
||||
@@ -1832,7 +1925,7 @@ static int scoutfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mod
|
||||
return PTR_ERR(inode);
|
||||
si = SCOUTFS_I(inode);
|
||||
|
||||
ret = scoutfs_inode_orphan_create(sb, scoutfs_ino(inode), orph_lock, inode_lock);
|
||||
ret = scoutfs_inode_orphan_create(sb, scoutfs_ino(inode), orph_lock);
|
||||
if (ret < 0)
|
||||
goto out; /* XXX returning error but items created */
|
||||
|
||||
@@ -1885,14 +1978,32 @@ const struct inode_operations_wrapper scoutfs_dir_iops = {
|
||||
.rename = scoutfs_rename,
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.setxattr = scoutfs_setxattr,
|
||||
.getxattr = scoutfs_getxattr,
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
.removexattr = scoutfs_removexattr,
|
||||
.symlink = scoutfs_symlink,
|
||||
.permission = scoutfs_permission,
|
||||
},
|
||||
.tmpfile = scoutfs_tmpfile,
|
||||
.rename2 = scoutfs_rename2,
|
||||
};
|
||||
|
||||
void scoutfs_dir_exit(void)
|
||||
{
|
||||
if (dentry_info_cache) {
|
||||
kmem_cache_destroy(dentry_info_cache);
|
||||
dentry_info_cache = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
int scoutfs_dir_init(void)
|
||||
{
|
||||
dentry_info_cache = kmem_cache_create("scoutfs_dentry_info",
|
||||
sizeof(struct dentry_info), 0,
|
||||
SLAB_RECLAIM_ACCOUNT, NULL);
|
||||
if (!dentry_info_cache)
|
||||
return -ENOMEM;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -8,8 +8,6 @@ extern const struct file_operations scoutfs_dir_fops;
|
||||
extern const struct inode_operations_wrapper scoutfs_dir_iops;
|
||||
extern const struct inode_operations scoutfs_symlink_iops;
|
||||
|
||||
extern const struct dentry_operations scoutfs_dentry_ops;
|
||||
|
||||
struct scoutfs_link_backref_entry {
|
||||
struct list_head head;
|
||||
u64 dir_ino;
|
||||
@@ -31,4 +29,7 @@ int scoutfs_dir_add_next_linkref(struct super_block *sb, u64 ino,
|
||||
int scoutfs_symlink_drop(struct super_block *sb, u64 ino,
|
||||
struct scoutfs_lock *lock, u64 i_size);
|
||||
|
||||
int scoutfs_dir_init(void);
|
||||
void scoutfs_dir_exit(void);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -78,6 +78,11 @@ struct forest_refs {
|
||||
struct scoutfs_block_ref logs_ref;
|
||||
};
|
||||
|
||||
/* initialize some refs that initially aren't equal */
|
||||
#define DECLARE_STALE_TRACKING_SUPER_REFS(a, b) \
|
||||
struct forest_refs a = {{cpu_to_le64(0),}}; \
|
||||
struct forest_refs b = {{cpu_to_le64(1),}}
|
||||
|
||||
struct forest_bloom_nrs {
|
||||
unsigned int nrs[SCOUTFS_FOREST_BLOOM_NRS];
|
||||
};
|
||||
@@ -131,11 +136,11 @@ static struct scoutfs_block *read_bloom_ref(struct super_block *sb, struct scout
|
||||
int scoutfs_forest_next_hint(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_key *next)
|
||||
{
|
||||
DECLARE_STALE_TRACKING_SUPER_REFS(prev_refs, refs);
|
||||
struct scoutfs_net_roots roots;
|
||||
struct scoutfs_btree_root item_root;
|
||||
struct scoutfs_log_trees *lt;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
struct scoutfs_key found;
|
||||
struct scoutfs_key ltk;
|
||||
bool checked_fs;
|
||||
@@ -150,6 +155,8 @@ retry:
|
||||
goto out;
|
||||
|
||||
trace_scoutfs_forest_using_roots(sb, &roots.fs_root, &roots.logs_root);
|
||||
refs.fs_ref = roots.fs_root.ref;
|
||||
refs.logs_ref = roots.logs_root.ref;
|
||||
|
||||
scoutfs_key_init_log_trees(&ltk, 0, 0);
|
||||
checked_fs = false;
|
||||
@@ -205,10 +212,14 @@ retry:
|
||||
}
|
||||
}
|
||||
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
|
||||
if (ret == -ESTALE)
|
||||
if (ret == -ESTALE) {
|
||||
if (memcmp(&prev_refs, &refs, sizeof(refs)) == 0)
|
||||
return -EIO;
|
||||
prev_refs = refs;
|
||||
goto retry;
|
||||
}
|
||||
out:
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -530,8 +541,9 @@ void scoutfs_forest_dec_inode_count(struct super_block *sb)
|
||||
|
||||
/*
|
||||
* Return the total inode count from the super block and all the
|
||||
* log_btrees it references. ESTALE from read blocks is returned to the
|
||||
* caller who is expected to retry or return hard errors.
|
||||
* log_btrees it references. This assumes it's working with a block
|
||||
* reference hierarchy that should be fully consistent. If we see
|
||||
* ESTALE we've hit persistent corruption.
|
||||
*/
|
||||
int scoutfs_forest_inode_count(struct super_block *sb, struct scoutfs_super_block *super,
|
||||
u64 *inode_count)
|
||||
@@ -560,6 +572,8 @@ int scoutfs_forest_inode_count(struct super_block *sb, struct scoutfs_super_bloc
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
else if (ret == -ESTALE)
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
196
kmod/src/inode.c
196
kmod/src/inode.c
@@ -19,8 +19,6 @@
|
||||
#include <linux/pagemap.h>
|
||||
#include <linux/sched.h>
|
||||
#include <linux/list_sort.h>
|
||||
#include <linux/workqueue.h>
|
||||
#include <linux/buffer_head.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "super.h"
|
||||
@@ -38,7 +36,6 @@
|
||||
#include "omap.h"
|
||||
#include "forest.h"
|
||||
#include "btree.h"
|
||||
#include "acl.h"
|
||||
|
||||
/*
|
||||
* XXX
|
||||
@@ -69,10 +66,8 @@ struct inode_sb_info {
|
||||
|
||||
struct delayed_work orphan_scan_dwork;
|
||||
|
||||
struct workqueue_struct *iput_workq;
|
||||
struct work_struct iput_work;
|
||||
spinlock_t iput_lock;
|
||||
struct list_head iput_list;
|
||||
struct llist_head iput_llist;
|
||||
};
|
||||
|
||||
#define DECLARE_INODE_SB_INFO(sb, name) \
|
||||
@@ -99,9 +94,7 @@ static void scoutfs_inode_ctor(void *obj)
|
||||
init_rwsem(&si->xattr_rwsem);
|
||||
INIT_LIST_HEAD(&si->writeback_entry);
|
||||
scoutfs_lock_init_coverage(&si->ino_lock_cov);
|
||||
INIT_LIST_HEAD(&si->iput_head);
|
||||
si->iput_count = 0;
|
||||
si->iput_flags = 0;
|
||||
atomic_set(&si->iput_count, 0);
|
||||
|
||||
inode_init_once(&si->inode);
|
||||
}
|
||||
@@ -143,22 +136,20 @@ void scoutfs_destroy_inode(struct inode *inode)
|
||||
static const struct inode_operations scoutfs_file_iops = {
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.setxattr = scoutfs_setxattr,
|
||||
.getxattr = scoutfs_getxattr,
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
.removexattr = scoutfs_removexattr,
|
||||
.fiemap = scoutfs_data_fiemap,
|
||||
};
|
||||
|
||||
static const struct inode_operations scoutfs_special_iops = {
|
||||
.getattr = scoutfs_getattr,
|
||||
.setattr = scoutfs_setattr,
|
||||
.setxattr = generic_setxattr,
|
||||
.getxattr = generic_getxattr,
|
||||
.setxattr = scoutfs_setxattr,
|
||||
.getxattr = scoutfs_getxattr,
|
||||
.listxattr = scoutfs_listxattr,
|
||||
.removexattr = generic_removexattr,
|
||||
.get_acl = scoutfs_get_acl,
|
||||
.removexattr = scoutfs_removexattr,
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -331,6 +322,7 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock)
|
||||
load_inode(inode, &sinode);
|
||||
atomic64_set(&si->last_refreshed, refresh_gen);
|
||||
scoutfs_lock_add_coverage(sb, lock, &si->ino_lock_cov);
|
||||
si->drop_invalidated = false;
|
||||
}
|
||||
} else {
|
||||
ret = 0;
|
||||
@@ -362,7 +354,6 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
SCOUTFS_DECLARE_PER_TASK_ENTRY(pt_ent);
|
||||
LIST_HEAD(ind_locks);
|
||||
int ret;
|
||||
|
||||
@@ -373,13 +364,6 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
|
||||
if (ret)
|
||||
return ret;
|
||||
|
||||
scoutfs_per_task_add(&si->pt_data_lock, &pt_ent, lock);
|
||||
ret = block_truncate_page(inode->i_mapping, new_size, scoutfs_get_block_write);
|
||||
scoutfs_per_task_del(&si->pt_data_lock, &pt_ent);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
scoutfs_inode_queue_writeback(inode);
|
||||
|
||||
if (new_size != i_size_read(inode))
|
||||
scoutfs_inode_inc_data_version(inode);
|
||||
|
||||
@@ -391,7 +375,6 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
|
||||
inode_inc_iversion(inode);
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
|
||||
unlock:
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
|
||||
@@ -524,15 +507,10 @@ retry:
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_acl_chmod_locked(inode, attr, lock, &ind_locks);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
|
||||
setattr_copy(inode, attr);
|
||||
inode_inc_iversion(inode);
|
||||
scoutfs_update_inode_item(inode, lock, &ind_locks);
|
||||
|
||||
release:
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
out:
|
||||
@@ -969,8 +947,7 @@ void scoutfs_inode_init_index_key(struct scoutfs_key *key, u8 type, u64 major,
|
||||
static int update_index_items(struct super_block *sb,
|
||||
struct scoutfs_inode_info *si, u64 ino, u8 type,
|
||||
u64 major, u32 minor,
|
||||
struct list_head *lock_list,
|
||||
struct scoutfs_lock *primary)
|
||||
struct list_head *lock_list)
|
||||
{
|
||||
struct scoutfs_lock *ins_lock;
|
||||
struct scoutfs_lock *del_lock;
|
||||
@@ -987,7 +964,7 @@ static int update_index_items(struct super_block *sb,
|
||||
scoutfs_inode_init_index_key(&ins, type, major, minor, ino);
|
||||
|
||||
ins_lock = find_index_lock(lock_list, type, major, minor, ino);
|
||||
ret = scoutfs_item_create_force(sb, &ins, NULL, 0, ins_lock, primary);
|
||||
ret = scoutfs_item_create_force(sb, &ins, NULL, 0, ins_lock);
|
||||
if (ret || !will_del_index(si, type, major, minor))
|
||||
return ret;
|
||||
|
||||
@@ -999,7 +976,7 @@ static int update_index_items(struct super_block *sb,
|
||||
|
||||
del_lock = find_index_lock(lock_list, type, get_item_major(si, type),
|
||||
get_item_minor(si, type), ino);
|
||||
ret = scoutfs_item_delete_force(sb, &del, del_lock, primary);
|
||||
ret = scoutfs_item_delete_force(sb, &del, del_lock);
|
||||
if (ret) {
|
||||
err = scoutfs_item_delete(sb, &ins, ins_lock);
|
||||
BUG_ON(err);
|
||||
@@ -1011,8 +988,7 @@ static int update_index_items(struct super_block *sb,
|
||||
static int update_indices(struct super_block *sb,
|
||||
struct scoutfs_inode_info *si, u64 ino, umode_t mode,
|
||||
struct scoutfs_inode *sinode,
|
||||
struct list_head *lock_list,
|
||||
struct scoutfs_lock *primary)
|
||||
struct list_head *lock_list)
|
||||
{
|
||||
struct index_update {
|
||||
u8 type;
|
||||
@@ -1032,7 +1008,7 @@ static int update_indices(struct super_block *sb,
|
||||
continue;
|
||||
|
||||
ret = update_index_items(sb, si, ino, upd->type, upd->major,
|
||||
upd->minor, lock_list, primary);
|
||||
upd->minor, lock_list);
|
||||
if (ret)
|
||||
break;
|
||||
}
|
||||
@@ -1072,7 +1048,7 @@ void scoutfs_update_inode_item(struct inode *inode, struct scoutfs_lock *lock,
|
||||
/* only race with other inode field stores once */
|
||||
store_inode(&sinode, inode);
|
||||
|
||||
ret = update_indices(sb, si, ino, inode->i_mode, &sinode, lock_list, lock);
|
||||
ret = update_indices(sb, si, ino, inode->i_mode, &sinode, lock_list);
|
||||
BUG_ON(ret);
|
||||
|
||||
scoutfs_inode_init_key(&key, ino);
|
||||
@@ -1341,7 +1317,7 @@ void scoutfs_inode_index_unlock(struct super_block *sb, struct list_head *list)
|
||||
|
||||
/* this is called on final inode cleanup so enoent is fine */
|
||||
static int remove_index(struct super_block *sb, u64 ino, u8 type, u64 major,
|
||||
u32 minor, struct list_head *ind_locks, struct scoutfs_lock *primary)
|
||||
u32 minor, struct list_head *ind_locks)
|
||||
{
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_lock *lock;
|
||||
@@ -1350,7 +1326,7 @@ static int remove_index(struct super_block *sb, u64 ino, u8 type, u64 major,
|
||||
scoutfs_inode_init_index_key(&key, type, major, minor, ino);
|
||||
|
||||
lock = find_index_lock(ind_locks, type, major, minor, ino);
|
||||
ret = scoutfs_item_delete_force(sb, &key, lock, primary);
|
||||
ret = scoutfs_item_delete_force(sb, &key, lock);
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
return ret;
|
||||
@@ -1367,17 +1343,16 @@ static int remove_index(struct super_block *sb, u64 ino, u8 type, u64 major,
|
||||
*/
|
||||
static int remove_index_items(struct super_block *sb, u64 ino,
|
||||
struct scoutfs_inode *sinode,
|
||||
struct list_head *ind_locks,
|
||||
struct scoutfs_lock *primary)
|
||||
struct list_head *ind_locks)
|
||||
{
|
||||
umode_t mode = le32_to_cpu(sinode->mode);
|
||||
int ret;
|
||||
|
||||
ret = remove_index(sb, ino, SCOUTFS_INODE_INDEX_META_SEQ_TYPE,
|
||||
le64_to_cpu(sinode->meta_seq), 0, ind_locks, primary);
|
||||
le64_to_cpu(sinode->meta_seq), 0, ind_locks);
|
||||
if (ret == 0 && S_ISREG(mode))
|
||||
ret = remove_index(sb, ino, SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE,
|
||||
le64_to_cpu(sinode->data_seq), 0, ind_locks, primary);
|
||||
le64_to_cpu(sinode->data_seq), 0, ind_locks);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1467,6 +1442,7 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
|
||||
si->have_item = false;
|
||||
atomic64_set(&si->last_refreshed, lock->refresh_gen);
|
||||
scoutfs_lock_add_coverage(sb, lock, &si->ino_lock_cov);
|
||||
si->drop_invalidated = false;
|
||||
si->flags = 0;
|
||||
|
||||
scoutfs_inode_set_meta_seq(inode);
|
||||
@@ -1509,24 +1485,22 @@ static void init_orphan_key(struct scoutfs_key *key, u64 ino)
|
||||
* zone under a write only lock while the caller has the inode protected
|
||||
* by a write lock.
|
||||
*/
|
||||
int scoutfs_inode_orphan_create(struct super_block *sb, u64 ino, struct scoutfs_lock *lock,
|
||||
struct scoutfs_lock *primary)
|
||||
int scoutfs_inode_orphan_create(struct super_block *sb, u64 ino, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_key key;
|
||||
|
||||
init_orphan_key(&key, ino);
|
||||
|
||||
return scoutfs_item_create_force(sb, &key, NULL, 0, lock, primary);
|
||||
return scoutfs_item_create_force(sb, &key, NULL, 0, lock);
|
||||
}
|
||||
|
||||
int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_lock *lock,
|
||||
struct scoutfs_lock *primary)
|
||||
int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_key key;
|
||||
|
||||
init_orphan_key(&key, ino);
|
||||
|
||||
return scoutfs_item_delete_force(sb, &key, lock, primary);
|
||||
return scoutfs_item_delete_force(sb, &key, lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1579,7 +1553,7 @@ retry:
|
||||
|
||||
release = true;
|
||||
|
||||
ret = remove_index_items(sb, ino, sinode, &ind_locks, lock);
|
||||
ret = remove_index_items(sb, ino, sinode, &ind_locks);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -1594,7 +1568,7 @@ retry:
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_inode_orphan_delete(sb, ino, orph_lock, lock);
|
||||
ret = scoutfs_inode_orphan_delete(sb, ino, orph_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -1711,7 +1685,6 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
bool clear_trying = false;
|
||||
u64 group_nr;
|
||||
int bit_nr;
|
||||
int ret;
|
||||
@@ -1731,7 +1704,6 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
clear_trying = true;
|
||||
|
||||
/* can't delete if it's cached in local or remote mounts */
|
||||
if (scoutfs_omap_test(sb, ino) || test_bit_le(bit_nr, ldata->map.bits)) {
|
||||
@@ -1758,7 +1730,7 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
|
||||
|
||||
ret = delete_inode_items(sb, ino, &sinode, lock, orph_lock);
|
||||
out:
|
||||
if (clear_trying)
|
||||
if (ldata)
|
||||
clear_bit(bit_nr, ldata->trying);
|
||||
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
@@ -1768,18 +1740,18 @@ out:
|
||||
}
|
||||
|
||||
/*
|
||||
* As we evicted an inode we need to decide to try and delete its items
|
||||
* or not, which is expensive. We only try when we have lock coverage
|
||||
* and the inode has been unlinked. This catches the common case of
|
||||
* regular deletion so deletion will be performed in the final unlink
|
||||
* task. It also catches open-unlink or o_tmpfile that aren't cached on
|
||||
* other nodes.
|
||||
* As we drop an inode we need to decide to try and delete its items or
|
||||
* not, which is expensive. The two common cases we want to get right
|
||||
* both have cluster lock coverage and don't want to delete. Dropping
|
||||
* unused inodes during read lock invalidation has the current lock and
|
||||
* sees a nonzero nlink and knows not to delete. Final iput after a
|
||||
* local unlink also has a lock, sees a zero nlink, and tries to perform
|
||||
* item deletion in the task that dropped the last link, as users
|
||||
* expect.
|
||||
*
|
||||
* Inodes being evicted outside of lock coverage, by referenced dentries
|
||||
* or inodes that survived the attempt to drop them as their lock was
|
||||
* invalidated, will not try to delete. This means that cross-mount
|
||||
* open/unlink will almost certainly fall back to the orphan scanner to
|
||||
* perform final deletion.
|
||||
* Evicting an inode outside of cluster locking is the odd slow path
|
||||
* that involves lock contention during use; it is the worst cross-mount
|
||||
* open-unlink/delete case.
|
||||
*/
|
||||
void scoutfs_evict_inode(struct inode *inode)
|
||||
{
|
||||
@@ -1795,7 +1767,7 @@ void scoutfs_evict_inode(struct inode *inode)
|
||||
/* clear before trying to delete tests */
|
||||
scoutfs_omap_clear(sb, ino);
|
||||
|
||||
if (scoutfs_lock_is_covered(sb, &si->ino_lock_cov) && inode->i_nlink == 0)
|
||||
if (!scoutfs_lock_is_covered(sb, &si->ino_lock_cov) || inode->i_nlink == 0)
|
||||
try_delete_inode_items(sb, scoutfs_ino(inode));
|
||||
}
|
||||
|
||||
@@ -1820,56 +1792,30 @@ int scoutfs_drop_inode(struct inode *inode)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
const bool covered = scoutfs_lock_is_covered(sb, &si->ino_lock_cov);
|
||||
|
||||
trace_scoutfs_drop_inode(sb, scoutfs_ino(inode), inode->i_nlink, inode_unhashed(inode),
|
||||
covered);
|
||||
si->drop_invalidated);
|
||||
|
||||
return !covered || generic_drop_inode(inode);
|
||||
return si->drop_invalidated || !scoutfs_lock_is_covered(sb, &si->ino_lock_cov) ||
|
||||
generic_drop_inode(inode);
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* These iput workers can be concurrent amongst cpus. This lets us get
|
||||
* some concurrency when these async final iputs end up performing very
|
||||
* expensive inode deletion. Typically they're dropping linked inodes
|
||||
* that lost lock coverage and the iput will evict without deleting.
|
||||
*
|
||||
* Keep in mind that the dputs in d_prune can ascend into parents and
|
||||
* end up performing the final iput->evict deletion on other inodes.
|
||||
*/
|
||||
static void iput_worker(struct work_struct *work)
|
||||
{
|
||||
struct inode_sb_info *inf = container_of(work, struct inode_sb_info, iput_work);
|
||||
struct scoutfs_inode_info *si;
|
||||
struct inode *inode;
|
||||
unsigned long count;
|
||||
unsigned long flags;
|
||||
struct scoutfs_inode_info *tmp;
|
||||
struct llist_node *inodes;
|
||||
bool more;
|
||||
|
||||
spin_lock(&inf->iput_lock);
|
||||
while ((si = list_first_entry_or_null(&inf->iput_list, struct scoutfs_inode_info,
|
||||
iput_head))) {
|
||||
list_del_init(&si->iput_head);
|
||||
count = si->iput_count;
|
||||
flags = si->iput_flags;
|
||||
si->iput_count = 0;
|
||||
si->iput_flags = 0;
|
||||
spin_unlock(&inf->iput_lock);
|
||||
inodes = llist_del_all(&inf->iput_llist);
|
||||
|
||||
inode = &si->inode;
|
||||
|
||||
/* can't touch during unmount, dcache destroys w/o locks */
|
||||
if ((flags & SI_IPUT_FLAG_PRUNE) && !inf->stopped)
|
||||
d_prune_aliases(inode);
|
||||
|
||||
while (count-- > 0)
|
||||
iput(inode);
|
||||
|
||||
/* can't touch inode after final iput */
|
||||
|
||||
spin_lock(&inf->iput_lock);
|
||||
llist_for_each_entry_safe(si, tmp, inodes, iput_llnode) {
|
||||
do {
|
||||
more = atomic_dec_return(&si->iput_count) > 0;
|
||||
iput(&si->inode);
|
||||
} while (more);
|
||||
}
|
||||
spin_unlock(&inf->iput_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1886,21 +1832,15 @@ static void iput_worker(struct work_struct *work)
|
||||
* Nothing stops multiple puts of an inode before the work runs so we
|
||||
* can track multiple puts in flight.
|
||||
*/
|
||||
void scoutfs_inode_queue_iput(struct inode *inode, unsigned long flags)
|
||||
void scoutfs_inode_queue_iput(struct inode *inode)
|
||||
{
|
||||
DECLARE_INODE_SB_INFO(inode->i_sb, inf);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
bool should_queue;
|
||||
|
||||
spin_lock(&inf->iput_lock);
|
||||
si->iput_count++;
|
||||
si->iput_flags |= flags;
|
||||
if ((should_queue = list_empty(&si->iput_head)))
|
||||
list_add_tail(&si->iput_head, &inf->iput_list);
|
||||
spin_unlock(&inf->iput_lock);
|
||||
|
||||
if (should_queue)
|
||||
queue_work(inf->iput_workq, &inf->iput_work);
|
||||
if (atomic_inc_return(&si->iput_count) == 1)
|
||||
llist_add(&si->iput_llnode, &inf->iput_llist);
|
||||
smp_wmb(); /* count and list visible before work executes */
|
||||
schedule_work(&inf->iput_work);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -2104,7 +2044,7 @@ int scoutfs_inode_walk_writeback(struct super_block *sb, bool write)
|
||||
trace_scoutfs_inode_walk_writeback(sb, scoutfs_ino(inode),
|
||||
write, ret);
|
||||
if (ret) {
|
||||
scoutfs_inode_queue_iput(inode, 0);
|
||||
scoutfs_inode_queue_iput(inode);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -2120,7 +2060,7 @@ int scoutfs_inode_walk_writeback(struct super_block *sb, bool write)
|
||||
if (!write)
|
||||
list_del_init(&si->writeback_entry);
|
||||
|
||||
scoutfs_inode_queue_iput(inode, 0);
|
||||
scoutfs_inode_queue_iput(inode);
|
||||
}
|
||||
|
||||
spin_unlock(&inf->writeback_lock);
|
||||
@@ -2145,15 +2085,7 @@ int scoutfs_inode_setup(struct super_block *sb)
|
||||
spin_lock_init(&inf->ino_alloc.lock);
|
||||
INIT_DELAYED_WORK(&inf->orphan_scan_dwork, inode_orphan_scan_worker);
|
||||
INIT_WORK(&inf->iput_work, iput_worker);
|
||||
spin_lock_init(&inf->iput_lock);
|
||||
INIT_LIST_HEAD(&inf->iput_list);
|
||||
|
||||
/* re-entrant, worker locks with itself and queueing */
|
||||
inf->iput_workq = alloc_workqueue("scoutfs_inode_iput", WQ_UNBOUND, 0);
|
||||
if (!inf->iput_workq) {
|
||||
kfree(inf);
|
||||
return -ENOMEM;
|
||||
}
|
||||
init_llist_head(&inf->iput_llist);
|
||||
|
||||
sbi->inode_sb_info = inf;
|
||||
|
||||
@@ -2189,18 +2121,14 @@ void scoutfs_inode_flush_iput(struct super_block *sb)
|
||||
DECLARE_INODE_SB_INFO(sb, inf);
|
||||
|
||||
if (inf)
|
||||
flush_workqueue(inf->iput_workq);
|
||||
flush_work(&inf->iput_work);
|
||||
}
|
||||
|
||||
void scoutfs_inode_destroy(struct super_block *sb)
|
||||
{
|
||||
struct inode_sb_info *inf = SCOUTFS_SB(sb)->inode_sb_info;
|
||||
|
||||
if (inf) {
|
||||
if (inf->iput_workq)
|
||||
destroy_workqueue(inf->iput_workq);
|
||||
kfree(inf);
|
||||
}
|
||||
kfree(inf);
|
||||
}
|
||||
|
||||
void scoutfs_inode_exit(void)
|
||||
|
||||
@@ -56,16 +56,14 @@ struct scoutfs_inode_info {
|
||||
|
||||
struct scoutfs_lock_coverage ino_lock_cov;
|
||||
|
||||
struct list_head iput_head;
|
||||
unsigned long iput_count;
|
||||
unsigned long iput_flags;
|
||||
/* drop if i_count hits 0, allows drop while invalidate holds coverage */
|
||||
bool drop_invalidated;
|
||||
struct llist_node iput_llnode;
|
||||
atomic_t iput_count;
|
||||
|
||||
struct inode inode;
|
||||
};
|
||||
|
||||
/* try to prune dcache aliases with queued iput */
|
||||
#define SI_IPUT_FLAG_PRUNE (1 << 0)
|
||||
|
||||
static inline struct scoutfs_inode_info *SCOUTFS_I(struct inode *inode)
|
||||
{
|
||||
return container_of(inode, struct scoutfs_inode_info, inode);
|
||||
@@ -80,7 +78,7 @@ struct inode *scoutfs_alloc_inode(struct super_block *sb);
|
||||
void scoutfs_destroy_inode(struct inode *inode);
|
||||
int scoutfs_drop_inode(struct inode *inode);
|
||||
void scoutfs_evict_inode(struct inode *inode);
|
||||
void scoutfs_inode_queue_iput(struct inode *inode, unsigned long flags);
|
||||
void scoutfs_inode_queue_iput(struct inode *inode);
|
||||
|
||||
#define SCOUTFS_IGF_LINKED (1 << 0) /* enoent if nlink == 0 */
|
||||
struct inode *scoutfs_iget(struct super_block *sb, u64 ino, int lkf, int igf);
|
||||
@@ -127,10 +125,8 @@ int scoutfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
|
||||
struct kstat *stat);
|
||||
int scoutfs_setattr(struct dentry *dentry, struct iattr *attr);
|
||||
|
||||
int scoutfs_inode_orphan_create(struct super_block *sb, u64 ino, struct scoutfs_lock *lock,
|
||||
struct scoutfs_lock *primary);
|
||||
int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_lock *lock,
|
||||
struct scoutfs_lock *primary);
|
||||
int scoutfs_inode_orphan_create(struct super_block *sb, u64 ino, struct scoutfs_lock *lock);
|
||||
int scoutfs_inode_orphan_delete(struct super_block *sb, u64 ino, struct scoutfs_lock *lock);
|
||||
void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb);
|
||||
|
||||
void scoutfs_inode_queue_writeback(struct inode *inode);
|
||||
|
||||
@@ -1676,14 +1676,6 @@ static int lock_safe(struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int optional_lock_mode_match(struct scoutfs_lock *lock, int mode)
|
||||
{
|
||||
if (WARN_ON_ONCE(lock && lock->mode != mode))
|
||||
return -EINVAL;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy the cached item's value into the caller's value. The number of
|
||||
* bytes copied is returned. A null val returns 0.
|
||||
@@ -1840,19 +1832,12 @@ out:
|
||||
* also increase the seqs. It lets us limit the inputs of item merging
|
||||
* to the last stable seq and ensure that all the items in open
|
||||
* transactions and granted locks will have greater seqs.
|
||||
*
|
||||
* This is a little awkward for WRITE_ONLY locks which can have much
|
||||
* older versions than the version of locked primary data that they're
|
||||
* operating on behalf of. Callers can optionally provide that primary
|
||||
* lock to get the version from. This ensures that items created under
|
||||
* WRITE_ONLY locks can not have versions less than their primary data.
|
||||
*/
|
||||
static u64 item_seq(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
struct scoutfs_lock *primary)
|
||||
static u64 item_seq(struct super_block *sb, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
return max3(sbi->trans_seq, lock->write_seq, primary ? primary->write_seq : 0);
|
||||
return max(sbi->trans_seq, lock->write_seq);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1887,7 +1872,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key *key,
|
||||
if (!item || item->deletion) {
|
||||
ret = -ENOENT;
|
||||
} else {
|
||||
item->seq = item_seq(sb, lock, NULL);
|
||||
item->seq = item_seq(sb, lock);
|
||||
mark_item_dirty(sb, cinf, pg, NULL, item);
|
||||
ret = 0;
|
||||
}
|
||||
@@ -1904,10 +1889,10 @@ out:
|
||||
*/
|
||||
static int item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock,
|
||||
struct scoutfs_lock *primary, int mode, bool force)
|
||||
int mode, bool force)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
const u64 seq = item_seq(sb, lock, primary);
|
||||
const u64 seq = item_seq(sb, lock);
|
||||
struct cached_item *found;
|
||||
struct cached_item *item;
|
||||
struct cached_page *pg;
|
||||
@@ -1917,8 +1902,7 @@ static int item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_create);
|
||||
|
||||
if ((ret = lock_safe(lock, key, mode)) ||
|
||||
(ret = optional_lock_mode_match(primary, SCOUTFS_LOCK_WRITE)))
|
||||
if ((ret = lock_safe(lock, key, mode)))
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_forest_set_bloom_bits(sb, lock);
|
||||
@@ -1959,15 +1943,15 @@ out:
|
||||
int scoutfs_item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
{
|
||||
return item_create(sb, key, val, val_len, lock, NULL,
|
||||
return item_create(sb, key, val, val_len, lock,
|
||||
SCOUTFS_LOCK_READ, false);
|
||||
}
|
||||
|
||||
int scoutfs_item_create_force(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len,
|
||||
struct scoutfs_lock *lock, struct scoutfs_lock *primary)
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
return item_create(sb, key, val, val_len, lock, primary,
|
||||
return item_create(sb, key, val, val_len, lock,
|
||||
SCOUTFS_LOCK_WRITE_ONLY, true);
|
||||
}
|
||||
|
||||
@@ -1981,7 +1965,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
const u64 seq = item_seq(sb, lock, NULL);
|
||||
const u64 seq = item_seq(sb, lock);
|
||||
struct cached_item *item;
|
||||
struct cached_item *found;
|
||||
struct cached_page *pg;
|
||||
@@ -2041,16 +2025,12 @@ out:
|
||||
* current items so the caller always writes with write only locks. If
|
||||
* combining the current delta item and the caller's item results in a
|
||||
* null we can just drop it, we don't have to emit a deletion item.
|
||||
*
|
||||
* Delta items don't have to worry about creating items with old
|
||||
* versions under write_only locks. The versions don't impact how we
|
||||
* merge two items.
|
||||
*/
|
||||
int scoutfs_item_delta(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
const u64 seq = item_seq(sb, lock, NULL);
|
||||
const u64 seq = item_seq(sb, lock);
|
||||
struct cached_item *item;
|
||||
struct cached_page *pg;
|
||||
struct rb_node **pnode;
|
||||
@@ -2119,11 +2099,10 @@ out:
|
||||
* deletion item if there isn't one already cached.
|
||||
*/
|
||||
static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock, struct scoutfs_lock *primary,
|
||||
int mode, bool force)
|
||||
struct scoutfs_lock *lock, int mode, bool force)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
const u64 seq = item_seq(sb, lock, primary);
|
||||
const u64 seq = item_seq(sb, lock);
|
||||
struct cached_item *item;
|
||||
struct cached_page *pg;
|
||||
struct rb_node **pnode;
|
||||
@@ -2132,8 +2111,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
|
||||
scoutfs_inc_counter(sb, item_delete);
|
||||
|
||||
if ((ret = lock_safe(lock, key, mode)) ||
|
||||
(ret = optional_lock_mode_match(primary, SCOUTFS_LOCK_WRITE)))
|
||||
if ((ret = lock_safe(lock, key, mode)))
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_forest_set_bloom_bits(sb, lock);
|
||||
@@ -2183,13 +2161,13 @@ out:
|
||||
int scoutfs_item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
return item_delete(sb, key, lock, NULL, SCOUTFS_LOCK_WRITE, false);
|
||||
return item_delete(sb, key, lock, SCOUTFS_LOCK_WRITE, false);
|
||||
}
|
||||
|
||||
int scoutfs_item_delete_force(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock, struct scoutfs_lock *primary)
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
return item_delete(sb, key, lock, primary, SCOUTFS_LOCK_WRITE_ONLY, true);
|
||||
return item_delete(sb, key, lock, SCOUTFS_LOCK_WRITE_ONLY, true);
|
||||
}
|
||||
|
||||
u64 scoutfs_item_dirty_pages(struct super_block *sb)
|
||||
|
||||
@@ -15,15 +15,16 @@ int scoutfs_item_create(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_create_force(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len,
|
||||
struct scoutfs_lock *lock, struct scoutfs_lock *primary);
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_delta(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_delete(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_item_delete_force(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock, struct scoutfs_lock *primary);
|
||||
int scoutfs_item_delete_force(struct super_block *sb,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
u64 scoutfs_item_dirty_pages(struct super_block *sb);
|
||||
int scoutfs_item_write_dirty(struct super_block *sb);
|
||||
|
||||
@@ -46,10 +46,4 @@ static inline int dir_emit_dots(struct file *file, void *dirent,
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef KC_POSIX_ACL_VALID_USER_NS
|
||||
#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(user_ns, acl)
|
||||
#else
|
||||
#define kc_posix_acl_valid(user_ns, acl) posix_acl_valid(acl)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -18,7 +18,6 @@
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sort.h>
|
||||
#include <linux/ctype.h>
|
||||
#include <linux/posix_acl.h>
|
||||
|
||||
#include "super.h"
|
||||
#include "lock.h"
|
||||
@@ -130,13 +129,16 @@ static bool lock_modes_match(int granted, int requested)
|
||||
* allows deletions to be performed by unlink without having to wait for
|
||||
* remote cached inodes to be dropped.
|
||||
*
|
||||
* We kick the d_prune and iput off to async work because they can end
|
||||
* up in final iput and inode eviction item deletion which would
|
||||
* deadlock. d_prune->dput can end up in iput on parents in different
|
||||
* locks entirely.
|
||||
* If the cached inode was already deferring final inode deletion then
|
||||
* we can't perform that inline in invalidation. The locking alone
|
||||
* deadlock, and it might also take multiple transactions to fully
|
||||
* delete an inode with significant metadata. We only perform the iput
|
||||
* inline if we know that possible eviction can't perform the final
|
||||
* deletion, otherwise we kick it off to async work.
|
||||
*/
|
||||
static void invalidate_inode(struct super_block *sb, u64 ino)
|
||||
{
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
struct scoutfs_inode_info *si;
|
||||
struct inode *inode;
|
||||
|
||||
@@ -150,9 +152,17 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
|
||||
scoutfs_data_wait_changed(inode);
|
||||
}
|
||||
|
||||
forget_all_cached_acls(inode);
|
||||
/* can't touch during unmount, dcache destroys w/o locks */
|
||||
if (!linfo->unmounting)
|
||||
d_prune_aliases(inode);
|
||||
|
||||
scoutfs_inode_queue_iput(inode, SI_IPUT_FLAG_PRUNE);
|
||||
si->drop_invalidated = true;
|
||||
if (scoutfs_lock_is_covered(sb, &si->ino_lock_cov) && inode->i_nlink > 0) {
|
||||
iput(inode);
|
||||
} else {
|
||||
/* defer iput to work context so we don't evict inodes from invalidation */
|
||||
scoutfs_inode_queue_iput(inode);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -188,6 +198,16 @@ static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
/* have to invalidate if we're not in the only usable case */
|
||||
if (!(prev == SCOUTFS_LOCK_WRITE && mode == SCOUTFS_LOCK_READ)) {
|
||||
retry:
|
||||
/* invalidate inodes before removing coverage */
|
||||
if (lock->start.sk_zone == SCOUTFS_FS_ZONE) {
|
||||
ino = le64_to_cpu(lock->start.ski_ino);
|
||||
last = le64_to_cpu(lock->end.ski_ino);
|
||||
while (ino <= last) {
|
||||
invalidate_inode(sb, ino);
|
||||
ino++;
|
||||
}
|
||||
}
|
||||
|
||||
/* remove cov items to tell users that their cache is stale */
|
||||
spin_lock(&lock->cov_list_lock);
|
||||
list_for_each_entry_safe(cov, tmp, &lock->cov_list, head) {
|
||||
@@ -203,16 +223,6 @@ retry:
|
||||
}
|
||||
spin_unlock(&lock->cov_list_lock);
|
||||
|
||||
/* invalidate inodes after removing coverage so drop/evict aren't covered */
|
||||
if (lock->start.sk_zone == SCOUTFS_FS_ZONE) {
|
||||
ino = le64_to_cpu(lock->start.ski_ino);
|
||||
last = le64_to_cpu(lock->end.ski_ino);
|
||||
while (ino <= last) {
|
||||
invalidate_inode(sb, ino);
|
||||
ino++;
|
||||
}
|
||||
}
|
||||
|
||||
scoutfs_item_invalidate(sb, &lock->start, &lock->end);
|
||||
}
|
||||
|
||||
@@ -1515,38 +1525,6 @@ void scoutfs_lock_flush_invalidate(struct super_block *sb)
|
||||
flush_work(&linfo->inv_work);
|
||||
}
|
||||
|
||||
static u64 get_held_lock_refresh_gen(struct super_block *sb, struct scoutfs_key *start)
|
||||
{
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
struct scoutfs_lock *lock;
|
||||
u64 refresh_gen = 0;
|
||||
|
||||
/* this can be called from all manner of places */
|
||||
if (!linfo)
|
||||
return 0;
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
lock = lock_lookup(sb, start, NULL);
|
||||
if (lock) {
|
||||
if (lock_mode_can_read(lock->mode))
|
||||
refresh_gen = lock->refresh_gen;
|
||||
}
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
return refresh_gen;
|
||||
}
|
||||
|
||||
u64 scoutfs_lock_ino_refresh_gen(struct super_block *sb, u64 ino)
|
||||
{
|
||||
struct scoutfs_key start;
|
||||
|
||||
scoutfs_key_set_zeros(&start);
|
||||
start.sk_zone = SCOUTFS_FS_ZONE;
|
||||
start.ski_ino = cpu_to_le64(ino & ~(u64)SCOUTFS_LOCK_INODE_GROUP_MASK);
|
||||
|
||||
return get_held_lock_refresh_gen(sb, &start);
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller is going to be shutting down transactions and the client.
|
||||
* We need to make sure that locking won't call either after we return.
|
||||
|
||||
@@ -100,8 +100,6 @@ void scoutfs_lock_del_coverage(struct super_block *sb,
|
||||
bool scoutfs_lock_protected(struct scoutfs_lock *lock, struct scoutfs_key *key,
|
||||
enum scoutfs_lock_mode mode);
|
||||
|
||||
u64 scoutfs_lock_ino_refresh_gen(struct super_block *sb, u64 ino);
|
||||
|
||||
void scoutfs_free_unused_locks(struct super_block *sb);
|
||||
|
||||
int scoutfs_lock_setup(struct super_block *sb);
|
||||
|
||||
@@ -355,7 +355,6 @@ static int submit_send(struct super_block *sb,
|
||||
}
|
||||
if (rid != 0) {
|
||||
spin_unlock(&conn->lock);
|
||||
kfree(msend);
|
||||
return -ENOTCONN;
|
||||
}
|
||||
}
|
||||
@@ -1346,12 +1345,10 @@ scoutfs_net_alloc_conn(struct super_block *sb,
|
||||
if (!conn)
|
||||
return NULL;
|
||||
|
||||
if (info_size) {
|
||||
conn->info = kzalloc(info_size, GFP_NOFS);
|
||||
if (!conn->info) {
|
||||
kfree(conn);
|
||||
return NULL;
|
||||
}
|
||||
conn->info = kzalloc(info_size, GFP_NOFS);
|
||||
if (!conn->info) {
|
||||
kfree(conn);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
conn->workq = alloc_workqueue("scoutfs_net_%s",
|
||||
|
||||
@@ -157,15 +157,6 @@ static int free_rid(struct omap_rid_list *list, struct omap_rid_entry *entry)
|
||||
return nr;
|
||||
}
|
||||
|
||||
static void free_rid_list(struct omap_rid_list *list)
|
||||
{
|
||||
struct omap_rid_entry *entry;
|
||||
struct omap_rid_entry *tmp;
|
||||
|
||||
list_for_each_entry_safe(entry, tmp, &list->head, head)
|
||||
free_rid(list, entry);
|
||||
}
|
||||
|
||||
static int copy_rids(struct omap_rid_list *to, struct omap_rid_list *from, spinlock_t *from_lock)
|
||||
{
|
||||
struct omap_rid_entry *entry;
|
||||
@@ -813,10 +804,6 @@ void scoutfs_omap_server_shutdown(struct super_block *sb)
|
||||
llist_for_each_entry_safe(req, tmp, requests, llnode)
|
||||
kfree(req);
|
||||
|
||||
spin_lock(&ominf->lock);
|
||||
free_rid_list(&ominf->rids);
|
||||
spin_unlock(&ominf->lock);
|
||||
|
||||
synchronize_rcu();
|
||||
}
|
||||
|
||||
@@ -877,10 +864,6 @@ void scoutfs_omap_destroy(struct super_block *sb)
|
||||
rhashtable_walk_stop(&iter);
|
||||
rhashtable_walk_exit(&iter);
|
||||
|
||||
spin_lock(&ominf->lock);
|
||||
free_rid_list(&ominf->rids);
|
||||
spin_unlock(&ominf->lock);
|
||||
|
||||
rhashtable_destroy(&ominf->group_ht);
|
||||
rhashtable_destroy(&ominf->req_ht);
|
||||
kfree(ominf);
|
||||
|
||||
@@ -27,25 +27,16 @@
|
||||
#include "options.h"
|
||||
#include "super.h"
|
||||
#include "inode.h"
|
||||
#include "alloc.h"
|
||||
|
||||
enum {
|
||||
Opt_acl,
|
||||
Opt_data_prealloc_blocks,
|
||||
Opt_data_prealloc_contig_only,
|
||||
Opt_metadev_path,
|
||||
Opt_noacl,
|
||||
Opt_orphan_scan_delay_ms,
|
||||
Opt_quorum_slot_nr,
|
||||
Opt_err,
|
||||
};
|
||||
|
||||
static const match_table_t tokens = {
|
||||
{Opt_acl, "acl"},
|
||||
{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
|
||||
{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
|
||||
{Opt_metadev_path, "metadev_path=%s"},
|
||||
{Opt_noacl, "noacl"},
|
||||
{Opt_orphan_scan_delay_ms, "orphan_scan_delay_ms=%s"},
|
||||
{Opt_quorum_slot_nr, "quorum_slot_nr=%s"},
|
||||
{Opt_err, NULL}
|
||||
@@ -115,17 +106,11 @@ static void free_options(struct scoutfs_mount_options *opts)
|
||||
#define DEFAULT_ORPHAN_SCAN_DELAY_MS (10 * MSEC_PER_SEC)
|
||||
#define MAX_ORPHAN_SCAN_DELAY_MS (60 * MSEC_PER_SEC)
|
||||
|
||||
#define MIN_DATA_PREALLOC_BLOCKS 1ULL
|
||||
#define MAX_DATA_PREALLOC_BLOCKS ((unsigned long long)SCOUTFS_BLOCK_SM_MAX)
|
||||
|
||||
static void init_default_options(struct scoutfs_mount_options *opts)
|
||||
{
|
||||
memset(opts, 0, sizeof(*opts));
|
||||
|
||||
opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS;
|
||||
opts->data_prealloc_contig_only = 1;
|
||||
opts->quorum_slot_nr = -1;
|
||||
opts->orphan_scan_delay_ms = -1;
|
||||
opts->orphan_scan_delay_ms = DEFAULT_ORPHAN_SCAN_DELAY_MS;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -137,7 +122,6 @@ static void init_default_options(struct scoutfs_mount_options *opts)
|
||||
static int parse_options(struct super_block *sb, char *options, struct scoutfs_mount_options *opts)
|
||||
{
|
||||
substring_t args[MAX_OPT_ARGS];
|
||||
u64 nr64;
|
||||
int nr;
|
||||
int token;
|
||||
char *p;
|
||||
@@ -150,44 +134,12 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
|
||||
token = match_token(p, tokens, args);
|
||||
switch (token) {
|
||||
|
||||
case Opt_acl:
|
||||
sb->s_flags |= MS_POSIXACL;
|
||||
break;
|
||||
|
||||
case Opt_data_prealloc_blocks:
|
||||
ret = match_u64(args, &nr64);
|
||||
if (ret < 0 ||
|
||||
nr64 < MIN_DATA_PREALLOC_BLOCKS || nr64 > MAX_DATA_PREALLOC_BLOCKS) {
|
||||
scoutfs_err(sb, "invalid data_prealloc_blocks option, must be between %llu and %llu",
|
||||
MIN_DATA_PREALLOC_BLOCKS, MAX_DATA_PREALLOC_BLOCKS);
|
||||
if (ret == 0)
|
||||
ret = -EINVAL;
|
||||
return ret;
|
||||
}
|
||||
opts->data_prealloc_blocks = nr64;
|
||||
break;
|
||||
|
||||
case Opt_data_prealloc_contig_only:
|
||||
ret = match_int(args, &nr);
|
||||
if (ret < 0 || nr < 0 || nr > 1) {
|
||||
scoutfs_err(sb, "invalid data_prealloc_contig_only option, bool must only be 0 or 1");
|
||||
if (ret == 0)
|
||||
ret = -EINVAL;
|
||||
return ret;
|
||||
}
|
||||
opts->data_prealloc_contig_only = nr;
|
||||
break;
|
||||
|
||||
case Opt_metadev_path:
|
||||
ret = parse_bdev_path(sb, &args[0], &opts->metadev_path);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
break;
|
||||
|
||||
case Opt_noacl:
|
||||
sb->s_flags &= ~MS_POSIXACL;
|
||||
break;
|
||||
|
||||
case Opt_orphan_scan_delay_ms:
|
||||
if (opts->orphan_scan_delay_ms != -1) {
|
||||
scoutfs_err(sb, "multiple orphan_scan_delay_ms options provided, only provide one.");
|
||||
@@ -229,9 +181,6 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
|
||||
}
|
||||
}
|
||||
|
||||
if (opts->orphan_scan_delay_ms == -1)
|
||||
opts->orphan_scan_delay_ms = DEFAULT_ORPHAN_SCAN_DELAY_MS;
|
||||
|
||||
if (!opts->metadev_path) {
|
||||
scoutfs_err(sb, "Required mount option \"metadev_path\" not found");
|
||||
return -EINVAL;
|
||||
@@ -301,17 +250,10 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
|
||||
{
|
||||
struct super_block *sb = root->d_sb;
|
||||
struct scoutfs_mount_options opts;
|
||||
const bool is_acl = !!(sb->s_flags & MS_POSIXACL);
|
||||
|
||||
scoutfs_options_read(sb, &opts);
|
||||
|
||||
if (is_acl)
|
||||
seq_puts(seq, ",acl");
|
||||
seq_printf(seq, ",data_prealloc_blocks=%llu", opts.data_prealloc_blocks);
|
||||
seq_printf(seq, ",data_prealloc_contig_only=%u", opts.data_prealloc_contig_only);
|
||||
seq_printf(seq, ",metadev_path=%s", opts.metadev_path);
|
||||
if (!is_acl)
|
||||
seq_puts(seq, ",noacl");
|
||||
seq_printf(seq, ",orphan_scan_delay_ms=%u", opts.orphan_scan_delay_ms);
|
||||
if (opts.quorum_slot_nr >= 0)
|
||||
seq_printf(seq, ",quorum_slot_nr=%d", opts.quorum_slot_nr);
|
||||
@@ -319,83 +261,6 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static ssize_t data_prealloc_blocks_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
struct scoutfs_mount_options opts;
|
||||
|
||||
scoutfs_options_read(sb, &opts);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%llu", opts.data_prealloc_blocks);
|
||||
}
|
||||
static ssize_t data_prealloc_blocks_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
DECLARE_OPTIONS_INFO(sb, optinf);
|
||||
char nullterm[30]; /* more than enough for octal -U64_MAX */
|
||||
u64 val;
|
||||
int len;
|
||||
int ret;
|
||||
|
||||
len = min(count, sizeof(nullterm) - 1);
|
||||
memcpy(nullterm, buf, len);
|
||||
nullterm[len] = '\0';
|
||||
|
||||
ret = kstrtoll(nullterm, 0, &val);
|
||||
if (ret < 0 || val < MIN_DATA_PREALLOC_BLOCKS || val > MAX_DATA_PREALLOC_BLOCKS) {
|
||||
scoutfs_err(sb, "invalid data_prealloc_blocks option, must be between %llu and %llu",
|
||||
MIN_DATA_PREALLOC_BLOCKS, MAX_DATA_PREALLOC_BLOCKS);
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
write_seqlock(&optinf->seqlock);
|
||||
optinf->opts.data_prealloc_blocks = val;
|
||||
write_sequnlock(&optinf->seqlock);
|
||||
|
||||
return count;
|
||||
}
|
||||
SCOUTFS_ATTR_RW(data_prealloc_blocks);
|
||||
|
||||
static ssize_t data_prealloc_contig_only_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
struct scoutfs_mount_options opts;
|
||||
|
||||
scoutfs_options_read(sb, &opts);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u", opts.data_prealloc_contig_only);
|
||||
}
|
||||
static ssize_t data_prealloc_contig_only_store(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
DECLARE_OPTIONS_INFO(sb, optinf);
|
||||
char nullterm[20]; /* more than enough for octal -U32_MAX */
|
||||
long val;
|
||||
int len;
|
||||
int ret;
|
||||
|
||||
len = min(count, sizeof(nullterm) - 1);
|
||||
memcpy(nullterm, buf, len);
|
||||
nullterm[len] = '\0';
|
||||
|
||||
ret = kstrtol(nullterm, 0, &val);
|
||||
if (ret < 0 || val < 0 || val > 1) {
|
||||
scoutfs_err(sb, "invalid data_prealloc_contig_only option, bool must be 0 or 1");
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
write_seqlock(&optinf->seqlock);
|
||||
optinf->opts.data_prealloc_contig_only = val;
|
||||
write_sequnlock(&optinf->seqlock);
|
||||
|
||||
return count;
|
||||
}
|
||||
SCOUTFS_ATTR_RW(data_prealloc_contig_only);
|
||||
|
||||
static ssize_t metadev_path_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
||||
@@ -460,8 +325,6 @@ static ssize_t quorum_slot_nr_show(struct kobject *kobj, struct kobj_attribute *
|
||||
SCOUTFS_ATTR_RO(quorum_slot_nr);
|
||||
|
||||
static struct attribute *options_attrs[] = {
|
||||
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
|
||||
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
|
||||
SCOUTFS_ATTR_PTR(metadev_path),
|
||||
SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),
|
||||
SCOUTFS_ATTR_PTR(quorum_slot_nr),
|
||||
|
||||
@@ -6,8 +6,6 @@
|
||||
#include "format.h"
|
||||
|
||||
struct scoutfs_mount_options {
|
||||
u64 data_prealloc_blocks;
|
||||
bool data_prealloc_contig_only;
|
||||
char *metadev_path;
|
||||
unsigned int orphan_scan_delay_ms;
|
||||
int quorum_slot_nr;
|
||||
|
||||
@@ -114,7 +114,6 @@ struct quorum_status {
|
||||
|
||||
struct quorum_info {
|
||||
struct super_block *sb;
|
||||
struct scoutfs_quorum_config qconf;
|
||||
struct work_struct work;
|
||||
struct socket *sock;
|
||||
bool shutdown;
|
||||
@@ -135,18 +134,11 @@ struct quorum_info {
|
||||
#define DECLARE_QUORUM_INFO_KOBJ(kobj, name) \
|
||||
DECLARE_QUORUM_INFO(SCOUTFS_SYSFS_ATTRS_SB(kobj), name)
|
||||
|
||||
static bool quorum_slot_present(struct scoutfs_quorum_config *qconf, int i)
|
||||
static bool quorum_slot_present(struct scoutfs_super_block *super, int i)
|
||||
{
|
||||
BUG_ON(i < 0 || i > SCOUTFS_QUORUM_MAX_SLOTS);
|
||||
|
||||
return qconf->slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4);
|
||||
}
|
||||
|
||||
static void quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin)
|
||||
{
|
||||
BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS);
|
||||
|
||||
scoutfs_addr_to_sin(sin, &qconf->slots[i].addr);
|
||||
return super->qconf.slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4);
|
||||
}
|
||||
|
||||
static ktime_t election_timeout(void)
|
||||
@@ -168,6 +160,7 @@ static ktime_t heartbeat_timeout(void)
|
||||
static int create_socket(struct super_block *sb)
|
||||
{
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct socket *sock = NULL;
|
||||
struct sockaddr_in sin;
|
||||
int addrlen;
|
||||
@@ -181,7 +174,7 @@ static int create_socket(struct super_block *sb)
|
||||
|
||||
sock->sk->sk_allocation = GFP_NOFS;
|
||||
|
||||
quorum_slot_sin(&qinf->qconf, qinf->our_quorum_slot_nr, &sin);
|
||||
scoutfs_quorum_slot_sin(super, qinf->our_quorum_slot_nr, &sin);
|
||||
|
||||
addrlen = sizeof(sin);
|
||||
ret = kernel_bind(sock, (struct sockaddr *)&sin, addrlen);
|
||||
@@ -211,13 +204,13 @@ static __le32 quorum_message_crc(struct scoutfs_quorum_message *qmes)
|
||||
static void send_msg_members(struct super_block *sb, int type, u64 term,
|
||||
int only)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
ktime_t now;
|
||||
int i;
|
||||
|
||||
struct scoutfs_quorum_message qmes = {
|
||||
.fsid = cpu_to_le64(sbi->fsid),
|
||||
.fsid = super->hdr.fsid,
|
||||
.term = cpu_to_le64(term),
|
||||
.type = type,
|
||||
.from = qinf->our_quorum_slot_nr,
|
||||
@@ -241,11 +234,11 @@ static void send_msg_members(struct super_block *sb, int type, u64 term,
|
||||
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (!quorum_slot_present(&qinf->qconf, i) ||
|
||||
if (!quorum_slot_present(super, i) ||
|
||||
(only >= 0 && i != only) || i == qinf->our_quorum_slot_nr)
|
||||
continue;
|
||||
|
||||
scoutfs_quorum_slot_sin(&qinf->qconf, i, &sin);
|
||||
scoutfs_quorum_slot_sin(super, i, &sin);
|
||||
now = ktime_get();
|
||||
kernel_sendmsg(qinf->sock, &mh, &kv, 1, kv.iov_len);
|
||||
|
||||
@@ -273,7 +266,7 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
|
||||
ktime_t abs_to)
|
||||
{
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_quorum_message qmes;
|
||||
struct timeval tv;
|
||||
ktime_t rel_to;
|
||||
@@ -316,10 +309,10 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
|
||||
|
||||
if (ret != sizeof(qmes) ||
|
||||
qmes.crc != quorum_message_crc(&qmes) ||
|
||||
qmes.fsid != cpu_to_le64(sbi->fsid) ||
|
||||
qmes.fsid != super->hdr.fsid ||
|
||||
qmes.type >= SCOUTFS_QUORUM_MSG_INVALID ||
|
||||
qmes.from >= SCOUTFS_QUORUM_MAX_SLOTS ||
|
||||
!quorum_slot_present(&qinf->qconf, qmes.from)) {
|
||||
!quorum_slot_present(super, qmes.from)) {
|
||||
/* should we be trying to open a new socket? */
|
||||
scoutfs_inc_counter(sb, quorum_recv_invalid);
|
||||
return -EAGAIN;
|
||||
@@ -349,7 +342,7 @@ static int read_quorum_block(struct super_block *sb, u64 blkno, struct scoutfs_q
|
||||
bool check_rid)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
const u64 fsid = sbi->fsid;
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
const u64 rid = sbi->rid;
|
||||
char msg[150];
|
||||
__le32 crc;
|
||||
@@ -374,9 +367,9 @@ static int read_quorum_block(struct super_block *sb, u64 blkno, struct scoutfs_q
|
||||
else if (le32_to_cpu(blk->hdr.magic) != SCOUTFS_BLOCK_MAGIC_QUORUM)
|
||||
snprintf(msg, sizeof(msg), "blk magic %08x != %08x",
|
||||
le32_to_cpu(blk->hdr.magic), SCOUTFS_BLOCK_MAGIC_QUORUM);
|
||||
else if (blk->hdr.fsid != cpu_to_le64(fsid))
|
||||
else if (blk->hdr.fsid != super->hdr.fsid)
|
||||
snprintf(msg, sizeof(msg), "blk fsid %016llx != %016llx",
|
||||
le64_to_cpu(blk->hdr.fsid), fsid);
|
||||
le64_to_cpu(blk->hdr.fsid), le64_to_cpu(super->hdr.fsid));
|
||||
else if (le64_to_cpu(blk->hdr.blkno) != blkno)
|
||||
snprintf(msg, sizeof(msg), "blk blkno %llu != %llu",
|
||||
le64_to_cpu(blk->hdr.blkno), blkno);
|
||||
@@ -417,7 +410,8 @@ out:
|
||||
*/
|
||||
static void read_greatest_term(struct super_block *sb, u64 *term)
|
||||
{
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_quorum_block blk;
|
||||
int ret;
|
||||
int e;
|
||||
@@ -426,7 +420,7 @@ static void read_greatest_term(struct super_block *sb, u64 *term)
|
||||
*term = 0;
|
||||
|
||||
for (s = 0; s < SCOUTFS_QUORUM_MAX_SLOTS; s++) {
|
||||
if (!quorum_slot_present(&qinf->qconf, s))
|
||||
if (!quorum_slot_present(super, s))
|
||||
continue;
|
||||
|
||||
ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + s, &blk, false);
|
||||
@@ -520,15 +514,14 @@ static int update_quorum_block(struct super_block *sb, int event, u64 term, bool
|
||||
* keeps us from being fenced while we allow userspace fencing to take a
|
||||
* reasonably long time. We still want to timeout eventually.
|
||||
*/
|
||||
int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_config *qconf,
|
||||
u64 term)
|
||||
int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term)
|
||||
{
|
||||
#define NR_OLD 2
|
||||
struct scoutfs_quorum_block_event old[SCOUTFS_QUORUM_MAX_SLOTS][NR_OLD] = {{{0,}}};
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_quorum_block blk;
|
||||
struct sockaddr_in sin;
|
||||
const __le64 lefsid = cpu_to_le64(sbi->fsid);
|
||||
const u64 rid = sbi->rid;
|
||||
bool fence_started = false;
|
||||
u64 fenced = 0;
|
||||
@@ -541,7 +534,7 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_c
|
||||
BUILD_BUG_ON(SCOUTFS_QUORUM_BLOCKS < SCOUTFS_QUORUM_MAX_SLOTS);
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (!quorum_slot_present(qconf, i))
|
||||
if (!quorum_slot_present(super, i))
|
||||
continue;
|
||||
|
||||
ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + i, &blk, false);
|
||||
@@ -574,11 +567,11 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_c
|
||||
continue;
|
||||
|
||||
scoutfs_inc_counter(sb, quorum_fence_leader);
|
||||
quorum_slot_sin(qconf, i, &sin);
|
||||
scoutfs_quorum_slot_sin(super, i, &sin);
|
||||
fence_rid = old[i][j].rid;
|
||||
|
||||
scoutfs_info(sb, "fencing previous leader "SCSBF" at term %llu in slot %u with address "SIN_FMT,
|
||||
SCSB_LEFR_ARGS(lefsid, fence_rid),
|
||||
SCSB_LEFR_ARGS(super->hdr.fsid, fence_rid),
|
||||
le64_to_cpu(old[i][j].term), i, SIN_ARG(&sin));
|
||||
ret = scoutfs_fence_start(sb, le64_to_cpu(fence_rid), sin.sin_addr.s_addr,
|
||||
SCOUTFS_FENCE_QUORUM_BLOCK_LEADER);
|
||||
@@ -759,7 +752,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
|
||||
qst.server_start_term = qst.term;
|
||||
qst.server_event = SCOUTFS_QUORUM_EVENT_ELECT;
|
||||
scoutfs_server_start(sb, &qinf->qconf, qst.term);
|
||||
scoutfs_server_start(sb, qst.term);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -884,25 +877,16 @@ out:
|
||||
*/
|
||||
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin)
|
||||
{
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_quorum_block blk;
|
||||
u64 elect_term;
|
||||
u64 term = 0;
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
if (!super) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (!quorum_slot_present(&super->qconf, i))
|
||||
if (!quorum_slot_present(super, i))
|
||||
continue;
|
||||
|
||||
ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + i, &blk, false);
|
||||
@@ -916,7 +900,7 @@ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin)
|
||||
if (elect_term > term &&
|
||||
elect_term > le64_to_cpu(blk.events[SCOUTFS_QUORUM_EVENT_STOP].term)) {
|
||||
term = elect_term;
|
||||
scoutfs_quorum_slot_sin(&super->qconf, i, sin);
|
||||
scoutfs_quorum_slot_sin(super, i, sin);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -925,7 +909,6 @@ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin)
|
||||
ret = -ENOENT;
|
||||
|
||||
out:
|
||||
kfree(super);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -941,9 +924,12 @@ u8 scoutfs_quorum_votes_needed(struct super_block *sb)
|
||||
return qinf->votes_needed;
|
||||
}
|
||||
|
||||
void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin)
|
||||
void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i,
|
||||
struct sockaddr_in *sin)
|
||||
{
|
||||
return quorum_slot_sin(qconf, i, sin);
|
||||
BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS);
|
||||
|
||||
scoutfs_addr_to_sin(sin, &super->qconf.slots[i].addr);
|
||||
}
|
||||
|
||||
static char *role_str(int role)
|
||||
@@ -1074,10 +1060,11 @@ static inline bool valid_ipv4_port(__be16 port)
|
||||
return port != 0 && be16_to_cpu(port) != U16_MAX;
|
||||
}
|
||||
|
||||
static int verify_quorum_slots(struct super_block *sb, struct quorum_info *qinf,
|
||||
struct scoutfs_quorum_config *qconf)
|
||||
static int verify_quorum_slots(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
char slots[(SCOUTFS_QUORUM_MAX_SLOTS * 3) + 1];
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct sockaddr_in other;
|
||||
struct sockaddr_in sin;
|
||||
int found = 0;
|
||||
@@ -1087,10 +1074,10 @@ static int verify_quorum_slots(struct super_block *sb, struct quorum_info *qinf,
|
||||
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (!quorum_slot_present(qconf, i))
|
||||
if (!quorum_slot_present(super, i))
|
||||
continue;
|
||||
|
||||
scoutfs_quorum_slot_sin(qconf, i, &sin);
|
||||
scoutfs_quorum_slot_sin(super, i, &sin);
|
||||
|
||||
if (!valid_ipv4_unicast(sin.sin_addr.s_addr)) {
|
||||
scoutfs_err(sb, "quorum slot #%d has invalid ipv4 unicast address: "SIN_FMT,
|
||||
@@ -1105,10 +1092,10 @@ static int verify_quorum_slots(struct super_block *sb, struct quorum_info *qinf,
|
||||
}
|
||||
|
||||
for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) {
|
||||
if (!quorum_slot_present(qconf, j))
|
||||
if (!quorum_slot_present(super, j))
|
||||
continue;
|
||||
|
||||
scoutfs_quorum_slot_sin(qconf, j, &other);
|
||||
scoutfs_quorum_slot_sin(super, j, &other);
|
||||
|
||||
if (sin.sin_addr.s_addr == other.sin_addr.s_addr &&
|
||||
sin.sin_port == other.sin_port) {
|
||||
@@ -1126,11 +1113,11 @@ static int verify_quorum_slots(struct super_block *sb, struct quorum_info *qinf,
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!quorum_slot_present(qconf, qinf->our_quorum_slot_nr)) {
|
||||
if (!quorum_slot_present(super, qinf->our_quorum_slot_nr)) {
|
||||
char *str = slots;
|
||||
*str = '\0';
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (quorum_slot_present(qconf, i)) {
|
||||
if (quorum_slot_present(super, i)) {
|
||||
ret = snprintf(str, &slots[ARRAY_SIZE(slots)] - str, "%c%u",
|
||||
str == slots ? ' ' : ',', i);
|
||||
if (ret < 2 || ret > 3) {
|
||||
@@ -1154,22 +1141,16 @@ static int verify_quorum_slots(struct super_block *sb, struct quorum_info *qinf,
|
||||
else
|
||||
qinf->votes_needed = (found / 2) + 1;
|
||||
|
||||
qinf->qconf = *qconf;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Once this schedules the quorum worker it can be elected leader and
|
||||
* start the server, possibly before this returns. The quorum agent
|
||||
* would be responsible for tracking the quorum config in the super
|
||||
* block if it changes. Until then uses a static config that it reads
|
||||
* during setup.
|
||||
* start the server, possibly before this returns.
|
||||
*/
|
||||
int scoutfs_quorum_setup(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct scoutfs_mount_options opts;
|
||||
struct quorum_info *qinf;
|
||||
int ret;
|
||||
@@ -1179,9 +1160,7 @@ int scoutfs_quorum_setup(struct super_block *sb)
|
||||
return 0;
|
||||
|
||||
qinf = kzalloc(sizeof(struct quorum_info), GFP_KERNEL);
|
||||
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_KERNEL);
|
||||
if (!qinf || !super) {
|
||||
kfree(qinf);
|
||||
if (!qinf) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@@ -1195,11 +1174,7 @@ int scoutfs_quorum_setup(struct super_block *sb)
|
||||
sbi->quorum_info = qinf;
|
||||
qinf->sb = sb;
|
||||
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = verify_quorum_slots(sb, qinf, &super->qconf);
|
||||
ret = verify_quorum_slots(sb);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -1219,7 +1194,6 @@ out:
|
||||
if (ret)
|
||||
scoutfs_quorum_destroy(sb);
|
||||
|
||||
kfree(super);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -4,11 +4,10 @@
|
||||
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin);
|
||||
|
||||
u8 scoutfs_quorum_votes_needed(struct super_block *sb);
|
||||
void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i,
|
||||
void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i,
|
||||
struct sockaddr_in *sin);
|
||||
|
||||
int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_config *qconf,
|
||||
u64 term);
|
||||
int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term);
|
||||
|
||||
int scoutfs_quorum_setup(struct super_block *sb);
|
||||
void scoutfs_quorum_shutdown(struct super_block *sb);
|
||||
|
||||
@@ -691,16 +691,16 @@ TRACE_EVENT(scoutfs_evict_inode,
|
||||
|
||||
TRACE_EVENT(scoutfs_drop_inode,
|
||||
TP_PROTO(struct super_block *sb, __u64 ino, unsigned int nlink,
|
||||
unsigned int unhashed, bool lock_covered),
|
||||
unsigned int unhashed, bool drop_invalidated),
|
||||
|
||||
TP_ARGS(sb, ino, nlink, unhashed, lock_covered),
|
||||
TP_ARGS(sb, ino, nlink, unhashed, drop_invalidated),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, ino)
|
||||
__field(unsigned int, nlink)
|
||||
__field(unsigned int, unhashed)
|
||||
__field(unsigned int, lock_covered)
|
||||
__field(unsigned int, drop_invalidated)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
@@ -708,12 +708,12 @@ TRACE_EVENT(scoutfs_drop_inode,
|
||||
__entry->ino = ino;
|
||||
__entry->nlink = nlink;
|
||||
__entry->unhashed = unhashed;
|
||||
__entry->lock_covered = !!lock_covered;
|
||||
__entry->drop_invalidated = !!drop_invalidated;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" ino %llu nlink %u unhashed %d lock_covered %u", SCSB_TRACE_ARGS,
|
||||
TP_printk(SCSBF" ino %llu nlink %u unhashed %d drop_invalidated %u", SCSB_TRACE_ARGS,
|
||||
__entry->ino, __entry->nlink, __entry->unhashed,
|
||||
__entry->lock_covered)
|
||||
__entry->drop_invalidated)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_inode_walk_writeback,
|
||||
@@ -1417,71 +1417,42 @@ TRACE_EVENT(scoutfs_rename,
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_d_revalidate,
|
||||
TP_PROTO(struct super_block *sb, struct dentry *dentry, int flags, u64 dir_ino, int ret),
|
||||
TP_PROTO(struct super_block *sb,
|
||||
struct dentry *dentry, int flags, struct dentry *parent,
|
||||
bool is_covered, int ret),
|
||||
|
||||
TP_ARGS(sb, dentry, flags, dir_ino, ret),
|
||||
TP_ARGS(sb, dentry, flags, parent, is_covered, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(void *, dentry)
|
||||
__string(name, dentry->d_name.name)
|
||||
__field(__u64, ino)
|
||||
__field(__u64, dir_ino)
|
||||
__field(__u64, parent_ino)
|
||||
__field(int, flags)
|
||||
__field(int, is_root)
|
||||
__field(int, is_covered)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->dentry = dentry;
|
||||
__assign_str(name, dentry->d_name.name)
|
||||
__entry->ino = dentry->d_inode ? scoutfs_ino(dentry->d_inode) : 0;
|
||||
__entry->dir_ino = dir_ino;
|
||||
__entry->ino = dentry->d_inode ?
|
||||
scoutfs_ino(dentry->d_inode) : 0;
|
||||
__entry->parent_ino = parent->d_inode ?
|
||||
scoutfs_ino(parent->d_inode) : 0;
|
||||
__entry->flags = flags;
|
||||
__entry->is_root = IS_ROOT(dentry);
|
||||
__entry->is_covered = is_covered;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" dentry %p name %s ino %llu dir_ino %llu flags 0x%x s_root %u ret %d",
|
||||
SCSB_TRACE_ARGS, __entry->dentry, __get_str(name), __entry->ino, __entry->dir_ino,
|
||||
__entry->flags, __entry->is_root, __entry->ret)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_validate_dentry,
|
||||
TP_PROTO(struct super_block *sb, struct dentry *dentry, u64 dir_ino, u64 dentry_ino,
|
||||
u64 dent_ino, u64 refresh_gen, int ret),
|
||||
|
||||
TP_ARGS(sb, dentry, dir_ino, dentry_ino, dent_ino, refresh_gen, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(void *, dentry)
|
||||
__field(__u64, dir_ino)
|
||||
__string(name, dentry->d_name.name)
|
||||
__field(__u64, dentry_ino)
|
||||
__field(__u64, dent_ino)
|
||||
__field(__u64, fsdata_gen)
|
||||
__field(__u64, refresh_gen)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->dentry = dentry;
|
||||
__entry->dir_ino = dir_ino;
|
||||
__assign_str(name, dentry->d_name.name)
|
||||
__entry->dentry_ino = dentry_ino;
|
||||
__entry->dent_ino = dent_ino;
|
||||
__entry->fsdata_gen = (unsigned long long)dentry->d_fsdata;
|
||||
__entry->refresh_gen = refresh_gen;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" dentry %p dir %llu name %s dentry_ino %llu dent_ino %llu fsdata_gen %llu refresh_gen %llu ret %d",
|
||||
SCSB_TRACE_ARGS, __entry->dentry, __entry->dir_ino, __get_str(name),
|
||||
__entry->dentry_ino, __entry->dent_ino, __entry->fsdata_gen,
|
||||
__entry->refresh_gen, __entry->ret)
|
||||
TP_printk(SCSBF" name %s ino %llu parent_ino %llu flags 0x%x s_root %u is_covered %u ret %d",
|
||||
SCSB_TRACE_ARGS, __get_str(name), __entry->ino,
|
||||
__entry->parent_ino, __entry->flags,
|
||||
__entry->is_root,
|
||||
__entry->is_covered,
|
||||
__entry->ret)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_super_lifecycle_class,
|
||||
|
||||
@@ -130,9 +130,9 @@ struct server_info {
|
||||
struct mutex srch_mutex;
|
||||
struct mutex mounted_clients_mutex;
|
||||
|
||||
/* stable super stored from commits, given in locks and rpcs */
|
||||
seqcount_t stable_seqcount;
|
||||
struct scoutfs_super_block stable_super;
|
||||
/* stable versions stored from commits, given in locks and rpcs */
|
||||
seqcount_t roots_seqcount;
|
||||
struct scoutfs_net_roots roots;
|
||||
|
||||
/* serializes getting and setting volume options */
|
||||
seqcount_t volopt_seqcount;
|
||||
@@ -143,18 +143,11 @@ struct server_info {
|
||||
struct work_struct fence_pending_recov_work;
|
||||
/* while running we check for fenced mounts to reclaim */
|
||||
struct delayed_work reclaim_dwork;
|
||||
|
||||
/* a running server gets a static quorum config from quorum as it starts */
|
||||
struct scoutfs_quorum_config qconf;
|
||||
/* a running server maintains a private dirty super */
|
||||
struct scoutfs_super_block dirty_super;
|
||||
};
|
||||
|
||||
#define DECLARE_SERVER_INFO(sb, name) \
|
||||
struct server_info *name = SCOUTFS_SB(sb)->server_info
|
||||
|
||||
#define DIRTY_SUPER_SB(sb) (&SCOUTFS_SB(sb)->server_info->dirty_super)
|
||||
|
||||
/*
|
||||
* The server tracks each connected client.
|
||||
*/
|
||||
@@ -476,22 +469,16 @@ static void commit_end(struct super_block *sb, struct commit_users *cusers, int
|
||||
wake_up(&cusers->waitq);
|
||||
}
|
||||
|
||||
static void get_stable(struct super_block *sb, struct scoutfs_super_block *super,
|
||||
struct scoutfs_net_roots *roots)
|
||||
static void get_roots(struct super_block *sb,
|
||||
struct scoutfs_net_roots *roots)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
unsigned int seq;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&server->stable_seqcount);
|
||||
if (super)
|
||||
*super = server->stable_super;
|
||||
if (roots) {
|
||||
roots->fs_root = server->stable_super.fs_root;
|
||||
roots->logs_root = server->stable_super.logs_root;
|
||||
roots->srch_root = server->stable_super.srch_root;
|
||||
}
|
||||
} while (read_seqcount_retry(&server->stable_seqcount, seq));
|
||||
seq = read_seqcount_begin(&server->roots_seqcount);
|
||||
*roots = server->roots;
|
||||
} while (read_seqcount_retry(&server->roots_seqcount, seq));
|
||||
}
|
||||
|
||||
u64 scoutfs_server_seq(struct super_block *sb)
|
||||
@@ -523,12 +510,17 @@ void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq)
|
||||
}
|
||||
}
|
||||
|
||||
static void set_stable_super(struct server_info *server, struct scoutfs_super_block *super)
|
||||
static void set_roots(struct server_info *server,
|
||||
struct scoutfs_btree_root *fs_root,
|
||||
struct scoutfs_btree_root *logs_root,
|
||||
struct scoutfs_btree_root *srch_root)
|
||||
{
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&server->stable_seqcount);
|
||||
server->stable_super = *super;
|
||||
write_seqcount_end(&server->stable_seqcount);
|
||||
write_seqcount_begin(&server->roots_seqcount);
|
||||
server->roots.fs_root = *fs_root;
|
||||
server->roots.logs_root = *logs_root;
|
||||
server->roots.srch_root = *srch_root;
|
||||
write_seqcount_end(&server->roots_seqcount);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
@@ -553,7 +545,7 @@ static void scoutfs_server_commit_func(struct work_struct *work)
|
||||
struct server_info *server = container_of(work, struct server_info,
|
||||
commit_work);
|
||||
struct super_block *sb = server->sb;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct commit_users *cusers = &server->cusers;
|
||||
int ret;
|
||||
|
||||
@@ -611,7 +603,8 @@ static void scoutfs_server_commit_func(struct work_struct *work)
|
||||
goto out;
|
||||
}
|
||||
|
||||
set_stable_super(server, super);
|
||||
set_roots(server, &super->fs_root, &super->logs_root,
|
||||
&super->srch_root);
|
||||
|
||||
/* swizzle the active and idle server alloc/freed heads */
|
||||
server->other_ind ^= 1;
|
||||
@@ -648,7 +641,7 @@ static int server_alloc_inodes(struct super_block *sb,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_net_inode_alloc ial = { 0, };
|
||||
COMMIT_HOLD(hold);
|
||||
__le64 lecount;
|
||||
@@ -701,13 +694,13 @@ static int alloc_move_refill_zoned(struct super_block *sb, struct scoutfs_alloc_
|
||||
|
||||
static int alloc_move_empty(struct super_block *sb,
|
||||
struct scoutfs_alloc_root *dst,
|
||||
struct scoutfs_alloc_root *src, u64 meta_budget)
|
||||
struct scoutfs_alloc_root *src, u64 meta_reserved)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
return scoutfs_alloc_move(sb, &server->alloc, &server->wri,
|
||||
dst, src, le64_to_cpu(src->total_len), NULL, NULL, 0,
|
||||
meta_budget);
|
||||
meta_reserved);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -816,7 +809,7 @@ static void mod_bitmap_bits(__le64 *dst, u64 dst_zone_blocks,
|
||||
static int get_data_alloc_zone_bits(struct super_block *sb, u64 rid, __le64 *exclusive,
|
||||
__le64 *vacant, u64 zone_blocks)
|
||||
{
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_log_trees *lt;
|
||||
struct scoutfs_key key;
|
||||
@@ -1047,7 +1040,7 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
u64 rid, struct commit_hold *hold)
|
||||
{
|
||||
struct server_info *server = SCOUTFS_SB(sb)->server_info;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_log_merge_status stat;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
struct scoutfs_log_trees each_lt;
|
||||
@@ -1233,82 +1226,6 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * The calling get_log_trees ran out of available blocks in its commit's
 * metadata allocator while moving extents from the log tree's
 * data_freed into the super block's data_alloc.  This finishes moving
 * the extents in as many additional commits as it takes.
 *
 * The logs mutex is nested inside holding commits, so the persistent
 * log_trees item is looked up again in every iteration to make sure
 * it is still the item the caller was working with.
 *
 * The caller is still going to send its copy of the item to the
 * client, so *lt is overwritten with the updated item each time
 * progress is made.
 *
 * This is a best-effort cleanup.  It is valid to leave extents behind
 * in data_freed, so no errors are returned to the caller.  The client
 * will continue the work later in get_log_trees or as the rid is
 * reclaimed.
 *
 * @sb: super block of the mount running the server
 * @lt: caller's copy of the log_trees item; read for its identity and
 *      updated in place as extents are drained
 */
|
||||
static void try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees *lt)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
const u64 rid = le64_to_cpu(lt->rid);
|
||||
const u64 nr = le64_to_cpu(lt->nr);
|
||||
struct scoutfs_log_trees drain;
|
||||
struct scoutfs_key key;
|
||||
COMMIT_HOLD(hold);
|
||||
int ret = 0;
|
||||
int err;
|
||||
|
||||
/* the key is built once from the caller's rid and nr and reused for every commit */
scoutfs_key_init_log_trees(&key, rid, nr);
|
||||
|
||||
/* one commit per iteration until the caller's copy shows data_freed is empty */
while (lt->data_freed.total_len != 0) {
|
||||
/* lock order: hold the commit first, then take logs_mutex */
server_hold_commit(sb, &hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
|
||||
/* re-read the item for this rid into drain; breaks below leave the hold and mutex held */
ret = find_log_trees_item(sb, &super->logs_root, false, rid, U64_MAX, &drain);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
/* careful to only keep draining the caller's specific open trans */
|
||||
if (drain.nr != lt->nr || drain.get_trans_seq != lt->get_trans_seq ||
|
||||
drain.commit_trans_seq != lt->commit_trans_seq || drain.flags != lt->flags) {
|
||||
ret = -ENOENT;
|
||||
break;
|
||||
}
|
||||
|
||||
/* dirty the item's btree path up front so the later forced write cannot fail */
ret = scoutfs_btree_dirty(sb, &server->alloc, &server->wri,
|
||||
&super->logs_root, &key);
|
||||
if (ret < 0)
|
||||
break;
|
||||
|
||||
/* moving can modify and return errors, always update caller and item */
|
||||
mutex_lock(&server->alloc_mutex);
|
||||
ret = alloc_move_empty(sb, &super->data_alloc, &drain.data_freed,
|
||||
COMMIT_HOLD_ALLOC_BUDGET / 2);
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
/* NOTE(review): -EINPROGRESS presumably means the move stopped at its budget with extents left; confirm in scoutfs_alloc_move */
if (ret == -EINPROGRESS)
|
||||
ret = 0;
|
||||
|
||||
/* hand the (possibly partially) drained item back to the caller, then persist it */
*lt = drain;
|
||||
err = scoutfs_btree_force(sb, &server->alloc, &server->wri,
|
||||
&super->logs_root, &key, &drain, sizeof(drain));
|
||||
BUG_ON(err < 0); /* dirtying must guarantee success */
|
||||
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
|
||||
/* apply the commit, passing along any error from the move */
ret = server_apply_commit(sb, &hold, ret);
|
||||
if (ret < 0) {
|
||||
ret = 0; /* don't try to abort, ignoring ret */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* try to cleanly abort and write any partial dirty btree blocks, but ignore result */
|
||||
/* ret < 0 here only after a break taken with the commit held and logs_mutex locked */
if (ret < 0) {
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
server_apply_commit(sb, &hold, 0);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Give the client roots to all the trees that they'll use to build
|
||||
* their transaction.
|
||||
@@ -1336,7 +1253,7 @@ static int server_get_log_trees(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
__le64 exclusive[SCOUTFS_DATA_ALLOC_ZONE_LE64S];
|
||||
@@ -1434,9 +1351,7 @@ static int server_get_log_trees(struct super_block *sb,
|
||||
goto update;
|
||||
}
|
||||
|
||||
ret = alloc_move_empty(sb, &super->data_alloc, <.data_freed, 100);
|
||||
if (ret == -EINPROGRESS)
|
||||
ret = 0;
|
||||
ret = alloc_move_empty(sb, &super->data_alloc, <.data_freed, 0);
|
||||
if (ret < 0) {
|
||||
err_str = "emptying committed data_freed";
|
||||
goto update;
|
||||
@@ -1514,10 +1429,6 @@ out:
|
||||
scoutfs_err(sb, "error %d getting log trees for rid %016llx: %s",
|
||||
ret, rid, err_str);
|
||||
|
||||
/* try to drain excessive data_freed with additional commits, if needed, ignoring err */
|
||||
if (ret == 0)
|
||||
try_drain_data_freed(sb, <);
|
||||
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, <, sizeof(lt));
|
||||
}
|
||||
|
||||
@@ -1531,7 +1442,7 @@ static int server_commit_log_trees(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
const u64 rid = scoutfs_net_client_rid(conn);
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
@@ -1586,13 +1497,6 @@ static int server_commit_log_trees(struct super_block *sb,
|
||||
if (ret < 0 || committed)
|
||||
goto unlock;
|
||||
|
||||
/* make sure _update succeeds before we modify srch items */
|
||||
ret = scoutfs_btree_dirty(sb, &server->alloc, &server->wri, &super->logs_root, &key);
|
||||
if (ret < 0) {
|
||||
err_str = "dirtying lt item";
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* try to rotate the srch log when big enough */
|
||||
mutex_lock(&server->srch_mutex);
|
||||
ret = scoutfs_srch_rotate_log(sb, &server->alloc, &server->wri,
|
||||
@@ -1607,7 +1511,6 @@ static int server_commit_log_trees(struct super_block *sb,
|
||||
|
||||
ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
|
||||
&super->logs_root, &key, <, sizeof(lt));
|
||||
BUG_ON(ret < 0); /* dirtying should have guaranteed success */
|
||||
if (ret < 0)
|
||||
err_str = "updating log trees item";
|
||||
|
||||
@@ -1639,7 +1542,7 @@ static int server_get_roots(struct super_block *sb,
|
||||
memset(&roots, 0, sizeof(roots));
|
||||
ret = -EINVAL;
|
||||
} else {
|
||||
get_stable(sb, NULL, &roots);
|
||||
get_roots(sb, &roots);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
@@ -1669,7 +1572,7 @@ static int server_get_roots(struct super_block *sb,
|
||||
*/
|
||||
static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
|
||||
{
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_log_trees lt;
|
||||
@@ -1766,8 +1669,9 @@ out:
|
||||
*/
|
||||
static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret)
|
||||
{
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_log_trees *lt;
|
||||
struct scoutfs_key key;
|
||||
@@ -1923,8 +1827,9 @@ static int server_srch_get_compact(struct super_block *sb,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_srch_compact *sc = NULL;
|
||||
COMMIT_HOLD(hold);
|
||||
int ret;
|
||||
@@ -1989,7 +1894,8 @@ static int server_srch_commit_compact(struct super_block *sb,
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_srch_compact *sc;
|
||||
struct scoutfs_alloc_list_head av;
|
||||
struct scoutfs_alloc_list_head fr;
|
||||
@@ -2064,7 +1970,8 @@ static int splice_log_merge_completions(struct super_block *sb,
|
||||
bool no_ranges)
|
||||
{
|
||||
struct server_info *server = SCOUTFS_SB(sb)->server_info;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_log_merge_complete comp;
|
||||
struct scoutfs_log_merge_freeing fr;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
@@ -2381,7 +2288,7 @@ static void server_log_merge_free_work(struct work_struct *work)
|
||||
struct server_info *server = container_of(work, struct server_info,
|
||||
log_merge_free_work);
|
||||
struct super_block *sb = server->sb;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_log_merge_freeing fr;
|
||||
struct scoutfs_key key;
|
||||
COMMIT_HOLD(hold);
|
||||
@@ -2473,7 +2380,8 @@ static int server_get_log_merge(struct super_block *sb,
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_log_merge_status stat;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
struct scoutfs_log_merge_range remain;
|
||||
@@ -2756,7 +2664,8 @@ static int server_commit_log_merge(struct super_block *sb,
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_log_merge_request orig_req;
|
||||
struct scoutfs_log_merge_complete *comp;
|
||||
struct scoutfs_log_merge_status stat;
|
||||
@@ -2991,7 +2900,7 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_volume_options *volopt;
|
||||
COMMIT_HOLD(hold);
|
||||
u64 opt;
|
||||
@@ -3060,7 +2969,7 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_volume_options *volopt;
|
||||
COMMIT_HOLD(hold);
|
||||
__le64 *opt;
|
||||
@@ -3114,7 +3023,7 @@ static int server_resize_devices(struct super_block *sb, struct scoutfs_net_conn
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_net_resize_devices *nrd;
|
||||
COMMIT_HOLD(hold);
|
||||
u64 meta_tot;
|
||||
@@ -3221,19 +3130,16 @@ static int count_free_blocks(struct super_block *sb, void *arg, int owner,
|
||||
}
|
||||
|
||||
/*
|
||||
* We calculate the total inode count and free blocks from the last
|
||||
* stable super that was written. Other users also walk stable blocks
|
||||
* so by joining them we don't have to worry about ensuring that we've
|
||||
* locked all the dirty structures that the summations could reference.
|
||||
* We handle stale reads by retrying with the most recent stable super.
|
||||
* We calculate the total inode count and free blocks from the current in-memory dirty
|
||||
* versions of the super block and log_trees structs, so we have to lock them.
|
||||
*/
|
||||
static int server_statfs(struct super_block *sb, struct scoutfs_net_connection *conn,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct scoutfs_super_block super;
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_net_statfs nst = {{0,}};
|
||||
struct statfs_free_blocks sfb = {0,};
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
u64 inode_count;
|
||||
int ret;
|
||||
|
||||
@@ -3242,24 +3148,24 @@ static int server_statfs(struct super_block *sb, struct scoutfs_net_connection *
|
||||
goto out;
|
||||
}
|
||||
|
||||
do {
|
||||
get_stable(sb, &super, NULL);
|
||||
mutex_lock(&server->alloc_mutex);
|
||||
ret = scoutfs_alloc_foreach_super(sb, super, count_free_blocks, &sfb);
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_alloc_foreach_super(sb, &super, count_free_blocks, &sfb) ?:
|
||||
scoutfs_forest_inode_count(sb, &super, &inode_count);
|
||||
if (ret < 0 && ret != -ESTALE)
|
||||
goto out;
|
||||
mutex_lock(&server->logs_mutex);
|
||||
ret = scoutfs_forest_inode_count(sb, super, &inode_count);
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &super.logs_root.ref,
|
||||
&super.srch_root.ref);
|
||||
} while (ret == -ESTALE);
|
||||
|
||||
BUILD_BUG_ON(sizeof(nst.uuid) != sizeof(super.uuid));
|
||||
memcpy(nst.uuid, super.uuid, sizeof(nst.uuid));
|
||||
BUILD_BUG_ON(sizeof(nst.uuid) != sizeof(super->uuid));
|
||||
memcpy(nst.uuid, super->uuid, sizeof(nst.uuid));
|
||||
nst.free_meta_blocks = cpu_to_le64(sfb.meta);
|
||||
nst.total_meta_blocks = super.total_meta_blocks;
|
||||
nst.total_meta_blocks = super->total_meta_blocks;
|
||||
nst.free_data_blocks = cpu_to_le64(sfb.data);
|
||||
nst.total_data_blocks = super.total_data_blocks;
|
||||
nst.total_data_blocks = super->total_data_blocks;
|
||||
nst.inode_count = cpu_to_le64(inode_count);
|
||||
|
||||
ret = 0;
|
||||
@@ -3290,7 +3196,7 @@ static int insert_mounted_client(struct super_block *sb, u64 rid, u64 gr_flags,
|
||||
struct sockaddr_in *sin)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_mounted_client_btree_val mcv;
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
@@ -3316,7 +3222,7 @@ static int lookup_mounted_client_addr(struct super_block *sb, u64 rid,
|
||||
union scoutfs_inet_addr *addr)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_mounted_client_btree_val *mcv;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
@@ -3350,7 +3256,7 @@ static int lookup_mounted_client_addr(struct super_block *sb, u64 rid,
|
||||
static int delete_mounted_client(struct super_block *sb, u64 rid)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
@@ -3374,7 +3280,7 @@ static int delete_mounted_client(struct super_block *sb, u64 rid)
|
||||
static int cancel_srch_compact(struct super_block *sb, u64 rid)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_alloc_list_head av;
|
||||
struct scoutfs_alloc_list_head fr;
|
||||
int ret;
|
||||
@@ -3426,7 +3332,7 @@ static int cancel_srch_compact(struct super_block *sb, u64 rid)
|
||||
static int cancel_log_merge(struct super_block *sb, u64 rid)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_log_merge_status stat;
|
||||
struct scoutfs_log_merge_request req;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
@@ -3550,7 +3456,7 @@ static int server_greeting(struct super_block *sb,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_net_greeting *gr = arg;
|
||||
struct scoutfs_net_greeting greet;
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
@@ -3566,9 +3472,10 @@ static int server_greeting(struct super_block *sb,
|
||||
goto send_err;
|
||||
}
|
||||
|
||||
if (gr->fsid != cpu_to_le64(sbi->fsid)) {
|
||||
if (gr->fsid != super->hdr.fsid) {
|
||||
scoutfs_warn(sb, "client rid %016llx greeting fsid 0x%llx did not match server fsid 0x%llx",
|
||||
le64_to_cpu(gr->rid), le64_to_cpu(gr->fsid), sbi->fsid);
|
||||
le64_to_cpu(gr->rid), le64_to_cpu(gr->fsid),
|
||||
le64_to_cpu(super->hdr.fsid));
|
||||
ret = -EINVAL;
|
||||
goto send_err;
|
||||
}
|
||||
@@ -3708,7 +3615,7 @@ static void farewell_worker(struct work_struct *work)
|
||||
struct server_info *server = container_of(work, struct server_info,
|
||||
farewell_work);
|
||||
struct super_block *sb = server->sb;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_mounted_client_btree_val *mcv;
|
||||
struct farewell_request *tmp;
|
||||
struct farewell_request *fw;
|
||||
@@ -4070,7 +3977,7 @@ static void recovery_timeout(struct super_block *sb)
|
||||
static int start_recovery(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
unsigned int nr = 0;
|
||||
@@ -4187,7 +4094,8 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
struct server_info *server = container_of(work, struct server_info,
|
||||
work);
|
||||
struct super_block *sb = server->sb;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_net_connection *conn = NULL;
|
||||
struct scoutfs_mount_options opts;
|
||||
DECLARE_WAIT_QUEUE_HEAD(waitq);
|
||||
@@ -4199,13 +4107,13 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
trace_scoutfs_server_work_enter(sb, 0, 0);
|
||||
|
||||
scoutfs_options_read(sb, &opts);
|
||||
scoutfs_quorum_slot_sin(&server->qconf, opts.quorum_slot_nr, &sin);
|
||||
scoutfs_quorum_slot_sin(super, opts.quorum_slot_nr, &sin);
|
||||
scoutfs_info(sb, "server starting at "SIN_FMT, SIN_ARG(&sin));
|
||||
|
||||
scoutfs_block_writer_init(sb, &server->wri);
|
||||
|
||||
/* first make sure no other servers are still running */
|
||||
ret = scoutfs_quorum_fence_leaders(sb, &server->qconf, server->term);
|
||||
ret = scoutfs_quorum_fence_leaders(sb, server->term);
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "server error %d attempting to fence previous leaders", ret);
|
||||
goto out;
|
||||
@@ -4241,7 +4149,8 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
write_seqcount_end(&server->volopt_seqcount);
|
||||
|
||||
atomic64_set(&server->seq_atomic, le64_to_cpu(super->seq));
|
||||
set_stable_super(server, super);
|
||||
set_roots(server, &super->fs_root, &super->logs_root,
|
||||
&super->srch_root);
|
||||
|
||||
/* prepare server alloc for this transaction, larger first */
|
||||
if (le64_to_cpu(super->server_meta_avail[0].total_nr) <
|
||||
@@ -4335,12 +4244,11 @@ out:
|
||||
/*
|
||||
* Start the server but don't wait for it to complete.
|
||||
*/
|
||||
void scoutfs_server_start(struct super_block *sb, struct scoutfs_quorum_config *qconf, u64 term)
|
||||
void scoutfs_server_start(struct super_block *sb, u64 term)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
if (cmpxchg(&server->status, SERVER_DOWN, SERVER_STARTING) == SERVER_DOWN) {
|
||||
server->qconf = *qconf;
|
||||
server->term = term;
|
||||
queue_work(server->wq, &server->work);
|
||||
}
|
||||
@@ -4392,7 +4300,7 @@ int scoutfs_server_setup(struct super_block *sb)
|
||||
INIT_WORK(&server->log_merge_free_work, server_log_merge_free_work);
|
||||
mutex_init(&server->srch_mutex);
|
||||
mutex_init(&server->mounted_clients_mutex);
|
||||
seqcount_init(&server->stable_seqcount);
|
||||
seqcount_init(&server->roots_seqcount);
|
||||
seqcount_init(&server->volopt_seqcount);
|
||||
mutex_init(&server->volopt_mutex);
|
||||
INIT_WORK(&server->fence_pending_recov_work, fence_pending_recov_worker);
|
||||
|
||||
@@ -75,7 +75,7 @@ u64 scoutfs_server_seq(struct super_block *sb);
|
||||
u64 scoutfs_server_next_seq(struct super_block *sb);
|
||||
void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq);
|
||||
|
||||
void scoutfs_server_start(struct super_block *sb, struct scoutfs_quorum_config *qconf, u64 term);
|
||||
void scoutfs_server_start(struct super_block *sb, u64 term);
|
||||
void scoutfs_server_stop(struct super_block *sb);
|
||||
void scoutfs_server_stop_wait(struct super_block *sb);
|
||||
bool scoutfs_server_is_running(struct super_block *sb);
|
||||
|
||||
@@ -861,6 +861,7 @@ int scoutfs_srch_search_xattrs(struct super_block *sb,
|
||||
struct scoutfs_srch_rb_root *sroot,
|
||||
u64 hash, u64 ino, u64 last_ino, bool *done)
|
||||
{
|
||||
struct scoutfs_net_roots prev_roots;
|
||||
struct scoutfs_net_roots roots;
|
||||
struct scoutfs_srch_entry start;
|
||||
struct scoutfs_srch_entry end;
|
||||
@@ -868,7 +869,6 @@ int scoutfs_srch_search_xattrs(struct super_block *sb,
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_srch_file sfl;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
struct scoutfs_key key;
|
||||
unsigned long limit = SRCH_LIMIT;
|
||||
int ret;
|
||||
@@ -877,6 +877,7 @@ int scoutfs_srch_search_xattrs(struct super_block *sb,
|
||||
|
||||
*done = false;
|
||||
srch_init_rb_root(sroot);
|
||||
memset(&prev_roots, 0, sizeof(prev_roots));
|
||||
|
||||
start.hash = cpu_to_le64(hash);
|
||||
start.ino = cpu_to_le64(ino);
|
||||
@@ -891,6 +892,7 @@ retry:
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret)
|
||||
goto out;
|
||||
memset(&roots.fs_root, 0, sizeof(roots.fs_root));
|
||||
|
||||
end = final;
|
||||
|
||||
@@ -966,10 +968,16 @@ retry:
|
||||
*done = sre_cmp(&end, &final) == 0;
|
||||
ret = 0;
|
||||
out:
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.srch_root.ref,
|
||||
&roots.logs_root.ref);
|
||||
if (ret == -ESTALE)
|
||||
goto retry;
|
||||
if (ret == -ESTALE) {
|
||||
if (memcmp(&prev_roots, &roots, sizeof(roots)) == 0) {
|
||||
scoutfs_inc_counter(sb, srch_search_stale_eio);
|
||||
ret = -EIO;
|
||||
} else {
|
||||
scoutfs_inc_counter(sb, srch_search_stale_retry);
|
||||
prev_roots = roots;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -995,14 +1003,6 @@ int scoutfs_srch_rotate_log(struct super_block *sb,
|
||||
le64_to_cpu(sfl->ref.blkno), 0);
|
||||
ret = scoutfs_btree_insert(sb, alloc, wri, root, &key,
|
||||
sfl, sizeof(*sfl));
|
||||
/*
|
||||
* While it's fine to replay moving the client's logging srch
|
||||
* file to the core btree item, server commits should keep it
|
||||
* from happening. So we'll warn if we see it happen. This can
|
||||
* be removed eventually.
|
||||
*/
|
||||
if (WARN_ON_ONCE(ret == -EEXIST))
|
||||
ret = 0;
|
||||
if (ret == 0) {
|
||||
memset(sfl, 0, sizeof(*sfl));
|
||||
scoutfs_inc_counter(sb, srch_rotate_log);
|
||||
|
||||
@@ -47,7 +47,6 @@
|
||||
#include "omap.h"
|
||||
#include "volopt.h"
|
||||
#include "fence.h"
|
||||
#include "xattr.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
static struct dentry *scoutfs_debugfs_root;
|
||||
@@ -461,8 +460,9 @@ static int scoutfs_read_supers(struct super_block *sb)
|
||||
goto out;
|
||||
}
|
||||
|
||||
sbi->fsid = le64_to_cpu(meta_super->hdr.fsid);
|
||||
|
||||
sbi->fmt_vers = le64_to_cpu(meta_super->fmt_vers);
|
||||
sbi->super = *meta_super;
|
||||
out:
|
||||
kfree(meta_super);
|
||||
kfree(data_super);
|
||||
@@ -482,10 +482,8 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
sb->s_magic = SCOUTFS_SUPER_MAGIC;
|
||||
sb->s_maxbytes = MAX_LFS_FILESIZE;
|
||||
sb->s_op = &scoutfs_super_ops;
|
||||
sb->s_d_op = &scoutfs_dentry_ops;
|
||||
sb->s_export_op = &scoutfs_export_ops;
|
||||
sb->s_xattr = scoutfs_xattr_handlers;
|
||||
sb->s_flags |= MS_I_VERSION | MS_POSIXACL;
|
||||
sb->s_flags |= MS_I_VERSION;
|
||||
|
||||
/* btree blocks use long lived bh->b_data refs */
|
||||
mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS);
|
||||
@@ -498,7 +496,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
|
||||
ret = assign_random_id(sbi);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
return ret;
|
||||
|
||||
spin_lock_init(&sbi->next_ino_lock);
|
||||
spin_lock_init(&sbi->data_wait_root.lock);
|
||||
@@ -507,7 +505,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
|
||||
/* parse options early for use during setup */
|
||||
ret = scoutfs_options_early_setup(sb, data);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
return ret;
|
||||
scoutfs_options_read(sb, &opts);
|
||||
|
||||
ret = sb_set_blocksize(sb, SCOUTFS_BLOCK_SM_SIZE);
|
||||
@@ -630,6 +628,7 @@ MODULE_ALIAS_FS("scoutfs");
|
||||
static void teardown_module(void)
|
||||
{
|
||||
debugfs_remove(scoutfs_debugfs_root);
|
||||
scoutfs_dir_exit();
|
||||
scoutfs_inode_exit();
|
||||
scoutfs_sysfs_exit();
|
||||
}
|
||||
@@ -667,6 +666,7 @@ static int __init scoutfs_module_init(void)
|
||||
goto out;
|
||||
}
|
||||
ret = scoutfs_inode_init() ?:
|
||||
scoutfs_dir_init() ?:
|
||||
register_filesystem(&scoutfs_fs_type);
|
||||
out:
|
||||
if (ret)
|
||||
|
||||
@@ -35,10 +35,11 @@ struct scoutfs_sb_info {
|
||||
struct super_block *sb;
|
||||
|
||||
/* assigned once at the start of each mount, read-only */
|
||||
u64 fsid;
|
||||
u64 rid;
|
||||
u64 fmt_vers;
|
||||
|
||||
struct scoutfs_super_block super;
|
||||
|
||||
struct block_device *meta_bdev;
|
||||
|
||||
spinlock_t next_ino_lock;
|
||||
@@ -134,14 +135,14 @@ static inline bool scoutfs_unmounting(struct super_block *sb)
|
||||
(int)(le64_to_cpu(fsid) >> SCSB_SHIFT), \
|
||||
(int)(le64_to_cpu(rid) >> SCSB_SHIFT)
|
||||
#define SCSB_ARGS(sb) \
|
||||
(int)(SCOUTFS_SB(sb)->fsid >> SCSB_SHIFT), \
|
||||
(int)(le64_to_cpu(SCOUTFS_SB(sb)->super.hdr.fsid) >> SCSB_SHIFT), \
|
||||
(int)(SCOUTFS_SB(sb)->rid >> SCSB_SHIFT)
|
||||
#define SCSB_TRACE_FIELDS \
|
||||
__field(__u64, fsid) \
|
||||
__field(__u64, rid)
|
||||
#define SCSB_TRACE_ASSIGN(sb) \
|
||||
__entry->fsid = SCOUTFS_HAS_SBI(sb) ? \
|
||||
SCOUTFS_SB(sb)->fsid : 0; \
|
||||
le64_to_cpu(SCOUTFS_SB(sb)->super.hdr.fsid) : 0;\
|
||||
__entry->rid = SCOUTFS_HAS_SBI(sb) ? \
|
||||
SCOUTFS_SB(sb)->rid : 0;
|
||||
#define SCSB_TRACE_ARGS \
|
||||
|
||||
@@ -60,9 +60,10 @@ static ssize_t fsid_show(struct kobject *kobj, struct attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct super_block *sb = KOBJ_TO_SB(kobj, sb_id_kobj);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%016llx\n", sbi->fsid);
|
||||
return snprintf(buf, PAGE_SIZE, "%016llx\n",
|
||||
le64_to_cpu(super->hdr.fsid));
|
||||
}
|
||||
ATTR_FUNCS_RO(fsid);
|
||||
|
||||
@@ -267,7 +268,7 @@ int __init scoutfs_sysfs_init(void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
void scoutfs_sysfs_exit(void)
|
||||
void __exit scoutfs_sysfs_exit(void)
|
||||
{
|
||||
if (scoutfs_kset)
|
||||
kset_unregister(scoutfs_kset);
|
||||
|
||||
@@ -53,6 +53,6 @@ int scoutfs_setup_sysfs(struct super_block *sb);
|
||||
void scoutfs_destroy_sysfs(struct super_block *sb);
|
||||
|
||||
int __init scoutfs_sysfs_init(void);
|
||||
void scoutfs_sysfs_exit(void);
|
||||
void __exit scoutfs_sysfs_exit(void);
|
||||
|
||||
#endif
|
||||
|
||||
315
kmod/src/xattr.c
315
kmod/src/xattr.c
@@ -15,7 +15,6 @@
|
||||
#include <linux/dcache.h>
|
||||
#include <linux/xattr.h>
|
||||
#include <linux/crc32c.h>
|
||||
#include <linux/posix_acl.h>
|
||||
|
||||
#include "format.h"
|
||||
#include "inode.h"
|
||||
@@ -27,7 +26,6 @@
|
||||
#include "xattr.h"
|
||||
#include "lock.h"
|
||||
#include "hash.h"
|
||||
#include "acl.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
@@ -81,6 +79,16 @@ static void init_xattr_key(struct scoutfs_key *key, u64 ino, u32 name_hash,
|
||||
#define SCOUTFS_XATTR_PREFIX "scoutfs."
|
||||
#define SCOUTFS_XATTR_PREFIX_LEN (sizeof(SCOUTFS_XATTR_PREFIX) - 1)
|
||||
|
||||
static int unknown_prefix(const char *name)
|
||||
{
|
||||
return strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
|
||||
strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
|
||||
strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
|
||||
strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)&&
|
||||
strncmp(name, SCOUTFS_XATTR_PREFIX, SCOUTFS_XATTR_PREFIX_LEN);
|
||||
}
|
||||
|
||||
|
||||
#define HIDE_TAG "hide."
|
||||
#define SRCH_TAG "srch."
|
||||
#define TOTL_TAG "totl."
|
||||
@@ -447,17 +455,22 @@ out:
|
||||
* Copy the value for the given xattr name into the caller's buffer, if it
|
||||
* fits. Return the bytes copied or -ERANGE if it doesn't fit.
|
||||
*/
|
||||
int scoutfs_xattr_get_locked(struct inode *inode, const char *name, void *buffer, size_t size,
|
||||
struct scoutfs_lock *lck)
|
||||
ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer,
|
||||
size_t size)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
struct scoutfs_key key;
|
||||
unsigned int xat_bytes;
|
||||
size_t name_len;
|
||||
int ret;
|
||||
|
||||
if (unknown_prefix(name))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
name_len = strlen(name);
|
||||
if (name_len > SCOUTFS_XATTR_MAX_NAME_LEN)
|
||||
return -ENODATA;
|
||||
@@ -467,6 +480,10 @@ int scoutfs_xattr_get_locked(struct inode *inode, const char *name, void *buffer
|
||||
if (!xat)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lck);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
down_read(&si->xattr_rwsem);
|
||||
|
||||
ret = get_next_xattr(inode, &key, xat, xat_bytes, name, name_len, 0, 0, lck);
|
||||
@@ -492,27 +509,12 @@ int scoutfs_xattr_get_locked(struct inode *inode, const char *name, void *buffer
|
||||
ret = copy_xattr_value(sb, &key, xat, xat_bytes, buffer, size, lck);
|
||||
unlock:
|
||||
up_read(&si->xattr_rwsem);
|
||||
|
||||
scoutfs_unlock(sb, lck, SCOUTFS_LOCK_READ);
|
||||
out:
|
||||
kfree(xat);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int scoutfs_xattr_get(struct dentry *dentry, const char *name, void *buffer, size_t size)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_lock *lock = NULL;
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ, 0, inode, &lock);
|
||||
if (ret == 0) {
|
||||
ret = scoutfs_xattr_get_locked(inode, name, buffer, size, lock);
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_READ);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void scoutfs_xattr_init_totl_key(struct scoutfs_key *key, u64 *name)
|
||||
{
|
||||
scoutfs_key_set_zeros(key);
|
||||
@@ -617,32 +619,30 @@ int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len)
|
||||
* cause creation to fail if the xattr already exists (_CREATE) or
|
||||
* doesn't already exist (_REPLACE). xattrs can have a zero length
|
||||
* value.
|
||||
*
|
||||
* The caller has acquired cluster locks, holds a transaction, and has
|
||||
* dirtied the inode item so that they can update it after we modify it.
|
||||
* The caller has to know the tags to acquire cluster locks before
|
||||
* holding the transaction so they pass in the parsed tags, or all 0s for
|
||||
* non scoutfs. prefixes.
|
||||
*/
|
||||
int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_len,
|
||||
const void *value, size_t size, int flags,
|
||||
const struct scoutfs_xattr_prefix_tags *tgs,
|
||||
struct scoutfs_lock *lck, struct scoutfs_lock *totl_lock,
|
||||
struct list_head *ind_locks)
|
||||
static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct scoutfs_xattr_totl_val tval = {0,};
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
struct scoutfs_lock *totl_lock = NULL;
|
||||
size_t name_len = strlen(name);
|
||||
struct scoutfs_key totl_key;
|
||||
struct scoutfs_key key;
|
||||
bool undo_srch = false;
|
||||
bool undo_totl = false;
|
||||
LIST_HEAD(ind_locks);
|
||||
u8 found_parts;
|
||||
unsigned int xat_bytes_totl;
|
||||
unsigned int xat_bytes;
|
||||
unsigned int val_len;
|
||||
u64 ind_seq;
|
||||
u64 total;
|
||||
u64 hash = 0;
|
||||
u64 id = 0;
|
||||
@@ -651,9 +651,6 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
|
||||
trace_scoutfs_xattr_set(sb, name_len, value, size, flags);
|
||||
|
||||
if (WARN_ON_ONCE(tgs->totl && !totl_lock))
|
||||
return -EINVAL;
|
||||
|
||||
/* mirror the syscall's errors for large names and values */
|
||||
if (name_len > SCOUTFS_XATTR_MAX_NAME_LEN)
|
||||
return -ERANGE;
|
||||
@@ -664,10 +661,16 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
(flags & ~(XATTR_CREATE | XATTR_REPLACE)))
|
||||
return -EINVAL;
|
||||
|
||||
if ((tgs->hide | tgs->srch | tgs->totl) && !capable(CAP_SYS_ADMIN))
|
||||
if (unknown_prefix(name))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (scoutfs_xattr_parse_tags(name, name_len, &tgs) != 0)
|
||||
return -EINVAL;
|
||||
|
||||
if ((tgs.hide | tgs.srch | tgs.totl) && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (tgs->totl && ((ret = parse_totl_key(&totl_key, name, name_len)) != 0))
|
||||
if (tgs.totl && ((ret = parse_totl_key(&totl_key, name, name_len)) != 0))
|
||||
return ret;
|
||||
|
||||
/* allocate enough to always read an existing xattr's totl */
|
||||
@@ -676,44 +679,51 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
/* but store partial first item that only includes the new xattr's value */
|
||||
xat_bytes = first_item_bytes(name_len, size);
|
||||
xat = kmalloc(xat_bytes_totl, GFP_NOFS);
|
||||
if (!xat)
|
||||
return -ENOMEM;
|
||||
if (!xat) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lck);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
down_write(&si->xattr_rwsem);
|
||||
|
||||
/* find an existing xattr to delete, including possible totl value */
|
||||
ret = get_next_xattr(inode, &key, xat, xat_bytes_totl, name, name_len, 0, 0, lck);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto out;
|
||||
goto unlock;
|
||||
|
||||
/* check existence constraint flags */
|
||||
if (ret == -ENOENT && (flags & XATTR_REPLACE)) {
|
||||
ret = -ENODATA;
|
||||
goto out;
|
||||
goto unlock;
|
||||
} else if (ret >= 0 && (flags & XATTR_CREATE)) {
|
||||
ret = -EEXIST;
|
||||
goto out;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* not an error to delete something that doesn't exist */
|
||||
if (ret == -ENOENT && !value) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* s64 count delta if we create or delete */
|
||||
if (tgs->totl)
|
||||
if (tgs.totl)
|
||||
tval.count = cpu_to_le64((u64)!!(value) - (u64)!!(ret != -ENOENT));
|
||||
|
||||
/* found fields in key will also be used */
|
||||
found_parts = ret >= 0 ? xattr_nr_parts(xat) : 0;
|
||||
|
||||
if (found_parts && tgs->totl) {
|
||||
if (found_parts && tgs.totl) {
|
||||
/* parse old totl value before we clobber xat buf */
|
||||
val_len = ret - offsetof(struct scoutfs_xattr, name[xat->name_len]);
|
||||
ret = parse_totl_u64(&xat->name[xat->name_len], val_len, &total);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
goto unlock;
|
||||
|
||||
le64_add_cpu(&tval.total, -total);
|
||||
}
|
||||
@@ -732,90 +742,15 @@ int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_
|
||||
min(size, SCOUTFS_XATTR_MAX_PART_SIZE -
|
||||
offsetof(struct scoutfs_xattr, name[name_len])));
|
||||
|
||||
if (tgs->totl) {
|
||||
if (tgs.totl) {
|
||||
ret = parse_totl_u64(value, size, &total);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
le64_add_cpu(&tval.total, total);
|
||||
}
|
||||
|
||||
if (tgs->srch && !(found_parts && value)) {
|
||||
if (found_parts)
|
||||
id = le64_to_cpu(key.skx_id);
|
||||
hash = scoutfs_hash64(name, name_len);
|
||||
ret = scoutfs_forest_srch_add(sb, hash, ino, id);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
undo_srch = true;
|
||||
}
|
||||
|
||||
if (tgs->totl) {
|
||||
ret = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
undo_totl = true;
|
||||
}
|
||||
|
||||
if (found_parts && value)
|
||||
ret = change_xattr_items(inode, id, xat, xat_bytes, value, size,
|
||||
xattr_nr_parts(xat), found_parts, lck);
|
||||
else if (found_parts)
|
||||
ret = delete_xattr_items(inode, le64_to_cpu(key.skx_name_hash),
|
||||
le64_to_cpu(key.skx_id), found_parts,
|
||||
lck);
|
||||
else
|
||||
ret = create_xattr_items(inode, id, xat, xat_bytes, value, size,
|
||||
xattr_nr_parts(xat), lck);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* XXX do these want i_mutex or anything? */
|
||||
inode_inc_iversion(inode);
|
||||
inode->i_ctime = CURRENT_TIME;
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
if (ret < 0 && undo_srch) {
|
||||
err = scoutfs_forest_srch_add(sb, hash, ino, id);
|
||||
BUG_ON(err);
|
||||
}
|
||||
if (ret < 0 && undo_totl) {
|
||||
/* _delta() on dirty items shouldn't fail */
|
||||
tval.total = cpu_to_le64(-le64_to_cpu(tval.total));
|
||||
tval.count = cpu_to_le64(-le64_to_cpu(tval.count));
|
||||
err = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
BUG_ON(err);
|
||||
}
|
||||
|
||||
up_write(&si->xattr_rwsem);
|
||||
kfree(xat);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int scoutfs_xattr_set(struct dentry *dentry, const char *name, const void *value,
|
||||
size_t size, int flags)
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
struct scoutfs_lock *totl_lock = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
size_t name_len = strlen(name);
|
||||
LIST_HEAD(ind_locks);
|
||||
u64 ind_seq;
|
||||
int ret;
|
||||
|
||||
if (scoutfs_xattr_parse_tags(name, name_len, &tgs) != 0)
|
||||
return -EINVAL;
|
||||
|
||||
ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_WRITE,
|
||||
SCOUTFS_LKF_REFRESH_INODE, inode, &lck);
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
if (tgs.totl) {
|
||||
ret = scoutfs_lock_xattr_totl(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, &totl_lock);
|
||||
if (ret)
|
||||
@@ -835,98 +770,80 @@ retry:
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
|
||||
ret = scoutfs_xattr_set_locked(dentry->d_inode, name, name_len, value, size, flags, &tgs,
|
||||
lck, totl_lock, &ind_locks);
|
||||
if (ret == 0)
|
||||
scoutfs_update_inode_item(inode, lck, &ind_locks);
|
||||
if (tgs.srch && !(found_parts && value)) {
|
||||
if (found_parts)
|
||||
id = le64_to_cpu(key.skx_id);
|
||||
hash = scoutfs_hash64(name, name_len);
|
||||
ret = scoutfs_forest_srch_add(sb, hash, ino, id);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
undo_srch = true;
|
||||
}
|
||||
|
||||
if (tgs.totl) {
|
||||
ret = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
undo_totl = true;
|
||||
}
|
||||
|
||||
if (found_parts && value)
|
||||
ret = change_xattr_items(inode, id, xat, xat_bytes, value, size,
|
||||
xattr_nr_parts(xat), found_parts, lck);
|
||||
else if (found_parts)
|
||||
ret = delete_xattr_items(inode, le64_to_cpu(key.skx_name_hash),
|
||||
le64_to_cpu(key.skx_id), found_parts,
|
||||
lck);
|
||||
else
|
||||
ret = create_xattr_items(inode, id, xat, xat_bytes, value, size,
|
||||
xattr_nr_parts(xat), lck);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
|
||||
/* XXX do these want i_mutex or anything? */
|
||||
inode_inc_iversion(inode);
|
||||
inode->i_ctime = CURRENT_TIME;
|
||||
scoutfs_update_inode_item(inode, lck, &ind_locks);
|
||||
ret = 0;
|
||||
|
||||
release:
|
||||
if (ret < 0 && undo_srch) {
|
||||
err = scoutfs_forest_srch_add(sb, hash, ino, id);
|
||||
BUG_ON(err);
|
||||
}
|
||||
if (ret < 0 && undo_totl) {
|
||||
/* _delta() on dirty items shouldn't fail */
|
||||
tval.total = cpu_to_le64(-le64_to_cpu(tval.total));
|
||||
tval.count = cpu_to_le64(-le64_to_cpu(tval.count));
|
||||
err = apply_totl_delta(sb, &totl_key, &tval, totl_lock);
|
||||
BUG_ON(err);
|
||||
}
|
||||
|
||||
scoutfs_release_trans(sb);
|
||||
scoutfs_inode_index_unlock(sb, &ind_locks);
|
||||
unlock:
|
||||
up_write(&si->xattr_rwsem);
|
||||
scoutfs_unlock(sb, lck, SCOUTFS_LOCK_WRITE);
|
||||
scoutfs_unlock(sb, totl_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
out:
|
||||
kfree(xat);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Future kernels have this amazing hack to rewind the name to get the
|
||||
* skipped prefix. We're back in the stone ages without the handler
|
||||
* arg, so we Just Know that this is possible. This will become a
|
||||
* compat hook to either call the kernel's xattr_full_name(handler), or
|
||||
* our hack to use the flags as the prefix length.
|
||||
*/
|
||||
static const char *full_name_hack(void *handler, const char *name, int len)
|
||||
int scoutfs_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags)
|
||||
{
|
||||
return name - len;
|
||||
}
|
||||
if (size == 0)
|
||||
value = ""; /* set empty value */
|
||||
|
||||
static int scoutfs_xattr_get_handler(struct dentry *dentry, const char *name,
|
||||
void *value, size_t size, int handler_flags)
|
||||
{
|
||||
name = full_name_hack(NULL, name, handler_flags);
|
||||
return scoutfs_xattr_get(dentry, name, value, size);
|
||||
}
|
||||
|
||||
static int scoutfs_xattr_set_handler(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags, int handler_flags)
|
||||
{
|
||||
name = full_name_hack(NULL, name, handler_flags);
|
||||
return scoutfs_xattr_set(dentry, name, value, size, flags);
|
||||
}
|
||||
|
||||
static const struct xattr_handler scoutfs_xattr_user_handler = {
|
||||
.prefix = XATTR_USER_PREFIX,
|
||||
.flags = XATTR_USER_PREFIX_LEN,
|
||||
.get = scoutfs_xattr_get_handler,
|
||||
.set = scoutfs_xattr_set_handler,
|
||||
};
|
||||
|
||||
static const struct xattr_handler scoutfs_xattr_scoutfs_handler = {
|
||||
.prefix = SCOUTFS_XATTR_PREFIX,
|
||||
.flags = SCOUTFS_XATTR_PREFIX_LEN,
|
||||
.get = scoutfs_xattr_get_handler,
|
||||
.set = scoutfs_xattr_set_handler,
|
||||
};
|
||||
|
||||
static const struct xattr_handler scoutfs_xattr_trusted_handler = {
|
||||
.prefix = XATTR_TRUSTED_PREFIX,
|
||||
.flags = XATTR_TRUSTED_PREFIX_LEN,
|
||||
.get = scoutfs_xattr_get_handler,
|
||||
.set = scoutfs_xattr_set_handler,
|
||||
};
|
||||
|
||||
static const struct xattr_handler scoutfs_xattr_security_handler = {
|
||||
.prefix = XATTR_SECURITY_PREFIX,
|
||||
.flags = XATTR_SECURITY_PREFIX_LEN,
|
||||
.get = scoutfs_xattr_get_handler,
|
||||
.set = scoutfs_xattr_set_handler,
|
||||
};
|
||||
|
||||
static const struct xattr_handler scoutfs_xattr_acl_access_handler = {
|
||||
.prefix = XATTR_NAME_POSIX_ACL_ACCESS,
|
||||
.flags = ACL_TYPE_ACCESS,
|
||||
.get = scoutfs_acl_get_xattr,
|
||||
.set = scoutfs_acl_set_xattr,
|
||||
};
|
||||
|
||||
static const struct xattr_handler scoutfs_xattr_acl_default_handler = {
|
||||
.prefix = XATTR_NAME_POSIX_ACL_DEFAULT,
|
||||
.flags = ACL_TYPE_DEFAULT,
|
||||
.get = scoutfs_acl_get_xattr,
|
||||
.set = scoutfs_acl_set_xattr,
|
||||
};
|
||||
|
||||
const struct xattr_handler *scoutfs_xattr_handlers[] = {
|
||||
&scoutfs_xattr_user_handler,
|
||||
&scoutfs_xattr_scoutfs_handler,
|
||||
&scoutfs_xattr_trusted_handler,
|
||||
&scoutfs_xattr_security_handler,
|
||||
&scoutfs_xattr_acl_access_handler,
|
||||
&scoutfs_xattr_acl_default_handler,
|
||||
NULL
|
||||
};
|
||||
int scoutfs_removexattr(struct dentry *dentry, const char *name)
|
||||
{
|
||||
return scoutfs_xattr_set(dentry, name, NULL, 0, XATTR_REPLACE);
|
||||
}
|
||||
|
||||
ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer,
|
||||
size_t size, __u32 *hash_pos, __u64 *id_pos,
|
||||
|
||||
@@ -1,29 +1,25 @@
|
||||
#ifndef _SCOUTFS_XATTR_H_
|
||||
#define _SCOUTFS_XATTR_H_
|
||||
|
||||
ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer,
|
||||
size_t size);
|
||||
int scoutfs_setxattr(struct dentry *dentry, const char *name,
|
||||
const void *value, size_t size, int flags);
|
||||
int scoutfs_removexattr(struct dentry *dentry, const char *name);
|
||||
ssize_t scoutfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
|
||||
ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer,
|
||||
size_t size, __u32 *hash_pos, __u64 *id_pos,
|
||||
bool e_range, bool show_hidden);
|
||||
|
||||
int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
struct scoutfs_xattr_prefix_tags {
|
||||
unsigned long hide:1,
|
||||
srch:1,
|
||||
totl:1;
|
||||
};
|
||||
|
||||
extern const struct xattr_handler *scoutfs_xattr_handlers[];
|
||||
|
||||
int scoutfs_xattr_get_locked(struct inode *inode, const char *name, void *buffer, size_t size,
|
||||
struct scoutfs_lock *lck);
|
||||
int scoutfs_xattr_set_locked(struct inode *inode, const char *name, size_t name_len,
|
||||
const void *value, size_t size, int flags,
|
||||
const struct scoutfs_xattr_prefix_tags *tgs,
|
||||
struct scoutfs_lock *lck, struct scoutfs_lock *totl_lock,
|
||||
struct list_head *ind_locks);
|
||||
|
||||
ssize_t scoutfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
|
||||
ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer,
|
||||
size_t size, __u32 *hash_pos, __u64 *id_pos,
|
||||
bool e_range, bool show_hidden);
|
||||
int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
struct scoutfs_xattr_prefix_tags *tgs);
|
||||
|
||||
|
||||
1
tests/.gitignore
vendored
1
tests/.gitignore
vendored
@@ -8,4 +8,3 @@ src/bulk_create_paths
|
||||
src/find_xattrs
|
||||
src/stage_tmpfile
|
||||
src/create_xattr_loop
|
||||
src/o_tmpfile_umask
|
||||
|
||||
@@ -10,9 +10,7 @@ BIN := src/createmany \
|
||||
src/bulk_create_paths \
|
||||
src/stage_tmpfile \
|
||||
src/find_xattrs \
|
||||
src/create_xattr_loop \
|
||||
src/fragmented_data_extents \
|
||||
src/o_tmpfile_umask
|
||||
src/create_xattr_loop
|
||||
|
||||
DEPS := $(wildcard src/*.d)
|
||||
|
||||
|
||||
@@ -377,14 +377,6 @@ t_wait_for_leader() {
|
||||
done
|
||||
}
|
||||
|
||||
t_get_sysfs_mount_option() {
|
||||
local nr="$1"
|
||||
local name="$2"
|
||||
local opt="$(t_sysfs_path $nr)/mount_options/$name"
|
||||
|
||||
cat "$opt"
|
||||
}
|
||||
|
||||
t_set_sysfs_mount_option() {
|
||||
local nr="$1"
|
||||
local name="$2"
|
||||
@@ -413,7 +405,7 @@ t_save_all_sysfs_mount_options() {
|
||||
|
||||
for i in $(t_fs_nrs); do
|
||||
opt="$(t_sysfs_path $i)/mount_options/$name"
|
||||
ind="${name}_${i}"
|
||||
ind="$name_$i"
|
||||
|
||||
_saved_opts[$ind]="$(cat $opt)"
|
||||
done
|
||||
@@ -425,7 +417,7 @@ t_restore_all_sysfs_mount_options() {
|
||||
local i
|
||||
|
||||
for i in $(t_fs_nrs); do
|
||||
ind="${name}_${i}"
|
||||
ind="$name_$i"
|
||||
|
||||
t_set_sysfs_mount_option $i $name "${_saved_opts[$ind]}"
|
||||
done
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
== truncate writes zeroed partial end of file block
|
||||
0000000 0a79 0a79 0a79 0a79 0a79 0a79 0a79 0a79
|
||||
*
|
||||
0006144 0000 0000 0000 0000 0000 0000 0000 0000
|
||||
*
|
||||
0012288
|
||||
@@ -1,26 +0,0 @@
|
||||
== initial writes smaller than prealloc grow to prealloc size
|
||||
/mnt/test/test/data-prealloc/file-1: 7 extents found
|
||||
/mnt/test/test/data-prealloc/file-2: 7 extents found
|
||||
== larger files get full prealloc extents
|
||||
/mnt/test/test/data-prealloc/file-1: 9 extents found
|
||||
/mnt/test/test/data-prealloc/file-2: 9 extents found
|
||||
== non-streaming writes with contig have per-block extents
|
||||
/mnt/test/test/data-prealloc/file-1: 32 extents found
|
||||
/mnt/test/test/data-prealloc/file-2: 32 extents found
|
||||
== any writes to region prealloc get full extents
|
||||
/mnt/test/test/data-prealloc/file-1: 4 extents found
|
||||
/mnt/test/test/data-prealloc/file-2: 4 extents found
|
||||
/mnt/test/test/data-prealloc/file-1: 4 extents found
|
||||
/mnt/test/test/data-prealloc/file-2: 4 extents found
|
||||
== streaming offline writes get full extents either way
|
||||
/mnt/test/test/data-prealloc/file-1: 4 extents found
|
||||
/mnt/test/test/data-prealloc/file-2: 4 extents found
|
||||
/mnt/test/test/data-prealloc/file-1: 4 extents found
|
||||
/mnt/test/test/data-prealloc/file-2: 4 extents found
|
||||
== goofy preallocation amounts work
|
||||
/mnt/test/test/data-prealloc/file-1: 5 extents found
|
||||
/mnt/test/test/data-prealloc/file-2: 5 extents found
|
||||
/mnt/test/test/data-prealloc/file-1: 5 extents found
|
||||
/mnt/test/test/data-prealloc/file-2: 5 extents found
|
||||
/mnt/test/test/data-prealloc/file-1: 3 extents found
|
||||
/mnt/test/test/data-prealloc/file-2: 3 extents found
|
||||
@@ -1,3 +0,0 @@
|
||||
== creating fragmented extents
|
||||
== unlink file with moved extents to free extents per block
|
||||
== cleanup
|
||||
@@ -7,4 +7,3 @@ found second
|
||||
== changing metadata must increase meta seq
|
||||
== changing contents must increase data seq
|
||||
== make sure dirtying doesn't livelock walk
|
||||
== concurrent update attempts maintain single entries
|
||||
|
||||
@@ -1,11 +1,3 @@
|
||||
== non-acl O_TMPFILE creation honors umask
|
||||
umask 022
|
||||
fstat after open(0777): 0100755
|
||||
stat after linkat: 0100755
|
||||
umask 077
|
||||
fstat after open(0777): 0100700
|
||||
stat after linkat: 0100700
|
||||
== stage from tmpfile
|
||||
total file size 33669120
|
||||
00000000 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |AAAAAAAAAAAAAAAA|
|
||||
*
|
||||
@@ -40,7 +40,6 @@ generic/092
|
||||
generic/098
|
||||
generic/101
|
||||
generic/104
|
||||
generic/105
|
||||
generic/106
|
||||
generic/107
|
||||
generic/117
|
||||
@@ -52,7 +51,6 @@ generic/184
|
||||
generic/221
|
||||
generic/228
|
||||
generic/236
|
||||
generic/237
|
||||
generic/245
|
||||
generic/249
|
||||
generic/257
|
||||
@@ -65,7 +63,6 @@ generic/308
|
||||
generic/309
|
||||
generic/313
|
||||
generic/315
|
||||
generic/319
|
||||
generic/322
|
||||
generic/335
|
||||
generic/336
|
||||
@@ -75,7 +72,6 @@ generic/342
|
||||
generic/343
|
||||
generic/348
|
||||
generic/360
|
||||
generic/375
|
||||
generic/376
|
||||
generic/377
|
||||
Not
|
||||
@@ -286,4 +282,4 @@ shared/004
|
||||
shared/032
|
||||
shared/051
|
||||
shared/289
|
||||
Passed all 79 tests
|
||||
Passed all 75 tests
|
||||
|
||||
@@ -58,7 +58,6 @@ $(basename $0) options:
|
||||
-m | Run mkfs on the device before mounting and running
|
||||
| tests. Implies unmounting existing mounts first.
|
||||
-n <nr> | The number of devices and mounts to test.
|
||||
-o <opts> | Add option string to all mounts during all tests.
|
||||
-P | Enable trace_printk.
|
||||
-p | Exit script after preparing mounts only, don't run tests.
|
||||
-q <nr> | The first <nr> mounts will be quorum members. Must be
|
||||
@@ -69,7 +68,6 @@ $(basename $0) options:
|
||||
-s | Skip git repo checkouts.
|
||||
-t | Enabled trace events that match the given glob argument.
|
||||
| Multiple options enable multiple globbed events.
|
||||
-T <nr> | Multiply the original trace buffer size by nr during the run.
|
||||
-X | xfstests git repo. Used by tests/xfstests.sh.
|
||||
-x | xfstests git branch to checkout and track.
|
||||
-y | xfstests ./check additional args
|
||||
@@ -138,12 +136,6 @@ while true; do
|
||||
T_NR_MOUNTS="$2"
|
||||
shift
|
||||
;;
|
||||
-o)
|
||||
test -n "$2" || die "-o must have option string argument"
|
||||
# always appending to existing options
|
||||
T_MNT_OPTIONS+=",$2"
|
||||
shift
|
||||
;;
|
||||
-P)
|
||||
T_TRACE_PRINTK="1"
|
||||
;;
|
||||
@@ -168,11 +160,6 @@ while true; do
|
||||
T_TRACE_GLOB+=("$2")
|
||||
shift
|
||||
;;
|
||||
-T)
|
||||
test -n "$2" || die "-T must have trace buffer size multiplier argument"
|
||||
T_TRACE_MULT="$2"
|
||||
shift
|
||||
;;
|
||||
-X)
|
||||
test -n "$2" || die "-X requires xfstests git repo dir argument"
|
||||
T_XFSTESTS_REPO="$2"
|
||||
@@ -358,13 +345,6 @@ if [ -n "$T_INSMOD" ]; then
|
||||
cmd insmod "$T_KMOD/src/scoutfs.ko"
|
||||
fi
|
||||
|
||||
if [ -n "$T_TRACE_MULT" ]; then
|
||||
orig_trace_size=$(cat /sys/kernel/debug/tracing/buffer_size_kb)
|
||||
mult_trace_size=$((orig_trace_size * T_TRACE_MULT))
|
||||
msg "increasing trace buffer size from $orig_trace_size KiB to $mult_trace_size KiB"
|
||||
echo $mult_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
|
||||
fi
|
||||
|
||||
nr_globs=${#T_TRACE_GLOB[@]}
|
||||
if [ $nr_globs -gt 0 ]; then
|
||||
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
|
||||
@@ -394,7 +374,6 @@ fi
|
||||
# always describe tracing in the logs
|
||||
cmd cat /sys/kernel/debug/tracing/set_event
|
||||
cmd grep . /sys/kernel/debug/tracing/options/trace_printk \
|
||||
/sys/kernel/debug/tracing/buffer_size_kb \
|
||||
/proc/sys/kernel/ftrace_dump_on_oops
|
||||
|
||||
#
|
||||
@@ -451,7 +430,6 @@ for i in $(seq 0 $((T_NR_MOUNTS - 1))); do
|
||||
if [ "$i" -lt "$T_QUORUM" ]; then
|
||||
opts="$opts,quorum_slot_nr=$i"
|
||||
fi
|
||||
opts="${opts}${T_MNT_OPTIONS}"
|
||||
|
||||
msg "mounting $meta_dev|$data_dev on $dir"
|
||||
cmd mount -t scoutfs $opts "$data_dev" "$dir" &
|
||||
@@ -626,9 +604,6 @@ if [ -n "$T_TRACE_GLOB" -o -n "$T_TRACE_PRINTK" ]; then
|
||||
echo 0 > /sys/kernel/debug/tracing/events/scoutfs/enable
|
||||
echo 0 > /sys/kernel/debug/tracing/options/trace_printk
|
||||
cat /sys/kernel/debug/tracing/trace > "$T_RESULTS/traces"
|
||||
if [ -n "$orig_trace_size" ]; then
|
||||
echo $orig_trace_size > /sys/kernel/debug/tracing/buffer_size_kb
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$skipped" == 0 -a "$failed" == 0 ]; then
|
||||
|
||||
@@ -6,12 +6,9 @@ simple-inode-index.sh
|
||||
simple-staging.sh
|
||||
simple-release-extents.sh
|
||||
fallocate.sh
|
||||
basic-truncate.sh
|
||||
data-prealloc.sh
|
||||
setattr_more.sh
|
||||
offline-extent-waiting.sh
|
||||
move-blocks.sh
|
||||
large-fragmented-free.sh
|
||||
enospc.sh
|
||||
srch-basic-functionality.sh
|
||||
simple-xattr-unit.sh
|
||||
@@ -27,7 +24,7 @@ createmany-large-names.sh
|
||||
createmany-rename-large-dir.sh
|
||||
stage-release-race-alloc.sh
|
||||
stage-multi-part.sh
|
||||
o_tmpfile.sh
|
||||
stage-tmpfile.sh
|
||||
basic-posix-consistency.sh
|
||||
dirent-consistency.sh
|
||||
mkdir-rename-rmdir.sh
|
||||
|
||||
@@ -1,113 +0,0 @@
|
||||
/*
|
||||
* Copyright (C) 2021 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
/*
|
||||
* This creates fragmented data extents.
|
||||
*
|
||||
* A file is created that has alternating free and allocated extents.
|
||||
* This also results in the global allocator having the matching
|
||||
* fragmented free extent pattern. While that file is being created,
|
||||
* occasionally an allocated extent is moved to another file. This
|
||||
* results in a file that has fragmented extents at a given stride that
|
||||
* can be deleted to create free data extents with a given stride.
|
||||
*
|
||||
* We don't have hole punching so to do this quickly we use a goofy
|
||||
* combination of fallocate, truncate, and our move_blocks ioctl.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <linux/types.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include "ioctl.h"
|
||||
|
||||
#define BLOCK_SIZE 4096
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
struct scoutfs_ioctl_move_blocks mb = {0,};
|
||||
unsigned long long freed_extents;
|
||||
unsigned long long move_stride;
|
||||
unsigned long long i;
|
||||
int alloc_fd;
|
||||
int trunc_fd;
|
||||
off_t off;
|
||||
int ret;
|
||||
|
||||
if (argc != 5) {
|
||||
printf("%s <freed_extents> <move_stride> <alloc_file> <trunc_file>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
freed_extents = strtoull(argv[1], NULL, 0);
|
||||
move_stride = strtoull(argv[2], NULL, 0);
|
||||
|
||||
alloc_fd = open(argv[3], O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
|
||||
if (alloc_fd == -1) {
|
||||
fprintf(stderr, "error opening %s: %d (%s)\n", argv[3], errno, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
trunc_fd = open(argv[4], O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
|
||||
if (trunc_fd == -1) {
|
||||
fprintf(stderr, "error opening %s: %d (%s)\n", argv[4], errno, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (i = 0, off = 0; i < freed_extents; i++, off += BLOCK_SIZE * 2) {
|
||||
|
||||
ret = fallocate(alloc_fd, 0, off, BLOCK_SIZE * 2);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "fallocate at off %llu error: %d (%s)\n",
|
||||
(unsigned long long)off, errno, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ret = ftruncate(alloc_fd, off + BLOCK_SIZE);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "truncate to off %llu error: %d (%s)\n",
|
||||
(unsigned long long)off + BLOCK_SIZE, errno, strerror(errno));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ((i % move_stride) == 0) {
|
||||
mb.from_fd = alloc_fd;
|
||||
mb.from_off = off;
|
||||
mb.len = BLOCK_SIZE;
|
||||
mb.to_off = i * BLOCK_SIZE;
|
||||
|
||||
ret = ioctl(trunc_fd, SCOUTFS_IOC_MOVE_BLOCKS, &mb);
|
||||
if (ret < 0) {
|
||||
fprintf(stderr, "move from off %llu error: %d (%s)\n",
|
||||
(unsigned long long)off,
|
||||
errno, strerror(errno));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (alloc_fd > -1)
|
||||
close(alloc_fd);
|
||||
if (trunc_fd > -1)
|
||||
close(trunc_fd);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@@ -1,97 +0,0 @@
|
||||
/*
|
||||
* Show the modes of files as we create them with O_TMPFILE and link
|
||||
* them into the namespace.
|
||||
*
|
||||
* Copyright (C) 2022 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
|
||||
static void linkat_tmpfile_modes(char *dir, char *lpath, mode_t mode)
|
||||
{
|
||||
char proc_self[PATH_MAX];
|
||||
struct stat st;
|
||||
int ret;
|
||||
int fd;
|
||||
|
||||
umask(mode);
|
||||
printf("umask 0%o\n", mode);
|
||||
|
||||
fd = open(dir, O_RDWR | O_TMPFILE, 0777);
|
||||
if (fd < 0) {
|
||||
perror("open(O_TMPFILE)");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ret = fstat(fd, &st);
|
||||
if (ret < 0) {
|
||||
perror("fstat");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("fstat after open(0777): 0%o\n", st.st_mode);
|
||||
|
||||
snprintf(proc_self, sizeof(proc_self), "/proc/self/fd/%d", fd);
|
||||
|
||||
ret = linkat(AT_FDCWD, proc_self, AT_FDCWD, lpath, AT_SYMLINK_FOLLOW);
|
||||
if (ret < 0) {
|
||||
perror("linkat");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
||||
ret = stat(lpath, &st);
|
||||
if (ret < 0) {
|
||||
perror("fstat");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("stat after linkat: 0%o\n", st.st_mode);
|
||||
|
||||
ret = unlink(lpath);
|
||||
if (ret < 0) {
|
||||
perror("unlink");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Usage: <open_dir> <linkat_path>
 *
 * Runs the O_TMPFILE mode check twice, first with a umask of 022 and
 * then with a umask of 077.  Returns 1 when too few arguments are given.
 */
int main(int argc, char **argv)
{
	static const mode_t masks[] = { 022, 077 };
	size_t nr;

	if (argc < 3) {
		printf("%s <open_dir> <linkat_path>\n", argv[0]);
		return 1;
	}

	for (nr = 0; nr < sizeof(masks) / sizeof(masks[0]); nr++)
		linkat_tmpfile_modes(argv[1], argv[2], masks[nr]);

	return 0;
}
|
||||
@@ -1,21 +0,0 @@
|
||||
#
|
||||
# Test basic correctness of truncate.
|
||||
#
|
||||
|
||||
t_require_commands yes dd od truncate
|
||||
|
||||
FILE="$T_D0/file"
|
||||
|
||||
#
|
||||
# We forgot to write a dirty block that zeroed the tail of a partial
|
||||
# final block as we truncated past it.
|
||||
#
|
||||
echo "== truncate writes zeroed partial end of file block"
|
||||
yes | dd of="$FILE" bs=8K count=1 status=none
|
||||
sync
|
||||
truncate -s 6K "$FILE"
|
||||
truncate -s 12K "$FILE"
|
||||
echo 3 > /proc/sys/vm/drop_caches
|
||||
od -Ad -x "$FILE"
|
||||
|
||||
t_pass
|
||||
@@ -1,136 +0,0 @@
|
||||
#
|
||||
# test that the data prealloc options behave as expected. We write to
|
||||
# two files a block at a time so that a single file doesn't naturally
|
||||
# merge adjacent consecutive allocations. (we don't have multiple
|
||||
# allocation cursors)
|
||||
#
|
||||
t_require_commands scoutfs stat filefrag dd touch truncate
|
||||
|
||||
#
# Empty "$prefix"-1 and "$prefix"-2 and then write <nr> 4KiB blocks of
# zeros to them in ascending block order, alternating between the two
# files for every block.
#
write_forwards()
{
	local prefix="$1"
	local nr="$2"
	local blk
	local f

	touch "$prefix"-{1,2}
	truncate -s 0 "$prefix"-{1,2}

	for ((blk = 0; blk < nr; blk++)); do
		for f in "$prefix"-1 "$prefix"-2; do
			dd if=/dev/zero of="$f" bs=4096 seek=$blk count=1 conv=notrunc status=none
		done
	done
}
|
||||
|
||||
#
# Empty "$prefix"-1 and "$prefix"-2 and then write <nr> 4KiB blocks of
# zeros to them in descending block order, alternating between the two
# files for every block.
#
write_backwards()
{
	local prefix="$1"
	local nr="$2"
	local blk
	local f

	touch "$prefix"-{1,2}
	truncate -s 0 "$prefix"-{1,2}

	for ((blk = nr - 1; blk >= 0; blk--)); do
		for f in "$prefix"-1 "$prefix"-2; do
			dd if=/dev/zero of="$f" bs=4096 seek=$blk count=1 conv=notrunc status=none
		done
	done
}
|
||||
|
||||
#
# Release the full contents of every file whose name starts with
# <prefix>.  The length released is each file's current size as reported
# by stat and the version is its current data_version.
#
release_files() {
	local prefix="$1"
	# replaced by the stat result before it is used in the loop
	local size=$(($2 * 4096))
	local vers
	local path

	for path in "$prefix"*; do
		size=$(stat -c "%s" "$path")
		vers=$(scoutfs stat -s data_version "$path")

		scoutfs release "$path" -V "$vers" -o 0 -l $size
	done
}
|
||||
|
||||
#
# Stage every file whose name starts with <prefix> from /dev/zero, one
# 4KiB block at a time.  All the files are staged at a given block
# before moving on to the next block, for <nr> blocks.
#
stage_files() {
	local prefix="$1"
	local nr="$2"
	local vers
	local blk	# keep the loop variable out of the caller's scope
	local f

	for blk in $(seq 0 1 $((nr - 1))); do
		for f in "$prefix"*; do
			vers=$(scoutfs stat -s data_version "$f")
			scoutfs stage /dev/zero "$f" -V "$vers" -o $((blk * 4096)) -l 4096
		done
	done
}
|
||||
|
||||
#
# Print filefrag's "extents found" summary line for every file whose
# name starts with <prefix>, passed through t_filter_fs.
#
print_extents_found()
{
	local prefix="$1"

	filefrag "$prefix"* 2>&1 |
		grep "extent.*found" |
		t_filter_fs
}
|
||||
|
||||
t_save_all_sysfs_mount_options data_prealloc_blocks
|
||||
t_save_all_sysfs_mount_options data_prealloc_contig_only
|
||||
#
# Restore the two preallocation mount options to the values that were
# saved before the test changed them.
#
restore_options()
{
	local opt

	for opt in data_prealloc_blocks data_prealloc_contig_only; do
		t_restore_all_sysfs_mount_options $opt
	done
}
|
||||
trap restore_options EXIT
|
||||
|
||||
prefix="$T_D0/file"
|
||||
|
||||
echo "== initial writes smaller than prealloc grow to prealloc size"
|
||||
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
|
||||
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
|
||||
write_forwards $prefix 64
|
||||
print_extents_found $prefix
|
||||
|
||||
echo "== larger files get full prealloc extents"
|
||||
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
|
||||
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
|
||||
write_forwards $prefix 128
|
||||
print_extents_found $prefix
|
||||
|
||||
echo "== non-streaming writes with contig have per-block extents"
|
||||
t_set_sysfs_mount_option 0 data_prealloc_blocks 32
|
||||
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
|
||||
write_backwards $prefix 32
|
||||
print_extents_found $prefix
|
||||
|
||||
echo "== any writes to region prealloc get full extents"
|
||||
t_set_sysfs_mount_option 0 data_prealloc_blocks 16
|
||||
t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
|
||||
write_forwards $prefix 64
|
||||
print_extents_found $prefix
|
||||
write_backwards $prefix 64
|
||||
print_extents_found $prefix
|
||||
|
||||
echo "== streaming offline writes get full extents either way"
|
||||
t_set_sysfs_mount_option 0 data_prealloc_blocks 16
|
||||
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
|
||||
write_forwards $prefix 64
|
||||
release_files $prefix 64
|
||||
stage_files $prefix 64
|
||||
print_extents_found $prefix
|
||||
t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
|
||||
release_files $prefix 64
|
||||
stage_files $prefix 64
|
||||
print_extents_found $prefix
|
||||
|
||||
echo "== goofy preallocation amounts work"
|
||||
t_set_sysfs_mount_option 0 data_prealloc_blocks 7
|
||||
t_set_sysfs_mount_option 0 data_prealloc_contig_only 1
|
||||
write_forwards $prefix 14
|
||||
print_extents_found $prefix
|
||||
t_set_sysfs_mount_option 0 data_prealloc_blocks 13
|
||||
t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
|
||||
write_forwards $prefix 53
|
||||
print_extents_found $prefix
|
||||
t_set_sysfs_mount_option 0 data_prealloc_blocks 1
|
||||
t_set_sysfs_mount_option 0 data_prealloc_contig_only 0
|
||||
write_forwards $prefix 3
|
||||
print_extents_found $prefix
|
||||
|
||||
t_pass
|
||||
@@ -1,22 +0,0 @@
|
||||
#
|
||||
# Make sure the server can handle a transaction with a data_freed whose
|
||||
# blocks all hit different btree blocks in the main free list. It
|
||||
# probably has to be merged in multiple commits.
|
||||
#
|
||||
|
||||
t_require_commands fragmented_data_extents
|
||||
|
||||
EXTENTS_PER_BTREE_BLOCK=600
|
||||
EXTENTS_PER_LIST_BLOCK=8192
|
||||
FREED_EXTENTS=$((EXTENTS_PER_BTREE_BLOCK * EXTENTS_PER_LIST_BLOCK))
|
||||
|
||||
echo "== creating fragmented extents"
|
||||
fragmented_data_extents $FREED_EXTENTS $EXTENTS_PER_BTREE_BLOCK "$T_D0/alloc" "$T_D0/move"
|
||||
|
||||
echo "== unlink file with moved extents to free extents per block"
|
||||
rm -f "$T_D0/move"
|
||||
|
||||
echo "== cleanup"
|
||||
rm -f "$T_D0/alloc"
|
||||
|
||||
t_pass
|
||||
@@ -1,16 +0,0 @@
|
||||
#
# basic tests of O_TMPFILE
#

# every helper binary this test runs is listed here, including o_tmpfile_umask
t_require_commands o_tmpfile_umask stage_tmpfile hexdump

echo "== non-acl O_TMPFILE creation honors umask"
o_tmpfile_umask "$T_D0" "$T_D0/umask-file"

echo "== stage from tmpfile"
DEST_FILE="$T_D0/dest_file"
stage_tmpfile "$T_D0" "$DEST_FILE"
hexdump -C "$DEST_FILE"
rm -f "$DEST_FILE"

t_pass
|
||||
@@ -103,34 +103,4 @@ while [ "$nr" -lt 100 ]; do
|
||||
((nr++))
|
||||
done
|
||||
|
||||
#
|
||||
# make sure rapid concurrent metadata updates don't create multiple
|
||||
# meta_seq entries
|
||||
#
|
||||
# we had a bug where deletion items created under concurrent_write locks
|
||||
# could get versions older than the items they're deleting which were
|
||||
# protected by read/write locks.
|
||||
#
|
||||
echo "== concurrent update attempts maintain single entries"
|
||||
FILES=4
|
||||
nr=1
|
||||
while [ "$nr" -lt 10 ]; do
|
||||
# touch a bunch of files in parallel from all mounts
|
||||
for i in $(t_fs_nrs); do
|
||||
eval path="\$T_D${i}"
|
||||
seq -f "$path/file-%.0f" 1 $FILES | xargs touch &
|
||||
done
|
||||
wait || t_fail "concurrent file updates failed"
|
||||
|
||||
# make sure no inodes have duplicate entries
|
||||
sync
|
||||
scoutfs walk-inodes -p "$T_D0" meta_seq -- 0 -1 | \
|
||||
grep -v "minor" | \
|
||||
awk '{print $4}' | \
|
||||
sort -n | uniq -c | \
|
||||
awk '($1 != 1)' | \
|
||||
sort -n
|
||||
((nr++))
|
||||
done
|
||||
|
||||
t_pass
|
||||
|
||||
@@ -36,8 +36,7 @@ test_xattr_lengths() {
|
||||
else
|
||||
echo "$name=\"$val\"" > "$T_TMP.good"
|
||||
fi
|
||||
cmp "$T_TMP.good" "$T_TMP.got" || \
|
||||
t_fail "cmp failed name len $name_len val len $val_len"
|
||||
cmp "$T_TMP.good" "$T_TMP.got" || exit 1
|
||||
|
||||
setfattr -x $name "$FILE"
|
||||
}
|
||||
|
||||
15
tests/tests/stage-tmpfile.sh
Normal file
15
tests/tests/stage-tmpfile.sh
Normal file
@@ -0,0 +1,15 @@
|
||||
#
|
||||
# Run stage_tmpfile and check the output with hexdump.
|
||||
#
|
||||
|
||||
t_require_commands stage_tmpfile hexdump
|
||||
|
||||
DEST_FILE="$T_D0/dest_file"
|
||||
|
||||
stage_tmpfile $T_D0 $DEST_FILE
|
||||
|
||||
hexdump -C "$DEST_FILE"
|
||||
|
||||
rm -fr "$DEST_FILE"
|
||||
|
||||
t_pass
|
||||
@@ -65,6 +65,7 @@ generic/030 # mmap missing
|
||||
generic/075 # file content mismatch failures (fds, etc)
|
||||
generic/080 # mmap missing
|
||||
generic/103 # enospc causes trans commit failures
|
||||
generic/105 # needs triage: something about acls
|
||||
generic/108 # mount fails on failing device?
|
||||
generic/112 # file content mismatch failures (fds, etc)
|
||||
generic/120 # (can't exec 'cause no mmap)
|
||||
@@ -72,14 +73,17 @@ generic/126 # (can't exec 'cause no mmap)
|
||||
generic/141 # mmap missing
|
||||
generic/213 # enospc causes trans commit failures
|
||||
generic/215 # mmap missing
|
||||
generic/237 # wrong error return from failing setfacl?
|
||||
generic/246 # mmap missing
|
||||
generic/247 # mmap missing
|
||||
generic/248 # mmap missing
|
||||
generic/319 # utils output change? update branch?
|
||||
generic/321 # requires selinux enabled for '+' in ls?
|
||||
generic/325 # mmap missing
|
||||
generic/338 # BUG_ON update inode error handling
|
||||
generic/346 # mmap missing
|
||||
generic/347 # _dmthin_mount doesn't work?
|
||||
generic/375 # utils output change? update branch?
|
||||
EOF
|
||||
|
||||
t_restore_output
|
||||
|
||||
140
utils/fenced/ipmi-remote-host
Normal file
140
utils/fenced/ipmi-remote-host
Normal file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/bash
|
||||
# /usr/libexec/scoutfs-fenced/run/ipmi-remote-host
|
||||
|
||||
# ipmi configuration
|
||||
SCOUTFS_IPMI_CONFIG_FILE=${SCOUTFS_IPMI_CONFIG_FILE:-/etc/scoutfs/scoutfs-ipmi.conf}
|
||||
SCOUTFS_IPMI_HOSTS_FILE=${SCOUTFS_IPMI_HOSTS_FILE:-/etc/scoutfs/scoutfs-ipmi-hosts.conf}
|
||||
|
||||
## hosts file format
|
||||
## SCOUTFS_HOST_IP IPMI_ADDRESS
|
||||
## ex:
|
||||
# 192.168.1.1 192.168.10.1
|
||||
|
||||
# command setup
|
||||
IPMI_POWER="/sbin/ipmipower"
|
||||
SSH_CMD="ssh -o ConnectTimeout=3 -o BatchMode=yes -o StrictHostKeyChecking=no"
|
||||
LOGGER="/bin/logger -p local3.crit -t scoutfs-fenced"
|
||||
|
||||
$LOGGER "ipmi fence script invoked: IP: $SCOUTFS_FENCED_REQ_IP RID: $SCOUTFS_FENCED_REQ_RID TEST: $IPMITEST"
|
||||
|
||||
# Report a fatal fencing error on stderr and in syslog, then exit with
# status 1.
echo_fail() {
	>&2 echo "$@"
	$LOGGER "fence failed: $*"
	exit 1
}
|
||||
|
||||
# Report a non-fatal fencing message on stderr and in syslog.
echo_log() {
	>&2 echo "$@"
	$LOGGER "fence info: $*"
}
|
||||
|
||||
# Print a test result line prefixed with a UTF-8 check mark (U+2714).
echo_test_pass() {
	local mark='\xE2\x9C\x94'

	echo -e "$mark $*"
}
|
||||
|
||||
# Print a test result line prefixed with a UTF-8 cross mark (U+274C).
echo_test_fail() {
	local mark='\xE2\x9D\x8C'

	echo -e "$mark $*"
}
|
||||
|
||||
# Validate the configuration before doing any work: both config files
# must be set and readable, the ipmi config is sourced, and the
# ipmipower binary must be executable.
test -n "$SCOUTFS_IPMI_CONFIG_FILE" || \
	echo_fail "SCOUTFS_IPMI_CONFIG_FILE isn't set"
test -r "$SCOUTFS_IPMI_CONFIG_FILE" || \
	echo_fail "$SCOUTFS_IPMI_CONFIG_FILE isn't readable file"
. "$SCOUTFS_IPMI_CONFIG_FILE"
test -n "$SCOUTFS_IPMI_HOSTS_FILE" || \
	echo_fail "SCOUTFS_IPMI_HOSTS_FILE isn't set"
test -r "$SCOUTFS_IPMI_HOSTS_FILE" || \
	echo_fail "$SCOUTFS_IPMI_HOSTS_FILE isn't readable file"
test -x "$IPMI_POWER" || \
	echo_fail "$IPMI_POWER not found, need to install freeipmi?"
|
||||
|
||||
export ip="$SCOUTFS_FENCED_REQ_IP"
|
||||
export rid="$SCOUTFS_FENCED_REQ_RID"
|
||||
|
||||
# Print the second column of the hosts file line whose first column
# equals $1.  Nothing is printed when no line matches.
getIPMIhost () {
	if ! host=$(awk -v ip="$1" '$1 == ip {print $2}' "$SCOUTFS_IPMI_HOSTS_FILE"); then
		echo_fail "lookup ipmi host failed"
	fi

	echo "$host"
}
|
||||
|
||||
# Power off the IPMI host given as $1 and confirm with a status query
# that it reports off.  This always ends the script: exit 0 once the
# host is off, or through echo_fail when any step fails.
powerOffHost() {
	local bmc="$1"

	# older versions of ipmipower inverted wait-until-off/wait-until-on, so specify both
	if ! $IPMI_POWER $IPMI_OPTS -h "$bmc" --wait-until-off --wait-until-on --off; then
		echo_fail "ipmi power off $bmc failed"
	fi

	if ! ipmioutput=$($IPMI_POWER $IPMI_OPTS -h "$bmc" --stat); then
		echo_fail "ipmi power stat $bmc failed"
	fi

	[[ "$ipmioutput" =~ off ]] || echo_fail "ipmi stat $bmc not off"

	$LOGGER "ipmi fence power down $bmc success"

	exit 0
}
|
||||
|
||||
if [ -n "$IPMITEST" ]; then
|
||||
for i in $(awk '!/^($|[[:space:]]*#)/ {print $1}' "$SCOUTFS_IPMI_HOSTS_FILE"); do
|
||||
if ! $SSH_CMD "$i" /bin/true; then
|
||||
echo_test_fail "ssh $i"
|
||||
else
|
||||
echo_test_pass "ssh $i"
|
||||
fi
|
||||
host=$(getIPMIhost "$i")
|
||||
if [ -z "$host" ]; then
|
||||
echo_test_fail "ipmi config $i $host"
|
||||
else
|
||||
if ! $IPMI_POWER $IPMI_OPTS -h "$host" --stat; then
|
||||
echo_test_fail "ipmi $i"
|
||||
else
|
||||
echo_test_pass "ipmi $i"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ -z "$ip" ]; then
|
||||
echo_fail "no IP given for fencing"
|
||||
fi
|
||||
|
||||
host=$(getIPMIhost "$ip")
|
||||
if [ -z "$host" ]; then
|
||||
echo_fail "no IPMI host found for fence IP"
|
||||
fi
|
||||
|
||||
# first check via ssh if the mount still exists
|
||||
# if ssh succeeds, we will only power down the node if mounted
|
||||
if ! output=$($SSH_CMD "$ip" "echo BEGIN; LC_ALL=C egrep -m 1 '(^0x*|^$rid$)' /sys/kernel/boot_params/version /sys/fs/scoutfs/f*r*/rid; echo END"); then
|
||||
# ssh not working, just power down host
|
||||
powerOffHost "$host"
|
||||
fi
|
||||
|
||||
if [[ ! "$output" =~ BEGIN ]]; then
|
||||
# ssh failure
|
||||
echo_log "no BEGIN"
|
||||
powerOffHost "$host"
|
||||
fi
|
||||
|
||||
if [[ ! "$output" =~ \/boot_params\/ ]]; then
|
||||
# ssh failure
|
||||
echo_log "no boot params"
|
||||
powerOffHost "$host"
|
||||
fi
|
||||
|
||||
if [[ ! "$output" =~ END ]]; then
|
||||
# ssh failure
|
||||
echo_log "no END"
|
||||
powerOffHost "$host"
|
||||
fi
|
||||
|
||||
if [[ "$output" =~ "rid:$rid" ]]; then
|
||||
# rid still mounted, power down
|
||||
echo_log "rid $rid still mounted"
|
||||
powerOffHost "$host"
|
||||
fi
|
||||
|
||||
$LOGGER "ipmi fence host $ip/$host success (rid $rid not mounted)"
|
||||
exit 0
|
||||
|
||||
36
utils/fenced/local-force-unmount
Normal file
36
utils/fenced/local-force-unmount
Normal file
@@ -0,0 +1,36 @@
|
||||
#!/usr/bin/bash
|
||||
# /usr/libexec/scoutfs-fenced/run/local-force-umount
|
||||
|
||||
# Print the failure message on stderr and exit with status 1.
echo_fail() {
	# write to the inherited descriptor instead of reopening /dev/stderr
	# by path, which can fail when stderr is not a reopenable file
	echo "$@" >&2
	exit 1
}
|
||||
|
||||
rid="$SCOUTFS_FENCED_REQ_RID"
|
||||
|
||||
#
|
||||
# Look for a local mount with the rid to fence. Typically we'll at
|
||||
# least find the mount with the server that requested the fence that
|
||||
# we're processing. But it's possible that mounts are unmounted
|
||||
# before, or while, we're running.
|
||||
#
|
||||
mnts=$(findmnt -l -n -t scoutfs -o TARGET) || \
	echo_fail "findmnt -t scoutfs failed"

# force unmount the first mount whose rid matches the fence request
for mnt in $mnts; do
	mnt_rid=$(scoutfs statfs -p "$mnt" -s rid) || \
		echo_fail "scoutfs statfs $mnt failed"

	if [ "$mnt_rid" == "$rid" ]; then
		umount -f "$mnt" || \
			echo_fail "umount -f $mnt failed"

		exit 0
	fi
done
|
||||
|
||||
#
|
||||
# If the mount doesn't exist on this host then it can't access the
|
||||
# devices by definition and can be considered fenced.
|
||||
#
|
||||
exit 0
|
||||
139
utils/fenced/powerman-remote-host
Normal file
139
utils/fenced/powerman-remote-host
Normal file
@@ -0,0 +1,139 @@
|
||||
#!/usr/bin/bash
|
||||
# /usr/libexec/scoutfs-fenced/run/powerman-remote-host
|
||||
|
||||
# powerman configuration
|
||||
SCOUTFS_PM_CONFIG_FILE=${SCOUTFS_PM_CONFIG_FILE:-/etc/scoutfs/scoutfs-pm.conf}
|
||||
SCOUTFS_PM_HOSTS_FILE=${SCOUTFS_PM_HOSTS_FILE:-/etc/scoutfs/scoutfs-pm-hosts.conf}
|
||||
|
||||
## hosts file format
|
||||
## SCOUTFS_HOST_IP POWERMAN_NODE_NAME
|
||||
## ex:
|
||||
# 192.168.1.1 dm1
|
||||
|
||||
# command setup
|
||||
PM_CMD="/usr/bin/pm"
|
||||
SSH_CMD="ssh -o ConnectTimeout=3 -o BatchMode=yes -o StrictHostKeyChecking=no"
|
||||
LOGGER="/bin/logger -p local3.crit -t scoutfs-fenced"
|
||||
|
||||
# PMTEST is the variable that selects this script's test mode
$LOGGER "powerman fence script invoked: IP: $SCOUTFS_FENCED_REQ_IP RID: $SCOUTFS_FENCED_REQ_RID TEST: $PMTEST"
|
||||
|
||||
# Report a fatal fencing error on stderr and in syslog, then exit with
# status 1.
echo_fail() {
	>&2 echo "$@"
	$LOGGER "fence failed: $*"
	exit 1
}
|
||||
|
||||
# Report a non-fatal fencing message on stderr and in syslog.
echo_log() {
	>&2 echo "$@"
	$LOGGER "fence info: $*"
}
|
||||
|
||||
# Print a test result line prefixed with a UTF-8 check mark (U+2714).
echo_test_pass() {
	local mark='\xE2\x9C\x94'

	echo -e "$mark $*"
}
|
||||
|
||||
# Print a test result line prefixed with a UTF-8 cross mark (U+274C).
echo_test_fail() {
	local mark='\xE2\x9D\x8C'

	echo -e "$mark $*"
}
|
||||
|
||||
# Validate the configuration before doing any work: both config files
# must be set and readable, the powerman config is sourced, and the pm
# binary must be executable.
test -n "$SCOUTFS_PM_CONFIG_FILE" || \
	echo_fail "SCOUTFS_PM_CONFIG_FILE isn't set"
test -r "$SCOUTFS_PM_CONFIG_FILE" || \
	echo_fail "$SCOUTFS_PM_CONFIG_FILE isn't readable file"
. "$SCOUTFS_PM_CONFIG_FILE"
test -n "$SCOUTFS_PM_HOSTS_FILE" || \
	echo_fail "SCOUTFS_PM_HOSTS_FILE isn't set"
test -r "$SCOUTFS_PM_HOSTS_FILE" || \
	echo_fail "$SCOUTFS_PM_HOSTS_FILE isn't readable file"
test -x "$PM_CMD" || \
	echo_fail "$PM_CMD not found, need to install powerman?"
|
||||
|
||||
export ip="$SCOUTFS_FENCED_REQ_IP"
# The remote mount check and the log messages below all expand $rid, so
# the requested rid has to be stored under that name.
export rid="$SCOUTFS_FENCED_REQ_RID"
|
||||
|
||||
# Print the second column of the hosts file line whose first column
# equals $1.  Nothing is printed when no line matches.
getPMhost () {
	if ! host=$(awk -v ip="$1" '$1 == ip {print $2}' "$SCOUTFS_PM_HOSTS_FILE"); then
		echo_fail "lookup pm host failed"
	fi

	echo "$host"
}
|
||||
|
||||
# Power off the powerman node given as $1 and confirm with a status
# query that it reports off.  This always ends the script: exit 0 once
# the node is off, or through echo_fail when any step fails.
powerOffHost() {
	# report the node that was asked for, not whatever $host holds
	$PM_CMD $PM_OPTS "$1" -0 || \
		echo_fail "pm power off $1 failed"

	pmoutput=$($PM_CMD $PM_OPTS "$1" -q | grep "$1") || \
		echo_fail "powerman power stat $1 failed"

	if [[ ! "$pmoutput" =~ off ]]; then
		echo_fail "powerman stat $1 not off"
	fi

	$LOGGER "powerman fence power down $1 success"

	exit 0
}
|
||||
|
||||
if [ -n "$PMTEST" ]; then
|
||||
for i in $(awk '!/^($|[[:space:]]*#)/ {print $1}' "$SCOUTFS_PM_HOSTS_FILE"); do
|
||||
if ! $SSH_CMD "$i" /bin/true; then
|
||||
echo_test_fail "ssh $i"
|
||||
else
|
||||
echo_test_pass "ssh $i"
|
||||
fi
|
||||
host=$(getPMhost "$i")
|
||||
if [ -z "$host" ]; then
|
||||
echo_test_fail "pm config $i $host"
|
||||
else
|
||||
if ! $PM_CMD $PM_OPTS "$host" -q; then
|
||||
echo_test_fail "pm $i"
|
||||
else
|
||||
echo_test_pass "pm $i"
|
||||
fi
|
||||
fi
|
||||
done
|
||||
exit 0
|
||||
fi
|
||||
|
||||
if [ -z "$ip" ]; then
|
||||
echo_fail "no IP given for fencing"
|
||||
fi
|
||||
|
||||
host=$(getPMhost "$ip")
|
||||
if [ -z "$host" ]; then
|
||||
echo_fail "no host found for fence IP"
|
||||
fi
|
||||
|
||||
# first check via ssh if the mount still exists
|
||||
# if ssh succeeds, we will only power down the node if mounted
|
||||
if ! output=$($SSH_CMD "$ip" "echo BEGIN; LC_ALL=C egrep -m 1 '(^0x*|^$rid$)' /sys/kernel/boot_params/version /sys/fs/scoutfs/f*r*/rid; echo END"); then
|
||||
# ssh not working, just power down host
|
||||
powerOffHost "$host"
|
||||
fi
|
||||
|
||||
if [[ ! "$output" =~ BEGIN ]]; then
|
||||
# ssh failure
|
||||
echo_log "no BEGIN"
|
||||
powerOffHost "$host"
|
||||
fi
|
||||
|
||||
if [[ ! "$output" =~ \/boot_params\/ ]]; then
|
||||
# ssh failure
|
||||
echo_log "no boot params"
|
||||
powerOffHost "$host"
|
||||
fi
|
||||
|
||||
if [[ ! "$output" =~ END ]]; then
|
||||
# ssh failure
|
||||
echo_log "no END"
|
||||
powerOffHost "$host"
|
||||
fi
|
||||
|
||||
if [[ "$output" =~ "rid:$rid" ]]; then
|
||||
# rid still mounted, power down
|
||||
echo_log "rid $rid still mounted"
|
||||
powerOffHost "$host"
|
||||
fi
|
||||
|
||||
$LOGGER "powerman fence host $ip/$host success (rid $rid not mounted)"
|
||||
exit 0
|
||||
|
||||
11
utils/fenced/scoutfs-ipmi-hosts.conf
Normal file
11
utils/fenced/scoutfs-ipmi-hosts.conf
Normal file
@@ -0,0 +1,11 @@
|
||||
# /etc/scoutfs/scoutfs-ipmi-hosts.conf
|
||||
|
||||
## config file format
|
||||
##
|
||||
## SCOUTFS_HOST_IP must match the interface used for scoutfs
|
||||
## leader/follower communications
|
||||
##
|
||||
## SCOUTFS_HOST_IP IPMI_ADDRESS
|
||||
## ex:
|
||||
#192.168.1.1 192.168.10.1
|
||||
|
||||
10
utils/fenced/scoutfs-ipmi.conf
Normal file
10
utils/fenced/scoutfs-ipmi.conf
Normal file
@@ -0,0 +1,10 @@
|
||||
#!/usr/bin/bash
|
||||
# /etc/scoutfs/scoutfs-ipmi.conf
|
||||
|
||||
IPMI_USER="admin"
|
||||
IPMI_PASSWORD="password"
|
||||
IPMI_OPTS="-D LAN_2_0 -u $IPMI_USER -p $IPMI_PASSWORD"
|
||||
|
||||
# some Intel BMCs need -I 17
|
||||
# IPMI_OPTS="-D LAN_2_0 -u $IPMI_USER -p $IPMI_PASSWORD -I 17"
|
||||
|
||||
11
utils/fenced/scoutfs-pm-hosts.conf
Normal file
11
utils/fenced/scoutfs-pm-hosts.conf
Normal file
@@ -0,0 +1,11 @@
|
||||
# /etc/scoutfs/scoutfs-pm-hosts.conf
|
||||
|
||||
## config file format
|
||||
##
|
||||
## SCOUTFS_HOST_IP must match the interface used for scoutfs
|
||||
## leader/follower communications
|
||||
##
|
||||
## SCOUTFS_HOST_IP POWERMAN_NODE_NAME
|
||||
## ex:
|
||||
#192.168.1.1 node1
|
||||
|
||||
8
utils/fenced/scoutfs-pm.conf
Normal file
8
utils/fenced/scoutfs-pm.conf
Normal file
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/bash
|
||||
# /etc/scoutfs/scoutfs-pm.conf
|
||||
|
||||
PM_OPTS=""
|
||||
|
||||
# optionally specify remote powerman server
|
||||
#PM_OPTS="-h pm-server.localdomain"
|
||||
|
||||
@@ -15,61 +15,12 @@ general mount options described in the
|
||||
.BR mount (8)
|
||||
manual page.
|
||||
.TP
|
||||
.B acl
|
||||
The acl mount option enables support for POSIX Access Control Lists
|
||||
as detailed in
|
||||
.BR acl (5) .
|
||||
Support for POSIX ACLs is the default.
|
||||
.TP
|
||||
.B data_prealloc_blocks=<blocks>
|
||||
Set the size of preallocation regions of data files, in 4KiB blocks.
|
||||
Writes to these regions that contain no extents will attempt to
|
||||
preallocate the size of the full region. This can waste a lot of space
|
||||
with small files, files with sparse regions, and files whose final
|
||||
length isn't a multiple of the preallocation size. The following
|
||||
data_prealloc_contig_only option, which is the default, restricts this
|
||||
behaviour to waste less space.
|
||||
.sp
|
||||
All the preallocation options can be changed in an active mount by
|
||||
writing to their respective files in the options directory in the
|
||||
mount's sysfs directory.
|
||||
.sp
|
||||
It is worth noting that it is always more efficient in every way to use
|
||||
.BR fallocate (2)
|
||||
to precisely allocate large extents for the resulting size of the file.
|
||||
Always attempt to enable it in software that supports it.
|
||||
.TP
|
||||
.B data_prealloc_contig_only=<0|1>
|
||||
This option, currently the default, limits file data preallocation in
|
||||
two ways. First, it will only preallocate when extending a fully
|
||||
allocated file. Second, it will limit the size of preallocation to the
|
||||
existing length of the file. These limits reduce the amount of
|
||||
preallocation wasted per file at the cost of multiple initial extents in
|
||||
all files. It only supports simple streaming writes; any other write
|
||||
pattern will not be recognized and could result in many fragmented
|
||||
extent allocations.
|
||||
.sp
|
||||
This option can be disabled to encourage large allocated extents
|
||||
regardless of write patterns. This can be helpful if files are written
|
||||
with initial sparse regions (perhaps by multiple threads writing to
|
||||
different regions) and wasted space isn't an issue (perhaps because the
|
||||
file population contains few small files).
|
||||
.TP
|
||||
.B metadev_path=<device>
|
||||
The metadev_path option specifies the path to the block device that
|
||||
contains the filesystem's metadata.
|
||||
.sp
|
||||
This option is required.
|
||||
.TP
|
||||
.B noacl
|
||||
The noacl mount option disables the default support for POSIX Access
|
||||
Control Lists. Any existing system.posix_acl_default and
|
||||
system.posix_acl_access extended attributes remain in inodes. They
|
||||
will appear in listings from
|
||||
.BR listxattr (2)
|
||||
but specific retrieval or removal operations will fail. They will be
|
||||
used for enforcement again if ACL support is later enabled.
|
||||
.TP
|
||||
.B orphan_scan_delay_ms=<number>
|
||||
This option sets the average expected delay, in milliseconds, between
|
||||
each mount's scan of the global orphaned inode list. Jitter is added to
|
||||
|
||||
@@ -597,7 +597,7 @@ format.
|
||||
.PD
|
||||
|
||||
.TP
|
||||
.BI "print {-S|--skip-likely-huge} META-DEVICE"
|
||||
.BI "print META-DEVICE"
|
||||
.sp
|
||||
Prints out all of the metadata in the file system. This makes no effort
|
||||
to ensure that the structures are consistent as they're traversed and
|
||||
@@ -607,25 +607,13 @@ output.
|
||||
.PD 0
|
||||
.TP
|
||||
.sp
|
||||
.B "-S, --skip-likely-huge"
|
||||
Skip printing structures that are likely to be very large. The
|
||||
structures that are skipped tend to be global and whose size tends to be
|
||||
related to the size of the volume. Examples of skipped structures include
|
||||
the global fs items, srch files, and metadata and data
|
||||
allocators. Similar structures that are not skipped are related to the
|
||||
number of mounts and are maintained at a relatively reasonable size.
|
||||
These include per-mount log trees, srch files, allocators, and the
|
||||
metadata allocators used by server commits.
|
||||
.sp
|
||||
Skipping the larger structures limits the print output to a relatively
|
||||
constant size rather than being a large multiple of the used metadata
|
||||
space of the volume making the output much more useful for inspection.
|
||||
.TP
|
||||
.B "META-DEVICE"
|
||||
The path to the metadata device for the filesystem whose metadata will be
|
||||
printed. An attempt will be made to flush the host's buffer cache for
|
||||
this device with the BLKFLSBUF ioctl, or with posix_fadvise() if
|
||||
the path refers to a regular file.
|
||||
printed. Since this command reads via the host's buffer cache, it may not
|
||||
reflect the current blocks in the filesystem possibly written to the shared
|
||||
block devices from another host, unless the
|
||||
.B blockdev \--flushbufs
|
||||
command is used first.
|
||||
.RE
|
||||
.PD
|
||||
|
||||
|
||||
@@ -55,14 +55,21 @@ install -m 755 -D src/scoutfs $RPM_BUILD_ROOT%{_sbindir}/scoutfs
|
||||
install -m 644 -D src/ioctl.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/ioctl.h
|
||||
install -m 644 -D src/format.h $RPM_BUILD_ROOT%{_includedir}/scoutfs/format.h
|
||||
install -m 755 -D fenced/scoutfs-fenced $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/scoutfs-fenced
|
||||
install -m 755 -D fenced/local-force-unmount $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/run/local-force-unmount
|
||||
install -m 755 -D fenced/ipmi-remote-host $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/run/ipmi-remote-host
|
||||
install -m 755 -D fenced/powerman-remote-host $RPM_BUILD_ROOT%{_libexecdir}/scoutfs-fenced/run/powerman-remote-host
|
||||
install -m 644 -D fenced/scoutfs-fenced.service $RPM_BUILD_ROOT%{_unitdir}/scoutfs-fenced.service
|
||||
install -m 644 -D fenced/scoutfs-fenced.conf.example $RPM_BUILD_ROOT%{_sysconfdir}/scoutfs/scoutfs-fenced.conf.example
|
||||
install -m 644 -D fenced/scoutfs-ipmi.conf $RPM_BUILD_ROOT%{_sysconfdir}/scoutfs/scoutfs-ipmi.conf
|
||||
install -m 644 -D fenced/scoutfs-ipmi-hosts.conf $RPM_BUILD_ROOT%{_sysconfdir}/scoutfs/scoutfs-ipmi-hosts.conf
|
||||
install -m 644 -D fenced/scoutfs-pm.conf $RPM_BUILD_ROOT%{_sysconfdir}/scoutfs/scoutfs-pm.conf
|
||||
install -m 644 -D fenced/scoutfs-pm-hosts.conf $RPM_BUILD_ROOT%{_sysconfdir}/scoutfs/scoutfs-pm-hosts.conf
|
||||
|
||||
%files
|
||||
%defattr(644,root,root,755)
|
||||
%{_mandir}/man*/scoutfs*.gz
|
||||
%{_unitdir}/scoutfs-fenced.service
|
||||
%{_sysconfdir}/scoutfs
|
||||
%config(noreplace) %{_sysconfdir}/scoutfs
|
||||
%defattr(755,root,root,755)
|
||||
%{_sbindir}/scoutfs
|
||||
%{_libexecdir}/scoutfs-fenced
|
||||
|
||||
@@ -3,7 +3,6 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/fs.h>
|
||||
#include <errno.h>
|
||||
@@ -104,44 +103,3 @@ char *size_str(u64 nr, unsigned size)
|
||||
|
||||
return suffixes[i];
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to flush the local read cache for a device. This is only a best
|
||||
* effort as these interfaces don't block waiting to fully purge the
|
||||
* cache. This is OK because it's used by cached readers that are known
|
||||
* to be racy anyway.
|
||||
*/
|
||||
int flush_device(int fd)
|
||||
{
|
||||
struct stat st;
|
||||
int ret;
|
||||
|
||||
ret = fstat(fd, &st);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "fstat failed: %s (%d)\n", strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (S_ISREG(st.st_mode)) {
|
||||
ret = posix_fadvise(fd, 0, st.st_size, POSIX_FADV_DONTNEED);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "POSIX_FADV_DONTNEED failed: %s (%d)\n",
|
||||
strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
} else if (S_ISBLK(st.st_mode)) {
|
||||
ret = ioctl(fd, BLKFLSBUF, 0);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "BLKFLSBUF, failed: %s (%d)\n", strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -14,6 +14,5 @@ int device_size(char *path, int fd,
|
||||
char *use_type, u64 *size_ret);
|
||||
float size_flt(u64 nr, unsigned size);
|
||||
char *size_str(u64 nr, unsigned size);
|
||||
int flush_device(int fd);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -118,33 +118,6 @@ struct mkfs_args {
|
||||
struct scoutfs_quorum_slot slots[SCOUTFS_QUORUM_MAX_SLOTS];
|
||||
};
|
||||
|
||||
static int open_mkfs_dev(struct mkfs_args *args, char *path, mode_t mode, char *which)
|
||||
{
|
||||
int ret;
|
||||
int fd = -1;
|
||||
|
||||
fd = open(path, mode);
|
||||
if (fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open %s dev '%s': %s (%d)\n",
|
||||
which, path, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = flush_device(fd);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (!args->force)
|
||||
ret = check_bdev(fd, path, which);
|
||||
|
||||
out:
|
||||
if (ret < 0 && fd >= 0)
|
||||
close(fd);
|
||||
|
||||
return ret ?: fd;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make a new file system by writing:
|
||||
* - super blocks
|
||||
@@ -183,17 +156,32 @@ static int do_mkfs(struct mkfs_args *args)
|
||||
gettimeofday(&tv, NULL);
|
||||
pseudo_random_bytes(&fsid, sizeof(fsid));
|
||||
|
||||
meta_fd = open_mkfs_dev(args, args->meta_device, O_RDWR | O_EXCL, "meta");
|
||||
meta_fd = open(args->meta_device, O_RDWR | O_EXCL);
|
||||
if (meta_fd < 0) {
|
||||
ret = meta_fd;
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open '%s': %s (%d)\n",
|
||||
args->meta_device, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
if (!args->force) {
|
||||
ret = check_bdev(meta_fd, args->meta_device, "meta");
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
data_fd = open_mkfs_dev(args, args->data_device, O_RDWR | O_EXCL, "data");
|
||||
data_fd = open(args->data_device, O_RDWR | O_EXCL);
|
||||
if (data_fd < 0) {
|
||||
ret = data_fd;
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open '%s': %s (%d)\n",
|
||||
args->data_device, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
if (!args->force) {
|
||||
ret = check_bdev(data_fd, args->data_device, "data");
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
super = calloc(1, SCOUTFS_BLOCK_SM_SIZE);
|
||||
bt = calloc(1, SCOUTFS_BLOCK_LG_SIZE);
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <stdarg.h>
|
||||
#include <stdbool.h>
|
||||
#include <ctype.h>
|
||||
#include <uuid/uuid.h>
|
||||
#include <sys/socket.h>
|
||||
@@ -27,7 +26,6 @@
|
||||
#include "avl.h"
|
||||
#include "srch.h"
|
||||
#include "leaf_item_hash.h"
|
||||
#include "dev.h"
|
||||
|
||||
static void print_block_header(struct scoutfs_block_header *hdr, int size)
|
||||
{
|
||||
@@ -991,10 +989,9 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
|
||||
|
||||
struct print_args {
|
||||
char *meta_device;
|
||||
bool skip_likely_huge;
|
||||
};
|
||||
|
||||
static int print_volume(int fd, struct print_args *args)
|
||||
static int print_volume(int fd)
|
||||
{
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct print_recursion_args pa;
|
||||
@@ -1044,26 +1041,23 @@ static int print_volume(int fd, struct print_args *args)
|
||||
ret = err;
|
||||
}
|
||||
|
||||
if (!args->skip_likely_huge) {
|
||||
for (i = 0; i < array_size(super->meta_alloc); i++) {
|
||||
snprintf(str, sizeof(str), "meta_alloc[%u]", i);
|
||||
err = print_btree(fd, super, str, &super->meta_alloc[i].root,
|
||||
print_alloc_item, NULL);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
}
|
||||
|
||||
err = print_btree(fd, super, "data_alloc", &super->data_alloc.root,
|
||||
for (i = 0; i < array_size(super->meta_alloc); i++) {
|
||||
snprintf(str, sizeof(str), "meta_alloc[%u]", i);
|
||||
err = print_btree(fd, super, str, &super->meta_alloc[i].root,
|
||||
print_alloc_item, NULL);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
}
|
||||
|
||||
err = print_btree(fd, super, "data_alloc", &super->data_alloc.root,
|
||||
print_alloc_item, NULL);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
err = print_btree(fd, super, "srch_root", &super->srch_root,
|
||||
print_srch_root_item, NULL);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
err = print_btree(fd, super, "logs_root", &super->logs_root,
|
||||
print_log_trees_item, NULL);
|
||||
if (err && !ret)
|
||||
@@ -1071,23 +1065,19 @@ static int print_volume(int fd, struct print_args *args)
|
||||
|
||||
pa.super = super;
|
||||
pa.fd = fd;
|
||||
if (!args->skip_likely_huge) {
|
||||
err = print_btree_leaf_items(fd, super, &super->srch_root.ref,
|
||||
print_srch_root_files, &pa);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
}
|
||||
err = print_btree_leaf_items(fd, super, &super->srch_root.ref,
|
||||
print_srch_root_files, &pa);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
err = print_btree_leaf_items(fd, super, &super->logs_root.ref,
|
||||
print_log_trees_roots, &pa);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
if (!args->skip_likely_huge) {
|
||||
err = print_btree(fd, super, "fs_root", &super->fs_root,
|
||||
print_fs_item, NULL);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
}
|
||||
err = print_btree(fd, super, "fs_root", &super->fs_root,
|
||||
print_fs_item, NULL);
|
||||
if (err && !ret)
|
||||
ret = err;
|
||||
|
||||
out:
|
||||
free(super);
|
||||
@@ -1108,12 +1098,7 @@ static int do_print(struct print_args *args)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = flush_device(fd);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = print_volume(fd, args);
|
||||
out:
|
||||
ret = print_volume(fd);
|
||||
close(fd);
|
||||
return ret;
|
||||
};
|
||||
@@ -1123,9 +1108,6 @@ static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
struct print_args *args = state->input;
|
||||
|
||||
switch (key) {
|
||||
case 'S':
|
||||
args->skip_likely_huge = true;
|
||||
break;
|
||||
case ARGP_KEY_ARG:
|
||||
if (!args->meta_device)
|
||||
args->meta_device = strdup_or_error(state, arg);
|
||||
@@ -1143,13 +1125,8 @@ static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct argp_option options[] = {
|
||||
{ "skip-likely-huge", 'S', NULL, 0, "Skip large structures to minimize output size"},
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
static struct argp argp = {
|
||||
options,
|
||||
NULL,
|
||||
parse_opt,
|
||||
"META-DEV",
|
||||
"Print metadata structures"
|
||||
|
||||
Reference in New Issue
Block a user