mirror of
https://github.com/versity/scoutfs.git
synced 2026-05-01 02:15:44 +00:00
Compare commits
36 Commits
zab/v1_2_r
...
zab/worm
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1731daf402 | ||
|
|
449b71a015 | ||
|
|
21385dc0fb | ||
|
|
dae7d29559 | ||
|
|
a423ac90f5 | ||
|
|
4c46a834f8 | ||
|
|
e7b22e19d2 | ||
|
|
2fbc135a48 | ||
|
|
4ea4bad1c6 | ||
|
|
0a6b1fb304 | ||
|
|
fb7e43dd23 | ||
|
|
45d90a5ae4 | ||
|
|
48f1305a8a | ||
|
|
cd4d6502b8 | ||
|
|
dff366e1a4 | ||
|
|
ca526e2bc0 | ||
|
|
e423d42106 | ||
|
|
82d2be2e4a | ||
|
|
4102b760d0 | ||
|
|
65654ee7c0 | ||
|
|
b2d6ceeb9c | ||
|
|
d8231016f8 | ||
|
|
3c2b329675 | ||
|
|
96ad8dd510 | ||
|
|
44f38a31ec | ||
|
|
fb2ff753ad | ||
|
|
bb3db7e272 | ||
|
|
c94b072925 | ||
|
|
26ae9c6e04 | ||
|
|
c8d7221ec5 | ||
|
|
7fd03dc311 | ||
|
|
4e8a088cc5 | ||
|
|
9c751c1197 | ||
|
|
875583b7ef | ||
|
|
38e5aa77c4 | ||
|
|
57a1d75e52 |
@@ -2,9 +2,45 @@ Versity ScoutFS Release Notes
|
||||
=============================
|
||||
|
||||
---
|
||||
v1.3-rc
|
||||
v1.4
|
||||
\
|
||||
*TBD*
|
||||
*May 6, 2022*
|
||||
|
||||
* **Fix possible client crash during server failover**
|
||||
\
|
||||
Fixed a narrow window during server failover and lock recovery that
|
||||
could cause a client mount to believe that it had an inconsistent item
|
||||
cache and panic. This required very specific lock state and messaging
|
||||
patterns between multiple mounts and multiple servers which made it
|
||||
unlikely to occur in the field.
|
||||
|
||||
---
|
||||
v1.3
|
||||
\
|
||||
*Apr 7, 2022*
|
||||
|
||||
* **Fix rare server instability under heavy load**
|
||||
\
|
||||
Fixed a case of server instability under heavy load due to concurrent
|
||||
work fully exhausting metadata block allocation pools reserved for a
|
||||
single server transaction. This would cause brief interruption as the
|
||||
server shut down and the next server started up and made progress as
|
||||
pending work was retried.
|
||||
|
||||
* **Fix slow fencing preventing server startup**
|
||||
\
|
||||
If a server had to process many fence requests with a slow fencing
|
||||
mechanism it could be interrupted before it finished. The server
|
||||
now makes sure heartbeat messages are sent while it is making progress
|
||||
on fencing requests so that other quorum members don't interrupt the
|
||||
process.
|
||||
|
||||
* **Performance improvement in getxattr and setxattr**
|
||||
\
|
||||
Kernel allocation patterns in the getxattr and setxattr
|
||||
implementations were causing significant contention between CPUs. Their
|
||||
allocation strategy was changed so that concurrent tasks can call these
|
||||
xattr methods without degrading performance.
|
||||
|
||||
---
|
||||
v1.2
|
||||
|
||||
@@ -1318,6 +1318,17 @@ bool scoutfs_alloc_meta_low(struct super_block *sb,
|
||||
return lo;
|
||||
}
|
||||
|
||||
/*
 * Sample the allocator's remaining metadata counts for the caller.
 *
 * *avail_total is set to the cpu-endian value of alloc->avail.first_nr
 * and *freed_space is set to list_block_space() of
 * alloc->freed.first_nr.  Both are read inside a read_seqbegin() /
 * read_seqretry() loop on alloc->seqlock, so the pair is sampled again
 * if a writer changed the seqlock while we were reading.
 *
 * NOTE(review): list_block_space() is defined outside this hunk;
 * presumably it converts a used-entry count into remaining room in the
 * list block -- confirm against its definition.
 */
void scoutfs_alloc_meta_remaining(struct scoutfs_alloc *alloc, u32 *avail_total, u32 *freed_space)
|
||||
{
|
||||
unsigned int seq;
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&alloc->seqlock);
|
||||
*avail_total = le32_to_cpu(alloc->avail.first_nr);
|
||||
*freed_space = list_block_space(alloc->freed.first_nr);
|
||||
} while (read_seqretry(&alloc->seqlock, seq));
|
||||
}
|
||||
|
||||
bool scoutfs_alloc_test_flag(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc, u32 flag)
|
||||
{
|
||||
|
||||
@@ -158,6 +158,7 @@ int scoutfs_alloc_splice_list(struct super_block *sb,
|
||||
|
||||
bool scoutfs_alloc_meta_low(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc, u32 nr);
|
||||
void scoutfs_alloc_meta_remaining(struct scoutfs_alloc *alloc, u32 *avail_total, u32 *freed_space);
|
||||
bool scoutfs_alloc_test_flag(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc, u32 flag);
|
||||
|
||||
|
||||
@@ -2449,7 +2449,7 @@ int scoutfs_btree_free_blocks(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_root *root, int alloc_low)
|
||||
struct scoutfs_btree_root *root, int free_budget)
|
||||
{
|
||||
u64 blknos[SCOUTFS_BTREE_MAX_HEIGHT];
|
||||
struct scoutfs_block *bl = NULL;
|
||||
@@ -2459,11 +2459,15 @@ int scoutfs_btree_free_blocks(struct super_block *sb,
|
||||
struct scoutfs_avl_node *node;
|
||||
struct scoutfs_avl_node *next;
|
||||
struct scoutfs_key par_next;
|
||||
int nr_freed = 0;
|
||||
int nr_par;
|
||||
int level;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
if (WARN_ON_ONCE(free_budget <= 0))
|
||||
return -EINVAL;
|
||||
|
||||
if (WARN_ON_ONCE(root->height > ARRAY_SIZE(blknos)))
|
||||
return -EIO; /* XXX corruption */
|
||||
|
||||
@@ -2538,8 +2542,7 @@ int scoutfs_btree_free_blocks(struct super_block *sb,
|
||||
while (node) {
|
||||
|
||||
/* make sure we can always free parents after leaves */
|
||||
if (scoutfs_alloc_meta_low(sb, alloc,
|
||||
alloc_low + nr_par + 1)) {
|
||||
if ((nr_freed + 1 + nr_par) > free_budget) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
@@ -2553,6 +2556,7 @@ int scoutfs_btree_free_blocks(struct super_block *sb,
|
||||
le64_to_cpu(ref.blkno));
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
nr_freed++;
|
||||
|
||||
node = scoutfs_avl_next(&bt->item_root, node);
|
||||
if (node) {
|
||||
@@ -2568,6 +2572,7 @@ int scoutfs_btree_free_blocks(struct super_block *sb,
|
||||
blknos[i]);
|
||||
ret = scoutfs_free_meta(sb, alloc, wri, blknos[i]);
|
||||
BUG_ON(ret); /* checked meta low, freed should fit */
|
||||
nr_freed++;
|
||||
}
|
||||
|
||||
/* restart walk past the subtree we just freed */
|
||||
|
||||
@@ -125,7 +125,7 @@ int scoutfs_btree_free_blocks(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_key *key,
|
||||
struct scoutfs_btree_root *root, int alloc_low);
|
||||
struct scoutfs_btree_root *root, int free_budget);
|
||||
|
||||
void scoutfs_btree_put_iref(struct scoutfs_btree_item_ref *iref);
|
||||
|
||||
|
||||
@@ -157,6 +157,7 @@
|
||||
EXPAND_COUNTER(orphan_scan_error) \
|
||||
EXPAND_COUNTER(orphan_scan_item) \
|
||||
EXPAND_COUNTER(orphan_scan_omap_set) \
|
||||
EXPAND_COUNTER(quorum_candidate_server_stopping) \
|
||||
EXPAND_COUNTER(quorum_elected) \
|
||||
EXPAND_COUNTER(quorum_fence_error) \
|
||||
EXPAND_COUNTER(quorum_fence_leader) \
|
||||
|
||||
@@ -529,6 +529,11 @@ static int scoutfs_get_block(struct inode *inode, sector_t iblock,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (create && !si->staging && scoutfs_inode_worm_denied(inode)) {
|
||||
ret = -EACCES;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* convert unwritten to written, could be staging */
|
||||
if (create && ext.map && (ext.flags & SEF_UNWRITTEN)) {
|
||||
un.start = iblock;
|
||||
@@ -1192,6 +1197,11 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_inode_worm_denied(to)) {
|
||||
ret = -EACCES;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((from_off & SCOUTFS_BLOCK_SM_MASK) ||
|
||||
(to_off & SCOUTFS_BLOCK_SM_MASK) ||
|
||||
((byte_len & SCOUTFS_BLOCK_SM_MASK) &&
|
||||
|
||||
@@ -1029,6 +1029,11 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry)
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (scoutfs_inode_worm_denied(inode)) {
|
||||
ret = -EACCES;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
if (should_orphan(inode)) {
|
||||
ret = scoutfs_lock_orphan(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, scoutfs_ino(inode),
|
||||
&orph_lock);
|
||||
@@ -1697,6 +1702,12 @@ static int scoutfs_rename_common(struct inode *old_dir,
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if ((old_inode && scoutfs_inode_worm_denied(old_inode)) ||
|
||||
(new_inode && scoutfs_inode_worm_denied(new_inode))) {
|
||||
ret = -EACCES;
|
||||
goto out_unlock;
|
||||
}
|
||||
|
||||
if (should_orphan(new_inode)) {
|
||||
ret = scoutfs_lock_orphan(sb, SCOUTFS_LOCK_WRITE_ONLY, 0, scoutfs_ino(new_inode),
|
||||
&orph_lock);
|
||||
|
||||
@@ -107,6 +107,11 @@ retry:
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_inode_worm_denied(inode)) {
|
||||
ret = -EACCES;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_complete_truncate(inode, inode_lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*/
|
||||
#define SCOUTFS_FORMAT_VERSION_MIN 1
|
||||
#define SCOUTFS_FORMAT_VERSION_MIN_STR __stringify(SCOUTFS_FORMAT_VERSION_MIN)
|
||||
#define SCOUTFS_FORMAT_VERSION_MAX 1
|
||||
#define SCOUTFS_FORMAT_VERSION_MAX 2
|
||||
#define SCOUTFS_FORMAT_VERSION_MAX_STR __stringify(SCOUTFS_FORMAT_VERSION_MAX)
|
||||
|
||||
/* statfs(2) f_type */
|
||||
@@ -856,8 +856,12 @@ struct scoutfs_inode {
|
||||
struct scoutfs_timespec ctime;
|
||||
struct scoutfs_timespec mtime;
|
||||
struct scoutfs_timespec crtime;
|
||||
struct scoutfs_timespec worm_level1_expire;
|
||||
};
|
||||
|
||||
#define SCOUTFS_INODE_FMT_V1_BYTES offsetof(struct scoutfs_inode, worm_level1_expire)
|
||||
#define SCOUTFS_INODE_FMT_V2_BYTES sizeof(struct scoutfs_inode)
|
||||
|
||||
#define SCOUTFS_INO_FLAG_TRUNCATE 0x1
|
||||
|
||||
#define SCOUTFS_ROOT_INO 1
|
||||
|
||||
152
kmod/src/inode.c
152
kmod/src/inode.c
@@ -84,6 +84,7 @@ static void scoutfs_inode_ctor(void *obj)
|
||||
{
|
||||
struct scoutfs_inode_info *si = obj;
|
||||
|
||||
seqlock_init(&si->seqlock);
|
||||
init_rwsem(&si->extent_sem);
|
||||
mutex_init(&si->item_mutex);
|
||||
seqcount_init(&si->seqcount);
|
||||
@@ -213,6 +214,30 @@ static u64 get_item_minor(struct scoutfs_inode_info *si, u8 type)
|
||||
return si->item_minors[ind];
|
||||
}
|
||||
|
||||
/*
 * Copy the inode's in-memory worm expiration time into *ts.
 *
 * The copy is made inside a read_seqbegin() / read_seqretry() loop on
 * si->seqlock so that the caller never sees a half-updated timespec
 * from a store made under write_seqlock() on the same seqlock.
 */
void scoutfs_inode_get_worm(struct inode *inode, struct timespec *ts)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
unsigned int seq;
|
||||
|
||||
do {
|
||||
seq = read_seqbegin(&si->seqlock);
|
||||
*ts = si->worm_expire;
|
||||
} while (read_seqretry(&si->seqlock, seq));
|
||||
}
|
||||
|
||||
/*
 * Store a new worm expiration time in the inode's in-memory info.
 *
 * Both timespec fields are written while holding si->seqlock for
 * writing so that seqlock readers of si->worm_expire retry rather than
 * observe a mix of old and new fields.  This only updates the in-memory
 * copy; nothing in this function writes the inode item.
 */
void scoutfs_inode_set_worm(struct inode *inode, u64 expire_sec, u32 expire_nsec)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
/* we don't deal with native timespec truncating our 64bit .sec */
|
||||
BUILD_BUG_ON(sizeof(si->worm_expire.tv_sec) != sizeof(expire_sec));
|
||||
|
||||
write_seqlock(&si->seqlock);
|
||||
si->worm_expire.tv_sec = expire_sec;
|
||||
si->worm_expire.tv_nsec = expire_nsec;
|
||||
write_sequnlock(&si->seqlock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller has ensured that the fields in the incoming scoutfs inode
|
||||
* reflect both the inode item and the inode index items. This happens
|
||||
@@ -233,7 +258,7 @@ static void set_item_info(struct scoutfs_inode_info *si,
|
||||
set_item_major(si, SCOUTFS_INODE_INDEX_DATA_SEQ_TYPE, sinode->data_seq);
|
||||
}
|
||||
|
||||
static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
static void load_inode(struct inode *inode, struct scoutfs_inode *cinode, int inode_bytes)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
@@ -262,6 +287,12 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
si->crtime.tv_sec = le64_to_cpu(cinode->crtime.sec);
|
||||
si->crtime.tv_nsec = le32_to_cpu(cinode->crtime.nsec);
|
||||
|
||||
if (inode_bytes == SCOUTFS_INODE_FMT_V2_BYTES)
|
||||
scoutfs_inode_set_worm(inode, le64_to_cpu(cinode->worm_level1_expire.sec),
|
||||
le32_to_cpu(cinode->worm_level1_expire.nsec));
|
||||
else
|
||||
scoutfs_inode_set_worm(inode, 0, 0);
|
||||
|
||||
/*
|
||||
* i_blocks is initialized from online and offline and is then
|
||||
* maintained as blocks come and go.
|
||||
@@ -272,6 +303,36 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
|
||||
set_item_info(si, cinode);
|
||||
}
|
||||
|
||||
/*
 * Returns the inode item size, in bytes, that corresponds to the
 * format version recorded in the super block info: the v1 size for
 * fmt_vers 1 and the v2 size for fmt_vers 2.
 *
 * Any other fmt_vers falls through both tests and returns 0.
 */
|
||||
static int max_inode_fmt_ver_bytes(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
int ret = 0;
|
||||
|
||||
if (sbi->fmt_vers == 1)
|
||||
ret = SCOUTFS_INODE_FMT_V1_BYTES;
|
||||
else if (sbi->fmt_vers == 2)
|
||||
ret = SCOUTFS_INODE_FMT_V2_BYTES;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Returns true if an inode item of the given size is acceptable for
 * the format version in the super block info.
 *
 * The size is first mapped to the format version that defines it (v1
 * or v2 bytes); a size that matches neither maps to version 0 and is
 * always rejected.  A recognized size is accepted as long as its
 * version is no greater than sbi->fmt_vers, so an older, smaller inode
 * item is still valid under a newer format version.
 */
|
||||
static bool valid_inode_fmt_ver_bytes(struct super_block *sb, int bytes)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
int ver;
|
||||
|
||||
if (bytes == SCOUTFS_INODE_FMT_V1_BYTES)
|
||||
ver = 1;
|
||||
else if (bytes == SCOUTFS_INODE_FMT_V2_BYTES)
|
||||
ver = 2;
|
||||
else
|
||||
ver = 0;
|
||||
|
||||
return ver > 0 && ver <= sbi->fmt_vers;
|
||||
}
|
||||
|
||||
void scoutfs_inode_init_key(struct scoutfs_key *key, u64 ino)
|
||||
{
|
||||
*key = (struct scoutfs_key) {
|
||||
@@ -281,6 +342,23 @@ void scoutfs_inode_init_key(struct scoutfs_key *key, u64 ino)
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* Read an inode item into the caller's buffer and return the size that
|
||||
* we read. Returns errors if the inode size is unsupported or doesn't
|
||||
* make sense for the format version.
|
||||
*/
|
||||
static int lookup_inode_item(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_inode *sinode, struct scoutfs_lock *lock)
|
||||
{
|
||||
int ret;
|
||||
|
||||
/* the buffer is the full struct, so any supported smaller item size also fits */
ret = scoutfs_item_lookup_within(sb, key, sinode, sizeof(struct scoutfs_inode), lock);
|
||||
/* a non-negative return is the item's size; it must match a known inode format size */
if (ret >= 0 && !valid_inode_fmt_ver_bytes(sb, ret))
|
||||
return -EIO;
|
||||
|
||||
/* negative lookup errors are returned unchanged, success returns the size read */
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Refresh the vfs inode fields if the lock indicates that the current
|
||||
* contents could be stale.
|
||||
@@ -316,13 +394,13 @@ int scoutfs_inode_refresh(struct inode *inode, struct scoutfs_lock *lock)
|
||||
|
||||
mutex_lock(&si->item_mutex);
|
||||
if (atomic64_read(&si->last_refreshed) < refresh_gen) {
|
||||
ret = scoutfs_item_lookup_exact(sb, &key, &sinode,
|
||||
sizeof(sinode), lock);
|
||||
if (ret == 0) {
|
||||
load_inode(inode, &sinode);
|
||||
ret = lookup_inode_item(sb, &key, &sinode, lock);
|
||||
if (ret > 0) {
|
||||
load_inode(inode, &sinode, ret);
|
||||
atomic64_set(&si->last_refreshed, refresh_gen);
|
||||
scoutfs_lock_add_coverage(sb, lock, &si->ino_lock_cov);
|
||||
si->drop_invalidated = false;
|
||||
ret = 0;
|
||||
}
|
||||
} else {
|
||||
ret = 0;
|
||||
@@ -455,6 +533,11 @@ retry:
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_inode_worm_denied(inode)) {
|
||||
ret = -EACCES;
|
||||
goto out;
|
||||
}
|
||||
|
||||
attr_size = (attr->ia_valid & ATTR_SIZE) ? attr->ia_size :
|
||||
i_size_read(inode);
|
||||
|
||||
@@ -767,9 +850,10 @@ out:
|
||||
return inode;
|
||||
}
|
||||
|
||||
static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
|
||||
static void store_inode(struct scoutfs_inode *cinode, struct inode *inode, int inode_bytes)
|
||||
{
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct timespec ts;
|
||||
u64 online_blocks;
|
||||
u64 offline_blocks;
|
||||
|
||||
@@ -803,6 +887,15 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
|
||||
cinode->crtime.sec = cpu_to_le64(si->crtime.tv_sec);
|
||||
cinode->crtime.nsec = cpu_to_le32(si->crtime.tv_nsec);
|
||||
memset(cinode->crtime.__pad, 0, sizeof(cinode->crtime.__pad));
|
||||
|
||||
if (inode_bytes == SCOUTFS_INODE_FMT_V2_BYTES) {
|
||||
scoutfs_inode_get_worm(inode, &ts);
|
||||
|
||||
cinode->worm_level1_expire.sec = cpu_to_le64(ts.tv_sec);
|
||||
cinode->worm_level1_expire.nsec = cpu_to_le32(ts.tv_nsec);
|
||||
memset(cinode->worm_level1_expire.__pad, 0,
|
||||
sizeof(cinode->worm_level1_expire.__pad));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -828,13 +921,15 @@ int scoutfs_dirty_inode_item(struct inode *inode, struct scoutfs_lock *lock)
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
int inode_bytes;
|
||||
int ret;
|
||||
|
||||
store_inode(&sinode, inode);
|
||||
inode_bytes = max_inode_fmt_ver_bytes(sb);
|
||||
store_inode(&sinode, inode, inode_bytes);
|
||||
|
||||
scoutfs_inode_init_key(&key, scoutfs_ino(inode));
|
||||
|
||||
ret = scoutfs_item_update(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
ret = scoutfs_item_update(sb, &key, &sinode, inode_bytes, lock);
|
||||
if (!ret)
|
||||
trace_scoutfs_dirty_inode(inode);
|
||||
return ret;
|
||||
@@ -1035,8 +1130,9 @@ void scoutfs_update_inode_item(struct inode *inode, struct scoutfs_lock *lock,
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
int inode_bytes;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
@@ -1045,15 +1141,17 @@ void scoutfs_update_inode_item(struct inode *inode, struct scoutfs_lock *lock,
|
||||
/* set the meta version once per trans for any inode updates */
|
||||
scoutfs_inode_set_meta_seq(inode);
|
||||
|
||||
inode_bytes = max_inode_fmt_ver_bytes(sb);
|
||||
|
||||
/* only race with other inode field stores once */
|
||||
store_inode(&sinode, inode);
|
||||
store_inode(&sinode, inode, inode_bytes);
|
||||
|
||||
ret = update_indices(sb, si, ino, inode->i_mode, &sinode, lock_list);
|
||||
BUG_ON(ret);
|
||||
|
||||
scoutfs_inode_init_key(&key, ino);
|
||||
|
||||
err = scoutfs_item_update(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
err = scoutfs_item_update(sb, &key, &sinode, inode_bytes, lock);
|
||||
if (err) {
|
||||
scoutfs_err(sb, "inode %llu update err %d", ino, err);
|
||||
BUG_ON(err);
|
||||
@@ -1421,9 +1519,10 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
|
||||
u64 ino, struct scoutfs_lock *lock, struct inode **inode_ret)
|
||||
{
|
||||
struct scoutfs_inode_info *si;
|
||||
struct scoutfs_key key;
|
||||
struct scoutfs_inode sinode;
|
||||
struct scoutfs_key key;
|
||||
struct inode *inode;
|
||||
int inode_bytes;
|
||||
int ret;
|
||||
|
||||
inode = new_inode(sb);
|
||||
@@ -1445,6 +1544,8 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
|
||||
si->drop_invalidated = false;
|
||||
si->flags = 0;
|
||||
|
||||
scoutfs_inode_set_worm(inode, 0, 0);
|
||||
|
||||
scoutfs_inode_set_meta_seq(inode);
|
||||
scoutfs_inode_set_data_seq(inode);
|
||||
|
||||
@@ -1455,14 +1556,16 @@ int scoutfs_new_inode(struct super_block *sb, struct inode *dir, umode_t mode, d
|
||||
inode->i_rdev = rdev;
|
||||
set_inode_ops(inode);
|
||||
|
||||
store_inode(&sinode, inode);
|
||||
inode_bytes = max_inode_fmt_ver_bytes(sb);
|
||||
|
||||
store_inode(&sinode, inode, inode_bytes);
|
||||
scoutfs_inode_init_key(&key, scoutfs_ino(inode));
|
||||
|
||||
ret = scoutfs_omap_set(sb, ino);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_item_create(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
ret = scoutfs_item_create(sb, &key, &sinode, inode_bytes, lock);
|
||||
if (ret < 0)
|
||||
scoutfs_omap_clear(sb, ino);
|
||||
out:
|
||||
@@ -1712,7 +1815,7 @@ static int try_delete_inode_items(struct super_block *sb, u64 ino)
|
||||
}
|
||||
|
||||
scoutfs_inode_init_key(&key, ino);
|
||||
ret = scoutfs_item_lookup_exact(sb, &key, &sinode, sizeof(sinode), lock);
|
||||
ret = lookup_inode_item(sb, &key, &sinode, lock);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
@@ -2069,6 +2172,25 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the inode is protected by worm and the current time is
|
||||
* before the expiration time.
|
||||
*/
|
||||
bool scoutfs_inode_worm_denied(struct inode *inode)
|
||||
{
|
||||
struct timespec expire;
|
||||
struct timespec cur;
|
||||
|
||||
scoutfs_inode_get_worm(inode, &expire);
|
||||
/* an all-zero expiration means no worm time has been set on this inode */
if (expire.tv_sec != 0 || expire.tv_nsec != 0) {
|
||||
cur = CURRENT_TIME;
|
||||
/*
 * NOTE(review): both operands are struct timespec but the
 * comparison helper is the timespec64 variant; this relies on the
 * two types being interchangeable in the target kernel -- confirm,
 * or use the plain timespec helper.
 */
if (timespec64_compare(&cur, &expire) < 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
/* either unprotected or the expiration time has been reached */
return false;
|
||||
}
|
||||
|
||||
int scoutfs_inode_setup(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
@@ -23,6 +23,10 @@ struct scoutfs_inode_info {
|
||||
u64 offline_blocks;
|
||||
u32 flags;
|
||||
struct timespec crtime;
|
||||
struct timespec worm_expire;
|
||||
|
||||
/* Prevent readers from racing with xattr_set */
|
||||
seqlock_t seqlock;
|
||||
|
||||
/*
|
||||
* Protects per-inode extent items, most particularly readers
|
||||
@@ -141,4 +145,8 @@ void scoutfs_inode_orphan_stop(struct super_block *sb);
|
||||
void scoutfs_inode_flush_iput(struct super_block *sb);
|
||||
void scoutfs_inode_destroy(struct super_block *sb);
|
||||
|
||||
void scoutfs_inode_get_worm(struct inode *inode, struct timespec *ts);
|
||||
void scoutfs_inode_set_worm(struct inode *inode, u64 expire_sec, u32 expire_nsec);
|
||||
bool scoutfs_inode_worm_denied(struct inode *inode);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -659,6 +659,11 @@ static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
|
||||
if (ret)
|
||||
goto unlock;
|
||||
|
||||
if (scoutfs_inode_worm_denied(inode)) {
|
||||
ret = -EACCES;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* can only change size/dv on untouched regular files */
|
||||
if ((sm.i_size != 0 || sm.data_version != 0) &&
|
||||
((!S_ISREG(inode->i_mode) ||
|
||||
@@ -823,7 +828,7 @@ static long scoutfs_ioc_search_xattrs(struct file *file, unsigned long arg)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (scoutfs_xattr_parse_tags(name, sx.name_bytes, &tgs) < 0 ||
|
||||
if (scoutfs_xattr_parse_tags(sb, name, sx.name_bytes, &tgs) < 0 ||
|
||||
!tgs.srch) {
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
|
||||
@@ -1697,8 +1697,8 @@ static int copy_val(void *dst, int dst_len, void *src, int src_len)
|
||||
* The amount of bytes copied is returned which can be 0 or truncated if
|
||||
* the caller's buffer isn't big enough.
|
||||
*/
|
||||
int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
static int item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, int len_limit, struct scoutfs_lock *lock)
|
||||
{
|
||||
DECLARE_ITEM_CACHE_INFO(sb, cinf);
|
||||
struct cached_item *item;
|
||||
@@ -1718,6 +1718,8 @@ int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
item = item_rbtree_walk(&pg->item_root, key, NULL, NULL, NULL);
|
||||
if (!item || item->deletion)
|
||||
ret = -ENOENT;
|
||||
else if (len_limit > 0 && item->val_len > len_limit)
|
||||
ret = -EIO;
|
||||
else
|
||||
ret = copy_val(val, val_len, item->val, item->val_len);
|
||||
|
||||
@@ -1726,13 +1728,30 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Public lookup entry point that forwards to item_lookup() with a
 * len_limit of 0.  item_lookup() only applies its -EIO size check when
 * len_limit is greater than 0, so a value larger than val_len is
 * handled by copy_val() rather than rejected.
 */
int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
{
|
||||
return item_lookup(sb, key, val, val_len, 0, lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Return -EIO if the item we find has a value larger than the caller's
|
||||
* val_len, rather than truncating and returning the size of the copied
|
||||
* value.
|
||||
*/
|
||||
int scoutfs_item_lookup_within(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock)
|
||||
{
|
||||
/* passing val_len as the len_limit makes item_lookup() reject larger values */
return item_lookup(sb, key, val, val_len, val_len, lock);
|
||||
}
|
||||
|
||||
int scoutfs_item_lookup_exact(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_item_lookup(sb, key, val, val_len, lock);
|
||||
ret = item_lookup(sb, key, val, val_len, 0, lock);
|
||||
if (ret == val_len)
|
||||
ret = 0;
|
||||
else if (ret >= 0)
|
||||
|
||||
@@ -3,6 +3,8 @@
|
||||
|
||||
int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_lookup_within(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len, struct scoutfs_lock *lock);
|
||||
int scoutfs_item_lookup_exact(struct super_block *sb, struct scoutfs_key *key,
|
||||
void *val, int val_len,
|
||||
struct scoutfs_lock *lock);
|
||||
|
||||
@@ -289,6 +289,7 @@ static struct scoutfs_lock *lock_alloc(struct super_block *sb,
|
||||
lock->sb = sb;
|
||||
init_waitqueue_head(&lock->waitq);
|
||||
lock->mode = SCOUTFS_LOCK_NULL;
|
||||
lock->invalidating_mode = SCOUTFS_LOCK_NULL;
|
||||
|
||||
atomic64_set(&lock->forest_bloom_nr, 0);
|
||||
|
||||
@@ -666,7 +667,9 @@ struct inv_req {
|
||||
*
|
||||
* Before we start invalidating the lock we set the lock to the new
|
||||
* mode, preventing further incompatible users of the old mode from
|
||||
* using the lock while we're invalidating.
|
||||
* using the lock while we're invalidating. We record the previously
|
||||
* granted mode so that we can send lock recover responses with the old
|
||||
* granted mode during invalidation.
|
||||
*/
|
||||
static void lock_invalidate_worker(struct work_struct *work)
|
||||
{
|
||||
@@ -691,7 +694,8 @@ static void lock_invalidate_worker(struct work_struct *work)
|
||||
if (!lock_counts_match(nl->new_mode, lock->users))
|
||||
continue;
|
||||
|
||||
/* set the new mode, no incompatible users during inval */
|
||||
/* set the new mode, no incompatible users during inval, recov needs old */
|
||||
lock->invalidating_mode = lock->mode;
|
||||
lock->mode = nl->new_mode;
|
||||
|
||||
/* move everyone that's ready to our private list */
|
||||
@@ -734,6 +738,8 @@ static void lock_invalidate_worker(struct work_struct *work)
|
||||
list_del(&ireq->head);
|
||||
kfree(ireq);
|
||||
|
||||
lock->invalidating_mode = SCOUTFS_LOCK_NULL;
|
||||
|
||||
if (list_empty(&lock->inv_list)) {
|
||||
/* finish if another request didn't arrive */
|
||||
list_del_init(&lock->inv_head);
|
||||
@@ -824,6 +830,7 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
|
||||
{
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
struct scoutfs_net_lock_recover *nlr;
|
||||
enum scoutfs_lock_mode mode;
|
||||
struct scoutfs_lock *lock;
|
||||
struct scoutfs_lock *next;
|
||||
struct rb_node *node;
|
||||
@@ -844,10 +851,15 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
|
||||
|
||||
for (i = 0; lock && i < SCOUTFS_NET_LOCK_MAX_RECOVER_NR; i++) {
|
||||
|
||||
if (lock->invalidating_mode != SCOUTFS_LOCK_NULL)
|
||||
mode = lock->invalidating_mode;
|
||||
else
|
||||
mode = lock->mode;
|
||||
|
||||
nlr->locks[i].key = lock->start;
|
||||
nlr->locks[i].write_seq = cpu_to_le64(lock->write_seq);
|
||||
nlr->locks[i].old_mode = lock->mode;
|
||||
nlr->locks[i].new_mode = lock->mode;
|
||||
nlr->locks[i].old_mode = mode;
|
||||
nlr->locks[i].new_mode = mode;
|
||||
|
||||
node = rb_next(&lock->node);
|
||||
if (node)
|
||||
|
||||
@@ -39,6 +39,7 @@ struct scoutfs_lock {
|
||||
struct list_head cov_list;
|
||||
|
||||
enum scoutfs_lock_mode mode;
|
||||
enum scoutfs_lock_mode invalidating_mode;
|
||||
unsigned int waiters[SCOUTFS_LOCK_NR_MODES];
|
||||
unsigned int users[SCOUTFS_LOCK_NR_MODES];
|
||||
|
||||
|
||||
@@ -749,7 +749,7 @@ out:
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "lock server err %d during client rid %016llx farewell, shutting down",
|
||||
ret, rid);
|
||||
scoutfs_server_abort(sb);
|
||||
scoutfs_server_stop(sb);
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
||||
@@ -1292,7 +1292,7 @@ restart:
|
||||
if (ret) {
|
||||
scoutfs_err(sb, "client fence returned err %d, shutting down server",
|
||||
ret);
|
||||
scoutfs_server_abort(sb);
|
||||
scoutfs_server_stop(sb);
|
||||
}
|
||||
}
|
||||
destroy_conn(acc);
|
||||
|
||||
@@ -105,6 +105,8 @@ enum quorum_role { FOLLOWER, CANDIDATE, LEADER };
|
||||
struct quorum_status {
|
||||
enum quorum_role role;
|
||||
u64 term;
|
||||
u64 server_start_term;
|
||||
int server_event;
|
||||
int vote_for;
|
||||
unsigned long vote_bits;
|
||||
ktime_t timeout;
|
||||
@@ -117,7 +119,6 @@ struct quorum_info {
|
||||
bool shutdown;
|
||||
|
||||
int our_quorum_slot_nr;
|
||||
unsigned long flags;
|
||||
int votes_needed;
|
||||
|
||||
spinlock_t show_lock;
|
||||
@@ -128,8 +129,6 @@ struct quorum_info {
|
||||
struct scoutfs_sysfs_attrs ssa;
|
||||
};
|
||||
|
||||
#define QINF_FLAG_SERVER 0
|
||||
|
||||
#define DECLARE_QUORUM_INFO(sb, name) \
|
||||
struct quorum_info *name = SCOUTFS_SB(sb)->quorum_info
|
||||
#define DECLARE_QUORUM_INFO_KOBJ(kobj, name) \
|
||||
@@ -494,16 +493,6 @@ static int update_quorum_block(struct super_block *sb, int event, u64 term, bool
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The calling server has fenced previous leaders and reclaimed their
|
||||
* resources. We can now update our fence event with a greater term to
|
||||
* stop future leaders from doing the same.
|
||||
*/
|
||||
int scoutfs_quorum_fence_complete(struct super_block *sb, u64 term)
|
||||
{
|
||||
return update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_FENCE, term, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* The calling server has been elected and has started running but can't
|
||||
* yet assume that it has exclusive access to the metadata device. We
|
||||
@@ -593,15 +582,9 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term)
|
||||
}
|
||||
|
||||
out:
|
||||
if (fence_started) {
|
||||
err = scoutfs_fence_wait_fenced(sb, msecs_to_jiffies(SCOUTFS_QUORUM_FENCE_TO_MS));
|
||||
if (ret == 0)
|
||||
ret = err;
|
||||
} else {
|
||||
err = scoutfs_quorum_fence_complete(sb, term);
|
||||
if (ret == 0)
|
||||
ret = err;
|
||||
}
|
||||
err = scoutfs_fence_wait_fenced(sb, msecs_to_jiffies(SCOUTFS_QUORUM_FENCE_TO_MS));
|
||||
if (ret == 0)
|
||||
ret = err;
|
||||
|
||||
if (ret < 0)
|
||||
scoutfs_inc_counter(sb, quorum_fence_error);
|
||||
@@ -609,12 +592,26 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The main quorum task maintains its private status. It seemed cleaner
|
||||
* to occasionally copy the status for showing in sysfs/debugfs files
|
||||
* than to have the two lock access to shared status. The show copy is
|
||||
* updated after being modified before the quorum task sleeps for a
|
||||
* significant amount of time, either waiting on timeouts or interacting
|
||||
* with the server.
|
||||
*/
|
||||
/* Copy the caller's private status into qinf->show_status under show_lock. */
static void update_show_status(struct quorum_info *qinf, struct quorum_status *qst)
|
||||
{
|
||||
spin_lock(&qinf->show_lock);
|
||||
qinf->show_status = *qst;
|
||||
spin_unlock(&qinf->show_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* The quorum work always runs in the background of quorum member
|
||||
* mounts. It's responsible for starting and stopping the server if
|
||||
* it's elected leader, and the server can call back into it to let it
|
||||
* know that it has shut itself down (perhaps due to error) so that the
|
||||
* work should stop sending heartbeats.
|
||||
* it's elected leader. While it's leader it sends heartbeats to
|
||||
* suppress other quorum work from standing for election.
|
||||
*/
|
||||
static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
{
|
||||
@@ -622,7 +619,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
struct super_block *sb = qinf->sb;
|
||||
struct sockaddr_in unused;
|
||||
struct quorum_host_msg msg;
|
||||
struct quorum_status qst;
|
||||
struct quorum_status qst = {0,};
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
@@ -631,9 +628,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
|
||||
/* start out as a follower */
|
||||
qst.role = FOLLOWER;
|
||||
qst.term = 0;
|
||||
qst.vote_for = -1;
|
||||
qst.vote_bits = 0;
|
||||
|
||||
/* read our starting term from greatest in all events in all slots */
|
||||
read_greatest_term(sb, &qst.term);
|
||||
@@ -651,6 +646,8 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
|
||||
while (!(qinf->shutdown || scoutfs_forcing_unmount(sb))) {
|
||||
|
||||
update_show_status(qinf, &qst);
|
||||
|
||||
ret = recv_msg(sb, &msg, qst.timeout);
|
||||
if (ret < 0) {
|
||||
if (ret != -ETIMEDOUT && ret != -EAGAIN) {
|
||||
@@ -667,24 +664,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
msg.term < qst.term)
|
||||
msg.type = SCOUTFS_QUORUM_MSG_INVALID;
|
||||
|
||||
/* if the server has shutdown we become follower */
|
||||
if (!test_bit(QINF_FLAG_SERVER, &qinf->flags) &&
|
||||
qst.role == LEADER) {
|
||||
qst.role = FOLLOWER;
|
||||
qst.vote_for = -1;
|
||||
qst.vote_bits = 0;
|
||||
qst.timeout = election_timeout();
|
||||
scoutfs_inc_counter(sb, quorum_server_shutdown);
|
||||
|
||||
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION,
|
||||
qst.term);
|
||||
scoutfs_inc_counter(sb, quorum_send_resignation);
|
||||
}
|
||||
|
||||
spin_lock(&qinf->show_lock);
|
||||
qinf->show_status = qst;
|
||||
spin_unlock(&qinf->show_lock);
|
||||
|
||||
trace_scoutfs_quorum_loop(sb, qst.role, qst.term, qst.vote_for,
|
||||
qst.vote_bits,
|
||||
ktime_to_timespec64(qst.timeout));
|
||||
@@ -695,7 +674,6 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
if (qst.role == LEADER) {
|
||||
scoutfs_warn(sb, "saw msg type %u from %u for term %llu while leader in term %llu, shutting down server.",
|
||||
msg.type, msg.from, msg.term, qst.term);
|
||||
scoutfs_server_stop(sb);
|
||||
}
|
||||
qst.role = FOLLOWER;
|
||||
qst.term = msg.term;
|
||||
@@ -717,6 +695,13 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
/* followers and candidates start new election on timeout */
|
||||
if (qst.role != LEADER &&
|
||||
ktime_after(ktime_get(), qst.timeout)) {
|
||||
/* .. but only if their server has stopped */
|
||||
if (!scoutfs_server_is_down(sb)) {
|
||||
qst.timeout = election_timeout();
|
||||
scoutfs_inc_counter(sb, quorum_candidate_server_stopping);
|
||||
continue;
|
||||
}
|
||||
|
||||
qst.role = CANDIDATE;
|
||||
qst.term++;
|
||||
qst.vote_for = -1;
|
||||
@@ -758,29 +743,69 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
qst.term);
|
||||
qst.timeout = heartbeat_interval();
|
||||
|
||||
update_show_status(qinf, &qst);
|
||||
|
||||
/* record that we've been elected before starting up server */
|
||||
ret = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_ELECT, qst.term, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
/* make very sure server is fully shut down */
|
||||
scoutfs_server_stop(sb);
|
||||
/* set server bit before server shutdown could clear */
|
||||
set_bit(QINF_FLAG_SERVER, &qinf->flags);
|
||||
qst.server_start_term = qst.term;
|
||||
qst.server_event = SCOUTFS_QUORUM_EVENT_ELECT;
|
||||
scoutfs_server_start(sb, qst.term);
|
||||
}
|
||||
|
||||
ret = scoutfs_server_start(sb, qst.term);
|
||||
if (ret < 0) {
|
||||
clear_bit(QINF_FLAG_SERVER, &qinf->flags);
|
||||
/* store our increased term */
|
||||
err = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP, qst.term,
|
||||
true);
|
||||
if (err < 0) {
|
||||
ret = err;
|
||||
goto out;
|
||||
}
|
||||
ret = 0;
|
||||
continue;
|
||||
/*
|
||||
* This leader's server is up, having finished fencing
|
||||
* previous leaders. We update the fence event with the
|
||||
* current term to let future leaders know that previous
|
||||
* servers have been fenced.
|
||||
*/
|
||||
if (qst.role == LEADER && qst.server_event != SCOUTFS_QUORUM_EVENT_FENCE &&
|
||||
scoutfs_server_is_up(sb)) {
|
||||
ret = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_FENCE, qst.term, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
qst.server_event = SCOUTFS_QUORUM_EVENT_FENCE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Stop a running server if we're no longer leader in
|
||||
* its term.
|
||||
*/
|
||||
if (!(qst.role == LEADER && qst.term == qst.server_start_term) &&
|
||||
scoutfs_server_is_running(sb)) {
|
||||
scoutfs_server_stop(sb);
|
||||
}
|
||||
|
||||
/*
|
||||
* A previously running server has stopped. The quorum
|
||||
* protocol might have shut it down by changing roles or
|
||||
* it might have stopped on its own, perhaps on errors.
|
||||
* If we're still a leader then we become a follower and
|
||||
* send resignations to encourage the next election.
|
||||
* Always update the _STOP event to stop connections and
|
||||
* fencing.
|
||||
*/
|
||||
if (qst.server_start_term > 0 && scoutfs_server_is_down(sb)) {
|
||||
if (qst.role == LEADER) {
|
||||
qst.role = FOLLOWER;
|
||||
qst.vote_for = -1;
|
||||
qst.vote_bits = 0;
|
||||
qst.timeout = election_timeout();
|
||||
scoutfs_inc_counter(sb, quorum_server_shutdown);
|
||||
|
||||
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION,
|
||||
qst.server_start_term);
|
||||
scoutfs_inc_counter(sb, quorum_send_resignation);
|
||||
}
|
||||
|
||||
ret = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP,
|
||||
qst.server_start_term, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
qst.server_start_term = 0;
|
||||
}
|
||||
|
||||
/* leaders regularly send heartbeats to delay elections */
|
||||
@@ -817,12 +842,19 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
}
|
||||
}
|
||||
|
||||
update_show_status(qinf, &qst);
|
||||
|
||||
/* always try to stop a running server as we stop */
|
||||
if (test_bit(QINF_FLAG_SERVER, &qinf->flags)) {
|
||||
scoutfs_server_stop(sb);
|
||||
scoutfs_fence_stop(sb);
|
||||
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION,
|
||||
qst.term);
|
||||
if (scoutfs_server_is_running(sb)) {
|
||||
scoutfs_server_stop_wait(sb);
|
||||
send_msg_others(sb, SCOUTFS_QUORUM_MSG_RESIGNATION, qst.term);
|
||||
|
||||
if (qst.server_start_term > 0) {
|
||||
err = update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP,
|
||||
qst.server_start_term, true);
|
||||
if (err < 0 && ret == 0)
|
||||
ret = err;
|
||||
}
|
||||
}
|
||||
|
||||
/* record that this slot no longer has an active quorum */
|
||||
@@ -834,21 +866,6 @@ out:
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The calling server has shutdown and is no longer using shared
|
||||
* resources. Clear the bit so that we stop sending heartbeats and
|
||||
* allow the next server to be elected. Update the stop event so that
|
||||
* it won't be considered available by clients or fenced by the next
|
||||
* leader.
|
||||
*/
|
||||
void scoutfs_quorum_server_shutdown(struct super_block *sb, u64 term)
|
||||
{
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
|
||||
clear_bit(QINF_FLAG_SERVER, &qinf->flags);
|
||||
update_quorum_block(sb, SCOUTFS_QUORUM_EVENT_STOP, term, true);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clients read quorum blocks looking for the leader with a server whose
|
||||
* address it can try and connect to.
|
||||
@@ -970,6 +987,8 @@ static ssize_t status_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||
qinf->our_quorum_slot_nr);
|
||||
snprintf_ret(buf, size, &ret, "term %llu\n",
|
||||
qst.term);
|
||||
snprintf_ret(buf, size, &ret, "server_start_term %llu\n", qst.server_start_term);
|
||||
snprintf_ret(buf, size, &ret, "server_event %d\n", qst.server_event);
|
||||
snprintf_ret(buf, size, &ret, "role %d (%s)\n",
|
||||
qst.role, role_str(qst.role));
|
||||
snprintf_ret(buf, size, &ret, "vote_for %d\n",
|
||||
|
||||
@@ -2,14 +2,12 @@
|
||||
#define _SCOUTFS_QUORUM_H_
|
||||
|
||||
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin);
|
||||
void scoutfs_quorum_server_shutdown(struct super_block *sb, u64 term);
|
||||
|
||||
u8 scoutfs_quorum_votes_needed(struct super_block *sb);
|
||||
void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i,
|
||||
struct sockaddr_in *sin);
|
||||
|
||||
int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term);
|
||||
int scoutfs_quorum_fence_complete(struct super_block *sb, u64 term);
|
||||
|
||||
int scoutfs_quorum_setup(struct super_block *sb);
|
||||
void scoutfs_quorum_shutdown(struct super_block *sb);
|
||||
|
||||
@@ -1843,6 +1843,53 @@ DEFINE_EVENT(scoutfs_server_client_count_class, scoutfs_server_client_down,
|
||||
TP_ARGS(sb, rid, nr_clients)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
|
||||
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
|
||||
u32 avail_before, u32 freed_before, int exceeded),
|
||||
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded),
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(int, holding)
|
||||
__field(int, applying)
|
||||
__field(int, nr_holders)
|
||||
__field(__u32, avail_before)
|
||||
__field(__u32, freed_before)
|
||||
__field(int, exceeded)
|
||||
),
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->holding = !!holding;
|
||||
__entry->applying = !!applying;
|
||||
__entry->nr_holders = nr_holders;
|
||||
__entry->avail_before = avail_before;
|
||||
__entry->freed_before = freed_before;
|
||||
__entry->exceeded = !!exceeded;
|
||||
),
|
||||
TP_printk(SCSBF" holding %u applying %u nr %u avail_before %u freed_before %u exceeded %u",
|
||||
SCSB_TRACE_ARGS, __entry->holding, __entry->applying, __entry->nr_holders,
|
||||
__entry->avail_before, __entry->freed_before, __entry->exceeded)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_hold,
|
||||
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
|
||||
u32 avail_before, u32 freed_before, int exceeded),
|
||||
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_apply,
|
||||
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
|
||||
u32 avail_before, u32 freed_before, int exceeded),
|
||||
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_start,
|
||||
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
|
||||
u32 avail_before, u32 freed_before, int exceeded),
|
||||
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_end,
|
||||
TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
|
||||
u32 avail_before, u32 freed_before, int exceeded),
|
||||
TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
|
||||
);
|
||||
|
||||
#define slt_symbolic(mode) \
|
||||
__print_symbolic(mode, \
|
||||
{ SLT_CLIENT, "client" }, \
|
||||
|
||||
@@ -52,6 +52,41 @@
|
||||
* mount will become the leader and have less trouble.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Tracks all the holders and commit work that are operating on server
|
||||
* commits. It synchronizes holders modifying the blocks in the commit
|
||||
* and the commit work writing dirty blocks that make up a consistent
|
||||
* commit. It limits the number of active holders so that they don't
|
||||
* fully consume the allocation resources prepared for a commit.
|
||||
*/
|
||||
struct commit_users {
|
||||
wait_queue_head_t waitq;
|
||||
spinlock_t lock;
|
||||
struct list_head holding;
|
||||
struct list_head applying;
|
||||
unsigned int nr_holders;
|
||||
u32 avail_before;
|
||||
u32 freed_before;
|
||||
bool exceeded;
|
||||
};
|
||||
|
||||
static void init_commit_users(struct commit_users *cusers)
|
||||
{
|
||||
memset(cusers, 0, sizeof(struct commit_users));
|
||||
init_waitqueue_head(&cusers->waitq);
|
||||
spin_lock_init(&cusers->lock);
|
||||
INIT_LIST_HEAD(&cusers->holding);
|
||||
INIT_LIST_HEAD(&cusers->applying);
|
||||
}
|
||||
|
||||
#define TRACE_COMMIT_USERS(sb, cusers, which) \
|
||||
do { \
|
||||
__typeof__(cusers) _cusers = (cusers); \
|
||||
trace_scoutfs_server_commit_##which(sb, !list_empty(&_cusers->holding), \
|
||||
!list_empty(&_cusers->applying), _cusers->nr_holders, _cusers->avail_before, \
|
||||
_cusers->freed_before, _cusers->exceeded); \
|
||||
} while (0)
|
||||
|
||||
struct server_info {
|
||||
struct super_block *sb;
|
||||
spinlock_t lock;
|
||||
@@ -59,9 +94,7 @@ struct server_info {
|
||||
|
||||
struct workqueue_struct *wq;
|
||||
struct work_struct work;
|
||||
int err;
|
||||
bool shutting_down;
|
||||
struct completion start_comp;
|
||||
int status;
|
||||
u64 term;
|
||||
struct scoutfs_net_connection *conn;
|
||||
|
||||
@@ -69,8 +102,7 @@ struct server_info {
|
||||
atomic64_t seq_atomic;
|
||||
|
||||
/* request processing coordinates shared commits */
|
||||
struct rw_semaphore commit_rwsem;
|
||||
struct llist_head commit_waiters;
|
||||
struct commit_users cusers;
|
||||
struct work_struct commit_work;
|
||||
|
||||
struct list_head clients;
|
||||
@@ -155,87 +187,286 @@ static bool get_volopt_val(struct server_info *server, int nr, u64 *val)
|
||||
return is_set;
|
||||
}
|
||||
|
||||
|
||||
struct commit_waiter {
|
||||
struct completion comp;
|
||||
struct llist_node node;
|
||||
int ret;
|
||||
enum {
|
||||
SERVER_NOP = 0,
|
||||
SERVER_STARTING,
|
||||
SERVER_UP,
|
||||
SERVER_STOPPING,
|
||||
SERVER_DOWN,
|
||||
};
|
||||
|
||||
static bool test_shutting_down(struct server_info *server)
|
||||
bool scoutfs_server_is_running(struct super_block *sb)
|
||||
{
|
||||
smp_rmb();
|
||||
return server->shutting_down;
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
long was = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);
|
||||
|
||||
return was == SERVER_STARTING || was == SERVER_UP;
|
||||
}
|
||||
|
||||
static void set_shutting_down(struct server_info *server, bool val)
|
||||
bool scoutfs_server_is_up(struct super_block *sb)
|
||||
{
|
||||
server->shutting_down = val;
|
||||
smp_wmb();
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
return cmpxchg(&server->status, SERVER_NOP, SERVER_NOP) == SERVER_UP;
|
||||
}
|
||||
|
||||
bool scoutfs_server_is_down(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
return cmpxchg(&server->status, SERVER_NOP, SERVER_NOP) == SERVER_DOWN;
|
||||
}
|
||||
|
||||
static bool server_is_stopping(struct server_info *server)
|
||||
{
|
||||
return cmpxchg(&server->status, SERVER_NOP, SERVER_NOP) == SERVER_STOPPING;
|
||||
}
|
||||
|
||||
static void stop_server(struct server_info *server)
|
||||
{
|
||||
set_shutting_down(server, true);
|
||||
wake_up(&server->waitq);
|
||||
long was = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);
|
||||
|
||||
if ((was == SERVER_STARTING || was == SERVER_UP) &&
|
||||
cmpxchg(&server->status, was, SERVER_STOPPING) == was)
|
||||
wake_up(&server->waitq);
|
||||
}
|
||||
|
||||
static void server_up(struct server_info *server)
|
||||
{
|
||||
cmpxchg(&server->status, SERVER_STARTING, SERVER_UP);
|
||||
}
|
||||
|
||||
static void server_down(struct server_info *server)
|
||||
{
|
||||
long was = cmpxchg(&server->status, SERVER_NOP, SERVER_NOP);
|
||||
|
||||
if (was != SERVER_DOWN)
|
||||
cmpxchg(&server->status, was, SERVER_DOWN);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hold the shared rwsem that lets multiple holders modify blocks in the
|
||||
* current commit and prevents the commit worker from acquiring the
|
||||
* exclusive write lock to write the commit.
|
||||
* The per-holder allocation block use budget balances batching
|
||||
* efficiency and concurrency. The larger this gets, the fewer
|
||||
* concurrent server operations can be performed in one commit. Commits
|
||||
* are immediately written after being dirtied so this really only
|
||||
* limits immediate concurrency under load, not batching over time as
|
||||
* one might expect if commits were long lived.
|
||||
*
|
||||
* This is exported for server components isolated in their own files
|
||||
* (lock_server) and which are not called directly by the server core
|
||||
* (async timeout work).
|
||||
* The upper bound is determined by the server commit hold path that can
|
||||
* dirty the most blocks.
|
||||
*/
|
||||
void scoutfs_server_hold_commit(struct super_block *sb)
|
||||
#define COMMIT_HOLD_ALLOC_BUDGET 500
|
||||
|
||||
struct commit_hold {
|
||||
struct list_head entry;
|
||||
ktime_t start;
|
||||
u32 avail;
|
||||
u32 freed;
|
||||
int ret;
|
||||
bool exceeded;
|
||||
};
|
||||
|
||||
#define COMMIT_HOLD(name) \
|
||||
struct commit_hold name = { .entry = LIST_HEAD_INIT(name.entry) }
|
||||
|
||||
/*
|
||||
* See if the currently active holders have, all together, consumed more
|
||||
* allocation resources than they were allowed. We don't have
|
||||
* per-holder allocation consumption tracking. The best we can do is
|
||||
* flag all the current holders so that as they release we can see
|
||||
* everyone involved in crossing the limit.
|
||||
*/
|
||||
static void check_holder_budget(struct super_block *sb, struct server_info *server,
|
||||
struct commit_users *cusers)
|
||||
{
|
||||
static bool exceeded_once = false;
|
||||
struct commit_hold *hold;
|
||||
struct timespec ts;
|
||||
u32 avail_used;
|
||||
u32 freed_used;
|
||||
u32 avail_now;
|
||||
u32 freed_now;
|
||||
u32 budget;
|
||||
|
||||
assert_spin_locked(&cusers->lock);
|
||||
|
||||
if (cusers->exceeded || cusers->nr_holders == 0 || exceeded_once)
|
||||
return;
|
||||
|
||||
scoutfs_alloc_meta_remaining(&server->alloc, &avail_now, &freed_now);
|
||||
avail_used = cusers->avail_before - avail_now;
|
||||
freed_used = cusers->freed_before - freed_now;
|
||||
budget = cusers->nr_holders * COMMIT_HOLD_ALLOC_BUDGET;
|
||||
if (avail_used <= budget && freed_used <= budget)
|
||||
return;
|
||||
|
||||
exceeded_once = true;
|
||||
cusers->exceeded = cusers->nr_holders;
|
||||
|
||||
scoutfs_err(sb, "%u holders exceeded alloc budget av: bef %u now %u, fr: bef %u now %u",
|
||||
cusers->nr_holders, cusers->avail_before, avail_now,
|
||||
cusers->freed_before, freed_now);
|
||||
|
||||
list_for_each_entry(hold, &cusers->holding, entry) {
|
||||
ts = ktime_to_timespec(hold->start);
|
||||
scoutfs_err(sb, "exceeding hold start %llu.%09llu av %u fr %u",
|
||||
(u64)ts.tv_sec, (u64)ts.tv_nsec, hold->avail, hold->freed);
|
||||
hold->exceeded = true;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* We don't have per-holder consumption. We allow commit holders as
|
||||
* long as the total budget of all the holders doesn't exceed the alloc
|
||||
* resources that were available.
|
||||
*/
|
||||
static bool commit_alloc_has_room(struct server_info *server, struct commit_users *cusers,
|
||||
unsigned int more_holders)
|
||||
{
|
||||
u32 avail_before;
|
||||
u32 freed_before;
|
||||
u32 budget;
|
||||
|
||||
if (cusers->nr_holders > 0) {
|
||||
avail_before = cusers->avail_before;
|
||||
freed_before = cusers->freed_before;
|
||||
} else {
|
||||
scoutfs_alloc_meta_remaining(&server->alloc, &avail_before, &freed_before);
|
||||
}
|
||||
|
||||
budget = (cusers->nr_holders + more_holders) * COMMIT_HOLD_ALLOC_BUDGET;
|
||||
|
||||
return avail_before >= budget && freed_before >= budget;
|
||||
}
|
||||
|
||||
static bool hold_commit(struct super_block *sb, struct server_info *server,
|
||||
struct commit_users *cusers, struct commit_hold *hold)
|
||||
{
|
||||
bool held = false;
|
||||
|
||||
spin_lock(&cusers->lock);
|
||||
|
||||
TRACE_COMMIT_USERS(sb, cusers, hold);
|
||||
|
||||
check_holder_budget(sb, server, cusers);
|
||||
|
||||
/* +2 for our additional hold and then for the final commit work the server does */
|
||||
if (list_empty(&cusers->applying) && commit_alloc_has_room(server, cusers, 2)) {
|
||||
scoutfs_alloc_meta_remaining(&server->alloc, &hold->avail, &hold->freed);
|
||||
if (cusers->nr_holders == 0) {
|
||||
cusers->avail_before = hold->avail;
|
||||
cusers->freed_before = hold->freed;
|
||||
cusers->exceeded = false;
|
||||
}
|
||||
hold->exceeded = false;
|
||||
hold->start = ktime_get();
|
||||
list_add_tail(&hold->entry, &cusers->holding);
|
||||
cusers->nr_holders++;
|
||||
held = true;
|
||||
}
|
||||
|
||||
spin_unlock(&cusers->lock);
|
||||
|
||||
return held;
|
||||
}
|
||||
|
||||
/*
|
||||
* Hold the server commit so that we can make a consistent change to the
|
||||
* dirty blocks in the commit. The commit won't be written while we
|
||||
* hold it.
|
||||
*/
|
||||
static void server_hold_commit(struct super_block *sb, struct commit_hold *hold)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct commit_users *cusers = &server->cusers;
|
||||
|
||||
BUG_ON(!list_empty(&hold->entry));
|
||||
|
||||
scoutfs_inc_counter(sb, server_commit_hold);
|
||||
|
||||
down_read(&server->commit_rwsem);
|
||||
wait_event(cusers->waitq, hold_commit(sb, server, cusers, hold));
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called while holding the commit and returns once the commit
|
||||
* is successfully written. Many holders can all wait for all holders
|
||||
* to drain before their shared commit is applied and they're all woken.
|
||||
*
|
||||
* It's important to realize that our commit_waiter list node might be
|
||||
* serviced by a currently executing commit work that is blocked waiting
|
||||
* for the holders to release the commit_rwsem. This caller can return
|
||||
* from wait_for_commit() while another future commit_work is still
|
||||
* queued.
|
||||
*
|
||||
* This could queue delayed work but we're first trying to have batching
|
||||
* work by having concurrent modification line up behind a commit in
|
||||
* flight. Once the commit finishes it'll unlock and hopefully everyone
|
||||
* will race to make their changes and they'll all be applied by the
|
||||
* next commit after that.
|
||||
*/
|
||||
int scoutfs_server_apply_commit(struct super_block *sb, int err)
|
||||
static int server_apply_commit(struct super_block *sb, struct commit_hold *hold, int err)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct commit_waiter cw;
|
||||
struct commit_users *cusers = &server->cusers;
|
||||
struct timespec ts;
|
||||
bool start_commit;
|
||||
|
||||
spin_lock(&cusers->lock);
|
||||
|
||||
TRACE_COMMIT_USERS(sb, cusers, apply);
|
||||
|
||||
check_holder_budget(sb, server, cusers);
|
||||
|
||||
if (hold->exceeded) {
|
||||
ts = ktime_to_timespec(hold->start);
|
||||
scoutfs_err(sb, "exceeding hold start %llu.%09llu stack:",
|
||||
(u64)ts.tv_sec, (u64)ts.tv_nsec);
|
||||
dump_stack();
|
||||
}
|
||||
|
||||
if (err == 0) {
|
||||
cw.ret = 0;
|
||||
init_completion(&cw.comp);
|
||||
llist_add(&cw.node, &server->commit_waiters);
|
||||
scoutfs_inc_counter(sb, server_commit_queue);
|
||||
list_move_tail(&hold->entry, &cusers->applying);
|
||||
} else {
|
||||
list_del_init(&hold->entry);
|
||||
hold->ret = err;
|
||||
}
|
||||
cusers->nr_holders--;
|
||||
start_commit = cusers->nr_holders == 0 && !list_empty(&cusers->applying);
|
||||
spin_unlock(&cusers->lock);
|
||||
|
||||
if (start_commit)
|
||||
queue_work(server->wq, &server->commit_work);
|
||||
}
|
||||
|
||||
up_read(&server->commit_rwsem);
|
||||
wait_event(cusers->waitq, list_empty_careful(&hold->entry));
|
||||
smp_rmb(); /* entry load before ret */
|
||||
return hold->ret;
|
||||
}
|
||||
|
||||
if (err == 0) {
|
||||
wait_for_completion(&cw.comp);
|
||||
err = cw.ret;
|
||||
}
|
||||
/*
|
||||
* Start a commit from the commit work. We should only have been queued
|
||||
* while a holder is waiting to apply after all active holders have
|
||||
* finished.
|
||||
*/
|
||||
static int commit_start(struct super_block *sb, struct commit_users *cusers)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
return err;
|
||||
/* make sure holders held off once commit started */
|
||||
spin_lock(&cusers->lock);
|
||||
TRACE_COMMIT_USERS(sb, cusers, start);
|
||||
if (WARN_ON_ONCE(list_empty(&cusers->applying) || cusers->nr_holders != 0))
|
||||
ret = -EINVAL;
|
||||
spin_unlock(&cusers->lock);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Finish a commit from the commit work. Give the result to all the
|
||||
* holders who are waiting for the commit to be applied.
|
||||
*/
|
||||
static void commit_end(struct super_block *sb, struct commit_users *cusers, int ret)
|
||||
{
|
||||
struct commit_hold *hold;
|
||||
struct commit_hold *tmp;
|
||||
|
||||
spin_lock(&cusers->lock);
|
||||
TRACE_COMMIT_USERS(sb, cusers, end);
|
||||
list_for_each_entry(hold, &cusers->applying, entry)
|
||||
hold->ret = ret;
|
||||
smp_wmb(); /* ret stores before list updates */
|
||||
list_for_each_entry_safe(hold, tmp, &cusers->applying, entry)
|
||||
list_del_init(&hold->entry);
|
||||
spin_unlock(&cusers->lock);
|
||||
|
||||
wake_up(&cusers->waitq);
|
||||
}
|
||||
|
||||
static void get_roots(struct super_block *sb,
|
||||
@@ -297,19 +528,17 @@ static void set_roots(struct server_info *server,
|
||||
* Concurrent request processing dirties blocks in a commit and makes
|
||||
* the modifications persistent before replying. We'd like to batch
|
||||
* these commits as much as is reasonable so that we don't degrade to a
|
||||
* few IO round trips per request.
|
||||
* few synchronous IOs per request.
|
||||
*
|
||||
* Getting that batching right is bound up in the concurrency of request
|
||||
* processing so a clear way to implement the batched commits is to
|
||||
* implement commits with a single pending work func like the
|
||||
* processing.
|
||||
* implement commits with a single pending work func.
|
||||
*
|
||||
* Processing paths acquire the rwsem for reading while they're making
|
||||
* multiple dependent changes. When they're done and want it persistent
|
||||
* they add themselves to the list of waiters and queue the commit work.
|
||||
* This work runs, acquires the lock to exclude other writers, and
|
||||
* performs the commit. Readers can run concurrently with these
|
||||
* commits.
|
||||
* Processing paths hold the commit while they're making multiple
|
||||
* dependent changes. When they're done and want it persistent they
|
||||
* queue the commit work. This work runs, performs the commit, and
|
||||
* wakes all the applying waiters with the result. Readers can run
|
||||
* concurrently with these commits.
|
||||
*/
|
||||
static void scoutfs_server_commit_func(struct work_struct *work)
|
||||
{
|
||||
@@ -317,15 +546,15 @@ static void scoutfs_server_commit_func(struct work_struct *work)
|
||||
commit_work);
|
||||
struct super_block *sb = server->sb;
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct commit_waiter *cw;
|
||||
struct commit_waiter *pos;
|
||||
struct llist_node *node;
|
||||
struct commit_users *cusers = &server->cusers;
|
||||
int ret;
|
||||
|
||||
trace_scoutfs_server_commit_work_enter(sb, 0, 0);
|
||||
scoutfs_inc_counter(sb, server_commit_worker);
|
||||
|
||||
down_write(&server->commit_rwsem);
|
||||
ret = commit_start(sb, cusers);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (scoutfs_forcing_unmount(sb)) {
|
||||
ret = -EIO;
|
||||
@@ -402,15 +631,8 @@ static void scoutfs_server_commit_func(struct work_struct *work)
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
node = llist_del_all(&server->commit_waiters);
|
||||
commit_end(sb, cusers, ret);
|
||||
|
||||
/* waiters always wait on completion, cw could be free after complete */
|
||||
llist_for_each_entry_safe(cw, pos, node, node) {
|
||||
cw->ret = ret;
|
||||
complete(&cw->comp);
|
||||
}
|
||||
|
||||
up_write(&server->commit_rwsem);
|
||||
trace_scoutfs_server_commit_work_exit(sb, 0, ret);
|
||||
}
|
||||
|
||||
@@ -421,6 +643,7 @@ static int server_alloc_inodes(struct super_block *sb,
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_net_inode_alloc ial = { 0, };
|
||||
COMMIT_HOLD(hold);
|
||||
__le64 lecount;
|
||||
u64 ino;
|
||||
u64 nr;
|
||||
@@ -433,7 +656,7 @@ static int server_alloc_inodes(struct super_block *sb,
|
||||
|
||||
memcpy(&lecount, arg, arg_len);
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
|
||||
spin_lock(&sbi->next_ino_lock);
|
||||
ino = le64_to_cpu(super->next_ino);
|
||||
@@ -441,7 +664,7 @@ static int server_alloc_inodes(struct super_block *sb,
|
||||
le64_add_cpu(&super->next_ino, nr);
|
||||
spin_unlock(&sbi->next_ino_lock);
|
||||
|
||||
ret = scoutfs_server_apply_commit(sb, 0);
|
||||
ret = server_apply_commit(sb, &hold, 0);
|
||||
if (ret == 0) {
|
||||
ial.ino = cpu_to_le64(ino);
|
||||
ial.nr = cpu_to_le64(nr);
|
||||
@@ -819,7 +1042,7 @@ static int next_log_merge_item(struct super_block *sb,
|
||||
#define FINALIZE_POLL_MS (11)
|
||||
#define FINALIZE_TIMEOUT_MS (MSEC_PER_SEC / 2)
|
||||
static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_log_trees *lt,
|
||||
u64 rid)
|
||||
u64 rid, struct commit_hold *hold)
|
||||
{
|
||||
struct server_info *server = SCOUTFS_SB(sb)->server_info;
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
@@ -945,13 +1168,13 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
/* wait a bit for mounts to arrive */
|
||||
if (others_active) {
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
ret = scoutfs_server_apply_commit(sb, 0);
|
||||
ret = server_apply_commit(sb, hold, 0);
|
||||
if (ret < 0)
|
||||
err_str = "applying commit before waiting for finalized";
|
||||
|
||||
msleep(FINALIZE_POLL_MS);
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
|
||||
/* done if we timed out */
|
||||
@@ -1044,6 +1267,7 @@ static int server_get_log_trees(struct super_block *sb,
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_key key;
|
||||
bool unlock_alloc = false;
|
||||
COMMIT_HOLD(hold);
|
||||
u64 data_zone_blocks;
|
||||
char *err_str = NULL;
|
||||
u64 nr;
|
||||
@@ -1054,7 +1278,7 @@ static int server_get_log_trees(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
|
||||
mutex_lock(&server->logs_mutex);
|
||||
|
||||
@@ -1092,7 +1316,7 @@ static int server_get_log_trees(struct super_block *sb,
|
||||
}
|
||||
|
||||
/* drops and re-acquires the mutex and commit if it has to wait */
|
||||
ret = finalize_and_start_log_merge(sb, <, rid);
|
||||
ret = finalize_and_start_log_merge(sb, <, rid, &hold);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
|
||||
@@ -1187,7 +1411,7 @@ unlock:
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
out:
|
||||
if (ret < 0)
|
||||
scoutfs_err(sb, "error %d getting log trees for rid %016llx: %s",
|
||||
@@ -1213,6 +1437,7 @@ static int server_commit_log_trees(struct super_block *sb,
|
||||
struct scoutfs_log_trees *exist;
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_key key;
|
||||
COMMIT_HOLD(hold);
|
||||
char *err_str = NULL;
|
||||
bool committed = false;
|
||||
int ret;
|
||||
@@ -1231,7 +1456,7 @@ static int server_commit_log_trees(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
|
||||
mutex_lock(&server->logs_mutex);
|
||||
|
||||
@@ -1280,7 +1505,7 @@ static int server_commit_log_trees(struct super_block *sb,
|
||||
unlock:
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
if (ret < 0)
|
||||
scoutfs_err(sb, "server error %d committing client logs for rid %016llx: %s",
|
||||
ret, rid, err_str);
|
||||
@@ -1589,6 +1814,7 @@ static int server_srch_get_compact(struct super_block *sb,
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_srch_compact *sc = NULL;
|
||||
COMMIT_HOLD(hold);
|
||||
int ret;
|
||||
|
||||
if (arg_len != 0) {
|
||||
@@ -1602,7 +1828,7 @@ static int server_srch_get_compact(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
|
||||
mutex_lock(&server->srch_mutex);
|
||||
ret = scoutfs_srch_get_compact(sb, &server->alloc, &server->wri,
|
||||
@@ -1630,7 +1856,7 @@ static int server_srch_get_compact(struct super_block *sb,
|
||||
mutex_unlock(&server->srch_mutex);
|
||||
|
||||
apply:
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
WARN_ON_ONCE(ret < 0 && ret != -ENOENT); /* XXX leaked busy item */
|
||||
out:
|
||||
ret = scoutfs_net_response(sb, conn, cmd, id, ret,
|
||||
@@ -1656,6 +1882,7 @@ static int server_srch_commit_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *sc;
|
||||
struct scoutfs_alloc_list_head av;
|
||||
struct scoutfs_alloc_list_head fr;
|
||||
COMMIT_HOLD(hold);
|
||||
int ret;
|
||||
|
||||
if (arg_len != sizeof(struct scoutfs_srch_compact)) {
|
||||
@@ -1664,7 +1891,7 @@ static int server_srch_commit_compact(struct super_block *sb,
|
||||
}
|
||||
sc = arg;
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
|
||||
mutex_lock(&server->srch_mutex);
|
||||
ret = scoutfs_srch_commit_compact(sb, &server->alloc, &server->wri,
|
||||
@@ -1682,7 +1909,7 @@ static int server_srch_commit_compact(struct super_block *sb,
|
||||
server->other_freed, &fr);
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
apply:
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
out:
|
||||
WARN_ON(ret < 0); /* XXX leaks allocators */
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
|
||||
@@ -2047,13 +2274,14 @@ static void server_log_merge_free_work(struct work_struct *work)
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_log_merge_freeing fr;
|
||||
struct scoutfs_key key;
|
||||
COMMIT_HOLD(hold);
|
||||
char *err_str = NULL;
|
||||
bool commit = false;
|
||||
int ret = 0;
|
||||
|
||||
/* shutdown waits for us, we'll eventually load set shutting_down */
|
||||
while (!server->shutting_down) {
|
||||
scoutfs_server_hold_commit(sb);
|
||||
while (!server_is_stopping(server)) {
|
||||
|
||||
server_hold_commit(sb, &hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
commit = true;
|
||||
|
||||
@@ -2083,7 +2311,7 @@ static void server_log_merge_free_work(struct work_struct *work)
|
||||
|
||||
ret = scoutfs_btree_free_blocks(sb, &server->alloc,
|
||||
&server->wri, &fr.key,
|
||||
&fr.root, 10);
|
||||
&fr.root, COMMIT_HOLD_ALLOC_BUDGET / 2);
|
||||
if (ret < 0) {
|
||||
err_str = "freeing log btree";
|
||||
break;
|
||||
@@ -2103,7 +2331,7 @@ static void server_log_merge_free_work(struct work_struct *work)
|
||||
BUG_ON(ret < 0);
|
||||
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
commit = false;
|
||||
if (ret < 0) {
|
||||
err_str = "looping commit del/upd freeing item";
|
||||
@@ -2113,7 +2341,7 @@ static void server_log_merge_free_work(struct work_struct *work)
|
||||
|
||||
if (commit) {
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
if (ret < 0)
|
||||
err_str = "final commit del/upd freeing item";
|
||||
}
|
||||
@@ -2145,6 +2373,7 @@ static int server_get_log_merge(struct super_block *sb,
|
||||
struct scoutfs_key par_end;
|
||||
struct scoutfs_key next_key;
|
||||
struct scoutfs_key key;
|
||||
COMMIT_HOLD(hold);
|
||||
char *err_str = NULL;
|
||||
bool ins_rng;
|
||||
bool del_remain;
|
||||
@@ -2158,7 +2387,7 @@ static int server_get_log_merge(struct super_block *sb,
|
||||
if (arg_len != 0)
|
||||
return -EINVAL;
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
|
||||
restart:
|
||||
@@ -2401,7 +2630,7 @@ out:
|
||||
}
|
||||
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, &req, sizeof(req));
|
||||
}
|
||||
@@ -2425,6 +2654,7 @@ static int server_commit_log_merge(struct super_block *sb,
|
||||
struct scoutfs_log_merge_status stat;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
struct scoutfs_key key;
|
||||
COMMIT_HOLD(hold);
|
||||
char *err_str = NULL;
|
||||
bool deleted = false;
|
||||
int ret = 0;
|
||||
@@ -2442,7 +2672,7 @@ static int server_commit_log_merge(struct super_block *sb,
|
||||
le64_to_cpu(comp->seq),
|
||||
le64_to_cpu(comp->flags));
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
|
||||
/* find the status of the current log merge */
|
||||
@@ -2535,7 +2765,7 @@ out:
|
||||
if (ret < 0 && err_str)
|
||||
scoutfs_err(sb, "error %d committing log merge: %s", ret, err_str);
|
||||
|
||||
err = scoutfs_server_apply_commit(sb, ret);
|
||||
err = server_apply_commit(sb, &hold, ret);
|
||||
BUG_ON(ret < 0 && deleted); /* inconsistent */
|
||||
|
||||
if (ret == 0)
|
||||
@@ -2655,6 +2885,7 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_volume_options *volopt;
|
||||
COMMIT_HOLD(hold);
|
||||
u64 opt;
|
||||
u64 nr;
|
||||
int ret = 0;
|
||||
@@ -2672,7 +2903,7 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti
|
||||
|
||||
mutex_lock(&server->volopt_mutex);
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
|
||||
if (le64_to_cpu(volopt->set_bits) & SCOUTFS_VOLOPT_DATA_ALLOC_ZONE_BLOCKS_BIT) {
|
||||
opt = le64_to_cpu(volopt->data_alloc_zone_blocks);
|
||||
@@ -2703,7 +2934,7 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti
|
||||
}
|
||||
|
||||
apply:
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
|
||||
write_seqcount_begin(&server->volopt_seqcount);
|
||||
if (ret == 0)
|
||||
@@ -2723,6 +2954,7 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_volume_options *volopt;
|
||||
COMMIT_HOLD(hold);
|
||||
__le64 *opt;
|
||||
u64 bit;
|
||||
int ret = 0;
|
||||
@@ -2741,7 +2973,7 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec
|
||||
|
||||
mutex_lock(&server->volopt_mutex);
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
|
||||
for (i = 0, bit = 1, opt = first_valopt(&super->volopt); i < 64; i++, bit <<= 1, opt++) {
|
||||
if (le64_to_cpu(volopt->set_bits) & bit) {
|
||||
@@ -2750,7 +2982,7 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec
|
||||
}
|
||||
}
|
||||
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
|
||||
write_seqcount_begin(&server->volopt_seqcount);
|
||||
if (ret == 0)
|
||||
@@ -2776,6 +3008,7 @@ static int server_resize_devices(struct super_block *sb, struct scoutfs_net_conn
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_net_resize_devices *nrd;
|
||||
COMMIT_HOLD(hold);
|
||||
u64 meta_tot;
|
||||
u64 meta_start;
|
||||
u64 meta_len;
|
||||
@@ -2794,7 +3027,7 @@ static int server_resize_devices(struct super_block *sb, struct scoutfs_net_conn
|
||||
meta_tot = le64_to_cpu(nrd->new_total_meta_blocks);
|
||||
data_tot = le64_to_cpu(nrd->new_total_data_blocks);
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
mutex_lock(&server->alloc_mutex);
|
||||
|
||||
if (meta_tot == le64_to_cpu(super->total_meta_blocks))
|
||||
@@ -2856,7 +3089,7 @@ static int server_resize_devices(struct super_block *sb, struct scoutfs_net_conn
|
||||
ret = 0;
|
||||
unlock:
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
out:
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
|
||||
};
|
||||
@@ -3180,7 +3413,7 @@ out:
|
||||
*/
|
||||
static void queue_farewell_work(struct server_info *server)
|
||||
{
|
||||
if (!test_shutting_down(server))
|
||||
if (!server_is_stopping(server))
|
||||
queue_work(server->wq, &server->farewell_work);
|
||||
}
|
||||
|
||||
@@ -3210,6 +3443,7 @@ static int server_greeting(struct super_block *sb,
|
||||
struct scoutfs_net_greeting *gr = arg;
|
||||
struct scoutfs_net_greeting greet;
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
COMMIT_HOLD(hold);
|
||||
bool reconnecting;
|
||||
bool first_contact;
|
||||
bool farewell;
|
||||
@@ -3237,12 +3471,12 @@ static int server_greeting(struct super_block *sb,
|
||||
}
|
||||
|
||||
if (gr->server_term == 0) {
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
|
||||
ret = insert_mounted_client(sb, le64_to_cpu(gr->rid), le64_to_cpu(gr->flags),
|
||||
&conn->peername);
|
||||
|
||||
ret = scoutfs_server_apply_commit(sb, ret);
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
queue_work(server->wq, &server->farewell_work);
|
||||
if (ret < 0)
|
||||
goto send_err;
|
||||
@@ -3308,9 +3542,10 @@ struct farewell_request {
|
||||
*/
|
||||
static int reclaim_rid(struct super_block *sb, u64 rid)
|
||||
{
|
||||
COMMIT_HOLD(hold);
|
||||
int ret;
|
||||
|
||||
scoutfs_server_hold_commit(sb);
|
||||
server_hold_commit(sb, &hold);
|
||||
|
||||
/* delete mounted client last, recovery looks for it */
|
||||
ret = scoutfs_lock_server_farewell(sb, rid) ?:
|
||||
@@ -3320,7 +3555,7 @@ static int reclaim_rid(struct super_block *sb, u64 rid)
|
||||
scoutfs_omap_remove_rid(sb, rid) ?:
|
||||
delete_mounted_client(sb, rid);
|
||||
|
||||
return scoutfs_server_apply_commit(sb, ret);
|
||||
return server_apply_commit(sb, &hold, ret);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -3693,14 +3928,14 @@ static void fence_pending_recov_worker(struct work_struct *work)
|
||||
}
|
||||
|
||||
if (ret < 0)
|
||||
scoutfs_server_abort(sb);
|
||||
stop_server(server);
|
||||
}
|
||||
|
||||
static void recovery_timeout(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
if (!test_shutting_down(server))
|
||||
if (!server_is_stopping(server))
|
||||
queue_work(server->wq, &server->fence_pending_recov_work);
|
||||
}
|
||||
|
||||
@@ -3765,7 +4000,7 @@ out:
|
||||
|
||||
static void queue_reclaim_work(struct server_info *server, unsigned long delay)
|
||||
{
|
||||
if (!test_shutting_down(server))
|
||||
if (!server_is_stopping(server))
|
||||
queue_delayed_work(server->wq, &server->reclaim_dwork, delay);
|
||||
}
|
||||
|
||||
@@ -3800,7 +4035,7 @@ static void reclaim_worker(struct work_struct *work)
|
||||
if (error == true) {
|
||||
scoutfs_err(sb, "saw error indicator on fence request for rid %016llx, shutting down server",
|
||||
rid);
|
||||
scoutfs_server_abort(sb);
|
||||
stop_server(server);
|
||||
ret = -ESHUTDOWN;
|
||||
goto out;
|
||||
}
|
||||
@@ -3809,7 +4044,7 @@ static void reclaim_worker(struct work_struct *work)
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "failure to reclaim fenced rid %016llx: err %d, shutting down server",
|
||||
rid, ret);
|
||||
scoutfs_server_abort(sb);
|
||||
stop_server(server);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -3817,16 +4052,7 @@ static void reclaim_worker(struct work_struct *work)
|
||||
scoutfs_fence_free(sb, rid);
|
||||
scoutfs_server_recov_finish(sb, rid, SCOUTFS_RECOV_ALL);
|
||||
|
||||
/* tell quorum we've finished fencing all previous leaders */
|
||||
if (reason == SCOUTFS_FENCE_QUORUM_BLOCK_LEADER &&
|
||||
!scoutfs_fence_reason_pending(sb, reason)) {
|
||||
ret = scoutfs_quorum_fence_complete(sb, server->term);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
out:
|
||||
/* queue next reclaim immediately if we're making progress */
|
||||
if (ret == 0)
|
||||
@@ -3942,12 +4168,12 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
scoutfs_net_listen(sb, conn);
|
||||
|
||||
scoutfs_info(sb, "server ready at "SIN_FMT, SIN_ARG(&sin));
|
||||
complete(&server->start_comp);
|
||||
server_up(server);
|
||||
|
||||
queue_reclaim_work(server, 0);
|
||||
|
||||
/* interruptible mostly to avoid stuck messages */
|
||||
wait_event_interruptible(server->waitq, test_shutting_down(server));
|
||||
wait_event_interruptible(server->waitq, server_is_stopping(server));
|
||||
|
||||
shutdown:
|
||||
scoutfs_info(sb, "server shutting down at "SIN_FMT, SIN_ARG(&sin));
|
||||
@@ -3981,60 +4207,44 @@ out:
|
||||
scoutfs_fence_stop(sb);
|
||||
scoutfs_net_free_conn(sb, conn);
|
||||
|
||||
/* let quorum know that we've shutdown */
|
||||
scoutfs_quorum_server_shutdown(sb, server->term);
|
||||
server_down(server);
|
||||
|
||||
scoutfs_info(sb, "server stopped at "SIN_FMT, SIN_ARG(&sin));
|
||||
trace_scoutfs_server_work_exit(sb, 0, ret);
|
||||
|
||||
server->err = ret;
|
||||
complete(&server->start_comp);
|
||||
}
|
||||
|
||||
/*
|
||||
* Wait for the server to successfully start. If this returns error then
|
||||
* the super block's fence_term has been set to the new server's term so
|
||||
* that it won't be fenced.
|
||||
* Start the server but don't wait for it to complete.
|
||||
*/
|
||||
int scoutfs_server_start(struct super_block *sb, u64 term)
|
||||
void scoutfs_server_start(struct super_block *sb, u64 term)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
server->err = 0;
|
||||
set_shutting_down(server, false);
|
||||
server->term = term;
|
||||
init_completion(&server->start_comp);
|
||||
|
||||
queue_work(server->wq, &server->work);
|
||||
|
||||
wait_for_completion(&server->start_comp);
|
||||
return server->err;
|
||||
if (cmpxchg(&server->status, SERVER_DOWN, SERVER_STARTING) == SERVER_DOWN) {
|
||||
server->term = term;
|
||||
queue_work(server->wq, &server->work);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Start shutdown on the server but don't wait for it to finish.
|
||||
*/
|
||||
void scoutfs_server_abort(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
stop_server(server);
|
||||
}
|
||||
|
||||
/*
|
||||
* Once the server is stopped we give the caller our election info
|
||||
* which might have been modified while we were running.
|
||||
*/
|
||||
void scoutfs_server_stop(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
stop_server(server);
|
||||
}
|
||||
|
||||
cancel_work_sync(&server->work);
|
||||
cancel_work_sync(&server->farewell_work);
|
||||
cancel_work_sync(&server->commit_work);
|
||||
cancel_work_sync(&server->log_merge_free_work);
|
||||
/*
|
||||
* Start shutdown on the server and wait for it to finish.
|
||||
*/
|
||||
void scoutfs_server_stop_wait(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
stop_server(server);
|
||||
flush_work_sync(&server->work);
|
||||
}
|
||||
|
||||
int scoutfs_server_setup(struct super_block *sb)
|
||||
@@ -4050,8 +4260,8 @@ int scoutfs_server_setup(struct super_block *sb)
|
||||
spin_lock_init(&server->lock);
|
||||
init_waitqueue_head(&server->waitq);
|
||||
INIT_WORK(&server->work, scoutfs_server_worker);
|
||||
init_rwsem(&server->commit_rwsem);
|
||||
init_llist_head(&server->commit_waiters);
|
||||
server->status = SERVER_DOWN;
|
||||
init_commit_users(&server->cusers);
|
||||
INIT_WORK(&server->commit_work, scoutfs_server_commit_func);
|
||||
INIT_LIST_HEAD(&server->clients);
|
||||
spin_lock_init(&server->farewell_lock);
|
||||
|
||||
@@ -64,8 +64,6 @@ int scoutfs_server_lock_response(struct super_block *sb, u64 rid, u64 id,
|
||||
struct scoutfs_net_lock *nl);
|
||||
int scoutfs_server_lock_recover_request(struct super_block *sb, u64 rid,
|
||||
struct scoutfs_key *key);
|
||||
void scoutfs_server_hold_commit(struct super_block *sb);
|
||||
int scoutfs_server_apply_commit(struct super_block *sb, int err);
|
||||
void scoutfs_server_recov_finish(struct super_block *sb, u64 rid, int which);
|
||||
|
||||
int scoutfs_server_send_omap_request(struct super_block *sb, u64 rid,
|
||||
@@ -77,9 +75,12 @@ u64 scoutfs_server_seq(struct super_block *sb);
|
||||
u64 scoutfs_server_next_seq(struct super_block *sb);
|
||||
void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq);
|
||||
|
||||
int scoutfs_server_start(struct super_block *sb, u64 term);
|
||||
void scoutfs_server_abort(struct super_block *sb);
|
||||
void scoutfs_server_start(struct super_block *sb, u64 term);
|
||||
void scoutfs_server_stop(struct super_block *sb);
|
||||
void scoutfs_server_stop_wait(struct super_block *sb);
|
||||
bool scoutfs_server_is_running(struct super_block *sb);
|
||||
bool scoutfs_server_is_up(struct super_block *sb);
|
||||
bool scoutfs_server_is_down(struct super_block *sb);
|
||||
|
||||
int scoutfs_server_setup(struct super_block *sb);
|
||||
void scoutfs_server_destroy(struct super_block *sb);
|
||||
|
||||
@@ -37,6 +37,15 @@ struct attr_funcs {
|
||||
#define ATTR_FUNCS_RO(_name) \
|
||||
static struct attr_funcs _name##_attr_funcs = __ATTR_RO(_name)
|
||||
|
||||
static ssize_t data_device_maj_min_show(struct kobject *kobj, struct attribute *attr, char *buf)
|
||||
{
|
||||
struct super_block *sb = KOBJ_TO_SB(kobj, sb_id_kobj);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u:%u\n",
|
||||
MAJOR(sb->s_bdev->bd_dev), MINOR(sb->s_bdev->bd_dev));
|
||||
}
|
||||
ATTR_FUNCS_RO(data_device_maj_min);
|
||||
|
||||
static ssize_t format_version_show(struct kobject *kobj, struct attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
@@ -101,6 +110,7 @@ static ssize_t attr_funcs_show(struct kobject *kobj, struct attribute *attr,
|
||||
|
||||
|
||||
static struct attribute *sb_id_attrs[] = {
|
||||
&data_device_maj_min_attr_funcs.attr,
|
||||
&format_version_attr_funcs.attr,
|
||||
&fsid_attr_funcs.attr,
|
||||
&rid_attr_funcs.attr,
|
||||
|
||||
446
kmod/src/xattr.c
446
kmod/src/xattr.c
@@ -57,12 +57,6 @@ static u32 xattr_names_equal(const char *a_name, unsigned int a_len,
|
||||
return a_len == b_len && memcmp(a_name, b_name, a_len) == 0;
|
||||
}
|
||||
|
||||
static unsigned int xattr_full_bytes(struct scoutfs_xattr *xat)
|
||||
{
|
||||
return offsetof(struct scoutfs_xattr,
|
||||
name[xat->name_len + le16_to_cpu(xat->val_len)]);
|
||||
}
|
||||
|
||||
static unsigned int xattr_nr_parts(struct scoutfs_xattr *xat)
|
||||
{
|
||||
return SCOUTFS_XATTR_NR_PARTS(xat->name_len,
|
||||
@@ -85,10 +79,18 @@ static void init_xattr_key(struct scoutfs_key *key, u64 ino, u32 name_hash,
|
||||
#define SCOUTFS_XATTR_PREFIX "scoutfs."
|
||||
#define SCOUTFS_XATTR_PREFIX_LEN (sizeof(SCOUTFS_XATTR_PREFIX) - 1)
|
||||
|
||||
static int unknown_prefix(const char *name)
|
||||
static int unknown_prefix(const char *name, bool *is_user)
|
||||
{
|
||||
return strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
|
||||
strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
|
||||
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
|
||||
if (is_user)
|
||||
*is_user = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (is_user)
|
||||
*is_user = false;
|
||||
|
||||
return strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
|
||||
strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
|
||||
strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN)&&
|
||||
strncmp(name, SCOUTFS_XATTR_PREFIX, SCOUTFS_XATTR_PREFIX_LEN);
|
||||
@@ -98,11 +100,13 @@ static int unknown_prefix(const char *name)
|
||||
#define HIDE_TAG "hide."
|
||||
#define SRCH_TAG "srch."
|
||||
#define TOTL_TAG "totl."
|
||||
#define WORM_TAG "worm."
|
||||
#define TAG_LEN (sizeof(HIDE_TAG) - 1)
|
||||
|
||||
int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
struct scoutfs_xattr_prefix_tags *tgs)
|
||||
int scoutfs_xattr_parse_tags(struct super_block *sb, const char *name,
|
||||
unsigned int name_len, struct scoutfs_xattr_prefix_tags *tgs)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
bool found;
|
||||
|
||||
memset(tgs, 0, sizeof(struct scoutfs_xattr_prefix_tags));
|
||||
@@ -123,6 +127,9 @@ int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
} else if (!strncmp(name, TOTL_TAG, TAG_LEN)) {
|
||||
if (++tgs->totl == 0)
|
||||
return -EINVAL;
|
||||
} else if (!strncmp(name, WORM_TAG, TAG_LEN)) {
|
||||
if (++tgs->worm == 0 || sbi->fmt_vers < 2)
|
||||
return -EINVAL;
|
||||
} else {
|
||||
/* only reason to use scoutfs. is tags */
|
||||
if (!found)
|
||||
@@ -137,12 +144,29 @@ int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the next xattr and copy the key, xattr header, and as much of
|
||||
* the name and value into the callers buffer as we can. Returns the
|
||||
* number of bytes copied which include the header, name, and value and
|
||||
* can be limited by the xattr length or the callers buffer. The caller
|
||||
* is responsible for comparing their lengths, the header, and the
|
||||
* returned length before safely using the xattr.
|
||||
* xattrs are stored in multiple items. The first item is a
|
||||
* concatenation of an initial header, the name, and then as much of the
|
||||
* value as fits in the remainder of the first item.  This returns the
|
||||
* size of the first item that'd store an xattr with the given name
|
||||
* length and value payload size.
|
||||
*/
|
||||
static int first_item_bytes(int name_len, size_t size)
|
||||
{
|
||||
if (WARN_ON_ONCE(name_len <= 0) ||
|
||||
WARN_ON_ONCE(name_len > SCOUTFS_XATTR_MAX_NAME_LEN))
|
||||
return 0;
|
||||
|
||||
return min_t(int, sizeof(struct scoutfs_xattr) + name_len + size,
|
||||
SCOUTFS_XATTR_MAX_PART_SIZE);
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the next xattr, set the caller's key, and copy as much of the
|
||||
* first item into the caller's buffer as we can.  Returns the number of
|
||||
* bytes copied which can include the header, name, and start of the
|
||||
* value from the first item. The caller is responsible for comparing
|
||||
* their lengths, the header, and the returned length before safely
|
||||
* using the buffer.
|
||||
*
|
||||
* If a name is provided then we'll iterate over items with a matching
|
||||
* name_hash until we find a matching name. If we don't find a matching
|
||||
@@ -154,20 +178,17 @@ int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
* Returns -ENOENT if it didn't find a next item.
|
||||
*/
|
||||
static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
|
||||
struct scoutfs_xattr *xat, unsigned int bytes,
|
||||
struct scoutfs_xattr *xat, unsigned int xat_bytes,
|
||||
const char *name, unsigned int name_len,
|
||||
u64 name_hash, u64 id, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_key last;
|
||||
u8 last_part;
|
||||
int total;
|
||||
u8 part;
|
||||
int ret;
|
||||
|
||||
/* need to be able to see the name we're looking for */
|
||||
if (WARN_ON_ONCE(name_len > 0 && bytes < offsetof(struct scoutfs_xattr,
|
||||
name[name_len])))
|
||||
if (WARN_ON_ONCE(name_len > 0 &&
|
||||
xat_bytes < offsetof(struct scoutfs_xattr, name[name_len])))
|
||||
return -EINVAL;
|
||||
|
||||
if (name_len)
|
||||
@@ -176,26 +197,15 @@ static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
|
||||
init_xattr_key(key, scoutfs_ino(inode), name_hash, id);
|
||||
init_xattr_key(&last, scoutfs_ino(inode), U32_MAX, U64_MAX);
|
||||
|
||||
last_part = 0;
|
||||
part = 0;
|
||||
total = 0;
|
||||
|
||||
for (;;) {
|
||||
key->skx_part = part;
|
||||
ret = scoutfs_item_next(sb, key, &last,
|
||||
(void *)xat + total, bytes - total,
|
||||
lock);
|
||||
if (ret < 0) {
|
||||
/* XXX corruption, ran out of parts */
|
||||
if (ret == -ENOENT && part > 0)
|
||||
ret = -EIO;
|
||||
ret = scoutfs_item_next(sb, key, &last, xat, xat_bytes, lock);
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
trace_scoutfs_xattr_get_next_key(sb, key);
|
||||
|
||||
/* XXX corruption */
|
||||
if (key->skx_part != part) {
|
||||
if (key->skx_part != 0) {
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
@@ -205,8 +215,7 @@ static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
|
||||
* the first part and if the next xattr name fits in our
|
||||
* buffer then the item must have included it.
|
||||
*/
|
||||
if (part == 0 &&
|
||||
(ret < sizeof(struct scoutfs_xattr) ||
|
||||
if ((ret < sizeof(struct scoutfs_xattr) ||
|
||||
(xat->name_len <= name_len &&
|
||||
ret < offsetof(struct scoutfs_xattr,
|
||||
name[xat->name_len])) ||
|
||||
@@ -216,7 +225,7 @@ static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
|
||||
break;
|
||||
}
|
||||
|
||||
if (part == 0 && name_len) {
|
||||
if (name_len > 0) {
|
||||
/* ran out of names that could match */
|
||||
if (le64_to_cpu(key->skx_name_hash) != name_hash) {
|
||||
ret = -ENOENT;
|
||||
@@ -224,64 +233,126 @@ static int get_next_xattr(struct inode *inode, struct scoutfs_key *key,
|
||||
}
|
||||
|
||||
/* keep looking for our name */
|
||||
if (!xattr_names_equal(name, name_len,
|
||||
xat->name, xat->name_len)) {
|
||||
part = 0;
|
||||
if (!xattr_names_equal(name, name_len, xat->name, xat->name_len)) {
|
||||
le64_add_cpu(&key->skx_id, 1);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* use the matching name we found */
|
||||
last_part = xattr_nr_parts(xat) - 1;
|
||||
}
|
||||
|
||||
total += ret;
|
||||
if (total == bytes || part == last_part) {
|
||||
/* copied as much as we could */
|
||||
ret = total;
|
||||
break;
|
||||
}
|
||||
part++;
|
||||
/* found next name */
|
||||
break;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller has already read and verified the xattr's first item.
|
||||
* Copy the value from the tail of the first item and from any future
|
||||
* items into the destination buffer.
|
||||
*/
|
||||
static int copy_xattr_value(struct super_block *sb, struct scoutfs_key *xat_key,
|
||||
struct scoutfs_xattr *xat, int xat_bytes,
|
||||
char *buffer, size_t size,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_key key;
|
||||
size_t copied = 0;
|
||||
int val_tail;
|
||||
int bytes;
|
||||
int ret;
|
||||
int i;
|
||||
|
||||
/* must have first item up to value */
|
||||
if (WARN_ON_ONCE(xat_bytes < sizeof(struct scoutfs_xattr)) ||
|
||||
WARN_ON_ONCE(xat_bytes < offsetof(struct scoutfs_xattr, name[xat->name_len])))
|
||||
return -EINVAL;
|
||||
|
||||
/* only ever copy up to the full value */
|
||||
size = min_t(size_t, size, le16_to_cpu(xat->val_len));
|
||||
|
||||
/* must have full first item if caller needs value from second item */
|
||||
val_tail = SCOUTFS_XATTR_MAX_PART_SIZE -
|
||||
offsetof(struct scoutfs_xattr, name[xat->name_len]);
|
||||
if (WARN_ON_ONCE(size > val_tail && xat_bytes != SCOUTFS_XATTR_MAX_PART_SIZE))
|
||||
return -EINVAL;
|
||||
|
||||
/* copy from tail of first item */
|
||||
bytes = min_t(unsigned int, size, val_tail);
|
||||
if (bytes > 0) {
|
||||
memcpy(buffer, &xat->name[xat->name_len], bytes);
|
||||
copied += bytes;
|
||||
}
|
||||
|
||||
key = *xat_key;
|
||||
for (i = 1; copied < size; i++) {
|
||||
key.skx_part = i;
|
||||
bytes = min_t(unsigned int, size - copied, SCOUTFS_XATTR_MAX_PART_SIZE);
|
||||
|
||||
ret = scoutfs_item_lookup(sb, &key, buffer + copied, bytes, lock);
|
||||
if (ret >= 0 && ret != bytes)
|
||||
ret = -EIO;
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
copied += ret;
|
||||
}
|
||||
|
||||
return copied;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller is working with items that are either in the allocated
|
||||
* first compound item or further items that are offsets into a value
|
||||
* buffer. Give them a pointer and length of the start of the item.
|
||||
*/
|
||||
static void xattr_item_part_buffer(void **buf, int *len, int part,
|
||||
struct scoutfs_xattr *xat, unsigned int xat_bytes,
|
||||
const char *value, size_t size)
|
||||
{
|
||||
int off;
|
||||
|
||||
if (part == 0) {
|
||||
*buf = xat;
|
||||
*len = xat_bytes;
|
||||
} else {
|
||||
off = (part * SCOUTFS_XATTR_MAX_PART_SIZE) -
|
||||
offsetof(struct scoutfs_xattr, name[xat->name_len]);
|
||||
BUG_ON(off >= size); /* calls limited by number of parts */
|
||||
*buf = (void *)value + off;
|
||||
*len = min_t(size_t, size - off, SCOUTFS_XATTR_MAX_PART_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Create all the items associated with the given xattr. If this
|
||||
* returns an error it will have already cleaned up any items it created
|
||||
* before seeing the error.
|
||||
*/
|
||||
static int create_xattr_items(struct inode *inode, u64 id,
|
||||
struct scoutfs_xattr *xat, unsigned int bytes,
|
||||
static int create_xattr_items(struct inode *inode, u64 id, struct scoutfs_xattr *xat,
|
||||
int xat_bytes, const char *value, size_t size, u8 new_parts,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_key key;
|
||||
unsigned int part_bytes;
|
||||
unsigned int total;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
void *buf;
|
||||
int len;
|
||||
int i;
|
||||
|
||||
init_xattr_key(&key, scoutfs_ino(inode),
|
||||
xattr_name_hash(xat->name, xat->name_len), id);
|
||||
|
||||
total = 0;
|
||||
ret = 0;
|
||||
while (total < bytes) {
|
||||
part_bytes = min_t(unsigned int, bytes - total,
|
||||
SCOUTFS_XATTR_MAX_PART_SIZE);
|
||||
for (i = 0; i < new_parts; i++) {
|
||||
key.skx_part = i;
|
||||
xattr_item_part_buffer(&buf, &len, i, xat, xat_bytes, value, size);
|
||||
|
||||
ret = scoutfs_item_create(sb, &key,
|
||||
(void *)xat + total, part_bytes,
|
||||
lock);
|
||||
if (ret) {
|
||||
ret = scoutfs_item_create(sb, &key, buf, len, lock);
|
||||
if (ret < 0) {
|
||||
while (key.skx_part-- > 0)
|
||||
scoutfs_item_delete(sb, &key, lock);
|
||||
break;
|
||||
}
|
||||
|
||||
total += part_bytes;
|
||||
key.skx_part++;
|
||||
}
|
||||
|
||||
return ret;
|
||||
@@ -329,20 +400,20 @@ out:
|
||||
* deleted items.
|
||||
*/
|
||||
static int change_xattr_items(struct inode *inode, u64 id,
|
||||
struct scoutfs_xattr *new_xat,
|
||||
unsigned int new_bytes, u8 new_parts,
|
||||
u8 old_parts, struct scoutfs_lock *lock)
|
||||
struct scoutfs_xattr *xat, int xat_bytes,
|
||||
const char *value, size_t size,
|
||||
u8 new_parts, u8 old_parts, struct scoutfs_lock *lock)
|
||||
{
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_key key;
|
||||
int last_created = -1;
|
||||
int bytes;
|
||||
int off;
|
||||
void *buf;
|
||||
int len;
|
||||
int i;
|
||||
int ret;
|
||||
|
||||
init_xattr_key(&key, scoutfs_ino(inode),
|
||||
xattr_name_hash(new_xat->name, new_xat->name_len), id);
|
||||
xattr_name_hash(xat->name, xat->name_len), id);
|
||||
|
||||
/* dirty existing old items */
|
||||
for (i = 0; i < old_parts; i++) {
|
||||
@@ -354,13 +425,10 @@ static int change_xattr_items(struct inode *inode, u64 id,
|
||||
|
||||
/* create any new items past the old */
|
||||
for (i = old_parts; i < new_parts; i++) {
|
||||
off = i * SCOUTFS_XATTR_MAX_PART_SIZE;
|
||||
bytes = min_t(unsigned int, new_bytes - off,
|
||||
SCOUTFS_XATTR_MAX_PART_SIZE);
|
||||
|
||||
key.skx_part = i;
|
||||
ret = scoutfs_item_create(sb, &key, (void *)new_xat + off,
|
||||
bytes, lock);
|
||||
xattr_item_part_buffer(&buf, &len, i, xat, xat_bytes, value, size);
|
||||
|
||||
ret = scoutfs_item_create(sb, &key, buf, len, lock);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -369,13 +437,10 @@ static int change_xattr_items(struct inode *inode, u64 id,
|
||||
|
||||
/* update dirtied overlapping existing items, last partial first */
|
||||
for (i = min(old_parts, new_parts) - 1; i >= 0; i--) {
|
||||
off = i * SCOUTFS_XATTR_MAX_PART_SIZE;
|
||||
bytes = min_t(unsigned int, new_bytes - off,
|
||||
SCOUTFS_XATTR_MAX_PART_SIZE);
|
||||
|
||||
key.skx_part = i;
|
||||
ret = scoutfs_item_update(sb, &key, (void *)new_xat + off,
|
||||
bytes, lock);
|
||||
xattr_item_part_buffer(&buf, &len, i, xat, xat_bytes, value, size);
|
||||
|
||||
ret = scoutfs_item_update(sb, &key, buf, len, lock);
|
||||
/* only last partial can fail, then we unwind created */
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
@@ -412,20 +477,19 @@ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer,
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
struct scoutfs_key key;
|
||||
unsigned int bytes;
|
||||
unsigned int xat_bytes;
|
||||
size_t name_len;
|
||||
int ret;
|
||||
|
||||
if (unknown_prefix(name))
|
||||
if (unknown_prefix(name, NULL))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
name_len = strlen(name);
|
||||
if (name_len > SCOUTFS_XATTR_MAX_NAME_LEN)
|
||||
return -ENODATA;
|
||||
|
||||
/* only need enough for caller's name and value sizes */
|
||||
bytes = sizeof(struct scoutfs_xattr) + name_len + size;
|
||||
xat = __vmalloc(bytes, GFP_NOFS, PAGE_KERNEL);
|
||||
xat_bytes = first_item_bytes(name_len, size);
|
||||
xat = kmalloc(xat_bytes, GFP_NOFS);
|
||||
if (!xat)
|
||||
return -ENOMEM;
|
||||
|
||||
@@ -435,40 +499,32 @@ ssize_t scoutfs_getxattr(struct dentry *dentry, const char *name, void *buffer,
|
||||
|
||||
down_read(&si->xattr_rwsem);
|
||||
|
||||
ret = get_next_xattr(inode, &key, xat, bytes,
|
||||
name, name_len, 0, 0, lck);
|
||||
|
||||
up_read(&si->xattr_rwsem);
|
||||
scoutfs_unlock(sb, lck, SCOUTFS_LOCK_READ);
|
||||
ret = get_next_xattr(inode, &key, xat, xat_bytes, name, name_len, 0, 0, lck);
|
||||
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = -ENODATA;
|
||||
goto out;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* the caller just wants to know the size */
|
||||
if (size == 0) {
|
||||
ret = le16_to_cpu(xat->val_len);
|
||||
goto out;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* the caller's buffer wasn't big enough */
|
||||
if (size < le16_to_cpu(xat->val_len)) {
|
||||
ret = -ERANGE;
|
||||
goto out;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* XXX corruption, the items didn't match the header */
|
||||
if (ret < xattr_full_bytes(xat)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = le16_to_cpu(xat->val_len);
|
||||
memcpy(buffer, &xat->name[xat->name_len], ret);
|
||||
ret = copy_xattr_value(sb, &key, xat, xat_bytes, buffer, size, lck);
|
||||
unlock:
|
||||
up_read(&si->xattr_rwsem);
|
||||
scoutfs_unlock(sb, lck, SCOUTFS_LOCK_READ);
|
||||
out:
|
||||
vfree(xat);
|
||||
kfree(xat);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -481,6 +537,22 @@ void scoutfs_xattr_init_totl_key(struct scoutfs_key *key, u64 *name)
|
||||
key->skxt_c = cpu_to_le64(name[2]);
|
||||
}
|
||||
|
||||
/*
 * Currently only support enabling level1 worm by setting a non-zero
 * expiration.
 *
 * Validate the final dotted component of a worm xattr name.  Returns 0
 * when the text following the last '.' in @name is exactly
 * "level1_expire" and -EINVAL otherwise, including when @name contains
 * no '.' at all.  @name must be null terminated and is not modified.
 */
static int parse_worm_name(const char *name)
{
	static const char worm_name[] = "level1_expire";
	const char *last_dot;

	last_dot = strrchr(name, '.');
	if (!last_dot)
		return -EINVAL;

	/* only the component after the final dot is compared */
	return strcmp(worm_name, last_dot + 1) == 0 ? 0 : -EINVAL;
}
|
||||
|
||||
/*
|
||||
* Parse a u64 in any base after null terminating it while forbidding
|
||||
* the leading + and trailing \n that kstrtoull allows.
|
||||
@@ -498,6 +570,66 @@ static int parse_totl_u64(const char *s, int len, u64 *res)
|
||||
return kstrtoull(str, 0, res) != 0 ? -EINVAL : 0;
|
||||
}
|
||||
|
||||
/*
 * Parse a u32 from the first @len bytes of @s by way of
 * parse_totl_u64(), additionally rejecting values that don't fit in 32
 * bits with -EINVAL.  Returns 0 and stores the parsed value in @res on
 * success.  On any failure the negative error is returned and @res is
 * set to 0.
 */
static int parse_worm_u32(const char *s, int len, u32 *res)
{
	/* initialized so that a failed parse can't leave it indeterminate */
	u64 tmp = 0;
	int ret;

	ret = parse_totl_u64(s, len, &tmp);
	if (ret == 0 && tmp > U32_MAX)
		ret = -EINVAL;

	/* only a successfully parsed in-range value is ever stored */
	*res = ret == 0 ? tmp : 0;
	return ret;
}
|
||||
|
||||
static int parse_worm_timespec(struct timespec *ts, const char *name, int name_len)
|
||||
{
|
||||
char *delim;
|
||||
u64 sec;
|
||||
u32 nsec;
|
||||
int sec_len;
|
||||
int nsec_len;
|
||||
int ret;
|
||||
|
||||
memset(ts, 0, sizeof(struct scoutfs_timespec));
|
||||
|
||||
if (name_len < 3)
|
||||
return -EINVAL;
|
||||
|
||||
delim = strnchr(name, name_len, '.');
|
||||
if (!delim)
|
||||
return -EINVAL;
|
||||
|
||||
if (delim == name || delim == (name + name_len - 1))
|
||||
return -EINVAL;
|
||||
|
||||
sec_len = delim - name;
|
||||
nsec_len = name_len - (sec_len + 1);
|
||||
|
||||
/* Check to make sure only one '.' */
|
||||
if (strnchr(delim + 1, nsec_len, '.'))
|
||||
return -EINVAL;
|
||||
|
||||
ret = parse_totl_u64(name, sec_len, &sec);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
ret = parse_worm_u32(delim + 1, nsec_len, &nsec);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
if (sec > S64_MAX || nsec >= NSEC_PER_SEC || (sec == 0 && nsec == 0))
|
||||
return -EINVAL;
|
||||
|
||||
ts->tv_sec = sec;
|
||||
ts->tv_nsec = nsec;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* non-destructive relatively quick parse of the last 3 dotted u64s that
|
||||
* make up the name of the xattr total. -EINVAL is returned if there
|
||||
@@ -582,22 +714,25 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
{
|
||||
struct inode *inode = dentry->d_inode;
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
struct super_block *sb = inode->i_sb;
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct scoutfs_xattr_totl_val tval = {0,};
|
||||
struct scoutfs_lock *totl_lock = NULL;
|
||||
struct super_block *sb = inode->i_sb;
|
||||
struct scoutfs_xattr_prefix_tags tgs;
|
||||
const u64 ino = scoutfs_ino(inode);
|
||||
struct timespec worm_ts = {0,};
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
struct scoutfs_lock *totl_lock = NULL;
|
||||
size_t name_len = strlen(name);
|
||||
struct scoutfs_key totl_key;
|
||||
struct scoutfs_key key;
|
||||
bool undo_srch = false;
|
||||
bool undo_totl = false;
|
||||
bool is_user = false;
|
||||
LIST_HEAD(ind_locks);
|
||||
u8 found_parts;
|
||||
unsigned int bytes;
|
||||
unsigned int xat_bytes_totl;
|
||||
unsigned int xat_bytes;
|
||||
unsigned int val_len;
|
||||
u8 found_parts;
|
||||
u64 ind_seq;
|
||||
u64 total;
|
||||
u64 hash = 0;
|
||||
@@ -617,21 +752,28 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
(flags & ~(XATTR_CREATE | XATTR_REPLACE)))
|
||||
return -EINVAL;
|
||||
|
||||
if (unknown_prefix(name))
|
||||
if (unknown_prefix(name, &is_user))
|
||||
return -EOPNOTSUPP;
|
||||
|
||||
if (scoutfs_xattr_parse_tags(name, name_len, &tgs) != 0)
|
||||
if (scoutfs_xattr_parse_tags(sb, name, name_len, &tgs) != 0)
|
||||
return -EINVAL;
|
||||
|
||||
if ((tgs.hide | tgs.srch | tgs.totl) && !capable(CAP_SYS_ADMIN))
|
||||
if ((tgs.hide | tgs.srch | tgs.totl | tgs.worm) && !capable(CAP_SYS_ADMIN))
|
||||
return -EPERM;
|
||||
|
||||
if (tgs.totl && ((ret = parse_totl_key(&totl_key, name, name_len)) != 0))
|
||||
if (tgs.worm && !tgs.hide)
|
||||
return -EINVAL;
|
||||
|
||||
if ((tgs.totl && ((ret = parse_totl_key(&totl_key, name, name_len)) != 0)) ||
|
||||
(tgs.worm && ((ret = parse_worm_name(name)) != 0)))
|
||||
return ret;
|
||||
|
||||
bytes = sizeof(struct scoutfs_xattr) + name_len + size;
|
||||
/* alloc enough to read old totl value */
|
||||
xat = __vmalloc(bytes + SCOUTFS_XATTR_MAX_TOTL_U64, GFP_NOFS, PAGE_KERNEL);
|
||||
/* allocate enough to always read an existing xattr's totl */
|
||||
xat_bytes_totl = first_item_bytes(name_len,
|
||||
max_t(size_t, size, SCOUTFS_XATTR_MAX_TOTL_U64));
|
||||
/* but store partial first item that only includes the new xattr's value */
|
||||
xat_bytes = first_item_bytes(name_len, size);
|
||||
xat = kmalloc(xat_bytes_totl, GFP_NOFS);
|
||||
if (!xat) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
@@ -644,10 +786,13 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
|
||||
down_write(&si->xattr_rwsem);
|
||||
|
||||
if (!S_ISREG(inode->i_mode) && tgs.worm) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* find an existing xattr to delete, including possible totl value */
|
||||
ret = get_next_xattr(inode, &key, xat,
|
||||
sizeof(struct scoutfs_xattr) + name_len + SCOUTFS_XATTR_MAX_TOTL_U64,
|
||||
name, name_len, 0, 0, lck);
|
||||
ret = get_next_xattr(inode, &key, xat, xat_bytes_totl, name, name_len, 0, 0, lck);
|
||||
if (ret < 0 && ret != -ENOENT)
|
||||
goto unlock;
|
||||
|
||||
@@ -666,6 +811,12 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* current worm only protects user. xattrs and expiration xattr itself */
|
||||
if (scoutfs_inode_worm_denied(inode) && (is_user || tgs.worm)) {
|
||||
ret = -EACCES;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* s64 count delta if we create or delete */
|
||||
if (tgs.totl)
|
||||
tval.count = cpu_to_le64((u64)!!(value) - (u64)!!(ret != -ENOENT));
|
||||
@@ -683,7 +834,7 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
le64_add_cpu(&tval.total, -total);
|
||||
}
|
||||
|
||||
/* prepare our xattr */
|
||||
/* prepare the xattr header, name, and start of value in first item */
|
||||
if (value) {
|
||||
if (found_parts)
|
||||
id = le64_to_cpu(key.skx_id);
|
||||
@@ -693,15 +844,30 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
|
||||
xat->val_len = cpu_to_le16(size);
|
||||
memset(xat->__pad, 0, sizeof(xat->__pad));
|
||||
memcpy(xat->name, name, name_len);
|
||||
memcpy(&xat->name[xat->name_len], value, size);
|
||||
memcpy(&xat->name[name_len], value,
|
||||
min(size, SCOUTFS_XATTR_MAX_PART_SIZE -
|
||||
offsetof(struct scoutfs_xattr, name[name_len])));
|
||||
|
||||
if (tgs.totl) {
|
||||
ret = parse_totl_u64(value, size, &total);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
|
||||
le64_add_cpu(&tval.total, total);
|
||||
}
|
||||
|
||||
le64_add_cpu(&tval.total, total);
|
||||
if (tgs.worm) {
|
||||
/* can't set multiple times with different names */
|
||||
scoutfs_inode_get_worm(inode, &worm_ts);
|
||||
if (worm_ts.tv_sec || worm_ts.tv_nsec) {
|
||||
ret = -EINVAL;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
ret = parse_worm_timespec(&worm_ts, value, size);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
|
||||
if (tgs.totl) {
|
||||
@@ -741,17 +907,21 @@ retry:
|
||||
}
|
||||
|
||||
if (found_parts && value)
|
||||
ret = change_xattr_items(inode, id, xat, bytes,
|
||||
ret = change_xattr_items(inode, id, xat, xat_bytes, value, size,
|
||||
xattr_nr_parts(xat), found_parts, lck);
|
||||
else if (found_parts)
|
||||
ret = delete_xattr_items(inode, le64_to_cpu(key.skx_name_hash),
|
||||
le64_to_cpu(key.skx_id), found_parts,
|
||||
lck);
|
||||
else
|
||||
ret = create_xattr_items(inode, id, xat, bytes, lck);
|
||||
ret = create_xattr_items(inode, id, xat, xat_bytes, value, size,
|
||||
xattr_nr_parts(xat), lck);
|
||||
if (ret < 0)
|
||||
goto release;
|
||||
|
||||
if (tgs.worm)
|
||||
scoutfs_inode_set_worm(inode, worm_ts.tv_sec, worm_ts.tv_nsec);
|
||||
|
||||
/* XXX do these want i_mutex or anything? */
|
||||
inode_inc_iversion(inode);
|
||||
inode->i_ctime = CURRENT_TIME;
|
||||
@@ -778,7 +948,7 @@ unlock:
|
||||
scoutfs_unlock(sb, lck, SCOUTFS_LOCK_WRITE);
|
||||
scoutfs_unlock(sb, totl_lock, SCOUTFS_LOCK_WRITE_ONLY);
|
||||
out:
|
||||
vfree(xat);
|
||||
kfree(xat);
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -807,7 +977,7 @@ ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer,
|
||||
struct scoutfs_xattr *xat = NULL;
|
||||
struct scoutfs_lock *lck = NULL;
|
||||
struct scoutfs_key key;
|
||||
unsigned int bytes;
|
||||
unsigned int xat_bytes;
|
||||
ssize_t total = 0;
|
||||
u32 name_hash = 0;
|
||||
bool is_hidden;
|
||||
@@ -820,8 +990,8 @@ ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer,
|
||||
id = *id_pos;
|
||||
|
||||
/* need a buffer large enough for all possible names */
|
||||
bytes = sizeof(struct scoutfs_xattr) + SCOUTFS_XATTR_MAX_NAME_LEN;
|
||||
xat = kmalloc(bytes, GFP_NOFS);
|
||||
xat_bytes = first_item_bytes(SCOUTFS_XATTR_MAX_NAME_LEN, 0);
|
||||
xat = kmalloc(xat_bytes, GFP_NOFS);
|
||||
if (!xat) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
@@ -834,15 +1004,14 @@ ssize_t scoutfs_list_xattrs(struct inode *inode, char *buffer,
|
||||
down_read(&si->xattr_rwsem);
|
||||
|
||||
for (;;) {
|
||||
ret = get_next_xattr(inode, &key, xat, bytes,
|
||||
NULL, 0, name_hash, id, lck);
|
||||
ret = get_next_xattr(inode, &key, xat, xat_bytes, NULL, 0, name_hash, id, lck);
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = total;
|
||||
break;
|
||||
}
|
||||
|
||||
is_hidden = scoutfs_xattr_parse_tags(xat->name, xat->name_len,
|
||||
is_hidden = scoutfs_xattr_parse_tags(sb, xat->name, xat->name_len,
|
||||
&tgs) == 0 && tgs.hide;
|
||||
|
||||
if (show_hidden == is_hidden) {
|
||||
@@ -938,8 +1107,7 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
}
|
||||
|
||||
if (key.skx_part != 0 ||
|
||||
scoutfs_xattr_parse_tags(xat->name, xat->name_len,
|
||||
&tgs) != 0)
|
||||
scoutfs_xattr_parse_tags(sb, xat->name, xat->name_len, &tgs) != 0)
|
||||
memset(&tgs, 0, sizeof(tgs));
|
||||
|
||||
if (tgs.totl) {
|
||||
|
||||
@@ -17,11 +17,12 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
struct scoutfs_xattr_prefix_tags {
|
||||
unsigned long hide:1,
|
||||
srch:1,
|
||||
totl:1;
|
||||
totl:1,
|
||||
worm:1;
|
||||
};
|
||||
|
||||
int scoutfs_xattr_parse_tags(const char *name, unsigned int name_len,
|
||||
struct scoutfs_xattr_prefix_tags *tgs);
|
||||
int scoutfs_xattr_parse_tags(struct super_block *sb, const char *name,
|
||||
unsigned int name_len, struct scoutfs_xattr_prefix_tags *tgs);
|
||||
|
||||
void scoutfs_xattr_init_totl_key(struct scoutfs_key *key, u64 *name);
|
||||
int scoutfs_xattr_combine_totl(void *dst, int dst_len, void *src, int src_len);
|
||||
|
||||
@@ -112,6 +112,7 @@ used during the test.
|
||||
| T\_EX\_META\_DEV | scratch meta bdev | -f | /dev/vdd |
|
||||
| T\_EX\_DATA\_DEV | scratch data bdev | -e | /dev/vdc |
|
||||
| T\_M[0-9] | mount paths | mounted per run | /mnt/test.[0-9]/ |
|
||||
| T\_MODULE | built kernel module | created per run | ../kmod/src/..ko |
|
||||
| T\_NR\_MOUNTS | number of mounts | -n | 3 |
|
||||
| T\_O[0-9] | mount options | created per run | -o server\_addr= |
|
||||
| T\_QUORUM | quorum count | -q | 2 |
|
||||
|
||||
@@ -1,5 +1,18 @@
|
||||
#!/usr/bin/bash
|
||||
|
||||
#
|
||||
# This fencing script is used for testing clusters of multiple mounts on
|
||||
# a single host. It finds mounts to fence by looking for their rids and
|
||||
# only knows how to "fence" by using forced unmount.
|
||||
#
|
||||
|
||||
echo "$0 running rid '$SCOUTFS_FENCED_REQ_RID' ip '$SCOUTFS_FENCED_REQ_IP' args '$@'"
|
||||
|
||||
# Print the arguments to stderr and exit the script with status 1.
# NOTE(review): this is the same behaviour as echo_fail below; exiting
# from a function named "log" looks unintended -- confirm before relying
# on it for non-fatal messages.
log() {
	# write through the inherited descriptor; reopening /dev/stderr
	# with ">" can truncate a regular file that stderr points at
	echo "$@" >&2
	exit 1
}
|
||||
|
||||
echo_fail() {
|
||||
echo "$@" > /dev/stderr
|
||||
exit 1
|
||||
@@ -7,29 +20,24 @@ echo_fail() {
|
||||
|
||||
rid="$SCOUTFS_FENCED_REQ_RID"
|
||||
|
||||
#
|
||||
# Look for a local mount with the rid to fence. Typically we'll at
|
||||
# least find the mount with the server that requested the fence that
|
||||
# we're processing. But it's possible that mounts are unmounted
|
||||
# before, or while, we're running.
|
||||
#
|
||||
mnts=$(findmnt -l -n -t scoutfs -o TARGET) || \
|
||||
echo_fail "findmnt -t scoutfs failed" > /dev/stderr
|
||||
for fs in /sys/fs/scoutfs/*; do
|
||||
[ ! -d "$fs" ] && continue
|
||||
|
||||
for mnt in $mnts; do
|
||||
mnt_rid=$(scoutfs statfs -p "$mnt" -s rid) || \
|
||||
echo_fail "scoutfs statfs $mnt failed"
|
||||
|
||||
if [ "$mnt_rid" == "$rid" ]; then
|
||||
umount -f "$mnt" || \
|
||||
echo_fail "umout -f $mnt"
|
||||
|
||||
exit 0
|
||||
fs_rid="$(cat $fs/rid)" || \
|
||||
echo_fail "failed to get rid in $fs"
|
||||
if [ "$fs_rid" != "$rid" ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
nr="$(cat $fs/data_device_maj_min)" || \
|
||||
echo_fail "failed to get data device major:minor in $fs"
|
||||
|
||||
mnts=$(findmnt -l -n -t scoutfs -o TARGET -S $nr) || \
|
||||
echo_fail "findmnt -t scoutfs -S $nr failed"
|
||||
for mnt in $mnts; do
|
||||
umount -f "$mnt" || \
|
||||
echo_fail "umout -f $mnt failed"
|
||||
done
|
||||
done
|
||||
|
||||
#
|
||||
# If the mount doesn't exist on this host then it can't access the
|
||||
# devices by definition and can be considered fenced.
|
||||
#
|
||||
exit 0
|
||||
|
||||
@@ -35,7 +35,7 @@ t_fail()
|
||||
t_quiet()
|
||||
{
|
||||
echo "# $*" >> "$T_TMPDIR/quiet.log"
|
||||
"$@" > "$T_TMPDIR/quiet.log" 2>&1 || \
|
||||
"$@" >> "$T_TMPDIR/quiet.log" 2>&1 || \
|
||||
t_fail "quiet command failed"
|
||||
}
|
||||
|
||||
|
||||
@@ -82,5 +82,9 @@ t_filter_dmesg()
|
||||
re="$re|scoutfs .* error .*reading quorum block.*to update event.*"
|
||||
re="$re|scoutfs .* error.*server failed to bind to.*"
|
||||
|
||||
# format vers back/compat tries bad mounts
|
||||
re="$re|scoutfs .* error.*outside of supported version.*"
|
||||
re="$re|scoutfs .* error.*could not get .*super.*"
|
||||
|
||||
egrep -v "($re)"
|
||||
}
|
||||
|
||||
@@ -29,13 +29,12 @@ t_mount_rid()
|
||||
}
|
||||
|
||||
#
|
||||
# Output the "f.$fsid.r.$rid" identifier string for the given mount
|
||||
# number, 0 is used by default if none is specified.
|
||||
# Output the "f.$fsid.r.$rid" identifier string for the given path
|
||||
# in a mounted scoutfs volume.
|
||||
#
|
||||
t_ident()
|
||||
t_ident_from_mnt()
|
||||
{
|
||||
local nr="${1:-0}"
|
||||
local mnt="$(eval echo \$T_M$nr)"
|
||||
local mnt="$1"
|
||||
local fsid
|
||||
local rid
|
||||
|
||||
@@ -45,6 +44,38 @@ t_ident()
|
||||
echo "f.${fsid:0:6}.r.${rid:0:6}"
|
||||
}
|
||||
|
||||
#
# Print the "f.$fsid.r.$rid" identifier string of a mount selected by
# number.  The number picks the T_M<nr> mount path variable and is 0
# when no argument is given.
#
t_ident()
{
	local mnt_nr="${1:-0}"
	local mnt_path

	mnt_path="$(eval echo \$T_M$mnt_nr)"
	t_ident_from_mnt "$mnt_path"
}
|
||||
|
||||
#
# Output the sysfs directory for the given "f.$fsid.r.$rid" mount
# identifier string.
#
t_sysfs_path_from_ident()
{
	local id="$1"

	echo "/sys/fs/scoutfs/$id"
}
|
||||
|
||||
#
# Output the sysfs path for a path in a mounted fs.  The path is
# converted to its mount identifier string which names the sysfs
# directory.
#
t_sysfs_path_from_mnt()
{
	local mnt="$1"

	# quoted so a path containing whitespace stays a single argument
	t_sysfs_path_from_ident "$(t_ident_from_mnt "$mnt")"
}
|
||||
|
||||
#
|
||||
# Output the mount's sysfs path, defaulting to mount 0 if none is
|
||||
# specified.
|
||||
@@ -53,7 +84,7 @@ t_sysfs_path()
|
||||
{
|
||||
local nr="$1"
|
||||
|
||||
echo "/sys/fs/scoutfs/$(t_ident $nr)"
|
||||
t_sysfs_path_from_ident $(t_ident $nr)
|
||||
}
|
||||
|
||||
#
|
||||
@@ -75,6 +106,20 @@ t_fs_nrs()
|
||||
seq 0 $((T_NR_MOUNTS - 1))
|
||||
}
|
||||
|
||||
#
# outputs "1" if the given fs number has "1" in its quorum/is_leader
# file.  All other cases output 0, including the fs nr being a client
# which won't have a quorum/ dir.
#
t_fs_is_leader()
{
	# use our own argument rather than whatever "i" the caller has set
	local nr="$1"

	# a missing is_leader file only produces a silenced cat error
	if [ "$(cat "$(t_sysfs_path "$nr")/quorum/is_leader" 2>/dev/null)" == "1" ]; then
		echo "1"
	else
		echo "0"
	fi
}
|
||||
|
||||
#
|
||||
# Output the mount nr of the current server. This takes no steps to
|
||||
# ensure that the server doesn't shut down and have some other mount
|
||||
@@ -83,7 +128,7 @@ t_fs_nrs()
|
||||
t_server_nr()
|
||||
{
|
||||
for i in $(t_fs_nrs); do
|
||||
if [ "$(cat $(t_sysfs_path $i)/quorum/is_leader)" == "1" ]; then
|
||||
if [ "$(t_fs_is_leader $i)" == "1" ]; then
|
||||
echo $i
|
||||
return
|
||||
fi
|
||||
@@ -101,7 +146,7 @@ t_server_nr()
|
||||
t_first_client_nr()
|
||||
{
|
||||
for i in $(t_fs_nrs); do
|
||||
if [ "$(cat $(t_sysfs_path $i)/quorum/is_leader)" == "0" ]; then
|
||||
if [ "$(t_fs_is_leader $i)" == "0" ]; then
|
||||
echo $i
|
||||
return
|
||||
fi
|
||||
|
||||
4
tests/golden/format-version-forward-back
Normal file
4
tests/golden/format-version-forward-back
Normal file
@@ -0,0 +1,4 @@
|
||||
== ensuring utils and module for old versions
|
||||
== unmounting test fs and removing test module
|
||||
== testing combinations of old and new format versions
|
||||
== restoring test module and mount
|
||||
3
tests/golden/lock-recover-invalidate
Normal file
3
tests/golden/lock-recover-invalidate
Normal file
@@ -0,0 +1,3 @@
|
||||
== starting background invalidating read/write load
|
||||
== 60s of lock recovery during invalidating load
|
||||
== stopping background load
|
||||
0
tests/golden/lock-rever-invalidate
Normal file
0
tests/golden/lock-rever-invalidate
Normal file
53
tests/golden/worm-xattr-unit
Normal file
53
tests/golden/worm-xattr-unit
Normal file
@@ -0,0 +1,53 @@
|
||||
== worm xattr creation without .hide. fails
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
== worm xattr creation on dir fails
|
||||
setfattr: /mnt/test/test/worm-xattr-unit: Invalid argument
|
||||
== worm xattr creation
|
||||
== get correct parsed timespec value
|
||||
== hidden scoutfs xattrs before expire
|
||||
== user xattr creation before expire fails
|
||||
setfattr: /mnt/test.0/test/worm-xattr-unit/file-1: Permission denied
|
||||
== worm xattr deletion before expire fails
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Permission denied
|
||||
== worm xattr update before expire fails
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Permission denied
|
||||
== other worm xattr create before expire fails
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Permission denied
|
||||
== file deletion before expire fails
|
||||
rm: cannot remove ‘/mnt/test/test/worm-xattr-unit/file-1’: Permission denied
|
||||
== file rename before expire fails
|
||||
mv: cannot move ‘/mnt/test/test/worm-xattr-unit/file-1’ to ‘/mnt/test/test/worm-xattr-unit/file-2’: Permission denied
|
||||
== file write before expire fails
|
||||
date: write error: Permission denied
|
||||
== file truncate before expire fails
|
||||
truncate: failed to truncate ‘/mnt/test/test/worm-xattr-unit/file-1’ at 0 bytes: Permission denied
|
||||
== file inode update before expire fails
|
||||
touch: setting times of ‘/mnt/test/test/worm-xattr-unit/file-1’: Permission denied
|
||||
== wait until expiration
|
||||
== file write after expire
|
||||
== file rename after expire
|
||||
== other worm xattr create after expire fails
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
== xattr deletion after expire
|
||||
== invalid all zero expire value
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
== invalid non integer expire value
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
== invalid only dots dots expire value
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
== invalid mixed dots secs expire value
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
== invalid (u32)(u64) nsecs expire value
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
== invalid negative (signed)(u64) secs expire value
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
setfattr: /mnt/test/test/worm-xattr-unit/file-1: Invalid argument
|
||||
== cleanup
|
||||
@@ -342,7 +342,8 @@ if [ -n "$T_INSMOD" ]; then
|
||||
msg "removing and reinserting scoutfs module"
|
||||
test -e /sys/module/scoutfs && cmd rmmod scoutfs
|
||||
cmd modprobe libcrc32c
|
||||
cmd insmod "$T_KMOD/src/scoutfs.ko"
|
||||
T_MODULE="$T_KMOD/src/scoutfs.ko"
|
||||
cmd insmod "$T_MODULE"
|
||||
fi
|
||||
|
||||
nr_globs=${#T_TRACE_GLOB[@]}
|
||||
@@ -380,13 +381,14 @@ cmd grep . /sys/kernel/debug/tracing/options/trace_printk \
|
||||
# Build a fenced config that runs scripts out of the repository rather
|
||||
# than the default system directory
|
||||
#
|
||||
conf="$T_RESULTS/scoutfs-fencd.conf"
|
||||
conf="$T_RESULTS/scoutfs-fenced.conf"
|
||||
cat > $conf << EOF
|
||||
SCOUTFS_FENCED_DELAY=1
|
||||
SCOUTFS_FENCED_RUN=$T_TESTS/fenced-local-force-unmount.sh
|
||||
SCOUTFS_FENCED_RUN_ARGS=""
|
||||
SCOUTFS_FENCED_RUN_ARGS="ignored run args"
|
||||
EOF
|
||||
export SCOUTFS_FENCED_CONFIG_FILE="$conf"
|
||||
T_FENCED_LOG="$T_RESULTS/fenced.log"
|
||||
|
||||
#
|
||||
# Run the agent in the background, log its output, and kill it if we
|
||||
@@ -394,7 +396,7 @@ export SCOUTFS_FENCED_CONFIG_FILE="$conf"
|
||||
#
|
||||
fenced_log()
|
||||
{
|
||||
echo "[$(timestamp)] $*" >> "$T_RESULTS/fenced.stdout.log"
|
||||
echo "[$(timestamp)] $*" >> "$T_FENCED_LOG"
|
||||
}
|
||||
fenced_pid=""
|
||||
kill_fenced()
|
||||
@@ -405,7 +407,7 @@ kill_fenced()
|
||||
fi
|
||||
}
|
||||
trap kill_fenced EXIT
|
||||
$T_UTILS/fenced/scoutfs-fenced > "$T_RESULTS/fenced.stdout.log" 2> "$T_RESULTS/fenced.stderr.log" &
|
||||
$T_UTILS/fenced/scoutfs-fenced > "$T_FENCED_LOG" 2>&1 &
|
||||
fenced_pid=$!
|
||||
fenced_log "started fenced pid $fenced_pid in the background"
|
||||
|
||||
|
||||
@@ -9,14 +9,17 @@ fallocate.sh
|
||||
setattr_more.sh
|
||||
offline-extent-waiting.sh
|
||||
move-blocks.sh
|
||||
format-version-forward-back.sh
|
||||
enospc.sh
|
||||
srch-basic-functionality.sh
|
||||
simple-xattr-unit.sh
|
||||
worm-xattr-unit.sh
|
||||
totl-xattr-tag.sh
|
||||
lock-refleak.sh
|
||||
lock-shrink-consistency.sh
|
||||
lock-pr-cw-conflict.sh
|
||||
lock-revoke-getcwd.sh
|
||||
lock-recover-invalidate.sh
|
||||
export-lookup-evict-race.sh
|
||||
createmany-parallel.sh
|
||||
createmany-large-names.sh
|
||||
|
||||
@@ -45,6 +45,18 @@ check_read_write()
|
||||
fi
|
||||
}
|
||||
|
||||
#
# verify that fenced ran our testing fence script.  Each argument is a
# rid; the test fails unless the fenced log contains a line reporting
# that the script ran for that rid with the 'ignored run args' argument
# string.
#
verify_fenced_run()
{
	local rid

	# walk the arguments directly instead of flattening them to a string
	for rid in "$@"; do
		grep -q ".* running rid '$rid'.* args 'ignored run args'" "$T_FENCED_LOG" || \
			t_fail "fenced didn't execute RUN script for rid $rid"
	done
}
|
||||
|
||||
echo "== make sure all mounts can see each other"
|
||||
check_read_write
|
||||
|
||||
@@ -62,12 +74,14 @@ done
|
||||
while t_rid_is_fencing $rid; do
|
||||
sleep .5
|
||||
done
|
||||
verify_fenced_run $rid
|
||||
t_mount $cl
|
||||
check_read_write
|
||||
|
||||
echo "== force unmount all non-server, connection timeout, fence nop, mount"
|
||||
sv=$(t_server_nr)
|
||||
pattern="nonsense"
|
||||
rids=""
|
||||
sync
|
||||
for cl in $(t_fs_nrs); do
|
||||
if [ $cl == $sv ]; then
|
||||
@@ -75,6 +89,7 @@ for cl in $(t_fs_nrs); do
|
||||
fi
|
||||
|
||||
rid=$(t_mount_rid $cl)
|
||||
rids="$rids $rid"
|
||||
pattern="$pattern|$rid"
|
||||
echo "cl $cl sv $sv rid $rid" >> "$T_TMP.log"
|
||||
|
||||
@@ -89,6 +104,7 @@ done
|
||||
while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
|
||||
sleep .5
|
||||
done
|
||||
verify_fenced_run $rids
|
||||
# remount all the clients
|
||||
for cl in $(t_fs_nrs); do
|
||||
if [ $cl == $sv ]; then
|
||||
@@ -109,11 +125,17 @@ t_wait_for_leader
|
||||
while t_rid_is_fencing $rid; do
|
||||
sleep .5
|
||||
done
|
||||
verify_fenced_run $rid
|
||||
t_mount $sv
|
||||
check_read_write
|
||||
|
||||
echo "== force unmount everything, new server fences all previous"
|
||||
sync
|
||||
rids=""
|
||||
# get rids before forced unmount breaks scoutfs statfs
|
||||
for nr in $(t_fs_nrs); do
|
||||
rids="$rids $(t_mount_rid $nr)"
|
||||
done
|
||||
for nr in $(t_fs_nrs); do
|
||||
t_force_umount $nr
|
||||
done
|
||||
@@ -122,6 +144,7 @@ t_mount_all
|
||||
while test -d $(echo /sys/fs/scoutfs/*/fence/* | cut -d " " -f 1); do
|
||||
sleep .5
|
||||
done
|
||||
verify_fenced_run $rids
|
||||
check_read_write
|
||||
|
||||
t_pass
|
||||
|
||||
179
tests/tests/format-version-forward-back.sh
Normal file
179
tests/tests/format-version-forward-back.sh
Normal file
@@ -0,0 +1,179 @@
|
||||
#
|
||||
# Test our basic ability to work with different format versions.
|
||||
#
|
||||
# The current code being tested has a range of supported format
|
||||
# versions. For each of the older supported format versions we have a
|
||||
# git hash of the commit before the next greater version was introduced.
|
||||
# We build versions of the scoutfs utility and kernel module for the
|
||||
# last commit in tree that had a lesser supported version as its max
|
||||
# supported version. We use those binaries to test forward and back
|
||||
# compat as new and old code works with a persistent volume with a given
|
||||
# format version.
|
||||
#
|
||||
|
||||
#
# Succeed only when the format_version file under the sysfs path that
# t_sysfs_path_from_mnt prints for the mount at $1 contains exactly the
# version string given in $2.
#
mount_has_format_version()
{
	local mnt="$1"
	local vers="$2"
	# use the mount path we were handed rather than the global scratch
	# path so the check follows the caller's argument
	local sysfs_fmt_vers="$(t_sysfs_path_from_mnt "$mnt")/format_version"

	test "$(cat "$sysfs_fmt_vers")" == "$vers"
}
|
||||
|
||||
SCR="/mnt/scoutfs.scratch"
|
||||
|
||||
MIN=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_min:"){print $2}')
|
||||
MAX=$(modinfo $T_MODULE | awk '($1 == "scoutfs_format_version_max:"){print $2}')
|
||||
|
||||
echo "min: $MIN max: $MAX" > "$T_TMP.log"
|
||||
|
||||
test "$MIN" -gt 0 -a "$MAX" -gt 0 -a "$MIN" -le "$MAX" || \
|
||||
t_fail "parsed bad versions, min: $MIN max: $MAX"
|
||||
|
||||
test "$MIN" == "$MAX" && \
|
||||
t_skip "only one supported format version: $MIN"
|
||||
|
||||
# prepare dir and wipe any weird old partial state
|
||||
builds="$T_RESULTS/format_version_builds"
|
||||
mkdir -p "$builds"
|
||||
|
||||
echo "== ensuring utils and module for old versions"
|
||||
declare -A commits
|
||||
commits[1]=730a84a
|
||||
for vers in $(seq $MIN $((MAX - 1))); do
|
||||
dir="$builds/$vers"
|
||||
platform=$(uname -rp)
|
||||
buildmark="$dir/buildmark"
|
||||
commit="${commits[$vers]}"
|
||||
|
||||
test -n "$commit" || \
|
||||
t_fail "no commit for vers $vers"
|
||||
|
||||
# have our files for this version
|
||||
test "$(cat $buildmark 2>&1)" == "$platform" && \
|
||||
continue
|
||||
|
||||
# build as one big sequence of commands that can return failure
|
||||
(
|
||||
set -o pipefail
|
||||
|
||||
rm -rf $dir &&
|
||||
mkdir -p $dir/building &&
|
||||
cd "$T_TESTS/.." &&
|
||||
git archive --format=tar "$commit" | tar -C "$dir/building" -xf - &&
|
||||
cd - &&
|
||||
find $dir &&
|
||||
make -C "$dir/building" &&
|
||||
mv $dir/building/utils/src/scoutfs $dir &&
|
||||
mv $dir/building/kmod/src/scoutfs.ko $dir &&
|
||||
rm -rf $dir/building &&
|
||||
echo "$platform" > $buildmark &&
|
||||
find $dir &&
|
||||
cat $buildmark
|
||||
) >> "$T_TMP.log" 2>&1 || t_fail "version $vers build failed"
|
||||
done
|
||||
|
||||
echo "== unmounting test fs and removing test module"
|
||||
t_quiet t_umount_all
|
||||
t_quiet rmmod scoutfs
|
||||
|
||||
echo "== testing combinations of old and new format versions"
|
||||
mkdir -p "$SCR"
|
||||
for vers in $(seq $MIN $((MAX - 1))); do
|
||||
old_scoutfs="$builds/$vers/scoutfs"
|
||||
old_module="$builds/$vers/scoutfs.ko"
|
||||
|
||||
echo "mkfs $vers" >> "$T_TMP.log"
|
||||
t_quiet $old_scoutfs mkfs -f -Q 0,127.0.0.1,53000 "$T_EX_META_DEV" "$T_EX_DATA_DEV" \
|
||||
|| t_fail "mkfs $vers failed"
|
||||
|
||||
echo "mount $vers with $vers" >> "$T_TMP.log"
|
||||
t_quiet insmod $old_module
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$vers"
|
||||
|
||||
echo "creating files in $vers" >> "$T_TMP.log"
|
||||
t_quiet touch "$SCR/file-"{1,2,3}
|
||||
stat "$SCR"/file-* > "$T_TMP.stat" || \
|
||||
t_fail "stat in $vers failed"
|
||||
|
||||
echo "remounting $vers fs with $MAX" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
insmod "$T_MODULE"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$vers"
|
||||
|
||||
echo "verifying stat in $vers with $MAX" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "keep/update/del existing, create new in $vers" >> "$T_TMP.log"
|
||||
t_quiet touch "$SCR/file-2"
|
||||
t_quiet rm -f "$SCR/file-3"
|
||||
t_quiet touch "$SCR/file-4"
|
||||
stat "$SCR"/file-* > "$T_TMP.stat" || \
|
||||
t_fail "stat in $vers failed"
|
||||
|
||||
echo "remounting $vers fs with $vers" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
insmod "$old_module"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$vers"
|
||||
|
||||
echo "verifying stat in $vers with $vers" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "changing format vers to $MAX" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
t_quiet scoutfs change-format-version -F -V $MAX $T_EX_META_DEV "$T_EX_DATA_DEV"
|
||||
|
||||
echo "mount fs $MAX with old $vers should fail" >> "$T_TMP.log"
|
||||
insmod "$old_module"
|
||||
mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR" >> "$T_TMP.log" 2>&1
|
||||
if [ "$?" == "0" ]; then
|
||||
umount "$SCR"
|
||||
t_fail "old code ver $vers able to mount new ver $MAX"
|
||||
fi
|
||||
|
||||
echo "remounting $MAX fs with $MAX" >> "$T_TMP.log"
|
||||
rmmod scoutfs
|
||||
insmod "$T_MODULE"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$MAX"
|
||||
|
||||
echo "verifying stat in $MAX with $MAX" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "keep/update/del existing, create new in $MAX" >> "$T_TMP.log"
|
||||
t_quiet touch "$SCR/file-2"
|
||||
t_quiet rm -f "$SCR/file-4"
|
||||
t_quiet touch "$SCR/file-5"
|
||||
stat "$SCR"/file-* > "$T_TMP.stat" || \
|
||||
t_fail "stat in $MAX failed"
|
||||
|
||||
echo "remounting $MAX fs with $MAX again" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
t_quiet mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
|
||||
"$T_EX_DATA_DEV" "$SCR"
|
||||
t_quiet mount_has_format_version "$SCR" "$MAX"
|
||||
|
||||
echo "verifying stat in $MAX with $MAX again" >> "$T_TMP.log"
|
||||
diff -u "$T_TMP.stat" <(stat "$SCR"/file-*)
|
||||
|
||||
echo "done with old vers $vers" >> "$T_TMP.log"
|
||||
t_quiet umount "$SCR"
|
||||
rmmod scoutfs
|
||||
done
|
||||
|
||||
echo "== restoring test module and mount"
|
||||
insmod "$T_MODULE"
|
||||
t_mount_all
|
||||
|
||||
t_pass
|
||||
43
tests/tests/lock-recover-invalidate.sh
Normal file
43
tests/tests/lock-recover-invalidate.sh
Normal file
@@ -0,0 +1,43 @@
|
||||
#
|
||||
# trigger server failover and lock recovery during heavy invalidating
|
||||
# load on multiple mounts
|
||||
#
|
||||
|
||||
majority_nr=$(t_majority_count)
|
||||
quorum_nr=$T_QUORUM
|
||||
|
||||
test "$quorum_nr" == "$majority_nr" && \
|
||||
t_skip "need remaining majority when leader unmounted"
|
||||
|
||||
test "$T_NR_MOUNTS" -lt "$((quorum_nr + 2))" && \
|
||||
t_skip "need at least 2 non-quorum load mounts"
|
||||
|
||||
echo "== starting background invalidating read/write load"
|
||||
touch "$T_D0/file"
|
||||
load_pids=""
|
||||
for i in $(t_fs_nrs); do
|
||||
if [ "$i" -ge "$quorum_nr" ]; then
|
||||
eval path="\$T_D${i}/file"
|
||||
|
||||
(while true; do touch $path > /dev/null 2>&1; done) &
|
||||
load_pids="$load_pids $!"
|
||||
(while true; do stat $path > /dev/null 2>&1; done) &
|
||||
load_pids="$load_pids $!"
|
||||
fi
|
||||
done
|
||||
|
||||
# had it reproduce in ~40s on wimpy debug kernel guests
|
||||
LENGTH=60
|
||||
echo "== ${LENGTH}s of lock recovery during invalidating load"
|
||||
END=$((SECONDS + LENGTH))
|
||||
while [ "$SECONDS" -lt "$END" ]; do
|
||||
sv=$(t_server_nr)
|
||||
t_umount $sv
|
||||
t_mount $sv
|
||||
# new server had to process greeting for mount to finish
|
||||
done
|
||||
|
||||
echo "== stopping background load"
|
||||
kill $load_pids
|
||||
|
||||
t_pass
|
||||
97
tests/tests/worm-xattr-unit.sh
Normal file
97
tests/tests/worm-xattr-unit.sh
Normal file
@@ -0,0 +1,97 @@
|
||||
t_require_commands touch rm setfattr
|
||||
|
||||
touch "$T_D0/file-1"
|
||||
SECS=$(date '+%s')
|
||||
NSECS=$(date '+%N')
|
||||
DELAY=10
|
||||
EXP=$((SECS + DELAY))
|
||||
|
||||
echo "== worm xattr creation without .hide. fails"
|
||||
setfattr -n scoutfs.worm.level1_expire -v $EXP.$NSECS "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== worm xattr creation on dir fails"
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v $EXP.$NSECS "$T_D0" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== worm xattr creation"
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v $EXP.$NSECS "$T_D0/file-1"
|
||||
|
||||
echo "== get correct parsed timespec value"
|
||||
diff -u --ignore-all-space <(echo "$EXP.$NSECS") <(getfattr --absolute-names --only-values -n scoutfs.hide.worm.level1_expire -m - "$T_D0/file-1")
|
||||
|
||||
echo "== hidden scoutfs xattrs before expire"
|
||||
setfattr -n scoutfs.hide.srch.worm_test -v val "$T_D0/file-1"
|
||||
|
||||
echo "== user xattr creation before expire fails"
|
||||
setfattr -n user.worm_test -v val "$T_D0/file-1"
|
||||
|
||||
echo "== worm xattr deletion before expire fails"
|
||||
setfattr -x scoutfs.hide.worm.level1_expire "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== worm xattr update before expire fails"
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v $SECS.$NSECS "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== other worm xattr create before expire fails"
|
||||
setfattr -n scoutfs.hide.worm.other.level1_expire -v 123.456 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== file deletion before expire fails"
|
||||
rm -f "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== file rename before expire fails"
|
||||
mv $T_D0/file-1 $T_D0/file-2 2>&1 | t_filter_fs
|
||||
|
||||
echo "== file write before expire fails"
|
||||
date >> $T_D0/file-1
|
||||
|
||||
echo "== file truncate before expire fails"
|
||||
truncate -s 0 $T_D0/file-1 2>&1 | t_filter_fs
|
||||
|
||||
echo "== file inode update before expire fails"
|
||||
touch $T_D0/file-1 2>&1 | t_filter_fs
|
||||
|
||||
echo "== wait until expiration"
|
||||
sleep $((DELAY + 1))
|
||||
|
||||
echo "== file write after expire"
|
||||
date >> $T_D0/file-1
|
||||
|
||||
echo "== file rename after expire"
|
||||
mv $T_D0/file-1 $T_D0/file-2
|
||||
mv $T_D0/file-2 $T_D0/file-1
|
||||
|
||||
echo "== other worm xattr create after expire fails"
|
||||
setfattr -n scoutfs.hide.worm.other.level1_expire -v 123.456 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== xattr deletion after expire"
|
||||
setfattr -x scoutfs.hide.worm.level1_expire "$T_D0/file-1"
|
||||
|
||||
echo "== invalid all zero expire value"
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v 0.0 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== invalid non integer expire value"
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v a.a "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== invalid only dots dots expire value"
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v . "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v .. "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v ... "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== invalid mixed dots secs expire value"
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v 11 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v .11 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v 11. "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v .11. "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v .1.1. "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== invalid (u32)(u64) nsecs expire value"
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v 1.1000000000 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v 1.4294967296 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v 1.18446744073709551615 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== invalid negative (signed)(u64) secs expire value"
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v 9223372036854775808.1 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
setfattr -n scoutfs.hide.worm.level1_expire -v 18446744073709551615.1 "$T_D0/file-1" 2>&1 | t_filter_fs
|
||||
|
||||
echo "== cleanup"
|
||||
rm -f "$T_D0/file-1"
|
||||
|
||||
t_pass
|
||||
@@ -55,9 +55,21 @@ test -x "$SCOUTFS_FENCED_RUN" || \
|
||||
error_exit "SCOUTFS_FENCED_RUN '$SCOUTFS_FENCED_RUN' isn't executable"
|
||||
|
||||
#
|
||||
# main loop watching for fence request across all filesystems
|
||||
# Main loop watching for fence request across all filesystems. The
|
||||
# server can shut down without waiting for pending fence requests to
|
||||
# finish. All of the interaction with the fence directory and files can
|
||||
# fail at any moment. We will generate log messages when the dir or
|
||||
# files disappear.
|
||||
#
|
||||
|
||||
# Print the contents of the given file(s).  If cat fails its own error
# message still goes to stderr, and we echo 0 on stdout so the caller
# always gets a value to compare against.
careful_cat()
{
	cat "$@" || echo 0
}
|
||||
|
||||
while sleep $SCOUTFS_FENCED_DELAY; do
|
||||
for fence in /sys/fs/scoutfs/*/fence/*; do
|
||||
# catches unmatched regex when no dirs
|
||||
@@ -66,7 +78,8 @@ while sleep $SCOUTFS_FENCED_DELAY; do
|
||||
fi
|
||||
|
||||
# skip requests that have been handled
|
||||
if [ $(cat "$fence/fenced") == 1 -o $(cat "$fence/error") == 1 ]; then
|
||||
if [ "$(careful_cat $fence/fenced)" == 1 -o \
|
||||
"$(careful_cat $fence/error)" == 1 ]; then
|
||||
continue
|
||||
fi
|
||||
|
||||
@@ -81,10 +94,10 @@ while sleep $SCOUTFS_FENCED_DELAY; do
|
||||
export SCOUTFS_FENCED_REQ_RID="$rid"
|
||||
export SCOUTFS_FENCED_REQ_IP="$ip"
|
||||
|
||||
$run $SCOUTFS_FENCED_RUN_ARGS
|
||||
$SCOUTFS_FENCED_RUN $SCOUTFS_FENCED_RUN_ARGS
|
||||
rc=$?
|
||||
if [ "$rc" != 0 ]; then
|
||||
log_message "server $srv fencing rid $rid saw error status $rc from $run"
|
||||
log_message "server $srv fencing rid $rid saw error status $rc"
|
||||
echo 1 > "$fence/error"
|
||||
continue
|
||||
fi
|
||||
|
||||
@@ -212,6 +212,38 @@ name, total value, and a count of contributing attributes can be read
|
||||
with the
|
||||
.IB READ_XATTR_TOTALS
|
||||
ioctl.
|
||||
.TP
|
||||
.B .worm.
|
||||
Attributes with the .worm. flag are used to control WORM (write once,
|
||||
read many) access restrictions, typically used to comply with operational
|
||||
regulations. The only currently supported mechanism is controlled by a
|
||||
single .worm. attribute whose name ends in ".level1_expire". Additional
|
||||
levels with different enforcement policies may be added and would be
|
||||
controlled by different attributes.
|
||||
.sp
|
||||
The level1 policy is enabled by setting an attribute on a file that
|
||||
contains the .worm. tag and whose name ends in ".level1_expire". The
|
||||
attribute name must also include the .hide. tag. As with other scoutfs
|
||||
tagged attributes, the name may include any other string between the
|
||||
tags and the final required suffix. Only one level1 expiration
|
||||
attribute may be set at a time.
|
||||
.sp
|
||||
The value of the attribute contains a string representing the kernel
|
||||
time at which the policy enforcement will expire. The time is formatted
|
||||
as "seconds.nanoseconds" in GMT. The attribute must be set with the
|
||||
CAP_SYS_ADMIN capability, perhaps via the root user. Setting an
|
||||
expiration value of "0.0" will always fail. The policy can only be set
|
||||
on regular files.
|
||||
.sp
|
||||
The file is protected once the expiration attribute is set and can not
|
||||
be modified until the expiration time has passed. The file data, its
|
||||
inode fields, directory entries that link to its inode, untrusted
|
||||
"user." attributes, and non-hidden scoutfs attributes are all protected
|
||||
and modification attempts will fail with permission denied.
|
||||
Trusted system-level attributes like "security." and hidden scoutfs
|
||||
attributes may still be modified to support ongoing archiving
|
||||
operations. The worm attribute itself can not be modified once it is
|
||||
set and can only be removed once the expiration time has passed.
|
||||
.RE
|
||||
|
||||
.SH FORMAT VERSION
|
||||
@@ -292,6 +324,20 @@ The version that a mount is using is shown in the
|
||||
file in the mount's sysfs directory, typically
|
||||
.I /sys/fs/scoutfs/f.FSID.r.RID/
|
||||
.RE
|
||||
.sp
|
||||
The defined format versions are:
|
||||
.RS
|
||||
.TP
|
||||
.sp
|
||||
.B 1
|
||||
Initial format version.
|
||||
.TP
|
||||
.B 2
|
||||
Added level1 WORM file protection for regular files. The
|
||||
".level1_expire" worm tagged extended attribute was added and the inode
|
||||
item size was increased to store the parsed expiration time from the
|
||||
extended attribute.
|
||||
.RE
|
||||
|
||||
.SH CORRUPTION DETECTION
|
||||
A
|
||||
|
||||
@@ -119,6 +119,16 @@ static int do_change_fmt_vers(struct change_fmt_vers_args *args)
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (le64_to_cpu(meta_super->fmt_vers) > args->fmt_vers ||
|
||||
le64_to_cpu(data_super->fmt_vers) > args->fmt_vers) {
|
||||
ret = -EPERM;
|
||||
printf("Downgrade of Meta Format Version: %llu and Data Format Version: %llu to Format Version: %llu is not allowed\n",
|
||||
le64_to_cpu(meta_super->fmt_vers),
|
||||
le64_to_cpu(data_super->fmt_vers),
|
||||
args->fmt_vers);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (le64_to_cpu(meta_super->fmt_vers) != args->fmt_vers) {
|
||||
meta_super->fmt_vers = cpu_to_le64(args->fmt_vers);
|
||||
|
||||
|
||||
@@ -262,7 +262,10 @@ static int do_mkfs(struct mkfs_args *args)
|
||||
inode.ctime.nsec = inode.atime.nsec;
|
||||
inode.mtime.sec = inode.atime.sec;
|
||||
inode.mtime.nsec = inode.atime.nsec;
|
||||
btree_append_item(bt, &key, &inode, sizeof(inode));
|
||||
if (args->fmt_vers == 1)
|
||||
btree_append_item(bt, &key, &inode, SCOUTFS_INODE_FMT_V1_BYTES);
|
||||
else
|
||||
btree_append_item(bt, &key, &inode, SCOUTFS_INODE_FMT_V2_BYTES);
|
||||
|
||||
ret = write_block(meta_fd, SCOUTFS_BLOCK_MAGIC_BTREE, fsid, 1, blkno,
|
||||
SCOUTFS_BLOCK_LG_SHIFT, &bt->hdr);
|
||||
|
||||
@@ -69,6 +69,12 @@ static void print_inode(struct scoutfs_key *key, void *val, int val_len)
|
||||
le32_to_cpu(inode->ctime.nsec),
|
||||
le64_to_cpu(inode->mtime.sec),
|
||||
le32_to_cpu(inode->mtime.nsec));
|
||||
|
||||
if (val_len == SCOUTFS_INODE_FMT_V2_BYTES) {
|
||||
printf(" worm_level1_expire %llu.%08u\n",
|
||||
le64_to_cpu(inode->worm_level1_expire.sec),
|
||||
le32_to_cpu(inode->worm_level1_expire.nsec));
|
||||
}
|
||||
}
|
||||
|
||||
static void print_orphan(struct scoutfs_key *key, void *val, int val_len)
|
||||
|
||||
Reference in New Issue
Block a user