mirror of
https://github.com/versity/scoutfs.git
synced 2026-04-30 09:56:55 +00:00
Compare commits
2 Commits
v1.26
...
zab/test_l
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ead8be6b8c | ||
|
|
ae84271b37 |
@@ -1,41 +1,6 @@
|
|||||||
Versity ScoutFS Release Notes
|
Versity ScoutFS Release Notes
|
||||||
=============================
|
=============================
|
||||||
|
|
||||||
---
|
|
||||||
v1.26
|
|
||||||
\
|
|
||||||
*Nov 17, 2025*
|
|
||||||
|
|
||||||
Add the ino\_alloc\_per\_lock mount option. This changes the number of
|
|
||||||
inode numbers allocated under each cluster lock and can alleviate lock
|
|
||||||
contention for some patterns of larger file creation.
|
|
||||||
|
|
||||||
Add the tcp\_keepalive\_timeout\_ms mount option. This can enable the
|
|
||||||
system to survive longer periods of networking outages.
|
|
||||||
|
|
||||||
Fix a rare double free of internal btree metadata blocks when merging
|
|
||||||
log trees. The duplicated freed metadata block numbers would cause
|
|
||||||
persistent errors in the server, preventing the server from starting and
|
|
||||||
hanging the system.
|
|
||||||
|
|
||||||
Fix the data\_wait interface to not require the correct data\_version of
|
|
||||||
the inode when raising an error. This lets callers raise errors when
|
|
||||||
they're unable to recall the details of the inode to discover its
|
|
||||||
data\_version.
|
|
||||||
|
|
||||||
Change scoutfs to more aggressively reclaim cached memory when under
|
|
||||||
memory pressure. This makes scoutfs behave more like other kernel
|
|
||||||
components and it integrates better with the reclaim policy heuristics
|
|
||||||
in the VM core of the kernel.
|
|
||||||
|
|
||||||
Change scoutfs to more efficiently transmit and receive socket messages.
|
|
||||||
Under heavy load this can process messages sufficiently more quickly to
|
|
||||||
avoid hung task messages for tasks that were waiting for cluster lock
|
|
||||||
messages to be processed.
|
|
||||||
|
|
||||||
Fix faulty server block commit budget calculations that were generating
|
|
||||||
spurious "holders exceeded alloc budget" console messages.
|
|
||||||
|
|
||||||
---
|
---
|
||||||
v1.25
|
v1.25
|
||||||
\
|
\
|
||||||
|
|||||||
@@ -1482,6 +1482,12 @@ static int remove_index_items(struct super_block *sb, u64 ino,
|
|||||||
* Return an allocated and unused inode number. Returns -ENOSPC if
|
* Return an allocated and unused inode number. Returns -ENOSPC if
|
||||||
* we're out of inodes.
|
* we're out of inodes.
|
||||||
*
|
*
|
||||||
|
* Each parent directory has its own pool of free inode numbers. Items
|
||||||
|
* are sorted by their inode numbers as they're stored in segments.
|
||||||
|
* This will tend to group together files that are created in a
|
||||||
|
* directory at the same time in segments. Concurrent creation across
|
||||||
|
* different directories will be stored in their own regions.
|
||||||
|
*
|
||||||
* Inode numbers are never reclaimed. If the inode is evicted or we're
|
* Inode numbers are never reclaimed. If the inode is evicted or we're
|
||||||
* unmounted the pending inode numbers will be lost. Asking for a
|
* unmounted the pending inode numbers will be lost. Asking for a
|
||||||
* relatively small number from the server each time will tend to
|
* relatively small number from the server each time will tend to
|
||||||
@@ -1491,18 +1497,12 @@ static int remove_index_items(struct super_block *sb, u64 ino,
|
|||||||
int scoutfs_alloc_ino(struct super_block *sb, bool is_dir, u64 *ino_ret)
|
int scoutfs_alloc_ino(struct super_block *sb, bool is_dir, u64 *ino_ret)
|
||||||
{
|
{
|
||||||
DECLARE_INODE_SB_INFO(sb, inf);
|
DECLARE_INODE_SB_INFO(sb, inf);
|
||||||
struct scoutfs_mount_options opts;
|
|
||||||
struct inode_allocator *ia;
|
struct inode_allocator *ia;
|
||||||
u64 ino;
|
u64 ino;
|
||||||
u64 nr;
|
u64 nr;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
scoutfs_options_read(sb, &opts);
|
ia = is_dir ? &inf->dir_ino_alloc : &inf->ino_alloc;
|
||||||
|
|
||||||
if (is_dir && opts.ino_alloc_per_lock == SCOUTFS_LOCK_INODE_GROUP_NR)
|
|
||||||
ia = &inf->dir_ino_alloc;
|
|
||||||
else
|
|
||||||
ia = &inf->ino_alloc;
|
|
||||||
|
|
||||||
spin_lock(&ia->lock);
|
spin_lock(&ia->lock);
|
||||||
|
|
||||||
@@ -1523,17 +1523,6 @@ int scoutfs_alloc_ino(struct super_block *sb, bool is_dir, u64 *ino_ret)
|
|||||||
*ino_ret = ia->ino++;
|
*ino_ret = ia->ino++;
|
||||||
ia->nr--;
|
ia->nr--;
|
||||||
|
|
||||||
if (opts.ino_alloc_per_lock != SCOUTFS_LOCK_INODE_GROUP_NR) {
|
|
||||||
nr = ia->ino & SCOUTFS_LOCK_INODE_GROUP_MASK;
|
|
||||||
if (nr >= opts.ino_alloc_per_lock) {
|
|
||||||
nr = SCOUTFS_LOCK_INODE_GROUP_NR - nr;
|
|
||||||
if (nr > ia->nr)
|
|
||||||
nr = ia->nr;
|
|
||||||
ia->ino += nr;
|
|
||||||
ia->nr -= nr;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
spin_unlock(&ia->lock);
|
spin_unlock(&ia->lock);
|
||||||
ret = 0;
|
ret = 0;
|
||||||
out:
|
out:
|
||||||
|
|||||||
@@ -35,12 +35,6 @@ do { \
|
|||||||
} \
|
} \
|
||||||
} while (0) \
|
} while (0) \
|
||||||
|
|
||||||
#define scoutfs_bug_on_err(sb, err, fmt, args...) \
|
|
||||||
do { \
|
|
||||||
__typeof__(err) _err = (err); \
|
|
||||||
scoutfs_bug_on(sb, _err < 0 && _err != -ENOLINK, fmt, ##args); \
|
|
||||||
} while (0)
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Each message is only generated once per volume. Remounting resets
|
* Each message is only generated once per volume. Remounting resets
|
||||||
* the messages.
|
* the messages.
|
||||||
|
|||||||
@@ -33,7 +33,6 @@ enum {
|
|||||||
Opt_acl,
|
Opt_acl,
|
||||||
Opt_data_prealloc_blocks,
|
Opt_data_prealloc_blocks,
|
||||||
Opt_data_prealloc_contig_only,
|
Opt_data_prealloc_contig_only,
|
||||||
Opt_ino_alloc_per_lock,
|
|
||||||
Opt_log_merge_wait_timeout_ms,
|
Opt_log_merge_wait_timeout_ms,
|
||||||
Opt_metadev_path,
|
Opt_metadev_path,
|
||||||
Opt_noacl,
|
Opt_noacl,
|
||||||
@@ -48,7 +47,6 @@ static const match_table_t tokens = {
|
|||||||
{Opt_acl, "acl"},
|
{Opt_acl, "acl"},
|
||||||
{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
|
{Opt_data_prealloc_blocks, "data_prealloc_blocks=%s"},
|
||||||
{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
|
{Opt_data_prealloc_contig_only, "data_prealloc_contig_only=%s"},
|
||||||
{Opt_ino_alloc_per_lock, "ino_alloc_per_lock=%s"},
|
|
||||||
{Opt_log_merge_wait_timeout_ms, "log_merge_wait_timeout_ms=%s"},
|
{Opt_log_merge_wait_timeout_ms, "log_merge_wait_timeout_ms=%s"},
|
||||||
{Opt_metadev_path, "metadev_path=%s"},
|
{Opt_metadev_path, "metadev_path=%s"},
|
||||||
{Opt_noacl, "noacl"},
|
{Opt_noacl, "noacl"},
|
||||||
@@ -138,7 +136,6 @@ static void init_default_options(struct scoutfs_mount_options *opts)
|
|||||||
|
|
||||||
opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS;
|
opts->data_prealloc_blocks = SCOUTFS_DATA_PREALLOC_DEFAULT_BLOCKS;
|
||||||
opts->data_prealloc_contig_only = 1;
|
opts->data_prealloc_contig_only = 1;
|
||||||
opts->ino_alloc_per_lock = SCOUTFS_LOCK_INODE_GROUP_NR;
|
|
||||||
opts->log_merge_wait_timeout_ms = DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS;
|
opts->log_merge_wait_timeout_ms = DEFAULT_LOG_MERGE_WAIT_TIMEOUT_MS;
|
||||||
opts->orphan_scan_delay_ms = -1;
|
opts->orphan_scan_delay_ms = -1;
|
||||||
opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
|
opts->quorum_heartbeat_timeout_ms = SCOUTFS_QUORUM_DEF_HB_TIMEO_MS;
|
||||||
@@ -241,18 +238,6 @@ static int parse_options(struct super_block *sb, char *options, struct scoutfs_m
|
|||||||
opts->data_prealloc_contig_only = nr;
|
opts->data_prealloc_contig_only = nr;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case Opt_ino_alloc_per_lock:
|
|
||||||
ret = match_int(args, &nr);
|
|
||||||
if (ret < 0 || nr < 1 || nr > SCOUTFS_LOCK_INODE_GROUP_NR) {
|
|
||||||
scoutfs_err(sb, "invalid ino_alloc_per_lock option, must be between 1 and %u",
|
|
||||||
SCOUTFS_LOCK_INODE_GROUP_NR);
|
|
||||||
if (ret == 0)
|
|
||||||
ret = -EINVAL;
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
opts->ino_alloc_per_lock = nr;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case Opt_tcp_keepalive_timeout_ms:
|
case Opt_tcp_keepalive_timeout_ms:
|
||||||
ret = match_int(args, &nr);
|
ret = match_int(args, &nr);
|
||||||
ret = verify_tcp_keepalive_timeout_ms(sb, ret, nr);
|
ret = verify_tcp_keepalive_timeout_ms(sb, ret, nr);
|
||||||
@@ -408,7 +393,6 @@ int scoutfs_options_show(struct seq_file *seq, struct dentry *root)
|
|||||||
seq_puts(seq, ",acl");
|
seq_puts(seq, ",acl");
|
||||||
seq_printf(seq, ",data_prealloc_blocks=%llu", opts.data_prealloc_blocks);
|
seq_printf(seq, ",data_prealloc_blocks=%llu", opts.data_prealloc_blocks);
|
||||||
seq_printf(seq, ",data_prealloc_contig_only=%u", opts.data_prealloc_contig_only);
|
seq_printf(seq, ",data_prealloc_contig_only=%u", opts.data_prealloc_contig_only);
|
||||||
seq_printf(seq, ",ino_alloc_per_lock=%u", opts.ino_alloc_per_lock);
|
|
||||||
seq_printf(seq, ",metadev_path=%s", opts.metadev_path);
|
seq_printf(seq, ",metadev_path=%s", opts.metadev_path);
|
||||||
if (!is_acl)
|
if (!is_acl)
|
||||||
seq_puts(seq, ",noacl");
|
seq_puts(seq, ",noacl");
|
||||||
@@ -497,45 +481,6 @@ static ssize_t data_prealloc_contig_only_store(struct kobject *kobj, struct kobj
|
|||||||
}
|
}
|
||||||
SCOUTFS_ATTR_RW(data_prealloc_contig_only);
|
SCOUTFS_ATTR_RW(data_prealloc_contig_only);
|
||||||
|
|
||||||
static ssize_t ino_alloc_per_lock_show(struct kobject *kobj, struct kobj_attribute *attr,
|
|
||||||
char *buf)
|
|
||||||
{
|
|
||||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
|
||||||
struct scoutfs_mount_options opts;
|
|
||||||
|
|
||||||
scoutfs_options_read(sb, &opts);
|
|
||||||
|
|
||||||
return snprintf(buf, PAGE_SIZE, "%u", opts.ino_alloc_per_lock);
|
|
||||||
}
|
|
||||||
static ssize_t ino_alloc_per_lock_store(struct kobject *kobj, struct kobj_attribute *attr,
|
|
||||||
const char *buf, size_t count)
|
|
||||||
{
|
|
||||||
struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
|
|
||||||
DECLARE_OPTIONS_INFO(sb, optinf);
|
|
||||||
char nullterm[20]; /* more than enough for octal -U32_MAX */
|
|
||||||
long val;
|
|
||||||
int len;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
len = min(count, sizeof(nullterm) - 1);
|
|
||||||
memcpy(nullterm, buf, len);
|
|
||||||
nullterm[len] = '\0';
|
|
||||||
|
|
||||||
ret = kstrtol(nullterm, 0, &val);
|
|
||||||
if (ret < 0 || val < 1 || val > SCOUTFS_LOCK_INODE_GROUP_NR) {
|
|
||||||
scoutfs_err(sb, "invalid ino_alloc_per_lock option, must be between 1 and %u",
|
|
||||||
SCOUTFS_LOCK_INODE_GROUP_NR);
|
|
||||||
return -EINVAL;
|
|
||||||
}
|
|
||||||
|
|
||||||
write_seqlock(&optinf->seqlock);
|
|
||||||
optinf->opts.ino_alloc_per_lock = val;
|
|
||||||
write_sequnlock(&optinf->seqlock);
|
|
||||||
|
|
||||||
return count;
|
|
||||||
}
|
|
||||||
SCOUTFS_ATTR_RW(ino_alloc_per_lock);
|
|
||||||
|
|
||||||
static ssize_t log_merge_wait_timeout_ms_show(struct kobject *kobj, struct kobj_attribute *attr,
|
static ssize_t log_merge_wait_timeout_ms_show(struct kobject *kobj, struct kobj_attribute *attr,
|
||||||
char *buf)
|
char *buf)
|
||||||
{
|
{
|
||||||
@@ -676,7 +621,6 @@ SCOUTFS_ATTR_RO(quorum_slot_nr);
|
|||||||
static struct attribute *options_attrs[] = {
|
static struct attribute *options_attrs[] = {
|
||||||
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
|
SCOUTFS_ATTR_PTR(data_prealloc_blocks),
|
||||||
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
|
SCOUTFS_ATTR_PTR(data_prealloc_contig_only),
|
||||||
SCOUTFS_ATTR_PTR(ino_alloc_per_lock),
|
|
||||||
SCOUTFS_ATTR_PTR(log_merge_wait_timeout_ms),
|
SCOUTFS_ATTR_PTR(log_merge_wait_timeout_ms),
|
||||||
SCOUTFS_ATTR_PTR(metadev_path),
|
SCOUTFS_ATTR_PTR(metadev_path),
|
||||||
SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),
|
SCOUTFS_ATTR_PTR(orphan_scan_delay_ms),
|
||||||
|
|||||||
@@ -8,7 +8,6 @@
|
|||||||
struct scoutfs_mount_options {
|
struct scoutfs_mount_options {
|
||||||
u64 data_prealloc_blocks;
|
u64 data_prealloc_blocks;
|
||||||
bool data_prealloc_contig_only;
|
bool data_prealloc_contig_only;
|
||||||
unsigned int ino_alloc_per_lock;
|
|
||||||
unsigned int log_merge_wait_timeout_ms;
|
unsigned int log_merge_wait_timeout_ms;
|
||||||
char *metadev_path;
|
char *metadev_path;
|
||||||
unsigned int orphan_scan_delay_ms;
|
unsigned int orphan_scan_delay_ms;
|
||||||
|
|||||||
@@ -994,11 +994,10 @@ static int for_each_rid_last_lt(struct super_block *sb, struct scoutfs_btree_roo
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Log merge range items are stored at the starting fs key of the range
|
* Log merge range items are stored at the starting fs key of the range.
|
||||||
* with the zone overwritten to indicate the log merge item type. This
|
* The only fs key field that doesn't hold information is the zone, so
|
||||||
* day0 mistake loses sorting information for items in the different
|
* we use the zone to differentiate all types that we store in the log
|
||||||
* zones in the fs root, so the range items aren't strictly sorted by
|
* merge tree.
|
||||||
* the starting key of their range.
|
|
||||||
*/
|
*/
|
||||||
static void init_log_merge_key(struct scoutfs_key *key, u8 zone, u64 first,
|
static void init_log_merge_key(struct scoutfs_key *key, u8 zone, u64 first,
|
||||||
u64 second)
|
u64 second)
|
||||||
@@ -1030,51 +1029,6 @@ static int next_log_merge_item_key(struct super_block *sb, struct scoutfs_btree_
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* The range items aren't sorted by their range.start because
|
|
||||||
* _RANGE_ZONE clobbers the range's zone. We sweep all the items and
|
|
||||||
* find the range with the next least starting key that's greater than
|
|
||||||
* the caller's starting key. We have to be careful to iterate over the
|
|
||||||
* log_merge tree keys because the ranges can overlap as they're mapped
|
|
||||||
* to the log_merge keys by clobbering their zone.
|
|
||||||
*/
|
|
||||||
static int next_log_merge_range(struct super_block *sb, struct scoutfs_btree_root *root,
|
|
||||||
struct scoutfs_key *start, struct scoutfs_log_merge_range *rng)
|
|
||||||
{
|
|
||||||
struct scoutfs_log_merge_range *next;
|
|
||||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
|
||||||
struct scoutfs_key key;
|
|
||||||
int ret;
|
|
||||||
|
|
||||||
key = *start;
|
|
||||||
key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
|
|
||||||
scoutfs_key_set_ones(&rng->start);
|
|
||||||
|
|
||||||
do {
|
|
||||||
ret = scoutfs_btree_next(sb, root, &key, &iref);
|
|
||||||
if (ret == 0) {
|
|
||||||
if (iref.key->sk_zone != SCOUTFS_LOG_MERGE_RANGE_ZONE) {
|
|
||||||
ret = -ENOENT;
|
|
||||||
} else if (iref.val_len != sizeof(struct scoutfs_log_merge_range)) {
|
|
||||||
ret = -EIO;
|
|
||||||
} else {
|
|
||||||
next = iref.val;
|
|
||||||
if (scoutfs_key_compare(&next->start, &rng->start) < 0 &&
|
|
||||||
scoutfs_key_compare(&next->start, start) >= 0)
|
|
||||||
*rng = *next;
|
|
||||||
key = *iref.key;
|
|
||||||
scoutfs_key_inc(&key);
|
|
||||||
}
|
|
||||||
scoutfs_btree_put_iref(&iref);
|
|
||||||
}
|
|
||||||
} while (ret == 0);
|
|
||||||
|
|
||||||
if (ret == -ENOENT && !scoutfs_key_is_ones(&rng->start))
|
|
||||||
ret = 0;
|
|
||||||
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int next_log_merge_item(struct super_block *sb,
|
static int next_log_merge_item(struct super_block *sb,
|
||||||
struct scoutfs_btree_root *root,
|
struct scoutfs_btree_root *root,
|
||||||
u8 zone, u64 first, u64 second,
|
u8 zone, u64 first, u64 second,
|
||||||
@@ -1728,7 +1682,6 @@ static int server_commit_log_trees(struct super_block *sb,
|
|||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
if (arg_len != sizeof(struct scoutfs_log_trees)) {
|
if (arg_len != sizeof(struct scoutfs_log_trees)) {
|
||||||
err_str = "invalid message log_trees size";
|
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
@@ -1792,7 +1745,7 @@ static int server_commit_log_trees(struct super_block *sb,
|
|||||||
|
|
||||||
ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
|
ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
|
||||||
&super->logs_root, &key, <, sizeof(lt));
|
&super->logs_root, &key, <, sizeof(lt));
|
||||||
BUG_ON(ret < 0); /* dirtying should have guaranteed success, srch item inconsistent */
|
BUG_ON(ret < 0); /* dirtying should have guaranteed success */
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
err_str = "updating log trees item";
|
err_str = "updating log trees item";
|
||||||
|
|
||||||
@@ -1800,10 +1753,11 @@ unlock:
|
|||||||
mutex_unlock(&server->logs_mutex);
|
mutex_unlock(&server->logs_mutex);
|
||||||
|
|
||||||
ret = server_apply_commit(sb, &hold, ret);
|
ret = server_apply_commit(sb, &hold, ret);
|
||||||
out:
|
|
||||||
if (ret < 0)
|
if (ret < 0)
|
||||||
scoutfs_err(sb, "server error %d committing client logs for rid %016llx, nr %llu: %s",
|
scoutfs_err(sb, "server error %d committing client logs for rid %016llx, nr %llu: %s",
|
||||||
ret, rid, le64_to_cpu(lt.nr), err_str);
|
ret, rid, le64_to_cpu(lt.nr), err_str);
|
||||||
|
out:
|
||||||
|
WARN_ON_ONCE(ret < 0);
|
||||||
return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
|
return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2140,7 +2094,7 @@ static int server_srch_get_compact(struct super_block *sb,
|
|||||||
|
|
||||||
apply:
|
apply:
|
||||||
ret = server_apply_commit(sb, &hold, ret);
|
ret = server_apply_commit(sb, &hold, ret);
|
||||||
WARN_ON_ONCE(ret < 0 && ret != -ENOENT && ret != -ENOLINK); /* XXX leaked busy item */
|
WARN_ON_ONCE(ret < 0 && ret != -ENOENT); /* XXX leaked busy item */
|
||||||
out:
|
out:
|
||||||
ret = scoutfs_net_response(sb, conn, cmd, id, ret,
|
ret = scoutfs_net_response(sb, conn, cmd, id, ret,
|
||||||
sc, sizeof(struct scoutfs_srch_compact));
|
sc, sizeof(struct scoutfs_srch_compact));
|
||||||
@@ -2518,9 +2472,10 @@ out:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* inconsistent */
|
if (ret < 0)
|
||||||
scoutfs_bug_on_err(sb, ret,
|
scoutfs_err(sb, "server error %d splicing log merge completion: %s", ret, err_str);
|
||||||
"server error %d splicing log merge completion: %s", ret, err_str);
|
|
||||||
|
BUG_ON(ret); /* inconsistent */
|
||||||
|
|
||||||
return ret ?: einprogress;
|
return ret ?: einprogress;
|
||||||
}
|
}
|
||||||
@@ -2765,7 +2720,10 @@ restart:
|
|||||||
|
|
||||||
/* find the next range, always checking for splicing */
|
/* find the next range, always checking for splicing */
|
||||||
for (;;) {
|
for (;;) {
|
||||||
ret = next_log_merge_range(sb, &super->log_merge, &stat.next_range_key, &rng);
|
key = stat.next_range_key;
|
||||||
|
key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
|
||||||
|
ret = next_log_merge_item_key(sb, &super->log_merge, SCOUTFS_LOG_MERGE_RANGE_ZONE,
|
||||||
|
&key, &rng, sizeof(rng));
|
||||||
if (ret < 0 && ret != -ENOENT) {
|
if (ret < 0 && ret != -ENOENT) {
|
||||||
err_str = "finding merge range item";
|
err_str = "finding merge range item";
|
||||||
goto out;
|
goto out;
|
||||||
|
|||||||
@@ -8,34 +8,36 @@
|
|||||||
|
|
||||||
echo "$0 running rid '$SCOUTFS_FENCED_REQ_RID' ip '$SCOUTFS_FENCED_REQ_IP' args '$@'"
|
echo "$0 running rid '$SCOUTFS_FENCED_REQ_RID' ip '$SCOUTFS_FENCED_REQ_IP' args '$@'"
|
||||||
|
|
||||||
echo_fail() {
|
log() {
|
||||||
echo "$@" >> /dev/stderr
|
echo "$@" > /dev/stderr
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
# silence error messages
|
echo_fail() {
|
||||||
quiet_cat()
|
echo "$@" > /dev/stderr
|
||||||
{
|
exit 1
|
||||||
cat "$@" 2>/dev/null
|
|
||||||
}
|
}
|
||||||
|
|
||||||
rid="$SCOUTFS_FENCED_REQ_RID"
|
rid="$SCOUTFS_FENCED_REQ_RID"
|
||||||
|
|
||||||
shopt -s nullglob
|
|
||||||
for fs in /sys/fs/scoutfs/*; do
|
for fs in /sys/fs/scoutfs/*; do
|
||||||
fs_rid="$(quiet_cat $fs/rid)"
|
[ ! -d "$fs" ] && continue
|
||||||
nr="$(quiet_cat $fs/data_device_maj_min)"
|
|
||||||
[ ! -d "$fs" -o "$fs_rid" != "$rid" ] && continue
|
|
||||||
|
|
||||||
mnt=$(findmnt -l -n -t scoutfs -o TARGET -S $nr) || \
|
fs_rid="$(cat $fs/rid)" || \
|
||||||
echo_fail "findmnt -t scoutfs -S $nr failed"
|
echo_fail "failed to get rid in $fs"
|
||||||
[ -z "$mnt" ] && continue
|
if [ "$fs_rid" != "$rid" ]; then
|
||||||
|
continue
|
||||||
if ! umount -qf "$mnt"; then
|
|
||||||
if [ -d "$fs" ]; then
|
|
||||||
echo_fail "umount -qf $mnt failed"
|
|
||||||
fi
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
nr="$(cat $fs/data_device_maj_min)" || \
|
||||||
|
echo_fail "failed to get data device major:minor in $fs"
|
||||||
|
|
||||||
|
mnts=$(findmnt -l -n -t scoutfs -o TARGET -S $nr) || \
|
||||||
|
echo_fail "findmnt -t scoutfs -S $nr failed"
|
||||||
|
for mnt in $mnts; do
|
||||||
|
umount -f "$mnt" || \
|
||||||
|
echo_fail "umout -f $mnt failed"
|
||||||
|
done
|
||||||
done
|
done
|
||||||
|
|
||||||
exit 0
|
exit 0
|
||||||
|
|||||||
@@ -121,7 +121,6 @@ t_filter_dmesg()
|
|||||||
|
|
||||||
# in debugging kernels we can slow things down a bit
|
# in debugging kernels we can slow things down a bit
|
||||||
re="$re|hrtimer: interrupt took .*"
|
re="$re|hrtimer: interrupt took .*"
|
||||||
re="$re|clocksource: Long readout interval"
|
|
||||||
|
|
||||||
# fencing tests force unmounts and trigger timeouts
|
# fencing tests force unmounts and trigger timeouts
|
||||||
re="$re|scoutfs .* forcing unmount"
|
re="$re|scoutfs .* forcing unmount"
|
||||||
@@ -167,9 +166,6 @@ t_filter_dmesg()
|
|||||||
# perf warning that it adjusted sample rate
|
# perf warning that it adjusted sample rate
|
||||||
re="$re|perf: interrupt took too long.*lowering kernel.perf_event_max_sample_rate.*"
|
re="$re|perf: interrupt took too long.*lowering kernel.perf_event_max_sample_rate.*"
|
||||||
|
|
||||||
# some ci test guests are unresponsive
|
|
||||||
re="$re|longest quorum heartbeat .* delay"
|
|
||||||
|
|
||||||
egrep -v "($re)" | \
|
egrep -v "($re)" | \
|
||||||
ignore_harmless_unwind_kasan_stack_oob
|
ignore_harmless_unwind_kasan_stack_oob
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -39,6 +39,20 @@ cmd() {
|
|||||||
die "cmd failed (check the run.log)"
|
die "cmd failed (check the run.log)"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# we can record pids to kill as we exit, we kill in reverse added order
|
||||||
|
declare -a atexit_kill_pids
|
||||||
|
atexit_kill()
|
||||||
|
{
|
||||||
|
local pid
|
||||||
|
|
||||||
|
for pid in $(echo ${atexit_kill_pids[*]} | rev); do
|
||||||
|
if test -e "/proc/$pid/status" ; then
|
||||||
|
kill "$pid"
|
||||||
|
fi
|
||||||
|
done
|
||||||
|
}
|
||||||
|
trap atexit_kill EXIT
|
||||||
|
|
||||||
show_help()
|
show_help()
|
||||||
{
|
{
|
||||||
cat << EOF
|
cat << EOF
|
||||||
@@ -438,30 +452,6 @@ cmd grep . /sys/kernel/debug/tracing/options/trace_printk \
|
|||||||
/sys/kernel/debug/tracing/buffer_size_kb \
|
/sys/kernel/debug/tracing/buffer_size_kb \
|
||||||
/proc/sys/kernel/ftrace_dump_on_oops
|
/proc/sys/kernel/ftrace_dump_on_oops
|
||||||
|
|
||||||
# we can record pids to kill as we exit, we kill in reverse added order
|
|
||||||
atexit_kill_pids=""
|
|
||||||
add_atexit_kill_pid()
|
|
||||||
{
|
|
||||||
atexit_kill_pids="$1 $atexit_kill_pids"
|
|
||||||
}
|
|
||||||
atexit_kill()
|
|
||||||
{
|
|
||||||
local pid
|
|
||||||
|
|
||||||
# suppress bg function exited messages
|
|
||||||
exec {ERR}>&2 2>/dev/null
|
|
||||||
|
|
||||||
for pid in $atexit_kill_pids; do
|
|
||||||
if test -e "/proc/$pid/status" ; then
|
|
||||||
kill "$pid"
|
|
||||||
wait "$pid"
|
|
||||||
fi
|
|
||||||
done
|
|
||||||
|
|
||||||
exec 2>&$ERR {ERR}>&-
|
|
||||||
}
|
|
||||||
trap atexit_kill EXIT
|
|
||||||
|
|
||||||
#
|
#
|
||||||
# Build a fenced config that runs scripts out of the repository rather
|
# Build a fenced config that runs scripts out of the repository rather
|
||||||
# than the default system directory
|
# than the default system directory
|
||||||
@@ -477,7 +467,7 @@ T_FENCED_LOG="$T_RESULTS/fenced.log"
|
|||||||
|
|
||||||
$T_UTILS/fenced/scoutfs-fenced > "$T_FENCED_LOG" 2>&1 &
|
$T_UTILS/fenced/scoutfs-fenced > "$T_FENCED_LOG" 2>&1 &
|
||||||
fenced_pid=$!
|
fenced_pid=$!
|
||||||
add_atexit_kill_pid $fenced_pid
|
atexit_kill_pids+=($fenced_pid)
|
||||||
|
|
||||||
#
|
#
|
||||||
# some critical failures will cause fs operations to hang. We can watch
|
# some critical failures will cause fs operations to hang. We can watch
|
||||||
@@ -506,12 +496,13 @@ crash_monitor()
|
|||||||
if [ "$bad" != 0 ]; then
|
if [ "$bad" != 0 ]; then
|
||||||
echo "run-tests monitor triggering crash"
|
echo "run-tests monitor triggering crash"
|
||||||
echo c > /proc/sysrq-trigger
|
echo c > /proc/sysrq-trigger
|
||||||
exit 1
|
# bg function doesn't reload bash, $$ is parent run-tests.sh
|
||||||
|
kill -9 $$
|
||||||
fi
|
fi
|
||||||
done
|
done
|
||||||
}
|
}
|
||||||
crash_monitor &
|
crash_monitor &
|
||||||
add_atexit_kill_pid $!
|
atexit_kill_pids+=($!)
|
||||||
|
|
||||||
# setup dm tables
|
# setup dm tables
|
||||||
echo "0 $(blockdev --getsz $T_META_DEVICE) linear $T_META_DEVICE 0" > \
|
echo "0 $(blockdev --getsz $T_META_DEVICE) linear $T_META_DEVICE 0" > \
|
||||||
|
|||||||
@@ -19,7 +19,6 @@
|
|||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <inttypes.h>
|
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@@ -30,7 +29,7 @@
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
|
|
||||||
static int size = 0;
|
static int size = 0;
|
||||||
static int duration = 0;
|
static int count = 0; /* XXX make this duration instead */
|
||||||
|
|
||||||
struct thread_info {
|
struct thread_info {
|
||||||
int nr;
|
int nr;
|
||||||
@@ -42,8 +41,6 @@ static void *run_test_func(void *ptr)
|
|||||||
void *buf = NULL;
|
void *buf = NULL;
|
||||||
char *addr = NULL;
|
char *addr = NULL;
|
||||||
struct thread_info *tinfo = ptr;
|
struct thread_info *tinfo = ptr;
|
||||||
uint64_t seconds = 0;
|
|
||||||
struct timespec ts;
|
|
||||||
int c = 0;
|
int c = 0;
|
||||||
int fd;
|
int fd;
|
||||||
ssize_t read, written, ret;
|
ssize_t read, written, ret;
|
||||||
@@ -64,15 +61,9 @@ static void *run_test_func(void *ptr)
|
|||||||
|
|
||||||
usleep(100000); /* 0.1sec to allow all threads to start roughly at the same time */
|
usleep(100000); /* 0.1sec to allow all threads to start roughly at the same time */
|
||||||
|
|
||||||
clock_gettime(CLOCK_REALTIME, &ts); /* record start time */
|
|
||||||
seconds = ts.tv_sec + duration;
|
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
if (++c % 16 == 0) {
|
if (++c > count)
|
||||||
clock_gettime(CLOCK_REALTIME, &ts);
|
break;
|
||||||
if (ts.tv_sec >= seconds)
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
switch (rand() % 4) {
|
switch (rand() % 4) {
|
||||||
case 0: /* pread */
|
case 0: /* pread */
|
||||||
@@ -108,8 +99,6 @@ static void *run_test_func(void *ptr)
|
|||||||
memcpy(addr, buf, size); /* noerr */
|
memcpy(addr, buf, size); /* noerr */
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
usleep(10000);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
munmap(addr, size);
|
munmap(addr, size);
|
||||||
@@ -131,7 +120,7 @@ int main(int argc, char **argv)
|
|||||||
int i;
|
int i;
|
||||||
|
|
||||||
if (argc != 8) {
|
if (argc != 8) {
|
||||||
fprintf(stderr, "%s requires 7 arguments - size duration file1 file2 file3 file4 file5\n", argv[0]);
|
fprintf(stderr, "%s requires 7 arguments - size count file1 file2 file3 file4 file5\n", argv[0]);
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -141,9 +130,9 @@ int main(int argc, char **argv)
|
|||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
duration = atoi(argv[2]);
|
count = atoi(argv[2]);
|
||||||
if (duration < 0) {
|
if (count < 0) {
|
||||||
fprintf(stderr, "invalid duration, must be greater than or equal to 0\n");
|
fprintf(stderr, "invalid count, must be greater than 0\n");
|
||||||
exit(-1);
|
exit(-1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@
|
|||||||
t_require_commands mmap_stress mmap_validate scoutfs xfs_io
|
t_require_commands mmap_stress mmap_validate scoutfs xfs_io
|
||||||
|
|
||||||
echo "== mmap_stress"
|
echo "== mmap_stress"
|
||||||
mmap_stress 8192 30 "$T_D0/mmap_stress" "$T_D0/mmap_stress" "$T_D0/mmap_stress" "$T_D3/mmap_stress" "$T_D3/mmap_stress" | sed 's/:.*//g' | sort
|
mmap_stress 8192 2000 "$T_D0/mmap_stress" "$T_D1/mmap_stress" "$T_D2/mmap_stress" "$T_D3/mmap_stress" "$T_D4/mmap_stress" | sed 's/:.*//g' | sort
|
||||||
|
|
||||||
echo "== basic mmap/read/write consistency checks"
|
echo "== basic mmap/read/write consistency checks"
|
||||||
mmap_validate 256 1000 "$T_D0/mmap_val1" "$T_D1/mmap_val1"
|
mmap_validate 256 1000 "$T_D0/mmap_val1" "$T_D1/mmap_val1"
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ message_output()
|
|||||||
|
|
||||||
error_message()
|
error_message()
|
||||||
{
|
{
|
||||||
message_output "$@" >> /dev/stderr
|
message_output "$@" >&2
|
||||||
}
|
}
|
||||||
|
|
||||||
error_exit()
|
error_exit()
|
||||||
@@ -62,28 +62,32 @@ test -x "$SCOUTFS_FENCED_RUN" || \
|
|||||||
# files disappear.
|
# files disappear.
|
||||||
#
|
#
|
||||||
|
|
||||||
# silence error messages
|
# generate failure messages to stderr while still echoing 0 for the caller
|
||||||
quiet_cat()
|
careful_cat()
|
||||||
{
|
{
|
||||||
cat "$@" 2>/dev/null
|
local path="$@"
|
||||||
|
|
||||||
|
cat "$@" || echo 0
|
||||||
}
|
}
|
||||||
|
|
||||||
while sleep $SCOUTFS_FENCED_DELAY; do
|
while sleep $SCOUTFS_FENCED_DELAY; do
|
||||||
shopt -s nullglob
|
|
||||||
for fence in /sys/fs/scoutfs/*/fence/*; do
|
for fence in /sys/fs/scoutfs/*/fence/*; do
|
||||||
|
# catches the literal, unexpanded glob pattern when no fence dirs exist
|
||||||
srv=$(basename $(dirname $(dirname $fence)))
|
if [ ! -d "$fence" ]; then
|
||||||
fenced="$(quiet_cat $fence/fenced)"
|
|
||||||
error="$(quiet_cat $fence/error)"
|
|
||||||
rid="$(quiet_cat $fence/rid)"
|
|
||||||
ip="$(quiet_cat $fence/ipv4_addr)"
|
|
||||||
reason="$(quiet_cat $fence/reason)"
|
|
||||||
|
|
||||||
# request dirs can linger then disappear after fenced/error is set
|
|
||||||
if [ ! -d "$fence" -o "$fenced" == "1" -o "$error" == "1" ]; then
|
|
||||||
continue
|
continue
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# skip requests that have been handled
|
||||||
|
if [ "$(careful_cat $fence/fenced)" == 1 -o \
|
||||||
|
"$(careful_cat $fence/error)" == 1 ]; then
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
srv=$(basename $(dirname $(dirname $fence)))
|
||||||
|
rid="$(cat $fence/rid)"
|
||||||
|
ip="$(cat $fence/ipv4_addr)"
|
||||||
|
reason="$(cat $fence/reason)"
|
||||||
|
|
||||||
log_message "server $srv fencing rid $rid at IP $ip for $reason"
|
log_message "server $srv fencing rid $rid at IP $ip for $reason"
|
||||||
|
|
||||||
# export _REQ_ vars for run to use
|
# export _REQ_ vars for run to use
|
||||||
|
|||||||
@@ -55,14 +55,6 @@ with initial sparse regions (perhaps by multiple threads writing to
|
|||||||
different regions) and wasted space isn't an issue (perhaps because the
|
different regions) and wasted space isn't an issue (perhaps because the
|
||||||
file population contains few small files).
|
file population contains few small files).
|
||||||
.TP
|
.TP
|
||||||
.B ino_alloc_per_lock=<number>
|
|
||||||
This option determines how many inode numbers are allocated in the same
|
|
||||||
cluster lock. The default, and maximum, is 1024. The minimum is 1.
|
|
||||||
Allocating fewer inodes per lock can allow more parallelism between
|
|
||||||
mounts because there are more locks that cover the same number of
|
|
||||||
created files. This can be helpful when working with smaller numbers of
|
|
||||||
large files.
|
|
||||||
.TP
|
|
||||||
.B log_merge_wait_timeout_ms=<number>
|
.B log_merge_wait_timeout_ms=<number>
|
||||||
This option sets the amount of time, in milliseconds, that log merge
|
This option sets the amount of time, in milliseconds, that log merge
|
||||||
creation can wait before timing out. This setting is per-mount, only
|
creation can wait before timing out. This setting is per-mount, only
|
||||||
|
|||||||
Reference in New Issue
Block a user