mirror of
https://github.com/versity/scoutfs.git
synced 2026-04-30 18:05:43 +00:00
Compare commits
30 Commits
v1.17
...
zab/force_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
55f0a0ded4 | ||
|
|
dcfd22e4b1 | ||
|
|
8d3e6883c6 | ||
|
|
8747dae61c | ||
|
|
fffcf4a9bb | ||
|
|
b552406427 | ||
|
|
d812599e6b | ||
|
|
03ab5cedb6 | ||
|
|
2b94cd6468 | ||
|
|
5507ee5351 | ||
|
|
1600a121d9 | ||
|
|
6daf24ff37 | ||
|
|
cd5d9ff3e0 | ||
|
|
d94e49eb63 | ||
|
|
1dbe408539 | ||
|
|
bf21699ad7 | ||
|
|
c7c67a173d | ||
|
|
0d10189f58 | ||
|
|
6b88f3268e | ||
|
|
4b2afa61b8 | ||
|
|
222ba2cede | ||
|
|
c7e97eeb1f | ||
|
|
21c070b42d | ||
|
|
77fbf92968 | ||
|
|
d5c699c3b4 | ||
|
|
b56b8e502c | ||
|
|
5ff372561d | ||
|
|
bdecee5e5d | ||
|
|
707e1b2d59 | ||
|
|
006f429f72 |
@@ -1,6 +1,18 @@
|
||||
Versity ScoutFS Release Notes
|
||||
=============================
|
||||
|
||||
---
|
||||
v1.18
|
||||
\
|
||||
*Nov 7, 2023*
|
||||
|
||||
Fixed a bug where background srch file compaction could stop making
|
||||
forward progress if a partial compaction operation was committed at a
|
||||
specific byte offset in a block. This would cause srch file searches to
|
||||
be progressively more expensive over time. Once this fix is running,
|
||||
background compaction will resume, bringing the cost of searches back
|
||||
down.
|
||||
|
||||
---
|
||||
v1.17
|
||||
\
|
||||
|
||||
@@ -68,6 +68,8 @@ struct forest_info {
|
||||
struct delayed_work log_merge_dwork;
|
||||
|
||||
atomic64_t inode_count_delta;
|
||||
|
||||
struct dentry *dent;
|
||||
};
|
||||
|
||||
#define DECLARE_FOREST_INFO(sb, name) \
|
||||
@@ -750,6 +752,60 @@ resched:
|
||||
queue_delayed_work(finf->workq, &finf->log_merge_dwork, delay);
|
||||
}
|
||||
|
||||
static int count_log_trees(struct super_block *sb, struct scoutfs_key *key, u64 seq,
|
||||
u8 flags, void *val, int val_len, void *arg)
|
||||
{
|
||||
u64 *count = arg;
|
||||
|
||||
(*count)++;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * debugfs "get" handler that reports how many items are in the logs
 * btree referenced by the super block.
 *
 * @data: the super_block pointer registered with the debugfs file
 * @val:  set to the number of items counted, only written on success
 *
 * A fresh copy of the super block is read so that its logs_root can be
 * walked.  Items are read in batches with count_log_trees() counting
 * each one; after a batch the walk resumes at the key just past the end
 * of the range that the read reported.
 *
 * Returns 0 on success.  Every failure, including allocation failure,
 * is reported to the reader as -EIO.
 */
static int debugfs_nr_log_trees_get(void *data, u64 *val)
{
	struct super_block *sb = data;
	struct scoutfs_super_block *super;
	struct scoutfs_key start;
	struct scoutfs_key end;
	struct scoutfs_key key;
	u64 count = 0;
	int ret;

	super = kmalloc(sizeof(*super), GFP_NOFS);
	if (!super) {
		ret = -ENOMEM;
		goto out;
	}

	ret = scoutfs_read_super(sb, super);
	if (ret < 0)
		goto out;

	scoutfs_key_init_log_trees(&key, 0, 0);
	for (;;) {
		scoutfs_key_set_zeros(&start);
		scoutfs_key_set_ones(&end);
		ret = scoutfs_btree_read_items(sb, &super->logs_root, &key, &start, &end,
					       count_log_trees, &count);
		/*
		 * Check for real errors before looking at end.  end was just
		 * reset to all ones, so a failed read that left it untouched
		 * would otherwise be mistaken for the end of the walk and a
		 * partial count would be returned as success.
		 */
		if (ret < 0 && ret != -ENOENT)
			goto out;

		/* nothing more to read, or the read covered the rest of the keys */
		if (ret == -ENOENT || scoutfs_key_is_ones(&end))
			break;

		/* continue with the first key past the range just read */
		key = end;
		scoutfs_key_inc(&key);
	}

	*val = count;
	ret = 0;
out:
	kfree(super);
	return ret ? -EIO : 0;
}
|
||||
|
||||
DEFINE_SIMPLE_ATTRIBUTE(fops_nr_log_trees, debugfs_nr_log_trees_get, NULL, "%llu\n");
|
||||
|
||||
int scoutfs_forest_setup(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
@@ -770,6 +826,13 @@ int scoutfs_forest_setup(struct super_block *sb)
|
||||
scoutfs_forest_log_merge_worker);
|
||||
sbi->forest_info = finf;
|
||||
|
||||
finf->dent = debugfs_create_file("nr_log_trees", S_IFREG|S_IRUSR, sbi->debug_root, sb,
|
||||
&fops_nr_log_trees);
|
||||
if (IS_ERR(finf->dent)) {
|
||||
ret = PTR_ERR(finf->dent);
|
||||
goto out;
|
||||
}
|
||||
|
||||
finf->workq = alloc_workqueue("scoutfs_log_merge", WQ_NON_REENTRANT |
|
||||
WQ_UNBOUND | WQ_HIGHPRI, 0);
|
||||
if (!finf->workq) {
|
||||
@@ -799,6 +862,8 @@ void scoutfs_forest_stop(struct super_block *sb)
|
||||
|
||||
if (finf && finf->workq) {
|
||||
cancel_delayed_work_sync(&finf->log_merge_dwork);
|
||||
if (!IS_ERR_OR_NULL(finf->dent))
|
||||
debugfs_remove(finf->dent);
|
||||
destroy_workqueue(finf->workq);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -439,6 +439,7 @@ DECLARE_EVENT_CLASS(scoutfs_trans_hold_release_class,
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->journal_info = (unsigned long)journal_info;
|
||||
__entry->holders = holders;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" journal_info 0x%0lx holders %d ret %d",
|
||||
@@ -2799,6 +2800,81 @@ TRACE_EVENT(scoutfs_omap_should_delete,
|
||||
SCSB_TRACE_ARGS, __entry->ino, __entry->nlink, __entry->ret)
|
||||
);
|
||||
|
||||
#define SSCF_FMT "[bo %llu bs %llu es %llu]"
|
||||
#define SSCF_FIELDS(pref) \
|
||||
__field(__u64, pref##_blkno) \
|
||||
__field(__u64, pref##_blocks) \
|
||||
__field(__u64, pref##_entries)
|
||||
#define SSCF_ASSIGN(pref, sfl) \
|
||||
__entry->pref##_blkno = le64_to_cpu((sfl)->ref.blkno); \
|
||||
__entry->pref##_blocks = le64_to_cpu((sfl)->blocks); \
|
||||
__entry->pref##_entries = le64_to_cpu((sfl)->entries);
|
||||
#define SSCF_ENTRY_ARGS(pref) \
|
||||
__entry->pref##_blkno, \
|
||||
__entry->pref##_blocks, \
|
||||
__entry->pref##_entries
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_srch_compact_class,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_srch_compact *sc),
|
||||
|
||||
TP_ARGS(sb, sc),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, id)
|
||||
__field(__u8, nr)
|
||||
__field(__u8, flags)
|
||||
SSCF_FIELDS(out)
|
||||
__field(__u64, in0_blk)
|
||||
__field(__u64, in0_pos)
|
||||
SSCF_FIELDS(in0)
|
||||
__field(__u64, in1_blk)
|
||||
__field(__u64, in1_pos)
|
||||
SSCF_FIELDS(in1)
|
||||
__field(__u64, in2_blk)
|
||||
__field(__u64, in2_pos)
|
||||
SSCF_FIELDS(in2)
|
||||
__field(__u64, in3_blk)
|
||||
__field(__u64, in3_pos)
|
||||
SSCF_FIELDS(in3)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->id = le64_to_cpu(sc->id);
|
||||
__entry->nr = sc->nr;
|
||||
__entry->flags = sc->flags;
|
||||
SSCF_ASSIGN(out, &sc->out)
|
||||
__entry->in0_blk = le64_to_cpu(sc->in[0].blk);
|
||||
__entry->in0_pos = le64_to_cpu(sc->in[0].pos);
|
||||
SSCF_ASSIGN(in0, &sc->in[0].sfl)
|
||||
__entry->in1_blk = le64_to_cpu(sc->in[0].blk);
|
||||
__entry->in1_pos = le64_to_cpu(sc->in[0].pos);
|
||||
SSCF_ASSIGN(in1, &sc->in[1].sfl)
|
||||
__entry->in2_blk = le64_to_cpu(sc->in[0].blk);
|
||||
__entry->in2_pos = le64_to_cpu(sc->in[0].pos);
|
||||
SSCF_ASSIGN(in2, &sc->in[2].sfl)
|
||||
__entry->in3_blk = le64_to_cpu(sc->in[0].blk);
|
||||
__entry->in3_pos = le64_to_cpu(sc->in[0].pos);
|
||||
SSCF_ASSIGN(in3, &sc->in[3].sfl)
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" id %llu nr %u flags 0x%x out "SSCF_FMT" in0 b %llu p %llu "SSCF_FMT" in1 b %llu p %llu "SSCF_FMT" in2 b %llu p %llu "SSCF_FMT" in3 b %llu p %llu "SSCF_FMT,
|
||||
SCSB_TRACE_ARGS, __entry->id, __entry->nr, __entry->flags, SSCF_ENTRY_ARGS(out),
|
||||
__entry->in0_blk, __entry->in0_pos, SSCF_ENTRY_ARGS(in0),
|
||||
__entry->in1_blk, __entry->in1_pos, SSCF_ENTRY_ARGS(in1),
|
||||
__entry->in2_blk, __entry->in2_pos, SSCF_ENTRY_ARGS(in2),
|
||||
__entry->in3_blk, __entry->in3_pos, SSCF_ENTRY_ARGS(in3))
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_srch_compact_class, scoutfs_srch_compact_client_send,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_srch_compact *sc),
|
||||
TP_ARGS(sb, sc)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_srch_compact_class, scoutfs_srch_compact_client_recv,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_srch_compact *sc),
|
||||
TP_ARGS(sb, sc)
|
||||
);
|
||||
|
||||
#endif /* _TRACE_SCOUTFS_H */
|
||||
|
||||
/* This part must be outside protection */
|
||||
|
||||
@@ -91,6 +91,7 @@ do { \
|
||||
struct server_info {
|
||||
struct super_block *sb;
|
||||
spinlock_t lock;
|
||||
seqlock_t seqlock;
|
||||
wait_queue_head_t waitq;
|
||||
|
||||
struct workqueue_struct *wq;
|
||||
@@ -132,11 +133,9 @@ struct server_info {
|
||||
struct mutex mounted_clients_mutex;
|
||||
|
||||
/* stable super stored from commits, given in locks and rpcs */
|
||||
seqcount_t stable_seqcount;
|
||||
struct scoutfs_super_block stable_super;
|
||||
|
||||
/* serializing and get and set volume options */
|
||||
seqcount_t volopt_seqcount;
|
||||
struct mutex volopt_mutex;
|
||||
struct scoutfs_volume_options volopt;
|
||||
|
||||
@@ -182,7 +181,7 @@ static bool get_volopt_val(struct server_info *server, int nr, u64 *val)
|
||||
unsigned seq;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&server->volopt_seqcount);
|
||||
seq = read_seqbegin(&server->seqlock);
|
||||
if ((le64_to_cpu(server->volopt.set_bits) & bit)) {
|
||||
is_set = true;
|
||||
*val = le64_to_cpup(opt);
|
||||
@@ -190,7 +189,7 @@ static bool get_volopt_val(struct server_info *server, int nr, u64 *val)
|
||||
is_set = false;
|
||||
*val = 0;
|
||||
};
|
||||
} while (read_seqcount_retry(&server->volopt_seqcount, seq));
|
||||
} while (read_seqretry(&server->seqlock, seq));
|
||||
|
||||
return is_set;
|
||||
}
|
||||
@@ -506,7 +505,7 @@ static void get_stable(struct super_block *sb, struct scoutfs_super_block *super
|
||||
unsigned int seq;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&server->stable_seqcount);
|
||||
seq = read_seqbegin(&server->seqlock);
|
||||
if (super)
|
||||
*super = server->stable_super;
|
||||
if (roots) {
|
||||
@@ -514,7 +513,7 @@ static void get_stable(struct super_block *sb, struct scoutfs_super_block *super
|
||||
roots->logs_root = server->stable_super.logs_root;
|
||||
roots->srch_root = server->stable_super.srch_root;
|
||||
}
|
||||
} while (read_seqcount_retry(&server->stable_seqcount, seq));
|
||||
} while (read_seqretry(&server->seqlock, seq));
|
||||
}
|
||||
|
||||
u64 scoutfs_server_seq(struct super_block *sb)
|
||||
@@ -548,11 +547,9 @@ void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq)
|
||||
|
||||
static void set_stable_super(struct server_info *server, struct scoutfs_super_block *super)
|
||||
{
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&server->stable_seqcount);
|
||||
write_seqlock(&server->seqlock);
|
||||
server->stable_super = *super;
|
||||
write_seqcount_end(&server->stable_seqcount);
|
||||
preempt_enable();
|
||||
write_sequnlock(&server->seqlock);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -1203,7 +1200,7 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
mutex_lock(&server->logs_mutex);
|
||||
|
||||
/* done if we timed out */
|
||||
if (time_after(jiffies, timeo)) {
|
||||
if (1 || time_after(jiffies, timeo)) {
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
@@ -1969,9 +1966,7 @@ static int server_srch_get_compact(struct super_block *sb,
|
||||
ret = scoutfs_srch_get_compact(sb, &server->alloc, &server->wri,
|
||||
&super->srch_root, rid, sc);
|
||||
mutex_unlock(&server->srch_mutex);
|
||||
if (ret == 0 && sc->nr == 0)
|
||||
ret = -ENOENT;
|
||||
if (ret < 0)
|
||||
if (ret < 0 || (ret == 0 && sc->nr == 0))
|
||||
goto apply;
|
||||
|
||||
mutex_lock(&server->alloc_mutex);
|
||||
@@ -3073,9 +3068,9 @@ static int server_get_volopt(struct super_block *sb, struct scoutfs_net_connecti
|
||||
}
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&server->volopt_seqcount);
|
||||
seq = read_seqbegin(&server->seqlock);
|
||||
volopt = server->volopt;
|
||||
} while (read_seqcount_retry(&server->volopt_seqcount, seq));
|
||||
} while (read_seqretry(&server->seqlock, seq));
|
||||
|
||||
out:
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, &volopt, sizeof(volopt));
|
||||
@@ -3144,12 +3139,12 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti
|
||||
apply:
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
|
||||
write_seqcount_begin(&server->volopt_seqcount);
|
||||
write_seqlock(&server->seqlock);
|
||||
if (ret == 0)
|
||||
server->volopt = super->volopt;
|
||||
else
|
||||
super->volopt = server->volopt;
|
||||
write_seqcount_end(&server->volopt_seqcount);
|
||||
write_sequnlock(&server->seqlock);
|
||||
|
||||
mutex_unlock(&server->volopt_mutex);
|
||||
out:
|
||||
@@ -3192,12 +3187,12 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec
|
||||
|
||||
ret = server_apply_commit(sb, &hold, ret);
|
||||
|
||||
write_seqcount_begin(&server->volopt_seqcount);
|
||||
write_seqlock(&server->seqlock);
|
||||
if (ret == 0)
|
||||
server->volopt = super->volopt;
|
||||
else
|
||||
super->volopt = server->volopt;
|
||||
write_seqcount_end(&server->volopt_seqcount);
|
||||
write_sequnlock(&server->seqlock);
|
||||
|
||||
mutex_unlock(&server->volopt_mutex);
|
||||
out:
|
||||
@@ -4336,9 +4331,9 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
}
|
||||
|
||||
/* update volume options early, possibly for use during startup */
|
||||
write_seqcount_begin(&server->volopt_seqcount);
|
||||
write_seqlock(&server->seqlock);
|
||||
server->volopt = super->volopt;
|
||||
write_seqcount_end(&server->volopt_seqcount);
|
||||
write_sequnlock(&server->seqlock);
|
||||
|
||||
atomic64_set(&server->seq_atomic, le64_to_cpu(super->seq));
|
||||
set_stable_super(server, super);
|
||||
@@ -4478,6 +4473,7 @@ int scoutfs_server_setup(struct super_block *sb)
|
||||
|
||||
server->sb = sb;
|
||||
spin_lock_init(&server->lock);
|
||||
seqlock_init(&server->seqlock);
|
||||
init_waitqueue_head(&server->waitq);
|
||||
INIT_WORK(&server->work, scoutfs_server_worker);
|
||||
server->status = SERVER_DOWN;
|
||||
@@ -4492,8 +4488,6 @@ int scoutfs_server_setup(struct super_block *sb)
|
||||
INIT_WORK(&server->log_merge_free_work, server_log_merge_free_work);
|
||||
mutex_init(&server->srch_mutex);
|
||||
mutex_init(&server->mounted_clients_mutex);
|
||||
seqcount_init(&server->stable_seqcount);
|
||||
seqcount_init(&server->volopt_seqcount);
|
||||
mutex_init(&server->volopt_mutex);
|
||||
INIT_WORK(&server->fence_pending_recov_work, fence_pending_recov_worker);
|
||||
INIT_DELAYED_WORK(&server->reclaim_dwork, reclaim_worker);
|
||||
|
||||
201
kmod/src/srch.c
201
kmod/src/srch.c
@@ -30,6 +30,9 @@
|
||||
#include "client.h"
|
||||
#include "counters.h"
|
||||
#include "scoutfs_trace.h"
|
||||
#include "triggers.h"
|
||||
#include "sysfs.h"
|
||||
#include "msg.h"
|
||||
|
||||
/*
|
||||
* This srch subsystem gives us a way to find inodes that have a given
|
||||
@@ -68,10 +71,14 @@ struct srch_info {
|
||||
atomic_t shutdown;
|
||||
struct workqueue_struct *workq;
|
||||
struct delayed_work compact_dwork;
|
||||
struct scoutfs_sysfs_attrs ssa;
|
||||
atomic_t compact_delay_ms;
|
||||
};
|
||||
|
||||
#define DECLARE_SRCH_INFO(sb, name) \
|
||||
struct srch_info *name = SCOUTFS_SB(sb)->srch_info
|
||||
#define DECLARE_SRCH_INFO_KOBJ(kobj, name) \
|
||||
DECLARE_SRCH_INFO(SCOUTFS_SYSFS_ATTRS_SB(kobj), name)
|
||||
|
||||
#define SRE_FMT "%016llx.%llu.%llu"
|
||||
#define SRE_ARG(sre) \
|
||||
@@ -520,6 +527,95 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Padded entries are encoded in pairs after an existing entry. All of
|
||||
* the pairs cancel each other out by all readers (the second encoding
|
||||
* looks like deletion) so they aren't visible to the first/last bounds of
|
||||
* the block or file.
|
||||
*/
|
||||
static int append_padded_entry(struct scoutfs_srch_file *sfl, u64 blk,
|
||||
struct scoutfs_srch_block *srb, struct scoutfs_srch_entry *sre)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = encode_entry(srb->entries + le32_to_cpu(srb->entry_bytes),
|
||||
sre, &srb->tail);
|
||||
if (ret > 0) {
|
||||
srb->tail = *sre;
|
||||
le32_add_cpu(&srb->entry_nr, 1);
|
||||
le32_add_cpu(&srb->entry_bytes, ret);
|
||||
le64_add_cpu(&sfl->entries, 1);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* This is called by a testing trigger to create a very specific case of
|
||||
* encoded entry offsets. We want the last entry in the block to start
|
||||
* precisely at the _SAFE_BYTES offset.
|
||||
*
|
||||
* This is called when there is a single existing entry in the block.
|
||||
* We have the entire block to work with. We encode pairs of matching
|
||||
* entries. This hides them from readers (both searches and merging) as
|
||||
* they're interpreted as creation and deletion and are deleted. We use
|
||||
* the existing hash value of the first entry in the block but then set
|
||||
* the inode to an impossibly large number so it doesn't interfere with
|
||||
* anything.
|
||||
*
|
||||
* To hit the specific offset we very carefully manage the amount of
|
||||
* bytes of change between fields in the entry. We know that if we
|
||||
* change all the byte of the ino and id we end up with a 20 byte
|
||||
* (2+8+8,2) encoding of the pair of entries. To have the last entry
|
||||
* start at the _SAFE_POS offset we know that the final 20 byte pair
|
||||
* encoding needs to end at 2 bytes (second entry encoding) after the
|
||||
* _SAFE_POS offset.
|
||||
*
|
||||
* So as we encode pairs we watch the delta of our current offset from
|
||||
* that desired final offset of 2 past _SAFE_POS. If we're a multiple
|
||||
* of 20 away then we encode the full 20 byte pairs. If we're not, then
|
||||
* we drop a byte to encode 19 bytes. That'll slowly change the offset
|
||||
* to be a multiple of 20 again while encoding large entries.
|
||||
*/
|
||||
static void pad_entries_at_safe(struct scoutfs_srch_file *sfl, u64 blk,
|
||||
struct scoutfs_srch_block *srb)
|
||||
{
|
||||
struct scoutfs_srch_entry sre;
|
||||
u32 target;
|
||||
s32 diff;
|
||||
u64 hash;
|
||||
u64 ino;
|
||||
u64 id;
|
||||
int ret;
|
||||
|
||||
hash = le64_to_cpu(srb->tail.hash);
|
||||
ino = le64_to_cpu(srb->tail.ino) | (1ULL << 62);
|
||||
id = le64_to_cpu(srb->tail.id);
|
||||
|
||||
target = SCOUTFS_SRCH_BLOCK_SAFE_BYTES + 2;
|
||||
|
||||
while ((diff = target - le32_to_cpu(srb->entry_bytes)) > 0) {
|
||||
ino ^= 1ULL << (7 * 8);
|
||||
if (diff % 20 == 0) {
|
||||
id ^= 1ULL << (7 * 8);
|
||||
} else {
|
||||
id ^= 1ULL << (6 * 8);
|
||||
}
|
||||
|
||||
sre.hash = cpu_to_le64(hash);
|
||||
sre.ino = cpu_to_le64(ino);
|
||||
sre.id = cpu_to_le64(id);
|
||||
|
||||
ret = append_padded_entry(sfl, blk, srb, &sre);
|
||||
if (ret == 0)
|
||||
ret = append_padded_entry(sfl, blk, srb, &sre);
|
||||
BUG_ON(ret != 0);
|
||||
|
||||
diff = target - le32_to_cpu(srb->entry_bytes);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller is dropping an ino/id because the tracking rbtree is full.
|
||||
* This loses information so we can't return any entries at or after the
|
||||
@@ -987,6 +1083,9 @@ int scoutfs_srch_rotate_log(struct super_block *sb,
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
if (sfl->ref.blkno && !force && scoutfs_trigger(sb, SRCH_FORCE_LOG_ROTATE))
|
||||
force = true;
|
||||
|
||||
if (sfl->ref.blkno == 0 ||
|
||||
(!force && le64_to_cpu(sfl->blocks) < SCOUTFS_SRCH_LOG_BLOCK_LIMIT))
|
||||
return 0;
|
||||
@@ -1462,7 +1561,7 @@ static int kway_merge(struct super_block *sb,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_srch_file *sfl,
|
||||
kway_get_t kway_get, kway_advance_t kway_adv,
|
||||
void **args, int nr)
|
||||
void **args, int nr, bool logs_input)
|
||||
{
|
||||
DECLARE_SRCH_INFO(sb, srinf);
|
||||
struct scoutfs_srch_block *srb = NULL;
|
||||
@@ -1567,6 +1666,15 @@ static int kway_merge(struct super_block *sb,
|
||||
blk++;
|
||||
}
|
||||
|
||||
/* end sorted block on _SAFE offset for testing */
|
||||
if (bl && le32_to_cpu(srb->entry_nr) == 1 && logs_input &&
|
||||
scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
|
||||
pad_entries_at_safe(sfl, blk, srb);
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
blk++;
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, srch_compact_entry);
|
||||
|
||||
} else {
|
||||
@@ -1609,6 +1717,8 @@ static int kway_merge(struct super_block *sb,
|
||||
empty++;
|
||||
ret = 0;
|
||||
} else if (ret < 0) {
|
||||
if (ret == -ENOANO) /* just testing trigger */
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -1816,7 +1926,7 @@ static int compact_logs(struct super_block *sb,
|
||||
}
|
||||
|
||||
ret = kway_merge(sb, alloc, wri, &sc->out, kway_get_page, kway_adv_page,
|
||||
args, nr_pages);
|
||||
args, nr_pages, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -1874,12 +1984,18 @@ static int kway_get_reader(struct super_block *sb,
|
||||
srb = rdr->bl->data;
|
||||
|
||||
if (rdr->pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
|
||||
rdr->skip >= SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
|
||||
rdr->skip > SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
|
||||
rdr->skip >= le32_to_cpu(srb->entry_bytes)) {
|
||||
/* XXX inconsistency */
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (rdr->decoded_bytes == 0 && rdr->pos == SCOUTFS_SRCH_BLOCK_SAFE_BYTES &&
|
||||
scoutfs_trigger(sb, SRCH_MERGE_STOP_SAFE)) {
|
||||
/* only used in testing */
|
||||
return -ENOANO;
|
||||
}
|
||||
|
||||
/* decode entry, possibly skipping start of the block */
|
||||
while (rdr->decoded_bytes == 0 || rdr->pos < rdr->skip) {
|
||||
ret = decode_entry(srb->entries + rdr->pos,
|
||||
@@ -1969,7 +2085,7 @@ static int compact_sorted(struct super_block *sb,
|
||||
}
|
||||
|
||||
ret = kway_merge(sb, alloc, wri, &sc->out, kway_get_reader,
|
||||
kway_adv_reader, args, nr);
|
||||
kway_adv_reader, args, nr, false);
|
||||
|
||||
sc->flags |= SCOUTFS_SRCH_COMPACT_FLAG_DONE;
|
||||
for (i = 0; i < nr; i++) {
|
||||
@@ -2098,8 +2214,15 @@ static int delete_files(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* wait 10s between compact attempts on error, immediate after success */
|
||||
#define SRCH_COMPACT_DELAY_MS (10 * MSEC_PER_SEC)
|
||||
/*
 * Queue the compaction worker unless srch is shutting down.  An
 * immediate request queues the work with no delay, otherwise the work
 * is delayed by the current compact_delay_ms setting.
 */
static void queue_compact_work(struct srch_info *srinf, bool immediate)
{
	unsigned long delay = 0;

	if (atomic_read(&srinf->shutdown))
		return;

	if (!immediate)
		delay = msecs_to_jiffies(atomic_read(&srinf->compact_delay_ms));

	queue_delayed_work(srinf->workq, &srinf->compact_dwork, delay);
}
|
||||
|
||||
/*
|
||||
* Get a compaction operation from the server, sort the entries from the
|
||||
@@ -2127,7 +2250,6 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
|
||||
struct super_block *sb = srinf->sb;
|
||||
struct scoutfs_block_writer wri;
|
||||
struct scoutfs_alloc alloc;
|
||||
unsigned long delay;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
@@ -2140,6 +2262,8 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
|
||||
scoutfs_block_writer_init(sb, &wri);
|
||||
|
||||
ret = scoutfs_client_srch_get_compact(sb, sc);
|
||||
if (ret >= 0)
|
||||
trace_scoutfs_srch_compact_client_recv(sb, sc);
|
||||
if (ret < 0 || sc->nr == 0)
|
||||
goto out;
|
||||
|
||||
@@ -2168,6 +2292,7 @@ commit:
|
||||
sc->meta_freed = alloc.freed;
|
||||
sc->flags |= ret < 0 ? SCOUTFS_SRCH_COMPACT_FLAG_ERROR : 0;
|
||||
|
||||
trace_scoutfs_srch_compact_client_send(sb, sc);
|
||||
err = scoutfs_client_srch_commit_compact(sb, sc);
|
||||
if (err < 0 && ret == 0)
|
||||
ret = err;
|
||||
@@ -2178,14 +2303,56 @@ out:
|
||||
scoutfs_inc_counter(sb, srch_compact_error);
|
||||
|
||||
scoutfs_block_writer_forget_all(sb, &wri);
|
||||
if (!atomic_read(&srinf->shutdown)) {
|
||||
delay = ret == 0 ? 0 : msecs_to_jiffies(SRCH_COMPACT_DELAY_MS);
|
||||
queue_delayed_work(srinf->workq, &srinf->compact_dwork, delay);
|
||||
}
|
||||
queue_compact_work(srinf, sc->nr > 0 && ret == 0);
|
||||
|
||||
kfree(sc);
|
||||
}
|
||||
|
||||
static ssize_t compact_delay_ms_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
DECLARE_SRCH_INFO_KOBJ(kobj, srinf);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%u", atomic_read(&srinf->compact_delay_ms));
|
||||
}
|
||||
|
||||
#define MIN_COMPACT_DELAY_MS MSEC_PER_SEC
|
||||
#define DEF_COMPACT_DELAY_MS (10 * MSEC_PER_SEC)
|
||||
#define MAX_COMPACT_DELAY_MS (60 * MSEC_PER_SEC)
|
||||
|
||||
/*
 * sysfs store handler that sets the delay, in milliseconds, used when
 * compaction work is queued without being immediate.
 *
 * The written bytes are copied into a null terminated buffer (input
 * longer than the buffer is truncated) and parsed as an unsigned
 * number in any base that kstrtou64() detects.  Values that don't
 * parse, or that fall outside MIN_COMPACT_DELAY_MS..MAX_COMPACT_DELAY_MS,
 * log an error and return -EINVAL.
 *
 * On success the new delay is stored, any pending delayed compaction
 * work is cancelled, and the work is queued again so that the new delay
 * applies right away.  Returns count.
 */
static ssize_t compact_delay_ms_store(struct kobject *kobj, struct kobj_attribute *attr,
				      const char *buf, size_t count)
{
	struct super_block *sb = SCOUTFS_SYSFS_ATTRS_SB(kobj);
	DECLARE_SRCH_INFO(sb, srinf);
	char nullterm[30]; /* more than enough for octal -U64_MAX */
	u64 val;
	int len;
	int ret;

	len = min(count, sizeof(nullterm) - 1);
	memcpy(nullterm, buf, len);
	nullterm[len] = '\0';

	/*
	 * val is unsigned so parse it with the matching unsigned helper
	 * rather than handing a u64 pointer to the signed kstrtoll().
	 * Negative input now fails to parse instead of wrapping to a huge
	 * value that only the range check would catch.
	 */
	ret = kstrtou64(nullterm, 0, &val);
	if (ret < 0 || val < MIN_COMPACT_DELAY_MS || val > MAX_COMPACT_DELAY_MS) {
		scoutfs_err(sb, "invalid compact_delay_ms value, must be between %lu and %lu",
			    MIN_COMPACT_DELAY_MS, MAX_COMPACT_DELAY_MS);
		return -EINVAL;
	}

	atomic_set(&srinf->compact_delay_ms, val);

	/* restart any pending delayed work so it picks up the new delay */
	cancel_delayed_work(&srinf->compact_dwork);
	queue_compact_work(srinf, false);

	return count;
}
|
||||
SCOUTFS_ATTR_RW(compact_delay_ms);
|
||||
|
||||
static struct attribute *srch_attrs[] = {
|
||||
SCOUTFS_ATTR_PTR(compact_delay_ms),
|
||||
NULL,
|
||||
};
|
||||
|
||||
void scoutfs_srch_destroy(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
@@ -2202,6 +2369,8 @@ void scoutfs_srch_destroy(struct super_block *sb)
|
||||
destroy_workqueue(srinf->workq);
|
||||
}
|
||||
|
||||
scoutfs_sysfs_destroy_attrs(sb, &srinf->ssa);
|
||||
|
||||
kfree(srinf);
|
||||
sbi->srch_info = NULL;
|
||||
}
|
||||
@@ -2219,8 +2388,15 @@ int scoutfs_srch_setup(struct super_block *sb)
|
||||
srinf->sb = sb;
|
||||
atomic_set(&srinf->shutdown, 0);
|
||||
INIT_DELAYED_WORK(&srinf->compact_dwork, scoutfs_srch_compact_worker);
|
||||
scoutfs_sysfs_init_attrs(sb, &srinf->ssa);
|
||||
atomic_set(&srinf->compact_delay_ms, DEF_COMPACT_DELAY_MS);
|
||||
|
||||
sbi->srch_info = srinf;
|
||||
|
||||
ret = scoutfs_sysfs_create_attrs(sb, &srinf->ssa, srch_attrs, "srch");
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
srinf->workq = alloc_workqueue("scoutfs_srch_compact",
|
||||
WQ_NON_REENTRANT | WQ_UNBOUND |
|
||||
WQ_HIGHPRI, 0);
|
||||
@@ -2229,8 +2405,7 @@ int scoutfs_srch_setup(struct super_block *sb)
|
||||
goto out;
|
||||
}
|
||||
|
||||
queue_delayed_work(srinf->workq, &srinf->compact_dwork,
|
||||
msecs_to_jiffies(SRCH_COMPACT_DELAY_MS));
|
||||
queue_compact_work(srinf, false);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
|
||||
@@ -39,6 +39,9 @@ struct scoutfs_triggers {
|
||||
|
||||
static char *names[] = {
|
||||
[SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE] = "block_remove_stale",
|
||||
[SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE] = "srch_compact_logs_pad_safe",
|
||||
[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
|
||||
[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
|
||||
[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
|
||||
};
|
||||
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
|
||||
enum scoutfs_trigger {
|
||||
SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE,
|
||||
SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE,
|
||||
SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
|
||||
SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
|
||||
SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
|
||||
SCOUTFS_TRIGGER_NR,
|
||||
};
|
||||
|
||||
@@ -25,8 +25,9 @@ All options can be seen by running with -h.
|
||||
This script is built to test multi-node systems on one host by using
|
||||
different mounts of the same devices. The script creates a fake block
|
||||
device in front of each fs block device for each mount that will be
|
||||
tested. Currently it will create free loop devices and will mount on
|
||||
/mnt/test.[0-9].
|
||||
tested. It will create predictable device mapper devices and mount
|
||||
them on /mnt/test.N. These static device names and mount paths limit
|
||||
the script to a single execution per host.
|
||||
|
||||
All tests will be run by default. Particular tests can be included or
|
||||
excluded by providing test name regular expressions with the -I and -E
|
||||
@@ -104,8 +105,8 @@ used during the test.
|
||||
|
||||
| Variable | Description | Origin | Example |
|
||||
| ---------------- | ------------------- | --------------- | ----------------- |
|
||||
| T\_MB[0-9] | per-mount meta bdev | created per run | /dev/loop0 |
|
||||
| T\_DB[0-9] | per-mount data bdev | created per run | /dev/loop1 |
|
||||
| T\_MB[0-9] | per-mount meta bdev | created per run | /dev/mapper/\_scoutfs\_test\_meta\_[0-9] |
|
||||
| T\_DB[0-9] | per-mount data bdev | created per run | /dev/mapper/\_scoutfs\_test\_data\_[0-9] |
|
||||
| T\_D[0-9] | per-mount test dir | made for test | /mnt/test.[0-9]/t |
|
||||
| T\_META\_DEVICE | main FS meta bdev | -M | /dev/vda |
|
||||
| T\_DATA\_DEVICE | main FS data bdev | -D | /dev/vdb |
|
||||
|
||||
@@ -6,6 +6,61 @@ t_filter_fs()
|
||||
-e 's@Device: [a-fA-F0-9]*h/[0-9]*d@Device: 0h/0d@g'
|
||||
}
|
||||
|
||||
#
|
||||
# We can hit a spurious kasan warning that was fixed upstream:
|
||||
#
|
||||
# e504e74cc3a2 x86/unwind/orc: Disable KASAN checking in the ORC unwinder, part 2
|
||||
#
|
||||
# KASAN can get mad when the unwinder doesn't find ORC metadata and
|
||||
# wanders up without using frames and hits the KASAN stack red zones.
|
||||
# We can ignore these messages.
|
||||
#
|
||||
# They're bracketed by:
|
||||
# [ 2687.690127] ==================================================================
|
||||
# [ 2687.691366] BUG: KASAN: stack-out-of-bounds in get_reg+0x1bc/0x230
|
||||
# ...
|
||||
# [ 2687.706220] ==================================================================
|
||||
# [ 2687.707284] Disabling lock debugging due to kernel taint
|
||||
#
|
||||
# That final lock debugging message may not be included.
|
||||
#
|
||||
ignore_harmless_unwind_kasan_stack_oob()
|
||||
{
|
||||
awk '
|
||||
BEGIN {
|
||||
in_soob = 0
|
||||
soob_nr = 0
|
||||
}
|
||||
( !in_soob && $0 ~ /==================================================================/ ) {
|
||||
in_soob = 1
|
||||
soob_nr = NR
|
||||
saved = $0
|
||||
}
|
||||
( in_soob == 1 && NR == (soob_nr + 1) ) {
|
||||
if (match($0, /KASAN: stack-out-of-bounds in get_reg/) != 0) {
|
||||
in_soob = 2
|
||||
} else {
|
||||
in_soob = 0
|
||||
print saved
|
||||
}
|
||||
saved=""
|
||||
}
|
||||
( in_soob == 2 && $0 ~ /==================================================================/ ) {
|
||||
in_soob = 3
|
||||
soob_nr = NR
|
||||
}
|
||||
( in_soob == 3 && NR > soob_nr && $0 !~ /Disabling lock debugging/ ) {
|
||||
in_soob = 0
|
||||
}
|
||||
( !in_soob ) { print $0 }
|
||||
END {
|
||||
if (saved) {
|
||||
print saved
|
||||
}
|
||||
}
|
||||
'
|
||||
}
|
||||
|
||||
#
|
||||
# Filter out expected messages. Putting messages here implies that
|
||||
# tests aren't relying on messages to discover failures.. they're
|
||||
@@ -86,10 +141,12 @@ t_filter_dmesg()
|
||||
re="$re|scoutfs .* critical transaction commit failure.*"
|
||||
|
||||
# change-devices causes loop device resizing
|
||||
re="$re|loop: module loaded"
|
||||
re="$re|loop[0-9].* detected capacity change from.*"
|
||||
|
||||
# ignore systemd-journal rotating
|
||||
re="$re|systemd-journald.*"
|
||||
|
||||
egrep -v "($re)"
|
||||
egrep -v "($re)" | \
|
||||
ignore_harmless_unwind_kasan_stack_oob
|
||||
}
|
||||
|
||||
@@ -265,6 +265,15 @@ t_trigger_get() {
|
||||
cat "$(t_trigger_path "$nr")/$which"
|
||||
}
|
||||
|
||||
t_trigger_set() {
|
||||
local which="$1"
|
||||
local nr="$2"
|
||||
local val="$3"
|
||||
local path=$(t_trigger_path "$nr")
|
||||
|
||||
echo "$val" > "$path/$which"
|
||||
}
|
||||
|
||||
t_trigger_show() {
|
||||
local which="$1"
|
||||
local string="$2"
|
||||
@@ -276,9 +285,8 @@ t_trigger_show() {
|
||||
t_trigger_arm_silent() {
|
||||
local which="$1"
|
||||
local nr="$2"
|
||||
local path=$(t_trigger_path "$nr")
|
||||
|
||||
echo 1 > "$path/$which"
|
||||
t_trigger_set "$which" "$nr" 1
|
||||
}
|
||||
|
||||
t_trigger_arm() {
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
== measure initial createmany
|
||||
== measure initial createmany
|
||||
== measure two concurrent createmany runs
|
||||
== cleanup
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
== setting longer hung task timeout
|
||||
== creating fragmented extents
|
||||
== unlink file with moved extents to free extents per block
|
||||
== cleanup
|
||||
|
||||
37
tests/golden/srch-safe-merge-pos
Normal file
37
tests/golden/srch-safe-merge-pos
Normal file
@@ -0,0 +1,37 @@
|
||||
== initialize per-mount values
|
||||
== arm compaction triggers
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_merge_stop_safe armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_merge_stop_safe armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_merge_stop_safe armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_merge_stop_safe armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_merge_stop_safe armed: 1
|
||||
== compact more often
|
||||
== create padded sorted inputs by forcing log rotation
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_force_log_rotate armed: 1
|
||||
trigger srch_compact_logs_pad_safe armed: 1
|
||||
== compaction of padded should stop at safe
|
||||
== verify no compaction errors
|
||||
== cleanup
|
||||
@@ -326,16 +326,10 @@ unmount_all() {
|
||||
cmd wait $p
|
||||
done
|
||||
|
||||
# delete all temp meta devices
|
||||
for dev in $(losetup --associated "$T_META_DEVICE" | cut -d : -f 1); do
|
||||
if [ -e "$dev" ]; then
|
||||
cmd losetup -d "$dev"
|
||||
fi
|
||||
done
|
||||
# delete all temp data devices
|
||||
for dev in $(losetup --associated "$T_DATA_DEVICE" | cut -d : -f 1); do
|
||||
if [ -e "$dev" ]; then
|
||||
cmd losetup -d "$dev"
|
||||
# delete all temp devices
|
||||
for dev in /dev/mapper/_scoutfs_test_*; do
|
||||
if [ -b "$dev" ]; then
|
||||
cmd dmsetup remove $dev
|
||||
fi
|
||||
done
|
||||
}
|
||||
@@ -434,6 +428,12 @@ $T_UTILS/fenced/scoutfs-fenced > "$T_FENCED_LOG" 2>&1 &
|
||||
fenced_pid=$!
|
||||
fenced_log "started fenced pid $fenced_pid in the background"
|
||||
|
||||
# setup dm tables
|
||||
echo "0 $(blockdev --getsz $T_META_DEVICE) linear $T_META_DEVICE 0" > \
|
||||
$T_RESULTS/dmtable.meta
|
||||
echo "0 $(blockdev --getsz $T_DATA_DEVICE) linear $T_DATA_DEVICE 0" > \
|
||||
$T_RESULTS/dmtable.data
|
||||
|
||||
#
|
||||
# mount concurrently so that a quorum is present to elect the leader and
|
||||
# start a server.
|
||||
@@ -442,10 +442,13 @@ msg "mounting $T_NR_MOUNTS mounts on meta $T_META_DEVICE data $T_DATA_DEVICE"
|
||||
pids=""
|
||||
for i in $(seq 0 $((T_NR_MOUNTS - 1))); do
|
||||
|
||||
meta_dev=$(losetup --find --show $T_META_DEVICE)
|
||||
test -b "$meta_dev" || die "failed to create temp device $meta_dev"
|
||||
data_dev=$(losetup --find --show $T_DATA_DEVICE)
|
||||
test -b "$data_dev" || die "failed to create temp device $data_dev"
|
||||
name="_scoutfs_test_meta_$i"
|
||||
cmd dmsetup create "$name" --table "$(cat $T_RESULTS/dmtable.meta)"
|
||||
meta_dev="/dev/mapper/$name"
|
||||
|
||||
name="_scoutfs_test_data_$i"
|
||||
cmd dmsetup create "$name" --table "$(cat $T_RESULTS/dmtable.data)"
|
||||
data_dev="/dev/mapper/$name"
|
||||
|
||||
dir="/mnt/test.$i"
|
||||
test -d "$dir" || cmd mkdir -p "$dir"
|
||||
|
||||
@@ -14,6 +14,7 @@ offline-extent-waiting.sh
|
||||
move-blocks.sh
|
||||
large-fragmented-free.sh
|
||||
enospc.sh
|
||||
srch-safe-merge-pos.sh
|
||||
srch-basic-functionality.sh
|
||||
simple-xattr-unit.sh
|
||||
totl-xattr-tag.sh
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
#include <errno.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
@@ -35,10 +36,10 @@ struct opts {
|
||||
unsigned int dry_run:1,
|
||||
ls_output:1,
|
||||
quiet:1,
|
||||
user_xattr:1,
|
||||
same_srch_xattr:1,
|
||||
group_srch_xattr:1,
|
||||
unique_srch_xattr:1;
|
||||
xattr_set:1,
|
||||
xattr_file:1,
|
||||
xattr_group:1;
|
||||
char *xattr_name;
|
||||
};
|
||||
|
||||
struct stats {
|
||||
@@ -149,12 +150,31 @@ static void free_dir(struct dir *dir)
|
||||
free(dir);
|
||||
}
|
||||
|
||||
/*
 * Format into buf starting at byte offset off, never writing beyond the
 * sz total bytes of the buffer.
 *
 * Returns the offset just past the formatted text so that calls can be
 * chained to append.  Any return value >= sz tells the caller that the
 * result can't be trusted: the starting offset was already at or past
 * the end of the buffer, vsnprintf() failed or produced no characters,
 * or the output was truncated (off + ret then lands at or past sz).
 * Once that happens further chained calls keep returning sz.
 */
static size_t snprintf_off(void *buf, size_t sz, size_t off, const char *fmt, ...)
{
	va_list ap;
	int ret;

	if (off >= sz)
		return sz;

	va_start(ap, fmt);
	/* arithmetic on void * is a GNU extension, step through a char * */
	ret = vsnprintf((char *)buf + off, sz - off, fmt, ap);
	va_end(ap);

	/* errors and empty output are both reported as "didn't fit" */
	if (ret <= 0)
		return sz;

	return off + ret;
}
|
||||
|
||||
static void create_dir(struct dir *dir, struct opts *opts,
|
||||
struct stats *stats)
|
||||
{
|
||||
struct str_list *s;
|
||||
char name[100];
|
||||
char name[256]; /* max len and null term */
|
||||
char val = 'v';
|
||||
size_t off;
|
||||
int rc;
|
||||
int i;
|
||||
|
||||
@@ -175,29 +195,21 @@ static void create_dir(struct dir *dir, struct opts *opts,
|
||||
rc = mknod(s->str, S_IFREG | 0644, 0);
|
||||
error_exit(rc, "mknod %s failed"ERRF, s->str, ERRA);
|
||||
|
||||
rc = 0;
|
||||
if (rc == 0 && opts->user_xattr) {
|
||||
strcpy(name, "user.scoutfs_bcp");
|
||||
rc = setxattr(s->str, name, &val, 1, 0);
|
||||
}
|
||||
if (rc == 0 && opts->same_srch_xattr) {
|
||||
strcpy(name, "scoutfs.srch.scoutfs_bcp");
|
||||
rc = setxattr(s->str, name, &val, 1, 0);
|
||||
}
|
||||
if (rc == 0 && opts->group_srch_xattr) {
|
||||
snprintf(name, sizeof(name),
|
||||
"scoutfs.srch.scoutfs_bcp.group.%lu",
|
||||
stats->files / 10000);
|
||||
rc = setxattr(s->str, name, &val, 1, 0);
|
||||
}
|
||||
if (rc == 0 && opts->unique_srch_xattr) {
|
||||
snprintf(name, sizeof(name),
|
||||
"scoutfs.srch.scoutfs_bcp.unique.%lu",
|
||||
stats->files);
|
||||
if (opts->xattr_set) {
|
||||
off = snprintf_off(name, sizeof(name), 0, "%s", opts->xattr_name);
|
||||
if (opts->xattr_file)
|
||||
off = snprintf_off(name, sizeof(name), off,
|
||||
"-f-%lu", stats->files);
|
||||
if (opts->xattr_group)
|
||||
off = snprintf_off(name, sizeof(name), off,
|
||||
"-g-%lu", stats->files / 10000);
|
||||
|
||||
error_exit(off >= sizeof(name), "xattr name longer than 255 bytes");
|
||||
|
||||
rc = setxattr(s->str, name, &val, 1, 0);
|
||||
error_exit(rc, "setxattr %s %s failed"ERRF, s->str, name, ERRA);
|
||||
}
|
||||
|
||||
error_exit(rc, "setxattr %s %s failed"ERRF, s->str, name, ERRA);
|
||||
|
||||
stats->files++;
|
||||
rate_banner(opts, stats);
|
||||
@@ -365,11 +377,10 @@ static void usage(void)
|
||||
" -d DIR | create all files in DIR top level directory\n"
|
||||
" -n | dry run, only parse, don't create any files\n"
|
||||
" -q | quiet, don't regularly print rates\n"
|
||||
" -F | append \"-f-NR\" file nr to xattr name, requires -X\n"
|
||||
" -G | append \"-g-NR\" file nr/10000 to xattr name, requires -X\n"
|
||||
" -L | parse ls output; only reg, skip meta, paths at ./\n"
|
||||
" -X | set the same user. xattr name in all files\n"
|
||||
" -S | set the same .srch. xattr name in all files\n"
|
||||
" -G | set a .srch. xattr name shared by groups of files\n"
|
||||
" -U | set a unique .srch. xattr name in all files\n");
|
||||
" -X NAM | set named xattr in all files\n");
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
@@ -386,7 +397,7 @@ int main(int argc, char **argv)
|
||||
|
||||
memset(&opts, 0, sizeof(opts));
|
||||
|
||||
while ((c = getopt(argc, argv, "d:nqLXSGU")) != -1) {
|
||||
while ((c = getopt(argc, argv, "d:nqFGLX:")) != -1) {
|
||||
switch(c) {
|
||||
case 'd':
|
||||
top_dir = strdup(optarg);
|
||||
@@ -397,20 +408,19 @@ int main(int argc, char **argv)
|
||||
case 'q':
|
||||
opts.quiet = 1;
|
||||
break;
|
||||
case 'F':
|
||||
opts.xattr_file = 1;
|
||||
break;
|
||||
case 'G':
|
||||
opts.xattr_group = 1;
|
||||
break;
|
||||
case 'L':
|
||||
opts.ls_output = 1;
|
||||
break;
|
||||
case 'X':
|
||||
opts.user_xattr = 1;
|
||||
break;
|
||||
case 'S':
|
||||
opts.same_srch_xattr = 1;
|
||||
break;
|
||||
case 'G':
|
||||
opts.group_srch_xattr = 1;
|
||||
break;
|
||||
case 'U':
|
||||
opts.unique_srch_xattr = 1;
|
||||
opts.xattr_set = 1;
|
||||
opts.xattr_name = strdup(optarg);
|
||||
error_exit(!opts.xattr_name, "error allocating xattr name");
|
||||
break;
|
||||
case '?':
|
||||
printf("Unknown option '%c'\n", optopt);
|
||||
@@ -419,6 +429,11 @@ int main(int argc, char **argv)
|
||||
}
|
||||
}
|
||||
|
||||
error_exit(opts.xattr_file && !opts.xattr_set,
|
||||
"must specify xattr -X when appending file nr with -F");
|
||||
error_exit(opts.xattr_group && !opts.xattr_set,
|
||||
"must specify xattr -X when appending file nr with -G");
|
||||
|
||||
if (!opts.dry_run) {
|
||||
error_exit(!top_dir,
|
||||
"must specify top level directory with -d");
|
||||
|
||||
@@ -11,8 +11,13 @@ FILE="$T_D0/file"
|
||||
# final block as we truncated past it.
|
||||
#
|
||||
echo "== truncate writes zeroed partial end of file block"
|
||||
yes | dd of="$FILE" bs=8K count=1 status=none
|
||||
yes | dd of="$FILE" bs=8K count=1 status=none iflag=fullblock
|
||||
sync
|
||||
|
||||
# not passing iflag=fullblock causes the file occasionally to just be
|
||||
# 4K, so just to be safe we should at least check size once
|
||||
test `stat --printf="%s\n" "$FILE"` -eq 8192 || t_fail "test file incorrect start size"
|
||||
|
||||
truncate -s 6K "$FILE"
|
||||
truncate -s 12K "$FILE"
|
||||
echo 3 > /proc/sys/vm/drop_caches
|
||||
|
||||
@@ -7,9 +7,11 @@ t_require_mounts 2
|
||||
|
||||
COUNT=50000
|
||||
|
||||
# Prep dirs for test. Each mount needs to make their own parent dir for
|
||||
# the createmany run, otherwise both dirs will end up in the same inode
|
||||
# group, causing updates to bounce that lock around.
|
||||
#
|
||||
# Prep dirs for test. We have per-directory inode number allocators so
|
||||
# by putting each createmany in a per-mount dir they get their own inode
|
||||
# number region and cluster locks.
|
||||
#
|
||||
echo "== measure initial createmany"
|
||||
mkdir -p $T_D0/dir/0
|
||||
mkdir $T_D1/dir/1
|
||||
@@ -17,18 +19,20 @@ mkdir $T_D1/dir/1
|
||||
echo "== measure initial createmany"
|
||||
START=$SECONDS
|
||||
createmany -o "$T_D0/file_" $COUNT >> $T_TMP.full
|
||||
sync
|
||||
SINGLE=$((SECONDS - START))
|
||||
echo single $SINGLE >> $T_TMP.full
|
||||
|
||||
echo "== measure two concurrent createmany runs"
|
||||
START=$SECONDS
|
||||
createmany -o $T_D0/dir/0/file $COUNT > /dev/null &
|
||||
(cd $T_D0/dir/0; createmany -o ./file_ $COUNT > /dev/null) &
|
||||
pids="$!"
|
||||
createmany -o $T_D1/dir/1/file $COUNT > /dev/null &
|
||||
(cd $T_D1/dir/1; createmany -o ./file_ $COUNT > /dev/null) &
|
||||
pids="$pids $!"
|
||||
for p in $pids; do
|
||||
wait $p
|
||||
done
|
||||
sync
|
||||
BOTH=$((SECONDS - START))
|
||||
echo both $BOTH >> $T_TMP.full
|
||||
|
||||
@@ -41,7 +45,10 @@ echo both $BOTH >> $T_TMP.full
|
||||
# synchronized operation.
|
||||
FACTOR=200
|
||||
if [ "$BOTH" -gt $(($SINGLE*$FACTOR)) ]; then
|
||||
echo "both createmany took $BOTH sec, more than $FACTOR x single $SINGLE sec"
|
||||
t_fail "both createmany took $BOTH sec, more than $FACTOR x single $SINGLE sec"
|
||||
fi
|
||||
|
||||
echo "== cleanup"
|
||||
find $T_D0/dir -delete
|
||||
|
||||
t_pass
|
||||
|
||||
@@ -10,6 +10,30 @@ EXTENTS_PER_BTREE_BLOCK=600
|
||||
EXTENTS_PER_LIST_BLOCK=8192
|
||||
FREED_EXTENTS=$((EXTENTS_PER_BTREE_BLOCK * EXTENTS_PER_LIST_BLOCK))
|
||||
|
||||
#
|
||||
# This test specifically creates a pathologically sparse file that will
|
||||
# be as expensive as possible to free. This is usually fine on
|
||||
# dedicated or reasonable hardware, but trying to run this in
|
||||
# virtualized debug kernels can take a very long time. This test is
|
||||
# about making sure that the server doesn't fail, not that the platform
|
||||
# can handle the scale of work that our btree formats happen to require
|
||||
# while execution is bogged down with use-after-free memory reference
|
||||
# tracking. So we give the test a lot more breathing room before
|
||||
# deciding that it's hung.
|
||||
#
|
||||
echo "== setting longer hung task timeout"
|
||||
if [ -w /proc/sys/kernel/hung_task_timeout_secs ]; then
|
||||
secs=$(cat /proc/sys/kernel/hung_task_timeout_secs)
|
||||
test "$secs" -gt 0 || \
|
||||
t_fail "confusing value '$secs' from /proc/sys/kernel/hung_task_timeout_secs"
|
||||
restore_hung_task_timeout()
|
||||
{
|
||||
echo "$secs" > /proc/sys/kernel/hung_task_timeout_secs
|
||||
}
|
||||
trap restore_hung_task_timeout EXIT
|
||||
echo "$((secs * 5))" > /proc/sys/kernel/hung_task_timeout_secs
|
||||
fi
|
||||
|
||||
echo "== creating fragmented extents"
|
||||
fragmented_data_extents $FREED_EXTENTS $EXTENTS_PER_BTREE_BLOCK "$T_D0/alloc" "$T_D0/move"
|
||||
|
||||
|
||||
@@ -9,6 +9,7 @@ LOG=340000
|
||||
LIM=1000000
|
||||
|
||||
SEQF="%.20g"
|
||||
SXA="scoutfs.srch.test-srch-basic-functionality"
|
||||
|
||||
t_require_commands touch rm setfattr scoutfs find_xattrs
|
||||
|
||||
@@ -27,20 +28,20 @@ diff_srch_find()
|
||||
|
||||
echo "== create new xattrs"
|
||||
touch "$T_D0/"{create,update}
|
||||
setfattr -n scoutfs.srch.test -v 1 "$T_D0/"{create,update} 2>&1 | t_filter_fs
|
||||
diff_srch_find scoutfs.srch.test
|
||||
setfattr -n $SXA -v 1 "$T_D0/"{create,update} 2>&1 | t_filter_fs
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== update existing xattr"
|
||||
setfattr -n scoutfs.srch.test -v 2 "$T_D0/update" 2>&1 | t_filter_fs
|
||||
diff_srch_find scoutfs.srch.test
|
||||
setfattr -n $SXA -v 2 "$T_D0/update" 2>&1 | t_filter_fs
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== remove an xattr"
|
||||
setfattr -x scoutfs.srch.test "$T_D0/create" 2>&1 | t_filter_fs
|
||||
diff_srch_find scoutfs.srch.test
|
||||
setfattr -x $SXA "$T_D0/create" 2>&1 | t_filter_fs
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== remove xattr with files"
|
||||
rm -f "$T_D0/"{create,update}
|
||||
diff_srch_find scoutfs.srch.test
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== trigger small log merges by rotating single block with unmount"
|
||||
sv=$(t_server_nr)
|
||||
@@ -56,7 +57,7 @@ while [ "$i" -lt "8" ]; do
|
||||
|
||||
eval path="\$T_D${nr}/single-block-$i"
|
||||
touch "$path"
|
||||
setfattr -n scoutfs.srch.single-block-logs -v $i "$path"
|
||||
setfattr -n $SXA -v $i "$path"
|
||||
t_umount $nr
|
||||
t_mount $nr
|
||||
|
||||
@@ -65,51 +66,51 @@ while [ "$i" -lt "8" ]; do
|
||||
done
|
||||
# wait for srch compaction worker delay
|
||||
sleep 10
|
||||
rm -rf "$T_D0/single-block-*"
|
||||
find "$T_D0" -type f -name 'single-block-*' -delete
|
||||
|
||||
echo "== create entries in current log"
|
||||
DIR="$T_D0/dir"
|
||||
NR=$((LOG / 4))
|
||||
mkdir -p "$DIR"
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -S -d "$DIR" > /dev/null
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -X $SXA -d "$DIR" > /dev/null
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== delete small fraction"
|
||||
seq -f "$DIR/f-$SEQF" 1 7 $NR | xargs setfattr -x scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "$DIR/f-$SEQF" 1 7 $NR | xargs setfattr -x $SXA
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== remove files"
|
||||
rm -rf "$DIR"
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== create entries that exceed one log"
|
||||
NR=$((LOG * 3 / 2))
|
||||
mkdir -p "$DIR"
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -S -d "$DIR" > /dev/null
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -X $SXA -d "$DIR" > /dev/null
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== delete fractions in phases"
|
||||
for i in $(seq 1 3); do
|
||||
seq -f "$DIR/f-$SEQF" $i 3 $NR | xargs setfattr -x scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "$DIR/f-$SEQF" $i 3 $NR | xargs setfattr -x $SXA
|
||||
diff_srch_find $SXA
|
||||
done
|
||||
|
||||
echo "== remove files"
|
||||
rm -rf "$DIR"
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== create entries for exceed search entry limit"
|
||||
NR=$((LIM * 3 / 2))
|
||||
mkdir -p "$DIR"
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -S -d "$DIR" > /dev/null
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "f-$SEQF" 1 $NR | src/bulk_create_paths -X $SXA -d "$DIR" > /dev/null
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== delete half"
|
||||
seq -f "$DIR/f-$SEQF" 1 2 $NR | xargs setfattr -x scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
seq -f "$DIR/f-$SEQF" 1 2 $NR | xargs setfattr -x $SXA
|
||||
diff_srch_find $SXA
|
||||
|
||||
echo "== entirely remove third batch"
|
||||
rm -rf "$DIR"
|
||||
diff_srch_find scoutfs.srch.scoutfs_bcp
|
||||
diff_srch_find $SXA
|
||||
|
||||
t_pass
|
||||
|
||||
90
tests/tests/srch-safe-merge-pos.sh
Normal file
90
tests/tests/srch-safe-merge-pos.sh
Normal file
@@ -0,0 +1,90 @@
|
||||
#
# There was a bug where srch file compaction could get stuck if a
# partial compaction finished at the specific _SAFE_BYTES offset in a
# block.  Resuming from that position would return an error and
# compaction would stop making forward progress.
#
# We use triggers to pad the output of log compaction to end on the safe
# offset and then cause compaction of those padded inputs to stop at the
# safe offset.  Continuation will either succeed or return errors.
#

# each forced log rotation only needs a few entries
NR=10
SEQF="%.20g"
# number of rotated logs per round, and number of rounds
COMPACT_NR=4

echo "== initialize per-mount values"
# remember the starting error counter and compaction delay of each mount
declare -a err
declare -a compact_delay
for nr in $(t_fs_nrs); do
	err[$nr]=$(t_counter srch_compact_error $nr)
	compact_delay[$nr]=$(cat "$(t_sysfs_path $nr)/srch/compact_delay_ms")
done

# put back the saved compaction delays however the test exits
restore_compact_delay()
{
	local nr

	for nr in $(t_fs_nrs); do
		echo ${compact_delay[$nr]} > "$(t_sysfs_path $nr)/srch/compact_delay_ms"
	done
}
trap restore_compact_delay EXIT

echo "== arm compaction triggers"
for nr in $(t_fs_nrs); do
	t_trigger_arm srch_compact_logs_pad_safe $nr
	t_trigger_arm srch_merge_stop_safe $nr
done

echo "== compact more often"
for nr in $(t_fs_nrs); do
	echo 1000 > "$(t_sysfs_path $nr)/srch/compact_delay_ms"
done

echo "== create padded sorted inputs by forcing log rotation"
sv=$(t_server_nr)
for i in $(seq 1 $COMPACT_NR); do
	for j in $(seq 1 $COMPACT_NR); do
		t_trigger_arm srch_force_log_rotate $sv

		seq -f "f-$i-$j-$SEQF" 1 $NR | \
			bulk_create_paths -X "scoutfs.srch.t-srch-safe-merge-pos" -d "$T_D0" > \
			/dev/null
		sync

		# the trigger file reads 0 again once the trigger has fired
		test "$(t_trigger_get srch_force_log_rotate $sv)" == "0" || \
			t_fail "srch_force_log_rotate didn't trigger"
	done

	# wait for some mount to consume its pad trigger, then re-arm it
	# for the next round, bailing out if compaction reports an error
	padded=0
	while test $padded == 0 && sleep .5; do
		for nr in $(t_fs_nrs); do
			if [ "$(t_trigger_get srch_compact_logs_pad_safe $nr)" == "0" ]; then
				t_trigger_arm srch_compact_logs_pad_safe $nr
				padded=1
				break
			fi
			test "$(t_counter srch_compact_error $nr)" == "${err[$nr]}" || \
				t_fail "srch_compact_error counter increased on mount $nr"
		done
	done
done

echo "== compaction of padded should stop at safe"
sleep 2
# NOTE(review): this loop only looks for a mount whose stop trigger has
# fired; nothing fails if none did.  Confirm whether that is intended.
for nr in $(t_fs_nrs); do
	if [ "$(t_trigger_get srch_merge_stop_safe $nr)" == "0" ]; then
		break
	fi
done

echo "== verify no compaction errors"
sleep 2
for nr in $(t_fs_nrs); do
	test "$(t_counter srch_compact_error $nr)" == "${err[$nr]}" || \
		t_fail "srch_compact_error counter increased on mount $nr"
done

echo "== cleanup"
find "$T_D0" -type f -name 'f-*' -delete

t_pass
|
||||
Reference in New Issue
Block a user