mirror of
https://github.com/versity/scoutfs.git
synced 2026-02-04 01:32:04 +00:00
Don't emit empty blocks in kway_merge()
It's possible for a srch compaction to collapse down to nothing if given evenly paired create/delete entries. In this case, we were emitting an empty block. This could cause problems for search_sorted_file(), which assumes that every block it sees has a valid first and last entry. Fix this by keeping a temp entry and only emitting it if it differs from the next entry in the block. Be sure to flush out a straggling temp entry if we have one when we're done with the last block of the merge. Signed-off-by: Chris Kirby <ckirby@versity.com>
This commit is contained in:
@@ -2660,6 +2660,110 @@ TRACE_EVENT(scoutfs_get_file_block,
|
||||
__entry->last_hash, __entry->last_ino, __entry->last_id)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_srch_new_merge,
|
||||
TP_PROTO(struct super_block *sb),
|
||||
|
||||
TP_ARGS(sb),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
),
|
||||
|
||||
TP_printk(SCSBF, SCSB_TRACE_ARGS)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_srch_emit_entry,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *sre,
|
||||
struct scoutfs_srch_block *srb, u64 blkno),
|
||||
|
||||
TP_ARGS(sb, sre, srb, blkno),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u32, entry_nr)
|
||||
__field(__u64, blkno)
|
||||
__field(__u64, hash)
|
||||
__field(__u64, ino)
|
||||
__field(__u64, id)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->entry_nr = __le32_to_cpu(srb->entry_nr);
|
||||
__entry->blkno = blkno;
|
||||
__entry->hash = __le64_to_cpu(sre->hash);
|
||||
__entry->ino = __le64_to_cpu(sre->ino);
|
||||
__entry->id = __le64_to_cpu(sre->id);
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" nr %u blkno %llu hash 0x%llx ino %llu id 0x%llx",
|
||||
SCSB_TRACE_ARGS, __entry->entry_nr, __entry->blkno,
|
||||
__entry->hash, __entry->ino, __entry->id)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_srch_clr_tmp,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *tmp),
|
||||
|
||||
TP_ARGS(sb, tmp),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, tmp_hash)
|
||||
__field(__u64, tmp_ino)
|
||||
__field(__u64, tmp_id)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->tmp_hash = __le64_to_cpu(tmp->hash);
|
||||
__entry->tmp_ino = __le64_to_cpu(tmp->ino);
|
||||
__entry->tmp_id = __le64_to_cpu(tmp->id);
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" tmp hash 0x%llx tmp ino %llu tmp hash 0x%llx",
|
||||
SCSB_TRACE_ARGS,
|
||||
__entry->tmp_hash, __entry->tmp_ino, __entry->tmp_id)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_srch_cmp,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_srch_entry *root,
|
||||
struct scoutfs_srch_entry *tmp, void *bl),
|
||||
|
||||
TP_ARGS(sb, root, tmp, bl),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(__u64, root_hash)
|
||||
__field(__u64, root_ino)
|
||||
__field(__u64, root_id)
|
||||
__field(__u64, tmp_hash)
|
||||
__field(__u64, tmp_ino)
|
||||
__field(__u64, tmp_id)
|
||||
__field(void *, bl)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->root_hash = __le64_to_cpu(root->hash);
|
||||
__entry->root_ino = __le64_to_cpu(root->ino);
|
||||
__entry->root_id = __le64_to_cpu(root->id);
|
||||
__entry->tmp_hash = __le64_to_cpu(tmp->hash);
|
||||
__entry->tmp_ino = __le64_to_cpu(tmp->ino);
|
||||
__entry->tmp_id = __le64_to_cpu(tmp->id);
|
||||
__entry->bl = bl;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" root hash 0x%llx root ino %llu root id 0x%llx tmp hash 0x%llx tmp ino %llu tmp hash 0x%llx, bl %p",
|
||||
SCSB_TRACE_ARGS,
|
||||
__entry->root_hash, __entry->root_ino, __entry->root_id,
|
||||
__entry->tmp_hash, __entry->tmp_ino, __entry->tmp_id,
|
||||
__entry->bl)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_block_stale,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_block_ref *ref,
|
||||
struct scoutfs_block_header *hdr, u32 magic, u32 crc),
|
||||
|
||||
219
kmod/src/srch.c
219
kmod/src/srch.c
@@ -1525,6 +1525,66 @@ static bool should_commit(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
scoutfs_alloc_meta_low(sb, alloc, nr);
|
||||
}
|
||||
|
||||
static int alloc_srch_block(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
struct scoutfs_block_writer *wri,
|
||||
struct scoutfs_srch_file *sfl,
|
||||
struct scoutfs_block **bl,
|
||||
u64 blk)
|
||||
{
|
||||
DECLARE_SRCH_INFO(sb, srinf);
|
||||
int ret;
|
||||
|
||||
if (atomic_read(&srinf->shutdown))
|
||||
return -ESHUTDOWN;
|
||||
|
||||
/* could grow and dirty to a leaf */
|
||||
if (should_commit(sb, alloc, wri, sfl->height + 1))
|
||||
return -EAGAIN;
|
||||
|
||||
ret = get_file_block(sb, alloc, wri, sfl, GFB_INSERT | GFB_DIRTY,
|
||||
blk, bl);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
|
||||
scoutfs_inc_counter(sb, srch_compact_dirty_block);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int emit_srch_entry(struct super_block *sb,
|
||||
struct scoutfs_srch_file *sfl,
|
||||
struct scoutfs_srch_block *srb,
|
||||
struct scoutfs_srch_entry *sre,
|
||||
u64 blk)
|
||||
{
|
||||
int ret;
|
||||
|
||||
ret = encode_entry(srb->entries + le32_to_cpu(srb->entry_bytes),
|
||||
sre, &srb->tail);
|
||||
if (WARN_ON_ONCE(ret <= 0)) {
|
||||
/* shouldn't happen */
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
if (srb->entry_bytes == 0) {
|
||||
if (blk == 0)
|
||||
sfl->first = *sre;
|
||||
srb->first = *sre;
|
||||
}
|
||||
|
||||
le32_add_cpu(&srb->entry_nr, 1);
|
||||
le32_add_cpu(&srb->entry_bytes, ret);
|
||||
srb->last = *sre;
|
||||
srb->tail = *sre;
|
||||
sfl->last = *sre;
|
||||
le64_add_cpu(&sfl->entries, 1);
|
||||
|
||||
scoutfs_inc_counter(sb, srch_compact_entry);
|
||||
trace_scoutfs_srch_emit_entry(sb, sre, srb, blk);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
struct tourn_node {
|
||||
struct scoutfs_srch_entry sre;
|
||||
int ind;
|
||||
@@ -1559,20 +1619,18 @@ static int kway_merge(struct super_block *sb,
|
||||
kway_get_t kway_get, kway_advance_t kway_adv,
|
||||
void **args, int nr, bool logs_input)
|
||||
{
|
||||
DECLARE_SRCH_INFO(sb, srinf);
|
||||
struct scoutfs_srch_block *srb = NULL;
|
||||
struct scoutfs_srch_entry last_tail;
|
||||
struct scoutfs_srch_entry tmp_entry = {0};
|
||||
struct scoutfs_block *bl = NULL;
|
||||
struct tourn_node *tnodes;
|
||||
struct tourn_node *leaves;
|
||||
struct tourn_node *root;
|
||||
struct tourn_node *tn;
|
||||
int last_bytes = 0;
|
||||
bool have_tmp = false;
|
||||
int nr_parents;
|
||||
int nr_nodes;
|
||||
int empty = 0;
|
||||
int ret = 0;
|
||||
int diff;
|
||||
u64 blk;
|
||||
int ind;
|
||||
int i;
|
||||
@@ -1606,97 +1664,73 @@ static int kway_merge(struct super_block *sb,
|
||||
}
|
||||
}
|
||||
|
||||
trace_scoutfs_srch_new_merge(sb);
|
||||
|
||||
/* always append new blocks */
|
||||
blk = le64_to_cpu(sfl->blocks);
|
||||
while (empty < nr) {
|
||||
if (bl == NULL) {
|
||||
if (atomic_read(&srinf->shutdown)) {
|
||||
ret = -ESHUTDOWN;
|
||||
goto out;
|
||||
trace_scoutfs_srch_cmp(sb, &root->sre, &tmp_entry, bl);
|
||||
|
||||
if (sre_cmp(&root->sre, &tmp_entry) != 0) {
|
||||
if (have_tmp) {
|
||||
if (bl == NULL) {
|
||||
ret = alloc_srch_block(sb, alloc, wri,
|
||||
sfl, &bl, blk);
|
||||
if (ret < 0) {
|
||||
if (ret == -EAGAIN)
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
srb = bl->data;
|
||||
}
|
||||
|
||||
ret = emit_srch_entry(sb, sfl, srb, &tmp_entry,
|
||||
blk);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (le32_to_cpu(srb->entry_bytes) >
|
||||
SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
blk++;
|
||||
memset(&tmp_entry, 0, sizeof(tmp_entry));
|
||||
have_tmp = false;
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* end sorted block on _SAFE offset for
|
||||
* testing
|
||||
*/
|
||||
if (bl && le32_to_cpu(srb->entry_nr) == 1 &&
|
||||
logs_input &&
|
||||
scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
|
||||
pad_entries_at_safe(sfl, srb);
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
blk++;
|
||||
|
||||
memset(&tmp_entry, 0, sizeof(tmp_entry));
|
||||
have_tmp = false;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
/* could grow and dirty to a leaf */
|
||||
if (should_commit(sb, alloc, wri, sfl->height + 1)) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = get_file_block(sb, alloc, wri, sfl,
|
||||
GFB_INSERT | GFB_DIRTY, blk, &bl);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
srb = bl->data;
|
||||
scoutfs_inc_counter(sb, srch_compact_dirty_block);
|
||||
}
|
||||
|
||||
if (sre_cmp(&root->sre, &srb->last) != 0) {
|
||||
last_bytes = le32_to_cpu(srb->entry_bytes);
|
||||
last_tail = srb->last;
|
||||
ret = encode_entry(srb->entries +
|
||||
le32_to_cpu(srb->entry_bytes),
|
||||
&root->sre, &srb->tail);
|
||||
if (WARN_ON_ONCE(ret <= 0)) {
|
||||
/* shouldn't happen */
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (srb->entry_bytes == 0) {
|
||||
if (blk == 0)
|
||||
sfl->first = root->sre;
|
||||
srb->first = root->sre;
|
||||
}
|
||||
le32_add_cpu(&srb->entry_nr, 1);
|
||||
le32_add_cpu(&srb->entry_bytes, ret);
|
||||
srb->last = root->sre;
|
||||
srb->tail = root->sre;
|
||||
sfl->last = root->sre;
|
||||
le64_add_cpu(&sfl->entries, 1);
|
||||
ret = 0;
|
||||
|
||||
if (le32_to_cpu(srb->entry_bytes) >
|
||||
SCOUTFS_SRCH_BLOCK_SAFE_BYTES) {
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
blk++;
|
||||
}
|
||||
|
||||
/* end sorted block on _SAFE offset for testing */
|
||||
if (bl && le32_to_cpu(srb->entry_nr) == 1 && logs_input &&
|
||||
scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
|
||||
pad_entries_at_safe(sfl, srb);
|
||||
scoutfs_block_put(sb, bl);
|
||||
bl = NULL;
|
||||
blk++;
|
||||
}
|
||||
|
||||
scoutfs_inc_counter(sb, srch_compact_entry);
|
||||
|
||||
tmp_entry = root->sre;
|
||||
have_tmp = true;
|
||||
} else {
|
||||
/*
|
||||
* Duplicate entries indicate deletion so we
|
||||
* undo the previously encoded entry and ignore
|
||||
* undo the previously cached tmp entry and ignore
|
||||
* this entry. This only happens within each
|
||||
* block. Deletions can span block boundaries
|
||||
* and will be filtered out by search and
|
||||
* hopefully removed in future compactions.
|
||||
*/
|
||||
diff = le32_to_cpu(srb->entry_bytes) - last_bytes;
|
||||
if (diff) {
|
||||
memset(srb->entries + last_bytes, 0, diff);
|
||||
if (srb->entry_bytes == 0) {
|
||||
/* last_tail will be 0 */
|
||||
if (blk == 0)
|
||||
sfl->first = last_tail;
|
||||
srb->first = last_tail;
|
||||
}
|
||||
le32_add_cpu(&srb->entry_nr, -1);
|
||||
srb->entry_bytes = cpu_to_le32(last_bytes);
|
||||
srb->last = last_tail;
|
||||
srb->tail = last_tail;
|
||||
sfl->last = last_tail;
|
||||
le64_add_cpu(&sfl->entries, -1);
|
||||
}
|
||||
trace_scoutfs_srch_clr_tmp(sb, &tmp_entry);
|
||||
memset(&tmp_entry, 0, sizeof(tmp_entry));
|
||||
have_tmp = false;
|
||||
|
||||
scoutfs_inc_counter(sb, srch_compact_removed_entry);
|
||||
}
|
||||
@@ -1739,6 +1773,22 @@ static int kway_merge(struct super_block *sb,
|
||||
/* could stream a final index.. arguably a small portion of work */
|
||||
|
||||
out:
|
||||
if (have_tmp) {
|
||||
bool emit = true;
|
||||
|
||||
if (bl == NULL) {
|
||||
ret = alloc_srch_block(sb, alloc, wri, sfl, &bl, blk);
|
||||
if (ret) {
|
||||
emit = false;
|
||||
if (ret == -EAGAIN)
|
||||
ret = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (emit)
|
||||
ret = emit_srch_entry(sb, sfl, srb, &tmp_entry, blk);
|
||||
}
|
||||
|
||||
scoutfs_block_put(sb, bl);
|
||||
vfree(tnodes);
|
||||
return ret;
|
||||
@@ -1982,6 +2032,11 @@ static int kway_get_reader(struct super_block *sb,
|
||||
rdr->skip > SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
|
||||
rdr->skip >= le32_to_cpu(srb->entry_bytes)) {
|
||||
/* XXX inconsistency */
|
||||
scoutfs_err(sb, "blkno %llu pos %u vs %ld, skip %u, bytes %u",
|
||||
__le64_to_cpu(srb->hdr.blkno),
|
||||
rdr->pos, SCOUTFS_SRCH_BLOCK_SAFE_BYTES,
|
||||
rdr->skip,
|
||||
le32_to_cpu(srb->entry_bytes));
|
||||
return -EIO;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user