From d5c699c3b432ed06a5cf49a791b1e32823e830cd Mon Sep 17 00:00:00 2001
From: Zach Brown
Date: Mon, 6 Nov 2023 14:02:08 -0800
Subject: [PATCH 1/4] Don't respond with ENOENT for no srch compaction

The srch compaction request building function and the srch compaction
worker both have logic to recognize a valid response with no input
files, indicating that there's no work to do.  The server unfortunately
translated nr == 0 into ENOENT and sent that error response to the
client.  This caused the client to increment error counters in the
common case when there's no compaction work to perform.

We'd like the error counter to reflect actual errors, and we're about
to check it in a test, so let's fix this up so that the server sends a
successful response with nr == 0 to indicate that there's no work to
do.

Signed-off-by: Zach Brown
---
 kmod/src/server.c | 4 +---
 kmod/src/srch.c   | 2 +-
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/kmod/src/server.c b/kmod/src/server.c
index 46d50b54..2593ef1a 100644
--- a/kmod/src/server.c
+++ b/kmod/src/server.c
@@ -1966,9 +1966,7 @@ static int server_srch_get_compact(struct super_block *sb,
 	ret = scoutfs_srch_get_compact(sb, &server->alloc, &server->wri,
 				       &super->srch_root, rid, sc);
 	mutex_unlock(&server->srch_mutex);
-	if (ret == 0 && sc->nr == 0)
-		ret = -ENOENT;
-	if (ret < 0)
+	if (ret < 0 || (ret == 0 && sc->nr == 0))
 		goto apply;
 
 	mutex_lock(&server->alloc_mutex);
diff --git a/kmod/src/srch.c b/kmod/src/srch.c
index 36385ad9..49eb27b7 100644
--- a/kmod/src/srch.c
+++ b/kmod/src/srch.c
@@ -2179,7 +2179,7 @@ out:
 	scoutfs_block_writer_forget_all(sb, &wri);
 
 	if (!atomic_read(&srinf->shutdown)) {
-		delay = ret == 0 ? 0 : msecs_to_jiffies(SRCH_COMPACT_DELAY_MS);
+		delay = (sc->nr > 0 && ret == 0) ? 0 : msecs_to_jiffies(SRCH_COMPACT_DELAY_MS);
 		queue_delayed_work(srinf->workq, &srinf->compact_dwork, delay);
 	}

From 77fbf9296805c683cfd97035a8b967d4c3ba7c58 Mon Sep 17 00:00:00 2001
From: Zach Brown
Date: Tue, 7 Nov 2023 12:11:08 -0800
Subject: [PATCH 2/4] Add t_trigger_set helper

Add a helper to arm or disarm a trigger with a value argument.

Signed-off-by: Zach Brown
---
 tests/funcs/fs.sh | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/tests/funcs/fs.sh b/tests/funcs/fs.sh
index b4442252..080b5c5f 100644
--- a/tests/funcs/fs.sh
+++ b/tests/funcs/fs.sh
@@ -265,6 +265,15 @@ t_trigger_get() {
 	cat "$(t_trigger_path "$nr")/$which"
 }
 
+t_trigger_set() {
+	local which="$1"
+	local nr="$2"
+	local val="$3"
+	local path=$(t_trigger_path "$nr")
+
+	echo "$val" > "$path/$which"
+}
+
 t_trigger_show() {
 	local which="$1"
 	local string="$2"
@@ -276,9 +285,8 @@ t_trigger_show() {
 t_trigger_arm_silent() {
 	local which="$1"
 	local nr="$2"
-	local path=$(t_trigger_path "$nr")
 
-	echo 1 > "$path/$which"
+	t_trigger_set "$which" "$nr" 1
 }
 
 t_trigger_arm() {

From 21c070b42df1d3253ce235089b11dd7c54d70987 Mon Sep 17 00:00:00 2001
From: Zach Brown
Date: Wed, 1 Nov 2023 10:05:06 -0700
Subject: [PATCH 3/4] Add test for srch continuation safe pos errors

Add a test for srch compaction getting stuck hitting errors while
continuing a partial operation.  It ensures that a block has an encoded
entry at the _SAFE_BYTES offset, that an operation stops precisely at
that offset, and then watches for errors.
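
The arrangement the triggers construct looks roughly like this (a
sketch, not to scale; the pad pairs are the reader-invisible entries
described in the srch.c comments below):

	| first entry | pad pair | pad pair | ... | last entry |
	0                                    _SAFE_BYTES   entry_bytes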
Signed-off-by: Zach Brown
---
 kmod/src/srch.c                    | 116 ++++++++++++++++++++++++++++-
 kmod/src/triggers.c                |   3 +
 kmod/src/triggers.h                |   3 +
 tests/golden/srch-safe-merge-pos   |  81 ++++++++++++++++++++
 tests/sequence                     |   1 +
 tests/tests/srch-safe-merge-pos.sh |  69 +++++++++++++++++
 6 files changed, 270 insertions(+), 3 deletions(-)
 create mode 100644 tests/golden/srch-safe-merge-pos
 create mode 100644 tests/tests/srch-safe-merge-pos.sh

diff --git a/kmod/src/srch.c b/kmod/src/srch.c
index 49eb27b7..7af54e82 100644
--- a/kmod/src/srch.c
+++ b/kmod/src/srch.c
@@ -30,6 +30,7 @@
 #include "client.h"
 #include "counters.h"
 #include "scoutfs_trace.h"
+#include "triggers.h"
 
 /*
  * This srch subsystem gives us a way to find inodes that have a given
@@ -520,6 +521,95 @@ out:
 	return ret;
 }
 
+/*
+ * Padded entries are encoded in pairs after an existing entry.  Each
+ * pair cancels itself out for all readers (the second encoding looks
+ * like a deletion) so the pairs don't affect the first/last bounds of
+ * the block or file.
+ */
+static int append_padded_entry(struct scoutfs_srch_file *sfl, u64 blk,
+			       struct scoutfs_srch_block *srb, struct scoutfs_srch_entry *sre)
+{
+	int ret;
+
+	ret = encode_entry(srb->entries + le32_to_cpu(srb->entry_bytes),
+			   sre, &srb->tail);
+	if (ret > 0) {
+		srb->tail = *sre;
+		le32_add_cpu(&srb->entry_nr, 1);
+		le32_add_cpu(&srb->entry_bytes, ret);
+		le64_add_cpu(&sfl->entries, 1);
+		ret = 0;
+	}
+
+	return ret;
+}
+
+/*
+ * This is called by a testing trigger to create a very specific case of
+ * encoded entry offsets.  We want the last entry in the block to start
+ * precisely at the _SAFE_BYTES offset.
+ *
+ * This is called when there is a single existing entry in the block.
+ * We have the entire block to work with.  We encode pairs of matching
+ * entries.  This hides them from readers (both searches and merging) as
+ * they're interpreted as creation followed by deletion.  We use the
+ * existing hash value of the first entry in the block but then set the
+ * inode to an impossibly large number so it doesn't interfere with
+ * anything.
+ *
+ * To hit the specific offset we very carefully manage the number of
+ * bytes of change between fields in the entry.  We know that if we
+ * change all the bytes of the ino and id we end up with a 20 byte
+ * (2+8+8,2) encoding of the pair of entries.  To have the last entry
+ * start at the _SAFE_BYTES offset we know that the final 20 byte pair
+ * encoding needs to end at 2 bytes (second entry encoding) after the
+ * _SAFE_BYTES offset.
+ *
+ * So as we encode pairs we watch the delta of our current offset from
+ * that desired final offset of 2 past _SAFE_BYTES.  If we're a multiple
+ * of 20 away then we encode the full 20 byte pairs.  If we're not, then
+ * we drop a byte to encode 19 bytes.  That'll slowly change the offset
+ * to be a multiple of 20 again while encoding large entries.
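+ *
+ * For example, with illustrative numbers: starting 59 bytes short of
+ * the target, 59 isn't a multiple of 20 so we encode a 19 byte pair,
+ * leaving 40; 40 and then 20 are multiples of 20, so two full 20 byte
+ * pairs finish the job with the final pair's 2 byte second entry
+ * starting exactly at _SAFE_BYTES.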
+ */
+static void pad_entries_at_safe(struct scoutfs_srch_file *sfl, u64 blk,
+				struct scoutfs_srch_block *srb)
+{
+	struct scoutfs_srch_entry sre;
+	u32 target;
+	s32 diff;
+	u64 hash;
+	u64 ino;
+	u64 id;
+	int ret;
+
+	hash = le64_to_cpu(srb->tail.hash);
+	ino = le64_to_cpu(srb->tail.ino) | (1ULL << 62);
+	id = le64_to_cpu(srb->tail.id);
+
+	target = SCOUTFS_SRCH_BLOCK_SAFE_BYTES + 2;
+
+	while ((diff = target - le32_to_cpu(srb->entry_bytes)) > 0) {
+		ino ^= 1ULL << (7 * 8);
+		if (diff % 20 == 0) {
+			id ^= 1ULL << (7 * 8);
+		} else {
+			id ^= 1ULL << (6 * 8);
+		}
+
+		sre.hash = cpu_to_le64(hash);
+		sre.ino = cpu_to_le64(ino);
+		sre.id = cpu_to_le64(id);
+
+		ret = append_padded_entry(sfl, blk, srb, &sre);
+		if (ret == 0)
+			ret = append_padded_entry(sfl, blk, srb, &sre);
+		BUG_ON(ret != 0);
+
+		diff = target - le32_to_cpu(srb->entry_bytes);
+	}
+}
+
 /*
  * The caller is dropping an ino/id because the tracking rbtree is full.
  * This loses information so we can't return any entries at or after the
@@ -987,6 +1077,9 @@ int scoutfs_srch_rotate_log(struct super_block *sb,
 	struct scoutfs_key key;
 	int ret;
 
+	if (sfl->ref.blkno && !force && scoutfs_trigger(sb, SRCH_FORCE_LOG_ROTATE))
+		force = true;
+
 	if (sfl->ref.blkno == 0 ||
 	    (!force && le64_to_cpu(sfl->blocks) < SCOUTFS_SRCH_LOG_BLOCK_LIMIT))
 		return 0;
@@ -1462,7 +1555,7 @@ static int kway_merge(struct super_block *sb,
 		      struct scoutfs_block_writer *wri,
 		      struct scoutfs_srch_file *sfl,
 		      kway_get_t kway_get, kway_advance_t kway_adv,
-		      void **args, int nr)
+		      void **args, int nr, bool logs_input)
 {
 	DECLARE_SRCH_INFO(sb, srinf);
 	struct scoutfs_srch_block *srb = NULL;
@@ -1567,6 +1660,15 @@ static int kway_merge(struct super_block *sb,
 				blk++;
 			}
 
+			/* end sorted block on _SAFE offset for testing */
+			if (bl && le32_to_cpu(srb->entry_nr) == 1 && logs_input &&
+			    scoutfs_trigger(sb, SRCH_COMPACT_LOGS_PAD_SAFE)) {
+				pad_entries_at_safe(sfl, blk, srb);
+				scoutfs_block_put(sb, bl);
+				bl = NULL;
+				blk++;
+			}
+
 			scoutfs_inc_counter(sb, srch_compact_entry);
 
 		} else {
@@ -1609,6 +1711,8 @@ static int kway_merge(struct super_block *sb,
 			empty++;
 			ret = 0;
 		} else if (ret < 0) {
+			if (ret == -ENOANO) /* just testing trigger */
+				ret = 0;
 			goto out;
 		}
 
@@ -1816,7 +1920,7 @@ static int compact_logs(struct super_block *sb,
 	}
 
 	ret = kway_merge(sb, alloc, wri, &sc->out, kway_get_page, kway_adv_page,
-			 args, nr_pages);
+			 args, nr_pages, true);
 	if (ret < 0)
 		goto out;
 
@@ -1880,6 +1984,12 @@ static int kway_get_reader(struct super_block *sb,
 		return -EIO;
 	}
 
+	if (rdr->decoded_bytes == 0 && rdr->pos == SCOUTFS_SRCH_BLOCK_SAFE_BYTES &&
+	    scoutfs_trigger(sb, SRCH_MERGE_STOP_SAFE)) {
+		/* only used in testing */
+		return -ENOANO;
+	}
+
 	/* decode entry, possibly skipping start of the block */
 	while (rdr->decoded_bytes == 0 || rdr->pos < rdr->skip) {
 		ret = decode_entry(srb->entries + rdr->pos,
@@ -1969,7 +2079,7 @@ static int compact_sorted(struct super_block *sb,
 	}
 
 	ret = kway_merge(sb, alloc, wri, &sc->out, kway_get_reader,
-			 kway_adv_reader, args, nr);
+			 kway_adv_reader, args, nr, false);
 
 	sc->flags |= SCOUTFS_SRCH_COMPACT_FLAG_DONE;
 	for (i = 0; i < nr; i++) {
diff --git a/kmod/src/triggers.c b/kmod/src/triggers.c
index 0dcbfb7d..750a86b1 100644
--- a/kmod/src/triggers.c
+++ b/kmod/src/triggers.c
@@ -39,6 +39,9 @@ struct scoutfs_triggers {
 
 static char *names[] = {
 	[SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE] = "block_remove_stale",
+	[SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE] = "srch_compact_logs_pad_safe",
+	[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
+	[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
 	[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
 };
 
diff --git a/kmod/src/triggers.h b/kmod/src/triggers.h
index 40b40626..f2374dec 100644
--- a/kmod/src/triggers.h
+++ b/kmod/src/triggers.h
@@ -3,6 +3,9 @@
 enum scoutfs_trigger {
 	SCOUTFS_TRIGGER_BLOCK_REMOVE_STALE,
+	SCOUTFS_TRIGGER_SRCH_COMPACT_LOGS_PAD_SAFE,
+	SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
+	SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
 	SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
 	SCOUTFS_TRIGGER_NR,
 };
diff --git a/tests/golden/srch-safe-merge-pos b/tests/golden/srch-safe-merge-pos
new file mode 100644
index 00000000..369d4e04
--- /dev/null
+++ b/tests/golden/srch-safe-merge-pos
@@ -0,0 +1,81 @@
+== snapshot errors
+== arm compaction triggers
+trigger srch_compact_logs_pad_safe armed: 1
+trigger srch_merge_stop_safe armed: 1
+trigger srch_compact_logs_pad_safe armed: 1
+trigger srch_merge_stop_safe armed: 1
+trigger srch_compact_logs_pad_safe armed: 1
+trigger srch_merge_stop_safe armed: 1
+trigger srch_compact_logs_pad_safe armed: 1
+trigger srch_merge_stop_safe armed: 1
+trigger srch_compact_logs_pad_safe armed: 1
+trigger srch_merge_stop_safe armed: 1
+== force lots of small rotated log files for compaction
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+trigger srch_force_log_rotate armed: 1
+== wait for compaction
+== test and disarm compaction triggers
+== verify triggers and errors
+== cleanup
diff --git a/tests/sequence b/tests/sequence
index 9e126fea..fc656e2c 100644
--- a/tests/sequence
+++ b/tests/sequence
@@ -14,6 +14,7 @@ offline-extent-waiting.sh
 move-blocks.sh
 large-fragmented-free.sh
 enospc.sh
+srch-safe-merge-pos.sh
 srch-basic-functionality.sh
 simple-xattr-unit.sh
 totl-xattr-tag.sh
diff --git a/tests/tests/srch-safe-merge-pos.sh b/tests/tests/srch-safe-merge-pos.sh
new file mode 100644
index 00000000..b575d84e
--- /dev/null
+++ b/tests/tests/srch-safe-merge-pos.sh
@@ -0,0 +1,69 @@
+#
+# There was a bug where srch file compaction could get stuck if a
+# partial compaction finished at the specific _SAFE_BYTES offset in a
+# block.  Resuming from that position would return an error and
+# compaction would stop making forward progress.
+#
+# We use triggers to make sure that we create the circumstance where a
+# sorted srch block ends at the _SAFE_BYTES offset and that a merge
+# request stops with a partial block at that specific offset.  We then
+# watch error counters to make sure compaction doesn't get stuck.
+#
+
+# forcing rotation, so just a few
+NR=10
+SEQF="%.20g"
+COMPACT_NR=4
+
+echo "== snapshot errors"
+declare -a err
+for nr in $(t_fs_nrs); do
+	err[$nr]=$(t_counter srch_compact_error $nr)
+done
+
+echo "== arm compaction triggers"
+for nr in $(t_fs_nrs); do
+	t_trigger_arm srch_compact_logs_pad_safe $nr
+	t_trigger_arm srch_merge_stop_safe $nr
+done
+
+echo "== force lots of small rotated log files for compaction"
+sv=$(t_server_nr)
+iter=1
+while [ $iter -le $((COMPACT_NR * COMPACT_NR * COMPACT_NR)) ]; do
+	t_trigger_arm srch_force_log_rotate $sv
+
+	seq -f "f-$iter-$SEQF" 1 $NR | src/bulk_create_paths -S -d "$T_D0" > /dev/null
+	sync
+
+	test "$(t_trigger_get srch_force_log_rotate $sv)" == "0" || \
+		t_fail "srch_force_log_rotate didn't trigger"
+
+	((iter++))
+done
+
+echo "== wait for compaction"
+sleep 15
+
+echo "== test and disarm compaction triggers"
+pad=0
+merge_stop=0
+for nr in $(t_fs_nrs); do
+	test "$(t_trigger_get srch_compact_logs_pad_safe $nr)" == "0" && pad=1
+	t_trigger_set srch_compact_logs_pad_safe $nr 0
+	test "$(t_trigger_get srch_merge_stop_safe $nr)" == "0" && merge_stop=1
+	t_trigger_set srch_merge_stop_safe $nr 0
+done
+
+echo "== verify triggers and errors"
+test $pad == 1 || t_fail "srch_compact_logs_pad_safe didn't trigger"
+test $merge_stop == 1 || t_fail "srch_merge_stop_safe didn't trigger"
+for nr in $(t_fs_nrs); do
+	test "$(t_counter srch_compact_error $nr)" == "${err[$nr]}" || \
+		t_fail "srch_compact_error counter increased on mount $nr"
+done
+
+echo "== cleanup"
+find "$T_D0" -type f -name 'f-*' -delete
+
+t_pass

From c7e97eeb1f20df9d82f52d75b58bea594e67c3ca Mon Sep 17 00:00:00 2001
From: Zach Brown
Date: Tue, 7 Nov 2023 11:57:11 -0800
Subject: [PATCH 4/4] Allow srch compaction from _SAFE_BYTES

Compacting sorted srch files can take multiple transactions because
they can be very large.  Each transaction resumes at a byte offset in a
block where the previous transaction stopped.  The resuming code tests
that the byte offsets are sane but had a mistake in testing the offset
to skip to.
It returned an error if the compaction resumed from the last possible
safe offset for decoding entries.  If a system is unlucky enough to
have a compaction transaction stop at just this offset then compaction
stops making forward progress, as each attempt to resume returns an
error.

The fix allows continuation from this last safe offset while returning
errors for attempts to continue *past* that offset.  This matches all
the encoding code, which allows encoding the last entry in the block at
this offset.

Signed-off-by: Zach Brown
---
 kmod/src/srch.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kmod/src/srch.c b/kmod/src/srch.c
index 7af54e82..81bc2238 100644
--- a/kmod/src/srch.c
+++ b/kmod/src/srch.c
@@ -1978,7 +1978,7 @@ static int kway_get_reader(struct super_block *sb,
 	srb = rdr->bl->data;
 
 	if (rdr->pos > SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
-	    rdr->skip >= SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
+	    rdr->skip > SCOUTFS_SRCH_BLOCK_SAFE_BYTES ||
 	    rdr->skip >= le32_to_cpu(srb->entry_bytes)) {
 		/* XXX inconsistency */
 		return -EIO;
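
A minimal userspace sketch of the comparison this patch changes, for
convincing yourself of the boundary semantics (the SAFE_BYTES value
here is illustrative, not the real constant):

	#include <assert.h>

	#define SAFE_BYTES 65000u	/* stand-in for SCOUTFS_SRCH_BLOCK_SAFE_BYTES */

	/* old check: a resume at exactly SAFE_BYTES was rejected as EIO */
	static int resume_ok_old(unsigned int skip) { return skip < SAFE_BYTES; }

	/* fixed check: the last entry may start exactly at SAFE_BYTES */
	static int resume_ok_new(unsigned int skip) { return skip <= SAFE_BYTES; }

	int main(void)
	{
		assert(!resume_ok_old(SAFE_BYTES));	/* the stuck case */
		assert(resume_ok_new(SAFE_BYTES));	/* now resumable */
		assert(!resume_ok_new(SAFE_BYTES + 1));	/* still an error past it */
		return 0;
	}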