Compare commits

...

5 Commits

Author SHA1 Message Date
Auke Kok
a87e92c1ad Check for fenced old leader in mounted test.
The old mounted check only considered begin/end quorum data, and
not whether the old leader that is now disconnected was fenced by
a new quorum leader.

Since this is the guaranteed case if the leader is disconnected
forcefully, this check must account for this case, so that quorum slots
can be modified if the node is permanently removed or replaced.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2025-12-05 15:28:45 -08:00
Zach Brown
e194714004 Merge pull request #264 from versity/auke/findmnt_retval
Findmnt returns 1 when no matching entries found
2025-12-03 14:29:31 -08:00
Auke Kok
8bb2f83cf9 Findmnt returns 1 when no matching entries found
Our local fence script attempts to interpret errors executing `findmnt`
as critical errors, but the program exit code explicitly returns
EXIT_FAILURE when the total number of matching mount entries is zero.

This can happen if the mount disappeared while we're attempting to
fence the mount, but, the scoutfs sysfs files are still in place as
we read them. It's a small window, but, it's a fork/exec plus full
parse of /etc/fstab, and a lot can happen in the 0.015s findmnt takes
on my system.

There are no exit codes from findmnt other than 0 and 1. At that
point, we can only assume that if the stdout is empty, the mount
isn't there anymore.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2025-12-02 12:55:11 -08:00
Zach Brown
6a9a6789d5 Merge pull request #267 from versity/clk/merge_enoent
Handle ENOENT when getting log merge status item
2025-12-02 09:34:28 -08:00
Chris Kirby
ee630b164f Handle ENOENT when getting log merge status item
Tests that cause client retries can fail with this error
from server_commit_log_merge():

error -2 committing log merge: getting merge status item

This can happen if the server has already committed and resolved
the log merge that is being retried. We can safely ignore ENOENT here
just like we do a few lines later.

Signed-off-by: Chris Kirby <ckirby@versity.com>
2025-12-01 08:58:24 -06:00
3 changed files with 53 additions and 21 deletions

View File

@@ -3036,7 +3036,13 @@ static int server_commit_log_merge(struct super_block *sb,
SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0,
&stat, sizeof(stat));
if (ret < 0) {
err_str = "getting merge status item";
/*
* During a retransmission, it's possible that the server
* already committed and resolved this log merge. ENOENT
* is expected in that case.
*/
if (ret != -ENOENT)
err_str = "getting merge status item";
goto out;
}

View File

@@ -27,8 +27,7 @@ for fs in /sys/fs/scoutfs/*; do
nr="$(quiet_cat $fs/data_device_maj_min)"
[ ! -d "$fs" -o "$fs_rid" != "$rid" ] && continue
mnt=$(findmnt -l -n -t scoutfs -o TARGET -S $nr) || \
echo_fail "findmnt -t scoutfs -S $nr failed"
mnt=$(findmnt -l -n -t scoutfs -o TARGET -S $nr)
[ -z "$mnt" ] && continue
if ! umount -qf "$mnt"; then

View File

@@ -198,11 +198,13 @@ int write_block_sync(int fd, u32 magic, __le64 fsid, u64 seq, u64 blkno,
*/
int meta_super_in_use(int meta_fd, struct scoutfs_super_block *meta_super)
{
struct scoutfs_quorum_block *qblk = NULL;
struct scoutfs_quorum_block *qblk[SCOUTFS_QUORUM_BLOCKS] = {NULL,};
struct scoutfs_quorum_block_event *beg;
struct scoutfs_quorum_block_event *end;
struct scoutfs_quorum_block_event *fence;
bool beg_was_fenced;
int ret = 0;
int i;
int i, j;
if (meta_super->mounted_clients.ref.blkno != 0) {
fprintf(stderr, "meta superblock mounted clients btree is not empty.\n");
@@ -210,36 +212,61 @@ int meta_super_in_use(int meta_fd, struct scoutfs_super_block *meta_super)
goto out;
}
/* check for active quorum slots */
/* read all blocks */
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) {
if (!quorum_slot_present(meta_super, i))
continue;
ret = read_block(meta_fd, SCOUTFS_QUORUM_BLKNO + i, SCOUTFS_BLOCK_SM_SHIFT,
(void **)&qblk);
(void **)&qblk[i]);
if (ret < 0) {
fprintf(stderr, "error reading quorum block for slot %u\n", i);
goto out;
}
}
beg = &qblk->events[SCOUTFS_QUORUM_EVENT_BEGIN];
end = &qblk->events[SCOUTFS_QUORUM_EVENT_END];
/* check for active quorum slots */
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++) {
if (!qblk[i])
continue;
if (le64_to_cpu(beg->write_nr) > le64_to_cpu(end->write_nr)) {
fprintf(stderr, "mount in quorum slot %u could still be running.\n"
" begin event: write_nr %llu timestamp %llu.%08u\n"
" end event: write_nr %llu timestamp %llu.%08u\n",
i, le64_to_cpu(beg->write_nr), le64_to_cpu(beg->ts.sec),
le32_to_cpu(beg->ts.nsec),
le64_to_cpu(end->write_nr), le64_to_cpu(end->ts.sec),
le32_to_cpu(end->ts.nsec));
ret = -EBUSY;
goto out;
beg = &qblk[i]->events[SCOUTFS_QUORUM_EVENT_BEGIN];
end = &qblk[i]->events[SCOUTFS_QUORUM_EVENT_END];
if (le64_to_cpu(beg->write_nr) <= le64_to_cpu(end->write_nr))
continue;
/* check if this term was fenced by others in a later term */
beg_was_fenced = false;
for (j = 0; j < SCOUTFS_QUORUM_BLOCKS; j++) {
if ((!qblk[j]) || (i == j))
continue;
fence = &qblk[j]->events[SCOUTFS_QUORUM_EVENT_FENCE];
if (le64_to_cpu(fence->term) > le64_to_cpu(beg->term)) {
beg_was_fenced = true;
break;
}
}
free(qblk);
qblk = NULL;
if (beg_was_fenced)
continue;
fprintf(stderr, "mount in quorum slot %u could still be running.\n"
" begin event: write_nr %llu timestamp %llu.%08u\n"
" end event: write_nr %llu timestamp %llu.%08u\n",
i, le64_to_cpu(beg->write_nr), le64_to_cpu(beg->ts.sec),
le32_to_cpu(beg->ts.nsec),
le64_to_cpu(end->write_nr), le64_to_cpu(end->ts.sec),
le32_to_cpu(end->ts.nsec));
ret = -EBUSY;
goto out;
}
out:
/* free any allocated blocks */
for (i = 0; i < SCOUTFS_QUORUM_BLOCKS; i++)
if (qblk[i] != NULL)
free(qblk[i]);
return ret;
}