mirror of
https://github.com/versity/scoutfs.git
synced 2026-04-16 19:57:53 +00:00
Merge pull request #291 from versity/auke/orphan-log-merge
Auke/orphan log merge
This commit is contained in:
@@ -256,6 +256,14 @@ static void server_down(struct server_info *server)
|
||||
cmpxchg(&server->status, was, SERVER_DOWN);
|
||||
}
|
||||
|
||||
/*
 * Fill the caller's key so that it addresses the mounted client item
 * for the given rid.  Every key field that isn't named here is zeroed.
 */
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
{
	struct scoutfs_key mounted_key = {
		.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
		.skmc_rid = cpu_to_le64(rid),
	};

	*key = mounted_key;
}
|
||||
|
||||
/*
|
||||
* The per-holder allocation block use budget balances batching
|
||||
* efficiency and concurrency. The larger this gets, the fewer
|
||||
@@ -963,6 +971,28 @@ static int find_log_trees_item(struct super_block *sb,
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the given rid has a mounted_clients entry.
|
||||
*/
|
||||
static bool rid_is_mounted(struct super_block *sb, u64 rid)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
init_mounted_client_key(&key, rid);
|
||||
|
||||
mutex_lock(&server->mounted_clients_mutex);
|
||||
ret = scoutfs_btree_lookup(sb, &super->mounted_clients, &key, &iref);
|
||||
if (ret == 0)
|
||||
scoutfs_btree_put_iref(&iref);
|
||||
mutex_unlock(&server->mounted_clients_mutex);
|
||||
|
||||
return ret == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find the log_trees item with the greatest nr for each rid. Fills the
|
||||
* caller's log_trees and sets the key before the returned log_trees for
|
||||
@@ -1221,6 +1251,60 @@ static int do_finalize_ours(struct super_block *sb,
|
||||
* happens to arrive at just the right time. That's fine, merging will
|
||||
* ignore and tear down the empty input.
|
||||
*/
|
||||
|
||||
static int reclaim_open_log_tree(struct super_block *sb, u64 rid);
|
||||
|
||||
/*
|
||||
* Reclaim log trees for rids that have no mounted_clients entry.
|
||||
* They block merges by appearing active. reclaim_open_log_tree
|
||||
* may need multiple commits to drain allocators (-EINPROGRESS).
|
||||
*
|
||||
* The caller holds logs_mutex and a commit, both are dropped and
|
||||
* re-acquired around each reclaim call. Returns >0 if any orphans
|
||||
* were reclaimed so the caller can re-check state that may have
|
||||
* changed while the lock was dropped.
|
||||
*/
|
||||
static int reclaim_orphan_log_trees(struct super_block *sb, u64 rid,
|
||||
struct commit_hold *hold)
|
||||
{
|
||||
struct server_info *server = SCOUTFS_SB(sb)->server_info;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_key key;
|
||||
bool found = false;
|
||||
u64 orphan_rid;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
|
||||
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, <)) > 0) {
|
||||
|
||||
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) ||
|
||||
le64_to_cpu(lt.rid) == rid ||
|
||||
rid_is_mounted(sb, le64_to_cpu(lt.rid)))
|
||||
continue;
|
||||
|
||||
orphan_rid = le64_to_cpu(lt.rid);
|
||||
scoutfs_err(sb, "reclaiming orphan log trees for rid %016llx nr %llu",
|
||||
orphan_rid, le64_to_cpu(lt.nr));
|
||||
found = true;
|
||||
|
||||
do {
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
err = reclaim_open_log_tree(sb, orphan_rid);
|
||||
ret = server_apply_commit(sb, hold,
|
||||
err == -EINPROGRESS ? 0 : err);
|
||||
server_hold_commit(sb, hold);
|
||||
mutex_lock(&server->logs_mutex);
|
||||
} while (err == -EINPROGRESS && ret == 0);
|
||||
|
||||
if (ret < 0)
|
||||
break;
|
||||
}
|
||||
|
||||
return ret < 0 ? ret : found;
|
||||
}
|
||||
|
||||
#define FINALIZE_POLL_MIN_DELAY_MS 5U
|
||||
#define FINALIZE_POLL_MAX_DELAY_MS 100U
|
||||
#define FINALIZE_POLL_DELAY_GROWTH_PCT 150U
|
||||
@@ -1261,6 +1345,16 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
break;
|
||||
}
|
||||
|
||||
ret = reclaim_orphan_log_trees(sb, rid, hold);
|
||||
if (ret < 0) {
|
||||
err_str = "reclaiming orphan log trees";
|
||||
break;
|
||||
}
|
||||
if (ret > 0) {
|
||||
/* lock was dropped, re-check merge status */
|
||||
continue;
|
||||
}
|
||||
|
||||
/* look for finalized and other active log btrees */
|
||||
saw_finalized = false;
|
||||
others_active = false;
|
||||
@@ -1929,7 +2023,7 @@ static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
|
||||
/* only finalize, allowing merging, once the allocators are fully freed */
|
||||
if (ret == 0) {
|
||||
if (ret == 0 && !scoutfs_trigger(sb, RECLAIM_SKIP_FINALIZE)) {
|
||||
/* the transaction is no longer open */
|
||||
lt.commit_trans_seq = lt.get_trans_seq;
|
||||
|
||||
@@ -1981,7 +2075,8 @@ static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret)
|
||||
scoutfs_key_init_log_trees(&key, U64_MAX, U64_MAX);
|
||||
while ((ret = for_each_rid_last_lt(sb, &super->logs_root, &key, <)) > 0) {
|
||||
if ((le64_to_cpu(lt.get_trans_seq) > le64_to_cpu(lt.commit_trans_seq)) &&
|
||||
le64_to_cpu(lt.get_trans_seq) <= last_seq) {
|
||||
le64_to_cpu(lt.get_trans_seq) <= last_seq &&
|
||||
rid_is_mounted(sb, le64_to_cpu(lt.rid))) {
|
||||
last_seq = le64_to_cpu(lt.get_trans_seq) - 1;
|
||||
}
|
||||
}
|
||||
@@ -3533,14 +3628,6 @@ out:
|
||||
return scoutfs_net_response(sb, conn, cmd, id, ret, &nst, sizeof(nst));
|
||||
}
|
||||
|
||||
/*
 * Fill the caller's key so that it addresses the mounted client item
 * for the given rid.  Every key field that isn't named here is zeroed.
 */
static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
{
	struct scoutfs_key mounted_key = {
		.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
		.skmc_rid = cpu_to_le64(rid),
	};

	*key = mounted_key;
}
|
||||
|
||||
static bool invalid_mounted_client_item(struct scoutfs_btree_item_ref *iref)
|
||||
{
|
||||
return (iref->val_len != sizeof(struct scoutfs_mounted_client_btree_val));
|
||||
|
||||
@@ -45,6 +45,7 @@ static char *names[] = {
|
||||
[SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE] = "srch_force_log_rotate",
|
||||
[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
|
||||
[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
|
||||
[SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE] = "reclaim_skip_finalize",
|
||||
};
|
||||
|
||||
bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
|
||||
|
||||
@@ -8,6 +8,7 @@ enum scoutfs_trigger {
|
||||
SCOUTFS_TRIGGER_SRCH_FORCE_LOG_ROTATE,
|
||||
SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
|
||||
SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
|
||||
SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE,
|
||||
SCOUTFS_TRIGGER_NR,
|
||||
};
|
||||
|
||||
|
||||
@@ -123,6 +123,9 @@ t_filter_dmesg()
|
||||
re="$re|hrtimer: interrupt took .*"
|
||||
re="$re|clocksource: Long readout interval"
|
||||
|
||||
# orphan log trees reclaim is handled, not an error
|
||||
re="$re|scoutfs .* reclaiming orphan log trees"
|
||||
|
||||
# fencing tests force unmounts and trigger timeouts
|
||||
re="$re|scoutfs .* forcing unmount"
|
||||
re="$re|scoutfs .* reconnect timed out"
|
||||
|
||||
3
tests/golden/orphan-log-trees
Normal file
3
tests/golden/orphan-log-trees
Normal file
@@ -0,0 +1,3 @@
|
||||
== create orphan log_trees entry via trigger
|
||||
== verify orphan is reclaimed and merge completes
|
||||
== verify orphan reclaim was logged
|
||||
@@ -50,6 +50,7 @@ setup-error-teardown.sh
|
||||
resize-devices.sh
|
||||
change-devices.sh
|
||||
fence-and-reclaim.sh
|
||||
orphan-log-trees.sh
|
||||
quorum-heartbeat-timeout.sh
|
||||
orphan-inodes.sh
|
||||
mount-unmount-race.sh
|
||||
|
||||
52
tests/tests/orphan-log-trees.sh
Normal file
52
tests/tests/orphan-log-trees.sh
Normal file
@@ -0,0 +1,52 @@
|
||||
#
# Verify that a log_trees entry left behind by an unmounted rid is
# finalized and merged.
#
# A log_trees entry is an orphan when its rid has no mounted_clients
# entry, which can result from incomplete reclaim across server
# failovers.  The reclaim_skip_finalize trigger simulates that by
# making reclaim_open_log_tree skip its finalization step.
#

t_require_commands touch scoutfs
t_require_mounts 2

TIMEOUT=90

echo "== create orphan log_trees entry via trigger"
sv=$(t_server_nr)
cl=$(t_first_client_nr)
rid=$(t_mount_rid $cl)

touch "$T_D0/file" "$T_D1/file"
sync

# with the trigger armed on the server, reclaim won't finalize
t_trigger_arm_silent reclaim_skip_finalize $sv

# force unmounting the client makes the server fence and reclaim it,
# and the armed trigger leaves its log_trees unfinalized
t_force_umount $cl

# true once the fenced log has the "running rid" line for the client's rid
verify_fenced() {
	grep -q "running rid '$rid'" "$T_FENCED_LOG" 2>/dev/null
}
t_wait_until_timeout $TIMEOUT verify_fenced

# leave the server time to finish reclaim after the fence
sleep 5

# t_force_log_merge syncs all mounts, so the client has to be mounted
# again.  It comes back with a new rid, which leaves the old rid's
# log_trees as the orphan.
t_mount $cl

echo "== verify orphan is reclaimed and merge completes"
t_force_log_merge

echo "== verify orphan reclaim was logged"
dmesg | grep -q "reclaiming orphan log trees for rid $rid" || \
	t_fail "expected orphan reclaim message for rid $rid in dmesg"

t_pass
|
||||
Reference in New Issue
Block a user