Add force to prepare-empty-data-device

Signed-off-by: Zach Brown <zab@versity.com>
Merge pull request #145 from versity/zab/server_seqlock
2026-06-09 21:22:36 +00:00 · 2023-11-02 18:05:51 -07:00 · 2023-10-24 14:36:56 -07:00 · 2023-10-24 10:10:11 -07:00 · 2023-10-24 09:52:36 -07:00 · 2023-10-23 14:20:13 -07:00
14 changed files with 165 additions and 167 deletions
@@ -1,6 +1,22 @@
 Versity ScoutFS Release Notes
 =============================

+---
+v1.17
+\
+*Oct 23, 2023*
+
+Add support for EL8 generation kernels.
+
+---
+v1.16
+\
+*Oct 4, 2023*
+
+Fix an issue where the server could hang on startup if its persistent
+allocator structures were left in a specific degraded state by the
+previously active server.
+
 ---
 v1.15
 \
@@ -1078,7 +1078,7 @@ static unsigned long block_count_objects(struct shrinker *shrink, struct shrink_

 	scoutfs_inc_counter(sb, block_cache_count_objects);

-	return shrinker_min_t_long((u64)atomic_read(&binf->total_inserted));
+	return shrinker_min_long(atomic_read(&binf->total_inserted));
 }

 /*
@@ -1058,16 +1058,15 @@ static int symlink_item_ops(struct super_block *sb, enum symlink_ops op, u64 ino
 	return ret;
 }

-#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
 /*
- * Full a buffer with the null terminated symlink, point nd at it, and
- * return it so put_link can free it once the vfs is done.
+ * Fill a buffer with the null terminated symlink, and return it
+ * so callers can free it once the vfs is done.
 *
 * We chose to pay the runtime cost of per-call allocation and copy
 * overhead instead of wiring up symlinks to the page cache, storing
 * each small link in a full page, and later having to reclaim them.
 */
-static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+static void *scoutfs_get_link_target(struct dentry *dentry)
 {
 	struct inode *inode = dentry->d_inode;
 	struct super_block *sb = inode->i_sb;
@@ -1126,13 +1125,23 @@ out:
 	if (ret < 0) {
 		kfree(path);
 		path = ERR_PTR(ret);
-	} else {
-		nd_set_link(nd, path);
 	}
+
 	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
 	return path;
 }

+#ifdef KC_LINUX_HAVE_RHEL_IOPS_WRAPPER
+static void *scoutfs_follow_link(struct dentry *dentry, struct nameidata *nd)
+{
+	char *path;
+
+	path = scoutfs_get_link_target(dentry);
+	if (!IS_ERR_OR_NULL(path))
+		nd_set_link(nd, path);
+	return path;
+}
+
 static void scoutfs_put_link(struct dentry *dentry, struct nameidata *nd,
 			     void *cookie)
 {
@@ -1142,67 +1151,12 @@ static void scoutfs_put_link(struct dentry *dentry, struct nameidata *nd,
 #else
 static const char *scoutfs_get_link(struct dentry *dentry, struct inode *inode, struct delayed_call *done)
 {
-	struct super_block *sb = inode->i_sb;
-	struct scoutfs_lock *inode_lock = NULL;
-	char *path = NULL;
-	loff_t size;
-	int ret;
+	char *path;

-	ret = scoutfs_lock_inode(sb, SCOUTFS_LOCK_READ,
-				 SCOUTFS_LKF_REFRESH_INODE, inode, &inode_lock);
-	if (ret)
-		return ERR_PTR(ret);
-
-	size = i_size_read(inode);
-
-	if (size == 0 || size > SCOUTFS_SYMLINK_MAX_SIZE) {
-		scoutfs_corruption(sb, SC_SYMLINK_INODE_SIZE,
-				   corrupt_symlink_inode_size,
-				   "ino %llu size %llu",
-				   scoutfs_ino(inode), (u64)size);
-		ret = -EIO;
-		goto out;
-	}
-
-	/* unlikely, but possible I suppose */
-	if (size > PATH_MAX) {
-		ret = -ENAMETOOLONG;
-		goto out;
-	}
-
-	path = kmalloc(size, GFP_NOFS);
-	if (!path) {
-		ret = -ENOMEM;
-		goto out;
-	}
-
-	ret = symlink_item_ops(sb, SYM_LOOKUP, scoutfs_ino(inode), inode_lock,
-			       path, size);
-
-	if (ret == -ENOENT) {
-		scoutfs_corruption(sb, SC_SYMLINK_MISSING_ITEM,
-				   corrupt_symlink_missing_item,
-				   "ino %llu size %llu", scoutfs_ino(inode),
-				   size);
-		ret = -EIO;
-
-	} else if (ret == 0 && path[size - 1]) {
-		scoutfs_corruption(sb, SC_SYMLINK_NOT_NULL_TERM,
-				   corrupt_symlink_not_null_term,
-				   "ino %llu last %u",
-				   scoutfs_ino(inode), path[size - 1]);
-		ret = -EIO;
-	}
-
-	if (ret != -EIO)
+	path = scoutfs_get_link_target(dentry);
+	if (!IS_ERR_OR_NULL(path))
 		set_delayed_call(done, kfree_link, path);

-out:
-	if (ret < 0) {
-		kfree(path);
-		path = ERR_PTR(ret);
-	}
-	scoutfs_unlock(sb, inode_lock, SCOUTFS_LOCK_READ);
 	return path;
 }
 #endif
@@ -2541,7 +2541,7 @@ static unsigned long item_cache_count_objects(struct shrinker *shrink,

 	scoutfs_inc_counter(sb, item_cache_count_objects);

-	return shrinker_min_t_long((u64)(cinf->lru_pages));
+	return shrinker_min_long(cinf->lru_pages);
 }

 /*
@@ -1409,7 +1409,7 @@ static unsigned long lock_count_objects(struct shrinker *shrink,

 	scoutfs_inc_counter(sb, lock_count_objects);

-	return shrinker_min_t_long((u64)(linfo->lru_nr));
+	return shrinker_min_long(linfo->lru_nr);
 }

 /*
@@ -1896,8 +1896,9 @@ DEFINE_EVENT(scoutfs_server_client_count_class, scoutfs_server_client_down,

 DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
        TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
-		 u32 avail_before, u32 freed_before, int exceeded),
-        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded),
+		 u32 avail_before, u32 freed_before, int committing, int exceeded),
+        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing,
+		exceeded),
        TP_STRUCT__entry(
 		SCSB_TRACE_FIELDS
 		__field(int, holding)
@@ -1905,6 +1906,7 @@ DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
 		__field(int, nr_holders)
 		__field(__u32, avail_before)
 		__field(__u32, freed_before)
+		__field(int, committing)
 		__field(int, exceeded)
        ),
        TP_fast_assign(
@@ -1914,31 +1916,33 @@ DECLARE_EVENT_CLASS(scoutfs_server_commit_users_class,
 		__entry->nr_holders = nr_holders;
 		__entry->avail_before = avail_before;
 		__entry->freed_before = freed_before;
+		__entry->committing = !!committing;
 		__entry->exceeded = !!exceeded;
        ),
-	TP_printk(SCSBF" holding %u applying %u nr %u avail_before %u freed_before %u exceeded %u",
+	TP_printk(SCSBF" holding %u applying %u nr %u avail_before %u freed_before %u committing %u exceeded %u",
 		  SCSB_TRACE_ARGS, __entry->holding, __entry->applying, __entry->nr_holders,
-		  __entry->avail_before, __entry->freed_before, __entry->exceeded)
+		  __entry->avail_before, __entry->freed_before, __entry->committing,
+		  __entry->exceeded)
 );
 DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_hold,
        TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
-		 u32 avail_before, u32 freed_before, int exceeded),
-        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
+		 u32 avail_before, u32 freed_before, int committing, int exceeded),
+        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
 );
 DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_apply,
        TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
-		 u32 avail_before, u32 freed_before, int exceeded),
-        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
+		 u32 avail_before, u32 freed_before, int committing, int exceeded),
+        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
 );
 DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_start,
        TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
-		 u32 avail_before, u32 freed_before, int exceeded),
-        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
+		 u32 avail_before, u32 freed_before, int committing, int exceeded),
+        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
 );
 DEFINE_EVENT(scoutfs_server_commit_users_class, scoutfs_server_commit_end,
        TP_PROTO(struct super_block *sb, int holding, int applying, int nr_holders,
-		 u32 avail_before, u32 freed_before, int exceeded),
-        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, exceeded)
+		 u32 avail_before, u32 freed_before, int committing, int exceeded),
+        TP_ARGS(sb, holding, applying, nr_holders, avail_before, freed_before, committing, exceeded)
 );

 #define slt_symbolic(mode)						\
@@ -67,6 +67,7 @@ struct commit_users {
 	unsigned int nr_holders;
 	u32 avail_before;
 	u32 freed_before;
+	bool committing;
 	bool exceeded;
 };

@@ -84,12 +85,13 @@ do {												\
 	__typeof__(cusers) _cusers = (cusers);							\
 	trace_scoutfs_server_commit_##which(sb, !list_empty(&_cusers->holding),			\
 		!list_empty(&_cusers->applying), _cusers->nr_holders, _cusers->avail_before,	\
-		_cusers->freed_before, _cusers->exceeded);					\
+		_cusers->freed_before, _cusers->committing, _cusers->exceeded);			\
 } while (0)

 struct server_info {
 	struct super_block *sb;
 	spinlock_t lock;
+	seqlock_t seqlock;
 	wait_queue_head_t waitq;

 	struct workqueue_struct *wq;
@@ -131,11 +133,9 @@ struct server_info {
 	struct mutex mounted_clients_mutex;

 	/* stable super stored from commits, given in locks and rpcs */
-	seqcount_t stable_seqcount;
 	struct scoutfs_super_block stable_super;

 	/* serializing get and set of volume options */
-	seqcount_t volopt_seqcount;
 	struct mutex volopt_mutex;
 	struct scoutfs_volume_options volopt;

@@ -181,7 +181,7 @@ static bool get_volopt_val(struct server_info *server, int nr, u64 *val)
 	unsigned seq;

 	do {
-		seq = read_seqcount_begin(&server->volopt_seqcount);
+		seq = read_seqbegin(&server->seqlock);
 		if ((le64_to_cpu(server->volopt.set_bits) & bit)) {
 			is_set = true;
 			*val = le64_to_cpup(opt);
@@ -189,7 +189,7 @@ static bool get_volopt_val(struct server_info *server, int nr, u64 *val)
 			is_set = false;
 			*val = 0;
 		};
-	} while (read_seqcount_retry(&server->volopt_seqcount, seq));
+	} while (read_seqretry(&server->seqlock, seq));

 	return is_set;
 }
@@ -282,6 +282,14 @@ struct commit_hold {
 * per-holder allocation consumption tracking.   The best we can do is
 * flag all the current holders so that as they release we can see
 * everyone involved in crossing the limit.
+ *
+ * The consumption of space to record freed blocks is tricky.  The
+ * freed_before value was the space available as the holder started.
+ * But that happens before we actually dirty the first block in the
+ * freed list.  If that block is too full then we just allocate a new
+ * empty first block.  In that case the current remaining here can be a
+ * lot more than the initial freed_before.  We account for that and
+ * treat freed_before as the maximum capacity.
 */
 static void check_holder_budget(struct super_block *sb, struct server_info *server,
 				struct commit_users *cusers)
@@ -301,8 +309,13 @@ static void check_holder_budget(struct super_block *sb, struct server_info *serv
 		return;

 	scoutfs_alloc_meta_remaining(&server->alloc, &avail_now, &freed_now);
+
 	avail_used = cusers->avail_before - avail_now;
-	freed_used = cusers->freed_before - freed_now;
+	if (freed_now < cusers->freed_before)
+		freed_used = cusers->freed_before - freed_now;
+	else
+		freed_used = SCOUTFS_ALLOC_LIST_MAX_BLOCKS - freed_now;
+
 	budget = cusers->nr_holders * COMMIT_HOLD_ALLOC_BUDGET;
 	if (avail_used <= budget && freed_used <= budget)
 		return;
@@ -325,31 +338,18 @@ static void check_holder_budget(struct super_block *sb, struct server_info *serv
 /*
 * We don't have per-holder consumption.   We allow commit holders as
 * long as the total budget of all the holders doesn't exceed the alloc
- * resources that were available
+ * resources that were available.  If a hold is waiting for budget
+ * availability in the allocators then we try and kick off a commit to
+ * fill and use the next allocators after the current transaction.
 */
-static bool commit_alloc_has_room(struct server_info *server, struct commit_users *cusers,
-				  unsigned int more_holders)
-{
-	u32 avail_before;
-	u32 freed_before;
-	u32 budget;
-
-	if (cusers->nr_holders > 0) {
-		avail_before = cusers->avail_before;
-		freed_before = cusers->freed_before;
-	} else {
-		scoutfs_alloc_meta_remaining(&server->alloc, &avail_before, &freed_before);
-	}
-
-	budget = (cusers->nr_holders + more_holders) * COMMIT_HOLD_ALLOC_BUDGET;
-
-	return avail_before >= budget && freed_before >= budget;
-}
-
 static bool hold_commit(struct super_block *sb, struct server_info *server,
 			struct commit_users *cusers, struct commit_hold *hold)
 {
-	bool held = false;
+	bool has_room;
+	bool held;
+	u32 budget;
+	u32 av;
+	u32 fr;

 	spin_lock(&cusers->lock);

@@ -357,19 +357,39 @@ static bool hold_commit(struct super_block *sb, struct server_info *server,

 	check_holder_budget(sb, server, cusers);

+	if (cusers->nr_holders == 0) {
+		scoutfs_alloc_meta_remaining(&server->alloc, &av, &fr);
+	} else {
+		av = cusers->avail_before;
+		fr = cusers->freed_before;
+	}
+
 	/* +2 for our additional hold and then for the final commit work the server does */
-	if (list_empty(&cusers->applying) && commit_alloc_has_room(server, cusers, 2)) {
-		scoutfs_alloc_meta_remaining(&server->alloc, &hold->avail, &hold->freed);
+	budget = (cusers->nr_holders + 2) * COMMIT_HOLD_ALLOC_BUDGET;
+	has_room = av >= budget && fr >= budget;
+	/* checking applying so holders drain once an apply caller starts waiting */
+	held = !cusers->committing && has_room && list_empty(&cusers->applying);
+
+	if (held) {
 		if (cusers->nr_holders == 0) {
-			cusers->avail_before = hold->avail;
-			cusers->freed_before = hold->freed;
+			cusers->avail_before = av;
+			cusers->freed_before = fr;
+			hold->avail = av;
+			hold->freed = fr;
 			cusers->exceeded = false;
+		} else {
+			scoutfs_alloc_meta_remaining(&server->alloc, &hold->avail, &hold->freed);
 		}
+
 		hold->exceeded = false;
 		hold->start = ktime_get();
 		list_add_tail(&hold->entry, &cusers->holding);
+
 		cusers->nr_holders++;
-		held = true;
+
+	} else if (!has_room && cusers->nr_holders == 0 && !cusers->committing) {
+		cusers->committing = true;
+		queue_work(server->wq, &server->commit_work);
 	}

 	spin_unlock(&cusers->lock);
@@ -403,7 +423,6 @@ static int server_apply_commit(struct super_block *sb, struct commit_hold *hold,
 	DECLARE_SERVER_INFO(sb, server);
 	struct commit_users *cusers = &server->cusers;
 	struct timespec ts;
-	bool start_commit;

 	spin_lock(&cusers->lock);

@@ -424,12 +443,14 @@ static int server_apply_commit(struct super_block *sb, struct commit_hold *hold,
 		list_del_init(&hold->entry);
 		hold->ret = err;
 	}
-	cusers->nr_holders--;
-	start_commit = cusers->nr_holders == 0 && !list_empty(&cusers->applying);
-	spin_unlock(&cusers->lock);

-	if (start_commit)
+	cusers->nr_holders--;
+	if (cusers->nr_holders == 0 && !cusers->committing && !list_empty(&cusers->applying)) {
+		cusers->committing = true;
 		queue_work(server->wq, &server->commit_work);
+	}
+
+	spin_unlock(&cusers->lock);

 	wait_event(cusers->waitq, list_empty_careful(&hold->entry));
 	smp_rmb(); /* entry load before ret */
@@ -438,8 +459,8 @@ static int server_apply_commit(struct super_block *sb, struct commit_hold *hold,

 /*
 * Start a commit from the commit work.  We should only have been queued
- * while a holder is waiting to apply after all active holders have
- * finished.
+ * while there are no active holders and someone started the commit.
+ * There may or may not be blocked apply callers waiting for the result.
 */
 static int commit_start(struct super_block *sb, struct commit_users *cusers)
 {
@@ -448,7 +469,7 @@ static int commit_start(struct super_block *sb, struct commit_users *cusers)
 	/* make sure holders held off once commit started */
 	spin_lock(&cusers->lock);
 	TRACE_COMMIT_USERS(sb, cusers, start);
-	if (WARN_ON_ONCE(list_empty(&cusers->applying) || cusers->nr_holders != 0))
+	if (WARN_ON_ONCE(!cusers->committing || cusers->nr_holders != 0))
 		ret = -EINVAL;
 	spin_unlock(&cusers->lock);

@@ -471,6 +492,7 @@ static void commit_end(struct super_block *sb, struct commit_users *cusers, int
 	smp_wmb(); /* ret stores before list updates */
 	list_for_each_entry_safe(hold, tmp, &cusers->applying, entry)
 		list_del_init(&hold->entry);
+	cusers->committing = false;
 	spin_unlock(&cusers->lock);

 	wake_up(&cusers->waitq);
@@ -483,7 +505,7 @@ static void get_stable(struct super_block *sb, struct scoutfs_super_block *super
 	unsigned int seq;

 	do {
-		seq = read_seqcount_begin(&server->stable_seqcount);
+		seq = read_seqbegin(&server->seqlock);
 		if (super)
 			*super = server->stable_super;
 		if (roots) {
@@ -491,7 +513,7 @@ static void get_stable(struct super_block *sb, struct scoutfs_super_block *super
 			roots->logs_root = server->stable_super.logs_root;
 			roots->srch_root = server->stable_super.srch_root;
 		}
-	} while (read_seqcount_retry(&server->stable_seqcount, seq));
+	} while (read_seqretry(&server->seqlock, seq));
 }

 u64 scoutfs_server_seq(struct super_block *sb)
@@ -525,11 +547,9 @@ void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq)

 static void set_stable_super(struct server_info *server, struct scoutfs_super_block *super)
 {
-	preempt_disable();
-	write_seqcount_begin(&server->stable_seqcount);
+	write_seqlock(&server->seqlock);
 	server->stable_super = *super;
-	write_seqcount_end(&server->stable_seqcount);
-	preempt_enable();
+	write_sequnlock(&server->seqlock);
 }

 /*
@@ -543,7 +563,7 @@ static void set_stable_super(struct server_info *server, struct scoutfs_super_bl
 * implement commits with a single pending work func.
 *
 * Processing paths hold the commit while they're making multiple
- * dependent changes.  When they're done and want it persistent they add
+ * dependent changes.  When they're done and want it persistent they
 * queue the commit work.  This work runs, performs the commit, and
 * wakes all the applying waiters with the result.  Readers can run
 * concurrently with these commits.
@@ -3050,9 +3070,9 @@ static int server_get_volopt(struct super_block *sb, struct scoutfs_net_connecti
 	}

 	do {
-		seq = read_seqcount_begin(&server->volopt_seqcount);
+		seq = read_seqbegin(&server->seqlock);
 		volopt = server->volopt;
-	} while (read_seqcount_retry(&server->volopt_seqcount, seq));
+	} while (read_seqretry(&server->seqlock, seq));

 out:
 	return scoutfs_net_response(sb, conn, cmd, id, ret, &volopt, sizeof(volopt));
@@ -3121,12 +3141,12 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti
 apply:
 	ret = server_apply_commit(sb, &hold, ret);

-	write_seqcount_begin(&server->volopt_seqcount);
+	write_seqlock(&server->seqlock);
 	if (ret == 0)
 		server->volopt = super->volopt;
 	else
 		super->volopt = server->volopt;
-	write_seqcount_end(&server->volopt_seqcount);
+	write_sequnlock(&server->seqlock);

 	mutex_unlock(&server->volopt_mutex);
 out:
@@ -3169,12 +3189,12 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec

 	ret = server_apply_commit(sb, &hold, ret);

-	write_seqcount_begin(&server->volopt_seqcount);
+	write_seqlock(&server->seqlock);
 	if (ret == 0)
 		server->volopt = super->volopt;
 	else
 		super->volopt = server->volopt;
-	write_seqcount_end(&server->volopt_seqcount);
+	write_sequnlock(&server->seqlock);

 	mutex_unlock(&server->volopt_mutex);
 out:
@@ -4313,9 +4333,9 @@ static void scoutfs_server_worker(struct work_struct *work)
 	}

 	/* update volume options early, possibly for use during startup */
-	write_seqcount_begin(&server->volopt_seqcount);
+	write_seqlock(&server->seqlock);
 	server->volopt = super->volopt;
-	write_seqcount_end(&server->volopt_seqcount);
+	write_sequnlock(&server->seqlock);

 	atomic64_set(&server->seq_atomic, le64_to_cpu(super->seq));
 	set_stable_super(server, super);
@@ -4455,6 +4475,7 @@ int scoutfs_server_setup(struct super_block *sb)

 	server->sb = sb;
 	spin_lock_init(&server->lock);
+	seqlock_init(&server->seqlock);
 	init_waitqueue_head(&server->waitq);
 	INIT_WORK(&server->work, scoutfs_server_worker);
 	server->status = SERVER_DOWN;
@@ -4469,8 +4490,6 @@ int scoutfs_server_setup(struct super_block *sb)
 	INIT_WORK(&server->log_merge_free_work, server_log_merge_free_work);
 	mutex_init(&server->srch_mutex);
 	mutex_init(&server->mounted_clients_mutex);
-	seqcount_init(&server->stable_seqcount);
-	seqcount_init(&server->volopt_seqcount);
 	mutex_init(&server->volopt_mutex);
 	INIT_WORK(&server->fence_pending_recov_work, fence_pending_recov_worker);
 	INIT_DELAYED_WORK(&server->reclaim_dwork, reclaim_worker);
@@ -23,9 +23,9 @@ static inline void down_write_two(struct rw_semaphore *a,
 * ~0UL values. Hence, we cap count to ~0L, which is arbitrarily high
 * enough to avoid it.
 */
-static inline unsigned long shrinker_min_t_long(unsigned long count)
+static inline long shrinker_min_long(long count)
 {
-	return min_t(u64, count, LONG_MAX);
+	return min(count, LONG_MAX);
 }

 #endif
@@ -35,7 +35,7 @@ t_fail()
 t_quiet()
 {
 	echo "# $*" >> "$T_TMPDIR/quiet.log"
-	"$@" > "$T_TMPDIR/quiet.log" 2>&1 || \
+	"$@" >> "$T_TMPDIR/quiet.log" 2>&1 || \
 		t_fail "quiet command failed"
 }

@@ -88,5 +88,8 @@ t_filter_dmesg()
 	# change-devices causes loop device resizing
 	re="$re|loop[0-9].* detected capacity change from.*"

+	# ignore systemd-journald messages, e.g. journal rotation
+	re="$re|systemd-journald.*"
+
 	egrep -v "($re)" 
 }
@@ -11,8 +11,13 @@ FILE="$T_D0/file"
 # final block as we truncated past it.
 #
 echo "== truncate writes zeroed partial end of file block"
-yes | dd of="$FILE" bs=8K count=1 status=none
+yes | dd of="$FILE" bs=8K count=1 status=none iflag=fullblock
 sync
+
+# without iflag=fullblock the file occasionally ends up being only
+# 4K, so to be safe we check the starting size once before truncating
+test `stat --printf="%s\n" "$FILE"` -eq 8192 || t_fail "test file incorrect start size"
+
 truncate -s 6K "$FILE"
 truncate -s 12K "$FILE"
 echo 3 > /proc/sys/vm/drop_caches
@@ -7,14 +7,11 @@ t_require_mounts 2

 #
 # Make sure that all mounts can read the results of a write from each
-# mount.  And make sure that the greatest of all the written seqs is
-# visible after the writes were commited by remote reads.
+# mount.
 #
 check_read_write()
 {
 	local expected
-	local greatest=0
-	local seq
 	local path
 	local saw
 	local w
@@ -25,11 +22,6 @@ check_read_write()
 		eval path="\$T_D${w}/written"
 		echo "$expected" > "$path"

-		seq=$(scoutfs stat -s meta_seq $path)
-		if [ "$seq" -gt "$greatest" ]; then
-			greatest=$seq
-		fi
-
 		for r in $(t_fs_nrs); do
 			eval path="\$T_D${r}/written"
 			saw=$(cat "$path")
@@ -38,11 +30,6 @@ check_read_write()
 			fi
 		done
 	done
-
-	seq=$(scoutfs statfs -s committed_seq -p $T_D0)
-	if [ "$seq" -lt "$greatest" ]; then
-		echo "committed_seq $seq less than greatest $greatest"
-	fi
 }

 # verify that fenced ran our testing fence script
@@ -2,6 +2,8 @@
 # Some basic tests of online resizing metadata and data devices.
 #

+t_require_commands bc
+
 statfs_total() {
 	local single="total_$1_blocks"
 	local mnt="$2"
@@ -38,6 +38,7 @@ struct prepare_empty_data_dev_args {
 	char *meta_device;
 	char *data_device;
 	bool check;
+	bool force;
 };

 static int do_prepare_empty_data_dev(struct prepare_empty_data_dev_args *args)
@@ -77,20 +78,22 @@ static int do_prepare_empty_data_dev(struct prepare_empty_data_dev_args *args)
 		goto out;
 	}

-	ret = meta_super_in_use(meta_fd, meta_super);
-	if (ret < 0) {
-		if (ret == -EBUSY)
-			fprintf(stderr, "The filesystem must be fully recovered and cleanly unmounted to determine if the data device is empty.\n");
-		goto out;
-	}
+	if (!args->force) {
+		ret = meta_super_in_use(meta_fd, meta_super);
+		if (ret < 0) {
+			if (ret == -EBUSY)
+				fprintf(stderr, "The filesystem must be fully recovered and cleanly unmounted to determine if the data device is empty.\n");
+			goto out;
+		}

-	in_use = (le64_to_cpu(meta_super->total_data_blocks) - SCOUTFS_DATA_DEV_START_BLKNO) -
-		 le64_to_cpu(meta_super->data_alloc.total_len);
-	if (in_use) {
-		fprintf(stderr, "Data block allocator metadata shows "SIZE_FMT" data blocks used by files.  They must be removed, truncated, or released before a new empty data device can be used.\n",
-		       SIZE_ARGS(in_use, SCOUTFS_BLOCK_SM_SIZE));
-		ret = -EINVAL;
-		goto out;
+		in_use = (le64_to_cpu(meta_super->total_data_blocks) - SCOUTFS_DATA_DEV_START_BLKNO) -
+			 le64_to_cpu(meta_super->data_alloc.total_len);
+		if (in_use) {
+			fprintf(stderr, "Data block allocator metadata shows "SIZE_FMT" data blocks used by files.  They must be removed, truncated, or released before a new empty data device can be used.\n",
+			       SIZE_ARGS(in_use, SCOUTFS_BLOCK_SM_SIZE));
+			ret = -EINVAL;
+			goto out;
+		}
 	}

 	if (args->data_device) {
@@ -193,6 +196,9 @@ static int parse_opt(int key, char *arg, struct argp_state *state)
 	case 'c':
 		args->check = true;
 		break;
+	case 'f':
+		args->force = true;
+		break;
 	case ARGP_KEY_ARG:
 		if (!args->meta_device)
 			args->meta_device = strdup_or_error(state, arg);
@@ -216,6 +222,7 @@ static int parse_opt(int key, char *arg, struct argp_state *state)

 static struct argp_option options[] = {
 	{ "check", 'c', NULL, 0, "Only check for errors and do not write", },
+	{ "force", 'f', NULL, 0, "Do not check that super is in use, nor if blocks are in use",},
 	{ NULL }
 };

@@ -230,6 +237,7 @@ static int prepare_empty_data_dev_cmd(int argc, char *argv[])
 {
 	struct prepare_empty_data_dev_args prepare_empty_data_dev_args = { 
 		.check = false,
+		.force = false,
 	};
 	int ret;
Author	SHA1	Message	Date
Zach Brown	a7ed6bf242	Add force to prepare-empty-data-device Signed-off-by: Zach Brown <zab@versity.com>	2023-11-02 18:05:51 -07:00
Zach Brown	b56b8e502c	Merge pull request #145 from versity/zab/server_seqlock Use seqlock instead of seqcount in server	2023-10-24 14:36:56 -07:00
Zach Brown	5ff372561d	Merge pull request #146 from versity/auke/truncatedd Ensure dd creates the full 8K input test file.	2023-10-24 10:10:11 -07:00
Zach Brown	bdecee5e5d	Merge pull request #147 from versity/zab/v1.17 v1.17 Release	2023-10-24 09:52:36 -07:00
Zach Brown	a9281b75fa	v1.17 Release Finish the release notes for the 1.17 release. Signed-off-by: Zach Brown <zab@versity.com>	2023-10-23 14:20:13 -07:00
Auke Kok	707e1b2d59	Ensure dd creates the full 8K input test file. Without `iflag=fullblock` we encounter sporadic cases where the input file to the truncate test isn't fully written to 8K and ends up to be only 4K. The subsequent truncate tests then fail. We add a check to the input test file size just to be sure in the future. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-23 17:04:19 -04:00
Zach Brown	006f429f72	Use seqlock instead of seqcount in server The server had a few lower level seqcounts that it used to protect state. One user got it wrong by forgetting to disable pre-emption around writers. Debug kernels warned as write_seqcount_begin() was called without preemption disabled. We fix that user and make it easier to get right in the future by having one higher level seqlock and using that consistently for seq read begin/retry and write lock/unlock patterns. Signed-off-by: Zach Brown <zab@versity.com>	2023-10-19 15:43:15 -07:00
Zach Brown	d71583bcf5	Merge pull request #134 from versity/auke/tests-add-bc Add `bc` to test requirement.	2023-10-16 15:12:22 -07:00
Zach Brown	bb835b948d	Merge pull request #138 from versity/auke/ignore-journald-rotate Filter out journald rotate messages.	2023-10-16 14:54:56 -07:00
Zach Brown	bcdc4f5423	Merge pull request #143 from versity/zab/t_quiet_appends t_quiet appends command output	2023-10-12 11:58:50 -07:00
Auke Kok	7ceb215c91	Filter out journald rotate messages. On el9 distros systemd-journald will log rotation events into kmesg. Since the default logs on VM images are transient only, they are rotated several times during a single test cycle, causing test failures. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-12 12:27:41 -04:00
Auke Kok	d4d2b0850b	Add `bc` to test requirement. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-12 12:21:29 -04:00
Zach Brown	cf05aefe50	t_quiet appends command output The t_quiet test command execution helper was constantly truncating the quiet.log with the output of each command. It was meant to show each command and its output as they're run. Signed-off-by: Zach Brown <zab@versity.com>	2023-10-11 14:50:04 -07:00
Zach Brown	9f06065ce7	Merge pull request #123 from versity/auke/el8 el8 support	2023-10-10 10:37:43 -07:00
Ben McClelland	d2c2fece2a	Add rpm spec file support for el8 builds The rpmbuild support files no longer define the previously used kernel module macros. This carves out the differences between el7 and el8 with conditionals based on the distro we are building for. Signed-off-by: Ben McClelland <ben.mcclelland@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	0e1e55d25b	Ignore `last` flag output by filefrag. New versions of filefrag will output the presence of the `last` flag as well, but we don't care. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	293cee9554	Don't use static struct initializer. In rhel7 this is a nested struct with ktime_t. However, in rhel8 ktime_t is a simple s64, and not a union, and thus we can't do this as easily. Just memset it. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	a7704e0b56	Allow the kernel to return -ESTALE from orphan-inode test In newer kernels, we always get -ESTALE because the inode has been marked immediately as deleting. Since this is expected behavior we should not fail the test here on this error value. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	819df4be60	Skip userns based testing for RHEL8. In RHEL7, this was skipped automatically. In RHEL8, we don't support the needed passing through of the actual user namespace into our ACL set/get handlers. Once we get around v5.11 or so, the handlers are automatically passed the namespace. Until then, skip this test. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	592e3d471f	Use `.prefix` for POSIX acl instead of `.name`. New kernels expect to do a partial match when a .prefix is used here, and provide a .name member in case matching should look at the whole string. This is what we want. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	29160b0bc6	Don't cache ACL's in newer kernels. The caller takes care of caching for us. Us doing caching messes with memory management of cached ACLs and breaks. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	11c041d2ea	New versions of getfattr will quote empty attr values. Instead of messing with quotes and using grep for the correct xattr name, directly query the value of the xattr being tested only, and compare that to the input. Side effect is that this is significantly simpler and faster. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	46e8dfe884	Account for coreutils using statx() call instead of stat() `stat` internally switched to using the new `statx` syscall, and this affects the output of perror() subsequently. This is the same error as before (and expected). Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	a9beeaf5da	Account for e2fsprogs output format changes. The filefrag program in e2fsprogs-v1.42.10-10-g29758d2f now includes an extra flag, and changes how the `unknown` flag is output. We essentially adjust for this "new" golden value on the fly if we encounter it. We don't expect future changes to the output. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	205d8ebd4a	Account for quoting style changes in coreutils. In older versions of coreutils, quoted strings are occasionally output using utf-8 open/close single quotes. New versions of coreutils will exclusively use the ASCII single quote character "'" when the output is not a TTY - as is the case with all test scripts. We can avoid most of these problems by always setting LC_ALL=C in testing, however. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	e580f33f82	Ignore loop device resizing messages. These occasionally trigger during tests. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	d480243c11	Support .read/write_iter callbacks in lieu of .aio_read/write The aio_read and aio_write callbacks are no longer used by newer kernels which now uses iter based readers and writers. We can avoid implementing plain .read and .write as an iter will be generated when needed for us automatically. We add a new data_wait_check_iter() function accordingly. With these methods removed from the kernel, the el8 kernel no longer uses the extended ops wrapper struct and is much closer now to upstream. As a result, a lot of methods are moving around from inode_dir_operations to and from inode_file_operations etc, and perhaps things will look a bit more structured as a result. As a result, we need a slightly different data_wait_check() that accounts for the iter and offset properly. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	bafecbc604	Implement .readahead for address_space_operations (aops). .readpages is obsolete in el8 kernels. We implement the .readahead method instead which is passed a struct readahead_control. We use the readahead_page(rac) accessor to retrieve page by page from the struct. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	65be4682e3	implement generic_file_buffered_write() This function is removed in el8 therefore we need to implement it ourselves now. Copy it. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	e88845d185	(un)register_hotcpu_notifier is obsolete v4.9-12228-g530e9b76ae8f Drops all (un)register_(hot)cpu_notifier() API functions. From here on we need to use the new cpuhp_* API. We avoid this entirely for now, at the cost of leaking pages until the filesystem is unmounted. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	ec50e66fff	Timespec64 changes for yr2038. Provide a fallback `current_time(inode)` implementation for older kernels. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	0e91f9a277	Adjust scoutfs_quorum_loop trace point. Convert the timeout struct into a u64 nsecs value before passing it to the trace point event, so as not to overflow the 64bit limitation on args. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	69068ae2c0	Initialize msg.msg_iter from iovec. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	016dac39bf	Handle net arg being added to sock_create_kern() Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	e69cf3dec8	kernel_getsockname and kernel_getpeername dropped addrlen arg. v4.16-rc1-1-g9b2c45d479d0 This interface now returns (sizeof (addr)) on success, instead of 0. Therefore, we have to change the error condition detection. The compat for older kernels handles the addrlen check internally. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	d6c143a639	xattr functions are now passed flags through struct xattr_handler Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	09ae100254	Remove the use of backing_dev_info pt from address_space. Instead, use the new inline inode_to_bdi from <backing-dev.h> to fill in the task's backing_dev_info. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	50f5077863	Do not use MS_* flags anymore in kernel space. MS_* flags from <linux/mount.h> should not be used in the kernel anymore from 4.x onwards. Instead, we need to use the SB_* versions Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Zach Brown	cca4fcb788	Use count/scan objects shrinking interface Move to the more recent interfaces for counting and scanning cached objects to shrink. Signed-off-by: Zach Brown <zab@versity.com> Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	1d150da3f0	Use page->lru instead of page->list With v3.14-rc1-10-g34bf6ef94a83, page->list is removed Instead, use the union member ->lru. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Zach Brown	28f03d3558	Use more modern bio interfaces Move towards modern bio interfaces, while unfortunately carrying along a bunch of compat functions that let us still work with the old incompatible interfaces. Signed-off-by: Zach Brown <zab@versity.com> Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Zach Brown	4275f6e6e5	Use memalloc_nofs_save memalloc_nofs_save() was introduced as preferential to trying to use GFP flags to indicate that a task should not recurse during reclaim. We use it instead of the _noio_ we were using before. Signed-off-by: Zach Brown <zab@versity.com>	2023-10-09 15:35:40 -04:00
Zach Brown	70a5b6ffe2	Use percpu_counter_add_batch __percpu_counter_add_batch was renamed to make it clear that the __ doesn't mean it's less safe, as it means in other calls in the API, but just that it takes an additional parameter. Signed-off-by: Zach Brown <zab@versity.com> Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	b89ecd47b4	Use __posix_acl_create/_chmod and add backwards compatibility There are new interfaces available but the old one has been retained for us to use. In case of older kernels, we will need to fall back to the previous name of these functions. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	4293816764	Fix argument test for __posix_acl_valid. The argument is fixed to be user_namespace, instead of user_ns. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	f0de59a9a3	Use setattr_prepare() as inode_change_ok() was removed in v4.8-rc1 Instead, we can call setattr_prepare() directly. We provide a fallback for older kernels. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	1f0a08eacb	Use the new inode->i_version manipulation methods. Provide fallback in degraded mode for kernels pre-v4.15-rc3 by directly manipulating the member as needed. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	dac3f056a5	inode->i_mutex has been replaced with inode->i_rwsem. Since v4.6-rc3-27-g9902af79c01a, inode->i_mutex has been replaced with ->i_rwsem. However, long since whenever, inode_lock() and related functions already worked as intended and provided fully exclusive locking to the inode. To avoid a name clash on pre-rhel8 kernels, we have to rename a stack variable in `src/file.c`. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	af868aad9b	New inode->i_version API requires <iversion.h> Since v4.15-rc3-4-gae5e165d855d, <linux/iversion.h> contains a new inode->i_version API and it is not included by default. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	cf4df0ef9f	use $(MAKE) to allow passing jobserver flags. With this, we can `make -jX` to speed up compiles a bit from the kmod folder. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	81aa58253e	module_init/_exit should have a semicolon at eol. In the past this was not needed but since el7 onwards these macros should require the semicolon. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	c683ded0e6	Adjust for new augmented rbtree compute callback function signature The new variant of the code that recomputes the augmented value is designed to handle non-scalar types and to facilitate that, it has new semantics for the _compute callback. It is now passed a boolean flag `exit` that indicates that if the value isn't changed, it should exit and halt propagation. The callback function now shall return whether that propagation should stop or not, and not the computed new value. The callback can now directly update the new computed value in the node. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	f27431b3ae	Add include <blkdev.h>. Fixes: Error: implicit declaration of function ‘blkdev_put’ Previously this was an `extern` in <fs.h> and included implicitly, hence the need to hard include it now. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	28c3cee995	preempt_mask.h is removed entirely. v4.1-rc4-22-g92cf211874e9 merges this into preempt.h, and on rhel7 kernels we don't need this include anymore either. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	430960ef3c	page_cache_release() is removed. put_page() instead. Even in 3.x, this already was equivalent. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	7006a84d96	flush_work_sync is equivalent to flush_work. v3.15-rc1-6-g1a56f2aa4752 removes flush_work_sync entirely, but ever since v3.6-rc1-25-g606a5020b9bd which made all workqueues non-reentrant, it has been equivalent to flush_work. This is safe because in all cases only one server->work can be in flight at a time. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	eafb8621da	d_materialise_unique replaced with d_splice_alias. Note argument order reversal. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	006555d42a	READ_ONCE() replaces ACCESS_ONCE() v3.18-rc3-2-g230fa253df63 forces us to replace ACCESS_ONCE() with READ_ONCE(), but it is probably the better interface and works with non-scalar types. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	8e458f9230	PAGE_CACHE_SIZE was removed, replace with PAGE_SIZE. PAGE_CACHE_SIZE was previously defined to be equivalent to PAGE_SIZE. This symbol was removed in v4.6-rc1-32-g1fa64f198b9f. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Auke Kok	32c0dbce09	Include kernel.h and fs.h at the top of kernelcompat.h Because we `-include src/kernelcompat.h` from the command line, this header gets included before any of the kernel includes in most .c and .h files. We should at least make sure we pull in <fs> and <kernel> since they're required. Signed-off-by: Auke Kok <auke.kok@versity.com>	2023-10-09 15:35:40 -04:00
Zach Brown	9c9ba651bd	Merge pull request #141 from versity/zab/fence-reclaim-racey-seq-test Remove seq test from fence-and-reclaim	2023-10-09 12:21:48 -07:00
Zach Brown	14eddb6420	Remove seq test from fence-and-reclaim The fence-and-reclaim test has a little function that runs after fencing and recovery to make sure that all the mounts are operational again. The main thing it does is re-use the same locks across a lot of files to ensure that lock recovery didn't lose any locks that stop forward progress. But I also threw in a test of the committed_seq machinery, as a bit of belt and suspenders. The problem is the test is racy. It samples the seq after the write so the greatest seq it remembers can be after the write and will not be committed by the other nodes' reads. It being less than the committed_seq is a totally reasonable race. Which explains why this test has been rarely failing since it was written. There's no particular reason to test the committed_seq machinery here, so we can just remove that racy test. Signed-off-by: Zach Brown <zab@versity.com>	2023-10-09 10:56:15 -07:00
Zach Brown	597208324d	Merge pull request #140 from versity/zab/v1.16 v1.16 Release	2023-10-04 11:51:45 -07:00
Zach Brown	8596c9ad45	v1.16 Release Finish the release notes for the 1.16 release. Signed-off-by: Zach Brown <zab@versity.com>	2023-10-04 10:32:55 -07:00
Zach Brown	8a705ea380	Merge pull request #139 from versity/zab/hold_commit_stuck Start server commits when holds wait for alloc	2023-10-04 10:27:12 -07:00
Zach Brown	4784ccdfd5	Start server commits when holds wait for alloc Server code that wants to dirty blocks by holding a commit won't be allowed to until the current allocators for the server transaction have enough space for the holder. As an active holder applies the commit the allocators are refilled and the waiting holders will proceed. But the current allocators can have no resources as the server starts up. There will never be active holders to apply the commit and refill the allocators. In this case all the holders will block indefinitely. The fix is to trigger a server commit when a holder doesn't have room. It used to be that commits were only triggered when apply callers were waiting. We transfer some of that logic into a new 'committing' field so that we can have commits in flight without apply callers waiting. We add it to the server commit tracing. While we're at it we clean up the logic that tests if a hold can proceed. It used to be confusingly split across two functions that both could sample the current allocator space remaining. This could lead to weird cases where the first holder could use the second alloc remaining call, not the one whose values were tested to see if the holder could fit. Now each hold check only samples the allocators once. And finally we fix a subtle case where the budget exceeded message can spuriously trigger in the case where dirtying the freed list created a new empty block after the holder recorded the amount of space in the freed block. Signed-off-by: Zach Brown <zab@versity.com>	2023-10-03 13:32:09 -07:00