diff --git a/kmod/src/alloc.c b/kmod/src/alloc.c
index d556112e..ac9601b8 100644
--- a/kmod/src/alloc.c
+++ b/kmod/src/alloc.c
@@ -676,6 +676,14 @@ int scoutfs_dalloc_return_cached(struct super_block *sb,
  *
  * Unlike meta allocations, the caller is expected to serialize
  * allocations from the root.
+ *
+ * ENOBUFS is returned if the data allocator ran out of space and we can
+ * probably refill it from the server.  The caller is expected to back
+ * out, commit the transaction, and try again.
+ *
+ * ENOSPC is returned if the data allocator ran out of space but we have
+ * a flag from the server telling us that there's no more space
+ * available.  This is a hard error and should be returned.
  */
 int scoutfs_alloc_data(struct super_block *sb, struct scoutfs_alloc *alloc,
 		       struct scoutfs_block_writer *wri,
@@ -724,13 +732,13 @@ int scoutfs_alloc_data(struct super_block *sb, struct scoutfs_alloc *alloc,
 	ret = 0;
 out:
 	if (ret < 0) {
-		/*
-		 * Special retval meaning there wasn't space to alloc from
-		 * this txn.  Doesn't mean filesystem is completely full.
-		 * Maybe upper layers want to try again.
-		 */
-		if (ret == -ENOENT)
-			ret = -ENOBUFS;
+		if (ret == -ENOENT) {
+			if (le32_to_cpu(dalloc->root.flags) & SCOUTFS_ALLOC_FLAG_LOW)
+				ret = -ENOSPC;
+			else
+				ret = -ENOBUFS;
+		}
+
 		*blkno_ret = 0;
 		*count_ret = 0;
 	} else {
@@ -1261,6 +1269,20 @@ bool scoutfs_alloc_meta_low(struct super_block *sb,
 	return lo;
 }
 
+bool scoutfs_alloc_test_flag(struct super_block *sb,
+			     struct scoutfs_alloc *alloc, u32 flag)
+{
+	unsigned int seq;
+	bool set;
+
+	do {
+		seq = read_seqbegin(&alloc->seqlock);
+		set = !!(le32_to_cpu(alloc->avail.flags) & flag);
+	} while (read_seqretry(&alloc->seqlock, seq));
+
+	return set;
+}
+
 /*
  * Call the callers callback for every persistent allocator structure
  * we can find.
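
The two errno values above give data writers a two-level response: ENOBUFS is
recoverable within the mount, ENOSPC is final.  A minimal sketch of the caller
pattern this contract expects (illustrative only; the real argument lists are
elided and commit_and_retry() is a hypothetical stand-in for the caller's
actual back-out/commit path):

	ret = scoutfs_alloc_data(sb, alloc, wri /* , ... */);
	if (ret == -ENOBUFS) {
		/* allocator empty but refillable: commit this
		 * transaction so the server refills it, then retry */
		ret = commit_and_retry(sb);
	} else if (ret == -ENOSPC) {
		/* the server set SCOUTFS_ALLOC_FLAG_LOW in the root:
		 * hard failure, hand it up unchanged */
	}
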
diff --git a/kmod/src/alloc.h b/kmod/src/alloc.h
index 9130d086..5a95d98c 100644
--- a/kmod/src/alloc.h
+++ b/kmod/src/alloc.h
@@ -38,6 +38,10 @@
 #define SCOUTFS_ALLOC_DATA_LG_THRESH \
 	(8ULL * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
 
+/* the client will force commits if data allocators get too low */
+#define SCOUTFS_ALLOC_DATA_REFILL_THRESH \
+	((256ULL * 1024 * 1024) >> SCOUTFS_BLOCK_SM_SHIFT)
+
 /*
  * Fill client alloc roots to the target when they fall below the lo
  * threshold.
@@ -55,6 +59,7 @@
 #define SCOUTFS_SERVER_DATA_FILL_LO \
 	(1ULL * 1024 * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT)
 
+
 /*
  * Log merge meta allocations are only used for one request and will
  * never use more than the dirty limit.
@@ -65,16 +70,6 @@
 	((SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT >> SCOUTFS_BLOCK_LG_SHIFT) + 4)
 #define SCOUTFS_SERVER_MERGE_FILL_LO SCOUTFS_SERVER_MERGE_FILL_TARGET
 
-/*
- * Each of the server meta_alloc roots will try to keep a minimum amount
- * of free blocks.  The server will swap roots when its current avail
- * falls below the threshold while the freed root is still above it.  It
- * must have room for all the largest allocation attempted in a
- * transaction on the server.
- */
-#define SCOUTFS_SERVER_META_ALLOC_MIN \
-	(SCOUTFS_SERVER_META_FILL_TARGET * 2)
-
 /*
  * A run-time use of a pair of persistent avail/freed roots as a
  * metadata allocator.  It has the machinery needed to lock and avoid
@@ -157,6 +152,8 @@ int scoutfs_alloc_splice_list(struct super_block *sb,
 
 bool scoutfs_alloc_meta_low(struct super_block *sb,
 			    struct scoutfs_alloc *alloc, u32 nr);
+bool scoutfs_alloc_test_flag(struct super_block *sb,
+			     struct scoutfs_alloc *alloc, u32 flag);
 
 typedef int (*scoutfs_alloc_foreach_cb_t)(struct super_block *sb,
 					  void *arg, int owner, u64 id,
diff --git a/kmod/src/data.c b/kmod/src/data.c
index caf26657..4d710496 100644
--- a/kmod/src/data.c
+++ b/kmod/src/data.c
@@ -312,10 +312,9 @@ int scoutfs_data_truncate_items(struct super_block *sb, struct inode *inode,
 
 	while (iblock <= last) {
 		if (inode)
-			ret = scoutfs_inode_index_lock_hold(inode, &ind_locks,
-							    true);
+			ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false);
 		else
-			ret = scoutfs_hold_trans(sb);
+			ret = scoutfs_hold_trans(sb, false);
 		if (ret)
 			break;
 
@@ -756,8 +755,7 @@ retry:
 		ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
 		      scoutfs_inode_index_prepare(sb, &wbd->ind_locks, inode,
 						  true) ?:
-		      scoutfs_inode_index_try_lock_hold(sb, &wbd->ind_locks,
-							ind_seq);
+		      scoutfs_inode_index_try_lock_hold(sb, &wbd->ind_locks, ind_seq, true);
 	} while (ret > 0);
 	if (ret < 0)
 		goto out;
@@ -1010,7 +1008,7 @@ long scoutfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 
 	while(iblock <= last) {
 
-		ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false);
+		ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false, true);
 		if (ret)
 			goto out;
 
@@ -1086,7 +1084,7 @@ int scoutfs_data_init_offline_extent(struct inode *inode, u64 size,
 	}
 
 	/* we're updating meta_seq with offline block count */
-	ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false);
+	ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false, true);
 	if (ret < 0)
 		goto out;
 
@@ -1238,7 +1236,7 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
 		ret = scoutfs_inode_index_start(sb, &seq) ?:
 		      scoutfs_inode_index_prepare(sb, &locks, from, true) ?:
 		      scoutfs_inode_index_prepare(sb, &locks, to, true) ?:
-		      scoutfs_inode_index_try_lock_hold(sb, &locks, seq);
+		      scoutfs_inode_index_try_lock_hold(sb, &locks, seq, false);
 		if (ret > 0)
 			continue;
 		if (ret < 0)
@@ -1844,13 +1842,17 @@ int scoutfs_data_prepare_commit(struct super_block *sb)
 	return ret;
 }
 
-u64 scoutfs_data_alloc_free_bytes(struct super_block *sb)
+/*
+ * Return true if the data allocator is lower than the caller's
+ * requirement and we haven't been told by the server that we're out of
+ * free extents.
+ */
+bool scoutfs_data_alloc_should_refill(struct super_block *sb, u64 blocks)
 {
 	DECLARE_DATA_INFO(sb, datinf);
 
-	return scoutfs_dalloc_total_len(&datinf->dalloc) <<
-		SCOUTFS_BLOCK_SM_SHIFT;
-
+	return (scoutfs_dalloc_total_len(&datinf->dalloc) < blocks) &&
+	       !(le32_to_cpu(datinf->dalloc.root.flags) & SCOUTFS_ALLOC_FLAG_LOW);
 }
 
 int scoutfs_data_setup(struct super_block *sb)
diff --git a/kmod/src/data.h b/kmod/src/data.h
index 4f51a8c2..064564f6 100644
--- a/kmod/src/data.h
+++ b/kmod/src/data.h
@@ -86,7 +86,7 @@ void scoutfs_data_init_btrees(struct super_block *sb,
 void scoutfs_data_get_btrees(struct super_block *sb,
 			     struct scoutfs_log_trees *lt);
 int scoutfs_data_prepare_commit(struct super_block *sb);
-u64 scoutfs_data_alloc_free_bytes(struct super_block *sb);
+bool scoutfs_data_alloc_should_refill(struct super_block *sb, u64 blocks);
 int scoutfs_data_setup(struct super_block *sb);
 void scoutfs_data_destroy(struct super_block *sb);
diff --git a/kmod/src/dir.c b/kmod/src/dir.c
index 79a276da..c6eb331d 100644
--- a/kmod/src/dir.c
+++ b/kmod/src/dir.c
@@ -712,7 +712,7 @@ retry:
 	ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
 	      scoutfs_inode_index_prepare(sb, ind_locks, dir, true) ?:
 	      scoutfs_inode_index_prepare_ino(sb, ind_locks, ino, mode) ?:
-	      scoutfs_inode_index_try_lock_hold(sb, ind_locks, ind_seq);
+	      scoutfs_inode_index_try_lock_hold(sb, ind_locks, ind_seq, true);
 	if (ret > 0)
 		goto retry;
 	if (ret)
@@ -869,7 +869,7 @@ retry:
 	ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
 	      scoutfs_inode_index_prepare(sb, &ind_locks, dir, false) ?:
 	      scoutfs_inode_index_prepare(sb, &ind_locks, inode, false) ?:
-	      scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq);
+	      scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq, true);
 	if (ret > 0)
 		goto retry;
 	if (ret)
@@ -969,7 +969,7 @@ retry:
 	ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
 	      scoutfs_inode_index_prepare(sb, &ind_locks, dir, false) ?:
 	      scoutfs_inode_index_prepare(sb, &ind_locks, inode, false) ?:
-	      scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq);
+	      scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq, false);
 	if (ret > 0)
 		goto retry;
 	if (ret)
@@ -1641,7 +1641,7 @@ retry:
 	       scoutfs_inode_index_prepare(sb, &ind_locks, new_dir, false)) ?:
 	      (new_inode == NULL ? 0 :
 	       scoutfs_inode_index_prepare(sb, &ind_locks, new_inode, false)) ?:
-	      scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq);
+	      scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq, true);
 	if (ret > 0)
 		goto retry;
 	if (ret)
diff --git a/kmod/src/format.h b/kmod/src/format.h
index 654da558..fb6c1f4f 100644
--- a/kmod/src/format.h
+++ b/kmod/src/format.h
@@ -286,9 +286,10 @@ struct scoutfs_alloc_list_head {
 	struct scoutfs_block_ref ref;
 	__le64 total_nr;
 	__le32 first_nr;
-	__u8 __pad[4];
+	__le32 flags;
 };
 
+
 /*
  * While the main allocator uses extent items in btree blocks, metadata
  * allocations for a single transaction are recorded in arrays in
@@ -317,9 +318,14 @@ struct scoutfs_alloc_list_block {
  */
 struct scoutfs_alloc_root {
 	__le64 total_len;
+	__le32 flags;
+	__le32 _pad;
 	struct scoutfs_btree_root root;
 };
 
+/* Shared by _alloc_list_head and _alloc_root */
+#define SCOUTFS_ALLOC_FLAG_LOW (1U << 0)
+
 /* types of allocators, exposed to alloc_detail ioctl */
 #define SCOUTFS_ALLOC_OWNER_NONE	0
 #define SCOUTFS_ALLOC_OWNER_SERVER	1
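
Because both new flags fields occupy bytes that were previously written as
zeroed padding, existing images read back with no flags set and the struct
sizes are unchanged.  A sketch of the compile-time guards such a format
change usually carries (not part of this patch; the 32-byte figure assumes
struct scoutfs_block_ref is a pair of __le64s, as the print.c format macros
later in this series suggest):

	/* hypothetical layout guards, not in this patch: the low flag
	 * relies on flags landing exactly where the padding used to be */
	BUILD_BUG_ON(sizeof(struct scoutfs_alloc_list_head) != 32);
	BUILD_BUG_ON(offsetof(struct scoutfs_alloc_list_head, flags) != 28);
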
diff --git a/kmod/src/inode.c b/kmod/src/inode.c
index 3911b74e..cfabc332 100644
--- a/kmod/src/inode.c
+++ b/kmod/src/inode.c
@@ -358,7 +358,7 @@ static int set_inode_size(struct inode *inode, struct scoutfs_lock *lock,
 	if (!S_ISREG(inode->i_mode))
 		return 0;
 
-	ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true);
+	ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, true, false);
 	if (ret)
 		return ret;
 
@@ -385,7 +385,7 @@ static int clear_truncate_flag(struct inode *inode, struct scoutfs_lock *lock)
 	LIST_HEAD(ind_locks);
 	int ret;
 
-	ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false);
+	ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false, false);
 	if (ret)
 		return ret;
 
@@ -500,7 +500,7 @@ retry:
 		}
 	}
 
-	ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false);
+	ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, false, false);
 	if (ret)
 		goto out;
 
@@ -1213,7 +1213,7 @@ int scoutfs_inode_index_start(struct super_block *sb, u64 *seq)
  * Returns > 0 if the seq changed and the locks should be retried.
  */
 int scoutfs_inode_index_try_lock_hold(struct super_block *sb,
-				      struct list_head *list, u64 seq)
+				      struct list_head *list, u64 seq, bool allocing)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
 	struct index_lock *ind_lock;
@@ -1229,7 +1229,7 @@ int scoutfs_inode_index_try_lock_hold(struct super_block *sb,
 		goto out;
 	}
 
-	ret = scoutfs_hold_trans(sb);
+	ret = scoutfs_hold_trans(sb, allocing);
 	if (ret == 0 && seq != sbi->trans_seq) {
 		scoutfs_release_trans(sb);
 		ret = 1;
@@ -1243,7 +1243,7 @@ out:
 }
 
 int scoutfs_inode_index_lock_hold(struct inode *inode, struct list_head *list,
-				  bool set_data_seq)
+				  bool set_data_seq, bool allocing)
 {
 	struct super_block *sb = inode->i_sb;
 	int ret;
@@ -1253,7 +1253,7 @@ int scoutfs_inode_index_lock_hold(struct inode *inode, struct list_head *list,
 
 		ret = scoutfs_inode_index_start(sb, &seq) ?:
 		      scoutfs_inode_index_prepare(sb, list, inode, set_data_seq) ?:
-		      scoutfs_inode_index_try_lock_hold(sb, list, seq);
+		      scoutfs_inode_index_try_lock_hold(sb, list, seq, allocing);
 	} while (ret > 0);
 
 	return ret;
@@ -1533,7 +1533,7 @@ static int delete_inode_items(struct super_block *sb, u64 ino, struct scoutfs_lo
 retry:
 	ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
 	      prepare_index_deletion(sb, &ind_locks, ino, mode, &sinode) ?:
-	      scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq);
+	      scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq, false);
 	if (ret > 0)
 		goto retry;
 	if (ret)
diff --git a/kmod/src/inode.h b/kmod/src/inode.h
index 805eb237..7cb61b57 100644
--- a/kmod/src/inode.h
+++ b/kmod/src/inode.h
@@ -88,9 +88,9 @@ int scoutfs_inode_index_prepare_ino(struct super_block *sb,
 				    struct list_head *list, u64 ino,
 				    umode_t mode);
 int scoutfs_inode_index_try_lock_hold(struct super_block *sb,
-				      struct list_head *list, u64 seq);
+				      struct list_head *list, u64 seq, bool allocing);
 int scoutfs_inode_index_lock_hold(struct inode *inode, struct list_head *list,
-				  bool set_data_seq);
+				  bool set_data_seq, bool allocing);
 void scoutfs_inode_index_unlock(struct super_block *sb,
 				struct list_head *list);
 int scoutfs_dirty_inode_item(struct inode *inode, struct scoutfs_lock *lock);
diff --git a/kmod/src/ioctl.c b/kmod/src/ioctl.c
index b323b9a1..cb3f4a4e 100644
--- a/kmod/src/ioctl.c
+++ b/kmod/src/ioctl.c
@@ -38,6 +38,7 @@
 #include "hash.h"
 #include "srch.h"
 #include "alloc.h"
+#include "server.h"
 #include "scoutfs_trace.h"
 
 /*
@@ -674,7 +675,7 @@ static long scoutfs_ioc_setattr_more(struct file *file, unsigned long arg)
 
 	/* setting only so we don't see 0 data seq with nonzero data_version */
 	set_data_seq = sm.data_version != 0 ? true : false;
-	ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, set_data_seq);
+	ret = scoutfs_inode_index_lock_hold(inode, &ind_locks, set_data_seq, false);
 	if (ret)
 		goto unlock;
 
@@ -879,6 +880,7 @@ static long scoutfs_ioc_statfs_more(struct file *file, unsigned long arg)
 	sfm.rid = sbi->rid;
 	sfm.total_meta_blocks = le64_to_cpu(super->total_meta_blocks);
 	sfm.total_data_blocks = le64_to_cpu(super->total_data_blocks);
+	sfm.reserved_meta_blocks = scoutfs_server_reserved_meta_blocks(sb);
 
 	ret = scoutfs_client_get_last_seq(sb, &sfm.committed_seq);
 	if (ret)
diff --git a/kmod/src/ioctl.h b/kmod/src/ioctl.h
index 5042edfe..446611e9 100644
--- a/kmod/src/ioctl.h
+++ b/kmod/src/ioctl.h
@@ -371,6 +371,7 @@ struct scoutfs_ioctl_statfs_more {
 	__u64 committed_seq;
 	__u64 total_meta_blocks;
 	__u64 total_data_blocks;
+	__u64 reserved_meta_blocks;
 };
 
 #define SCOUTFS_IOC_STATFS_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 10, \
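
A df-style consumer picks the new field up through the existing statfs_more
ioctl.  A hedged userspace sketch (the "ioctl.h" include path is an
assumption, and any validity/size handshake the interface may require before
the call is elided here):

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include "ioctl.h"	/* the updated scoutfs ioctl.h above */

	int main(int argc, char **argv)
	{
		struct scoutfs_ioctl_statfs_more sfm;
		int fd;

		memset(&sfm, 0, sizeof(sfm));
		fd = open(argv[1], O_RDONLY);
		if (fd < 0 || ioctl(fd, SCOUTFS_IOC_STATFS_MORE, &sfm) < 0) {
			perror("statfs_more");
			return 1;
		}
		printf("reserved_meta_blocks %llu\n",
		       (unsigned long long)sfm.reserved_meta_blocks);
		return 0;
	}
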
diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h
index bc9c4797..b92471fd 100644
--- a/kmod/src/scoutfs_trace.h
+++ b/kmod/src/scoutfs_trace.h
@@ -424,14 +424,15 @@ TRACE_EVENT(scoutfs_trans_write_func,
 );
 
 DECLARE_EVENT_CLASS(scoutfs_trans_hold_release_class,
-	TP_PROTO(struct super_block *sb, void *journal_info, int holders),
+	TP_PROTO(struct super_block *sb, void *journal_info, int holders, int ret),
 
-	TP_ARGS(sb, journal_info, holders),
+	TP_ARGS(sb, journal_info, holders, ret),
 
 	TP_STRUCT__entry(
 		SCSB_TRACE_FIELDS
 		__field(unsigned long, journal_info)
 		__field(int, holders)
+		__field(int, ret)
 	),
 
 	TP_fast_assign(
@@ -440,17 +441,18 @@ DECLARE_EVENT_CLASS(scoutfs_trans_hold_release_class,
 		__entry->holders = holders;
+		__entry->ret = ret;
 	),
-	TP_printk(SCSBF" journal_info 0x%0lx holders %d",
-		  SCSB_TRACE_ARGS, __entry->journal_info, __entry->holders)
+	TP_printk(SCSBF" journal_info 0x%0lx holders %d ret %d",
+		  SCSB_TRACE_ARGS, __entry->journal_info, __entry->holders, __entry->ret)
 );
 
-DEFINE_EVENT(scoutfs_trans_hold_release_class, scoutfs_trans_acquired_hold,
-	TP_PROTO(struct super_block *sb, void *journal_info, int holders),
-	TP_ARGS(sb, journal_info, holders)
+DEFINE_EVENT(scoutfs_trans_hold_release_class, scoutfs_hold_trans,
+	TP_PROTO(struct super_block *sb, void *journal_info, int holders, int ret),
+	TP_ARGS(sb, journal_info, holders, ret)
 );
 
 DEFINE_EVENT(scoutfs_trans_hold_release_class, scoutfs_release_trans,
-	TP_PROTO(struct super_block *sb, void *journal_info, int holders),
-	TP_ARGS(sb, journal_info, holders)
+	TP_PROTO(struct super_block *sb, void *journal_info, int holders, int ret),
+	TP_ARGS(sb, journal_info, holders, ret)
 );
 
 TRACE_EVENT(scoutfs_ioc_release,
diff --git a/kmod/src/server.c b/kmod/src/server.c
index 9e8307b8..0bcabc45 100644
--- a/kmod/src/server.c
+++ b/kmod/src/server.c
@@ -323,6 +323,7 @@ static void scoutfs_server_commit_func(struct work_struct *work)
 	struct commit_waiter *cw;
 	struct commit_waiter *pos;
 	struct llist_node *node;
+	u64 reserved;
 	int ret;
 
 	trace_scoutfs_server_commit_work_enter(sb, 0, 0);
@@ -387,11 +388,17 @@ static void scoutfs_server_commit_func(struct work_struct *work)
 	server->other_avail = &super->server_meta_avail[server->other_ind];
 	server->other_freed = &super->server_meta_freed[server->other_ind];
 
-	/* swap avail/free if avail gets low and freed is high */
-	if (le64_to_cpu(server->meta_avail->total_len) <=
-	    SCOUTFS_SERVER_META_ALLOC_MIN &&
-	    le64_to_cpu(server->meta_freed->total_len) >
-	    SCOUTFS_SERVER_META_ALLOC_MIN)
+	/*
+	 * The reserved metadata blocks include the max size of
+	 * outstanding allocators and a server transaction could be
+	 * asked to refill all those allocators from meta_avail.  If our
+	 * meta_avail falls below the reserved count, and freed is still
+	 * above it, then swap so that we don't start returning enospc
+	 * until we're truly low.
+	 */
+	reserved = scoutfs_server_reserved_meta_blocks(sb);
+	if (le64_to_cpu(server->meta_avail->total_len) <= reserved &&
+	    le64_to_cpu(server->meta_freed->total_len) > reserved)
 		swap(server->meta_avail, server->meta_freed);
 
 	ret = 0;
@@ -479,6 +486,57 @@ static int alloc_move_empty(struct super_block *sb,
 			       dst, src, le64_to_cpu(src->total_len),
 			       NULL, NULL, 0);
 }
 
+/*
+ * Copy on write transactions need to allocate new dirty blocks as they
+ * make modifications to delete items and eventually free more blocks.
+ * The reserved blocks are meant to keep enough available blocks in
+ * flight to allow servers and clients to perform transactions that
+ * don't consume additional space.  We have quite a few allocators in
+ * flight across the server and various client mechanisms (posix items,
+ * srch compaction, and log merging).  We also want to include
+ * sufficient blocks for client log btrees to grow tall enough to be
+ * finalized and merged.
+ *
+ * The reserved blocks calculation is a policy of the server but it's
+ * exposed to the statfs_more interface so that df isn't misleading.
+ * Requiring this synchronization without explicit protocol
+ * communication isn't great.
+ */
+u64 scoutfs_server_reserved_meta_blocks(struct super_block *sb)
+{
+	DECLARE_SERVER_INFO(sb, server);
+	u64 server_blocks;
+	u64 client_blocks;
+	u64 log_blocks;
+	u64 nr_clients;
+
+	/* server has two meta_avail lists it swaps between */
+	server_blocks = SCOUTFS_SERVER_META_FILL_TARGET * 2;
+
+	/*
+	 * Log trees will be compacted once they hit a height of 3.
+	 * That'll be the grandparent, two parents resulting from a
+	 * split, and all their child blocks (roughly calculated,
+	 * overestimating).
+	 */
+	log_blocks = 3 + (SCOUTFS_BLOCK_LG_SIZE /
+			  (sizeof(struct scoutfs_btree_item) + sizeof(struct scoutfs_block_ref)));
+
+	/*
+	 * Each client can have a meta_avail list, srch compaction
+	 * request, log merge request, and a log btree it's building.
+	 */
+	client_blocks = SCOUTFS_SERVER_META_FILL_TARGET + SCOUTFS_SERVER_META_FILL_TARGET +
+			SCOUTFS_SERVER_MERGE_FILL_TARGET + log_blocks;
+
+	/* we should reserve for voting majority, too */
+	spin_lock(&server->lock);
+	nr_clients = server->nr_clients;
+	spin_unlock(&server->lock);
+
+	return server_blocks + (max(1ULL, nr_clients) * client_blocks);
+}
+
 /*
  * Set all the bits in the destination which overlap with the extent.
  */
@@ -662,6 +720,7 @@ static int server_get_log_trees(struct super_block *sb,
 	struct scoutfs_log_trees lt;
 	struct scoutfs_key key;
 	bool have_fin = false;
+	bool unlock_alloc = false;
 	u64 data_zone_blocks;
 	u64 nr;
 	int ret;
@@ -701,8 +760,15 @@ static int server_get_log_trees(struct super_block *sb,
 		lt.nr = cpu_to_le64(nr);
 	}
 
-	/* finalize an existing root when large enough and don't have one */
-	if (lt.item_root.height > 2 && !have_fin) {
+	/*
+	 * Finalize the client log btree when it has enough leaf blocks
+	 * to allow some degree of merging concurrency.  Smaller btrees
+	 * are also finalized when meta was low so that deleted items
+	 * are merged promptly and freed blocks can bring the client out
+	 * of enospc.
+	 */
+	if (!have_fin && ((lt.item_root.height > 2) ||
+			  (le32_to_cpu(lt.meta_avail.flags) & SCOUTFS_ALLOC_FLAG_LOW))) {
 		fin = lt;
 		memset(&fin.meta_avail, 0, sizeof(fin.meta_avail));
 		memset(&fin.meta_freed, 0, sizeof(fin.meta_freed));
@@ -734,24 +800,45 @@ static int server_get_log_trees(struct super_block *sb,
 		data_zone_blocks = 0;
 	}
 
-	/* return freed to server for emptying, refill avail */
+	/*
+	 * Reclaim the freed meta and data allocators and refill the
+	 * avail allocators, setting low flags if they drop too low.
+	 */
 	mutex_lock(&server->alloc_mutex);
-	ret = scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri,
-					server->other_freed,
+	unlock_alloc = true;
+
+	ret = scoutfs_alloc_splice_list(sb, &server->alloc, &server->wri, server->other_freed,
 					&lt.meta_freed) ?:
-	      alloc_move_empty(sb, &super->data_alloc, &lt.data_freed) ?:
-	      scoutfs_alloc_fill_list(sb, &server->alloc, &server->wri,
-				      &lt.meta_avail, server->meta_avail,
-				      SCOUTFS_SERVER_META_FILL_LO,
-				      SCOUTFS_SERVER_META_FILL_TARGET) ?:
-	      alloc_move_refill_zoned(sb, &lt.data_avail, &super->data_alloc,
-				      SCOUTFS_SERVER_DATA_FILL_LO,
-				      SCOUTFS_SERVER_DATA_FILL_TARGET,
-				      exclusive, vacant, data_zone_blocks);
-	mutex_unlock(&server->alloc_mutex);
+	      alloc_move_empty(sb, &super->data_alloc, &lt.data_freed);
 	if (ret < 0)
 		goto unlock;
 
+	ret = scoutfs_alloc_fill_list(sb, &server->alloc, &server->wri,
+				      &lt.meta_avail, server->meta_avail,
+				      SCOUTFS_SERVER_META_FILL_LO,
+				      SCOUTFS_SERVER_META_FILL_TARGET);
+	if (ret < 0)
+		goto unlock;
+
+	if (le64_to_cpu(server->meta_avail->total_len) <= scoutfs_server_reserved_meta_blocks(sb))
+		lt.meta_avail.flags |= cpu_to_le32(SCOUTFS_ALLOC_FLAG_LOW);
+	else
+		lt.meta_avail.flags &= ~cpu_to_le32(SCOUTFS_ALLOC_FLAG_LOW);
+
+	ret = alloc_move_refill_zoned(sb, &lt.data_avail, &super->data_alloc,
+				      SCOUTFS_SERVER_DATA_FILL_LO, SCOUTFS_SERVER_DATA_FILL_TARGET,
+				      exclusive, vacant, data_zone_blocks);
+	if (ret < 0)
+		goto unlock;
+
+	if (le64_to_cpu(lt.data_avail.total_len) < SCOUTFS_SERVER_DATA_FILL_LO)
+		lt.data_avail.flags |= cpu_to_le32(SCOUTFS_ALLOC_FLAG_LOW);
+	else
+		lt.data_avail.flags &= ~cpu_to_le32(SCOUTFS_ALLOC_FLAG_LOW);
+
+	mutex_unlock(&server->alloc_mutex);
+	unlock_alloc = false;
+
 	/* record data alloc zone bits */
 	zero_data_alloc_zone_bits(&lt);
 	if (data_zone_blocks != 0) {
@@ -772,6 +859,8 @@ static int server_get_log_trees(struct super_block *sb,
 	ret = scoutfs_btree_force(sb, &server->alloc, &server->wri,
 				  &super->logs_root, &key, &lt, sizeof(lt));
 unlock:
+	if (unlock_alloc)
+		mutex_unlock(&server->alloc_mutex);
 	mutex_unlock(&server->logs_mutex);
 
 	ret = scoutfs_server_apply_commit(sb, ret);
diff --git a/kmod/src/server.h b/kmod/src/server.h
index 79fcb443..41b808e7 100644
--- a/kmod/src/server.h
+++ b/kmod/src/server.h
@@ -56,6 +56,8 @@ do { \
 	__entry->name##_data_len, __entry->name##_cmd, __entry->name##_flags, \
 	__entry->name##_error
 
+u64 scoutfs_server_reserved_meta_blocks(struct super_block *sb);
+
 int scoutfs_server_lock_request(struct super_block *sb, u64 rid,
 				struct scoutfs_net_lock *nl);
 int scoutfs_server_lock_response(struct super_block *sb, u64 rid, u64 id,
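
To make the reservation policy concrete, here is the same arithmetic as
scoutfs_server_reserved_meta_blocks() as a standalone walk-through with
placeholder numbers (the constants below are illustrative stand-ins, not
scoutfs's real fill targets or item sizes):

	#include <stdint.h>
	#include <stdio.h>

	/* placeholder values, not the real scoutfs constants */
	#define META_FILL_TARGET	8192ULL		/* blocks per refilled avail list */
	#define MERGE_FILL_TARGET	4096ULL		/* blocks per log merge request */
	#define BLOCK_LG_SIZE		65536ULL
	#define ITEM_PLUS_REF		48ULL		/* sizeof(item) + sizeof(ref) */

	int main(void)
	{
		/* the server keeps two meta_avail lists it swaps between */
		uint64_t server_blocks = META_FILL_TARGET * 2;
		/* grandparent + split parents + their child block refs */
		uint64_t log_blocks = 3 + BLOCK_LG_SIZE / ITEM_PLUS_REF;
		/* avail list + srch compaction + log merge + log btree */
		uint64_t client_blocks = META_FILL_TARGET + META_FILL_TARGET +
					 MERGE_FILL_TARGET + log_blocks;
		uint64_t nr_clients = 4;	/* example mount count */

		if (nr_clients < 1)
			nr_clients = 1;		/* mirrors max(1ULL, nr_clients) */
		printf("reserved = %llu meta blocks\n", (unsigned long long)
		       (server_blocks + nr_clients * client_blocks));
		return 0;
	}

With these placeholders that comes to 16384 + 4 * 21848 = 103776 blocks
withheld from df's free count.
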
enospc */ + if (scoutfs_data_alloc_should_refill(sb, SCOUTFS_ALLOC_DATA_REFILL_THRESH)) { scoutfs_inc_counter(sb, trans_commit_data_alloc_low); return true; } @@ -445,38 +445,15 @@ static bool commit_before_hold(struct super_block *sb, struct trans_info *tri) return false; } -static bool acquired_hold(struct super_block *sb) +/* + * called as a wait_event condition, needs to be careful to not change + * task state and is racing with waking paths that sub_return, test, and + * wake. + */ +static bool holders_no_writer(struct trans_info *tri) { - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - DECLARE_TRANS_INFO(sb, tri); - bool acquired; - - /* if a caller already has a hold we acquire unconditionally */ - if (inc_journal_info_holders()) { - atomic_inc(&tri->holders); - acquired = true; - goto out; - } - - /* wait if the writer is blocking holds */ - if (!inc_holders_unless_writer(tri)) { - dec_journal_info_holders(); - acquired = false; - goto out; - } - - /* wait if we're triggering another commit */ - if (commit_before_hold(sb, tri)) { - release_holders(sb); - queue_trans_work(sbi); - acquired = false; - goto out; - } - - trace_scoutfs_trans_acquired_hold(sb, current->journal_info, atomic_read(&tri->holders)); - acquired = true; -out: - return acquired; + smp_mb(); /* make sure task in wait_event queue before atomic read */ + return !(atomic_read(&tri->holders) & TRANS_HOLDERS_WRITE_FUNC_BIT); } /* @@ -492,15 +469,64 @@ out: * The writing thread marks itself as a global trans_task which * short-circuits all the hold machinery so it can call code that would * otherwise try to hold transactions while it is writing. + * + * If the caller is adding metadata items that will eventually consume + * free space -- not dirtying existing items or adding deletion items -- + * then we can return enospc if our metadata allocator indicates that + * we're low on space. 
diff --git a/kmod/src/trans.h b/kmod/src/trans.h
index ab42a4cf..51ae1232 100644
--- a/kmod/src/trans.h
+++ b/kmod/src/trans.h
@@ -1,18 +1,13 @@
 #ifndef _SCOUTFS_TRANS_H_
 #define _SCOUTFS_TRANS_H_
 
-/* the server will attempt to fill data allocs for each trans */
-#define SCOUTFS_TRANS_DATA_ALLOC_HWM (2ULL * 1024 * 1024 * 1024)
-/* the client will force commits if data allocators get too low */
-#define SCOUTFS_TRANS_DATA_ALLOC_LWM (256ULL * 1024 * 1024)
-
 void scoutfs_trans_write_func(struct work_struct *work);
 
 int scoutfs_trans_sync(struct super_block *sb, int wait);
 int scoutfs_file_fsync(struct file *file, loff_t start, loff_t end,
 		       int datasync);
 void scoutfs_trans_restart_sync_deadline(struct super_block *sb);
-int scoutfs_hold_trans(struct super_block *sb);
+int scoutfs_hold_trans(struct super_block *sb, bool allocing);
 bool scoutfs_trans_held(void);
 void scoutfs_release_trans(struct super_block *sb);
 u64 scoutfs_trans_sample_seq(struct super_block *sb);
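
The allocing argument is what threads this policy through every hold site in
the series: item-creating paths (create, link, rename, xattr set, fallocate)
pass true and can now fail fast with ENOSPC, while paths that only dirty or
delete (truncate, unlink, inode deletion) pass false so they can always run
and free space.  An illustrative fragment of an item-creating call site (not
from this patch):

	/* illustrative only: a path that adds new items opts into
	 * early enospc by holding with allocing == true */
	ret = scoutfs_hold_trans(sb, true);
	if (ret < 0)
		return ret;	/* -ENOSPC once SCOUTFS_ALLOC_FLAG_LOW is set */

	/* ... create the new items under the held transaction ... */

	scoutfs_release_trans(sb);
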
diff --git a/kmod/src/xattr.c b/kmod/src/xattr.c
index 6c00c0c5..fd8acd8e 100644
--- a/kmod/src/xattr.c
+++ b/kmod/src/xattr.c
@@ -577,7 +577,7 @@ static int scoutfs_xattr_set(struct dentry *dentry, const char *name,
 retry:
 	ret = scoutfs_inode_index_start(sb, &ind_seq) ?:
 	      scoutfs_inode_index_prepare(sb, &ind_locks, inode, false) ?:
-	      scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq);
+	      scoutfs_inode_index_try_lock_hold(sb, &ind_locks, ind_seq, true);
 	if (ret > 0)
 		goto retry;
 	if (ret)
@@ -778,7 +778,7 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
 			       &tgs) != 0)
 			memset(&tgs, 0, sizeof(tgs));
 
-		ret = scoutfs_hold_trans(sb);
+		ret = scoutfs_hold_trans(sb, false);
 		if (ret < 0)
 			break;
 		release = true;
diff --git a/tests/golden/enospc b/tests/golden/enospc
new file mode 100644
index 00000000..150e5cf9
--- /dev/null
+++ b/tests/golden/enospc
@@ -0,0 +1,8 @@
+== prepare directories and files
+== fallocate until enospc
+== remove all the files and verify free data blocks
+== make small meta fs
+== create large xattrs until we fill up metadata
+== remove files with xattrs after enospc
+== make sure we can create again
+== cleanup small meta fs
diff --git a/tests/sequence b/tests/sequence
index b39ac824..b97e4847 100644
--- a/tests/sequence
+++ b/tests/sequence
@@ -7,6 +7,7 @@ simple-release-extents.sh
 setattr_more.sh
 offline-extent-waiting.sh
 move-blocks.sh
+enospc.sh
 srch-basic-functionality.sh
 simple-xattr-unit.sh
 lock-refleak.sh
diff --git a/tests/tests/enospc.sh b/tests/tests/enospc.sh
new file mode 100644
index 00000000..ab042479
--- /dev/null
+++ b/tests/tests/enospc.sh
@@ -0,0 +1,100 @@
+#
+# test hitting enospc by filling with data or metadata and
+# then recovering by removing what we filled.
+#
+
+#     Type Size    Total     Used     Free  Use%
+#MetaData  64KB  1048576    32782  1015794     3
+#     Data  4KB 16777152        0 16777152     0
+free_blocks() {
+	local md="$1"
+	local mnt="$2"
+	scoutfs df -p "$mnt" | awk '($1 == "'$md'") { print $5; exit }'
+}
+
+t_require_commands scoutfs stat fallocate createmany
+
+echo "== prepare directories and files"
+for n in $(t_fs_nrs); do
+	eval path="\$T_D${n}/dir-$n/file-$n"
+	mkdir -p $(dirname $path)
+	touch $path
+done
+sync
+
+echo "== fallocate until enospc"
+before=$(free_blocks Data "$T_M0")
+finished=0
+while [ $finished != 1 ]; do
+	for n in $(t_fs_nrs); do
+		eval path="\$T_D${n}/dir-$n/file-$n"
+		off=$(stat -c "%s" "$path")
+
+		LC_ALL=C fallocate -o $off -l 128MiB "$path" > $T_TMP.fallocate 2>&1
+		err="$?"
+
+		if grep -qi "no space" $T_TMP.fallocate; then
+			finished=1
+			break
+		fi
+		if [ "$err" != "0" ]; then
+			t_fail "fallocate failed with $err"
+		fi
+	done
+done
+
+echo "== remove all the files and verify free data blocks"
+for n in $(t_fs_nrs); do
+	eval dir="\$T_D${n}/dir-$n"
+	rm -rf "$dir"
+done
+sync
+after=$(free_blocks Data "$T_M0")
+# nothing else should be modifying data blocks
+test "$before" == "$after" || \
+	t_fail "$after free data blocks after rm, expected $before"
+
+# XXX this is all pretty manual, would be nice to have helpers
+echo "== make small meta fs"
+# meta device just big enough for reserves and the metadata we'll fill
+scoutfs mkfs -A -f -Q 0,127.0.0.1,53000 -m 10G "$T_EX_META_DEV" "$T_EX_DATA_DEV" > $T_TMP.mkfs.out 2>&1 || \
+	t_fail "mkfs failed"
+SCR="/mnt/scoutfs.enospc"
+mkdir -p "$SCR"
+mount -t scoutfs -o metadev_path=$T_EX_META_DEV,quorum_slot_nr=0 \
+	"$T_EX_DATA_DEV" "$SCR"
+
+echo "== create large xattrs until we fill up metadata"
+mkdir -p "$SCR/xattrs"
+
+for f in $(seq 1 100000); do
+	file="$SCR/xattrs/file-$f"
+	touch "$file"
+
+	LC_ALL=C create_xattr_loop -c 1000 -n user.scoutfs-enospc -p "$file" -s 65535 > $T_TMP.cxl 2>&1
+	err="$?"
+
+	if grep -qi "no space" $T_TMP.cxl; then
+		echo "enospc at f $f" >> $T_TMP.cxl
+		break
+	fi
+	if [ "$err" != "0" ]; then
+		t_fail "create_xattr_loop failed with $err"
+	fi
+done
+
+echo "== remove files with xattrs after enospc"
+rm -rf "$SCR/xattrs"
+
+echo "== make sure we can create again"
+file="$SCR/file-after"
+touch $file
+setfattr -n user.scoutfs-enospc -v 1 "$file"
+sync
+rm -f "$file"
+
+echo "== cleanup small meta fs"
+umount "$SCR"
+rmdir "$SCR"
+
+t_pass
diff --git a/utils/man/scoutfs.8 b/utils/man/scoutfs.8
index 09062fb0..abf815dd 100644
--- a/utils/man/scoutfs.8
+++ b/utils/man/scoutfs.8
@@ -36,6 +36,11 @@ A path within a ScoutFS filesystem.
 .sp
 Initialize a new ScoutFS filesystem on the target devices.
 Since ScoutFS uses separate block devices for its metadata and data storage, two are required.
+The internal structures and nature of metadata and data transactions
+lead to minimum viable device sizes.
+.B mkfs
+will check both devices and fail with an error if either is under the
+minimum size.
 .sp
 If
 .B --force
diff --git a/utils/src/df.c b/utils/src/df.c
index 21ea9f04..585d658c 100644
--- a/utils/src/df.c
+++ b/utils/src/df.c
@@ -86,6 +86,11 @@ static int do_df(struct df_args *args)
 			data_free += ade[i].blocks;
 	}
 
+	if (meta_free >= sfm.reserved_meta_blocks)
+		meta_free -= sfm.reserved_meta_blocks;
+	else
+		meta_free = 0;
+
 	snprintf(cells[0][0], CHARS, "Type");
 	snprintf(cells[0][1], CHARS, "Size");
 	snprintf(cells[0][2], CHARS, "Total");
diff --git a/utils/src/mkfs.c b/utils/src/mkfs.c
index bcf07357..0abc9086 100644
--- a/utils/src/mkfs.c
+++ b/utils/src/mkfs.c
@@ -215,12 +215,14 @@ static int do_mkfs(struct mkfs_args *args)
 		goto out;
 	}
 
-	ret = device_size(args->meta_device, meta_fd, 2ULL * (1024 * 1024 * 1024),
+	/* minimum meta device size to make reserved blocks reasonably large */
+	ret = device_size(args->meta_device, meta_fd, 64ULL * (1024 * 1024 * 1024),
 			  args->max_meta_size, "meta", &meta_size);
 	if (ret)
 		goto out;
 
-	ret = device_size(args->data_device, data_fd, 8ULL * (1024 * 1024 * 1024),
+	/* .. then arbitrarily the same minimum data device size */
+	ret = device_size(args->data_device, data_fd, 64ULL * (1024 * 1024 * 1024),
 			  args->max_data_size, "data", &data_size);
 	if (ret)
 		goto out;
diff --git a/utils/src/print.c b/utils/src/print.c
index f4b51277..4c79a5fb 100644
--- a/utils/src/print.c
+++ b/utils/src/print.c
@@ -245,15 +245,15 @@ static int print_logs_item(struct scoutfs_key *key, void *val,
 	le64_to_cpu((p)->blkno), le64_to_cpu((p)->seq)
 
 #define AL_HEAD_F \
-	AL_REF_F" total_nr %llu first_nr %u"
+	AL_REF_F" total_nr %llu first_nr %u flags 0x%x"
 #define AL_HEAD_A(p) \
 	AL_REF_A(&(p)->ref), le64_to_cpu((p)->total_nr),\
-	le32_to_cpu((p)->first_nr)
+	le32_to_cpu((p)->first_nr), le32_to_cpu((p)->flags)
 
 #define ALCROOT_F \
-	BTROOT_F" total_len %llu"
+	BTROOT_F" total_len %llu flags 0x%x"
#define ALCROOT_A(ar) \
-	BTROOT_A(&(ar)->root), le64_to_cpu((ar)->total_len)
+	BTROOT_A(&(ar)->root), le64_to_cpu((ar)->total_len), le32_to_cpu((ar)->flags)
 
 #define SRE_FMT "%016llx.%llu.%llu"
 #define SRE_A(sre) \