diff --git a/kmod/src/alloc.c b/kmod/src/alloc.c index 92276395..d556112e 100644 --- a/kmod/src/alloc.c +++ b/kmod/src/alloc.c @@ -1272,9 +1272,15 @@ int scoutfs_alloc_foreach(struct super_block *sb, struct scoutfs_block_ref refs[2] = {{0,}}; struct scoutfs_super_block *super = NULL; struct scoutfs_srch_compact *sc; + struct scoutfs_log_merge_request *lmreq; + struct scoutfs_log_merge_complete *lmcomp; struct scoutfs_log_trees lt; SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_key key; + int expected; + u64 avail_tot; + u64 freed_tot; + u64 id; int ret; super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS); @@ -1381,6 +1387,57 @@ retry: scoutfs_key_inc(&key); } + /* log merge allocators */ + memset(&key, 0, sizeof(key)); + key.sk_zone = SCOUTFS_LOG_MERGE_REQUEST_ZONE; + expected = sizeof(*lmreq); + id = 0; + avail_tot = 0; + freed_tot = 0; + + for (;;) { + ret = scoutfs_btree_next(sb, &super->log_merge, &key, &iref); + if (ret == 0) { + if (iref.key->sk_zone != key.sk_zone) { + ret = -ENOENT; + } else if (iref.val_len == expected) { + key = *iref.key; + if (key.sk_zone == SCOUTFS_LOG_MERGE_REQUEST_ZONE) { + lmreq = iref.val; + id = le64_to_cpu(lmreq->rid); + avail_tot = le64_to_cpu(lmreq->meta_avail.total_nr); + freed_tot = le64_to_cpu(lmreq->meta_freed.total_nr); + } else { + lmcomp = iref.val; + id = le64_to_cpu(lmcomp->rid); + avail_tot = le64_to_cpu(lmcomp->meta_avail.total_nr); + freed_tot = le64_to_cpu(lmcomp->meta_freed.total_nr); + } + } else { + ret = -EIO; + } + scoutfs_btree_put_iref(&iref); + } + if (ret == -ENOENT) { + if (key.sk_zone == SCOUTFS_LOG_MERGE_REQUEST_ZONE) { + memset(&key, 0, sizeof(key)); + key.sk_zone = SCOUTFS_LOG_MERGE_COMPLETE_ZONE; + expected = sizeof(*lmcomp); + continue; + } + break; + } + if (ret < 0) + goto out; + + ret = cb(sb, arg, SCOUTFS_ALLOC_OWNER_LOG_MERGE, id, true, true, avail_tot) ?: + cb(sb, arg, SCOUTFS_ALLOC_OWNER_LOG_MERGE, id, true, false, freed_tot); + if (ret < 0) + goto out; + + scoutfs_key_inc(&key); + } + ret = 0; out: if (ret == -ESTALE) { diff --git a/kmod/src/alloc.h b/kmod/src/alloc.h index 1e245c5f..9130d086 100644 --- a/kmod/src/alloc.h +++ b/kmod/src/alloc.h @@ -55,6 +55,16 @@ #define SCOUTFS_SERVER_DATA_FILL_LO \ (1ULL * 1024 * 1024 * 1024 >> SCOUTFS_BLOCK_SM_SHIFT) +/* + * Log merge meta allocations are only used for one request and will + * never use more than the dirty limit. + */ +#define SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT (64ULL * 1024 * 1024) +/* a few extra blocks for alloc blocks */ +#define SCOUTFS_SERVER_MERGE_FILL_TARGET \ + ((SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT >> SCOUTFS_BLOCK_LG_SHIFT) + 4) +#define SCOUTFS_SERVER_MERGE_FILL_LO SCOUTFS_SERVER_MERGE_FILL_TARGET + /* * Each of the server meta_alloc roots will try to keep a minimum amount * of free blocks. 
The server will swap roots when its current avail diff --git a/kmod/src/btree.c b/kmod/src/btree.c index b9b02696..46989385 100644 --- a/kmod/src/btree.c +++ b/kmod/src/btree.c @@ -83,6 +83,10 @@ enum btree_walk_flags { BTW_ALLOC = (1 << 3), /* allocate a new block for 0 ref, requires dirty */ BTW_INSERT = (1 << 4), /* walking to insert, try splitting */ BTW_DELETE = (1 << 5), /* walking to delete, try joining */ + BTW_PAR_RNG = (1 << 6), /* return range through final parent */ + BTW_GET_PAR = (1 << 7), /* get reference to final parent */ + BTW_SET_PAR = (1 << 8), /* override reference to final parent */ + BTW_SUBTREE = (1 << 9), /* root is parent subtree, return -ERANGE if split/join */ }; /* total length of the value payload */ @@ -104,16 +108,22 @@ static inline unsigned int item_bytes(struct scoutfs_btree_item *item) } /* - * Join blocks when they both are 1/4 full. This puts some distance - * between the join threshold and the full threshold for splitting. - * Blocks that just split or joined need to undergo a reasonable amount - * of item modification before they'll split or join again. + * Refill blocks from their siblings when they're under 1/4 full. This + * puts some distance between the join threshold and the full threshold + * for splitting. Blocks that just split or joined need to undergo a + * reasonable amount of item modification before they'll split or join + * again. */ static unsigned int join_low_watermark(void) { return (SCOUTFS_BLOCK_LG_SIZE - sizeof(struct scoutfs_btree_block)) / 4; } +static bool total_above_join_low_water(struct scoutfs_btree_block *bt) +{ + return le16_to_cpu(bt->total_item_bytes) >= join_low_watermark(); +} + /* * return the integer percentages of total space the block could have * consumed by items that is currently consumed. 
@@ -512,6 +522,7 @@ static void create_item(struct scoutfs_btree_block *bt, item->val_off = insert_value(bt, ptr_off(bt, item), val, val_len); item->val_len = cpu_to_le16(val_len); + memset(item->__pad, 0, sizeof(item->__pad)); le16_add_cpu(&bt->total_item_bytes, item_bytes(item)); } @@ -805,12 +816,13 @@ static int try_join(struct super_block *sb, struct scoutfs_btree_block *sib; struct scoutfs_block *sib_bl; struct scoutfs_block_ref *ref; + const unsigned int lwm = join_low_watermark(); unsigned int sib_tot; bool move_right; int to_move; int ret; - if (le16_to_cpu(bt->total_item_bytes) >= join_low_watermark()) + if (total_above_join_low_water(bt)) return 0; scoutfs_inc_counter(sb, btree_join); @@ -830,18 +842,23 @@ static int try_join(struct super_block *sb, return ret; sib = sib_bl->data; - sib_tot = le16_to_cpu(bt->total_item_bytes); - if (sib_tot < join_low_watermark()) + /* combine if resulting block would be up to 75% full, move big chunk otherwise */ + sib_tot = le16_to_cpu(sib->total_item_bytes); + if (sib_tot <= lwm * 2) to_move = sib_tot; else - to_move = sib_tot - join_low_watermark(); + to_move = lwm; - if (le16_to_cpu(bt->mid_free_len) < to_move) { + /* compact to make room for over-estimate of worst case move overrun */ + if (le16_to_cpu(bt->mid_free_len) < + (to_move + item_len_bytes(SCOUTFS_BTREE_MAX_VAL_LEN))) { ret = compact_values(sb, bt); - if (ret < 0) + if (ret < 0) { scoutfs_block_put(sb, sib_bl); - return ret; + return ret; + } } + move_items(bt, sib, move_right, to_move); /* update our parent's item */ @@ -904,20 +921,21 @@ static bool bad_avl_node_off(__le16 node_off, int nr) * - call after leaf modification * - padding is zero */ -static void verify_btree_block(struct super_block *sb, +__attribute__((unused)) +static void verify_btree_block(struct super_block *sb, char *str, struct scoutfs_btree_block *bt, int level, - struct scoutfs_key *start, + bool last_ref, struct scoutfs_key *start, struct scoutfs_key *end) { __le16 *buckets = leaf_item_hash_buckets(bt); struct scoutfs_btree_item *item; + struct scoutfs_avl_node *node; char *reason = NULL; int first_val = 0; int hashed = 0; int end_off; int tot = 0; int i = 0; - int j = 0; int nr; if (bt->level != level) { @@ -956,8 +974,9 @@ static void verify_btree_block(struct super_block *sb, goto out; } - for (j = 0; j < sizeof(item->__pad); j++) { - WARN_ON_ONCE(item->__pad[j] != 0); + if (memchr_inv(item->__pad, '\0', sizeof(item->__pad))) { + reason = "item struct __pad isn't zero"; + goto out; } if (scoutfs_key_compare(&item->key, start) < 0 || @@ -972,19 +991,29 @@ static void verify_btree_block(struct super_block *sb, goto out; } + if (level > 0 && le16_to_cpu(item->val_len) != + sizeof(struct scoutfs_block_ref)) { + reason = "parent item val not sizeof ref"; + goto out; + } + if (le16_to_cpu(item->val_len) > SCOUTFS_BTREE_MAX_VAL_LEN) { reason = "bad item val len"; goto out; } + if (le16_to_cpu(item->val_off) % SCOUTFS_BTREE_VALUE_ALIGN) { + reason = "item value not aligned"; + goto out; + } + if (((int)le16_to_cpu(item->val_off) + le16_to_cpu(item->val_len)) > end_off) { reason = "item value outside valid"; goto out; } - tot += sizeof(struct scoutfs_btree_item) + - le16_to_cpu(item->val_len); + tot += item_len_bytes(le16_to_cpu(item->val_len)); if (item->val_len != 0) { first_val = min_t(int, first_val, @@ -992,6 +1021,15 @@ static void verify_btree_block(struct super_block *sb, } } + if (last_ref && level > 0 && + (node = scoutfs_avl_last(&bt->item_root)) != NULL) { + item = node_item(node); + if 
(scoutfs_key_compare(&item->key, end) != 0) { + reason = "final ref item key not range end"; + goto out; + } + } + for (i = 0; level == 0 && i < SCOUTFS_BTREE_LEAF_ITEM_HASH_NR; i++) { if (buckets[i] == 0) continue; @@ -1024,17 +1062,18 @@ out: if (!reason) return; - printk("found btree block inconsistency: %s\n", reason); - printk("start "SK_FMT" end "SK_FMT"\n", SK_ARG(start), SK_ARG(end)); + printk("verifying btree %s: %s\n", str, reason); + printk("args: level %u last_ref %u start "SK_FMT" end "SK_FMT"\n", + level, last_ref, SK_ARG(start), SK_ARG(end)); printk("calced: i %u tot %u hashed %u fv %u\n", i, tot, hashed, first_val); - printk("hdr: crc %x magic %x fsid %llx seq %llx blkno %llu\n", + printk("bt hdr: crc %x magic %x fsid %llx seq %llx blkno %llu\n", le32_to_cpu(bt->hdr.crc), le32_to_cpu(bt->hdr.magic), le64_to_cpu(bt->hdr.fsid), le64_to_cpu(bt->hdr.seq), le64_to_cpu(bt->hdr.blkno)); printk("item_root: node %u\n", le16_to_cpu(bt->item_root.node)); - printk("nr %u tib %u mfl %u lvl %u\n", + printk("bt: nr %u tib %u mfl %u lvl %u\n", le16_to_cpu(bt->nr_items), le16_to_cpu(bt->total_item_bytes), le16_to_cpu(bt->mid_free_len), bt->level); @@ -1051,6 +1090,92 @@ out: BUG(); } +/* + * Walk from the root to the leaf, verifying the blocks traversed. + */ +__attribute__((unused)) +static void verify_btree_walk(struct super_block *sb, char *str, + struct scoutfs_btree_root *root, + struct scoutfs_key *key) +{ + struct scoutfs_avl_node *next_node; + struct scoutfs_avl_node *node; + struct scoutfs_btree_item *item; + struct scoutfs_btree_item *prev; + struct scoutfs_block *bl = NULL; + struct scoutfs_btree_block *bt; + struct scoutfs_block_ref ref; + struct scoutfs_key start; + struct scoutfs_key end; + bool last_ref; + int level; + int ret; + + if (root->height == 0 && root->ref.blkno != 0) { + WARN_ONCE(1, "invalid btree root height %u blkno %llu seq %016llx\n", + root->height, le64_to_cpu(root->ref.blkno), + le64_to_cpu(root->ref.seq)); + return; + } + + if (root->height == 0) + return; + + scoutfs_key_set_zeros(&start); + scoutfs_key_set_ones(&end); + level = root->height; + ref = root->ref; + /* first parent last ref isn't all ones in subtrees */ + last_ref = false; + + while(level-- > 0) { + scoutfs_block_put(sb, bl); + bl = NULL; + ret = get_ref_block(sb, NULL, NULL, 0, &ref, &bl); + if (ret) { + printk("verifying btree %s: read error %d\n", + str, ret); + break; + } + bt = bl->data; + + verify_btree_block(sb, str, bt, level, last_ref, &start, &end); + + if (level == 0) + break; + + node = scoutfs_avl_search(&bt->item_root, cmp_key_item, key, + NULL, NULL, &next_node, NULL); + item = node_item(node ?: next_node); + + if (item == NULL) { + printk("verifying btree %s: no ref item\n", str); + printk("root: height %u blkno %llu seq %016llx\n", + root->height, le64_to_cpu(root->ref.blkno), + le64_to_cpu(root->ref.seq)); + printk("walk level %u start "SK_FMT" end "SK_FMT"\n", + level, SK_ARG(&start), SK_ARG(&end)); + + printk("block: level %u blkno %llu seq %016llx\n", + bt->level, le64_to_cpu(bt->hdr.blkno), + le64_to_cpu(bt->hdr.seq)); + printk("key: "SK_FMT"\n", SK_ARG(key)); + BUG(); + } + + if ((prev = prev_item(bt, item))) { + start = *item_key(prev); + scoutfs_key_inc(&start); + } + end = *item_key(item); + + memcpy(&ref, item_val(bt, item), sizeof(ref)); + last_ref = !next_item(bt, item); + } + + scoutfs_block_put(sb, bl); +} + struct btree_walk_key_range { struct scoutfs_key start; struct scoutfs_key end; @@ -1082,7 +1207,8 @@ static int btree_walk(struct super_block *sb, int 
flags, struct scoutfs_key *key, unsigned int val_len, struct scoutfs_block **bl_ret, - struct btree_walk_key_range *kr) + struct btree_walk_key_range *kr, + struct scoutfs_btree_root *par_root) { struct scoutfs_block *par_bl = NULL; struct scoutfs_block *bl = NULL; @@ -1098,7 +1224,9 @@ static int btree_walk(struct super_block *sb, unsigned int nr; int ret; - if (WARN_ON_ONCE((flags & BTW_DIRTY) && (!alloc || !wri))) + if (WARN_ON_ONCE((flags & BTW_DIRTY) && (!alloc || !wri)) || + WARN_ON_ONCE((flags & BTW_PAR_RNG) && !kr) || + WARN_ON_ONCE((flags & (BTW_GET_PAR|BTW_SET_PAR)) && !par_root)) return -EINVAL; /* all ops come through walk and walk calls all reads */ @@ -1125,7 +1253,14 @@ restart: ret = 0; if (!root->height) { - if (!(flags & BTW_INSERT)) { + if (flags & BTW_GET_PAR) { + memset(par_root, 0, sizeof(*par_root)); + *root = *par_root; + ret = 0; + } else if (flags & BTW_SET_PAR) { + *root = *par_root; + ret = 0; + } else if (!(flags & BTW_INSERT)) { ret = -ENOENT; } else { ret = get_ref_block(sb, alloc, wri, BTW_ALLOC | BTW_DIRTY, &root->ref, &bl); @@ -1144,14 +1279,40 @@ restart: trace_scoutfs_btree_walk(sb, root, key, flags, level, ref); + /* par range set by ref to last parent block */ + if (level < 2 && (flags & BTW_PAR_RNG)) { + ret = 0; + break; + } + + if (level < 2 && (flags & BTW_GET_PAR)) { + par_root->ref = *ref; + par_root->height = level + 1; + ret = 0; + break; + } + + if (level < 2 && (flags & BTW_SET_PAR)) { + if (ref == &root->ref) { + /* single parent block is replaced, can shrink/grow */ + *root = *par_root; + } else { + /* subtree replacing one of parents must match height */ + if (par_root->height != level + 1) { + ret = -EINVAL; + break; + } + *ref = par_root->ref; + } + ret = 0; + break; + } + ret = get_ref_block(sb, alloc, wri, flags, ref, &bl); if (ret) break; bt = bl->data; - if (0 && kr) - verify_btree_block(sb, bt, level, &kr->start, &kr->end); - /* XXX more aggressive block verification, before ref updates? */ if (bt->level != level) { scoutfs_corruption(sb, SC_BTREE_BLOCK_LEVEL, @@ -1167,6 +1328,17 @@ restart: break; } + /* + * join/split won't check the subtree parent root; let the + * caller know when it needs to be split or joined. 
+ */ + if ((flags & BTW_SUBTREE) && level == 1 && + (!total_above_join_low_water(bt) || + !mid_free_item_room(bt, sizeof(struct scoutfs_block_ref)))) { + ret = -ERANGE; + break; + } + /* * Splitting and joining can add or remove parents or * change the parent item we use to reach the child @@ -1292,7 +1464,7 @@ int scoutfs_btree_lookup(struct super_block *sb, if (WARN_ON_ONCE(iref->key)) return -EINVAL; - ret = btree_walk(sb, NULL, NULL, root, 0, key, 0, &bl, NULL); + ret = btree_walk(sb, NULL, NULL, root, 0, key, 0, &bl, NULL, NULL); if (ret == 0) { bt = bl->data; @@ -1344,7 +1516,7 @@ int scoutfs_btree_insert(struct super_block *sb, return -EINVAL; ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY | BTW_INSERT, key, - val_len, &bl, NULL); + val_len, &bl, NULL, NULL); if (ret == 0) { bt = bl->data; @@ -1406,7 +1578,7 @@ int scoutfs_btree_update(struct super_block *sb, return -EINVAL; ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY | BTW_INSERT, key, - val_len, &bl, NULL); + val_len, &bl, NULL, NULL); if (ret == 0) { bt = bl->data; @@ -1448,7 +1620,7 @@ int scoutfs_btree_force(struct super_block *sb, return -EINVAL; ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY | BTW_INSERT, key, - val_len, &bl, NULL); + val_len, &bl, NULL, NULL); if (ret == 0) { bt = bl->data; @@ -1486,7 +1658,7 @@ int scoutfs_btree_delete(struct super_block *sb, scoutfs_inc_counter(sb, btree_delete); ret = btree_walk(sb, alloc, wri, root, BTW_DELETE | BTW_DIRTY, key, - 0, &bl, NULL); + 0, &bl, NULL, NULL); if (ret == 0) { bt = bl->data; @@ -1550,7 +1722,7 @@ static int btree_iter(struct super_block *sb,struct scoutfs_btree_root *root, for (;;) { ret = btree_walk(sb, NULL, NULL, root, flags, &walk_key, - 0, &bl, &kr); + 0, &bl, &kr, NULL); if (ret < 0) break; bt = bl->data; @@ -1623,7 +1795,8 @@ int scoutfs_btree_dirty(struct super_block *sb, scoutfs_inc_counter(sb, btree_dirty); - ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY, key, 0, &bl, NULL); + ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY, key, 0, &bl, + NULL, NULL); if (ret == 0) { bt = bl->data; @@ -1659,7 +1832,7 @@ int scoutfs_btree_read_items(struct super_block *sb, struct scoutfs_block *bl; int ret; - ret = btree_walk(sb, NULL, NULL, root, 0, key, 0, &bl, &kr); + ret = btree_walk(sb, NULL, NULL, root, 0, key, 0, &bl, &kr, NULL); if (ret < 0) goto out; bt = bl->data; @@ -1714,7 +1887,7 @@ int scoutfs_btree_insert_list(struct super_block *sb, while (lst) { ret = btree_walk(sb, alloc, wri, root, BTW_DIRTY | BTW_INSERT, - &lst->key, lst->val_len, &bl, &kr); + &lst->key, lst->val_len, &bl, &kr, NULL); if (ret < 0) goto out; bt = bl->data; @@ -1742,3 +1915,542 @@ int scoutfs_btree_insert_list(struct super_block *sb, out: return ret; } + +/* + * Descend towards the leaf that would contain the key. As we arrive at + * the last parent block, set start and end to the range of keys that + * could be found through traversal of that last parent. + * + * If the tree is too short for parent blocks then the max key range + * is returned. 
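+ * + * A hedged usage sketch, not part of this patch -- the root and key + * pointers are assumed to come from the caller's context and error + * handling is elided: + * + * struct scoutfs_key start; + * struct scoutfs_key end; + * ret = scoutfs_btree_parent_range(sb, root, key, &start, &end); + * + * On success every key reachable through the final parent block falls + * within [start, end], so a caller can carve up work along parent + * block boundaries.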
+ */ +int scoutfs_btree_parent_range(struct super_block *sb, + struct scoutfs_btree_root *root, + struct scoutfs_key *key, + struct scoutfs_key *start, + struct scoutfs_key *end) +{ + struct btree_walk_key_range kr; + int ret; + + ret = btree_walk(sb, NULL, NULL, root, BTW_PAR_RNG, key, 0, NULL, + &kr, NULL); + if (ret == -ENOENT) + ret = 0; + + *start = kr.start; + *end = kr.end; + return ret; +} + +/* + * Initialize the caller's root as a subtree whose ref points to the + * last parent found as we traverse towards the leaf containing the key. + * If the tree is too small to have multiple blocks at the final parent + * level then the caller's root will be initialized to equal the full + * input root. If the tree is empty then the par root will also be empty. + */ +int scoutfs_btree_get_parent(struct super_block *sb, + struct scoutfs_btree_root *root, + struct scoutfs_key *key, + struct scoutfs_btree_root *par_root) +{ + return btree_walk(sb, NULL, NULL, root, BTW_GET_PAR, key, 0, NULL, + NULL, par_root); +} + +/* + * Dirty a path towards the leaf block containing the key. As we reach + * the reference to the final parent block override it with the ref in + * the caller's root. If the tree only has a single block at the final + * parent level, or a single leaf block, then the entire tree is + * replaced with the caller's root. + * + * This manages allocs and frees while dirtying blocks in the path to + * the ref, but it doesn't account for allocating the blocks that are + * referenced by the ref nor freeing blocks referenced by the old ref + * that's overwritten. Keeping allocators in sync with the result of + * the ref override is the responsibility of the caller. + */ +int scoutfs_btree_set_parent(struct super_block *sb, + struct scoutfs_alloc *alloc, + struct scoutfs_block_writer *wri, + struct scoutfs_btree_root *root, + struct scoutfs_key *key, + struct scoutfs_btree_root *par_root) +{ + + trace_scoutfs_btree_set_parent(sb, root, key, par_root); + + return btree_walk(sb, alloc, wri, root, BTW_DIRTY | BTW_SET_PAR, + key, 0, NULL, NULL, par_root); +} + +/* + * Descend to the leaf, making sure that all the blocks conform to the + * balance constraints. Blocks below the low threshold will be joined. + * This is called to split blocks that were too large for insertions, + * but those insertions were in a distant context and we don't bother + * communicating the val_len back here. We just try to insert a max + * value. + * + * This always dirties all the way to the leaf. It could be made more + * efficient with more btree walk flags to walk and check for blocks + * that need balancing, and then walks that don't dirty unless they need + * to join/split. + */ +int scoutfs_btree_rebalance(struct super_block *sb, + struct scoutfs_alloc *alloc, + struct scoutfs_block_writer *wri, + struct scoutfs_btree_root *root, + struct scoutfs_key *key) +{ + return btree_walk(sb, alloc, wri, root, + BTW_DIRTY | BTW_INSERT | BTW_DELETE, + key, SCOUTFS_BTREE_MAX_VAL_LEN, NULL, NULL, NULL); +} + +struct merge_pos { + struct rb_node node; + struct scoutfs_btree_root *root; + struct scoutfs_key key; + unsigned int val_len; + u8 val[SCOUTFS_BTREE_MAX_VAL_LEN]; +}; + +/* + * Find the next item in the mpos's root after its key and make sure + * that it's in its sorted position in the rbtree. We're responsible + * for freeing the mpos if we don't put it back in the pos_root. This + * happens naturally when its item_root has no more items to + * merge. 
+ */ +static int reset_mpos(struct super_block *sb, struct rb_root *pos_root, + struct merge_pos *mpos, struct scoutfs_key *end, + scoutfs_btree_merge_cmp_t merge_cmp) +{ + SCOUTFS_BTREE_ITEM_REF(iref); + struct merge_pos *walk; + struct rb_node *parent; + struct rb_node **node; + int key_cmp; + int val_cmp; + int ret; + +restart: + if (!RB_EMPTY_NODE(&mpos->node)) { + rb_erase(&mpos->node, pos_root); + RB_CLEAR_NODE(&mpos->node); + } + + /* find the next item in the root within end */ + ret = scoutfs_btree_next(sb, mpos->root, &mpos->key, &iref); + if (ret == 0) { + if (scoutfs_key_compare(iref.key, end) > 0) { + ret = -ENOENT; + } else { + mpos->key = *iref.key; + mpos->val_len = iref.val_len; + memcpy(mpos->val, iref.val, iref.val_len); + } + scoutfs_btree_put_iref(&iref); + } + if (ret < 0) { + kfree(mpos); + if (ret == -ENOENT) + ret = 0; + goto out; + } + +rewalk: + /* sort merge items by key then oldest to newest */ + node = &pos_root->rb_node; + parent = NULL; + while (*node) { + parent = *node; + walk = container_of(*node, struct merge_pos, node); + + key_cmp = scoutfs_key_compare(&mpos->key, &walk->key); + val_cmp = merge_cmp(mpos->val, mpos->val_len, + walk->val, walk->val_len); + + /* drop old versions of logged keys as we discover them */ + if (key_cmp == 0) { + scoutfs_inc_counter(sb, btree_merge_drop_old); + if (val_cmp < 0) { + scoutfs_key_inc(&mpos->key); + goto restart; + } else { + BUG_ON(val_cmp == 0); + rb_erase(&walk->node, pos_root); + kfree(walk); + goto rewalk; + } + } + + if ((key_cmp ?: val_cmp) < 0) + node = &(*node)->rb_left; + else + node = &(*node)->rb_right; + } + + rb_link_node(&mpos->node, parent, node); + rb_insert_color(&mpos->node, pos_root); + ret = 0; +out: + return ret; +} + +static struct merge_pos *first_mpos(struct rb_root *root) +{ + struct rb_node *node = rb_first(root); + if (node) + return container_of(node, struct merge_pos, node); + return NULL; +} + +/* + * Merge items from a number of read-only input roots into a writable + * destination root. The order of the input roots doesn't matter; the + * items are merged in sorted key order. + * + * The merge_cmp callback determines the order that the input items are + * merged in. The merge_is_del callback determines if a merging item + * should be removed from the destination. + * + * subtree indicates that the destination root is in fact one of many + * parent blocks and shouldn't be split or allowed to fall below the + * join low water mark. + * + * drop_val indicates the initial length of the value that should be + * dropped when merging items into destination items. + * + * -ERANGE is returned if the merge doesn't fully exhaust the range, due + * to allocators running low or needing to join/split the parent. + * *next_ret is set to the next key which hasn't been merged so that the + * caller can retry with a new allocator and subtree. 
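+ * + * A hedged retry sketch, illustrative only -- writing the dirty blocks + * and refilling the allocator between passes is the caller's job and + * is elided: + * + * struct scoutfs_key next; + * ret = scoutfs_btree_merge(sb, alloc, wri, &start, &end, &next, + * &root, &inputs, merge_cmp, merge_is_del, + * true, drop_val, dirty_limit, alloc_low); + * if (ret == -ERANGE) + * start = next; + * + * After -ERANGE the caller commits what was dirtied and calls again + * starting from next.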
+ */ +int scoutfs_btree_merge(struct super_block *sb, + struct scoutfs_alloc *alloc, + struct scoutfs_block_writer *wri, + struct scoutfs_key *start, + struct scoutfs_key *end, + struct scoutfs_key *next_ret, + struct scoutfs_btree_root *root, + struct list_head *inputs, + scoutfs_btree_merge_cmp_t merge_cmp, + scoutfs_btree_merge_is_del_t merge_is_del, bool subtree, + int drop_val, int dirty_limit, int alloc_low) +{ + struct scoutfs_btree_root_head *rhead; + struct rb_root pos_root = RB_ROOT; + struct scoutfs_btree_item *item; + struct scoutfs_btree_block *bt; + struct scoutfs_block *bl = NULL; + struct btree_walk_key_range kr; + struct scoutfs_avl_node *par; + struct merge_pos *mpos; + struct merge_pos *tmp; + int walk_val_len; + int walk_flags; + bool is_del; + int cmp; + int ret; + + trace_scoutfs_btree_merge(sb, root, start, end); + scoutfs_inc_counter(sb, btree_merge); + + list_for_each_entry(rhead, inputs, head) { + mpos = kmalloc(sizeof(*mpos), GFP_NOFS); + if (!mpos) { + ret = -ENOMEM; + goto out; + } + + RB_CLEAR_NODE(&mpos->node); + mpos->key = *start; + mpos->root = &rhead->root; + + ret = reset_mpos(sb, &pos_root, mpos, end, merge_cmp); + if (ret < 0) + goto out; + } + + walk_flags = BTW_DIRTY; + if (subtree) + walk_flags |= BTW_SUBTREE; + walk_val_len = 0; + + while ((mpos = first_mpos(&pos_root))) { + + if (scoutfs_block_writer_dirty_bytes(sb, wri) >= dirty_limit) { + scoutfs_inc_counter(sb, btree_merge_dirty_limit); + ret = -ERANGE; + *next_ret = mpos->key; + goto out; + } + + if (scoutfs_alloc_meta_low(sb, alloc, alloc_low)) { + scoutfs_inc_counter(sb, btree_merge_alloc_low); + ret = -ERANGE; + *next_ret = mpos->key; + goto out; + } + + scoutfs_block_put(sb, bl); + bl = NULL; + ret = btree_walk(sb, alloc, wri, root, walk_flags, + &mpos->key, walk_val_len, &bl, &kr, NULL); + if (ret < 0) { + if (ret == -ERANGE) + *next_ret = mpos->key; + goto out; + } + bt = bl->data; + scoutfs_inc_counter(sb, btree_merge_walk); + + for (; mpos; mpos = first_mpos(&pos_root)) { + + /* val must have at least what we need to drop */ + if (mpos->val_len < drop_val) { + ret = -EIO; + goto out; + } + + /* walk to new leaf if we exceed parent ref key */ + if (scoutfs_key_compare(&mpos->key, &kr.end) > 0) + break; + + /* see if there's an existing item */ + item = leaf_item_hash_search(sb, bt, &mpos->key); + is_del = merge_is_del(mpos->val, mpos->val_len); + + trace_scoutfs_btree_merge_items(sb, mpos->root, + &mpos->key, mpos->val_len, + item ? root : NULL, + item ? item_key(item) : NULL, + item ? 
item_val_len(item) : 0, is_del); + + /* rewalk and split if ins/update needs room */ + if (!is_del && !mid_free_item_room(bt, mpos->val_len)) { + walk_flags |= BTW_INSERT; + walk_val_len = mpos->val_len; + break; + } + + /* insert missing non-deletion merge items */ + if (!item && !is_del) { + scoutfs_avl_search(&bt->item_root, + cmp_key_item, &mpos->key, + &cmp, &par, NULL, NULL); + create_item(bt, &mpos->key, + mpos->val + drop_val, + mpos->val_len - drop_val, par, cmp); + scoutfs_inc_counter(sb, btree_merge_insert); + } + + /* update existing items */ + if (item && !is_del) { + update_item_value(bt, item, + mpos->val + drop_val, + mpos->val_len - drop_val); + scoutfs_inc_counter(sb, btree_merge_update); + } + + /* delete if merge item was deletion */ + if (item && is_del) { + /* rewalk and join if non-root falls under low water mark */ + if (root->ref.blkno != bt->hdr.blkno && + !total_above_join_low_water(bt)) { + walk_flags |= BTW_DELETE; + break; + } + delete_item(bt, item, NULL); + scoutfs_inc_counter(sb, btree_merge_delete); + } + + /* reset walk args now that we didn't need to split/join */ + walk_flags &= ~(BTW_INSERT | BTW_DELETE); + walk_val_len = 0; + + /* finished with this merge item */ + scoutfs_key_inc(&mpos->key); + ret = reset_mpos(sb, &pos_root, mpos, end, merge_cmp); + if (ret < 0) + goto out; + mpos = NULL; + } + } + + ret = 0; +out: + scoutfs_block_put(sb, bl); + rbtree_postorder_for_each_entry_safe(mpos, tmp, &pos_root, node) { + kfree(mpos); + } + + return ret; +} + +/* + * Free all the blocks referenced by a btree. The btree is only read; + * this does not update the blocks as it frees. The caller ensures that + * these btrees aren't being modified. + * + * The caller's key tracks which blocks have been freed. It must be + * initialized to zeros before the first call to start freeing blocks. + * Once a block is freed the key is updated such that the freed block + * will not be read again. + * + * Returns 0 when progress has been made successfully, which includes + * partial progress. The key is set to all ones once we've freed all + * the blocks. + * + * This works by descending to the last parent block and freeing all its + * leaf blocks without reading them. As it descends it remembers the + * number of parent blocks which were traversed through their final + * child ref. If we free all the leaf blocks then all these parent + * blocks are no longer needed and can be freed. The caller's key is + * updated past the subtree that we just freed, and we retry the + * descent from the root through the next set of parents to the next set + * of leaf blocks to free. 
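+ * + * A hedged caller loop, illustrative only -- committing dirty blocks + * and refilling the allocator between calls is elided: + * + * struct scoutfs_key key; + * scoutfs_key_set_zeros(&key); + * while (!scoutfs_key_is_ones(&key)) { + * ret = scoutfs_btree_free_blocks(sb, alloc, wri, &key, + * &root, alloc_low); + * if (ret < 0) + * break; + * }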
+ */ +int scoutfs_btree_free_blocks(struct super_block *sb, + struct scoutfs_alloc *alloc, + struct scoutfs_block_writer *wri, + struct scoutfs_key *key, + struct scoutfs_btree_root *root, int alloc_low) +{ + u64 blknos[SCOUTFS_BTREE_MAX_HEIGHT]; + struct scoutfs_block *bl = NULL; + struct scoutfs_btree_item *item; + struct scoutfs_btree_block *bt; + struct scoutfs_block_ref ref; + struct scoutfs_avl_node *node; + struct scoutfs_avl_node *next; + struct scoutfs_key par_next; + int nr_par; + int level; + int ret; + int i; + + if (WARN_ON_ONCE(root->height > ARRAY_SIZE(blknos))) + return -EIO; /* XXX corruption */ + + if (root->height == 0) { + scoutfs_key_set_ones(key); + return 0; + } + + if (scoutfs_key_is_ones(key)) + return 0; + + /* just free a single leaf block */ + if (root->height == 1) { + ret = scoutfs_free_meta(sb, alloc, wri, + le64_to_cpu(root->ref.blkno)); + if (ret == 0) { + trace_scoutfs_btree_free_blocks_single(sb, root, + le64_to_cpu(root->ref.blkno)); + scoutfs_key_set_ones(key); + } + goto out; + } + + for (;;) { + /* start the walk at the root block */ + level = root->height - 1; + ref = root->ref; + scoutfs_key_set_ones(&par_next); + nr_par = 0; + + /* read blocks until we read the last parent */ + for (;;) { + scoutfs_block_put(sb, bl); + bl = NULL; + ret = get_ref_block(sb, alloc, wri, 0, &ref, &bl); + if (ret < 0) + goto out; + bt = bl->data; + + node = scoutfs_avl_search(&bt->item_root, cmp_key_item, + key, NULL, NULL, &next, NULL); + if (node == NULL) + node = next; + + /* should never descend into parent with no more refs */ + if (WARN_ON_ONCE(node == NULL)) { + ret = -EIO; + goto out; + } + + /* we'll free refs in the last parent */ + if (level == 1) + break; + + item = node_item(node); + next = scoutfs_avl_next(&bt->item_root, node); + if (next) { + /* didn't take last ref, still need parents */ + nr_par = 0; + par_next = *item_key(item); + scoutfs_key_inc(&par_next); + } else { + /* final ref, could free after all leaves */ + blknos[nr_par++] = le64_to_cpu(bt->hdr.blkno); + } + + memcpy(&ref, item_val(bt, item), sizeof(ref)); + level--; + } + + /* free all leaf block refs in last parent */ + while (node) { + + /* make sure we can always free parents after leaves */ + if (scoutfs_alloc_meta_low(sb, alloc, + alloc_low + nr_par + 1)) { + ret = 0; + goto out; + } + + item = node_item(node); + memcpy(&ref, item_val(bt, item), sizeof(ref)); + + trace_scoutfs_btree_free_blocks_leaf(sb, root, + le64_to_cpu(ref.blkno)); + ret = scoutfs_free_meta(sb, alloc, wri, + le64_to_cpu(ref.blkno)); + if (ret < 0) + goto out; + + node = scoutfs_avl_next(&bt->item_root, node); + if (node) { + /* done with keys in child we just freed */ + *key = *item_key(item); + scoutfs_key_inc(key); + } + } + + /* now that leaves are freed, free any empty parents */ + for (i = 0; i < nr_par; i++) { + trace_scoutfs_btree_free_blocks_parent(sb, root, + blknos[i]); + ret = scoutfs_free_meta(sb, alloc, wri, blknos[i]); + BUG_ON(ret); /* checked meta low, freed should fit */ + } + + /* restart walk past the subtree we just freed */ + *key = par_next; + + /* but done if we just freed all parents down right spine */ + if (scoutfs_key_is_ones(&par_next)) { + ret = 0; + goto out; + } + } + +out: + scoutfs_block_put(sb, bl); + return ret; +} diff --git a/kmod/src/btree.h b/kmod/src/btree.h index 79d4de58..3d27fec2 100644 --- a/kmod/src/btree.h +++ b/kmod/src/btree.h @@ -82,6 +82,58 @@ int scoutfs_btree_insert_list(struct super_block *sb, struct scoutfs_btree_root *root, struct scoutfs_btree_item_list 
*lst); +int scoutfs_btree_parent_range(struct super_block *sb, + struct scoutfs_btree_root *root, + struct scoutfs_key *key, + struct scoutfs_key *start, + struct scoutfs_key *end); +int scoutfs_btree_get_parent(struct super_block *sb, + struct scoutfs_btree_root *root, + struct scoutfs_key *key, + struct scoutfs_btree_root *par_root); +int scoutfs_btree_set_parent(struct super_block *sb, + struct scoutfs_alloc *alloc, + struct scoutfs_block_writer *wri, + struct scoutfs_btree_root *root, + struct scoutfs_key *key, + struct scoutfs_btree_root *par_root); +int scoutfs_btree_rebalance(struct super_block *sb, + struct scoutfs_alloc *alloc, + struct scoutfs_block_writer *wri, + struct scoutfs_btree_root *root, + struct scoutfs_key *key); + +/* merge input is a list of roots */ +struct scoutfs_btree_root_head { + struct list_head head; + struct scoutfs_btree_root root; +}; +/* + * Compare the values of merge input items whose keys are equal to + * determine their merge order. + */ +typedef int (*scoutfs_btree_merge_cmp_t)(void *a_val, int a_val_len, + void *b_val, int b_val_len); +/* whether merging item should be removed from destination */ +typedef bool (*scoutfs_btree_merge_is_del_t)(void *val, int val_len); +int scoutfs_btree_merge(struct super_block *sb, + struct scoutfs_alloc *alloc, + struct scoutfs_block_writer *wri, + struct scoutfs_key *start, + struct scoutfs_key *end, + struct scoutfs_key *next_ret, + struct scoutfs_btree_root *root, + struct list_head *input_list, + scoutfs_btree_merge_cmp_t merge_cmp, + scoutfs_btree_merge_is_del_t merge_is_del, bool subtree, + int drop_val, int dirty_limit, int alloc_low); + +int scoutfs_btree_free_blocks(struct super_block *sb, + struct scoutfs_alloc *alloc, + struct scoutfs_block_writer *wri, + struct scoutfs_key *key, + struct scoutfs_btree_root *root, int alloc_low); + void scoutfs_btree_put_iref(struct scoutfs_btree_item_ref *iref); #endif diff --git a/kmod/src/client.c b/kmod/src/client.c index 7a4b4322..68fe4736 100644 --- a/kmod/src/client.c +++ b/kmod/src/client.c @@ -217,6 +217,26 @@ int scoutfs_client_srch_commit_compact(struct super_block *sb, res, sizeof(*res), NULL, 0); } +int scoutfs_client_get_log_merge(struct super_block *sb, + struct scoutfs_log_merge_request *req) +{ + struct client_info *client = SCOUTFS_SB(sb)->client_info; + + return scoutfs_net_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_GET_LOG_MERGE, + NULL, 0, req, sizeof(*req)); +} + +int scoutfs_client_commit_log_merge(struct super_block *sb, + struct scoutfs_log_merge_complete *comp) +{ + struct client_info *client = SCOUTFS_SB(sb)->client_info; + + return scoutfs_net_sync_request(sb, client->conn, + SCOUTFS_NET_CMD_COMMIT_LOG_MERGE, + comp, sizeof(*comp), NULL, 0); +} + int scoutfs_client_send_omap_response(struct super_block *sb, u64 id, struct scoutfs_open_ino_map *map) { diff --git a/kmod/src/client.h b/kmod/src/client.h index f8866abd..1cbcbc1d 100644 --- a/kmod/src/client.h +++ b/kmod/src/client.h @@ -22,6 +22,10 @@ int scoutfs_client_srch_get_compact(struct super_block *sb, struct scoutfs_srch_compact *sc); int scoutfs_client_srch_commit_compact(struct super_block *sb, struct scoutfs_srch_compact *res); +int scoutfs_client_get_log_merge(struct super_block *sb, + struct scoutfs_log_merge_request *req); +int scoutfs_client_commit_log_merge(struct super_block *sb, + struct scoutfs_log_merge_complete *comp); int scoutfs_client_send_omap_response(struct super_block *sb, u64 id, struct scoutfs_open_ino_map *map); int scoutfs_client_open_ino_map(struct 
super_block *sb, u64 group_nr, diff --git a/kmod/src/counters.h b/kmod/src/counters.h index 7cb5a331..9e9e9f5e 100644 --- a/kmod/src/counters.h +++ b/kmod/src/counters.h @@ -44,6 +44,14 @@ EXPAND_COUNTER(btree_insert) \ EXPAND_COUNTER(btree_leaf_item_hash_search) \ EXPAND_COUNTER(btree_lookup) \ + EXPAND_COUNTER(btree_merge) \ + EXPAND_COUNTER(btree_merge_alloc_low) \ + EXPAND_COUNTER(btree_merge_delete) \ + EXPAND_COUNTER(btree_merge_dirty_limit) \ + EXPAND_COUNTER(btree_merge_drop_old) \ + EXPAND_COUNTER(btree_merge_insert) \ + EXPAND_COUNTER(btree_merge_update) \ + EXPAND_COUNTER(btree_merge_walk) \ EXPAND_COUNTER(btree_next) \ EXPAND_COUNTER(btree_prev) \ EXPAND_COUNTER(btree_split) \ diff --git a/kmod/src/forest.c b/kmod/src/forest.c index 9047c223..37be80a0 100644 --- a/kmod/src/forest.c +++ b/kmod/src/forest.c @@ -37,9 +37,9 @@ * * The log btrees are modified by multiple transactions over time so * there is no consistent ordering relationship between the items in - * different btrees. Each item in a log btree stores a version number - * for the item. Readers check log btrees for the most recent version - * that it should use. + * different btrees. Each item in a log btree stores a seq for the + * item. Readers check log btrees for the most recent seq that they + * should use. * * The item cache reads items in bulk from stable btrees, and writes a * transaction's worth of dirty items into the item log btree. @@ -52,6 +52,8 @@ */ struct forest_info { + struct super_block *sb; + struct mutex mutex; struct scoutfs_alloc *alloc; struct scoutfs_block_writer *wri; @@ -60,6 +62,9 @@ struct forest_info { struct mutex srch_mutex; struct scoutfs_srch_file srch_file; struct scoutfs_block *srch_bl; + + struct workqueue_struct *workq; + struct delayed_work log_merge_dwork; }; #define DECLARE_FOREST_INFO(sb, name) \ @@ -249,7 +254,7 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, * If we hit stale blocks and retry we can call the callback for * duplicate items. This is harmless because the items are stable while * the caller holds their cluster lock and the caller has to filter out - * item versions anyway. + * item seqs anyway. */ int scoutfs_forest_read_items(struct super_block *sb, struct scoutfs_lock *lock, @@ -426,29 +431,29 @@ out: /* * The caller is commiting items in the transaction and has found the - * greatest item version amongst them. We store it in the log_trees root + * greatest item seq amongst them. We store it in the log_trees root * to send to the server. */ -void scoutfs_forest_set_max_vers(struct super_block *sb, u64 max_vers) +void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq) { DECLARE_FOREST_INFO(sb, finf); - finf->our_log.max_item_vers = cpu_to_le64(max_vers); + finf->our_log.max_item_seq = cpu_to_le64(max_seq); } /* - * The server is calling during setup to find the greatest item version + * The server is calling during setup to find the greatest item seq * amongst all the log tree roots. They have the authoritative current * super. * - * Item versions are only used to compare items in log trees, not in the - * main fs tree. All we have to do is find the greatest version amongst - * the log_trees so that new locks will have a write_version greater - * than all the items in the log_trees. + * Item seqs are only used to compare items in log trees, not in the + * main fs tree. 
All we have to do is find the greatest seq amongst the + * log_trees so that the core seq will be greater than all the + * item seqs in the log_trees. */ -int scoutfs_forest_get_max_vers(struct super_block *sb, - struct scoutfs_super_block *super, - u64 *vers) +int scoutfs_forest_get_max_seq(struct super_block *sb, + struct scoutfs_super_block *super, + u64 *seq) { struct scoutfs_log_trees *lt; SCOUTFS_BTREE_ITEM_REF(iref); @@ -456,7 +461,7 @@ int scoutfs_forest_get_max_vers(struct super_block *sb, int ret; scoutfs_key_init_log_trees(&ltk, 0, 0); - *vers = 0; + *seq = 0; for (;; scoutfs_key_inc(&ltk)) { ret = scoutfs_btree_next(sb, &super->logs_root, &ltk, &iref); @@ -464,8 +469,7 @@ int scoutfs_forest_get_max_vers(struct super_block *sb, if (iref.val_len == sizeof(struct scoutfs_log_trees)) { ltk = *iref.key; lt = iref.val; - *vers = max(*vers, - le64_to_cpu(lt->max_item_vers)); + *seq = max(*seq, le64_to_cpu(lt->max_item_seq)); } else { ret = -EIO; } @@ -534,7 +538,7 @@ void scoutfs_forest_init_btrees(struct super_block *sb, memset(&finf->our_log, 0, sizeof(finf->our_log)); finf->our_log.item_root = lt->item_root; finf->our_log.bloom_ref = lt->bloom_ref; - finf->our_log.max_item_vers = lt->max_item_vers; + finf->our_log.max_item_seq = lt->max_item_seq; finf->our_log.rid = lt->rid; finf->our_log.nr = lt->nr; finf->srch_file = lt->srch_file; @@ -564,7 +568,7 @@ void scoutfs_forest_get_btrees(struct super_block *sb, lt->item_root = finf->our_log.item_root; lt->bloom_ref = finf->our_log.bloom_ref; lt->srch_file = finf->srch_file; - lt->max_item_vers = finf->our_log.max_item_vers; + lt->max_item_seq = finf->our_log.max_item_seq; scoutfs_block_put(sb, finf->srch_bl); finf->srch_bl = NULL; @@ -573,6 +577,149 @@ void scoutfs_forest_get_btrees(struct super_block *sb, &lt->bloom_ref); } +/* + * Compare input items to merge by their log item value seq when their + * keys match. + */ +static int merge_cmp(void *a_val, int a_val_len, void *b_val, int b_val_len) +{ + struct scoutfs_log_item_value *a = a_val; + struct scoutfs_log_item_value *b = b_val; + + /* sort merge items by seq */ + return scoutfs_cmp(le64_to_cpu(a->seq), le64_to_cpu(b->seq)); +} + +static bool merge_is_del(void *val, int val_len) +{ + struct scoutfs_log_item_value *liv = val; + + return !!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION); +} + +#define LOG_MERGE_DELAY_MS (5 * MSEC_PER_SEC) + +/* + * Regularly try to get a log merge request from the server. If we get + * a request we walk the log_trees items to find input trees and pass + * them to btree_merge. All of our work is done in dirty blocks + * allocated from available free blocks that the server gave us. If we + * hit an error then we drop our dirty blocks without writing them and + * send an error flag to the server so it can reclaim our allocators + * and ignore the rest of our work. 
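+ * + * The round trip pairs the two client RPCs added in this patch; a + * rough sketch with the merge itself and error handling elided: + * + * struct scoutfs_log_merge_request req; + * struct scoutfs_log_merge_complete comp; + * ret = scoutfs_client_get_log_merge(sb, &req); + * (merge using the allocators in req.meta_avail and req.meta_freed) + * ret = scoutfs_client_commit_log_merge(sb, &comp); + * + * On error the completion is still sent with + * SCOUTFS_LOG_MERGE_COMP_ERROR set so the server can reclaim the + * request's resources.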
+ */ +static void scoutfs_forest_log_merge_worker(struct work_struct *work) +{ + struct forest_info *finf = container_of(work, struct forest_info, + log_merge_dwork.work); + struct super_block *sb = finf->sb; + struct scoutfs_btree_root_head *rhead = NULL; + struct scoutfs_btree_root_head *tmp; + struct scoutfs_log_merge_complete comp; + struct scoutfs_log_merge_request req; + struct scoutfs_log_trees *lt; + struct scoutfs_block_writer wri; + struct scoutfs_alloc alloc; + SCOUTFS_BTREE_ITEM_REF(iref); + struct scoutfs_key next; + struct scoutfs_key key; + unsigned long delay; + LIST_HEAD(inputs); + int ret; + + ret = scoutfs_client_get_log_merge(sb, &req); + if (ret < 0) + goto resched; + + comp.root = req.root; + comp.start = req.start; + comp.end = req.end; + comp.remain = req.end; + comp.rid = req.rid; + comp.seq = req.seq; + comp.flags = 0; + + scoutfs_alloc_init(&alloc, &req.meta_avail, &req.meta_freed); + scoutfs_block_writer_init(sb, &wri); + + /* find finalized input log trees up to last_seq */ + for (scoutfs_key_init_log_trees(&key, 0, 0); ; scoutfs_key_inc(&key)) { + + if (!rhead) { + rhead = kmalloc(sizeof(*rhead), GFP_NOFS); + if (!rhead) { + ret = -ENOMEM; + goto out; + } + } + + ret = scoutfs_btree_next(sb, &req.logs_root, &key, &iref); + if (ret == 0) { + if (iref.val_len == sizeof(*lt)) { + key = *iref.key; + lt = iref.val; + if ((le64_to_cpu(lt->flags) & + SCOUTFS_LOG_TREES_FINALIZED) && + (le64_to_cpu(lt->max_item_seq) <= + le64_to_cpu(req.last_seq))) { + rhead->root = lt->item_root; + list_add_tail(&rhead->head, &inputs); + rhead = NULL; + } + } else { + ret = -EIO; + } + scoutfs_btree_put_iref(&iref); + } + if (ret < 0) { + if (ret == -ENOENT) { + ret = 0; + break; + } + goto out; + } + } + + /* shouldn't be possible, but it's harmless */ + if (list_empty(&inputs)) { + ret = 0; + goto out; + } + + ret = scoutfs_btree_merge(sb, &alloc, &wri, &req.start, &req.end, + &next, &comp.root, &inputs, merge_cmp, + merge_is_del, + !!(req.flags & cpu_to_le64(SCOUTFS_LOG_MERGE_REQUEST_SUBTREE)), + sizeof(struct scoutfs_log_item_value), + SCOUTFS_LOG_MERGE_DIRTY_BYTE_LIMIT, 10); + if (ret == -ERANGE) { + comp.remain = next; + le64_add_cpu(&comp.flags, SCOUTFS_LOG_MERGE_COMP_REMAIN); + ret = 0; + } + +out: + scoutfs_alloc_prepare_commit(sb, &alloc, &wri); + if (ret == 0) + ret = scoutfs_block_writer_write(sb, &wri); + scoutfs_block_writer_forget_all(sb, &wri); + + comp.meta_avail = alloc.avail; + comp.meta_freed = alloc.freed; + if (ret < 0) + le64_add_cpu(&comp.flags, SCOUTFS_LOG_MERGE_COMP_ERROR); + + ret = scoutfs_client_commit_log_merge(sb, &comp); + + kfree(rhead); + list_for_each_entry_safe(rhead, tmp, &inputs, head) + kfree(rhead); + +resched: + delay = ret == 0 ? 
0 : msecs_to_jiffies(LOG_MERGE_DELAY_MS); + queue_delayed_work(finf->workq, &finf->log_merge_dwork, delay); +} + int scoutfs_forest_setup(struct super_block *sb) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); @@ -586,10 +733,23 @@ int scoutfs_forest_setup(struct super_block *sb) } /* the finf fields will be setup as we open a transaction */ + finf->sb = sb; mutex_init(&finf->mutex); mutex_init(&finf->srch_mutex); - + INIT_DELAYED_WORK(&finf->log_merge_dwork, + scoutfs_forest_log_merge_worker); sbi->forest_info = finf; + + finf->workq = alloc_workqueue("scoutfs_log_merge", WQ_NON_REENTRANT | + WQ_UNBOUND | WQ_HIGHPRI, 0); + if (!finf->workq) { + ret = -ENOMEM; + goto out; + } + + queue_delayed_work(finf->workq, &finf->log_merge_dwork, + msecs_to_jiffies(LOG_MERGE_DELAY_MS)); + ret = 0; out: if (ret) @@ -605,6 +765,12 @@ void scoutfs_forest_destroy(struct super_block *sb) if (finf) { scoutfs_block_put(sb, finf->srch_bl); + + if (finf->workq) { + cancel_delayed_work_sync(&finf->log_merge_dwork); + destroy_workqueue(finf->workq); + } + kfree(finf); sbi->forest_info = NULL; } diff --git a/kmod/src/forest.h b/kmod/src/forest.h index b73ea7a4..3ca50670 100644 --- a/kmod/src/forest.h +++ b/kmod/src/forest.h @@ -23,10 +23,10 @@ int scoutfs_forest_read_items(struct super_block *sb, scoutfs_forest_item_cb cb, void *arg); int scoutfs_forest_set_bloom_bits(struct super_block *sb, struct scoutfs_lock *lock); -void scoutfs_forest_set_max_vers(struct super_block *sb, u64 max_vers); -int scoutfs_forest_get_max_vers(struct super_block *sb, - struct scoutfs_super_block *super, - u64 *vers); +void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq); +int scoutfs_forest_get_max_seq(struct super_block *sb, + struct scoutfs_super_block *super, + u64 *seq); int scoutfs_forest_insert_list(struct super_block *sb, struct scoutfs_btree_item_list *lst); int scoutfs_forest_srch_add(struct super_block *sb, u64 hash, u64 ino, u64 id); diff --git a/kmod/src/format.h b/kmod/src/format.h index 924a1842..af2358a0 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -325,6 +325,7 @@ struct scoutfs_alloc_root { #define SCOUTFS_ALLOC_OWNER_SERVER 1 #define SCOUTFS_ALLOC_OWNER_MOUNT 2 #define SCOUTFS_ALLOC_OWNER_SRCH 3 +#define SCOUTFS_ALLOC_OWNER_LOG_MERGE 4 struct scoutfs_mounted_client_btree_val { union scoutfs_inet_addr addr; @@ -449,13 +450,16 @@ struct scoutfs_log_trees { struct scoutfs_srch_file srch_file; __le64 data_alloc_zone_blocks; __le64 data_alloc_zones[SCOUTFS_DATA_ALLOC_ZONE_LE64S]; - __le64 max_item_vers; + __le64 max_item_seq; __le64 rid; __le64 nr; + __le64 flags; }; +#define SCOUTFS_LOG_TREES_FINALIZED (1ULL << 0) + struct scoutfs_log_item_value { - __le64 vers; + __le64 seq; __u8 flags; __u8 __pad[7]; __u8 data[]; @@ -490,6 +494,78 @@ struct scoutfs_bloom_block { member_sizeof(struct scoutfs_bloom_block, bits[0]) * 8) #define SCOUTFS_FOREST_BLOOM_FUNC_BITS (SCOUTFS_BLOCK_LG_SHIFT + 3) +/* + * A private server btree item which records the status of a log merge + * operation that is in progress. + */ +struct scoutfs_log_merge_status { + struct scoutfs_key next_range_key; + __le64 nr_requests; + __le64 nr_complete; + __le64 last_seq; + __le64 seq; +}; + +/* + * A request is sent to the client and stored in a server btree item to + * record resources that would be reclaimed if the client failed. It + * has all the inputs needed for the client to perform its portion of a + * merge. 
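+ * + * Requests are stored as items in the server's log_merge btree under + * SCOUTFS_LOG_MERGE_REQUEST_ZONE. A hedged sketch of reading one back, + * mirroring the walk in scoutfs_alloc_foreach(): + * + * struct scoutfs_log_merge_request *lmreq = iref.val; + * u64 rid = le64_to_cpu(lmreq->rid); + * u64 avail = le64_to_cpu(lmreq->meta_avail.total_nr);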
+ */ +struct scoutfs_log_merge_request { + struct scoutfs_alloc_list_head meta_avail; + struct scoutfs_alloc_list_head meta_freed; + struct scoutfs_btree_root logs_root; + struct scoutfs_btree_root root; + struct scoutfs_key start; + struct scoutfs_key end; + __le64 last_seq; + __le64 rid; + __le64 seq; + __le64 flags; +}; + +/* request root is a subtree of the fs root at a parent, which restricts merging modifications */ +#define SCOUTFS_LOG_MERGE_REQUEST_SUBTREE (1ULL << 0) + +/* + * The output of a client's merge of log btree items into a subtree + * rooted at a parent in the fs_root. The client sends it to the + * server, who stores it in a btree item for later splicing/rebalancing. + */ +struct scoutfs_log_merge_complete { + struct scoutfs_alloc_list_head meta_avail; + struct scoutfs_alloc_list_head meta_freed; + struct scoutfs_btree_root root; + struct scoutfs_key start; + struct scoutfs_key end; + struct scoutfs_key remain; + __le64 rid; + __le64 seq; + __le64 flags; +}; + +/* merge failed, ignore completion and reclaim stored request */ +#define SCOUTFS_LOG_MERGE_COMP_ERROR (1ULL << 0) +/* merge didn't complete range, restart from remain */ +#define SCOUTFS_LOG_MERGE_COMP_REMAIN (1ULL << 1) + +/* + * Range items record the ranges of the fs keyspace that still need to + * be merged. They're added as a merge starts, removed as requests are + * sent and added back if the request didn't consume its entire range. + */ +struct scoutfs_log_merge_range { + struct scoutfs_key start; + struct scoutfs_key end; +}; + +struct scoutfs_log_merge_freeing { + struct scoutfs_btree_root root; + struct scoutfs_key key; + __le64 seq; +}; + /* * Keys are first sorted by major key zones. */ @@ -504,6 +580,12 @@ struct scoutfs_bloom_block { #define SCOUTFS_SRCH_ZONE 9 #define SCOUTFS_FREE_EXTENT_BLKNO_ZONE 10 #define SCOUTFS_FREE_EXTENT_ORDER_ZONE 11 +/* Items only stored in log merge server btrees */ +#define SCOUTFS_LOG_MERGE_STATUS_ZONE 12 +#define SCOUTFS_LOG_MERGE_RANGE_ZONE 13 +#define SCOUTFS_LOG_MERGE_REQUEST_ZONE 14 +#define SCOUTFS_LOG_MERGE_COMPLETE_ZONE 15 +#define SCOUTFS_LOG_MERGE_FREEING_ZONE 16 /* inode index zone */ #define SCOUTFS_INODE_INDEX_META_SEQ_TYPE 1 @@ -688,8 +770,8 @@ struct scoutfs_super_block { __le64 version; __le64 flags; __u8 uuid[SCOUTFS_UUID_BYTES]; + __le64 seq; __le64 next_ino; - __le64 next_trans_seq; __le64 total_meta_blocks; /* both static and dynamic */ __le64 first_meta_blkno; /* first dynamically allocated */ __le64 last_meta_blkno; @@ -703,6 +785,7 @@ struct scoutfs_super_block { struct scoutfs_alloc_list_head server_meta_freed[2]; struct scoutfs_btree_root fs_root; struct scoutfs_btree_root logs_root; + struct scoutfs_btree_root log_merge; struct scoutfs_btree_root trans_seqs; struct scoutfs_btree_root mounted_clients; struct scoutfs_btree_root srch_root; @@ -895,6 +978,8 @@ enum scoutfs_net_cmd { SCOUTFS_NET_CMD_LOCK_RECOVER, SCOUTFS_NET_CMD_SRCH_GET_COMPACT, SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT, + SCOUTFS_NET_CMD_GET_LOG_MERGE, + SCOUTFS_NET_CMD_COMMIT_LOG_MERGE, SCOUTFS_NET_CMD_OPEN_INO_MAP, SCOUTFS_NET_CMD_GET_VOLOPT, SCOUTFS_NET_CMD_SET_VOLOPT, @@ -943,7 +1028,7 @@ struct scoutfs_net_roots { struct scoutfs_net_lock { struct scoutfs_key key; - __le64 write_version; + __le64 write_seq; __u8 old_mode; __u8 new_mode; __u8 __pad[6]; diff --git a/kmod/src/item.c b/kmod/src/item.c index 2b03c39f..d9cc2b2f 100644 --- a/kmod/src/item.c +++ b/kmod/src/item.c @@ -149,7 +149,8 @@ struct cached_item { static int item_val_bytes(int val_len) { - return round_up(offsetof(struct 
cached_item, val[val_len]), CACHED_ITEM_ALIGN); + return round_up(offsetof(struct cached_item, val[val_len]), + CACHED_ITEM_ALIGN); } /* @@ -345,7 +346,8 @@ static struct cached_page *alloc_pg(struct super_block *sb, gfp_t gfp) page = alloc_page(GFP_NOFS | gfp); if (!page || !pg) { kfree(pg); - __free_page(page); + if (page) + __free_page(page); return NULL; } @@ -420,8 +422,7 @@ static struct cached_item *alloc_item(struct cached_page *pg, static void erase_item(struct cached_page *pg, struct cached_item *item) { rbtree_erase(&item->node, &pg->item_root); - pg->erased_bytes += round_up(item_val_bytes(item->val_len), - CACHED_ITEM_ALIGN); + pg->erased_bytes += item_val_bytes(item->val_len); } static void lru_add(struct super_block *sb, struct item_cache_info *cinf, @@ -852,8 +853,7 @@ static void compact_page_items(struct super_block *sb, for (from = first_item(&pg->item_root); from; from = next_item(from)) { to = page_address(empty->page) + page_off; - page_off += round_up(item_val_bytes(from->val_len), - CACHED_ITEM_ALIGN); + page_off += item_val_bytes(from->val_len); /* copy the entire item, struct members and all */ memcpy(to, from, item_val_bytes(from->val_len)); @@ -1308,10 +1308,10 @@ static struct active_reader *active_rbtree_walk(struct rb_root *root, * on our root and aren't in dirty or lru lists. * * We need to store deletion items here as we read items from all the - * btrees so that they can override older versions of the items. The - * deletion items will be deleted before we insert the pages into the - * cache. We don't insert old versions of items into the tree here so - * that the trees don't have to compare versions. + * btrees so that they can override older items. The deletion items + * will be deleted before we insert the pages into the cache. We don't + * insert old versions of items into the tree here so that the trees + * don't have to compare seqs. */ static int read_page_item(struct super_block *sb, struct scoutfs_key *key, struct scoutfs_log_item_value *liv, void *val, @@ -1331,7 +1331,7 @@ static int read_page_item(struct super_block *sb, struct scoutfs_key *key, pg = page_rbtree_walk(sb, root, key, key, NULL, NULL, &p_par, &p_pnode); found = item_rbtree_walk(&pg->item_root, key, NULL, &par, &pnode); - if (found && (le64_to_cpu(found->liv.vers) >= le64_to_cpu(liv->vers))) + if (found && (le64_to_cpu(found->liv.seq) >= le64_to_cpu(liv->seq))) return 0; if (!page_has_room(pg, val_len)) { @@ -1783,6 +1783,21 @@ out: return ret; } +/* + * An item's seq is the greater of the client transaction's seq and the + * lock's write_seq. This ensures that multiple commits in one lock + * grant will have increasing seqs, and new locks in open commits will + * also increase the seqs. It lets us limit the inputs of item merging + * to the last stable seq and ensure that all the items in open + * transactions and granted locks will have greater seqs. + */ +static __le64 item_seq(struct super_block *sb, struct scoutfs_lock *lock) +{ + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); + + return cpu_to_le64(max(sbi->trans_seq, lock->write_seq)); +} + /* * Mark the item dirty. 
Dirtying while holding a transaction pins the * page holding the item and guarantees that the item can be deleted or @@ -1816,7 +1831,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key *key, ret = -ENOENT; } else { mark_item_dirty(sb, cinf, pg, NULL, item); - item->liv.vers = cpu_to_le64(lock->write_version); + item->liv.seq = item_seq(sb, lock); ret = 0; } @@ -1836,7 +1851,7 @@ static int item_create(struct super_block *sb, struct scoutfs_key *key, { DECLARE_ITEM_CACHE_INFO(sb, cinf); struct scoutfs_log_item_value liv = { - .vers = cpu_to_le64(lock->write_version), + .seq = item_seq(sb, lock), }; struct cached_item *found; struct cached_item *item; @@ -1911,7 +1926,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key, { DECLARE_ITEM_CACHE_INFO(sb, cinf); struct scoutfs_log_item_value liv = { - .vers = cpu_to_le64(lock->write_version), + .seq = item_seq(sb, lock), }; struct cached_item *item; struct cached_item *found; @@ -1944,9 +1959,10 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key, if (val_len) memcpy(found->val, val, val_len); if (val_len < found->val_len) - pg->erased_bytes += found->val_len - val_len; + pg->erased_bytes += item_val_bytes(found->val_len) - + item_val_bytes(val_len); found->val_len = val_len; - found->liv.vers = liv.vers; + found->liv.seq = liv.seq; mark_item_dirty(sb, cinf, pg, NULL, found); } else { item = alloc_item(pg, key, &liv, val, val_len); @@ -1978,7 +1994,7 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key, { DECLARE_ITEM_CACHE_INFO(sb, cinf); struct scoutfs_log_item_value liv = { - .vers = cpu_to_le64(lock->write_version), + .seq = item_seq(sb, lock), }; struct cached_item *item; struct cached_page *pg; @@ -2020,10 +2036,11 @@ static int item_delete(struct super_block *sb, struct scoutfs_key *key, erase_item(pg, item); } else { /* must emit deletion to clobber old persistent item */ - item->liv.vers = cpu_to_le64(lock->write_version); + item->liv.seq = liv.seq; item->liv.flags |= SCOUTFS_LOG_ITEM_FLAG_DELETION; item->deletion = 1; - pg->erased_bytes += item->val_len; + pg->erased_bytes += item_val_bytes(item->val_len) - + item_val_bytes(0); item->val_len = 0; mark_item_dirty(sb, cinf, pg, NULL, item); } @@ -2106,7 +2123,7 @@ int scoutfs_item_write_dirty(struct super_block *sb) struct page *page; LIST_HEAD(pages); LIST_HEAD(pos); - u64 max_vers = 0; + u64 max_seq = 0; int val_len; int bytes; int off; @@ -2171,7 +2188,7 @@ int scoutfs_item_write_dirty(struct super_block *sb) val_len = sizeof(item->liv) + item->val_len; bytes = offsetof(struct scoutfs_btree_item_list, val[val_len]); - max_vers = max(max_vers, le64_to_cpu(item->liv.vers)); + max_seq = max(max_seq, le64_to_cpu(item->liv.seq)); if (off + bytes > PAGE_SIZE) { page = second; @@ -2201,8 +2218,8 @@ int scoutfs_item_write_dirty(struct super_block *sb) read_unlock(&pg->rwlock); } - /* store max item vers in forest's log_trees */ - scoutfs_forest_set_max_vers(sb, max_vers); + /* store max item seq in forest's log_trees */ + scoutfs_forest_set_max_seq(sb, max_seq); /* write all the dirty items into log btree blocks */ ret = scoutfs_forest_insert_list(sb, first); diff --git a/kmod/src/key.h b/kmod/src/key.h index 5ea4dd4c..66a4c84a 100644 --- a/kmod/src/key.h +++ b/kmod/src/key.h @@ -108,6 +108,16 @@ static inline void scoutfs_key_set_ones(struct scoutfs_key *key) memset(key->__pad, 0, sizeof(key->__pad)); } +static inline bool scoutfs_key_is_ones(struct scoutfs_key *key) +{ + return key->sk_zone == U8_MAX && + 
key->_sk_first == cpu_to_le64(U64_MAX) && + key->sk_type == U8_MAX && + key->_sk_second == cpu_to_le64(U64_MAX) && + key->_sk_third == cpu_to_le64(U64_MAX) && + key->_sk_fourth == U8_MAX; +} + /* * Return a -1/0/1 comparison of keys. * diff --git a/kmod/src/lock.c b/kmod/src/lock.c index 50a33d26..36227eae 100644 --- a/kmod/src/lock.c +++ b/kmod/src/lock.c @@ -730,7 +730,7 @@ static void lock_grant_worker(struct work_struct *work) lock->request_pending = 0; lock->mode = nl->new_mode; - lock->write_version = le64_to_cpu(nl->write_version); + lock->write_seq = le64_to_cpu(nl->write_seq); if (lock_count_match_exists(nl->new_mode, lock->waiters)) extend_grace(sb, lock); @@ -988,7 +988,7 @@ int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id, for (i = 0; lock && i < SCOUTFS_NET_LOCK_MAX_RECOVER_NR; i++) { nlr->locks[i].key = lock->start; - nlr->locks[i].write_version = cpu_to_le64(lock->write_version); + nlr->locks[i].write_seq = cpu_to_le64(lock->write_seq); nlr->locks[i].old_mode = lock->mode; nlr->locks[i].new_mode = lock->mode; diff --git a/kmod/src/lock.h b/kmod/src/lock.h index 40f8f5b9..d043f9fc 100644 --- a/kmod/src/lock.h +++ b/kmod/src/lock.h @@ -13,7 +13,7 @@ struct scoutfs_omap_lock; /* - * A few fields (start, end, refresh_gen, write_version, granted_mode) + * A few fields (start, end, refresh_gen, write_seq, granted_mode) * are referenced by code outside lock.c. */ struct scoutfs_lock { @@ -23,7 +23,7 @@ struct scoutfs_lock { struct rb_node node; struct rb_node range_node; u64 refresh_gen; - u64 write_version; + u64 write_seq; u64 dirty_trans_seq; struct list_head lru_head; wait_queue_head_t waitq; diff --git a/kmod/src/lock_server.c b/kmod/src/lock_server.c index 09ce48d7..5a3a0cd7 100644 --- a/kmod/src/lock_server.c +++ b/kmod/src/lock_server.c @@ -81,8 +81,6 @@ struct lock_server_info { struct scoutfs_alloc *alloc; struct scoutfs_block_writer *wri; - - atomic64_t write_version; }; #define DECLARE_LOCK_SERVER_INFO(sb, name) \ @@ -479,7 +477,7 @@ static int process_waiting_requests(struct super_block *sb, struct client_lock_entry *req_tmp; struct client_lock_entry *gr; struct client_lock_entry *gr_tmp; - u64 wv; + u64 seq; int ret; BUG_ON(!mutex_is_locked(&snode->mutex)); @@ -520,6 +518,7 @@ static int process_waiting_requests(struct super_block *sb, nl.key = snode->key; nl.new_mode = req->mode; + nl.write_seq = 0; /* see if there's an existing compatible grant to replace */ gr = find_entry(snode, &snode->granted, req->rid); @@ -532,8 +531,9 @@ static int process_waiting_requests(struct super_block *sb, if (nl.new_mode == SCOUTFS_LOCK_WRITE || nl.new_mode == SCOUTFS_LOCK_WRITE_ONLY) { - wv = atomic64_inc_return(&inf->write_version); - nl.write_version = cpu_to_le64(wv); + /* doesn't commit seq update, recovered with locks */ + seq = scoutfs_server_next_seq(sb); + nl.write_seq = cpu_to_le64(seq); } ret = scoutfs_server_lock_response(sb, req->rid, @@ -609,14 +609,6 @@ int scoutfs_lock_server_finished_recovery(struct super_block *sb) return ret; } -static void set_max_write_version(struct lock_server_info *inf, u64 new) -{ - u64 old; - - while (new > (old = atomic64_read(&inf->write_version)) && - (atomic64_cmpxchg(&inf->write_version, old, new) != old)); -} - /* * We sent a lock recover request to the client when we received its * greeting while in recovery. 
Here we instantiate all the locks it @@ -680,9 +672,9 @@ int scoutfs_lock_server_recover_response(struct super_block *sb, u64 rid, put_server_lock(inf, snode); - /* make sure next write lock is greater than all recovered */ - set_max_write_version(inf, - le64_to_cpu(nlr->locks[i].write_version)); + /* make sure next core seq is greater than all lock write seq */ + scoutfs_server_set_seq_if_greater(sb, + le64_to_cpu(nlr->locks[i].write_seq)); } /* send request for next batch of keys */ @@ -800,7 +792,7 @@ static void lock_server_tseq_show(struct seq_file *m, */ int scoutfs_lock_server_setup(struct super_block *sb, struct scoutfs_alloc *alloc, - struct scoutfs_block_writer *wri, u64 max_vers) + struct scoutfs_block_writer *wri) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct lock_server_info *inf; @@ -815,7 +807,6 @@ int scoutfs_lock_server_setup(struct super_block *sb, scoutfs_tseq_tree_init(&inf->tseq_tree, lock_server_tseq_show); inf->alloc = alloc; inf->wri = wri; - atomic64_set(&inf->write_version, max_vers); /* inc_return gives +1 */ inf->tseq_dentry = scoutfs_tseq_create("server_locks", sbi->debug_root, &inf->tseq_tree); diff --git a/kmod/src/lock_server.h b/kmod/src/lock_server.h index e77f116f..60ce31ce 100644 --- a/kmod/src/lock_server.h +++ b/kmod/src/lock_server.h @@ -13,7 +13,7 @@ int scoutfs_lock_server_farewell(struct super_block *sb, u64 rid); int scoutfs_lock_server_setup(struct super_block *sb, struct scoutfs_alloc *alloc, - struct scoutfs_block_writer *wri, u64 max_vers); + struct scoutfs_block_writer *wri); void scoutfs_lock_server_destroy(struct super_block *sb); #endif diff --git a/kmod/src/omap.c b/kmod/src/omap.c index 3dfcbea8..bbe80976 100644 --- a/kmod/src/omap.c +++ b/kmod/src/omap.c @@ -137,11 +137,10 @@ struct omap_request { /* * In each inode group cluster lock we store data to track the open ino * map which tracks all the inodes that the cluster lock covers. When - * the version shows that the map is stale we send a request to update - * it. + * the seq shows that the map is stale we send a request to update it. */ struct scoutfs_omap_lock_data { - u64 version; + u64 seq; bool req_in_flight; wait_queue_head_t waitq; struct scoutfs_open_ino_map map; @@ -833,8 +832,7 @@ static bool omap_req_in_flight(struct scoutfs_lock *lock, struct scoutfs_omap_lo /* * Make sure the map covered by the cluster lock is current. The caller * holds the cluster lock so once we store lock_data on the cluster lock - * it won't be freed and the write_version in the cluster lock won't - * change. + * it won't be freed and the write_seq in the cluster lock won't change. * * The omap_spinlock protects the omap_data in the cluster lock. 
We * have to drop it if we have to block to allocate lock_data, send a @@ -861,7 +859,7 @@ static int get_current_lock_data(struct super_block *sb, struct scoutfs_lock *lo } if (lock->omap_data == NULL) { - ldata->version = lock->write_version - 1; /* ensure refresh */ + ldata->seq = lock->write_seq - 1; /* ensure refresh */ init_waitqueue_head(&ldata->waitq); lock->omap_data = ldata; @@ -871,7 +869,7 @@ static int get_current_lock_data(struct super_block *sb, struct scoutfs_lock *lo } } - while (ldata->version != lock->write_version) { + while (ldata->seq != lock->write_seq) { /* only one waiter sends a request at a time */ if (!ldata->req_in_flight) { ldata->req_in_flight = true; @@ -891,7 +889,7 @@ static int get_current_lock_data(struct super_block *sb, struct scoutfs_lock *lo if (send_req) { ldata->req_in_flight = false; if (ret == 0) - ldata->version = lock->write_version; + ldata->seq = lock->write_seq; wake_up(&ldata->waitq); if (ret < 0) goto out; diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h index 7dce85f0..fb5ea548 100644 --- a/kmod/src/scoutfs_trace.h +++ b/kmod/src/scoutfs_trace.h @@ -1644,6 +1644,164 @@ TRACE_EVENT(scoutfs_btree_walk, __entry->level, __entry->ref_blkno, __entry->ref_seq) ); +TRACE_EVENT(scoutfs_btree_set_parent, + TP_PROTO(struct super_block *sb, + struct scoutfs_btree_root *root, struct scoutfs_key *key, + struct scoutfs_btree_root *par_root), + + TP_ARGS(sb, root, key, par_root), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, root_blkno) + __field(__u64, root_seq) + __field(__u8, root_height) + sk_trace_define(key) + __field(__u64, par_root_blkno) + __field(__u64, par_root_seq) + __field(__u8, par_root_height) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->root_blkno = le64_to_cpu(root->ref.blkno); + __entry->root_seq = le64_to_cpu(root->ref.seq); + __entry->root_height = root->height; + sk_trace_assign(key, key); + __entry->par_root_blkno = le64_to_cpu(par_root->ref.blkno); + __entry->par_root_seq = le64_to_cpu(par_root->ref.seq); + __entry->par_root_height = par_root->height; + ), + + TP_printk(SCSBF" root blkno %llu seq %llu height %u, key "SK_FMT", par_root blkno %llu seq %llu height %u", + SCSB_TRACE_ARGS, __entry->root_blkno, __entry->root_seq, + __entry->root_height, sk_trace_args(key), + __entry->par_root_blkno, __entry->par_root_seq, + __entry->par_root_height) +); + +TRACE_EVENT(scoutfs_btree_merge, + TP_PROTO(struct super_block *sb, struct scoutfs_btree_root *root, + struct scoutfs_key *start, struct scoutfs_key *end), + + TP_ARGS(sb, root, start, end), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, root_blkno) + __field(__u64, root_seq) + __field(__u8, root_height) + sk_trace_define(start) + sk_trace_define(end) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->root_blkno = le64_to_cpu(root->ref.blkno); + __entry->root_seq = le64_to_cpu(root->ref.seq); + __entry->root_height = root->height; + sk_trace_assign(start, start); + sk_trace_assign(end, end); + ), + + TP_printk(SCSBF" root blkno %llu seq %llu height %u start "SK_FMT" end "SK_FMT, + SCSB_TRACE_ARGS, __entry->root_blkno, __entry->root_seq, + __entry->root_height, sk_trace_args(start), + sk_trace_args(end)) +); + +TRACE_EVENT(scoutfs_btree_merge_items, + TP_PROTO(struct super_block *sb, + struct scoutfs_btree_root *m_root, + struct scoutfs_key *m_key, int m_val_len, + struct scoutfs_btree_root *f_root, + struct scoutfs_key *f_key, int f_val_len, + int is_del), + + TP_ARGS(sb, m_root, m_key, m_val_len, f_root, f_key, 
f_val_len, is_del), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, m_root_blkno) + __field(__u64, m_root_seq) + __field(__u8, m_root_height) + sk_trace_define(m_key) + __field(int, m_val_len) + __field(__u64, f_root_blkno) + __field(__u64, f_root_seq) + __field(__u8, f_root_height) + sk_trace_define(f_key) + __field(int, f_val_len) + __field(int, is_del) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->m_root_blkno = m_root ? + le64_to_cpu(m_root->ref.blkno) : 0; + __entry->m_root_seq = m_root ? le64_to_cpu(m_root->ref.seq) : 0; + __entry->m_root_height = m_root ? m_root->height : 0; + sk_trace_assign(m_key, m_key); + __entry->m_val_len = m_val_len; + __entry->f_root_blkno = f_root ? + le64_to_cpu(f_root->ref.blkno) : 0; + __entry->f_root_seq = f_root ? le64_to_cpu(f_root->ref.seq) : 0; + __entry->f_root_height = f_root ? f_root->height : 0; + sk_trace_assign(f_key, f_key); + __entry->f_val_len = f_val_len; + __entry->is_del = !!is_del; + ), + + TP_printk(SCSBF" merge item root blkno %llu seq %llu height %u key "SK_FMT" val_len %d, fs item root blkno %llu seq %llu height %u key "SK_FMT" val_len %d, is_del %d", + SCSB_TRACE_ARGS, __entry->m_root_blkno, __entry->m_root_seq, + __entry->m_root_height, sk_trace_args(m_key), + __entry->m_val_len, __entry->f_root_blkno, + __entry->f_root_seq, __entry->f_root_height, + sk_trace_args(f_key), __entry->f_val_len, __entry->is_del) +); + +DECLARE_EVENT_CLASS(scoutfs_btree_free_blocks, + TP_PROTO(struct super_block *sb, struct scoutfs_btree_root *root, + u64 blkno), + + TP_ARGS(sb, root, blkno), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, root_blkno) + __field(__u64, root_seq) + __field(__u8, root_height) + __field(__u64, blkno) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->root_blkno = le64_to_cpu(root->ref.blkno); + __entry->root_seq = le64_to_cpu(root->ref.seq); + __entry->root_height = root->height; + __entry->blkno = blkno; + ), + + TP_printk(SCSBF" root blkno %llu seq %llu height %u, free blkno %llu", + SCSB_TRACE_ARGS, __entry->root_blkno, __entry->root_seq, + __entry->root_height, __entry->blkno) +); +DEFINE_EVENT(scoutfs_btree_free_blocks, scoutfs_btree_free_blocks_single, + TP_PROTO(struct super_block *sb, struct scoutfs_btree_root *root, + u64 blkno), + TP_ARGS(sb, root, blkno) +); +DEFINE_EVENT(scoutfs_btree_free_blocks, scoutfs_btree_free_blocks_leaf, + TP_PROTO(struct super_block *sb, struct scoutfs_btree_root *root, + u64 blkno), + TP_ARGS(sb, root, blkno) +); +DEFINE_EVENT(scoutfs_btree_free_blocks, scoutfs_btree_free_blocks_parent, + TP_PROTO(struct super_block *sb, struct scoutfs_btree_root *root, + u64 blkno), + TP_ARGS(sb, root, blkno) +); + TRACE_EVENT(scoutfs_online_offline_blocks, TP_PROTO(struct inode *inode, s64 on_delta, s64 off_delta, u64 on_now, u64 off_now), @@ -1900,6 +2058,116 @@ TRACE_EVENT(scoutfs_trans_seq_last, SCSB_TRACE_ARGS, __entry->s_rid, __entry->trans_seq) ); +TRACE_EVENT(scoutfs_get_log_merge_status, + TP_PROTO(struct super_block *sb, u64 rid, struct scoutfs_key *next_range_key, + u64 nr_requests, u64 nr_complete, u64 last_seq, u64 seq), + + TP_ARGS(sb, rid, next_range_key, nr_requests, nr_complete, last_seq, seq), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, s_rid) + sk_trace_define(next_range_key) + __field(__u64, nr_requests) + __field(__u64, nr_complete) + __field(__u64, last_seq) + __field(__u64, seq) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->s_rid = rid; + sk_trace_assign(next_range_key, next_range_key); + 
__entry->nr_requests = nr_requests; + __entry->nr_complete = nr_complete; + __entry->last_seq = last_seq; + __entry->seq = seq; + ), + + TP_printk(SCSBF" rid %016llx next_range_key "SK_FMT" nr_requests %llu nr_complete %llu last_seq %llu seq %llu", + SCSB_TRACE_ARGS, __entry->s_rid, sk_trace_args(next_range_key), + __entry->nr_requests, __entry->nr_complete, __entry->last_seq, __entry->seq) +); + +TRACE_EVENT(scoutfs_get_log_merge_request, + TP_PROTO(struct super_block *sb, u64 rid, + struct scoutfs_btree_root *root, struct scoutfs_key *start, + struct scoutfs_key *end, u64 last_seq, u64 seq), + + TP_ARGS(sb, rid, root, start, end, last_seq, seq), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, s_rid) + __field(__u64, root_blkno) + __field(__u64, root_seq) + __field(__u8, root_height) + sk_trace_define(start) + sk_trace_define(end) + __field(__u64, last_seq) + __field(__u64, seq) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->s_rid = rid; + __entry->root_blkno = le64_to_cpu(root->ref.blkno); + __entry->root_seq = le64_to_cpu(root->ref.seq); + __entry->root_height = root->height; + sk_trace_assign(start, start); + sk_trace_assign(end, end); + __entry->last_seq = last_seq; + __entry->seq = seq; + ), + + TP_printk(SCSBF" rid %016llx root blkno %llu seq %llu height %u start "SK_FMT" end "SK_FMT" last_seq %llu seq %llu", + SCSB_TRACE_ARGS, __entry->s_rid, __entry->root_blkno, + __entry->root_seq, __entry->root_height, + sk_trace_args(start), sk_trace_args(end), __entry->last_seq, + __entry->seq) +); + +TRACE_EVENT(scoutfs_get_log_merge_complete, + TP_PROTO(struct super_block *sb, u64 rid, + struct scoutfs_btree_root *root, struct scoutfs_key *start, + struct scoutfs_key *end, struct scoutfs_key *remain, + u64 seq, u64 flags), + + TP_ARGS(sb, rid, root, start, end, remain, seq, flags), + + TP_STRUCT__entry( + SCSB_TRACE_FIELDS + __field(__u64, s_rid) + __field(__u64, root_blkno) + __field(__u64, root_seq) + __field(__u8, root_height) + sk_trace_define(start) + sk_trace_define(end) + sk_trace_define(remain) + __field(__u64, seq) + __field(__u64, flags) + ), + + TP_fast_assign( + SCSB_TRACE_ASSIGN(sb); + __entry->s_rid = rid; + __entry->root_blkno = le64_to_cpu(root->ref.blkno); + __entry->root_seq = le64_to_cpu(root->ref.seq); + __entry->root_height = root->height; + sk_trace_assign(start, start); + sk_trace_assign(end, end); + sk_trace_assign(remain, remain); + __entry->seq = seq; + __entry->flags = flags; + ), + + TP_printk(SCSBF" rid %016llx root blkno %llu seq %llu height %u start "SK_FMT" end "SK_FMT" remain "SK_FMT" seq %llu flags 0x%llx", + SCSB_TRACE_ARGS, __entry->s_rid, __entry->root_blkno, + __entry->root_seq, __entry->root_height, + sk_trace_args(start), sk_trace_args(end), + sk_trace_args(remain), __entry->seq, __entry->flags) +); + DECLARE_EVENT_CLASS(scoutfs_forest_bloom_class, TP_PROTO(struct super_block *sb, struct scoutfs_key *key, u64 rid, u64 nr, u64 blkno, u64 seq, unsigned int count), diff --git a/kmod/src/server.c b/kmod/src/server.c index 4eeefccd..9e8307b8 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -65,6 +65,9 @@ struct server_info { u64 term; struct scoutfs_net_connection *conn; + /* synced with superblock seq on commits */ + atomic64_t seq_atomic; + /* request processing coordinates shared commits */ struct rw_semaphore commit_rwsem; struct llist_head commit_waiters; @@ -93,6 +96,8 @@ struct server_info { struct scoutfs_block_writer wri; struct mutex logs_mutex; + struct work_struct log_merge_free_work; + struct mutex srch_mutex; 
 	struct mutex mounted_clients_mutex;
@@ -187,15 +192,13 @@ static void stop_server(struct server_info *server)
  * (lock_server) and which are not called directly by the server core
  * (async timeout work).
  */
-int scoutfs_server_hold_commit(struct super_block *sb)
+void scoutfs_server_hold_commit(struct super_block *sb)
 {
 	DECLARE_SERVER_INFO(sb, server);
 
 	scoutfs_inc_counter(sb, server_commit_hold);
 
 	down_read(&server->commit_rwsem);
-
-	return 0;
 }
 
 /*
@@ -250,6 +253,35 @@ static void get_roots(struct super_block *sb,
 	} while (read_seqcount_retry(&server->roots_seqcount, seq));
 }
 
+u64 scoutfs_server_seq(struct super_block *sb)
+{
+	DECLARE_SERVER_INFO(sb, server);
+
+	return atomic64_read(&server->seq_atomic);
+}
+
+u64 scoutfs_server_next_seq(struct super_block *sb)
+{
+	DECLARE_SERVER_INFO(sb, server);
+
+	return atomic64_inc_return(&server->seq_atomic);
+}
+
+void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq)
+{
+	DECLARE_SERVER_INFO(sb, server);
+	u64 expect;
+	u64 was;
+
+	expect = atomic64_read(&server->seq_atomic);
+	while (seq > expect) {
+		was = atomic64_cmpxchg(&server->seq_atomic, expect, seq);
+		if (was == expect)
+			break;
+		expect = was;
+	}
+}
+
 static void set_roots(struct server_info *server,
 		      struct scoutfs_btree_root *fs_root,
 		      struct scoutfs_btree_root *logs_root,
@@ -335,6 +367,7 @@ static void scoutfs_server_commit_func(struct work_struct *work)
 		goto out;
 	}
 
+	super->seq = cpu_to_le64(atomic64_read(&server->seq_atomic));
 	super->server_meta_avail[server->other_ind ^ 1] = server->alloc.avail;
 	super->server_meta_freed[server->other_ind ^ 1] = server->alloc.freed;
 
@@ -394,9 +427,7 @@ static int server_alloc_inodes(struct super_block *sb,
 
 	memcpy(&lecount, arg, arg_len);
 
-	ret = scoutfs_server_hold_commit(sb);
-	if (ret)
-		goto out;
+	scoutfs_server_hold_commit(sb);
 
 	spin_lock(&sbi->next_ino_lock);
 	ino = le64_to_cpu(super->next_ino);
@@ -404,7 +435,7 @@ static int server_alloc_inodes(struct super_block *sb,
 	le64_add_cpu(&super->next_ino, nr);
 	spin_unlock(&sbi->next_ino_lock);
 
-	ret = scoutfs_server_apply_commit(sb, ret);
+	ret = scoutfs_server_apply_commit(sb, 0);
 	if (ret == 0) {
 		ial.ino = cpu_to_le64(ino);
 		ial.nr = cpu_to_le64(nr);
@@ -575,6 +606,35 @@ static void set_extent_zone_bits(struct super_block *sb, void *cb_arg, struct sc
 	mod_extent_bits(cba->zones, cba->zone_blocks, ext->start, ext->len, true);
 }
 
+static int find_log_trees_item(struct super_block *sb,
+			       struct scoutfs_btree_root *logs_root,
+			       bool call_next, u64 rid, u64 nr,
+			       struct scoutfs_log_trees *lt_ret)
+{
+	SCOUTFS_BTREE_ITEM_REF(iref);
+	struct scoutfs_key key;
+	int ret;
+
+	scoutfs_key_init_log_trees(&key, rid, nr);
+	if (call_next)
+		ret = scoutfs_btree_next(sb, logs_root, &key, &iref);
+	else
+		ret = scoutfs_btree_prev(sb, logs_root, &key, &iref);
+	if (ret == 0) {
+		if (iref.val_len == sizeof(struct scoutfs_log_trees)) {
+			if (le64_to_cpu(iref.key->sklt_rid) != rid)
+				ret = -ENOENT;
+			else
+				memcpy(lt_ret, iref.val, iref.val_len);
+		} else {
+			ret = -EIO;
+		}
+		scoutfs_btree_put_iref(&iref);
+	}
+
+	return ret;
+}
+
 /*
  * Give the client roots to all the trees that they'll use to build
  * their transaction.
@@ -584,6 +644,9 @@ static void set_extent_zone_bits(struct super_block *sb, void *cb_arg, struct sc
  * trees back into the core allocators.  They were committed with the
  * previous transaction so they're stable and can now be reused, even by
  * the server in this commit.
+ *
+ * If the committed log trees are large enough we finalize them and make
+ * them available to log merging.
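+ *
+ * As a rough sketch (a hypothetical helper, not part of this patch),
+ * the nr a client writes to next falls out of the last item's state:
+ *
+ *	u64 next_open_nr(struct scoutfs_log_trees *last)
+ *	{
+ *		if (!last)
+ *			return 1;
+ *		if (le64_to_cpu(last->flags) & SCOUTFS_LOG_TREES_FINALIZED)
+ *			return le64_to_cpu(last->nr) + 1;
+ *		return le64_to_cpu(last->nr);
+ *	}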
 */
static int server_get_log_trees(struct super_block *sb,
				struct scoutfs_net_connection *conn,
@@ -595,10 +658,12 @@ static int server_get_log_trees(struct super_block *sb,
 	__le64 exclusive[SCOUTFS_DATA_ALLOC_ZONE_LE64S];
 	__le64 vacant[SCOUTFS_DATA_ALLOC_ZONE_LE64S];
 	struct alloc_extent_cb_args cba;
-	SCOUTFS_BTREE_ITEM_REF(iref);
+	struct scoutfs_log_trees fin;
 	struct scoutfs_log_trees lt;
 	struct scoutfs_key key;
+	bool have_fin = false;
 	u64 data_zone_blocks;
+	u64 nr;
 	int ret;
 
 	if (arg_len != 0) {
@@ -606,38 +671,59 @@ static int server_get_log_trees(struct super_block *sb,
 		goto out;
 	}
 
-	ret = scoutfs_server_hold_commit(sb);
-	if (ret)
-		goto out;
+	scoutfs_server_hold_commit(sb);
 
 	mutex_lock(&server->logs_mutex);
 
-	scoutfs_key_init_log_trees(&key, rid, U64_MAX);
-
-	ret = scoutfs_btree_prev(sb, &super->logs_root, &key, &iref);
+	/* see if we already have a finalized root from the rid */
+	ret = find_log_trees_item(sb, &super->logs_root, true, rid, 0, &lt);
 	if (ret < 0 && ret != -ENOENT)
 		goto unlock;
-	if (ret == 0) {
-		if (iref.val_len == sizeof(struct scoutfs_log_trees)) {
-			key = *iref.key;
-			memcpy(&lt, iref.val, iref.val_len);
-			if (le64_to_cpu(key.sklt_rid) != rid)
-				ret = -ENOENT;
-		} else {
-			ret = -EIO;
-		}
-		scoutfs_btree_put_iref(&iref);
-		if (ret == -EIO)
-			goto unlock;
+	if (ret == 0 && le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED)
		have_fin = true;
+
+	/* use the last non-finalized root, or start a new one */
+	ret = find_log_trees_item(sb, &super->logs_root, false, rid, U64_MAX,
+				  &lt);
+	if (ret < 0 && ret != -ENOENT)
+		goto unlock;
+	if (ret == 0 && le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) {
+		ret = -ENOENT;
+		nr = le64_to_cpu(lt.nr) + 1;
+	} else if (ret == -ENOENT) {
+		nr = 1;
 	}
 
-	/* initialize new roots if we don't have any */
+	/* initialize a new root if we don't have a non-finalized one */
 	if (ret == -ENOENT) {
-		key.sklt_rid = cpu_to_le64(rid);
-		key.sklt_nr = cpu_to_le64(1);
 		memset(&lt, 0, sizeof(lt));
-		lt.rid = key.sklt_rid;
-		lt.nr = key.sklt_nr;
+		lt.rid = cpu_to_le64(rid);
+		lt.nr = cpu_to_le64(nr);
+	}
+
+	/* finalize an existing root when large enough and don't have one */
+	if (lt.item_root.height > 2 && !have_fin) {
+		fin = lt;
+		memset(&fin.meta_avail, 0, sizeof(fin.meta_avail));
+		memset(&fin.meta_freed, 0, sizeof(fin.meta_freed));
+		memset(&fin.data_avail, 0, sizeof(fin.data_avail));
+		memset(&fin.data_freed, 0, sizeof(fin.data_freed));
+		memset(&fin.srch_file, 0, sizeof(fin.srch_file));
+		le64_add_cpu(&fin.flags, SCOUTFS_LOG_TREES_FINALIZED);
+
+		scoutfs_key_init_log_trees(&key, le64_to_cpu(fin.rid),
+					   le64_to_cpu(fin.nr));
+		ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
+					   &super->logs_root, &key, &fin,
+					   sizeof(fin));
+		if (ret < 0)
+			goto unlock;
+
+		memset(&lt.item_root, 0, sizeof(lt.item_root));
+		memset(&lt.bloom_ref, 0, sizeof(lt.bloom_ref));
+		lt.max_item_seq = 0;
+		le64_add_cpu(&lt.nr, 1);
+		lt.flags = 0;
 	}
 
 	if (get_volopt_val(server, SCOUTFS_VOLOPT_DATA_ALLOC_ZONE_BLOCKS_NR,
			   &data_zone_blocks)) {
@@ -681,6 +767,8 @@ static int server_get_log_trees(struct super_block *sb,
 	}
 
 	/* update client's log tree's item */
+	scoutfs_key_init_log_trees(&key, le64_to_cpu(lt.rid),
+				   le64_to_cpu(lt.nr));
 	ret = scoutfs_btree_force(sb, &server->alloc, &server->wri,
 				  &super->logs_root, &key, &lt, sizeof(lt));
 unlock:
@@ -717,11 +805,7 @@ static int server_commit_log_trees(struct super_block *sb,
 	/* don't modify the caller's log_trees */
 	memcpy(&lt, arg, sizeof(struct scoutfs_log_trees));
 
-	ret = scoutfs_server_hold_commit(sb);
-	if (ret < 0) {
-		scoutfs_err(sb, "server error preparing commit: %d", ret);
-		goto out;
-	}
+	scoutfs_server_hold_commit(sb);
 
 	mutex_lock(&server->logs_mutex);
 
@@ -739,7 +823,7 @@ static int server_commit_log_trees(struct super_block *sb,
 	/* try to rotate the srch log when big enough */
 	mutex_lock(&server->srch_mutex);
 	ret = scoutfs_srch_rotate_log(sb, &server->alloc, &server->wri,
-				      &super->srch_root, &lt.srch_file);
+				      &super->srch_root, &lt.srch_file, false);
 	mutex_unlock(&server->srch_mutex);
 	if (ret < 0) {
 		scoutfs_err(sb, "server error, rotating srch log: %d", ret);
@@ -788,8 +872,9 @@ static int server_get_roots(struct super_block *sb,
 
 /*
  * A client is being evicted so we want to reclaim resources from their
- * log tree items.  The item trees and bloom refs stay around to be read
- * and eventually merged and we reclaim all the allocator items.
+ * open log tree item.  The item tree and bloom ref stay around to be
+ * read and we finalize the tree so that it will be merged.  We reclaim
+ * all the allocator items.
 *
 * The caller holds the commit rwsem which means we do all this work in
 * one server commit.  We'll need to keep the total amount of blocks in
@@ -803,7 +888,7 @@ static int server_get_roots(struct super_block *sb,
 * We can return an error without fully reclaiming all the log item's
 * referenced data.
 */
-static int reclaim_log_trees(struct super_block *sb, u64 rid)
+static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
 {
 	struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
 	DECLARE_SERVER_INFO(sb, server);
@@ -815,14 +900,16 @@ static int reclaim_log_trees(struct super_block *sb, u64 rid)
 
 	mutex_lock(&server->logs_mutex);
 
-	/* find the client's existing item */
-	scoutfs_key_init_log_trees(&key, rid, 0);
-	ret = scoutfs_btree_next(sb, &super->logs_root, &key, &iref);
+	/* find the client's last open log_trees item */
+	scoutfs_key_init_log_trees(&key, rid, U64_MAX);
+	ret = scoutfs_btree_prev(sb, &super->logs_root, &key, &iref);
 	if (ret == 0) {
 		if (iref.val_len == sizeof(struct scoutfs_log_trees)) {
 			key = *iref.key;
 			memcpy(&lt, iref.val, iref.val_len);
-			if (le64_to_cpu(key.sklt_rid) != rid)
+			if ((le64_to_cpu(key.sklt_rid) != rid) ||
+			    (le64_to_cpu(lt.flags) &
+			     SCOUTFS_LOG_TREES_FINALIZED))
 				ret = -ENOENT;
 		} else {
 			ret = -EIO;
@@ -835,6 +922,16 @@ static int reclaim_log_trees(struct super_block *sb, u64 rid)
 		goto out;
 	}
 
+	/* force srch log file rotation if it's populated */
+	mutex_lock(&server->srch_mutex);
+	ret = scoutfs_srch_rotate_log(sb, &server->alloc, &server->wri,
+				      &super->srch_root, &lt.srch_file, true);
+	mutex_unlock(&server->srch_mutex);
+	if (ret < 0) {
+		scoutfs_err(sb, "server error, reclaim rotating srch log: %d", ret);
+		goto out;
+	}
+
 	/*
 	 * All of these can return errors after having modified the
 	 * allocator trees.  We have to try and update the roots in the
@@ -853,10 +950,11 @@ static int reclaim_log_trees(struct super_block *sb, u64 rid)
 
 	/* the mount is no longer writing to the zones */
 	zero_data_alloc_zone_bits(&lt);
+	le64_add_cpu(&lt.flags, SCOUTFS_LOG_TREES_FINALIZED);
 
 	err = scoutfs_btree_update(sb, &server->alloc, &server->wri,
 				   &super->logs_root, &key, &lt, sizeof(lt));
-	BUG_ON(err != 0); /* alloc and log item roots out of sync */
+	BUG_ON(err != 0); /* alloc, log, srch items out of sync */
 
 out:
 	mutex_unlock(&server->logs_mutex);
@@ -952,9 +1050,7 @@ static int server_advance_seq(struct super_block *sb,
 		goto out;
 	}
 
-	ret = scoutfs_server_hold_commit(sb);
-	if (ret)
-		goto out;
+	scoutfs_server_hold_commit(sb);
 
 	down_write(&server->seq_rwsem);
 
@@ -962,8 +1058,7 @@ static int server_advance_seq(struct super_block *sb,
 	if (ret < 0)
 		goto unlock;
 
-	seq = le64_to_cpu(super->next_trans_seq);
-	le64_add_cpu(&super->next_trans_seq, 1);
+	seq = scoutfs_server_next_seq(sb);
 
 	trace_scoutfs_trans_seq_advance(sb, rid, seq);
 
@@ -1001,6 +1096,43 @@ static int remove_trans_seq(struct super_block *sb, u64 rid)
 	return ret;
 }
 
+/*
+ * Give the caller the last seq before outstanding client commits.  All
+ * seqs up to and including this are stable; new client transactions can
+ * only have greater seqs.
+ */
+static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret)
+{
+	DECLARE_SERVER_INFO(sb, server);
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+	struct scoutfs_super_block *super = &sbi->super;
+	SCOUTFS_BTREE_ITEM_REF(iref);
+	struct scoutfs_key key;
+	u64 last_seq = 0;
+	int ret;
+
+	down_read(&server->seq_rwsem);
+
+	init_trans_seq_key(&key, 0, 0);
+	ret = scoutfs_btree_next(sb, &super->trans_seqs, &key, &iref);
+	if (ret == 0) {
+		last_seq = le64_to_cpu(iref.key->skts_trans_seq) - 1;
+		scoutfs_btree_put_iref(&iref);
+
+	} else if (ret == -ENOENT) {
+		last_seq = scoutfs_server_seq(sb) - 1;
+		ret = 0;
+	}
+
+	up_read(&server->seq_rwsem);
+
+	if (ret < 0)
+		last_seq = 0;
+
+	*last_seq_ret = last_seq;
+	return ret;
+}
+
 /*
 * Give the calling client the last valid trans_seq that it can return
 * in results from the indices of trans seqs to inodes.  These indices
These indices @@ -1013,13 +1145,9 @@ static int server_get_last_seq(struct super_block *sb, struct scoutfs_net_connection *conn, u8 cmd, u64 id, void *arg, u16 arg_len) { - DECLARE_SERVER_INFO(sb, server); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; - SCOUTFS_BTREE_ITEM_REF(iref); u64 rid = scoutfs_net_client_rid(conn); - struct scoutfs_key key; - __le64 last_seq = 0; + u64 last_seq = 0; + __le64 leseq; int ret; if (arg_len != 0) { @@ -1027,27 +1155,12 @@ static int server_get_last_seq(struct super_block *sb, goto out; } - down_read(&server->seq_rwsem); - - init_trans_seq_key(&key, 0, 0); - ret = scoutfs_btree_next(sb, &super->trans_seqs, &key, &iref); - if (ret == 0) { - key = *iref.key; - scoutfs_btree_put_iref(&iref); - last_seq = key.skts_trans_seq; - - } else if (ret == -ENOENT) { - last_seq = super->next_trans_seq; - ret = 0; - } - - le64_add_cpu(&last_seq, -1ULL); - trace_scoutfs_trans_seq_last(sb, rid, le64_to_cpu(last_seq)); - - up_read(&server->seq_rwsem); + ret = get_stable_trans_seq(sb, &last_seq); out: + trace_scoutfs_trans_seq_last(sb, rid, last_seq); + leseq = cpu_to_le64(last_seq); return scoutfs_net_response(sb, conn, cmd, id, ret, - &last_seq, sizeof(last_seq)); + &leseq, sizeof(leseq)); } static int server_lock(struct super_block *sb, @@ -1151,9 +1264,7 @@ static int server_srch_get_compact(struct super_block *sb, goto out; } - ret = scoutfs_server_hold_commit(sb); - if (ret) - goto out; + scoutfs_server_hold_commit(sb); mutex_lock(&server->srch_mutex); ret = scoutfs_srch_get_compact(sb, &server->alloc, &server->wri, @@ -1215,9 +1326,7 @@ static int server_srch_commit_compact(struct super_block *sb, } sc = arg; - ret = scoutfs_server_hold_commit(sb); - if (ret) - goto out; + scoutfs_server_hold_commit(sb); mutex_lock(&server->srch_mutex); ret = scoutfs_srch_commit_compact(sb, &server->alloc, &server->wri, @@ -1241,6 +1350,910 @@ out: return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0); } +/* + * Log merge range items are stored at the starting fs key of the range. + * The only fs key field that doesn't hold information is the zone, so + * we use the zone to differentiate all types that we store in the log + * merge tree. + */ +static void init_log_merge_key(struct scoutfs_key *key, u8 zone, u64 first, + u64 second) +{ + *key = (struct scoutfs_key) { + .sk_zone = zone, + ._sk_first = cpu_to_le64(first), + ._sk_second = cpu_to_le64(second), + }; +} + +static int next_log_merge_item_key(struct super_block *sb, struct scoutfs_btree_root *root, + u8 zone, struct scoutfs_key *key, void *val, size_t val_len) +{ + SCOUTFS_BTREE_ITEM_REF(iref); + int ret; + + ret = scoutfs_btree_next(sb, root, key, &iref); + if (ret == 0) { + if (iref.key->sk_zone != zone) + ret = -ENOENT; + else if (iref.val_len != val_len) + ret = -EIO; + else + memcpy(val, iref.val, val_len); + scoutfs_btree_put_iref(&iref); + } + + return ret; +} + +static int next_log_merge_item(struct super_block *sb, + struct scoutfs_btree_root *root, + u8 zone, u64 first, u64 second, + void *val, size_t val_len) +{ + struct scoutfs_key key; + + init_log_merge_key(&key, zone, first, second); + return next_log_merge_item_key(sb, root, zone, &key, val, val_len); +} + +/* + * We start a log merge operation if there are any finalized log trees + * whose greatest seq is within the last stable seq. This is called by + * every client's get_log_merge handler at a relatively low frequency + * until a merge starts. 
+ */
+static int start_log_merge(struct super_block *sb,
+			   struct scoutfs_super_block *super,
+			   struct scoutfs_log_merge_status *stat_ret)
+{
+	struct server_info *server = SCOUTFS_SB(sb)->server_info;
+	struct scoutfs_log_merge_status stat;
+	struct scoutfs_log_merge_range rng;
+	SCOUTFS_BTREE_ITEM_REF(iref);
+	struct scoutfs_log_trees *lt;
+	struct scoutfs_key key;
+	u64 last_seq;
+	bool start;
+	int ret;
+	int err;
+
+	ret = get_stable_trans_seq(sb, &last_seq);
+	if (ret < 0)
+		goto out;
+
+	scoutfs_key_init_log_trees(&key, 0, 0);
+	for (start = false; !start; scoutfs_key_inc(&key)) {
+		ret = scoutfs_btree_next(sb, &super->logs_root, &key, &iref);
+		if (ret == 0) {
+			if (iref.val_len == sizeof(*lt)) {
+				key = *iref.key;
+				lt = iref.val;
+				if ((le64_to_cpu(lt->flags) &
+				     SCOUTFS_LOG_TREES_FINALIZED) &&
+				    (le64_to_cpu(lt->max_item_seq) <=
+				     last_seq)) {
+					start = true;
+				}
+			} else {
+				ret = -EIO;
+			}
+			scoutfs_btree_put_iref(&iref);
+		}
+		if (ret < 0)
+			goto out;
+	}
+
+	if (!start) {
+		ret = -ENOENT;
+		goto out;
+	}
+
+	/* add an initial full range */
+	scoutfs_key_set_zeros(&rng.start);
+	scoutfs_key_set_ones(&rng.end);
+	key = rng.start;
+	key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+	ret = scoutfs_btree_insert(sb, &server->alloc, &server->wri,
+				   &super->log_merge, &key, &rng, sizeof(rng));
+	if (ret < 0)
+		goto out;
+
+	/* and add the merge status item */
+	scoutfs_key_set_zeros(&stat.next_range_key);
+	stat.nr_requests = 0;
+	stat.nr_complete = 0;
+	stat.last_seq = cpu_to_le64(last_seq);
+	stat.seq = cpu_to_le64(scoutfs_server_next_seq(sb));
+
+	init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
+	ret = scoutfs_btree_insert(sb, &server->alloc, &server->wri,
+				   &super->log_merge, &key,
+				   &stat, sizeof(stat));
+	if (ret < 0) {
+		key = rng.start;
+		key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+		err = scoutfs_btree_delete(sb, &server->alloc, &server->wri,
+					   &super->log_merge, &key);
+		BUG_ON(err); /* inconsistent */
+	}
+
+	/* queue free work to see if there are lingering items to process */
+	if (ret == 0)
+		queue_work(server->wq, &server->log_merge_free_work);
+out:
+	if (ret == 0)
+		*stat_ret = stat;
+	return ret;
+}
+
+/* Requests drain once we get this many completions to splice */
+#define LOG_MERGE_SPLICE_BATCH 8
+
+/*
+ * Splice the completed subtrees from the clients back into the fs
+ * tree as parents.  Once they're spliced in, try and rebalance a path
+ * through them in case they need to be split or joined before the rest
+ * of their range can be processed.
+ *
+ * It's only safe to splice in merged parents when all the requests have
+ * drained and no requests are relying on stable key ranges of parents
+ * in the fs root.
+ *
+ * It doesn't matter that the fs tree produced by these subtree splices
+ * itself contains inconsistent items because the subtrees can contain
+ * fragments of transactions.  The read-only finalized log btrees that
+ * are the source of the spliced items are still preferred by readers.
+ * It's only once all the finalized items have been merged, and all
+ * transactions are consistent, that we remove the finalized log trees
+ * and the fs tree items are used.
+ *
+ * As we splice in the subtrees we're implicitly allocating all the
+ * blocks referenced by the new subtree, and freeing all the blocks
+ * referenced by the old subtree that's overwritten.  These allocs and
+ * frees were performed by the client as it did cow updates and were
+ * stored in the allocators that were sent with the completion.  We
+ * merge in those allocators as we splice in the subtree.
+ *
+ * We can add back any remaining ranges for any partial completions and
+ * reset the next range key if there's still work to do.  If the
+ * operation is complete then we tear down the input log_trees items and
+ * delete the status.
+ */
+static int splice_log_merge_completions(struct super_block *sb,
+					struct scoutfs_log_merge_status *stat,
+					bool no_ranges)
+{
+	struct server_info *server = SCOUTFS_SB(sb)->server_info;
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+	struct scoutfs_super_block *super = &sbi->super;
+	struct scoutfs_log_merge_complete comp;
+	struct scoutfs_log_merge_freeing fr;
+	struct scoutfs_log_merge_range rng;
+	struct scoutfs_log_trees lt = {{{0,}}};
+	SCOUTFS_BTREE_ITEM_REF(iref);
+	struct scoutfs_key key;
+	u64 seq;
+	int ret;
+
+	/* mustn't rebalance fs tree parents while reqs rely on their key bounds */
+	if (WARN_ON_ONCE(le64_to_cpu(stat->nr_requests) > 0))
+		return -EIO;
+
+	/*
+	 * Splice in all the completed subtrees at the initial parent
+	 * blocks in the main fs_tree before rebalancing any of them.
+	 */
+	for (seq = 0; ; seq++) {
+
+		ret = next_log_merge_item(sb, &super->log_merge,
+					  SCOUTFS_LOG_MERGE_COMPLETE_ZONE, seq,
+					  0, &comp, sizeof(comp));
+		if (ret < 0) {
+			if (ret == -ENOENT) {
+				ret = 0;
+				break;
+			}
+			goto out;
+		}
+
+		seq = le64_to_cpu(comp.seq);
+
+		ret = scoutfs_btree_set_parent(sb, &server->alloc, &server->wri,
+					       &super->fs_root, &comp.start,
+					       &comp.root);
+		if (ret < 0)
+			goto out;
+
+		mutex_lock(&server->alloc_mutex);
+		ret = scoutfs_alloc_splice_list(sb, &server->alloc,
+						&server->wri,
+						server->other_freed,
+						&comp.meta_avail) ?:
+		      scoutfs_alloc_splice_list(sb, &server->alloc,
+						&server->wri,
+						server->other_freed,
+						&comp.meta_freed);
+		mutex_unlock(&server->alloc_mutex);
+		if (ret < 0)
+			goto out;
+
+		/* clear allocators */
+		memset(&comp.meta_avail, 0, sizeof(comp.meta_avail));
+		memset(&comp.meta_freed, 0, sizeof(comp.meta_freed));
+
+		init_log_merge_key(&key, SCOUTFS_LOG_MERGE_COMPLETE_ZONE,
+				   seq, 0);
+		ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
+					   &super->log_merge, &key,
+					   &comp, sizeof(comp));
+		if (ret < 0)
+			goto out;
+	}
+
+	/*
+	 * Now with all the parent blocks spliced in, rebalance items
+	 * amongst parents that needed to split/join and delete the
+	 * completion items, possibly returning ranges to process.
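+	 *
+	 * A partial completion is handled roughly as follows (field
+	 * names from the structures above, control flow simplified):
+	 *
+	 *	if (flags & SCOUTFS_LOG_MERGE_COMP_REMAIN) {
+	 *		rng.start = comp.remain;	// unprocessed tail
+	 *		rng.end = comp.end;
+	 *		reinsert rng as a range item for a later request;
+	 *	}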
+	 */
+	for (seq = 0; ; seq++) {
+		ret = next_log_merge_item(sb, &super->log_merge,
+					  SCOUTFS_LOG_MERGE_COMPLETE_ZONE, seq,
+					  0, &comp, sizeof(comp));
+		if (ret < 0) {
+			if (ret == -ENOENT) {
+				ret = 0;
+				break;
+			}
+			goto out;
+		}
+
+		seq = le64_to_cpu(comp.seq);
+
+		/* balance when there was a remaining key range */
+		if (le64_to_cpu(comp.flags) & SCOUTFS_LOG_MERGE_COMP_REMAIN) {
+			ret = scoutfs_btree_rebalance(sb, &server->alloc,
+						      &server->wri,
+						      &super->fs_root,
+						      &comp.start);
+			if (ret < 0)
+				goto out;
+
+			rng.start = comp.remain;
+			rng.end = comp.end;
+
+			key = rng.start;
+			key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+			ret = scoutfs_btree_insert(sb, &server->alloc,
+						   &server->wri,
+						   &super->log_merge, &key,
+						   &rng, sizeof(rng));
+			if (ret < 0)
+				goto out;
+			no_ranges = false;
+		}
+
+		/* delete the completion item */
+		init_log_merge_key(&key, SCOUTFS_LOG_MERGE_COMPLETE_ZONE,
+				   seq, 0);
+		ret = scoutfs_btree_delete(sb, &server->alloc, &server->wri,
+					   &super->log_merge,
+					   &key);
+		if (ret < 0)
+			goto out;
+	}
+
+	/* update the status once all completions are processed */
+	scoutfs_key_set_zeros(&stat->next_range_key);
+	stat->nr_complete = 0;
+
+	/* update counts and we're done if there are still ranges to process */
+	if (!no_ranges) {
+		init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
+		ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
+					   &super->log_merge, &key,
+					   stat, sizeof(*stat));
+		goto out;
+	}
+
+	/* no more ranges, free blooms and add freeing items for free work */
+	lt.rid = 0;
+	lt.nr = 0;
+	for (;;) {
+		scoutfs_key_init_log_trees(&key, le64_to_cpu(lt.rid),
+					   le64_to_cpu(lt.nr) + 1);
+		ret = scoutfs_btree_next(sb, &super->logs_root, &key, &iref);
+		if (ret == 0) {
+			if (iref.val_len == sizeof(lt)) {
+				key = *iref.key;
+				memcpy(&lt, iref.val, sizeof(lt));
+			} else {
+				ret = -EIO;
+			}
+			scoutfs_btree_put_iref(&iref);
+		}
+		if (ret < 0) {
+			if (ret == -ENOENT) {
+				ret = 0;
+				break;
+			}
+			goto out;
+		}
+
+		/* only free the inputs to the log merge that just finished */
+		if (!(le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) ||
+		    (le64_to_cpu(lt.max_item_seq) >
+		     le64_to_cpu(stat->last_seq)))
+			continue;
+
+		fr.root = lt.item_root;
+		scoutfs_key_set_zeros(&fr.key);
+		fr.seq = cpu_to_le64(scoutfs_server_next_seq(sb));
+		init_log_merge_key(&key, SCOUTFS_LOG_MERGE_FREEING_ZONE,
+				   le64_to_cpu(fr.seq), 0);
+		ret = scoutfs_btree_insert(sb, &server->alloc, &server->wri,
+					   &super->log_merge, &key,
+					   &fr, sizeof(fr));
+		if (ret < 0)
+			goto out;
+
+		if (lt.bloom_ref.blkno) {
+			ret = scoutfs_free_meta(sb, &server->alloc,
+						&server->wri,
+						le64_to_cpu(lt.bloom_ref.blkno));
+			if (ret < 0)
+				goto out;
+		}
+
+		scoutfs_key_init_log_trees(&key, le64_to_cpu(lt.rid),
+					   le64_to_cpu(lt.nr));
+		ret = scoutfs_btree_delete(sb, &server->alloc, &server->wri,
+					   &super->logs_root, &key);
+		if (ret < 0)
+			goto out;
+	}
+
+	init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
+	ret = scoutfs_btree_delete(sb, &server->alloc, &server->wri,
+				   &super->log_merge, &key);
+	if (ret == 0)
+		queue_work(server->wq, &server->log_merge_free_work);
+out:
+	BUG_ON(ret); /* inconsistent */
+
+	return ret;
+}
+
+/*
+ * Search amongst the finalized log roots within the caller's merge seq looking
+ * for the earliest item within the caller's range.  The caller has taken
+ * care of locking.
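+ *
+ * Conceptually it's a min() over the first in-range key of each
+ * eligible finalized root; as a sketch (the iterator is hypothetical):
+ *
+ *	scoutfs_key_set_ones(&least);
+ *	for each finalized root with max_item_seq <= seq:
+ *		if (first key of item_root in [start, end]) < least:
+ *			least = that key;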
+ */
+static int next_least_log_item(struct super_block *sb,
+			       struct scoutfs_btree_root *logs_root,
+			       u64 seq, struct scoutfs_key *start,
+			       struct scoutfs_key *end,
+			       struct scoutfs_key *next_ret)
+{
+	struct scoutfs_btree_root item_root;
+	struct scoutfs_log_trees *lt;
+	SCOUTFS_BTREE_ITEM_REF(iref);
+	struct scoutfs_key key;
+	int ret;
+
+	scoutfs_key_set_ones(next_ret);
+
+	for (scoutfs_key_init_log_trees(&key, 0, 0); ; scoutfs_key_inc(&key)) {
+
+		/* find the next finalized log root within the merge last_seq */
+		ret = scoutfs_btree_next(sb, logs_root, &key, &iref);
+		if (ret == 0) {
+			if (iref.val_len == sizeof(*lt)) {
+				key = *iref.key;
+				lt = iref.val;
+				if ((le64_to_cpu(lt->flags) &
+				     SCOUTFS_LOG_TREES_FINALIZED) &&
+				    (le64_to_cpu(lt->max_item_seq) <= seq))
+					item_root = lt->item_root;
+				else
+					item_root.ref.blkno = 0;
+			} else {
+				ret = -EIO;
+			}
+			scoutfs_btree_put_iref(&iref);
+		}
+		if (ret < 0) {
+			if (ret == -ENOENT)
+				ret = 0;
+			goto out;
+		}
+		if (item_root.ref.blkno == 0)
+			continue;
+
+		/* see if populated roots have item keys less than next */
+		ret = scoutfs_btree_next(sb, &item_root, start, &iref);
+		if (ret == 0) {
+			if (scoutfs_key_compare(iref.key, end) <= 0 &&
+			    scoutfs_key_compare(iref.key, next_ret) < 0)
+				*next_ret = *iref.key;
+			scoutfs_btree_put_iref(&iref);
+		}
+		if (ret < 0) {
+			if (ret == -ENOENT)
+				ret = 0;
+			else
+				goto out;
+		}
+	}
+
+out:
+	if (ret == 0 && scoutfs_key_is_ones(next_ret))
+		ret = -ENOENT;
+
+	return ret;
+}
+
+/*
+ * Once a merge is fully completed all of the finalized input log btrees
+ * are redundant and can be freed.
+ *
+ * As merging finishes and the status item is deleted, we also move all
+ * the finalized roots from log_trees items over into freeing items.
+ * This work is then kicked off which iterates over all the freeing
+ * items calling into the btree to free all its referenced blocks, with
+ * the key tracking partial progress.
+ *
+ * The freeing work is reasonably light.  We only read the btree blocks
+ * and add freed blocks to merge back into the core allocators.  The
+ * server can handle this load and we avoid the io overhead and
+ * complexity of farming it out to clients.
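+ *
+ * In outline, each pass of the work below is:
+ *
+ *	find the first freeing item -> fr;
+ *	free a bounded batch of fr.root's blocks, advancing fr.key;
+ *	fr.key all ones ? delete the freeing item : update it;
+ *	apply the commit and loop until -ENOENT or shutdown;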
+ */
+static void server_log_merge_free_work(struct work_struct *work)
+{
+	struct server_info *server = container_of(work, struct server_info,
+						  log_merge_free_work);
+	struct super_block *sb = server->sb;
+	struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
+	struct scoutfs_log_merge_freeing fr;
+	struct scoutfs_key key;
+	bool commit = false;
+	int ret = 0;
+
+	/* shutdown waits for us, we'll eventually see shutting_down set */
+	while (!server->shutting_down) {
+		scoutfs_server_hold_commit(sb);
+		mutex_lock(&server->logs_mutex);
+		commit = true;
+
+		ret = next_log_merge_item(sb, &super->log_merge,
+					  SCOUTFS_LOG_MERGE_FREEING_ZONE,
+					  0, 0, &fr, sizeof(fr));
+		if (ret < 0) {
+			if (ret == -ENOENT)
+				ret = 0;
+			break;
+		}
+
+		ret = scoutfs_btree_free_blocks(sb, &server->alloc,
+						&server->wri, &fr.key,
+						&fr.root, 10);
+		if (ret < 0)
+			break;
+
+		/* freed blocks are in allocator, we *have* to update key */
+		init_log_merge_key(&key, SCOUTFS_LOG_MERGE_FREEING_ZONE,
+				   le64_to_cpu(fr.seq), 0);
+		if (scoutfs_key_is_ones(&fr.key))
+			ret = scoutfs_btree_delete(sb, &server->alloc,
+						   &server->wri,
+						   &super->log_merge, &key);
+		else
+			ret = scoutfs_btree_update(sb, &server->alloc,
+						   &server->wri,
+						   &super->log_merge, &key,
+						   &fr, sizeof(fr));
+		/* freed blocks are in allocator, we *have* to update fr */
+		BUG_ON(ret < 0);
+
+		mutex_unlock(&server->logs_mutex);
+		ret = scoutfs_server_apply_commit(sb, ret);
+		commit = false;
+		if (ret < 0)
+			break;
+	}
+
+	if (commit) {
+		mutex_unlock(&server->logs_mutex);
+		ret = scoutfs_server_apply_commit(sb, ret);
+	}
+
+	if (ret < 0) {
+		scoutfs_err(sb, "server error freeing merged btree blocks: %d",
+			    ret);
+		stop_server(server);
+	}
+
+	/* not re-arming, regularly queued by the server during merging */
+}
+
+/*
+ * This will return ENOENT to the client if there is no work to do.
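+ *
+ * From the client side the exchange looks roughly like this
+ * (hypothetical client loop, not part of this patch):
+ *
+ *	send GET_LOG_MERGE;
+ *	-ENOENT ? back off and retry later;
+ *	0 ? merge items in [req.start, req.end] from req.logs_root into
+ *	    req.root using req.meta_avail, then send COMMIT_LOG_MERGE;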
+ */
+static int server_get_log_merge(struct super_block *sb,
+				struct scoutfs_net_connection *conn,
+				u8 cmd, u64 id, void *arg, u16 arg_len)
+{
+	DECLARE_SERVER_INFO(sb, server);
+	u64 rid = scoutfs_net_client_rid(conn);
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+	struct scoutfs_super_block *super = &sbi->super;
+	struct scoutfs_log_merge_status stat;
+	struct scoutfs_log_merge_range rng;
+	struct scoutfs_log_merge_range remain;
+	struct scoutfs_log_merge_request req;
+	struct scoutfs_key par_start;
+	struct scoutfs_key par_end;
+	struct scoutfs_key next_key;
+	struct scoutfs_key key;
+	bool ins_rng;
+	bool del_remain;
+	bool del_req;
+	bool upd_stat;
+	bool no_ranges;
+	bool no_next;
+	int ret;
+	int err;
+
+	if (arg_len != 0)
+		return -EINVAL;
+
+	scoutfs_server_hold_commit(sb);
+	mutex_lock(&server->logs_mutex);
+
+restart:
+	memset(&req, 0, sizeof(req));
+	ins_rng = false;
+	del_remain = false;
+	del_req = false;
+	upd_stat = false;
+
+	/* get the status item, maybe creating a new one */
+	ret = next_log_merge_item(sb, &super->log_merge,
+				  SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0,
+				  &stat, sizeof(stat));
+	if (ret == -ENOENT)
+		ret = start_log_merge(sb, super, &stat);
+	if (ret < 0)
+		goto out;
+
+	trace_scoutfs_get_log_merge_status(sb, rid, &stat.next_range_key,
+					   le64_to_cpu(stat.nr_requests),
+					   le64_to_cpu(stat.nr_complete),
+					   le64_to_cpu(stat.last_seq),
+					   le64_to_cpu(stat.seq));
+
+	/* find the next range, always checking for splicing */
+	for (;;) {
+		key = stat.next_range_key;
+		key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+		ret = next_log_merge_item_key(sb, &super->log_merge, SCOUTFS_LOG_MERGE_RANGE_ZONE,
+					      &key, &rng, sizeof(rng));
+		if (ret < 0 && ret != -ENOENT)
+			goto out;
+
+		/* maybe splice now that we know if there are ranges */
+		no_next = ret == -ENOENT;
+		no_ranges = scoutfs_key_is_zeros(&stat.next_range_key) && ret == -ENOENT;
+		if (le64_to_cpu(stat.nr_requests) == 0 &&
+		    (no_next || le64_to_cpu(stat.nr_complete) >= LOG_MERGE_SPLICE_BATCH)) {
+			ret = splice_log_merge_completions(sb, &stat, no_ranges);
+			if (ret < 0)
+				goto out;
+			/* splicing resets key and adds ranges, could finish status */
+			goto restart;
+		}
+
+		/* no next range for requests; future attempts will create or splice */
+		if (no_next) {
+			ret = -ENOENT;
+			goto out;
+		}
+
+		/* see if we should back off after splicing might have deleted completions */
+		if ((le64_to_cpu(stat.nr_requests) +
+		     le64_to_cpu(stat.nr_complete)) >= LOG_MERGE_SPLICE_BATCH) {
+			ret = -ENOENT;
+			goto out;
+		}
+
+		/* find the next logged item in the next range */
+		ret = next_least_log_item(sb, &super->logs_root,
+					  le64_to_cpu(stat.last_seq),
+					  &rng.start, &rng.end, &next_key);
+		if (ret == 0)
+			break;
+		/* drop the range if it contained no logged items */
+		if (ret == -ENOENT) {
+			key = rng.start;
+			key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+			ret = scoutfs_btree_delete(sb, &server->alloc,
+						   &server->wri,
+						   &super->log_merge, &key);
+		}
+		if (ret < 0)
+			goto out;
+	}
+
+	/* start to build the request that's saved and sent to the client */
+	req.logs_root = super->logs_root;
+	req.last_seq = stat.last_seq;
+	req.rid = cpu_to_le64(rid);
+	req.seq = cpu_to_le64(scoutfs_server_next_seq(sb));
+	req.flags = 0;
+	if (super->fs_root.height > 2)
+		req.flags |= cpu_to_le64(SCOUTFS_LOG_MERGE_REQUEST_SUBTREE);
+
+	/* find the fs_root parent block and its key range */
+	ret = scoutfs_btree_get_parent(sb, &super->fs_root, &next_key,
+				       &req.root) ?:
+	      scoutfs_btree_parent_range(sb, &super->fs_root, &next_key,
+					 &par_start, &par_end);
+	if (ret < 0)
+		goto out;
+
+	/* start from next item, don't exceed parent key range */
+	req.start = next_key;
+	req.end = rng.end;
+	if (scoutfs_key_compare(&par_end, &req.end) < 0)
+		req.end = par_end;
+
+	/* delete the old range */
+	key = rng.start;
+	key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+	ret = scoutfs_btree_delete(sb, &server->alloc, &server->wri,
+				   &super->log_merge, &key);
+	if (ret < 0)
+		goto out;
+	ins_rng = true;
+
+	/* add remaining range if we have to */
+	if (scoutfs_key_compare(&rng.end, &req.end) > 0) {
+		remain.start = req.end;
+		scoutfs_key_inc(&remain.start);
+		remain.end = rng.end;
+
+		key = remain.start;
+		key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+		ret = scoutfs_btree_insert(sb, &server->alloc, &server->wri,
+					   &super->log_merge, &key,
+					   &remain, sizeof(remain));
+		if (ret < 0)
+			goto out;
+		del_remain = true;
+	}
+
+	/* give the client an allocation pool to work with */
+	mutex_lock(&server->alloc_mutex);
+	ret = scoutfs_alloc_fill_list(sb, &server->alloc, &server->wri,
+				      &req.meta_avail, server->meta_avail,
+				      SCOUTFS_SERVER_MERGE_FILL_LO,
+				      SCOUTFS_SERVER_MERGE_FILL_TARGET);
+	mutex_unlock(&server->alloc_mutex);
+	if (ret < 0)
+		goto out;
+
+	/* save the request that will be sent to the client */
+	init_log_merge_key(&key, SCOUTFS_LOG_MERGE_REQUEST_ZONE, rid,
+			   le64_to_cpu(req.seq));
+	ret = scoutfs_btree_insert(sb, &server->alloc, &server->wri,
+				   &super->log_merge, &key,
+				   &req, sizeof(req));
+	if (ret < 0)
+		goto out;
+	del_req = true;
+
+	trace_scoutfs_get_log_merge_request(sb, rid, &req.root,
+					    &req.start, &req.end,
+					    le64_to_cpu(req.last_seq),
+					    le64_to_cpu(req.seq));
+
+	/* make sure next range avoids ranges for parent in use */
+	stat.next_range_key = par_end;
+	if (!scoutfs_key_is_ones(&stat.next_range_key))
+		scoutfs_key_inc(&stat.next_range_key);
+
+	/* update the status requests count */
+	le64_add_cpu(&stat.nr_requests, 1);
+	init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
+	ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
+				   &super->log_merge, &key,
+				   &stat, sizeof(stat));
+	if (ret < 0)
+		goto out;
+	upd_stat = true;
+
+out:
+	if (ret < 0) {
+		/* undo any of our partial item changes */
+		if (upd_stat) {
+			le64_add_cpu(&stat.nr_requests, -1ULL);
+			init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE,
+					   0, 0);
+			err = scoutfs_btree_update(sb, &server->alloc,
+						   &server->wri,
+						   &super->log_merge, &key,
+						   &stat, sizeof(stat));
+			BUG_ON(err); /* inconsistent */
+		}
+
+		if (del_req) {
+			init_log_merge_key(&key, SCOUTFS_LOG_MERGE_REQUEST_ZONE,
+					   rid, le64_to_cpu(req.seq));
+			err = scoutfs_btree_delete(sb, &server->alloc,
+						   &server->wri,
+						   &super->log_merge, &key);
+			BUG_ON(err); /* inconsistent */
+		}
+
+		if (del_remain) {
+			key = remain.start;
+			key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+			err = scoutfs_btree_delete(sb, &server->alloc,
+						   &server->wri,
+						   &super->log_merge, &key);
+			BUG_ON(err); /* inconsistent */
+		}
+
+		if (ins_rng) {
+			key = rng.start;
+			key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+			err = scoutfs_btree_insert(sb, &server->alloc,
+						   &server->wri,
+						   &super->log_merge, &key,
+						   &rng, sizeof(rng));
+			BUG_ON(err); /* inconsistent */
+		}
+
+		/* reclaim allocation if we failed */
+		mutex_lock(&server->alloc_mutex);
+		err = scoutfs_alloc_splice_list(sb, &server->alloc,
+						&server->wri,
+						server->other_freed,
+						&req.meta_avail);
+		mutex_unlock(&server->alloc_mutex);
+		BUG_ON(err); /* inconsistent */
+	}
+
+	mutex_unlock(&server->logs_mutex);
+	ret = scoutfs_server_apply_commit(sb, ret);
+
+	return scoutfs_net_response(sb, conn, cmd, id, ret, &req, sizeof(req));
+}
+
+/*
+ * Commit the client's log merge work.  Typically we store the
+ * completion so that we can later splice it back into the fs root and
+ * reclaim its allocators in a batch.  If it failed we reclaim it
+ * immediately.
+ */
+static int server_commit_log_merge(struct super_block *sb,
+				   struct scoutfs_net_connection *conn,
+				   u8 cmd, u64 id, void *arg, u16 arg_len)
+{
+	DECLARE_SERVER_INFO(sb, server);
+	u64 rid = scoutfs_net_client_rid(conn);
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+	struct scoutfs_super_block *super = &sbi->super;
+	struct scoutfs_log_merge_request orig_req;
+	struct scoutfs_log_merge_complete *comp;
+	struct scoutfs_log_merge_status stat;
+	struct scoutfs_log_merge_range rng;
+	struct scoutfs_key key;
+	int ret;
+
+	scoutfs_key_set_zeros(&rng.end);
+
+	if (arg_len != sizeof(struct scoutfs_log_merge_complete))
+		return -EINVAL;
+	comp = arg;
+
+	trace_scoutfs_get_log_merge_complete(sb, rid, &comp->root,
+					     &comp->start, &comp->end,
+					     &comp->remain,
+					     le64_to_cpu(comp->seq),
+					     le64_to_cpu(comp->flags));
+
+	scoutfs_server_hold_commit(sb);
+	mutex_lock(&server->logs_mutex);
+
+	/* find the status of the current log merge */
+	ret = next_log_merge_item(sb, &super->log_merge,
+				  SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0,
+				  &stat, sizeof(stat));
+	if (ret < 0) {
+		WARN_ON_ONCE(ret == -ENOENT); /* inconsistent */
+		goto out;
+	}
+
+	/* find the completion's original saved request */
+	ret = next_log_merge_item(sb, &super->log_merge,
+				  SCOUTFS_LOG_MERGE_REQUEST_ZONE,
+				  rid, le64_to_cpu(comp->seq),
+				  &orig_req, sizeof(orig_req));
+	if (WARN_ON_ONCE(ret == 0 && (comp->rid != orig_req.rid ||
+				      comp->seq != orig_req.seq)))
+		ret = -ENOENT; /* inconsistency */
+	if (ret < 0) {
+		WARN_ON_ONCE(ret == -ENOENT); /* inconsistency */
+		goto out;
+	}
+
+	/* delete the original request item */
+	init_log_merge_key(&key, SCOUTFS_LOG_MERGE_REQUEST_ZONE, rid,
+			   le64_to_cpu(orig_req.seq));
+	ret = scoutfs_btree_delete(sb, &server->alloc, &server->wri,
+				   &super->log_merge, &key);
+	if (ret < 0)
+		goto out;
+
+	if (le64_to_cpu(comp->flags) & SCOUTFS_LOG_MERGE_COMP_ERROR) {
+		/* restore the range and reclaim the allocator if it failed */
+		rng.start = orig_req.start;
+		rng.end = orig_req.end;
+
+		key = rng.start;
+		key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE;
+		ret = scoutfs_btree_insert(sb, &server->alloc, &server->wri,
+					   &super->log_merge, &key,
+					   &rng, sizeof(rng));
+		if (ret < 0)
+			goto out;
+
+		mutex_lock(&server->alloc_mutex);
+		ret = scoutfs_alloc_splice_list(sb, &server->alloc,
+						&server->wri,
+						server->other_freed,
+						&orig_req.meta_avail) ?:
+		      scoutfs_alloc_splice_list(sb, &server->alloc,
+						&server->wri,
+						server->other_freed,
+						&orig_req.meta_freed);
+		mutex_unlock(&server->alloc_mutex);
+		if (ret < 0)
+			goto out;
+
+	} else {
+		/* otherwise store the completion for later splicing */
+		init_log_merge_key(&key, SCOUTFS_LOG_MERGE_COMPLETE_ZONE,
+				   le64_to_cpu(comp->seq), 0);
+		ret = scoutfs_btree_insert(sb, &server->alloc, &server->wri,
+					   &super->log_merge, &key,
+					   comp, sizeof(*comp));
+		if (ret < 0)
+			goto out;
+
+		le64_add_cpu(&stat.nr_complete, 1ULL);
+	}
+
+	/* and update the status counts */
+	le64_add_cpu(&stat.nr_requests, -1ULL);
+	init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0);
+	ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
+				   &super->log_merge, &key,
+				   &stat, sizeof(stat));
+	if (ret < 0)
+		goto out;
+
+out:
+	mutex_unlock(&server->logs_mutex);
+	ret = scoutfs_server_apply_commit(sb, ret);
+	BUG_ON(ret < 0); /* inconsistent */
+
+	return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
+}
+
 /* The server is receiving an omap response from the client */
 static int open_ino_map_response(struct super_block *sb, struct scoutfs_net_connection *conn,
 				 void *resp, unsigned int resp_len, int error, void *data)
@@ -1347,9 +2360,7 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti
 
 	mutex_lock(&server->volopt_mutex);
 
-	ret = scoutfs_server_hold_commit(sb);
-	if (ret)
-		goto unlock;
+	scoutfs_server_hold_commit(sb);
 
 	if (le64_to_cpu(volopt->set_bits) & SCOUTFS_VOLOPT_DATA_ALLOC_ZONE_BLOCKS_BIT) {
 		opt = le64_to_cpu(volopt->data_alloc_zone_blocks);
@@ -1389,7 +2400,6 @@ apply:
 	super->volopt = server->volopt;
 	write_seqcount_end(&server->volopt_seqcount);
 
-unlock:
 	mutex_unlock(&server->volopt_mutex);
 out:
 	return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
@@ -1419,9 +2429,7 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec
 
 	mutex_lock(&server->volopt_mutex);
 
-	ret = scoutfs_server_hold_commit(sb);
-	if (ret)
-		goto unlock;
+	scoutfs_server_hold_commit(sb);
 
 	for (i = 0, bit = 1, opt = first_valopt(&super->volopt); i < 64; i++, bit <<= 1, opt++) {
 		if (le64_to_cpu(volopt->set_bits) & bit) {
@@ -1439,7 +2447,6 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec
 	super->volopt = server->volopt;
 	write_seqcount_end(&server->volopt_seqcount);
 
-unlock:
 	mutex_unlock(&server->volopt_mutex);
 out:
 	return scoutfs_net_response(sb, conn, cmd, id, ret, NULL, 0);
@@ -1585,6 +2592,113 @@ static int cancel_srch_compact(struct super_block *sb, u64 rid)
 	return ret;
 }
 
+/*
+ * Clean up any log merge requests which have now been abandoned because
+ * their client was evicted.  This is always called on eviction and
+ * there may have been no merge in progress or our client had no
+ * outstanding requests.  For each pending request, we reclaim its
+ * allocators, delete its item, and update the status.
+ *
+ * The request we cancel might have been the last request which
+ * prevented batch processing, but we don't check that here.  This is in
+ * the client eviction path and we want that to be as light and
+ * responsive as possible so we can get back up and running.  The next
+ * client get_log_merge request will see that no more requests are
+ * outstanding.
+ *
+ * The caller holds a commit, but we're responsible for locking.
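+ *
+ * Per abandoned request, the cleanup below amounts to (field names
+ * from the saved request item):
+ *
+ *	delete the request item at (rid, seq);
+ *	reinsert [req.start, req.end] as a range item;
+ *	splice req.meta_avail and req.meta_freed back into the server;
+ *	nr_requests--;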
+ */ +static int cancel_log_merge(struct super_block *sb, u64 rid) +{ + DECLARE_SERVER_INFO(sb, server); + struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_log_merge_status stat; + struct scoutfs_log_merge_request req; + struct scoutfs_log_merge_range rng; + struct scoutfs_key key; + bool update = false; + u64 seq; + int ret; + + mutex_lock(&server->logs_mutex); + + ret = next_log_merge_item(sb, &super->log_merge, + SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0, + &stat, sizeof(stat)); + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + goto out; + } + + for (seq = 0; ; seq++) { + ret = next_log_merge_item(sb, &super->log_merge, + SCOUTFS_LOG_MERGE_REQUEST_ZONE, rid, + seq, &req, sizeof(req)); + if (ret == 0 && le64_to_cpu(req.rid) != rid) + ret = -ENOENT; + if (ret < 0) { + if (ret == -ENOENT) + ret = 0; + break; + } + + seq = le64_to_cpu(req.seq); + + /* remove request item */ + init_log_merge_key(&key, SCOUTFS_LOG_MERGE_REQUEST_ZONE, rid, + le64_to_cpu(req.seq)); + ret = scoutfs_btree_delete(sb, &server->alloc, &server->wri, + &super->log_merge, &key); + if (ret < 0) + goto out; + + /* restore range */ + rng.start = req.start; + rng.end = req.end; + + key = rng.start; + key.sk_zone = SCOUTFS_LOG_MERGE_RANGE_ZONE; + ret = scoutfs_btree_insert(sb, &server->alloc, + &server->wri, + &super->log_merge, &key, + &rng, sizeof(rng)); + if (ret < 0) + goto out; + + /* reclaim allocator */ + mutex_lock(&server->alloc_mutex); + ret = scoutfs_alloc_splice_list(sb, &server->alloc, + &server->wri, + server->other_freed, + &req.meta_avail) ?: + scoutfs_alloc_splice_list(sb, &server->alloc, + &server->wri, + server->other_freed, + &req.meta_freed); + mutex_unlock(&server->alloc_mutex); + if (ret < 0) + goto out; + + /* update count */ + le64_add_cpu(&stat.nr_requests, -1ULL); + update = true; + } + + if (update) { + /* and update the status counts */ + init_log_merge_key(&key, SCOUTFS_LOG_MERGE_STATUS_ZONE, 0, 0); + ret = scoutfs_btree_update(sb, &server->alloc, &server->wri, + &super->log_merge, &key, + &stat, sizeof(stat)); + } +out: + mutex_unlock(&server->logs_mutex); + + BUG_ON(ret < 0); /* XXX inconsistent */ + return ret; +} + /* * Farewell processing is async to the request processing work. Shutdown * waits for request processing to finish and then tears down the connection. 
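A note on the error sequencing above: like reclaim_rid() further down, the two
scoutfs_alloc_splice_list() calls are chained with the GNU C "a ?: b"
conditional, which evaluates b only when a yielded zero, so a run of
int-returning steps stops at the first failure.  A minimal standalone sketch
of the idiom, with hypothetical step functions standing in for the btree and
allocator calls:

	/* build with gcc; "?:" without a middle operand is a GNU C extension */
	#include <stdio.h>

	static int step_one(void)   { return 0; }	/* succeeds */
	static int step_two(void)   { return -5; }	/* fails, say -EIO */
	static int step_three(void) { return 0; }	/* never evaluated below */

	int main(void)
	{
		/* each step runs only if everything before it returned 0 */
		int ret = step_one() ?:
			  step_two() ?:
			  step_three();

		printf("ret = %d\n", ret);	/* prints -5; step_three was skipped */
		return 0;
	}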
@@ -1652,9 +2766,7 @@ static int server_greeting(struct super_block *sb,
 	}
 
 	if (gr->server_term == 0) {
-		ret = scoutfs_server_hold_commit(sb);
-		if (ret < 0)
-			goto send_err;
+		scoutfs_server_hold_commit(sb);
 
 		ret = insert_mounted_client(sb, le64_to_cpu(gr->rid),
					    le64_to_cpu(gr->flags), &conn->peername);
@@ -1727,15 +2839,14 @@ static int reclaim_rid(struct super_block *sb, u64 rid)
 {
 	int ret;
 
-	ret = scoutfs_server_hold_commit(sb);
-	if (ret < 0)
-		return ret;
+	scoutfs_server_hold_commit(sb);
 
 	/* delete mounted client last, recovery looks for it */
 	ret = scoutfs_lock_server_farewell(sb, rid) ?:
 	      remove_trans_seq(sb, rid) ?:
-	      reclaim_log_trees(sb, rid) ?:
+	      reclaim_open_log_tree(sb, rid) ?:
 	      cancel_srch_compact(sb, rid) ?:
+	      cancel_log_merge(sb, rid) ?:
 	      scoutfs_omap_remove_rid(sb, rid) ?:
 	      delete_mounted_client(sb, rid);
 
@@ -1971,6 +3082,8 @@ static scoutfs_net_request_t server_req_funcs[] = {
 	[SCOUTFS_NET_CMD_LOCK] = server_lock,
 	[SCOUTFS_NET_CMD_SRCH_GET_COMPACT] = server_srch_get_compact,
 	[SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT] = server_srch_commit_compact,
+	[SCOUTFS_NET_CMD_GET_LOG_MERGE] = server_get_log_merge,
+	[SCOUTFS_NET_CMD_COMMIT_LOG_MERGE] = server_commit_log_merge,
 	[SCOUTFS_NET_CMD_OPEN_INO_MAP] = server_open_ino_map,
 	[SCOUTFS_NET_CMD_GET_VOLOPT] = server_get_volopt,
 	[SCOUTFS_NET_CMD_SET_VOLOPT] = server_set_volopt,
@@ -2244,7 +3357,7 @@ static void scoutfs_server_worker(struct work_struct *work)
 	struct scoutfs_net_connection *conn = NULL;
 	DECLARE_WAIT_QUEUE_HEAD(waitq);
 	struct sockaddr_in sin;
-	u64 max_vers;
+	u64 max_seq;
 	int ret;
 
 	trace_scoutfs_server_work_enter(sb, 0, 0);
@@ -2284,6 +3397,7 @@ static void scoutfs_server_worker(struct work_struct *work)
 	server->volopt = super->volopt;
 	write_seqcount_end(&server->volopt_seqcount);
 
+	atomic64_set(&server->seq_atomic, le64_to_cpu(super->seq));
 	set_roots(server, &super->fs_root, &super->logs_root, &super->srch_root);
 	scoutfs_block_writer_init(sb, &server->wri);
 
@@ -2307,13 +3421,14 @@ static void scoutfs_server_worker(struct work_struct *work)
 	    le64_to_cpu(server->meta_avail->total_len))
 		swap(server->meta_avail, server->meta_freed);
 
-	ret = scoutfs_forest_get_max_vers(sb, super, &max_vers);
+	ret = scoutfs_forest_get_max_seq(sb, super, &max_seq);
 	if (ret) {
-		scoutfs_err(sb, "server couldn't find max item vers: %d", ret);
+		scoutfs_err(sb, "server couldn't find max item seq: %d", ret);
 		goto shutdown;
 	}
+	scoutfs_server_set_seq_if_greater(sb, max_seq);
 
-	ret = scoutfs_lock_server_setup(sb, &server->alloc, &server->wri, max_vers) ?:
+	ret = scoutfs_lock_server_setup(sb, &server->alloc, &server->wri) ?:
 	      start_recovery(sb);
 	if (ret)
 		goto shutdown;
@@ -2341,6 +3456,8 @@ shutdown:
 	scoutfs_net_shutdown(sb, conn);
 	server->conn = NULL;
 
+	flush_work(&server->log_merge_free_work);
+
 	/* stop tracking recovery, cancel timer, flush any fencing */
 	scoutfs_recov_shutdown(sb);
 	flush_work(&server->fence_pending_recov_work);
@@ -2408,6 +3525,7 @@ void scoutfs_server_stop(struct super_block *sb)
 	cancel_work_sync(&server->work);
 	cancel_work_sync(&server->farewell_work);
 	cancel_work_sync(&server->commit_work);
+	cancel_work_sync(&server->log_merge_free_work);
 }
 
 int scoutfs_server_setup(struct super_block *sb)
@@ -2433,6 +3551,7 @@ int scoutfs_server_setup(struct super_block *sb)
 	INIT_WORK(&server->farewell_work, farewell_worker);
 	mutex_init(&server->alloc_mutex);
 	mutex_init(&server->logs_mutex);
+	INIT_WORK(&server->log_merge_free_work, server_log_merge_free_work);
 	mutex_init(&server->srch_mutex);
 	mutex_init(&server->mounted_clients_mutex);
 	seqcount_init(&server->roots_seqcount);
diff --git a/kmod/src/server.h b/kmod/src/server.h
index 8d31a271..79fcb443 100644
--- a/kmod/src/server.h
+++ b/kmod/src/server.h
@@ -62,7 +62,7 @@ int scoutfs_server_lock_response(struct super_block *sb, u64 rid, u64 id,
 				 struct scoutfs_net_lock *nl);
 int scoutfs_server_lock_recover_request(struct super_block *sb, u64 rid,
 					struct scoutfs_key *key);
-int scoutfs_server_hold_commit(struct super_block *sb);
+void scoutfs_server_hold_commit(struct super_block *sb);
 int scoutfs_server_apply_commit(struct super_block *sb, int err);
 void scoutfs_server_recov_finish(struct super_block *sb, u64 rid, int which);
 
@@ -71,6 +71,10 @@ int scoutfs_server_send_omap_request(struct super_block *sb, u64 rid,
 int scoutfs_server_send_omap_response(struct super_block *sb, u64 rid, u64 id,
 				      struct scoutfs_open_ino_map *map, int err);
 
+u64 scoutfs_server_seq(struct super_block *sb);
+u64 scoutfs_server_next_seq(struct super_block *sb);
+void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq);
+
 struct sockaddr_in;
 struct scoutfs_quorum_elected_info;
 int scoutfs_server_start(struct super_block *sb, u64 term);
diff --git a/kmod/src/srch.c b/kmod/src/srch.c
index 372be7fe..9fbaaeb7 100644
--- a/kmod/src/srch.c
+++ b/kmod/src/srch.c
@@ -989,12 +989,13 @@ int scoutfs_srch_rotate_log(struct super_block *sb,
 			    struct scoutfs_alloc *alloc,
 			    struct scoutfs_block_writer *wri,
 			    struct scoutfs_btree_root *root,
-			    struct scoutfs_srch_file *sfl)
+			    struct scoutfs_srch_file *sfl, bool force)
 {
 	struct scoutfs_key key;
 	int ret;
 
-	if (le64_to_cpu(sfl->blocks) < SCOUTFS_SRCH_LOG_BLOCK_LIMIT)
+	if (sfl->ref.blkno == 0 ||
+	    (!force && le64_to_cpu(sfl->blocks) < SCOUTFS_SRCH_LOG_BLOCK_LIMIT))
 		return 0;
 
 	init_srch_key(&key, SCOUTFS_SRCH_LOG_TYPE,
diff --git a/kmod/src/srch.h b/kmod/src/srch.h
index 69448ab3..7f30f04c 100644
--- a/kmod/src/srch.h
+++ b/kmod/src/srch.h
@@ -37,7 +37,7 @@ int scoutfs_srch_rotate_log(struct super_block *sb,
 			    struct scoutfs_alloc *alloc,
 			    struct scoutfs_block_writer *wri,
 			    struct scoutfs_btree_root *root,
-			    struct scoutfs_srch_file *sfl);
+			    struct scoutfs_srch_file *sfl, bool force);
 int scoutfs_srch_get_compact(struct super_block *sb,
 			     struct scoutfs_alloc *alloc,
 			     struct scoutfs_block_writer *wri,
diff --git a/utils/src/btree.c b/utils/src/btree.c
index 9224f9de..201c47a5 100644
--- a/utils/src/btree.c
+++ b/utils/src/btree.c
@@ -40,7 +40,7 @@ static void *alloc_val(struct scoutfs_btree_block *bt, int len)
 {
 	le16_add_cpu(&bt->mid_free_len, -len);
 	le16_add_cpu(&bt->total_item_bytes, len);
-	return (void *)bt + le16_to_cpu(bt->mid_free_len);
+	return (void *)&bt->items[le16_to_cpu(bt->nr_items)] + le16_to_cpu(bt->mid_free_len);
 }
 
 /*
diff --git a/utils/src/mkfs.c b/utils/src/mkfs.c
index 92dc0b50..bcf07357 100644
--- a/utils/src/mkfs.c
+++ b/utils/src/mkfs.c
@@ -236,7 +236,7 @@ static int do_mkfs(struct mkfs_args *args)
 	super->version = cpu_to_le64(SCOUTFS_INTEROP_VERSION);
 	uuid_generate(super->uuid);
 	super->next_ino = cpu_to_le64(SCOUTFS_ROOT_INO + 1);
-	super->next_trans_seq = cpu_to_le64(1);
+	super->seq = cpu_to_le64(1);
 	super->total_meta_blocks = cpu_to_le64(last_meta + 1);
 	super->first_meta_blkno = cpu_to_le64(next_meta);
 	super->last_meta_blkno = cpu_to_le64(last_meta);
diff --git a/utils/src/print.c b/utils/src/print.c
index 5fa57bdb..c6ea1fe0 100644
--- a/utils/src/print.c
+++ b/utils/src/print.c
@@ -210,8 +210,8 @@ static int print_logs_item(struct scoutfs_key *key, void *val,
 	/* only items in leaf blocks have values */
 	if (val) {
 		liv = val;
-		printf("    log_item_value: vers %llu flags %x\n",
-		       le64_to_cpu(liv->vers), liv->flags);
+		printf("    log_item_value: seq %llu flags %x\n",
+		       le64_to_cpu(liv->seq), liv->flags);
 
 		/* deletion items don't have values */
 		if (!(liv->flags & SCOUTFS_LOG_ITEM_FLAG_DELETION)) {
@@ -289,9 +289,10 @@ static int print_log_trees_item(struct scoutfs_key *key, void *val,
 	       "    data_avail: "ALCROOT_F"\n"
 	       "    data_freed: "ALCROOT_F"\n"
 	       "    srch_file: "SRF_FMT"\n"
-	       "    max_item_vers: %llu\n"
+	       "    max_item_seq: %llu\n"
 	       "    rid: %016llx\n"
 	       "    nr: %llu\n"
+	       "    flags: %llx\n"
 	       "    data_alloc_zone_blocks: %llu\n"
 	       "    data_alloc_zones: ",
 	       AL_HEAD_A(&lt->meta_avail),
@@ -304,9 +305,10 @@ static int print_log_trees_item(struct scoutfs_key *key, void *val,
 	       ALCROOT_A(&lt->data_avail),
 	       ALCROOT_A(&lt->data_freed),
 	       SRF_A(&lt->srch_file),
-	       le64_to_cpu(lt->max_item_vers),
+	       le64_to_cpu(lt->max_item_seq),
 	       le64_to_cpu(lt->rid),
 	       le64_to_cpu(lt->nr),
+	       le64_to_cpu(lt->flags),
 	       le64_to_cpu(lt->data_alloc_zone_blocks));
 
 	for (i = 0; i < SCOUTFS_DATA_ALLOC_ZONE_LE64S; i++) {
@@ -383,6 +385,72 @@ static int print_mounted_client_entry(struct scoutfs_key *key, void *val,
 	return 0;
 }
 
+static int print_log_merge_item(struct scoutfs_key *key, void *val,
+				unsigned val_len, void *arg)
+{
+	struct scoutfs_log_merge_status *stat;
+	struct scoutfs_log_merge_range *rng;
+	struct scoutfs_log_merge_request *req;
+	struct scoutfs_log_merge_complete *comp;
+	struct scoutfs_log_merge_freeing *fr;
+
+	switch (key->sk_zone) {
+	case SCOUTFS_LOG_MERGE_STATUS_ZONE:
+		stat = val;
+		printf("  status: next_range_key "SK_FMT" nr_req %llu nr_comp %llu"
+		       " last_seq %llu seq %llu\n",
+		       SK_ARG(&stat->next_range_key),
+		       le64_to_cpu(stat->nr_requests),
+		       le64_to_cpu(stat->nr_complete),
+		       le64_to_cpu(stat->last_seq),
+		       le64_to_cpu(stat->seq));
+		break;
+	case SCOUTFS_LOG_MERGE_RANGE_ZONE:
+		rng = val;
+		printf("  range: start "SK_FMT" end "SK_FMT"\n",
+		       SK_ARG(&rng->start),
+		       SK_ARG(&rng->end));
+		break;
+	case SCOUTFS_LOG_MERGE_REQUEST_ZONE:
+		req = val;
+		printf("  request: logs_root "BTROOT_F" root "BTROOT_F" start "SK_FMT
+		       " end "SK_FMT" last_seq %llu rid %016llx seq %llu flags 0x%llx\n",
+		       BTROOT_A(&req->logs_root),
+		       BTROOT_A(&req->root),
+		       SK_ARG(&req->start),
+		       SK_ARG(&req->end),
+		       le64_to_cpu(req->last_seq),
+		       le64_to_cpu(req->rid),
+		       le64_to_cpu(req->seq),
+		       le64_to_cpu(req->flags));
+		break;
+	case SCOUTFS_LOG_MERGE_COMPLETE_ZONE:
+		comp = val;
+		printf("  complete: root "BTROOT_F" start "SK_FMT" end "SK_FMT
+		       " remain "SK_FMT" rid %016llx seq %llu flags %llx\n",
+		       BTROOT_A(&comp->root),
+		       SK_ARG(&comp->start),
+		       SK_ARG(&comp->end),
+		       SK_ARG(&comp->remain),
+		       le64_to_cpu(comp->rid),
+		       le64_to_cpu(comp->seq),
+		       le64_to_cpu(comp->flags));
+		break;
+	case SCOUTFS_LOG_MERGE_FREEING_ZONE:
+		fr = val;
+		printf("  freeing: root "BTROOT_F" key "SK_FMT" seq %llu\n",
+		       BTROOT_A(&fr->root),
+		       SK_ARG(&fr->key),
+		       le64_to_cpu(fr->seq));
+		break;
+	default:
+		printf("  (unknown log merge key zone %u)\n", key->sk_zone);
+		break;
+	}
+
+	return 0;
+}
+
 static int print_alloc_item(struct scoutfs_key *key, void *val,
 			    unsigned val_len, void *arg)
 {
@@ -859,6 +927,10 @@ out:
 	return ret;
 }
 
+#define BTR_FMT "blkno %llu seq %016llx height %u"
+#define BTR_ARG(rt) \
+	le64_to_cpu((rt)->ref.blkno), le64_to_cpu((rt)->ref.seq), (rt)->height
+
 static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
 {
 	char uuid_str[37];
@@ -878,7 +950,7 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
 	printf("  flags: 0x%016llx\n", le64_to_cpu(super->flags));
 
 	/* XXX these are all in a crazy order */
-	printf("  next_ino %llu next_trans_seq %llu\n"
+	printf("  next_ino %llu seq %llu\n"
 	       "  total_meta_blocks %llu first_meta_blkno %llu last_meta_blkno %llu\n"
 	       "  total_data_blocks %llu first_data_blkno %llu last_data_blkno %llu\n"
 	       "  meta_alloc[0]: "ALCROOT_F"\n"
@@ -888,12 +960,14 @@
 	       "  server_meta_avail[1]: "AL_HEAD_F"\n"
 	       "  server_meta_freed[0]: "AL_HEAD_F"\n"
 	       "  server_meta_freed[1]: "AL_HEAD_F"\n"
-	       "  mounted_clients root: height %u blkno %llu seq %llu\n"
-	       "  srch_root root: height %u blkno %llu seq %llu\n"
-	       "  trans_seqs root: height %u blkno %llu seq %llu\n"
-	       "  fs_root btree root: height %u blkno %llu seq %llu\n",
+	       "  fs_root: "BTR_FMT"\n"
+	       "  logs_root: "BTR_FMT"\n"
+	       "  log_merge: "BTR_FMT"\n"
+	       "  trans_seqs: "BTR_FMT"\n"
+	       "  mounted_clients: "BTR_FMT"\n"
+	       "  srch_root: "BTR_FMT"\n",
 	       le64_to_cpu(super->next_ino),
-	       le64_to_cpu(super->next_trans_seq),
+	       le64_to_cpu(super->seq),
 	       le64_to_cpu(super->total_meta_blocks),
 	       le64_to_cpu(super->first_meta_blkno),
 	       le64_to_cpu(super->last_meta_blkno),
@@ -907,18 +981,12 @@
 	       AL_HEAD_A(&super->server_meta_avail[1]),
 	       AL_HEAD_A(&super->server_meta_freed[0]),
 	       AL_HEAD_A(&super->server_meta_freed[1]),
-	       super->mounted_clients.height,
-	       le64_to_cpu(super->mounted_clients.ref.blkno),
-	       le64_to_cpu(super->mounted_clients.ref.seq),
-	       super->srch_root.height,
-	       le64_to_cpu(super->srch_root.ref.blkno),
-	       le64_to_cpu(super->srch_root.ref.seq),
-	       super->trans_seqs.height,
-	       le64_to_cpu(super->trans_seqs.ref.blkno),
-	       le64_to_cpu(super->trans_seqs.ref.seq),
-	       super->fs_root.height,
-	       le64_to_cpu(super->fs_root.ref.blkno),
-	       le64_to_cpu(super->fs_root.ref.seq));
+	       BTR_ARG(&super->fs_root),
+	       BTR_ARG(&super->logs_root),
+	       BTR_ARG(&super->log_merge),
+	       BTR_ARG(&super->trans_seqs),
+	       BTR_ARG(&super->mounted_clients),
+	       BTR_ARG(&super->srch_root));
 
 	printf("  volume options:\n"
 	       "    set_bits: %016llx\n",
@@ -973,6 +1041,11 @@ static int print_volume(int fd)
 	if (err && !ret)
 		ret = err;
 
+	err = print_btree(fd, super, "log_merge", &super->log_merge,
+			  print_log_merge_item, NULL);
+	if (err && !ret)
+		ret = err;
+
 	for (i = 0; i < array_size(super->server_meta_avail); i++) {
 		snprintf(str, sizeof(str), "server_meta_avail[%u]", i);
 		err = print_alloc_list_block(fd, str,