scoutfs: sync large transactions as released
We don't want very large transactions to build up and create huge commit latencies. All dirty blocks are written to free space, so a count of allocations serves as a count of dirty blocks. We arbitrarily limit a transaction to 128MB and try to kick off a commit when releasing a transaction that has grown that big.

Signed-off-by: Zach Brown <zab@versity.com>
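In distilled form, the mechanism is a threshold check on an atomic counter: every allocation adds its block count, and the last holder to release the transaction kicks off a commit once the counter crosses the cap. The sketch below is a minimal userspace model of that pattern, not scoutfs code; the block size and all names are simplified stand-ins for what the diff below actually adds.

#include <stdatomic.h>
#include <stdio.h>

#define BLOCK_SIZE		4096	/* assumed 4KB blocks */
#define MAX_TRANS_BLOCKS	(128 * 1024 * 1024 / BLOCK_SIZE)

static atomic_uint dirty_blocks;	/* allocations since the last commit */

/* stand-in for kicking off an async commit */
static void kick_commit(void)
{
	printf("committing %u dirty blocks\n", atomic_load(&dirty_blocks));
	atomic_store(&dirty_blocks, 0);	/* reset once the commit lands */
}

/* each allocation of 2^order blocks bumps the running count */
static void account_alloc(int order)
{
	atomic_fetch_add(&dirty_blocks, 1U << order);
}

/* releasing a transaction checks whether it has grown too large */
static void release_trans(void)
{
	if (atomic_load(&dirty_blocks) >= MAX_TRANS_BLOCKS)
		kick_commit();
}

int main(void)
{
	for (int i = 0; i < 40000; i++) {
		account_alloc(0);	/* order-0: one block each */
		release_trans();
	}
	return 0;
}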
@@ -580,6 +580,7 @@ static int buddy_alloc(struct super_block *sb, u64 *blkno, int order)
 static int alloc_region(struct super_block *sb, u64 *blkno, int order,
 			u64 existing, int region)
 {
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
 	int ret;
 
 	switch(region) {
@@ -593,8 +594,15 @@ static int alloc_region(struct super_block *sb, u64 *blkno, int order,
 	case REGION_BUDDY:
 		ret = buddy_alloc(sb, blkno, order);
 		break;
 	default:
 		WARN_ON_ONCE(1);
 		ret = -EINVAL;
 	}
 
+	/* this misses other direct calls to bitmap_alloc, but that's minor */
+	if (ret >= 0)
+		atomic_add(1 << ret, &sbi->buddy_count);
+
 	trace_scoutfs_buddy_alloc(*blkno, order, region, ret);
 	return ret;
 }
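Note that buddy_alloc() returns the order of the successful allocation, so the new accounting converts it to a block count with 1 << ret before adding it to buddy_count: an order-0 allocation accounts for one block, an order-3 allocation for eight. As the comment acknowledges, allocations made through direct bitmap_alloc() calls are not counted, so the count slightly understates the true number of dirty blocks, which is acceptable for a heuristic.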
@@ -848,3 +856,23 @@ out:
 	return ret;
 
 }
+
+/*
+ * Return the number of block allocations since the last time the
+ * counter was reset. This count doesn't include some internal bitmap
+ * block allocations but that should be a small fraction of the main
+ * allocations.
+ */
+unsigned int scoutfs_buddy_alloc_count(struct super_block *sb)
+{
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+
+	return atomic_read(&sbi->buddy_count);
+}
+
+void scoutfs_buddy_reset_count(struct super_block *sb)
+{
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+
+	atomic_set(&sbi->buddy_count, 0);
+}
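These helpers are deliberately minimal: scoutfs_buddy_alloc_count() samples the counter and scoutfs_buddy_reset_count() zeroes it after a successful commit (see the trans.c hunks below). Since the counter is a bare atomic, a sample and a later reset are not one atomic unit, so allocations that race with the reset can be momentarily miscounted; that is harmless for a sizing heuristic.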
@@ -10,4 +10,7 @@ void scoutfs_buddy_free_extent(struct super_block *sb, u64 blkno, u64 count);
 int scoutfs_buddy_was_free(struct super_block *sb, u64 blkno, int order);
 int scoutfs_buddy_bfree(struct super_block *sb, u64 *bfree);
 
+unsigned int scoutfs_buddy_alloc_count(struct super_block *sb);
+void scoutfs_buddy_reset_count(struct super_block *sb);
+
 #endif
@@ -22,6 +22,8 @@
 #define SCOUTFS_BUDDY_BM_BLKNO (SCOUTFS_SUPER_BLKNO + SCOUTFS_SUPER_NR)
 #define SCOUTFS_BUDDY_BM_NR 2
 
+#define SCOUTFS_MAX_TRANS_BLOCKS (128 * 1024 * 1024 / SCOUTFS_BLOCK_SIZE)
+
 /*
  * This header is found at the start of every block so that we can
  * verify that it's what we were looking for. The crc and padding
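Assuming SCOUTFS_BLOCK_SIZE is 4KB (its definition is outside this hunk), the cap works out to 128 * 1024 * 1024 / 4096 = 32768 blocks per transaction; a 64KB block size would give 2048. Expressing the limit in bytes and dividing by the block size keeps the 128MB policy stable if the block size ever changes.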
@@ -198,6 +198,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
 	init_waitqueue_head(&sbi->block_wq);
 	atomic_set(&sbi->block_writes, 0);
 	mutex_init(&sbi->buddy_mutex);
+	atomic_set(&sbi->buddy_count, 0);
 	init_rwsem(&sbi->btree_rwsem);
 	atomic_set(&sbi->trans_holds, 0);
 	init_waitqueue_head(&sbi->trans_hold_wq);
@@ -25,6 +25,7 @@ struct scoutfs_sb_info {
 	int block_write_err;
 
 	struct mutex buddy_mutex;
+	atomic_t buddy_count;
 
 	/* XXX there will be a lot more of these :) */
 	struct rw_semaphore btree_rwsem;
@@ -97,8 +97,10 @@ void scoutfs_trans_write_func(struct work_struct *work)
 	}
 
 	spin_lock(&sbi->trans_write_lock);
-	if (advance)
+	if (advance) {
 		scoutfs_advance_dirty_super(sb);
+		scoutfs_buddy_reset_count(sb);
+	}
 	sbi->trans_write_count++;
 	sbi->trans_write_ret = ret;
 	spin_unlock(&sbi->trans_write_lock);
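The reset happens under trans_write_lock and only in the advance branch, alongside scoutfs_advance_dirty_super(), so the count is zeroed exactly when a commit lands. If the transaction does not advance (e.g. on a write error), the count survives and the next release will see the still-large count and try to commit again.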
@@ -172,12 +174,23 @@ int scoutfs_hold_trans(struct super_block *sb)
 					atomic_add_unless(&sbi->trans_holds, 1, -1));
 }
 
+/*
+ * As we release we ask the allocator how many blocks have been
+ * allocated since the last transaction was successfully committed. If
+ * it's large enough we kick off a write. This is mostly to reduce the
+ * commit latency. We also don't want to let the IO pipeline sit idle.
+ * Once we have enough blocks to write efficiently we should do so.
+ */
 void scoutfs_release_trans(struct super_block *sb)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
 
-	if (atomic_sub_return(1, &sbi->trans_holds) == 0)
+	if (atomic_sub_return(1, &sbi->trans_holds) == 0) {
+		if (scoutfs_buddy_alloc_count(sb) >= SCOUTFS_MAX_TRANS_BLOCKS)
+			scoutfs_sync_fs(sb, 0);
+
 		wake_up(&sbi->trans_hold_wq);
+	}
 }
 
 int scoutfs_setup_trans(struct super_block *sb)
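For callers the throttle is invisible: they keep the usual hold/release bracket around modifications. A sketch of a hypothetical caller follows (error handling abbreviated; example_modify is not a real scoutfs function):

/* hypothetical caller: brackets a modification in a transaction hold */
static int example_modify(struct super_block *sb)
{
	int ret;

	ret = scoutfs_hold_trans(sb);
	if (ret)
		return ret;

	/* ... allocate and dirty blocks; each allocation bumps buddy_count ... */

	scoutfs_release_trans(sb);
	return 0;
}

Only the release that drops trans_holds to zero performs the threshold check, so a commit is never kicked off while other holders might still be dirtying blocks, and scoutfs_sync_fs(sb, 0) presumably starts the write-out without waiting for it to finish, keeping release cheap.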