diff --git a/kmod/src/compact.c b/kmod/src/compact.c
index 5933af25..8e00b576 100644
--- a/kmod/src/compact.c
+++ b/kmod/src/compact.c
@@ -22,6 +22,7 @@
 #include "cmp.h"
 #include "compact.h"
 #include "manifest.h"
+#include "trans.h"
 #include "scoutfs_trace.h"
 
 /*
@@ -470,15 +471,21 @@ static void scoutfs_compact_func(struct work_struct *work)
 
 	INIT_LIST_HEAD(&curs.csegs);
 
-	ret = scoutfs_manifest_next_compact(sb, (void *)&curs) ?:
-	      read_segments(sb, &curs) ?:
+	ret = scoutfs_manifest_next_compact(sb, (void *)&curs);
+	if (ret <= 0)
+		goto out;
+
+	ret = read_segments(sb, &curs) ?:
 	      compact_segments(sb, &curs, &results) ?:
 	      write_segments(sb, &results) ?:
 	      update_manifest(sb, &curs, &results);
-
-	if (ret)
+	if (ret) {
 		free_result_segnos(sb, &results);
-
+	} else {
+		scoutfs_sync_fs(sb, 0);
+		scoutfs_compact_kick(sb);
+	}
+out:
 	free_csegs(&curs.csegs);
 	free_csegs(&results);
 
diff --git a/kmod/src/manifest.c b/kmod/src/manifest.c
index 01266a52..48277ab0 100644
--- a/kmod/src/manifest.c
+++ b/kmod/src/manifest.c
@@ -24,6 +24,7 @@
 #include "cmp.h"
 #include "compact.h"
 #include "manifest.h"
+#include "trans.h"
 #include "scoutfs_trace.h"
 
 /*
@@ -142,19 +143,41 @@ static u64 get_level_count(struct manifest *mani,
 	return count;
 }
 
-static void add_level_count(struct manifest *mani,
-			    struct scoutfs_super_block *super, u8 level,
-			    s64 val)
+static bool past_limit(struct manifest *mani, u8 level, u64 count)
 {
-	write_seqcount_begin(&mani->seqcount);
-	le64_add_cpu(&super->manifest.level_counts[level], val);
-	write_seqcount_end(&mani->seqcount);
+	return count > mani->level_limits[level];
 }
 
 static bool level_full(struct manifest *mani,
		       struct scoutfs_super_block *super, u8 level)
 {
-	return get_level_count(mani, super, level) > mani->level_limits[level];
+	return past_limit(mani, level, get_level_count(mani, super, level));
+}
+
+static void add_level_count(struct super_block *sb, struct manifest *mani,
+			    struct scoutfs_super_block *super, u8 level,
+			    s64 val)
+{
+	bool was_full;
+	bool now_full;
+	u64 count;
+
+	write_seqcount_begin(&mani->seqcount);
+
+	count = le64_to_cpu(super->manifest.level_counts[level]);
+	was_full = past_limit(mani, level, count);
+
+	count += val;
+	now_full = past_limit(mani, level, count);
+	super->manifest.level_counts[level] = cpu_to_le64(count);
+
+	write_seqcount_end(&mani->seqcount);
+
+	if (was_full && !now_full)
+		scoutfs_trans_wake_holders(sb);
+
+	if (now_full)
+		scoutfs_compact_kick(sb);
 }
 
 /*
@@ -199,11 +222,7 @@ int scoutfs_manifest_add(struct super_block *sb, struct kvec *first,
 		ret = PTR_ERR(ment);
 	} else {
 		mani->nr_levels = max_t(u8, mani->nr_levels, level + 1);
-		add_level_count(mani, super, level, 1);
-
-		if (level_full(mani, super, level))
-			scoutfs_compact_kick(sb);
-
+		add_level_count(sb, mani, super, level, 1);
 		ret = 0;
 	}
 
@@ -250,7 +269,7 @@ int scoutfs_manifest_del(struct super_block *sb, struct kvec *first, u64 seq,
 
 	ret = scoutfs_treap_delete(mani->treap, &skey);
 	if (ret == 0)
-		add_level_count(mani, super, level, -1ULL);
+		add_level_count(sb, mani, super, level, -1ULL);
 
 	return ret;
 }
@@ -618,6 +637,15 @@ int scoutfs_manifest_dirty_ring(struct super_block *sb)
 	return 0;
 }
 
+u64 scoutfs_manifest_level_count(struct super_block *sb, u8 level)
+{
+	DECLARE_MANIFEST(sb, mani);
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+	struct scoutfs_super_block *super = &sbi->super;
+
+	return get_level_count(mani, super, level);
+}
+
 /*
  * Give the caller the segments that will be involved in the next
  * compaction.
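The reworked add_level_count() above is edge-triggered: it computes the full state before and after the update inside the seqcount write section, wakes throttled transaction holders only on a full-to-not-full transition, and kicks compaction whenever the level is left past its limit. A minimal userspace sketch of that transition logic follows; the fixed limit, the plain uint64_t counter, and the printf() stand-ins for scoutfs_trans_wake_holders() and scoutfs_compact_kick() are assumptions for illustration, and the seqcount protection and endian conversions are omitted.

	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	static uint64_t limit = 4;	/* assumed stand-in for mani->level_limits[level] */

	static bool past_limit(uint64_t count)
	{
		return count > limit;
	}

	/* returns the new count; the prints model the wake/kick hooks in the patch */
	static uint64_t add_level_count(uint64_t count, int64_t val)
	{
		bool was_full = past_limit(count);
		bool now_full;

		count += val;	/* adding -1 wraps like the kernel's -1ULL add */
		now_full = past_limit(count);

		if (was_full && !now_full)
			printf("wake holders\n");	/* scoutfs_trans_wake_holders() */
		if (now_full)
			printf("kick compaction\n");	/* scoutfs_compact_kick() */

		return count;
	}

	int main(void)
	{
		uint64_t count = 4;

		count = add_level_count(count, 1);	/* 5 > 4: kick compaction */
		count = add_level_count(count, -1);	/* back to 4: wake holders */
		return 0;
	}

Computing was_full and now_full from the same locally held count is what makes the edge detection reliable; re-reading the shared counter between the two checks could miss or double-report a transition.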
@@ -636,6 +664,8 @@ int scoutfs_manifest_dirty_ring(struct super_block *sb)
  * compaction caller's data and let it do its thing.  It'll allocate and
  * free segments and update the manifest.
  *
+ * Returns 1 if there's compaction work to do, 0 if not, or -errno.
+ *
  * XXX this will get a lot more clever:
  *  - ensuring concurrent compactions don't overlap
  *  - prioritize segments with deletion or incremental records
@@ -769,7 +799,7 @@ done:
 	scoutfs_kvec_memcpy_truncate(mani->compact_keys[level], ment_last);
 	scoutfs_kvec_be_inc(mani->compact_keys[level]);
 
-	ret = 0;
+	ret = 1;
 out:
 	up_write(&mani->rwsem);
 	return ret;
diff --git a/kmod/src/manifest.h b/kmod/src/manifest.h
index 2db63bb1..5e529cd5 100644
--- a/kmod/src/manifest.h
+++ b/kmod/src/manifest.h
@@ -16,6 +16,7 @@ int scoutfs_manifest_unlock(struct super_block *sb);
 int scoutfs_manifest_read_items(struct super_block *sb, struct kvec *key,
 				struct kvec *until);
 
+u64 scoutfs_manifest_level_count(struct super_block *sb, u8 level);
 int scoutfs_manifest_next_compact(struct super_block *sb, void *data);
 
 int scoutfs_manifest_setup(struct super_block *sb);
diff --git a/kmod/src/trans.c b/kmod/src/trans.c
index 5987646c..6b10cd55 100644
--- a/kmod/src/trans.c
+++ b/kmod/src/trans.c
@@ -28,6 +28,7 @@
 #include "seg.h"
 #include "alloc.h"
 #include "treap.h"
+#include "compact.h"
 #include "scoutfs_trace.h"
 
 /*
@@ -210,9 +211,56 @@ int scoutfs_file_fsync(struct file *file, loff_t start, loff_t end,
 }
 
 /*
- * The first holders race to try and allocate the segment that will be
- * written by the next commit.
+ * I think the holder that creates the most dirty item data is
+ * symlinking, which can create all the entry items and a symlink target
+ * item with a full 4k path.  We go a little nuts and just set it to two
+ * blocks.
+ *
+ * XXX This divides the segment size to set the hard limit on the number of
+ * concurrent holders so we'll want this to be more precise.
  */
+#define MOST_DIRTY (2 * SCOUTFS_BLOCK_SIZE)
+
+/*
+ * We're able to hold the transaction if the current dirty item bytes
+ * and the presumed worst case item dirtying of all the holders,
+ * including us, all fit in a segment.
+ */
+static bool hold_acquired(struct super_block *sb)
+{
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+	long bytes;
+	int with_us;
+	int holds;
+	int before;
+
+	holds = atomic_read(&sbi->trans_holds);
+	for (;;) {
+		/* transaction is being committed */
+		if (holds < 0)
+			return false;
+
+		/* only hold when there's no level 0 segments, XXX for now */
+		if (scoutfs_manifest_level_count(sb, 0) > 0) {
+			scoutfs_compact_kick(sb);
+			return false;
+		}
+
+		/* see if we all would fill the segment */
+		with_us = holds + 1;
+		bytes = (with_us * MOST_DIRTY) + scoutfs_item_dirty_bytes(sb);
+		if (bytes > SCOUTFS_SEGMENT_SIZE) {
+			scoutfs_sync_fs(sb, 0);
+			return false;
+		}
+
+		before = atomic_cmpxchg(&sbi->trans_holds, holds, with_us);
+		if (before == holds)
+			return true;
+		holds = before;
+	}
+}
+
 int scoutfs_hold_trans(struct super_block *sb)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
@@ -220,37 +268,36 @@
 	if (current == sbi->trans_task)
 		return 0;
 
-	return wait_event_interruptible(sbi->trans_hold_wq,
-					atomic_add_unless(&sbi->trans_holds, 1, -1));
+	return wait_event_interruptible(sbi->trans_hold_wq, hold_acquired(sb));
 }
 
 /*
- * As we release we kick off a commit if we have a segment's worth of
- * dirty items.
- *
- * Right now it's conservatively kicking off writes at ~95% full blocks.
- * This leaves a lot of slop for the largest item bytes created by a
- * holder and overrun by concurrent holders (who aren't accounted
- * today).
- *
- * It should more precisely know the worst case item byte consumption of
- * holders and only kick off a write when someone tries to hold who
- * might fill the segment.
+ * As we release we'll almost certainly have dirtied less than the
+ * worst case dirty assumption that holders might be throttled waiting
+ * for.  We always try and wake blocked holders in case they now have
+ * room to dirty.
  */
 void scoutfs_release_trans(struct super_block *sb)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
-	unsigned int target = (SCOUTFS_SEGMENT_SIZE * 95 / 100);
 
 	if (current == sbi->trans_task)
 		return;
 
-	if (atomic_sub_return(1, &sbi->trans_holds) == 0) {
-		if (scoutfs_item_dirty_bytes(sb) >= target)
-			scoutfs_sync_fs(sb, 0);
+	atomic_dec(&sbi->trans_holds);
+	wake_up(&sbi->trans_hold_wq);
+}
 
-		wake_up(&sbi->trans_hold_wq);
-	}
+/*
+ * This is called to wake people waiting on holders when the conditions
+ * that they're waiting on change: levels being full, dirty count falling
+ * under a segment, or holders falling to 0.
+ */
+void scoutfs_trans_wake_holders(struct super_block *sb)
+{
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+
+	wake_up(&sbi->trans_hold_wq);
 }
 
 int scoutfs_setup_trans(struct super_block *sb)
diff --git a/kmod/src/trans.h b/kmod/src/trans.h
index 22c5755a..f1ecbc51 100644
--- a/kmod/src/trans.h
+++ b/kmod/src/trans.h
@@ -8,6 +8,7 @@ int scoutfs_file_fsync(struct file *file, loff_t start, loff_t end,
 
 int scoutfs_hold_trans(struct super_block *sb);
 void scoutfs_release_trans(struct super_block *sb);
+void scoutfs_trans_wake_holders(struct super_block *sb);
 
 int scoutfs_setup_trans(struct super_block *sb);
 void scoutfs_shutdown_trans(struct super_block *sb);
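The hold_acquired() loop added in trans.c is a lock-free admission check: a holder is admitted only if the presumed worst case dirtying of all holders, itself included, still fits in a segment, and the slot is claimed with atomic_cmpxchg() so racing holders retry against the latest count. The following standalone C11 model shows just that retry loop; the atomic_int in place of sbi->trans_holds, the size constants in place of SCOUTFS_SEGMENT_SIZE and the two-block MOST_DIRTY guess, and the plain long in place of scoutfs_item_dirty_bytes() are assumptions, and the level 0 check and the sync/compaction kicks are omitted.

	#include <stdatomic.h>
	#include <stdbool.h>
	#include <stdio.h>

	#define SEGMENT_SIZE	(1024 * 1024)	/* assumed stand-in for SCOUTFS_SEGMENT_SIZE */
	#define MOST_DIRTY	(2 * 4096)	/* mirrors the 2 * SCOUTFS_BLOCK_SIZE guess */

	static atomic_int trans_holds;
	static long dirty_bytes;		/* stands in for scoutfs_item_dirty_bytes() */

	static bool hold_acquired(void)
	{
		int holds = atomic_load(&trans_holds);

		for (;;) {
			/* a negative count means a commit is in flight */
			if (holds < 0)
				return false;

			/* would everyone's worst case overflow the segment? */
			if ((long)(holds + 1) * MOST_DIRTY + dirty_bytes > SEGMENT_SIZE)
				return false;	/* the patch kicks a sync here */

			/* race to add ourselves; on failure holds is reloaded */
			if (atomic_compare_exchange_weak(&trans_holds, &holds,
							 holds + 1))
				return true;
		}
	}

	int main(void)
	{
		printf("acquired: %d\n", hold_acquired());
		return 0;
	}

atomic_compare_exchange_weak() writing the observed value back into holds on failure plays the same role as the "holds = before;" retry in the patch: every pass re-evaluates the admission check against the count that actually beat us.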
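Callers bracket their item dirtying with scoutfs_hold_trans() and scoutfs_release_trans(); with this patch the hold may block until the admission check passes, while release unconditionally decrements and wakes. A hypothetical caller pattern, with modify_items() as a placeholder for the real work, might look like:

	/* hypothetical caller sketch; modify_items() is a placeholder */
	int example_op(struct super_block *sb)
	{
		int ret;

		ret = scoutfs_hold_trans(sb);	/* may block until a hold fits */
		if (ret)
			return ret;		/* interrupted while waiting */

		ret = modify_items(sb);		/* dirty at most MOST_DIRTY bytes */

		scoutfs_release_trans(sb);	/* drop hold, wake blocked holders */
		return ret;
	}

Because release always calls wake_up(), a holder that dirtied less than the MOST_DIRTY worst case lets waiters re-run hold_acquired() against the smaller real dirty byte total, which is exactly the case the release-side comment describes.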