diff --git a/kmod/src/compact.c b/kmod/src/compact.c index 1f3835c1..f27fb1c5 100644 --- a/kmod/src/compact.c +++ b/kmod/src/compact.c @@ -24,6 +24,7 @@ #include "manifest.h" #include "counters.h" #include "alloc.h" +#include "net.h" #include "scoutfs_trace.h" /* @@ -71,6 +72,7 @@ struct compact_seg { struct scoutfs_segment *seg; int pos; int saved_pos; + bool part_of_move; }; /* @@ -80,6 +82,10 @@ struct compact_seg { struct compact_cursor { struct list_head csegs; + /* buffer holds allocations and our returning them */ + u64 segnos[2 * (1 + SCOUTFS_MANIFEST_FANOUT)]; + unsigned nr_segnos; + u8 lower_level; u8 last_level; @@ -345,9 +351,9 @@ static int compact_segments(struct super_block *sb, struct compact_seg *cseg; struct compact_seg *upper; struct compact_seg *lower; + unsigned next_segno = 0; u32 key_bytes; u32 nr_items; - u64 segno; int ret; scoutfs_inc_counter(sb, compact_operations); @@ -390,6 +396,10 @@ static int compact_segments(struct super_block *sb, scoutfs_seg_get(cseg->seg); list_add_tail(&cseg->entry, results); + /* don't mess with its segno */ + upper->part_of_move = true; + cseg->part_of_move = true; + curs->upper = NULL; upper = NULL; @@ -445,15 +455,14 @@ static int compact_segments(struct super_block *sb, break; } - ret = scoutfs_alloc_segno(sb, &segno); - if (ret) { - kfree(cseg); - break; - } + cseg->segno = curs->segnos[next_segno]; + curs->segnos[next_segno] = 0; + next_segno++; - ret = scoutfs_seg_alloc(sb, segno, &seg); + ret = scoutfs_seg_alloc(sb, cseg->segno, &seg); if (ret) { - scoutfs_alloc_free(sb, segno); + next_segno--; + curs->segnos[next_segno] = cseg->segno; kfree(cseg); break; } @@ -529,33 +538,55 @@ out: } /* - * Atomically update the manifest. We lock down the manifest so no one - * can use it while we're mucking with it. While the current ring can - * always delete without failure we will probably have a manifest - * storage layer eventually that could return errors on deletion. 
We - * also also have corrupted something and try to delete an entry that - * doesn't exist. So we use an initial dirtying step to ensure that our - * later deletions succeed. - * - * XXX does locking the manifest prevent commits? I would think so? + * Give the compaction cursor a segno to allocate from. */ -static int update_manifest(struct super_block *sb, struct compact_cursor *curs, - struct list_head *results) +void scoutfs_compact_add_segno(struct super_block *sb, void *data, u64 segno) { + struct compact_cursor *curs = data; + + curs->segnos[curs->nr_segnos++] = segno; +} + +/* + * Commit the result of a compaction based on the state of the cursor. + * The net caller stops the rings from being written while we're making + * changes. We lock the manifest to atomically make our changes. + * + * The error handling is sketchy here because calling the manifest from + * here is temporary. We should be sending a message to the server + * instead of calling the allocator and manifest. + */ +int scoutfs_compact_commit(struct super_block *sb, void *c, void *r) +{ + struct compact_cursor *curs = c; + struct list_head *results = r; struct compact_seg *cseg; - struct compact_seg *until; - int ret = 0; - int err; + int ret; + int i; + + /* free unused segnos that were allocated for the compaction */ + for (i = 0; i < curs->nr_segnos; i++) { + if (curs->segnos[i]) { + ret = scoutfs_alloc_free(sb, curs->segnos[i]); + BUG_ON(ret); + } + } scoutfs_manifest_lock(sb); + /* delete input segments, probably freeing their segnos */ list_for_each_entry(cseg, &curs->csegs, entry) { - ret = scoutfs_manifest_dirty(sb, cseg->first, - cseg->seq, cseg->level); - if (ret) - goto out; + if (!cseg->part_of_move) { + ret = scoutfs_alloc_free(sb, cseg->segno); + BUG_ON(ret); + } + + ret = scoutfs_manifest_del(sb, cseg->first, + cseg->seq, cseg->level); + BUG_ON(ret); } + /* add output entries */ list_for_each_entry(cseg, results, entry) { /* XXX moved upper segments won't have read the segment
:P */ if (cseg->seg) @@ -565,56 +596,22 @@ static int update_manifest(struct super_block *sb, struct compact_cursor *curs, ret = scoutfs_manifest_add(sb, cseg->first, cseg->last, cseg->segno, cseg->seq, cseg->level); - if (ret) { - until = cseg; - list_for_each_entry(cseg, results, entry) { - if (cseg == until) - break; - err = scoutfs_seg_manifest_del(sb, cseg->seg, - cseg->level); - BUG_ON(err); - } - goto out; - } - } - - list_for_each_entry(cseg, &curs->csegs, entry) { - ret = scoutfs_manifest_del(sb, cseg->first, - cseg->seq, cseg->level); BUG_ON(ret); } -out: scoutfs_manifest_unlock(sb); - return ret; -} - -static int free_result_segnos(struct super_block *sb, - struct list_head *results) -{ - struct compact_seg *cseg; - int ret = 0; - int err; - - list_for_each_entry(cseg, results, entry) { - /* XXX failure here would be an inconsistency */ - err = scoutfs_seg_free_segno(sb, cseg->seg); - if (err && !ret) - ret = err; - } - - return ret; + return 0; } /* * The compaction worker tries to make forward progress with compaction - * every time its kicked. It asks the manifest for segments to compact. + * every time it's kicked. It pretends to send a message requesting + * compaction parameters but in reality the net request function there + * is calling directly into the manifest and back into our compaction + * add routines. - * - * If it succeeds in doing work then it kicks itself again to see if there's - * more work to do. - * - * XXX worry about forward progress in the case of errors. + * We always try to clean up everything on errors. 
*/ static void scoutfs_compact_func(struct work_struct *work) { @@ -622,6 +619,7 @@ static void scoutfs_compact_func(struct work_struct *work) struct super_block *sb = ci->sb; struct compact_cursor curs = {{NULL,}}; struct scoutfs_bio_completion comp; + struct compact_seg *cseg; LIST_HEAD(results); int ret; int err; @@ -629,33 +627,35 @@ static void scoutfs_compact_func(struct work_struct *work) INIT_LIST_HEAD(&curs.csegs); scoutfs_bio_init_comp(&comp); - ret = scoutfs_manifest_next_compact(sb, (void *)&curs); - if (list_empty(&curs.csegs)) - goto out; + ret = scoutfs_net_get_compaction(sb, (void *)&curs); - ret = compact_segments(sb, &curs, &comp, &results); + /* short circuit no compaction work to do */ + if (ret == 0 && list_empty(&curs.csegs)) + return; - /* always wait for io completion */ - err = scoutfs_bio_wait_comp(sb, &comp); + if (ret == 0 && !list_empty(&curs.csegs)) { + ret = compact_segments(sb, &curs, &comp, &results); + + /* always wait for io completion */ + err = scoutfs_bio_wait_comp(sb, &comp); + if (!ret && err) + ret = err; + } + + /* don't update manifest on error, just free segnos */ + if (ret) { + list_for_each_entry(cseg, &results, entry) { + if (!cseg->part_of_move) + curs.segnos[curs.nr_segnos++] = cseg->segno; + } + free_cseg_list(sb, &curs.csegs); + free_cseg_list(sb, &results); + } + + err = scoutfs_net_finish_compaction(sb, &curs, &results); if (!ret && err) ret = err; - if (ret) - goto out; - ret = update_manifest(sb, &curs, &results); - if (ret == 0) { -#if 0 /* XXX this is busted, fixing soon */ - scoutfs_sync_fs(sb, 0); -#endif - -#if 0 /* XXX where do we do this in shared? 
*/ - scoutfs_trans_wake_holders(sb); -#endif - scoutfs_compact_kick(sb); - } -out: - if (ret) - free_result_segnos(sb, &results); free_cseg_list(sb, &curs.csegs); free_cseg_list(sb, &results); diff --git a/kmod/src/compact.h b/kmod/src/compact.h index d3654fd3..e017dd87 100644 --- a/kmod/src/compact.h +++ b/kmod/src/compact.h @@ -9,6 +9,8 @@ int scoutfs_compact_add(struct super_block *sb, void *data, struct scoutfs_key_buf *first, struct scoutfs_key_buf *last, u64 segno, u64 seq, u8 level); +void scoutfs_compact_add_segno(struct super_block *sb, void *data, u64 segno); +int scoutfs_compact_commit(struct super_block *sb, void *c, void *r); int scoutfs_compact_setup(struct super_block *sb); void scoutfs_compact_destroy(struct super_block *sb); diff --git a/kmod/src/manifest.c b/kmod/src/manifest.c index b3c7a258..e0eb9a9e 100644 --- a/kmod/src/manifest.c +++ b/kmod/src/manifest.c @@ -39,7 +39,6 @@ struct manifest { struct rw_semaphore rwsem; - seqcount_t seqcount; struct scoutfs_ring_info ring; u8 nr_levels; @@ -110,57 +109,6 @@ static bool cmp_range_ment(struct scoutfs_key_buf *key, return scoutfs_key_compare_ranges(key, end, &first, &last); } -static u64 get_level_count(struct manifest *mani, - struct scoutfs_super_block *super, u8 level) -{ - unsigned int sc; - u64 count; - - do { - sc = read_seqcount_begin(&mani->seqcount); - count = le64_to_cpu(super->manifest.level_counts[level]); - } while (read_seqcount_retry(&mani->seqcount, sc)); - - return count; -} - -static bool past_limit(struct manifest *mani, u8 level, u64 count) -{ - return count > mani->level_limits[level]; -} - -static bool level_full(struct manifest *mani, - struct scoutfs_super_block *super, u8 level) -{ - return past_limit(mani, level, get_level_count(mani, super, level)); -} - -static void add_level_count(struct super_block *sb, struct manifest *mani, - struct scoutfs_super_block *super, u8 level, - s64 val) -{ - bool was_full; - bool now_full; - u64 count; - - 
write_seqcount_begin(&mani->seqcount); - - count = le64_to_cpu(super->manifest.level_counts[level]); - was_full = past_limit(mani, level, count); - - count += val; - now_full = past_limit(mani, level, count); - super->manifest.level_counts[level] = cpu_to_le64(count); - - write_seqcount_end(&mani->seqcount); - - if (was_full && !now_full) - scoutfs_trans_wake_holders(sb); - - if (now_full) - scoutfs_compact_kick(sb); -} - /* * Insert a new manifest entry in the ring. The ring allocates a new * node for us and we fill it. @@ -206,7 +154,7 @@ int scoutfs_manifest_add(struct super_block *sb, scoutfs_key_copy(&ment_last, last); mani->nr_levels = max_t(u8, mani->nr_levels, level + 1); - add_level_count(sb, mani, super, level, 1); + le64_add_cpu(&super->manifest.level_counts[level], 1); return 0; } @@ -244,7 +192,7 @@ int scoutfs_manifest_add_ment(struct super_block *sb, memcpy(ment, add, bytes); mani->nr_levels = max_t(u8, mani->nr_levels, add->level + 1); - add_level_count(sb, mani, super, add->level, 1); + le64_add_cpu(&super->manifest.level_counts[add->level], 1); return 0; } @@ -292,7 +240,7 @@ int scoutfs_manifest_del(struct super_block *sb, struct scoutfs_key_buf *first, return -ENOENT; scoutfs_ring_delete(&mani->ring, ment); - add_level_count(sb, mani, super, level, -1ULL); + le64_add_cpu(&super->manifest.level_counts[level], -1ULL); return 0; } @@ -455,7 +403,7 @@ scoutfs_manifest_find_range_entries(struct super_block *sb, *found_bytes = 0; /* at most we get all level 0, one from other levels, and null term */ - nr = get_level_count(mani, super, 0) + mani->nr_levels + 1; + nr = le64_to_cpu(super->manifest.level_counts[0]) + mani->nr_levels + 1; found = kcalloc(nr, sizeof(struct scoutfs_manifest_entry *), GFP_NOFS); if (!found) { @@ -736,15 +684,6 @@ void scoutfs_manifest_write_complete(struct super_block *sb) up_write(&mani->rwsem); } -u64 scoutfs_manifest_level_count(struct super_block *sb, u8 level) -{ - DECLARE_MANIFEST(sb, mani); - struct scoutfs_sb_info 
*sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; - - return get_level_count(mani, super, level); -} - /* * Give the caller the segments that will be involved in the next * compaction. @@ -759,7 +698,7 @@ u64 scoutfs_manifest_level_count(struct super_block *sb, u8 level) * We add all the segments to the compaction caller's data and let it do * its thing. It'll allocate and free segments and update the manifest. * - * Returns 0 or -errno. The caller will see if any segments were added. + * Returns the number of input segments or -errno. * * XXX this will get a lot more clever: * - ensuring concurrent compactions don't overlap @@ -781,12 +720,14 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data) struct scoutfs_key_buf over_last; int level; int ret; + int nr = 0; int i; down_write(&mani->rwsem); for (level = mani->nr_levels - 1; level >= 0; level--) { - if (level_full(mani, super, level)) + if (le64_to_cpu(super->manifest.level_counts[level]) > + mani->level_limits[level]) break; } @@ -828,6 +769,7 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data) le64_to_cpu(ment->seq), level); if (ret) goto out; + nr++; /* start with the first overlapping at the next level */ skey.key = &ment_first; @@ -851,6 +793,7 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data) le64_to_cpu(over->seq), level + 1); if (ret) goto out; + nr++; over = scoutfs_ring_next(&mani->ring, over); } @@ -862,7 +805,7 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data) ret = 0; out: up_write(&mani->rwsem); - return ret; + return ret ?: nr; } /* @@ -949,7 +892,6 @@ int scoutfs_manifest_setup(struct super_block *sb) return -ENOMEM; init_rwsem(&mani->rwsem); - seqcount_init(&mani->seqcount); scoutfs_ring_init(&mani->ring, &super->manifest.ring, manifest_ring_compare_key, manifest_ring_compare_data); diff --git a/kmod/src/manifest.h b/kmod/src/manifest.h index e0547c26..b65860a2 100644 --- 
a/kmod/src/manifest.h +++ b/kmod/src/manifest.h @@ -43,7 +43,6 @@ int scoutfs_manifest_add_ment_ref(struct super_block *sb, struct list_head *list, struct scoutfs_manifest_entry *ment); -u64 scoutfs_manifest_level_count(struct super_block *sb, u8 level); int scoutfs_manifest_next_compact(struct super_block *sb, void *data); int scoutfs_manifest_setup(struct super_block *sb); diff --git a/kmod/src/net.c b/kmod/src/net.c index ad22ea7c..11e5fe30 100644 --- a/kmod/src/net.c +++ b/kmod/src/net.c @@ -397,6 +397,8 @@ static struct send_buf *process_record_segment(struct super_block *sb, if (ret == 0) ret = wait_for_commit(&cw); + scoutfs_compact_kick(sb); + sbuf = alloc_sbuf(0); if (!sbuf) { sbuf = ERR_PTR(-ENOMEM); @@ -700,9 +702,9 @@ static int process_reply(struct net_info *nti, struct recv_buf *rbuf) static void destroy_server_state(struct super_block *sb) { + scoutfs_compact_destroy(sb); scoutfs_alloc_destroy(sb); scoutfs_manifest_destroy(sb); - scoutfs_compact_destroy(sb); } /* @@ -1098,6 +1100,87 @@ static int add_send_buf(struct super_block *sb, int type, void *data, return 0; } +/* + * Eventually we're going to have messages that control compaction. + * Each client mount would have long-lived work that sends requests + * which are stuck in processing until there's work to do. They'd get + * their entries, perform the compaction, and send a reply. But we're + * not there yet. + * + * This is a short circuit that's called directly by a work function + * that's only queued on the server. It makes compaction work inside + * the ring update consistency mechanics inside net message processing + * and demonstrates the moving pieces that we'd need to cut up into a + * series of messages and replies. + * + * The compaction work caller cleans up everything on errors. 
+ */ +int scoutfs_net_get_compaction(struct super_block *sb, void *curs) +{ + DECLARE_NET_INFO(sb, nti); + struct commit_waiter cw; + u64 segno; + int ret = 0; + int nr; + int i; + + down_read(&nti->ring_commit_rwsem); + + nr = scoutfs_manifest_next_compact(sb, curs); + if (nr <= 0) { + up_read(&nti->ring_commit_rwsem); + return nr; + } + + for (i = 0; i < nr; i++) { + ret = scoutfs_alloc_segno(sb, &segno); + if (ret < 0) + break; + scoutfs_compact_add_segno(sb, curs, segno); + } + + if (ret == 0) + queue_commit_work(nti, &cw); + up_read(&nti->ring_commit_rwsem); + + if (ret == 0) + ret = wait_for_commit(&cw); + + return ret; +} + +/* + * This is a stub for recording the results of a compaction. We just + * call back into compaction to have it call the manifest and allocator + * updates. + * + * In the future we'd encode the manifest and segnos in requests sent to + * the server who'd update the manifest and allocator in request + * processing. + */ +int scoutfs_net_finish_compaction(struct super_block *sb, void *curs, + void *list) +{ + DECLARE_NET_INFO(sb, nti); + struct commit_waiter cw; + int ret; + + down_read(&nti->ring_commit_rwsem); + + ret = scoutfs_compact_commit(sb, curs, list); + + if (ret == 0) + queue_commit_work(nti, &cw); + up_read(&nti->ring_commit_rwsem); + + if (ret == 0) + ret = wait_for_commit(&cw); + + scoutfs_compact_kick(sb); + + return ret; +} + struct record_segment_args { struct completion comp; int ret; diff --git a/kmod/src/net.h b/kmod/src/net.h index 973b0530..d48fc2b7 100644 --- a/kmod/src/net.h +++ b/kmod/src/net.h @@ -14,6 +14,10 @@ int scoutfs_net_alloc_segno(struct super_block *sb, u64 *segno); int scoutfs_net_record_segment(struct super_block *sb, struct scoutfs_segment *seg, u8 level); +int scoutfs_net_get_compaction(struct super_block *sb, void *curs); +int scoutfs_net_finish_compaction(struct super_block *sb, void *curs, + void *list); + int scoutfs_net_setup(struct super_block *sb); void scoutfs_net_destroy(struct 
super_block *sb); diff --git a/kmod/src/super.c b/kmod/src/super.c index 5a43a426..48fb27d2 100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -253,18 +253,22 @@ static void scoutfs_kill_sb(struct super_block *sb) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - kill_block_super(sb); + /* make sure all dirty work is settled before killing the super */ if (sbi) { + sync_filesystem(sb); + scoutfs_lock_shutdown(sb); scoutfs_net_destroy(sb); + } + + kill_block_super(sb); + + if (sbi) { scoutfs_lock_destroy(sb); - scoutfs_compact_destroy(sb); scoutfs_shutdown_trans(sb); scoutfs_data_destroy(sb); scoutfs_inode_destroy(sb); scoutfs_item_destroy(sb); - scoutfs_alloc_destroy(sb); - scoutfs_manifest_destroy(sb); scoutfs_seg_destroy(sb); scoutfs_destroy_counters(sb); if (sbi->kset)