diff --git a/kmod/src/compact.c b/kmod/src/compact.c index f02234e0..9c83eca6 100644 --- a/kmod/src/compact.c +++ b/kmod/src/compact.c @@ -83,7 +83,7 @@ struct compact_cursor { struct list_head csegs; /* buffer holds allocations and our returning them */ - u64 segnos[2 * (1 + SCOUTFS_MANIFEST_FANOUT)]; + u64 segnos[SCOUTFS_COMPACTION_MAX_UPDATE]; unsigned nr_segnos; u8 lower_level; @@ -93,6 +93,9 @@ struct compact_cursor { struct compact_seg *saved_upper; struct compact_seg *lower; struct compact_seg *saved_lower; + + bool sticky; + struct compact_seg *last_lower; }; static void free_cseg(struct super_block *sb, struct compact_seg *cseg) @@ -256,6 +259,19 @@ retry: *item_flags = lower_flags; } + /* + * If we have a sticky compaction then we can't mix items from + * the upper level past the last lower key into the lower level. + * The caller will notice when they're emptying the final upper + * level in a sticky merge and leave it at the upper level. + */ + if (curs->sticky && curs->lower && + (!lower || lower == curs->last_lower) && + scoutfs_key_compare(item_key, curs->last_lower->last) > 0) { + ret = 0; + goto out; + } + if (cmp <= 0) upper->pos++; if (cmp >= 0) @@ -346,7 +362,6 @@ static int compact_segments(struct super_block *sb, struct scoutfs_bio_completion *comp, struct list_head *results) { - struct scoutfs_key_buf upper_next; struct scoutfs_segment *seg; struct compact_seg *cseg; struct compact_seg *upper; @@ -357,24 +372,25 @@ static int compact_segments(struct super_block *sb, int ret; scoutfs_inc_counter(sb, compact_operations); + if (curs->sticky) + scoutfs_inc_counter(sb, compact_sticky_upper); for (;;) { upper = curs->upper; lower = curs->lower; /* - * We can just move the upper segment down a level if it - * doesn't intersect any lower segments. + * If we're at the start of the upper segment and + * there's no lower segment then we might as well just + * move the segment in the manifest. 
We can't do this + * if we're moving to the last level because we might + * need to drop any deletion items. * - * XXX we can't do this if the segment we're moving has - * deletion items. We need to copy the non-deletion items - * and drop the deletion items in that case. To do that - * we'll need the manifest to count the number of deletion - * and non-deletion items. + * XXX We should have metadata in the manifest to tell + * us that there are no deletion items in the segment. */ - if (upper && upper->pos == 0 && - (!lower || - scoutfs_key_compare(upper->last, lower->first) < 0)) { + if (upper && upper->pos == 0 && !lower && !curs->sticky && + ((upper->level + 1) < curs->last_level)) { /* * XXX blah! these csegs are getting @@ -412,26 +428,17 @@ static int compact_segments(struct super_block *sb, break; /* - * We can skip a lower segment if there's no upper segment - * or the next upper item is past the last in the lower. + * XXX we could intelligently skip reading and merging + * lower segments here. The lower segment won't change + * if: + * - the lower segment is entirely before the upper + * - the lower segment is full * - * XXX this will need to test for intersection with range - * deletion items. + * We don't have the metadata to determine that it's + * full today so we want to read lower segments that don't + * overlap so that we can merge partial lowers with + * their neighbours. */ - if (lower && lower->pos == 0 && - (!upper || - (!scoutfs_seg_item_ptrs(upper->seg, upper->pos, - &upper_next, NULL, NULL) && - scoutfs_key_compare(&upper_next, lower->last) > 0))) { - - curs->lower = next_spos(curs, lower); - - list_del_init(&lower->entry); - free_cseg(sb, lower); - - scoutfs_inc_counter(sb, compact_segment_skipped); - continue; - } ret = read_segment(sb, lower); if (ret) @@ -467,8 +474,18 @@ static int compact_segments(struct super_block *sb, break; } + /* + * The remaining upper items in a sticky merge have to + * be written into the upper level. 
+ */ + if (curs->sticky && !lower) { + cseg->level = curs->lower_level - 1; + scoutfs_inc_counter(sb, compact_sticky_written); + } else { + cseg->level = curs->lower_level; + } + /* csegs will be claned up once they're on the list */ - cseg->level = curs->lower_level; cseg->seg = seg; list_add_tail(&cseg->entry, results); @@ -476,6 +493,17 @@ static int compact_segments(struct super_block *sb, if (ret < 0) break; + /* + * Clear lower after we've consumed it so that sticky + * compaction can decide to write the rest of the items + * into the upper level. We decide that it's done by + * testing the pos that next_item() is going to try. + */ + if (curs->sticky && curs->lower == curs->last_lower && + scoutfs_seg_item_ptrs(curs->lower->seg, curs->lower->pos, + NULL, NULL, NULL) < 0) + curs->lower = NULL; + /* start a complete segment write now, we'll wait later */ ret = scoutfs_seg_submit_write(sb, seg, comp); if (ret) @@ -489,15 +517,16 @@ static int compact_segments(struct super_block *sb, /* * Manifest walking is providing the details of the overall compaction - * operation. It'll then add all the segments involved. + * operation. 
*/ void scoutfs_compact_describe(struct super_block *sb, void *data, - u8 upper_level, u8 last_level) + u8 upper_level, u8 last_level, bool sticky) { struct compact_cursor *curs = data; curs->lower_level = upper_level + 1; curs->last_level = last_level; + curs->sticky = sticky; } /* @@ -531,6 +560,8 @@ int scoutfs_compact_add(struct super_block *sb, void *data, curs->upper = cseg; else if (!curs->lower) curs->lower = cseg; + if (curs->lower) + curs->last_lower = cseg; ret = 0; out: diff --git a/kmod/src/compact.h b/kmod/src/compact.h index e017dd87..f6f4bb60 100644 --- a/kmod/src/compact.h +++ b/kmod/src/compact.h @@ -4,7 +4,7 @@ void scoutfs_compact_kick(struct super_block *sb); void scoutfs_compact_describe(struct super_block *sb, void *data, - u8 upper_level, u8 last_level); + u8 upper_level, u8 last_level, bool sticky); int scoutfs_compact_add(struct super_block *sb, void *data, struct scoutfs_key_buf *first, struct scoutfs_key_buf *last, u64 segno, u64 seq, diff --git a/kmod/src/counters.h b/kmod/src/counters.h index e0b92c44..a41340e2 100644 --- a/kmod/src/counters.h +++ b/kmod/src/counters.h @@ -19,9 +19,10 @@ EXPAND_COUNTER(manifest_compact_migrate) \ EXPAND_COUNTER(compact_operations) \ EXPAND_COUNTER(compact_segment_moved) \ - EXPAND_COUNTER(compact_segment_skipped) \ EXPAND_COUNTER(compact_segment_read) \ EXPAND_COUNTER(compact_segment_written) \ + EXPAND_COUNTER(compact_sticky_upper) \ + EXPAND_COUNTER(compact_sticky_written) \ EXPAND_COUNTER(data_readpage) \ EXPAND_COUNTER(data_write_begin) \ EXPAND_COUNTER(data_write_end) \ diff --git a/kmod/src/format.h b/kmod/src/format.h index 26c2bbb8..3451a04b 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -448,6 +448,16 @@ struct scoutfs_net_segnos { __le64 segnos[0]; } __packed; +/* XXX eventually we'll have net compaction and will need agents to agree */ + +/* one upper segment and fanout lower segments */ +#define SCOUTFS_COMPACTION_MAX_INPUT (1 + SCOUTFS_MANIFEST_FANOUT) +/* sticky can add one, 
and so can item page alignment */ +#define SCOUTFS_COMPACTION_SLOP 2 +/* delete all inputs and insert all outputs (same goes for alloc|free segnos) */ +#define SCOUTFS_COMPACTION_MAX_UPDATE \ + (2 * (SCOUTFS_COMPACTION_MAX_INPUT + SCOUTFS_COMPACTION_SLOP)) + enum { SCOUTFS_NET_ALLOC_INODES = 0, SCOUTFS_NET_MANIFEST_RANGE_ENTRIES, diff --git a/kmod/src/manifest.c b/kmod/src/manifest.c index a04fbe50..4cec4184 100644 --- a/kmod/src/manifest.c +++ b/kmod/src/manifest.c @@ -719,6 +719,7 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data) struct scoutfs_key_buf ment_last; struct scoutfs_key_buf over_first; struct scoutfs_key_buf over_last; + bool sticky; int level; int ret; int nr = 0; @@ -739,7 +740,6 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data) goto out; } - scoutfs_compact_describe(sb, data, level, mani->nr_levels - 1); /* find the oldest level 0 or the next higher order level by key */ if (level == 0) { @@ -779,7 +779,8 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data) over = scoutfs_ring_lookup_next(&mani->ring, &skey); /* and add a fanout's worth of lower overlapping segments */ - for (i = 0; i < SCOUTFS_MANIFEST_FANOUT; i++) { + sticky = false; + for (i = 0; i < SCOUTFS_MANIFEST_FANOUT + 1; i++) { if (!over || over->level != (ment->level + 1)) break; @@ -789,6 +790,12 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data) &over_first, &over_last) != 0) break; + /* upper level has to stay around when more than fanout */ + if (i == SCOUTFS_MANIFEST_FANOUT) { + sticky = true; + break; + } + ret = scoutfs_compact_add(sb, data, &over_first, &over_last, le64_to_cpu(over->segno), le64_to_cpu(over->seq), level + 1); @@ -799,6 +806,8 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data) over = scoutfs_ring_next(&mani->ring, over); } + scoutfs_compact_describe(sb, data, level, mani->nr_levels - 1, sticky); + /* record the next key to start from */ 
scoutfs_key_copy(mani->compact_keys[level], &ment_last); scoutfs_key_inc(mani->compact_keys[level]); diff --git a/kmod/src/net.c b/kmod/src/net.c index f50c1fa3..76d9ca3a 100644 --- a/kmod/src/net.c +++ b/kmod/src/net.c @@ -1420,7 +1420,8 @@ int scoutfs_net_get_compaction(struct super_block *sb, void *curs) return nr; } - for (i = 0; i < nr; i++) { + /* allow for expansion slop from sticky and alignment */ + for (i = 0; i < nr + SCOUTFS_COMPACTION_SLOP; i++) { ret = scoutfs_alloc_segno(sb, &segno); if (ret < 0) break;