From 5f5729b2a431fadbdaef214789e0d4cc853df254 Mon Sep 17 00:00:00 2001
From: Zach Brown
Date: Fri, 9 Jun 2017 09:28:48 -0700
Subject: [PATCH] scoutfs: add sticky compaction

As we write segments we're not limiting the number of segments they
intersect at the next level, but compactions are limited to a fanout's
worth of overlapping segments.  This means that we can get a compaction
where the upper level segment overlaps more lower segments than are
part of the compaction.

In this case we can't write the remaining upper level items into the
lower level because that would leave a level with segments whose keys
intersect.

Instead we detect this compaction case.  We call it sticky because
after merging with the lower level segments the remaining items in the
upper level need to stick to the upper level.  The next time compaction
comes around it'll compact the remaining items with the additional
overlapping lower segments.

Signed-off-by: Zach Brown
---
 kmod/src/compact.c  | 97 ++++++++++++++++++++++++++++++---------------
 kmod/src/compact.h  |  2 +-
 kmod/src/counters.h |  3 +-
 kmod/src/format.h   | 10 +++++
 kmod/src/manifest.c | 13 +++++-
 kmod/src/net.c      |  3 +-
 6 files changed, 90 insertions(+), 38 deletions(-)

diff --git a/kmod/src/compact.c b/kmod/src/compact.c
index f02234e0..9c83eca6 100644
--- a/kmod/src/compact.c
+++ b/kmod/src/compact.c
@@ -83,7 +83,7 @@ struct compact_cursor {
 	struct list_head csegs;
 
 	/* buffer holds allocations and our returning them */
-	u64 segnos[2 * (1 + SCOUTFS_MANIFEST_FANOUT)];
+	u64 segnos[SCOUTFS_COMPACTION_MAX_UPDATE];
 	unsigned nr_segnos;
 
 	u8 lower_level;
@@ -93,6 +93,9 @@ struct compact_cursor {
 	struct compact_seg *saved_upper;
 	struct compact_seg *lower;
 	struct compact_seg *saved_lower;
+
+	bool sticky;
+	struct compact_seg *last_lower;
 };
 
 static void free_cseg(struct super_block *sb, struct compact_seg *cseg)
@@ -256,6 +259,19 @@ retry:
 		*item_flags = lower_flags;
 	}
 
+	/*
+	 * If we have a sticky compaction then we can't mix items from
+	 * the upper level past the last lower key into the lower level.
+	 * The caller will notice when they're emptying the final upper
+	 * level in a sticky merge and leave it at the upper level.
+	 */
+	if (curs->sticky && curs->lower &&
+	    (!lower || lower == curs->last_lower) &&
+	    scoutfs_key_compare(item_key, curs->last_lower->last) > 0) {
+		ret = 0;
+		goto out;
+	}
+
 	if (cmp <= 0)
 		upper->pos++;
 	if (cmp >= 0)
@@ -346,7 +362,6 @@ static int compact_segments(struct super_block *sb,
 			    struct scoutfs_bio_completion *comp,
 			    struct list_head *results)
 {
-	struct scoutfs_key_buf upper_next;
 	struct scoutfs_segment *seg;
 	struct compact_seg *cseg;
 	struct compact_seg *upper;
@@ -357,24 +372,25 @@ static int compact_segments(struct super_block *sb,
 	int ret;
 
 	scoutfs_inc_counter(sb, compact_operations);
+	if (curs->sticky)
+		scoutfs_inc_counter(sb, compact_sticky_upper);
 
 	for (;;) {
 		upper = curs->upper;
 		lower = curs->lower;
 
 		/*
-		 * We can just move the upper segment down a level if it
-		 * doesn't intersect any lower segments.
+		 * If we're at the start of the upper segment and
+		 * there's no lower segment then we might as well just
+		 * move the segment in the manifest.  We can't do this
+		 * if we're moving to the last level because we might
+		 * need to drop any deletion items.
 		 *
-		 * XXX we can't do this if the segment we're moving has
-		 * deletion items.  We need to copy the non-deletion items
-		 * and drop the deletion items in that case.  To do that
-		 * we'll need the manifest to count the number of deletion
-		 * and non-deletion items.
+		 * XXX We should have metadata in the manifest to tell
+		 * us that there are no deletion items in the segment.
 		 */
-		if (upper && upper->pos == 0 &&
-		    (!lower ||
-		     scoutfs_key_compare(upper->last, lower->first) < 0)) {
+		if (upper && upper->pos == 0 && !lower && !curs->sticky &&
+		    ((upper->level + 1) < curs->last_level)) {
 
 			/*
 			 * XXX blah! these csegs are getting
@@ -412,26 +428,17 @@ static int compact_segments(struct super_block *sb,
 			break;
 
 		/*
-		 * We can skip a lower segment if there's no upper segment
-		 * or the next upper item is past the last in the lower.
+		 * XXX we could intelligently skip reading and merging
+		 * lower segments here.  The lower segment won't change
+		 * if:
+		 *  - the lower segment is entirely before the upper
+		 *  - the lower segment is full
 		 *
-		 * XXX this will need to test for intersection with range
-		 * deletion items.
+		 * We don't have the metadata to determine that it's
+		 * full today so we want to read lower segments that don't
+		 * overlap so that we can merge partial lowers with
+		 * their neighbours.
 		 */
-		if (lower && lower->pos == 0 &&
-		    (!upper ||
-		     (!scoutfs_seg_item_ptrs(upper->seg, upper->pos,
-					     &upper_next, NULL, NULL) &&
-		      scoutfs_key_compare(&upper_next, lower->last) > 0))) {
-
-			curs->lower = next_spos(curs, lower);
-
-			list_del_init(&lower->entry);
-			free_cseg(sb, lower);
-
-			scoutfs_inc_counter(sb, compact_segment_skipped);
-			continue;
-		}
 
 		ret = read_segment(sb, lower);
 		if (ret)
@@ -467,8 +474,18 @@
 			break;
 		}
 
+		/*
+		 * The remaining upper items in a sticky merge have to
+		 * be written into the upper level.
+		 */
+		if (curs->sticky && !lower) {
+			cseg->level = curs->lower_level - 1;
+			scoutfs_inc_counter(sb, compact_sticky_written);
+		} else {
+			cseg->level = curs->lower_level;
+		}
+
 		/* csegs will be claned up once they're on the list */
-		cseg->level = curs->lower_level;
 		cseg->seg = seg;
 		list_add_tail(&cseg->entry, results);
 
@@ -476,6 +493,17 @@
 		if (ret < 0)
 			break;
 
+		/*
+		 * Clear lower after we've consumed it so that sticky
+		 * compaction can decide to write the rest of the items
+		 * into the upper level.  We decide that it's done by
+		 * testing the pos that next_item() is going to try.
+		 */
+		if (curs->sticky && curs->lower == curs->last_lower &&
+		    scoutfs_seg_item_ptrs(curs->lower->seg, curs->lower->pos,
+					  NULL, NULL, NULL) < 0)
+			curs->lower = NULL;
+
 		/* start a complete segment write now, we'll wait later */
 		ret = scoutfs_seg_submit_write(sb, seg, comp);
 		if (ret)
@@ -489,15 +517,16 @@
 
 /*
  * Manifest walking is providing the details of the overall compaction
- * operation.  It'll then add all the segments involved.
+ * operation.
  */
 void scoutfs_compact_describe(struct super_block *sb, void *data,
-			      u8 upper_level, u8 last_level)
+			      u8 upper_level, u8 last_level, bool sticky)
 {
 	struct compact_cursor *curs = data;
 
 	curs->lower_level = upper_level + 1;
 	curs->last_level = last_level;
+	curs->sticky = sticky;
 }
 
 /*
@@ -531,6 +560,8 @@ int scoutfs_compact_add(struct super_block *sb, void *data,
 		curs->upper = cseg;
 	else if (!curs->lower)
 		curs->lower = cseg;
+	if (curs->lower)
+		curs->last_lower = cseg;
 
 	ret = 0;
 out:
diff --git a/kmod/src/compact.h b/kmod/src/compact.h
index e017dd87..f6f4bb60 100644
--- a/kmod/src/compact.h
+++ b/kmod/src/compact.h
@@ -4,7 +4,7 @@
 void scoutfs_compact_kick(struct super_block *sb);
 
 void scoutfs_compact_describe(struct super_block *sb, void *data,
-			      u8 upper_level, u8 last_level);
+			      u8 upper_level, u8 last_level, bool sticky);
 int scoutfs_compact_add(struct super_block *sb, void *data,
 			struct scoutfs_key_buf *first,
 			struct scoutfs_key_buf *last, u64 segno, u64 seq,
diff --git a/kmod/src/counters.h b/kmod/src/counters.h
index e0b92c44..a41340e2 100644
--- a/kmod/src/counters.h
+++ b/kmod/src/counters.h
@@ -19,9 +19,10 @@
 	EXPAND_COUNTER(manifest_compact_migrate)	\
 	EXPAND_COUNTER(compact_operations)		\
 	EXPAND_COUNTER(compact_segment_moved)		\
-	EXPAND_COUNTER(compact_segment_skipped)		\
 	EXPAND_COUNTER(compact_segment_read)		\
 	EXPAND_COUNTER(compact_segment_written)		\
+	EXPAND_COUNTER(compact_sticky_upper)		\
+	EXPAND_COUNTER(compact_sticky_written)		\
 	EXPAND_COUNTER(data_readpage)			\
 	EXPAND_COUNTER(data_write_begin)		\
 	EXPAND_COUNTER(data_write_end)			\
diff --git a/kmod/src/format.h b/kmod/src/format.h
index 26c2bbb8..3451a04b 100644
--- a/kmod/src/format.h
+++ b/kmod/src/format.h
@@ -448,6 +448,16 @@ struct scoutfs_net_segnos {
 	__le64 segnos[0];
 } __packed;
 
+/* XXX eventually we'll have net compaction and will need agents to agree */
+
+/* one upper segment and fanout lower segments */
+#define SCOUTFS_COMPACTION_MAX_INPUT (1 + SCOUTFS_MANIFEST_FANOUT)
+/* sticky can add one, and so can item page alignment */
+#define SCOUTFS_COMPACTION_SLOP 2
+/* delete all inputs and insert all outputs (same goes for alloc|free segnos) */
+#define SCOUTFS_COMPACTION_MAX_UPDATE \
+	(2 * (SCOUTFS_COMPACTION_MAX_INPUT + SCOUTFS_COMPACTION_SLOP))
+
 enum {
 	SCOUTFS_NET_ALLOC_INODES = 0,
 	SCOUTFS_NET_MANIFEST_RANGE_ENTRIES,
diff --git a/kmod/src/manifest.c b/kmod/src/manifest.c
index a04fbe50..4cec4184 100644
--- a/kmod/src/manifest.c
+++ b/kmod/src/manifest.c
@@ -719,6 +719,7 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
 	struct scoutfs_key_buf ment_last;
 	struct scoutfs_key_buf over_first;
 	struct scoutfs_key_buf over_last;
+	bool sticky;
 	int level;
 	int ret;
 	int nr = 0;
@@ -739,7 +740,6 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
 		goto out;
 	}
 
-	scoutfs_compact_describe(sb, data, level, mani->nr_levels - 1);
 
 	/* find the oldest level 0 or the next higher order level by key */
 	if (level == 0) {
@@ -779,7 +779,8 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
 	over = scoutfs_ring_lookup_next(&mani->ring, &skey);
 
 	/* and add a fanout's worth of lower overlapping segments */
-	for (i = 0; i < SCOUTFS_MANIFEST_FANOUT; i++) {
+	sticky = false;
+	for (i = 0; i < SCOUTFS_MANIFEST_FANOUT + 1; i++) {
 		if (!over || over->level != (ment->level + 1))
 			break;
 
@@ -789,6 +790,12 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
 						&over_first, &over_last) != 0)
 			break;
 
+		/* upper level has to stay around when more than fanout */
+		if (i == SCOUTFS_MANIFEST_FANOUT) {
+			sticky = true;
+			break;
+		}
+
 		ret = scoutfs_compact_add(sb, data, &over_first, &over_last,
 					  le64_to_cpu(over->segno),
 					  le64_to_cpu(over->seq), level + 1);
@@ -799,6 +806,8 @@ int scoutfs_manifest_next_compact(struct super_block *sb, void *data)
 		over = scoutfs_ring_next(&mani->ring, over);
 	}
 
+	scoutfs_compact_describe(sb, data, level, mani->nr_levels - 1, sticky);
+
 	/* record the next key to start from */
 	scoutfs_key_copy(mani->compact_keys[level], &ment_last);
 	scoutfs_key_inc(mani->compact_keys[level]);
diff --git a/kmod/src/net.c b/kmod/src/net.c
index f50c1fa3..76d9ca3a 100644
--- a/kmod/src/net.c
+++ b/kmod/src/net.c
@@ -1420,7 +1420,8 @@ int scoutfs_net_get_compaction(struct super_block *sb, void *curs)
 		return nr;
 	}
 
-	for (i = 0; i < nr; i++) {
+	/* allow for expansion slop from sticky and alignment */
+	for (i = 0; i < nr + SCOUTFS_COMPACTION_SLOP; i++) {
 		ret = scoutfs_alloc_segno(sb, &segno);
 		if (ret < 0)
 			break;
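
Not part of the patch: the following is a minimal, self-contained sketch of the sticky rule described above, using integer keys and made-up names (DEMO_FANOUT standing in for SCOUTFS_MANIFEST_FANOUT, plus a hypothetical struct range and overlaps() helper).  It only illustrates the decision the manifest walk makes: include at most a fanout's worth of overlapping lower segments, and mark the compaction sticky when yet another lower segment still overlaps, so the leftover upper items stay at the upper level for a later compaction.

/* sticky_demo.c: illustration only -- not scoutfs code. */
#include <stdbool.h>
#include <stdio.h>

#define DEMO_FANOUT 3	/* stand-in for SCOUTFS_MANIFEST_FANOUT */

struct range { int first; int last; };

/* two key ranges overlap when neither ends before the other begins */
static bool overlaps(struct range a, struct range b)
{
	return a.first <= b.last && b.first <= a.last;
}

int main(void)
{
	/* one upper segment whose keys overlap five lower segments */
	struct range upper = { 0, 99 };
	struct range lower[] = {
		{ 0, 19 }, { 20, 39 }, { 40, 59 }, { 60, 79 }, { 80, 99 },
	};
	int nr_lower = (int)(sizeof(lower) / sizeof(lower[0]));
	bool sticky = false;
	int included = 0;
	int i;

	/* include at most a fanout's worth of overlapping lower segments;
	 * one more overlapping segment makes the compaction sticky */
	for (i = 0; i < DEMO_FANOUT + 1 && i < nr_lower; i++) {
		if (!overlaps(upper, lower[i]))
			break;
		if (i == DEMO_FANOUT) {
			sticky = true;
			break;
		}
		included++;
	}

	/* prints: included 3 lower segments, sticky: yes */
	printf("included %d lower segments, sticky: %s\n",
	       included, sticky ? "yes" : "no");
	/* when sticky, upper items past lower[included - 1].last are
	 * written back to the upper level rather than the lower level */
	return 0;
}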