mirror of https://github.com/versity/scoutfs.git
synced 2026-01-08 04:55:21 +00:00
scoutfs: insert new manifests at highest level
Manifests for newly written segments can be inserted at the highest
level that doesn't have segments they intersect. This avoids ring and
merging churn.

The change cleans up the code a little bit, which is nice, and adds
tracepoints for manifests entering and leaving the in-memory
structures.

Signed-off-by: Zach Brown <zab@versity.com>
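To make the level-selection rule concrete, here is a small user-space
sketch of the same walk. It is a minimal model under stated assumptions,
not the patch's code: pick_level, struct range, the TOY_* constants, and
the values in main are hypothetical stand-ins for insertion_level(),
struct scoutfs_key ranges, and the per-level rbtrees in the diff below.

	#include <stdint.h>
	#include <stdio.h>

	/* Toy stand-ins: plain integer key ranges instead of struct
	 * scoutfs_key, fixed arrays instead of rbtrees of manifest nodes. */
	struct range { uint64_t first, last; };

	#define TOY_MAX_LEVEL 4
	#define TOY_MANIFESTS_PER_LEVEL 10

	struct toy_level {
		struct range segs[TOY_MANIFESTS_PER_LEVEL];
		int count;
	};

	static int ranges_intersect(const struct range *a, const struct range *b)
	{
		return a->first <= b->last && b->first <= a->last;
	}

	/*
	 * Walk down from level 1, mirroring the idea of insertion_level()
	 * in the patch: stop at the first level that still has room, and
	 * if a level holds a segment that the new segment intersects,
	 * insert one level above it so the newer keys stay on top of the
	 * keys they shadow.
	 */
	static int pick_level(const struct toy_level *levels,
			      const struct range *new_seg)
	{
		int i, j;

		for (i = 1; i <= TOY_MAX_LEVEL; i++) {
			for (j = 0; j < levels[i].count; j++)
				if (ranges_intersect(&levels[i].segs[j], new_seg))
					return i - 1;
			if (levels[i].count < TOY_MANIFESTS_PER_LEVEL)
				return i;
		}
		return TOY_MAX_LEVEL;
	}

	int main(void)
	{
		struct toy_level levels[TOY_MAX_LEVEL + 1] = { 0 };
		struct range new_seg = { 100, 200 };

		/* level 1 is full; a level 2 segment overlaps the new one */
		levels[1].count = TOY_MANIFESTS_PER_LEVEL;
		levels[2].segs[0] = (struct range){ 150, 300 };
		levels[2].count = 1;

		/* prints "insert at level 1": the full level 1 is skipped,
		 * but the overlap at level 2 keeps the new segment above it */
		printf("insert at level %d\n", pick_level(levels, &new_seg));
		return 0;
	}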
@@ -161,6 +161,8 @@ struct scoutfs_ring_manifest_entry {
 	struct scoutfs_key last;
 } __packed;
 
+#define SCOUTFS_MANIFESTS_PER_LEVEL 10
+
 struct scoutfs_ring_del_manifest {
 	__le64 blkno;
 } __packed;
@@ -19,6 +19,7 @@
 #include "manifest.h"
 #include "key.h"
 #include "ring.h"
+#include "scoutfs_trace.h"
 
 /*
  * The manifest organizes log segment blocks into a tree structure.
@@ -42,6 +43,7 @@ struct scoutfs_manifest {
 
 	struct scoutfs_level {
 		struct rb_root root;
+		u64 count;
 	} levels[SCOUTFS_MAX_LEVEL + 1];
 };
 
@@ -111,11 +113,14 @@ static struct scoutfs_manifest_node *unlink_mnode(struct scoutfs_manifest *mani,
 
 	mnode = radix_tree_lookup(&mani->blkno_radix, blkno);
 	if (mnode) {
+		trace_scoutfs_delete_manifest(&mnode->ment);
+
 		if (!list_empty(&mnode->head))
 			list_del_init(&mnode->head);
 		if (!RB_EMPTY_NODE(&mnode->node)) {
 			rb_erase(&mnode->node,
 				 &mani->levels[mnode->ment.level].root);
+			mani->levels[mnode->ment.level].count--;
 			RB_CLEAR_NODE(&mnode->node);
 		}
 	}
@@ -144,61 +149,114 @@ void scoutfs_delete_manifest(struct super_block *sb, u64 blkno)
 }
 
 /*
- * This is called during ring replay to reconstruct the manifest state
- * from the ring entries.  Moving segments between levels is recorded
- * with a single ring entry so we always try to look up the segment in
- * the manifest before we add it to the manifest.
+ * A newly inserted manifest can be inserted at the level
+ * above the first block that it intersects.
  */
-int scoutfs_add_manifest(struct super_block *sb,
-			 struct scoutfs_ring_manifest_entry *ment)
+static u8 insertion_level(struct super_block *sb,
+			  struct scoutfs_ring_manifest_entry *ment)
 {
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
 	struct scoutfs_manifest *mani = sbi->mani;
 	struct scoutfs_manifest_node *mnode;
+	int i;
+
+	list_for_each_entry(mnode, &mani->level_zero, head) {
+		if (scoutfs_cmp_key_ranges(&ment->first, &ment->last,
+					   &mnode->ment.first,
+					   &mnode->ment.last) == 0)
+			return 0;
+	}
+
+	/* XXX this <= looks fishy :/ */
+	for (i = 1; i <= SCOUTFS_MAX_LEVEL; i++) {
+		mnode = find_mnode(&mani->levels[i].root, &ment->first);
+		if (mnode)
+			break;
+		if (mani->levels[i].count < SCOUTFS_MANIFESTS_PER_LEVEL)
+			return i;
+	}
+
+	return i - 1;
+}
+
+/*
+ * Insert an manifest entry into the blkno radix and either level 0 list
+ * or greater level rbtrees as appropriate.  The new entry will replace
+ * any existing entry at its blkno, perhaps with different keys and
+ * level.
+ *
+ * The caller can ask that we find the highest level that the entry can
+ * be inserted into before it intersects with an existing entry.  The
+ * caller's entry is updated with the new level so they can store it in
+ * the ring.  Doing so here avoids extra ring churn of doing it later in
+ * merging.
+ */
+static int insert_manifest(struct super_block *sb,
+			   struct scoutfs_ring_manifest_entry *ment,
+			   bool find_level)
+{
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+	struct scoutfs_manifest *mani = sbi->mani;
+	struct scoutfs_manifest_node *mnode;
+	struct scoutfs_manifest_node *found;
 	u64 blkno = le64_to_cpu(ment->blkno);
-	bool preloaded = false;
-	int ret;
+	int ret = 0;
+
+	/* allocation/preloading should be cheap enough to always try */
+	mnode = kmalloc(sizeof(struct scoutfs_manifest_node), GFP_NOFS);
+	if (!mnode)
+		return -ENOMEM; /* XXX hmm, fatal? prealloc?*/
+
+	ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
+	if (ret) {
+		kfree(mnode);
+		return ret;
+	}
+
+	INIT_LIST_HEAD(&mnode->head);
+	RB_CLEAR_NODE(&mnode->node);
 
 	spin_lock(&mani->lock);
 
-	mnode = unlink_mnode(mani, blkno);
-	if (!mnode) {
-		spin_unlock(&mani->lock);
-		mnode = kmalloc(sizeof(struct scoutfs_manifest_node),
-				GFP_NOFS);
-		if (!mnode)
-			return -ENOMEM; /* XXX hmm, fatal? prealloc?*/
-
-		ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM);
-		if (ret) {
-			kfree(mnode);
-			return ret;
-		}
-		preloaded = true;
-
-		INIT_LIST_HEAD(&mnode->head);
-		RB_CLEAR_NODE(&mnode->node);
-		spin_lock(&mani->lock);
-		/* preloading should guarantee this succeeds */
+	/* reuse found to avoid radix delete/insert churn */
+	found = unlink_mnode(mani, blkno);
+	if (!found) {
 		radix_tree_insert(&mani->blkno_radix, blkno, mnode);
+	} else {
+		swap(found, mnode);
 	}
 
+	/* careful to find our level after deleting old blkno ment */
+	if (find_level)
+		ment->level = insertion_level(sb, ment);
+
+	trace_scoutfs_insert_manifest(ment);
+
 	mnode->ment = *ment;
-	if (ment->level)
+	if (ment->level) {
 		insert_mnode(&mani->levels[ment->level].root, mnode);
-	else
+		mani->levels[ment->level].count++;
+	} else {
 		list_add(&mnode->head, &mani->level_zero);
+	}
 
 	spin_unlock(&mani->lock);
-	if (preloaded)
-		radix_tree_preload_end();
+	radix_tree_preload_end();
+	kfree(found);
 
 	return 0;
 }
 
+/* Index an existing entry */
+int scoutfs_insert_manifest(struct super_block *sb,
+			    struct scoutfs_ring_manifest_entry *ment)
+{
+	return insert_manifest(sb, ment, false);
+}
+
 /*
- * The caller is writing a new log segment.  We add it to the in-memory
- * manifest and write it to dirty ring blocks.
+ * Add an entry for a newly written segment to the indexes and record it
+ * in the ring.  The entry can be modified by insertion.
  *
  * XXX we'd also need to add stale manifest entry's to the ring
  * XXX In the future we'd send it to the leader
@@ -206,9 +264,17 @@ int scoutfs_add_manifest(struct super_block *sb,
 int scoutfs_new_manifest(struct super_block *sb,
 			 struct scoutfs_ring_manifest_entry *ment)
 {
-	return scoutfs_add_manifest(sb, ment) ?:
-	       scoutfs_dirty_ring_entry(sb, SCOUTFS_RING_ADD_MANIFEST,
-					ment, sizeof(*ment));
+	int ret;
+
+	ret = insert_manifest(sb, ment, true);
+	if (!ret) {
+		ret = scoutfs_dirty_ring_entry(sb, SCOUTFS_RING_ADD_MANIFEST,
+					       ment, sizeof(*ment));
+		if (ret)
+			scoutfs_delete_manifest(sb, le64_to_cpu(ment->blkno));
+	}
+
+	return ret;
 }
 
 /*
@@ -4,8 +4,8 @@
 int scoutfs_setup_manifest(struct super_block *sb);
 void scoutfs_destroy_manifest(struct super_block *sb);
 
-int scoutfs_add_manifest(struct super_block *sb,
-			 struct scoutfs_ring_manifest_entry *ment);
+int scoutfs_insert_manifest(struct super_block *sb,
+			    struct scoutfs_ring_manifest_entry *ment);
 int scoutfs_new_manifest(struct super_block *sb,
 			 struct scoutfs_ring_manifest_entry *ment);
 void scoutfs_delete_manifest(struct super_block *sb, u64 blkno);
@@ -40,7 +40,7 @@ static int replay_ring_block(struct super_block *sb, struct buffer_head *bh)
 	switch(ent->type) {
 	case SCOUTFS_RING_ADD_MANIFEST:
 		ment = (void *)(ent + 1);
-		ret = scoutfs_add_manifest(sb, ment);
+		ret = scoutfs_insert_manifest(sb, ment);
 		break;
 	case SCOUTFS_RING_DEL_MANIFEST:
 		del = (void *)(ent + 1);
@@ -24,6 +24,8 @@
 
 #include <linux/tracepoint.h>
 
+#include "key.h"
+
 TRACE_EVENT(scoutfs_bloom_hit,
 	TP_PROTO(struct scoutfs_key *key),
 
@@ -186,6 +188,78 @@ TRACE_EVENT(scoutfs_write_super,
 		__entry->blkno, __entry->seq)
 );
 
+TRACE_EVENT(scoutfs_insert_manifest,
+	TP_PROTO(struct scoutfs_ring_manifest_entry *ment),
+
+	TP_ARGS(ment),
+
+	TP_STRUCT__entry(
+		__field(__u64, blkno)
+		__field(__u64, seq)
+		__field(__u8, level)
+		__field(__u64, first_inode)
+		__field(__u8, first_type)
+		__field(__u64, first_offset)
+		__field(__u64, last_inode)
+		__field(__u8, last_type)
+		__field(__u64, last_offset)
+	),
+
+	TP_fast_assign(
+		__entry->blkno = le64_to_cpu(ment->blkno);
+		__entry->seq = le64_to_cpu(ment->seq);
+		__entry->level = ment->level;
+		__entry->first_inode = le64_to_cpu(ment->first.inode);
+		__entry->first_type = ment->first.type;
+		__entry->first_offset = le64_to_cpu(ment->first.offset);
+		__entry->last_inode = le64_to_cpu(ment->last.inode);
+		__entry->last_type = ment->last.type;
+		__entry->last_offset = le64_to_cpu(ment->last.offset);
+	),
+
+	TP_printk("blkno %llu seq %llu level %u first "CKF" last "CKF,
+		__entry->blkno, __entry->seq, __entry->level,
+		__entry->first_inode, __entry->first_type,
+		__entry->first_offset, __entry->last_inode,
+		__entry->last_type, __entry->last_offset)
+);
+
+TRACE_EVENT(scoutfs_delete_manifest,
+	TP_PROTO(struct scoutfs_ring_manifest_entry *ment),
+
+	TP_ARGS(ment),
+
+	TP_STRUCT__entry(
+		__field(__u64, blkno)
+		__field(__u64, seq)
+		__field(__u8, level)
+		__field(__u64, first_inode)
+		__field(__u8, first_type)
+		__field(__u64, first_offset)
+		__field(__u64, last_inode)
+		__field(__u8, last_type)
+		__field(__u64, last_offset)
+	),
+
+	TP_fast_assign(
+		__entry->blkno = le64_to_cpu(ment->blkno);
+		__entry->seq = le64_to_cpu(ment->seq);
+		__entry->level = ment->level;
+		__entry->first_inode = le64_to_cpu(ment->first.inode);
+		__entry->first_type = ment->first.type;
+		__entry->first_offset = le64_to_cpu(ment->first.offset);
+		__entry->last_inode = le64_to_cpu(ment->last.inode);
+		__entry->last_type = ment->last.type;
+		__entry->last_offset = le64_to_cpu(ment->last.offset);
+	),
+
+	TP_printk("blkno %llu seq %llu level %u first "CKF" last "CKF,
+		__entry->blkno, __entry->seq, __entry->level,
+		__entry->first_inode, __entry->first_type,
+		__entry->first_offset, __entry->last_inode,
+		__entry->last_type, __entry->last_offset)
+);
+
 #endif /* _TRACE_SCOUTFS_H */
 
 /* This part must be outside protection */
@@ -350,7 +350,7 @@ static int update_dirty_segment_manifest(struct super_block *sb, u64 blkno,
 	}
 
 	if (all_items)
-		ret = scoutfs_add_manifest(sb, &ment);
+		ret = scoutfs_insert_manifest(sb, &ment);
 	else
 		ret = scoutfs_new_manifest(sb, &ment);
 out: