mirror of https://github.com/versity/scoutfs.git
scoutfs: add basic ring replay on mount
Read the ring described by the super block and replay its entries to rebuild the in-memory state of the chunk allocator and log segment manifest.

We add just enough of the chunk allocator to set the free bits to the contents of the ring bitmap entries.

We start to build out the basic manifest data structure. It'll certainly evolve when we later add code to actually query it.

Signed-off-by: Zach Brown <zab@versity.com>
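As orientation for the diff below, here is a minimal sketch (not part of the patch) of how entries are walked inside a single ring block, assuming the scoutfs_ring_block and scoutfs_ring_entry layouts this patch defines in format.h; the real dispatch on entry type lives in replay_ring_block() in ring.c below.

#include <linux/kernel.h>

#include "format.h"

/*
 * Sketch only: a ring block header is followed by nr_entries
 * variable-length entries, each a { type, len } header with len
 * bytes of payload (a manifest add/del or an allocator bitmap).
 */
static void walk_ring_entries_sketch(struct scoutfs_ring_block *ring)
{
	struct scoutfs_ring_entry *ent = (void *)(ring + 1);
	int i;

	for (i = 0; i < le16_to_cpu(ring->nr_entries); i++) {
		void *payload = ent + 1;

		/* replay_ring_block() below dispatches on ent->type here */
		(void)payload;

		/* advance past this entry's header and its payload */
		ent = (void *)(ent + 1) + le16_to_cpu(ent->len);
	}
}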
@@ -1,3 +1,4 @@
 obj-$(CONFIG_SCOUTFS_FS) := scoutfs.o
 
-scoutfs-y += block.o dir.o inode.o item.o msg.o super.o
+scoutfs-y += block.o chunk.o crc.o dir.o inode.o item.o manifest.o msg.o \
+	     ring.o super.o
kmod/src/chunk.c (Normal file, 39 lines)
@@ -0,0 +1,39 @@
/*
 * Copyright (C) 2016 Versity Software, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/magic.h>
#include <linux/buffer_head.h>
#include <linux/random.h>

#include "super.h"
#include "format.h"
#include "inode.h"
#include "dir.h"
#include "msg.h"
#include "block.h"

void scoutfs_set_chunk_alloc_bits(struct super_block *sb,
				  struct scoutfs_ring_bitmap *bm)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	u64 off = le64_to_cpu(bm->offset);

	/* XXX check for corruption */

	sbi->chunk_alloc_bits[off] = bm->bits[0];
	sbi->chunk_alloc_bits[off + 1] = bm->bits[1];

}
kmod/src/chunk.h (Normal file, 7 lines)
@@ -0,0 +1,7 @@
#ifndef _SCOUTFS_CHUNK_H_
#define _SCOUTFS_CHUNK_H_

void scoutfs_set_chunk_alloc_bits(struct super_block *sb,
				  struct scoutfs_ring_bitmap *bm);

#endif
@@ -24,6 +24,7 @@
 #define SCOUTFS_CHUNK_SHIFT 22
 #define SCOUTFS_CHUNK_SIZE (1 << SCOUTFS_CHUNK_SHIFT)
 #define SCOUTFS_CHUNK_BLOCK_SHIFT (SCOUTFS_CHUNK_SHIFT - SCOUTFS_BLOCK_SHIFT)
+#define SCOUTFS_CHUNK_BLOCK_MASK ((1 << SCOUTFS_CHUNK_BLOCK_SHIFT) - 1)
 #define SCOUTFS_BLOCKS_PER_CHUNK (1 << SCOUTFS_CHUNK_BLOCK_SHIFT)
 
 /*
@@ -93,6 +94,10 @@ struct scoutfs_ring_map_block {
 	__le64 blknos[0];
 } __packed;
 
+#define SCOUTFS_RING_MAP_BLOCKS \
+	((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_ring_map_block)) / \
+	 sizeof(__le64))
+
 struct scoutfs_ring_entry {
 	u8 type;
 	__le16 len;
@@ -112,15 +117,11 @@ struct scoutfs_ring_block {
 } __packed;
 
 enum {
-	SCOUTFS_RING_REMOVE_MANIFEST = 0,
-	SCOUTFS_RING_ADD_MANIFEST,
+	SCOUTFS_RING_ADD_MANIFEST = 0,
+	SCOUTFS_RING_DEL_MANIFEST,
 	SCOUTFS_RING_BITMAP,
 };
 
-struct scoutfs_ring_remove_manifest {
-	__le64 blkno;
-} __packed;
-
 /*
  * Including both keys might make the manifest too large.  It might be
  * better to only include one key and infer a block's range from the
@@ -128,7 +129,7 @@ struct scoutfs_ring_remove_manifest {
  * isn't unused key space between blocks in a level.  We might search
  * blocks when we didn't need to.
  */
-struct scoutfs_ring_add_manifest {
+struct scoutfs_ring_manifest_entry {
 	__le64 blkno;
 	__le64 seq;
 	__u8 level;
@@ -136,6 +137,13 @@ struct scoutfs_ring_add_manifest {
 	struct scoutfs_key last;
 } __packed;
 
+struct scoutfs_ring_del_manifest {
+	__le64 blkno;
+} __packed;
+
+/* 2^22 * 10^13 > 2^64 */
+#define SCOUTFS_MAX_LEVEL 13
+
 struct scoutfs_ring_bitmap {
 	__le32 offset;
 	__le64 bits[2];
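For reference, the arithmetic behind the SCOUTFS_MAX_LEVEL comment works out as follows, assuming 2^22-byte chunks and roughly a factor of 10 more capacity per level (the factor of 10 is implied by the comment rather than stated elsewhere in this patch): 2^22 * 10^13 ≈ 2^22 * 2^43.2 ≈ 2^65 > 2^64, so 13 levels are enough to cover a full 64-bit byte address space.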
kmod/src/manifest.c (Normal file, 207 lines)
@@ -0,0 +1,207 @@
/*
 * Copyright (C) 2016 Versity Software, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

#include "super.h"
#include "format.h"
#include "manifest.h"
#include "key.h"

/*
 * The manifest organizes log segment blocks into a tree structure.
 *
 * Each level of the tree contains an ordered list of log segments whose
 * item keys don't overlap.  The first level (level 0) of the tree is
 * the exception whose segments can have key ranges that overlap.
 *
 * We also store pointers to the manifest entries in a radix tree
 * indexed by their block number so that we can easily find existing
 * entries for deletion.
 *
 * Level 0 segments are stored in the list with the most recent at the
 * head of the list.  Level 0's rb tree will always be empty.
 */
struct scoutfs_manifest {
	spinlock_t lock;

	struct radix_tree_root blkno_radix;
	struct list_head level_zero;

	struct scoutfs_level {
		struct rb_root root;
	} levels[SCOUTFS_MAX_LEVEL + 1];
};

struct scoutfs_manifest_node {
	struct rb_node node;
	struct list_head head;

	struct scoutfs_ring_manifest_entry ment;
};

static void insert_mnode(struct rb_root *root,
			 struct scoutfs_manifest_node *ins)
{
	struct rb_node **node = &root->rb_node;
	struct scoutfs_manifest_node *mnode;
	struct rb_node *parent = NULL;
	int cmp;

	while (*node) {
		parent = *node;
		mnode = rb_entry(*node, struct scoutfs_manifest_node, node);

		cmp = scoutfs_key_cmp(&ins->ment.first, &mnode->ment.first);
		if (cmp < 0)
			node = &(*node)->rb_left;
		else
			node = &(*node)->rb_right;
	}

	rb_link_node(&ins->node, parent, node);
	rb_insert_color(&ins->node, root);
}

static struct scoutfs_manifest_node *delete_mnode(struct scoutfs_manifest *mani,
						  u64 blkno)
{
	struct scoutfs_manifest_node *mnode;

	mnode = radix_tree_lookup(&mani->blkno_radix, blkno);
	if (mnode) {
		if (!list_empty(&mnode->head))
			list_del_init(&mnode->head);
		if (!RB_EMPTY_NODE(&mnode->node)) {
			rb_erase(&mnode->node,
				 &mani->levels[mnode->ment.level].root);
			RB_CLEAR_NODE(&mnode->node);
		}
	}

	return mnode;
}

/*
 * This is called during ring replay.  Because of the way the ring works
 * we can get deletion entries for segments that we don't yet have
 * in the replayed ring state.
 */
void scoutfs_delete_manifest(struct super_block *sb, u64 blkno)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct scoutfs_manifest *mani = sbi->mani;
	struct scoutfs_manifest_node *mnode;

	spin_lock(&mani->lock);
	mnode = delete_mnode(mani, blkno);
	spin_unlock(&mani->lock);
	if (mnode)
		kfree(mnode);
}

/*
 * This is called during ring replay to reconstruct the manifest state
 * from the ring entries.  Moving segments between levels is recorded
 * with a single ring entry so we always try to look up the segment in
 * the manifest before we add it to the manifest.
 */
int scoutfs_add_manifest(struct super_block *sb,
			 struct scoutfs_ring_manifest_entry *ment)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct scoutfs_manifest *mani = sbi->mani;
	struct scoutfs_manifest_node *mnode;

	spin_lock(&mani->lock);

	mnode = delete_mnode(mani, le64_to_cpu(ment->blkno));
	if (!mnode) {
		spin_unlock(&mani->lock);
		mnode = kmalloc(sizeof(struct scoutfs_manifest_node),
				GFP_NOFS);
		if (!mnode)
			return -ENOMEM; /* XXX hmm, fatal? prealloc? */

		INIT_LIST_HEAD(&mnode->head);
		RB_CLEAR_NODE(&mnode->node);
		spin_lock(&mani->lock);
	}

	mnode->ment = *ment;
	if (ment->level)
		insert_mnode(&mani->levels[ment->level].root, mnode);
	else
		list_add(&mnode->head, &mani->level_zero);

	spin_unlock(&mani->lock);

	return 0;
}

int scoutfs_setup_manifest(struct super_block *sb)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct scoutfs_manifest *mani;
	int i;

	mani = kmalloc(sizeof(struct scoutfs_manifest), GFP_KERNEL);
	if (!mani)
		return -ENOMEM;

	spin_lock_init(&mani->lock);
	INIT_RADIX_TREE(&mani->blkno_radix, GFP_NOFS);
	INIT_LIST_HEAD(&mani->level_zero);

	for (i = 0; i < ARRAY_SIZE(mani->levels); i++)
		mani->levels[i].root = RB_ROOT;

	sbi->mani = mani;

	return 0;
}

/*
 * This is called once the manifest will no longer be used.  We iterate
 * over the blkno radix deleting radix entries and freeing manifest
 * nodes.
 */
void scoutfs_destroy_manifest(struct super_block *sb)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct scoutfs_manifest *mani = sbi->mani;
	struct scoutfs_manifest_node *mnodes[16];
	unsigned long first_index = 0;
	int ret;
	int i;

	for (;;) {
		ret = radix_tree_gang_lookup(&mani->blkno_radix,
					     (void **)mnodes, first_index,
					     ARRAY_SIZE(mnodes));
		if (!ret)
			break;

		for (i = 0; i < ret; i++) {
			first_index = le64_to_cpu(mnodes[i]->ment.blkno);
			radix_tree_delete(&mani->blkno_radix, first_index);
			kfree(mnodes[i]);
		}
		first_index++;
	}

	kfree(sbi->mani);
	sbi->mani = NULL;
}
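To illustrate the comment above scoutfs_add_manifest(), here is a hypothetical replay fragment (not from the patch; the blkno and seq values are made up) showing why the add path deletes any existing node for the block number before re-filing it: a segment moving from level 0 to level 1 is recorded as a single add entry, so replaying it must displace the earlier state.

#include <linux/fs.h>

#include "format.h"
#include "manifest.h"

static int replay_level_move_example(struct super_block *sb)
{
	struct scoutfs_ring_manifest_entry ment = {
		.blkno = cpu_to_le64(123),	/* example block number */
		.seq = cpu_to_le64(7),		/* example sequence */
		.level = 0,
	};
	int ret;

	/* first add: the segment lands on the level 0 list */
	ret = scoutfs_add_manifest(sb, &ment);
	if (ret)
		return ret;

	/*
	 * a later ring entry re-adds the same blkno at level 1; add
	 * removes the existing node first, so no stale level 0 entry
	 * survives the replay
	 */
	ment.level = 1;
	return scoutfs_add_manifest(sb, &ment);
}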
kmod/src/manifest.h (Normal file, 11 lines)
@@ -0,0 +1,11 @@
#ifndef _SCOUTFS_MANIFEST_H_
#define _SCOUTFS_MANIFEST_H_

int scoutfs_setup_manifest(struct super_block *sb);
void scoutfs_destroy_manifest(struct super_block *sb);

int scoutfs_add_manifest(struct super_block *sb,
			 struct scoutfs_ring_manifest_entry *ment);
void scoutfs_delete_manifest(struct super_block *sb, u64 blkno);

#endif
kmod/src/ring.c (Normal file, 128 lines)
@@ -0,0 +1,128 @@
/*
 * Copyright (C) 2016 Versity Software, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/buffer_head.h>
#include <linux/fs.h>

#include "format.h"
#include "dir.h"
#include "inode.h"
#include "key.h"
#include "item.h"
#include "super.h"
#include "manifest.h"
#include "chunk.h"
#include "block.h"

static int replay_ring_block(struct super_block *sb, struct buffer_head *bh)
{
	struct scoutfs_ring_block *ring = (void *)bh->b_data;
	struct scoutfs_ring_entry *ent = (void *)(ring + 1);
	struct scoutfs_ring_manifest_entry *ment;
	struct scoutfs_ring_del_manifest *del;
	struct scoutfs_ring_bitmap *bm;
	int ret = 0;
	int i;

	/* XXX verify */

	for (i = 0; i < le16_to_cpu(ring->nr_entries); i++) {
		switch(ent->type) {
		case SCOUTFS_RING_ADD_MANIFEST:
			ment = (void *)(ent + 1);
			ret = scoutfs_add_manifest(sb, ment);
			break;
		case SCOUTFS_RING_DEL_MANIFEST:
			del = (void *)(ent + 1);
			scoutfs_delete_manifest(sb, le64_to_cpu(del->blkno));
			break;
		case SCOUTFS_RING_BITMAP:
			bm = (void *)(ent + 1);
			scoutfs_set_chunk_alloc_bits(sb, bm);
			break;
		default:
			/* XXX */
			break;
		}

		ent = (void *)(ent + 1) + le16_to_cpu(ent->len);
	}

	return ret;
}

/*
 * Read a given logical ring block.
 *
 * Each ring map block entry maps a chunk's worth of ring blocks.
 */
static struct buffer_head *read_ring_block(struct super_block *sb, u64 block)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct scoutfs_super_block *super = &sbi->super;
	struct scoutfs_ring_map_block *map;
	struct buffer_head *bh;
	u64 ring_chunk;
	u32 ring_block;
	u64 blkno;
	u64 div;
	u32 rem;

	ring_block = block & SCOUTFS_CHUNK_BLOCK_MASK;
	ring_chunk = block >> SCOUTFS_CHUNK_BLOCK_SHIFT;

	div = div_u64_rem(ring_chunk, SCOUTFS_RING_MAP_BLOCKS, &rem);

	bh = scoutfs_read_block(sb, le64_to_cpu(super->ring_map_blkno) + div);
	if (!bh)
		return NULL;

	/* XXX verify map block */

	map = (void *)bh->b_data;
	blkno = le64_to_cpu(map->blknos[rem]) + ring_block;
	brelse(bh);

	return scoutfs_read_block(sb, blkno);
}

int scoutfs_replay_ring(struct super_block *sb)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct scoutfs_super_block *super = &sbi->super;
	struct buffer_head *bh;
	u64 block;
	int ret;
	int i;

	/* XXX read-ahead map blocks and each set of ring blocks */

	block = le64_to_cpu(super->ring_first_block);
	for (i = 0; i < le64_to_cpu(super->ring_active_blocks); i++) {
		bh = read_ring_block(sb, block);
		if (!bh) {
			ret = -EIO;
			break;
		}

		ret = replay_ring_block(sb, bh);
		brelse(bh);
		if (ret)
			break;

		if (++block == le64_to_cpu(super->ring_total_blocks))
			block = 0;
	}

	return ret;
}
kmod/src/ring.h (Normal file, 6 lines)
@@ -0,0 +1,6 @@
#ifndef _SCOUTFS_RING_H_
#define _SCOUTFS_RING_H_

int scoutfs_replay_ring(struct super_block *sb);

#endif
@@ -24,6 +24,8 @@
 #include "dir.h"
 #include "msg.h"
 #include "block.h"
+#include "manifest.h"
+#include "ring.h"
 
 static const struct super_operations scoutfs_super_ops = {
 	.alloc_inode = scoutfs_alloc_inode,
@@ -35,6 +37,7 @@ static int read_supers(struct super_block *sb)
 	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
 	struct scoutfs_super_block *super;
 	struct buffer_head *bh = NULL;
+	unsigned long bytes;
 	int found = -1;
 	int i;
 
@@ -80,6 +83,16 @@ static int read_supers(struct super_block *sb)
 	atomic64_set(&sbi->next_ino, SCOUTFS_ROOT_INO + 1);
 	atomic64_set(&sbi->next_blkno, 2);
 
+	/* Initialize all the sb info fields which depends on the supers. */
+
+	bytes = DIV_ROUND_UP(sbi->super.total_chunks, 64) * sizeof(u64);
+	sbi->chunk_alloc_bits = vmalloc(bytes);
+	if (!sbi->chunk_alloc_bits)
+		return -ENOMEM;
+
+	/* the alloc bits default to all free then ring entries update them */
+	memset(sbi->chunk_alloc_bits, 0xff, bytes);
+
 	return 0;
 }
 
@@ -111,6 +124,14 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
 	if (ret)
 		return ret;
 
+	ret = scoutfs_setup_manifest(sb);
+	if (ret)
+		return ret;
+
+	ret = scoutfs_replay_ring(sb);
+	if (ret)
+		return ret;
+
 	inode = scoutfs_iget(sb, SCOUTFS_ROOT_INO);
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
@@ -130,6 +151,7 @@ static struct dentry *scoutfs_mount(struct file_system_type *fs_type, int flags,
 
 static void scoutfs_kill_sb(struct super_block *sb)
 {
+	scoutfs_destroy_manifest(sb);
 	kill_block_super(sb);
 	kfree(sb->s_fs_info);
 }
@@ -4,6 +4,8 @@
 #include <linux/rbtree.h>
 #include "format.h"
 
+struct scoutfs_manifest;
+
 struct scoutfs_sb_info {
 	struct scoutfs_super_block super;
 
@@ -13,6 +15,10 @@ struct scoutfs_sb_info {
 	spinlock_t item_lock;
 	struct rb_root item_root;
 	struct rb_root dirty_item_root;
+
+	struct scoutfs_manifest *mani;
+
+	__le64 *chunk_alloc_bits;
 };
 
 static inline struct scoutfs_sb_info *SCOUTFS_SB(struct super_block *sb)