Mirror of https://github.com/versity/scoutfs.git

Remove dead btree, block, and buddy code
Remove all the unused dead code from the previous btree block design.

Signed-off-by: Zach Brown <zab@versity.com>
@@ -2,6 +2,6 @@ obj-$(CONFIG_SCOUTFS_FS) := scoutfs.o

CFLAGS_scoutfs_trace.o = -I$(src)	# define_trace.h double include

scoutfs-y += alloc.o bio.o block.o btree.o buddy.o compact.o counters.o crc.o \
	     data.o dir.o kvec.o inode.o ioctl.o item.o key.o manifest.o \
	     msg.o name.o seg.o scoutfs_trace.o super.o trans.o treap.o xattr.o
scoutfs-y += alloc.o bio.o compact.o counters.o data.o dir.o kvec.o inode.o \
	     ioctl.o item.o key.o manifest.o msg.o seg.o scoutfs_trace.o \
	     super.o trans.o treap.o xattr.o
kmod/src/block.c (786 lines)

@@ -1,786 +0,0 @@
/*
 * Copyright (C) 2016 Versity Software, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/slab.h>

#include "super.h"
#include "format.h"
#include "block.h"
#include "crc.h"
#include "counters.h"
#include "buddy.h"

/*
 * scoutfs maintains a cache of metadata blocks in a radix tree.  This
 * gives us blocks bigger than page size and avoids fixing the location
 * of a logical cached block at one possible position in a larger block
 * device page cache page.
 *
 * This does the work to cow dirty blocks, track dirty blocks, generate
 * checksums as they're written, only write them in transactions, verify
 * checksums on read, and invalidate and retry reads of stale cached
 * blocks.  (That last bit only has a hint of an implementation.)
 *
 * XXX
 *  - tear down dirty blocks left by write errors on unmount
 *  - multiple smaller page allocs
 *  - vmalloc?  vm_map_ram?
 *  - blocks allocated from per-cpu pages when page size > block size
 *  - cmwq crc calcs if that makes sense
 *  - slab of block structs
 *  - don't verify checksums in end_io context?
 *  - fall back to multiple single bios per block io if bio alloc fails?
 *  - fail mount if total_blocks is greater than long radix blkno
 */

struct scoutfs_block {
|
||||
struct rw_semaphore rwsem;
|
||||
atomic_t refcount;
|
||||
struct list_head lru_entry;
|
||||
u64 blkno;
|
||||
|
||||
unsigned long bits;
|
||||
|
||||
struct super_block *sb;
|
||||
struct page *page;
|
||||
void *data;
|
||||
};
|
||||
|
||||
#define DIRTY_RADIX_TAG 0
|
||||
|
||||
enum {
|
||||
BLOCK_BIT_UPTODATE = 0,
|
||||
BLOCK_BIT_ERROR,
|
||||
BLOCK_BIT_CLASS_SET,
|
||||
};
|
||||
|
||||
static struct scoutfs_block *alloc_block(struct super_block *sb, u64 blkno)
|
||||
{
|
||||
struct scoutfs_block *bl;
|
||||
struct page *page;
|
||||
|
||||
/* we'd need to be just a bit more careful */
|
||||
BUILD_BUG_ON(PAGE_SIZE > SCOUTFS_BLOCK_SIZE);
|
||||
|
||||
bl = kzalloc(sizeof(struct scoutfs_block), GFP_NOFS);
|
||||
if (bl) {
|
||||
/* change _from_contents if allocs not aligned */
|
||||
page = alloc_pages(GFP_NOFS, SCOUTFS_BLOCK_PAGE_ORDER);
|
||||
WARN_ON_ONCE(!page);
|
||||
if (page) {
|
||||
init_rwsem(&bl->rwsem);
|
||||
atomic_set(&bl->refcount, 1);
|
||||
INIT_LIST_HEAD(&bl->lru_entry);
|
||||
bl->blkno = blkno;
|
||||
bl->sb = sb;
|
||||
bl->page = page;
|
||||
bl->data = page_address(page);
|
||||
trace_printk("allocated bl %p\n", bl);
|
||||
} else {
|
||||
kfree(bl);
|
||||
bl = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
return bl;
|
||||
}
|
||||
|
||||
void scoutfs_block_put(struct scoutfs_block *bl)
|
||||
{
|
||||
if (!IS_ERR_OR_NULL(bl) && atomic_dec_and_test(&bl->refcount)) {
|
||||
trace_printk("freeing bl %p\n", bl);
|
||||
WARN_ON_ONCE(!list_empty(&bl->lru_entry));
|
||||
__free_pages(bl->page, SCOUTFS_BLOCK_PAGE_ORDER);
|
||||
kfree(bl);
|
||||
scoutfs_inc_counter(bl->sb, block_mem_free);
|
||||
}
|
||||
}
|
||||
|
||||
static void lru_add(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl)
|
||||
{
|
||||
if (list_empty(&bl->lru_entry)) {
|
||||
list_add_tail(&bl->lru_entry, &sbi->block_lru_list);
|
||||
sbi->block_lru_nr++;
|
||||
}
|
||||
}
|
||||
|
||||
static void lru_del(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl)
|
||||
{
|
||||
if (!list_empty(&bl->lru_entry)) {
|
||||
list_del_init(&bl->lru_entry);
|
||||
sbi->block_lru_nr--;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * The caller is referencing a block but doesn't know if it's in the LRU
 * or not.  If it is, move it to the tail so it's last to be dropped by
 * the shrinker.
 */
|
||||
static void lru_move(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl)
|
||||
{
|
||||
if (!list_empty(&bl->lru_entry))
|
||||
list_move_tail(&bl->lru_entry, &sbi->block_lru_list);
|
||||
}
|
||||
|
||||
static void radix_insert(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl,
|
||||
bool dirty)
|
||||
{
|
||||
radix_tree_insert(&sbi->block_radix, bl->blkno, bl);
|
||||
if (dirty)
|
||||
radix_tree_tag_set(&sbi->block_radix, bl->blkno,
|
||||
DIRTY_RADIX_TAG);
|
||||
else
|
||||
lru_add(sbi, bl);
|
||||
atomic_inc(&bl->refcount);
|
||||
}
|
||||
|
||||
/* deleting the blkno from the radix also clears the dirty tag if it was set */
|
||||
static void radix_delete(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl)
|
||||
{
|
||||
lru_del(sbi, bl);
|
||||
radix_tree_delete(&sbi->block_radix, bl->blkno);
|
||||
scoutfs_block_put(bl);
|
||||
}
|
||||
|
||||
static int verify_block_header(struct super_block *sb, struct scoutfs_block *bl)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_block_header *hdr = bl->data;
|
||||
u32 crc = scoutfs_crc_block(hdr);
|
||||
int ret = -EIO;
|
||||
|
||||
if (le32_to_cpu(hdr->crc) != crc) {
|
||||
printk("blkno %llu hdr crc %x != calculated %x\n", bl->blkno,
|
||||
le32_to_cpu(hdr->crc), crc);
|
||||
} else if (super->hdr.fsid && hdr->fsid != super->hdr.fsid) {
|
||||
printk("blkno %llu fsid %llx != super fsid %llx\n", bl->blkno,
|
||||
le64_to_cpu(hdr->fsid), le64_to_cpu(super->hdr.fsid));
|
||||
} else if (le64_to_cpu(hdr->blkno) != bl->blkno) {
|
||||
printk("blkno %llu invalid hdr blkno %llx\n", bl->blkno,
|
||||
le64_to_cpu(hdr->blkno));
|
||||
} else {
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void block_read_end_io(struct bio *bio, int err)
|
||||
{
|
||||
struct scoutfs_block *bl = bio->bi_private;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(bl->sb);
|
||||
|
||||
if (!err && !verify_block_header(bl->sb, bl))
|
||||
set_bit(BLOCK_BIT_UPTODATE, &bl->bits);
|
||||
else
|
||||
set_bit(BLOCK_BIT_ERROR, &bl->bits);
|
||||
|
||||
/*
|
||||
* uncontended spin_lock in wake_up and unconditional smp_mb to
|
||||
* make waitqueue_active safe are about the same cost, so we
|
||||
* prefer the obviously safe choice.
|
||||
*/
|
||||
wake_up(&sbi->block_wq);
|
||||
|
||||
scoutfs_block_put(bl);
|
||||
bio_put(bio);
|
||||
}
|
||||
|
||||
/*
 * Once a transaction block is persistent it's fine to drop the dirty
 * tag.  It's been checksummed so it can be read in again.  Its seq
 * will be in the current transaction so it'll simply be dirtied and
 * checksummed and written out again.
 */
|
||||
static void block_write_end_io(struct bio *bio, int err)
|
||||
{
|
||||
struct scoutfs_block *bl = bio->bi_private;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(bl->sb);
|
||||
unsigned long flags;
|
||||
|
||||
if (!err) {
|
||||
spin_lock_irqsave(&sbi->block_lock, flags);
|
||||
radix_tree_tag_clear(&sbi->block_radix,
|
||||
bl->blkno, DIRTY_RADIX_TAG);
|
||||
lru_add(sbi, bl);
|
||||
spin_unlock_irqrestore(&sbi->block_lock, flags);
|
||||
}
|
||||
|
||||
/* not too worried about racing ints */
|
||||
if (err && !sbi->block_write_err)
|
||||
sbi->block_write_err = err;
|
||||
|
||||
if (atomic_dec_and_test(&sbi->block_writes))
|
||||
wake_up(&sbi->block_wq);
|
||||
|
||||
scoutfs_block_put(bl);
|
||||
bio_put(bio);
|
||||
|
||||
}
|
||||
|
||||
static int block_submit_bio(struct scoutfs_block *bl, int rw)
|
||||
{
|
||||
struct super_block *sb = bl->sb;
|
||||
struct bio *bio;
|
||||
int ret;
|
||||
|
||||
bio = bio_alloc(GFP_NOFS, SCOUTFS_PAGES_PER_BLOCK);
|
||||
if (WARN_ON_ONCE(!bio))
|
||||
return -ENOMEM;
|
||||
|
||||
bio->bi_sector = bl->blkno << (SCOUTFS_BLOCK_SHIFT - 9);
|
||||
bio->bi_bdev = sb->s_bdev;
|
||||
if (rw & WRITE) {
|
||||
bio->bi_end_io = block_write_end_io;
|
||||
} else
|
||||
bio->bi_end_io = block_read_end_io;
|
||||
bio->bi_private = bl;
|
||||
|
||||
ret = bio_add_page(bio, bl->page, SCOUTFS_BLOCK_SIZE, 0);
|
||||
if (WARN_ON_ONCE(ret != SCOUTFS_BLOCK_SIZE)) {
|
||||
bio_put(bio);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
atomic_inc(&bl->refcount);
|
||||
submit_bio(rw, bio);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read an existing block from the device and verify its metadata header.
|
||||
*/
|
||||
struct scoutfs_block *scoutfs_block_read(struct super_block *sb, u64 blkno)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_block *found;
|
||||
struct scoutfs_block *bl;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
/* find an existing block, dropping if it's errored */
|
||||
spin_lock_irqsave(&sbi->block_lock, flags);
|
||||
|
||||
bl = radix_tree_lookup(&sbi->block_radix, blkno);
|
||||
if (bl) {
|
||||
if (test_bit(BLOCK_BIT_ERROR, &bl->bits)) {
|
||||
radix_delete(sbi, bl);
|
||||
bl = NULL;
|
||||
} else {
|
||||
lru_move(sbi, bl);
|
||||
atomic_inc(&bl->refcount);
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&sbi->block_lock, flags);
|
||||
if (bl)
|
||||
goto wait;
|
||||
|
||||
/* allocate a new block and try to insert it */
|
||||
bl = alloc_block(sb, blkno);
|
||||
if (!bl) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
spin_lock_irqsave(&sbi->block_lock, flags);
|
||||
|
||||
found = radix_tree_lookup(&sbi->block_radix, blkno);
|
||||
if (found) {
|
||||
scoutfs_block_put(bl);
|
||||
bl = found;
|
||||
lru_move(sbi, bl);
|
||||
atomic_inc(&bl->refcount);
|
||||
} else {
|
||||
radix_insert(sbi, bl, false);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&sbi->block_lock, flags);
|
||||
radix_tree_preload_end();
|
||||
|
||||
if (!found) {
|
||||
ret = block_submit_bio(bl, READ_SYNC | REQ_META);
|
||||
if (ret)
|
||||
goto out;
|
||||
}
|
||||
|
||||
wait:
|
||||
ret = wait_event_interruptible(sbi->block_wq,
|
||||
test_bit(BLOCK_BIT_UPTODATE, &bl->bits) ||
|
||||
test_bit(BLOCK_BIT_ERROR, &bl->bits));
|
||||
if (ret == 0 && test_bit(BLOCK_BIT_ERROR, &bl->bits))
|
||||
ret = -EIO;
|
||||
out:
|
||||
if (ret) {
|
||||
scoutfs_block_put(bl);
|
||||
bl = ERR_PTR(ret);
|
||||
}
|
||||
|
||||
return bl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Read an existing block from the device described by the caller's
|
||||
* reference.
|
||||
*
|
||||
* If the reference sequence numbers don't match then we could be racing
|
||||
* with another writer. We back off and try again. If it happens too
|
||||
* many times the caller assumes that we've hit persistent corruption
|
||||
* and returns an error.
|
||||
*
|
||||
* XXX:
|
||||
* - actually implement this
|
||||
* - reads that span transactions?
|
||||
* - writers creating a new dirty block?
|
||||
*/
|
||||
struct scoutfs_block *scoutfs_block_read_ref(struct super_block *sb,
|
||||
struct scoutfs_block_ref *ref)
|
||||
{
|
||||
struct scoutfs_block_header *hdr;
|
||||
struct scoutfs_block *bl;
|
||||
|
||||
bl = scoutfs_block_read(sb, le64_to_cpu(ref->blkno));
|
||||
if (!IS_ERR(bl)) {
|
||||
hdr = scoutfs_block_data(bl);
|
||||
if (WARN_ON_ONCE(hdr->seq != ref->seq)) {
|
||||
scoutfs_block_put(bl);
|
||||
bl = ERR_PTR(-EAGAIN);
|
||||
}
|
||||
}
|
||||
|
||||
return bl;
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller knows that it's not racing with writers.
|
||||
*/
|
||||
int scoutfs_block_has_dirty(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
return radix_tree_tagged(&sbi->block_radix, DIRTY_RADIX_TAG);
|
||||
}
|
||||
|
||||
/*
|
||||
* Submit writes for all the blocks in the radix with their dirty tag
|
||||
* set. The transaction machinery ensures that the dirty blocks form a
|
||||
* consistent image and excludes future dirtying while IO is in flight.
|
||||
*
|
||||
* Presence in the dirty tree holds a reference. Blocks are only
|
||||
* removed from the tree which drops the ref when IO completes.
|
||||
*
|
||||
* Blocks that see write errors remain in the dirty tree and will try to
|
||||
* be written again in the next transaction commit.
|
||||
*
|
||||
* Reads can traverse the blocks while they're in flight.
|
||||
*/
|
||||
int scoutfs_block_write_dirty(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_block *blocks[16];
|
||||
struct scoutfs_block *bl;
|
||||
struct blk_plug plug;
|
||||
unsigned long flags;
|
||||
u64 blkno;
|
||||
int ret;
|
||||
int nr;
|
||||
int i;
|
||||
|
||||
atomic_set(&sbi->block_writes, 1);
|
||||
sbi->block_write_err = 0;
|
||||
blkno = 0;
|
||||
ret = 0;
|
||||
|
||||
blk_start_plug(&plug);
|
||||
|
||||
do {
|
||||
/* get refs to a bunch of dirty blocks */
|
||||
spin_lock_irqsave(&sbi->block_lock, flags);
|
||||
nr = radix_tree_gang_lookup_tag(&sbi->block_radix,
|
||||
(void **)blocks, blkno,
|
||||
ARRAY_SIZE(blocks),
|
||||
DIRTY_RADIX_TAG);
|
||||
if (nr > 0)
|
||||
blkno = blocks[nr - 1]->blkno + 1;
|
||||
for (i = 0; i < nr; i++)
|
||||
atomic_inc(&blocks[i]->refcount);
|
||||
spin_unlock_irqrestore(&sbi->block_lock, flags);
|
||||
|
||||
/* submit them in order, being careful to put all on err */
|
||||
for (i = 0; i < nr; i++) {
|
||||
bl = blocks[i];
|
||||
|
||||
if (ret == 0) {
|
||||
scoutfs_block_set_crc(bl);
|
||||
atomic_inc(&sbi->block_writes);
|
||||
ret = block_submit_bio(bl, WRITE);
|
||||
if (ret)
|
||||
atomic_dec(&sbi->block_writes);
|
||||
}
|
||||
scoutfs_block_put(bl);
|
||||
}
|
||||
} while (nr && !ret);
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
/* wait for all io to drain */
|
||||
atomic_dec(&sbi->block_writes);
|
||||
wait_event(sbi->block_wq, atomic_read(&sbi->block_writes) == 0);
|
||||
|
||||
return ret ?: sbi->block_write_err;
|
||||
}
|
||||
|
||||
/*
|
||||
* XXX This is a gross hack for writing the super. It doesn't have
|
||||
* per-block write completion indication. It knows that it's the only
|
||||
* thing that will be writing.
|
||||
*/
|
||||
int scoutfs_block_write_sync(struct scoutfs_block *bl)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(bl->sb);
|
||||
int ret;
|
||||
|
||||
BUG_ON(atomic_read(&sbi->block_writes) != 0);
|
||||
|
||||
atomic_inc(&sbi->block_writes);
|
||||
ret = block_submit_bio(bl, WRITE);
|
||||
if (ret)
|
||||
atomic_dec(&sbi->block_writes);
|
||||
else
|
||||
wait_event(sbi->block_wq, atomic_read(&sbi->block_writes) == 0);
|
||||
|
||||
return ret ?: sbi->block_write_err;
|
||||
}
|
||||
|
||||
/*
|
||||
* Give the caller a dirty block that they can safely modify. If the
|
||||
* reference refers to a stable clean block then we allocate a new block
|
||||
* and update the reference.
|
||||
*
|
||||
* Blocks are dirtied and modified within a transaction that has a given
|
||||
* sequence number which we use to determine if the block is currently
|
||||
* dirty or not.
|
||||
*
|
||||
* For now we're using the dirty super block in the sb_info to track the
|
||||
* dirty seq. That'll be different when we have multiple btrees.
|
||||
*
|
||||
* Callers are responsible for serializing modification to the reference
|
||||
* which is probably embedded in some other dirty persistent structure.
|
||||
*/
|
||||
struct scoutfs_block *scoutfs_block_dirty_ref(struct super_block *sb,
|
||||
struct scoutfs_block_ref *ref)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_block_header *hdr;
|
||||
struct scoutfs_block *copy_bl = NULL;
|
||||
struct scoutfs_block *bl;
|
||||
u64 blkno = 0;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
bl = scoutfs_block_read(sb, le64_to_cpu(ref->blkno));
|
||||
if (IS_ERR(bl) || ref->seq == sbi->super.hdr.seq)
|
||||
return bl;
|
||||
|
||||
ret = scoutfs_buddy_alloc_same(sb, &blkno, le64_to_cpu(ref->blkno));
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
copy_bl = scoutfs_block_dirty(sb, blkno);
|
||||
if (IS_ERR(copy_bl)) {
|
||||
ret = PTR_ERR(copy_bl);
|
||||
goto out;
|
||||
}
|
||||
|
||||
hdr = scoutfs_block_data(bl);
|
||||
ret = scoutfs_buddy_free(sb, hdr->seq, le64_to_cpu(hdr->blkno), 0);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
memcpy(scoutfs_block_data(copy_bl), scoutfs_block_data(bl),
|
||||
SCOUTFS_BLOCK_SIZE);
|
||||
|
||||
hdr = scoutfs_block_data(copy_bl);
|
||||
hdr->blkno = cpu_to_le64(blkno);
|
||||
hdr->seq = sbi->super.hdr.seq;
|
||||
ref->blkno = hdr->blkno;
|
||||
ref->seq = hdr->seq;
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
scoutfs_block_put(bl);
|
||||
if (ret) {
|
||||
if (!IS_ERR_OR_NULL(copy_bl)) {
|
||||
err = scoutfs_buddy_free(sb, sbi->super.hdr.seq,
|
||||
blkno, 0);
|
||||
WARN_ON_ONCE(err); /* freeing dirty must work */
|
||||
}
|
||||
scoutfs_block_put(copy_bl);
|
||||
copy_bl = ERR_PTR(ret);
|
||||
}
|
||||
|
||||
return copy_bl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Return a dirty metadata block with an updated block header to match
|
||||
* the current dirty seq. Callers are responsible for serializing
|
||||
* access to the block and for zeroing unwritten block contents.
|
||||
*
|
||||
* Always allocating a new block and replacing any old cached block
|
||||
* serves a very specific purpose. We can have an unlocked reader
|
||||
* traversing stable structures actively using a clean block while a
|
||||
* writer gets that same blkno from the allocator and starts modifying
|
||||
* it. By always allocating a new block we let the reader continue
|
||||
* safely using their old immutable block while the writer works on the
|
||||
* newly allocated block. The old stable block will be freed once the
|
||||
* reader drops their reference.
|
||||
*/
|
||||
struct scoutfs_block *scoutfs_block_dirty(struct super_block *sb, u64 blkno)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_block_header *hdr;
|
||||
struct scoutfs_block *found;
|
||||
struct scoutfs_block *bl;
|
||||
unsigned long flags;
|
||||
int ret;
|
||||
|
||||
/* allocate a new block and try to insert it */
|
||||
bl = alloc_block(sb, blkno);
|
||||
if (!bl) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
set_bit(BLOCK_BIT_UPTODATE, &bl->bits);
|
||||
|
||||
ret = radix_tree_preload(GFP_NOFS);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
hdr = bl->data;
|
||||
*hdr = sbi->super.hdr;
|
||||
hdr->blkno = cpu_to_le64(blkno);
|
||||
hdr->seq = sbi->super.hdr.seq;
|
||||
|
||||
spin_lock_irqsave(&sbi->block_lock, flags);
|
||||
found = radix_tree_lookup(&sbi->block_radix, blkno);
|
||||
if (found)
|
||||
radix_delete(sbi, found);
|
||||
radix_insert(sbi, bl, true);
|
||||
spin_unlock_irqrestore(&sbi->block_lock, flags);
|
||||
|
||||
radix_tree_preload_end();
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret) {
|
||||
scoutfs_block_put(bl);
|
||||
bl = ERR_PTR(ret);
|
||||
}
|
||||
|
||||
return bl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate a new dirty writable block. The caller must be in a
|
||||
* transaction so that we can assign the dirty seq.
|
||||
*/
|
||||
struct scoutfs_block *scoutfs_block_dirty_alloc(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->stable_super;
|
||||
struct scoutfs_block *bl;
|
||||
u64 blkno;
|
||||
int ret;
|
||||
int err;
|
||||
|
||||
ret = scoutfs_buddy_alloc(sb, &blkno, 0);
|
||||
if (ret < 0)
|
||||
return ERR_PTR(ret);
|
||||
|
||||
bl = scoutfs_block_dirty(sb, blkno);
|
||||
if (IS_ERR(bl)) {
|
||||
err = scoutfs_buddy_free(sb, super->hdr.seq, blkno, 0);
|
||||
WARN_ON_ONCE(err); /* freeing dirty must work */
|
||||
}
|
||||
return bl;
|
||||
}
|
||||
|
||||
/*
|
||||
* Forget the given block by removing it from the radix and clearing its
|
||||
* dirty tag. It will not be found by future lookups and will not be
|
||||
* written out. The caller can still use it until it drops its
|
||||
* reference.
|
||||
*/
|
||||
void scoutfs_block_forget(struct scoutfs_block *bl)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(bl->sb);
|
||||
struct scoutfs_block *found;
|
||||
unsigned long flags;
|
||||
u64 blkno = bl->blkno;
|
||||
|
||||
spin_lock_irqsave(&sbi->block_lock, flags);
|
||||
found = radix_tree_lookup(&sbi->block_radix, blkno);
|
||||
if (found == bl)
|
||||
radix_delete(sbi, bl);
|
||||
spin_unlock_irqrestore(&sbi->block_lock, flags);
|
||||
}
|
||||
|
||||
/*
|
||||
* We maintain an LRU of blocks so that the shrinker can free the oldest
|
||||
* under memory pressure. We can't reclaim dirty blocks so only clean
|
||||
* blocks are kept in the LRU. Blocks are only in the LRU while their
|
||||
* presence in the radix holds a reference. We don't care if a reader
|
||||
* has an active ref on a clean block that gets reclaimed. All we're
|
||||
* doing is removing from the radix. The caller can still work with the
|
||||
* block and it will be freed once they drop their ref.
|
||||
*
|
||||
* If this is called with nr_to_scan == 0 then it only returns the nr.
|
||||
* We avoid acquiring the lock in that case.
|
||||
*
|
||||
* Lookup code only moves blocks around in the LRU while they're in the
|
||||
* radix. Once we remove the block from the radix we're able to use the
|
||||
* lru_entry to drop all the blocks outside the lock.
|
||||
*
|
||||
* XXX:
|
||||
* - are sc->nr_to_scan and our return meant to be in units of pages?
|
||||
* - should we sync a transaction here?
|
||||
*/
|
||||
int scoutfs_block_shrink(struct shrinker *shrink, struct shrink_control *sc)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = container_of(shrink,
|
||||
struct scoutfs_sb_info,
|
||||
block_shrinker);
|
||||
struct scoutfs_block *tmp;
|
||||
struct scoutfs_block *bl;
|
||||
unsigned long flags;
|
||||
unsigned long nr;
|
||||
LIST_HEAD(list);
|
||||
|
||||
nr = sc->nr_to_scan;
|
||||
if (!nr)
|
||||
goto out;
|
||||
|
||||
spin_lock_irqsave(&sbi->block_lock, flags);
|
||||
|
||||
list_for_each_entry_safe(bl, tmp, &sbi->block_lru_list, lru_entry) {
|
||||
if (nr-- == 0)
|
||||
break;
|
||||
atomic_inc(&bl->refcount);
|
||||
radix_delete(sbi, bl);
|
||||
list_add(&bl->lru_entry, &list);
|
||||
}
|
||||
|
||||
spin_unlock_irqrestore(&sbi->block_lock, flags);
|
||||
|
||||
list_for_each_entry_safe(bl, tmp, &list, lru_entry) {
|
||||
list_del_init(&bl->lru_entry);
|
||||
scoutfs_block_put(bl);
|
||||
}
|
||||
|
||||
out:
|
||||
return min_t(unsigned long, sbi->block_lru_nr, INT_MAX);
|
||||
}
|
||||
|
||||
void scoutfs_block_set_crc(struct scoutfs_block *bl)
|
||||
{
|
||||
struct scoutfs_block_header *hdr = scoutfs_block_data(bl);
|
||||
|
||||
hdr->crc = cpu_to_le32(scoutfs_crc_block(hdr));
|
||||
}
|
||||
|
||||
/*
|
||||
* Zero the block from the given byte to the end of the block.
|
||||
*/
|
||||
void scoutfs_block_zero(struct scoutfs_block *bl, size_t off)
|
||||
{
|
||||
if (WARN_ON_ONCE(off > SCOUTFS_BLOCK_SIZE))
|
||||
return;
|
||||
|
||||
if (off < SCOUTFS_BLOCK_SIZE)
|
||||
memset(scoutfs_block_data(bl) + off, 0,
|
||||
SCOUTFS_BLOCK_SIZE - off);
|
||||
}
|
||||
|
||||
/*
|
||||
* Zero the block from the given byte to the end of the block.
|
||||
*/
|
||||
void scoutfs_block_zero_from(struct scoutfs_block *bl, void *ptr)
|
||||
{
|
||||
return scoutfs_block_zero(bl, (char *)ptr -
|
||||
(char *)scoutfs_block_data(bl));
|
||||
}
|
||||
|
||||
void scoutfs_block_set_lock_class(struct scoutfs_block *bl,
|
||||
struct lock_class_key *class)
|
||||
{
|
||||
if (!test_bit(BLOCK_BIT_CLASS_SET, &bl->bits)) {
|
||||
lockdep_set_class(&bl->rwsem, class);
|
||||
set_bit(BLOCK_BIT_CLASS_SET, &bl->bits);
|
||||
}
|
||||
}
|
||||
|
||||
void scoutfs_block_lock(struct scoutfs_block *bl, bool write, int subclass)
|
||||
{
|
||||
if (write)
|
||||
down_write_nested(&bl->rwsem, subclass);
|
||||
else
|
||||
down_read_nested(&bl->rwsem, subclass);
|
||||
}
|
||||
|
||||
void scoutfs_block_unlock(struct scoutfs_block *bl, bool write)
|
||||
{
|
||||
if (write)
|
||||
up_write(&bl->rwsem);
|
||||
else
|
||||
up_read(&bl->rwsem);
|
||||
}
|
||||
|
||||
void *scoutfs_block_data(struct scoutfs_block *bl)
|
||||
{
|
||||
return bl->data;
|
||||
}
|
||||
|
||||
void *scoutfs_block_data_from_contents(const void *ptr)
|
||||
{
|
||||
unsigned long addr = (unsigned long)ptr;
|
||||
|
||||
return (void *)(addr & ~((unsigned long)SCOUTFS_BLOCK_MASK));
|
||||
}
|
||||
|
||||
void scoutfs_block_destroy(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_block *blocks[16];
|
||||
struct scoutfs_block *bl;
|
||||
unsigned long blkno = 0;
|
||||
int nr;
|
||||
int i;
|
||||
|
||||
do {
|
||||
nr = radix_tree_gang_lookup(&sbi->block_radix, (void **)blocks,
|
||||
blkno, ARRAY_SIZE(blocks));
|
||||
for (i = 0; i < nr; i++) {
|
||||
bl = blocks[i];
|
||||
blkno = bl->blkno + 1;
|
||||
radix_delete(sbi, bl);
|
||||
}
|
||||
} while (nr);
|
||||
}
|
||||
@@ -1,38 +0,0 @@
#ifndef _SCOUTFS_BLOCK_H_
#define _SCOUTFS_BLOCK_H_

struct scoutfs_block;

#include <linux/fs.h>

struct scoutfs_block *scoutfs_block_read(struct super_block *sb, u64 blkno);
struct scoutfs_block *scoutfs_block_read_ref(struct super_block *sb,
					     struct scoutfs_block_ref *ref);

struct scoutfs_block *scoutfs_block_dirty(struct super_block *sb, u64 blkno);
struct scoutfs_block *scoutfs_block_dirty_alloc(struct super_block *sb);
struct scoutfs_block *scoutfs_block_dirty_ref(struct super_block *sb,
					      struct scoutfs_block_ref *ref);

int scoutfs_block_has_dirty(struct super_block *sb);
int scoutfs_block_write_dirty(struct super_block *sb);
int scoutfs_block_write_sync(struct scoutfs_block *bl);

void scoutfs_block_set_crc(struct scoutfs_block *bl);
void scoutfs_block_zero(struct scoutfs_block *bl, size_t off);
void scoutfs_block_zero_from(struct scoutfs_block *bl, void *ptr);

void scoutfs_block_set_lock_class(struct scoutfs_block *bl,
				  struct lock_class_key *class);
void scoutfs_block_lock(struct scoutfs_block *bl, bool write, int subclass);
void scoutfs_block_unlock(struct scoutfs_block *bl, bool write);

void *scoutfs_block_data(struct scoutfs_block *bl);
void *scoutfs_block_data_from_contents(const void *ptr);
void scoutfs_block_forget(struct scoutfs_block *bl);
void scoutfs_block_put(struct scoutfs_block *bl);

int scoutfs_block_shrink(struct shrinker *shrink, struct shrink_control *sc);
void scoutfs_block_destroy(struct super_block *sb);

#endif
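For orientation, the removed block cache exposed a read/cow/put pattern: readers took a reference with scoutfs_block_read() or scoutfs_block_read_ref(), worked on scoutfs_block_data(), and dropped the reference with scoutfs_block_put(), while writers went through scoutfs_block_dirty_ref() to cow a clean block into the current transaction. The following is a minimal hypothetical sketch of such a caller against the removed interface; it is illustrative only, not code from this repository, and error handling is abbreviated.

/* Hypothetical caller of the removed block API; not part of this commit. */
static int example_read_then_cow(struct super_block *sb,
				 struct scoutfs_block_ref *ref)
{
	struct scoutfs_block *bl;

	/* read and verify the clean block that the caller's ref points at */
	bl = scoutfs_block_read_ref(sb, ref);
	if (IS_ERR(bl))
		return PTR_ERR(bl);
	/* ... inspect scoutfs_block_data(bl) ... */
	scoutfs_block_put(bl);

	/* cow the block into the dirty transaction, updating the ref */
	bl = scoutfs_block_dirty_ref(sb, ref);
	if (IS_ERR(bl))
		return PTR_ERR(bl);
	/* ... modify scoutfs_block_data(bl) ... */
	scoutfs_block_put(bl);

	return 0;
}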
kmod/src/btree.c (1582 lines): file diff suppressed because it is too large
@@ -1,77 +0,0 @@
#ifndef _SCOUTFS_BTREE_H_
#define _SCOUTFS_BTREE_H_

#include <linux/uio.h>

struct scoutfs_btree_val {
	struct kvec vec[3];
	unsigned int check_size_eq:1;
	unsigned int check_size_lte:1;
};

static inline void __scoutfs_btree_init_val(struct scoutfs_btree_val *val,
					    void *ptr0, unsigned int len0,
					    void *ptr1, unsigned int len1,
					    void *ptr2, unsigned int len2)
{
	*val = (struct scoutfs_btree_val) {
		{ { ptr0, len0 }, { ptr1, len1 }, { ptr2, len2 } }
	};
}

#define _scoutfs_btree_init_val(v, p0, l0, p1, l1, p2, l2, ...) \
	__scoutfs_btree_init_val(v, p0, l0, p1, l1, p2, l2)

/*
 * Provide a nice variadic initialization function without having to
 * iterate over the caller's arg types.  We play some macro games to pad
 * out the caller's ptr/len pairs to the full possible number.  This will
 * produce confusing errors if an odd number of arguments is given and
 * the padded ptr/length types aren't compatible with the fixed
 * arguments in the static inline.
 */
#define scoutfs_btree_init_val(val, ...) \
	_scoutfs_btree_init_val(val, __VA_ARGS__, NULL, 0, NULL, 0, NULL, 0)

static inline int scoutfs_btree_val_length(struct scoutfs_btree_val *val)
{
	return iov_length((struct iovec *)val->vec, ARRAY_SIZE(val->vec));
}

int scoutfs_btree_lookup(struct super_block *sb,
			 struct scoutfs_btree_root *root,
			 struct scoutfs_key *key,
			 struct scoutfs_btree_val *val);
int scoutfs_btree_insert(struct super_block *sb,
			 struct scoutfs_btree_root *root,
			 struct scoutfs_key *key,
			 struct scoutfs_btree_val *val);
int scoutfs_btree_delete(struct super_block *sb,
			 struct scoutfs_btree_root *root,
			 struct scoutfs_key *key);
int scoutfs_btree_next(struct super_block *sb, struct scoutfs_btree_root *root,
		       struct scoutfs_key *first, struct scoutfs_key *last,
		       struct scoutfs_key *found,
		       struct scoutfs_btree_val *val);
int scoutfs_btree_prev(struct super_block *sb, struct scoutfs_btree_root *root,
		       struct scoutfs_key *first, struct scoutfs_key *last,
		       struct scoutfs_key *found, u64 *found_seq,
		       struct scoutfs_btree_val *val);
int scoutfs_btree_dirty(struct super_block *sb,
			struct scoutfs_btree_root *root,
			struct scoutfs_key *key);
int scoutfs_btree_update(struct super_block *sb,
			 struct scoutfs_btree_root *root,
			 struct scoutfs_key *key,
			 struct scoutfs_btree_val *val);
int scoutfs_btree_hole(struct super_block *sb, struct scoutfs_btree_root *root,
		       struct scoutfs_key *first,
		       struct scoutfs_key *last, struct scoutfs_key *hole);
int scoutfs_btree_since(struct super_block *sb,
			struct scoutfs_btree_root *root,
			struct scoutfs_key *first, struct scoutfs_key *last,
			u64 seq, struct scoutfs_key *found, u64 *found_seq,
			struct scoutfs_btree_val *val);

#endif
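The variadic scoutfs_btree_init_val() macro above pads the caller's ptr/len pairs out to the three kvecs in struct scoutfs_btree_val. A hypothetical caller might have looked like the sketch below; it is illustrative only, and the meaning attributed to check_size_eq (requiring the stored value to match the buffer size exactly) is an assumption based on the field name.

/* Illustrative use of the removed btree value helpers; not repository code. */
static int example_lookup_inode_item(struct super_block *sb,
				     struct scoutfs_key *key,
				     struct scoutfs_inode *sinode)
{
	struct scoutfs_btree_val val;

	/* one ptr/len pair; the macro pads the other two vectors with NULL, 0 */
	scoutfs_btree_init_val(&val, sinode, sizeof(*sinode));
	val.check_size_eq = 1;	/* assumed: demand an exact value size */

	return scoutfs_btree_lookup(sb, SCOUTFS_META(sb), key, &val);
}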
kmod/src/buddy.c (1063 lines): file diff suppressed because it is too large
@@ -1,20 +0,0 @@
#ifndef _SCOUTFS_BUDDY_H_
#define _SCOUTFS_BUDDY_H_

int scoutfs_buddy_alloc(struct super_block *sb, u64 *blkno, int order);
int scoutfs_buddy_alloc_same(struct super_block *sb, u64 *blkno, u64 existing);
int scoutfs_buddy_free(struct super_block *sb, __le64 seq, u64 blkno,
		       int order);
void scoutfs_buddy_free_extent(struct super_block *sb, u64 blkno, u64 count);

int scoutfs_buddy_was_free(struct super_block *sb, u64 blkno, int order);
u64 scoutfs_buddy_bfree(struct super_block *sb);

unsigned int scoutfs_buddy_alloc_count(struct super_block *sb);
int scoutfs_buddy_apply_pending(struct super_block *sb, bool alloc);
void scoutfs_buddy_committed(struct super_block *sb);

int scoutfs_buddy_setup(struct super_block *sb);
void scoutfs_buddy_destroy(struct super_block *sb);

#endif
@@ -14,8 +14,6 @@
#define EXPAND_EACH_COUNTER				\
	EXPAND_COUNTER(alloc_alloc)			\
	EXPAND_COUNTER(alloc_free)			\
	EXPAND_COUNTER(block_mem_alloc)			\
	EXPAND_COUNTER(block_mem_free)			\
	EXPAND_COUNTER(seg_lru_shrink)			\
	EXPAND_COUNTER(trans_level0_seg_write)		\
	EXPAND_COUNTER(manifest_compact_migrate)	\
@@ -1,23 +0,0 @@
/*
 * Copyright (C) 2015 Versity Software, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/crc32c.h>

#include "format.h"
#include "crc.h"

u32 scoutfs_crc_block(struct scoutfs_block_header *hdr)
{
	return crc32c(~0, (char *)hdr + sizeof(hdr->crc),
		      SCOUTFS_BLOCK_SIZE - sizeof(hdr->crc));
}
@@ -1,6 +0,0 @@
#ifndef _SCOUTFS_CRC_H_
#define _SCOUTFS_CRC_H_

u32 scoutfs_crc_block(struct scoutfs_block_header *hdr);

#endif
@@ -23,9 +23,7 @@
#include "inode.h"
#include "key.h"
#include "super.h"
#include "btree.h"
#include "trans.h"
#include "name.h"
#include "xattr.h"
#include "kvec.h"
#include "item.h"
@@ -35,9 +35,6 @@
 */
#define SCOUTFS_SUPER_BLKNO ((64 * 1024) >> SCOUTFS_BLOCK_SHIFT)
#define SCOUTFS_SUPER_NR 2
#define SCOUTFS_BUDDY_BLKNO (SCOUTFS_SUPER_BLKNO + SCOUTFS_SUPER_NR)

#define SCOUTFS_MAX_TRANS_BLOCKS (128 * 1024 * 1024 / SCOUTFS_BLOCK_SIZE)

/*
 * This header is found at the start of every block so that we can
@@ -161,70 +158,6 @@ struct scoutfs_segment_block {
	/* packed vals */
} __packed;

/*
 * Block references include the sequence number so that we can detect
 * readers racing with writers and so that we can tell that we don't
 * need to follow a reference when traversing based on seqs.
 */
struct scoutfs_block_ref {
	__le64 blkno;
	__le64 seq;
} __packed;

/*
 * If the block was full of bits the largest possible order would be
 * the block size shift + 3 (BITS_PER_BYTE).  But the header uses
 * up some space and then the buddy bits mean two bits per block.
 * Then +1 for this being the number, not the greatest order.
 */
#define SCOUTFS_BUDDY_ORDERS (SCOUTFS_BLOCK_SHIFT + 3 - 2 + 1)

struct scoutfs_buddy_block {
	struct scoutfs_block_header hdr;
	__le16 first_set[SCOUTFS_BUDDY_ORDERS];
	__u8 level;
	__u8 __pad[3]; /* naturally align bits */
	union {
		struct scoutfs_buddy_slot {
			__le64 seq;
			__le16 free_orders;
			/* XXX seems like we could hide a bit somewhere */
			__u8 blkno_off;
		} __packed slots[0];
		__le64 bits[0];
	} __packed;
} __packed;

/*
 * Each buddy leaf block references order 0 blocks with half of its
 * bitmap.  The other half of the bits are used for the higher order
 * bits.
 */
#define SCOUTFS_BUDDY_ORDER0_BITS \
	(((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_buddy_block)) * 8) / 2)

#define SCOUTFS_BUDDY_SLOTS \
	((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_buddy_block)) / \
	 sizeof(struct scoutfs_buddy_slot))

struct scoutfs_buddy_root {
	struct scoutfs_buddy_slot slot;
	__u8 height;
} __packed;

/* ((SCOUTFS_BUDDY_SLOTS^5) * SCOUTFS_BUDDY_ORDER0_BITS) > 2^52 */
#define SCOUTFS_BUDDY_MAX_HEIGHT 6

/*
 * We should be able to make the offset smaller if neither dirents nor
 * data items use the full 64 bits.
 */
struct scoutfs_key {
	__le64 inode;
	u8 type;
	__le64 offset;
} __packed;

/*
 * Currently we sort keys by the numeric value of the types, but that
 * isn't necessary.  We could have an arbitrary sort order.  So we don't
@@ -241,8 +174,6 @@ struct scoutfs_key {
#define SCOUTFS_DATA_KEY 11
#define SCOUTFS_MAX_UNUSED_KEY 255

#define SCOUTFS_MAX_ITEM_LEN 512

/* value is struct scoutfs_inode */
struct scoutfs_inode_key {
	__u8 type;
@@ -307,66 +238,9 @@ struct scoutfs_symlink_key {
	__be64 ino;
} __packed;

struct scoutfs_btree_root {
	u8 height;
	struct scoutfs_block_ref ref;
} __packed;

/*
 * @free_end: records the byte offset of the first byte after the free
 * space in the block between the header and the first item.  New items
 * are allocated by subtracting the space they need.
 *
 * @free_reclaim: records the number of bytes of free space amongst the
 * items after free_end.  If a block is compacted then this much new
 * free space would be reclaimed.
 */
struct scoutfs_btree_block {
	struct scoutfs_block_header hdr;
	__le16 free_end;
	__le16 free_reclaim;
	__le16 nr_items;
	__le16 item_offs[0];
} __packed;

/*
 * The item sequence number is set to the dirty block's sequence number
 * when the item is modified.  It is not changed by splits or merges.
 */
struct scoutfs_btree_item {
	struct scoutfs_key key;
	__le64 seq;
	__le16 val_len;
	char val[0];
} __packed;

/* Blocks are no more than half free. */
#define SCOUTFS_BTREE_FREE_LIMIT \
	((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_btree_block)) / 2)

/* XXX does this exist upstream somewhere? */
#define member_sizeof(TYPE, MEMBER) (sizeof(((TYPE *)0)->MEMBER))

#define SCOUTFS_BTREE_MAX_ITEMS \
	((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_btree_block)) / \
	 (member_sizeof(struct scoutfs_btree_block, item_offs[0]) + \
	  sizeof(struct scoutfs_btree_item)))

/*
 * We can calculate the max tree depth by calculating how many leaf
 * blocks the tree could reference.  The block device can only reference
 * 2^64 bytes.  The tallest parent tree has half full parent blocks.
 *
 * So we have the relation:
 *
 *   ceil(max_items / 2) ^ (max_depth - 1) >= 2^64 / block_size
 *
 * and solve for depth:
 *
 *   max_depth = log(ceil(max_items / 2), 2^64 / block_size) + 1
 */
#define SCOUTFS_BTREE_MAX_DEPTH 10

#define SCOUTFS_UUID_BYTES 16

/*
@@ -382,16 +256,11 @@ struct scoutfs_super_block {
	__le64 alloc_uninit;
	__le64 total_segs;
	__le64 free_segs;
	__le64 total_blocks;
	__le64 free_blocks;
	__le64 ring_blkno;
	__le64 ring_blocks;
	__le64 ring_tail_block;
	__le64 ring_gen;
	__le64 next_seg_seq;
	__le64 buddy_blocks;
	struct scoutfs_buddy_root buddy_root;
	struct scoutfs_btree_root btree_root;
	struct scoutfs_treap_root alloc_treap_root;
	struct scoutfs_manifest manifest;
} __packed;
@@ -418,7 +287,6 @@ struct scoutfs_timespec {
struct scoutfs_inode {
	__le64 size;
	__le64 blocks;
	__le64 link_counter;
	__le64 data_version;
	__le64 next_readdir_pos;
	__le32 nlink;
@@ -426,7 +294,6 @@ struct scoutfs_inode {
	__le32 gid;
	__le32 mode;
	__le32 rdev;
	__le32 salt;
	struct scoutfs_timespec atime;
	struct scoutfs_timespec ctime;
	struct scoutfs_timespec mtime;
@@ -449,20 +316,6 @@ struct scoutfs_dirent {
	__u8 name[0];
} __packed;

/*
 * Dirent items are stored at keys with the offset set to the hash of
 * the name.  Creation can find that hash values collide and will
 * attempt to linearly probe this many following hash values looking for
 * an unused value.
 *
 * In small directories this doesn't really matter because hash values
 * will so very rarely collide.  At around 50k items we start to see our
 * first collisions.  16 slots is still pretty quick to scan in the
 * btree and it gets us up into the hundreds of millions of entries
 * before enospc is returned as we run out of hash values.
 */
#define SCOUTFS_DIRENT_COLL_NR 16

#define SCOUTFS_NAME_LEN 255

/* S32_MAX avoids the (int) sign bit and might avoid sloppy bugs */
@@ -475,14 +328,6 @@ struct scoutfs_dirent {
#define SCOUTFS_XATTR_MAX_PARTS \
	DIV_ROUND_UP(SCOUTFS_XATTR_MAX_SIZE, SCOUTFS_XATTR_PART_SIZE)

/*
 * We only use 31 bits for readdir positions so that we don't confuse
 * old signed 32bit f_pos applications or those on the other side of
 * network protocols that have limited readdir positions.
 */

#define SCOUTFS_DIRENT_OFF_BITS 31
#define SCOUTFS_DIRENT_OFF_MASK ((1U << SCOUTFS_DIRENT_OFF_BITS) - 1)
/* entries begin after . and .. */
#define SCOUTFS_DIRENT_FIRST_POS 2
/* getdents returns next pos with an entry, no entry at (f_pos)~0 */
@@ -499,14 +344,6 @@ enum {
	SCOUTFS_DT_WHT,
};

struct scoutfs_extent {
	__le64 blkno;
	__le64 len;
	__u8 flags;
} __packed;

#define SCOUTFS_EXTENT_FLAG_OFFLINE (1 << 0)

/* ino_path can search for backref items with a null term */
#define SCOUTFS_MAX_KEY_SIZE \
	offsetof(struct scoutfs_link_backref_key, name[SCOUTFS_NAME_LEN + 1])
@@ -22,7 +22,6 @@
#include "super.h"
#include "key.h"
#include "inode.h"
#include "btree.h"
#include "dir.h"
#include "data.h"
#include "scoutfs_trace.h"

@@ -126,8 +125,6 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
	inode->i_ctime.tv_sec = le64_to_cpu(cinode->ctime.sec);
	inode->i_ctime.tv_nsec = le32_to_cpu(cinode->ctime.nsec);

	ci->salt = le32_to_cpu(cinode->salt);
	atomic64_set(&ci->link_counter, le64_to_cpu(cinode->link_counter));
	ci->data_version = le64_to_cpu(cinode->data_version);
	ci->next_readdir_pos = le64_to_cpu(cinode->next_readdir_pos);
}

@@ -247,8 +244,6 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
	cinode->mtime.sec = cpu_to_le64(inode->i_mtime.tv_sec);
	cinode->mtime.nsec = cpu_to_le32(inode->i_mtime.tv_nsec);

	cinode->salt = cpu_to_le32(ci->salt);
	cinode->link_counter = cpu_to_le64(atomic64_read(&ci->link_counter));
	cinode->data_version = cpu_to_le64(ci->data_version);
	cinode->next_readdir_pos = cpu_to_le64(ci->next_readdir_pos);
}

@@ -415,8 +410,6 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
	ci->data_version = 0;
	ci->next_readdir_pos = SCOUTFS_DIRENT_FIRST_POS;
	ci->staging = false;
	get_random_bytes(&ci->salt, sizeof(ci->salt));
	atomic64_set(&ci->link_counter, 0);

	inode->i_ino = ino; /* XXX overflow */
	inode_init_owner(inode, dir, mode);
@@ -5,7 +5,6 @@

struct scoutfs_inode_info {
	u64 ino;
	u32 salt;

	seqcount_t seqcount;
	u64 data_version;
@@ -14,7 +13,6 @@ struct scoutfs_inode_info {
	/* holder of i_mutex is staging */
	bool staging;

	atomic64_t link_counter;
	struct rw_semaphore xattr_rwsem;

	struct inode inode;
@@ -24,7 +24,6 @@
#include "format.h"
#include "key.h"
#include "dir.h"
#include "name.h"
#include "ioctl.h"
#include "super.h"
#include "inode.h"
kmod/src/key.h (123 lines)
@@ -126,127 +126,4 @@ static inline void scoutfs_key_set_max(struct scoutfs_key_buf *key)
	scoutfs_key_memset(key, 0xff, sizeof(struct scoutfs_inode_key));
}

/*
 * What follows are the key functions for the small fixed size btree
 * keys.  It will all be removed once the callers are converted from
 * the btree to the item cache.
 */

#define CKF "%llu.%u.%llu"
#define CKA(key) \
	le64_to_cpu((key)->inode), (key)->type, le64_to_cpu((key)->offset)

static inline u64 scoutfs_key_inode(struct scoutfs_key *key)
{
	return le64_to_cpu(key->inode);
}

static inline u64 scoutfs_key_offset(struct scoutfs_key *key)
{
	return le64_to_cpu(key->offset);
}

static inline int le64_cmp(__le64 a, __le64 b)
{
	return le64_to_cpu(a) < le64_to_cpu(b) ? -1 :
	       le64_to_cpu(a) > le64_to_cpu(b) ? 1 : 0;
}

/*
 * Items are sorted by type and then by inode to reflect the relative
 * frequency of use.  Inodes and xattrs are hot, then dirents, then file
 * data extents.  We want each use class to be hot and dense, we don't
 * want a scan of the inodes to have to skip over each inode's extent
 * items.
 */
static inline int scoutfs_key_cmp(struct scoutfs_key *a, struct scoutfs_key *b)
{
	return ((short)a->type - (short)b->type) ?:
	       le64_cmp(a->inode, b->inode) ?:
	       le64_cmp(a->offset, b->offset);
}

/*
 * return -ve if the first range is completely before the second, +ve for
 * completely after, and 0 if they intersect.
 */
static inline int scoutfs_cmp_key_ranges(struct scoutfs_key *a_first,
					 struct scoutfs_key *a_last,
					 struct scoutfs_key *b_first,
					 struct scoutfs_key *b_last)
{
	if (scoutfs_key_cmp(a_last, b_first) < 0)
		return -1;
	if (scoutfs_key_cmp(a_first, b_last) > 0)
		return 1;
	return 0;
}

static inline int scoutfs_cmp_key_range(struct scoutfs_key *key,
					struct scoutfs_key *first,
					struct scoutfs_key *last)
{
	return scoutfs_cmp_key_ranges(key, key, first, last);
}

static inline void scoutfs_set_key(struct scoutfs_key *key, u64 inode, u8 type,
				   u64 offset)
{
	key->inode = cpu_to_le64(inode);
	key->type = type;
	key->offset = cpu_to_le64(offset);
}

static inline void scoutfs_set_max_key(struct scoutfs_key *key)
{
	scoutfs_set_key(key, ~0ULL, ~0, ~0ULL);
}

/*
 * This saturates at (~0,~0,~0) instead of wrapping.  This will never be
 * an issue for real item keys but parent item keys along the right
 * spine of the tree have maximal key values that could wrap if
 * incremented.
 */
static inline void scoutfs_inc_key(struct scoutfs_key *key)
{
	if (key->inode == cpu_to_le64(~0ULL) &&
	    key->type == (u8)~0 &&
	    key->offset == cpu_to_le64(~0ULL))
		return;

	le64_add_cpu(&key->offset, 1);
	if (!key->offset) {
		if (++key->type == 0)
			le64_add_cpu(&key->inode, 1);
	}
}

static inline void scoutfs_dec_key(struct scoutfs_key *key)
{
	le64_add_cpu(&key->offset, -1ULL);
	if (key->offset == cpu_to_le64(~0ULL)) {
		if (key->type-- == 0)
			le64_add_cpu(&key->inode, -1ULL);
	}
}

static inline struct scoutfs_key *scoutfs_max_key(struct scoutfs_key *a,
						  struct scoutfs_key *b)
{
	return scoutfs_key_cmp(a, b) > 0 ? a : b;
}

static inline bool scoutfs_key_is_zero(struct scoutfs_key *key)
{
	return key->inode == 0 && key->type == 0 && key->offset == 0;
}

static inline void scoutfs_key_set_zero(struct scoutfs_key *key)
{
	key->inode = 0;
	key->type = 0;
	key->offset = 0;
}

#endif
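To make the ordering and saturation rules above concrete, a small hypothetical snippet using the removed fixed-size key helpers (illustrative only, not code from this commit):

/* Illustrative only; exercises the removed fixed-size key helpers. */
static void example_key_ordering(void)
{
	struct scoutfs_key a, b;

	/* same type, so inode 1 sorts before inode 2 regardless of offset */
	scoutfs_set_key(&a, 1, SCOUTFS_DATA_KEY, 5);
	scoutfs_set_key(&b, 2, SCOUTFS_DATA_KEY, 0);
	WARN_ON(scoutfs_key_cmp(&a, &b) >= 0);

	/* incrementing the maximal key saturates instead of wrapping to zero */
	scoutfs_set_max_key(&a);
	b = a;
	scoutfs_inc_key(&a);
	WARN_ON(scoutfs_key_cmp(&a, &b) != 0);
}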
@@ -25,10 +25,8 @@
#include "dir.h"
#include "xattr.h"
#include "msg.h"
#include "block.h"
#include "counters.h"
#include "trans.h"
#include "buddy.h"
#include "kvec.h"
#include "scoutfs_trace.h"

@@ -1,35 +0,0 @@
/*
 * Copyright (C) 2016 Versity Software, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/crc32c.h>
#include <linux/string.h>

#include "name.h"

/*
 * XXX This crc nonsense is a quick hack.  We'll want something a
 * lot stronger like siphash.
 */
u64 scoutfs_name_hash(const char *name, unsigned int len)
{
	unsigned int half = (len + 1) / 2;

	return crc32c(~0, name, half) |
	       ((u64)crc32c(~0, name + len - half, half) << 32);
}

int scoutfs_names_equal(const char *name_a, int len_a,
			const char *name_b, int len_b)
{
	return (len_a == len_b) && !memcmp(name_a, name_b, len_a);
}
@@ -1,8 +0,0 @@
#ifndef _SCOUTFS_NAME_H_
#define _SCOUTFS_NAME_H_

u64 scoutfs_name_hash(const char *data, unsigned int len);
int scoutfs_names_equal(const char *name_a, int len_a,
			const char *name_b, int len_b);

#endif
@@ -23,7 +23,6 @@
#include "inode.h"
#include "dir.h"
#include "msg.h"
#include "block.h"

#define CREATE_TRACE_POINTS
#include "scoutfs_trace.h"
@@ -180,171 +180,6 @@ TRACE_EVENT(scoutfs_scan_orphans,
|
||||
TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_buddy_alloc,
|
||||
TP_PROTO(u64 blkno, int order, int region, int ret),
|
||||
|
||||
TP_ARGS(blkno, order, region, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, blkno)
|
||||
__field(int, order)
|
||||
__field(int, region)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->blkno = blkno;
|
||||
__entry->order = order;
|
||||
__entry->region = region;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk("blkno %llu order %d region %d ret %d",
|
||||
__entry->blkno, __entry->order, __entry->region, __entry->ret)
|
||||
);
|
||||
|
||||
|
||||
TRACE_EVENT(scoutfs_buddy_free,
|
||||
TP_PROTO(u64 blkno, int order, int region, int ret),
|
||||
|
||||
TP_ARGS(blkno, order, region, ret),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field(u64, blkno)
|
||||
__field(int, order)
|
||||
__field(int, region)
|
||||
__field(int, ret)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->blkno = blkno;
|
||||
__entry->order = order;
|
||||
__entry->region = region;
|
||||
__entry->ret = ret;
|
||||
),
|
||||
|
||||
TP_printk("blkno %llu order %d region %d ret %d",
|
||||
__entry->blkno, __entry->order, __entry->region, __entry->ret)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_btree_op,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
|
||||
|
||||
TP_ARGS(sb, key, len),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( dev_t, dev )
|
||||
__field( u64, key_ino )
|
||||
__field( u64, key_off )
|
||||
__field( u8, key_type )
|
||||
__field( int, val_len )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = sb->s_dev;
|
||||
__entry->key_ino = le64_to_cpu(key->inode);
|
||||
__entry->key_off = le64_to_cpu(key->offset);
|
||||
__entry->key_type = key->type;
|
||||
__entry->val_len = len;
|
||||
),
|
||||
|
||||
TP_printk("dev %d,%d key "TRACE_KEYF" size %d",
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev),
|
||||
__entry->key_ino, show_key_type(__entry->key_type),
|
||||
__entry->key_off, __entry->val_len)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_btree_op, scoutfs_btree_lookup,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
|
||||
|
||||
TP_ARGS(sb, key, len)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_btree_op, scoutfs_btree_insert,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
|
||||
|
||||
TP_ARGS(sb, key, len)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_btree_op, scoutfs_btree_delete,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
|
||||
|
||||
TP_ARGS(sb, key, len)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_btree_op, scoutfs_btree_dirty,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
|
||||
|
||||
TP_ARGS(sb, key, len)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_btree_op, scoutfs_btree_update,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
|
||||
|
||||
TP_ARGS(sb, key, len)
|
||||
);
|
||||
|
||||
DECLARE_EVENT_CLASS(scoutfs_btree_ranged_op,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *first,
|
||||
struct scoutfs_key *last),
|
||||
|
||||
TP_ARGS(sb, first, last),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
__field( dev_t, dev )
|
||||
__field( u64, first_ino )
|
||||
__field( u64, first_off )
|
||||
__field( u8, first_type )
|
||||
__field( u64, last_ino )
|
||||
__field( u64, last_off )
|
||||
__field( u8, last_type )
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
__entry->dev = sb->s_dev;
|
||||
__entry->first_ino = le64_to_cpu(first->inode);
|
||||
__entry->first_off = le64_to_cpu(first->offset);
|
||||
__entry->first_type = first->type;
|
||||
__entry->last_ino = le64_to_cpu(last->inode);
|
||||
__entry->last_off = le64_to_cpu(last->offset);
|
||||
__entry->last_type = last->type;
|
||||
),
|
||||
|
||||
TP_printk("dev %d,%d first key "TRACE_KEYF" last key "TRACE_KEYF,
|
||||
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->first_ino,
|
||||
show_key_type(__entry->first_type), __entry->first_off,
|
||||
__entry->last_ino, show_key_type(__entry->last_type),
|
||||
__entry->last_off)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_btree_ranged_op, scoutfs_btree_hole,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *first,
|
||||
struct scoutfs_key *last),
|
||||
|
||||
TP_ARGS(sb, first, last)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_btree_ranged_op, scoutfs_btree_next,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *first,
|
||||
struct scoutfs_key *last),
|
||||
|
||||
TP_ARGS(sb, first, last)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_btree_ranged_op, scoutfs_btree_prev,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *first,
|
||||
struct scoutfs_key *last),
|
||||
|
||||
TP_ARGS(sb, first, last)
|
||||
);
|
||||
|
||||
DEFINE_EVENT(scoutfs_btree_ranged_op, scoutfs_btree_since,
|
||||
TP_PROTO(struct super_block *sb, struct scoutfs_key *first,
|
||||
struct scoutfs_key *last),
|
||||
|
||||
TP_ARGS(sb, first, last)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_manifest_add,
|
||||
TP_PROTO(struct super_block *sb, struct kvec *first,
|
||||
struct kvec *last, u64 segno, u64 seq, u8 level),
|
||||
|
||||
@@ -25,10 +25,8 @@
#include "dir.h"
#include "xattr.h"
#include "msg.h"
#include "block.h"
#include "counters.h"
#include "trans.h"
#include "buddy.h"
#include "item.h"
#include "manifest.h"
#include "seg.h"
@@ -96,8 +94,6 @@ void scoutfs_advance_dirty_super(struct super_block *sb)
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct scoutfs_super_block *super = &sbi->super;

	sbi->stable_super = sbi->super;

	le64_add_cpu(&super->hdr.blkno, 1);
	if (le64_to_cpu(super->hdr.blkno) == (SCOUTFS_SUPER_BLKNO +
					      SCOUTFS_SUPER_NR))
@@ -182,8 +178,6 @@ static int read_supers(struct super_block *sb)
	scoutfs_info(sb, "using super %u with seq %llu",
		     found, le64_to_cpu(sbi->super.hdr.seq));

	sbi->stable_super = sbi->super;

	return 0;
}

@@ -204,23 +198,12 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
		return -ENOMEM;

	spin_lock_init(&sbi->next_ino_lock);
	spin_lock_init(&sbi->block_lock);
	/* radix only inserted with NOFS _preload */
	INIT_RADIX_TREE(&sbi->block_radix, GFP_ATOMIC);
	init_waitqueue_head(&sbi->block_wq);
	atomic_set(&sbi->block_writes, 0);
	INIT_LIST_HEAD(&sbi->block_lru_list);
	init_rwsem(&sbi->btree_rwsem);
	atomic_set(&sbi->trans_holds, 0);
	init_waitqueue_head(&sbi->trans_hold_wq);
	spin_lock_init(&sbi->trans_write_lock);
	INIT_WORK(&sbi->trans_write_work, scoutfs_trans_write_func);
	init_waitqueue_head(&sbi->trans_write_wq);

	sbi->block_shrinker.shrink = scoutfs_block_shrink;
	sbi->block_shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&sbi->block_shrinker);

	/* XXX can have multiple mounts of a device, need mount id */
	sbi->kset = kset_create_and_add(sb->s_id, NULL, &scoutfs_kset->kobj);
	if (!sbi->kset)
@@ -269,16 +252,12 @@ static void scoutfs_kill_sb(struct super_block *sb)
	if (sbi) {
		scoutfs_compact_destroy(sb);
		scoutfs_shutdown_trans(sb);
		scoutfs_buddy_destroy(sb);
		if (sbi->block_shrinker.shrink == scoutfs_block_shrink)
			unregister_shrinker(&sbi->block_shrinker);
		scoutfs_data_destroy(sb);
		scoutfs_item_destroy(sb);
		scoutfs_alloc_destroy(sb);
		scoutfs_manifest_destroy(sb);
		scoutfs_treap_destroy(sb);
		scoutfs_seg_destroy(sb);
		scoutfs_block_destroy(sb);
		scoutfs_destroy_counters(sb);
		if (sbi->kset)
			kset_unregister(sbi->kset);
@@ -5,10 +5,8 @@
#include <linux/rbtree.h>

#include "format.h"
#include "buddy.h"

struct scoutfs_counters;
struct buddy_info;
struct item_cache;
struct manifest;
struct segment_cache;
@@ -20,20 +18,9 @@ struct scoutfs_sb_info {
	struct super_block *sb;

	struct scoutfs_super_block super;
	struct scoutfs_super_block stable_super;

	spinlock_t next_ino_lock;

	spinlock_t block_lock;
	struct radix_tree_root block_radix;
	wait_queue_head_t block_wq;
	atomic_t block_writes;
	int block_write_err;
	/* block cache lru */
	struct shrinker block_shrinker;
	struct list_head block_lru_list;
	unsigned long block_lru_nr;

	struct manifest *manifest;
	struct item_cache *item_cache;
	struct segment_cache *segment_cache;
@@ -42,10 +29,6 @@ struct scoutfs_sb_info {
	struct compact_info *compact_info;
	struct data_info *data_info;

	struct buddy_info *buddy_info;

	struct rw_semaphore btree_rwsem;

	atomic_t trans_holds;
	wait_queue_head_t trans_hold_wq;
	struct task_struct *trans_task;
@@ -68,17 +51,6 @@ static inline struct scoutfs_sb_info *SCOUTFS_SB(struct super_block *sb)
	return sb->s_fs_info;
}

/* The root of the metadata btree */
static inline struct scoutfs_btree_root *SCOUTFS_META(struct super_block *sb)
{
	return &SCOUTFS_SB(sb)->super.btree_root;
}

static inline struct scoutfs_btree_root *SCOUTFS_STABLE_META(struct super_block *sb)
{
	return &SCOUTFS_SB(sb)->stable_super.btree_root;
}

void scoutfs_advance_dirty_super(struct super_block *sb);
int scoutfs_write_dirty_super(struct super_block *sb);
@@ -18,9 +18,7 @@
#include <linux/writeback.h>

#include "super.h"
#include "block.h"
#include "trans.h"
#include "buddy.h"
#include "data.h"
#include "bio.h"
#include "item.h"
@@ -22,7 +22,6 @@
#include "kvec.h"
#include "item.h"
#include "trans.h"
#include "name.h"
#include "xattr.h"

/*