Remove dead btree, block, and buddy code

Remove all of the dead code left over from the previous btree block design.

Signed-off-by: Zach Brown <zab@versity.com>
Zach Brown
2017-02-10 09:58:37 -08:00
parent 6bcdca3cf9
commit 97cb75bd88
25 changed files with 3 additions and 4161 deletions


@@ -2,6 +2,6 @@ obj-$(CONFIG_SCOUTFS_FS) := scoutfs.o
CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include
scoutfs-y += alloc.o bio.o block.o btree.o buddy.o compact.o counters.o crc.o \
data.o dir.o kvec.o inode.o ioctl.o item.o key.o manifest.o \
msg.o name.o seg.o scoutfs_trace.o super.o trans.o treap.o xattr.o
scoutfs-y += alloc.o bio.o compact.o counters.o data.o dir.o kvec.o inode.o \
ioctl.o item.o key.o manifest.o msg.o seg.o scoutfs_trace.o \
super.o trans.o treap.o xattr.o


@@ -1,786 +0,0 @@
/*
* Copyright (C) 2016 Versity Software, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include "super.h"
#include "format.h"
#include "block.h"
#include "crc.h"
#include "counters.h"
#include "buddy.h"
/*
* scoutfs maintains a cache of metadata blocks in a radix tree. This
* gives us blocks bigger than page size and avoids fixing the location
* of a logical cached block in one possible position in a larger block
* device page cache page.
*
* This does the work to cow dirty blocks, track dirty blocks, generate
* checksums as they're written, only write them in transactions, verify
* checksums on read, and invalidate and retry reads of stale cached
* blocks. (That last bit only has a hint of an implementation.)
*
* XXX
* - tear down dirty blocks left by write errors on unmount
* - multiple smaller page allocs
* - vmalloc? vm_map_ram?
* - blocks allocated from per-cpu pages when page size > block size
* - cmwq crc calcs if that makes sense
* - slab of block structs
* - don't verify checksums in end_io context?
* - fall back to multiple single bios per block io if bio alloc fails?
* - fail mount if total_blocks is greater than long radix blkno
*/
struct scoutfs_block {
struct rw_semaphore rwsem;
atomic_t refcount;
struct list_head lru_entry;
u64 blkno;
unsigned long bits;
struct super_block *sb;
struct page *page;
void *data;
};
#define DIRTY_RADIX_TAG 0
enum {
BLOCK_BIT_UPTODATE = 0,
BLOCK_BIT_ERROR,
BLOCK_BIT_CLASS_SET,
};
static struct scoutfs_block *alloc_block(struct super_block *sb, u64 blkno)
{
struct scoutfs_block *bl;
struct page *page;
/* we'd need to be just a bit more careful */
BUILD_BUG_ON(PAGE_SIZE > SCOUTFS_BLOCK_SIZE);
bl = kzalloc(sizeof(struct scoutfs_block), GFP_NOFS);
if (bl) {
/* change _from_contents if allocs not aligned */
page = alloc_pages(GFP_NOFS, SCOUTFS_BLOCK_PAGE_ORDER);
WARN_ON_ONCE(!page);
if (page) {
init_rwsem(&bl->rwsem);
atomic_set(&bl->refcount, 1);
INIT_LIST_HEAD(&bl->lru_entry);
bl->blkno = blkno;
bl->sb = sb;
bl->page = page;
bl->data = page_address(page);
trace_printk("allocated bl %p\n", bl);
} else {
kfree(bl);
bl = NULL;
}
}
return bl;
}
void scoutfs_block_put(struct scoutfs_block *bl)
{
if (!IS_ERR_OR_NULL(bl) && atomic_dec_and_test(&bl->refcount)) {
trace_printk("freeing bl %p\n", bl);
WARN_ON_ONCE(!list_empty(&bl->lru_entry));
scoutfs_inc_counter(bl->sb, block_mem_free);
__free_pages(bl->page, SCOUTFS_BLOCK_PAGE_ORDER);
kfree(bl);
}
}
static void lru_add(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl)
{
if (list_empty(&bl->lru_entry)) {
list_add_tail(&bl->lru_entry, &sbi->block_lru_list);
sbi->block_lru_nr++;
}
}
static void lru_del(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl)
{
if (!list_empty(&bl->lru_entry)) {
list_del_init(&bl->lru_entry);
sbi->block_lru_nr--;
}
}
/*
* The caller is referencing a block but doesn't know if it's in the LRU
* or not. If it is, move it to the tail so it's last to be dropped by
* the shrinker.
*/
static void lru_move(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl)
{
if (!list_empty(&bl->lru_entry))
list_move_tail(&bl->lru_entry, &sbi->block_lru_list);
}
static void radix_insert(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl,
bool dirty)
{
radix_tree_insert(&sbi->block_radix, bl->blkno, bl);
if (dirty)
radix_tree_tag_set(&sbi->block_radix, bl->blkno,
DIRTY_RADIX_TAG);
else
lru_add(sbi, bl);
atomic_inc(&bl->refcount);
}
/* deleting the blkno from the radix also clears the dirty tag if it was set */
static void radix_delete(struct scoutfs_sb_info *sbi, struct scoutfs_block *bl)
{
lru_del(sbi, bl);
radix_tree_delete(&sbi->block_radix, bl->blkno);
scoutfs_block_put(bl);
}
static int verify_block_header(struct super_block *sb, struct scoutfs_block *bl)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_super_block *super = &sbi->super;
struct scoutfs_block_header *hdr = bl->data;
u32 crc = scoutfs_crc_block(hdr);
int ret = -EIO;
if (le32_to_cpu(hdr->crc) != crc) {
printk("blkno %llu hdr crc %x != calculated %x\n", bl->blkno,
le32_to_cpu(hdr->crc), crc);
} else if (super->hdr.fsid && hdr->fsid != super->hdr.fsid) {
printk("blkno %llu fsid %llx != super fsid %llx\n", bl->blkno,
le64_to_cpu(hdr->fsid), le64_to_cpu(super->hdr.fsid));
} else if (le64_to_cpu(hdr->blkno) != bl->blkno) {
printk("blkno %llu invalid hdr blkno %llx\n", bl->blkno,
le64_to_cpu(hdr->blkno));
} else {
ret = 0;
}
return ret;
}
static void block_read_end_io(struct bio *bio, int err)
{
struct scoutfs_block *bl = bio->bi_private;
struct scoutfs_sb_info *sbi = SCOUTFS_SB(bl->sb);
if (!err && !verify_block_header(bl->sb, bl))
set_bit(BLOCK_BIT_UPTODATE, &bl->bits);
else
set_bit(BLOCK_BIT_ERROR, &bl->bits);
/*
* uncontended spin_lock in wake_up and unconditional smp_mb to
* make waitqueue_active safe are about the same cost, so we
* prefer the obviously safe choice.
*/
wake_up(&sbi->block_wq);
scoutfs_block_put(bl);
bio_put(bio);
}
/*
* Once a transaction block is persistent it's fine to drop the dirty
* tag. It's been checksummed so it can be read in again. Its seq
* will be in the current transaction so it'll simply be dirtied and
* checksummed and written out again.
*/
static void block_write_end_io(struct bio *bio, int err)
{
struct scoutfs_block *bl = bio->bi_private;
struct scoutfs_sb_info *sbi = SCOUTFS_SB(bl->sb);
unsigned long flags;
if (!err) {
spin_lock_irqsave(&sbi->block_lock, flags);
radix_tree_tag_clear(&sbi->block_radix,
bl->blkno, DIRTY_RADIX_TAG);
lru_add(sbi, bl);
spin_unlock_irqrestore(&sbi->block_lock, flags);
}
/* not too worried about racing ints */
if (err && !sbi->block_write_err)
sbi->block_write_err = err;
if (atomic_dec_and_test(&sbi->block_writes))
wake_up(&sbi->block_wq);
scoutfs_block_put(bl);
bio_put(bio);
}
static int block_submit_bio(struct scoutfs_block *bl, int rw)
{
struct super_block *sb = bl->sb;
struct bio *bio;
int ret;
bio = bio_alloc(GFP_NOFS, SCOUTFS_PAGES_PER_BLOCK);
if (WARN_ON_ONCE(!bio))
return -ENOMEM;
bio->bi_sector = bl->blkno << (SCOUTFS_BLOCK_SHIFT - 9);
bio->bi_bdev = sb->s_bdev;
if (rw & WRITE) {
bio->bi_end_io = block_write_end_io;
} else
bio->bi_end_io = block_read_end_io;
bio->bi_private = bl;
ret = bio_add_page(bio, bl->page, SCOUTFS_BLOCK_SIZE, 0);
if (WARN_ON_ONCE(ret != SCOUTFS_BLOCK_SIZE)) {
bio_put(bio);
return -ENOMEM;
}
atomic_inc(&bl->refcount);
submit_bio(rw, bio);
return 0;
}
/*
* Read an existing block from the device and verify its metadata header.
*/
struct scoutfs_block *scoutfs_block_read(struct super_block *sb, u64 blkno)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_block *found;
struct scoutfs_block *bl;
unsigned long flags;
int ret;
/* find an existing block, dropping if it's errored */
spin_lock_irqsave(&sbi->block_lock, flags);
bl = radix_tree_lookup(&sbi->block_radix, blkno);
if (bl) {
if (test_bit(BLOCK_BIT_ERROR, &bl->bits)) {
radix_delete(sbi, bl);
bl = NULL;
} else {
lru_move(sbi, bl);
atomic_inc(&bl->refcount);
}
}
spin_unlock_irqrestore(&sbi->block_lock, flags);
if (bl)
goto wait;
/* allocate a new block and try to insert it */
bl = alloc_block(sb, blkno);
if (!bl) {
ret = -EIO;
goto out;
}
ret = radix_tree_preload(GFP_NOFS);
if (ret)
goto out;
spin_lock_irqsave(&sbi->block_lock, flags);
found = radix_tree_lookup(&sbi->block_radix, blkno);
if (found) {
scoutfs_block_put(bl);
bl = found;
lru_move(sbi, bl);
atomic_inc(&bl->refcount);
} else {
radix_insert(sbi, bl, false);
}
spin_unlock_irqrestore(&sbi->block_lock, flags);
radix_tree_preload_end();
if (!found) {
ret = block_submit_bio(bl, READ_SYNC | REQ_META);
if (ret)
goto out;
}
wait:
ret = wait_event_interruptible(sbi->block_wq,
test_bit(BLOCK_BIT_UPTODATE, &bl->bits) ||
test_bit(BLOCK_BIT_ERROR, &bl->bits));
if (ret == 0 && test_bit(BLOCK_BIT_ERROR, &bl->bits))
ret = -EIO;
out:
if (ret) {
scoutfs_block_put(bl);
bl = ERR_PTR(ret);
}
return bl;
}
/*
* Read an existing block from the device described by the caller's
* reference.
*
* If the reference sequence numbers don't match then we could be racing
* with another writer. We back off and try again. If it happens too
* many times the caller assumes that we've hit persistent corruption
* and returns an error.
*
* XXX:
* - actually implement this
* - reads that span transactions?
* - writers creating a new dirty block?
*/
struct scoutfs_block *scoutfs_block_read_ref(struct super_block *sb,
struct scoutfs_block_ref *ref)
{
struct scoutfs_block_header *hdr;
struct scoutfs_block *bl;
bl = scoutfs_block_read(sb, le64_to_cpu(ref->blkno));
if (!IS_ERR(bl)) {
hdr = scoutfs_block_data(bl);
if (WARN_ON_ONCE(hdr->seq != ref->seq)) {
scoutfs_block_put(bl);
bl = ERR_PTR(-EAGAIN);
}
}
return bl;
}
/*
* The caller knows that it's not racing with writers.
*/
int scoutfs_block_has_dirty(struct super_block *sb)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
return radix_tree_tagged(&sbi->block_radix, DIRTY_RADIX_TAG);
}
/*
* Submit writes for all the blocks in the radix with their dirty tag
* set. The transaction machinery ensures that the dirty blocks form a
* consistent image and excludes future dirtying while IO is in flight.
*
* Presence in the dirty tree holds a reference. Blocks are only
* removed from the tree which drops the ref when IO completes.
*
* Blocks that see write errors remain in the dirty tree and will try to
* be written again in the next transaction commit.
*
* Reads can traverse the blocks while they're in flight.
*/
int scoutfs_block_write_dirty(struct super_block *sb)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_block *blocks[16];
struct scoutfs_block *bl;
struct blk_plug plug;
unsigned long flags;
u64 blkno;
int ret;
int nr;
int i;
atomic_set(&sbi->block_writes, 1);
sbi->block_write_err = 0;
blkno = 0;
ret = 0;
blk_start_plug(&plug);
do {
/* get refs to a bunch of dirty blocks */
spin_lock_irqsave(&sbi->block_lock, flags);
nr = radix_tree_gang_lookup_tag(&sbi->block_radix,
(void **)blocks, blkno,
ARRAY_SIZE(blocks),
DIRTY_RADIX_TAG);
if (nr > 0)
blkno = blocks[nr - 1]->blkno + 1;
for (i = 0; i < nr; i++)
atomic_inc(&blocks[i]->refcount);
spin_unlock_irqrestore(&sbi->block_lock, flags);
/* submit them in order, being careful to put all on err */
for (i = 0; i < nr; i++) {
bl = blocks[i];
if (ret == 0) {
scoutfs_block_set_crc(bl);
atomic_inc(&sbi->block_writes);
ret = block_submit_bio(bl, WRITE);
if (ret)
atomic_dec(&sbi->block_writes);
}
scoutfs_block_put(bl);
}
} while (nr && !ret);
blk_finish_plug(&plug);
/* wait for all io to drain */
atomic_dec(&sbi->block_writes);
wait_event(sbi->block_wq, atomic_read(&sbi->block_writes) == 0);
return ret ?: sbi->block_write_err;
}
/*
* XXX This is a gross hack for writing the super. It doesn't have
* per-block write completion indication. It knows that it's the only
* thing that will be writing.
*/
int scoutfs_block_write_sync(struct scoutfs_block *bl)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(bl->sb);
int ret;
BUG_ON(atomic_read(&sbi->block_writes) != 0);
atomic_inc(&sbi->block_writes);
ret = block_submit_bio(bl, WRITE);
if (ret)
atomic_dec(&sbi->block_writes);
else
wait_event(sbi->block_wq, atomic_read(&sbi->block_writes) == 0);
return ret ?: sbi->block_write_err;
}
/*
* Give the caller a dirty block that they can safely modify. If the
* reference refers to a stable clean block then we allocate a new block
* and update the reference.
*
* Blocks are dirtied and modified within a transaction that has a given
* sequence number which we use to determine if the block is currently
* dirty or not.
*
* For now we're using the dirty super block in the sb_info to track the
* dirty seq. That'll be different when we have multiple btrees.
*
* Callers are responsible for serializing modification to the reference
* which is probably embedded in some other dirty persistent structure.
*/
struct scoutfs_block *scoutfs_block_dirty_ref(struct super_block *sb,
struct scoutfs_block_ref *ref)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_block_header *hdr;
struct scoutfs_block *copy_bl = NULL;
struct scoutfs_block *bl;
u64 blkno = 0;
int ret;
int err;
bl = scoutfs_block_read(sb, le64_to_cpu(ref->blkno));
if (IS_ERR(bl) || ref->seq == sbi->super.hdr.seq)
return bl;
ret = scoutfs_buddy_alloc_same(sb, &blkno, le64_to_cpu(ref->blkno));
if (ret < 0)
goto out;
copy_bl = scoutfs_block_dirty(sb, blkno);
if (IS_ERR(copy_bl)) {
ret = PTR_ERR(copy_bl);
goto out;
}
hdr = scoutfs_block_data(bl);
ret = scoutfs_buddy_free(sb, hdr->seq, le64_to_cpu(hdr->blkno), 0);
if (ret)
goto out;
memcpy(scoutfs_block_data(copy_bl), scoutfs_block_data(bl),
SCOUTFS_BLOCK_SIZE);
hdr = scoutfs_block_data(copy_bl);
hdr->blkno = cpu_to_le64(blkno);
hdr->seq = sbi->super.hdr.seq;
ref->blkno = hdr->blkno;
ref->seq = hdr->seq;
ret = 0;
out:
scoutfs_block_put(bl);
if (ret) {
if (!IS_ERR_OR_NULL(copy_bl)) {
err = scoutfs_buddy_free(sb, sbi->super.hdr.seq,
blkno, 0);
WARN_ON_ONCE(err); /* freeing dirty must work */
}
scoutfs_block_put(copy_bl);
copy_bl = ERR_PTR(ret);
}
return copy_bl;
}
/*
* Return a dirty metadata block with an updated block header to match
* the current dirty seq. Callers are responsible for serializing
* access to the block and for zeroing unwritten block contents.
*
* Always allocating a new block and replacing any old cached block
* serves a very specific purpose. We can have an unlocked reader
* traversing stable structures actively using a clean block while a
* writer gets that same blkno from the allocator and starts modifying
* it. By always allocating a new block we let the reader continue
* safely using their old immutable block while the writer works on the
* newly allocated block. The old stable block will be freed once the
* reader drops their reference.
*/
struct scoutfs_block *scoutfs_block_dirty(struct super_block *sb, u64 blkno)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_block_header *hdr;
struct scoutfs_block *found;
struct scoutfs_block *bl;
unsigned long flags;
int ret;
/* allocate a new block and try to insert it */
bl = alloc_block(sb, blkno);
if (!bl) {
ret = -EIO;
goto out;
}
set_bit(BLOCK_BIT_UPTODATE, &bl->bits);
ret = radix_tree_preload(GFP_NOFS);
if (ret)
goto out;
hdr = bl->data;
*hdr = sbi->super.hdr;
hdr->blkno = cpu_to_le64(blkno);
hdr->seq = sbi->super.hdr.seq;
spin_lock_irqsave(&sbi->block_lock, flags);
found = radix_tree_lookup(&sbi->block_radix, blkno);
if (found)
radix_delete(sbi, found);
radix_insert(sbi, bl, true);
spin_unlock_irqrestore(&sbi->block_lock, flags);
radix_tree_preload_end();
ret = 0;
out:
if (ret) {
scoutfs_block_put(bl);
bl = ERR_PTR(ret);
}
return bl;
}
/*
* Allocate a new dirty writable block. The caller must be in a
* transaction so that we can assign the dirty seq.
*/
struct scoutfs_block *scoutfs_block_dirty_alloc(struct super_block *sb)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_super_block *super = &sbi->stable_super;
struct scoutfs_block *bl;
u64 blkno;
int ret;
int err;
ret = scoutfs_buddy_alloc(sb, &blkno, 0);
if (ret < 0)
return ERR_PTR(ret);
bl = scoutfs_block_dirty(sb, blkno);
if (IS_ERR(bl)) {
err = scoutfs_buddy_free(sb, super->hdr.seq, blkno, 0);
WARN_ON_ONCE(err); /* freeing dirty must work */
}
return bl;
}
/*
* Forget the given block by removing it from the radix and clearing its
* dirty tag. It will not be found by future lookups and will not be
* written out. The caller can still use it until it drops its
* reference.
*/
void scoutfs_block_forget(struct scoutfs_block *bl)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(bl->sb);
struct scoutfs_block *found;
unsigned long flags;
u64 blkno = bl->blkno;
spin_lock_irqsave(&sbi->block_lock, flags);
found = radix_tree_lookup(&sbi->block_radix, blkno);
if (found == bl)
radix_delete(sbi, bl);
spin_unlock_irqrestore(&sbi->block_lock, flags);
}
/*
* We maintain an LRU of blocks so that the shrinker can free the oldest
* under memory pressure. We can't reclaim dirty blocks so only clean
* blocks are kept in the LRU. Blocks are only in the LRU while their
* presence in the radix holds a reference. We don't care if a reader
* has an active ref on a clean block that gets reclaimed. All we're
* doing is removing from the radix. The caller can still work with the
* block and it will be freed once they drop their ref.
*
* If this is called with nr_to_scan == 0 then it only returns the nr.
* We avoid acquiring the lock in that case.
*
* Lookup code only moves blocks around in the LRU while they're in the
* radix. Once we remove the block from the radix we're able to use the
* lru_entry to drop all the blocks outside the lock.
*
* XXX:
* - are sc->nr_to_scan and our return meant to be in units of pages?
* - should we sync a transaction here?
*/
int scoutfs_block_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
struct scoutfs_sb_info *sbi = container_of(shrink,
struct scoutfs_sb_info,
block_shrinker);
struct scoutfs_block *tmp;
struct scoutfs_block *bl;
unsigned long flags;
unsigned long nr;
LIST_HEAD(list);
nr = sc->nr_to_scan;
if (!nr)
goto out;
spin_lock_irqsave(&sbi->block_lock, flags);
list_for_each_entry_safe(bl, tmp, &sbi->block_lru_list, lru_entry) {
if (nr-- == 0)
break;
atomic_inc(&bl->refcount);
radix_delete(sbi, bl);
list_add(&bl->lru_entry, &list);
}
spin_unlock_irqrestore(&sbi->block_lock, flags);
list_for_each_entry_safe(bl, tmp, &list, lru_entry) {
list_del_init(&bl->lru_entry);
scoutfs_block_put(bl);
}
out:
return min_t(unsigned long, sbi->block_lru_nr, INT_MAX);
}
void scoutfs_block_set_crc(struct scoutfs_block *bl)
{
struct scoutfs_block_header *hdr = scoutfs_block_data(bl);
hdr->crc = cpu_to_le32(scoutfs_crc_block(hdr));
}
/*
* Zero the block from the given byte to the end of the block.
*/
void scoutfs_block_zero(struct scoutfs_block *bl, size_t off)
{
if (WARN_ON_ONCE(off > SCOUTFS_BLOCK_SIZE))
return;
if (off < SCOUTFS_BLOCK_SIZE)
memset(scoutfs_block_data(bl) + off, 0,
SCOUTFS_BLOCK_SIZE - off);
}
/*
* Zero the block from the given pointer to the end of the block.
*/
void scoutfs_block_zero_from(struct scoutfs_block *bl, void *ptr)
{
return scoutfs_block_zero(bl, (char *)ptr -
(char *)scoutfs_block_data(bl));
}
void scoutfs_block_set_lock_class(struct scoutfs_block *bl,
struct lock_class_key *class)
{
if (!test_bit(BLOCK_BIT_CLASS_SET, &bl->bits)) {
lockdep_set_class(&bl->rwsem, class);
set_bit(BLOCK_BIT_CLASS_SET, &bl->bits);
}
}
void scoutfs_block_lock(struct scoutfs_block *bl, bool write, int subclass)
{
if (write)
down_write_nested(&bl->rwsem, subclass);
else
down_read_nested(&bl->rwsem, subclass);
}
void scoutfs_block_unlock(struct scoutfs_block *bl, bool write)
{
if (write)
up_write(&bl->rwsem);
else
up_read(&bl->rwsem);
}
void *scoutfs_block_data(struct scoutfs_block *bl)
{
return bl->data;
}
void *scoutfs_block_data_from_contents(const void *ptr)
{
unsigned long addr = (unsigned long)ptr;
return (void *)(addr & ~((unsigned long)SCOUTFS_BLOCK_MASK));
}
void scoutfs_block_destroy(struct super_block *sb)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_block *blocks[16];
struct scoutfs_block *bl;
unsigned long blkno = 0;
int nr;
int i;
do {
nr = radix_tree_gang_lookup(&sbi->block_radix, (void **)blocks,
blkno, ARRAY_SIZE(blocks));
for (i = 0; i < nr; i++) {
bl = blocks[i];
blkno = bl->blkno + 1;
radix_delete(sbi, bl);
}
} while (nr);
}


@@ -1,38 +0,0 @@
#ifndef _SCOUTFS_BLOCK_H_
#define _SCOUTFS_BLOCK_H_
struct scoutfs_block;
#include <linux/fs.h>
struct scoutfs_block *scoutfs_block_read(struct super_block *sb, u64 blkno);
struct scoutfs_block *scoutfs_block_read_ref(struct super_block *sb,
struct scoutfs_block_ref *ref);
struct scoutfs_block *scoutfs_block_dirty(struct super_block *sb, u64 blkno);
struct scoutfs_block *scoutfs_block_dirty_alloc(struct super_block *sb);
struct scoutfs_block *scoutfs_block_dirty_ref(struct super_block *sb,
struct scoutfs_block_ref *ref);
int scoutfs_block_has_dirty(struct super_block *sb);
int scoutfs_block_write_dirty(struct super_block *sb);
int scoutfs_block_write_sync(struct scoutfs_block *bl);
void scoutfs_block_set_crc(struct scoutfs_block *bl);
void scoutfs_block_zero(struct scoutfs_block *bl, size_t off);
void scoutfs_block_zero_from(struct scoutfs_block *bl, void *ptr);
void scoutfs_block_set_lock_class(struct scoutfs_block *bl,
struct lock_class_key *class);
void scoutfs_block_lock(struct scoutfs_block *bl, bool write, int subclass);
void scoutfs_block_unlock(struct scoutfs_block *bl, bool write);
void *scoutfs_block_data(struct scoutfs_block *bl);
void *scoutfs_block_data_from_contents(const void *ptr);
void scoutfs_block_forget(struct scoutfs_block *bl);
void scoutfs_block_put(struct scoutfs_block *bl);
int scoutfs_block_shrink(struct shrinker *shrink, struct shrink_control *sc);
void scoutfs_block_destroy(struct super_block *sb);
#endif
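
For context, a minimal sketch of how a caller used this interface before the item cache replaced it, pieced together from the declarations above and the comments in the removed block.c. The function name is hypothetical and error handling is abbreviated.

/* illustrative only: exercises the removed block.h interface */
#include "format.h"
#include "block.h"

static int example_read_then_cow(struct super_block *sb,
				 struct scoutfs_block_ref *ref)
{
	struct scoutfs_block *bl;
	void *data;

	/* read-only traversal: follow the ref, use the contents, drop it */
	bl = scoutfs_block_read_ref(sb, ref);
	if (IS_ERR(bl))
		return PTR_ERR(bl);
	data = scoutfs_block_data(bl);
	/* ... inspect data ... */
	scoutfs_block_put(bl);

	/*
	 * modification: dirty_ref cows the block into the current
	 * transaction and updates the caller's ref to the new blkno/seq
	 */
	bl = scoutfs_block_dirty_ref(sb, ref);
	if (IS_ERR(bl))
		return PTR_ERR(bl);
	data = scoutfs_block_data(bl);
	/* ... modify data; scoutfs_block_write_dirty() writes it at commit ... */
	scoutfs_block_put(bl);

	return 0;
}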

File diff suppressed because it is too large


@@ -1,77 +0,0 @@
#ifndef _SCOUTFS_BTREE_H_
#define _SCOUTFS_BTREE_H_
#include <linux/uio.h>
struct scoutfs_btree_val {
struct kvec vec[3];
unsigned int check_size_eq:1;
unsigned int check_size_lte:1;
};
static inline void __scoutfs_btree_init_val(struct scoutfs_btree_val *val,
void *ptr0, unsigned int len0,
void *ptr1, unsigned int len1,
void *ptr2, unsigned int len2)
{
*val = (struct scoutfs_btree_val) {
{ { ptr0, len0 }, { ptr1, len1 }, { ptr2, len2 } }
};
}
#define _scoutfs_btree_init_val(v, p0, l0, p1, l1, p2, l2, ...) \
__scoutfs_btree_init_val(v, p0, l0, p1, l1, p2, l2)
/*
* Provide a nice variadic initialization function without having to
* iterate over the caller's arg types. We play some macro games to pad
* out the caller's ptr/len pairs to the full possible number. This will
* produce confusing errors if an odd number of arguments is given and
* the padded ptr/length types aren't compatible with the fixed
* arguments in the static inline.
*/
#define scoutfs_btree_init_val(val, ...) \
_scoutfs_btree_init_val(val, __VA_ARGS__, NULL, 0, NULL, 0, NULL, 0)
static inline int scoutfs_btree_val_length(struct scoutfs_btree_val *val)
{
return iov_length((struct iovec *)val->vec, ARRAY_SIZE(val->vec));
}
int scoutfs_btree_lookup(struct super_block *sb,
struct scoutfs_btree_root *root,
struct scoutfs_key *key,
struct scoutfs_btree_val *val);
int scoutfs_btree_insert(struct super_block *sb,
struct scoutfs_btree_root *root,
struct scoutfs_key *key,
struct scoutfs_btree_val *val);
int scoutfs_btree_delete(struct super_block *sb,
struct scoutfs_btree_root *root,
struct scoutfs_key *key);
int scoutfs_btree_next(struct super_block *sb, struct scoutfs_btree_root *root,
struct scoutfs_key *first, struct scoutfs_key *last,
struct scoutfs_key *found,
struct scoutfs_btree_val *val);
int scoutfs_btree_prev(struct super_block *sb, struct scoutfs_btree_root *root,
struct scoutfs_key *first, struct scoutfs_key *last,
struct scoutfs_key *found, u64 *found_seq,
struct scoutfs_btree_val *val);
int scoutfs_btree_dirty(struct super_block *sb,
struct scoutfs_btree_root *root,
struct scoutfs_key *key);
int scoutfs_btree_update(struct super_block *sb,
struct scoutfs_btree_root *root,
struct scoutfs_key *key,
struct scoutfs_btree_val *val);
int scoutfs_btree_hole(struct super_block *sb, struct scoutfs_btree_root *root,
struct scoutfs_key *first,
struct scoutfs_key *last, struct scoutfs_key *hole);
int scoutfs_btree_since(struct super_block *sb,
struct scoutfs_btree_root *root,
struct scoutfs_key *first, struct scoutfs_key *last,
u64 seq, struct scoutfs_key *found, u64 *found_seq,
struct scoutfs_btree_val *val);
#endif
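
The variadic value initializer is easiest to see with a concrete expansion. A minimal sketch of an inode item lookup, assuming the value is a struct scoutfs_inode as format.h notes; the helper name is hypothetical and the exact behavior of check_size_eq lived in the suppressed btree.c.

/* illustrative only: one ptr/len pair, padded out by the macro */
#include "format.h"
#include "btree.h"

static int example_lookup_inode_item(struct super_block *sb,
				     struct scoutfs_btree_root *root,
				     struct scoutfs_key *key,
				     struct scoutfs_inode *sinode)
{
	struct scoutfs_btree_val val;

	/*
	 * scoutfs_btree_init_val(&val, sinode, sizeof(*sinode)) expands to
	 * __scoutfs_btree_init_val(&val, sinode, sizeof(*sinode),
	 *                          NULL, 0, NULL, 0)
	 * so the two unused kvec slots are explicitly zeroed.
	 */
	scoutfs_btree_init_val(&val, sinode, sizeof(*sinode));
	val.check_size_eq = 1;	/* presumably: require an exact value size */

	return scoutfs_btree_lookup(sb, root, key, &val);
}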

File diff suppressed because it is too large


@@ -1,20 +0,0 @@
#ifndef _SCOUTFS_BUDDY_H_
#define _SCOUTFS_BUDDY_H_
int scoutfs_buddy_alloc(struct super_block *sb, u64 *blkno, int order);
int scoutfs_buddy_alloc_same(struct super_block *sb, u64 *blkno, u64 existing);
int scoutfs_buddy_free(struct super_block *sb, __le64 seq, u64 blkno,
int order);
void scoutfs_buddy_free_extent(struct super_block *sb, u64 blkno, u64 count);
int scoutfs_buddy_was_free(struct super_block *sb, u64 blkno, int order);
u64 scoutfs_buddy_bfree(struct super_block *sb);
unsigned int scoutfs_buddy_alloc_count(struct super_block *sb);
int scoutfs_buddy_apply_pending(struct super_block *sb, bool alloc);
void scoutfs_buddy_committed(struct super_block *sb);
int scoutfs_buddy_setup(struct super_block *sb);
void scoutfs_buddy_destroy(struct super_block *sb);
#endif
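
A rough sketch of how allocation was paired with a free on the error path, mirroring the removed scoutfs_block_dirty_alloc() but with the dirty sequence number passed in explicitly; reading order 0 as one block follows buddy allocator convention and is an assumption here.

/* illustrative only: allocate, dirty, and unwind on failure */
#include "block.h"
#include "buddy.h"

static struct scoutfs_block *example_dirty_new_block(struct super_block *sb,
						     __le64 dirty_seq)
{
	struct scoutfs_block *bl;
	u64 blkno;
	int ret;
	int err;

	ret = scoutfs_buddy_alloc(sb, &blkno, 0);	/* order 0: one block */
	if (ret < 0)
		return ERR_PTR(ret);

	bl = scoutfs_block_dirty(sb, blkno);
	if (IS_ERR(bl)) {
		/* freeing a block allocated in this transaction must work */
		err = scoutfs_buddy_free(sb, dirty_seq, blkno, 0);
		WARN_ON_ONCE(err);
	}

	return bl;
}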


@@ -14,8 +14,6 @@
#define EXPAND_EACH_COUNTER \
EXPAND_COUNTER(alloc_alloc) \
EXPAND_COUNTER(alloc_free) \
EXPAND_COUNTER(block_mem_alloc) \
EXPAND_COUNTER(block_mem_free) \
EXPAND_COUNTER(seg_lru_shrink) \
EXPAND_COUNTER(trans_level0_seg_write) \
EXPAND_COUNTER(manifest_compact_migrate) \


@@ -1,23 +0,0 @@
/*
* Copyright (C) 2015 Versity Software, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/crc32c.h>
#include "format.h"
#include "crc.h"
u32 scoutfs_crc_block(struct scoutfs_block_header *hdr)
{
return crc32c(~0, (char *)hdr + sizeof(hdr->crc),
SCOUTFS_BLOCK_SIZE - sizeof(hdr->crc));
}
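
The crc sits at the front of the header it protects, so both writer and reader compute it over everything after the crc field itself. A minimal sketch of the stamp/verify pairing, condensed from scoutfs_block_set_crc() and verify_block_header() in the removed block.c; the function names here are illustrative.

/* illustrative only: stamp and check a block header crc */
#include <linux/kernel.h>
#include "format.h"
#include "crc.h"

static void example_stamp_crc(struct scoutfs_block_header *hdr)
{
	hdr->crc = cpu_to_le32(scoutfs_crc_block(hdr));
}

static bool example_crc_matches(struct scoutfs_block_header *hdr)
{
	return le32_to_cpu(hdr->crc) == scoutfs_crc_block(hdr);
}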


@@ -1,6 +0,0 @@
#ifndef _SCOUTFS_CRC_H_
#define _SCOUTFS_CRC_H_
u32 scoutfs_crc_block(struct scoutfs_block_header *hdr);
#endif


@@ -23,9 +23,7 @@
#include "inode.h"
#include "key.h"
#include "super.h"
#include "btree.h"
#include "trans.h"
#include "name.h"
#include "xattr.h"
#include "kvec.h"
#include "item.h"


@@ -35,9 +35,6 @@
*/
#define SCOUTFS_SUPER_BLKNO ((64 * 1024) >> SCOUTFS_BLOCK_SHIFT)
#define SCOUTFS_SUPER_NR 2
#define SCOUTFS_BUDDY_BLKNO (SCOUTFS_SUPER_BLKNO + SCOUTFS_SUPER_NR)
#define SCOUTFS_MAX_TRANS_BLOCKS (128 * 1024 * 1024 / SCOUTFS_BLOCK_SIZE)
/*
* This header is found at the start of every block so that we can
@@ -161,70 +158,6 @@ struct scoutfs_segment_block {
/* packed vals */
} __packed;
/*
* Block references include the sequence number so that we can detect
* readers racing with writers and so that we can tell that we don't
* need to follow a reference when traversing based on seqs.
*/
struct scoutfs_block_ref {
__le64 blkno;
__le64 seq;
} __packed;
/*
* If the block was full of bits the largest possible order would be
* the block size shift + 3 (BITS_PER_BYTE). But the header uses
* up some space and then the buddy bits mean two bits per block.
* Then +1 for this being the number, not the greatest order.
*/
#define SCOUTFS_BUDDY_ORDERS (SCOUTFS_BLOCK_SHIFT + 3 - 2 + 1)
struct scoutfs_buddy_block {
struct scoutfs_block_header hdr;
__le16 first_set[SCOUTFS_BUDDY_ORDERS];
__u8 level;
__u8 __pad[3]; /* naturally align bits */
union {
struct scoutfs_buddy_slot {
__le64 seq;
__le16 free_orders;
/* XXX seems like we could hide a bit somewhere */
__u8 blkno_off;
} __packed slots[0];
__le64 bits[0];
} __packed;
} __packed;
/*
* Each buddy leaf block references order 0 blocks with half of its
* bitmap. The other half of the bits are used for the higher order
* bits.
*/
#define SCOUTFS_BUDDY_ORDER0_BITS \
(((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_buddy_block)) * 8) / 2)
#define SCOUTFS_BUDDY_SLOTS \
((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_buddy_block)) / \
sizeof(struct scoutfs_buddy_slot))
struct scoutfs_buddy_root {
struct scoutfs_buddy_slot slot;
__u8 height;
} __packed;
/* ((SCOUTFS_BUDDY_SLOTS^5) * SCOUTFS_BUDDY_ORDER0_BITS) > 2^52 */
#define SCOUTFS_BUDDY_MAX_HEIGHT 6
/*
* We should be able to make the offset smaller if neither dirents nor
* data items use the full 64 bits.
*/
struct scoutfs_key {
__le64 inode;
u8 type;
__le64 offset;
} __packed;
/*
* Currently we sort keys by the numeric value of the types, but that
* isn't necessary. We could have an arbitrary sort order. So we don't
@@ -241,8 +174,6 @@ struct scoutfs_key {
#define SCOUTFS_DATA_KEY 11
#define SCOUTFS_MAX_UNUSED_KEY 255
#define SCOUTFS_MAX_ITEM_LEN 512
/* value is struct scoutfs_inode */
struct scoutfs_inode_key {
__u8 type;
@@ -307,66 +238,9 @@ struct scoutfs_symlink_key {
__be64 ino;
} __packed;
struct scoutfs_btree_root {
u8 height;
struct scoutfs_block_ref ref;
} __packed;
/*
* @free_end: records the byte offset of the first byte after the free
* space in the block between the header and the first item. New items
* are allocated by subtracting the space they need.
*
* @free_reclaim: records the number of bytes of free space amongst the
* items after free_end. If a block is compacted then this much new
* free space would be reclaimed.
*/
struct scoutfs_btree_block {
struct scoutfs_block_header hdr;
__le16 free_end;
__le16 free_reclaim;
__le16 nr_items;
__le16 item_offs[0];
} __packed;
/*
* The item sequence number is set to the dirty block's sequence number
* when the item is modified. It is not changed by splits or merges.
*/
struct scoutfs_btree_item {
struct scoutfs_key key;
__le64 seq;
__le16 val_len;
char val[0];
} __packed;
/* Blocks are no more than half free. */
#define SCOUTFS_BTREE_FREE_LIMIT \
((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_btree_block)) / 2)
/* XXX does this exist upstream somewhere? */
#define member_sizeof(TYPE, MEMBER) (sizeof(((TYPE *)0)->MEMBER))
#define SCOUTFS_BTREE_MAX_ITEMS \
((SCOUTFS_BLOCK_SIZE - sizeof(struct scoutfs_btree_block)) / \
(member_sizeof(struct scoutfs_btree_block, item_offs[0]) + \
sizeof(struct scoutfs_btree_item)))
/*
* We can calculate the max tree depth by calculating how many leaf
* blocks the tree could reference. The block device can only reference
* 2^64 bytes. The tallest parent tree has half full parent blocks.
*
* So we have the relation:
*
* ceil(max_items / 2) ^ (max_depth - 1) >= 2^64 / block_size
*
* and solve for depth:
*
* max_depth = log(ceil(max_items / 2), 2^64 / block_size) + 1
*/
#define SCOUTFS_BTREE_MAX_DEPTH 10
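
A rough worked instance of that bound, assuming 4KB blocks and a roughly 32 byte struct scoutfs_block_header; both numbers are assumptions here, the real values come from format.h.

/*
 * item cost   ~= member_sizeof(item_offs[0]) + sizeof(struct scoutfs_btree_item)
 *             ~= 2 + 27 = 29 bytes
 * max_items   ~= (4096 - ~38 byte struct scoutfs_btree_block) / 29 ~= 139
 * half full   ~= 70 child references per parent block
 * leaf blocks <= 2^64 / 4096 = 2^52 ~= 4.5e15
 *
 * 70^8 ~= 5.8e14 < 2^52 <= 70^9 ~= 4.0e16, so max_depth = 9 + 1 = 10
 */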
#define SCOUTFS_UUID_BYTES 16
/*
@@ -382,16 +256,11 @@ struct scoutfs_super_block {
__le64 alloc_uninit;
__le64 total_segs;
__le64 free_segs;
__le64 total_blocks;
__le64 free_blocks;
__le64 ring_blkno;
__le64 ring_blocks;
__le64 ring_tail_block;
__le64 ring_gen;
__le64 next_seg_seq;
__le64 buddy_blocks;
struct scoutfs_buddy_root buddy_root;
struct scoutfs_btree_root btree_root;
struct scoutfs_treap_root alloc_treap_root;
struct scoutfs_manifest manifest;
} __packed;
@@ -418,7 +287,6 @@ struct scoutfs_timespec {
struct scoutfs_inode {
__le64 size;
__le64 blocks;
__le64 link_counter;
__le64 data_version;
__le64 next_readdir_pos;
__le32 nlink;
@@ -426,7 +294,6 @@ struct scoutfs_inode {
__le32 gid;
__le32 mode;
__le32 rdev;
__le32 salt;
struct scoutfs_timespec atime;
struct scoutfs_timespec ctime;
struct scoutfs_timespec mtime;
@@ -449,20 +316,6 @@ struct scoutfs_dirent {
__u8 name[0];
} __packed;
/*
* Dirent items are stored at keys with the offset set to the hash of
* the name. Creation can find that hash values collide and will
* attempt to linearly probe this many following hash values looking for
* an unused value.
*
* In small directories this doesn't really matter because hash values
* will so very rarely collide. At around 50k items we start to see our
* first collisions. 16 slots is still pretty quick to scan in the
* btree and it gets us up into the hundreds of millions of entries
* before ENOSPC is returned as we run out of hash values.
*/
#define SCOUTFS_DIRENT_COLL_NR 16
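
A rough sketch of the probing this comment describes, as it might have sat in dir.c; the helper name, the SCOUTFS_DIRENT_KEY type value, and the -EEXIST collision convention are all assumptions here.

/* illustrative only: probe successive hash values until an insert succeeds */
#include "format.h"
#include "key.h"
#include "btree.h"

static int example_insert_dirent_item(struct super_block *sb,
				      struct scoutfs_btree_root *root,
				      u64 dir_ino, u64 hash,
				      struct scoutfs_btree_val *val)
{
	struct scoutfs_key key;
	int ret;
	int i;

	for (i = 0; i < SCOUTFS_DIRENT_COLL_NR; i++) {
		scoutfs_set_key(&key, dir_ino, SCOUTFS_DIRENT_KEY, hash + i);

		ret = scoutfs_btree_insert(sb, root, &key, val);
		if (ret != -EEXIST)
			break;
	}

	/* the comment above says ENOSPC is returned once the slots run out */
	return ret == -EEXIST ? -ENOSPC : ret;
}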
#define SCOUTFS_NAME_LEN 255
/* S32_MAX avoids the (int) sign bit and might avoid sloppy bugs */
@@ -475,14 +328,6 @@ struct scoutfs_dirent {
#define SCOUTFS_XATTR_MAX_PARTS \
DIV_ROUND_UP(SCOUTFS_XATTR_MAX_SIZE, SCOUTFS_XATTR_PART_SIZE)
/*
* We only use 31 bits for readdir positions so that we don't confuse
* old signed 32bit f_pos applications or those on the other side of
* network protocols that have limited readdir positions.
*/
#define SCOUTFS_DIRENT_OFF_BITS 31
#define SCOUTFS_DIRENT_OFF_MASK ((1U << SCOUTFS_DIRENT_OFF_BITS) - 1)
/* entries begin after . and .. */
#define SCOUTFS_DIRENT_FIRST_POS 2
/* getdents returns next pos with an entry, no entry at (f_pos)~0 */
@@ -499,14 +344,6 @@ enum {
SCOUTFS_DT_WHT,
};
struct scoutfs_extent {
__le64 blkno;
__le64 len;
__u8 flags;
} __packed;
#define SCOUTFS_EXTENT_FLAG_OFFLINE (1 << 0)
/* ino_path can search for backref items with a null term */
#define SCOUTFS_MAX_KEY_SIZE \
offsetof(struct scoutfs_link_backref_key, name[SCOUTFS_NAME_LEN + 1])


@@ -22,7 +22,6 @@
#include "super.h"
#include "key.h"
#include "inode.h"
#include "btree.h"
#include "dir.h"
#include "data.h"
#include "scoutfs_trace.h"
@@ -126,8 +125,6 @@ static void load_inode(struct inode *inode, struct scoutfs_inode *cinode)
inode->i_ctime.tv_sec = le64_to_cpu(cinode->ctime.sec);
inode->i_ctime.tv_nsec = le32_to_cpu(cinode->ctime.nsec);
ci->salt = le32_to_cpu(cinode->salt);
atomic64_set(&ci->link_counter, le64_to_cpu(cinode->link_counter));
ci->data_version = le64_to_cpu(cinode->data_version);
ci->next_readdir_pos = le64_to_cpu(cinode->next_readdir_pos);
}
@@ -247,8 +244,6 @@ static void store_inode(struct scoutfs_inode *cinode, struct inode *inode)
cinode->mtime.sec = cpu_to_le64(inode->i_mtime.tv_sec);
cinode->mtime.nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
cinode->salt = cpu_to_le32(ci->salt);
cinode->link_counter = cpu_to_le64(atomic64_read(&ci->link_counter));
cinode->data_version = cpu_to_le64(ci->data_version);
cinode->next_readdir_pos = cpu_to_le64(ci->next_readdir_pos);
}
@@ -415,8 +410,6 @@ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
ci->data_version = 0;
ci->next_readdir_pos = SCOUTFS_DIRENT_FIRST_POS;
ci->staging = false;
get_random_bytes(&ci->salt, sizeof(ci->salt));
atomic64_set(&ci->link_counter, 0);
inode->i_ino = ino; /* XXX overflow */
inode_init_owner(inode, dir, mode);


@@ -5,7 +5,6 @@
struct scoutfs_inode_info {
u64 ino;
u32 salt;
seqcount_t seqcount;
u64 data_version;
@@ -14,7 +13,6 @@ struct scoutfs_inode_info {
/* holder of i_mutex is staging */
bool staging;
atomic64_t link_counter;
struct rw_semaphore xattr_rwsem;
struct inode inode;


@@ -24,7 +24,6 @@
#include "format.h"
#include "key.h"
#include "dir.h"
#include "name.h"
#include "ioctl.h"
#include "super.h"
#include "inode.h"


@@ -126,127 +126,4 @@ static inline void scoutfs_key_set_max(struct scoutfs_key_buf *key)
scoutfs_key_memset(key, 0xff, sizeof(struct scoutfs_inode_key));
}
/*
* What follows are the key functions for the small fixed size btree
* keys. It will all be removed once the callers are converted from
* the btree to the item cache.
*/
#define CKF "%llu.%u.%llu"
#define CKA(key) \
le64_to_cpu((key)->inode), (key)->type, le64_to_cpu((key)->offset)
static inline u64 scoutfs_key_inode(struct scoutfs_key *key)
{
return le64_to_cpu(key->inode);
}
static inline u64 scoutfs_key_offset(struct scoutfs_key *key)
{
return le64_to_cpu(key->offset);
}
static inline int le64_cmp(__le64 a, __le64 b)
{
return le64_to_cpu(a) < le64_to_cpu(b) ? -1 :
le64_to_cpu(a) > le64_to_cpu(b) ? 1 : 0;
}
/*
* Items are sorted by type and then by inode to reflect the relative
* frequency of use. Inodes and xattrs are hot, then dirents, then file
* data extents. We want each use class to be hot and dense, we don't
* want a scan of the inodes to have to skip over each inode's extent
* items.
*/
static inline int scoutfs_key_cmp(struct scoutfs_key *a, struct scoutfs_key *b)
{
return ((short)a->type - (short)b->type) ?:
le64_cmp(a->inode, b->inode) ?:
le64_cmp(a->offset, b->offset);
}
/*
* return -ve if the first range is completely before the second, +ve for
* completely after, and 0 if they intersect.
*/
static inline int scoutfs_cmp_key_ranges(struct scoutfs_key *a_first,
struct scoutfs_key *a_last,
struct scoutfs_key *b_first,
struct scoutfs_key *b_last)
{
if (scoutfs_key_cmp(a_last, b_first) < 0)
return -1;
if (scoutfs_key_cmp(a_first, b_last) > 0)
return 1;
return 0;
}
static inline int scoutfs_cmp_key_range(struct scoutfs_key *key,
struct scoutfs_key *first,
struct scoutfs_key *last)
{
return scoutfs_cmp_key_ranges(key, key, first, last);
}
static inline void scoutfs_set_key(struct scoutfs_key *key, u64 inode, u8 type,
u64 offset)
{
key->inode = cpu_to_le64(inode);
key->type = type;
key->offset = cpu_to_le64(offset);
}
static inline void scoutfs_set_max_key(struct scoutfs_key *key)
{
scoutfs_set_key(key, ~0ULL, ~0, ~0ULL);
}
/*
* This saturates at (~0,~0,~0) instead of wrapping. This will never be
* an issue for real item keys but parent item keys along the right
* spine of the tree have maximal key values that could wrap if
* incremented.
*/
static inline void scoutfs_inc_key(struct scoutfs_key *key)
{
if (key->inode == cpu_to_le64(~0ULL) &&
key->type == (u8)~0 &&
key->offset == cpu_to_le64(~0ULL))
return;
le64_add_cpu(&key->offset, 1);
if (!key->offset) {
if (++key->type == 0)
le64_add_cpu(&key->inode, 1);
}
}
static inline void scoutfs_dec_key(struct scoutfs_key *key)
{
le64_add_cpu(&key->offset, -1ULL);
if (key->offset == cpu_to_le64(~0ULL)) {
if (key->type-- == 0)
le64_add_cpu(&key->inode, -1ULL);
}
}
static inline struct scoutfs_key *scoutfs_max_key(struct scoutfs_key *a,
struct scoutfs_key *b)
{
return scoutfs_key_cmp(a, b) > 0 ? a : b;
}
static inline bool scoutfs_key_is_zero(struct scoutfs_key *key)
{
return key->inode == 0 && key->type == 0 && key->offset == 0;
}
static inline void scoutfs_key_set_zero(struct scoutfs_key *key)
{
key->inode = 0;
key->type = 0;
key->offset = 0;
}
#endif


@@ -25,10 +25,8 @@
#include "dir.h"
#include "xattr.h"
#include "msg.h"
#include "block.h"
#include "counters.h"
#include "trans.h"
#include "buddy.h"
#include "kvec.h"
#include "scoutfs_trace.h"


@@ -1,35 +0,0 @@
/*
* Copyright (C) 2016 Versity Software, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/crc32c.h>
#include <linux/string.h>
#include "name.h"
/*
* XXX This crc nonsense is a quick hack. We'll want something a
* lot stronger like siphash.
*/
u64 scoutfs_name_hash(const char *name, unsigned int len)
{
unsigned int half = (len + 1) / 2;
return crc32c(~0, name, half) |
((u64)crc32c(~0, name + len - half, half) << 32);
}
int scoutfs_names_equal(const char *name_a, int len_a,
const char *name_b, int len_b)
{
return (len_a == len_b) && !memcmp(name_a, name_b, len_a);
}


@@ -1,8 +0,0 @@
#ifndef _SCOUTFS_NAME_H_
#define _SCOUTFS_NAME_H_
u64 scoutfs_name_hash(const char *data, unsigned int len);
int scoutfs_names_equal(const char *name_a, int len_a,
const char *name_b, int len_b);
#endif


@@ -23,7 +23,6 @@
#include "inode.h"
#include "dir.h"
#include "msg.h"
#include "block.h"
#define CREATE_TRACE_POINTS
#include "scoutfs_trace.h"


@@ -180,171 +180,6 @@ TRACE_EVENT(scoutfs_scan_orphans,
TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
);
TRACE_EVENT(scoutfs_buddy_alloc,
TP_PROTO(u64 blkno, int order, int region, int ret),
TP_ARGS(blkno, order, region, ret),
TP_STRUCT__entry(
__field(u64, blkno)
__field(int, order)
__field(int, region)
__field(int, ret)
),
TP_fast_assign(
__entry->blkno = blkno;
__entry->order = order;
__entry->region = region;
__entry->ret = ret;
),
TP_printk("blkno %llu order %d region %d ret %d",
__entry->blkno, __entry->order, __entry->region, __entry->ret)
);
TRACE_EVENT(scoutfs_buddy_free,
TP_PROTO(u64 blkno, int order, int region, int ret),
TP_ARGS(blkno, order, region, ret),
TP_STRUCT__entry(
__field(u64, blkno)
__field(int, order)
__field(int, region)
__field(int, ret)
),
TP_fast_assign(
__entry->blkno = blkno;
__entry->order = order;
__entry->region = region;
__entry->ret = ret;
),
TP_printk("blkno %llu order %d region %d ret %d",
__entry->blkno, __entry->order, __entry->region, __entry->ret)
);
DECLARE_EVENT_CLASS(scoutfs_btree_op,
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
TP_ARGS(sb, key, len),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, key_ino )
__field( u64, key_off )
__field( u8, key_type )
__field( int, val_len )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->key_ino = le64_to_cpu(key->inode);
__entry->key_off = le64_to_cpu(key->offset);
__entry->key_type = key->type;
__entry->val_len = len;
),
TP_printk("dev %d,%d key "TRACE_KEYF" size %d",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->key_ino, show_key_type(__entry->key_type),
__entry->key_off, __entry->val_len)
);
DEFINE_EVENT(scoutfs_btree_op, scoutfs_btree_lookup,
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
TP_ARGS(sb, key, len)
);
DEFINE_EVENT(scoutfs_btree_op, scoutfs_btree_insert,
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
TP_ARGS(sb, key, len)
);
DEFINE_EVENT(scoutfs_btree_op, scoutfs_btree_delete,
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
TP_ARGS(sb, key, len)
);
DEFINE_EVENT(scoutfs_btree_op, scoutfs_btree_dirty,
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
TP_ARGS(sb, key, len)
);
DEFINE_EVENT(scoutfs_btree_op, scoutfs_btree_update,
TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int len),
TP_ARGS(sb, key, len)
);
DECLARE_EVENT_CLASS(scoutfs_btree_ranged_op,
TP_PROTO(struct super_block *sb, struct scoutfs_key *first,
struct scoutfs_key *last),
TP_ARGS(sb, first, last),
TP_STRUCT__entry(
__field( dev_t, dev )
__field( u64, first_ino )
__field( u64, first_off )
__field( u8, first_type )
__field( u64, last_ino )
__field( u64, last_off )
__field( u8, last_type )
),
TP_fast_assign(
__entry->dev = sb->s_dev;
__entry->first_ino = le64_to_cpu(first->inode);
__entry->first_off = le64_to_cpu(first->offset);
__entry->first_type = first->type;
__entry->last_ino = le64_to_cpu(last->inode);
__entry->last_off = le64_to_cpu(last->offset);
__entry->last_type = last->type;
),
TP_printk("dev %d,%d first key "TRACE_KEYF" last key "TRACE_KEYF,
MAJOR(__entry->dev), MINOR(__entry->dev), __entry->first_ino,
show_key_type(__entry->first_type), __entry->first_off,
__entry->last_ino, show_key_type(__entry->last_type),
__entry->last_off)
);
DEFINE_EVENT(scoutfs_btree_ranged_op, scoutfs_btree_hole,
TP_PROTO(struct super_block *sb, struct scoutfs_key *first,
struct scoutfs_key *last),
TP_ARGS(sb, first, last)
);
DEFINE_EVENT(scoutfs_btree_ranged_op, scoutfs_btree_next,
TP_PROTO(struct super_block *sb, struct scoutfs_key *first,
struct scoutfs_key *last),
TP_ARGS(sb, first, last)
);
DEFINE_EVENT(scoutfs_btree_ranged_op, scoutfs_btree_prev,
TP_PROTO(struct super_block *sb, struct scoutfs_key *first,
struct scoutfs_key *last),
TP_ARGS(sb, first, last)
);
DEFINE_EVENT(scoutfs_btree_ranged_op, scoutfs_btree_since,
TP_PROTO(struct super_block *sb, struct scoutfs_key *first,
struct scoutfs_key *last),
TP_ARGS(sb, first, last)
);
TRACE_EVENT(scoutfs_manifest_add,
TP_PROTO(struct super_block *sb, struct kvec *first,
struct kvec *last, u64 segno, u64 seq, u8 level),


@@ -25,10 +25,8 @@
#include "dir.h"
#include "xattr.h"
#include "msg.h"
#include "block.h"
#include "counters.h"
#include "trans.h"
#include "buddy.h"
#include "item.h"
#include "manifest.h"
#include "seg.h"
@@ -96,8 +94,6 @@ void scoutfs_advance_dirty_super(struct super_block *sb)
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_super_block *super = &sbi->super;
sbi->stable_super = sbi->super;
le64_add_cpu(&super->hdr.blkno, 1);
if (le64_to_cpu(super->hdr.blkno) == (SCOUTFS_SUPER_BLKNO +
SCOUTFS_SUPER_NR))
@@ -182,8 +178,6 @@ static int read_supers(struct super_block *sb)
scoutfs_info(sb, "using super %u with seq %llu",
found, le64_to_cpu(sbi->super.hdr.seq));
sbi->stable_super = sbi->super;
return 0;
}
@@ -204,23 +198,12 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
return -ENOMEM;
spin_lock_init(&sbi->next_ino_lock);
spin_lock_init(&sbi->block_lock);
/* radix only inserted with NOFS _preload */
INIT_RADIX_TREE(&sbi->block_radix, GFP_ATOMIC);
init_waitqueue_head(&sbi->block_wq);
atomic_set(&sbi->block_writes, 0);
INIT_LIST_HEAD(&sbi->block_lru_list);
init_rwsem(&sbi->btree_rwsem);
atomic_set(&sbi->trans_holds, 0);
init_waitqueue_head(&sbi->trans_hold_wq);
spin_lock_init(&sbi->trans_write_lock);
INIT_WORK(&sbi->trans_write_work, scoutfs_trans_write_func);
init_waitqueue_head(&sbi->trans_write_wq);
sbi->block_shrinker.shrink = scoutfs_block_shrink;
sbi->block_shrinker.seeks = DEFAULT_SEEKS;
register_shrinker(&sbi->block_shrinker);
/* XXX can have multiple mounts of a device, need mount id */
sbi->kset = kset_create_and_add(sb->s_id, NULL, &scoutfs_kset->kobj);
if (!sbi->kset)
@@ -269,16 +252,12 @@ static void scoutfs_kill_sb(struct super_block *sb)
if (sbi) {
scoutfs_compact_destroy(sb);
scoutfs_shutdown_trans(sb);
scoutfs_buddy_destroy(sb);
if (sbi->block_shrinker.shrink == scoutfs_block_shrink)
unregister_shrinker(&sbi->block_shrinker);
scoutfs_data_destroy(sb);
scoutfs_item_destroy(sb);
scoutfs_alloc_destroy(sb);
scoutfs_manifest_destroy(sb);
scoutfs_treap_destroy(sb);
scoutfs_seg_destroy(sb);
scoutfs_block_destroy(sb);
scoutfs_destroy_counters(sb);
if (sbi->kset)
kset_unregister(sbi->kset);


@@ -5,10 +5,8 @@
#include <linux/rbtree.h>
#include "format.h"
#include "buddy.h"
struct scoutfs_counters;
struct buddy_info;
struct item_cache;
struct manifest;
struct segment_cache;
@@ -20,20 +18,9 @@ struct scoutfs_sb_info {
struct super_block *sb;
struct scoutfs_super_block super;
struct scoutfs_super_block stable_super;
spinlock_t next_ino_lock;
spinlock_t block_lock;
struct radix_tree_root block_radix;
wait_queue_head_t block_wq;
atomic_t block_writes;
int block_write_err;
/* block cache lru */
struct shrinker block_shrinker;
struct list_head block_lru_list;
unsigned long block_lru_nr;
struct manifest *manifest;
struct item_cache *item_cache;
struct segment_cache *segment_cache;
@@ -42,10 +29,6 @@ struct scoutfs_sb_info {
struct compact_info *compact_info;
struct data_info *data_info;
struct buddy_info *buddy_info;
struct rw_semaphore btree_rwsem;
atomic_t trans_holds;
wait_queue_head_t trans_hold_wq;
struct task_struct *trans_task;
@@ -68,17 +51,6 @@ static inline struct scoutfs_sb_info *SCOUTFS_SB(struct super_block *sb)
return sb->s_fs_info;
}
/* The root of the metadata btree */
static inline struct scoutfs_btree_root *SCOUTFS_META(struct super_block *sb)
{
return &SCOUTFS_SB(sb)->super.btree_root;
}
static inline struct scoutfs_btree_root *SCOUTFS_STABLE_META(struct super_block *sb)
{
return &SCOUTFS_SB(sb)->stable_super.btree_root;
}
void scoutfs_advance_dirty_super(struct super_block *sb);
int scoutfs_write_dirty_super(struct super_block *sb);


@@ -18,9 +18,7 @@
#include <linux/writeback.h>
#include "super.h"
#include "block.h"
#include "trans.h"
#include "buddy.h"
#include "data.h"
#include "bio.h"
#include "item.h"


@@ -22,7 +22,6 @@
#include "kvec.h"
#include "item.h"
#include "trans.h"
#include "name.h"
#include "xattr.h"
/*