mirror of https://github.com/versity/scoutfs.git
scoutfs: remove lsm code
Remove all the now unused code that deals with lsm: segment IO, the item cache, and the manifest.

Signed-off-by: Zach Brown <zab@versity.com>
@@ -9,12 +9,10 @@ CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include
-include $(src)/Makefile.kernelcompat

scoutfs-y += \
	bio.o \
	balloc.o \
	block.o \
	btree.o \
	client.o \
	compact.o \
	counters.o \
	data.o \
	dir.o \
@@ -24,17 +22,14 @@ scoutfs-y += \
	forest.o \
	inode.o \
	ioctl.o \
	item.o \
	lock.o \
	lock_server.o \
	manifest.o \
	msg.o \
	net.o \
	options.o \
	per_task.o \
	quorum.o \
	scoutfs_trace.o \
	seg.o \
	server.o \
	spbm.o \
	super.o \

kmod/src/bio.c (223 lines removed)
@@ -1,223 +0,0 @@
/*
 * Copyright (C) 2016 Versity Software, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/slab.h>

#include "super.h"
#include "format.h"
#include "bio.h"
#include "scoutfs_trace.h"

struct bio_end_io_args {
	struct super_block *sb;
	atomic_t in_flight;
	int err;
	scoutfs_bio_end_io_t end_io;
	void *data;
};

static void dec_end_io(struct bio_end_io_args *args, int err)
{
	if (err && !args->err)
		args->err = err;

	trace_scoutfs_dec_end_io(args->sb, args, atomic_read(&args->in_flight),
				 err);

	if (atomic_dec_and_test(&args->in_flight)) {
		args->end_io(args->sb, args->data, args->err);
		kfree(args);
	}
}

static void bio_end_io(struct bio *bio, int err)
{
	struct bio_end_io_args *args = bio->bi_private;

	trace_scoutfs_bio_end_io(args->sb, bio, bio->bi_size, err);

	dec_end_io(args, err);
	bio_put(bio);
}

/*
 * Read or write the given number of 4k blocks from the front of the
 * pages provided by the caller.  We translate the block count into a
 * page count and fill bios a page at a time.
 *
 * The caller is responsible for ensuring that the pages aren't freed
 * while bios are in flight.
 *
 * The end_io function is always called once with the error result of
 * the IO.  It can be called before _submit returns.
 */
void scoutfs_bio_submit(struct super_block *sb, int rw, struct page **pages,
			u64 blkno, unsigned int nr_blocks,
			scoutfs_bio_end_io_t end_io, void *data)
{
	unsigned int nr_pages = DIV_ROUND_UP(nr_blocks,
					     SCOUTFS_BLOCKS_PER_PAGE);
	struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
	struct bio_end_io_args *args;
	struct blk_plug plug;
	unsigned int bytes;
	struct page *page;
	struct bio *bio = NULL;
	int ret = 0;
	int i;

	if (super->total_blocks &&
	    WARN_ON_ONCE(blkno >= le64_to_cpu(super->total_blocks))) {
		end_io(sb, data, -EIO);
		return;
	}

	args = kmalloc(sizeof(struct bio_end_io_args), GFP_NOFS);
	if (!args) {
		end_io(sb, data, -ENOMEM);
		return;
	}

	args->sb = sb;
	atomic_set(&args->in_flight, 1);
	args->err = 0;
	args->end_io = end_io;
	args->data = data;

	blk_start_plug(&plug);

	for (i = 0; i < nr_pages; i++) {
		page = pages[i];

		if (!bio) {
			bio = bio_alloc(GFP_NOFS, nr_pages - i);
			if (!bio)
				bio = bio_alloc(GFP_NOFS, 1);
			if (!bio) {
				ret = -ENOMEM;
				break;
			}

			bio->bi_sector = blkno << (SCOUTFS_BLOCK_SHIFT - 9);
			bio->bi_bdev = sb->s_bdev;
			bio->bi_end_io = bio_end_io;
			bio->bi_private = args;
		}

		bytes = min_t(int, nr_blocks << SCOUTFS_BLOCK_SHIFT, PAGE_SIZE);

		if (bio_add_page(bio, page, bytes, 0) != bytes) {
			/* submit the full bio and retry this page */
			atomic_inc(&args->in_flight);
			trace_scoutfs_bio_submit(sb, bio, args,
						 atomic_read(&args->in_flight));
			submit_bio(rw, bio);
			bio = NULL;
			i--;
			continue;
		}

		blkno += SCOUTFS_BLOCKS_PER_PAGE;
		nr_blocks -= SCOUTFS_BLOCKS_PER_PAGE;
	}

	if (bio) {
		atomic_inc(&args->in_flight);
		trace_scoutfs_bio_submit_partial(sb, bio, args,
						 atomic_read(&args->in_flight));
		submit_bio(rw, bio);
	}

	blk_finish_plug(&plug);
	dec_end_io(args, ret);
}

void scoutfs_bio_init_comp(struct scoutfs_bio_completion *comp)
{
	/* this initial pending is dropped by wait */
	atomic_set(&comp->pending, 1);
	init_completion(&comp->comp);
	comp->err = 0;
	trace_scoutfs_bio_init_comp(comp);
}

static void comp_end_io(struct super_block *sb, void *data, int err)
{
	struct scoutfs_bio_completion *comp = data;

	if (err && !comp->err)
		comp->err = err;

	trace_comp_end_io(sb, comp);

	if (atomic_dec_and_test(&comp->pending))
		complete(&comp->comp);
}

void scoutfs_bio_submit_comp(struct super_block *sb, int rw,
			     struct page **pages, u64 blkno,
			     unsigned int nr_blocks,
			     struct scoutfs_bio_completion *comp)
{
	atomic_inc(&comp->pending);
	trace_scoutfs_bio_submit_comp(sb, comp);

	scoutfs_bio_submit(sb, rw, pages, blkno, nr_blocks, comp_end_io, comp);
}

int scoutfs_bio_wait_comp(struct super_block *sb,
			  struct scoutfs_bio_completion *comp)
{
	comp_end_io(sb, comp, 0);
	trace_scoutfs_bio_wait_comp(sb, comp);
	wait_for_completion(&comp->comp);
	return comp->err;
}

/*
 * A synchronous read of the given blocks.
 *
 * XXX we could make this interruptible.
 */
int scoutfs_bio_read(struct super_block *sb, struct page **pages,
		     u64 blkno, unsigned int nr_blocks)
{
	struct scoutfs_bio_completion comp;

	scoutfs_bio_init_comp(&comp);
	scoutfs_bio_submit_comp(sb, READ, pages, blkno, nr_blocks, &comp);
	return scoutfs_bio_wait_comp(sb, &comp);
}

int scoutfs_bio_write(struct super_block *sb, struct page **pages,
		      u64 blkno, unsigned int nr_blocks)
{
	struct scoutfs_bio_completion comp;

	scoutfs_bio_init_comp(&comp);
	scoutfs_bio_submit_comp(sb, WRITE, pages, blkno, nr_blocks, &comp);

	return scoutfs_bio_wait_comp(sb, &comp);
}

/* return pointer to the blk 4k block offset amongst the pages */
void *scoutfs_page_block_address(struct page **pages, unsigned int blk)
{
	unsigned int i = blk / SCOUTFS_BLOCKS_PER_PAGE;
	unsigned int off = (blk % SCOUTFS_BLOCKS_PER_PAGE) <<
			   SCOUTFS_BLOCK_SHIFT;

	return page_address(pages[i]) + off;
}
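The comment on scoutfs_bio_submit() above spells out the API contract: end_io fires exactly once and may run before the submit call returns. As a hedged illustration, a synchronous caller of the wrapper could look like the sketch below; the helper name, page loop, and error handling are assumptions for the example, not code from this tree.

/* sketch: synchronously read nr 4k blocks starting at blkno into
 * freshly allocated pages */
static int example_read_blocks(struct super_block *sb, u64 blkno,
			       unsigned int nr)
{
	unsigned int nr_pages = DIV_ROUND_UP(nr, SCOUTFS_BLOCKS_PER_PAGE);
	struct page **pages;
	int ret = 0;
	int i;

	pages = kcalloc(nr_pages, sizeof(*pages), GFP_NOFS);
	if (!pages)
		return -ENOMEM;

	for (i = 0; i < nr_pages && ret == 0; i++) {
		pages[i] = alloc_page(GFP_NOFS);
		if (!pages[i])
			ret = -ENOMEM;
	}

	/* the pages must stay allocated until the read completes */
	if (ret == 0)
		ret = scoutfs_bio_read(sb, pages, blkno, nr);

	for (i = 0; i < nr_pages; i++)
		if (pages[i])
			__free_page(pages[i]);
	kfree(pages);

	return ret;
}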
@@ -1,43 +0,0 @@
#ifndef _SCOUTFS_BIO_H_
#define _SCOUTFS_BIO_H_

/*
 * Our little block IO wrapper is just a convenience wrapper that takes
 * our block size units and tracks multiple bios per larger io.
 *
 * If bios could hold an unlimited number of pages instead of
 * BIO_MAX_PAGES then this would just use a single bio directly.
 */

/*
 * Track aggregate IO completion for multiple multi-bio submissions.
 */
struct scoutfs_bio_completion {
	atomic_t pending;
	struct completion comp;
	long err;
};

typedef void (*scoutfs_bio_end_io_t)(struct super_block *sb, void *data,
				     int err);

void scoutfs_bio_submit(struct super_block *sb, int rw, struct page **pages,
			u64 blkno, unsigned int nr_blocks,
			scoutfs_bio_end_io_t end_io, void *data);

void scoutfs_bio_init_comp(struct scoutfs_bio_completion *comp);
void scoutfs_bio_submit_comp(struct super_block *sb, int rw,
			     struct page **pages, u64 blkno,
			     unsigned int nr_blocks,
			     struct scoutfs_bio_completion *comp);
int scoutfs_bio_wait_comp(struct super_block *sb,
			  struct scoutfs_bio_completion *comp);

int scoutfs_bio_read(struct super_block *sb, struct page **pages,
		     u64 blkno, unsigned int nr_blocks);
int scoutfs_bio_write(struct super_block *sb, struct page **pages,
		      u64 blkno, unsigned int nr_blocks);

void *scoutfs_page_block_address(struct page **pages, unsigned int blk);

#endif
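The completion struct above lets any number of multi-bio submissions share one wait, with the initial pending reference dropped by the waiter. A minimal sketch of that pattern, assuming two page arrays and block ranges the caller already set up:

	struct scoutfs_bio_completion comp;
	int err;

	scoutfs_bio_init_comp(&comp);

	/* both submissions add to the same pending count */
	scoutfs_bio_submit_comp(sb, WRITE, pages_a, blkno_a, nr_a, &comp);
	scoutfs_bio_submit_comp(sb, WRITE, pages_b, blkno_b, nr_b, &comp);

	/* drops the initial pending reference and sleeps until both finish */
	err = scoutfs_bio_wait_comp(sb, &comp);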
@@ -25,9 +25,6 @@
#include "counters.h"
#include "inode.h"
#include "btree.h"
#include "manifest.h"
#include "seg.h"
#include "compact.h"
#include "scoutfs_trace.h"
#include "msg.h"
#include "client.h"
@@ -133,40 +130,6 @@ int scoutfs_client_free_extents(struct super_block *sb,
				nexl, bytes, NULL, 0);
}

int scoutfs_client_alloc_segno(struct super_block *sb, u64 *segno)
{
	struct client_info *client = SCOUTFS_SB(sb)->client_info;
	__le64 lesegno;
	int ret;

	ret = scoutfs_net_sync_request(sb, client->conn,
				       SCOUTFS_NET_CMD_ALLOC_SEGNO,
				       NULL, 0, &lesegno, sizeof(lesegno));
	if (ret == 0) {
		if (lesegno == 0)
			ret = -ENOSPC;
		else
			*segno = le64_to_cpu(lesegno);
	}

	return ret;
}

int scoutfs_client_record_segment(struct super_block *sb,
				  struct scoutfs_segment *seg, u8 level)
{
	struct client_info *client = SCOUTFS_SB(sb)->client_info;
	struct scoutfs_net_manifest_entry net_ment;
	struct scoutfs_manifest_entry ment;

	scoutfs_seg_init_ment(&ment, level, seg);
	scoutfs_init_ment_to_net(&net_ment, &ment);

	return scoutfs_net_sync_request(sb, client->conn,
					SCOUTFS_NET_CMD_RECORD_SEGMENT,
					&net_ment, sizeof(net_ment), NULL, 0);
}

int scoutfs_client_get_log_trees(struct super_block *sb,
				 struct scoutfs_log_trees *lt)
{
@@ -219,17 +182,6 @@ int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq)
	return ret;
}

int scoutfs_client_get_manifest_root(struct super_block *sb,
				     struct scoutfs_btree_root *root)
{
	struct client_info *client = SCOUTFS_SB(sb)->client_info;

	return scoutfs_net_sync_request(sb, client->conn,
					SCOUTFS_NET_CMD_GET_MANIFEST_ROOT,
					NULL, 0, root,
					sizeof(struct scoutfs_btree_root));
}

int scoutfs_client_statfs(struct super_block *sb,
			  struct scoutfs_net_statfs *nstatfs)
{
@@ -488,61 +440,7 @@ out:
			   msecs_to_jiffies(CLIENT_CONNECT_DELAY_MS));
}

/*
 * Perform a compaction in the client as requested by the server.  The
 * server has protected the input segments and allocated the output
 * segnos for us.  This executes in work queued by the client's net
 * connection.  It only reads and writes segments.  The server will
 * update the manifest and allocators while processing the response.  An
 * error response includes the compaction id so that the server can
 * clean it up.
 *
 * If we get duplicate requests across a reconnected socket we can have
 * two workers performing the same compaction simultaneously.  This
 * isn't particularly efficient but it's rare and won't corrupt the
 * output.  Our response can be lost if the socket is shut down while
 * it's in flight; the server deals with this.
 */
static int client_compact(struct super_block *sb,
			  struct scoutfs_net_connection *conn,
			  u8 cmd, u64 id, void *arg, u16 arg_len)
{
	struct scoutfs_net_compact_response *resp = NULL;
	struct scoutfs_net_compact_request *req;
	int ret;

	if (arg_len != sizeof(struct scoutfs_net_compact_request)) {
		ret = -EINVAL;
		goto out;
	}
	req = arg;

	trace_scoutfs_client_compact_start(sb, le64_to_cpu(req->id),
					   req->last_level, req->flags);

	resp = kzalloc(sizeof(struct scoutfs_net_compact_response), GFP_NOFS);
	if (!resp) {
		ret = -ENOMEM;
	} else {
		resp->id = req->id;
		ret = scoutfs_compact(sb, req, resp);
	}

	trace_scoutfs_client_compact_stop(sb, le64_to_cpu(req->id), ret);

	if (ret < 0)
		ret = scoutfs_net_response(sb, conn, cmd, id, ret,
					   &req->id, sizeof(req->id));
	else
		ret = scoutfs_net_response(sb, conn, cmd, id, 0,
					   resp, sizeof(*resp));
	kfree(resp);
out:
	return ret;
}

static scoutfs_net_request_t client_req_funcs[] = {
	[SCOUTFS_NET_CMD_COMPACT]	= client_compact,
	[SCOUTFS_NET_CMD_LOCK]		= client_lock,
	[SCOUTFS_NET_CMD_LOCK_RECOVER]	= client_lock_recover,
};
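client_req_funcs[] above is a plain command-dispatch table indexed by protocol command number. A hypothetical dispatcher over such a table might bounds-check and reject unknown commands like this (the function name is made up for illustration):

static int dispatch_request(struct super_block *sb,
			    struct scoutfs_net_connection *conn,
			    u8 cmd, u64 id, void *arg, u16 arg_len)
{
	/* reject commands outside the table or without a handler */
	if (cmd >= ARRAY_SIZE(client_req_funcs) || !client_req_funcs[cmd])
		return -EINVAL;

	return client_req_funcs[cmd](sb, conn, cmd, id, arg, arg_len);
}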
@@ -1,17 +1,12 @@
#ifndef _SCOUTFS_CLIENT_H_
#define _SCOUTFS_CLIENT_H_

struct scoutfs_segment;

int scoutfs_client_alloc_inodes(struct super_block *sb, u64 count,
				u64 *ino, u64 *nr);
int scoutfs_client_alloc_extent(struct super_block *sb, u64 blocks, u64 *start,
				u64 *len);
int scoutfs_client_free_extents(struct super_block *sb,
				struct scoutfs_net_extent_list *nexl);
int scoutfs_client_alloc_segno(struct super_block *sb, u64 *segno);
int scoutfs_client_record_segment(struct super_block *sb,
				  struct scoutfs_segment *seg, u8 level);
int scoutfs_client_get_log_trees(struct super_block *sb,
				 struct scoutfs_log_trees *lt);
int scoutfs_client_commit_log_trees(struct super_block *sb,
@@ -19,8 +14,6 @@ int scoutfs_client_commit_log_trees(struct super_block *sb,
u64 *scoutfs_client_bulk_alloc(struct super_block *sb);
int scoutfs_client_advance_seq(struct super_block *sb, u64 *seq);
int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq);
int scoutfs_client_get_manifest_root(struct super_block *sb,
				     struct scoutfs_btree_root *root);
int scoutfs_client_statfs(struct super_block *sb,
			  struct scoutfs_net_statfs *nstatfs);
int scoutfs_client_lock_request(struct super_block *sb,
@@ -1,683 +0,0 @@
/*
 * Copyright (C) 2017 Versity Software, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sort.h>

#include "super.h"
#include "format.h"
#include "seg.h"
#include "bio.h"
#include "cmp.h"
#include "compact.h"
#include "manifest.h"
#include "counters.h"
#include "server.h"
#include "scoutfs_trace.h"

/*
 * Compaction is what maintains the exponentially increasing number of
 * segments in each level of the lsm tree and is what merges duplicate
 * and deletion keys.
 *
 * The compaction operation itself always involves a single "upper"
 * segment at a given level and a limited number of "lower" segments at
 * the next higher level whose key range intersects with the upper
 * segment.
 *
 * Compaction proceeds by iterating over the items in the upper segment
 * and items in each of the lower segments in sort order.  The items
 * from the input segments are copied into new output segments in
 * sorted order.  Space is reclaimed as duplicate or deletion items are
 * removed and fewer segments are written than were read.
 */

struct compact_seg {
	struct list_head entry;

	u64 segno;
	u64 seq;
	u8 level;
	struct scoutfs_key first;
	struct scoutfs_key last;
	struct scoutfs_segment *seg;
	int off;
	bool part_of_move;
};

struct compact_cursor {
	struct list_head csegs;

	/* buffer holds allocated segnos and any we return unused */
	u64 segnos[SCOUTFS_COMPACTION_MAX_OUTPUT];
	unsigned int nr_segnos;

	u8 lower_level;
	u8 last_level;

	struct compact_seg *upper;
	struct compact_seg *lower;

	bool sticky;
	struct compact_seg *last_lower;

	__le32 *links[SCOUTFS_MAX_SKIP_LINKS];
};

static void free_cseg(struct super_block *sb, struct compact_seg *cseg)
{
	WARN_ON_ONCE(!list_empty(&cseg->entry));

	scoutfs_seg_put(cseg->seg);
	kfree(cseg);
}

static struct compact_seg *alloc_cseg(struct super_block *sb,
				      struct scoutfs_key *first,
				      struct scoutfs_key *last)
{
	struct compact_seg *cseg;

	cseg = kzalloc(sizeof(struct compact_seg), GFP_NOFS);
	if (cseg) {
		INIT_LIST_HEAD(&cseg->entry);
		cseg->first = *first;
		cseg->last = *last;
	}

	return cseg;
}

static void free_cseg_list(struct super_block *sb, struct list_head *list)
{
	struct compact_seg *cseg;
	struct compact_seg *tmp;

	list_for_each_entry_safe(cseg, tmp, list, entry) {
		list_del_init(&cseg->entry);
		free_cseg(sb, cseg);
	}
}

static int read_segment(struct super_block *sb, struct compact_seg *cseg)
{
	struct scoutfs_segment *seg;
	int ret;

	if (cseg == NULL || cseg->seg)
		return 0;

	seg = scoutfs_seg_submit_read(sb, cseg->segno);
	if (IS_ERR(seg)) {
		ret = PTR_ERR(seg);
	} else {
		cseg->seg = seg;
		scoutfs_inc_counter(sb, compact_segment_read);
		ret = scoutfs_seg_wait(sb, cseg->seg, cseg->segno, cseg->seq);
	}

	/* XXX verify read segment metadata */

	return ret;
}

static struct compact_seg *next_spos(struct compact_cursor *curs,
				     struct compact_seg *cseg)
{
	if (cseg->entry.next == &curs->csegs)
		return NULL;

	return list_next_entry(cseg, entry);
}

/*
 * Point the caller's key and value kvecs at the next item that should
 * be copied from the upper or lower segments.  We use the item that has
 * the lowest key or the upper if they're the same.  We advance the
 * cursor past the item that is returned.
 *
 * XXX this will get fancier as we get range deletion items and
 * incremental update items.
 */
static int next_item(struct super_block *sb, struct compact_cursor *curs,
		     struct scoutfs_key *item_key, struct kvec *item_val,
		     u8 *item_flags)
{
	struct compact_seg *upper = curs->upper;
	struct compact_seg *lower = curs->lower;
	struct scoutfs_key lower_key;
	struct kvec lower_val;
	u8 lower_flags;
	int cmp;
	int ret;

retry:
	if (upper) {
		ret = scoutfs_seg_get_item(upper->seg, upper->off,
					   item_key, item_val, item_flags);
		if (ret < 0)
			upper = NULL;
	}

	while (lower) {
		ret = read_segment(sb, lower);
		if (ret)
			goto out;

		ret = scoutfs_seg_get_item(lower->seg, lower->off,
					   &lower_key, &lower_val,
					   &lower_flags);
		if (ret == 0)
			break;
		lower = next_spos(curs, lower);
	}

	/* we're done if all are empty */
	if (!upper && !lower) {
		ret = 0;
		goto out;
	}

	/*
	 * < 0: return upper, advance upper
	 * == 0: return upper, advance both
	 * > 0: return lower, advance lower
	 */
	if (upper && lower)
		cmp = scoutfs_key_compare(item_key, &lower_key);
	else if (upper)
		cmp = -1;
	else
		cmp = 1;

	if (cmp > 0) {
		*item_key = lower_key;
		*item_val = lower_val;
		*item_flags = lower_flags;
	}

	/*
	 * If we have a sticky compaction then we can't mix items from
	 * the upper level past the last lower key into the lower level.
	 * The caller will notice when they're emptying the final upper
	 * level in a sticky merge and leave it at the upper level.
	 */
	if (curs->sticky && curs->lower &&
	    (!lower || lower == curs->last_lower) &&
	    scoutfs_key_compare(item_key, &curs->last_lower->last) > 0) {
		ret = 0;
		goto out;
	}

	if (cmp <= 0)
		upper->off = scoutfs_seg_next_off(upper->seg, upper->off);
	if (cmp >= 0)
		lower->off = scoutfs_seg_next_off(lower->seg, lower->off);

	/*
	 * Deletion items make their way down all the levels, replacing
	 * all the duplicate items that they find.  When we're
	 * compacting to the last level we can remove them by retrying
	 * the search after we've advanced past them.
	 *
	 * If we're filling the remaining items in a sticky merge into
	 * the upper level then we have to preserve the deletion items.
	 */
	if ((curs->lower_level == curs->last_level) &&
	    (!curs->sticky || lower) &&
	    ((*item_flags) & SCOUTFS_ITEM_FLAG_DELETION))
		goto retry;

	ret = 1;
out:
	curs->upper = upper;
	curs->lower = lower;

	return ret;
}

static int compact_segments(struct super_block *sb,
			    struct compact_cursor *curs,
			    struct scoutfs_bio_completion *comp,
			    struct list_head *results)
{
	struct scoutfs_key item_key;
	struct scoutfs_segment *seg;
	struct compact_seg *cseg;
	struct compact_seg *upper;
	struct compact_seg *lower;
	unsigned next_segno = 0;
	bool append_filled = false;
	struct kvec item_val;
	int ret = 0;
	u8 flags;

	scoutfs_inc_counter(sb, compact_operations);
	if (curs->sticky)
		scoutfs_inc_counter(sb, compact_sticky_upper);

	while (curs->upper || curs->lower) {

		upper = curs->upper;
		lower = curs->lower;

		/*
		 * If we're at the start of the upper segment and
		 * there's no lower segment then we might as well just
		 * move the segment in the manifest.  We can't do this
		 * if we're moving to the last level because we might
		 * need to drop any deletion items.
		 *
		 * XXX We should have metadata in the manifest to tell
		 * us that there's no deletion items in the segment.
		 */
		if (upper && upper->off == 0 && !lower && !curs->sticky &&
		    ((upper->level + 1) < curs->last_level)) {

			/*
			 * XXX blah!  these csegs are getting
			 * ridiculous.  We should have a robust manifest
			 * entry iterator that reading and compacting
			 * can use.
			 */
			cseg = alloc_cseg(sb, &upper->first, &upper->last);
			if (!cseg) {
				ret = -ENOMEM;
				break;
			}

			cseg->segno = upper->segno;
			cseg->seq = upper->seq;
			cseg->level = upper->level + 1;
			cseg->seg = upper->seg;
			if (cseg->seg)
				scoutfs_seg_get(cseg->seg);
			list_add_tail(&cseg->entry, results);

			/* don't mess with its segno */
			upper->part_of_move = true;
			cseg->part_of_move = true;

			curs->upper = NULL;

			scoutfs_inc_counter(sb, compact_segment_moved);
			break;
		}

		/* we're going to need its next key */
		ret = read_segment(sb, upper);
		if (ret)
			break;

		/*
		 * XXX we could intelligently skip reading and merging
		 * lower segments here.  The lower segment won't change
		 * if:
		 *  - the lower segment is entirely before the upper
		 *  - the lower segment is full
		 *
		 * We don't have the metadata to determine that it's
		 * full today so we want to read lower segments that
		 * don't overlap so that we can merge partial lowers
		 * with their neighbours.
		 */

		ret = read_segment(sb, lower);
		if (ret)
			break;

		if (!append_filled)
			ret = next_item(sb, curs, &item_key, &item_val, &flags);
		else
			ret = 1;
		if (ret <= 0)
			break;

		/* no cseg keys, manifest update uses seg item keys */
		cseg = kzalloc(sizeof(struct compact_seg), GFP_NOFS);
		if (!cseg) {
			ret = -ENOMEM;
			break;
		}

		/* didn't get enough segnos */
		if (next_segno >= curs->nr_segnos) {
			ret = -ENOSPC;
			break;
		}

		cseg->segno = curs->segnos[next_segno];
		curs->segnos[next_segno] = 0;
		next_segno++;

		/*
		 * Compaction can free all the remaining items resulting
		 * in an empty output segment.  We just free it in that
		 * case.
		 */
		ret = scoutfs_seg_alloc(sb, cseg->segno, &seg);
		if (ret < 0) {
			next_segno--;
			curs->segnos[next_segno] = cseg->segno;
			kfree(cseg);
			scoutfs_seg_put(seg);
			break;
		}

		/*
		 * The remaining upper items in a sticky merge have to
		 * be written into the upper level.
		 */
		if (curs->sticky && !lower) {
			cseg->level = curs->lower_level - 1;
			scoutfs_inc_counter(sb, compact_sticky_written);
		} else {
			cseg->level = curs->lower_level;
		}

		/* csegs will be cleaned up once they're on the list */
		cseg->seg = seg;
		list_add_tail(&cseg->entry, results);

		for (;;) {
			if (!scoutfs_seg_append_item(sb, seg, &item_key,
						     &item_val, flags,
						     curs->links)) {
				append_filled = true;
				ret = 0;
				break;
			}
			ret = next_item(sb, curs, &item_key, &item_val, &flags);
			if (ret <= 0) {
				append_filled = false;
				break;
			}
		}
		if (ret < 0)
			break;

		/* start a complete segment write now, we'll wait later */
		ret = scoutfs_seg_submit_write(sb, seg, comp);
		if (ret)
			break;

		scoutfs_inc_counter(sb, compact_segment_writes);
		scoutfs_add_counter(sb, compact_segment_write_bytes,
				    scoutfs_seg_total_bytes(seg));
	}

	return ret;
}

/*
 * We want all the non-zero segnos sorted at the front of the array
 * and the empty segnos all packed at the end.  This is easily done by
 * subtracting one from both then comparing as usual.  All relations hold
 * except that 0 becomes the greatest instead of the least.
 */
static int sort_cmp_segnos(const void *A, const void *B)
{
	const u64 a = *(const u64 *)A - 1;
	const u64 b = *(const u64 *)B - 1;

	return a < b ? -1 : a > b ? 1 : 0;
}

static void sort_swap_segnos(void *A, void *B, int size)
{
	u64 *a = A;
	u64 *b = B;

	swap(*a, *b);
}
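The subtract-one trick above relies on unsigned wraparound: 0 - 1 wraps to U64_MAX, so empty slots compare greater than every real segno and sort to the back, while all other orderings are preserved. A small standalone demo of the comparison (hypothetical userspace code, not part of this tree):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t segnos[] = { 7, 0, 3, 0, 12 };
	int n = 5;

	/* same comparison as sort_cmp_segnos(): order by (x - 1) */
	for (int i = 0; i < n; i++)
		for (int j = i + 1; j < n; j++)
			if (segnos[j] - 1 < segnos[i] - 1) {
				uint64_t tmp = segnos[i];
				segnos[i] = segnos[j];
				segnos[j] = tmp;
			}

	/* prints "3 7 12 0 0": the zeros pack at the end */
	for (int i = 0; i < n; i++)
		printf("%llu ", (unsigned long long)segnos[i]);
	printf("\n");
	return 0;
}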
static int verify_request(struct super_block *sb,
			  struct scoutfs_net_compact_request *req)
{
	int ret = -EINVAL;
	int nr_segnos;
	int nr_ents;
	int i;

	/* no unknown flags */
	if (req->flags & ~SCOUTFS_NET_COMPACT_FLAG_STICKY)
		goto out;

	/* find the number of segments and entries */
	for (i = 0; i < ARRAY_SIZE(req->segnos); i++) {
		if (req->segnos[i] == 0)
			break;
	}
	nr_segnos = i;

	for (i = 0; i < ARRAY_SIZE(req->ents); i++) {
		if (req->ents[i].segno == 0)
			break;
	}
	nr_ents = i;

	/* must have at least an upper */
	if (nr_ents == 0)
		goto out;

	sort(req->segnos, nr_segnos, sizeof(req->segnos[i]),
	     sort_cmp_segnos, sort_swap_segnos);

	/* segnos must be unique */
	for (i = 1; i < nr_segnos; i++) {
		if (req->segnos[i] == req->segnos[i - 1])
			goto out;
	}

	/* if we have a lower it must be under upper */
	if (nr_ents > 1 && (req->ents[1].level != req->ents[0].level + 1))
		goto out;

	/* make sure lower ents are on the same level */
	for (i = 2; i < nr_ents; i++) {
		if (req->ents[i].level != req->ents[i - 1].level)
			goto out;
	}

	for (i = 1; i < nr_ents; i++) {
		/* lowers must overlap with upper */
		if (scoutfs_key_compare_ranges(&req->ents[0].first,
					       &req->ents[0].last,
					       &req->ents[i].first,
					       &req->ents[i].last) != 0)
			goto out;

		/* lowers must be on the level below upper */
		if (req->ents[i].level != req->ents[0].level + 1)
			goto out;
	}

	/* last level must include lowest level */
	if (req->last_level < req->ents[nr_ents - 1].level)
		goto out;

	for (i = 2; i < nr_ents; i++) {
		/* lowers must be sorted by first key */
		if (scoutfs_key_compare(&req->ents[i].first,
					&req->ents[i - 1].first) <= 0)
			goto out;

		/* lowers must not overlap with each other */
		if (scoutfs_key_compare_ranges(&req->ents[i].first,
					       &req->ents[i].last,
					       &req->ents[i - 1].first,
					       &req->ents[i - 1].last) == 0)
			goto out;
	}

	ret = 0;
out:
	if (WARN_ON_ONCE(ret < 0)) {
		scoutfs_inc_counter(sb, compact_invalid_request);
		printk("id %llu last_level %u flags 0x%x\n",
		       le64_to_cpu(req->id), req->last_level, req->flags);
		printk("segnos: ");
		for (i = 0; i < ARRAY_SIZE(req->segnos); i++)
			printk("%llu ", le64_to_cpu(req->segnos[i]));
		printk("\n");
		printk("entries: ");
		for (i = 0; i < ARRAY_SIZE(req->ents); i++) {
			printk(" [%u] segno %llu seq %llu level %u first "SK_FMT" last "SK_FMT"\n",
			       i, le64_to_cpu(req->ents[i].segno),
			       le64_to_cpu(req->ents[i].seq),
			       req->ents[i].level,
			       SK_ARG(&req->ents[i].first),
			       SK_ARG(&req->ents[i].last));
		}
		printk("\n");
	}

	return ret;
}

/*
 * Translate the compaction request into our native structs that we use
 * to perform the compaction.  The caller has verified that the request
 * satisfies our constraints.
 *
 * If we return an error the caller will clean up a partially prepared
 * cursor.
 */
static int prepare_curs(struct super_block *sb, struct compact_cursor *curs,
			struct scoutfs_net_compact_request *req)
{
	struct scoutfs_manifest_entry ment;
	struct compact_seg *cseg;
	int ret = 0;
	int i;

	curs->lower_level = req->ents[0].level + 1;
	curs->last_level = req->last_level;
	curs->sticky = !!(req->flags & SCOUTFS_NET_COMPACT_FLAG_STICKY);

	for (i = 0; i < ARRAY_SIZE(req->segnos); i++) {
		if (req->segnos[i] == 0)
			break;
		curs->segnos[i] = le64_to_cpu(req->segnos[i]);
	}
	curs->nr_segnos = i;

	for (i = 0; i < ARRAY_SIZE(req->ents); i++) {
		if (req->ents[i].segno == 0)
			break;

		scoutfs_init_ment_from_net(&ment, &req->ents[i]);

		cseg = alloc_cseg(sb, &ment.first, &ment.last);
		if (!cseg) {
			ret = -ENOMEM;
			break;
		}

		list_add_tail(&cseg->entry, &curs->csegs);

		cseg->segno = ment.segno;
		cseg->seq = ment.seq;
		cseg->level = ment.level;

		if (!curs->upper)
			curs->upper = cseg;
		else if (!curs->lower)
			curs->lower = cseg;
		if (curs->lower)
			curs->last_lower = cseg;
	}

	return ret;
}

/*
 * Perform a compaction by translating the incoming request into our
 * working state, iterating over input segments and writing output
 * segments, then generating the response that describes the output
 * segments.
 *
 * The server will either commit our response or clean up the request if
 * we return an error that the caller sends in response.  The server
 * protects the input segments so they shouldn't be overwritten by other
 * compactions or allocations.  We shouldn't get stale segment reads.
 */
int scoutfs_compact(struct super_block *sb,
		    struct scoutfs_net_compact_request *req,
		    struct scoutfs_net_compact_response *resp)
{
	struct compact_cursor curs = {{NULL,}};
	struct scoutfs_manifest_entry ment;
	struct scoutfs_bio_completion comp;
	struct compact_seg *cseg;
	LIST_HEAD(results);
	int ret;
	int err;
	int nr;

	INIT_LIST_HEAD(&curs.csegs);
	scoutfs_bio_init_comp(&comp);

	ret = verify_request(sb, req) ?:
	      prepare_curs(sb, &curs, req);
	if (ret)
		goto out;

	/* trace compaction ranges */
	list_for_each_entry(cseg, &curs.csegs, entry) {
		trace_scoutfs_compact_input(sb, cseg->level, cseg->segno,
					    cseg->seq, &cseg->first,
					    &cseg->last);
	}

	ret = compact_segments(sb, &curs, &comp, &results);

	/* always wait for io completion */
	err = scoutfs_bio_wait_comp(sb, &comp);
	if (!ret && err)
		ret = err;
	if (ret)
		goto out;

	/* fill entries for written output segments */
	nr = 0;
	list_for_each_entry(cseg, &results, entry) {
		/* XXX moved upper segments won't have read the segment :P */
		if (cseg->seg)
			scoutfs_seg_init_ment(&ment, cseg->level, cseg->seg);
		else
			scoutfs_manifest_init_entry(&ment, cseg->level,
						    cseg->segno, cseg->seq,
						    &cseg->first, &cseg->last);

		trace_scoutfs_compact_output(sb, ment.level, ment.segno,
					     ment.seq, &ment.first,
					     &ment.last);

		scoutfs_init_ment_to_net(&resp->ents[nr++], &ment);
	}

	ret = 0;
out:
	/* server protects input segments, shouldn't be possible */
	if (WARN_ON_ONCE(ret == -ESTALE)) {
		scoutfs_inc_counter(sb, compact_stale_error);
		ret = -EIO;
	}

	free_cseg_list(sb, &curs.csegs);
	free_cseg_list(sb, &results);

	return ret;
}
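next_item() above is, at its core, a two-way sorted merge in which equal keys prefer the newer upper item and deletion tombstones are dropped once they reach the last level. A stripped-down sketch of just that policy over two sorted arrays, with all names and types invented for the example:

struct demo_item {
	int key;
	int flags;		/* DEMO_DELETION marks a tombstone */
};
#define DEMO_DELETION 1

/* merge upper[] and lower[] into out[]; the newer upper item wins on
 * equal keys and tombstones are skipped when writing the last level */
static int demo_merge(const struct demo_item *upper, int nu,
		      const struct demo_item *lower, int nl,
		      struct demo_item *out, int last_level)
{
	int u = 0, l = 0, n = 0;

	while (u < nu || l < nl) {
		struct demo_item it;

		if (l >= nl || (u < nu && upper[u].key <= lower[l].key)) {
			it = upper[u];
			/* an equal key consumes the shadowed lower item */
			if (l < nl && lower[l].key == upper[u].key)
				l++;
			u++;
		} else {
			it = lower[l++];
		}

		if (last_level && (it.flags & DEMO_DELETION))
			continue;	/* the tombstone dies here */
		out[n++] = it;
	}

	return n;
}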
@@ -1,8 +0,0 @@
#ifndef _SCOUTFS_COMPACT_H_
#define _SCOUTFS_COMPACT_H_

int scoutfs_compact(struct super_block *sb,
		    struct scoutfs_net_compact_request *req,
		    struct scoutfs_net_compact_response *resp);

#endif
@@ -26,16 +26,6 @@
	EXPAND_COUNTER(btree_stale_read) \
	EXPAND_COUNTER(btree_write_error) \
	EXPAND_COUNTER(client_farewell_error) \
	EXPAND_COUNTER(compact_invalid_request) \
	EXPAND_COUNTER(compact_operations) \
	EXPAND_COUNTER(compact_segment_busy) \
	EXPAND_COUNTER(compact_segment_moved) \
	EXPAND_COUNTER(compact_segment_read) \
	EXPAND_COUNTER(compact_segment_write_bytes) \
	EXPAND_COUNTER(compact_segment_writes) \
	EXPAND_COUNTER(compact_stale_error) \
	EXPAND_COUNTER(compact_sticky_upper) \
	EXPAND_COUNTER(compact_sticky_written) \
	EXPAND_COUNTER(corrupt_btree_block_level) \
	EXPAND_COUNTER(corrupt_btree_no_child_ref) \
	EXPAND_COUNTER(corrupt_data_extent_trunc_cleanup) \
@@ -71,27 +61,6 @@
	EXPAND_COUNTER(extent_next) \
	EXPAND_COUNTER(extent_prev) \
	EXPAND_COUNTER(extent_remove) \
	EXPAND_COUNTER(item_alloc) \
	EXPAND_COUNTER(item_batch_duplicate) \
	EXPAND_COUNTER(item_batch_inserted) \
	EXPAND_COUNTER(item_create) \
	EXPAND_COUNTER(item_delete) \
	EXPAND_COUNTER(item_free) \
	EXPAND_COUNTER(item_lookup_hit) \
	EXPAND_COUNTER(item_lookup_miss) \
	EXPAND_COUNTER(item_range_alloc) \
	EXPAND_COUNTER(item_range_free) \
	EXPAND_COUNTER(item_range_hit) \
	EXPAND_COUNTER(item_range_insert) \
	EXPAND_COUNTER(item_range_miss) \
	EXPAND_COUNTER(item_shrink) \
	EXPAND_COUNTER(item_shrink_alone) \
	EXPAND_COUNTER(item_shrink_empty_range) \
	EXPAND_COUNTER(item_shrink_next_dirty) \
	EXPAND_COUNTER(item_shrink_outside) \
	EXPAND_COUNTER(item_shrink_range_end) \
	EXPAND_COUNTER(item_shrink_small_split) \
	EXPAND_COUNTER(item_shrink_split_range) \
	EXPAND_COUNTER(lock_alloc) \
	EXPAND_COUNTER(lock_free) \
	EXPAND_COUNTER(lock_grace_elapsed) \
@@ -113,9 +82,6 @@
	EXPAND_COUNTER(lock_shrink_request_aborted) \
	EXPAND_COUNTER(lock_unlock) \
	EXPAND_COUNTER(lock_wait) \
	EXPAND_COUNTER(manifest_compact_migrate) \
	EXPAND_COUNTER(manifest_hard_stale_error) \
	EXPAND_COUNTER(manifest_read_excluded_key) \
	EXPAND_COUNTER(net_dropped_response) \
	EXPAND_COUNTER(net_send_bytes) \
	EXPAND_COUNTER(net_send_error) \
@@ -139,28 +105,18 @@
	EXPAND_COUNTER(quorum_write_block) \
	EXPAND_COUNTER(quorum_write_block_error) \
	EXPAND_COUNTER(quorum_fenced) \
	EXPAND_COUNTER(seg_alloc) \
	EXPAND_COUNTER(seg_csum_error) \
	EXPAND_COUNTER(seg_free) \
	EXPAND_COUNTER(seg_shrink) \
	EXPAND_COUNTER(seg_stale_read) \
	EXPAND_COUNTER(server_alloc_segno) \
	EXPAND_COUNTER(server_extent_alloc) \
	EXPAND_COUNTER(server_extent_alloc_error) \
	EXPAND_COUNTER(server_free_extent) \
	EXPAND_COUNTER(server_free_pending_extent) \
	EXPAND_COUNTER(server_free_pending_error) \
	EXPAND_COUNTER(server_free_segno) \
	EXPAND_COUNTER(trans_commit_fsync) \
	EXPAND_COUNTER(trans_commit_full) \
	EXPAND_COUNTER(trans_commit_item_flush) \
	EXPAND_COUNTER(trans_commit_sync_fs) \
	EXPAND_COUNTER(trans_commit_timer) \
	EXPAND_COUNTER(trans_write_item) \
	EXPAND_COUNTER(trans_write_deletion_item)
	EXPAND_COUNTER(trans_commit_timer)

#define FIRST_COUNTER block_cache_access
#define LAST_COUNTER trans_write_deletion_item
#define LAST_COUNTER trans_commit_timer

#undef EXPAND_COUNTER
#define EXPAND_COUNTER(which) struct percpu_counter which;
@@ -23,7 +23,6 @@
#include "super.h"
#include "data.h"
#include "scoutfs_trace.h"
#include "item.h"
#include "lock.h"
#include "file.h"
#include "inode.h"
@@ -20,18 +20,6 @@
#define SCOUTFS_BLOCK_SECTORS (1 << SCOUTFS_BLOCK_SECTOR_SHIFT)
#define SCOUTFS_BLOCK_MAX (U64_MAX >> SCOUTFS_BLOCK_SHIFT)

/*
 * FS data is stored in segments, for now they're fixed size.  They'll
 * be dynamic.
 */
#define SCOUTFS_SEGMENT_SHIFT 20
#define SCOUTFS_SEGMENT_SIZE (1 << SCOUTFS_SEGMENT_SHIFT)
#define SCOUTFS_SEGMENT_MASK (SCOUTFS_SEGMENT_SIZE - 1)
#define SCOUTFS_SEGMENT_PAGES (SCOUTFS_SEGMENT_SIZE / PAGE_SIZE)
#define SCOUTFS_SEGMENT_BLOCKS (SCOUTFS_SEGMENT_SIZE / SCOUTFS_BLOCK_SIZE)
#define SCOUTFS_SEGMENT_BLOCK_SHIFT \
	(SCOUTFS_SEGMENT_SHIFT - SCOUTFS_BLOCK_SHIFT)

#define SCOUTFS_PAGES_PER_BLOCK (SCOUTFS_BLOCK_SIZE / PAGE_SIZE)
#define SCOUTFS_BLOCK_PAGE_ORDER (SCOUTFS_BLOCK_SHIFT - PAGE_SHIFT)

@@ -224,47 +212,6 @@ struct scoutfs_btree_block {
	struct scoutfs_btree_item_header item_hdrs[0];
} __packed;

struct scoutfs_btree_ring {
	__le64 first_blkno;
	__le64 nr_blocks;
	__le64 next_block;
	__le64 next_seq;
} __packed;

/*
 * This is absurdly huge.  If there was only ever 1 item per segment and
 * 2^64 items the tree could get this deep.
 */
#define SCOUTFS_MANIFEST_MAX_LEVEL 20

#define SCOUTFS_MANIFEST_FANOUT 10

struct scoutfs_manifest {
	struct scoutfs_btree_root root;
	__le64 level_counts[SCOUTFS_MANIFEST_MAX_LEVEL];
} __packed;

/*
 * Manifest entries are split across btree keys and values.  Putting
 * some entry fields in the value keeps the key smaller and increases
 * the fanout of the btree which keeps the tree smaller and reduces
 * block IO.
 *
 * The key is made up of the level, first key, and seq.  At level 0
 * segments can completely overlap and have identical key ranges but we
 * avoid duplicate btree keys by including the unique seq.
 */
struct scoutfs_manifest_btree_key {
	__u8 level;
	struct scoutfs_key_be first_key;
	__be64 seq;
} __packed;

struct scoutfs_manifest_btree_val {
	__le64 segno;
	struct scoutfs_key last_key;
} __packed;
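The layout comment above is the key point: because first_key and seq are stored big-endian after the level byte, a byte-wise memcmp() of packed keys matches the logical (level, first key, seq) sort order. A hedged sketch of filling such a key, where scoutfs_key_to_be() stands in for whatever cpu-to-big-endian key conversion the tree actually provides:

static void example_init_btree_key(struct scoutfs_manifest_btree_key *mkey,
				   u8 level, struct scoutfs_key *first, u64 seq)
{
	mkey->level = level;
	/* big-endian fields make memcmp() order match numeric order */
	scoutfs_key_to_be(&mkey->first_key, first);	/* assumed helper */
	mkey->seq = cpu_to_be64(seq);
}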
/*
 * Free metadata blocks are tracked by block allocator items.
 */
@@ -385,50 +332,6 @@ struct scoutfs_bloom_block {
	  member_sizeof(struct scoutfs_bloom_block, bits[0])) * \
	 member_sizeof(struct scoutfs_bloom_block, bits[0]) * 8) \

/*
 * The max number of links defines the max number of entries that we can
 * index in o(log n) and the static list head storage size in the
 * segment block.  We always pay the static storage cost, which is tiny,
 * and we can look at the number of items to know the greatest number of
 * links and skip most of the initial 0 links.
 */
#define SCOUTFS_MAX_SKIP_LINKS 32

/*
 * Items are packed into segments and linked together in a skip list.
 * Each item's header, links, key, and value are stored contiguously.
 * They're not allowed to cross a block boundary.
 */
struct scoutfs_segment_item {
	struct scoutfs_key key;
	__le16 val_len;
	__u8 flags;
	__u8 nr_links;
	__le32 skip_links[0];
	/* __u8 val_bytes[val_len] */
} __packed;

#define SCOUTFS_ITEM_FLAG_DELETION (1 << 0)

/*
 * Each large segment starts with a segment block that describes the
 * rest of the blocks that make up the segment.
 *
 * The crc covers the initial total_bytes of the segment but starts
 * after the padding.
 */
struct scoutfs_segment_block {
	__le32 crc;
	__le32 _padding;
	__le64 segno;
	__le64 seq;
	__le32 last_item_off;
	__le32 total_bytes;
	__le32 nr_items;
	__le32 skip_links[SCOUTFS_MAX_SKIP_LINKS];
	/* packed items */
} __packed;
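The crc comment above is easy to misread: the checksum skips its own field and the padding, so verification starts 8 bytes into the block and covers total_bytes minus that header. A sketch of a checker under the assumption that crc32c with an all-ones seed is the checksum in use (the real algorithm and seed may differ):

static bool example_segment_crc_ok(struct scoutfs_segment_block *sblk)
{
	/* skip the crc and _padding fields at the front */
	size_t skip = offsetof(struct scoutfs_segment_block, segno);
	u32 calc;

	calc = crc32c(~0, (char *)sblk + skip,
		      le32_to_cpu(sblk->total_bytes) - skip);

	return calc == le32_to_cpu(sblk->crc);
}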
/*
 * Keys are first sorted by major key zones.
 */
@@ -551,12 +454,8 @@ struct scoutfs_super_block {
	__le64 next_uninit_free_block;
	__le64 core_balloc_cursor;
	__le64 free_blocks;
	__le64 alloc_cursor;
	__le64 first_fs_blkno;
	__le64 last_fs_blkno;
	struct scoutfs_btree_ring bring;
	__le64 next_seg_seq;
	__le64 next_compact_id;
	__le64 quorum_fenced_term;
	__le64 quorum_server_term;
	__le64 unmount_barrier;
@@ -566,7 +465,6 @@ struct scoutfs_super_block {
	struct scoutfs_balloc_root core_balloc_free;
	struct scoutfs_btree_root alloc_root;
	struct scoutfs_btree_root fs_root;
	struct scoutfs_manifest manifest;
	struct scoutfs_btree_root logs_root;
	struct scoutfs_btree_root lock_clients;
	struct scoutfs_btree_root trans_seqs;
@@ -757,15 +655,11 @@ enum {
	SCOUTFS_NET_CMD_ALLOC_INODES,
	SCOUTFS_NET_CMD_ALLOC_EXTENT,
	SCOUTFS_NET_CMD_FREE_EXTENTS,
	SCOUTFS_NET_CMD_ALLOC_SEGNO,
	SCOUTFS_NET_CMD_RECORD_SEGMENT,
	SCOUTFS_NET_CMD_GET_LOG_TREES,
	SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
	SCOUTFS_NET_CMD_ADVANCE_SEQ,
	SCOUTFS_NET_CMD_GET_LAST_SEQ,
	SCOUTFS_NET_CMD_GET_MANIFEST_ROOT,
	SCOUTFS_NET_CMD_STATFS,
	SCOUTFS_NET_CMD_COMPACT,
	SCOUTFS_NET_CMD_LOCK,
	SCOUTFS_NET_CMD_LOCK_RECOVER,
	SCOUTFS_NET_CMD_FAREWELL,
@@ -804,20 +698,6 @@ struct scoutfs_net_inode_alloc {
	__le64 nr;
} __packed;

struct scoutfs_net_key_range {
	__le16 start_len;
	__le16 end_len;
	__u8 key_bytes[0];
} __packed;

struct scoutfs_net_manifest_entry {
	__le64 segno;
	__le64 seq;
	struct scoutfs_key first;
	struct scoutfs_key last;
	__u8 level;
} __packed;

struct scoutfs_net_statfs {
	__le64 total_blocks;	/* total blocks in device */
	__le64 next_ino;	/* next unused inode number */
@@ -844,50 +724,6 @@ struct scoutfs_net_extent_list {
/* arbitrarily makes a nice ~1k extent list payload */
#define SCOUTFS_NET_EXTENT_LIST_MAX_NR 64

/* one upper segment and fanout lower segments */
#define SCOUTFS_COMPACTION_MAX_INPUT (1 + SCOUTFS_MANIFEST_FANOUT)
/* sticky can split the input and item alignment padding can add a lower */
#define SCOUTFS_COMPACTION_SEGNO_OVERHEAD 2
#define SCOUTFS_COMPACTION_MAX_OUTPUT \
	(SCOUTFS_COMPACTION_MAX_INPUT + SCOUTFS_COMPACTION_SEGNO_OVERHEAD)

/*
 * A compact request is sent by the server to the client.  It provides
 * the input segments and enough allocated segnos to write the results.
 * The id uniquely identifies this compaction request and is included in
 * the response to clean up its allocated resources.
 */
struct scoutfs_net_compact_request {
	__le64 id;
	__u8 last_level;
	__u8 flags;
	__le64 segnos[SCOUTFS_COMPACTION_MAX_OUTPUT];
	struct scoutfs_net_manifest_entry ents[SCOUTFS_COMPACTION_MAX_INPUT];
} __packed;

/*
 * A sticky compaction has more lower level segments that overlap with
 * the end of the upper after the last lower level segment included in
 * the compaction.  Items left in the upper segment after the last lower
 * need to be written to the upper level instead of the lower.  The
 * upper segment "sticks" in place instead of moving down to the lower
 * level.
 */
#define SCOUTFS_NET_COMPACT_FLAG_STICKY (1 << 0)

/*
 * A compact response is sent by the client to the server.  It describes
 * the written output segments that need to be added to the manifest.
 * The server compares the response to the request to free unused
 * allocated segnos and input manifest entries.  An empty response is
 * valid and can happen if, say, the upper input segment completely
 * deleted all the items in a single overlapping lower segment.
 */
struct scoutfs_net_compact_response {
	__le64 id;
	struct scoutfs_net_manifest_entry ents[SCOUTFS_COMPACTION_MAX_OUTPUT];
} __packed;

struct scoutfs_net_lock {
	struct scoutfs_key key;
	__u8 old_mode;
@@ -28,11 +28,9 @@
#include "super.h"
#include "inode.h"
#include "forest.h"
#include "item.h"
#include "data.h"
#include "client.h"
#include "lock.h"
#include "manifest.h"
#include "trans.h"
#include "xattr.h"
#include "hash.h"
@@ -490,61 +488,6 @@ static long scoutfs_ioc_stat_more(struct file *file, unsigned long arg)
	return 0;
}

static long scoutfs_ioc_item_cache_keys(struct file *file, unsigned long arg)
{
	struct super_block *sb = file_inode(file)->i_sb;
	struct scoutfs_ioctl_item_cache_keys ick;
	struct scoutfs_ioctl_key __user *ukeys;
	struct scoutfs_ioctl_key ikeys[16];
	struct scoutfs_key keys[16];
	struct scoutfs_key key;
	unsigned int nr;
	int total;
	int ret;
	int i;

	if (copy_from_user(&ick, (void __user *)arg, sizeof(ick)))
		return -EFAULT;

	if (ick.which > SCOUTFS_IOC_ITEM_CACHE_KEYS_RANGES)
		return -EINVAL;

	scoutfs_key_copy_types(&key, &ick.ikey);

	ukeys = (void __user *)(long)ick.buf_ptr;
	total = 0;
	ret = 0;
	while (ick.buf_nr) {
		nr = min_t(size_t, ick.buf_nr, ARRAY_SIZE(keys));

		if (ick.which == SCOUTFS_IOC_ITEM_CACHE_KEYS_ITEMS)
			ret = scoutfs_item_copy_keys(sb, &key, keys, nr);
		else
			ret = scoutfs_item_copy_range_keys(sb, &key, keys, nr);
		BUG_ON(ret > nr); /* stack overflow \o/ */
		if (ret <= 0)
			break;

		for (i = 0; i < ret; i++)
			scoutfs_key_copy_types(&ikeys[i], &keys[i]);

		if (copy_to_user(ukeys, ikeys, ret * sizeof(ikeys[0]))) {
			ret = -EFAULT;
			break;
		}

		key = keys[ret - 1];
		scoutfs_key_inc(&key);

		ukeys += ret;
		ick.buf_nr -= ret;
		total += ret;
		ret = 0;
	}

	return ret ?: total;
}

static bool inc_wrapped(u64 *ino, u64 *iblock)
{
	return (++(*iblock) == 0) && (++(*ino) == 0);
@@ -876,8 +819,6 @@ long scoutfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
		return scoutfs_ioc_stage(file, arg);
	case SCOUTFS_IOC_STAT_MORE:
		return scoutfs_ioc_stat_more(file, arg);
	case SCOUTFS_IOC_ITEM_CACHE_KEYS:
		return scoutfs_ioc_item_cache_keys(file, arg);
	case SCOUTFS_IOC_DATA_WAITING:
		return scoutfs_ioc_data_waiting(file, arg);
	case SCOUTFS_IOC_SETATTR_MORE:
@@ -238,28 +238,6 @@ struct scoutfs_ioctl_stat_more {
				  struct scoutfs_ioctl_stat_more)

/*
 * Fills the buffer with either the keys for the cached items or the
 * keys for the cached ranges found starting with the given key.  The
 * number of keys filled in the buffer is returned.  When filling range
 * keys the returned number will always be a multiple of two.
 */
struct scoutfs_ioctl_item_cache_keys {
	struct scoutfs_ioctl_key ikey;
	__u64 buf_ptr;
	__u16 buf_nr;
	__u8 which;
	__u8 _pad[21];	/* padded to align _ioctl_key total size */
};

enum {
	SCOUTFS_IOC_ITEM_CACHE_KEYS_ITEMS = 0,
	SCOUTFS_IOC_ITEM_CACHE_KEYS_RANGES,
};

#define SCOUTFS_IOC_ITEM_CACHE_KEYS _IOR(SCOUTFS_IOCTL_MAGIC, 6, \
					 struct scoutfs_ioctl_item_cache_keys)
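Before its removal, the ioctl above filled a userspace buffer with cached keys and returned the count, and callers resumed from just past the last returned key. A hypothetical userspace loop using it; the buffer size, resume handling, and error treatment are assumptions for illustration:

#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>

static int dump_item_keys(int fd)
{
	struct scoutfs_ioctl_key keys[64];
	struct scoutfs_ioctl_item_cache_keys ick;
	int nr;

	memset(&ick, 0, sizeof(ick));		/* start from the zero key */
	ick.buf_ptr = (unsigned long)keys;
	ick.buf_nr = 64;
	ick.which = SCOUTFS_IOC_ITEM_CACHE_KEYS_ITEMS;

	while ((nr = ioctl(fd, SCOUTFS_IOC_ITEM_CACHE_KEYS, &ick)) > 0) {
		printf("got %d cached item keys\n", nr);
		/* resume from the last key the kernel returned,
		 * incremented as the kernel side does internally */
		ick.ikey = keys[nr - 1];
	}

	return nr;
}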
struct scoutfs_ioctl_data_waiting_entry {
	__u64 ino;
	__u64 iblock;
@@ -283,7 +261,7 @@ struct scoutfs_ioctl_data_waiting {

#define SCOUTFS_IOC_DATA_WAITING_FLAGS_UNKNOWN (U8_MAX << 0)

#define SCOUTFS_IOC_DATA_WAITING _IOR(SCOUTFS_IOCTL_MAGIC, 7, \
#define SCOUTFS_IOC_DATA_WAITING _IOR(SCOUTFS_IOCTL_MAGIC, 6, \
				      struct scoutfs_ioctl_data_waiting)

/*
@@ -303,7 +281,7 @@ struct scoutfs_ioctl_setattr_more {
#define SCOUTFS_IOC_SETATTR_MORE_OFFLINE (1 << 0)
#define SCOUTFS_IOC_SETATTR_MORE_UNKNOWN (U8_MAX << 1)

#define SCOUTFS_IOC_SETATTR_MORE _IOW(SCOUTFS_IOCTL_MAGIC, 8, \
#define SCOUTFS_IOC_SETATTR_MORE _IOW(SCOUTFS_IOCTL_MAGIC, 7, \
				      struct scoutfs_ioctl_setattr_more)

struct scoutfs_ioctl_listxattr_hidden {
@@ -313,7 +291,7 @@ struct scoutfs_ioctl_listxattr_hidden {
	__u32 hash_pos;
};

#define SCOUTFS_IOC_LISTXATTR_HIDDEN _IOR(SCOUTFS_IOCTL_MAGIC, 9, \
#define SCOUTFS_IOC_LISTXATTR_HIDDEN _IOR(SCOUTFS_IOCTL_MAGIC, 8, \
					  struct scoutfs_ioctl_listxattr_hidden)

/*
@@ -344,7 +322,7 @@ struct scoutfs_ioctl_find_xattrs {
	__u8 _pad[4];
};

#define SCOUTFS_IOC_FIND_XATTRS _IOR(SCOUTFS_IOCTL_MAGIC, 10, \
#define SCOUTFS_IOC_FIND_XATTRS _IOR(SCOUTFS_IOCTL_MAGIC, 9, \
				     struct scoutfs_ioctl_find_xattrs)

/*
@@ -365,7 +343,7 @@ struct scoutfs_ioctl_statfs_more {
	__u64 rid;
} __packed;

#define SCOUTFS_IOC_STATFS_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 11, \
#define SCOUTFS_IOC_STATFS_MORE _IOR(SCOUTFS_IOCTL_MAGIC, 10, \
				    struct scoutfs_ioctl_statfs_more)
kmod/src/item.c (2498 lines removed; diff suppressed because it is too large)
@@ -1,75 +0,0 @@
#ifndef _SCOUTFS_ITEM_H_
#define _SCOUTFS_ITEM_H_

#include <linux/uio.h>

struct scoutfs_segment;
struct scoutfs_key;

int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key *key,
			struct kvec *val, struct scoutfs_lock *lock);
int scoutfs_item_lookup_exact(struct super_block *sb,
			      struct scoutfs_key *key, struct kvec *val,
			      struct scoutfs_lock *lock);
int scoutfs_item_next(struct super_block *sb, struct scoutfs_key *key,
		      struct scoutfs_key *last, struct kvec *val,
		      struct scoutfs_lock *lock);
int scoutfs_item_prev(struct super_block *sb, struct scoutfs_key *key,
		      struct scoutfs_key *first, struct kvec *val,
		      struct scoutfs_lock *lock);
int scoutfs_item_create(struct super_block *sb, struct scoutfs_key *key,
			struct kvec *val, struct scoutfs_lock *lock);
int scoutfs_item_create_force(struct super_block *sb,
			      struct scoutfs_key *key,
			      struct kvec *val, struct scoutfs_lock *lock);
int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key *key,
		       struct scoutfs_lock *lock);
int scoutfs_item_update(struct super_block *sb, struct scoutfs_key *key,
			struct kvec *val, struct scoutfs_lock *lock);
void scoutfs_item_delete_dirty(struct super_block *sb,
			       struct scoutfs_key *key);
void scoutfs_item_update_dirty(struct super_block *sb,
			       struct scoutfs_key *key, struct kvec *val);
int scoutfs_item_delete(struct super_block *sb, struct scoutfs_key *key,
			struct scoutfs_lock *lock);
int scoutfs_item_delete_force(struct super_block *sb,
			      struct scoutfs_key *key,
			      struct scoutfs_lock *lock);
int scoutfs_item_delete_save(struct super_block *sb,
			     struct scoutfs_key *key,
			     struct list_head *list,
			     struct scoutfs_lock *lock);
int scoutfs_item_restore(struct super_block *sb, struct list_head *list,
			 struct scoutfs_lock *lock);

int scoutfs_item_add_batch(struct super_block *sb, struct list_head *list,
			   struct scoutfs_key *key, struct kvec *val);
int scoutfs_item_insert_batch(struct super_block *sb, struct list_head *list,
			      struct scoutfs_key *start,
			      struct scoutfs_key *end);
void scoutfs_item_free_batch(struct super_block *sb, struct list_head *list);

bool scoutfs_item_has_dirty(struct super_block *sb);
bool scoutfs_item_range_cached(struct super_block *sb,
			       struct scoutfs_key *start,
			       struct scoutfs_key *end, bool dirty);
bool scoutfs_item_dirty_fits_single(struct super_block *sb, u32 nr_items,
				    u32 val_bytes);
int scoutfs_item_dirty_seg(struct super_block *sb, struct scoutfs_segment *seg);
int scoutfs_item_writeback(struct super_block *sb,
			   struct scoutfs_key *start,
			   struct scoutfs_key *end);
int scoutfs_item_invalidate(struct super_block *sb,
			    struct scoutfs_key *start,
			    struct scoutfs_key *end);

int scoutfs_item_copy_range_keys(struct super_block *sb,
				 struct scoutfs_key *key,
				 struct scoutfs_key *keys, unsigned nr);
int scoutfs_item_copy_keys(struct super_block *sb, struct scoutfs_key *key,
			   struct scoutfs_key *keys, unsigned nr);

int scoutfs_item_setup(struct super_block *sb);
void scoutfs_item_destroy(struct super_block *sb);

#endif
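
The declarations above sketch the removed item cache's calling convention: values travel in a kvec and every operation happens under a scoutfs_lock that covers the key. A minimal, hypothetical round trip under those assumptions (error handling elided; the -ENOENT check mirrors the usual lookup convention but is an assumption here):

/* Hypothetical sketch: look a key up and create it if it's absent,
 * all under a lock the caller already holds. */
static int example_item_roundtrip(struct super_block *sb,
				  struct scoutfs_key *key,
				  struct scoutfs_lock *lock)
{
	char buf[16] = {0};
	struct kvec val;

	kvec_init(&val, buf, sizeof(buf));

	if (scoutfs_item_lookup(sb, key, &val, lock) == -ENOENT)
		return scoutfs_item_create(sb, key, &val, lock);
	return 0;
}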

1297
kmod/src/manifest.c
File diff suppressed because it is too large
@@ -1,52 +0,0 @@
#ifndef _SCOUTFS_MANIFEST_H_
#define _SCOUTFS_MANIFEST_H_

#include "key.h"

struct scoutfs_bio_completion;

/*
 * This native manifest entry references the physical storage of a
 * manifest entry which can exist in a segment header and its edge keys,
 * a network transmission of a packed entry and its keys, or in btree
 * blocks spread between an item key and value.
 */
struct scoutfs_manifest_entry {
	u8 level;
	u64 segno;
	u64 seq;
	struct scoutfs_key first;
	struct scoutfs_key last;
};

void scoutfs_manifest_init_entry(struct scoutfs_manifest_entry *ment,
				 u64 level, u64 segno, u64 seq,
				 struct scoutfs_key *first,
				 struct scoutfs_key *last);
int scoutfs_manifest_add(struct super_block *sb,
			 struct scoutfs_manifest_entry *ment);
int scoutfs_manifest_del(struct super_block *sb,
			 struct scoutfs_manifest_entry *ment);

int scoutfs_manifest_lock(struct super_block *sb);
int scoutfs_manifest_unlock(struct super_block *sb);

int scoutfs_manifest_read_items(struct super_block *sb,
				struct scoutfs_key *key,
				struct scoutfs_key *start,
				struct scoutfs_key *end);
int scoutfs_manifest_next_key(struct super_block *sb, struct scoutfs_key *key,
			      struct scoutfs_key *next_key);

int scoutfs_manifest_should_compact(struct super_block *sb);
int scoutfs_manifest_next_compact(struct super_block *sb,
				  struct scoutfs_net_compact_request *req);
void scoutfs_manifest_compact_done(struct super_block *sb,
				   struct scoutfs_net_compact_request *req);

bool scoutfs_manifest_level0_full(struct super_block *sb);

int scoutfs_manifest_setup(struct super_block *sb);
void scoutfs_manifest_destroy(struct super_block *sb);

#endif
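
A hedged sketch of how a native entry is built and published with the declarations above; the level, keys, and error handling are placeholders:

/* Hypothetical sketch: describe a level 0 segment with a native
 * manifest entry and add it to the manifest. */
static int example_manifest_add(struct super_block *sb, u64 segno, u64 seq,
				struct scoutfs_key *first,
				struct scoutfs_key *last)
{
	struct scoutfs_manifest_entry ment;

	scoutfs_manifest_init_entry(&ment, 0, segno, seq, first, last);
	return scoutfs_manifest_add(sb, &ment);
}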

@@ -25,9 +25,6 @@
#include "counters.h"
#include "inode.h"
#include "btree.h"
#include "manifest.h"
#include "seg.h"
#include "compact.h"
#include "scoutfs_trace.h"
#include "msg.h"
#include "net.h"

@@ -29,11 +29,9 @@
#include "key.h"
#include "format.h"
#include "lock.h"
#include "seg.h"
#include "super.h"
#include "ioctl.h"
#include "count.h"
#include "bio.h"
#include "export.h"
#include "dir.h"
#include "extents.h"
@@ -99,258 +97,6 @@ TRACE_EVENT(scoutfs_complete_truncate,
		  __entry->flags)
);

DECLARE_EVENT_CLASS(scoutfs_comp_class,
	TP_PROTO(struct super_block *sb, struct scoutfs_bio_completion *comp),

	TP_ARGS(sb, comp),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(struct scoutfs_bio_completion *, comp)
		__field(int, pending)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->comp = comp;
		__entry->pending = atomic_read(&comp->pending);
	),

	TP_printk(SCSBF" comp %p pending before %d", SCSB_TRACE_ARGS,
		  __entry->comp, __entry->pending)
);
DEFINE_EVENT(scoutfs_comp_class, comp_end_io,
	TP_PROTO(struct super_block *sb, struct scoutfs_bio_completion *comp),
	TP_ARGS(sb, comp)
);
DEFINE_EVENT(scoutfs_comp_class, scoutfs_bio_submit_comp,
	TP_PROTO(struct super_block *sb, struct scoutfs_bio_completion *comp),
	TP_ARGS(sb, comp)
);
DEFINE_EVENT(scoutfs_comp_class, scoutfs_bio_wait_comp,
	TP_PROTO(struct super_block *sb, struct scoutfs_bio_completion *comp),
	TP_ARGS(sb, comp)
);

TRACE_EVENT(scoutfs_bio_init_comp,
	TP_PROTO(void *comp),

	TP_ARGS(comp),

	TP_STRUCT__entry(
		__field(void *, comp)
	),

	TP_fast_assign(
		__entry->comp = comp;
	),

	TP_printk("initing comp %p", __entry->comp)
);

DECLARE_EVENT_CLASS(scoutfs_bio_class,
	TP_PROTO(struct super_block *sb, void *bio, void *args, int in_flight),

	TP_ARGS(sb, bio, args, in_flight),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(void *, bio)
		__field(void *, args)
		__field(int, in_flight)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->bio = bio;
		__entry->args = args;
		__entry->in_flight = in_flight;
	),

	TP_printk(SCSBF" bio %p args %p in_flight %d", SCSB_TRACE_ARGS,
		  __entry->bio, __entry->args, __entry->in_flight)
);

DEFINE_EVENT(scoutfs_bio_class, scoutfs_bio_submit,
	TP_PROTO(struct super_block *sb, void *bio, void *args, int in_flight),
	TP_ARGS(sb, bio, args, in_flight)
);

DEFINE_EVENT(scoutfs_bio_class, scoutfs_bio_submit_partial,
	TP_PROTO(struct super_block *sb, void *bio, void *args, int in_flight),
	TP_ARGS(sb, bio, args, in_flight)
);

TRACE_EVENT(scoutfs_bio_end_io,
	TP_PROTO(struct super_block *sb, void *bio, int size, int err),

	TP_ARGS(sb, bio, size, err),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(void *, bio)
		__field(int, size)
		__field(int, err)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->bio = bio;
		__entry->size = size;
		__entry->err = err;
	),

	TP_printk(SCSBF" bio %p size %u err %d", SCSB_TRACE_ARGS,
		  __entry->bio, __entry->size, __entry->err)
);

TRACE_EVENT(scoutfs_dec_end_io,
	TP_PROTO(struct super_block *sb, void *args, int in_flight, int err),

	TP_ARGS(sb, args, in_flight, err),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(void *, args)
		__field(int, in_flight)
		__field(int, err)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->args = args;
		__entry->in_flight = in_flight;
		__entry->err = err;
	),

	TP_printk(SCSBF" args %p in_flight %d err %d", SCSB_TRACE_ARGS,
		  __entry->args, __entry->in_flight, __entry->err)
);

DECLARE_EVENT_CLASS(scoutfs_key_ret_class,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int ret),

	TP_ARGS(sb, key, ret),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		sk_trace_define(key)
		__field(int, ret)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		sk_trace_assign(key, key);
		__entry->ret = ret;
	),

	TP_printk(SCSBF" key "SK_FMT" ret %d",
		  SCSB_TRACE_ARGS, sk_trace_args(key), __entry->ret)
);

DEFINE_EVENT(scoutfs_key_ret_class, scoutfs_item_create,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int ret),
	TP_ARGS(sb, key, ret)
);
DEFINE_EVENT(scoutfs_key_ret_class, scoutfs_item_delete,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int ret),
	TP_ARGS(sb, key, ret)
);
DEFINE_EVENT(scoutfs_key_ret_class, scoutfs_item_delete_save,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *key, int ret),
	TP_ARGS(sb, key, ret)
);

TRACE_EVENT(scoutfs_item_dirty_ret,
	TP_PROTO(struct super_block *sb, int ret),

	TP_ARGS(sb, ret),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(int, ret)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->ret = ret;
	),

	TP_printk(SCSBF" ret %d", SCSB_TRACE_ARGS, __entry->ret)
);

TRACE_EVENT(scoutfs_item_update_ret,
	TP_PROTO(struct super_block *sb, int ret),

	TP_ARGS(sb, ret),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(int, ret)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->ret = ret;
	),

	TP_printk(SCSBF" ret %d", SCSB_TRACE_ARGS, __entry->ret)
);

TRACE_EVENT(scoutfs_item_next_ret,
	TP_PROTO(struct super_block *sb, int ret),

	TP_ARGS(sb, ret),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(int, ret)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->ret = ret;
	),

	TP_printk(SCSBF" ret %d", SCSB_TRACE_ARGS, __entry->ret)
);

TRACE_EVENT(scoutfs_item_prev_ret,
	TP_PROTO(struct super_block *sb, int ret),

	TP_ARGS(sb, ret),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(int, ret)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->ret = ret;
	),

	TP_printk(SCSBF" ret %d", SCSB_TRACE_ARGS, __entry->ret)
);

TRACE_EVENT(scoutfs_erase_item,
	TP_PROTO(struct super_block *sb, void *item),

	TP_ARGS(sb, item),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(void *, item)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->item = item;
	),

	TP_printk(SCSBF" erasing item %p", SCSB_TRACE_ARGS, __entry->item)
);

TRACE_EVENT(scoutfs_data_fallocate,
	TP_PROTO(struct super_block *sb, u64 ino, int mode, loff_t offset,
		 loff_t len, int ret),
@@ -946,34 +692,6 @@ TRACE_EVENT(scoutfs_inode_walk_writeback,
		  __entry->ino, __entry->write, __entry->ret)
);

DECLARE_EVENT_CLASS(scoutfs_segment_class,
	TP_PROTO(struct super_block *sb, __u64 segno),

	TP_ARGS(sb, segno),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__u64, segno)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->segno = segno;
	),

	TP_printk(SCSBF" segno %llu", SCSB_TRACE_ARGS, __entry->segno)
);

DEFINE_EVENT(scoutfs_segment_class, scoutfs_seg_submit_read,
	TP_PROTO(struct super_block *sb, __u64 segno),
	TP_ARGS(sb, segno)
);

DEFINE_EVENT(scoutfs_segment_class, scoutfs_seg_submit_write,
	TP_PROTO(struct super_block *sb, __u64 segno),
	TP_ARGS(sb, segno)
);

DECLARE_EVENT_CLASS(scoutfs_lock_info_class,
	TP_PROTO(struct super_block *sb, struct lock_info *linfo),

@@ -1034,27 +752,6 @@ TRACE_EVENT(scoutfs_xattr_set,
		  __entry->size, __entry->flags)
);

TRACE_EVENT(scoutfs_manifest_next_compact,
	TP_PROTO(struct super_block *sb, int level, int ret),

	TP_ARGS(sb, level, ret),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(int, level)
		__field(int, ret)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->level = level;
		__entry->ret = ret;
	),

	TP_printk(SCSBF" level %d ret %d", SCSB_TRACE_ARGS, __entry->level,
		  __entry->ret)
);

TRACE_EVENT(scoutfs_advance_dirty_super,
	TP_PROTO(struct super_block *sb, __u64 seq),

@@ -1109,130 +806,6 @@ TRACE_EVENT(scoutfs_dir_add_next_linkref,
		  __entry->found_dir_ino, __entry->name_len)
);

TRACE_EVENT(scoutfs_client_compact_start,
	TP_PROTO(struct super_block *sb, u64 id, u8 last_level, u8 flags),

	TP_ARGS(sb, id, last_level, flags),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__u64, id)
		__field(__u8, last_level)
		__field(__u8, flags)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->id = id;
		__entry->last_level = last_level;
		__entry->flags = flags;
	),

	TP_printk(SCSBF" id %llu last_level %u flags 0x%x",
		  SCSB_TRACE_ARGS, __entry->id, __entry->last_level,
		  __entry->flags)
);

TRACE_EVENT(scoutfs_client_compact_stop,
	TP_PROTO(struct super_block *sb, u64 id, int ret),

	TP_ARGS(sb, id, ret),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__u64, id)
		__field(int, ret)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->id = id;
		__entry->ret = ret;
	),

	TP_printk(SCSBF" id %llu ret %d",
		  SCSB_TRACE_ARGS, __entry->id, __entry->ret)
);

TRACE_EVENT(scoutfs_server_compact_start,
	TP_PROTO(struct super_block *sb, u64 id, u8 level, u64 rid,
		 unsigned long client_nr, unsigned long server_nr,
		 unsigned long per_client),

	TP_ARGS(sb, id, level, rid, client_nr, server_nr, per_client),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__u64, id)
		__field(__u8, level)
		__field(__u64, c_rid)
		__field(unsigned long, client_nr)
		__field(unsigned long, server_nr)
		__field(unsigned long, per_client)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->id = id;
		__entry->level = level;
		__entry->c_rid = rid;
		__entry->client_nr = client_nr;
		__entry->server_nr = server_nr;
		__entry->per_client = per_client;
	),

	TP_printk(SCSBF" id %llu level %u rid %016llx client_nr %lu server_nr %lu per_client %lu",
		  SCSB_TRACE_ARGS, __entry->id, __entry->level,
		  __entry->c_rid, __entry->client_nr, __entry->server_nr,
		  __entry->per_client)
);

TRACE_EVENT(scoutfs_server_compact_done,
	TP_PROTO(struct super_block *sb, u64 id, u64 rid,
		 unsigned long server_nr),

	TP_ARGS(sb, id, rid, server_nr),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__u64, id)
		__field(__u64, c_rid)
		__field(unsigned long, server_nr)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->id = id;
		__entry->c_rid = rid;
		__entry->server_nr = server_nr;
	),

	TP_printk(SCSBF" id %llu rid %016llx server_nr %lu",
		  SCSB_TRACE_ARGS, __entry->id, __entry->c_rid,
		  __entry->server_nr)
);

TRACE_EVENT(scoutfs_server_compact_response,
	TP_PROTO(struct super_block *sb, u64 id, int error),

	TP_ARGS(sb, id, error),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__u64, id)
		__field(int, error)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->id = id;
		__entry->error = error;
	),

	TP_printk(SCSBF" id %llu error %d",
		  SCSB_TRACE_ARGS, __entry->id, __entry->error)
);

TRACE_EVENT(scoutfs_write_begin,
	TP_PROTO(struct super_block *sb, u64 ino, loff_t pos, unsigned len),

@@ -1382,89 +955,6 @@ TRACE_EVENT(scoutfs_scan_orphans,
	TP_printk("dev %d,%d", MAJOR(__entry->dev), MINOR(__entry->dev))
);

DECLARE_EVENT_CLASS(scoutfs_manifest_class,
	TP_PROTO(struct super_block *sb, u8 level, u64 segno, u64 seq,
		 struct scoutfs_key *first, struct scoutfs_key *last),
	TP_ARGS(sb, level, segno, seq, first, last),
	TP_STRUCT__entry(
		__field(u8, level)
		__field(u64, segno)
		__field(u64, seq)
		sk_trace_define(first)
		sk_trace_define(last)
	),
	TP_fast_assign(
		__entry->level = level;
		__entry->segno = segno;
		__entry->seq = seq;
		sk_trace_assign(first, first);
		sk_trace_assign(last, last);
	),
	TP_printk("level %u segno %llu seq %llu first "SK_FMT" last "SK_FMT,
		  __entry->level, __entry->segno, __entry->seq,
		  sk_trace_args(first), sk_trace_args(last))
);

DEFINE_EVENT(scoutfs_manifest_class, scoutfs_manifest_add,
	TP_PROTO(struct super_block *sb, u8 level, u64 segno, u64 seq,
		 struct scoutfs_key *first, struct scoutfs_key *last),
	TP_ARGS(sb, level, segno, seq, first, last)
);

DEFINE_EVENT(scoutfs_manifest_class, scoutfs_manifest_delete,
	TP_PROTO(struct super_block *sb, u8 level, u64 segno, u64 seq,
		 struct scoutfs_key *first, struct scoutfs_key *last),
	TP_ARGS(sb, level, segno, seq, first, last)
);

DEFINE_EVENT(scoutfs_manifest_class, scoutfs_compact_input,
	TP_PROTO(struct super_block *sb, u8 level, u64 segno, u64 seq,
		 struct scoutfs_key *first, struct scoutfs_key *last),
	TP_ARGS(sb, level, segno, seq, first, last)
);

DEFINE_EVENT(scoutfs_manifest_class, scoutfs_compact_output,
	TP_PROTO(struct super_block *sb, u8 level, u64 segno, u64 seq,
		 struct scoutfs_key *first, struct scoutfs_key *last),
	TP_ARGS(sb, level, segno, seq, first, last)
);

DEFINE_EVENT(scoutfs_manifest_class, scoutfs_read_item_segment,
	TP_PROTO(struct super_block *sb, u8 level, u64 segno, u64 seq,
		 struct scoutfs_key *first, struct scoutfs_key *last),
	TP_ARGS(sb, level, segno, seq, first, last)
);

TRACE_EVENT(scoutfs_read_item_keys,
	TP_PROTO(struct super_block *sb,
		 struct scoutfs_key *key,
		 struct scoutfs_key *start,
		 struct scoutfs_key *end,
		 struct scoutfs_key *seg_start,
		 struct scoutfs_key *seg_end),
	TP_ARGS(sb, key, start, end, seg_start, seg_end),
	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		sk_trace_define(key)
		sk_trace_define(start)
		sk_trace_define(end)
		sk_trace_define(seg_start)
		sk_trace_define(seg_end)
	),
	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		sk_trace_assign(key, key);
		sk_trace_assign(start, start);
		sk_trace_assign(end, end);
		sk_trace_assign(seg_start, seg_start);
		sk_trace_assign(seg_end, seg_end);
	),
	TP_printk(SCSBF" key "SK_FMT" start "SK_FMT" end "SK_FMT" seg_start "SK_FMT" seg_end "SK_FMT"",
		  SCSB_TRACE_ARGS, sk_trace_args(key), sk_trace_args(start),
		  sk_trace_args(end), sk_trace_args(seg_start),
		  sk_trace_args(seg_end))
);

DECLARE_EVENT_CLASS(scoutfs_key_class,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *key),
	TP_ARGS(sb, key),
@@ -1479,143 +969,11 @@ DECLARE_EVENT_CLASS(scoutfs_key_class,
	TP_printk(SCSBF" key "SK_FMT, SCSB_TRACE_ARGS, sk_trace_args(key))
);

DEFINE_EVENT(scoutfs_key_class, scoutfs_item_lookup,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *key),
	TP_ARGS(sb, key)
);

TRACE_EVENT(scoutfs_item_lookup_ret,
	TP_PROTO(struct super_block *sb, int ret),

	TP_ARGS(sb, ret),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(int, ret)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->ret = ret;
	),

	TP_printk(SCSBF" ret %d", SCSB_TRACE_ARGS, __entry->ret)
);

DEFINE_EVENT(scoutfs_key_class, scoutfs_item_insertion,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *key),
	TP_ARGS(sb, key)
);

DEFINE_EVENT(scoutfs_key_class, scoutfs_item_shrink,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *key),
	TP_ARGS(sb, key)
);

DEFINE_EVENT(scoutfs_key_class, scoutfs_xattr_get_next_key,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *key),
	TP_ARGS(sb, key)
);

DECLARE_EVENT_CLASS(scoutfs_range_class,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *start,
		 struct scoutfs_key *end),
	TP_ARGS(sb, start, end),
	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		sk_trace_define(start)
		sk_trace_define(end)
	),
	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		sk_trace_assign(start, start);
		sk_trace_assign(end, end);
	),
	TP_printk(SCSBF" start "SK_FMT" end "SK_FMT,
		  SCSB_TRACE_ARGS, sk_trace_args(start), sk_trace_args(end))
);

DEFINE_EVENT(scoutfs_range_class, scoutfs_item_insert_batch,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *start,
		 struct scoutfs_key *end),
	TP_ARGS(sb, start, end)
);

DEFINE_EVENT(scoutfs_range_class, scoutfs_item_invalidate_range,
	TP_PROTO(struct super_block *sb, struct scoutfs_key *start,
		 struct scoutfs_key *end),
	TP_ARGS(sb, start, end)
);

DECLARE_EVENT_CLASS(scoutfs_cached_range_class,
	TP_PROTO(struct super_block *sb, void *rng,
		 struct scoutfs_key *start, struct scoutfs_key *end),
	TP_ARGS(sb, rng, start, end),
	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(void *, rng)
		sk_trace_define(start)
		sk_trace_define(end)
	),
	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->rng = rng;
		sk_trace_assign(start, start);
		sk_trace_assign(end, end);
	),
	TP_printk(SCSBF" rng %p start "SK_FMT" end "SK_FMT,
		  SCSB_TRACE_ARGS, __entry->rng, sk_trace_args(start),
		  sk_trace_args(end))
);

DEFINE_EVENT(scoutfs_cached_range_class, scoutfs_item_range_free,
	TP_PROTO(struct super_block *sb, void *rng,
		 struct scoutfs_key *start, struct scoutfs_key *end),
	TP_ARGS(sb, rng, start, end)
);

DEFINE_EVENT(scoutfs_cached_range_class, scoutfs_item_range_ins_rb_insert,
	TP_PROTO(struct super_block *sb, void *rng,
		 struct scoutfs_key *start, struct scoutfs_key *end),
	TP_ARGS(sb, rng, start, end)
);

DEFINE_EVENT(scoutfs_cached_range_class, scoutfs_item_range_remove_mid_left,
	TP_PROTO(struct super_block *sb, void *rng,
		 struct scoutfs_key *start, struct scoutfs_key *end),
	TP_ARGS(sb, rng, start, end)
);

DEFINE_EVENT(scoutfs_cached_range_class, scoutfs_item_range_remove_start,
	TP_PROTO(struct super_block *sb, void *rng,
		 struct scoutfs_key *start, struct scoutfs_key *end),
	TP_ARGS(sb, rng, start, end)
);

DEFINE_EVENT(scoutfs_cached_range_class, scoutfs_item_range_remove_end,
	TP_PROTO(struct super_block *sb, void *rng,
		 struct scoutfs_key *start, struct scoutfs_key *end),
	TP_ARGS(sb, rng, start, end)
);

DEFINE_EVENT(scoutfs_cached_range_class, scoutfs_item_range_rem_rb_insert,
	TP_PROTO(struct super_block *sb, void *rng,
		 struct scoutfs_key *start, struct scoutfs_key *end),
	TP_ARGS(sb, rng, start, end)
);

DEFINE_EVENT(scoutfs_cached_range_class, scoutfs_item_range_shrink_start,
	TP_PROTO(struct super_block *sb, void *rng,
		 struct scoutfs_key *start, struct scoutfs_key *end),
	TP_ARGS(sb, rng, start, end)
);

DEFINE_EVENT(scoutfs_cached_range_class, scoutfs_item_range_shrink_end,
	TP_PROTO(struct super_block *sb, void *rng,
		 struct scoutfs_key *start, struct scoutfs_key *end),
	TP_ARGS(sb, rng, start, end)
);

#define lock_mode(mode) \
	__print_symbolic(mode, \
		{ SCOUTFS_LOCK_NULL, "NULL" }, \
@@ -1708,79 +1066,6 @@ DEFINE_EVENT(scoutfs_lock_class, scoutfs_lock_shrink,
	TP_ARGS(sb, lck)
);

DECLARE_EVENT_CLASS(scoutfs_seg_class,
	TP_PROTO(struct scoutfs_segment *seg),
	TP_ARGS(seg),
	TP_STRUCT__entry(
		__field(unsigned int, major)
		__field(unsigned int, minor)
		__field(struct scoutfs_segment *, seg)
		__field(int, refcount)
		__field(u64, segno)
		__field(unsigned long, flags)
		__field(int, err)
	),
	TP_fast_assign(
		__entry->major = MAJOR(seg->sb->s_bdev->bd_dev);
		__entry->minor = MINOR(seg->sb->s_bdev->bd_dev);
		__entry->seg = seg;
		__entry->refcount = atomic_read(&seg->refcount);
		__entry->segno = seg->segno;
		__entry->flags = seg->flags;
		__entry->err = seg->err;
	),
	TP_printk("dev %u:%u seg %p refcount %d segno %llu flags %lx err %d",
		  __entry->major, __entry->minor, __entry->seg, __entry->refcount,
		  __entry->segno, __entry->flags, __entry->err)
);

DEFINE_EVENT(scoutfs_seg_class, scoutfs_seg_alloc,
	TP_PROTO(struct scoutfs_segment *seg),
	TP_ARGS(seg)
);

DEFINE_EVENT(scoutfs_seg_class, scoutfs_seg_shrink,
	TP_PROTO(struct scoutfs_segment *seg),
	TP_ARGS(seg)
);

DEFINE_EVENT(scoutfs_seg_class, scoutfs_seg_free,
	TP_PROTO(struct scoutfs_segment *seg),
	TP_ARGS(seg)
);

TRACE_EVENT(scoutfs_seg_append_item,
	TP_PROTO(struct super_block *sb, u64 segno, u64 seq, u32 nr_items,
		 u32 total_bytes, struct scoutfs_key *key, u16 val_len),

	TP_ARGS(sb, segno, seq, nr_items, total_bytes, key, val_len),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__u64, segno)
		__field(__u64, seq)
		__field(__u32, nr_items)
		__field(__u32, total_bytes)
		sk_trace_define(key)
		__field(__u16, val_len)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->segno = segno;
		__entry->seq = seq;
		__entry->nr_items = nr_items;
		__entry->total_bytes = total_bytes;
		sk_trace_assign(key, key);
		__entry->val_len = val_len;
	),

	TP_printk(SCSBF" segno %llu seq %llu nr_items %u total_bytes %u key "SK_FMT" val_len %u",
		  SCSB_TRACE_ARGS, __entry->segno, __entry->seq,
		  __entry->nr_items, __entry->total_bytes,
		  sk_trace_args(key), __entry->val_len)
);

DECLARE_EVENT_CLASS(scoutfs_net_class,
	TP_PROTO(struct super_block *sb, struct sockaddr_in *name,
		 struct sockaddr_in *peer, struct scoutfs_net_header *nh),
@@ -1961,14 +1246,6 @@ DEFINE_EVENT(scoutfs_work_class, scoutfs_server_commit_work_exit,
	TP_PROTO(struct super_block *sb, u64 data, int ret),
	TP_ARGS(sb, data, ret)
);
DEFINE_EVENT(scoutfs_work_class, scoutfs_server_compact_work_enter,
	TP_PROTO(struct super_block *sb, u64 data, int ret),
	TP_ARGS(sb, data, ret)
);
DEFINE_EVENT(scoutfs_work_class, scoutfs_server_compact_work_exit,
	TP_PROTO(struct super_block *sb, u64 data, int ret),
	TP_ARGS(sb, data, ret)
);
DEFINE_EVENT(scoutfs_work_class, scoutfs_net_proc_work_enter,
	TP_PROTO(struct super_block *sb, u64 data, int ret),
	TP_ARGS(sb, data, ret)
@@ -2054,66 +1331,6 @@ DEFINE_EVENT(scoutfs_work_class, scoutfs_data_return_server_extents_exit,
	TP_ARGS(sb, data, ret)
);

TRACE_EVENT(scoutfs_item_next_range_check,
	TP_PROTO(struct super_block *sb, int cached,
		 struct scoutfs_key *key, struct scoutfs_key *pos,
		 struct scoutfs_key *last, struct scoutfs_key *end,
		 struct scoutfs_key *range_end),
	TP_ARGS(sb, cached, key, pos, last, end, range_end),
	TP_STRUCT__entry(
		__field(void *, sb)
		__field(int, cached)
		sk_trace_define(key)
		sk_trace_define(pos)
		sk_trace_define(last)
		sk_trace_define(end)
		sk_trace_define(range_end)
	),
	TP_fast_assign(
		__entry->sb = sb;
		__entry->cached = cached;
		sk_trace_assign(key, key);
		sk_trace_assign(pos, pos);
		sk_trace_assign(last, last);
		sk_trace_assign(end, end);
		sk_trace_assign(range_end, range_end);
	),
	TP_printk("sb %p cached %d key "SK_FMT" pos "SK_FMT" last "SK_FMT" end "SK_FMT" range_end "SK_FMT,
		  __entry->sb, __entry->cached, sk_trace_args(key),
		  sk_trace_args(pos), sk_trace_args(last),
		  sk_trace_args(end), sk_trace_args(range_end))
);

TRACE_EVENT(scoutfs_item_prev_range_check,
	TP_PROTO(struct super_block *sb, int cached,
		 struct scoutfs_key *key, struct scoutfs_key *pos,
		 struct scoutfs_key *first, struct scoutfs_key *start,
		 struct scoutfs_key *range_start),
	TP_ARGS(sb, cached, key, pos, first, start, range_start),
	TP_STRUCT__entry(
		__field(void *, sb)
		__field(int, cached)
		sk_trace_define(key)
		sk_trace_define(pos)
		sk_trace_define(first)
		sk_trace_define(start)
		sk_trace_define(range_start)
	),
	TP_fast_assign(
		__entry->sb = sb;
		__entry->cached = cached;
		sk_trace_assign(key, key);
		sk_trace_assign(pos, pos);
		sk_trace_assign(first, first);
		sk_trace_assign(start, start);
		sk_trace_assign(range_start, range_start);
	),
	TP_printk("sb %p cached %d key "SK_FMT" pos "SK_FMT" first "SK_FMT" start "SK_FMT" range_start "SK_FMT,
		  __entry->sb, __entry->cached, sk_trace_args(key),
		  sk_trace_args(pos), sk_trace_args(first),
		  sk_trace_args(start), sk_trace_args(range_start))
);

DECLARE_EVENT_CLASS(scoutfs_shrink_exit_class,
	TP_PROTO(struct super_block *sb, unsigned long nr_to_scan, int ret),
	TP_ARGS(sb, nr_to_scan, ret),
@@ -2136,50 +1353,6 @@ DEFINE_EVENT(scoutfs_shrink_exit_class, scoutfs_lock_shrink_exit,
	TP_ARGS(sb, nr_to_scan, ret)
);

DEFINE_EVENT(scoutfs_shrink_exit_class, scoutfs_seg_shrink_exit,
	TP_PROTO(struct super_block *sb, unsigned long nr_to_scan, int ret),
	TP_ARGS(sb, nr_to_scan, ret)
);

DEFINE_EVENT(scoutfs_shrink_exit_class, scoutfs_item_shrink_exit,
	TP_PROTO(struct super_block *sb, unsigned long nr_to_scan, int ret),
	TP_ARGS(sb, nr_to_scan, ret)
);

TRACE_EVENT(scoutfs_item_shrink_around,
	TP_PROTO(struct super_block *sb,
		 struct scoutfs_key *rng_start,
		 struct scoutfs_key *rng_end, struct scoutfs_key *item,
		 struct scoutfs_key *prev, struct scoutfs_key *first,
		 struct scoutfs_key *last, struct scoutfs_key *next),
	TP_ARGS(sb, rng_start, rng_end, item, prev, first, last, next),
	TP_STRUCT__entry(
		__field(void *, sb)
		sk_trace_define(rng_start)
		sk_trace_define(rng_end)
		sk_trace_define(item)
		sk_trace_define(prev)
		sk_trace_define(first)
		sk_trace_define(last)
		sk_trace_define(next)
	),
	TP_fast_assign(
		__entry->sb = sb;
		sk_trace_assign(rng_start, rng_start);
		sk_trace_assign(rng_end, rng_end);
		sk_trace_assign(item, item);
		sk_trace_assign(prev, prev);
		sk_trace_assign(first, first);
		sk_trace_assign(last, last);
		sk_trace_assign(next, next);
	),
	TP_printk("sb %p rng_start "SK_FMT" rng_end "SK_FMT" item "SK_FMT" prev "SK_FMT" first "SK_FMT" last "SK_FMT" next "SK_FMT,
		  __entry->sb, sk_trace_args(rng_start),
		  sk_trace_args(rng_end), sk_trace_args(item),
		  sk_trace_args(prev), sk_trace_args(first),
		  sk_trace_args(last), sk_trace_args(next))
);

TRACE_EVENT(scoutfs_rename,
	TP_PROTO(struct super_block *sb, struct inode *old_dir,
		 struct dentry *old_dentry, struct inode *new_dir,
@@ -2515,14 +1688,6 @@ DEFINE_EVENT(scoutfs_extent_class, scoutfs_server_alloc_extent_allocated,
	TP_PROTO(struct super_block *sb, struct scoutfs_extent *ext),
	TP_ARGS(sb, ext)
);
DEFINE_EVENT(scoutfs_extent_class, scoutfs_server_alloc_segno_next,
	TP_PROTO(struct super_block *sb, struct scoutfs_extent *ext),
	TP_ARGS(sb, ext)
);
DEFINE_EVENT(scoutfs_extent_class, scoutfs_server_alloc_segno_allocated,
	TP_PROTO(struct super_block *sb, struct scoutfs_extent *ext),
	TP_ARGS(sb, ext)
);
DEFINE_EVENT(scoutfs_extent_class, scoutfs_server_free_pending_extent,
	TP_PROTO(struct super_block *sb, struct scoutfs_extent *ext),
	TP_ARGS(sb, ext)
@@ -2559,37 +1724,6 @@ TRACE_EVENT(scoutfs_online_offline_blocks,
		  __entry->on_now, __entry->off_now)
);

DECLARE_EVENT_CLASS(scoutfs_segno_class,
	TP_PROTO(struct super_block *sb, u64 segno),

	TP_ARGS(sb, segno),

	TP_STRUCT__entry(
		SCSB_TRACE_FIELDS
		__field(__s64, segno)
	),

	TP_fast_assign(
		SCSB_TRACE_ASSIGN(sb);
		__entry->segno = segno;
	),

	TP_printk(SCSBF" segno %llu",
		  SCSB_TRACE_ARGS, __entry->segno)
);
DEFINE_EVENT(scoutfs_segno_class, scoutfs_alloc_segno,
	TP_PROTO(struct super_block *sb, u64 segno),
	TP_ARGS(sb, segno)
);
DEFINE_EVENT(scoutfs_segno_class, scoutfs_free_segno,
	TP_PROTO(struct super_block *sb, u64 segno),
	TP_ARGS(sb, segno)
);
DEFINE_EVENT(scoutfs_segno_class, scoutfs_remove_segno,
	TP_PROTO(struct super_block *sb, u64 segno),
	TP_ARGS(sb, segno)
);

DECLARE_EVENT_CLASS(scoutfs_server_client_count_class,
	TP_PROTO(struct super_block *sb, u64 rid, unsigned long nr_clients),

868
kmod/src/seg.c
@@ -1,868 +0,0 @@
/*
 * Copyright (C) 2016 Versity Software, Inc. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/crc32c.h>

#include "super.h"
#include "format.h"
#include "seg.h"
#include "bio.h"
#include "kvec.h"
#include "cmp.h"
#include "manifest.h"
#include "key.h"
#include "counters.h"
#include "triggers.h"
#include "msg.h"
#include "server.h"
#include "scoutfs_trace.h"

/*
 * seg.c should just be about the cache and io, and maybe
 * iteration and stuff.
 *
 * XXX:
 *  - lru and shrinker
 *  - verify csum
 *  - make sure item headers don't cross page boundaries
 *  - just wait on pages instead of weird flags?
 */

struct segment_cache {
	struct super_block *sb;
	spinlock_t lock;
	struct rb_root root;
	wait_queue_head_t waitq;

	struct shrinker shrinker;
	struct list_head lru_list;
	unsigned long lru_nr;
};

enum {
	SF_END_IO = 0,
	SF_CALC_CRC_STARTED,
	SF_CALC_CRC_DONE,
	SF_INVALID_CRC,
};

static void *off_ptr(struct scoutfs_segment *seg, u32 off)
{
	unsigned int pg = off >> PAGE_SHIFT;
	unsigned int pg_off = off & ~PAGE_MASK;

	return page_address(seg->pages[pg]) + pg_off;
}

static struct scoutfs_segment *alloc_seg(struct super_block *sb, u64 segno)
{
	struct scoutfs_segment *seg;
	struct page *page;
	int i;

	/* don't waste the tail of pages */
	BUILD_BUG_ON(SCOUTFS_SEGMENT_SIZE % PAGE_SIZE);

	seg = kzalloc(sizeof(struct scoutfs_segment), GFP_NOFS);
	if (!seg)
		return seg;

	seg->sb = sb;
	RB_CLEAR_NODE(&seg->node);
	INIT_LIST_HEAD(&seg->lru_entry);
	atomic_set(&seg->refcount, 1);
	seg->segno = segno;

	for (i = 0; i < SCOUTFS_SEGMENT_PAGES; i++) {
		page = alloc_page(GFP_NOFS);
		if (!page) {
			scoutfs_seg_put(seg);
			return ERR_PTR(-ENOMEM);
		}

		seg->pages[i] = page;
	}

	trace_scoutfs_seg_alloc(seg);
	scoutfs_inc_counter(sb, seg_alloc);

	return seg;
}

void scoutfs_seg_get(struct scoutfs_segment *seg)
{
	atomic_inc(&seg->refcount);
}

void scoutfs_seg_put(struct scoutfs_segment *seg)
{
	int i;

	if (!IS_ERR_OR_NULL(seg) && atomic_dec_and_test(&seg->refcount)) {
		trace_scoutfs_seg_free(seg);
		scoutfs_inc_counter(seg->sb, seg_free);
		WARN_ON_ONCE(!RB_EMPTY_NODE(&seg->node));
		WARN_ON_ONCE(!list_empty(&seg->lru_entry));
		for (i = 0; i < SCOUTFS_SEGMENT_PAGES; i++)
			if (seg->pages[i])
				__free_page(seg->pages[i]);
		kfree(seg);
	}
}

static struct scoutfs_segment *find_seg(struct rb_root *root, u64 segno)
{
	struct rb_node *node = root->rb_node;
	struct rb_node *parent = NULL;
	struct scoutfs_segment *seg;
	int cmp;

	while (node) {
		parent = node;
		seg = container_of(node, struct scoutfs_segment, node);

		cmp = scoutfs_cmp_u64s(segno, seg->segno);
		if (cmp < 0)
			node = node->rb_left;
		else if (cmp > 0)
			node = node->rb_right;
		else
			return seg;
	}

	return NULL;
}

static void lru_check(struct segment_cache *cac, struct scoutfs_segment *seg)
{
	if (RB_EMPTY_NODE(&seg->node)) {
		if (!list_empty(&seg->lru_entry)) {
			list_del_init(&seg->lru_entry);
			cac->lru_nr--;
		}
	} else {
		if (list_empty(&seg->lru_entry)) {
			list_add_tail(&seg->lru_entry, &cac->lru_list);
			cac->lru_nr++;
		} else {
			list_move_tail(&seg->lru_entry, &cac->lru_list);
		}
	}
}

static __le32 calc_seg_crc(struct scoutfs_segment *seg)
{
	u32 total = scoutfs_seg_total_bytes(seg);
	u32 crc = ~0;
	u32 off;
	u32 len;

	off = offsetof(struct scoutfs_segment_block, _padding) +
	      FIELD_SIZEOF(struct scoutfs_segment_block, _padding);

	while (off < total) {
		len = min(total - off,
			  SCOUTFS_BLOCK_SIZE - (off & SCOUTFS_BLOCK_MASK));
		crc = crc32c(crc, off_ptr(seg, off), len);
		off += len;
	}

	return cpu_to_le32(crc);
}
/*
 * This always inserts the segment into the rbtree.  If there's already
 * a segment at the given segno then it is removed and returned.  The
 * caller doesn't have to erase it from the tree if it's returned but it
 * does have to put the reference that it's given.
 */
static struct scoutfs_segment *replace_seg(struct segment_cache *cac,
					   struct scoutfs_segment *ins)
{
	struct rb_root *root = &cac->root;
	struct rb_node **node = &root->rb_node;
	struct rb_node *parent = NULL;
	struct scoutfs_segment *seg;
	struct scoutfs_segment *found = NULL;
	int cmp;

	while (*node) {
		parent = *node;
		seg = container_of(*node, struct scoutfs_segment, node);

		cmp = scoutfs_cmp_u64s(ins->segno, seg->segno);
		if (cmp < 0) {
			node = &(*node)->rb_left;
		} else if (cmp > 0) {
			node = &(*node)->rb_right;
		} else {
			rb_replace_node(&seg->node, &ins->node, root);
			RB_CLEAR_NODE(&seg->node);
			lru_check(cac, seg);
			lru_check(cac, ins);
			found = seg;
			break;
		}
	}

	if (!found) {
		rb_link_node(&ins->node, parent, node);
		rb_insert_color(&ins->node, root);
		lru_check(cac, ins);
	}

	return found;
}

static bool erase_seg(struct segment_cache *cac, struct scoutfs_segment *seg)
{
	if (!RB_EMPTY_NODE(&seg->node)) {
		rb_erase(&seg->node, &cac->root);
		RB_CLEAR_NODE(&seg->node);
		lru_check(cac, seg);
		return true;
	}

	return false;
}

static void seg_end_io(struct super_block *sb, void *data, int err)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct segment_cache *cac = sbi->segment_cache;
	struct scoutfs_segment *seg = data;
	unsigned long flags;
	bool erased = false;

	spin_lock_irqsave(&cac->lock, flags);

	set_bit(SF_END_IO, &seg->flags);

	if (err) {
		seg->err = err;
		erased = erase_seg(cac, seg);
	} else {
		lru_check(cac, seg);
	}

	spin_unlock_irqrestore(&cac->lock, flags);

	smp_mb__after_atomic();
	if (waitqueue_active(&cac->waitq))
		wake_up(&cac->waitq);

	if (erased)
		scoutfs_seg_put(seg);
	scoutfs_seg_put(seg);
}

static u64 segno_to_blkno(u64 blkno)
{
	return blkno << (SCOUTFS_SEGMENT_SHIFT - SCOUTFS_BLOCK_SHIFT);
}

int scoutfs_seg_alloc(struct super_block *sb, u64 segno,
		      struct scoutfs_segment **seg_ret)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct segment_cache *cac = sbi->segment_cache;
	struct scoutfs_segment *existing;
	struct scoutfs_segment *seg;
	unsigned long flags;
	int ret;

	seg = alloc_seg(sb, segno);
	if (!seg) {
		ret = -ENOMEM;
		goto out;
	}

	/* reads shouldn't wait for this */
	set_bit(SF_END_IO, &seg->flags);

	/* zero the block header so the caller knows to initialize */
	memset(page_address(seg->pages[0]), 0,
	       sizeof(struct scoutfs_segment_block));

	/* XXX always remove existing segs, is that necessary? */
	spin_lock_irqsave(&cac->lock, flags);
	atomic_inc(&seg->refcount);
	existing = replace_seg(cac, seg);
	spin_unlock_irqrestore(&cac->lock, flags);
	if (existing)
		scoutfs_seg_put(existing);

	ret = 0;
out:
	*seg_ret = seg;
	return ret;
}
/*
 * The bios submitted by this don't have page references themselves.  If
 * this succeeds then the caller must call _wait before putting their
 * seg ref.
 */
struct scoutfs_segment *scoutfs_seg_submit_read(struct super_block *sb,
						u64 segno)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct segment_cache *cac = sbi->segment_cache;
	struct scoutfs_segment *existing;
	struct scoutfs_segment *seg;
	unsigned long flags;

	trace_scoutfs_seg_submit_read(sb, segno);

	spin_lock_irqsave(&cac->lock, flags);
	seg = find_seg(&cac->root, segno);
	if (seg) {
		lru_check(cac, seg);
		atomic_inc(&seg->refcount);
	}
	spin_unlock_irqrestore(&cac->lock, flags);
	if (seg)
		return seg;

	seg = alloc_seg(sb, segno);
	if (IS_ERR(seg))
		return seg;

	/* always drop existing segs, could compare seqs */
	spin_lock_irqsave(&cac->lock, flags);
	atomic_inc(&seg->refcount);
	existing = replace_seg(cac, seg);
	spin_unlock_irqrestore(&cac->lock, flags);
	if (existing)
		scoutfs_seg_put(existing);

	atomic_inc(&seg->refcount);
	scoutfs_bio_submit(sb, READ, seg->pages, segno_to_blkno(seg->segno),
			   SCOUTFS_SEGMENT_BLOCKS, seg_end_io, seg);

	return seg;
}

/*
 * The caller has ensured that the segment won't be modified while
 * it is in flight.
 */
int scoutfs_seg_submit_write(struct super_block *sb,
			     struct scoutfs_segment *seg,
			     struct scoutfs_bio_completion *comp)
{
	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);

	trace_scoutfs_seg_submit_write(sb, seg->segno);

	sblk->crc = calc_seg_crc(seg);

	scoutfs_bio_submit_comp(sb, WRITE, seg->pages,
				segno_to_blkno(seg->segno),
				SCOUTFS_SEGMENT_BLOCKS, comp);

	return 0;
}

/*
 * Wait for IO on the segment to complete.
 *
 * The caller provides the segno and seq from their segment reference to
 * validate that we found the version of the segment that they were
 * looking for.  If we find an old cached version we return -ESTALE and
 * the caller has to retry its reference to find the current segment for
 * its operation.  (Typically by getting a new manifest btree root and
 * searching for keys in the manifest.)
 *
 * An invalid crc can come from racing to read a stale segment while
 * it's being written.  The caller will retry and consider it corrupt
 * if it keeps getting stale reads.
 */
int scoutfs_seg_wait(struct super_block *sb, struct scoutfs_segment *seg,
		     u64 segno, u64 seq)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct segment_cache *cac = sbi->segment_cache;
	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
	unsigned long flags;
	bool erased;
	int ret;

	ret = wait_event_interruptible(cac->waitq,
				       test_bit(SF_END_IO, &seg->flags));
	if (ret)
		goto out;

	if (seg->err) {
		ret = seg->err;
		goto out;
	}

	/* calc crc in waiting task instead of end_io */
	if (!test_bit(SF_CALC_CRC_DONE, &seg->flags) &&
	    !test_and_set_bit(SF_CALC_CRC_STARTED, &seg->flags)) {
		if (sblk->crc != calc_seg_crc(seg)) {
			scoutfs_inc_counter(sb, seg_csum_error);
			set_bit(SF_INVALID_CRC, &seg->flags);
		}
		set_bit(SF_CALC_CRC_DONE, &seg->flags);
		wake_up(&cac->waitq);
	}

	/* very rarely race waiting for calc to finish */
	ret = wait_event_interruptible(cac->waitq,
				       test_bit(SF_CALC_CRC_DONE, &seg->flags));
	if (ret)
		goto out;

	sblk = off_ptr(seg, 0);

	if (test_bit(SF_INVALID_CRC, &seg->flags) ||
	    segno != le64_to_cpu(sblk->segno) ||
	    seq != le64_to_cpu(sblk->seq) ||
	    scoutfs_trigger(sb, SEG_STALE_READ)) {
		spin_lock_irqsave(&cac->lock, flags);
		erased = erase_seg(cac, seg);
		spin_unlock_irqrestore(&cac->lock, flags);
		if (erased)
			scoutfs_seg_put(seg);

		scoutfs_inc_counter(sb, seg_stale_read);
		ret = -ESTALE;
	}
out:
	return ret;
}
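
The comment above describes a retry protocol for readers; a hedged sketch of a caller, where refreshing the segno/seq from a new manifest root is left as a hypothetical step:

/* Hypothetical sketch: read a segment and retry on -ESTALE with a
 * refreshed manifest reference (the refresh itself is elided). */
static struct scoutfs_segment *example_read_seg(struct super_block *sb,
						u64 segno, u64 seq)
{
	struct scoutfs_segment *seg;
	int ret;

	do {
		seg = scoutfs_seg_submit_read(sb, segno);
		if (IS_ERR(seg))
			return seg;

		ret = scoutfs_seg_wait(sb, seg, segno, seq);
		if (ret == -ESTALE) {
			/* stale cached copy: drop it, refresh segno/seq
			 * from the manifest, and try again */
			scoutfs_seg_put(seg);
		}
	} while (ret == -ESTALE);

	if (ret) {
		scoutfs_seg_put(seg);
		seg = ERR_PTR(ret);
	}
	return seg;
}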

static u32 item_bytes(u8 nr_links, u16 val_len)
{
	return offsetof(struct scoutfs_segment_item, skip_links[nr_links]) +
	       val_len;
}

static inline void *item_val_ptr(struct scoutfs_segment_item *item)
{
	return (void *)item + item_bytes(item->nr_links, 0);
}

/* copy the item key into the caller's key and init their val to ref the val */
static void get_item_key_val(struct scoutfs_segment *seg, int off,
			     struct scoutfs_key *key, struct kvec *val)
{
	struct scoutfs_segment_item *item = off_ptr(seg, off);

	if (key)
		*key = item->key;

	if (val)
		kvec_init(val, item_val_ptr(item), le16_to_cpu(item->val_len));
}

static void first_last_keys(struct scoutfs_segment *seg,
			    struct scoutfs_key *first,
			    struct scoutfs_key *last)
{
	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);

	get_item_key_val(seg, sizeof(struct scoutfs_segment_block),
			 first, NULL);
	get_item_key_val(seg, le32_to_cpu(sblk->last_item_off), last, NULL);
}

static int check_caller_off(struct scoutfs_segment_block *sblk, int off)
{
	if (off >= 0 && off < sizeof(struct scoutfs_segment_block))
		off = sizeof(struct scoutfs_segment_block);

	if (off > le32_to_cpu(sblk->last_item_off))
		off = -ENOENT;

	return off;
}

/*
 * Give the caller the key and value of the item at the given offset.
 *
 * Negative offsets are sticky errors and offsets outside the used bytes
 * in the segment return -ENOENT.
 *
 * All other offsets must be initial values less than the segment header
 * size, notably including 0, or returned from _next_off().
 */
int scoutfs_seg_get_item(struct scoutfs_segment *seg, int off,
			 struct scoutfs_key *key, struct kvec *val, u8 *flags)
{
	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
	struct scoutfs_segment_item *item;

	off = check_caller_off(sblk, off);
	if (off < 0)
		return off;

	get_item_key_val(seg, off, key, val);

	if (flags) {
		item = off_ptr(seg, off);
		*flags = item->flags;
	}

	return 0;
}

/*
 * Return the number of links that the *next* added node should have.
 * We're appending in order so we can use the low bits of the node count
 * to get an ideal distribution of the number of links in each node to
 * enable (log n) searching.  Half of the nodes will have 1 link, a
 * quarter will have 2, an eighth will have 3, and so on.
 */
static u8 skip_next_nr(u32 nr_items)
{
	return ffs(nr_items + 1);
}

/* The highest 1-based set bit is the max number of links any node can have */
static u8 skip_most_nr(u32 nr_items)
{
	return fls(nr_items);
}
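
The ffs()-based counts produce exactly the geometric distribution the comment describes. A small userspace sketch (glibc's ffs() from <strings.h> matches the kernel's 1-based semantics here):

/* Sketch: link counts for the first items appended, showing the
 * 1, 2, 1, 3, 1, 2, 1, 4, ... pattern of ffs(nr + 1). */
#include <stdio.h>
#include <strings.h>

int main(void)
{
	unsigned int nr;

	for (nr = 0; nr < 8; nr++)
		printf("item %u gets %d links\n", nr, ffs(nr + 1));
	return 0;
}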

/*
 * Find offset of the first item in the segment whose key is greater
 * than or equal to the search key.  -ENOENT is returned if there's no
 * item that matches.
 *
 * This is a standard skip list search from the segment block through
 * the items.  Follow high less frequent links while the key is greater
 * than the items and descend down to lower more frequent links when the
 * search key is less.
 */
int scoutfs_seg_find_off(struct scoutfs_segment *seg, struct scoutfs_key *key)
{
	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
	struct scoutfs_segment_item *item;
	__le32 *links;
	int cmp;
	int ret;
	int i;
	int off;

	links = sblk->skip_links;
	ret = -ENOENT;
	for (i = skip_most_nr(le32_to_cpu(sblk->nr_items)) - 1; i >= 0; i--) {
		if (links[i] == 0)
			continue;

		off = le32_to_cpu(links[i]);
		item = off_ptr(seg, off);

		cmp = scoutfs_key_compare(key, &item->key);
		if (cmp == 0) {
			ret = off;
			break;
		}

		if (cmp > 0) {
			links = item->skip_links;
			i++;
		} else {
			ret = off;
		}
	}

	return ret;
}

/*
 * Return the offset of the next item after the current item.  The input offset
 * must be a valid offset from _find_off().
 */
int scoutfs_seg_next_off(struct scoutfs_segment *seg, int off)
{
	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
	struct scoutfs_segment_item *item;

	off = check_caller_off(sblk, off);
	if (off > 0) {
		item = off_ptr(seg, off);
		off = le32_to_cpu(item->skip_links[0]);
		if (off == 0)
			off = -ENOENT;
	}
	return off;
}

/*
 * Return the count of bytes of the segment actually used.
 */
u32 scoutfs_seg_total_bytes(struct scoutfs_segment *seg)
{
	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);

	return le32_to_cpu(sblk->total_bytes);
}

/*
 * Returns true if the given item population will fit in a single
 * segment.
 *
 * We don't have items cross block boundaries.  It would be too
 * expensive to maintain packing of sorted dirty items in bins.  Instead
 * we assume that we'll lose the worst case largest possible item on every
 * block transition.  This will almost never be the case.  This causes us
 * to lose around 15% of space for level 0 segment writes.
 *
 * Our pattern of item link counts means there will be fewer than two
 * links per item on average, but we assume the worst case where items
 * have the max number of links.
 */
bool scoutfs_seg_fits_single(u32 nr_items, u32 val_bytes)
{
	u32 header = sizeof(struct scoutfs_segment_block);
	u32 items = nr_items * item_bytes(2, 0);
	u32 item_pad = item_bytes(skip_most_nr(nr_items),
				  SCOUTFS_MAX_VAL_SIZE) - 1;
	u32 padding = (SCOUTFS_SEGMENT_SIZE / SCOUTFS_BLOCK_SIZE) * item_pad;

	return (header + items + val_bytes + padding) <= SCOUTFS_SEGMENT_SIZE;
}
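
The worst-case padding estimate above is easiest to see with concrete numbers. A worked sketch with made-up sizes (the real SCOUTFS_* constants live in format.h, so the percentage here is only illustrative):

/* Sketch: assume one worst-case item's bytes are lost to padding at
 * every block boundary in the segment, as fits_single() does. */
#include <stdio.h>

int main(void)
{
	unsigned int seg_size = 1024 * 1024;	/* hypothetical segment size */
	unsigned int blk_size = 4096;		/* hypothetical block size */
	unsigned int item_pad = 200 - 1;	/* hypothetical max item - 1 */
	unsigned int padding = (seg_size / blk_size) * item_pad;

	printf("assumed loss: %u bytes (%.1f%% of the segment)\n",
	       padding, padding * 100.0 / seg_size);
	return 0;
}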

static u32 align_item_off(struct scoutfs_segment *seg, u32 item_off, u32 bytes)
{
	u32 space = SCOUTFS_BLOCK_SIZE - (item_off & SCOUTFS_BLOCK_MASK);

	if (bytes > space) {
		memset(off_ptr(seg, item_off), 0, space);
		return item_off + space;
	}

	return item_off;
}

/*
 * Append an item to the segment.  The caller always appends items that
 * have been sorted by their keys.  They may not know how many will fit.
 * We return true if we appended and false if the segment was full.
 */
bool scoutfs_seg_append_item(struct super_block *sb, struct scoutfs_segment *seg,
			     struct scoutfs_key *key, struct kvec *val,
			     u8 flags, __le32 **links)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct scoutfs_super_block *super = &sbi->super;
	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
	struct scoutfs_segment_item *item;
	struct kvec item_val;
	u8 nr_links;
	u32 val_len;
	u32 bytes;
	u32 off;
	int i;

	val_len = val ? val->iov_len : 0;

	/* initialize the segment and skip links as the first item is appended */
	if (sblk->nr_items == 0) {
		/* XXX the segment block header is a mess, be better */
		sblk->segno = cpu_to_le64(seg->segno);
		sblk->seq = super->next_seg_seq;
		le64_add_cpu(&super->next_seg_seq, 1);
		sblk->total_bytes = cpu_to_le32(sizeof(*sblk));

		for (i = 0; i < SCOUTFS_MAX_SKIP_LINKS; i++)
			links[i] = &sblk->skip_links[i];
	}

	trace_scoutfs_seg_append_item(sb, le64_to_cpu(sblk->segno),
				      le64_to_cpu(sblk->seq),
				      le32_to_cpu(sblk->nr_items),
				      le32_to_cpu(sblk->total_bytes),
				      key, val_len);

	/*
	 * Writing out-of-order items to a segment is very bad data
	 * corruption.  It'll mislead the key search during read and
	 * stop it from finding its items.
	 */
	off = le32_to_cpu(sblk->last_item_off);
	if (off) {
		item = off_ptr(seg, off);
		scoutfs_bug_on(sb, scoutfs_key_compare(key, &item->key) <= 0,
			       "key "SK_FMT" item->key "SK_FMT,
			       SK_ARG(key), SK_ARG(&item->key));
	}

	nr_links = skip_next_nr(le32_to_cpu(sblk->nr_items));
	bytes = item_bytes(nr_links, val_len);
	off = align_item_off(seg, le32_to_cpu(sblk->total_bytes), bytes);

	if ((off + bytes) > SCOUTFS_SEGMENT_SIZE)
		return false;

	sblk->last_item_off = cpu_to_le32(off);
	sblk->total_bytes = cpu_to_le32(off + bytes);
	le32_add_cpu(&sblk->nr_items, 1);

	item = off_ptr(seg, off);
	item->key = *key;
	item->val_len = cpu_to_le16(val_len);
	item->flags = flags;

	/* point the previous skip links at our appended item */
	item->nr_links = nr_links;
	for (i = 0; i < nr_links; i++) {
		item->skip_links[i] = 0;
		*links[i] = cpu_to_le32(off);
		links[i] = &item->skip_links[i];
	}

	get_item_key_val(seg, off, NULL, &item_val);
	if (val_len)
		memcpy(item_val.iov_base, val->iov_base, val_len);

	return true;
}
|
||||
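/*
 * A minimal sketch of the append pattern described above (the caller
 * types here are assumed; only the scoutfs_seg_*() calls are from the
 * original source).  The caller owns one link cursor per skip level
 * for the life of the segment and feeds items in sorted key order
 * until the segment reports that it is full.
 */
struct example_item {			/* hypothetical caller item */
	struct list_head entry;
	struct scoutfs_key key;
	struct kvec val;
	u8 flags;
};

static void example_append_sorted(struct super_block *sb,
				  struct scoutfs_segment *seg,
				  struct list_head *sorted)
{
	__le32 *links[SCOUTFS_MAX_SKIP_LINKS];
	struct example_item *ei;

	list_for_each_entry(ei, sorted, entry) {
		if (!scoutfs_seg_append_item(sb, seg, &ei->key, &ei->val,
					     ei->flags, links))
			break;	/* segment full; caller starts another */
	}
}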

void scoutfs_seg_init_ment(struct scoutfs_manifest_entry *ment, int level,
			   struct scoutfs_segment *seg)
{
	struct scoutfs_segment_block *sblk = off_ptr(seg, 0);
	struct scoutfs_key first;
	struct scoutfs_key last;

	first_last_keys(seg, &first, &last);

	scoutfs_manifest_init_entry(ment, level, le64_to_cpu(sblk->segno),
				    le64_to_cpu(sblk->seq), &first, &last);
}

/*
 * We maintain an LRU of segments so that the shrinker can free the
 * oldest under memory pressure.  Segments are only present in the LRU
 * after their IO has completed and while they're in the rbtree.  This
 * shrink only removes them from the rbtree and drops the reference
 * that rbtree presence held.  They may be freed a bit later once all
 * their active references are dropped.
 *
 * If this is called with nr_to_scan == 0 then it only returns the nr.
 * We avoid acquiring the lock in that case.
 *
 * Lookup code only uses the lru entry to change position in the LRU while
 * the segment is in the rbtree.  Once we remove it no one else will use
 * the LRU entry and we can use it to track all the segments that we're
 * going to put outside of the lock.
 *
 * XXX:
 *  - are sc->nr_to_scan and our return meant to be in units of pages?
 *  - should we sync a transaction here?
 */
static int seg_lru_shrink(struct shrinker *shrink, struct shrink_control *sc)
{
	struct segment_cache *cac = container_of(shrink, struct segment_cache,
						 shrinker);
	struct super_block *sb = cac->sb;
	struct scoutfs_segment *seg;
	struct scoutfs_segment *tmp;
	unsigned long flags;
	unsigned long nr;
	LIST_HEAD(list);
	int ret;

	nr = DIV_ROUND_UP(sc->nr_to_scan, SCOUTFS_SEGMENT_PAGES);
	if (!nr)
		goto out;

	spin_lock_irqsave(&cac->lock, flags);

	list_for_each_entry_safe(seg, tmp, &cac->lru_list, lru_entry) {
		/* shouldn't be possible */
		if (WARN_ON_ONCE(RB_EMPTY_NODE(&seg->node)))
			continue;

		if (nr-- == 0)
			break;

		/* using ref that rb tree presence had */
		erase_seg(cac, seg);
		list_add_tail(&seg->lru_entry, &list);
	}

	spin_unlock_irqrestore(&cac->lock, flags);

	list_for_each_entry_safe(seg, tmp, &list, lru_entry) {
		trace_scoutfs_seg_shrink(seg);
		scoutfs_inc_counter(sb, seg_shrink);
		list_del_init(&seg->lru_entry);
		scoutfs_seg_put(seg);
	}

out:
	ret = min_t(unsigned long, cac->lru_nr * SCOUTFS_SEGMENT_PAGES,
		    INT_MAX);
	trace_scoutfs_seg_shrink_exit(sb, sc->nr_to_scan, ret);
	return ret;
}
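/*
 * Illustrative note on the (older) shrinker API used here: the core
 * calls ->shrink() with sc->nr_to_scan == 0 purely to query the cache
 * size, and with a non-zero count to ask for that much reclaim; in
 * both cases the return is the remaining size, which this code
 * expresses in pages by scaling lru_nr by SCOUTFS_SEGMENT_PAGES.
 */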

int scoutfs_seg_setup(struct super_block *sb)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct segment_cache *cac;

	cac = kzalloc(sizeof(struct segment_cache), GFP_KERNEL);
	if (!cac)
		return -ENOMEM;
	sbi->segment_cache = cac;

	cac->sb = sb;
	spin_lock_init(&cac->lock);
	cac->root = RB_ROOT;
	init_waitqueue_head(&cac->waitq);

	cac->shrinker.shrink = seg_lru_shrink;
	cac->shrinker.seeks = DEFAULT_SEEKS;
	register_shrinker(&cac->shrinker);
	INIT_LIST_HEAD(&cac->lru_list);

	return 0;
}

void scoutfs_seg_destroy(struct super_block *sb)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct segment_cache *cac = sbi->segment_cache;
	struct scoutfs_segment *seg;
	struct rb_node *node;

	if (cac) {
		if (cac->shrinker.shrink == seg_lru_shrink)
			unregister_shrinker(&cac->shrinker);

		for (node = rb_first(&cac->root); node; ) {
			seg = container_of(node, struct scoutfs_segment, node);
			node = rb_next(node);
			erase_seg(cac, seg);
			scoutfs_seg_put(seg);
		}

		kfree(cac);
	}
}
51 kmod/src/seg.h
51 kmod/src/seg.h
@@ -1,51 +0,0 @@
#ifndef _SCOUTFS_SEG_H_
#define _SCOUTFS_SEG_H_

struct scoutfs_bio_completion;
struct scoutfs_key;
struct scoutfs_manifest_entry;
struct kvec;

/* this is only visible for trace events */
struct scoutfs_segment {
	struct super_block *sb;
	struct rb_node node;
	struct list_head lru_entry;
	atomic_t refcount;
	u64 segno;
	unsigned long flags;
	int err;
	struct page *pages[SCOUTFS_SEGMENT_PAGES];
};

struct scoutfs_segment *scoutfs_seg_submit_read(struct super_block *sb,
						u64 segno);
int scoutfs_seg_wait(struct super_block *sb, struct scoutfs_segment *seg,
		     u64 segno, u64 seq);

int scoutfs_seg_find_off(struct scoutfs_segment *seg, struct scoutfs_key *key);
int scoutfs_seg_next_off(struct scoutfs_segment *seg, int off);
u32 scoutfs_seg_total_bytes(struct scoutfs_segment *seg);
int scoutfs_seg_get_item(struct scoutfs_segment *seg, int off,
			 struct scoutfs_key *key, struct kvec *val, u8 *flags);

void scoutfs_seg_get(struct scoutfs_segment *seg);
void scoutfs_seg_put(struct scoutfs_segment *seg);

int scoutfs_seg_alloc(struct super_block *sb, u64 segno,
		      struct scoutfs_segment **seg_ret);
bool scoutfs_seg_fits_single(u32 nr_items, u32 val_bytes);
bool scoutfs_seg_append_item(struct super_block *sb, struct scoutfs_segment *seg,
			     struct scoutfs_key *key, struct kvec *val,
			     u8 flags, __le32 **links);
void scoutfs_seg_init_ment(struct scoutfs_manifest_entry *ment, int level,
			   struct scoutfs_segment *seg);

int scoutfs_seg_submit_write(struct super_block *sb,
			     struct scoutfs_segment *seg,
			     struct scoutfs_bio_completion *comp);

int scoutfs_seg_setup(struct super_block *sb);
void scoutfs_seg_destroy(struct super_block *sb);

#endif
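/*
 * A hedged sketch of the read path the header above exposes (caller
 * names are illustrative; this assumes scoutfs_seg_submit_read()
 * returns an ERR_PTR() on failure): submit the segment read, wait for
 * completion, then walk item offsets starting from a search key.
 */
static int example_read_items(struct super_block *sb, u64 segno, u64 seq,
			      struct scoutfs_key *first)
{
	struct scoutfs_segment *seg;
	struct scoutfs_key key;
	struct kvec val;
	u8 flags;
	int off;
	int ret;

	seg = scoutfs_seg_submit_read(sb, segno);
	if (IS_ERR(seg))
		return PTR_ERR(seg);

	ret = scoutfs_seg_wait(sb, seg, segno, seq);

	for (off = scoutfs_seg_find_off(seg, first);
	     ret == 0 && off >= 0;
	     off = scoutfs_seg_next_off(seg, off)) {
		ret = scoutfs_seg_get_item(seg, off, &key, &val, &flags);
		if (ret < 0)
			break;
	}

	scoutfs_seg_put(seg);
	return ret;
}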
File diff suppressed because it is too large
@@ -56,14 +56,6 @@ do { \
	__entry->name##_data_len, __entry->name##_cmd, __entry->name##_flags, \
	__entry->name##_error

struct scoutfs_net_manifest_entry;
struct scoutfs_manifest_entry;

void scoutfs_init_ment_to_net(struct scoutfs_net_manifest_entry *net_ment,
			      struct scoutfs_manifest_entry *ment);
void scoutfs_init_ment_from_net(struct scoutfs_manifest_entry *ment,
				struct scoutfs_net_manifest_entry *net_ment);

int scoutfs_server_lock_request(struct super_block *sb, u64 rid,
				struct scoutfs_net_lock *nl);
int scoutfs_server_lock_response(struct super_block *sb, u64 rid,
@@ -33,11 +33,6 @@
#include "counters.h"
#include "triggers.h"
#include "trans.h"
#include "item.h"
#include "manifest.h"
#include "seg.h"
#include "bio.h"
#include "compact.h"
#include "data.h"
#include "lock.h"
#include "net.h"
@@ -197,13 +192,11 @@ static void scoutfs_put_super(struct super_block *sb)
	scoutfs_lock_shutdown(sb);
	scoutfs_server_destroy(sb);
	scoutfs_net_destroy(sb);
	scoutfs_seg_destroy(sb);
	scoutfs_lock_destroy(sb);

	/* server clears quorum leader flag during shutdown */
	scoutfs_quorum_destroy(sb);

	scoutfs_item_destroy(sb);
	scoutfs_block_destroy(sb);
	scoutfs_destroy_triggers(sb);
	scoutfs_options_destroy(sb);
@@ -436,8 +429,6 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
	scoutfs_sysfs_create_attrs(sb, &sbi->mopts_ssa,
				   mount_options_attrs, "mount_options") ?:
	scoutfs_setup_triggers(sb) ?:
	scoutfs_seg_setup(sb) ?:
	scoutfs_item_setup(sb) ?:
	scoutfs_block_setup(sb) ?:
	scoutfs_forest_setup(sb) ?:
	scoutfs_inode_setup(sb) ?:

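/*
 * How the ?: chain above works (illustrative): each setup call returns
 * 0 on success or a negative errno, and the GNU C `a ?: b` extension
 * evaluates to `a` when `a` is non-zero, so the first failing setup
 * short-circuits the rest and its errno propagates, e.g.:
 *
 *	ret = scoutfs_seg_setup(sb) ?:
 *	      scoutfs_item_setup(sb);
 */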
@@ -11,10 +11,7 @@

struct scoutfs_counters;
struct scoutfs_triggers;
struct item_cache;
struct manifest;
struct segment_cache;
struct compact_info;
struct data_info;
struct trans_info;
struct lock_info;
@@ -40,11 +37,6 @@ struct scoutfs_sb_info {

	spinlock_t next_ino_lock;

	struct manifest *manifest;
	struct item_cache *item_cache;
	struct segment_cache *segment_cache;
	struct seg_alloc *seg_alloc;
	struct compact_info *compact_info;
	struct data_info *data_info;
	struct inode_sb_info *inode_sb_info;
	struct btree_info *btree_info;

@@ -21,11 +21,7 @@
#include "super.h"
#include "trans.h"
#include "data.h"
#include "bio.h"
#include "item.h"
#include "forest.h"
#include "manifest.h"
#include "seg.h"
#include "counters.h"
#include "client.h"
#include "inode.h"