Read items with manifest entries from server

Item reading tries to directly walk the manifest to find segments to
read.  That doesn't work when only the server has read the ring and
loaded the manifest.

This adds a network message to ask the server for the manifest entries
that describe the segments that will be needed to read items.

Previously item reading would walk the manifest and build up native
manifest references in a list that it'd use to read.  To implement the
network message we add request sending, processing, and reply parsing
around those original functions.  Item reading now packs its key range
and sends it to the server.  The server walks the manifest and sends the
entries that intersect with the key range.  Then the reply function
builds up the native manifest references that item reading will use.

The net reply functions needed an argument so that the manifest reading
request could pass in the caller's list that the native manifest
references should be added to.

Signed-off-by: Zach Brown <zab@versity.com>
This commit is contained in:
Zach Brown
2017-04-13 14:21:08 -07:00
parent b50de90196
commit 5487aee6a7
5 changed files with 296 additions and 44 deletions

View File

@@ -373,10 +373,22 @@ struct scoutfs_net_inode_alloc {
__le64 nr;
} __packed;
/*
 * Request payload naming a key range.  The two variable length keys are
 * packed back to back in key_bytes[]: start_len bytes of start key
 * followed immediately by end_len bytes of end key.
 */
struct scoutfs_net_key_range {
__le16 start_len;
__le16 end_len;
__u8 key_bytes[0];
} __packed;
/*
 * Reply payload: nr manifest entries packed back to back, each taking
 * the number of bytes returned by scoutfs_manifest_bytes() (the fixed
 * struct plus its trailing variable length keys).
 */
struct scoutfs_net_manifest_entries {
__le16 nr;
struct scoutfs_manifest_entry ments[0];
} __packed;
/* network message types exchanged between mounts and the server */
enum {
/* sends and receives a struct scoutfs_timeval */
SCOUTFS_NET_TRADE_TIME = 0,
/* receives a struct scoutfs_net_inode_alloc */
SCOUTFS_NET_ALLOC_INODES,
/* sends scoutfs_net_key_range, receives scoutfs_net_manifest_entries */
SCOUTFS_NET_MANIFEST_RANGE_ENTRIES,
/* NOTE(review): presumably bounds the valid type range — confirm */
SCOUTFS_NET_UNKNOWN,
};

View File

@@ -26,6 +26,7 @@
#include "manifest.h"
#include "trans.h"
#include "counters.h"
#include "net.h"
#include "scoutfs_trace.h"
/*
@@ -256,6 +257,17 @@ int scoutfs_manifest_del(struct super_block *sb, struct scoutfs_key_buf *first,
return 0;
}
/*
 * Size in bytes consumed by a manifest entry: the fixed struct plus the
 * two variable length keys that are packed after it.
 */
int scoutfs_manifest_bytes(struct scoutfs_manifest_entry *ment)
{
	unsigned int key_bytes = le16_to_cpu(ment->first_key_len) +
				 le16_to_cpu(ment->last_key_len);

	return sizeof(struct scoutfs_manifest_entry) + key_bytes;
}
/*
* XXX This feels pretty gross, but it's a simple way to give compaction
* atomic updates. It'll go away once compactions go to the trouble of
@@ -291,12 +303,20 @@ static void free_ref(struct super_block *sb, struct manifest_ref *ref)
}
}
static int alloc_add_ref(struct super_block *sb, struct list_head *list,
struct scoutfs_manifest_entry *ment)
/*
* Allocate a native manifest ref so that we can work with segments described
* by the callers manifest entry.
*
* This frees all the elements on the list if it returns an error.
*/
int scoutfs_manifest_add_ment_ref(struct super_block *sb,
struct list_head *list,
struct scoutfs_manifest_entry *ment)
{
struct scoutfs_key_buf ment_first;
struct scoutfs_key_buf ment_last;
struct manifest_ref *ref;
struct manifest_ref *tmp;
init_ment_keys(ment, &ment_first, &ment_last);
@@ -307,6 +327,10 @@ static int alloc_add_ref(struct super_block *sb, struct list_head *list,
}
if (!ref || !ref->first || !ref->last) {
free_ref(sb, ref);
list_for_each_entry_safe(ref, tmp, list, entry) {
list_del_init(&ref->entry);
free_ref(sb, ref);
}
return -ENOMEM;
}
@@ -320,8 +344,10 @@ static int alloc_add_ref(struct super_block *sb, struct list_head *list,
}
/*
* Get refs on all the segments in the manifest that we'll need to
* search to populate the cache with the given range.
* Return an array of pointers to the entries in the manifest that
* intersect with the given key range. The entries will be ordered by
* the order that they should be read: level 0 from newest to oldest
* then increasing higher order levels.
*
* We have to get all the level 0 segments that intersect with the range
* of items that we want to search because the level 0 segments can
@@ -330,22 +356,40 @@ static int alloc_add_ref(struct super_block *sb, struct list_head *list,
* We only need to search for the starting key in all the higher levels.
* They do not overlap so we can iterate through the key space in each
* segment starting with the key.
*
* This is called by the server who is processing manifest search
* messages from mounts. The server locks down the manifest while it
* gets these pointers and then uses them to allocate and fill a reply
* message.
*/
static int get_range_refs(struct super_block *sb, struct manifest *mani,
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *end,
struct list_head *ref_list)
struct scoutfs_manifest_entry **
scoutfs_manifest_find_range_entries(struct super_block *sb,
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *end,
unsigned *found_bytes)
{
DECLARE_MANIFEST(sb, mani);
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
struct scoutfs_manifest_entry **found;
struct scoutfs_manifest_entry *ment;
struct manifest_search_key skey;
struct scoutfs_key_buf first;
struct scoutfs_key_buf last;
struct manifest_ref *ref;
struct manifest_ref *tmp;
int ret;
unsigned nr;
int i;
down_write(&mani->rwsem);
lockdep_assert_held(&mani->rwsem);
*found_bytes = 0;
/* at most we get all level 0, one from other levels, and null term */
nr = get_level_count(mani, super, 0) + mani->nr_levels + 1;
found = kcalloc(nr, sizeof(struct scoutfs_manifest_entry *), GFP_NOFS);
if (!found) {
found = ERR_PTR(-ENOMEM);
goto out;
}
nr = 0;
/* get level 0 segments that overlap with the missing range */
skey.level = 0;
@@ -353,9 +397,8 @@ static int get_range_refs(struct super_block *sb, struct manifest *mani,
ment = scoutfs_ring_lookup_prev(&mani->ring, &skey);
while (ment) {
if (cmp_range_ment(key, end, ment) == 0) {
ret = alloc_add_ref(sb, ref_list, ment);
if (ret)
goto out;
found[nr++] = ment;
*found_bytes += scoutfs_manifest_bytes(ment);
}
ment = scoutfs_ring_prev(&mani->ring, ment);
@@ -371,27 +414,16 @@ static int get_range_refs(struct super_block *sb, struct manifest *mani,
ment = scoutfs_ring_lookup(&mani->ring, &skey);
if (ment) {
init_ment_keys(ment, &first, &last);
ret = alloc_add_ref(sb, ref_list, ment);
if (ret)
goto out;
found[nr++] = ment;
*found_bytes += scoutfs_manifest_bytes(ment);
}
}
ret = 0;
/* null terminate */
found[nr++] = NULL;
out:
up_write(&mani->rwsem);
if (ret) {
list_for_each_entry_safe(ref, tmp, ref_list, entry) {
list_del_init(&ref->entry);
free_ref(sb, ref);
}
}
trace_printk("ret %d\n", ret);
return ret;
return found;
}
/*
@@ -425,7 +457,6 @@ int scoutfs_manifest_read_items(struct super_block *sb,
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *end)
{
DECLARE_MANIFEST(sb, mani);
struct scoutfs_key_buf item_key;
struct scoutfs_key_buf found_key;
struct scoutfs_key_buf batch_end;
@@ -449,9 +480,9 @@ int scoutfs_manifest_read_items(struct super_block *sb,
trace_printk("reading items\n");
/* get refs on all the segments */
ret = get_range_refs(sb, mani, key, end, &ref_list);
ret = scoutfs_net_manifest_range_entries(sb, key, end, &ref_list);
if (ret)
return ret;
goto out;
/* submit reads for all the segments */
list_for_each_entry(ref, &ref_list, entry) {

View File

@@ -17,12 +17,23 @@ int scoutfs_manifest_submit_write(struct super_block *sb,
struct scoutfs_bio_completion *comp);
void scoutfs_manifest_write_complete(struct super_block *sb);
int scoutfs_manifest_bytes(struct scoutfs_manifest_entry *ment);
int scoutfs_manifest_lock(struct super_block *sb);
int scoutfs_manifest_unlock(struct super_block *sb);
struct scoutfs_manifest_entry **
scoutfs_manifest_find_range_entries(struct super_block *sb,
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *end,
unsigned *found_bytes);
int scoutfs_manifest_read_items(struct super_block *sb,
struct scoutfs_key_buf *key,
struct scoutfs_key_buf *end);
int scoutfs_manifest_add_ment_ref(struct super_block *sb,
struct list_head *list,
struct scoutfs_manifest_entry *ment);
u64 scoutfs_manifest_level_count(struct super_block *sb, u8 level);
int scoutfs_manifest_next_compact(struct super_block *sb, void *data);

View File

@@ -23,6 +23,7 @@
#include "net.h"
#include "counters.h"
#include "inode.h"
#include "manifest.h"
#include "scoutfs_trace.h"
/*
@@ -82,7 +83,8 @@ struct net_info {
#define DECLARE_NET_INFO(sb, name) \
struct net_info *name = SCOUTFS_SB(sb)->net_info
typedef int (*reply_func_t)(struct super_block *sb, void *recv, int bytes);
typedef int (*reply_func_t)(struct super_block *sb, void *recv, int bytes,
void *arg);
/*
* Send buffers are allocated either by clients who send requests or by
@@ -93,6 +95,7 @@ typedef int (*reply_func_t)(struct super_block *sb, void *recv, int bytes);
struct send_buf {
struct list_head head;  /* entry on nti's pending send/reply lists */
reply_func_t func;      /* reply handler; may be NULL (see free_sbuf_list) */
void *arg;              /* caller context passed through to func */
struct scoutfs_net_header nh[0];  /* header and payload follow inline */
};
@@ -254,6 +257,73 @@ static struct send_buf *alloc_sbuf(unsigned data_len)
return sbuf;
}
/*
 * Find the manifest entries that intersect with the request's key
 * range. We lock the manifest and get pointers to the manifest entries
 * that intersect. We then allocate a reply buffer and copy them over.
 *
 * Runs on the server for SCOUTFS_NET_MANIFEST_RANGE_ENTRIES requests.
 * Returns the reply send_buf on success or an ERR_PTR on failure.
 */
static struct send_buf *process_manifest_range_entries(struct super_block *sb,
void *req, int req_len)
{
struct scoutfs_net_key_range *kr = req;
struct scoutfs_net_manifest_entries *ments;
struct scoutfs_manifest_entry **found = NULL;
struct scoutfs_manifest_entry *ment;
struct scoutfs_key_buf start;
struct scoutfs_key_buf end;
struct send_buf *sbuf;
unsigned total;
unsigned bytes;
int i;
/* XXX this is a write lock and should be a read lock */
scoutfs_manifest_lock(sb);
/*
 * Validate the request before trusting its lengths: it must cover the
 * fixed header and the packed start and end keys.  The || short
 * circuits so kr's lengths are only read once the header fits.
 */
if (req_len < sizeof(struct scoutfs_net_key_range) ||
req_len < offsetof(struct scoutfs_net_key_range,
key_bytes[le16_to_cpu(kr->start_len) +
le16_to_cpu(kr->end_len)])) {
sbuf = ERR_PTR(-EINVAL);
goto out;
}
/* map the packed request keys; end key follows the start key bytes */
scoutfs_key_init(&start, kr->key_bytes, le16_to_cpu(kr->start_len));
scoutfs_key_init(&end, kr->key_bytes + le16_to_cpu(kr->start_len),
le16_to_cpu(kr->end_len));
/* total returns the byte count needed to pack the found entries */
found = scoutfs_manifest_find_range_entries(sb, &start, &end, &total);
if (IS_ERR(found)) {
sbuf = ERR_CAST(found);
goto out;
}
total += sizeof(struct scoutfs_net_manifest_entries);
sbuf = alloc_sbuf(total);
if (!sbuf) {
sbuf = ERR_PTR(-ENOMEM);
goto out;
}
ments = (void *)sbuf->nh->data;
ment = ments->ments;
/* found[] is null terminated; pack each entry into the reply */
for (i = 0; found[i]; i++) {
bytes = scoutfs_manifest_bytes(found[i]);
memcpy(ment, found[i], bytes);
ment = (void *)((char *)ment + bytes);
}
ments->nr = cpu_to_le16(i);
sbuf->nh->status = SCOUTFS_NET_STATUS_SUCCESS;
out:
scoutfs_manifest_unlock(sb);
/* found is only an array of pointers into the manifest; just free it */
if (!IS_ERR_OR_NULL(found))
kfree(found);
return sbuf;
}
/*
* XXX should this call into inodes? not sure about the layering here.
*/
@@ -345,6 +415,10 @@ static int process_request(struct net_info *nti, struct recv_buf *rbuf)
else if (rbuf->nh->type == SCOUTFS_NET_ALLOC_INODES)
sbuf = process_alloc_inodes(sb, (void *)rbuf->nh->data,
data_len);
else if (rbuf->nh->type == SCOUTFS_NET_MANIFEST_RANGE_ENTRIES)
sbuf = process_manifest_range_entries(sb,
(void *)rbuf->nh->data,
data_len);
else
sbuf = ERR_PTR(-EINVAL);
@@ -379,6 +453,7 @@ static int process_reply(struct net_info *nti, struct recv_buf *rbuf)
struct super_block *sb = nti->sb;
reply_func_t func = NULL;
struct send_buf *sbuf;
void *arg;
int ret;
mutex_lock(&nti->mutex);
@@ -388,6 +463,7 @@ static int process_reply(struct net_info *nti, struct recv_buf *rbuf)
if (sbuf->nh->id == rbuf->nh->id) {
list_del_init(&sbuf->head);
func = sbuf->func;
arg = sbuf->arg;
kfree(sbuf);
sbuf = NULL;
break;
@@ -405,7 +481,7 @@ static int process_reply(struct net_info *nti, struct recv_buf *rbuf)
else
ret = -EIO;
return func(sb, rbuf->nh->data, ret);
return func(sb, rbuf->nh->data, ret, arg);
}
/*
@@ -646,7 +722,7 @@ static void free_sbuf_list(struct super_block *sb, struct list_head *list,
list_for_each_entry_safe(sbuf, pos, list, head) {
list_del_init(&sbuf->head);
if (ret && sbuf->func)
sbuf->func(sb, NULL, ret);
sbuf->func(sb, NULL, ret, sbuf->arg);
kfree(sbuf);
}
}
@@ -731,7 +807,7 @@ static void scoutfs_net_shutdown_func(struct work_struct *work)
}
static int add_send_buf(struct super_block *sb, int type, void *data,
unsigned data_len, reply_func_t func)
unsigned data_len, reply_func_t func, void *arg)
{
DECLARE_NET_INFO(sb, nti);
struct scoutfs_net_header *nh;
@@ -743,6 +819,7 @@ static int add_send_buf(struct super_block *sb, int type, void *data,
return -ENOMEM;
sbuf->func = func;
sbuf->arg = arg;
sbuf->nh->status = SCOUTFS_NET_STATUS_REQUEST;
nh = sbuf->nh;
@@ -767,7 +844,121 @@ static int add_send_buf(struct super_block *sb, int type, void *data,
return 0;
}
static int alloc_inodes_reply(struct super_block *sb, void *reply, int ret)
/*
 * Context handed from the blocked sender to the reply func via the
 * send_buf's arg pointer.  Lives on the sender's stack, so it must not
 * be touched after comp is completed.
 */
struct manifest_range_entries_args {
struct list_head *list; /* caller's list to receive manifest refs */
struct completion comp; /* signaled once the reply is processed */
int ret;                /* result handed back to the waiting caller */
};
/*
 * The server has given us entries that intersect with our request's
 * key range. Our caller is still blocked waiting for our completion.
 * We walk the manifest entries and add native manifest refs to their
 * list and wake them.
 *
 * A negative reply_bytes carries an error code instead of a reply
 * payload.  Each entry's bounds are verified against the remaining
 * reply bytes before it is used.
 */
static int manifest_range_entries_reply(struct super_block *sb, void *reply,
int reply_bytes, void *arg)
{
struct manifest_range_entries_args *args = arg;
struct scoutfs_net_manifest_entries *ments = reply;
struct scoutfs_manifest_entry *ment;
unsigned bytes;
int ret = 0;
int i;
/* negative byte count is an error from the net layer; pass it on */
if (reply_bytes < 0) {
ret = reply_bytes;
goto out;
}
/* the reply must at least contain the fixed entries header */
reply_bytes -= sizeof(struct scoutfs_net_manifest_entries);
if (reply_bytes < 0) {
ret = -EINVAL;
goto out;
}
ment = ments->ments;
for (i = 0; i < le16_to_cpu(ments->nr); i++) {
/* the fixed struct must fit before we can read its key lengths */
if (reply_bytes < sizeof(struct scoutfs_manifest_entry)) {
ret = -EINVAL;
goto out;
}
bytes = scoutfs_manifest_bytes(ment);
reply_bytes -= bytes;
/* the entry's variable length keys must also fit in the reply */
if (reply_bytes < 0) {
ret = -EINVAL;
goto out;
}
/* on error this frees the refs already added to the list */
ret = scoutfs_manifest_add_ment_ref(sb, args->list, ment);
if (ret)
break;
ment = (void *)((char *)ment + bytes);
}
out:
args->ret = ret;
complete(&args->comp); /* args can be freed from this point */
return ret;
}
/*
 * Ask the manifest server for the manifest entries whose key range
 * intersects with the callers key range. The reply func will fill the
 * caller's list with the reply's entries.
 *
 * Returns 0 and populates the caller's list on success, or a negative
 * errno from allocation, send queuing, or the reply processing.
 *
 * XXX for now this can't be interrupted. The reply func which is off
 * in work in a worker thread is blocking to allocate and put things on
 * a list in our stack. We'd need better lifetime support to let it
 * find out that we've returned and that it should stop processing the
 * reply.
 */
int scoutfs_net_manifest_range_entries(struct super_block *sb,
struct scoutfs_key_buf *start,
struct scoutfs_key_buf *end,
struct list_head *list)
{
struct manifest_range_entries_args args;
struct scoutfs_net_key_range *kr;
struct scoutfs_key_buf start_key;
struct scoutfs_key_buf end_key;
unsigned len;
int ret;
/* request is the fixed header plus both keys packed after it */
len = sizeof(struct scoutfs_net_key_range) +
start->key_len + end->key_len;
kr = kmalloc(len, GFP_NOFS);
if (!kr)
return -ENOMEM;
kr->start_len = cpu_to_le16(start->key_len);
kr->end_len = cpu_to_le16(end->key_len);
/* point key bufs at the packed regions, then copy the caller's keys */
scoutfs_key_init(&start_key, kr->key_bytes, start->key_len);
scoutfs_key_init(&end_key, kr->key_bytes + start->key_len,
end->key_len);
scoutfs_key_copy(&start_key, start);
scoutfs_key_copy(&end_key, end);
args.list = list;
init_completion(&args.comp);
ret = add_send_buf(sb, SCOUTFS_NET_MANIFEST_RANGE_ENTRIES, kr, len,
manifest_range_entries_reply, &args);
/* add_send_buf copied the request into its send buffer */
kfree(kr);
if (ret)
return ret;
/* the reply func sets args.ret before completing */
wait_for_completion(&args.comp);
return args.ret;
}
static int alloc_inodes_reply(struct super_block *sb, void *reply, int ret,
void *arg)
{
struct scoutfs_net_inode_alloc *ial = reply;
u64 ino;
@@ -801,10 +992,11 @@ out:
int scoutfs_net_alloc_inodes(struct super_block *sb)
{
return add_send_buf(sb, SCOUTFS_NET_ALLOC_INODES, NULL, 0,
alloc_inodes_reply);
alloc_inodes_reply, NULL);
}
static int trade_time_reply(struct super_block *sb, void *reply, int ret)
static int trade_time_reply(struct super_block *sb, void *reply, int ret,
void *arg)
{
struct scoutfs_timespec *ts = reply;
@@ -828,7 +1020,7 @@ int scoutfs_net_trade_time(struct super_block *sb)
send.nsec = cpu_to_le32(ts.tv_nsec);
ret = add_send_buf(sb, SCOUTFS_NET_TRADE_TIME, &send,
sizeof(send), trade_time_reply);
sizeof(send), trade_time_reply, NULL);
trace_printk("sent %llu.%lu ret %d\n",
(u64)ts.tv_sec, ts.tv_nsec, ret);

View File

@@ -1,8 +1,14 @@
#ifndef _SCOUTFS_NET_H_
#define _SCOUTFS_NET_H_
struct scoutfs_key_buf;
int scoutfs_net_trade_time(struct super_block *sb);
int scoutfs_net_alloc_inodes(struct super_block *sb);
int scoutfs_net_manifest_range_entries(struct super_block *sb,
struct scoutfs_key_buf *start,
struct scoutfs_key_buf *end,
struct list_head *list);
int scoutfs_net_setup(struct super_block *sb);
void scoutfs_net_destroy(struct super_block *sb);