diff --git a/kmod/src/format.h b/kmod/src/format.h
index 1ccac901..fbd59c41 100644
--- a/kmod/src/format.h
+++ b/kmod/src/format.h
@@ -557,6 +557,7 @@ enum {
 	SCOUTFS_NET_BULK_ALLOC,
 	SCOUTFS_NET_ADVANCE_SEQ,
 	SCOUTFS_NET_GET_LAST_SEQ,
+	SCOUTFS_NET_GET_MANIFEST_ROOT,
 	SCOUTFS_NET_UNKNOWN,
 };
 
diff --git a/kmod/src/manifest.c b/kmod/src/manifest.c
index 31c55283..7cad79cd 100644
--- a/kmod/src/manifest.c
+++ b/kmod/src/manifest.c
@@ -447,16 +447,14 @@ static int btree_prev_overlap_or_next(struct super_block *sb,
  * existing segment that intersects with the range, even if it doesn't
  * contain the key. The key might fall between segments at that level.
  *
- * XXX Today this using the roots from the mount-wide super. This is
- * super wrong. Doing so lets it use the dirty btree that could be
- * modified by the manifest server running on this node so it has to
- * lock. It should be using a specific root communicated by lock lvbs
- * (or read from the super on mount). Then the btrees it traverses will
- * be stable and read-only. (But can still get -ESTALE if they're
- * re-written under us, would need to re-sample roots from the super in
- * that case, I imagine.)
+ * This is walking stable btree roots. The blocks won't be changed as
+ * long as we read valid blocks. They can be overwritten in which case
+ * we'll return -ESTALE and the caller can retry with a newer root or
+ * return hard errors.
  */
-static int get_manifest_refs(struct super_block *sb, struct scoutfs_key_buf *key,
+static int get_manifest_refs(struct super_block *sb,
+			     struct scoutfs_btree_root *root,
+			     struct scoutfs_key_buf *key,
 			     struct scoutfs_key_buf *end,
 			     struct list_head *ref_list)
 {
@@ -475,8 +473,6 @@ static int get_manifest_refs(struct super_block *sb, struct scoutfs_key_buf *key
 	if (!mkey)
 		return -ENOMEM;
 
-	scoutfs_manifest_lock(sb);
-
 	/* get level 0 segments that overlap with the missing range */
 	mkey_len = init_btree_key(mkey, 0, ~0ULL, NULL);
 	ret = scoutfs_btree_prev(sb, &super->manifest.root,
@@ -534,8 +530,8 @@ static int get_manifest_refs(struct super_block *sb, struct scoutfs_key_buf *key
 out:
 	scoutfs_btree_put_iref(&iref);
 	scoutfs_btree_put_iref(&prev);
-	scoutfs_manifest_unlock(sb);
 	kfree(mkey);
+	BUG_ON(ret == -ESTALE); /* XXX caller needs to retry or return error */
 	return ret;
 }
 
@@ -572,6 +568,7 @@ int scoutfs_manifest_read_items(struct super_block *sb,
 	struct scoutfs_key_buf found_key;
 	struct scoutfs_key_buf batch_end;
 	struct scoutfs_key_buf seg_end;
+	struct scoutfs_btree_root root;
 	SCOUTFS_DECLARE_KVEC(item_val);
 	SCOUTFS_DECLARE_KVEC(found_val);
 	struct scoutfs_segment *seg;
@@ -590,8 +587,19 @@ int scoutfs_manifest_read_items(struct super_block *sb,
 
 	trace_scoutfs_read_items(sb, key, end);
 
+
+	/*
+	 * Ask the manifest server which manifest root to read from. Lock
+	 * holding callers will be responsible for this in the future. They'll
+	 * either get a manifest ref in the lvb of their lock or they'll
+	 * ask the server the first time the system sees the lock.
+	 */
+	ret = scoutfs_net_get_manifest_root(sb, &root);
+	if (ret)
+		goto out;
+
 	/* get refs on all the segments */
-	ret = get_manifest_refs(sb, key, end, &ref_list);
+	ret = get_manifest_refs(sb, &root, key, end, &ref_list);
 	if (ret)
 		goto out;
 
diff --git a/kmod/src/net.c b/kmod/src/net.c
index 932e599d..d542bd5d 100644
--- a/kmod/src/net.c
+++ b/kmod/src/net.c
@@ -85,6 +85,9 @@ struct net_info {
 	struct llist_head commit_waiters;
 	struct work_struct commit_work;
 
+	/* server remembers the stable manifest root for clients */
+	struct scoutfs_btree_root stable_manifest_root;
+
 	/* level 0 segment addition waits for it to clear */
 	wait_queue_head_t waitq;
 
@@ -338,6 +341,7 @@ static void scoutfs_net_commit_func(struct work_struct *work)
 
 		scoutfs_btree_write_complete(sb);
 
+		nti->stable_manifest_root = SCOUTFS_SB(sb)->super.manifest.root;
 		scoutfs_advance_dirty_super(sb);
 	} else {
 		ret = 0;
@@ -777,6 +781,32 @@ static struct send_buf *process_get_last_seq(struct super_block *sb,
 	return sbuf;
 }
 
+static struct send_buf *process_get_manifest_root(struct super_block *sb,
+						  void *req, int req_len)
+{
+	DECLARE_NET_INFO(sb, nti);
+	struct scoutfs_btree_root *root;
+	struct send_buf *sbuf;
+
+	if (req_len != 0)
+		return ERR_PTR(-EINVAL);
+
+	sbuf = alloc_sbuf(sizeof(struct scoutfs_btree_root));
+	if (!sbuf)
+		return ERR_PTR(-ENOMEM);
+
+	root = (void *)sbuf->nh->data;
+
+	scoutfs_manifest_lock(sb);
+	memcpy(root, &nti->stable_manifest_root,
+	       sizeof(struct scoutfs_btree_root));
+	scoutfs_manifest_unlock(sb);
+
+	sbuf->nh->status = SCOUTFS_NET_STATUS_SUCCESS;
+
+	return sbuf;
+}
+
 typedef struct send_buf *(*proc_func_t)(struct super_block *sb, void *req,
 					int req_len);
 
@@ -789,6 +819,7 @@ static proc_func_t type_proc_func(u8 type)
 		[SCOUTFS_NET_BULK_ALLOC] = process_bulk_alloc,
 		[SCOUTFS_NET_ADVANCE_SEQ] = process_advance_seq,
 		[SCOUTFS_NET_GET_LAST_SEQ] = process_get_last_seq,
+		[SCOUTFS_NET_GET_MANIFEST_ROOT] = process_get_manifest_root,
 	};
 
 	return type < SCOUTFS_NET_UNKNOWN ? funcs[type] : NULL;
@@ -918,6 +949,8 @@ static void scoutfs_net_proc_func(struct work_struct *work)
 		if (ret == 0) {
 			scoutfs_advance_dirty_super(sb);
 			nti->server_loaded = true;
+			nti->stable_manifest_root =
+				SCOUTFS_SB(sb)->super.manifest.root;
 		} else {
 			destroy_server_state(sb);
 		}
@@ -1707,6 +1740,51 @@ int scoutfs_net_get_last_seq(struct super_block *sb, u64 *seq)
 	return ret;
 }
 
+struct get_manifest_root_args {
+	struct scoutfs_btree_root *root;
+	struct completion comp;
+	int ret;
+};
+
+static int get_manifest_root_reply(struct super_block *sb, void *reply, int ret,
+				   void *arg)
+{
+	struct get_manifest_root_args *args = arg;
+	struct scoutfs_btree_root *root = reply;
+	int err;
+
+	if (ret == sizeof(struct scoutfs_btree_root)) {
+		memcpy(args->root, root, sizeof(struct scoutfs_btree_root));
+		err = 0;
+	} else {
+		err = -EINVAL;
+	}
+	args->ret = err;
+
+	/* args is on the waiter's stack and can be gone once it is woken */
+	complete(&args->comp);
+	return err;
+}
+
+int scoutfs_net_get_manifest_root(struct super_block *sb,
+				  struct scoutfs_btree_root *root)
+{
+	struct get_manifest_root_args args;
+	int ret;
+
+	args.root = root;
+	init_completion(&args.comp);
+
+	ret = add_send_buf(sb, SCOUTFS_NET_GET_MANIFEST_ROOT, NULL, 0,
+			   get_manifest_root_reply, &args);
+	if (ret == 0) {
+		wait_for_completion(&args.comp);
+		ret = args.ret;
+	}
+	return ret;
+}
+
+
 static struct sock_info *alloc_sinf(struct super_block *sb)
 {
 	struct sock_info *sinf;
diff --git a/kmod/src/net.h b/kmod/src/net.h
index bcfa34f9..b20a6cb8 100644
--- a/kmod/src/net.h
+++ b/kmod/src/net.h
@@ -16,6 +16,9 @@ int scoutfs_net_finish_compaction(struct super_block *sb, void *curs,
 int scoutfs_net_get_last_seq(struct super_block *sb, u64 *seq);
 int scoutfs_net_advance_seq(struct super_block *sb, u64 *seq);
 
+int scoutfs_net_get_manifest_root(struct super_block *sb,
+				  struct scoutfs_btree_root *root);
+
 int scoutfs_net_setup(struct super_block *sb);
 void scoutfs_net_destroy(struct super_block *sb);
 