diff --git a/kmod/src/alloc.c b/kmod/src/alloc.c index f0a94d97..26af17f8 100644 --- a/kmod/src/alloc.c +++ b/kmod/src/alloc.c @@ -1582,12 +1582,10 @@ out: * call the caller's callback. This assumes that the super it's reading * could be stale and will retry if it encounters stale blocks. */ -int scoutfs_alloc_foreach(struct super_block *sb, - scoutfs_alloc_foreach_cb_t cb, void *arg) +int scoutfs_alloc_foreach(struct super_block *sb, scoutfs_alloc_foreach_cb_t cb, void *arg) { struct scoutfs_super_block *super = NULL; - struct scoutfs_block_ref stale_refs[2] = {{0,}}; - struct scoutfs_block_ref refs[2] = {{0,}}; + DECLARE_SAVED_REFS(saved); int ret; super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS); @@ -1596,26 +1594,18 @@ int scoutfs_alloc_foreach(struct super_block *sb, goto out; } -retry: - ret = scoutfs_read_super(sb, super); - if (ret < 0) - goto out; + do { + ret = scoutfs_read_super(sb, super); + if (ret < 0) + goto out; - refs[0] = super->logs_root.ref; - refs[1] = super->srch_root.ref; + ret = scoutfs_alloc_foreach_super(sb, super, cb, arg); + + ret = scoutfs_block_check_stale(sb, ret, &saved, &super->logs_root.ref, + &super->srch_root.ref); + } while (ret == -ESTALE); - ret = scoutfs_alloc_foreach_super(sb, super, cb, arg); out: - if (ret == -ESTALE) { - if (memcmp(&stale_refs, &refs, sizeof(refs)) == 0) { - ret = -EIO; - } else { - BUILD_BUG_ON(sizeof(stale_refs) != sizeof(refs)); - memcpy(stale_refs, refs, sizeof(stale_refs)); - goto retry; - } - } - kfree(super); return ret; } diff --git a/kmod/src/block.c b/kmod/src/block.c index 786bcc2d..6849fe5c 100644 --- a/kmod/src/block.c +++ b/kmod/src/block.c @@ -677,7 +677,7 @@ out: int scoutfs_block_read_ref(struct super_block *sb, struct scoutfs_block_ref *ref, u32 magic, struct scoutfs_block **bl_ret) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct scoutfs_block_header *hdr; struct block_private *bp = NULL; bool retried = 
false; @@ -701,7 +701,7 @@ retry: set_bit(BLOCK_BIT_CRC_VALID, &bp->bits); } - if (hdr->magic != cpu_to_le32(magic) || hdr->fsid != super->hdr.fsid || + if (hdr->magic != cpu_to_le32(magic) || hdr->fsid != cpu_to_le64(sbi->fsid) || hdr->seq != ref->seq || hdr->blkno != ref->blkno) { ret = -ESTALE; goto out; @@ -728,6 +728,36 @@ out: return ret; } +static bool stale_refs_match(struct scoutfs_block_ref *caller, struct scoutfs_block_ref *saved) +{ + return !caller || (caller->blkno == saved->blkno && caller->seq == saved->seq); +} + +/* + * Check if a read of a reference that gave ESTALE should be retried or + * should generate a hard error. If this is the second time we got + * ESTALE from the same refs then we return EIO and the caller should + * stop. As long as we keep seeing different refs we'll return ESTALE + * and the caller can keep trying. + */ +int scoutfs_block_check_stale(struct super_block *sb, int ret, + struct scoutfs_block_saved_refs *saved, + struct scoutfs_block_ref *a, struct scoutfs_block_ref *b) +{ + if (ret == -ESTALE) { + if (stale_refs_match(a, &saved->refs[0]) && stale_refs_match(b, &saved->refs[1])){ + ret = -EIO; + } else { + if (a) + saved->refs[0] = *a; + if (b) + saved->refs[1] = *b; + } + } + + return ret; +} + void scoutfs_block_put(struct super_block *sb, struct scoutfs_block *bl) { if (!IS_ERR_OR_NULL(bl)) @@ -797,7 +827,7 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc, u32 magic, struct scoutfs_block **bl_ret, u64 dirty_blkno, u64 *ref_blkno) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct scoutfs_block *cow_bl = NULL; struct scoutfs_block *bl = NULL; struct block_private *exist_bp = NULL; @@ -865,7 +895,7 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc, hdr = bl->data; hdr->magic = cpu_to_le32(magic); - hdr->fsid = super->hdr.fsid; + hdr->fsid = cpu_to_le64(sbi->fsid); hdr->blkno = 
cpu_to_le64(bl->blkno); prandom_bytes(&hdr->seq, sizeof(hdr->seq)); diff --git a/kmod/src/block.h b/kmod/src/block.h index 93d88731..bd1c2e57 100644 --- a/kmod/src/block.h +++ b/kmod/src/block.h @@ -13,6 +13,17 @@ struct scoutfs_block { void *priv; }; +struct scoutfs_block_saved_refs { + struct scoutfs_block_ref refs[2]; +}; + +#define DECLARE_SAVED_REFS(name) \ + struct scoutfs_block_saved_refs name = {{{0,}}} + +int scoutfs_block_check_stale(struct super_block *sb, int ret, + struct scoutfs_block_saved_refs *saved, + struct scoutfs_block_ref *a, struct scoutfs_block_ref *b); + int scoutfs_block_read_ref(struct super_block *sb, struct scoutfs_block_ref *ref, u32 magic, struct scoutfs_block **bl_ret); void scoutfs_block_put(struct super_block *sb, struct scoutfs_block *bl); diff --git a/kmod/src/client.c b/kmod/src/client.c index 0f1006d0..643b5693 100644 --- a/kmod/src/client.c +++ b/kmod/src/client.c @@ -356,7 +356,6 @@ static int client_greeting(struct super_block *sb, { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct client_info *client = sbi->client_info; - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; struct scoutfs_net_greeting *gr = resp; bool new_server; int ret; @@ -371,9 +370,9 @@ static int client_greeting(struct super_block *sb, goto out; } - if (gr->fsid != super->hdr.fsid) { + if (gr->fsid != cpu_to_le64(sbi->fsid)) { scoutfs_warn(sb, "server greeting response fsid 0x%llx did not match client fsid 0x%llx", - le64_to_cpu(gr->fsid), le64_to_cpu(super->hdr.fsid)); + le64_to_cpu(gr->fsid), sbi->fsid); ret = -EINVAL; goto out; } @@ -476,7 +475,6 @@ static void scoutfs_client_connect_worker(struct work_struct *work) connect_dwork.work); struct super_block *sb = client->sb; struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; struct scoutfs_mount_options opts; struct scoutfs_net_greeting greet; struct sockaddr_in sin; @@ -508,7 +506,7 @@ static void scoutfs_client_connect_worker(struct 
work_struct *work) goto out; /* send a greeting to verify endpoints of each connection */ - greet.fsid = super->hdr.fsid; + greet.fsid = cpu_to_le64(sbi->fsid); greet.fmt_vers = cpu_to_le64(sbi->fmt_vers); greet.server_term = cpu_to_le64(client->server_term); greet.rid = cpu_to_le64(sbi->rid); diff --git a/kmod/src/counters.h b/kmod/src/counters.h index f4111feb..378fcdc1 100644 --- a/kmod/src/counters.h +++ b/kmod/src/counters.h @@ -187,8 +187,6 @@ EXPAND_COUNTER(srch_search_retry_empty) \ EXPAND_COUNTER(srch_search_sorted) \ EXPAND_COUNTER(srch_search_sorted_block) \ - EXPAND_COUNTER(srch_search_stale_eio) \ - EXPAND_COUNTER(srch_search_stale_retry) \ EXPAND_COUNTER(srch_search_xattrs) \ EXPAND_COUNTER(srch_read_stale) \ EXPAND_COUNTER(statfs) \ diff --git a/kmod/src/forest.c b/kmod/src/forest.c index 1b4c9c4b..062d3713 100644 --- a/kmod/src/forest.c +++ b/kmod/src/forest.c @@ -78,11 +78,6 @@ struct forest_refs { struct scoutfs_block_ref logs_ref; }; -/* initialize some refs that initially aren't equal */ -#define DECLARE_STALE_TRACKING_SUPER_REFS(a, b) \ - struct forest_refs a = {{cpu_to_le64(0),}}; \ - struct forest_refs b = {{cpu_to_le64(1),}} - struct forest_bloom_nrs { unsigned int nrs[SCOUTFS_FOREST_BLOOM_NRS]; }; @@ -136,11 +131,11 @@ static struct scoutfs_block *read_bloom_ref(struct super_block *sb, struct scout int scoutfs_forest_next_hint(struct super_block *sb, struct scoutfs_key *key, struct scoutfs_key *next) { - DECLARE_STALE_TRACKING_SUPER_REFS(prev_refs, refs); struct scoutfs_net_roots roots; struct scoutfs_btree_root item_root; struct scoutfs_log_trees *lt; SCOUTFS_BTREE_ITEM_REF(iref); + DECLARE_SAVED_REFS(saved); struct scoutfs_key found; struct scoutfs_key ltk; bool checked_fs; @@ -155,8 +150,6 @@ retry: goto out; trace_scoutfs_forest_using_roots(sb, &roots.fs_root, &roots.logs_root); - refs.fs_ref = roots.fs_root.ref; - refs.logs_ref = roots.logs_root.ref; scoutfs_key_init_log_trees(&ltk, 0, 0); checked_fs = false; @@ -212,14 +205,10 @@ retry:
} } - if (ret == -ESTALE) { - if (memcmp(&prev_refs, &refs, sizeof(refs)) == 0) - return -EIO; - prev_refs = refs; + ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref); + if (ret == -ESTALE) goto retry; - } out: - return ret; } @@ -541,9 +530,8 @@ void scoutfs_forest_dec_inode_count(struct super_block *sb) /* * Return the total inode count from the super block and all the - * log_btrees it references. This assumes it's working with a block - * reference hierarchy that should be fully consistent. If we see - * ESTALE we've hit persistent corruption. + * log_btrees it references. ESTALE from read blocks is returned to the + * caller who is expected to retry or return hard errors. */ int scoutfs_forest_inode_count(struct super_block *sb, struct scoutfs_super_block *super, u64 *inode_count) @@ -572,8 +560,6 @@ int scoutfs_forest_inode_count(struct super_block *sb, struct scoutfs_super_bloc if (ret < 0) { if (ret == -ENOENT) ret = 0; - else if (ret == -ESTALE) - ret = -EIO; break; } } diff --git a/kmod/src/quorum.c b/kmod/src/quorum.c index 34daa105..98f1b264 100644 --- a/kmod/src/quorum.c +++ b/kmod/src/quorum.c @@ -114,6 +114,7 @@ struct quorum_status { struct quorum_info { struct super_block *sb; + struct scoutfs_quorum_config qconf; struct work_struct work; struct socket *sock; bool shutdown; @@ -134,11 +135,18 @@ struct quorum_info { #define DECLARE_QUORUM_INFO_KOBJ(kobj, name) \ DECLARE_QUORUM_INFO(SCOUTFS_SYSFS_ATTRS_SB(kobj), name) -static bool quorum_slot_present(struct scoutfs_super_block *super, int i) +static bool quorum_slot_present(struct scoutfs_quorum_config *qconf, int i) { - BUG_ON(i < 0 || i > SCOUTFS_QUORUM_MAX_SLOTS); + BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS); - return super->qconf.slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4); + return qconf->slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4); +} + +static void quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin) +{ + BUG_ON(i < 0 || i >=
SCOUTFS_QUORUM_MAX_SLOTS); + + scoutfs_addr_to_sin(sin, &qconf->slots[i].addr); } static ktime_t election_timeout(void) @@ -160,7 +168,6 @@ static ktime_t heartbeat_timeout(void) static int create_socket(struct super_block *sb) { DECLARE_QUORUM_INFO(sb, qinf); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; struct socket *sock = NULL; struct sockaddr_in sin; int addrlen; @@ -174,7 +181,7 @@ static int create_socket(struct super_block *sb) sock->sk->sk_allocation = GFP_NOFS; - scoutfs_quorum_slot_sin(super, qinf->our_quorum_slot_nr, &sin); + quorum_slot_sin(&qinf->qconf, qinf->our_quorum_slot_nr, &sin); addrlen = sizeof(sin); ret = kernel_bind(sock, (struct sockaddr *)&sin, addrlen); @@ -204,13 +211,13 @@ static __le32 quorum_message_crc(struct scoutfs_quorum_message *qmes) static void send_msg_members(struct super_block *sb, int type, u64 term, int only) { + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); DECLARE_QUORUM_INFO(sb, qinf); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; ktime_t now; int i; struct scoutfs_quorum_message qmes = { - .fsid = super->hdr.fsid, + .fsid = cpu_to_le64(sbi->fsid), .term = cpu_to_le64(term), .type = type, .from = qinf->our_quorum_slot_nr, @@ -234,11 +241,11 @@ static void send_msg_members(struct super_block *sb, int type, u64 term, for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) { - if (!quorum_slot_present(super, i) || + if (!quorum_slot_present(&qinf->qconf, i) || (only >= 0 && i != only) || i == qinf->our_quorum_slot_nr) continue; - scoutfs_quorum_slot_sin(super, i, &sin); + scoutfs_quorum_slot_sin(&qinf->qconf, i, &sin); now = ktime_get(); kernel_sendmsg(qinf->sock, &mh, &kv, 1, kv.iov_len); @@ -266,7 +273,7 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg, ktime_t abs_to) { DECLARE_QUORUM_INFO(sb, qinf); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct scoutfs_quorum_message qmes; struct timeval tv; ktime_t 
rel_to; @@ -309,10 +316,10 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg, if (ret != sizeof(qmes) || qmes.crc != quorum_message_crc(&qmes) || - qmes.fsid != super->hdr.fsid || + qmes.fsid != cpu_to_le64(sbi->fsid) || qmes.type >= SCOUTFS_QUORUM_MSG_INVALID || qmes.from >= SCOUTFS_QUORUM_MAX_SLOTS || - !quorum_slot_present(super, qmes.from)) { + !quorum_slot_present(&qinf->qconf, qmes.from)) { /* should we be trying to open a new socket? */ scoutfs_inc_counter(sb, quorum_recv_invalid); return -EAGAIN; @@ -342,7 +349,7 @@ static int read_quorum_block(struct super_block *sb, u64 blkno, struct scoutfs_q bool check_rid) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + const u64 fsid = sbi->fsid; const u64 rid = sbi->rid; char msg[150]; __le32 crc; @@ -367,9 +374,9 @@ static int read_quorum_block(struct super_block *sb, u64 blkno, struct scoutfs_q else if (le32_to_cpu(blk->hdr.magic) != SCOUTFS_BLOCK_MAGIC_QUORUM) snprintf(msg, sizeof(msg), "blk magic %08x != %08x", le32_to_cpu(blk->hdr.magic), SCOUTFS_BLOCK_MAGIC_QUORUM); - else if (blk->hdr.fsid != super->hdr.fsid) + else if (blk->hdr.fsid != cpu_to_le64(fsid)) snprintf(msg, sizeof(msg), "blk fsid %016llx != %016llx", - le64_to_cpu(blk->hdr.fsid), le64_to_cpu(super->hdr.fsid)); + le64_to_cpu(blk->hdr.fsid), fsid); else if (le64_to_cpu(blk->hdr.blkno) != blkno) snprintf(msg, sizeof(msg), "blk blkno %llu != %llu", le64_to_cpu(blk->hdr.blkno), blkno); @@ -410,8 +417,7 @@ out: */ static void read_greatest_term(struct super_block *sb, u64 *term) { - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + DECLARE_QUORUM_INFO(sb, qinf); struct scoutfs_quorum_block blk; int ret; int e; @@ -420,7 +426,7 @@ static void read_greatest_term(struct super_block *sb, u64 *term) *term = 0; for (s = 0; s < SCOUTFS_QUORUM_MAX_SLOTS; s++) { - if (!quorum_slot_present(super, s)) + if 
(!quorum_slot_present(&qinf->qconf, s)) continue; ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + s, &blk, false); @@ -514,14 +520,15 @@ static int update_quorum_block(struct super_block *sb, int event, u64 term, bool * keeps us from being fenced while we allow userspace fencing to take a * reasonably long time. We still want to timeout eventually. */ -int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term) +int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_config *qconf, + u64 term) { #define NR_OLD 2 struct scoutfs_quorum_block_event old[SCOUTFS_QUORUM_MAX_SLOTS][NR_OLD] = {{{0,}}}; struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; struct scoutfs_quorum_block blk; struct sockaddr_in sin; + const __le64 lefsid = cpu_to_le64(sbi->fsid); const u64 rid = sbi->rid; bool fence_started = false; u64 fenced = 0; @@ -534,7 +541,7 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term) BUILD_BUG_ON(SCOUTFS_QUORUM_BLOCKS < SCOUTFS_QUORUM_MAX_SLOTS); for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) { - if (!quorum_slot_present(super, i)) + if (!quorum_slot_present(qconf, i)) continue; ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + i, &blk, false); @@ -567,11 +574,11 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term) continue; scoutfs_inc_counter(sb, quorum_fence_leader); - scoutfs_quorum_slot_sin(super, i, &sin); + quorum_slot_sin(qconf, i, &sin); fence_rid = old[i][j].rid; scoutfs_info(sb, "fencing previous leader "SCSBF" at term %llu in slot %u with address "SIN_FMT, - SCSB_LEFR_ARGS(super->hdr.fsid, fence_rid), + SCSB_LEFR_ARGS(lefsid, fence_rid), le64_to_cpu(old[i][j].term), i, SIN_ARG(&sin)); ret = scoutfs_fence_start(sb, le64_to_cpu(fence_rid), sin.sin_addr.s_addr, SCOUTFS_FENCE_QUORUM_BLOCK_LEADER); @@ -752,7 +759,7 @@ static void scoutfs_quorum_worker(struct work_struct *work) qst.server_start_term = qst.term; qst.server_event = 
SCOUTFS_QUORUM_EVENT_ELECT; - scoutfs_server_start(sb, qst.term); + scoutfs_server_start(sb, &qinf->qconf, qst.term); } /* @@ -877,16 +884,25 @@ out: */ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin) { - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = NULL; struct scoutfs_quorum_block blk; u64 elect_term; u64 term = 0; int ret = 0; int i; + super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS); + if (!super) { + ret = -ENOMEM; + goto out; + } + + ret = scoutfs_read_super(sb, super); + if (ret) + goto out; + for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) { - if (!quorum_slot_present(super, i)) + if (!quorum_slot_present(&super->qconf, i)) continue; ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + i, &blk, false); @@ -900,7 +916,7 @@ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin) if (elect_term > term && elect_term > le64_to_cpu(blk.events[SCOUTFS_QUORUM_EVENT_STOP].term)) { term = elect_term; - scoutfs_quorum_slot_sin(super, i, sin); + scoutfs_quorum_slot_sin(&super->qconf, i, sin); continue; } } @@ -909,6 +925,7 @@ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin) ret = -ENOENT; out: + kfree(super); return ret; } @@ -924,12 +941,9 @@ u8 scoutfs_quorum_votes_needed(struct super_block *sb) return qinf->votes_needed; } -void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i, - struct sockaddr_in *sin) +void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin) { - BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS); - - scoutfs_addr_to_sin(sin, &super->qconf.slots[i].addr); + quorum_slot_sin(qconf, i, sin); } static char *role_str(int role) @@ -1060,11 +1074,10 @@ static inline bool valid_ipv4_port(__be16 port) return port != 0 && be16_to_cpu(port) != U16_MAX; } -static int verify_quorum_slots(struct super_block *sb) +static
int verify_quorum_slots(struct super_block *sb, struct quorum_info *qinf, + struct scoutfs_quorum_config *qconf) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; char slots[(SCOUTFS_QUORUM_MAX_SLOTS * 3) + 1]; - DECLARE_QUORUM_INFO(sb, qinf); struct sockaddr_in other; struct sockaddr_in sin; int found = 0; @@ -1074,10 +1087,10 @@ static int verify_quorum_slots(struct super_block *sb) for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) { - if (!quorum_slot_present(super, i)) + if (!quorum_slot_present(qconf, i)) continue; - scoutfs_quorum_slot_sin(super, i, &sin); + scoutfs_quorum_slot_sin(qconf, i, &sin); if (!valid_ipv4_unicast(sin.sin_addr.s_addr)) { scoutfs_err(sb, "quorum slot #%d has invalid ipv4 unicast address: "SIN_FMT, @@ -1092,10 +1105,10 @@ static int verify_quorum_slots(struct super_block *sb) } for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) { - if (!quorum_slot_present(super, j)) + if (!quorum_slot_present(qconf, j)) continue; - scoutfs_quorum_slot_sin(super, j, &other); + scoutfs_quorum_slot_sin(qconf, j, &other); if (sin.sin_addr.s_addr == other.sin_addr.s_addr && sin.sin_port == other.sin_port) { @@ -1113,11 +1126,11 @@ static int verify_quorum_slots(struct super_block *sb) return -EINVAL; } - if (!quorum_slot_present(super, qinf->our_quorum_slot_nr)) { + if (!quorum_slot_present(qconf, qinf->our_quorum_slot_nr)) { char *str = slots; *str = '\0'; for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) { - if (quorum_slot_present(super, i)) { + if (quorum_slot_present(qconf, i)) { ret = snprintf(str, &slots[ARRAY_SIZE(slots)] - str, "%c%u", str == slots ? ' ' : ',', i); if (ret < 2 || ret > 3) { @@ -1141,16 +1154,22 @@ static int verify_quorum_slots(struct super_block *sb) else qinf->votes_needed = (found / 2) + 1; + qinf->qconf = *qconf; + return 0; } /* * Once this schedules the quorum worker it can be elected leader and - * start the server, possibly before this returns. + * start the server, possibly before this returns. 
The quorum agent + * would be responsible for tracking the quorum config in the super + * block if it changes. Until then uses a static config that it reads + * during setup. */ int scoutfs_quorum_setup(struct super_block *sb) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); + struct scoutfs_super_block *super = NULL; struct scoutfs_mount_options opts; struct quorum_info *qinf; int ret; @@ -1160,7 +1179,9 @@ int scoutfs_quorum_setup(struct super_block *sb) return 0; qinf = kzalloc(sizeof(struct quorum_info), GFP_KERNEL); - if (!qinf) { + super = kmalloc(sizeof(struct scoutfs_super_block), GFP_KERNEL); + if (!qinf || !super) { + kfree(qinf); ret = -ENOMEM; goto out; } @@ -1174,7 +1195,11 @@ int scoutfs_quorum_setup(struct super_block *sb) sbi->quorum_info = qinf; qinf->sb = sb; - ret = verify_quorum_slots(sb); + ret = scoutfs_read_super(sb, super); + if (ret < 0) + goto out; + + ret = verify_quorum_slots(sb, qinf, &super->qconf); if (ret < 0) goto out; @@ -1194,6 +1219,7 @@ out: if (ret) scoutfs_quorum_destroy(sb); + kfree(super); return ret; } diff --git a/kmod/src/quorum.h b/kmod/src/quorum.h index 11959ab2..500a5d16 100644 --- a/kmod/src/quorum.h +++ b/kmod/src/quorum.h @@ -4,10 +4,11 @@ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin); u8 scoutfs_quorum_votes_needed(struct super_block *sb); -void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i, +void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin); -int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term); +int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_config *qconf, + u64 term); int scoutfs_quorum_setup(struct super_block *sb); void scoutfs_quorum_shutdown(struct super_block *sb); diff --git a/kmod/src/server.c b/kmod/src/server.c index 17f38481..73a74223 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -130,9 +130,9 @@ struct server_info { struct mutex srch_mutex; 
struct mutex mounted_clients_mutex; - /* stable versions stored from commits, given in locks and rpcs */ - seqcount_t roots_seqcount; - struct scoutfs_net_roots roots; + /* stable super stored from commits, given in locks and rpcs */ + seqcount_t stable_seqcount; + struct scoutfs_super_block stable_super; /* serializing and get and set volume options */ seqcount_t volopt_seqcount; @@ -143,11 +143,18 @@ struct server_info { struct work_struct fence_pending_recov_work; /* while running we check for fenced mounts to reclaim */ struct delayed_work reclaim_dwork; + + /* a running server gets a static quorum config from quorum as it starts */ + struct scoutfs_quorum_config qconf; + /* a running server maintains a private dirty super */ + struct scoutfs_super_block dirty_super; }; #define DECLARE_SERVER_INFO(sb, name) \ struct server_info *name = SCOUTFS_SB(sb)->server_info +#define DIRTY_SUPER_SB(sb) (&SCOUTFS_SB(sb)->server_info->dirty_super) + /* * The server tracks each connected client. */ @@ -469,16 +476,22 @@ static void commit_end(struct super_block *sb, struct commit_users *cusers, int wake_up(&cusers->waitq); } -static void get_roots(struct super_block *sb, - struct scoutfs_net_roots *roots) +static void get_stable(struct super_block *sb, struct scoutfs_super_block *super, + struct scoutfs_net_roots *roots) { DECLARE_SERVER_INFO(sb, server); unsigned int seq; do { - seq = read_seqcount_begin(&server->roots_seqcount); - *roots = server->roots; - } while (read_seqcount_retry(&server->roots_seqcount, seq)); + seq = read_seqcount_begin(&server->stable_seqcount); + if (super) + *super = server->stable_super; + if (roots) { + roots->fs_root = server->stable_super.fs_root; + roots->logs_root = server->stable_super.logs_root; + roots->srch_root = server->stable_super.srch_root; + } + } while (read_seqcount_retry(&server->stable_seqcount, seq)); } u64 scoutfs_server_seq(struct super_block *sb) @@ -510,17 +523,12 @@ void scoutfs_server_set_seq_if_greater(struct 
super_block *sb, u64 seq) } } -static void set_roots(struct server_info *server, - struct scoutfs_btree_root *fs_root, - struct scoutfs_btree_root *logs_root, - struct scoutfs_btree_root *srch_root) +static void set_stable_super(struct server_info *server, struct scoutfs_super_block *super) { preempt_disable(); - write_seqcount_begin(&server->roots_seqcount); - server->roots.fs_root = *fs_root; - server->roots.logs_root = *logs_root; - server->roots.srch_root = *srch_root; - write_seqcount_end(&server->roots_seqcount); + write_seqcount_begin(&server->stable_seqcount); + server->stable_super = *super; + write_seqcount_end(&server->stable_seqcount); preempt_enable(); } @@ -545,7 +553,7 @@ static void scoutfs_server_commit_func(struct work_struct *work) struct server_info *server = container_of(work, struct server_info, commit_work); struct super_block *sb = server->sb; - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct commit_users *cusers = &server->cusers; int ret; @@ -603,8 +611,7 @@ static void scoutfs_server_commit_func(struct work_struct *work) goto out; } - set_roots(server, &super->fs_root, &super->logs_root, - &super->srch_root); + set_stable_super(server, super); /* swizzle the active and idle server alloc/freed heads */ server->other_ind ^= 1; @@ -641,7 +648,7 @@ static int server_alloc_inodes(struct super_block *sb, u8 cmd, u64 id, void *arg, u16 arg_len) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_net_inode_alloc ial = { 0, }; COMMIT_HOLD(hold); __le64 lecount; @@ -809,7 +816,7 @@ static void mod_bitmap_bits(__le64 *dst, u64 dst_zone_blocks, static int get_data_alloc_zone_bits(struct super_block *sb, u64 rid, __le64 *exclusive, __le64 *vacant, u64 zone_blocks) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block 
*super = DIRTY_SUPER_SB(sb); SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_log_trees *lt; struct scoutfs_key key; @@ -1040,7 +1047,7 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l u64 rid, struct commit_hold *hold) { struct server_info *server = SCOUTFS_SB(sb)->server_info; - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_status stat; struct scoutfs_log_merge_range rng; struct scoutfs_log_trees each_lt; @@ -1242,7 +1249,7 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l static void try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees *lt) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); const u64 rid = le64_to_cpu(lt->rid); const u64 nr = le64_to_cpu(lt->nr); struct scoutfs_log_trees drain; @@ -1329,7 +1336,7 @@ static int server_get_log_trees(struct super_block *sb, struct scoutfs_net_connection *conn, u8 cmd, u64 id, void *arg, u16 arg_len) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); u64 rid = scoutfs_net_client_rid(conn); DECLARE_SERVER_INFO(sb, server); __le64 exclusive[SCOUTFS_DATA_ALLOC_ZONE_LE64S]; @@ -1524,7 +1531,7 @@ static int server_commit_log_trees(struct super_block *sb, struct scoutfs_net_connection *conn, u8 cmd, u64 id, void *arg, u16 arg_len) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); const u64 rid = scoutfs_net_client_rid(conn); DECLARE_SERVER_INFO(sb, server); SCOUTFS_BTREE_ITEM_REF(iref); @@ -1624,7 +1631,7 @@ static int server_get_roots(struct super_block *sb, memset(&roots, 0, sizeof(roots)); ret = -EINVAL; } else { - get_roots(sb, &roots); + get_stable(sb, NULL, &roots); ret = 0; } @@ -1654,7 +1661,7 @@ 
static int server_get_roots(struct super_block *sb, */ static int reclaim_open_log_tree(struct super_block *sb, u64 rid) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); DECLARE_SERVER_INFO(sb, server); SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_log_trees lt; @@ -1751,9 +1758,8 @@ out: */ static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret) { + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); DECLARE_SERVER_INFO(sb, server); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_log_trees *lt; struct scoutfs_key key; @@ -1909,9 +1915,8 @@ static int server_srch_get_compact(struct super_block *sb, u8 cmd, u64 id, void *arg, u16 arg_len) { DECLARE_SERVER_INFO(sb, server); + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); u64 rid = scoutfs_net_client_rid(conn); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; struct scoutfs_srch_compact *sc = NULL; COMMIT_HOLD(hold); int ret; @@ -1976,8 +1981,7 @@ static int server_srch_commit_compact(struct super_block *sb, { DECLARE_SERVER_INFO(sb, server); u64 rid = scoutfs_net_client_rid(conn); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_srch_compact *sc; struct scoutfs_alloc_list_head av; struct scoutfs_alloc_list_head fr; @@ -2052,8 +2056,7 @@ static int splice_log_merge_completions(struct super_block *sb, bool no_ranges) { struct server_info *server = SCOUTFS_SB(sb)->server_info; - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_complete comp; struct scoutfs_log_merge_freeing fr; struct scoutfs_log_merge_range rng; @@ -2370,7 
+2373,7 @@ static void server_log_merge_free_work(struct work_struct *work) struct server_info *server = container_of(work, struct server_info, log_merge_free_work); struct super_block *sb = server->sb; - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_freeing fr; struct scoutfs_key key; COMMIT_HOLD(hold); @@ -2462,8 +2465,7 @@ static int server_get_log_merge(struct super_block *sb, { DECLARE_SERVER_INFO(sb, server); u64 rid = scoutfs_net_client_rid(conn); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_status stat; struct scoutfs_log_merge_range rng; struct scoutfs_log_merge_range remain; @@ -2746,8 +2748,7 @@ static int server_commit_log_merge(struct super_block *sb, { DECLARE_SERVER_INFO(sb, server); u64 rid = scoutfs_net_client_rid(conn); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_request orig_req; struct scoutfs_log_merge_complete *comp; struct scoutfs_log_merge_status stat; @@ -2982,7 +2983,7 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti u8 cmd, u64 id, void *arg, u16 arg_len) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_volume_options *volopt; COMMIT_HOLD(hold); u64 opt; @@ -3051,7 +3052,7 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec u8 cmd, u64 id, void *arg, u16 arg_len) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_volume_options *volopt; COMMIT_HOLD(hold); __le64 *opt; @@ 
-3105,7 +3106,7 @@ static int server_resize_devices(struct super_block *sb, struct scoutfs_net_conn { DECLARE_SERVER_INFO(sb, server); struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_net_resize_devices *nrd; COMMIT_HOLD(hold); u64 meta_tot; @@ -3212,16 +3213,19 @@ static int count_free_blocks(struct super_block *sb, void *arg, int owner, } /* - * We calculate the total inode count and free blocks from the current in-memory dirty - * versions of the super block and log_trees structs, so we have to lock them. + * We calculate the total inode count and free blocks from the last + * stable super that was written. Other users also walk stable blocks + * so by joining them we don't have to worry about ensuring that we've + * locked all the dirty structures that the summations could reference. + * We handle stale reads by retrying with the most recent stable super. 
*/ static int server_statfs(struct super_block *sb, struct scoutfs_net_connection *conn, u8 cmd, u64 id, void *arg, u16 arg_len) { - DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block super; struct scoutfs_net_statfs nst = {{0,}}; struct statfs_free_blocks sfb = {0,}; + DECLARE_SAVED_REFS(saved); u64 inode_count; int ret; @@ -3230,24 +3234,24 @@ static int server_statfs(struct super_block *sb, struct scoutfs_net_connection * goto out; } - mutex_lock(&server->alloc_mutex); - ret = scoutfs_alloc_foreach_super(sb, super, count_free_blocks, &sfb); - mutex_unlock(&server->alloc_mutex); - if (ret < 0) - goto out; + do { + get_stable(sb, &super, NULL); - mutex_lock(&server->logs_mutex); - ret = scoutfs_forest_inode_count(sb, super, &inode_count); - mutex_unlock(&server->logs_mutex); - if (ret < 0) - goto out; + ret = scoutfs_alloc_foreach_super(sb, &super, count_free_blocks, &sfb) ?: + scoutfs_forest_inode_count(sb, &super, &inode_count); + if (ret < 0 && ret != -ESTALE) + goto out; - BUILD_BUG_ON(sizeof(nst.uuid) != sizeof(super->uuid)); - memcpy(nst.uuid, super->uuid, sizeof(nst.uuid)); + ret = scoutfs_block_check_stale(sb, ret, &saved, &super.logs_root.ref, + &super.srch_root.ref); + } while (ret == -ESTALE); + + BUILD_BUG_ON(sizeof(nst.uuid) != sizeof(super.uuid)); + memcpy(nst.uuid, super.uuid, sizeof(nst.uuid)); nst.free_meta_blocks = cpu_to_le64(sfb.meta); - nst.total_meta_blocks = super->total_meta_blocks; + nst.total_meta_blocks = super.total_meta_blocks; nst.free_data_blocks = cpu_to_le64(sfb.data); - nst.total_data_blocks = super->total_data_blocks; + nst.total_data_blocks = super.total_data_blocks; nst.inode_count = cpu_to_le64(inode_count); ret = 0; @@ -3278,7 +3282,7 @@ static int insert_mounted_client(struct super_block *sb, u64 rid, u64 gr_flags, struct sockaddr_in *sin) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct 
scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_mounted_client_btree_val mcv; struct scoutfs_key key; int ret; @@ -3304,7 +3308,7 @@ static int lookup_mounted_client_addr(struct super_block *sb, u64 rid, union scoutfs_inet_addr *addr) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_mounted_client_btree_val *mcv; SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_key key; @@ -3338,7 +3342,7 @@ static int lookup_mounted_client_addr(struct super_block *sb, u64 rid, static int delete_mounted_client(struct super_block *sb, u64 rid) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_key key; int ret; @@ -3362,7 +3366,7 @@ static int delete_mounted_client(struct super_block *sb, u64 rid) static int cancel_srch_compact(struct super_block *sb, u64 rid) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_alloc_list_head av; struct scoutfs_alloc_list_head fr; int ret; @@ -3414,7 +3418,7 @@ static int cancel_srch_compact(struct super_block *sb, u64 rid) static int cancel_log_merge(struct super_block *sb, u64 rid) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_status stat; struct scoutfs_log_merge_request req; struct scoutfs_log_merge_range rng; @@ -3538,7 +3542,7 @@ static int server_greeting(struct super_block *sb, u8 cmd, u64 id, void *arg, u16 arg_len) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_net_greeting *gr = arg; struct scoutfs_net_greeting greet; 
DECLARE_SERVER_INFO(sb, server); @@ -3554,10 +3558,9 @@ static int server_greeting(struct super_block *sb, goto send_err; } - if (gr->fsid != super->hdr.fsid) { + if (gr->fsid != cpu_to_le64(sbi->fsid)) { scoutfs_warn(sb, "client rid %016llx greeting fsid 0x%llx did not match server fsid 0x%llx", - le64_to_cpu(gr->rid), le64_to_cpu(gr->fsid), - le64_to_cpu(super->hdr.fsid)); + le64_to_cpu(gr->rid), le64_to_cpu(gr->fsid), sbi->fsid); ret = -EINVAL; goto send_err; } @@ -3697,7 +3700,7 @@ static void farewell_worker(struct work_struct *work) struct server_info *server = container_of(work, struct server_info, farewell_work); struct super_block *sb = server->sb; - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_mounted_client_btree_val *mcv; struct farewell_request *tmp; struct farewell_request *fw; @@ -4059,7 +4062,7 @@ static void recovery_timeout(struct super_block *sb) static int start_recovery(struct super_block *sb) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_key key; unsigned int nr = 0; @@ -4176,8 +4179,7 @@ static void scoutfs_server_worker(struct work_struct *work) struct server_info *server = container_of(work, struct server_info, work); struct super_block *sb = server->sb; - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_net_connection *conn = NULL; struct scoutfs_mount_options opts; DECLARE_WAIT_QUEUE_HEAD(waitq); @@ -4189,13 +4191,13 @@ static void scoutfs_server_worker(struct work_struct *work) trace_scoutfs_server_work_enter(sb, 0, 0); scoutfs_options_read(sb, &opts); - scoutfs_quorum_slot_sin(super, opts.quorum_slot_nr, &sin); + scoutfs_quorum_slot_sin(&server->qconf, opts.quorum_slot_nr, &sin); 
scoutfs_info(sb, "server starting at "SIN_FMT, SIN_ARG(&sin)); scoutfs_block_writer_init(sb, &server->wri); /* first make sure no other servers are still running */ - ret = scoutfs_quorum_fence_leaders(sb, server->term); + ret = scoutfs_quorum_fence_leaders(sb, &server->qconf, server->term); if (ret < 0) { scoutfs_err(sb, "server error %d attempting to fence previous leaders", ret); goto out; @@ -4231,8 +4233,7 @@ static void scoutfs_server_worker(struct work_struct *work) write_seqcount_end(&server->volopt_seqcount); atomic64_set(&server->seq_atomic, le64_to_cpu(super->seq)); - set_roots(server, &super->fs_root, &super->logs_root, - &super->srch_root); + set_stable_super(server, super); /* prepare server alloc for this transaction, larger first */ if (le64_to_cpu(super->server_meta_avail[0].total_nr) < @@ -4326,11 +4327,12 @@ out: /* * Start the server but don't wait for it to complete. */ -void scoutfs_server_start(struct super_block *sb, u64 term) +void scoutfs_server_start(struct super_block *sb, struct scoutfs_quorum_config *qconf, u64 term) { DECLARE_SERVER_INFO(sb, server); if (cmpxchg(&server->status, SERVER_DOWN, SERVER_STARTING) == SERVER_DOWN) { + server->qconf = *qconf; server->term = term; queue_work(server->wq, &server->work); } @@ -4382,7 +4384,7 @@ int scoutfs_server_setup(struct super_block *sb) INIT_WORK(&server->log_merge_free_work, server_log_merge_free_work); mutex_init(&server->srch_mutex); mutex_init(&server->mounted_clients_mutex); - seqcount_init(&server->roots_seqcount); + seqcount_init(&server->stable_seqcount); seqcount_init(&server->volopt_seqcount); mutex_init(&server->volopt_mutex); INIT_WORK(&server->fence_pending_recov_work, fence_pending_recov_worker); diff --git a/kmod/src/server.h b/kmod/src/server.h index dc1bd69a..f795ea0f 100644 --- a/kmod/src/server.h +++ b/kmod/src/server.h @@ -75,7 +75,7 @@ u64 scoutfs_server_seq(struct super_block *sb); u64 scoutfs_server_next_seq(struct super_block *sb); void 
scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq); -void scoutfs_server_start(struct super_block *sb, u64 term); +void scoutfs_server_start(struct super_block *sb, struct scoutfs_quorum_config *qconf, u64 term); void scoutfs_server_stop(struct super_block *sb); void scoutfs_server_stop_wait(struct super_block *sb); bool scoutfs_server_is_running(struct super_block *sb); diff --git a/kmod/src/srch.c b/kmod/src/srch.c index b23fc6c2..2cb99b4c 100644 --- a/kmod/src/srch.c +++ b/kmod/src/srch.c @@ -861,7 +861,6 @@ int scoutfs_srch_search_xattrs(struct super_block *sb, struct scoutfs_srch_rb_root *sroot, u64 hash, u64 ino, u64 last_ino, bool *done) { - struct scoutfs_net_roots prev_roots; struct scoutfs_net_roots roots; struct scoutfs_srch_entry start; struct scoutfs_srch_entry end; @@ -869,6 +868,7 @@ int scoutfs_srch_search_xattrs(struct super_block *sb, struct scoutfs_log_trees lt; struct scoutfs_srch_file sfl; SCOUTFS_BTREE_ITEM_REF(iref); + DECLARE_SAVED_REFS(saved); struct scoutfs_key key; unsigned long limit = SRCH_LIMIT; int ret; @@ -877,7 +877,6 @@ int scoutfs_srch_search_xattrs(struct super_block *sb, *done = false; srch_init_rb_root(sroot); - memset(&prev_roots, 0, sizeof(prev_roots)); start.hash = cpu_to_le64(hash); start.ino = cpu_to_le64(ino); @@ -892,7 +891,6 @@ retry: ret = scoutfs_client_get_roots(sb, &roots); if (ret) goto out; - memset(&roots.fs_root, 0, sizeof(roots.fs_root)); end = final; @@ -968,16 +966,10 @@ retry: *done = sre_cmp(&end, &final) == 0; ret = 0; out: - if (ret == -ESTALE) { - if (memcmp(&prev_roots, &roots, sizeof(roots)) == 0) { - scoutfs_inc_counter(sb, srch_search_stale_eio); - ret = -EIO; - } else { - scoutfs_inc_counter(sb, srch_search_stale_retry); - prev_roots = roots; - goto retry; - } - } + ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.srch_root.ref, + &roots.logs_root.ref); + if (ret == -ESTALE) + goto retry; return ret; } diff --git a/kmod/src/super.c b/kmod/src/super.c index 9cc68869..e8e265b4 
100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -461,9 +461,8 @@ static int scoutfs_read_supers(struct super_block *sb) goto out; } - + sbi->fsid = le64_to_cpu(meta_super->hdr.fsid); sbi->fmt_vers = le64_to_cpu(meta_super->fmt_vers); - sbi->super = *meta_super; out: kfree(meta_super); kfree(data_super); diff --git a/kmod/src/super.h b/kmod/src/super.h index 3d3cd3d2..14ff626a 100644 --- a/kmod/src/super.h +++ b/kmod/src/super.h @@ -35,11 +35,10 @@ struct scoutfs_sb_info { struct super_block *sb; /* assigned once at the start of each mount, read-only */ + u64 fsid; u64 rid; u64 fmt_vers; - struct scoutfs_super_block super; - struct block_device *meta_bdev; spinlock_t next_ino_lock; @@ -135,14 +134,14 @@ static inline bool scoutfs_unmounting(struct super_block *sb) (int)(le64_to_cpu(fsid) >> SCSB_SHIFT), \ (int)(le64_to_cpu(rid) >> SCSB_SHIFT) #define SCSB_ARGS(sb) \ - (int)(le64_to_cpu(SCOUTFS_SB(sb)->super.hdr.fsid) >> SCSB_SHIFT), \ + (int)(SCOUTFS_SB(sb)->fsid >> SCSB_SHIFT), \ (int)(SCOUTFS_SB(sb)->rid >> SCSB_SHIFT) #define SCSB_TRACE_FIELDS \ __field(__u64, fsid) \ __field(__u64, rid) #define SCSB_TRACE_ASSIGN(sb) \ __entry->fsid = SCOUTFS_HAS_SBI(sb) ? \ - le64_to_cpu(SCOUTFS_SB(sb)->super.hdr.fsid) : 0;\ + SCOUTFS_SB(sb)->fsid : 0; \ __entry->rid = SCOUTFS_HAS_SBI(sb) ? \ SCOUTFS_SB(sb)->rid : 0; #define SCSB_TRACE_ARGS \ diff --git a/kmod/src/sysfs.c b/kmod/src/sysfs.c index e429a5e5..34fb4a64 100644 --- a/kmod/src/sysfs.c +++ b/kmod/src/sysfs.c @@ -60,10 +60,9 @@ static ssize_t fsid_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct super_block *sb = KOBJ_TO_SB(kobj, sb_id_kobj); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - return snprintf(buf, PAGE_SIZE, "%016llx\n", - le64_to_cpu(super->hdr.fsid)); + return snprintf(buf, PAGE_SIZE, "%016llx\n", sbi->fsid); } ATTR_FUNCS_RO(fsid);