From 929703213fd0883ba5e523ca3650973d31db4c7b Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 9 Dec 2022 14:32:29 -0800 Subject: [PATCH 1/9] Add fsid sbi field A few paths throughout the code get the fsid for the current mount by using the copy of the super block that we store in the scoutfs_sb_info for the mount. We'd like to remove the super block from the sbi and it's cleaner to have a specific constant field for the fsid of the mount which will not change. Signed-off-by: Zach Brown --- kmod/src/block.c | 8 ++++---- kmod/src/client.c | 8 +++----- kmod/src/quorum.c | 6 +++--- kmod/src/server.c | 5 ++--- kmod/src/super.c | 2 +- kmod/src/super.h | 5 +++-- kmod/src/sysfs.c | 5 ++--- 7 files changed, 18 insertions(+), 21 deletions(-) diff --git a/kmod/src/block.c b/kmod/src/block.c index 786bcc2d..69eb0b57 100644 --- a/kmod/src/block.c +++ b/kmod/src/block.c @@ -677,7 +677,7 @@ out: int scoutfs_block_read_ref(struct super_block *sb, struct scoutfs_block_ref *ref, u32 magic, struct scoutfs_block **bl_ret) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct scoutfs_block_header *hdr; struct block_private *bp = NULL; bool retried = false; @@ -701,7 +701,7 @@ retry: set_bit(BLOCK_BIT_CRC_VALID, &bp->bits); } - if (hdr->magic != cpu_to_le32(magic) || hdr->fsid != super->hdr.fsid || + if (hdr->magic != cpu_to_le32(magic) || hdr->fsid != cpu_to_le64(sbi->fsid) || hdr->seq != ref->seq || hdr->blkno != ref->blkno) { ret = -ESTALE; goto out; @@ -797,7 +797,7 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc, u32 magic, struct scoutfs_block **bl_ret, u64 dirty_blkno, u64 *ref_blkno) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct scoutfs_block *cow_bl = NULL; struct scoutfs_block *bl = NULL; struct block_private *exist_bp = NULL; @@ -865,7 +865,7 @@ int scoutfs_block_dirty_ref(struct super_block *sb, 
struct scoutfs_alloc *alloc, hdr = bl->data; hdr->magic = cpu_to_le32(magic); - hdr->fsid = super->hdr.fsid; + hdr->fsid = cpu_to_le64(sbi->fsid); hdr->blkno = cpu_to_le64(bl->blkno); prandom_bytes(&hdr->seq, sizeof(hdr->seq)); diff --git a/kmod/src/client.c b/kmod/src/client.c index 0f1006d0..643b5693 100644 --- a/kmod/src/client.c +++ b/kmod/src/client.c @@ -356,7 +356,6 @@ static int client_greeting(struct super_block *sb, { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct client_info *client = sbi->client_info; - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; struct scoutfs_net_greeting *gr = resp; bool new_server; int ret; @@ -371,9 +370,9 @@ static int client_greeting(struct super_block *sb, goto out; } - if (gr->fsid != super->hdr.fsid) { + if (gr->fsid != cpu_to_le64(sbi->fsid)) { scoutfs_warn(sb, "server greeting response fsid 0x%llx did not match client fsid 0x%llx", - le64_to_cpu(gr->fsid), le64_to_cpu(super->hdr.fsid)); + le64_to_cpu(gr->fsid), sbi->fsid); ret = -EINVAL; goto out; } @@ -476,7 +475,6 @@ static void scoutfs_client_connect_worker(struct work_struct *work) connect_dwork.work); struct super_block *sb = client->sb; struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; struct scoutfs_mount_options opts; struct scoutfs_net_greeting greet; struct sockaddr_in sin; @@ -508,7 +506,7 @@ static void scoutfs_client_connect_worker(struct work_struct *work) goto out; /* send a greeting to verify endpoints of each connection */ - greet.fsid = super->hdr.fsid; + greet.fsid = cpu_to_le64(sbi->fsid); greet.fmt_vers = cpu_to_le64(sbi->fmt_vers); greet.server_term = cpu_to_le64(client->server_term); greet.rid = cpu_to_le64(sbi->rid); diff --git a/kmod/src/quorum.c b/kmod/src/quorum.c index 34daa105..04ed06cb 100644 --- a/kmod/src/quorum.c +++ b/kmod/src/quorum.c @@ -342,7 +342,7 @@ static int read_quorum_block(struct super_block *sb, u64 blkno, struct scoutfs_q bool check_rid) { struct 
scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + const u64 fsid = sbi->fsid; const u64 rid = sbi->rid; char msg[150]; __le32 crc; @@ -367,9 +367,9 @@ static int read_quorum_block(struct super_block *sb, u64 blkno, struct scoutfs_q else if (le32_to_cpu(blk->hdr.magic) != SCOUTFS_BLOCK_MAGIC_QUORUM) snprintf(msg, sizeof(msg), "blk magic %08x != %08x", le32_to_cpu(blk->hdr.magic), SCOUTFS_BLOCK_MAGIC_QUORUM); - else if (blk->hdr.fsid != super->hdr.fsid) + else if (blk->hdr.fsid != cpu_to_le64(fsid)) snprintf(msg, sizeof(msg), "blk fsid %016llx != %016llx", - le64_to_cpu(blk->hdr.fsid), le64_to_cpu(super->hdr.fsid)); + le64_to_cpu(blk->hdr.fsid), fsid); else if (le64_to_cpu(blk->hdr.blkno) != blkno) snprintf(msg, sizeof(msg), "blk blkno %llu != %llu", le64_to_cpu(blk->hdr.blkno), blkno); diff --git a/kmod/src/server.c b/kmod/src/server.c index 17f38481..10e2bed3 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -3554,10 +3554,9 @@ static int server_greeting(struct super_block *sb, goto send_err; } - if (gr->fsid != super->hdr.fsid) { + if (gr->fsid != cpu_to_le64(sbi->fsid)) { scoutfs_warn(sb, "client rid %016llx greeting fsid 0x%llx did not match server fsid 0x%llx", - le64_to_cpu(gr->rid), le64_to_cpu(gr->fsid), - le64_to_cpu(super->hdr.fsid)); + le64_to_cpu(gr->rid), le64_to_cpu(gr->fsid), sbi->fsid); ret = -EINVAL; goto send_err; } diff --git a/kmod/src/super.c b/kmod/src/super.c index 9cc68869..3ca16d0a 100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -461,7 +461,7 @@ static int scoutfs_read_supers(struct super_block *sb) goto out; } - + sbi->fsid = le64_to_cpu(meta_super->hdr.fsid); sbi->fmt_vers = le64_to_cpu(meta_super->fmt_vers); sbi->super = *meta_super; out: diff --git a/kmod/src/super.h b/kmod/src/super.h index 3d3cd3d2..e3ab11e3 100644 --- a/kmod/src/super.h +++ b/kmod/src/super.h @@ -35,6 +35,7 @@ struct scoutfs_sb_info { struct super_block *sb; /* assigned once at the start of each mount, 
read-only */ + u64 fsid; u64 rid; u64 fmt_vers; @@ -135,14 +136,14 @@ static inline bool scoutfs_unmounting(struct super_block *sb) (int)(le64_to_cpu(fsid) >> SCSB_SHIFT), \ (int)(le64_to_cpu(rid) >> SCSB_SHIFT) #define SCSB_ARGS(sb) \ - (int)(le64_to_cpu(SCOUTFS_SB(sb)->super.hdr.fsid) >> SCSB_SHIFT), \ + (int)(SCOUTFS_SB(sb)->fsid >> SCSB_SHIFT), \ (int)(SCOUTFS_SB(sb)->rid >> SCSB_SHIFT) #define SCSB_TRACE_FIELDS \ __field(__u64, fsid) \ __field(__u64, rid) #define SCSB_TRACE_ASSIGN(sb) \ __entry->fsid = SCOUTFS_HAS_SBI(sb) ? \ - le64_to_cpu(SCOUTFS_SB(sb)->super.hdr.fsid) : 0;\ + SCOUTFS_SB(sb)->fsid : 0; \ __entry->rid = SCOUTFS_HAS_SBI(sb) ? \ SCOUTFS_SB(sb)->rid : 0; #define SCSB_TRACE_ARGS \ diff --git a/kmod/src/sysfs.c b/kmod/src/sysfs.c index e429a5e5..34fb4a64 100644 --- a/kmod/src/sysfs.c +++ b/kmod/src/sysfs.c @@ -60,10 +60,9 @@ static ssize_t fsid_show(struct kobject *kobj, struct attribute *attr, char *buf) { struct super_block *sb = KOBJ_TO_SB(kobj, sb_id_kobj); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - return snprintf(buf, PAGE_SIZE, "%016llx\n", - le64_to_cpu(super->hdr.fsid)); + return snprintf(buf, PAGE_SIZE, "%016llx\n", sbi->fsid); } ATTR_FUNCS_RO(fsid); From b1a43bb312f9b1a80e6c69a4844f22f6b76406d0 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 9 Dec 2022 15:19:19 -0800 Subject: [PATCH 2/9] Make quorum config use more precise The quorum code was using the copy of the super block in the sb info for its config. With that going away we make different users more carefully reference the config. The quorum agent has a copy that it reads on setup, the client rarely reads a copy when trying to connect, and the server uses its super. This is about data access isolation and should have no functional effect other than to cause more super reads. 
Signed-off-by: Zach Brown --- kmod/src/quorum.c | 106 +++++++++++++++++++++++++++++----------------- kmod/src/quorum.h | 5 ++- kmod/src/server.c | 4 +- 3 files changed, 71 insertions(+), 44 deletions(-) diff --git a/kmod/src/quorum.c b/kmod/src/quorum.c index 04ed06cb..4a205311 100644 --- a/kmod/src/quorum.c +++ b/kmod/src/quorum.c @@ -114,6 +114,7 @@ struct quorum_status { struct quorum_info { struct super_block *sb; + struct scoutfs_quorum_config qconf; struct work_struct work; struct socket *sock; bool shutdown; @@ -134,11 +135,18 @@ struct quorum_info { #define DECLARE_QUORUM_INFO_KOBJ(kobj, name) \ DECLARE_QUORUM_INFO(SCOUTFS_SYSFS_ATTRS_SB(kobj), name) -static bool quorum_slot_present(struct scoutfs_super_block *super, int i) +static bool quorum_slot_present(struct scoutfs_quorum_config *qconf, int i) { BUG_ON(i < 0 || i > SCOUTFS_QUORUM_MAX_SLOTS); - return super->qconf.slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4); + return qconf->slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4); +} + +static void quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin) +{ + BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS); + + scoutfs_addr_to_sin(sin, &qconf->slots[i].addr); } static ktime_t election_timeout(void) @@ -160,7 +168,6 @@ static ktime_t heartbeat_timeout(void) static int create_socket(struct super_block *sb) { DECLARE_QUORUM_INFO(sb, qinf); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; struct socket *sock = NULL; struct sockaddr_in sin; int addrlen; @@ -174,7 +181,7 @@ static int create_socket(struct super_block *sb) sock->sk->sk_allocation = GFP_NOFS; - scoutfs_quorum_slot_sin(super, qinf->our_quorum_slot_nr, &sin); + quorum_slot_sin(&qinf->qconf, qinf->our_quorum_slot_nr, &sin); addrlen = sizeof(sin); ret = kernel_bind(sock, (struct sockaddr *)&sin, addrlen); @@ -204,13 +211,13 @@ static __le32 quorum_message_crc(struct scoutfs_quorum_message *qmes) static void send_msg_members(struct super_block *sb, 
int type, u64 term, int only) { + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); DECLARE_QUORUM_INFO(sb, qinf); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; ktime_t now; int i; struct scoutfs_quorum_message qmes = { - .fsid = super->hdr.fsid, + .fsid = cpu_to_le64(sbi->fsid), .term = cpu_to_le64(term), .type = type, .from = qinf->our_quorum_slot_nr, @@ -234,11 +241,11 @@ static void send_msg_members(struct super_block *sb, int type, u64 term, for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) { - if (!quorum_slot_present(super, i) || + if (!quorum_slot_present(&qinf->qconf, i) || (only >= 0 && i != only) || i == qinf->our_quorum_slot_nr) continue; - scoutfs_quorum_slot_sin(super, i, &sin); + scoutfs_quorum_slot_sin(&qinf->qconf, i, &sin); now = ktime_get(); kernel_sendmsg(qinf->sock, &mh, &kv, 1, kv.iov_len); @@ -266,7 +273,7 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg, ktime_t abs_to) { DECLARE_QUORUM_INFO(sb, qinf); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); struct scoutfs_quorum_message qmes; struct timeval tv; ktime_t rel_to; @@ -309,10 +316,10 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg, if (ret != sizeof(qmes) || qmes.crc != quorum_message_crc(&qmes) || - qmes.fsid != super->hdr.fsid || + qmes.fsid != cpu_to_le64(sbi->fsid) || qmes.type >= SCOUTFS_QUORUM_MSG_INVALID || qmes.from >= SCOUTFS_QUORUM_MAX_SLOTS || - !quorum_slot_present(super, qmes.from)) { + !quorum_slot_present(&qinf->qconf, qmes.from)) { /* should we be trying to open a new socket? 
*/ scoutfs_inc_counter(sb, quorum_recv_invalid); return -EAGAIN; @@ -410,8 +417,7 @@ out: */ static void read_greatest_term(struct super_block *sb, u64 *term) { - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + DECLARE_QUORUM_INFO(sb, qinf); struct scoutfs_quorum_block blk; int ret; int e; @@ -420,7 +426,7 @@ static void read_greatest_term(struct super_block *sb, u64 *term) *term = 0; for (s = 0; s < SCOUTFS_QUORUM_MAX_SLOTS; s++) { - if (!quorum_slot_present(super, s)) + if (!quorum_slot_present(&qinf->qconf, s)) continue; ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + s, &blk, false); @@ -514,14 +520,15 @@ static int update_quorum_block(struct super_block *sb, int event, u64 term, bool * keeps us from being fenced while we allow userspace fencing to take a * reasonably long time. We still want to timeout eventually. */ -int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term) +int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_config *qconf, + u64 term) { #define NR_OLD 2 struct scoutfs_quorum_block_event old[SCOUTFS_QUORUM_MAX_SLOTS][NR_OLD] = {{{0,}}}; struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; struct scoutfs_quorum_block blk; struct sockaddr_in sin; + const __le64 lefsid = cpu_to_le64(sbi->fsid); const u64 rid = sbi->rid; bool fence_started = false; u64 fenced = 0; @@ -534,7 +541,7 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term) BUILD_BUG_ON(SCOUTFS_QUORUM_BLOCKS < SCOUTFS_QUORUM_MAX_SLOTS); for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) { - if (!quorum_slot_present(super, i)) + if (!quorum_slot_present(qconf, i)) continue; ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + i, &blk, false); @@ -567,11 +574,11 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term) continue; scoutfs_inc_counter(sb, quorum_fence_leader); - scoutfs_quorum_slot_sin(super, i, &sin); + 
quorum_slot_sin(qconf, i, &sin); fence_rid = old[i][j].rid; scoutfs_info(sb, "fencing previous leader "SCSBF" at term %llu in slot %u with address "SIN_FMT, - SCSB_LEFR_ARGS(super->hdr.fsid, fence_rid), + SCSB_LEFR_ARGS(lefsid, fence_rid), le64_to_cpu(old[i][j].term), i, SIN_ARG(&sin)); ret = scoutfs_fence_start(sb, le64_to_cpu(fence_rid), sin.sin_addr.s_addr, SCOUTFS_FENCE_QUORUM_BLOCK_LEADER); @@ -877,16 +884,25 @@ out: */ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin) { - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = NULL; struct scoutfs_quorum_block blk; u64 elect_term; u64 term = 0; int ret = 0; int i; + super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS); + if (!super) { + ret = -ENOMEM; + goto out; + } + + ret = scoutfs_read_super(sb, super); + if (ret) + goto out; + for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) { - if (!quorum_slot_present(super, i)) + if (!quorum_slot_present(&super->qconf, i)) continue; ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + i, &blk, false); @@ -900,7 +916,7 @@ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin) if (elect_term > term && elect_term > le64_to_cpu(blk.events[SCOUTFS_QUORUM_EVENT_STOP].term)) { term = elect_term; - scoutfs_quorum_slot_sin(super, i, sin); + scoutfs_quorum_slot_sin(&super->qconf, i, sin); continue; } } @@ -909,6 +925,7 @@ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin) ret = -ENOENT; out: + kfree(super); return ret; } @@ -924,12 +941,9 @@ u8 scoutfs_quorum_votes_needed(struct super_block *sb) return qinf->votes_needed; } -void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i, - struct sockaddr_in *sin) +void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin) { - BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS); - - scoutfs_addr_to_sin(sin, 
&super->qconf.slots[i].addr); + return quorum_slot_sin(qconf, i, sin); } static char *role_str(int role) @@ -1060,11 +1074,10 @@ static inline bool valid_ipv4_port(__be16 port) return port != 0 && be16_to_cpu(port) != U16_MAX; } -static int verify_quorum_slots(struct super_block *sb) +static int verify_quorum_slots(struct super_block *sb, struct quorum_info *qinf, + struct scoutfs_quorum_config *qconf) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; char slots[(SCOUTFS_QUORUM_MAX_SLOTS * 3) + 1]; - DECLARE_QUORUM_INFO(sb, qinf); struct sockaddr_in other; struct sockaddr_in sin; int found = 0; @@ -1074,10 +1087,10 @@ static int verify_quorum_slots(struct super_block *sb) for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) { - if (!quorum_slot_present(super, i)) + if (!quorum_slot_present(qconf, i)) continue; - scoutfs_quorum_slot_sin(super, i, &sin); + scoutfs_quorum_slot_sin(qconf, i, &sin); if (!valid_ipv4_unicast(sin.sin_addr.s_addr)) { scoutfs_err(sb, "quorum slot #%d has invalid ipv4 unicast address: "SIN_FMT, @@ -1092,10 +1105,10 @@ static int verify_quorum_slots(struct super_block *sb) } for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) { - if (!quorum_slot_present(super, j)) + if (!quorum_slot_present(qconf, j)) continue; - scoutfs_quorum_slot_sin(super, j, &other); + scoutfs_quorum_slot_sin(qconf, j, &other); if (sin.sin_addr.s_addr == other.sin_addr.s_addr && sin.sin_port == other.sin_port) { @@ -1113,11 +1126,11 @@ static int verify_quorum_slots(struct super_block *sb) return -EINVAL; } - if (!quorum_slot_present(super, qinf->our_quorum_slot_nr)) { + if (!quorum_slot_present(qconf, qinf->our_quorum_slot_nr)) { char *str = slots; *str = '\0'; for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) { - if (quorum_slot_present(super, i)) { + if (quorum_slot_present(qconf, i)) { ret = snprintf(str, &slots[ARRAY_SIZE(slots)] - str, "%c%u", str == slots ? 
' ' : ',', i); if (ret < 2 || ret > 3) { @@ -1141,16 +1154,22 @@ static int verify_quorum_slots(struct super_block *sb) else qinf->votes_needed = (found / 2) + 1; + qinf->qconf = *qconf; + return 0; } /* * Once this schedules the quorum worker it can be elected leader and - * start the server, possibly before this returns. + * start the server, possibly before this returns. The quorum agent + * would be responsible for tracking the quorum config in the super + * block if it changes. Until then it uses a static config that it reads + * during setup. */ int scoutfs_quorum_setup(struct super_block *sb) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); + struct scoutfs_super_block *super = NULL; struct scoutfs_mount_options opts; struct quorum_info *qinf; int ret; @@ -1160,7 +1179,9 @@ int scoutfs_quorum_setup(struct super_block *sb) return 0; qinf = kzalloc(sizeof(struct quorum_info), GFP_KERNEL); - if (!qinf) { + super = kmalloc(sizeof(struct scoutfs_super_block), GFP_KERNEL); + if (!qinf || !super) { + kfree(qinf); ret = -ENOMEM; goto out; } @@ -1174,7 +1195,11 @@ int scoutfs_quorum_setup(struct super_block *sb) sbi->quorum_info = qinf; qinf->sb = sb; - ret = verify_quorum_slots(sb); + ret = scoutfs_read_super(sb, super); + if (ret < 0) + goto out; + + ret = verify_quorum_slots(sb, qinf, &super->qconf); if (ret < 0) goto out; @@ -1194,6 +1219,7 @@ out: if (ret) scoutfs_quorum_destroy(sb); + kfree(super); return ret; } diff --git a/kmod/src/quorum.h b/kmod/src/quorum.h index 11959ab2..500a5d16 100644 --- a/kmod/src/quorum.h +++ b/kmod/src/quorum.h @@ -4,10 +4,11 @@ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin); u8 scoutfs_quorum_votes_needed(struct super_block *sb); -void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i, +void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin); -int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term); +int 
scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_config *qconf, + u64 term); int scoutfs_quorum_setup(struct super_block *sb); void scoutfs_quorum_shutdown(struct super_block *sb); diff --git a/kmod/src/server.c b/kmod/src/server.c index 10e2bed3..8ce503c1 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -4188,13 +4188,13 @@ static void scoutfs_server_worker(struct work_struct *work) trace_scoutfs_server_work_enter(sb, 0, 0); scoutfs_options_read(sb, &opts); - scoutfs_quorum_slot_sin(super, opts.quorum_slot_nr, &sin); + scoutfs_quorum_slot_sin(&super->qconf, opts.quorum_slot_nr, &sin); scoutfs_info(sb, "server starting at "SIN_FMT, SIN_ARG(&sin)); scoutfs_block_writer_init(sb, &server->wri); /* first make sure no other servers are still running */ - ret = scoutfs_quorum_fence_leaders(sb, server->term); + ret = scoutfs_quorum_fence_leaders(sb, &super->qconf, server->term); if (ret < 0) { scoutfs_err(sb, "server error %d attempting to fence previous leaders", ret); goto out; From fe4734d01989e7f03d5d781687fa4962d965b662 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 9 Dec 2022 15:39:10 -0800 Subject: [PATCH 3/9] Save a full stable super in the server The server has a mechanism for tracking the last stable roots used by network rpcs. We expand it a bit to include the entire super so that we can add users in the server which want the last full stable super. We can still use the stable super to give out the stable roots. 
Signed-off-by: Zach Brown --- kmod/src/server.c | 45 ++++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/kmod/src/server.c b/kmod/src/server.c index 8ce503c1..bd76cec1 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -130,9 +130,9 @@ struct server_info { struct mutex srch_mutex; struct mutex mounted_clients_mutex; - /* stable versions stored from commits, given in locks and rpcs */ - seqcount_t roots_seqcount; - struct scoutfs_net_roots roots; + /* stable super stored from commits, given in locks and rpcs */ + seqcount_t stable_seqcount; + struct scoutfs_super_block stable_super; /* serializing and get and set volume options */ seqcount_t volopt_seqcount; @@ -469,16 +469,22 @@ static void commit_end(struct super_block *sb, struct commit_users *cusers, int wake_up(&cusers->waitq); } -static void get_roots(struct super_block *sb, - struct scoutfs_net_roots *roots) +static void get_stable(struct super_block *sb, struct scoutfs_super_block *super, + struct scoutfs_net_roots *roots) { DECLARE_SERVER_INFO(sb, server); unsigned int seq; do { - seq = read_seqcount_begin(&server->roots_seqcount); - *roots = server->roots; - } while (read_seqcount_retry(&server->roots_seqcount, seq)); + seq = read_seqcount_begin(&server->stable_seqcount); + if (super) + *super = server->stable_super; + if (roots) { + roots->fs_root = server->stable_super.fs_root; + roots->logs_root = server->stable_super.logs_root; + roots->srch_root = server->stable_super.srch_root; + } + } while (read_seqcount_retry(&server->stable_seqcount, seq)); } u64 scoutfs_server_seq(struct super_block *sb) @@ -510,17 +516,12 @@ void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq) } } -static void set_roots(struct server_info *server, - struct scoutfs_btree_root *fs_root, - struct scoutfs_btree_root *logs_root, - struct scoutfs_btree_root *srch_root) +static void set_stable_super(struct server_info *server, struct scoutfs_super_block 
*super) { preempt_disable(); - write_seqcount_begin(&server->roots_seqcount); - server->roots.fs_root = *fs_root; - server->roots.logs_root = *logs_root; - server->roots.srch_root = *srch_root; - write_seqcount_end(&server->roots_seqcount); + write_seqcount_begin(&server->stable_seqcount); + server->stable_super = *super; + write_seqcount_end(&server->stable_seqcount); preempt_enable(); } @@ -603,8 +604,7 @@ static void scoutfs_server_commit_func(struct work_struct *work) goto out; } - set_roots(server, &super->fs_root, &super->logs_root, - &super->srch_root); + set_stable_super(server, super); /* swizzle the active and idle server alloc/freed heads */ server->other_ind ^= 1; @@ -1624,7 +1624,7 @@ static int server_get_roots(struct super_block *sb, memset(&roots, 0, sizeof(roots)); ret = -EINVAL; } else { - get_roots(sb, &roots); + get_stable(sb, NULL, &roots); ret = 0; } @@ -4230,8 +4230,7 @@ static void scoutfs_server_worker(struct work_struct *work) write_seqcount_end(&server->volopt_seqcount); atomic64_set(&server->seq_atomic, le64_to_cpu(super->seq)); - set_roots(server, &super->fs_root, &super->logs_root, - &super->srch_root); + set_stable_super(server, super); /* prepare server alloc for this transaction, larger first */ if (le64_to_cpu(super->server_meta_avail[0].total_nr) < @@ -4381,7 +4380,7 @@ int scoutfs_server_setup(struct super_block *sb) INIT_WORK(&server->log_merge_free_work, server_log_merge_free_work); mutex_init(&server->srch_mutex); mutex_init(&server->mounted_clients_mutex); - seqcount_init(&server->roots_seqcount); + seqcount_init(&server->stable_seqcount); seqcount_init(&server->volopt_seqcount); mutex_init(&server->volopt_mutex); INIT_WORK(&server->fence_pending_recov_work, fence_pending_recov_worker); From 342c2065501d2502b410698935c71b7194a8ad33 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 9 Dec 2022 15:58:52 -0800 Subject: [PATCH 4/9] Have scoutfs_forest_inode_count return stale reads scoutfs_forest_inode_count() assumed it was 
called with stable refs and would always translate ESTALE to EIO. Change it so that it passes ESTALE to the caller who is responsible for handling it. The server will use this to retry reading from stable supers that it's storing in memory. Signed-off-by: Zach Brown --- kmod/src/forest.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/kmod/src/forest.c b/kmod/src/forest.c index 1b4c9c4b..37705c62 100644 --- a/kmod/src/forest.c +++ b/kmod/src/forest.c @@ -541,9 +541,8 @@ void scoutfs_forest_dec_inode_count(struct super_block *sb) /* * Return the total inode count from the super block and all the - * log_btrees it references. This assumes it's working with a block - * reference hierarchy that should be fully consistent. If we see - * ESTALE we've hit persistent corruption. + * log_btrees it references. ESTALE from read blocks is returned to the + * caller who is expected to retry or return hard errors. */ int scoutfs_forest_inode_count(struct super_block *sb, struct scoutfs_super_block *super, u64 *inode_count) @@ -572,8 +571,6 @@ int scoutfs_forest_inode_count(struct super_block *sb, struct scoutfs_super_bloc if (ret < 0) { if (ret == -ENOENT) ret = 0; - else if (ret == -ESTALE) - ret = -EIO; break; } } From 464de56d280f21bdaa87ea183b9bf22f89d5b355 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 12 Dec 2022 10:36:35 -0800 Subject: [PATCH 5/9] Add stale block read retrying helper Many readers had little implementations of the logic to decide to retry stale reads with different refs or decide that they're persistent and return hard errors. Let's move that into a small helper. 
Signed-off-by: Zach Brown --- kmod/src/block.c | 30 ++++++++++++++++++++++++++++++ kmod/src/block.h | 11 +++++++++++ 2 files changed, 41 insertions(+) diff --git a/kmod/src/block.c b/kmod/src/block.c index 69eb0b57..6849fe5c 100644 --- a/kmod/src/block.c +++ b/kmod/src/block.c @@ -728,6 +728,36 @@ out: return ret; } +static bool stale_refs_match(struct scoutfs_block_ref *caller, struct scoutfs_block_ref *saved) +{ + return !caller || (caller->blkno == saved->blkno && caller->seq == saved->seq); +} + +/* + * Check if a read of a reference that gave ESTALE should be retried or + * should generate a hard error. If this is the second time we got + * ESTALE from the same refs then we return EIO and the caller should + * stop. As long as we keep seeing different refs we'll return ESTALE + * and the caller can keep trying. + */ +int scoutfs_block_check_stale(struct super_block *sb, int ret, + struct scoutfs_block_saved_refs *saved, + struct scoutfs_block_ref *a, struct scoutfs_block_ref *b) +{ + if (ret == -ESTALE) { + if (stale_refs_match(a, &saved->refs[0]) && stale_refs_match(b, &saved->refs[1])){ + ret = -EIO; + } else { + if (a) + saved->refs[0] = *a; + if (b) + saved->refs[1] = *b; + } + } + + return ret; +} + void scoutfs_block_put(struct super_block *sb, struct scoutfs_block *bl) { if (!IS_ERR_OR_NULL(bl)) diff --git a/kmod/src/block.h b/kmod/src/block.h index 93d88731..bd1c2e57 100644 --- a/kmod/src/block.h +++ b/kmod/src/block.h @@ -13,6 +13,17 @@ struct scoutfs_block { void *priv; }; +struct scoutfs_block_saved_refs { + struct scoutfs_block_ref refs[2]; +}; + +#define DECLARE_SAVED_REFS(name) \ + struct scoutfs_block_saved_refs name = {{{0,}}} + +int scoutfs_block_check_stale(struct super_block *sb, int ret, + struct scoutfs_block_saved_refs *saved, + struct scoutfs_block_ref *a, struct scoutfs_block_ref *b); + int scoutfs_block_read_ref(struct super_block *sb, struct scoutfs_block_ref *ref, u32 magic, struct scoutfs_block **bl_ret); void 
scoutfs_block_put(struct super_block *sb, struct scoutfs_block *bl); From fff07ce19c140cae7f7e4ca3256b65e9563af082 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 12 Dec 2022 14:05:42 -0800 Subject: [PATCH 6/9] Use stale block read retrying helper Transition from manual checking for persistent ESTALE to the shared helper that we just added. This should not change behavior. Signed-off-by: Zach Brown --- kmod/src/alloc.c | 32 +++++++++++--------------------- kmod/src/counters.h | 2 -- kmod/src/forest.c | 17 +++-------------- kmod/src/srch.c | 18 +++++------------- 4 files changed, 19 insertions(+), 50 deletions(-) diff --git a/kmod/src/alloc.c b/kmod/src/alloc.c index a36387b7..8b5ac274 100644 --- a/kmod/src/alloc.c +++ b/kmod/src/alloc.c @@ -1572,12 +1572,10 @@ out: * call the caller's callback. This assumes that the super it's reading * could be stale and will retry if it encounters stale blocks. */ -int scoutfs_alloc_foreach(struct super_block *sb, - scoutfs_alloc_foreach_cb_t cb, void *arg) +int scoutfs_alloc_foreach(struct super_block *sb, scoutfs_alloc_foreach_cb_t cb, void *arg) { struct scoutfs_super_block *super = NULL; - struct scoutfs_block_ref stale_refs[2] = {{0,}}; - struct scoutfs_block_ref refs[2] = {{0,}}; + DECLARE_SAVED_REFS(saved); int ret; super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS); @@ -1586,26 +1584,18 @@ int scoutfs_alloc_foreach(struct super_block *sb, goto out; } -retry: - ret = scoutfs_read_super(sb, super); - if (ret < 0) - goto out; + do { + ret = scoutfs_read_super(sb, super); + if (ret < 0) + goto out; - refs[0] = super->logs_root.ref; - refs[1] = super->srch_root.ref; + ret = scoutfs_alloc_foreach_super(sb, super, cb, arg); + + ret = scoutfs_block_check_stale(sb, ret, &saved, &super->logs_root.ref, + &super->srch_root.ref); + } while (ret == -ESTALE); - ret = scoutfs_alloc_foreach_super(sb, super, cb, arg); out: - if (ret == -ESTALE) { - if (memcmp(&stale_refs, &refs, sizeof(refs)) == 0) { - ret = -EIO; - } else 
{ - BUILD_BUG_ON(sizeof(stale_refs) != sizeof(refs)); - memcpy(stale_refs, refs, sizeof(stale_refs)); - goto retry; - } - } - kfree(super); return ret; } diff --git a/kmod/src/counters.h b/kmod/src/counters.h index f4111feb..378fcdc1 100644 --- a/kmod/src/counters.h +++ b/kmod/src/counters.h @@ -187,8 +187,6 @@ EXPAND_COUNTER(srch_search_retry_empty) \ EXPAND_COUNTER(srch_search_sorted) \ EXPAND_COUNTER(srch_search_sorted_block) \ - EXPAND_COUNTER(srch_search_stale_eio) \ - EXPAND_COUNTER(srch_search_stale_retry) \ EXPAND_COUNTER(srch_search_xattrs) \ EXPAND_COUNTER(srch_read_stale) \ EXPAND_COUNTER(statfs) \ diff --git a/kmod/src/forest.c b/kmod/src/forest.c index 37705c62..062d3713 100644 --- a/kmod/src/forest.c +++ b/kmod/src/forest.c @@ -78,11 +78,6 @@ struct forest_refs { struct scoutfs_block_ref logs_ref; }; -/* initialize some refs that initially aren't equal */ -#define DECLARE_STALE_TRACKING_SUPER_REFS(a, b) \ - struct forest_refs a = {{cpu_to_le64(0),}}; \ - struct forest_refs b = {{cpu_to_le64(1),}} - struct forest_bloom_nrs { unsigned int nrs[SCOUTFS_FOREST_BLOOM_NRS]; }; @@ -136,11 +131,11 @@ static struct scoutfs_block *read_bloom_ref(struct super_block *sb, struct scout int scoutfs_forest_next_hint(struct super_block *sb, struct scoutfs_key *key, struct scoutfs_key *next) { - DECLARE_STALE_TRACKING_SUPER_REFS(prev_refs, refs); struct scoutfs_net_roots roots; struct scoutfs_btree_root item_root; struct scoutfs_log_trees *lt; SCOUTFS_BTREE_ITEM_REF(iref); + DECLARE_SAVED_REFS(saved); struct scoutfs_key found; struct scoutfs_key ltk; bool checked_fs; @@ -155,8 +150,6 @@ retry: goto out; trace_scoutfs_forest_using_roots(sb, &roots.fs_root, &roots.logs_root); - refs.fs_ref = roots.fs_root.ref; - refs.logs_ref = roots.logs_root.ref; scoutfs_key_init_log_trees(<k, 0, 0); checked_fs = false; @@ -212,14 +205,10 @@ retry: } } - if (ret == -ESTALE) { - if (memcmp(&prev_refs, &refs, sizeof(refs)) == 0) - return -EIO; - prev_refs = refs; + ret = 
scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref); + if (ret == -ESTALE) goto retry; - } out: - return ret; } diff --git a/kmod/src/srch.c b/kmod/src/srch.c index b23fc6c2..2cb99b4c 100644 --- a/kmod/src/srch.c +++ b/kmod/src/srch.c @@ -861,7 +861,6 @@ int scoutfs_srch_search_xattrs(struct super_block *sb, struct scoutfs_srch_rb_root *sroot, u64 hash, u64 ino, u64 last_ino, bool *done) { - struct scoutfs_net_roots prev_roots; struct scoutfs_net_roots roots; struct scoutfs_srch_entry start; struct scoutfs_srch_entry end; @@ -869,6 +868,7 @@ int scoutfs_srch_search_xattrs(struct super_block *sb, struct scoutfs_log_trees lt; struct scoutfs_srch_file sfl; SCOUTFS_BTREE_ITEM_REF(iref); + DECLARE_SAVED_REFS(saved); struct scoutfs_key key; unsigned long limit = SRCH_LIMIT; int ret; @@ -877,7 +877,6 @@ int scoutfs_srch_search_xattrs(struct super_block *sb, *done = false; srch_init_rb_root(sroot); - memset(&prev_roots, 0, sizeof(prev_roots)); start.hash = cpu_to_le64(hash); start.ino = cpu_to_le64(ino); @@ -892,7 +891,6 @@ retry: ret = scoutfs_client_get_roots(sb, &roots); if (ret) goto out; - memset(&roots.fs_root, 0, sizeof(roots.fs_root)); end = final; @@ -968,16 +966,10 @@ retry: *done = sre_cmp(&end, &final) == 0; ret = 0; out: - if (ret == -ESTALE) { - if (memcmp(&prev_roots, &roots, sizeof(roots)) == 0) { - scoutfs_inc_counter(sb, srch_search_stale_eio); - ret = -EIO; - } else { - scoutfs_inc_counter(sb, srch_search_stale_retry); - prev_roots = roots; - goto retry; - } - } + ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.srch_root.ref, + &roots.logs_root.ref); + if (ret == -ESTALE) + goto retry; return ret; } From 772022258842c5816aeefd805e747417640ccfec Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 12 Dec 2022 14:00:51 -0800 Subject: [PATCH 7/9] Have statfs use unlocked stable roots The server's statfs request handler was intending to lock dirty structures as they were walked to get sums used for statfs fields. 
Other callers walk stable structures, though, so the summation calls had grown iteration over other structures that the server didn't know it had to lock. This meant that the server was walking unlocked dirty structures as they were being modified. The races are very tight, but it can result in request handling errors that shut down connections and IO errors from trying to read inconsistent refs as they were modified by the locked writer. We've built up infrastructure so the server can now walk stable structures just like the other callers. It will no longer wander into dirty blocks so it doesn't need to lock them and it will retry if its walk of stale data crosses a broken reference. Signed-off-by: Zach Brown --- kmod/src/server.c | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/kmod/src/server.c b/kmod/src/server.c index bd76cec1..c68e9c20 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -3212,16 +3212,19 @@ static int count_free_blocks(struct super_block *sb, void *arg, int owner, } /* - * We calculate the total inode count and free blocks from the current in-memory dirty - * versions of the super block and log_trees structs, so we have to lock them. + * We calculate the total inode count and free blocks from the last + * stable super that was written. Other users also walk stable blocks + * so by joining them we don't have to worry about ensuring that we've + * locked all the dirty structures that the summations could reference. + * We handle stale reads by retrying with the most recent stable super. 
*/ static int server_statfs(struct super_block *sb, struct scoutfs_net_connection *conn, u8 cmd, u64 id, void *arg, u16 arg_len) { - DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block super; struct scoutfs_net_statfs nst = {{0,}}; struct statfs_free_blocks sfb = {0,}; + DECLARE_SAVED_REFS(saved); u64 inode_count; int ret; @@ -3230,24 +3233,24 @@ static int server_statfs(struct super_block *sb, struct scoutfs_net_connection * goto out; } - mutex_lock(&server->alloc_mutex); - ret = scoutfs_alloc_foreach_super(sb, super, count_free_blocks, &sfb); - mutex_unlock(&server->alloc_mutex); - if (ret < 0) - goto out; + do { + get_stable(sb, &super, NULL); - mutex_lock(&server->logs_mutex); - ret = scoutfs_forest_inode_count(sb, super, &inode_count); - mutex_unlock(&server->logs_mutex); - if (ret < 0) - goto out; + ret = scoutfs_alloc_foreach_super(sb, &super, count_free_blocks, &sfb) ?: + scoutfs_forest_inode_count(sb, &super, &inode_count); + if (ret < 0 && ret != -ESTALE) + goto out; - BUILD_BUG_ON(sizeof(nst.uuid) != sizeof(super->uuid)); - memcpy(nst.uuid, super->uuid, sizeof(nst.uuid)); + ret = scoutfs_block_check_stale(sb, ret, &saved, &super.logs_root.ref, + &super.srch_root.ref); + } while (ret == -ESTALE); + + BUILD_BUG_ON(sizeof(nst.uuid) != sizeof(super.uuid)); + memcpy(nst.uuid, super.uuid, sizeof(nst.uuid)); nst.free_meta_blocks = cpu_to_le64(sfb.meta); - nst.total_meta_blocks = super->total_meta_blocks; + nst.total_meta_blocks = super.total_meta_blocks; nst.free_data_blocks = cpu_to_le64(sfb.data); - nst.total_data_blocks = super->total_data_blocks; + nst.total_data_blocks = super.total_data_blocks; nst.inode_count = cpu_to_le64(inode_count); ret = 0; From 40aa47c8880e0068f4db0e592c295b18b0c067ea Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 12 Dec 2022 14:56:20 -0800 Subject: [PATCH 8/9] Have the server keep a private dirty super block As the server does its work its transactions 
modify a dirty super block in memory. This used the global super block in scoutfs_sb_info which was visible to everything, including the client. Move the dirty super block over to the private server info so that only the server can see it. This is mostly boring storage motion but we do change that the quorum code hands the server a static copy of the quorum config to use as it starts up before it reads the most recent super block. Signed-off-by: Zach Brown --- kmod/src/quorum.c | 2 +- kmod/src/server.c | 75 ++++++++++++++++++++++++----------------------- kmod/src/server.h | 2 +- 3 files changed, 40 insertions(+), 39 deletions(-) diff --git a/kmod/src/quorum.c b/kmod/src/quorum.c index 4a205311..98f1b264 100644 --- a/kmod/src/quorum.c +++ b/kmod/src/quorum.c @@ -759,7 +759,7 @@ static void scoutfs_quorum_worker(struct work_struct *work) qst.server_start_term = qst.term; qst.server_event = SCOUTFS_QUORUM_EVENT_ELECT; - scoutfs_server_start(sb, qst.term); + scoutfs_server_start(sb, &qinf->qconf, qst.term); } /* diff --git a/kmod/src/server.c b/kmod/src/server.c index c68e9c20..73a74223 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -143,11 +143,18 @@ struct server_info { struct work_struct fence_pending_recov_work; /* while running we check for fenced mounts to reclaim */ struct delayed_work reclaim_dwork; + + /* a running server gets a static quorum config from quorum as it starts */ + struct scoutfs_quorum_config qconf; + /* a running server maintains a private dirty super */ + struct scoutfs_super_block dirty_super; }; #define DECLARE_SERVER_INFO(sb, name) \ struct server_info *name = SCOUTFS_SB(sb)->server_info +#define DIRTY_SUPER_SB(sb) (&SCOUTFS_SB(sb)->server_info->dirty_super) + /* * The server tracks each connected client. 
*/ @@ -546,7 +553,7 @@ static void scoutfs_server_commit_func(struct work_struct *work) struct server_info *server = container_of(work, struct server_info, commit_work); struct super_block *sb = server->sb; - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct commit_users *cusers = &server->cusers; int ret; @@ -641,7 +648,7 @@ static int server_alloc_inodes(struct super_block *sb, u8 cmd, u64 id, void *arg, u16 arg_len) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_net_inode_alloc ial = { 0, }; COMMIT_HOLD(hold); __le64 lecount; @@ -809,7 +816,7 @@ static void mod_bitmap_bits(__le64 *dst, u64 dst_zone_blocks, static int get_data_alloc_zone_bits(struct super_block *sb, u64 rid, __le64 *exclusive, __le64 *vacant, u64 zone_blocks) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_log_trees *lt; struct scoutfs_key key; @@ -1040,7 +1047,7 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l u64 rid, struct commit_hold *hold) { struct server_info *server = SCOUTFS_SB(sb)->server_info; - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_status stat; struct scoutfs_log_merge_range rng; struct scoutfs_log_trees each_lt; @@ -1242,7 +1249,7 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l static void try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees *lt) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); const u64 rid = le64_to_cpu(lt->rid); const u64 nr = le64_to_cpu(lt->nr); struct 
scoutfs_log_trees drain; @@ -1329,7 +1336,7 @@ static int server_get_log_trees(struct super_block *sb, struct scoutfs_net_connection *conn, u8 cmd, u64 id, void *arg, u16 arg_len) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); u64 rid = scoutfs_net_client_rid(conn); DECLARE_SERVER_INFO(sb, server); __le64 exclusive[SCOUTFS_DATA_ALLOC_ZONE_LE64S]; @@ -1524,7 +1531,7 @@ static int server_commit_log_trees(struct super_block *sb, struct scoutfs_net_connection *conn, u8 cmd, u64 id, void *arg, u16 arg_len) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); const u64 rid = scoutfs_net_client_rid(conn); DECLARE_SERVER_INFO(sb, server); SCOUTFS_BTREE_ITEM_REF(iref); @@ -1654,7 +1661,7 @@ static int server_get_roots(struct super_block *sb, */ static int reclaim_open_log_tree(struct super_block *sb, u64 rid) { - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); DECLARE_SERVER_INFO(sb, server); SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_log_trees lt; @@ -1751,9 +1758,8 @@ out: */ static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret) { + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); DECLARE_SERVER_INFO(sb, server); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_log_trees *lt; struct scoutfs_key key; @@ -1909,9 +1915,8 @@ static int server_srch_get_compact(struct super_block *sb, u8 cmd, u64 id, void *arg, u16 arg_len) { DECLARE_SERVER_INFO(sb, server); + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); u64 rid = scoutfs_net_client_rid(conn); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; struct scoutfs_srch_compact *sc = NULL; COMMIT_HOLD(hold); int ret; @@ -1976,8 +1981,7 @@ 
static int server_srch_commit_compact(struct super_block *sb, { DECLARE_SERVER_INFO(sb, server); u64 rid = scoutfs_net_client_rid(conn); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_srch_compact *sc; struct scoutfs_alloc_list_head av; struct scoutfs_alloc_list_head fr; @@ -2052,8 +2056,7 @@ static int splice_log_merge_completions(struct super_block *sb, bool no_ranges) { struct server_info *server = SCOUTFS_SB(sb)->server_info; - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_complete comp; struct scoutfs_log_merge_freeing fr; struct scoutfs_log_merge_range rng; @@ -2370,7 +2373,7 @@ static void server_log_merge_free_work(struct work_struct *work) struct server_info *server = container_of(work, struct server_info, log_merge_free_work); struct super_block *sb = server->sb; - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_freeing fr; struct scoutfs_key key; COMMIT_HOLD(hold); @@ -2462,8 +2465,7 @@ static int server_get_log_merge(struct super_block *sb, { DECLARE_SERVER_INFO(sb, server); u64 rid = scoutfs_net_client_rid(conn); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_status stat; struct scoutfs_log_merge_range rng; struct scoutfs_log_merge_range remain; @@ -2746,8 +2748,7 @@ static int server_commit_log_merge(struct super_block *sb, { DECLARE_SERVER_INFO(sb, server); u64 rid = scoutfs_net_client_rid(conn); - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct 
scoutfs_log_merge_request orig_req; struct scoutfs_log_merge_complete *comp; struct scoutfs_log_merge_status stat; @@ -2982,7 +2983,7 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti u8 cmd, u64 id, void *arg, u16 arg_len) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_volume_options *volopt; COMMIT_HOLD(hold); u64 opt; @@ -3051,7 +3052,7 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec u8 cmd, u64 id, void *arg, u16 arg_len) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_volume_options *volopt; COMMIT_HOLD(hold); __le64 *opt; @@ -3105,7 +3106,7 @@ static int server_resize_devices(struct super_block *sb, struct scoutfs_net_conn { DECLARE_SERVER_INFO(sb, server); struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_net_resize_devices *nrd; COMMIT_HOLD(hold); u64 meta_tot; @@ -3281,7 +3282,7 @@ static int insert_mounted_client(struct super_block *sb, u64 rid, u64 gr_flags, struct sockaddr_in *sin) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_mounted_client_btree_val mcv; struct scoutfs_key key; int ret; @@ -3307,7 +3308,7 @@ static int lookup_mounted_client_addr(struct super_block *sb, u64 rid, union scoutfs_inet_addr *addr) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_mounted_client_btree_val *mcv; SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_key key; @@ -3341,7 +3342,7 @@ static int 
lookup_mounted_client_addr(struct super_block *sb, u64 rid, static int delete_mounted_client(struct super_block *sb, u64 rid) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_key key; int ret; @@ -3365,7 +3366,7 @@ static int delete_mounted_client(struct super_block *sb, u64 rid) static int cancel_srch_compact(struct super_block *sb, u64 rid) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_alloc_list_head av; struct scoutfs_alloc_list_head fr; int ret; @@ -3417,7 +3418,7 @@ static int cancel_srch_compact(struct super_block *sb, u64 rid) static int cancel_log_merge(struct super_block *sb, u64 rid) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_log_merge_status stat; struct scoutfs_log_merge_request req; struct scoutfs_log_merge_range rng; @@ -3541,7 +3542,7 @@ static int server_greeting(struct super_block *sb, u8 cmd, u64 id, void *arg, u16 arg_len) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_net_greeting *gr = arg; struct scoutfs_net_greeting greet; DECLARE_SERVER_INFO(sb, server); @@ -3699,7 +3700,7 @@ static void farewell_worker(struct work_struct *work) struct server_info *server = container_of(work, struct server_info, farewell_work); struct super_block *sb = server->sb; - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_mounted_client_btree_val *mcv; struct farewell_request *tmp; struct farewell_request *fw; @@ -4061,7 +4062,7 @@ static void recovery_timeout(struct super_block *sb) static int 
start_recovery(struct super_block *sb) { DECLARE_SERVER_INFO(sb, server); - struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); SCOUTFS_BTREE_ITEM_REF(iref); struct scoutfs_key key; unsigned int nr = 0; @@ -4178,8 +4179,7 @@ static void scoutfs_server_worker(struct work_struct *work) struct server_info *server = container_of(work, struct server_info, work); struct super_block *sb = server->sb; - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct scoutfs_super_block *super = &sbi->super; + struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb); struct scoutfs_net_connection *conn = NULL; struct scoutfs_mount_options opts; DECLARE_WAIT_QUEUE_HEAD(waitq); @@ -4191,13 +4191,13 @@ static void scoutfs_server_worker(struct work_struct *work) trace_scoutfs_server_work_enter(sb, 0, 0); scoutfs_options_read(sb, &opts); - scoutfs_quorum_slot_sin(&super->qconf, opts.quorum_slot_nr, &sin); + scoutfs_quorum_slot_sin(&server->qconf, opts.quorum_slot_nr, &sin); scoutfs_info(sb, "server starting at "SIN_FMT, SIN_ARG(&sin)); scoutfs_block_writer_init(sb, &server->wri); /* first make sure no other servers are still running */ - ret = scoutfs_quorum_fence_leaders(sb, &super->qconf, server->term); + ret = scoutfs_quorum_fence_leaders(sb, &server->qconf, server->term); if (ret < 0) { scoutfs_err(sb, "server error %d attempting to fence previous leaders", ret); goto out; @@ -4327,11 +4327,12 @@ out: /* * Start the server but don't wait for it to complete. 
*/ -void scoutfs_server_start(struct super_block *sb, u64 term) +void scoutfs_server_start(struct super_block *sb, struct scoutfs_quorum_config *qconf, u64 term) { DECLARE_SERVER_INFO(sb, server); if (cmpxchg(&server->status, SERVER_DOWN, SERVER_STARTING) == SERVER_DOWN) { + server->qconf = *qconf; server->term = term; queue_work(server->wq, &server->work); } diff --git a/kmod/src/server.h b/kmod/src/server.h index dc1bd69a..f795ea0f 100644 --- a/kmod/src/server.h +++ b/kmod/src/server.h @@ -75,7 +75,7 @@ u64 scoutfs_server_seq(struct super_block *sb); u64 scoutfs_server_next_seq(struct super_block *sb); void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq); -void scoutfs_server_start(struct super_block *sb, u64 term); +void scoutfs_server_start(struct super_block *sb, struct scoutfs_quorum_config *qconf, u64 term); void scoutfs_server_stop(struct super_block *sb); void scoutfs_server_stop_wait(struct super_block *sb); bool scoutfs_server_is_running(struct super_block *sb); From 7c2d83e2f8a2bd4baf53f44d527061a488f277be Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Mon, 12 Dec 2022 14:54:33 -0800 Subject: [PATCH 9/9] Remove saved super block in scoutfs_sb_info Now that we've removed its users we can remove the global saved copy of the super block from scoutfs_sb_info. 
Signed-off-by: Zach Brown --- kmod/src/super.c | 1 - kmod/src/super.h | 2 -- 2 files changed, 3 deletions(-) diff --git a/kmod/src/super.c b/kmod/src/super.c index 3ca16d0a..e8e265b4 100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -463,7 +463,6 @@ static int scoutfs_read_supers(struct super_block *sb) sbi->fsid = le64_to_cpu(meta_super->hdr.fsid); sbi->fmt_vers = le64_to_cpu(meta_super->fmt_vers); - sbi->super = *meta_super; out: kfree(meta_super); kfree(data_super); diff --git a/kmod/src/super.h b/kmod/src/super.h index e3ab11e3..14ff626a 100644 --- a/kmod/src/super.h +++ b/kmod/src/super.h @@ -39,8 +39,6 @@ struct scoutfs_sb_info { u64 rid; u64 fmt_vers; - struct scoutfs_super_block super; - struct block_device *meta_bdev; spinlock_t next_ino_lock;