mirror of
https://github.com/versity/scoutfs.git
synced 2026-01-11 06:00:19 +00:00
Compare commits
25 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
5512d5c03e | ||
|
|
8cf7be4651 | ||
|
|
3363b4fb79 | ||
|
|
ddb5cce2a5 | ||
|
|
1b0e9c45f4 | ||
|
|
2e2ccb6f61 | ||
|
|
01c8bba56d | ||
|
|
17cb1fe84b | ||
|
|
78ae87031b | ||
|
|
bf93ea73c4 | ||
|
|
a23e7478a0 | ||
|
|
9ba2ee5c88 | ||
|
|
fe33a492c2 | ||
|
|
77c0ff89fb | ||
|
|
7c2d83e2f8 | ||
|
|
40aa47c888 | ||
|
|
c1bd7bcce5 | ||
|
|
7720222588 | ||
|
|
fff07ce19c | ||
|
|
464de56d28 | ||
|
|
342c206550 | ||
|
|
fe4734d019 | ||
|
|
b1a43bb312 | ||
|
|
929703213f | ||
|
|
78279ffb4a |
@@ -1,6 +1,29 @@
|
||||
Versity ScoutFS Release Notes
|
||||
=============================
|
||||
|
||||
---
|
||||
v1.11
|
||||
\
|
||||
*Feb 2, 2023*
|
||||
|
||||
Fixed a free extent processing error that could prevent mount from
|
||||
proceeding when free data extents were sufficiently fragmented. It now
|
||||
properly handles very fragmented free extent maps.
|
||||
|
||||
Fixed a statfs server processing race that could return spurious errors
|
||||
and shut down the server. With the race closed statfs processing is
|
||||
reliable.
|
||||
|
||||
Fixed a rare livelock in the move\_blocks ioctl. With the right
|
||||
relationship between ioctl arguments and eventual file extent items the
|
||||
core loop in the move\_blocks ioctl could get stuck looping on an extent
|
||||
item and never return. The loop exit conditions were fixed and the loop
|
||||
will always advance through all extents.
|
||||
|
||||
Changed the 'print' scoutfs commands to flush the block cache for the
|
||||
devices. It was inconvenient to expect cache flushing to be a separate
|
||||
step to ensure consistency with remote node writes.
|
||||
|
||||
---
|
||||
v1.10
|
||||
\
|
||||
|
||||
@@ -976,6 +976,16 @@ int scoutfs_alloc_move(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
break;
|
||||
}
|
||||
|
||||
/* return partial if the server alloc can't dirty any more */
|
||||
if (scoutfs_alloc_meta_low(sb, alloc, 50 + extent_mod_blocks(src->root.height) +
|
||||
extent_mod_blocks(dst->root.height))) {
|
||||
if (WARN_ON_ONCE(!moved))
|
||||
ret = -ENOSPC;
|
||||
else
|
||||
ret = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
/* searching set start/len, finish initializing alloced extent */
|
||||
ext.map = found.map ? ext.start - found.start + found.map : 0;
|
||||
ext.flags = found.flags;
|
||||
@@ -1572,12 +1582,10 @@ out:
|
||||
* call the caller's callback. This assumes that the super it's reading
|
||||
* could be stale and will retry if it encounters stale blocks.
|
||||
*/
|
||||
int scoutfs_alloc_foreach(struct super_block *sb,
|
||||
scoutfs_alloc_foreach_cb_t cb, void *arg)
|
||||
int scoutfs_alloc_foreach(struct super_block *sb, scoutfs_alloc_foreach_cb_t cb, void *arg)
|
||||
{
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct scoutfs_block_ref stale_refs[2] = {{0,}};
|
||||
struct scoutfs_block_ref refs[2] = {{0,}};
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
int ret;
|
||||
|
||||
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
@@ -1586,26 +1594,18 @@ int scoutfs_alloc_foreach(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
retry:
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
do {
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
refs[0] = super->logs_root.ref;
|
||||
refs[1] = super->srch_root.ref;
|
||||
ret = scoutfs_alloc_foreach_super(sb, super, cb, arg);
|
||||
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &super->logs_root.ref,
|
||||
&super->srch_root.ref);
|
||||
} while (ret == -ESTALE);
|
||||
|
||||
ret = scoutfs_alloc_foreach_super(sb, super, cb, arg);
|
||||
out:
|
||||
if (ret == -ESTALE) {
|
||||
if (memcmp(&stale_refs, &refs, sizeof(refs)) == 0) {
|
||||
ret = -EIO;
|
||||
} else {
|
||||
BUILD_BUG_ON(sizeof(stale_refs) != sizeof(refs));
|
||||
memcpy(stale_refs, refs, sizeof(stale_refs));
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
|
||||
kfree(super);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -677,7 +677,7 @@ out:
|
||||
int scoutfs_block_read_ref(struct super_block *sb, struct scoutfs_block_ref *ref, u32 magic,
|
||||
struct scoutfs_block **bl_ret)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_block_header *hdr;
|
||||
struct block_private *bp = NULL;
|
||||
bool retried = false;
|
||||
@@ -701,7 +701,7 @@ retry:
|
||||
set_bit(BLOCK_BIT_CRC_VALID, &bp->bits);
|
||||
}
|
||||
|
||||
if (hdr->magic != cpu_to_le32(magic) || hdr->fsid != super->hdr.fsid ||
|
||||
if (hdr->magic != cpu_to_le32(magic) || hdr->fsid != cpu_to_le64(sbi->fsid) ||
|
||||
hdr->seq != ref->seq || hdr->blkno != ref->blkno) {
|
||||
ret = -ESTALE;
|
||||
goto out;
|
||||
@@ -728,6 +728,36 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
static bool stale_refs_match(struct scoutfs_block_ref *caller, struct scoutfs_block_ref *saved)
|
||||
{
|
||||
return !caller || (caller->blkno == saved->blkno && caller->seq == saved->seq);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if a read of a reference that gave ESTALE should be retried or
|
||||
* should generate a hard error. If this is the second time we got
|
||||
* ESTALE from the same refs then we return EIO and the caller should
|
||||
* stop. As long as we keep seeing different refs we'll return ESTALE
|
||||
* and the caller can keep trying.
|
||||
*/
|
||||
int scoutfs_block_check_stale(struct super_block *sb, int ret,
|
||||
struct scoutfs_block_saved_refs *saved,
|
||||
struct scoutfs_block_ref *a, struct scoutfs_block_ref *b)
|
||||
{
|
||||
if (ret == -ESTALE) {
|
||||
if (stale_refs_match(a, &saved->refs[0]) && stale_refs_match(b, &saved->refs[1])){
|
||||
ret = -EIO;
|
||||
} else {
|
||||
if (a)
|
||||
saved->refs[0] = *a;
|
||||
if (b)
|
||||
saved->refs[1] = *b;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void scoutfs_block_put(struct super_block *sb, struct scoutfs_block *bl)
|
||||
{
|
||||
if (!IS_ERR_OR_NULL(bl))
|
||||
@@ -797,7 +827,7 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
u32 magic, struct scoutfs_block **bl_ret,
|
||||
u64 dirty_blkno, u64 *ref_blkno)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_block *cow_bl = NULL;
|
||||
struct scoutfs_block *bl = NULL;
|
||||
struct block_private *exist_bp = NULL;
|
||||
@@ -865,7 +895,7 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
|
||||
hdr = bl->data;
|
||||
hdr->magic = cpu_to_le32(magic);
|
||||
hdr->fsid = super->hdr.fsid;
|
||||
hdr->fsid = cpu_to_le64(sbi->fsid);
|
||||
hdr->blkno = cpu_to_le64(bl->blkno);
|
||||
prandom_bytes(&hdr->seq, sizeof(hdr->seq));
|
||||
|
||||
|
||||
@@ -13,6 +13,17 @@ struct scoutfs_block {
|
||||
void *priv;
|
||||
};
|
||||
|
||||
struct scoutfs_block_saved_refs {
|
||||
struct scoutfs_block_ref refs[2];
|
||||
};
|
||||
|
||||
#define DECLARE_SAVED_REFS(name) \
|
||||
struct scoutfs_block_saved_refs name = {{{0,}}}
|
||||
|
||||
int scoutfs_block_check_stale(struct super_block *sb, int ret,
|
||||
struct scoutfs_block_saved_refs *saved,
|
||||
struct scoutfs_block_ref *a, struct scoutfs_block_ref *b);
|
||||
|
||||
int scoutfs_block_read_ref(struct super_block *sb, struct scoutfs_block_ref *ref, u32 magic,
|
||||
struct scoutfs_block **bl_ret);
|
||||
void scoutfs_block_put(struct super_block *sb, struct scoutfs_block *bl);
|
||||
|
||||
@@ -356,7 +356,6 @@ static int client_greeting(struct super_block *sb,
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct client_info *client = sbi->client_info;
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_net_greeting *gr = resp;
|
||||
bool new_server;
|
||||
int ret;
|
||||
@@ -371,9 +370,9 @@ static int client_greeting(struct super_block *sb,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (gr->fsid != super->hdr.fsid) {
|
||||
if (gr->fsid != cpu_to_le64(sbi->fsid)) {
|
||||
scoutfs_warn(sb, "server greeting response fsid 0x%llx did not match client fsid 0x%llx",
|
||||
le64_to_cpu(gr->fsid), le64_to_cpu(super->hdr.fsid));
|
||||
le64_to_cpu(gr->fsid), sbi->fsid);
|
||||
ret = -EINVAL;
|
||||
goto out;
|
||||
}
|
||||
@@ -476,7 +475,6 @@ static void scoutfs_client_connect_worker(struct work_struct *work)
|
||||
connect_dwork.work);
|
||||
struct super_block *sb = client->sb;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_mount_options opts;
|
||||
struct scoutfs_net_greeting greet;
|
||||
struct sockaddr_in sin;
|
||||
@@ -508,7 +506,7 @@ static void scoutfs_client_connect_worker(struct work_struct *work)
|
||||
goto out;
|
||||
|
||||
/* send a greeting to verify endpoints of each connection */
|
||||
greet.fsid = super->hdr.fsid;
|
||||
greet.fsid = cpu_to_le64(sbi->fsid);
|
||||
greet.fmt_vers = cpu_to_le64(sbi->fmt_vers);
|
||||
greet.server_term = cpu_to_le64(client->server_term);
|
||||
greet.rid = cpu_to_le64(sbi->rid);
|
||||
|
||||
@@ -187,8 +187,6 @@
|
||||
EXPAND_COUNTER(srch_search_retry_empty) \
|
||||
EXPAND_COUNTER(srch_search_sorted) \
|
||||
EXPAND_COUNTER(srch_search_sorted_block) \
|
||||
EXPAND_COUNTER(srch_search_stale_eio) \
|
||||
EXPAND_COUNTER(srch_search_stale_retry) \
|
||||
EXPAND_COUNTER(srch_search_xattrs) \
|
||||
EXPAND_COUNTER(srch_read_stale) \
|
||||
EXPAND_COUNTER(statfs) \
|
||||
|
||||
@@ -1192,9 +1192,9 @@ static void truncate_inode_pages_extent(struct inode *inode, u64 start, u64 len)
|
||||
* explained above the move_blocks ioctl argument structure definition.
|
||||
*
|
||||
* The caller has processed the ioctl args and performed the most basic
|
||||
* inode checks, but we perform more detailed inode checks once we have
|
||||
* the inode lock and refreshed inodes. Our job is to safely lock the
|
||||
* two files and move the extents.
|
||||
* argument sanity and inode checks, but we perform more detailed inode
|
||||
* checks once we have the inode lock and refreshed inodes. Our job is
|
||||
* to safely lock the two files and move the extents.
|
||||
*/
|
||||
#define MOVE_DATA_EXTENTS_PER_HOLD 16
|
||||
int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
@@ -1254,6 +1254,15 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
count = (byte_len + SCOUTFS_BLOCK_SM_MASK) >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
to_iblock = to_off >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
|
||||
/* only move extent blocks inside i_size, careful not to wrap */
|
||||
from_size = i_size_read(from);
|
||||
if (from_off >= from_size) {
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
if (from_off + byte_len > from_size)
|
||||
count = ((from_size - from_off) + SCOUTFS_BLOCK_SM_MASK) >> SCOUTFS_BLOCK_SM_SHIFT;
|
||||
|
||||
if (S_ISDIR(from->i_mode) || S_ISDIR(to->i_mode)) {
|
||||
ret = -EISDIR;
|
||||
goto out;
|
||||
@@ -1329,9 +1338,8 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
break;
|
||||
}
|
||||
|
||||
/* only move extents within count and i_size */
|
||||
if (ext.start >= from_iblock + count ||
|
||||
ext.start >= i_size_read(from)) {
|
||||
/* done if next extent starts after moving region */
|
||||
if (ext.start >= from_iblock + count) {
|
||||
done = true;
|
||||
ret = 0;
|
||||
break;
|
||||
@@ -1339,13 +1347,15 @@ int scoutfs_data_move_blocks(struct inode *from, u64 from_off,
|
||||
|
||||
from_start = max(ext.start, from_iblock);
|
||||
map = ext.map + (from_start - ext.start);
|
||||
len = min3(from_iblock + count,
|
||||
round_up((u64)i_size_read(from),
|
||||
SCOUTFS_BLOCK_SM_SIZE),
|
||||
ext.start + ext.len) - from_start;
|
||||
|
||||
len = min(from_iblock + count, ext.start + ext.len) - from_start;
|
||||
to_start = to_iblock + (from_start - from_iblock);
|
||||
|
||||
/* we'd get stuck, shouldn't happen */
|
||||
if (WARN_ON_ONCE(len == 0)) {
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (is_stage) {
|
||||
ret = scoutfs_ext_next(sb, &data_ext_ops, &to_args,
|
||||
to_start, 1, &off_ext);
|
||||
|
||||
@@ -78,11 +78,6 @@ struct forest_refs {
|
||||
struct scoutfs_block_ref logs_ref;
|
||||
};
|
||||
|
||||
/* initialize some refs that initially aren't equal */
|
||||
#define DECLARE_STALE_TRACKING_SUPER_REFS(a, b) \
|
||||
struct forest_refs a = {{cpu_to_le64(0),}}; \
|
||||
struct forest_refs b = {{cpu_to_le64(1),}}
|
||||
|
||||
struct forest_bloom_nrs {
|
||||
unsigned int nrs[SCOUTFS_FOREST_BLOOM_NRS];
|
||||
};
|
||||
@@ -136,11 +131,11 @@ static struct scoutfs_block *read_bloom_ref(struct super_block *sb, struct scout
|
||||
int scoutfs_forest_next_hint(struct super_block *sb, struct scoutfs_key *key,
|
||||
struct scoutfs_key *next)
|
||||
{
|
||||
DECLARE_STALE_TRACKING_SUPER_REFS(prev_refs, refs);
|
||||
struct scoutfs_net_roots roots;
|
||||
struct scoutfs_btree_root item_root;
|
||||
struct scoutfs_log_trees *lt;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
struct scoutfs_key found;
|
||||
struct scoutfs_key ltk;
|
||||
bool checked_fs;
|
||||
@@ -155,8 +150,6 @@ retry:
|
||||
goto out;
|
||||
|
||||
trace_scoutfs_forest_using_roots(sb, &roots.fs_root, &roots.logs_root);
|
||||
refs.fs_ref = roots.fs_root.ref;
|
||||
refs.logs_ref = roots.logs_root.ref;
|
||||
|
||||
scoutfs_key_init_log_trees(<k, 0, 0);
|
||||
checked_fs = false;
|
||||
@@ -212,14 +205,10 @@ retry:
|
||||
}
|
||||
}
|
||||
|
||||
if (ret == -ESTALE) {
|
||||
if (memcmp(&prev_refs, &refs, sizeof(refs)) == 0)
|
||||
return -EIO;
|
||||
prev_refs = refs;
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
|
||||
if (ret == -ESTALE)
|
||||
goto retry;
|
||||
}
|
||||
out:
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -541,9 +530,8 @@ void scoutfs_forest_dec_inode_count(struct super_block *sb)
|
||||
|
||||
/*
|
||||
* Return the total inode count from the super block and all the
|
||||
* log_btrees it references. This assumes it's working with a block
|
||||
* reference hierarchy that should be fully consistent. If we see
|
||||
* ESTALE we've hit persistent corruption.
|
||||
* log_btrees it references. ESTALE from read blocks is returned to the
|
||||
* caller who is expected to retry or return hard errors.
|
||||
*/
|
||||
int scoutfs_forest_inode_count(struct super_block *sb, struct scoutfs_super_block *super,
|
||||
u64 *inode_count)
|
||||
@@ -572,8 +560,6 @@ int scoutfs_forest_inode_count(struct super_block *sb, struct scoutfs_super_bloc
|
||||
if (ret < 0) {
|
||||
if (ret == -ENOENT)
|
||||
ret = 0;
|
||||
else if (ret == -ESTALE)
|
||||
ret = -EIO;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -114,6 +114,7 @@ struct quorum_status {
|
||||
|
||||
struct quorum_info {
|
||||
struct super_block *sb;
|
||||
struct scoutfs_quorum_config qconf;
|
||||
struct work_struct work;
|
||||
struct socket *sock;
|
||||
bool shutdown;
|
||||
@@ -134,11 +135,18 @@ struct quorum_info {
|
||||
#define DECLARE_QUORUM_INFO_KOBJ(kobj, name) \
|
||||
DECLARE_QUORUM_INFO(SCOUTFS_SYSFS_ATTRS_SB(kobj), name)
|
||||
|
||||
static bool quorum_slot_present(struct scoutfs_super_block *super, int i)
|
||||
static bool quorum_slot_present(struct scoutfs_quorum_config *qconf, int i)
|
||||
{
|
||||
BUG_ON(i < 0 || i > SCOUTFS_QUORUM_MAX_SLOTS);
|
||||
|
||||
return super->qconf.slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4);
|
||||
return qconf->slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4);
|
||||
}
|
||||
|
||||
static void quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin)
|
||||
{
|
||||
BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS);
|
||||
|
||||
scoutfs_addr_to_sin(sin, &qconf->slots[i].addr);
|
||||
}
|
||||
|
||||
static ktime_t election_timeout(void)
|
||||
@@ -160,7 +168,6 @@ static ktime_t heartbeat_timeout(void)
|
||||
static int create_socket(struct super_block *sb)
|
||||
{
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct socket *sock = NULL;
|
||||
struct sockaddr_in sin;
|
||||
int addrlen;
|
||||
@@ -174,7 +181,7 @@ static int create_socket(struct super_block *sb)
|
||||
|
||||
sock->sk->sk_allocation = GFP_NOFS;
|
||||
|
||||
scoutfs_quorum_slot_sin(super, qinf->our_quorum_slot_nr, &sin);
|
||||
quorum_slot_sin(&qinf->qconf, qinf->our_quorum_slot_nr, &sin);
|
||||
|
||||
addrlen = sizeof(sin);
|
||||
ret = kernel_bind(sock, (struct sockaddr *)&sin, addrlen);
|
||||
@@ -204,13 +211,13 @@ static __le32 quorum_message_crc(struct scoutfs_quorum_message *qmes)
|
||||
static void send_msg_members(struct super_block *sb, int type, u64 term,
|
||||
int only)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
ktime_t now;
|
||||
int i;
|
||||
|
||||
struct scoutfs_quorum_message qmes = {
|
||||
.fsid = super->hdr.fsid,
|
||||
.fsid = cpu_to_le64(sbi->fsid),
|
||||
.term = cpu_to_le64(term),
|
||||
.type = type,
|
||||
.from = qinf->our_quorum_slot_nr,
|
||||
@@ -234,11 +241,11 @@ static void send_msg_members(struct super_block *sb, int type, u64 term,
|
||||
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (!quorum_slot_present(super, i) ||
|
||||
if (!quorum_slot_present(&qinf->qconf, i) ||
|
||||
(only >= 0 && i != only) || i == qinf->our_quorum_slot_nr)
|
||||
continue;
|
||||
|
||||
scoutfs_quorum_slot_sin(super, i, &sin);
|
||||
scoutfs_quorum_slot_sin(&qinf->qconf, i, &sin);
|
||||
now = ktime_get();
|
||||
kernel_sendmsg(qinf->sock, &mh, &kv, 1, kv.iov_len);
|
||||
|
||||
@@ -266,7 +273,7 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
|
||||
ktime_t abs_to)
|
||||
{
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_quorum_message qmes;
|
||||
struct timeval tv;
|
||||
ktime_t rel_to;
|
||||
@@ -309,10 +316,10 @@ static int recv_msg(struct super_block *sb, struct quorum_host_msg *msg,
|
||||
|
||||
if (ret != sizeof(qmes) ||
|
||||
qmes.crc != quorum_message_crc(&qmes) ||
|
||||
qmes.fsid != super->hdr.fsid ||
|
||||
qmes.fsid != cpu_to_le64(sbi->fsid) ||
|
||||
qmes.type >= SCOUTFS_QUORUM_MSG_INVALID ||
|
||||
qmes.from >= SCOUTFS_QUORUM_MAX_SLOTS ||
|
||||
!quorum_slot_present(super, qmes.from)) {
|
||||
!quorum_slot_present(&qinf->qconf, qmes.from)) {
|
||||
/* should we be trying to open a new socket? */
|
||||
scoutfs_inc_counter(sb, quorum_recv_invalid);
|
||||
return -EAGAIN;
|
||||
@@ -342,7 +349,7 @@ static int read_quorum_block(struct super_block *sb, u64 blkno, struct scoutfs_q
|
||||
bool check_rid)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
const u64 fsid = sbi->fsid;
|
||||
const u64 rid = sbi->rid;
|
||||
char msg[150];
|
||||
__le32 crc;
|
||||
@@ -367,9 +374,9 @@ static int read_quorum_block(struct super_block *sb, u64 blkno, struct scoutfs_q
|
||||
else if (le32_to_cpu(blk->hdr.magic) != SCOUTFS_BLOCK_MAGIC_QUORUM)
|
||||
snprintf(msg, sizeof(msg), "blk magic %08x != %08x",
|
||||
le32_to_cpu(blk->hdr.magic), SCOUTFS_BLOCK_MAGIC_QUORUM);
|
||||
else if (blk->hdr.fsid != super->hdr.fsid)
|
||||
else if (blk->hdr.fsid != cpu_to_le64(fsid))
|
||||
snprintf(msg, sizeof(msg), "blk fsid %016llx != %016llx",
|
||||
le64_to_cpu(blk->hdr.fsid), le64_to_cpu(super->hdr.fsid));
|
||||
le64_to_cpu(blk->hdr.fsid), fsid);
|
||||
else if (le64_to_cpu(blk->hdr.blkno) != blkno)
|
||||
snprintf(msg, sizeof(msg), "blk blkno %llu != %llu",
|
||||
le64_to_cpu(blk->hdr.blkno), blkno);
|
||||
@@ -410,8 +417,7 @@ out:
|
||||
*/
|
||||
static void read_greatest_term(struct super_block *sb, u64 *term)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct scoutfs_quorum_block blk;
|
||||
int ret;
|
||||
int e;
|
||||
@@ -420,7 +426,7 @@ static void read_greatest_term(struct super_block *sb, u64 *term)
|
||||
*term = 0;
|
||||
|
||||
for (s = 0; s < SCOUTFS_QUORUM_MAX_SLOTS; s++) {
|
||||
if (!quorum_slot_present(super, s))
|
||||
if (!quorum_slot_present(&qinf->qconf, s))
|
||||
continue;
|
||||
|
||||
ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + s, &blk, false);
|
||||
@@ -514,14 +520,15 @@ static int update_quorum_block(struct super_block *sb, int event, u64 term, bool
|
||||
* keeps us from being fenced while we allow userspace fencing to take a
|
||||
* reasonably long time. We still want to timeout eventually.
|
||||
*/
|
||||
int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term)
|
||||
int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_config *qconf,
|
||||
u64 term)
|
||||
{
|
||||
#define NR_OLD 2
|
||||
struct scoutfs_quorum_block_event old[SCOUTFS_QUORUM_MAX_SLOTS][NR_OLD] = {{{0,}}};
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_quorum_block blk;
|
||||
struct sockaddr_in sin;
|
||||
const __le64 lefsid = cpu_to_le64(sbi->fsid);
|
||||
const u64 rid = sbi->rid;
|
||||
bool fence_started = false;
|
||||
u64 fenced = 0;
|
||||
@@ -534,7 +541,7 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term)
|
||||
BUILD_BUG_ON(SCOUTFS_QUORUM_BLOCKS < SCOUTFS_QUORUM_MAX_SLOTS);
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (!quorum_slot_present(super, i))
|
||||
if (!quorum_slot_present(qconf, i))
|
||||
continue;
|
||||
|
||||
ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + i, &blk, false);
|
||||
@@ -567,11 +574,11 @@ int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term)
|
||||
continue;
|
||||
|
||||
scoutfs_inc_counter(sb, quorum_fence_leader);
|
||||
scoutfs_quorum_slot_sin(super, i, &sin);
|
||||
quorum_slot_sin(qconf, i, &sin);
|
||||
fence_rid = old[i][j].rid;
|
||||
|
||||
scoutfs_info(sb, "fencing previous leader "SCSBF" at term %llu in slot %u with address "SIN_FMT,
|
||||
SCSB_LEFR_ARGS(super->hdr.fsid, fence_rid),
|
||||
SCSB_LEFR_ARGS(lefsid, fence_rid),
|
||||
le64_to_cpu(old[i][j].term), i, SIN_ARG(&sin));
|
||||
ret = scoutfs_fence_start(sb, le64_to_cpu(fence_rid), sin.sin_addr.s_addr,
|
||||
SCOUTFS_FENCE_QUORUM_BLOCK_LEADER);
|
||||
@@ -752,7 +759,7 @@ static void scoutfs_quorum_worker(struct work_struct *work)
|
||||
|
||||
qst.server_start_term = qst.term;
|
||||
qst.server_event = SCOUTFS_QUORUM_EVENT_ELECT;
|
||||
scoutfs_server_start(sb, qst.term);
|
||||
scoutfs_server_start(sb, &qinf->qconf, qst.term);
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -877,16 +884,25 @@ out:
|
||||
*/
|
||||
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct scoutfs_quorum_block blk;
|
||||
u64 elect_term;
|
||||
u64 term = 0;
|
||||
int ret = 0;
|
||||
int i;
|
||||
|
||||
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_NOFS);
|
||||
if (!super) {
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (!quorum_slot_present(super, i))
|
||||
if (!quorum_slot_present(&super->qconf, i))
|
||||
continue;
|
||||
|
||||
ret = read_quorum_block(sb, SCOUTFS_QUORUM_BLKNO + i, &blk, false);
|
||||
@@ -900,7 +916,7 @@ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin)
|
||||
if (elect_term > term &&
|
||||
elect_term > le64_to_cpu(blk.events[SCOUTFS_QUORUM_EVENT_STOP].term)) {
|
||||
term = elect_term;
|
||||
scoutfs_quorum_slot_sin(super, i, sin);
|
||||
scoutfs_quorum_slot_sin(&super->qconf, i, sin);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
@@ -909,6 +925,7 @@ int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin)
|
||||
ret = -ENOENT;
|
||||
|
||||
out:
|
||||
kfree(super);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -924,12 +941,9 @@ u8 scoutfs_quorum_votes_needed(struct super_block *sb)
|
||||
return qinf->votes_needed;
|
||||
}
|
||||
|
||||
void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i,
|
||||
struct sockaddr_in *sin)
|
||||
void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i, struct sockaddr_in *sin)
|
||||
{
|
||||
BUG_ON(i < 0 || i >= SCOUTFS_QUORUM_MAX_SLOTS);
|
||||
|
||||
scoutfs_addr_to_sin(sin, &super->qconf.slots[i].addr);
|
||||
return quorum_slot_sin(qconf, i, sin);
|
||||
}
|
||||
|
||||
static char *role_str(int role)
|
||||
@@ -1060,11 +1074,10 @@ static inline bool valid_ipv4_port(__be16 port)
|
||||
return port != 0 && be16_to_cpu(port) != U16_MAX;
|
||||
}
|
||||
|
||||
static int verify_quorum_slots(struct super_block *sb)
|
||||
static int verify_quorum_slots(struct super_block *sb, struct quorum_info *qinf,
|
||||
struct scoutfs_quorum_config *qconf)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
char slots[(SCOUTFS_QUORUM_MAX_SLOTS * 3) + 1];
|
||||
DECLARE_QUORUM_INFO(sb, qinf);
|
||||
struct sockaddr_in other;
|
||||
struct sockaddr_in sin;
|
||||
int found = 0;
|
||||
@@ -1074,10 +1087,10 @@ static int verify_quorum_slots(struct super_block *sb)
|
||||
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (!quorum_slot_present(super, i))
|
||||
if (!quorum_slot_present(qconf, i))
|
||||
continue;
|
||||
|
||||
scoutfs_quorum_slot_sin(super, i, &sin);
|
||||
scoutfs_quorum_slot_sin(qconf, i, &sin);
|
||||
|
||||
if (!valid_ipv4_unicast(sin.sin_addr.s_addr)) {
|
||||
scoutfs_err(sb, "quorum slot #%d has invalid ipv4 unicast address: "SIN_FMT,
|
||||
@@ -1092,10 +1105,10 @@ static int verify_quorum_slots(struct super_block *sb)
|
||||
}
|
||||
|
||||
for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) {
|
||||
if (!quorum_slot_present(super, j))
|
||||
if (!quorum_slot_present(qconf, j))
|
||||
continue;
|
||||
|
||||
scoutfs_quorum_slot_sin(super, j, &other);
|
||||
scoutfs_quorum_slot_sin(qconf, j, &other);
|
||||
|
||||
if (sin.sin_addr.s_addr == other.sin_addr.s_addr &&
|
||||
sin.sin_port == other.sin_port) {
|
||||
@@ -1113,11 +1126,11 @@ static int verify_quorum_slots(struct super_block *sb)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if (!quorum_slot_present(super, qinf->our_quorum_slot_nr)) {
|
||||
if (!quorum_slot_present(qconf, qinf->our_quorum_slot_nr)) {
|
||||
char *str = slots;
|
||||
*str = '\0';
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (quorum_slot_present(super, i)) {
|
||||
if (quorum_slot_present(qconf, i)) {
|
||||
ret = snprintf(str, &slots[ARRAY_SIZE(slots)] - str, "%c%u",
|
||||
str == slots ? ' ' : ',', i);
|
||||
if (ret < 2 || ret > 3) {
|
||||
@@ -1141,16 +1154,22 @@ static int verify_quorum_slots(struct super_block *sb)
|
||||
else
|
||||
qinf->votes_needed = (found / 2) + 1;
|
||||
|
||||
qinf->qconf = *qconf;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Once this schedules the quorum worker it can be elected leader and
|
||||
* start the server, possibly before this returns.
|
||||
* start the server, possibly before this returns. The quorum agent
|
||||
* would be responsible for tracking the quorum config in the super
|
||||
* block if it changes. Until then uses a static config that it reads
|
||||
* during setup.
|
||||
*/
|
||||
int scoutfs_quorum_setup(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = NULL;
|
||||
struct scoutfs_mount_options opts;
|
||||
struct quorum_info *qinf;
|
||||
int ret;
|
||||
@@ -1160,7 +1179,9 @@ int scoutfs_quorum_setup(struct super_block *sb)
|
||||
return 0;
|
||||
|
||||
qinf = kzalloc(sizeof(struct quorum_info), GFP_KERNEL);
|
||||
if (!qinf) {
|
||||
super = kmalloc(sizeof(struct scoutfs_super_block), GFP_KERNEL);
|
||||
if (!qinf || !super) {
|
||||
kfree(qinf);
|
||||
ret = -ENOMEM;
|
||||
goto out;
|
||||
}
|
||||
@@ -1174,7 +1195,11 @@ int scoutfs_quorum_setup(struct super_block *sb)
|
||||
sbi->quorum_info = qinf;
|
||||
qinf->sb = sb;
|
||||
|
||||
ret = verify_quorum_slots(sb);
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = verify_quorum_slots(sb, qinf, &super->qconf);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
@@ -1194,6 +1219,7 @@ out:
|
||||
if (ret)
|
||||
scoutfs_quorum_destroy(sb);
|
||||
|
||||
kfree(super);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -4,10 +4,11 @@
|
||||
int scoutfs_quorum_server_sin(struct super_block *sb, struct sockaddr_in *sin);
|
||||
|
||||
u8 scoutfs_quorum_votes_needed(struct super_block *sb);
|
||||
void scoutfs_quorum_slot_sin(struct scoutfs_super_block *super, int i,
|
||||
void scoutfs_quorum_slot_sin(struct scoutfs_quorum_config *qconf, int i,
|
||||
struct sockaddr_in *sin);
|
||||
|
||||
int scoutfs_quorum_fence_leaders(struct super_block *sb, u64 term);
|
||||
int scoutfs_quorum_fence_leaders(struct super_block *sb, struct scoutfs_quorum_config *qconf,
|
||||
u64 term);
|
||||
|
||||
int scoutfs_quorum_setup(struct super_block *sb);
|
||||
void scoutfs_quorum_shutdown(struct super_block *sb);
|
||||
|
||||
@@ -130,9 +130,9 @@ struct server_info {
|
||||
struct mutex srch_mutex;
|
||||
struct mutex mounted_clients_mutex;
|
||||
|
||||
/* stable versions stored from commits, given in locks and rpcs */
|
||||
seqcount_t roots_seqcount;
|
||||
struct scoutfs_net_roots roots;
|
||||
/* stable super stored from commits, given in locks and rpcs */
|
||||
seqcount_t stable_seqcount;
|
||||
struct scoutfs_super_block stable_super;
|
||||
|
||||
/* serializing and get and set volume options */
|
||||
seqcount_t volopt_seqcount;
|
||||
@@ -143,11 +143,18 @@ struct server_info {
|
||||
struct work_struct fence_pending_recov_work;
|
||||
/* while running we check for fenced mounts to reclaim */
|
||||
struct delayed_work reclaim_dwork;
|
||||
|
||||
/* a running server gets a static quorum config from quorum as it starts */
|
||||
struct scoutfs_quorum_config qconf;
|
||||
/* a running server maintains a private dirty super */
|
||||
struct scoutfs_super_block dirty_super;
|
||||
};
|
||||
|
||||
#define DECLARE_SERVER_INFO(sb, name) \
|
||||
struct server_info *name = SCOUTFS_SB(sb)->server_info
|
||||
|
||||
#define DIRTY_SUPER_SB(sb) (&SCOUTFS_SB(sb)->server_info->dirty_super)
|
||||
|
||||
/*
|
||||
* The server tracks each connected client.
|
||||
*/
|
||||
@@ -469,16 +476,22 @@ static void commit_end(struct super_block *sb, struct commit_users *cusers, int
|
||||
wake_up(&cusers->waitq);
|
||||
}
|
||||
|
||||
static void get_roots(struct super_block *sb,
|
||||
struct scoutfs_net_roots *roots)
|
||||
static void get_stable(struct super_block *sb, struct scoutfs_super_block *super,
|
||||
struct scoutfs_net_roots *roots)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
unsigned int seq;
|
||||
|
||||
do {
|
||||
seq = read_seqcount_begin(&server->roots_seqcount);
|
||||
*roots = server->roots;
|
||||
} while (read_seqcount_retry(&server->roots_seqcount, seq));
|
||||
seq = read_seqcount_begin(&server->stable_seqcount);
|
||||
if (super)
|
||||
*super = server->stable_super;
|
||||
if (roots) {
|
||||
roots->fs_root = server->stable_super.fs_root;
|
||||
roots->logs_root = server->stable_super.logs_root;
|
||||
roots->srch_root = server->stable_super.srch_root;
|
||||
}
|
||||
} while (read_seqcount_retry(&server->stable_seqcount, seq));
|
||||
}
|
||||
|
||||
u64 scoutfs_server_seq(struct super_block *sb)
|
||||
@@ -510,17 +523,12 @@ void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq)
|
||||
}
|
||||
}
|
||||
|
||||
static void set_roots(struct server_info *server,
|
||||
struct scoutfs_btree_root *fs_root,
|
||||
struct scoutfs_btree_root *logs_root,
|
||||
struct scoutfs_btree_root *srch_root)
|
||||
static void set_stable_super(struct server_info *server, struct scoutfs_super_block *super)
|
||||
{
|
||||
preempt_disable();
|
||||
write_seqcount_begin(&server->roots_seqcount);
|
||||
server->roots.fs_root = *fs_root;
|
||||
server->roots.logs_root = *logs_root;
|
||||
server->roots.srch_root = *srch_root;
|
||||
write_seqcount_end(&server->roots_seqcount);
|
||||
write_seqcount_begin(&server->stable_seqcount);
|
||||
server->stable_super = *super;
|
||||
write_seqcount_end(&server->stable_seqcount);
|
||||
preempt_enable();
|
||||
}
|
||||
|
||||
@@ -545,7 +553,7 @@ static void scoutfs_server_commit_func(struct work_struct *work)
|
||||
struct server_info *server = container_of(work, struct server_info,
|
||||
commit_work);
|
||||
struct super_block *sb = server->sb;
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct commit_users *cusers = &server->cusers;
|
||||
int ret;
|
||||
|
||||
@@ -603,8 +611,7 @@ static void scoutfs_server_commit_func(struct work_struct *work)
|
||||
goto out;
|
||||
}
|
||||
|
||||
set_roots(server, &super->fs_root, &super->logs_root,
|
||||
&super->srch_root);
|
||||
set_stable_super(server, super);
|
||||
|
||||
/* swizzle the active and idle server alloc/freed heads */
|
||||
server->other_ind ^= 1;
|
||||
@@ -641,7 +648,7 @@ static int server_alloc_inodes(struct super_block *sb,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_net_inode_alloc ial = { 0, };
|
||||
COMMIT_HOLD(hold);
|
||||
__le64 lecount;
|
||||
@@ -809,7 +816,7 @@ static void mod_bitmap_bits(__le64 *dst, u64 dst_zone_blocks,
|
||||
static int get_data_alloc_zone_bits(struct super_block *sb, u64 rid, __le64 *exclusive,
|
||||
__le64 *vacant, u64 zone_blocks)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_log_trees *lt;
|
||||
struct scoutfs_key key;
|
||||
@@ -1040,7 +1047,7 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
u64 rid, struct commit_hold *hold)
|
||||
{
|
||||
struct server_info *server = SCOUTFS_SB(sb)->server_info;
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_log_merge_status stat;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
struct scoutfs_log_trees each_lt;
|
||||
@@ -1242,7 +1249,7 @@ static int finalize_and_start_log_merge(struct super_block *sb, struct scoutfs_l
|
||||
static void try_drain_data_freed(struct super_block *sb, struct scoutfs_log_trees *lt)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
const u64 rid = le64_to_cpu(lt->rid);
|
||||
const u64 nr = le64_to_cpu(lt->nr);
|
||||
struct scoutfs_log_trees drain;
|
||||
@@ -1329,7 +1336,7 @@ static int server_get_log_trees(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
__le64 exclusive[SCOUTFS_DATA_ALLOC_ZONE_LE64S];
|
||||
@@ -1524,7 +1531,7 @@ static int server_commit_log_trees(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
const u64 rid = scoutfs_net_client_rid(conn);
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
@@ -1579,6 +1586,13 @@ static int server_commit_log_trees(struct super_block *sb,
|
||||
if (ret < 0 || committed)
|
||||
goto unlock;
|
||||
|
||||
/* make sure _update succeeds before we modify srch items */
|
||||
ret = scoutfs_btree_dirty(sb, &server->alloc, &server->wri, &super->logs_root, &key);
|
||||
if (ret < 0) {
|
||||
err_str = "dirtying lt item";
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
/* try to rotate the srch log when big enough */
|
||||
mutex_lock(&server->srch_mutex);
|
||||
ret = scoutfs_srch_rotate_log(sb, &server->alloc, &server->wri,
|
||||
@@ -1593,6 +1607,7 @@ static int server_commit_log_trees(struct super_block *sb,
|
||||
|
||||
ret = scoutfs_btree_update(sb, &server->alloc, &server->wri,
|
||||
&super->logs_root, &key, <, sizeof(lt));
|
||||
BUG_ON(ret < 0); /* dirtying should have guaranteed success */
|
||||
if (ret < 0)
|
||||
err_str = "updating log trees item";
|
||||
|
||||
@@ -1624,7 +1639,7 @@ static int server_get_roots(struct super_block *sb,
|
||||
memset(&roots, 0, sizeof(roots));
|
||||
ret = -EINVAL;
|
||||
} else {
|
||||
get_roots(sb, &roots);
|
||||
get_stable(sb, NULL, &roots);
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
@@ -1654,7 +1669,7 @@ static int server_get_roots(struct super_block *sb,
|
||||
*/
|
||||
static int reclaim_open_log_tree(struct super_block *sb, u64 rid)
|
||||
{
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_log_trees lt;
|
||||
@@ -1751,9 +1766,8 @@ out:
|
||||
*/
|
||||
static int get_stable_trans_seq(struct super_block *sb, u64 *last_seq_ret)
|
||||
{
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_log_trees *lt;
|
||||
struct scoutfs_key key;
|
||||
@@ -1909,9 +1923,8 @@ static int server_srch_get_compact(struct super_block *sb,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_srch_compact *sc = NULL;
|
||||
COMMIT_HOLD(hold);
|
||||
int ret;
|
||||
@@ -1976,8 +1989,7 @@ static int server_srch_commit_compact(struct super_block *sb,
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_srch_compact *sc;
|
||||
struct scoutfs_alloc_list_head av;
|
||||
struct scoutfs_alloc_list_head fr;
|
||||
@@ -2052,8 +2064,7 @@ static int splice_log_merge_completions(struct super_block *sb,
|
||||
bool no_ranges)
|
||||
{
|
||||
struct server_info *server = SCOUTFS_SB(sb)->server_info;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_log_merge_complete comp;
|
||||
struct scoutfs_log_merge_freeing fr;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
@@ -2370,7 +2381,7 @@ static void server_log_merge_free_work(struct work_struct *work)
|
||||
struct server_info *server = container_of(work, struct server_info,
|
||||
log_merge_free_work);
|
||||
struct super_block *sb = server->sb;
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_log_merge_freeing fr;
|
||||
struct scoutfs_key key;
|
||||
COMMIT_HOLD(hold);
|
||||
@@ -2462,8 +2473,7 @@ static int server_get_log_merge(struct super_block *sb,
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_log_merge_status stat;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
struct scoutfs_log_merge_range remain;
|
||||
@@ -2746,8 +2756,7 @@ static int server_commit_log_merge(struct super_block *sb,
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
u64 rid = scoutfs_net_client_rid(conn);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_log_merge_request orig_req;
|
||||
struct scoutfs_log_merge_complete *comp;
|
||||
struct scoutfs_log_merge_status stat;
|
||||
@@ -2982,7 +2991,7 @@ static int server_set_volopt(struct super_block *sb, struct scoutfs_net_connecti
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_volume_options *volopt;
|
||||
COMMIT_HOLD(hold);
|
||||
u64 opt;
|
||||
@@ -3051,7 +3060,7 @@ static int server_clear_volopt(struct super_block *sb, struct scoutfs_net_connec
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_volume_options *volopt;
|
||||
COMMIT_HOLD(hold);
|
||||
__le64 *opt;
|
||||
@@ -3105,7 +3114,7 @@ static int server_resize_devices(struct super_block *sb, struct scoutfs_net_conn
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_net_resize_devices *nrd;
|
||||
COMMIT_HOLD(hold);
|
||||
u64 meta_tot;
|
||||
@@ -3212,16 +3221,19 @@ static int count_free_blocks(struct super_block *sb, void *arg, int owner,
|
||||
}
|
||||
|
||||
/*
|
||||
* We calculate the total inode count and free blocks from the current in-memory dirty
|
||||
* versions of the super block and log_trees structs, so we have to lock them.
|
||||
* We calculate the total inode count and free blocks from the last
|
||||
* stable super that was written. Other users also walk stable blocks
|
||||
* so by joining them we don't have to worry about ensuring that we've
|
||||
* locked all the dirty structures that the summations could reference.
|
||||
* We handle stale reads by retrying with the most recent stable super.
|
||||
*/
|
||||
static int server_statfs(struct super_block *sb, struct scoutfs_net_connection *conn,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block super;
|
||||
struct scoutfs_net_statfs nst = {{0,}};
|
||||
struct statfs_free_blocks sfb = {0,};
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
u64 inode_count;
|
||||
int ret;
|
||||
|
||||
@@ -3230,24 +3242,24 @@ static int server_statfs(struct super_block *sb, struct scoutfs_net_connection *
|
||||
goto out;
|
||||
}
|
||||
|
||||
mutex_lock(&server->alloc_mutex);
|
||||
ret = scoutfs_alloc_foreach_super(sb, super, count_free_blocks, &sfb);
|
||||
mutex_unlock(&server->alloc_mutex);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
do {
|
||||
get_stable(sb, &super, NULL);
|
||||
|
||||
mutex_lock(&server->logs_mutex);
|
||||
ret = scoutfs_forest_inode_count(sb, super, &inode_count);
|
||||
mutex_unlock(&server->logs_mutex);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
ret = scoutfs_alloc_foreach_super(sb, &super, count_free_blocks, &sfb) ?:
|
||||
scoutfs_forest_inode_count(sb, &super, &inode_count);
|
||||
if (ret < 0 && ret != -ESTALE)
|
||||
goto out;
|
||||
|
||||
BUILD_BUG_ON(sizeof(nst.uuid) != sizeof(super->uuid));
|
||||
memcpy(nst.uuid, super->uuid, sizeof(nst.uuid));
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &super.logs_root.ref,
|
||||
&super.srch_root.ref);
|
||||
} while (ret == -ESTALE);
|
||||
|
||||
BUILD_BUG_ON(sizeof(nst.uuid) != sizeof(super.uuid));
|
||||
memcpy(nst.uuid, super.uuid, sizeof(nst.uuid));
|
||||
nst.free_meta_blocks = cpu_to_le64(sfb.meta);
|
||||
nst.total_meta_blocks = super->total_meta_blocks;
|
||||
nst.total_meta_blocks = super.total_meta_blocks;
|
||||
nst.free_data_blocks = cpu_to_le64(sfb.data);
|
||||
nst.total_data_blocks = super->total_data_blocks;
|
||||
nst.total_data_blocks = super.total_data_blocks;
|
||||
nst.inode_count = cpu_to_le64(inode_count);
|
||||
|
||||
ret = 0;
|
||||
@@ -3278,7 +3290,7 @@ static int insert_mounted_client(struct super_block *sb, u64 rid, u64 gr_flags,
|
||||
struct sockaddr_in *sin)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_mounted_client_btree_val mcv;
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
@@ -3304,7 +3316,7 @@ static int lookup_mounted_client_addr(struct super_block *sb, u64 rid,
|
||||
union scoutfs_inet_addr *addr)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_mounted_client_btree_val *mcv;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
@@ -3338,7 +3350,7 @@ static int lookup_mounted_client_addr(struct super_block *sb, u64 rid,
|
||||
static int delete_mounted_client(struct super_block *sb, u64 rid)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
@@ -3362,7 +3374,7 @@ static int delete_mounted_client(struct super_block *sb, u64 rid)
|
||||
static int cancel_srch_compact(struct super_block *sb, u64 rid)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_alloc_list_head av;
|
||||
struct scoutfs_alloc_list_head fr;
|
||||
int ret;
|
||||
@@ -3414,7 +3426,7 @@ static int cancel_srch_compact(struct super_block *sb, u64 rid)
|
||||
static int cancel_log_merge(struct super_block *sb, u64 rid)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_log_merge_status stat;
|
||||
struct scoutfs_log_merge_request req;
|
||||
struct scoutfs_log_merge_range rng;
|
||||
@@ -3538,7 +3550,7 @@ static int server_greeting(struct super_block *sb,
|
||||
u8 cmd, u64 id, void *arg, u16 arg_len)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_net_greeting *gr = arg;
|
||||
struct scoutfs_net_greeting greet;
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
@@ -3554,10 +3566,9 @@ static int server_greeting(struct super_block *sb,
|
||||
goto send_err;
|
||||
}
|
||||
|
||||
if (gr->fsid != super->hdr.fsid) {
|
||||
if (gr->fsid != cpu_to_le64(sbi->fsid)) {
|
||||
scoutfs_warn(sb, "client rid %016llx greeting fsid 0x%llx did not match server fsid 0x%llx",
|
||||
le64_to_cpu(gr->rid), le64_to_cpu(gr->fsid),
|
||||
le64_to_cpu(super->hdr.fsid));
|
||||
le64_to_cpu(gr->rid), le64_to_cpu(gr->fsid), sbi->fsid);
|
||||
ret = -EINVAL;
|
||||
goto send_err;
|
||||
}
|
||||
@@ -3697,7 +3708,7 @@ static void farewell_worker(struct work_struct *work)
|
||||
struct server_info *server = container_of(work, struct server_info,
|
||||
farewell_work);
|
||||
struct super_block *sb = server->sb;
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_mounted_client_btree_val *mcv;
|
||||
struct farewell_request *tmp;
|
||||
struct farewell_request *fw;
|
||||
@@ -4059,7 +4070,7 @@ static void recovery_timeout(struct super_block *sb)
|
||||
static int start_recovery(struct super_block *sb)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
unsigned int nr = 0;
|
||||
@@ -4176,8 +4187,7 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
struct server_info *server = container_of(work, struct server_info,
|
||||
work);
|
||||
struct super_block *sb = server->sb;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_super_block *super = &sbi->super;
|
||||
struct scoutfs_super_block *super = DIRTY_SUPER_SB(sb);
|
||||
struct scoutfs_net_connection *conn = NULL;
|
||||
struct scoutfs_mount_options opts;
|
||||
DECLARE_WAIT_QUEUE_HEAD(waitq);
|
||||
@@ -4189,13 +4199,13 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
trace_scoutfs_server_work_enter(sb, 0, 0);
|
||||
|
||||
scoutfs_options_read(sb, &opts);
|
||||
scoutfs_quorum_slot_sin(super, opts.quorum_slot_nr, &sin);
|
||||
scoutfs_quorum_slot_sin(&server->qconf, opts.quorum_slot_nr, &sin);
|
||||
scoutfs_info(sb, "server starting at "SIN_FMT, SIN_ARG(&sin));
|
||||
|
||||
scoutfs_block_writer_init(sb, &server->wri);
|
||||
|
||||
/* first make sure no other servers are still running */
|
||||
ret = scoutfs_quorum_fence_leaders(sb, server->term);
|
||||
ret = scoutfs_quorum_fence_leaders(sb, &server->qconf, server->term);
|
||||
if (ret < 0) {
|
||||
scoutfs_err(sb, "server error %d attempting to fence previous leaders", ret);
|
||||
goto out;
|
||||
@@ -4231,8 +4241,7 @@ static void scoutfs_server_worker(struct work_struct *work)
|
||||
write_seqcount_end(&server->volopt_seqcount);
|
||||
|
||||
atomic64_set(&server->seq_atomic, le64_to_cpu(super->seq));
|
||||
set_roots(server, &super->fs_root, &super->logs_root,
|
||||
&super->srch_root);
|
||||
set_stable_super(server, super);
|
||||
|
||||
/* prepare server alloc for this transaction, larger first */
|
||||
if (le64_to_cpu(super->server_meta_avail[0].total_nr) <
|
||||
@@ -4326,11 +4335,12 @@ out:
|
||||
/*
|
||||
* Start the server but don't wait for it to complete.
|
||||
*/
|
||||
void scoutfs_server_start(struct super_block *sb, u64 term)
|
||||
void scoutfs_server_start(struct super_block *sb, struct scoutfs_quorum_config *qconf, u64 term)
|
||||
{
|
||||
DECLARE_SERVER_INFO(sb, server);
|
||||
|
||||
if (cmpxchg(&server->status, SERVER_DOWN, SERVER_STARTING) == SERVER_DOWN) {
|
||||
server->qconf = *qconf;
|
||||
server->term = term;
|
||||
queue_work(server->wq, &server->work);
|
||||
}
|
||||
@@ -4382,7 +4392,7 @@ int scoutfs_server_setup(struct super_block *sb)
|
||||
INIT_WORK(&server->log_merge_free_work, server_log_merge_free_work);
|
||||
mutex_init(&server->srch_mutex);
|
||||
mutex_init(&server->mounted_clients_mutex);
|
||||
seqcount_init(&server->roots_seqcount);
|
||||
seqcount_init(&server->stable_seqcount);
|
||||
seqcount_init(&server->volopt_seqcount);
|
||||
mutex_init(&server->volopt_mutex);
|
||||
INIT_WORK(&server->fence_pending_recov_work, fence_pending_recov_worker);
|
||||
|
||||
@@ -75,7 +75,7 @@ u64 scoutfs_server_seq(struct super_block *sb);
|
||||
u64 scoutfs_server_next_seq(struct super_block *sb);
|
||||
void scoutfs_server_set_seq_if_greater(struct super_block *sb, u64 seq);
|
||||
|
||||
void scoutfs_server_start(struct super_block *sb, u64 term);
|
||||
void scoutfs_server_start(struct super_block *sb, struct scoutfs_quorum_config *qconf, u64 term);
|
||||
void scoutfs_server_stop(struct super_block *sb);
|
||||
void scoutfs_server_stop_wait(struct super_block *sb);
|
||||
bool scoutfs_server_is_running(struct super_block *sb);
|
||||
|
||||
@@ -861,7 +861,6 @@ int scoutfs_srch_search_xattrs(struct super_block *sb,
|
||||
struct scoutfs_srch_rb_root *sroot,
|
||||
u64 hash, u64 ino, u64 last_ino, bool *done)
|
||||
{
|
||||
struct scoutfs_net_roots prev_roots;
|
||||
struct scoutfs_net_roots roots;
|
||||
struct scoutfs_srch_entry start;
|
||||
struct scoutfs_srch_entry end;
|
||||
@@ -869,6 +868,7 @@ int scoutfs_srch_search_xattrs(struct super_block *sb,
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_srch_file sfl;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
DECLARE_SAVED_REFS(saved);
|
||||
struct scoutfs_key key;
|
||||
unsigned long limit = SRCH_LIMIT;
|
||||
int ret;
|
||||
@@ -877,7 +877,6 @@ int scoutfs_srch_search_xattrs(struct super_block *sb,
|
||||
|
||||
*done = false;
|
||||
srch_init_rb_root(sroot);
|
||||
memset(&prev_roots, 0, sizeof(prev_roots));
|
||||
|
||||
start.hash = cpu_to_le64(hash);
|
||||
start.ino = cpu_to_le64(ino);
|
||||
@@ -892,7 +891,6 @@ retry:
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret)
|
||||
goto out;
|
||||
memset(&roots.fs_root, 0, sizeof(roots.fs_root));
|
||||
|
||||
end = final;
|
||||
|
||||
@@ -968,16 +966,10 @@ retry:
|
||||
*done = sre_cmp(&end, &final) == 0;
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret == -ESTALE) {
|
||||
if (memcmp(&prev_roots, &roots, sizeof(roots)) == 0) {
|
||||
scoutfs_inc_counter(sb, srch_search_stale_eio);
|
||||
ret = -EIO;
|
||||
} else {
|
||||
scoutfs_inc_counter(sb, srch_search_stale_retry);
|
||||
prev_roots = roots;
|
||||
goto retry;
|
||||
}
|
||||
}
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.srch_root.ref,
|
||||
&roots.logs_root.ref);
|
||||
if (ret == -ESTALE)
|
||||
goto retry;
|
||||
|
||||
return ret;
|
||||
}
|
||||
@@ -1003,6 +995,14 @@ int scoutfs_srch_rotate_log(struct super_block *sb,
|
||||
le64_to_cpu(sfl->ref.blkno), 0);
|
||||
ret = scoutfs_btree_insert(sb, alloc, wri, root, &key,
|
||||
sfl, sizeof(*sfl));
|
||||
/*
|
||||
* While it's fine to replay moving the client's logging srch
|
||||
* file to the core btree item, server commits should keep it
|
||||
* from happening. So we'll warn if we see it happen. This can
|
||||
* be removed eventually.
|
||||
*/
|
||||
if (WARN_ON_ONCE(ret == -EEXIST))
|
||||
ret = 0;
|
||||
if (ret == 0) {
|
||||
memset(sfl, 0, sizeof(*sfl));
|
||||
scoutfs_inc_counter(sb, srch_rotate_log);
|
||||
|
||||
@@ -461,9 +461,8 @@ static int scoutfs_read_supers(struct super_block *sb)
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
||||
sbi->fsid = le64_to_cpu(meta_super->hdr.fsid);
|
||||
sbi->fmt_vers = le64_to_cpu(meta_super->fmt_vers);
|
||||
sbi->super = *meta_super;
|
||||
out:
|
||||
kfree(meta_super);
|
||||
kfree(data_super);
|
||||
|
||||
@@ -35,11 +35,10 @@ struct scoutfs_sb_info {
|
||||
struct super_block *sb;
|
||||
|
||||
/* assigned once at the start of each mount, read-only */
|
||||
u64 fsid;
|
||||
u64 rid;
|
||||
u64 fmt_vers;
|
||||
|
||||
struct scoutfs_super_block super;
|
||||
|
||||
struct block_device *meta_bdev;
|
||||
|
||||
spinlock_t next_ino_lock;
|
||||
@@ -135,14 +134,14 @@ static inline bool scoutfs_unmounting(struct super_block *sb)
|
||||
(int)(le64_to_cpu(fsid) >> SCSB_SHIFT), \
|
||||
(int)(le64_to_cpu(rid) >> SCSB_SHIFT)
|
||||
#define SCSB_ARGS(sb) \
|
||||
(int)(le64_to_cpu(SCOUTFS_SB(sb)->super.hdr.fsid) >> SCSB_SHIFT), \
|
||||
(int)(SCOUTFS_SB(sb)->fsid >> SCSB_SHIFT), \
|
||||
(int)(SCOUTFS_SB(sb)->rid >> SCSB_SHIFT)
|
||||
#define SCSB_TRACE_FIELDS \
|
||||
__field(__u64, fsid) \
|
||||
__field(__u64, rid)
|
||||
#define SCSB_TRACE_ASSIGN(sb) \
|
||||
__entry->fsid = SCOUTFS_HAS_SBI(sb) ? \
|
||||
le64_to_cpu(SCOUTFS_SB(sb)->super.hdr.fsid) : 0;\
|
||||
SCOUTFS_SB(sb)->fsid : 0; \
|
||||
__entry->rid = SCOUTFS_HAS_SBI(sb) ? \
|
||||
SCOUTFS_SB(sb)->rid : 0;
|
||||
#define SCSB_TRACE_ARGS \
|
||||
|
||||
@@ -60,10 +60,9 @@ static ssize_t fsid_show(struct kobject *kobj, struct attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct super_block *sb = KOBJ_TO_SB(kobj, sb_id_kobj);
|
||||
struct scoutfs_super_block *super = &SCOUTFS_SB(sb)->super;
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
|
||||
return snprintf(buf, PAGE_SIZE, "%016llx\n",
|
||||
le64_to_cpu(super->hdr.fsid));
|
||||
return snprintf(buf, PAGE_SIZE, "%016llx\n", sbi->fsid);
|
||||
}
|
||||
ATTR_FUNCS_RO(fsid);
|
||||
|
||||
|
||||
1
tests/.gitignore
vendored
1
tests/.gitignore
vendored
@@ -8,3 +8,4 @@ src/bulk_create_paths
|
||||
src/find_xattrs
|
||||
src/stage_tmpfile
|
||||
src/create_xattr_loop
|
||||
src/o_tmpfile_umask
|
||||
|
||||
@@ -11,7 +11,8 @@ BIN := src/createmany \
|
||||
src/stage_tmpfile \
|
||||
src/find_xattrs \
|
||||
src/create_xattr_loop \
|
||||
src/fragmented_data_extents
|
||||
src/fragmented_data_extents \
|
||||
src/o_tmpfile_umask
|
||||
|
||||
DEPS := $(wildcard src/*.d)
|
||||
|
||||
|
||||
@@ -1,3 +1,11 @@
|
||||
== non-acl O_TMPFILE creation honors umask
|
||||
umask 022
|
||||
fstat after open(0777): 0100755
|
||||
stat after linkat: 0100755
|
||||
umask 077
|
||||
fstat after open(0777): 0100700
|
||||
stat after linkat: 0100700
|
||||
== stage from tmpfile
|
||||
total file size 33669120
|
||||
00000000 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 41 |AAAAAAAAAAAAAAAA|
|
||||
*
|
||||
@@ -27,7 +27,7 @@ createmany-large-names.sh
|
||||
createmany-rename-large-dir.sh
|
||||
stage-release-race-alloc.sh
|
||||
stage-multi-part.sh
|
||||
stage-tmpfile.sh
|
||||
o_tmpfile.sh
|
||||
basic-posix-consistency.sh
|
||||
dirent-consistency.sh
|
||||
mkdir-rename-rmdir.sh
|
||||
|
||||
97
tests/src/o_tmpfile_umask.c
Normal file
97
tests/src/o_tmpfile_umask.c
Normal file
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Show the modes of files as we create them with O_TMPFILE and link
|
||||
* them into the namespace.
|
||||
*
|
||||
* Copyright (C) 2022 Versity Software, Inc. All rights reserved.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public
|
||||
* License v2 as published by the Free Software Foundation.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* General Public License for more details.
|
||||
*/
|
||||
|
||||
#ifndef _GNU_SOURCE
|
||||
#define _GNU_SOURCE
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <fcntl.h>
|
||||
#include <errno.h>
|
||||
#include <sys/stat.h>
|
||||
#include <assert.h>
|
||||
#include <limits.h>
|
||||
|
||||
static void linkat_tmpfile_modes(char *dir, char *lpath, mode_t mode)
|
||||
{
|
||||
char proc_self[PATH_MAX];
|
||||
struct stat st;
|
||||
int ret;
|
||||
int fd;
|
||||
|
||||
umask(mode);
|
||||
printf("umask 0%o\n", mode);
|
||||
|
||||
fd = open(dir, O_RDWR | O_TMPFILE, 0777);
|
||||
if (fd < 0) {
|
||||
perror("open(O_TMPFILE)");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ret = fstat(fd, &st);
|
||||
if (ret < 0) {
|
||||
perror("fstat");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("fstat after open(0777): 0%o\n", st.st_mode);
|
||||
|
||||
snprintf(proc_self, sizeof(proc_self), "/proc/self/fd/%d", fd);
|
||||
|
||||
ret = linkat(AT_FDCWD, proc_self, AT_FDCWD, lpath, AT_SYMLINK_FOLLOW);
|
||||
if (ret < 0) {
|
||||
perror("linkat");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
close(fd);
|
||||
|
||||
ret = stat(lpath, &st);
|
||||
if (ret < 0) {
|
||||
perror("fstat");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
printf("stat after linkat: 0%o\n", st.st_mode);
|
||||
|
||||
ret = unlink(lpath);
|
||||
if (ret < 0) {
|
||||
perror("unlink");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
char *lpath;
|
||||
char *dir;
|
||||
|
||||
if (argc < 3) {
|
||||
printf("%s <open_dir> <linkat_path>\n", argv[0]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
dir = argv[1];
|
||||
lpath = argv[2];
|
||||
|
||||
linkat_tmpfile_modes(dir, lpath, 022);
|
||||
linkat_tmpfile_modes(dir, lpath, 077);
|
||||
|
||||
return 0;
|
||||
}
|
||||
16
tests/tests/o_tmpfile.sh
Normal file
16
tests/tests/o_tmpfile.sh
Normal file
@@ -0,0 +1,16 @@
|
||||
#
|
||||
# basic tests of O_TMPFILE
|
||||
#
|
||||
|
||||
t_require_commands stage_tmpfile hexdump
|
||||
|
||||
echo "== non-acl O_TMPFILE creation honors umask"
|
||||
o_tmpfile_umask "$T_D0" "$T_D0/umask-file"
|
||||
|
||||
echo "== stage from tmpfile"
|
||||
DEST_FILE="$T_D0/dest_file"
|
||||
stage_tmpfile $T_D0 $DEST_FILE
|
||||
hexdump -C "$DEST_FILE"
|
||||
rm -f "$DEST_FILE"
|
||||
|
||||
t_pass
|
||||
@@ -1,15 +0,0 @@
|
||||
#
|
||||
# Run tmpfile_stage and check the output with hexdump.
|
||||
#
|
||||
|
||||
t_require_commands stage_tmpfile hexdump
|
||||
|
||||
DEST_FILE="$T_D0/dest_file"
|
||||
|
||||
stage_tmpfile $T_D0 $DEST_FILE
|
||||
|
||||
hexdump -C "$DEST_FILE"
|
||||
|
||||
rm -fr "$DEST_FILE"
|
||||
|
||||
t_pass
|
||||
@@ -623,11 +623,9 @@ space of the volume making the output much more useful for inspection.
|
||||
.TP
|
||||
.B "META-DEVICE"
|
||||
The path to the metadata device for the filesystem whose metadata will be
|
||||
printed. Since this command reads via the host's buffer cache, it may not
|
||||
reflect the current blocks in the filesystem possibly written to the shared
|
||||
block devices from another host, unless
|
||||
.B blockdev \--flushbufs
|
||||
command is used first.
|
||||
printed. An attempt will be made to flush the host's buffer cache for
|
||||
this device with the BLKFLSBUF ioctl, or with posix_fadvise() if
|
||||
the path refers to a regular file.
|
||||
.RE
|
||||
.PD
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/ioctl.h>
|
||||
#include <linux/fs.h>
|
||||
#include <errno.h>
|
||||
@@ -103,3 +104,44 @@ char *size_str(u64 nr, unsigned size)
|
||||
|
||||
return suffixes[i];
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to flush the local read cache for a device. This is only a best
|
||||
* effort as these interfaces don't block waiting to fully purge the
|
||||
* cache. This is OK because it's used by cached readers that are known
|
||||
* to be racy anyway.
|
||||
*/
|
||||
int flush_device(int fd)
|
||||
{
|
||||
struct stat st;
|
||||
int ret;
|
||||
|
||||
ret = fstat(fd, &st);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "fstat failed: %s (%d)\n", strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
if (S_ISREG(st.st_mode)) {
|
||||
ret = posix_fadvise(fd, 0, st.st_size, POSIX_FADV_DONTNEED);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "POSIX_FADV_DONTNEED failed: %s (%d)\n",
|
||||
strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
} else if (S_ISBLK(st.st_mode)) {
|
||||
ret = ioctl(fd, BLKFLSBUF, 0);
|
||||
if (ret < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "BLKFLSBUF, failed: %s (%d)\n", strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -14,5 +14,6 @@ int device_size(char *path, int fd,
|
||||
char *use_type, u64 *size_ret);
|
||||
float size_flt(u64 nr, unsigned size);
|
||||
char *size_str(u64 nr, unsigned size);
|
||||
int flush_device(int fd);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -118,6 +118,33 @@ struct mkfs_args {
|
||||
struct scoutfs_quorum_slot slots[SCOUTFS_QUORUM_MAX_SLOTS];
|
||||
};
|
||||
|
||||
static int open_mkfs_dev(struct mkfs_args *args, char *path, mode_t mode, char *which)
|
||||
{
|
||||
int ret;
|
||||
int fd = -1;
|
||||
|
||||
fd = open(path, mode);
|
||||
if (fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open %s dev '%s': %s (%d)\n",
|
||||
which, path, strerror(errno), errno);
|
||||
goto out;
|
||||
}
|
||||
|
||||
ret = flush_device(fd);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
if (!args->force)
|
||||
ret = check_bdev(fd, path, which);
|
||||
|
||||
out:
|
||||
if (ret < 0 && fd >= 0)
|
||||
close(fd);
|
||||
|
||||
return ret ?: fd;
|
||||
}
|
||||
|
||||
/*
|
||||
* Make a new file system by writing:
|
||||
* - super blocks
|
||||
@@ -156,32 +183,17 @@ static int do_mkfs(struct mkfs_args *args)
|
||||
gettimeofday(&tv, NULL);
|
||||
pseudo_random_bytes(&fsid, sizeof(fsid));
|
||||
|
||||
meta_fd = open(args->meta_device, O_RDWR | O_EXCL);
|
||||
meta_fd = open_mkfs_dev(args, args->meta_device, O_RDWR | O_EXCL, "meta");
|
||||
if (meta_fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open '%s': %s (%d)\n",
|
||||
args->meta_device, strerror(errno), errno);
|
||||
ret = meta_fd;
|
||||
goto out;
|
||||
}
|
||||
if (!args->force) {
|
||||
ret = check_bdev(meta_fd, args->meta_device, "meta");
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
data_fd = open(args->data_device, O_RDWR | O_EXCL);
|
||||
data_fd = open_mkfs_dev(args, args->data_device, O_RDWR | O_EXCL, "data");
|
||||
if (data_fd < 0) {
|
||||
ret = -errno;
|
||||
fprintf(stderr, "failed to open '%s': %s (%d)\n",
|
||||
args->data_device, strerror(errno), errno);
|
||||
ret = data_fd;
|
||||
goto out;
|
||||
}
|
||||
if (!args->force) {
|
||||
ret = check_bdev(data_fd, args->data_device, "data");
|
||||
if (ret)
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
super = calloc(1, SCOUTFS_BLOCK_SM_SIZE);
|
||||
bt = calloc(1, SCOUTFS_BLOCK_LG_SIZE);
|
||||
|
||||
@@ -27,6 +27,7 @@
|
||||
#include "avl.h"
|
||||
#include "srch.h"
|
||||
#include "leaf_item_hash.h"
|
||||
#include "dev.h"
|
||||
|
||||
static void print_block_header(struct scoutfs_block_header *hdr, int size)
|
||||
{
|
||||
@@ -1107,7 +1108,12 @@ static int do_print(struct print_args *args)
|
||||
return ret;
|
||||
}
|
||||
|
||||
ret = flush_device(fd);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = print_volume(fd, args);
|
||||
out:
|
||||
close(fd);
|
||||
return ret;
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user