mirror of
https://github.com/versity/scoutfs.git
synced 2026-04-30 01:46:54 +00:00
Compare commits
16 Commits
zab/block_
...
zab/coding
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8ee41caa24 | ||
|
|
09c879bcf1 | ||
|
|
3de703757f | ||
|
|
7d67489b0c | ||
|
|
73084462e9 | ||
|
|
8c81af2b9b | ||
|
|
efe5d92458 | ||
|
|
d39e56d953 | ||
|
|
5661a1fb02 | ||
|
|
12fa289399 | ||
|
|
75e8fab57c | ||
|
|
513d6b2734 | ||
|
|
f8d39610a2 | ||
|
|
c470c1c9f6 | ||
|
|
cad902b9cd | ||
|
|
e163f3b099 |
82
CodingStyle.txt
Normal file
82
CodingStyle.txt
Normal file
@@ -0,0 +1,82 @@
|
||||
|
||||
We try to maintain a consistent coding style across the project. It's
|
||||
admitedly arbitrary and starts with and is based on upstream's
|
||||
Documentation/CodingStyle. Conventions are added here as they come up
|
||||
during review. We'll demonstrate each sylistic preference with a diff
|
||||
snippet.
|
||||
|
||||
== Try to make one exit point for reasonably long functions
|
||||
|
||||
{
|
||||
- void *a;
|
||||
- void *b;
|
||||
+ void *a = NULL;
|
||||
+ void *b = NULL;
|
||||
+ int ret;
|
||||
|
||||
a = kalloc();
|
||||
- if (!a)
|
||||
- return 1;
|
||||
+ if (!a) {
|
||||
+ ret = 1;
|
||||
+ goto out;
|
||||
+ }
|
||||
|
||||
b = kalloc();
|
||||
if (!b) {
|
||||
- kfree(a);
|
||||
- return 2;
|
||||
+ ret = 2;
|
||||
+ goto out;
|
||||
}
|
||||
|
||||
- return 3
|
||||
+ ret = 3;
|
||||
+out:
|
||||
+ kfree(a);
|
||||
+ kfree(b);
|
||||
+ return ret;
|
||||
}
|
||||
|
||||
The idea is to initialize all state at the top of the function,
|
||||
modifying it throughout, and clean it all up at the end. Having one
|
||||
exit point also gives us a place to add tracing of function exit.
|
||||
|
||||
== Multiple declarations on a line
|
||||
|
||||
- int i, j;
|
||||
+ int i;
|
||||
+ int j;
|
||||
|
||||
Declare function variables one per line. The verbose declarations
|
||||
create pressure to think about excessive stack use or over-long
|
||||
functions, makes initializers clear, and leaves room for comments.
|
||||
|
||||
== Balance braces
|
||||
|
||||
- if (IS_ERR(super_block))
|
||||
+ if (IS_ERR(super_block)) {
|
||||
return PTR_ERR(super_block);
|
||||
- else {
|
||||
+ } else {
|
||||
*super_res = *super_block;
|
||||
kfree(super_block);
|
||||
return 0;
|
||||
}
|
||||
|
||||
*nervous twitch*
|
||||
|
||||
== Cute variable defintion waterfalls
|
||||
|
||||
+ struct block_device *meta_bdev;
|
||||
struct scoutfs_sb_info *sbi;
|
||||
struct mount_options opts;
|
||||
- struct block_device *meta_bdev;
|
||||
struct inode *inode;
|
||||
|
||||
This isn't strictly necessary, but it's nice to try and make a pretty
|
||||
descending length of variable distributions. It often has the
|
||||
accidental effect of sorting definitions by decreasing complexity. I
|
||||
tend to group types when the name lengths are pretty close, even if
|
||||
they're not strictly sorted, so that all the ints, u64s, keys, etc, are
|
||||
all together.
|
||||
@@ -252,7 +252,7 @@ void scoutfs_alloc_init(struct scoutfs_alloc *alloc,
|
||||
{
|
||||
memset(alloc, 0, sizeof(struct scoutfs_alloc));
|
||||
|
||||
spin_lock_init(&alloc->lock);
|
||||
seqlock_init(&alloc->seqlock);
|
||||
mutex_init(&alloc->mutex);
|
||||
alloc->avail = *avail;
|
||||
alloc->freed = *freed;
|
||||
@@ -526,7 +526,8 @@ int scoutfs_alloc_meta(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
spin_lock(&alloc->lock);
|
||||
write_seqlock(&alloc->seqlock);
|
||||
|
||||
lblk = alloc->dirty_avail_bl->data;
|
||||
if (WARN_ON_ONCE(lblk->nr == 0)) {
|
||||
/* shouldn't happen, transaction should commit first */
|
||||
@@ -536,7 +537,8 @@ int scoutfs_alloc_meta(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
list_block_remove(&alloc->avail, lblk, 1);
|
||||
ret = 0;
|
||||
}
|
||||
spin_unlock(&alloc->lock);
|
||||
|
||||
write_sequnlock(&alloc->seqlock);
|
||||
|
||||
out:
|
||||
if (ret < 0)
|
||||
@@ -559,7 +561,8 @@ int scoutfs_free_meta(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
spin_lock(&alloc->lock);
|
||||
write_seqlock(&alloc->seqlock);
|
||||
|
||||
lblk = alloc->dirty_freed_bl->data;
|
||||
if (WARN_ON_ONCE(list_block_space(lblk->nr) == 0)) {
|
||||
/* shouldn't happen, transaction should commit first */
|
||||
@@ -568,7 +571,8 @@ int scoutfs_free_meta(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
list_block_add(&alloc->freed, lblk, blkno);
|
||||
ret = 0;
|
||||
}
|
||||
spin_unlock(&alloc->lock);
|
||||
|
||||
write_sequnlock(&alloc->seqlock);
|
||||
|
||||
out:
|
||||
scoutfs_inc_counter(sb, alloc_free_meta);
|
||||
@@ -1066,17 +1070,23 @@ out:
|
||||
|
||||
/*
|
||||
* Returns true if meta avail and free don't have room for the given
|
||||
* number of alloctions or frees.
|
||||
* number of allocations or frees. This is called at a significantly
|
||||
* higher frequency than allocations as writers try to enter
|
||||
* transactions. This is the only reader of the seqlock which gives
|
||||
* read-mostly sampling instead of bouncing a spinlock around all the
|
||||
* cores.
|
||||
*/
|
||||
bool scoutfs_alloc_meta_low(struct super_block *sb,
|
||||
struct scoutfs_alloc *alloc, u32 nr)
|
||||
{
|
||||
unsigned int seq;
|
||||
bool lo;
|
||||
|
||||
spin_lock(&alloc->lock);
|
||||
lo = le32_to_cpu(alloc->avail.first_nr) < nr ||
|
||||
list_block_space(alloc->freed.first_nr) < nr;
|
||||
spin_unlock(&alloc->lock);
|
||||
do {
|
||||
seq = read_seqbegin(&alloc->seqlock);
|
||||
lo = le32_to_cpu(alloc->avail.first_nr) < nr ||
|
||||
list_block_space(alloc->freed.first_nr) < nr;
|
||||
} while (read_seqretry(&alloc->seqlock, seq));
|
||||
|
||||
return lo;
|
||||
}
|
||||
|
||||
@@ -72,7 +72,8 @@
|
||||
* transaction.
|
||||
*/
|
||||
struct scoutfs_alloc {
|
||||
spinlock_t lock;
|
||||
/* writers rarely modify list_head avail/freed. readers often check for _meta_alloc_low */
|
||||
seqlock_t seqlock;
|
||||
struct mutex mutex;
|
||||
struct scoutfs_block *dirty_avail_bl;
|
||||
struct scoutfs_block *dirty_freed_bl;
|
||||
|
||||
@@ -49,7 +49,6 @@ struct client_info {
|
||||
struct delayed_work connect_dwork;
|
||||
|
||||
u64 server_term;
|
||||
u64 greeting_umb;
|
||||
|
||||
bool sending_farewell;
|
||||
int farewell_error;
|
||||
|
||||
@@ -86,11 +86,33 @@ struct scoutfs_timespec {
|
||||
__u8 __pad[4];
|
||||
};
|
||||
|
||||
/* XXX ipv6 */
|
||||
struct scoutfs_inet_addr {
|
||||
__le32 addr;
|
||||
enum scoutfs_inet_family {
|
||||
SCOUTFS_AF_NONE = 0,
|
||||
SCOUTFS_AF_IPV4 = 1,
|
||||
SCOUTFS_AF_IPV6 = 2,
|
||||
};
|
||||
|
||||
struct scoutfs_inet_addr4 {
|
||||
__le16 family;
|
||||
__le16 port;
|
||||
__u8 __pad[2];
|
||||
__le32 addr;
|
||||
};
|
||||
|
||||
/*
|
||||
* Not yet supported by code.
|
||||
*/
|
||||
struct scoutfs_inet_addr6 {
|
||||
__le16 family;
|
||||
__le16 port;
|
||||
__u8 addr[16];
|
||||
__le32 flow_info;
|
||||
__le32 scope_id;
|
||||
__u8 __pad[4];
|
||||
};
|
||||
|
||||
union scoutfs_inet_addr {
|
||||
struct scoutfs_inet_addr4 v4;
|
||||
struct scoutfs_inet_addr6 v6;
|
||||
};
|
||||
|
||||
/*
|
||||
@@ -591,7 +613,7 @@ struct scoutfs_quorum_message {
|
||||
struct scoutfs_quorum_config {
|
||||
__le64 version;
|
||||
struct scoutfs_quorum_slot {
|
||||
struct scoutfs_inet_addr addr;
|
||||
union scoutfs_inet_addr addr;
|
||||
} slots[SCOUTFS_QUORUM_MAX_SLOTS];
|
||||
};
|
||||
|
||||
|
||||
@@ -1621,19 +1621,28 @@ int scoutfs_orphan_inode(struct inode *inode)
|
||||
}
|
||||
|
||||
/*
|
||||
* Track an inode that could have dirty pages. Used to kick off writeback
|
||||
* on all dirty pages during transaction commit without tying ourselves in
|
||||
* knots trying to call through the high level vfs sync methods.
|
||||
* Track an inode that could have dirty pages. Used to kick off
|
||||
* writeback on all dirty pages during transaction commit without tying
|
||||
* ourselves in knots trying to call through the high level vfs sync
|
||||
* methods.
|
||||
*
|
||||
* This is called by writers who hold the inode and transaction. The
|
||||
* inode's presence in the rbtree is removed by destroy_inode, prevented
|
||||
* by the inode hold, and by committing the transaction, which is
|
||||
* prevented by holding the transaction. The inode can only go from
|
||||
* empty to on the rbtree while we're here.
|
||||
*/
|
||||
void scoutfs_inode_queue_writeback(struct inode *inode)
|
||||
{
|
||||
DECLARE_INODE_SB_INFO(inode->i_sb, inf);
|
||||
struct scoutfs_inode_info *si = SCOUTFS_I(inode);
|
||||
|
||||
spin_lock(&inf->writeback_lock);
|
||||
if (RB_EMPTY_NODE(&si->writeback_node))
|
||||
insert_writeback_inode(inf, si);
|
||||
spin_unlock(&inf->writeback_lock);
|
||||
if (RB_EMPTY_NODE(&si->writeback_node)) {
|
||||
spin_lock(&inf->writeback_lock);
|
||||
if (RB_EMPTY_NODE(&si->writeback_node))
|
||||
insert_writeback_inode(inf, si);
|
||||
spin_unlock(&inf->writeback_lock);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
||||
@@ -586,7 +586,9 @@ static void init_lock_clients_key(struct scoutfs_key *key, u64 rid)
|
||||
* the client had already talked to the server then we must find an
|
||||
* existing record for it and should begin recovery. If it doesn't have
|
||||
* a record then its timed out and we can't allow it to reconnect. If
|
||||
* its connecting for the first time then we insert a new record. If
|
||||
* we're creating a new record for a client we can see EEXIST if the
|
||||
* greeting is resent to a new server after the record was committed but
|
||||
* before the response was received by the client.
|
||||
*
|
||||
* This is running in concurrent client greeting processing contexts.
|
||||
*/
|
||||
@@ -611,6 +613,8 @@ int scoutfs_lock_server_greeting(struct super_block *sb, u64 rid,
|
||||
ret = scoutfs_btree_insert(sb, inf->alloc, inf->wri,
|
||||
&super->lock_clients,
|
||||
&key, NULL, 0);
|
||||
if (ret == -EEXIST)
|
||||
ret = 0;
|
||||
}
|
||||
mutex_unlock(&inf->mutex);
|
||||
|
||||
|
||||
@@ -1546,9 +1546,8 @@ void scoutfs_net_client_greeting(struct super_block *sb,
|
||||
* response and they can disconnect cleanly.
|
||||
*
|
||||
* At this point our connection is idle except for send submissions and
|
||||
* shutdown being queued. Once we shut down a We completely own a We
|
||||
* have exclusive access to a previous conn once its shutdown and we set
|
||||
* _freeing.
|
||||
* shutdown being queued. We have exclusive access to the previous conn
|
||||
* once it's shutdown and we set _freeing.
|
||||
*/
|
||||
void scoutfs_net_server_greeting(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
|
||||
@@ -90,19 +90,13 @@ enum conn_flags {
|
||||
#define SIN_ARG(sin) sin, be16_to_cpu((sin)->sin_port)
|
||||
|
||||
static inline void scoutfs_addr_to_sin(struct sockaddr_in *sin,
|
||||
struct scoutfs_inet_addr *addr)
|
||||
union scoutfs_inet_addr *addr)
|
||||
{
|
||||
sin->sin_family = AF_INET;
|
||||
sin->sin_addr.s_addr = cpu_to_be32(le32_to_cpu(addr->addr));
|
||||
sin->sin_port = cpu_to_be16(le16_to_cpu(addr->port));
|
||||
}
|
||||
BUG_ON(addr->v4.family != cpu_to_le16(SCOUTFS_AF_IPV4));
|
||||
|
||||
static inline void scoutfs_addr_from_sin(struct scoutfs_inet_addr *addr,
|
||||
struct sockaddr_in *sin)
|
||||
{
|
||||
addr->addr = be32_to_le32(sin->sin_addr.s_addr);
|
||||
addr->port = be16_to_le16(sin->sin_port);
|
||||
memset(addr->__pad, 0, sizeof(addr->__pad));
|
||||
sin->sin_family = AF_INET;
|
||||
sin->sin_addr.s_addr = cpu_to_be32(le32_to_cpu(addr->v4.addr));
|
||||
sin->sin_port = cpu_to_be16(le16_to_cpu(addr->v4.port));
|
||||
}
|
||||
|
||||
struct scoutfs_net_connection *
|
||||
|
||||
@@ -138,7 +138,7 @@ static bool quorum_slot_present(struct scoutfs_super_block *super, int i)
|
||||
{
|
||||
BUG_ON(i < 0 || i > SCOUTFS_QUORUM_MAX_SLOTS);
|
||||
|
||||
return super->qconf.slots[i].addr.addr != 0;
|
||||
return super->qconf.slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_IPV4);
|
||||
}
|
||||
|
||||
static ktime_t election_timeout(void)
|
||||
@@ -976,6 +976,9 @@ static int verify_quorum_slots(struct super_block *sb)
|
||||
}
|
||||
|
||||
for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) {
|
||||
if (!quorum_slot_present(super, j))
|
||||
continue;
|
||||
|
||||
scoutfs_quorum_slot_sin(super, j, &other);
|
||||
|
||||
if (sin.sin_addr.s_addr == other.sin_addr.s_addr &&
|
||||
|
||||
@@ -423,61 +423,34 @@ TRACE_EVENT(scoutfs_trans_write_func,
|
||||
TP_printk(SCSBF" dirty %lu", SCSB_TRACE_ARGS, __entry->dirty)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_release_trans,
|
||||
TP_PROTO(struct super_block *sb, void *rsv, unsigned int rsv_holders,
|
||||
unsigned int tri_holders,
|
||||
unsigned int tri_writing),
|
||||
DECLARE_EVENT_CLASS(scoutfs_trans_hold_release_class,
|
||||
TP_PROTO(struct super_block *sb, void *journal_info, int holders),
|
||||
|
||||
TP_ARGS(sb, rsv, rsv_holders, tri_holders, tri_writing),
|
||||
TP_ARGS(sb, journal_info, holders),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(void *, rsv)
|
||||
__field(unsigned int, rsv_holders)
|
||||
__field(unsigned int, tri_holders)
|
||||
__field(unsigned int, tri_writing)
|
||||
__field(unsigned long, journal_info)
|
||||
__field(int, holders)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->rsv = rsv;
|
||||
__entry->rsv_holders = rsv_holders;
|
||||
__entry->tri_holders = tri_holders;
|
||||
__entry->tri_writing = tri_writing;
|
||||
__entry->journal_info = (unsigned long)journal_info;
|
||||
__entry->holders = holders;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" rsv %p holders %u trans holders %u writing %u",
|
||||
SCSB_TRACE_ARGS, __entry->rsv, __entry->rsv_holders,
|
||||
__entry->tri_holders, __entry->tri_writing)
|
||||
TP_printk(SCSBF" journal_info 0x%0lx holders %d",
|
||||
SCSB_TRACE_ARGS, __entry->journal_info, __entry->holders)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_trans_acquired_hold,
|
||||
TP_PROTO(struct super_block *sb,
|
||||
void *rsv, unsigned int rsv_holders,
|
||||
unsigned int tri_holders,
|
||||
unsigned int tri_writing),
|
||||
|
||||
TP_ARGS(sb, rsv, rsv_holders, tri_holders, tri_writing),
|
||||
|
||||
TP_STRUCT__entry(
|
||||
SCSB_TRACE_FIELDS
|
||||
__field(void *, rsv)
|
||||
__field(unsigned int, rsv_holders)
|
||||
__field(unsigned int, tri_holders)
|
||||
__field(unsigned int, tri_writing)
|
||||
),
|
||||
|
||||
TP_fast_assign(
|
||||
SCSB_TRACE_ASSIGN(sb);
|
||||
__entry->rsv = rsv;
|
||||
__entry->rsv_holders = rsv_holders;
|
||||
__entry->tri_holders = tri_holders;
|
||||
__entry->tri_writing = tri_writing;
|
||||
),
|
||||
|
||||
TP_printk(SCSBF" rsv %p holders %u trans holders %u writing %u",
|
||||
SCSB_TRACE_ARGS, __entry->rsv, __entry->rsv_holders,
|
||||
__entry->tri_holders, __entry->tri_writing)
|
||||
DEFINE_EVENT(scoutfs_trans_hold_release_class, scoutfs_trans_acquired_hold,
|
||||
TP_PROTO(struct super_block *sb, void *journal_info, int holders),
|
||||
TP_ARGS(sb, journal_info, holders)
|
||||
);
|
||||
DEFINE_EVENT(scoutfs_trans_hold_release_class, scoutfs_release_trans,
|
||||
TP_PROTO(struct super_block *sb, void *journal_info, int holders),
|
||||
TP_ARGS(sb, journal_info, holders)
|
||||
);
|
||||
|
||||
TRACE_EVENT(scoutfs_ioc_release,
|
||||
|
||||
@@ -1024,6 +1024,12 @@ static void init_mounted_client_key(struct scoutfs_key *key, u64 rid)
|
||||
};
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert a new mounted client item for a client that is sending us a
|
||||
* greeting that hasn't yet seen a response. The greeting can be
|
||||
* retransmitted to a new server after the previous inserted the item so
|
||||
* it's acceptable to see -EEXIST.
|
||||
*/
|
||||
static int insert_mounted_client(struct super_block *sb, u64 rid,
|
||||
u64 gr_flags)
|
||||
{
|
||||
@@ -1042,6 +1048,8 @@ static int insert_mounted_client(struct super_block *sb, u64 rid,
|
||||
ret = scoutfs_btree_insert(sb, &server->alloc, &server->wri,
|
||||
&super->mounted_clients, &key, &mcv,
|
||||
sizeof(mcv));
|
||||
if (ret == -EEXIST)
|
||||
ret = 0;
|
||||
mutex_unlock(&server->mounted_clients_mutex);
|
||||
|
||||
return ret;
|
||||
|
||||
299
kmod/src/trans.c
299
kmod/src/trans.c
@@ -39,17 +39,15 @@
|
||||
* track the relationships between dirty blocks so there's only ever one
|
||||
* transaction being built.
|
||||
*
|
||||
* The copy of the on-disk super block in the fs sb info has its header
|
||||
* sequence advanced so that new dirty blocks inherit this dirty
|
||||
* sequence number. It's only advanced once all those dirty blocks are
|
||||
* reachable after having first written them all out and then the new
|
||||
* super with that seq. It's first incremented at mount.
|
||||
* Committing the current dirty transaction can be triggered by sync, a
|
||||
* regular background commit interval, reaching a dirty block threshold,
|
||||
* or the transaction running out of its private allocator resources.
|
||||
* Once all the current holders release the writing func writes out the
|
||||
* dirty blocks while excluding holders until it finishes.
|
||||
*
|
||||
* Unfortunately writers can nest. We don't bother trying to special
|
||||
* case holding a transaction that you're already holding because that
|
||||
* requires per-task storage. We just let anyone hold transactions
|
||||
* regardless of waiters waiting to write, which risks waiters waiting a
|
||||
* very long time.
|
||||
* Unfortunately writing holders can nest. We track nested hold callers
|
||||
* with the per-task journal_info pointer to avoid deadlocks between
|
||||
* holders that might otherwise wait for a pending commit.
|
||||
*/
|
||||
|
||||
/* sync dirty data at least this often */
|
||||
@@ -59,9 +57,7 @@
|
||||
* XXX move the rest of the super trans_ fields here.
|
||||
*/
|
||||
struct trans_info {
|
||||
spinlock_t lock;
|
||||
unsigned holders;
|
||||
bool writing;
|
||||
atomic_t holders;
|
||||
|
||||
struct scoutfs_log_trees lt;
|
||||
struct scoutfs_alloc alloc;
|
||||
@@ -71,17 +67,9 @@ struct trans_info {
|
||||
#define DECLARE_TRANS_INFO(sb, name) \
|
||||
struct trans_info *name = SCOUTFS_SB(sb)->trans_info
|
||||
|
||||
static bool drained_holders(struct trans_info *tri)
|
||||
{
|
||||
bool drained;
|
||||
|
||||
spin_lock(&tri->lock);
|
||||
tri->writing = true;
|
||||
drained = tri->holders == 0;
|
||||
spin_unlock(&tri->lock);
|
||||
|
||||
return drained;
|
||||
}
|
||||
/* avoid the high sign bit out of an abundance of caution*/
|
||||
#define TRANS_HOLDERS_WRITE_FUNC_BIT (1 << 30)
|
||||
#define TRANS_HOLDERS_COUNT_MASK (TRANS_HOLDERS_WRITE_FUNC_BIT - 1)
|
||||
|
||||
static int commit_btrees(struct super_block *sb)
|
||||
{
|
||||
@@ -126,6 +114,36 @@ bool scoutfs_trans_has_dirty(struct super_block *sb)
|
||||
return scoutfs_block_writer_has_dirty(sb, &tri->wri);
|
||||
}
|
||||
|
||||
/*
|
||||
* This is racing with wait_event conditions, make sure our atomic
|
||||
* stores and waitqueue loads are ordered.
|
||||
*/
|
||||
static void sub_holders_and_wake(struct super_block *sb, int val)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
DECLARE_TRANS_INFO(sb, tri);
|
||||
|
||||
atomic_sub(val, &tri->holders);
|
||||
smp_mb(); /* make sure sub is visible before we wake */
|
||||
if (waitqueue_active(&sbi->trans_hold_wq))
|
||||
wake_up(&sbi->trans_hold_wq);
|
||||
}
|
||||
|
||||
/*
|
||||
* called as a wait_event condition, needs to be careful to not change
|
||||
* task state and is racing with waking paths that sub_return, test, and
|
||||
* wake.
|
||||
*/
|
||||
static bool drained_holders(struct trans_info *tri)
|
||||
{
|
||||
int holders;
|
||||
|
||||
smp_mb(); /* make sure task in wait_event queue before atomic read */
|
||||
holders = atomic_read(&tri->holders) & TRANS_HOLDERS_COUNT_MASK;
|
||||
|
||||
return holders == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* This work func is responsible for writing out all the dirty blocks
|
||||
* that make up the current dirty transaction. It prevents writers from
|
||||
@@ -162,6 +180,9 @@ void scoutfs_trans_write_func(struct work_struct *work)
|
||||
|
||||
sbi->trans_task = current;
|
||||
|
||||
/* mark that we're writing so holders wait for us to finish and clear our bit */
|
||||
atomic_add(TRANS_HOLDERS_WRITE_FUNC_BIT, &tri->holders);
|
||||
|
||||
wait_event(sbi->trans_hold_wq, drained_holders(tri));
|
||||
|
||||
trace_scoutfs_trans_write_func(sb,
|
||||
@@ -213,11 +234,8 @@ out:
|
||||
spin_unlock(&sbi->trans_write_lock);
|
||||
wake_up(&sbi->trans_write_wq);
|
||||
|
||||
spin_lock(&tri->lock);
|
||||
tri->writing = false;
|
||||
spin_unlock(&tri->lock);
|
||||
|
||||
wake_up(&sbi->trans_hold_wq);
|
||||
/* we're done, wake waiting holders */
|
||||
sub_holders_and_wake(sb, TRANS_HOLDERS_WRITE_FUNC_BIT);
|
||||
|
||||
sbi->trans_task = NULL;
|
||||
|
||||
@@ -309,53 +327,83 @@ void scoutfs_trans_restart_sync_deadline(struct super_block *sb)
|
||||
}
|
||||
|
||||
/*
|
||||
* Each thread reserves space in the segment for their dirty items while
|
||||
* they hold the transaction. This is calculated before the first
|
||||
* transaction hold is acquired. It includes all the potential nested
|
||||
* item manipulation that could happen with the transaction held.
|
||||
* Including nested holds avoids having to deal with writing out partial
|
||||
* transactions while a caller still holds the transaction.
|
||||
* We store nested holders in the lower bits of journal_info. We use
|
||||
* some higher bits as a magic value to detect if something goes
|
||||
* horribly wrong and it gets clobbered.
|
||||
*/
|
||||
#define TRANS_JI_MAGIC 0xd5700000
|
||||
#define TRANS_JI_MAGIC_MASK 0xfff00000
|
||||
#define TRANS_JI_COUNT_MASK 0x000fffff
|
||||
|
||||
#define SCOUTFS_RESERVATION_MAGIC 0xd57cd13b
|
||||
struct scoutfs_reservation {
|
||||
unsigned magic;
|
||||
unsigned holders;
|
||||
};
|
||||
/* returns true if a caller already had a holder counted in journal_info */
|
||||
static bool inc_journal_info_holders(void)
|
||||
{
|
||||
unsigned long holders = (unsigned long)current->journal_info;
|
||||
|
||||
WARN_ON_ONCE(holders != 0 && ((holders & TRANS_JI_MAGIC_MASK) != TRANS_JI_MAGIC));
|
||||
|
||||
if (holders == 0)
|
||||
holders = TRANS_JI_MAGIC;
|
||||
holders++;
|
||||
|
||||
current->journal_info = (void *)holders;
|
||||
return (holders > (TRANS_JI_MAGIC | 1));
|
||||
}
|
||||
|
||||
static void dec_journal_info_holders(void)
|
||||
{
|
||||
unsigned long holders = (unsigned long)current->journal_info;
|
||||
|
||||
WARN_ON_ONCE(holders != 0 && ((holders & TRANS_JI_MAGIC_MASK) != TRANS_JI_MAGIC));
|
||||
WARN_ON_ONCE((holders & TRANS_JI_COUNT_MASK) == 0);
|
||||
|
||||
holders--;
|
||||
if (holders == TRANS_JI_MAGIC)
|
||||
holders = 0;
|
||||
|
||||
current->journal_info = (void *)holders;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to hold the transaction. If a caller already holds the trans then
|
||||
* we piggy back on their hold. We wait if the writer is trying to
|
||||
* write out the transation. And if our items won't fit then we kick off
|
||||
* a write.
|
||||
* This is called as the wait_event condition for holding a transaction.
|
||||
* Increment the holder count unless the writer is present. We return
|
||||
* false to wait until the writer finishes and wakes us.
|
||||
*
|
||||
* This is called as a condition for wait_event. It is very limited in
|
||||
* the locking (blocking) it can do because the caller has set the task
|
||||
* state before testing the condition safely race with waking after
|
||||
* setting the condition. Our checking the amount of dirty metadata
|
||||
* blocks and free data blocks is racy, but we don't mind the risk of
|
||||
* delaying or prematurely forcing commits.
|
||||
* This can be racing with itself while there's no waiters. We retry
|
||||
* the cmpxchg instead of returning and waiting.
|
||||
*/
|
||||
static bool acquired_hold(struct super_block *sb,
|
||||
struct scoutfs_reservation *rsv)
|
||||
static bool inc_holders_unless_writer(struct trans_info *tri)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
DECLARE_TRANS_INFO(sb, tri);
|
||||
bool acquired = false;
|
||||
int holders;
|
||||
|
||||
spin_lock(&tri->lock);
|
||||
do {
|
||||
smp_mb(); /* make sure we read after wait puts task in queue */
|
||||
holders = atomic_read(&tri->holders);
|
||||
if (holders & TRANS_HOLDERS_WRITE_FUNC_BIT)
|
||||
return false;
|
||||
|
||||
trace_scoutfs_trans_acquired_hold(sb, rsv, rsv->holders,
|
||||
tri->holders, tri->writing);
|
||||
} while (atomic_cmpxchg(&tri->holders, holders, holders + 1) != holders);
|
||||
|
||||
/* use a caller's existing reservation */
|
||||
if (rsv->holders)
|
||||
goto hold;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* wait until the writing thread is finished */
|
||||
if (tri->writing)
|
||||
goto out;
|
||||
/*
|
||||
* As we drop the last trans holder we try to wake a writing thread that
|
||||
* was waiting for us to finish.
|
||||
*/
|
||||
static void release_holders(struct super_block *sb)
|
||||
{
|
||||
dec_journal_info_holders();
|
||||
sub_holders_and_wake(sb, 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* The caller has incremented holders so it is blocking commits. We
|
||||
* make some quick checks to see if we need to trigger and wait for
|
||||
* another commit before proceeding.
|
||||
*/
|
||||
static bool commit_before_hold(struct super_block *sb, struct trans_info *tri)
|
||||
{
|
||||
/*
|
||||
* In theory each dirty item page could be straddling two full
|
||||
* blocks, requiring 4 allocations for each item cache page.
|
||||
@@ -365,11 +413,9 @@ static bool acquired_hold(struct super_block *sb,
|
||||
* that it accounts for having to dirty parent blocks and
|
||||
* whatever dirtying is done during the transaction hold.
|
||||
*/
|
||||
if (scoutfs_alloc_meta_low(sb, &tri->alloc,
|
||||
scoutfs_item_dirty_pages(sb) * 2)) {
|
||||
if (scoutfs_alloc_meta_low(sb, &tri->alloc, scoutfs_item_dirty_pages(sb) * 2)) {
|
||||
scoutfs_inc_counter(sb, trans_commit_dirty_meta_full);
|
||||
queue_trans_work(sbi);
|
||||
goto out;
|
||||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -381,57 +427,74 @@ static bool acquired_hold(struct super_block *sb,
|
||||
*/
|
||||
if (scoutfs_alloc_meta_low(sb, &tri->alloc, 16)) {
|
||||
scoutfs_inc_counter(sb, trans_commit_meta_alloc_low);
|
||||
queue_trans_work(sbi);
|
||||
goto out;
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Try to refill data allocator before premature enospc */
|
||||
if (scoutfs_data_alloc_free_bytes(sb) <= SCOUTFS_TRANS_DATA_ALLOC_LWM) {
|
||||
scoutfs_inc_counter(sb, trans_commit_data_alloc_low);
|
||||
queue_trans_work(sbi);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool acquired_hold(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
DECLARE_TRANS_INFO(sb, tri);
|
||||
bool acquired;
|
||||
|
||||
/* if a caller already has a hold we acquire unconditionally */
|
||||
if (inc_journal_info_holders()) {
|
||||
atomic_inc(&tri->holders);
|
||||
acquired = true;
|
||||
goto out;
|
||||
}
|
||||
|
||||
hold:
|
||||
rsv->holders++;
|
||||
tri->holders++;
|
||||
/* wait if the writer is blocking holds */
|
||||
if (!inc_holders_unless_writer(tri)) {
|
||||
dec_journal_info_holders();
|
||||
acquired = false;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* wait if we're triggering another commit */
|
||||
if (commit_before_hold(sb, tri)) {
|
||||
release_holders(sb);
|
||||
queue_trans_work(sbi);
|
||||
acquired = false;
|
||||
goto out;
|
||||
}
|
||||
|
||||
trace_scoutfs_trans_acquired_hold(sb, current->journal_info, atomic_read(&tri->holders));
|
||||
acquired = true;
|
||||
|
||||
out:
|
||||
|
||||
spin_unlock(&tri->lock);
|
||||
|
||||
return acquired;
|
||||
}
|
||||
|
||||
/*
|
||||
* Try to hold the transaction. Holding the transaction prevents it
|
||||
* from being committed. If a transaction is currently being written
|
||||
* then we'll block until it's done and our hold can be granted.
|
||||
*
|
||||
* If a caller already holds the trans then we unconditionally acquire
|
||||
* our hold and return to avoid deadlocks with our caller, the writing
|
||||
* thread, and us. We record nested holds in a call stack with the
|
||||
* journal_info pointer in the task_struct.
|
||||
*
|
||||
* The writing thread marks itself as a global trans_task which
|
||||
* short-circuits all the hold machinery so it can call code that would
|
||||
* otherwise try to hold transactions while it is writing.
|
||||
*/
|
||||
int scoutfs_hold_trans(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_reservation *rsv;
|
||||
int ret;
|
||||
|
||||
if (current == sbi->trans_task)
|
||||
return 0;
|
||||
|
||||
rsv = current->journal_info;
|
||||
if (rsv == NULL) {
|
||||
rsv = kzalloc(sizeof(struct scoutfs_reservation), GFP_NOFS);
|
||||
if (!rsv)
|
||||
return -ENOMEM;
|
||||
|
||||
rsv->magic = SCOUTFS_RESERVATION_MAGIC;
|
||||
current->journal_info = rsv;
|
||||
}
|
||||
|
||||
BUG_ON(rsv->magic != SCOUTFS_RESERVATION_MAGIC);
|
||||
|
||||
ret = wait_event_interruptible(sbi->trans_hold_wq,
|
||||
acquired_hold(sb, rsv));
|
||||
if (ret && rsv->holders == 0) {
|
||||
current->journal_info = NULL;
|
||||
kfree(rsv);
|
||||
}
|
||||
return ret;
|
||||
return wait_event_interruptible(sbi->trans_hold_wq, acquired_hold(sb));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -441,50 +504,22 @@ int scoutfs_hold_trans(struct super_block *sb)
|
||||
*/
|
||||
bool scoutfs_trans_held(void)
|
||||
{
|
||||
struct scoutfs_reservation *rsv = current->journal_info;
|
||||
unsigned long holders = (unsigned long)current->journal_info;
|
||||
|
||||
return rsv && rsv->magic == SCOUTFS_RESERVATION_MAGIC;
|
||||
return (holders != 0 && ((holders & TRANS_JI_MAGIC_MASK) == TRANS_JI_MAGIC));
|
||||
}
|
||||
|
||||
/*
|
||||
* As we drop the last hold in the reservation we try and wake other
|
||||
* hold attempts that were waiting for space. As we drop the last trans
|
||||
* holder we try to wake a writing thread that was waiting for us to
|
||||
* finish.
|
||||
*/
|
||||
void scoutfs_release_trans(struct super_block *sb)
|
||||
{
|
||||
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
|
||||
struct scoutfs_reservation *rsv;
|
||||
DECLARE_TRANS_INFO(sb, tri);
|
||||
bool wake = false;
|
||||
|
||||
if (current == sbi->trans_task)
|
||||
return;
|
||||
|
||||
rsv = current->journal_info;
|
||||
BUG_ON(!rsv || rsv->magic != SCOUTFS_RESERVATION_MAGIC);
|
||||
release_holders(sb);
|
||||
|
||||
spin_lock(&tri->lock);
|
||||
|
||||
trace_scoutfs_release_trans(sb, rsv, rsv->holders, tri->holders, tri->writing);
|
||||
|
||||
BUG_ON(rsv->holders <= 0);
|
||||
BUG_ON(tri->holders <= 0);
|
||||
|
||||
if (--rsv->holders == 0) {
|
||||
current->journal_info = NULL;
|
||||
kfree(rsv);
|
||||
wake = true;
|
||||
}
|
||||
|
||||
if (--tri->holders == 0)
|
||||
wake = true;
|
||||
|
||||
spin_unlock(&tri->lock);
|
||||
|
||||
if (wake)
|
||||
wake_up(&sbi->trans_hold_wq);
|
||||
trace_scoutfs_release_trans(sb, current->journal_info, atomic_read(&tri->holders));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -513,7 +548,7 @@ int scoutfs_setup_trans(struct super_block *sb)
|
||||
if (!tri)
|
||||
return -ENOMEM;
|
||||
|
||||
spin_lock_init(&tri->lock);
|
||||
atomic_set(&tri->holders, 0);
|
||||
scoutfs_block_writer_init(sb, &tri->wri);
|
||||
|
||||
sbi->trans_write_workq = alloc_workqueue("scoutfs_trans",
|
||||
|
||||
@@ -209,12 +209,19 @@ t_trigger_show() {
|
||||
echo "trigger $which $string: $(t_trigger_get $which $nr)"
|
||||
}
|
||||
|
||||
t_trigger_arm() {
|
||||
t_trigger_arm_silent() {
|
||||
local which="$1"
|
||||
local nr="$2"
|
||||
local path=$(t_trigger_path "$nr")
|
||||
|
||||
echo 1 > "$path/$which"
|
||||
}
|
||||
|
||||
t_trigger_arm() {
|
||||
local which="$1"
|
||||
local nr="$2"
|
||||
|
||||
t_trigger_arm_silent $which $nr
|
||||
t_trigger_show $which armed $nr
|
||||
}
|
||||
|
||||
@@ -229,16 +236,44 @@ t_counter() {
|
||||
cat "$(t_sysfs_path $nr)/counters/$which"
|
||||
}
|
||||
|
||||
#
|
||||
# output the difference between the current value of a counter and the
|
||||
# caller's provided previous value.
|
||||
#
|
||||
t_counter_diff_value() {
|
||||
local which="$1"
|
||||
local old="$2"
|
||||
local nr="$3"
|
||||
local new="$(t_counter $which $nr)"
|
||||
|
||||
echo "$((new - old))"
|
||||
}
|
||||
|
||||
#
|
||||
# output the value of the given counter for the given mount, defaulting
|
||||
# to mount 0 if a mount isn't specified.
|
||||
# to mount 0 if a mount isn't specified. For tests which expect a
|
||||
# specific difference in counters.
|
||||
#
|
||||
t_counter_diff() {
|
||||
local which="$1"
|
||||
local old="$2"
|
||||
local nr="$3"
|
||||
local new
|
||||
|
||||
new="$(t_counter $which $nr)"
|
||||
echo "counter $which diff $((new - old))"
|
||||
echo "counter $which diff $(t_counter_diff_value $which $old $nr)"
|
||||
}
|
||||
|
||||
#
|
||||
# output a message indicating whether or not the counter value changed.
|
||||
# For tests that expect a difference, or not, but the amount of
|
||||
# difference isn't significant.
|
||||
#
|
||||
t_counter_diff_changed() {
|
||||
local which="$1"
|
||||
local old="$2"
|
||||
local nr="$3"
|
||||
local diff="$(t_counter_diff_value $which $old $nr)"
|
||||
|
||||
test "$diff" -eq 0 && \
|
||||
echo "counter $which didn't change" ||
|
||||
echo "counter $which changed"
|
||||
}
|
||||
|
||||
@@ -1,29 +1,52 @@
|
||||
== create file for xattr ping pong
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="initial"
|
||||
|
||||
== retry btree forest reads between mounts
|
||||
trigger block_remove_stale armed: 0
|
||||
== create shared test file
|
||||
== set and get xattrs between mount pairs while retrying
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="1"
|
||||
|
||||
trigger block_remove_stale after: 0
|
||||
counter block_cache_remove_stale diff 1
|
||||
trigger block_remove_stale armed: 0
|
||||
counter block_cache_remove_stale changed
|
||||
counter block_cache_remove_stale changed
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="2"
|
||||
|
||||
trigger block_remove_stale after: 0
|
||||
counter block_cache_remove_stale diff 2
|
||||
trigger block_remove_stale armed: 0
|
||||
counter block_cache_remove_stale changed
|
||||
counter block_cache_remove_stale changed
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="3"
|
||||
|
||||
trigger block_remove_stale after: 0
|
||||
counter block_cache_remove_stale diff 3
|
||||
trigger block_remove_stale armed: 0
|
||||
counter block_cache_remove_stale changed
|
||||
counter block_cache_remove_stale changed
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="4"
|
||||
|
||||
trigger block_remove_stale after: 0
|
||||
counter block_cache_remove_stale diff 4
|
||||
counter block_cache_remove_stale changed
|
||||
counter block_cache_remove_stale changed
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="5"
|
||||
|
||||
counter block_cache_remove_stale changed
|
||||
counter block_cache_remove_stale changed
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="6"
|
||||
|
||||
counter block_cache_remove_stale changed
|
||||
counter block_cache_remove_stale changed
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="7"
|
||||
|
||||
counter block_cache_remove_stale changed
|
||||
counter block_cache_remove_stale changed
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="8"
|
||||
|
||||
counter block_cache_remove_stale changed
|
||||
counter block_cache_remove_stale changed
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="9"
|
||||
|
||||
counter block_cache_remove_stale changed
|
||||
counter block_cache_remove_stale changed
|
||||
# file: /mnt/test/test/block-stale-reads/file
|
||||
user.xat="10"
|
||||
|
||||
counter block_cache_remove_stale changed
|
||||
counter block_cache_remove_stale changed
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#
|
||||
# exercise stale block reading.
|
||||
# Exercise stale block reading.
|
||||
#
|
||||
# It would be very difficult to manipulate the allocators, cache, and
|
||||
# persistent blocks to create stable block reading scenarios. Instead
|
||||
@@ -7,34 +7,55 @@
|
||||
#
|
||||
|
||||
t_require_commands touch setfattr getfattr
|
||||
t_require_mounts 2
|
||||
|
||||
inc_wrap_fs_nr()
|
||||
{
|
||||
local nr="$(($1 + 1))"
|
||||
|
||||
if [ "$nr" == "$T_NR_MOUNTS" ]; then
|
||||
nr=0
|
||||
fi
|
||||
|
||||
echo $nr
|
||||
}
|
||||
|
||||
GETFATTR="getfattr --absolute-names"
|
||||
SETFATTR="setfattr"
|
||||
|
||||
#
|
||||
# force re-reading forest btree blocks as each mount reads the items
|
||||
# written by the other.
|
||||
#
|
||||
set_file="$T_D0/file"
|
||||
get_file="$T_D1/file"
|
||||
echo "== create file for xattr ping pong"
|
||||
touch "$set_file"
|
||||
$SETFATTR -n user.xat -v initial "$set_file"
|
||||
$GETFATTR -n user.xat "$get_file" 2>&1 | t_filter_fs
|
||||
echo "== create shared test file"
|
||||
touch "$T_D0/file"
|
||||
$SETFATTR -n user.xat -v 0 "$T_D0/file"
|
||||
|
||||
echo "== retry btree forest reads between mounts"
|
||||
for i in $(seq 1 4); do
|
||||
tmp="$set_file"
|
||||
set_file="$get_file"
|
||||
get_file="$tmp"
|
||||
#
|
||||
# Trigger retries in the block cache as we bounce xattr values around
|
||||
# between sequential pairs of mounts. This is a little silly because if
|
||||
# either of the mounts are the server then they'll almost certaily have
|
||||
# their trigger fired prematurely by message handling btree calls while
|
||||
# working with the t_ helpers long before we work with the xattrs. But
|
||||
# the block cache stale retry path is still being exercised.
|
||||
#
|
||||
echo "== set and get xattrs between mount pairs while retrying"
|
||||
set_nr=0
|
||||
get_nr=$(inc_wrap_fs_nr $set_nr)
|
||||
|
||||
for i in $(seq 1 10); do
|
||||
eval set_file="\$T_D${set_nr}/file"
|
||||
eval get_file="\$T_D${get_nr}/file"
|
||||
|
||||
old_set=$(t_counter block_cache_remove_stale $set_nr)
|
||||
old_get=$(t_counter block_cache_remove_stale $get_nr)
|
||||
|
||||
t_trigger_arm_silent block_remove_stale $set_nr
|
||||
t_trigger_arm_silent block_remove_stale $get_nr
|
||||
|
||||
$SETFATTR -n user.xat -v $i "$set_file"
|
||||
t_trigger_arm block_remove_stale $cl
|
||||
old=$(t_counter btree_stale_read $cl)
|
||||
$GETFATTR -n user.xat "$get_file" 2>&1 | t_filter_fs
|
||||
t_trigger_show block_remove_stale "after" $cl
|
||||
t_counter_diff block_cache_remove_stale $old $cl
|
||||
|
||||
t_counter_diff_changed block_cache_remove_stale $old_set $set_nr
|
||||
t_counter_diff_changed block_cache_remove_stale $old_get $get_nr
|
||||
|
||||
set_nr="$get_nr"
|
||||
get_nr=$(inc_wrap_fs_nr $set_nr)
|
||||
done
|
||||
|
||||
t_pass
|
||||
|
||||
@@ -361,12 +361,12 @@ static int do_mkfs(struct mkfs_args *args)
|
||||
struct scoutfs_quorum_slot *sl = &super->qconf.slots[i];
|
||||
struct in_addr in;
|
||||
|
||||
if (sl->addr.addr == 0)
|
||||
if (sl->addr.v4.family != cpu_to_le16(SCOUTFS_AF_IPV4))
|
||||
continue;
|
||||
|
||||
in.s_addr = htonl(le32_to_cpu(sl->addr.addr));
|
||||
in.s_addr = htonl(le32_to_cpu(sl->addr.v4.addr));
|
||||
printf("%s%u: %s:%u", indent,
|
||||
i, inet_ntoa(in), le16_to_cpu(sl->addr.port));
|
||||
i, inet_ntoa(in), le16_to_cpu(sl->addr.v4.port));
|
||||
indent = "\n ";
|
||||
}
|
||||
printf("\n");
|
||||
@@ -395,22 +395,28 @@ static bool valid_quorum_slots(struct scoutfs_quorum_slot *slots)
|
||||
int j;
|
||||
|
||||
for (i = 0; i < SCOUTFS_QUORUM_MAX_SLOTS; i++) {
|
||||
if (slots[i].addr.addr == 0)
|
||||
if (slots[i].addr.v4.family == cpu_to_le16(SCOUTFS_AF_NONE))
|
||||
continue;
|
||||
|
||||
if (slots[i].addr.v4.family != cpu_to_le16(SCOUTFS_AF_IPV4)) {
|
||||
fprintf(stderr, "quorum slot nr %u has invalid family %u\n",
|
||||
i, le16_to_cpu(slots[i].addr.v4.family));
|
||||
valid = false;
|
||||
}
|
||||
|
||||
for (j = i + 1; j < SCOUTFS_QUORUM_MAX_SLOTS; j++) {
|
||||
if (slots[j].addr.addr == 0)
|
||||
if (slots[i].addr.v4.family != cpu_to_le16(SCOUTFS_AF_IPV4))
|
||||
continue;
|
||||
|
||||
if (slots[i].addr.addr == slots[j].addr.addr &&
|
||||
slots[i].addr.port == slots[j].addr.port) {
|
||||
if (slots[i].addr.v4.addr == slots[j].addr.v4.addr &&
|
||||
slots[i].addr.v4.port == slots[j].addr.v4.port) {
|
||||
|
||||
in.s_addr =
|
||||
htonl(le32_to_cpu(slots[i].addr.addr));
|
||||
htonl(le32_to_cpu(slots[i].addr.v4.addr));
|
||||
addr = inet_ntoa(in);
|
||||
fprintf(stderr, "quorum slot nr %u and %u have the same address %s:%u\n",
|
||||
i, j, addr,
|
||||
le16_to_cpu(slots[i].addr.port));
|
||||
le16_to_cpu(slots[i].addr.v4.port));
|
||||
valid = false;
|
||||
}
|
||||
}
|
||||
@@ -430,7 +436,7 @@ static int parse_opt(int key, char *arg, struct argp_state *state)
|
||||
ret = parse_quorum_slot(&slot, arg);
|
||||
if (ret < 0)
|
||||
return ret;
|
||||
if (args->slots[ret].addr.addr != 0)
|
||||
if (args->slots[ret].addr.v4.family != cpu_to_le16(SCOUTFS_AF_NONE))
|
||||
argp_error(state, "Quorum slot %u already specified before slot '%s'\n",
|
||||
ret, arg);
|
||||
args->slots[ret] = slot;
|
||||
|
||||
@@ -213,7 +213,8 @@ int parse_quorum_slot(struct scoutfs_quorum_slot *slot, char *arg)
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
slot->addr.addr = cpu_to_le32(htonl(in.s_addr));
|
||||
slot->addr.port = cpu_to_le16(port);
|
||||
slot->addr.v4.family = cpu_to_le16(SCOUTFS_AF_IPV4);
|
||||
slot->addr.v4.addr = cpu_to_le32(htonl(in.s_addr));
|
||||
slot->addr.v4.port = cpu_to_le16(port);
|
||||
return nr;
|
||||
}
|
||||
|
||||
@@ -769,7 +769,7 @@ static int print_btree_leaf_items(int fd, struct scoutfs_super_block *super,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static char *alloc_addr_str(struct scoutfs_inet_addr *ia)
|
||||
static char *alloc_addr_str(union scoutfs_inet_addr *ia)
|
||||
{
|
||||
struct in_addr addr;
|
||||
char *quad;
|
||||
@@ -777,12 +777,12 @@ static char *alloc_addr_str(struct scoutfs_inet_addr *ia)
|
||||
int len;
|
||||
|
||||
memset(&addr, 0, sizeof(addr));
|
||||
addr.s_addr = htonl(le32_to_cpu(ia->addr));
|
||||
addr.s_addr = htonl(le32_to_cpu(ia->v4.addr));
|
||||
quad = inet_ntoa(addr);
|
||||
if (quad == NULL)
|
||||
return NULL;
|
||||
|
||||
len = snprintf(NULL, 0, "%s:%u", quad, le16_to_cpu(ia->port));
|
||||
len = snprintf(NULL, 0, "%s:%u", quad, le16_to_cpu(ia->v4.port));
|
||||
if (len < 1 || len > 22)
|
||||
return NULL;
|
||||
|
||||
@@ -791,7 +791,7 @@ static char *alloc_addr_str(struct scoutfs_inet_addr *ia)
|
||||
if (!str)
|
||||
return NULL;
|
||||
|
||||
snprintf(str, len, "%s:%u", quad, le16_to_cpu(ia->port));
|
||||
snprintf(str, len, "%s:%u", quad, le16_to_cpu(ia->v4.port));
|
||||
return str;
|
||||
}
|
||||
|
||||
@@ -915,8 +915,7 @@ static void print_super_block(struct scoutfs_super_block *super, u64 blkno)
|
||||
printf(" quorum config version %llu\n",
|
||||
le64_to_cpu(super->qconf.version));
|
||||
for (i = 0; i < array_size(super->qconf.slots); i++) {
|
||||
if (!super->qconf.slots[i].addr.addr &&
|
||||
!super->qconf.slots[i].addr.port)
|
||||
if (super->qconf.slots[i].addr.v4.family != cpu_to_le16(SCOUTFS_AF_IPV4))
|
||||
continue;
|
||||
|
||||
addr = alloc_addr_str(&super->qconf.slots[i].addr);
|
||||
|
||||
Reference in New Issue
Block a user