mirror of
https://github.com/versity/scoutfs.git
synced 2026-04-29 17:36:55 +00:00
Compare commits
35 Commits
v1.29
...
auke/make_
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
4e7cf76afc | ||
|
|
64200ed61c | ||
|
|
fe43c624aa | ||
|
|
0360462a35 | ||
|
|
283564f9a2 | ||
|
|
e8f4d0b8cc | ||
|
|
d52fa5d71b | ||
|
|
2e45e9dc8c | ||
|
|
d6a4034564 | ||
|
|
c2e8675a8c | ||
|
|
7c6c3d223d | ||
|
|
6ec131da03 | ||
|
|
d76a217ff8 | ||
|
|
af31b9f1e8 | ||
|
|
ad65116d8f | ||
|
|
e20765a9c7 | ||
|
|
066da5c2a2 | ||
|
|
7eacc7139c | ||
|
|
9e3b01b3b4 | ||
|
|
876c233f06 | ||
|
|
6aa5876c71 | ||
|
|
7a9f9ec698 | ||
|
|
fc0fc1427f | ||
|
|
ec68845201 | ||
|
|
5e2009f939 | ||
|
|
8bdc20af21 | ||
|
|
857a39579e | ||
|
|
38d36c9f5c | ||
|
|
b724567b2a | ||
|
|
add1da10dc | ||
|
|
b9c49629a2 | ||
|
|
9737009437 | ||
|
|
3d54ae03e6 | ||
|
|
e27ec0add6 | ||
|
|
5457741672 |
@@ -1,6 +1,23 @@
|
||||
Versity ScoutFS Release Notes
|
||||
=============================
|
||||
|
||||
---
|
||||
v1.30
|
||||
\
|
||||
*Apr 21, 2026*
|
||||
|
||||
Fix a problem reading the accumulated totals of contributing .totl.
|
||||
xattrs when log merging is in progress. The problem would have readers
|
||||
of the totals calculate the sums incorrectly.
|
||||
|
||||
Fix a problem updating quota rules. There was a race where updates
|
||||
could be corrupted if they happened while a transaction was being
|
||||
written.
|
||||
|
||||
Fix a problem deleting files with .indx. xattrs. The internal indexing
|
||||
metadata wouldn't be properly deleted so the files would still claim to
|
||||
be present and visible in the index, though the file no longer existed.
|
||||
|
||||
---
|
||||
v1.29
|
||||
\
|
||||
|
||||
@@ -24,6 +24,7 @@
|
||||
#include "trans.h"
|
||||
#include "alloc.h"
|
||||
#include "counters.h"
|
||||
#include "msg.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
/*
|
||||
@@ -496,10 +497,11 @@ static int dirty_alloc_blocks(struct super_block *sb,
|
||||
struct scoutfs_block *fr_bl = NULL;
|
||||
struct scoutfs_block *bl;
|
||||
bool link_orig = false;
|
||||
__le32 orig_first_nr;
|
||||
u64 av_peek;
|
||||
u64 av_old;
|
||||
u64 av_old = 0;
|
||||
u64 fr_peek;
|
||||
u64 fr_old;
|
||||
u64 fr_old = 0;
|
||||
int ret;
|
||||
|
||||
if (alloc->dirty_avail_bl != NULL)
|
||||
@@ -509,6 +511,7 @@ static int dirty_alloc_blocks(struct super_block *sb,
|
||||
|
||||
/* undo dirty freed if we get an error after */
|
||||
orig_freed = alloc->freed.ref;
|
||||
orig_first_nr = alloc->freed.first_nr;
|
||||
|
||||
if (alloc->dirty_avail_bl != NULL) {
|
||||
ret = 0;
|
||||
@@ -562,6 +565,17 @@ static int dirty_alloc_blocks(struct super_block *sb,
|
||||
/* sort dirty avail to encourage contiguous sorted meta blocks */
|
||||
list_block_sort(av_bl->data);
|
||||
|
||||
lblk = fr_bl->data;
|
||||
if (WARN_ON_ONCE(alloc->freed.ref.blkno != lblk->hdr.blkno)) {
|
||||
scoutfs_err(sb, "dirty_alloc freed ref %llu hdr %llu av_old %llu fr_old %llu av_peek %llu fr_peek %llu link_orig %d",
|
||||
le64_to_cpu(alloc->freed.ref.blkno),
|
||||
le64_to_cpu(lblk->hdr.blkno),
|
||||
av_old, fr_old, av_peek, fr_peek, link_orig);
|
||||
ret = -EIO;
|
||||
goto out;
|
||||
}
|
||||
lblk = NULL;
|
||||
|
||||
if (av_old)
|
||||
list_block_add(&alloc->freed, fr_bl->data, av_old);
|
||||
if (fr_old)
|
||||
@@ -578,6 +592,7 @@ out:
|
||||
if (fr_bl)
|
||||
scoutfs_block_writer_forget(sb, wri, fr_bl);
|
||||
alloc->freed.ref = orig_freed;
|
||||
alloc->freed.first_nr = orig_first_nr;
|
||||
}
|
||||
|
||||
mutex_unlock(&alloc->mutex);
|
||||
|
||||
@@ -218,6 +218,7 @@ static void block_free_work(struct work_struct *work)
|
||||
|
||||
llist_for_each_entry_safe(bp, tmp, deleted, free_node) {
|
||||
block_free(sb, bp);
|
||||
cond_resched();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -467,9 +468,6 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
sector_t sector;
|
||||
int ret = 0;
|
||||
|
||||
if (scoutfs_forcing_unmount(sb))
|
||||
return -ENOLINK;
|
||||
|
||||
sector = bp->bl.blkno << (SCOUTFS_BLOCK_LG_SHIFT - 9);
|
||||
|
||||
WARN_ON_ONCE(bp->bl.blkno == U64_MAX);
|
||||
@@ -480,6 +478,17 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
set_bit(BLOCK_BIT_IO_BUSY, &bp->bits);
|
||||
block_get(bp);
|
||||
|
||||
/*
|
||||
* A second thread may already be waiting on this block's completion
|
||||
* after this thread won the race to submit the block. We exit through
|
||||
* the block_end_io error path which sets BLOCK_BIT_ERROR and assures
|
||||
* that other callers in the waitq get woken up.
|
||||
*/
|
||||
if (scoutfs_forcing_unmount(sb)) {
|
||||
ret = -ENOLINK;
|
||||
goto end_io;
|
||||
}
|
||||
|
||||
blk_start_plug(&plug);
|
||||
|
||||
for (off = 0; off < SCOUTFS_BLOCK_LG_SIZE; off += PAGE_SIZE) {
|
||||
@@ -517,6 +526,7 @@ static int block_submit_bio(struct super_block *sb, struct block_private *bp,
|
||||
|
||||
blk_finish_plug(&plug);
|
||||
|
||||
end_io:
|
||||
/* let racing end_io know we're done */
|
||||
block_end_io(sb, opf, bp, ret);
|
||||
|
||||
@@ -836,6 +846,8 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc,
|
||||
bp = BLOCK_PRIVATE(bl);
|
||||
|
||||
if (block_is_dirty(bp)) {
|
||||
if (ref_blkno)
|
||||
*ref_blkno = 0;
|
||||
ret = 0;
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -2183,6 +2183,8 @@ static int merge_read_item(struct super_block *sb, struct scoutfs_key *key, u64
|
||||
if (ret > 0) {
|
||||
if (ret == SCOUTFS_DELTA_COMBINED) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_combined);
|
||||
if (seq > found->seq)
|
||||
found->seq = seq;
|
||||
} else if (ret == SCOUTFS_DELTA_COMBINED_NULL) {
|
||||
scoutfs_inc_counter(sb, btree_merge_delta_null);
|
||||
free_mitem(rng, found);
|
||||
@@ -2486,6 +2488,14 @@ int scoutfs_btree_merge(struct super_block *sb,
|
||||
mitem = next_mitem(mitem);
|
||||
free_mitem(&rng, tmp);
|
||||
}
|
||||
|
||||
if (mitem && walk_val_len == 0 &&
|
||||
!(walk_flags & (BTW_INSERT | BTW_DELETE)) &&
|
||||
scoutfs_trigger(sb, LOG_MERGE_FORCE_PARTIAL)) {
|
||||
ret = -ERANGE;
|
||||
*next_ret = mitem->key;
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
|
||||
@@ -59,6 +59,31 @@ struct client_info {
|
||||
struct completion farewell_comp;
|
||||
};
|
||||
|
||||
/*
|
||||
* Reconnection to a new server completes pending sync requests with
|
||||
* -ECONNRESET because their state in the old server was reclaimed at
|
||||
* fence time. Transparently retry so callers don't surface the
|
||||
* reconnect as a failed RPC; preserve the pre-drain behavior where a
|
||||
* sync request was silently resent across failover. Shutdown paths
|
||||
* break the loop via the errors that submit and wait already return.
|
||||
*/
|
||||
static int client_sync_request(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, void *arg, unsigned arg_len,
|
||||
void *resp, size_t resp_len)
|
||||
{
|
||||
int ret;
|
||||
|
||||
for (;;) {
|
||||
ret = scoutfs_net_sync_request(sb, conn, cmd, arg, arg_len,
|
||||
resp, resp_len);
|
||||
if (ret != -ECONNRESET)
|
||||
return ret;
|
||||
if (scoutfs_unmounting(sb) || scoutfs_forcing_unmount(sb))
|
||||
return -ESHUTDOWN;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Ask for a new run of allocated inode numbers. The server can return
|
||||
* fewer than @count. It will succeed with nr == 0 if we've run out.
|
||||
@@ -72,10 +97,10 @@ int scoutfs_client_alloc_inodes(struct super_block *sb, u64 count,
|
||||
u64 tmp;
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_ALLOC_INODES,
|
||||
&lecount, sizeof(lecount),
|
||||
&ial, sizeof(ial));
|
||||
ret = client_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_ALLOC_INODES,
|
||||
&lecount, sizeof(lecount),
|
||||
&ial, sizeof(ial));
|
||||
if (ret == 0) {
|
||||
*ino = le64_to_cpu(ial.ino);
|
||||
*nr = le64_to_cpu(ial.nr);
|
||||
@@ -94,9 +119,9 @@ int scoutfs_client_get_log_trees(struct super_block *sb,
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_LOG_TREES,
|
||||
NULL, 0, lt, sizeof(*lt));
|
||||
return client_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_LOG_TREES,
|
||||
NULL, 0, lt, sizeof(*lt));
|
||||
}
|
||||
|
||||
int scoutfs_client_commit_log_trees(struct super_block *sb,
|
||||
@@ -104,9 +129,9 @@ int scoutfs_client_commit_log_trees(struct super_block *sb,
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
|
||||
lt, sizeof(*lt), NULL, 0);
|
||||
return client_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
|
||||
lt, sizeof(*lt), NULL, 0);
|
||||
}
|
||||
|
||||
int scoutfs_client_get_roots(struct super_block *sb,
|
||||
@@ -114,9 +139,26 @@ int scoutfs_client_get_roots(struct super_block *sb,
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_ROOTS,
|
||||
NULL, 0, roots, sizeof(*roots));
|
||||
return client_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_ROOTS,
|
||||
NULL, 0, roots, sizeof(*roots));
|
||||
}
|
||||
|
||||
/*
|
||||
* Bounded-wait get_roots for the orphan scan worker. The worker
|
||||
* reschedules on error, so -ETIMEDOUT is treated like any other RPC
|
||||
* failure and retries on the next scan.
|
||||
*/
|
||||
int scoutfs_client_get_roots_timeout(struct super_block *sb,
|
||||
struct scoutfs_net_roots *roots,
|
||||
unsigned long timeout_jiffies)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request_timeout(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_ROOTS,
|
||||
NULL, 0, roots, sizeof(*roots),
|
||||
timeout_jiffies);
|
||||
}
|
||||
|
||||
int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq)
|
||||
@@ -125,9 +167,9 @@ int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq)
|
||||
__le64 last_seq;
|
||||
int ret;
|
||||
|
||||
ret = scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_LAST_SEQ,
|
||||
NULL, 0, &last_seq, sizeof(last_seq));
|
||||
ret = client_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_LAST_SEQ,
|
||||
NULL, 0, &last_seq, sizeof(last_seq));
|
||||
if (ret == 0)
|
||||
*seq = le64_to_cpu(last_seq);
|
||||
|
||||
@@ -140,24 +182,34 @@ static int client_lock_response(struct super_block *sb,
|
||||
void *resp, unsigned int resp_len,
|
||||
int error, void *data)
|
||||
{
|
||||
struct scoutfs_lock *lock = data;
|
||||
|
||||
if (error) {
|
||||
scoutfs_lock_request_failed(sb, lock);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (resp_len != sizeof(struct scoutfs_net_lock))
|
||||
return -EINVAL;
|
||||
|
||||
/* XXX error? */
|
||||
|
||||
return scoutfs_lock_grant_response(sb, resp);
|
||||
}
|
||||
|
||||
/* Send a lock request to the server. */
|
||||
/*
|
||||
* Send a lock request to the server. The lock is anchored by
|
||||
* request_pending so its address is stable until the response callback
|
||||
* runs and clears request_pending on either the grant or error path.
|
||||
*/
|
||||
int scoutfs_client_lock_request(struct super_block *sb,
|
||||
struct scoutfs_net_lock *nl)
|
||||
struct scoutfs_net_lock *nl,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_submit_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_LOCK,
|
||||
nl, sizeof(*nl),
|
||||
client_lock_response, NULL, NULL);
|
||||
client_lock_response, lock, NULL);
|
||||
}
|
||||
|
||||
/* Send a lock response to the server. */
|
||||
@@ -189,9 +241,26 @@ int scoutfs_client_srch_get_compact(struct super_block *sb,
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
|
||||
NULL, 0, sc, sizeof(*sc));
|
||||
return client_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
|
||||
NULL, 0, sc, sizeof(*sc));
|
||||
}
|
||||
|
||||
/*
|
||||
* Bounded-wait get_compact for the srch compact worker. The worker
|
||||
* reschedules on any error and the compact work is idempotent, so
|
||||
* -ETIMEDOUT just defers this round.
|
||||
*/
|
||||
int scoutfs_client_srch_get_compact_timeout(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *sc,
|
||||
unsigned long timeout_jiffies)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request_timeout(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
|
||||
NULL, 0, sc, sizeof(*sc),
|
||||
timeout_jiffies);
|
||||
}
|
||||
|
||||
/* Commit the result of a srch file compaction. */
|
||||
@@ -200,9 +269,27 @@ int scoutfs_client_srch_commit_compact(struct super_block *sb,
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
|
||||
res, sizeof(*res), NULL, 0);
|
||||
return client_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
|
||||
res, sizeof(*res), NULL, 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Bounded-wait commit_compact for the srch compact worker. The server
|
||||
* ignores partial work flagged with ERROR, so a timed-out commit
|
||||
* (marked ERROR on this side) lets the server reclaim our allocators
|
||||
* and reassign the compact on the next scheduled attempt.
|
||||
*/
|
||||
int scoutfs_client_srch_commit_compact_timeout(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *res,
|
||||
unsigned long timeout_jiffies)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request_timeout(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
|
||||
res, sizeof(*res), NULL, 0,
|
||||
timeout_jiffies);
|
||||
}
|
||||
|
||||
int scoutfs_client_get_log_merge(struct super_block *sb,
|
||||
@@ -210,9 +297,9 @@ int scoutfs_client_get_log_merge(struct super_block *sb,
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_LOG_MERGE,
|
||||
NULL, 0, req, sizeof(*req));
|
||||
return client_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_GET_LOG_MERGE,
|
||||
NULL, 0, req, sizeof(*req));
|
||||
}
|
||||
|
||||
int scoutfs_client_commit_log_merge(struct super_block *sb,
|
||||
@@ -220,9 +307,9 @@ int scoutfs_client_commit_log_merge(struct super_block *sb,
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_COMMIT_LOG_MERGE,
|
||||
comp, sizeof(*comp), NULL, 0);
|
||||
return client_sync_request(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_COMMIT_LOG_MERGE,
|
||||
comp, sizeof(*comp), NULL, 0);
|
||||
}
|
||||
|
||||
int scoutfs_client_send_omap_response(struct super_block *sb, u64 id,
|
||||
@@ -254,8 +341,30 @@ int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
|
||||
.req_id = 0,
|
||||
};
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_OPEN_INO_MAP,
|
||||
&args, sizeof(args), map, sizeof(*map));
|
||||
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_OPEN_INO_MAP,
|
||||
&args, sizeof(args), map, sizeof(*map));
|
||||
}
|
||||
|
||||
/*
|
||||
* Bounded-wait open_ino_map for the orphan scan worker. The scan
|
||||
* reschedules on error; the delete path callers keep the unbounded
|
||||
* retry.
|
||||
*/
|
||||
int scoutfs_client_open_ino_map_timeout(struct super_block *sb, u64 group_nr,
|
||||
struct scoutfs_open_ino_map *map,
|
||||
unsigned long timeout_jiffies)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
struct scoutfs_open_ino_map_args args = {
|
||||
.group_nr = cpu_to_le64(group_nr),
|
||||
.req_id = 0,
|
||||
};
|
||||
|
||||
return scoutfs_net_sync_request_timeout(sb, client->conn,
|
||||
SCOUTFS_NET_CMD_OPEN_INO_MAP,
|
||||
&args, sizeof(args),
|
||||
map, sizeof(*map),
|
||||
timeout_jiffies);
|
||||
}
|
||||
|
||||
/* The client is asking the server for the current volume options */
|
||||
@@ -263,8 +372,8 @@ int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_opti
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_GET_VOLOPT,
|
||||
NULL, 0, volopt, sizeof(*volopt));
|
||||
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_GET_VOLOPT,
|
||||
NULL, 0, volopt, sizeof(*volopt));
|
||||
}
|
||||
|
||||
/* The client is asking the server to update volume options */
|
||||
@@ -272,8 +381,8 @@ int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_opti
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_SET_VOLOPT,
|
||||
volopt, sizeof(*volopt), NULL, 0);
|
||||
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_SET_VOLOPT,
|
||||
volopt, sizeof(*volopt), NULL, 0);
|
||||
}
|
||||
|
||||
/* The client is asking the server to clear volume options */
|
||||
@@ -281,24 +390,24 @@ int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_op
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_CLEAR_VOLOPT,
|
||||
volopt, sizeof(*volopt), NULL, 0);
|
||||
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_CLEAR_VOLOPT,
|
||||
volopt, sizeof(*volopt), NULL, 0);
|
||||
}
|
||||
|
||||
int scoutfs_client_resize_devices(struct super_block *sb, struct scoutfs_net_resize_devices *nrd)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_RESIZE_DEVICES,
|
||||
nrd, sizeof(*nrd), NULL, 0);
|
||||
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_RESIZE_DEVICES,
|
||||
nrd, sizeof(*nrd), NULL, 0);
|
||||
}
|
||||
|
||||
int scoutfs_client_statfs(struct super_block *sb, struct scoutfs_net_statfs *nst)
|
||||
{
|
||||
struct client_info *client = SCOUTFS_SB(sb)->client_info;
|
||||
|
||||
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_STATFS,
|
||||
NULL, 0, nst, sizeof(*nst));
|
||||
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_STATFS,
|
||||
NULL, 0, nst, sizeof(*nst));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -646,8 +755,12 @@ void scoutfs_client_destroy(struct super_block *sb)
|
||||
client_farewell_response,
|
||||
NULL, NULL);
|
||||
if (ret == 0) {
|
||||
wait_for_completion(&client->farewell_comp);
|
||||
ret = client->farewell_error;
|
||||
if (!wait_for_completion_timeout(&client->farewell_comp,
|
||||
120 * HZ)) {
|
||||
ret = -ETIMEDOUT;
|
||||
} else {
|
||||
ret = client->farewell_error;
|
||||
}
|
||||
}
|
||||
if (ret) {
|
||||
scoutfs_inc_counter(sb, client_farewell_error);
|
||||
@@ -661,10 +774,16 @@ void scoutfs_client_destroy(struct super_block *sb)
|
||||
/* make sure worker isn't using the conn */
|
||||
cancel_delayed_work_sync(&client->connect_dwork);
|
||||
|
||||
/* make racing conn use explode */
|
||||
/*
|
||||
* Drain the conn's workers before nulling client->conn. In-flight
|
||||
* proc_workers dispatch request handlers that call back into client
|
||||
* response helpers (e.g. scoutfs_client_lock_recover_response) which
|
||||
* read client->conn; nulling it first races with those workers and
|
||||
* causes submit_send to dereference a NULL conn->lock.
|
||||
*/
|
||||
conn = client->conn;
|
||||
client->conn = NULL;
|
||||
scoutfs_net_free_conn(sb, conn);
|
||||
client->conn = NULL;
|
||||
|
||||
if (client->workq)
|
||||
destroy_workqueue(client->workq);
|
||||
|
||||
@@ -9,18 +9,28 @@ int scoutfs_client_commit_log_trees(struct super_block *sb,
|
||||
struct scoutfs_log_trees *lt);
|
||||
int scoutfs_client_get_roots(struct super_block *sb,
|
||||
struct scoutfs_net_roots *roots);
|
||||
int scoutfs_client_get_roots_timeout(struct super_block *sb,
|
||||
struct scoutfs_net_roots *roots,
|
||||
unsigned long timeout_jiffies);
|
||||
u64 *scoutfs_client_bulk_alloc(struct super_block *sb);
|
||||
int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq);
|
||||
int scoutfs_client_lock_request(struct super_block *sb,
|
||||
struct scoutfs_net_lock *nl);
|
||||
struct scoutfs_net_lock *nl,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_client_lock_response(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_net_lock *nl);
|
||||
int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_net_lock_recover *nlr);
|
||||
int scoutfs_client_srch_get_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *sc);
|
||||
int scoutfs_client_srch_get_compact_timeout(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *sc,
|
||||
unsigned long timeout_jiffies);
|
||||
int scoutfs_client_srch_commit_compact(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *res);
|
||||
int scoutfs_client_srch_commit_compact_timeout(struct super_block *sb,
|
||||
struct scoutfs_srch_compact *res,
|
||||
unsigned long timeout_jiffies);
|
||||
int scoutfs_client_get_log_merge(struct super_block *sb,
|
||||
struct scoutfs_log_merge_request *req);
|
||||
int scoutfs_client_commit_log_merge(struct super_block *sb,
|
||||
@@ -29,6 +39,9 @@ int scoutfs_client_send_omap_response(struct super_block *sb, u64 id,
|
||||
struct scoutfs_open_ino_map *map);
|
||||
int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
|
||||
struct scoutfs_open_ino_map *map);
|
||||
int scoutfs_client_open_ino_map_timeout(struct super_block *sb, u64 group_nr,
|
||||
struct scoutfs_open_ino_map *map,
|
||||
unsigned long timeout_jiffies);
|
||||
int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
|
||||
int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
|
||||
int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
|
||||
|
||||
@@ -62,6 +62,7 @@
|
||||
EXPAND_COUNTER(btree_walk) \
|
||||
EXPAND_COUNTER(btree_walk_restart) \
|
||||
EXPAND_COUNTER(client_farewell_error) \
|
||||
EXPAND_COUNTER(client_rpc_timeout) \
|
||||
EXPAND_COUNTER(corrupt_btree_block_level) \
|
||||
EXPAND_COUNTER(corrupt_btree_no_child_ref) \
|
||||
EXPAND_COUNTER(corrupt_dirent_backref_name_len) \
|
||||
@@ -138,6 +139,7 @@
|
||||
EXPAND_COUNTER(lock_lock_error) \
|
||||
EXPAND_COUNTER(lock_nonblock_eagain) \
|
||||
EXPAND_COUNTER(lock_recover_request) \
|
||||
EXPAND_COUNTER(lock_request_failed) \
|
||||
EXPAND_COUNTER(lock_shrink_attempted) \
|
||||
EXPAND_COUNTER(lock_shrink_request_failed) \
|
||||
EXPAND_COUNTER(lock_unlock) \
|
||||
|
||||
@@ -239,9 +239,9 @@ static int forest_read_items(struct super_block *sb, struct scoutfs_key *key, u6
|
||||
* to reset their state and retry with a newer version of the btrees.
|
||||
*/
|
||||
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
|
||||
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg)
|
||||
u64 merge_input_seq, struct scoutfs_key *key,
|
||||
struct scoutfs_key *bloom_key, struct scoutfs_key *start,
|
||||
struct scoutfs_key *end, scoutfs_forest_item_cb cb, void *arg)
|
||||
{
|
||||
struct forest_read_items_data rid = {
|
||||
.cb = cb,
|
||||
@@ -317,15 +317,17 @@ int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_r
|
||||
|
||||
scoutfs_inc_counter(sb, forest_bloom_pass);
|
||||
|
||||
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED))
|
||||
rid.fic |= FIC_FINALIZED;
|
||||
if ((le64_to_cpu(lt.flags) & SCOUTFS_LOG_TREES_FINALIZED) &&
|
||||
(merge_input_seq == 0 ||
|
||||
le64_to_cpu(lt.finalize_seq) < merge_input_seq))
|
||||
rid.fic |= FIC_MERGE_INPUT;
|
||||
|
||||
ret = scoutfs_btree_read_items(sb, &lt.item_root, key, start,
|
||||
end, forest_read_items, &rid);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
rid.fic &= ~FIC_FINALIZED;
|
||||
rid.fic &= ~FIC_MERGE_INPUT;
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
@@ -345,7 +347,7 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
if (ret == 0)
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, key, bloom_key, start, end,
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, 0, key, bloom_key, start, end,
|
||||
cb, arg);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ struct scoutfs_lock;
|
||||
/* caller gives an item to the callback */
|
||||
enum {
|
||||
FIC_FS_ROOT = (1 << 0),
|
||||
FIC_FINALIZED = (1 << 1),
|
||||
FIC_MERGE_INPUT = (1 << 1),
|
||||
};
|
||||
typedef int (*scoutfs_forest_item_cb)(struct super_block *sb, struct scoutfs_key *key, u64 seq,
|
||||
u8 flags, void *val, int val_len, int fic, void *arg);
|
||||
@@ -25,9 +25,9 @@ int scoutfs_forest_read_items(struct super_block *sb,
|
||||
struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_read_items_roots(struct super_block *sb, struct scoutfs_net_roots *roots,
|
||||
struct scoutfs_key *key, struct scoutfs_key *bloom_key,
|
||||
struct scoutfs_key *start, struct scoutfs_key *end,
|
||||
scoutfs_forest_item_cb cb, void *arg);
|
||||
u64 merge_input_seq, struct scoutfs_key *key,
|
||||
struct scoutfs_key *bloom_key, struct scoutfs_key *start,
|
||||
struct scoutfs_key *end, scoutfs_forest_item_cb cb, void *arg);
|
||||
int scoutfs_forest_set_bloom_bits(struct super_block *sb,
|
||||
struct scoutfs_lock *lock);
|
||||
void scoutfs_forest_set_max_seq(struct super_block *sb, u64 max_seq);
|
||||
|
||||
@@ -2074,6 +2074,14 @@ void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Generous per-RPC bound for the idempotent orphan scan worker. A
|
||||
* server that hasn't answered in this long is assumed to be broken;
|
||||
* dropping the request lets the scan reschedule instead of blocking
|
||||
* forever.
|
||||
*/
|
||||
#define ORPHAN_SCAN_RPC_TIMEOUT (5 * 60 * HZ)
|
||||
|
||||
/*
|
||||
* Find and delete inodes whose only remaining reference is the
|
||||
* persistent orphan item that was created as they were unlinked.
|
||||
@@ -2128,7 +2136,7 @@ static void inode_orphan_scan_worker(struct work_struct *work)
|
||||
init_orphan_key(&last, U64_MAX);
|
||||
omap.args.group_nr = cpu_to_le64(U64_MAX);
|
||||
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
ret = scoutfs_client_get_roots_timeout(sb, &roots, ORPHAN_SCAN_RPC_TIMEOUT);
|
||||
if (ret)
|
||||
goto out;
|
||||
|
||||
@@ -2169,7 +2177,8 @@ static void inode_orphan_scan_worker(struct work_struct *work)
|
||||
scoutfs_omap_calc_group_nrs(ino, &group_nr, &bit_nr);
|
||||
|
||||
if (le64_to_cpu(omap.args.group_nr) != group_nr) {
|
||||
ret = scoutfs_client_open_ino_map(sb, group_nr, &omap);
|
||||
ret = scoutfs_client_open_ino_map_timeout(sb, group_nr, &omap,
|
||||
ORPHAN_SCAN_RPC_TIMEOUT);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
105
kmod/src/lock.c
105
kmod/src/lock.c
@@ -71,6 +71,8 @@
|
||||
* relative to that lock state we resend.
|
||||
*/
|
||||
|
||||
#define CLIENT_LOCK_WAIT_TIMEOUT (60 * HZ)
|
||||
|
||||
/*
|
||||
* allocated per-super, freed on unmount.
|
||||
*/
|
||||
@@ -157,6 +159,33 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove all coverage items from the lock to tell users that their
|
||||
* cache is stale. This is lock-internal bookkeeping that is safe to
|
||||
* call during shutdown and unmount. The unconditional unlock/relock
|
||||
* of cov_list_lock avoids sparse warnings from unbalanced locking in
|
||||
* the trylock failure path.
|
||||
*/
|
||||
static void lock_clear_coverage(struct super_block *sb,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
struct scoutfs_lock_coverage *cov;
|
||||
|
||||
spin_lock(&lock->cov_list_lock);
|
||||
while ((cov = list_first_entry_or_null(&lock->cov_list,
|
||||
struct scoutfs_lock_coverage, head))) {
|
||||
if (spin_trylock(&cov->cov_lock)) {
|
||||
list_del_init(&cov->head);
|
||||
cov->lock = NULL;
|
||||
spin_unlock(&cov->cov_lock);
|
||||
scoutfs_inc_counter(sb, lock_invalidate_coverage);
|
||||
}
|
||||
spin_unlock(&lock->cov_list_lock);
|
||||
spin_lock(&lock->cov_list_lock);
|
||||
}
|
||||
spin_unlock(&lock->cov_list_lock);
|
||||
}
|
||||
|
||||
/*
|
||||
* Invalidate caches associated with this lock. Either we're
|
||||
* invalidating a write to a read or we're invalidating to null. We
|
||||
@@ -166,7 +195,6 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
|
||||
static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
enum scoutfs_lock_mode prev, enum scoutfs_lock_mode mode)
|
||||
{
|
||||
struct scoutfs_lock_coverage *cov;
|
||||
u64 ino, last;
|
||||
int ret = 0;
|
||||
|
||||
@@ -190,24 +218,7 @@ static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
|
||||
/* have to invalidate if we're not in the only usable case */
|
||||
if (!(prev == SCOUTFS_LOCK_WRITE && mode == SCOUTFS_LOCK_READ)) {
|
||||
/*
|
||||
* Remove cov items to tell users that their cache is
|
||||
* stale. The unlock pattern comes from avoiding bad
|
||||
* sparse warnings when taking else in a failed trylock.
|
||||
*/
|
||||
spin_lock(&lock->cov_list_lock);
|
||||
while ((cov = list_first_entry_or_null(&lock->cov_list,
|
||||
struct scoutfs_lock_coverage, head))) {
|
||||
if (spin_trylock(&cov->cov_lock)) {
|
||||
list_del_init(&cov->head);
|
||||
cov->lock = NULL;
|
||||
spin_unlock(&cov->cov_lock);
|
||||
scoutfs_inc_counter(sb, lock_invalidate_coverage);
|
||||
}
|
||||
spin_unlock(&lock->cov_list_lock);
|
||||
spin_lock(&lock->cov_list_lock);
|
||||
}
|
||||
spin_unlock(&lock->cov_list_lock);
|
||||
lock_clear_coverage(sb, lock);
|
||||
|
||||
/* invalidate inodes after removing coverage so drop/evict aren't covered */
|
||||
if (lock->start.sk_zone == SCOUTFS_FS_ZONE) {
|
||||
@@ -643,6 +654,33 @@ int scoutfs_lock_grant_response(struct super_block *sb,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* The lock request we sent to the server was dropped before we could
|
||||
* receive a grant response. This happens when the client reconnects to
|
||||
* a new server and completes pending requests with an error, since the
|
||||
* old server's pending-request state was reclaimed at fence time.
|
||||
*
|
||||
* Clear request_pending so that a waiter in lock_key_range re-evaluates
|
||||
* and sends a fresh request to the new server, and symmetrically put
|
||||
* the lock so shrink's lru state matches the grant_response path.
|
||||
*/
|
||||
void scoutfs_lock_request_failed(struct super_block *sb,
|
||||
struct scoutfs_lock *lock)
|
||||
{
|
||||
DECLARE_LOCK_INFO(sb, linfo);
|
||||
|
||||
scoutfs_inc_counter(sb, lock_request_failed);
|
||||
|
||||
spin_lock(&linfo->lock);
|
||||
|
||||
BUG_ON(!lock->request_pending);
|
||||
lock->request_pending = 0;
|
||||
wake_up(&lock->waitq);
|
||||
put_lock(linfo, lock);
|
||||
|
||||
spin_unlock(&linfo->lock);
|
||||
}
|
||||
|
||||
struct inv_req {
|
||||
struct list_head head;
|
||||
struct scoutfs_lock *lock;
|
||||
@@ -714,10 +752,13 @@ static void lock_invalidate_worker(struct work_struct *work)
|
||||
ireq = list_first_entry(&lock->inv_list, struct inv_req, head);
|
||||
nl = &ireq->nl;
|
||||
|
||||
/* only lock protocol, inv can't call subsystems after shutdown */
|
||||
if (!linfo->shutdown) {
|
||||
/* only lock protocol, inv can't call subsystems after shutdown or unmount */
|
||||
if (!linfo->shutdown && !scoutfs_unmounting(sb)) {
|
||||
ret = lock_invalidate(sb, lock, nl->old_mode, nl->new_mode);
|
||||
BUG_ON(ret < 0 && ret != -ENOLINK);
|
||||
} else {
|
||||
lock_clear_coverage(sb, lock);
|
||||
scoutfs_item_invalidate(sb, &lock->start, &lock->end);
|
||||
}
|
||||
|
||||
/* respond with the key and modes from the request, server might have died */
|
||||
@@ -922,7 +963,7 @@ static bool try_shrink_lock(struct super_block *sb, struct lock_info *linfo, boo
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
if (lock) {
|
||||
ret = scoutfs_client_lock_request(sb, &nl);
|
||||
ret = scoutfs_client_lock_request(sb, &nl, lock);
|
||||
if (ret < 0) {
|
||||
scoutfs_inc_counter(sb, lock_shrink_request_failed);
|
||||
|
||||
@@ -953,6 +994,9 @@ static bool lock_wait_cond(struct super_block *sb, struct scoutfs_lock *lock,
|
||||
!lock->request_pending;
|
||||
spin_unlock(&linfo->lock);
|
||||
|
||||
if (!wake)
|
||||
wake = scoutfs_unmounting(sb);
|
||||
|
||||
if (!wake)
|
||||
scoutfs_inc_counter(sb, lock_wait);
|
||||
|
||||
@@ -997,8 +1041,10 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
|
||||
return -EINVAL;
|
||||
|
||||
/* maybe catch _setup() and _shutdown order mistakes */
|
||||
if (WARN_ON_ONCE(!linfo || linfo->shutdown))
|
||||
if (!linfo || linfo->shutdown) {
|
||||
WARN_ON_ONCE(!scoutfs_unmounting(sb));
|
||||
return -ENOLCK;
|
||||
}
|
||||
|
||||
/* have to lock before entering transactions */
|
||||
if (WARN_ON_ONCE(scoutfs_trans_held()))
|
||||
@@ -1024,6 +1070,11 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
|
||||
break;
|
||||
}
|
||||
|
||||
if (scoutfs_unmounting(sb)) {
|
||||
ret = -ESHUTDOWN;
|
||||
break;
|
||||
}
|
||||
|
||||
/* the fast path where we can use the granted mode */
|
||||
if (lock_modes_match(lock->mode, mode)) {
|
||||
lock_inc_count(lock->users, mode);
|
||||
@@ -1053,7 +1104,7 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
|
||||
nl.old_mode = lock->mode;
|
||||
nl.new_mode = mode;
|
||||
|
||||
ret = scoutfs_client_lock_request(sb, &nl);
|
||||
ret = scoutfs_client_lock_request(sb, &nl, lock);
|
||||
if (ret) {
|
||||
spin_lock(&linfo->lock);
|
||||
lock->request_pending = 0;
|
||||
@@ -1067,8 +1118,9 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
|
||||
if (flags & SCOUTFS_LKF_INTERRUPTIBLE) {
|
||||
ret = wait_event_interruptible(lock->waitq,
|
||||
lock_wait_cond(sb, lock, mode));
|
||||
} else {
|
||||
wait_event(lock->waitq, lock_wait_cond(sb, lock, mode));
|
||||
} else if (!wait_event_timeout(lock->waitq,
|
||||
lock_wait_cond(sb, lock, mode),
|
||||
CLIENT_LOCK_WAIT_TIMEOUT)) {
|
||||
ret = 0;
|
||||
}
|
||||
|
||||
@@ -1650,6 +1702,7 @@ void scoutfs_lock_destroy(struct super_block *sb)
|
||||
list_del_init(&lock->inv_head);
|
||||
lock->invalidate_pending = 0;
|
||||
}
|
||||
lock_clear_coverage(sb, lock);
|
||||
lock_remove(linfo, lock);
|
||||
lock_free(linfo, lock);
|
||||
}
|
||||
|
||||
@@ -60,6 +60,8 @@ struct scoutfs_lock_coverage {
|
||||
|
||||
int scoutfs_lock_grant_response(struct super_block *sb,
|
||||
struct scoutfs_net_lock *nl);
|
||||
void scoutfs_lock_request_failed(struct super_block *sb,
|
||||
struct scoutfs_lock *lock);
|
||||
int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
|
||||
struct scoutfs_net_lock *nl);
|
||||
int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,
|
||||
|
||||
145
kmod/src/net.c
145
kmod/src/net.c
@@ -1750,8 +1750,10 @@ void scoutfs_net_client_greeting(struct super_block *sb,
|
||||
bool new_server)
|
||||
{
|
||||
struct net_info *ninf = SCOUTFS_SB(sb)->net_info;
|
||||
scoutfs_net_response_t resp_func;
|
||||
struct message_send *msend;
|
||||
struct message_send *tmp;
|
||||
void *resp_data;
|
||||
|
||||
/* only called on client connections :/ */
|
||||
BUG_ON(conn->listening_conn);
|
||||
@@ -1760,10 +1762,32 @@ void scoutfs_net_client_greeting(struct super_block *sb,
|
||||
|
||||
if (new_server) {
|
||||
atomic64_set(&conn->recv_seq, 0);
|
||||
|
||||
/* drop stale responses; old server's state is gone */
|
||||
list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head){
|
||||
if (nh_is_response(&msend->nh))
|
||||
free_msend(ninf, conn, msend);
|
||||
}
|
||||
|
||||
/*
|
||||
* Complete pending requests with -ECONNRESET. Any state
|
||||
* they depended on in the old server was reclaimed at
|
||||
* fence time, so resending is wrong. Callers re-issue on
|
||||
* the new server if they still care.
|
||||
*/
|
||||
while ((msend = list_first_entry_or_null(&conn->resend_queue,
|
||||
struct message_send, head))) {
|
||||
if (nh_is_response(&msend->nh))
|
||||
break;
|
||||
resp_func = msend->resp_func;
|
||||
resp_data = msend->resp_data;
|
||||
free_msend(ninf, conn, msend);
|
||||
spin_unlock(&conn->lock);
|
||||
|
||||
call_resp_func(sb, conn, resp_func, resp_data, NULL, 0, -ECONNRESET);
|
||||
|
||||
spin_lock(&conn->lock);
|
||||
}
|
||||
}
|
||||
|
||||
set_valid_greeting(conn);
|
||||
@@ -1990,8 +2014,9 @@ static int sync_response(struct super_block *sb,
|
||||
* buffer. Errors returned can come from the remote request processing
|
||||
* or local failure to send.
|
||||
*
|
||||
* The wait for the response is interruptible and can return
|
||||
* -ERESTARTSYS if it is interrupted.
|
||||
* The wait for the response uses a 60 second timeout loop that
|
||||
* checks for unmount, returning -ESHUTDOWN if the mount is
|
||||
* being torn down.
|
||||
*
|
||||
* -EOVERFLOW is returned if the response message's data_length doesn't
|
||||
* match the caller's resp_len buffer.
|
||||
@@ -2002,6 +2027,7 @@ int scoutfs_net_sync_request(struct super_block *sb,
|
||||
void *resp, size_t resp_len)
|
||||
{
|
||||
struct sync_request_completion sreq;
|
||||
struct message_send *msend;
|
||||
int ret;
|
||||
u64 id;
|
||||
|
||||
@@ -2014,13 +2040,124 @@ int scoutfs_net_sync_request(struct super_block *sb,
|
||||
sync_response, &sreq, &id);
|
||||
|
||||
if (ret == 0) {
|
||||
wait_for_completion(&sreq.comp);
|
||||
ret = sreq.error;
|
||||
while (!wait_for_completion_timeout(&sreq.comp, 60 * HZ)) {
|
||||
if (scoutfs_unmounting(sb)) {
|
||||
ret = -ESHUTDOWN;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (ret == -ESHUTDOWN) {
|
||||
spin_lock(&conn->lock);
|
||||
msend = find_request(conn, cmd, id);
|
||||
if (msend)
|
||||
queue_dead_free(conn, msend);
|
||||
spin_unlock(&conn->lock);
|
||||
} else {
|
||||
ret = sreq.error;
|
||||
}
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
|
||||
* A bounded-wait variant of sync_request for idempotent background
|
||||
* workers that must reschedule instead of blocking indefinitely on an
|
||||
* unresponsive server. Returns -ETIMEDOUT if the response doesn't
|
||||
* arrive within timeout_jiffies; the caller then treats it like any
|
||||
* other RPC failure and retries on its normal reschedule cadence.
|
||||
*
|
||||
* Response state lives in a refcounted heap allocation rather than on
|
||||
* the caller's stack so a late callback can't scribble into freed
|
||||
* memory if we give up waiting. On timeout we race with an arriving
|
||||
* response for the msend: if find_request wins we queue_dead_free and
|
||||
* the callback won't fire (we drop its ref); otherwise the callback is
|
||||
* already running so we wait for it to complete before returning.
|
||||
*/
|
||||
struct bounded_sync {
	/* completed by bounded_sync_response() after it records the result */
	struct completion comp;
	/* caller's response buffer, filled with memcpy on a successful response */
	void *resp;
	/* expected response length; any other length is recorded as -EMSGSIZE */
	unsigned int resp_len;
	/* 0 on success, otherwise the error recorded by the response callback */
	int error;
	/* set to 2 by scoutfs_net_sync_request_timeout(); last put frees */
	atomic_t refs;
};
|
||||
|
||||
static void bounded_sync_put(struct bounded_sync *bs)
|
||||
{
|
||||
if (atomic_dec_and_test(&bs->refs))
|
||||
kfree(bs);
|
||||
}
|
||||
|
||||
static int bounded_sync_response(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
void *resp, unsigned int resp_len,
|
||||
int error, void *data)
|
||||
{
|
||||
struct bounded_sync *bs = data;
|
||||
|
||||
if (error == 0 && resp_len != bs->resp_len)
|
||||
error = -EMSGSIZE;
|
||||
|
||||
if (error)
|
||||
bs->error = error;
|
||||
else if (resp_len)
|
||||
memcpy(bs->resp, resp, resp_len);
|
||||
|
||||
complete(&bs->comp);
|
||||
bounded_sync_put(bs);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int scoutfs_net_sync_request_timeout(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, void *arg, unsigned arg_len,
|
||||
void *resp, size_t resp_len,
|
||||
unsigned long timeout_jiffies)
|
||||
{
|
||||
struct message_send *msend;
|
||||
struct bounded_sync *bs;
|
||||
int ret;
|
||||
u64 id;
|
||||
|
||||
bs = kzalloc(sizeof(*bs), GFP_NOFS);
|
||||
if (!bs)
|
||||
return -ENOMEM;
|
||||
init_completion(&bs->comp);
|
||||
bs->resp = resp;
|
||||
bs->resp_len = resp_len;
|
||||
bs->error = 0;
|
||||
atomic_set(&bs->refs, 2);
|
||||
|
||||
ret = scoutfs_net_submit_request(sb, conn, cmd, arg, arg_len,
|
||||
bounded_sync_response, bs, &id);
|
||||
if (ret) {
|
||||
bounded_sync_put(bs);
|
||||
bounded_sync_put(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
if (wait_for_completion_timeout(&bs->comp, timeout_jiffies) == 0) {
|
||||
scoutfs_inc_counter(sb, client_rpc_timeout);
|
||||
|
||||
spin_lock(&conn->lock);
|
||||
msend = find_request(conn, cmd, id);
|
||||
if (msend)
|
||||
queue_dead_free(conn, msend);
|
||||
spin_unlock(&conn->lock);
|
||||
|
||||
if (msend)
|
||||
bounded_sync_put(bs);
|
||||
else
|
||||
wait_for_completion(&bs->comp);
|
||||
ret = -ETIMEDOUT;
|
||||
} else {
|
||||
ret = bs->error;
|
||||
}
|
||||
|
||||
bounded_sync_put(bs);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static void net_tseq_show_conn(struct seq_file *m,
|
||||
struct scoutfs_tseq_entry *ent)
|
||||
{
|
||||
|
||||
@@ -150,6 +150,11 @@ int scoutfs_net_sync_request(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, void *arg, unsigned arg_len,
|
||||
void *resp, size_t resp_len);
|
||||
int scoutfs_net_sync_request_timeout(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, void *arg, unsigned arg_len,
|
||||
void *resp, size_t resp_len,
|
||||
unsigned long timeout_jiffies);
|
||||
int scoutfs_net_response(struct super_block *sb,
|
||||
struct scoutfs_net_connection *conn,
|
||||
u8 cmd, u64 id, int error, void *resp, u16 resp_len);
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "totl.h"
|
||||
#include "util.h"
|
||||
#include "quota.h"
|
||||
#include "trans.h"
|
||||
#include "counters.h"
|
||||
#include "scoutfs_trace.h"
|
||||
|
||||
@@ -1086,6 +1087,10 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
ret = scoutfs_hold_trans(sb, true);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
down_write(&qtinf->rwsem);
|
||||
|
||||
if (is_add) {
|
||||
@@ -1095,28 +1100,30 @@ int scoutfs_quota_mod_rule(struct super_block *sb, bool is_add,
|
||||
else if (ret == 0)
|
||||
ret = -EEXIST;
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
goto release;
|
||||
|
||||
rule_to_rule_val(&rv, &rule);
|
||||
ret = scoutfs_item_create(sb, &key, &rv, sizeof(rv), lock);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
goto release;
|
||||
|
||||
} else {
|
||||
ret = find_rule(sb, &rule, &key, lock) ?:
|
||||
scoutfs_item_delete(sb, &key, lock);
|
||||
if (ret < 0)
|
||||
goto unlock;
|
||||
goto release;
|
||||
}
|
||||
|
||||
scoutfs_quota_invalidate(sb);
|
||||
ret = 0;
|
||||
|
||||
unlock:
|
||||
release:
|
||||
up_write(&qtinf->rwsem);
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
scoutfs_release_trans(sb);
|
||||
|
||||
out:
|
||||
scoutfs_unlock(sb, lock, SCOUTFS_LOCK_WRITE);
|
||||
|
||||
if (is_add)
|
||||
trace_scoutfs_quota_add_rule(sb, &rule, ret);
|
||||
else
|
||||
|
||||
@@ -638,7 +638,7 @@ static void scoutfs_server_commit_func(struct work_struct *work)
|
||||
ret = scoutfs_alloc_empty_list(sb, &server->alloc, &server->wri,
|
||||
server->meta_freed,
|
||||
server->other_freed);
|
||||
if (ret) {
|
||||
if (ret && ret != -ENOLINK) {
|
||||
scoutfs_err(sb, "server error emptying freed: %d", ret);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -95,6 +95,13 @@ struct srch_info {
|
||||
*/
|
||||
#define SRCH_COMPACT_DIRTY_LIMIT_BYTES (32 * 1024 * 1024)
|
||||
|
||||
/*
|
||||
* Generous per-RPC bound for the idempotent compact worker. A server
|
||||
* that hasn't answered in this long is assumed to be broken; dropping
|
||||
* the request lets the worker reschedule instead of blocking forever.
|
||||
*/
|
||||
#define COMPACT_RPC_TIMEOUT (5 * 60 * HZ)
|
||||
|
||||
static int sre_cmp(const struct scoutfs_srch_entry *a,
|
||||
const struct scoutfs_srch_entry *b)
|
||||
{
|
||||
@@ -2256,7 +2263,8 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
|
||||
|
||||
scoutfs_block_writer_init(sb, &wri);
|
||||
|
||||
ret = scoutfs_client_srch_get_compact(sb, sc);
|
||||
ret = scoutfs_client_srch_get_compact_timeout(sb, sc,
|
||||
COMPACT_RPC_TIMEOUT);
|
||||
if (ret >= 0)
|
||||
trace_scoutfs_srch_compact_client_recv(sb, sc);
|
||||
if (ret < 0 || sc->nr == 0)
|
||||
@@ -2287,7 +2295,8 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
|
||||
sc->flags |= ret < 0 ? SCOUTFS_SRCH_COMPACT_FLAG_ERROR : 0;
|
||||
|
||||
trace_scoutfs_srch_compact_client_send(sb, sc);
|
||||
err = scoutfs_client_srch_commit_compact(sb, sc);
|
||||
err = scoutfs_client_srch_commit_compact_timeout(sb, sc,
|
||||
COMPACT_RPC_TIMEOUT);
|
||||
if (err < 0 && ret == 0)
|
||||
ret = err;
|
||||
out:
|
||||
|
||||
@@ -30,6 +30,11 @@ void scoutfs_totl_merge_init(struct scoutfs_totl_merging *merg)
|
||||
memset(merg, 0, sizeof(struct scoutfs_totl_merging));
|
||||
}
|
||||
|
||||
/*
|
||||
* bin the incoming merge inputs so that we can resolve delta items
|
||||
* properly. Finalized logs that are merge inputs are kept separately
|
||||
* from those that are not.
|
||||
*/
|
||||
void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
|
||||
u64 seq, u8 flags, void *val, int val_len, int fic)
|
||||
{
|
||||
@@ -39,10 +44,10 @@ void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
|
||||
merg->fs_seq = seq;
|
||||
merg->fs_total = le64_to_cpu(tval->total);
|
||||
merg->fs_count = le64_to_cpu(tval->count);
|
||||
} else if (fic & FIC_FINALIZED) {
|
||||
merg->fin_seq = seq;
|
||||
merg->fin_total += le64_to_cpu(tval->total);
|
||||
merg->fin_count += le64_to_cpu(tval->count);
|
||||
} else if (fic & FIC_MERGE_INPUT) {
|
||||
merg->inp_seq = seq;
|
||||
merg->inp_total += le64_to_cpu(tval->total);
|
||||
merg->inp_count += le64_to_cpu(tval->count);
|
||||
} else {
|
||||
merg->log_seq = seq;
|
||||
merg->log_total += le64_to_cpu(tval->total);
|
||||
@@ -53,15 +58,18 @@ void scoutfs_totl_merge_contribute(struct scoutfs_totl_merging *merg,
|
||||
/*
|
||||
* .totl. item merging has to be careful because the log btree merging
|
||||
* code can write partial results to the fs_root. This means that a
|
||||
* reader can see both cases where new finalized logs should be applied
|
||||
* to the old fs items and where old finalized logs have already been
|
||||
* applied to the partially merged fs items. Currently active logged
|
||||
* items are always applied on top of all cases.
|
||||
* reader can see both cases where merge input deltas should be applied
|
||||
* to the old fs items and where they have already been applied to the
|
||||
* partially merged fs items.
|
||||
*
|
||||
* Only finalized log trees that are inputs to the current merge cycle
|
||||
* are tracked in the inp_ bucket. Finalized trees that aren't merge
|
||||
* inputs and active log trees are always applied unconditionally since
|
||||
* they cannot be in fs_root.
|
||||
*
|
||||
* These cases are differentiated with a combination of sequence numbers
|
||||
* in items, the count of contributing xattrs, and a flag
|
||||
* differentiating finalized and active logged items. This lets us
|
||||
* recognize all cases, including when finalized logs were merged and
|
||||
* in items and the count of contributing xattrs. This lets us
|
||||
* recognize all cases, including when merge inputs were merged and
|
||||
* deleted the fs item.
|
||||
*/
|
||||
void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total, __u64 *count)
|
||||
@@ -75,14 +83,14 @@ void scoutfs_totl_merge_resolve(struct scoutfs_totl_merging *merg, __u64 *total,
|
||||
*count = merg->fs_count;
|
||||
}
|
||||
|
||||
/* apply finalized logs if they're newer or creating */
|
||||
if (((merg->fs_seq != 0) && (merg->fin_seq > merg->fs_seq)) ||
|
||||
((merg->fs_seq == 0) && (merg->fin_count > 0))) {
|
||||
*total += merg->fin_total;
|
||||
*count += merg->fin_count;
|
||||
/* apply merge input deltas if they're newer or creating */
|
||||
if (((merg->fs_seq != 0) && (merg->inp_seq > merg->fs_seq)) ||
|
||||
((merg->fs_seq == 0) && (merg->inp_count > 0))) {
|
||||
*total += merg->inp_total;
|
||||
*count += merg->inp_count;
|
||||
}
|
||||
|
||||
/* always apply active logs which must be newer than fs and finalized */
|
||||
/* always apply non-input finalized and active logs */
|
||||
if (merg->log_seq > 0) {
|
||||
*total += merg->log_total;
|
||||
*count += merg->log_count;
|
||||
|
||||
@@ -7,9 +7,9 @@ struct scoutfs_totl_merging {
|
||||
u64 fs_seq;
|
||||
u64 fs_total;
|
||||
u64 fs_count;
|
||||
u64 fin_seq;
|
||||
u64 fin_total;
|
||||
s64 fin_count;
|
||||
u64 inp_seq;
|
||||
u64 inp_total;
|
||||
s64 inp_count;
|
||||
u64 log_seq;
|
||||
u64 log_total;
|
||||
s64 log_count;
|
||||
|
||||
@@ -195,7 +195,8 @@ static int retry_forever(struct super_block *sb, int (*func)(struct super_block
|
||||
retrying = true;
|
||||
}
|
||||
|
||||
if (scoutfs_forcing_unmount(sb)) {
|
||||
if (scoutfs_forcing_unmount(sb) ||
|
||||
scoutfs_unmounting(sb)) {
|
||||
ret = -ENOLINK;
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -46,6 +46,7 @@ static char *names[] = {
|
||||
[SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE] = "srch_merge_stop_safe",
|
||||
[SCOUTFS_TRIGGER_STATFS_LOCK_PURGE] = "statfs_lock_purge",
|
||||
[SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE] = "reclaim_skip_finalize",
|
||||
[SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL] = "log_merge_force_partial",
|
||||
};
|
||||
|
||||
bool scoutfs_trigger_test_and_clear(struct super_block *sb, unsigned int t)
|
||||
|
||||
@@ -9,6 +9,7 @@ enum scoutfs_trigger {
|
||||
SCOUTFS_TRIGGER_SRCH_MERGE_STOP_SAFE,
|
||||
SCOUTFS_TRIGGER_STATFS_LOCK_PURGE,
|
||||
SCOUTFS_TRIGGER_RECLAIM_SKIP_FINALIZE,
|
||||
SCOUTFS_TRIGGER_LOG_MERGE_FORCE_PARTIAL,
|
||||
SCOUTFS_TRIGGER_NR,
|
||||
};
|
||||
|
||||
|
||||
@@ -95,6 +95,7 @@ struct wkic_info {
|
||||
/* block reading slow path */
|
||||
struct mutex roots_mutex;
|
||||
struct scoutfs_net_roots roots;
|
||||
u64 merge_input_seq;
|
||||
u64 roots_read_seq;
|
||||
ktime_t roots_expire;
|
||||
|
||||
@@ -805,29 +806,79 @@ static void free_page_list(struct super_block *sb, struct list_head *list)
|
||||
* read_seq number so that we can compare the age of the items in cached
|
||||
* pages. Only one request to refresh the roots is in progress at a
|
||||
* time. This is the slow path that's only used when the cache isn't
|
||||
* populated and the roots aren't cached. The root request is fast
|
||||
* enough, especially compared to the resulting item reading IO, that we
|
||||
* don't mind hiding it behind a trivial mutex.
|
||||
* populated and the roots aren't cached.
|
||||
*
|
||||
* We read roots directly from the on-disk superblock rather than
|
||||
* requesting them from the server so that we can also read the
|
||||
* log_merge btree from the same superblock. The merge status item
|
||||
* seq tells us which finalized log trees are inputs to the current
|
||||
* merge, which is needed to correctly resolve totl delta items.
|
||||
*/
|
||||
static int get_roots(struct super_block *sb, struct wkic_info *winf,
|
||||
struct scoutfs_net_roots *roots_ret, u64 *read_seq, bool force_new)
|
||||
static int refresh_roots(struct super_block *sb, struct wkic_info *winf)
|
||||
{
|
||||
struct scoutfs_super_block *super;
|
||||
struct scoutfs_log_merge_status *stat;
|
||||
SCOUTFS_BTREE_ITEM_REF(iref);
|
||||
struct scoutfs_key key;
|
||||
int ret;
|
||||
|
||||
super = kmalloc(sizeof(*super), GFP_NOFS);
|
||||
if (!super)
|
||||
return -ENOMEM;
|
||||
|
||||
ret = scoutfs_read_super(sb, super);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
winf->roots = (struct scoutfs_net_roots){
|
||||
.fs_root = super->fs_root,
|
||||
.logs_root = super->logs_root,
|
||||
.srch_root = super->srch_root,
|
||||
};
|
||||
|
||||
winf->merge_input_seq = 0;
|
||||
if (super->log_merge.ref.blkno) {
|
||||
scoutfs_key_set_zeros(&key);
|
||||
key.sk_zone = SCOUTFS_LOG_MERGE_STATUS_ZONE;
|
||||
ret = scoutfs_btree_lookup(sb, &super->log_merge, &key, &iref);
|
||||
if (ret == 0) {
|
||||
if (iref.val_len == sizeof(*stat)) {
|
||||
stat = iref.val;
|
||||
winf->merge_input_seq = le64_to_cpu(stat->seq);
|
||||
} else {
|
||||
ret = -EUCLEAN;
|
||||
}
|
||||
scoutfs_btree_put_iref(&iref);
|
||||
} else if (ret == -ENOENT) {
|
||||
ret = 0;
|
||||
}
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
winf->roots_read_seq++;
|
||||
winf->roots_expire = ktime_add_ms(ktime_get_raw(), WKIC_CACHE_LIFETIME_MS);
|
||||
out:
|
||||
kfree(super);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int get_roots(struct super_block *sb, struct wkic_info *winf,
|
||||
struct scoutfs_net_roots *roots_ret, u64 *merge_input_seq,
|
||||
u64 *read_seq, bool force_new)
|
||||
{
|
||||
struct scoutfs_net_roots roots;
|
||||
int ret;
|
||||
|
||||
mutex_lock(&winf->roots_mutex);
|
||||
|
||||
if (force_new || ktime_before(winf->roots_expire, ktime_get_raw())) {
|
||||
ret = scoutfs_client_get_roots(sb, &roots);
|
||||
ret = refresh_roots(sb, winf);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
|
||||
winf->roots = roots;
|
||||
winf->roots_read_seq++;
|
||||
winf->roots_expire = ktime_add_ms(ktime_get_raw(), WKIC_CACHE_LIFETIME_MS);
|
||||
}
|
||||
|
||||
*roots_ret = winf->roots;
|
||||
*merge_input_seq = winf->merge_input_seq;
|
||||
*read_seq = winf->roots_read_seq;
|
||||
ret = 0;
|
||||
out:
|
||||
@@ -870,24 +921,30 @@ static int insert_read_pages(struct super_block *sb, struct wkic_info *winf,
|
||||
struct scoutfs_key end;
|
||||
struct wkic_page *wpage;
|
||||
LIST_HEAD(pages);
|
||||
u64 read_seq;
|
||||
u64 merge_input_seq;
|
||||
u64 read_seq = 0;
|
||||
int ret;
|
||||
|
||||
ret = 0;
|
||||
retry_stale:
|
||||
ret = get_roots(sb, winf, &roots, &read_seq, ret == -ESTALE);
|
||||
ret = get_roots(sb, winf, &roots, &merge_input_seq, &read_seq, ret == -ESTALE);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
goto check_stale;
|
||||
|
||||
start = *range_start;
|
||||
end = *range_end;
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, key, range_start, &start, &end,
|
||||
read_items_cb, &root);
|
||||
ret = scoutfs_forest_read_items_roots(sb, &roots, merge_input_seq, key, range_start,
|
||||
&start, &end, read_items_cb, &root);
|
||||
trace_scoutfs_wkic_read_items(sb, key, &start, &end);
|
||||
check_stale:
|
||||
ret = scoutfs_block_check_stale(sb, ret, &saved, &roots.fs_root.ref, &roots.logs_root.ref);
|
||||
if (ret < 0) {
|
||||
if (ret == -ESTALE)
|
||||
if (ret == -ESTALE) {
|
||||
/* not safe to retry due to delta items, must restart clean */
|
||||
free_item_tree(&root);
|
||||
root = RB_ROOT;
|
||||
goto retry_stale;
|
||||
}
|
||||
goto out;
|
||||
}
|
||||
|
||||
|
||||
@@ -1265,6 +1265,7 @@ int scoutfs_xattr_drop(struct super_block *sb, u64 ino,
|
||||
ret = parse_indx_key(&tag_key, xat->name, xat->name_len, ino);
|
||||
if (ret < 0)
|
||||
goto out;
|
||||
scoutfs_xattr_set_indx_key_xid(&tag_key, le64_to_cpu(key.skx_id));
|
||||
}
|
||||
|
||||
if ((tgs.totl || tgs.indx) && locked_zone != tag_key.sk_zone) {
|
||||
|
||||
@@ -20,9 +20,6 @@ t_filter_fs()
|
||||
# [ 2687.691366] BUG: KASAN: stack-out-of-bounds in get_reg+0x1bc/0x230
|
||||
# ...
|
||||
# [ 2687.706220] ==================================================================
|
||||
# [ 2687.707284] Disabling lock debugging due to kernel taint
|
||||
#
|
||||
# That final lock debugging message may not be included.
|
||||
#
|
||||
ignore_harmless_unwind_kasan_stack_oob()
|
||||
{
|
||||
@@ -46,10 +43,6 @@ awk '
|
||||
saved=""
|
||||
}
|
||||
( in_soob == 2 && $0 ~ /==================================================================/ ) {
|
||||
in_soob = 3
|
||||
soob_nr = NR
|
||||
}
|
||||
( in_soob == 3 && NR > soob_nr && $0 !~ /Disabling lock debugging/ ) {
|
||||
in_soob = 0
|
||||
}
|
||||
( !in_soob ) { print $0 }
|
||||
@@ -61,6 +54,58 @@ awk '
|
||||
'
|
||||
}
|
||||
|
||||
#
|
||||
# in el97+, XFS can generate a spurious lockdep circular dependency
|
||||
# warning about reclaim. Fixed upstream in e.g. v5.7-rc4-129-g6dcde60efd94
|
||||
#
|
||||
ignore_harmless_xfs_lockdep_warning()
{
	#
	# Filters stdin to stdout with a small awk state machine:
	#
	#  in_block 0: lines pass through.  A line containing the long run
	#              of "=" characters starts a candidate block and is
	#              held in buf instead of being printed.
	#  in_block 1: the line right after the "=" line decides.  If it is
	#              the lockdep circular dependency WARNING the block is
	#              buffered, otherwise the held line and this line are
	#              printed unchanged.
	#  in_block 2: lines are buffered through the one containing
	#              </TASK>.  The buffered block is dropped if it
	#              mentions both an xfs ilock class and fs_reclaim,
	#              otherwise it is printed in full.
	#
	# Anything still buffered at end of input is printed rather than
	# lost.
	#
	awk '
	BEGIN {
		in_block = 0
		block_nr = 0
		buf = ""
	}
	( !in_block && $0 ~ /======================================================/ ) {
		in_block = 1
		block_nr = NR
		buf = $0 "\n"
		next
	}
	( in_block == 1 && NR == (block_nr + 1) ) {
		if (match($0, /WARNING: possible circular locking dependency detected/) != 0) {
			in_block = 2
			buf = buf $0 "\n"
		} else {
			in_block = 0
			printf "%s", buf
			print $0
			buf = ""
		}
		next
	}
	( in_block == 2 ) {
		buf = buf $0 "\n"
		if ($0 ~ /<\/TASK>/) {
			if (buf ~ /xfs_(nondir_|dir_)?ilock_class/ && buf ~ /fs_reclaim/) {
				# known xfs lockdep false positive, discard
			} else {
				printf "%s", buf
			}
			in_block = 0
			buf = ""
		}
		next
	}
	{ print $0 }
	END {
		if (buf) {
			printf "%s", buf
		}
	}
	'
}
|
||||
|
||||
#
|
||||
# Filter out expected messages. Putting messages here implies that
|
||||
# tests aren't relying on messages to discover failures.. they're
|
||||
@@ -176,6 +221,10 @@ t_filter_dmesg()
|
||||
# creating block devices may trigger this
|
||||
re="$re|block device autoloading is deprecated and will be removed."
|
||||
|
||||
# lockdep or kasan warnings can cause this
|
||||
re="$re|Disabling lock debugging due to kernel taint"
|
||||
|
||||
egrep -v "($re)" | \
|
||||
ignore_harmless_unwind_kasan_stack_oob
|
||||
ignore_harmless_unwind_kasan_stack_oob | \
|
||||
ignore_harmless_xfs_lockdep_warning
|
||||
}
|
||||
|
||||
54
tests/golden/basic-xattr-indx
Normal file
54
tests/golden/basic-xattr-indx
Normal file
@@ -0,0 +1,54 @@
|
||||
== testing invalid read-xattr-index arguments
|
||||
bad index position entry argument 'bad', it must be in the form "a.b.ino" where each value can be prefixed by '0' for octal or '0x' for hex
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
bad index position entry argument '1.2', it must be in the form "a.b.ino" where each value can be prefixed by '0' for octal or '0x' for hex
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
initial major index position '256' must be between 0 and 255, inclusive.
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
first index position 1.2.3 must be less than last index position 0.0.0
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
first index position 1.2.0 must be less than last index position 1.1.2
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
first index position 2.2.2 must be less than last index position 2.2.1
|
||||
scoutfs: read-xattr-index failed: Invalid argument (22)
|
||||
== testing invalid names
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/invalid: Numerical result out of range
|
||||
== testing boundary values
|
||||
0.0 found
|
||||
255.max found
|
||||
== indx xattr must have no value
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/noval: Invalid argument
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/noval: Invalid argument
|
||||
== set indx xattr and verify index entry
|
||||
found
|
||||
== setting same indx xattr again is a no-op
|
||||
found
|
||||
== removing non-existent indx xattr succeeds
|
||||
setfattr: /mnt/test/test/basic-xattr-indx/file: No such attribute
|
||||
still found
|
||||
== explicit xattr removal cleans up index entry
|
||||
== file deletion cleans up index entry
|
||||
found before delete
|
||||
== multiple indx xattrs on one file cleaned up by deletion
|
||||
entries before delete: 2
|
||||
entries after delete: 0
|
||||
== partial removal leaves other entries
|
||||
300 found
|
||||
== multiple files at same index position
|
||||
files at same position: 2
|
||||
surviving file found
|
||||
== cross-mount visibility
|
||||
found on mount 1
|
||||
== duplicate position deduplication
|
||||
entries for same position: 1
|
||||
3
tests/golden/totl-merge-read
Normal file
3
tests/golden/totl-merge-read
Normal file
@@ -0,0 +1,3 @@
|
||||
== setup
|
||||
expected 4681
|
||||
== cleanup
|
||||
@@ -694,8 +694,8 @@ for t in $tests; do
|
||||
if [ "$sts" == "$T_PASS_STATUS" ]; then
|
||||
dmesg | t_filter_dmesg > "$T_TMPDIR/dmesg.after"
|
||||
diff --old-line-format="" --unchanged-line-format="" \
|
||||
"$T_TMPDIR/dmesg.before" "$T_TMPDIR/dmesg.after" > \
|
||||
"$T_TMPDIR/dmesg.new"
|
||||
"$T_TMPDIR/dmesg.before" "$T_TMPDIR/dmesg.after" | \
|
||||
grep -v '^$' > "$T_TMPDIR/dmesg.new"
|
||||
|
||||
if [ -s "$T_TMPDIR/dmesg.new" ]; then
|
||||
message="unexpected messages in dmesg"
|
||||
|
||||
@@ -26,7 +26,9 @@ srch-basic-functionality.sh
|
||||
simple-xattr-unit.sh
|
||||
retention-basic.sh
|
||||
totl-xattr-tag.sh
|
||||
basic-xattr-indx.sh
|
||||
quota.sh
|
||||
totl-merge-read.sh
|
||||
lock-refleak.sh
|
||||
lock-shrink-consistency.sh
|
||||
lock-shrink-read-race.sh
|
||||
|
||||
143
tests/tests/basic-xattr-indx.sh
Normal file
143
tests/tests/basic-xattr-indx.sh
Normal file
@@ -0,0 +1,143 @@
|
||||
#
|
||||
# Test basic .indx. xattr tag functionality and index entry lifecycle
|
||||
#
|
||||
|
||||
# Every external command this script runs, including the ones used inside
# read_xattr_index (sync) and in the result filters (awk, wc, seq).
t_require_commands touch rm setfattr scoutfs stat awk seq sync wc
# The cross-mount case queries the index through mount 1 as well as mount 0.
t_require_mounts 2
|
||||
|
||||
# Query the xattr index through one mount and print what it returns.
#
#   $1     mount number to query; mount 0 is used when it is omitted or empty
#   $2...  passed through unchanged to "scoutfs read-xattr-index"
#
# Before querying, runs sync and writes 1 to the mount's
# drop_weak_item_cache debugfs file (presumably so the read reflects
# committed items rather than cached ones; confirm against the harness).
read_xattr_index()
{
	local nr="${1:-0}"
	# Look the mount path up by name; eval/echo would re-split and glob it.
	local mnt_var="T_M$nr"
	local mnt="${!mnt_var}"

	# Only consume the mount number when the caller actually passed one.
	if [ "$#" -gt 0 ]; then
		shift
	fi

	sync
	echo 1 > "$(t_debugfs_path "$nr")/drop_weak_item_cache"
	scoutfs read-xattr-index -p "$mnt" "$@"
}
|
||||
|
||||
MAJOR=5
MINOR=100

# Names and ranges reused by most of the cases below.
INDX_XATTR="scoutfs.hide.indx.test.$MAJOR.$MINOR"
MAJOR_FIRST="$MAJOR.0.0"
MAJOR_LAST="$MAJOR.-1.-1"
POS_FIRST="$MAJOR.$MINOR.0"
POS_LAST="$MAJOR.$MINOR.-1"

# Filter index output on stdin by its third field.
#
#   $1  inode number the third field must equal
#   $2  optional message; when given it is printed once per matching line,
#       otherwise the matching lines themselves are printed
#
# The inode is concatenated with "" so awk compares as strings, exactly
# like comparing against a quoted literal.
match_ino()
{
	if [ "$#" -ge 2 ]; then
		awk -v ino="$1" -v msg="$2" '$3 == (ino "") { print msg }'
	else
		awk -v ino="$1" '$3 == (ino "")'
	fi
}

echo "== testing invalid read-xattr-index arguments"
# each entry is one argument list; it is split on whitespace on purpose
for bad_args in \
	"bad" \
	"1.2" \
	"1.2.3 256.0.0" \
	"1.2.3 0.0.0" \
	"1.2.0 1.1.2" \
	"2.2.2 2.2.1"; do
	scoutfs read-xattr-index -p "$T_M0" $bad_args 2>&1
done

echo "== testing invalid names"
touch "$T_D0/invalid"
long_name=$(printf 'x%.0s' $(seq 1 240))
for bad_suffix in \
	"test.$MAJOR" \
	"test" \
	"test.." \
	"test..$MINOR" \
	"test.$MAJOR." \
	"test.256.$MINOR" \
	"test.abc.$MINOR" \
	"test.$MAJOR.abc" \
	"test.-1.$MINOR" \
	"test.$MAJOR.-1" \
	"test.18446744073709551616.$MINOR" \
	"${long_name}.$MAJOR.$MINOR"; do
	setfattr -n "scoutfs.hide.indx.$bad_suffix" "$T_D0/invalid" 2>&1 | t_filter_fs
done
rm -f "$T_D0/invalid"

echo "== testing boundary values"
touch "$T_D0/boundary"
INO=$(stat -c "%i" "$T_D0/boundary")
# smallest position
setfattr -n scoutfs.hide.indx.test.0.0 "$T_D0/boundary"
read_xattr_index 0 0.0.0 0.0.-1 | match_ino "$INO" "0.0 found"
setfattr -x scoutfs.hide.indx.test.0.0 "$T_D0/boundary"
# largest major with the largest 64-bit minor
setfattr -n scoutfs.hide.indx.test.255.18446744073709551615 "$T_D0/boundary"
read_xattr_index 0 255.0.0 255.-1.-1 | match_ino "$INO" "255.max found"
setfattr -x scoutfs.hide.indx.test.255.18446744073709551615 "$T_D0/boundary"
rm -f "$T_D0/boundary"

echo "== indx xattr must have no value"
touch "$T_D0/noval"
for val in "" 0 1; do
	setfattr -n "$INDX_XATTR" -v "$val" "$T_D0/noval" 2>&1 | t_filter_fs
done
rm -f "$T_D0/noval"

echo "== set indx xattr and verify index entry"
touch "$T_D0/file"
INO=$(stat -c "%i" "$T_D0/file")
setfattr -n "$INDX_XATTR" "$T_D0/file"
read_xattr_index 0 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" "found"

echo "== setting same indx xattr again is a no-op"
setfattr -n "$INDX_XATTR" "$T_D0/file"
read_xattr_index 0 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" "found"

echo "== removing non-existent indx xattr succeeds"
setfattr -x "scoutfs.hide.indx.nonexistent.$MAJOR.999" "$T_D0/file" 2>&1 | t_filter_fs
read_xattr_index 0 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" "still found"

echo "== explicit xattr removal cleans up index entry"
setfattr -x "$INDX_XATTR" "$T_D0/file"
# prints only if the entry wrongly survived the removal
read_xattr_index 0 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" "found orphan"
rm -f "$T_D0/file"

echo "== file deletion cleans up index entry"
touch "$T_D0/file2"
INO=$(stat -c "%i" "$T_D0/file2")
setfattr -n "$INDX_XATTR" "$T_D0/file2"
read_xattr_index 0 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" "found before delete"
rm -f "$T_D0/file2"
read_xattr_index 0 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" "found orphan after delete"

echo "== multiple indx xattrs on one file cleaned up by deletion"
touch "$T_D0/file3"
INO=$(stat -c "%i" "$T_D0/file3")
setfattr -n "scoutfs.hide.indx.a.$MAJOR.200" "$T_D0/file3"
setfattr -n "scoutfs.hide.indx.b.$MAJOR.300" "$T_D0/file3"
BEFORE=$(read_xattr_index 0 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" | wc -l)
echo "entries before delete: $BEFORE"
rm -f "$T_D0/file3"
AFTER=$(read_xattr_index 0 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" | wc -l)
echo "entries after delete: $AFTER"

echo "== partial removal leaves other entries"
touch "$T_D0/partial"
INO=$(stat -c "%i" "$T_D0/partial")
setfattr -n "scoutfs.hide.indx.a.$MAJOR.200" "$T_D0/partial"
setfattr -n "scoutfs.hide.indx.b.$MAJOR.300" "$T_D0/partial"
setfattr -x "scoutfs.hide.indx.a.$MAJOR.200" "$T_D0/partial"
read_xattr_index 0 "$MAJOR.200.0" "$MAJOR.200.-1" | match_ino "$INO" "200 found"
read_xattr_index 0 "$MAJOR.300.0" "$MAJOR.300.-1" | match_ino "$INO" "300 found"
rm -f "$T_D0/partial"

echo "== multiple files at same index position"
touch "$T_D0/multi_a" "$T_D0/multi_b"
INO_A=$(stat -c "%i" "$T_D0/multi_a")
INO_B=$(stat -c "%i" "$T_D0/multi_b")
for multi in multi_a multi_b; do
	setfattr -n "$INDX_XATTR" "$T_D0/$multi"
done
COUNT=$(read_xattr_index 0 "$POS_FIRST" "$POS_LAST" | wc -l)
echo "files at same position: $COUNT"
rm -f "$T_D0/multi_a"
read_xattr_index 0 "$POS_FIRST" "$POS_LAST" | match_ino "$INO_A" "deleted file still found"
read_xattr_index 0 "$POS_FIRST" "$POS_LAST" | match_ino "$INO_B" "surviving file found"
rm -f "$T_D0/multi_b"

echo "== cross-mount visibility"
touch "$T_D0/file4"
INO=$(stat -c "%i" "$T_D0/file4")
setfattr -n "$INDX_XATTR" "$T_D0/file4"
# written through mount 0, read back through mount 1
read_xattr_index 1 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" "found on mount 1"
rm -f "$T_D0/file4"
read_xattr_index 1 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" "found orphan on mount 1"

echo "== duplicate position deduplication"
touch "$T_D0/file5"
INO=$(stat -c "%i" "$T_D0/file5")
# two differently named xattrs that share one major.minor position
for dup_name in aa bb; do
	setfattr -n "scoutfs.hide.indx.$dup_name.$MAJOR.$MINOR" "$T_D0/file5"
done
COUNT=$(read_xattr_index 0 "$MAJOR_FIRST" "$MAJOR_LAST" | match_ino "$INO" | wc -l)
echo "entries for same position: $COUNT"
rm -f "$T_D0/file5"

t_pass
|
||||
50
tests/tests/totl-merge-read.sh
Normal file
50
tests/tests/totl-merge-read.sh
Normal file
@@ -0,0 +1,50 @@
|
||||
#
|
||||
# Test that merge_read_item() correctly updates the sequence number when
|
||||
# combining delta items from multiple finalized log trees. Each mount
|
||||
# sets a totl value in its own 3-bit lane (powers of 8) so that any
|
||||
# double-counting overflows the lane and is caught by: or(v, exp) != exp.
|
||||
#
|
||||
|
||||
# Every external command this script runs.
t_require_commands setfattr scoutfs awk find seq sync
t_require_mounts 5

echo "== setup"
# Each mount creates 2500 empty files; the mount number is part of the name.
for nr in $(t_fs_nrs); do
	dir_var="T_D$nr"
	d="${!dir_var}"
	for i in $(seq 1 2500); do
		: > "$d/f$nr$i"
	done
done
sync
t_force_log_merge

# One value per mount, consecutive powers of 8, so each mount's contribution
# sits in its own 3-bit lane.  expected is the sum of all five values.
# NOTE(review): with more than five mounts vals runs out; confirm that the
# harness never runs this test with more.
vals=(1 8 64 512 4096)
expected=4681
n=0
for nr in $(t_fs_nrs); do
	dir_var="T_D$nr"
	d="${!dir_var}"
	v=${vals[$((n++))]}
	# total name $i receives one contribution from every mount
	for i in $(seq 1 2500); do
		setfattr -n "scoutfs.totl.t.$i.0.0" -v "$v" "$d/f$nr$i"
	done
done

t_trigger_arm_silent log_merge_force_partial $(t_server_nr)

# Start one background reader per mount.  Each one repeatedly drops the weak
# item cache and re-reads the totals, printing any line whose second field
# has bits set outside of expected.  All readers append to the same file.
bad="$T_TMPDIR/bad"
for nr in $(t_fs_nrs); do
	mnt_var="T_M$nr"
	mnt="${!mnt_var}"
	( while true; do
		echo 1 > "$(t_debugfs_path "$nr")/drop_weak_item_cache"
		scoutfs read-xattr-totals -p "$mnt" | \
			awk -F'[ =,]+' -v e="$expected" 'or($2+0,e) != e'
	done ) >> "$bad" &
done

echo "expected $expected"
t_force_log_merge
# jobs -p is left unquoted on purpose: every pid must be its own argument
t_silent_kill $(jobs -p)
# Anything the readers wrote is a total that did not fit the expected lanes.
if [ -s "$bad" ]; then
	echo "double-counted:"
	cat "$bad"
fi

echo "== cleanup"
for nr in $(t_fs_nrs); do
	dir_var="T_D$nr"
	find "${!dir_var}" -name "f$nr*" -delete
done
t_pass
|
||||
Reference in New Issue
Block a user