Compare commits

..

1 Commits

Author SHA1 Message Date
Auke Kok
8bfd35db0b Set BLOCK_BIT_ERROR on bio submit failure during forced unmount
block_submit_bio will return -ENOLINK if called during a forced
shutdown; the bio is never submitted, so no completion callback
will fire to set BLOCK_BIT_ERROR. Any other task waiting for this
specific bp will end up waiting forever.

To fix, fall through to the existing block_end_io call on the
error path instead of returning directly.  That means moving
the forcing_unmount check past the setup calls so block_end_io's
bookkeeping stays balanced. block_end_io then sets BLOCK_BIT_ERROR
and wakes up waiters just as it would on a failed async completion.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2026-04-20 17:01:12 -07:00
14 changed files with 88 additions and 473 deletions
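
The hunk that implements the bio-submit change itself is not among the excerpts below, so, as a rough illustration only, here is a minimal C sketch of the flow the commit message describes. block_submit_bio, block_end_io, BLOCK_BIT_ERROR and the forcing-unmount check come from the message and diffs; the function name, struct fields, setup helpers, and the block_end_io signature are hypothetical placeholders, not the actual patch.

/*
 * Sketch only: the shape of the submit path after the fix, with
 * assumed names for everything the commit message doesn't give.
 */
static int block_submit(struct super_block *sb, struct block_private *bp,
			struct bio *bio)
{
	int ret = 0;

	/* setup whose bookkeeping block_end_io expects to unwind (names assumed) */
	block_get(bp);
	atomic_inc(&bp->io_count);

	/* the forced-unmount check now sits past the setup calls */
	if (scoutfs_forcing_unmount(sb)) {
		ret = -ENOLINK;
		goto end_io;	/* fall through instead of returning directly */
	}

	submit_bio(bio);
	return 0;

end_io:
	/*
	 * block_end_io sets BLOCK_BIT_ERROR and wakes waiters just as it
	 * would on a failed async completion, so tasks waiting on this bp
	 * don't wait forever.
	 */
	block_end_io(sb, bp, ret);
	return ret;
}
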

View File

@@ -1,23 +1,6 @@
Versity ScoutFS Release Notes
=============================
---
v1.30
\
*Apr 21, 2026*
Fix a problem reading the accumulated totals of contributing .totl.
xattrs when log merging is in progress. The problem caused readers
of the totals to calculate the sums incorrectly.
Fix a problem updating quota rules. There was a race where updates
could be corrupted if they happened while a transaction was being
written.
Fix a problem deleting files with .indx. xattrs. The internal indexing
metadata wasn't properly deleted, so the files would still appear
present and visible in the index even though they no longer existed.
---
v1.29
\

View File

@@ -24,7 +24,6 @@
#include "trans.h"
#include "alloc.h"
#include "counters.h"
#include "msg.h"
#include "scoutfs_trace.h"
/*
@@ -497,11 +496,10 @@ static int dirty_alloc_blocks(struct super_block *sb,
struct scoutfs_block *fr_bl = NULL;
struct scoutfs_block *bl;
bool link_orig = false;
__le32 orig_first_nr;
u64 av_peek;
u64 av_old = 0;
u64 av_old;
u64 fr_peek;
u64 fr_old = 0;
u64 fr_old;
int ret;
if (alloc->dirty_avail_bl != NULL)
@@ -511,7 +509,6 @@ static int dirty_alloc_blocks(struct super_block *sb,
/* undo dirty freed if we get an error after */
orig_freed = alloc->freed.ref;
orig_first_nr = alloc->freed.first_nr;
if (alloc->dirty_avail_bl != NULL) {
ret = 0;
@@ -565,17 +562,6 @@ static int dirty_alloc_blocks(struct super_block *sb,
/* sort dirty avail to encourage contiguous sorted meta blocks */
list_block_sort(av_bl->data);
lblk = fr_bl->data;
if (WARN_ON_ONCE(alloc->freed.ref.blkno != lblk->hdr.blkno)) {
scoutfs_err(sb, "dirty_alloc freed ref %llu hdr %llu av_old %llu fr_old %llu av_peek %llu fr_peek %llu link_orig %d",
le64_to_cpu(alloc->freed.ref.blkno),
le64_to_cpu(lblk->hdr.blkno),
av_old, fr_old, av_peek, fr_peek, link_orig);
ret = -EIO;
goto out;
}
lblk = NULL;
if (av_old)
list_block_add(&alloc->freed, fr_bl->data, av_old);
if (fr_old)
@@ -592,7 +578,6 @@ out:
if (fr_bl)
scoutfs_block_writer_forget(sb, wri, fr_bl);
alloc->freed.ref = orig_freed;
alloc->freed.first_nr = orig_first_nr;
}
mutex_unlock(&alloc->mutex);

View File

@@ -218,7 +218,6 @@ static void block_free_work(struct work_struct *work)
llist_for_each_entry_safe(bp, tmp, deleted, free_node) {
block_free(sb, bp);
cond_resched();
}
}
@@ -846,8 +845,6 @@ int scoutfs_block_dirty_ref(struct super_block *sb, struct scoutfs_alloc *alloc,
bp = BLOCK_PRIVATE(bl);
if (block_is_dirty(bp)) {
if (ref_blkno)
*ref_blkno = 0;
ret = 0;
goto out;
}

View File

@@ -59,31 +59,6 @@ struct client_info {
struct completion farewell_comp;
};
/*
* Reconnection to a new server completes pending sync requests with
* -ECONNRESET because their state in the old server was reclaimed at
* fence time. Transparently retry so callers don't surface the
* reconnect as a failed RPC; preserve the pre-drain behavior where a
* sync request was silently resent across failover. Shutdown paths
* break the loop via the errors that submit and wait already return.
*/
static int client_sync_request(struct super_block *sb,
struct scoutfs_net_connection *conn,
u8 cmd, void *arg, unsigned arg_len,
void *resp, size_t resp_len)
{
int ret;
for (;;) {
ret = scoutfs_net_sync_request(sb, conn, cmd, arg, arg_len,
resp, resp_len);
if (ret != -ECONNRESET)
return ret;
if (scoutfs_unmounting(sb) || scoutfs_forcing_unmount(sb))
return -ESHUTDOWN;
}
}
/*
* Ask for a new run of allocated inode numbers. The server can return
fewer than @count. It will succeed with nr == 0 if we've run out.
@@ -97,10 +72,10 @@ int scoutfs_client_alloc_inodes(struct super_block *sb, u64 count,
u64 tmp;
int ret;
ret = client_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_ALLOC_INODES,
&lecount, sizeof(lecount),
&ial, sizeof(ial));
ret = scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_ALLOC_INODES,
&lecount, sizeof(lecount),
&ial, sizeof(ial));
if (ret == 0) {
*ino = le64_to_cpu(ial.ino);
*nr = le64_to_cpu(ial.nr);
@@ -119,9 +94,9 @@ int scoutfs_client_get_log_trees(struct super_block *sb,
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_GET_LOG_TREES,
NULL, 0, lt, sizeof(*lt));
return scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_GET_LOG_TREES,
NULL, 0, lt, sizeof(*lt));
}
int scoutfs_client_commit_log_trees(struct super_block *sb,
@@ -129,9 +104,9 @@ int scoutfs_client_commit_log_trees(struct super_block *sb,
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
lt, sizeof(*lt), NULL, 0);
return scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
lt, sizeof(*lt), NULL, 0);
}
int scoutfs_client_get_roots(struct super_block *sb,
@@ -139,26 +114,9 @@ int scoutfs_client_get_roots(struct super_block *sb,
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_GET_ROOTS,
NULL, 0, roots, sizeof(*roots));
}
/*
* Bounded-wait get_roots for the orphan scan worker. The worker
* reschedules on error, so -ETIMEDOUT is treated like any other RPC
* failure and retries on the next scan.
*/
int scoutfs_client_get_roots_timeout(struct super_block *sb,
struct scoutfs_net_roots *roots,
unsigned long timeout_jiffies)
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return scoutfs_net_sync_request_timeout(sb, client->conn,
SCOUTFS_NET_CMD_GET_ROOTS,
NULL, 0, roots, sizeof(*roots),
timeout_jiffies);
return scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_GET_ROOTS,
NULL, 0, roots, sizeof(*roots));
}
int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq)
@@ -167,9 +125,9 @@ int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq)
__le64 last_seq;
int ret;
ret = client_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_GET_LAST_SEQ,
NULL, 0, &last_seq, sizeof(last_seq));
ret = scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_GET_LAST_SEQ,
NULL, 0, &last_seq, sizeof(last_seq));
if (ret == 0)
*seq = le64_to_cpu(last_seq);
@@ -182,34 +140,24 @@ static int client_lock_response(struct super_block *sb,
void *resp, unsigned int resp_len,
int error, void *data)
{
struct scoutfs_lock *lock = data;
if (error) {
scoutfs_lock_request_failed(sb, lock);
return 0;
}
if (resp_len != sizeof(struct scoutfs_net_lock))
return -EINVAL;
/* XXX error? */
return scoutfs_lock_grant_response(sb, resp);
}
/*
* Send a lock request to the server. The lock is anchored by
* request_pending so its address is stable until the response callback
* runs and clears request_pending on either the grant or error path.
*/
/* Send a lock request to the server. */
int scoutfs_client_lock_request(struct super_block *sb,
struct scoutfs_net_lock *nl,
struct scoutfs_lock *lock)
struct scoutfs_net_lock *nl)
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return scoutfs_net_submit_request(sb, client->conn,
SCOUTFS_NET_CMD_LOCK,
nl, sizeof(*nl),
client_lock_response, lock, NULL);
client_lock_response, NULL, NULL);
}
/* Send a lock response to the server. */
@@ -241,26 +189,9 @@ int scoutfs_client_srch_get_compact(struct super_block *sb,
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
NULL, 0, sc, sizeof(*sc));
}
/*
* Bounded-wait get_compact for the srch compact worker. The worker
* reschedules on any error and the compact work is idempotent, so
* -ETIMEDOUT just defers this round.
*/
int scoutfs_client_srch_get_compact_timeout(struct super_block *sb,
struct scoutfs_srch_compact *sc,
unsigned long timeout_jiffies)
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return scoutfs_net_sync_request_timeout(sb, client->conn,
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
NULL, 0, sc, sizeof(*sc),
timeout_jiffies);
return scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
NULL, 0, sc, sizeof(*sc));
}
/* Commit the result of a srch file compaction. */
@@ -269,27 +200,9 @@ int scoutfs_client_srch_commit_compact(struct super_block *sb,
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
res, sizeof(*res), NULL, 0);
}
/*
* Bounded-wait commit_compact for the srch compact worker. The server
* ignores partial work flagged with ERROR, so a timed-out commit
* (marked ERROR on this side) lets the server reclaim our allocators
* and reassign the compact on the next scheduled attempt.
*/
int scoutfs_client_srch_commit_compact_timeout(struct super_block *sb,
struct scoutfs_srch_compact *res,
unsigned long timeout_jiffies)
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return scoutfs_net_sync_request_timeout(sb, client->conn,
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
res, sizeof(*res), NULL, 0,
timeout_jiffies);
return scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
res, sizeof(*res), NULL, 0);
}
int scoutfs_client_get_log_merge(struct super_block *sb,
@@ -297,9 +210,9 @@ int scoutfs_client_get_log_merge(struct super_block *sb,
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_GET_LOG_MERGE,
NULL, 0, req, sizeof(*req));
return scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_GET_LOG_MERGE,
NULL, 0, req, sizeof(*req));
}
int scoutfs_client_commit_log_merge(struct super_block *sb,
@@ -307,9 +220,9 @@ int scoutfs_client_commit_log_merge(struct super_block *sb,
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_COMMIT_LOG_MERGE,
comp, sizeof(*comp), NULL, 0);
return scoutfs_net_sync_request(sb, client->conn,
SCOUTFS_NET_CMD_COMMIT_LOG_MERGE,
comp, sizeof(*comp), NULL, 0);
}
int scoutfs_client_send_omap_response(struct super_block *sb, u64 id,
@@ -341,30 +254,8 @@ int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
.req_id = 0,
};
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_OPEN_INO_MAP,
&args, sizeof(args), map, sizeof(*map));
}
/*
* Bounded-wait open_ino_map for the orphan scan worker. The scan
* reschedules on error; the delete path callers keep the unbounded
* retry.
*/
int scoutfs_client_open_ino_map_timeout(struct super_block *sb, u64 group_nr,
struct scoutfs_open_ino_map *map,
unsigned long timeout_jiffies)
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
struct scoutfs_open_ino_map_args args = {
.group_nr = cpu_to_le64(group_nr),
.req_id = 0,
};
return scoutfs_net_sync_request_timeout(sb, client->conn,
SCOUTFS_NET_CMD_OPEN_INO_MAP,
&args, sizeof(args),
map, sizeof(*map),
timeout_jiffies);
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_OPEN_INO_MAP,
&args, sizeof(args), map, sizeof(*map));
}
/* The client is asking the server for the current volume options */
@@ -372,8 +263,8 @@ int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_opti
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_GET_VOLOPT,
NULL, 0, volopt, sizeof(*volopt));
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_GET_VOLOPT,
NULL, 0, volopt, sizeof(*volopt));
}
/* The client is asking the server to update volume options */
@@ -381,8 +272,8 @@ int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_opti
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_SET_VOLOPT,
volopt, sizeof(*volopt), NULL, 0);
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_SET_VOLOPT,
volopt, sizeof(*volopt), NULL, 0);
}
/* The client is asking the server to clear volume options */
@@ -390,24 +281,24 @@ int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_op
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_CLEAR_VOLOPT,
volopt, sizeof(*volopt), NULL, 0);
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_CLEAR_VOLOPT,
volopt, sizeof(*volopt), NULL, 0);
}
int scoutfs_client_resize_devices(struct super_block *sb, struct scoutfs_net_resize_devices *nrd)
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_RESIZE_DEVICES,
nrd, sizeof(*nrd), NULL, 0);
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_RESIZE_DEVICES,
nrd, sizeof(*nrd), NULL, 0);
}
int scoutfs_client_statfs(struct super_block *sb, struct scoutfs_net_statfs *nst)
{
struct client_info *client = SCOUTFS_SB(sb)->client_info;
return client_sync_request(sb, client->conn, SCOUTFS_NET_CMD_STATFS,
NULL, 0, nst, sizeof(*nst));
return scoutfs_net_sync_request(sb, client->conn, SCOUTFS_NET_CMD_STATFS,
NULL, 0, nst, sizeof(*nst));
}
/*
@@ -755,12 +646,8 @@ void scoutfs_client_destroy(struct super_block *sb)
client_farewell_response,
NULL, NULL);
if (ret == 0) {
if (!wait_for_completion_timeout(&client->farewell_comp,
120 * HZ)) {
ret = -ETIMEDOUT;
} else {
ret = client->farewell_error;
}
wait_for_completion(&client->farewell_comp);
ret = client->farewell_error;
}
if (ret) {
scoutfs_inc_counter(sb, client_farewell_error);
@@ -774,16 +661,10 @@ void scoutfs_client_destroy(struct super_block *sb)
/* make sure worker isn't using the conn */
cancel_delayed_work_sync(&client->connect_dwork);
/*
* Drain the conn's workers before nulling client->conn. In-flight
* proc_workers dispatch request handlers that call back into client
* response helpers (e.g. scoutfs_client_lock_recover_response) which
* read client->conn; nulling it first races with those workers and
* causes submit_send to dereference a NULL conn->lock.
*/
/* make racing conn use explode */
conn = client->conn;
scoutfs_net_free_conn(sb, conn);
client->conn = NULL;
scoutfs_net_free_conn(sb, conn);
if (client->workq)
destroy_workqueue(client->workq);

View File

@@ -9,28 +9,18 @@ int scoutfs_client_commit_log_trees(struct super_block *sb,
struct scoutfs_log_trees *lt);
int scoutfs_client_get_roots(struct super_block *sb,
struct scoutfs_net_roots *roots);
int scoutfs_client_get_roots_timeout(struct super_block *sb,
struct scoutfs_net_roots *roots,
unsigned long timeout_jiffies);
u64 *scoutfs_client_bulk_alloc(struct super_block *sb);
int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq);
int scoutfs_client_lock_request(struct super_block *sb,
struct scoutfs_net_lock *nl,
struct scoutfs_lock *lock);
struct scoutfs_net_lock *nl);
int scoutfs_client_lock_response(struct super_block *sb, u64 net_id,
struct scoutfs_net_lock *nl);
int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
struct scoutfs_net_lock_recover *nlr);
int scoutfs_client_srch_get_compact(struct super_block *sb,
struct scoutfs_srch_compact *sc);
int scoutfs_client_srch_get_compact_timeout(struct super_block *sb,
struct scoutfs_srch_compact *sc,
unsigned long timeout_jiffies);
int scoutfs_client_srch_commit_compact(struct super_block *sb,
struct scoutfs_srch_compact *res);
int scoutfs_client_srch_commit_compact_timeout(struct super_block *sb,
struct scoutfs_srch_compact *res,
unsigned long timeout_jiffies);
int scoutfs_client_get_log_merge(struct super_block *sb,
struct scoutfs_log_merge_request *req);
int scoutfs_client_commit_log_merge(struct super_block *sb,
@@ -39,9 +29,6 @@ int scoutfs_client_send_omap_response(struct super_block *sb, u64 id,
struct scoutfs_open_ino_map *map);
int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
struct scoutfs_open_ino_map *map);
int scoutfs_client_open_ino_map_timeout(struct super_block *sb, u64 group_nr,
struct scoutfs_open_ino_map *map,
unsigned long timeout_jiffies);
int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);
int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt);

View File

@@ -62,7 +62,6 @@
EXPAND_COUNTER(btree_walk) \
EXPAND_COUNTER(btree_walk_restart) \
EXPAND_COUNTER(client_farewell_error) \
EXPAND_COUNTER(client_rpc_timeout) \
EXPAND_COUNTER(corrupt_btree_block_level) \
EXPAND_COUNTER(corrupt_btree_no_child_ref) \
EXPAND_COUNTER(corrupt_dirent_backref_name_len) \
@@ -139,7 +138,6 @@
EXPAND_COUNTER(lock_lock_error) \
EXPAND_COUNTER(lock_nonblock_eagain) \
EXPAND_COUNTER(lock_recover_request) \
EXPAND_COUNTER(lock_request_failed) \
EXPAND_COUNTER(lock_shrink_attempted) \
EXPAND_COUNTER(lock_shrink_request_failed) \
EXPAND_COUNTER(lock_unlock) \

View File

@@ -2074,14 +2074,6 @@ void scoutfs_inode_schedule_orphan_dwork(struct super_block *sb)
}
}
/*
* Generous per-RPC bound for the idempotent orphan scan worker. A
* server that hasn't answered in this long is assumed to be broken;
* dropping the request lets the scan reschedule instead of blocking
* forever.
*/
#define ORPHAN_SCAN_RPC_TIMEOUT (5 * 60 * HZ)
/*
* Find and delete inodes whose only remaining reference is the
* persistent orphan item that was created as they were unlinked.
@@ -2136,7 +2128,7 @@ static void inode_orphan_scan_worker(struct work_struct *work)
init_orphan_key(&last, U64_MAX);
omap.args.group_nr = cpu_to_le64(U64_MAX);
ret = scoutfs_client_get_roots_timeout(sb, &roots, ORPHAN_SCAN_RPC_TIMEOUT);
ret = scoutfs_client_get_roots(sb, &roots);
if (ret)
goto out;
@@ -2177,8 +2169,7 @@ static void inode_orphan_scan_worker(struct work_struct *work)
scoutfs_omap_calc_group_nrs(ino, &group_nr, &bit_nr);
if (le64_to_cpu(omap.args.group_nr) != group_nr) {
ret = scoutfs_client_open_ino_map_timeout(sb, group_nr, &omap,
ORPHAN_SCAN_RPC_TIMEOUT);
ret = scoutfs_client_open_ino_map(sb, group_nr, &omap);
if (ret < 0)
goto out;
}

View File

@@ -71,8 +71,6 @@
* relative to that lock state we resend.
*/
#define CLIENT_LOCK_WAIT_TIMEOUT (60 * HZ)
/*
* allocated per-super, freed on unmount.
*/
@@ -159,33 +157,6 @@ static void invalidate_inode(struct super_block *sb, u64 ino)
}
}
/*
* Remove all coverage items from the lock to tell users that their
* cache is stale. This is lock-internal bookkeeping that is safe to
* call during shutdown and unmount. The unconditional unlock/relock
* of cov_list_lock avoids sparse warnings from unbalanced locking in
* the trylock failure path.
*/
static void lock_clear_coverage(struct super_block *sb,
struct scoutfs_lock *lock)
{
struct scoutfs_lock_coverage *cov;
spin_lock(&lock->cov_list_lock);
while ((cov = list_first_entry_or_null(&lock->cov_list,
struct scoutfs_lock_coverage, head))) {
if (spin_trylock(&cov->cov_lock)) {
list_del_init(&cov->head);
cov->lock = NULL;
spin_unlock(&cov->cov_lock);
scoutfs_inc_counter(sb, lock_invalidate_coverage);
}
spin_unlock(&lock->cov_list_lock);
spin_lock(&lock->cov_list_lock);
}
spin_unlock(&lock->cov_list_lock);
}
/*
* Invalidate caches associated with this lock. Either we're
* invalidating a write to a read or we're invalidating to null. We
@@ -195,6 +166,7 @@ static void lock_clear_coverage(struct super_block *sb,
static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,
enum scoutfs_lock_mode prev, enum scoutfs_lock_mode mode)
{
struct scoutfs_lock_coverage *cov;
u64 ino, last;
int ret = 0;
@@ -218,7 +190,24 @@ static int lock_invalidate(struct super_block *sb, struct scoutfs_lock *lock,
/* have to invalidate if we're not in the only usable case */
if (!(prev == SCOUTFS_LOCK_WRITE && mode == SCOUTFS_LOCK_READ)) {
lock_clear_coverage(sb, lock);
/*
* Remove cov items to tell users that their cache is
* stale. The unlock pattern comes from avoiding bad
* sparse warnings when taking the else branch of a failed trylock.
*/
spin_lock(&lock->cov_list_lock);
while ((cov = list_first_entry_or_null(&lock->cov_list,
struct scoutfs_lock_coverage, head))) {
if (spin_trylock(&cov->cov_lock)) {
list_del_init(&cov->head);
cov->lock = NULL;
spin_unlock(&cov->cov_lock);
scoutfs_inc_counter(sb, lock_invalidate_coverage);
}
spin_unlock(&lock->cov_list_lock);
spin_lock(&lock->cov_list_lock);
}
spin_unlock(&lock->cov_list_lock);
/* invalidate inodes after removing coverage so drop/evict aren't covered */
if (lock->start.sk_zone == SCOUTFS_FS_ZONE) {
@@ -654,33 +643,6 @@ int scoutfs_lock_grant_response(struct super_block *sb,
return 0;
}
/*
* The lock request we sent to the server was dropped before we could
* receive a grant response. This happens when the client reconnects to
* a new server and completes pending requests with an error, since the
* old server's pending-request state was reclaimed at fence time.
*
* Clear request_pending so that a waiter in lock_key_range re-evaluates
* and sends a fresh request to the new server, and symmetrically put
* the lock so shrink's lru state matches the grant_response path.
*/
void scoutfs_lock_request_failed(struct super_block *sb,
struct scoutfs_lock *lock)
{
DECLARE_LOCK_INFO(sb, linfo);
scoutfs_inc_counter(sb, lock_request_failed);
spin_lock(&linfo->lock);
BUG_ON(!lock->request_pending);
lock->request_pending = 0;
wake_up(&lock->waitq);
put_lock(linfo, lock);
spin_unlock(&linfo->lock);
}
struct inv_req {
struct list_head head;
struct scoutfs_lock *lock;
@@ -752,13 +714,10 @@ static void lock_invalidate_worker(struct work_struct *work)
ireq = list_first_entry(&lock->inv_list, struct inv_req, head);
nl = &ireq->nl;
/* only lock protocol, inv can't call subsystems after shutdown or unmount */
if (!linfo->shutdown && !scoutfs_unmounting(sb)) {
/* only lock protocol, inv can't call subsystems after shutdown */
if (!linfo->shutdown) {
ret = lock_invalidate(sb, lock, nl->old_mode, nl->new_mode);
BUG_ON(ret < 0 && ret != -ENOLINK);
} else {
lock_clear_coverage(sb, lock);
scoutfs_item_invalidate(sb, &lock->start, &lock->end);
}
/* respond with the key and modes from the request, server might have died */
@@ -963,7 +922,7 @@ static bool try_shrink_lock(struct super_block *sb, struct lock_info *linfo, boo
spin_unlock(&linfo->lock);
if (lock) {
ret = scoutfs_client_lock_request(sb, &nl, lock);
ret = scoutfs_client_lock_request(sb, &nl);
if (ret < 0) {
scoutfs_inc_counter(sb, lock_shrink_request_failed);
@@ -994,9 +953,6 @@ static bool lock_wait_cond(struct super_block *sb, struct scoutfs_lock *lock,
!lock->request_pending;
spin_unlock(&linfo->lock);
if (!wake)
wake = scoutfs_unmounting(sb);
if (!wake)
scoutfs_inc_counter(sb, lock_wait);
@@ -1041,10 +997,8 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
return -EINVAL;
/* maybe catch _setup() and _shutdown order mistakes */
if (!linfo || linfo->shutdown) {
WARN_ON_ONCE(!scoutfs_unmounting(sb));
if (WARN_ON_ONCE(!linfo || linfo->shutdown))
return -ENOLCK;
}
/* have to lock before entering transactions */
if (WARN_ON_ONCE(scoutfs_trans_held()))
@@ -1070,11 +1024,6 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
break;
}
if (scoutfs_unmounting(sb)) {
ret = -ESHUTDOWN;
break;
}
/* the fast path where we can use the granted mode */
if (lock_modes_match(lock->mode, mode)) {
lock_inc_count(lock->users, mode);
@@ -1104,7 +1053,7 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
nl.old_mode = lock->mode;
nl.new_mode = mode;
ret = scoutfs_client_lock_request(sb, &nl, lock);
ret = scoutfs_client_lock_request(sb, &nl);
if (ret) {
spin_lock(&linfo->lock);
lock->request_pending = 0;
@@ -1118,9 +1067,8 @@ static int lock_key_range(struct super_block *sb, enum scoutfs_lock_mode mode, i
if (flags & SCOUTFS_LKF_INTERRUPTIBLE) {
ret = wait_event_interruptible(lock->waitq,
lock_wait_cond(sb, lock, mode));
} else if (!wait_event_timeout(lock->waitq,
lock_wait_cond(sb, lock, mode),
CLIENT_LOCK_WAIT_TIMEOUT)) {
} else {
wait_event(lock->waitq, lock_wait_cond(sb, lock, mode));
ret = 0;
}
@@ -1702,7 +1650,6 @@ void scoutfs_lock_destroy(struct super_block *sb)
list_del_init(&lock->inv_head);
lock->invalidate_pending = 0;
}
lock_clear_coverage(sb, lock);
lock_remove(linfo, lock);
lock_free(linfo, lock);
}

View File

@@ -60,8 +60,6 @@ struct scoutfs_lock_coverage {
int scoutfs_lock_grant_response(struct super_block *sb,
struct scoutfs_net_lock *nl);
void scoutfs_lock_request_failed(struct super_block *sb,
struct scoutfs_lock *lock);
int scoutfs_lock_invalidate_request(struct super_block *sb, u64 net_id,
struct scoutfs_net_lock *nl);
int scoutfs_lock_recover_request(struct super_block *sb, u64 net_id,

View File

@@ -1750,10 +1750,8 @@ void scoutfs_net_client_greeting(struct super_block *sb,
bool new_server)
{
struct net_info *ninf = SCOUTFS_SB(sb)->net_info;
scoutfs_net_response_t resp_func;
struct message_send *msend;
struct message_send *tmp;
void *resp_data;
/* only called on client connections :/ */
BUG_ON(conn->listening_conn);
@@ -1762,32 +1760,10 @@ void scoutfs_net_client_greeting(struct super_block *sb,
if (new_server) {
atomic64_set(&conn->recv_seq, 0);
/* drop stale responses; old server's state is gone */
list_for_each_entry_safe(msend, tmp, &conn->resend_queue, head){
if (nh_is_response(&msend->nh))
free_msend(ninf, conn, msend);
}
/*
* Complete pending requests with -ECONNRESET. Any state
* they depended on in the old server was reclaimed at
* fence time, so resending is wrong. Callers re-issue on
* the new server if they still care.
*/
while ((msend = list_first_entry_or_null(&conn->resend_queue,
struct message_send, head))) {
if (nh_is_response(&msend->nh))
break;
resp_func = msend->resp_func;
resp_data = msend->resp_data;
free_msend(ninf, conn, msend);
spin_unlock(&conn->lock);
call_resp_func(sb, conn, resp_func, resp_data, NULL, 0, -ECONNRESET);
spin_lock(&conn->lock);
}
}
set_valid_greeting(conn);
@@ -2014,9 +1990,8 @@ static int sync_response(struct super_block *sb,
* buffer. Errors returned can come from the remote request processing
* or local failure to send.
*
* The wait for the response uses a 60 second timeout loop that
* checks for unmount, returning -ESHUTDOWN if the mount is
* being torn down.
* The wait for the response is interruptible and can return
* -ERESTARTSYS if it is interrupted.
*
* -EOVERFLOW is returned if the response message's data_length doesn't
* match the caller's resp_len buffer.
@@ -2027,7 +2002,6 @@ int scoutfs_net_sync_request(struct super_block *sb,
void *resp, size_t resp_len)
{
struct sync_request_completion sreq;
struct message_send *msend;
int ret;
u64 id;
@@ -2040,124 +2014,13 @@ int scoutfs_net_sync_request(struct super_block *sb,
sync_response, &sreq, &id);
if (ret == 0) {
while (!wait_for_completion_timeout(&sreq.comp, 60 * HZ)) {
if (scoutfs_unmounting(sb)) {
ret = -ESHUTDOWN;
break;
}
}
if (ret == -ESHUTDOWN) {
spin_lock(&conn->lock);
msend = find_request(conn, cmd, id);
if (msend)
queue_dead_free(conn, msend);
spin_unlock(&conn->lock);
} else {
ret = sreq.error;
}
wait_for_completion(&sreq.comp);
ret = sreq.error;
}
return ret;
}
/*
* A bounded-wait variant of sync_request for idempotent background
* workers that must reschedule instead of blocking indefinitely on an
* unresponsive server. Returns -ETIMEDOUT if the response doesn't
* arrive within timeout_jiffies; the caller then treats it like any
* other RPC failure and retries on its normal reschedule cadence.
*
* Response state lives in a refcounted heap allocation rather than on
* the caller's stack so a late callback can't scribble into freed
* memory if we give up waiting. On timeout we race with an arriving
* response for the msend: if find_request wins we queue_dead_free and
* the callback won't fire (we drop its ref); otherwise the callback is
* already running so we wait for it to complete before returning.
*/
struct bounded_sync {
struct completion comp;
void *resp;
unsigned int resp_len;
int error;
atomic_t refs;
};
static void bounded_sync_put(struct bounded_sync *bs)
{
if (atomic_dec_and_test(&bs->refs))
kfree(bs);
}
static int bounded_sync_response(struct super_block *sb,
struct scoutfs_net_connection *conn,
void *resp, unsigned int resp_len,
int error, void *data)
{
struct bounded_sync *bs = data;
if (error == 0 && resp_len != bs->resp_len)
error = -EMSGSIZE;
if (error)
bs->error = error;
else if (resp_len)
memcpy(bs->resp, resp, resp_len);
complete(&bs->comp);
bounded_sync_put(bs);
return 0;
}
int scoutfs_net_sync_request_timeout(struct super_block *sb,
struct scoutfs_net_connection *conn,
u8 cmd, void *arg, unsigned arg_len,
void *resp, size_t resp_len,
unsigned long timeout_jiffies)
{
struct message_send *msend;
struct bounded_sync *bs;
int ret;
u64 id;
bs = kzalloc(sizeof(*bs), GFP_NOFS);
if (!bs)
return -ENOMEM;
init_completion(&bs->comp);
bs->resp = resp;
bs->resp_len = resp_len;
bs->error = 0;
atomic_set(&bs->refs, 2);
ret = scoutfs_net_submit_request(sb, conn, cmd, arg, arg_len,
bounded_sync_response, bs, &id);
if (ret) {
bounded_sync_put(bs);
bounded_sync_put(bs);
return ret;
}
if (wait_for_completion_timeout(&bs->comp, timeout_jiffies) == 0) {
scoutfs_inc_counter(sb, client_rpc_timeout);
spin_lock(&conn->lock);
msend = find_request(conn, cmd, id);
if (msend)
queue_dead_free(conn, msend);
spin_unlock(&conn->lock);
if (msend)
bounded_sync_put(bs);
else
wait_for_completion(&bs->comp);
ret = -ETIMEDOUT;
} else {
ret = bs->error;
}
bounded_sync_put(bs);
return ret;
}
static void net_tseq_show_conn(struct seq_file *m,
struct scoutfs_tseq_entry *ent)
{

View File

@@ -150,11 +150,6 @@ int scoutfs_net_sync_request(struct super_block *sb,
struct scoutfs_net_connection *conn,
u8 cmd, void *arg, unsigned arg_len,
void *resp, size_t resp_len);
int scoutfs_net_sync_request_timeout(struct super_block *sb,
struct scoutfs_net_connection *conn,
u8 cmd, void *arg, unsigned arg_len,
void *resp, size_t resp_len,
unsigned long timeout_jiffies);
int scoutfs_net_response(struct super_block *sb,
struct scoutfs_net_connection *conn,
u8 cmd, u64 id, int error, void *resp, u16 resp_len);

View File

@@ -638,7 +638,7 @@ static void scoutfs_server_commit_func(struct work_struct *work)
ret = scoutfs_alloc_empty_list(sb, &server->alloc, &server->wri,
server->meta_freed,
server->other_freed);
if (ret && ret != -ENOLINK) {
if (ret) {
scoutfs_err(sb, "server error emptying freed: %d", ret);
goto out;
}

View File

@@ -95,13 +95,6 @@ struct srch_info {
*/
#define SRCH_COMPACT_DIRTY_LIMIT_BYTES (32 * 1024 * 1024)
/*
* Generous per-RPC bound for the idempotent compact worker. A server
* that hasn't answered in this long is assumed to be broken; dropping
* the request lets the worker reschedule instead of blocking forever.
*/
#define COMPACT_RPC_TIMEOUT (5 * 60 * HZ)
static int sre_cmp(const struct scoutfs_srch_entry *a,
const struct scoutfs_srch_entry *b)
{
@@ -2263,8 +2256,7 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
scoutfs_block_writer_init(sb, &wri);
ret = scoutfs_client_srch_get_compact_timeout(sb, sc,
COMPACT_RPC_TIMEOUT);
ret = scoutfs_client_srch_get_compact(sb, sc);
if (ret >= 0)
trace_scoutfs_srch_compact_client_recv(sb, sc);
if (ret < 0 || sc->nr == 0)
@@ -2295,8 +2287,7 @@ static void scoutfs_srch_compact_worker(struct work_struct *work)
sc->flags |= ret < 0 ? SCOUTFS_SRCH_COMPACT_FLAG_ERROR : 0;
trace_scoutfs_srch_compact_client_send(sb, sc);
err = scoutfs_client_srch_commit_compact_timeout(sb, sc,
COMPACT_RPC_TIMEOUT);
err = scoutfs_client_srch_commit_compact(sb, sc);
if (err < 0 && ret == 0)
ret = err;
out:

View File

@@ -195,8 +195,7 @@ static int retry_forever(struct super_block *sb, int (*func)(struct super_block
retrying = true;
}
if (scoutfs_forcing_unmount(sb) ||
scoutfs_unmounting(sb)) {
if (scoutfs_forcing_unmount(sb)) {
ret = -ENOLINK;
break;
}