Files
scoutfs/kmod/src/client.c
Auke Kok b25d8e8741 Plug super leak.
We accidentally could leak super here, so make sure to free it.

Signed-off-by: Auke Kok <auke.kok@versity.com>
2025-10-24 14:21:06 -04:00

682 lines
20 KiB
C

/*
* Copyright (C) 2017 Versity Software, Inc. All rights reserved.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public
* License v2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*/
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <asm/ioctls.h>
#include <linux/net.h>
#include <linux/inet.h>
#include <linux/in.h>
#include <net/sock.h>
#include <net/tcp.h>
#include <asm/barrier.h>
#include <linux/overflow.h>
#include "format.h"
#include "counters.h"
#include "inode.h"
#include "btree.h"
#include "scoutfs_trace.h"
#include "msg.h"
#include "client.h"
#include "net.h"
#include "endian_swap.h"
#include "quorum.h"
#include "omap.h"
#include "trans.h"
/*
 * The client is responsible for maintaining a connection to the server.
 */
/* delay before a requeued connect attempt runs: MSEC_PER_SEC / 10, i.e. 100ms */
#define CLIENT_CONNECT_DELAY_MS (MSEC_PER_SEC / 10)
/* timeout handed to scoutfs_net_connect() for each attempt: one second */
#define CLIENT_CONNECT_TIMEOUT_MS (1 * MSEC_PER_SEC)
/*
 * Per-mount client state.  Allocated in scoutfs_client_setup(), stored
 * in the sb info's client_info pointer and freed in
 * scoutfs_client_destroy().
 */
struct client_info {
/* super block this client belongs to; the connect worker gets sb from here */
struct super_block *sb;
/* connection to the server; allocated in setup and freed in destroy */
struct scoutfs_net_connection *conn;
/* set to 1 in destroy so that queue_connect_dwork() stops queueing work */
atomic_t shutting_down;
/* workqueue that connect_dwork is queued on */
struct workqueue_struct *workq;
/* delayed work that runs scoutfs_client_connect_worker() */
struct delayed_work connect_dwork;
/*
 * Delay used when queueing connect_dwork.  The worker sets it from
 * CLIENT_CONNECT_DELAY_MS and an accepted greeting response resets it
 * to 0.
 */
unsigned long connect_delay_jiffies;
/* term from the last accepted greeting response; 0 until one is seen */
u64 server_term;
/* set in destroy once the farewell handshake has been started */
bool sending_farewell;
/*
 * Farewell result: the error passed to the farewell response callback,
 * or 0 when the connect worker sees that our mounted client item is gone.
 */
int farewell_error;
/* completed once the farewell has finished by either of those paths */
struct completion farewell_comp;
};
/*
 * Ask the server for a new run of allocated inode numbers.
 *
 * @count: how many inode numbers we'd like
 * @ino:   set to the first inode number of the run
 * @nr:    set to the length of the run, which may be less than @count
 *
 * The server replies with nr == 0 once it has run out, which we turn
 * into -ENOSPC.  A run whose last inode number would overflow a u64 is
 * rejected with -EINVAL.  Note that @ino and @nr are written whenever
 * the request itself succeeds, including in those two error cases.
 * Errors from the request are returned unchanged.
 */
int scoutfs_client_alloc_inodes(struct super_block *sb, u64 count,
				u64 *ino, u64 *nr)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;
	struct scoutfs_net_inode_alloc reply;
	__le64 le_count = cpu_to_le64(count);
	u64 last_ino;
	int ret;

	ret = scoutfs_net_sync_request(sb, conn, SCOUTFS_NET_CMD_ALLOC_INODES,
				       &le_count, sizeof(le_count),
				       &reply, sizeof(reply));
	if (ret != 0)
		return ret;

	*ino = le64_to_cpu(reply.ino);
	*nr = le64_to_cpu(reply.nr);

	/* an empty run means the server has no more inode numbers */
	if (*nr == 0)
		return -ENOSPC;

	/* last_ino only exists to receive the checked sum */
	if (check_add_overflow(*ino, *nr - 1, &last_ino))
		return -EINVAL;

	return 0;
}
/*
 * Send a GET_LOG_TREES request with no payload, passing @lt as the
 * response buffer.  Returns the result of scoutfs_net_sync_request().
 */
int scoutfs_client_get_log_trees(struct super_block *sb,
				 struct scoutfs_log_trees *lt)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn,
					SCOUTFS_NET_CMD_GET_LOG_TREES,
					NULL, 0,
					lt, sizeof(*lt));
}
/*
 * Send @lt to the server as the payload of a COMMIT_LOG_TREES request.
 * No response buffer is supplied.  Returns the result of
 * scoutfs_net_sync_request().
 */
int scoutfs_client_commit_log_trees(struct super_block *sb,
				    struct scoutfs_log_trees *lt)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn,
					SCOUTFS_NET_CMD_COMMIT_LOG_TREES,
					lt, sizeof(*lt),
					NULL, 0);
}
/*
 * Send a GET_ROOTS request with no payload, passing @roots as the
 * response buffer.  Returns the result of scoutfs_net_sync_request().
 */
int scoutfs_client_get_roots(struct super_block *sb,
			     struct scoutfs_net_roots *roots)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn, SCOUTFS_NET_CMD_GET_ROOTS,
					NULL, 0,
					roots, sizeof(*roots));
}
/*
 * Ask the server for its last seq.  On success the little-endian
 * response is converted to cpu order and stored in @seq; on failure
 * @seq is left untouched and the request's error is returned.
 */
int scoutfs_client_get_last_seq(struct super_block *sb, u64 *seq)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;
	__le64 le_seq;
	int ret;

	ret = scoutfs_net_sync_request(sb, conn, SCOUTFS_NET_CMD_GET_LAST_SEQ,
				       NULL, 0, &le_seq, sizeof(le_seq));
	if (ret != 0)
		return ret;

	*seq = le64_to_cpu(le_seq);
	return 0;
}
/*
 * Response callback for lock requests: hand a grant response from the
 * server to the lock code.  Anything that isn't exactly one
 * struct scoutfs_net_lock is rejected with -EINVAL.
 *
 * XXX @error is not examined here.
 */
static int client_lock_response(struct super_block *sb,
				struct scoutfs_net_connection *conn,
				void *resp, unsigned int resp_len,
				int error, void *data)
{
	if (resp_len == sizeof(struct scoutfs_net_lock))
		return scoutfs_lock_grant_response(sb, resp);

	return -EINVAL;
}
/*
 * Submit a lock request carrying @nl to the server.  The response is
 * delivered to client_lock_response().  Returns the result of
 * scoutfs_net_submit_request().
 */
int scoutfs_client_lock_request(struct super_block *sb,
				struct scoutfs_net_lock *nl)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_submit_request(sb, conn, SCOUTFS_NET_CMD_LOCK,
					  nl, sizeof(*nl),
					  client_lock_response, NULL, NULL);
}
/*
 * Send @nl to the server as the response to the lock request that was
 * identified by @net_id.  Returns the result of scoutfs_net_response().
 */
int scoutfs_client_lock_response(struct super_block *sb, u64 net_id,
				 struct scoutfs_net_lock *nl)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_response(sb, conn, SCOUTFS_NET_CMD_LOCK, net_id, 0,
				    nl, sizeof(*nl));
}
/*
 * Send a lock recover response to the server for the request
 * identified by @net_id.  Only the header and the first nlr->nr
 * entries of the locks[] array are sent.
 */
int scoutfs_client_lock_recover_response(struct super_block *sb, u64 net_id,
					 struct scoutfs_net_lock_recover *nlr)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;
	u16 nr_locks = le16_to_cpu(nlr->nr);
	/* byte offset of the first unused locks[] element */
	u16 bytes = offsetof(struct scoutfs_net_lock_recover, locks[nr_locks]);

	return scoutfs_net_response(sb, conn, SCOUTFS_NET_CMD_LOCK_RECOVER,
				    net_id, 0, nlr, bytes);
}
/*
 * Ask the server for srch files that need to be compacted.  The
 * request has no payload and @sc is passed as the response buffer.
 */
int scoutfs_client_srch_get_compact(struct super_block *sb,
				    struct scoutfs_srch_compact *sc)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn,
					SCOUTFS_NET_CMD_SRCH_GET_COMPACT,
					NULL, 0,
					sc, sizeof(*sc));
}
/*
 * Commit the result of a srch file compaction by sending @res to the
 * server.  No response buffer is supplied.
 */
int scoutfs_client_srch_commit_compact(struct super_block *sb,
				       struct scoutfs_srch_compact *res)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn,
					SCOUTFS_NET_CMD_SRCH_COMMIT_COMPACT,
					res, sizeof(*res),
					NULL, 0);
}
/*
 * Send a GET_LOG_MERGE request with no payload, passing @req as the
 * response buffer.  Returns the result of scoutfs_net_sync_request().
 */
int scoutfs_client_get_log_merge(struct super_block *sb,
				 struct scoutfs_log_merge_request *req)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn,
					SCOUTFS_NET_CMD_GET_LOG_MERGE,
					NULL, 0,
					req, sizeof(*req));
}
/*
 * Send @comp to the server as the payload of a COMMIT_LOG_MERGE
 * request.  No response buffer is supplied.  Returns the result of
 * scoutfs_net_sync_request().
 */
int scoutfs_client_commit_log_merge(struct super_block *sb,
				    struct scoutfs_log_merge_complete *comp)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn,
					SCOUTFS_NET_CMD_COMMIT_LOG_MERGE,
					comp, sizeof(*comp),
					NULL, 0);
}
/*
 * Send @map to the server as the response to the open ino map request
 * identified by @id.  Returns the result of scoutfs_net_response().
 */
int scoutfs_client_send_omap_response(struct super_block *sb, u64 id,
				      struct scoutfs_open_ino_map *map)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_response(sb, conn, SCOUTFS_NET_CMD_OPEN_INO_MAP,
				    id, 0, map, sizeof(*map));
}
/*
 * Request handler: the server is sending us an omap request.  The
 * argument must be exactly one struct scoutfs_open_ino_map_args,
 * otherwise -EINVAL is returned; valid requests are passed to the omap
 * code along with the request @id.
 */
static int client_open_ino_map(struct super_block *sb, struct scoutfs_net_connection *conn,
			       u8 cmd, u64 id, void *arg, u16 arg_len)
{
	if (arg_len == sizeof(struct scoutfs_open_ino_map_args))
		return scoutfs_omap_client_handle_request(sb, id, arg);

	return -EINVAL;
}
/*
 * The client is sending an omap request to the server for the group
 * @group_nr.  @map is passed as the response buffer.  The request's
 * req_id is always sent as 0.
 */
int scoutfs_client_open_ino_map(struct super_block *sb, u64 group_nr,
				struct scoutfs_open_ino_map *map)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;
	struct scoutfs_open_ino_map_args args = {
		.req_id = 0,
		.group_nr = cpu_to_le64(group_nr),
	};

	return scoutfs_net_sync_request(sb, conn,
					SCOUTFS_NET_CMD_OPEN_INO_MAP,
					&args, sizeof(args),
					map, sizeof(*map));
}
/*
 * Ask the server for the current volume options.  The request has no
 * payload and @volopt is passed as the response buffer.
 */
int scoutfs_client_get_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn, SCOUTFS_NET_CMD_GET_VOLOPT,
					NULL, 0,
					volopt, sizeof(*volopt));
}
/*
 * Ask the server to update volume options, sending @volopt as the
 * request payload.  No response buffer is supplied.
 */
int scoutfs_client_set_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn, SCOUTFS_NET_CMD_SET_VOLOPT,
					volopt, sizeof(*volopt),
					NULL, 0);
}
/*
 * Ask the server to clear volume options, sending @volopt as the
 * request payload.  No response buffer is supplied.
 */
int scoutfs_client_clear_volopt(struct super_block *sb, struct scoutfs_volume_options *volopt)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn, SCOUTFS_NET_CMD_CLEAR_VOLOPT,
					volopt, sizeof(*volopt),
					NULL, 0);
}
/*
 * Send @nrd to the server as the payload of a RESIZE_DEVICES request.
 * No response buffer is supplied.  Returns the result of
 * scoutfs_net_sync_request().
 */
int scoutfs_client_resize_devices(struct super_block *sb, struct scoutfs_net_resize_devices *nrd)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn,
					SCOUTFS_NET_CMD_RESIZE_DEVICES,
					nrd, sizeof(*nrd),
					NULL, 0);
}
/*
 * Send a STATFS request with no payload, passing @nst as the response
 * buffer.  Returns the result of scoutfs_net_sync_request().
 */
int scoutfs_client_statfs(struct super_block *sb, struct scoutfs_net_statfs *nst)
{
	struct scoutfs_net_connection *conn = SCOUTFS_SB(sb)->client_info->conn;

	return scoutfs_net_sync_request(sb, conn, SCOUTFS_NET_CMD_STATFS,
					NULL, 0,
					nst, sizeof(*nst));
}
/*
 * Request handler: the server is asking that we trigger a commit of
 * the current log trees so that it can ensure an item seq
 * discontinuity between finalized log btrees and the next set of open
 * log btrees.  If we're unmounting then we're already going to perform
 * a final commit, so the sync is skipped.
 *
 * The request carries no argument; a non-empty one is rejected with
 * -EINVAL.  Otherwise an empty response is always sent, and its result
 * is what we return.  The return value of scoutfs_trans_sync() is not
 * checked.
 */
static int sync_log_trees(struct super_block *sb, struct scoutfs_net_connection *conn,
			  u8 cmd, u64 id, void *arg, u16 arg_len)
{
	if (arg_len > 0)
		return -EINVAL;

	if (!scoutfs_unmounting(sb))
		scoutfs_trans_sync(sb, 0);

	return scoutfs_net_response(sb, conn, cmd, id, 0, NULL, 0);
}
/*
 * Request handler: the client is receiving an invalidation request
 * from the server.  The argument must be exactly one
 * struct scoutfs_net_lock, otherwise -EINVAL is returned.
 *
 * XXX error?
 */
static int client_lock(struct super_block *sb,
		       struct scoutfs_net_connection *conn, u8 cmd, u64 id,
		       void *arg, u16 arg_len)
{
	if (arg_len == sizeof(struct scoutfs_net_lock))
		return scoutfs_lock_invalidate_request(sb, id, arg);

	return -EINVAL;
}
/*
 * Request handler: the server is asking us for the client's locks
 * starting with the given key.  The argument must be exactly one
 * struct scoutfs_key, otherwise -EINVAL is returned.
 *
 * XXX error?
 */
static int client_lock_recover(struct super_block *sb,
			       struct scoutfs_net_connection *conn,
			       u8 cmd, u64 id, void *arg, u16 arg_len)
{
	if (arg_len == sizeof(struct scoutfs_key))
		return scoutfs_lock_recover_request(sb, id, arg);

	return -EINVAL;
}
/*
* Process a greeting response in the client from the server. This is
* called for every connected socket on the connection. Each response
* contains the remote server's elected term which can be used to
* identify server failover.
*/
static int client_greeting(struct super_block *sb,
struct scoutfs_net_connection *conn,
void *resp, unsigned int resp_len, int error,
void *data)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct client_info *client = sbi->client_info;
struct scoutfs_net_greeting *gr = resp;
bool new_server;
int ret;
if (error) {
ret = error;
goto out;
}
if (resp_len != sizeof(struct scoutfs_net_greeting)) {
ret = -EINVAL;
goto out;
}
if (gr->fsid != cpu_to_le64(sbi->fsid)) {
scoutfs_warn(sb, "server greeting response fsid 0x%llx did not match client fsid 0x%llx",
le64_to_cpu(gr->fsid), sbi->fsid);
ret = -EINVAL;
goto out;
}
if (le64_to_cpu(gr->fmt_vers) != sbi->fmt_vers) {
scoutfs_warn(sb, "server greeting response format version %llu did not match client format version %llu",
le64_to_cpu(gr->fmt_vers), sbi->fmt_vers);
ret = -EINVAL;
goto out;
}
new_server = le64_to_cpu(gr->server_term) != client->server_term;
scoutfs_net_client_greeting(sb, conn, new_server);
client->server_term = le64_to_cpu(gr->server_term);
client->connect_delay_jiffies = 0;
ret = 0;
out:
return ret;
}
/*
 * The client is deciding if it needs to keep trying to reconnect to
 * have its farewell request processed.  The server removes our mounted
 * client item last so that if we don't see it we know the server has
 * processed our farewell and we don't need to reconnect, we can unmount
 * safely.
 *
 * This is peeking at btree blocks that the server could be actively
 * freeing with cow updates so it can see stale blocks, we just return
 * the error and we'll retry eventually as the connection times out.
 *
 * Returns 1 if the item for @rid was found, 0 if the lookup returned
 * -ENOENT, and a negative errno for any other failure (including
 * -ENOMEM if the super block copy can't be allocated).
 */
static int lookup_mounted_client_item(struct super_block *sb, u64 rid)
{
	struct scoutfs_key key = {
		.sk_zone = SCOUTFS_MOUNTED_CLIENT_ZONE,
		.skmc_rid = cpu_to_le64(rid),
	};
	struct scoutfs_super_block *super;
	SCOUTFS_BTREE_ITEM_REF(iref);
	int ret;

	super = kmalloc(sizeof(*super), GFP_NOFS);
	if (super == NULL)
		return -ENOMEM;

	ret = scoutfs_read_super(sb, super);
	if (ret == 0) {
		ret = scoutfs_btree_lookup(sb, &super->mounted_clients,
					   &key, &iref);
		if (ret == 0) {
			/* found it; we only care that it exists */
			scoutfs_btree_put_iref(&iref);
			ret = 1;
		} else if (ret == -ENOENT) {
			ret = 0;
		}
	}

	kfree(super);
	return ret;
}
/*
 * If we're not seeing successful connections we want to back off.  Each
 * connection attempt starts by setting a long connection work delay.
 * We only set a shorter delay if we see a greeting response from the
 * server.  At that point we'll try to immediately reconnect if the
 * connection is broken.
 *
 * Nothing is queued once the client is shutting down or the mount is
 * being forcefully unmounted.
 */
static void queue_connect_dwork(struct super_block *sb, struct client_info *client)
{
	if (atomic_read(&client->shutting_down) || scoutfs_forcing_unmount(sb))
		return;

	queue_delayed_work(client->workq, &client->connect_dwork,
			   client->connect_delay_jiffies);
}
/*
* This work is responsible for maintaining a connection from the client
* to the server. It's queued on mount and disconnect and we requeue
* the work if the work fails and we're not shutting down.
*
* We ask quorum for an address to try and connect to. If there isn't
* one, or it fails, we back off a bit before trying again.
*
* There's a tricky bit of coordination required to safely unmount.
* Clients need to tell the server that they won't be coming back with a
* farewell request. Once the server processes a farewell request from
* the client it can forget the client. If the connection is broken
* before the client gets the farewell response it doesn't want to
* reconnect to send it again.. instead the client can read the metadata
* device to check for the lack of an item which indicates that the
* server has processed its farewell.
*/
static void scoutfs_client_connect_worker(struct work_struct *work)
{
struct client_info *client = container_of(work, struct client_info,
connect_dwork.work);
struct super_block *sb = client->sb;
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct scoutfs_mount_options opts;
struct scoutfs_net_greeting greet;
struct sockaddr_in sin;
bool am_quorum;
int ret;
scoutfs_options_read(sb, &opts);
am_quorum = opts.quorum_slot_nr >= 0;
/* can unmount once server farewell handling removes our item */
if (client->sending_farewell &&
lookup_mounted_client_item(sb, sbi->rid) == 0) {
client->farewell_error = 0;
complete(&client->farewell_comp);
ret = 0;
goto out;
}
/* always wait a bit until a greeting response sets a lower delay */
client->connect_delay_jiffies = msecs_to_jiffies(CLIENT_CONNECT_DELAY_MS);
ret = scoutfs_quorum_server_sin(sb, &sin);
if (ret < 0)
goto out;
ret = scoutfs_net_connect(sb, client->conn, &sin,
CLIENT_CONNECT_TIMEOUT_MS);
if (ret < 0)
goto out;
/* send a greeting to verify endpoints of each connection */
greet.fsid = cpu_to_le64(sbi->fsid);
greet.fmt_vers = cpu_to_le64(sbi->fmt_vers);
greet.server_term = cpu_to_le64(client->server_term);
greet.rid = cpu_to_le64(sbi->rid);
greet.flags = 0;
if (client->sending_farewell)
greet.flags |= cpu_to_le64(SCOUTFS_NET_GREETING_FLAG_FAREWELL);
if (am_quorum)
greet.flags |= cpu_to_le64(SCOUTFS_NET_GREETING_FLAG_QUORUM);
ret = scoutfs_net_submit_request(sb, client->conn,
SCOUTFS_NET_CMD_GREETING,
&greet, sizeof(greet),
client_greeting, NULL, NULL);
if (ret)
scoutfs_net_shutdown(sb, client->conn);
out:
if (ret)
queue_connect_dwork(sb, client);
}
/*
 * Handlers for the requests that the server sends to this client,
 * indexed by command number.  The table is passed to
 * scoutfs_net_alloc_conn() in scoutfs_client_setup().
 */
static scoutfs_net_request_t client_req_funcs[] = {
[SCOUTFS_NET_CMD_SYNC_LOG_TREES] = sync_log_trees,
[SCOUTFS_NET_CMD_LOCK] = client_lock,
[SCOUTFS_NET_CMD_LOCK_RECOVER] = client_lock_recover,
[SCOUTFS_NET_CMD_OPEN_INO_MAP] = client_open_ino_map,
};
/*
 * Called when either a connect attempt or established connection times
 * out and fails.  All we do is ask for the connect work to be queued
 * again; @conn, @info and @rid are unused.
 */
static void client_notify_down(struct super_block *sb,
			       struct scoutfs_net_connection *conn, void *info,
			       u64 rid)
{
	queue_connect_dwork(sb, SCOUTFS_SB(sb)->client_info);
}
/*
 * Allocate and initialize the client for this mount, then queue the
 * first connection attempt.
 *
 * Returns 0 on success.  Every failure here is an allocation failure
 * and returns -ENOMEM after scoutfs_client_destroy() has been called to
 * tear down whatever had been set up.
 */
int scoutfs_client_setup(struct super_block *sb)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct client_info *client;
	int ret = -ENOMEM;

	client = kzalloc(sizeof(*client), GFP_KERNEL);
	if (client == NULL)
		goto fail;

	/* publish the client before anything else can go looking for it */
	sbi->client_info = client;
	client->sb = sb;
	atomic_set(&client->shutting_down, 0);
	INIT_DELAYED_WORK(&client->connect_dwork,
			  scoutfs_client_connect_worker);
	init_completion(&client->farewell_comp);

	client->conn = scoutfs_net_alloc_conn(sb, NULL, client_notify_down, 0,
					      client_req_funcs, "client");
	if (client->conn == NULL)
		goto fail;

	client->workq = alloc_workqueue("scoutfs_client_workq", WQ_UNBOUND, 1);
	if (client->workq == NULL)
		goto fail;

	queue_connect_dwork(sb, client);
	return 0;

fail:
	scoutfs_client_destroy(sb);
	return ret;
}
/*
 * Once we get a response from the server we can shut down.  The
 * response's @error is recorded and the waiter in
 * scoutfs_client_destroy() is woken.
 *
 * A farewell response carries no payload.  One that does is rejected
 * with -EINVAL and the completion is not signalled.
 */
static int client_farewell_response(struct super_block *sb,
				    struct scoutfs_net_connection *conn,
				    void *resp, unsigned int resp_len,
				    int error, void *data)
{
	struct client_info *client;

	if (resp_len > 0)
		return -EINVAL;

	client = SCOUTFS_SB(sb)->client_info;
	client->farewell_error = error;
	complete(&client->farewell_comp);

	return 0;
}
/*
* There must be no more callers to the client request functions by the
* time we get here.
*
* If we've connected to a server then we send them a farewell request
* so that they don't wait for us to reconnect and trigger a timeout.
*
* This decision is a little racy. The server considers us connected
* when it records a persistent record of our rid as it processes our
* greeting. We can disconnect before receiving the greeting response
* and leave without sending a farewell. So given that awkward initial
* race, we also have a bit of a race where we just test the server_term
* to see if we've ever gotten a greeting reply from any server. We
* don't try to synchronize with pending connection attempts.
*
* The consequences of aborting a mount at just the wrong time and
* disconnecting without the farewell handshake depend on what the
* server does to timed out clients. At best it'll spit out a warning
* message that a client disconnected but it won't fence us if we didn't
* have any persistent state.
*/
void scoutfs_client_destroy(struct super_block *sb)
{
struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
struct client_info *client = SCOUTFS_SB(sb)->client_info;
struct scoutfs_net_connection *conn;
int ret;
if (client == NULL)
return;
if (client->server_term != 0 && !scoutfs_forcing_unmount(sb)) {
client->sending_farewell = true;
ret = scoutfs_net_submit_request(sb, client->conn,
SCOUTFS_NET_CMD_FAREWELL,
NULL, 0,
client_farewell_response,
NULL, NULL);
if (ret == 0) {
wait_for_completion(&client->farewell_comp);
ret = client->farewell_error;
}
if (ret) {
scoutfs_inc_counter(sb, client_farewell_error);
scoutfs_warn(sb, "client saw farewell error %d, server might see client connection time out", ret);
}
}
/* stop notify_down from queueing connect work */
atomic_set(&client->shutting_down, 1);
/* make sure worker isn't using the conn */
cancel_delayed_work_sync(&client->connect_dwork);
/* make racing conn use explode */
conn = client->conn;
client->conn = NULL;
scoutfs_net_free_conn(sb, conn);
if (client->workq)
destroy_workqueue(client->workq);
kfree(client);
sbi->client_info = NULL;
}
/*
 * Shut down the client's connection to the server.  Does nothing if
 * the client or its connection doesn't exist.
 */
void scoutfs_client_net_shutdown(struct super_block *sb)
{
	struct client_info *client = SCOUTFS_SB(sb)->client_info;

	if (!client || !client->conn)
		return;

	scoutfs_net_shutdown(sb, client->conn);
}