scoutfs: make networking more reliable

The current networking code has loose reliability guarantees. If a connection between the client and server is broken then the client reconnects as though it's an entirely new connection. The client resends requests but no responses are resent. A client's requests could be processed twice on the same server. The server throws away disconnected client state. This was fine, sort of, for the simple requests we had implemented so far. It's not good enough for the locking service which would prefer to let networking worry about reliable message delivery so it doesn't have to track and replay partial state across reconnection between the same client and server. This adds the infrastructure to ensure that requests and responses between a given client and server will be delivered across reconnected sockets and will only be processed once. The server keeps track of disconnected clients and restores state if the same client reconnects. This required some work around the greetings so that clients and servers can recognize each other. Now that the server remembers disconnected clients we add a farewell request so that servers can forget about clients that are shutting down and won't be reconnecting. Now that connections between the client and server are preserved we can resend responses across reconnection. We add outgoing message sequence numbers which are used to drop duplicates and communicate the received sequence back to the sender to free responses once they're received. When the client is reconnecting to a new server it resets its receive state that was dependent on the old server and it drops responses which were being sent to a server instance which no longer exists. This stronger reliable messaging guarantee will make it much easier to implement lock recovery which can now rewind state relative to requests that are in flight and replay existing state on a new server instance. Signed-off-by: Zach Brown <zab@versity.com>
2026-02-09 04:00:10 +00:00 · 2019-02-04 14:29:10 -08:00
parent 20f4e1c338
commit 74366f0df1
8 changed files with 656 additions and 138 deletions
--- a/kmod/src/server.c
+++ b/kmod/src/server.c
@@ -56,6 +56,7 @@ struct server_info {
 	bool shutting_down;
 	struct completion start_comp;
 	struct sockaddr_in listen_sin;
+	u64 term;
 	struct scoutfs_net_connection *conn;

 	/* request processing coordinates committing manifest and alloc */
@@ -1072,11 +1073,16 @@ int scoutfs_server_lock_response(struct super_block *sb, u64 node_id,
 * response shuts down the connection.
 *
 * We allocate a new node_id for the first connect attempt from a
- * client.  We update the request node_id for the calling net layer to
- * consume.
+ * client.
 *
 * If a client reconnects they'll send their initially assigned node_id
 * in their greeting request.
+ *
+ * XXX We can lose allocated node_ids here as we record the node_id as
+ * live as we send a valid greeting response.  The client might
+ * disconnect before they receive the response and resend an initial
+ * blank greeting.  We could use a client uuid to associate with
+ * allocated node_ids.
 */
 static int server_greeting(struct super_block *sb,
 			   struct scoutfs_net_connection *conn,
@@ -1088,6 +1094,9 @@ static int server_greeting(struct super_block *sb,
 	DECLARE_SERVER_INFO(sb, server);
 	struct commit_waiter cw;
 	__le64 node_id = 0;
+	bool sent_node_id;
+	bool first_contact;
+	bool farewell;
 	int ret = 0;

 	if (arg_len != sizeof(struct scoutfs_net_greeting)) {
@@ -1122,24 +1131,61 @@ static int server_greeting(struct super_block *sb,
 		queue_commit_work(server, &cw);
 		up_read(&server->commit_rwsem);
 		ret = wait_for_commit(&cw);
-		if (ret)
+		if (ret) {
+			node_id = 0;
 			goto out;
+		}
 	} else {
 		node_id = gr->node_id;
 	}

 	greet.fsid = super->hdr.fsid;
 	greet.format_hash = super->format_hash;
+	greet.server_term = cpu_to_le64(server->term);
 	greet.node_id = node_id;
+	greet.flags = 0;
 out:
 	ret = scoutfs_net_response(sb, conn, cmd, id, ret,
 				   &greet, sizeof(greet));
-	/* give net caller client's new node_id :/ */
-	if (ret == 0 && node_id != 0)
-		gr->node_id = node_id;
+	if (node_id != 0 && ret == 0) {
+		sent_node_id = gr->node_id != 0;
+		first_contact = le64_to_cpu(gr->server_term) != server->term;
+		if (gr->flags & cpu_to_le64(SCOUTFS_NET_GREETING_FLAG_FAREWELL))
+			farewell = true;
+		else
+			farewell = false;
+
+		scoutfs_net_server_greeting(sb, conn, le64_to_cpu(node_id), id,
+					    sent_node_id, first_contact,
+					    farewell);
+	}
+
 	return ret;
 }

+/*
+ * The server is receiving a farewell message from a client that is
+ * unmounting.  It won't send any more requests and once it receives our
+ * response it will not reconnect.
+ *
+ * XXX we should make sure that all our requests to the client have finished
+ * before we respond.  Locking will have its own messaging for orderly
+ * shutdown.  That leaves compaction which will be addressed as part of
+ * the larger work of recovering compactions that were in flight when
+ * a client crashed.
+ */
+static int server_farewell(struct super_block *sb,
+			   struct scoutfs_net_connection *conn,
+			   u8 cmd, u64 id, void *arg, u16 arg_len)
+{
+	if (arg_len != 0)
+		return -EINVAL;
+
+	scoutfs_net_server_farewell(sb, conn);
+
+	return scoutfs_net_response(sb, conn, cmd, id, 0, NULL, 0);
+}
+
 /* requests sent to clients are tracked so we can free resources */
 struct compact_request {
 	struct list_head head;
@@ -1743,6 +1789,7 @@ static scoutfs_net_request_t server_req_funcs[] = {
 	[SCOUTFS_NET_CMD_GET_MANIFEST_ROOT]	= server_get_manifest_root,
 	[SCOUTFS_NET_CMD_STATFS]		= server_statfs,
 	[SCOUTFS_NET_CMD_LOCK]			= server_lock,
+	[SCOUTFS_NET_CMD_FAREWELL]		= server_farewell,
 };

 static void server_notify_up(struct super_block *sb,
@@ -1880,13 +1927,15 @@ out:
 }

 /* XXX can we call start multiple times? */
-int scoutfs_server_start(struct super_block *sb, struct sockaddr_in *sin)
+int scoutfs_server_start(struct super_block *sb, struct sockaddr_in *sin,
+			 u64 term)
 {
 	DECLARE_SERVER_INFO(sb, server);

 	server->err = 0;
 	server->shutting_down = false;
 	server->listen_sin = *sin;
+	server->term = term;
 	init_completion(&server->start_comp);

 	queue_work(server->wq, &server->work);