From 34b8950bcaddc95deb9aaf4f994c40656225b9be Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Thu, 10 Jan 2019 12:43:48 -0800 Subject: [PATCH] scoutfs: initial lock server core Add the core lock server code for providing a lock service from our server. The lock messages are wired up but nothing calls them. Signed-off-by: Zach Brown --- kmod/src/Makefile | 1 + kmod/src/format.h | 37 +++ kmod/src/lock_server.c | 630 +++++++++++++++++++++++++++++++++++++++ kmod/src/lock_server.h | 12 + kmod/src/scoutfs_trace.h | 46 +++ kmod/src/server.c | 52 +++- kmod/src/server.h | 5 + kmod/src/super.h | 2 + 8 files changed, 784 insertions(+), 1 deletion(-) create mode 100644 kmod/src/lock_server.c create mode 100644 kmod/src/lock_server.h diff --git a/kmod/src/Makefile b/kmod/src/Makefile index fd79053d..468f6a61 100644 --- a/kmod/src/Makefile +++ b/kmod/src/Makefile @@ -21,6 +21,7 @@ scoutfs-y += \ ioctl.o \ item.o \ lock.o \ + lock_server.o \ manifest.o \ msg.o \ net.o \ diff --git a/kmod/src/format.h b/kmod/src/format.h index 6d1bd222..ebce65fd 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -624,6 +624,7 @@ enum { SCOUTFS_NET_CMD_GET_MANIFEST_ROOT, SCOUTFS_NET_CMD_STATFS, SCOUTFS_NET_CMD_COMPACT, + SCOUTFS_NET_CMD_LOCK, SCOUTFS_NET_CMD_UNKNOWN, }; @@ -743,6 +744,42 @@ struct scoutfs_net_compact_response { struct scoutfs_net_manifest_entry ents[SCOUTFS_COMPACTION_MAX_OUTPUT]; } __packed; +struct scoutfs_net_lock { + struct scoutfs_key key; + __u8 old_mode; + __u8 new_mode; +} __packed; + +/* some enums for tracing */ +enum { + SLT_CLIENT, + SLT_SERVER, + SLT_GRANT, + SLT_INVALIDATE, + SLT_REQUEST, + SLT_RESPONSE, +}; + +/* + * Read and write locks operate as you'd expect. Multiple readers can + * hold read locks while writers are excluded. A single writer can hold + * a write lock which excludes other readers and writers. Writers can + * read while holding a write lock. 
+ * + * Multiple writers can hold write only locks but they can not read, + * they can only generate dirty items. It's used when the system has + * other means of knowing that it's safe to overwrite items. + * + * The null mode provides no access and is used to destroy locks. + */ +enum { + SCOUTFS_LOCK_NULL = 0, + SCOUTFS_LOCK_READ, + SCOUTFS_LOCK_WRITE, + SCOUTFS_LOCK_WRITE_ONLY, + SCOUTFS_LOCK_INVALID, +}; + /* * Scoutfs file handle structure - this can be copied out to userspace * via open by handle or put on the wire from NFS. diff --git a/kmod/src/lock_server.c b/kmod/src/lock_server.c new file mode 100644 index 00000000..bb5f4ed3 --- /dev/null +++ b/kmod/src/lock_server.c @@ -0,0 +1,630 @@ +/* + * Copyright (C) 2019 Versity Software, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#include +#include +#include + +#include "format.h" +#include "counters.h" +#include "net.h" +#include "tseq.h" +#include "scoutfs_trace.h" +#include "lock_server.h" + +/* + * The scoutfs server implements a simple lock service. Client mounts + * request access to locks identified by a key. The server ensures that + * access mode exclusion is properly enforced. + * + * The server processing paths are implemented in network message + * receive processing callbacks. We're receiving either a grant request + * or an invalidation response. These processing callbacks are fully + * concurrent. Our grant responses and invalidation requests are sent + * from these contexts. 
+ * + * We separate the locking of the global index of tracked locks from the + * locking of a lock's state. This allows concurrent work on unrelated + * locks and lets processing block sending responses to unresponsive + * clients without affecting other locks. + * + * Correctness of the protocol relies on the client and server each only + * sending one request at a time for a given lock. The server won't + * process a request from a client until its outstanding invalidation + * requests for the lock to other clients have been completed. The + * server specifies both the old mode and new mode when sending messages + * to the client. This lets the client resolve possible reordering when + * processing incoming grant responses and invalidation requests. The + * server doesn't use the modes specified by the clients but they're + * provided to add context. + * + * The server relies on the node_id allocation and reliable messaging + * layers of the system. Each client has a node_id that is unique for + * its life time. Message requests and responses are reliably + * delivered in order across reconnection. + * + * The server maintains a persistent record of connected clients. A new + * server instance discovers these and waits for previously connected + * clients to reconnect and recover their state before proceeding. If + * clients don't reconnect they are forcefully prevented from unsafely + * accessing the shared persistent storage. (fenced, according to the + * rules of the platform.. could range from being powered off to having + * their switch port disabled to having their local block device set + * read-only.) + * + * The lock server doesn't respond to memory pressure. The only way + * locks are freed is if they are invalidated to null on behalf of a + * conflicting request, clients specifically request a null mode, or the + * server shuts down. 
+ */
+
+struct lock_server_info {
+	spinlock_t lock;
+	struct rb_root locks_root;
+
+	struct scoutfs_tseq_tree tseq_tree;
+	struct dentry *tseq_dentry;
+};
+
+#define DECLARE_LOCK_SERVER_INFO(sb, name) \
+	struct lock_server_info *name = SCOUTFS_SB(sb)->lock_server_info
+
+/*
+ * The state of a lock on the server is a function of the messages that
+ * have been sent to and received from clients on behalf of the lock:
+ * grants to clients, requests from clients, and invalidations sent to
+ * clients.
+ *
+ * While the invalidated list has entries, which means invalidation
+ * messages are still in flight, no more requests will be processed.
+ */
+struct server_lock_node {
+	atomic_t refcount;
+	struct mutex mutex;
+	struct rb_node node;
+	struct scoutfs_key key;
+
+	struct list_head granted;
+	struct list_head requested;
+	struct list_head invalidated;
+};
+
+enum {
+	CLE_GRANTED,
+	CLE_REQUESTED,
+	CLE_INVALIDATED,
+};
+
+/*
+ * Interactions with the client are tracked with these little mode
+ * wrappers.
+ *
+ * @entry: The client mode's entry on one of the server lock lists indicating
+ * that the mode is actively granted, a pending request from the client,
+ * or a pending invalidation sent to the client.
+ *
+ * @node_id: The client's node_id used to send messages and tear down
+ * state as clients exit.
+ *
+ * @net_id: The id of a client's request used to send grant responses.  The
+ * id of invalidation requests sent to clients that could be used to cancel
+ * the message.
+ *
+ * @mode: the mode that is granted to the client, that the client
+ * requested, or that the server is asserting with a pending
+ * invalidation request message.
+ */
+struct client_lock_entry {
+	struct list_head head;
+	u64 node_id;
+	u64 net_id;
+	u8 mode;
+
+	struct server_lock_node *snode;
+	struct scoutfs_tseq_entry tseq_entry;
+	u8 on_list;
+};
+
+enum {
+	OL_GRANTED = 0,
+	OL_REQUESTED,
+	OL_INVALIDATED,
+};
+
+/*
+ * Put an entry on a server lock's list while being careful to move or
+ * add the list head and while maintaining debugging info.
+ */
+static void add_client_entry(struct server_lock_node *snode,
+			     struct list_head *list,
+			     struct client_lock_entry *clent)
+{
+	WARN_ON_ONCE(!mutex_is_locked(&snode->mutex));
+
+	if (list_empty(&clent->head))
+		list_add_tail(&clent->head, list);
+	else
+		list_move_tail(&clent->head, list);
+
+	clent->on_list = list == &snode->granted ? OL_GRANTED :
+			 list == &snode->requested ? OL_REQUESTED :
+			 OL_INVALIDATED;
+}
+
+static void free_client_entry(struct lock_server_info *inf,
+			      struct server_lock_node *snode,
+			      struct client_lock_entry *clent)
+{
+	WARN_ON_ONCE(!mutex_is_locked(&snode->mutex));
+
+	if (!list_empty(&clent->head))
+		list_del_init(&clent->head);
+	scoutfs_tseq_del(&inf->tseq_tree, &clent->tseq_entry);
+	kfree(clent);
+}
+
+static bool invalid_mode(u8 mode)
+{
+	return mode >= SCOUTFS_LOCK_INVALID;
+}
+
+/*
+ * Return the mode that we should invalidate a granted lock down to
+ * given an incompatible requested mode.  Usually we completely
+ * invalidate the items because incompatible requests have to be writers
+ * and our cache will then be stale, but the single exception is
+ * invalidating down to a read lock having held a write lock because the
+ * cache is still valid for reads after being written out.
+ */
+static u8 invalidation_mode(u8 granted, u8 requested)
+{
+	if (granted == SCOUTFS_LOCK_WRITE && requested == SCOUTFS_LOCK_READ)
+		return SCOUTFS_LOCK_READ;
+
+	return SCOUTFS_LOCK_NULL;
+}
+
+/*
+ * Return true if the client lock instances described by the entries can
+ * be granted at the same time. 
Typically this only means they're both + * modes that are compatible between nodes. In addition there's the + * special case where a read lock on a client is compatible with a write + * lock on the same client because the client's cache covered by the + * read lock is still valid if they get a write lock. + */ +static bool client_entries_compatible(struct client_lock_entry *granted, + struct client_lock_entry *requested) +{ + return (granted->mode == requested->mode && + (granted->mode == SCOUTFS_LOCK_READ || + granted->mode == SCOUTFS_LOCK_WRITE_ONLY)) || + (granted->node_id == requested->node_id && + granted->mode == SCOUTFS_LOCK_READ && + requested->mode == SCOUTFS_LOCK_WRITE); +} + +/* + * Get a locked server lock, possibly inserting the caller's allocated + * lock if we don't find one for the given key. The server lock's mutex + * is held on return and the caller must put the lock when they're done. + */ +static struct server_lock_node *get_server_lock(struct lock_server_info *inf, + struct scoutfs_key *key, + struct server_lock_node *ins) +{ + struct rb_root *root = &inf->locks_root; + struct server_lock_node *ret = NULL; + struct server_lock_node *snode; + struct rb_node *parent = NULL; + struct rb_node **node; + int cmp; + + spin_lock(&inf->lock); + + node = &root->rb_node; + while (*node) { + parent = *node; + snode = container_of(*node, struct server_lock_node, node); + + cmp = scoutfs_key_compare(key, &snode->key); + if (cmp < 0) { + node = &(*node)->rb_left; + } else if (cmp > 0) { + node = &(*node)->rb_right; + } else { + ret = snode; + break; + } + } + + if (ret == NULL && ins) { + rb_link_node(&ins->node, parent, node); + rb_insert_color(&ins->node, root); + ret = ins; + } + + if (ret) + atomic_inc(&ret->refcount); + + spin_unlock(&inf->lock); + + if (ret) + mutex_lock(&ret->mutex); + + return ret; +} + +/* + * Finish with a server lock which has the mutex held, freeing it if + * it's empty and unused. 
+ */ +static void put_server_lock(struct lock_server_info *inf, + struct server_lock_node *snode) +{ + bool should_free = false; + + BUG_ON(!mutex_is_locked(&snode->mutex)); + + if (atomic_dec_and_test(&snode->refcount) && + list_empty(&snode->granted) && + list_empty(&snode->requested) && + list_empty(&snode->invalidated)) { + spin_lock(&inf->lock); + rb_erase(&snode->node, &inf->locks_root); + spin_unlock(&inf->lock); + should_free = true; + } + + mutex_unlock(&snode->mutex); + + if (should_free) + kfree(snode); +} + +static struct client_lock_entry *find_entry(struct server_lock_node *snode, + struct list_head *list, + u64 node_id) +{ + struct client_lock_entry *clent; + + WARN_ON_ONCE(!mutex_is_locked(&snode->mutex)); + + list_for_each_entry(clent, list, head) { + if (clent->node_id == node_id) + return clent; + } + + return NULL; +} + +static int process_waiting_requests(struct super_block *sb, + struct server_lock_node *snode); + +/* + * The server is receiving an incoming request from a client. We queue + * it on the lock and process it. + * + * XXX shut down if we get enomem? 
+ */ +int scoutfs_lock_server_request(struct super_block *sb, u64 node_id, + u64 net_id, struct scoutfs_net_lock *nl) +{ + DECLARE_LOCK_SERVER_INFO(sb, inf); + struct client_lock_entry *clent; + struct server_lock_node *snode; + struct server_lock_node *ins; + int ret; + + trace_scoutfs_lock_message(sb, SLT_SERVER, SLT_GRANT, SLT_REQUEST, + node_id, net_id, nl); + + if (invalid_mode(nl->old_mode) || invalid_mode(nl->new_mode)) { + ret = -EINVAL; + goto out; + } + + clent = kzalloc(sizeof(struct client_lock_entry), GFP_NOFS); + if (!clent) { + ret = -ENOMEM; + goto out; + } + + INIT_LIST_HEAD(&clent->head); + clent->node_id = node_id; + clent->net_id = net_id; + clent->mode = nl->new_mode; + + snode = get_server_lock(inf, &nl->key, NULL); + if (snode == NULL) { + ins = kzalloc(sizeof(struct server_lock_node), GFP_NOFS); + if (ins == NULL) { + kfree(clent); + ret = -ENOMEM; + goto out; + } + + atomic_set(&ins->refcount, 0); + mutex_init(&ins->mutex); + ins->key = nl->key; + INIT_LIST_HEAD(&ins->granted); + INIT_LIST_HEAD(&ins->requested); + INIT_LIST_HEAD(&ins->invalidated); + + snode = get_server_lock(inf, &nl->key, ins); + if (snode != ins) + kfree(ins); + } + + clent->snode = snode; + add_client_entry(snode, &snode->requested, clent); + scoutfs_tseq_add(&inf->tseq_tree, &clent->tseq_entry); + + ret = process_waiting_requests(sb, snode); +out: + return ret; +} + +/* + * The server is receiving an invalidation response from the client. + * Find the client's entry on the server lock's invalidation list and + * free it so that request processing might be able to make forward + * progress. + * + * XXX what to do with errors? kick the client? 
+ */ +int scoutfs_lock_server_response(struct super_block *sb, u64 node_id, + struct scoutfs_net_lock *nl) +{ + DECLARE_LOCK_SERVER_INFO(sb, inf); + struct client_lock_entry *clent; + struct server_lock_node *snode; + int ret; + + trace_scoutfs_lock_message(sb, SLT_SERVER, SLT_INVALIDATE, SLT_RESPONSE, + node_id, 0, nl); + + if (invalid_mode(nl->old_mode) || invalid_mode(nl->new_mode)) { + ret = -EINVAL; + goto out; + } + + /* XXX should always have a server lock here? recovery? */ + snode = get_server_lock(inf, &nl->key, NULL); + if (!snode) { + ret = -EINVAL; + goto out; + } + + clent = find_entry(snode, &snode->invalidated, node_id); + if (!clent) { + put_server_lock(inf, snode); + ret = -EINVAL; + goto out; + } + + if (nl->new_mode == SCOUTFS_LOCK_NULL) { + free_client_entry(inf, snode, clent); + } else { + clent->mode = nl->new_mode; + add_client_entry(snode, &snode->granted, clent); + } + + ret = process_waiting_requests(sb, snode); +out: + return ret; +} + +/* + * Make forward progress on a lock by checking each waiting request in + * the order that they were received. If the next request is compatible + * with all the clients' grants then the request is granted and a + * response is sent. + * + * Invalidation requests are sent for every client grant that is + * incompatible with the next request. We won't process the next + * request again until we receive all the invalidation responses. Once + * they're all received then the request can be processed and will be + * compatible with the remaining grants. + * + * This is called with the snode mutex held. This can free the snode if + * it's empty. The caller can't reference the snode once this returns + * so we unlock the snode mutex. 
+ */ +static int process_waiting_requests(struct super_block *sb, + struct server_lock_node *snode) +{ + DECLARE_LOCK_SERVER_INFO(sb, inf); + struct scoutfs_net_lock nl; + struct client_lock_entry *req; + struct client_lock_entry *req_tmp; + struct client_lock_entry *gr; + struct client_lock_entry *gr_tmp; + int ret; + + BUG_ON(!mutex_is_locked(&snode->mutex)); + + /* request processing waits for all invalidation responses */ + if (!list_empty(&snode->invalidated)) { + ret = 0; + goto out; + } + + /* walk through pending requests in order received */ + list_for_each_entry_safe(req, req_tmp, &snode->requested, head) { + + /* send invalidation to any incompatible grants */ + list_for_each_entry_safe(gr, gr_tmp, &snode->granted, head) { + if (client_entries_compatible(gr, req)) + continue; + + nl.key = snode->key; + nl.old_mode = gr->mode; + nl.new_mode = invalidation_mode(gr->mode, req->mode); + + ret = scoutfs_server_lock_request(sb, gr->node_id, &nl); + if (ret) + goto out; + + trace_scoutfs_lock_message(sb, SLT_SERVER, + SLT_INVALIDATE, SLT_REQUEST, + gr->node_id, 0, &nl); + + add_client_entry(snode, &snode->invalidated, gr); + } + + /* wait for any newly sent invalidations */ + if (!list_empty(&snode->invalidated)) + break; + + nl.key = snode->key; + nl.new_mode = req->mode; + + /* see if there's an existing compatible grant to replace */ + gr = find_entry(snode, &snode->granted, req->node_id); + if (gr) { + nl.old_mode = gr->mode; + free_client_entry(inf, snode, gr); + } else { + nl.old_mode = SCOUTFS_LOCK_NULL; + } + + ret = scoutfs_server_lock_response(sb, req->node_id, + req->net_id, &nl); + if (ret) + goto out; + + trace_scoutfs_lock_message(sb, SLT_SERVER, SLT_GRANT, + SLT_RESPONSE, req->node_id, + req->net_id, &nl); + + /* don't track null client locks, track all else */ + if (req->mode == SCOUTFS_LOCK_NULL) + free_client_entry(inf, snode, req); + else + add_client_entry(snode, &snode->granted, req); + } + + ret = 0; +out: + put_server_lock(inf, snode); + 
+ return ret; +} + +static char *lock_mode_string(u8 mode) +{ + static char *mode_strings[] = { + [SCOUTFS_LOCK_NULL] = "null", + [SCOUTFS_LOCK_READ] = "read", + [SCOUTFS_LOCK_WRITE] = "write", + [SCOUTFS_LOCK_WRITE_ONLY] = "write_only", + }; + + if (mode < ARRAY_SIZE(mode_strings) && mode_strings[mode]) + return mode_strings[mode]; + + return "unknown"; +} + +static char *lock_on_list_string(u8 on_list) +{ + static char *on_list_strings[] = { + [OL_GRANTED] = "granted", + [OL_REQUESTED] = "requested", + [OL_INVALIDATED] = "invalidated", + }; + + if (on_list < ARRAY_SIZE(on_list_strings) && on_list_strings[on_list]) + return on_list_strings[on_list]; + + return "unknown"; +} + +static void lock_server_tseq_show(struct seq_file *m, + struct scoutfs_tseq_entry *ent) +{ + struct client_lock_entry *clent = container_of(ent, + struct client_lock_entry, + tseq_entry); + struct server_lock_node *snode = clent->snode; + + seq_printf(m, SK_FMT" %s %s node_id %llu net_id %llu\n", + SK_ARG(&snode->key), lock_mode_string(clent->mode), + lock_on_list_string(clent->on_list), clent->node_id, + clent->net_id); +} + +int scoutfs_lock_server_setup(struct super_block *sb) +{ + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); + struct lock_server_info *inf; + + inf = kzalloc(sizeof(struct lock_server_info), GFP_KERNEL); + if (!inf) + return -ENOMEM; + + spin_lock_init(&inf->lock); + inf->locks_root = RB_ROOT; + scoutfs_tseq_tree_init(&inf->tseq_tree, lock_server_tseq_show); + + inf->tseq_dentry = scoutfs_tseq_create("server_locks", sbi->debug_root, + &inf->tseq_tree); + if (!inf->tseq_dentry) { + kfree(inf); + return -ENOMEM; + } + + sbi->lock_server_info = inf; + + return 0; +} + +/* + * The server will have shut down networking before stopping us so we + * don't have to worry about message processing calls while we free. 
+ */ +void scoutfs_lock_server_destroy(struct super_block *sb) +{ + struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); + DECLARE_LOCK_SERVER_INFO(sb, inf); + struct server_lock_node *snode; + struct server_lock_node *stmp; + struct client_lock_entry *clent; + struct client_lock_entry *ctmp; + LIST_HEAD(list); + + if (inf) { + debugfs_remove(inf->tseq_dentry); + + rbtree_postorder_for_each_entry_safe(snode, stmp, + &inf->locks_root, node) { + + list_splice_init(&snode->granted, &list); + list_splice_init(&snode->requested, &list); + list_splice_init(&snode->invalidated, &list); + + mutex_lock(&snode->mutex); + list_for_each_entry_safe(clent, ctmp, &list, head) { + free_client_entry(inf, snode, clent); + } + mutex_unlock(&snode->mutex); + + kfree(snode); + } + + kfree(inf); + sbi->lock_server_info = NULL; + } +} diff --git a/kmod/src/lock_server.h b/kmod/src/lock_server.h new file mode 100644 index 00000000..2b6f4b1f --- /dev/null +++ b/kmod/src/lock_server.h @@ -0,0 +1,12 @@ +#ifndef _SCOUTFS_LOCK_SERVER_H_ +#define _SCOUTFS_LOCK_SERVER_H_ + +int scoutfs_lock_server_request(struct super_block *sb, u64 node_id, + u64 net_id, struct scoutfs_net_lock *nl); +int scoutfs_lock_server_response(struct super_block *sb, u64 node_id, + struct scoutfs_net_lock *nl); + +int scoutfs_lock_server_setup(struct super_block *sb); +void scoutfs_lock_server_destroy(struct super_block *sb); + +#endif diff --git a/kmod/src/scoutfs_trace.h b/kmod/src/scoutfs_trace.h index 3afe9059..3dadf912 100644 --- a/kmod/src/scoutfs_trace.h +++ b/kmod/src/scoutfs_trace.h @@ -2423,6 +2423,52 @@ DEFINE_EVENT(scoutfs_server_client_count_class, scoutfs_server_client_down, TP_ARGS(sb, node_id, nr_clients) ); +#define slt_symbolic(mode) \ + __print_symbolic(mode, \ + { SLT_CLIENT, "client" }, \ + { SLT_SERVER, "server" }, \ + { SLT_GRANT, "grant" }, \ + { SLT_INVALIDATE, "invalidate" }, \ + { SLT_REQUEST, "request" }, \ + { SLT_RESPONSE, "response" }) + +TRACE_EVENT(scoutfs_lock_message, + TP_PROTO(struct 
super_block *sb, int who, int what, int dir, + u64 node_id, u64 net_id, struct scoutfs_net_lock *nl), + + TP_ARGS(sb, who, what, dir, node_id, net_id, nl), + + TP_STRUCT__entry( + __field(__u64, fsid) + __field(int, who) + __field(int, what) + __field(int, dir) + __field(__u64, node_id) + __field(__u64, net_id) + sk_trace_define(key) + __field(__u8, old_mode) + __field(__u8, new_mode) + ), + + TP_fast_assign( + __entry->fsid = FSID_ARG(sb); + __entry->who = who; + __entry->what = what; + __entry->dir = dir; + __entry->node_id = node_id; + __entry->net_id = net_id; + sk_trace_assign(key, &nl->key); + __entry->old_mode = nl->old_mode; + __entry->new_mode = nl->new_mode; + ), + + TP_printk("fsid "FSID_FMT" %s %s %s node_id %llu net_id %llu key "SK_FMT" old_mode %u new_mode %u", + __entry->fsid, slt_symbolic(__entry->who), + slt_symbolic(__entry->what), slt_symbolic(__entry->dir), + __entry->node_id, __entry->net_id, sk_trace_args(key), + __entry->old_mode, __entry->new_mode) +); + DECLARE_EVENT_CLASS(scoutfs_quorum_block_class, TP_PROTO(struct super_block *sb, u64 io_blkno, struct scoutfs_quorum_block *blk), diff --git a/kmod/src/server.c b/kmod/src/server.c index b2e006f2..bf4047be 100644 --- a/kmod/src/server.c +++ b/kmod/src/server.c @@ -32,6 +32,7 @@ #include "msg.h" #include "server.h" #include "net.h" +#include "lock_server.h" #include "endian_swap.h" /* @@ -1020,6 +1021,52 @@ static int server_statfs(struct super_block *sb, &nstatfs, sizeof(nstatfs)); } +static int server_lock(struct super_block *sb, + struct scoutfs_net_connection *conn, + u8 cmd, u64 id, void *arg, u16 arg_len) +{ + u64 node_id = scoutfs_net_client_node_id(conn); + + if (arg_len != sizeof(struct scoutfs_net_lock)) + return -EINVAL; + + return scoutfs_lock_server_request(sb, node_id, id, arg); +} + +static int lock_response(struct super_block *sb, + struct scoutfs_net_connection *conn, + void *resp, unsigned int resp_len, + int error, void *data) +{ + u64 node_id = 
scoutfs_net_client_node_id(conn); + + if (resp_len != sizeof(struct scoutfs_net_lock)) + return -EINVAL; + + return scoutfs_lock_server_response(sb, node_id, resp); +} + +int scoutfs_server_lock_request(struct super_block *sb, u64 node_id, + struct scoutfs_net_lock *nl) +{ + struct server_info *server = SCOUTFS_SB(sb)->server_info; + + return scoutfs_net_submit_request_node(sb, server->conn, node_id, + SCOUTFS_NET_CMD_LOCK, + nl, sizeof(*nl), + lock_response, NULL, NULL); +} + +int scoutfs_server_lock_response(struct super_block *sb, u64 node_id, + u64 id, struct scoutfs_net_lock *nl) +{ + struct server_info *server = SCOUTFS_SB(sb)->server_info; + + return scoutfs_net_response_node(sb, server->conn, node_id, + SCOUTFS_NET_CMD_LOCK, id, 0, + nl, sizeof(*nl)); +} + /* * Process an incoming greeting request in the server from the client. * We try to send responses to failed greetings so that the sender can @@ -1712,6 +1759,7 @@ static scoutfs_net_request_t server_req_funcs[] = { [SCOUTFS_NET_CMD_GET_LAST_SEQ] = server_get_last_seq, [SCOUTFS_NET_CMD_GET_MANIFEST_ROOT] = server_get_manifest_root, [SCOUTFS_NET_CMD_STATFS] = server_statfs, + [SCOUTFS_NET_CMD_LOCK] = server_lock, }; static void server_notify_up(struct super_block *sb, @@ -1826,7 +1874,8 @@ static void scoutfs_server_worker(struct work_struct *work) /* start up the server subsystems before accepting */ ret = scoutfs_btree_setup(sb) ?: - scoutfs_manifest_setup(sb); + scoutfs_manifest_setup(sb) ?: + scoutfs_lock_server_setup(sb); if (ret) goto shutdown; @@ -1856,6 +1905,7 @@ shutdown: destroy_pending_frees(sb); scoutfs_manifest_destroy(sb); scoutfs_btree_destroy(sb); + scoutfs_lock_server_destroy(sb); /* XXX these should be persistent and reclaimed during recovery */ list_for_each_entry_safe(ps, ps_tmp, &server->pending_seqs, head) { diff --git a/kmod/src/server.h b/kmod/src/server.h index 365469b1..f2c9b8e7 100644 --- a/kmod/src/server.h +++ b/kmod/src/server.h @@ -53,6 +53,11 @@ void 
scoutfs_init_ment_to_net(struct scoutfs_net_manifest_entry *net_ment, void scoutfs_init_ment_from_net(struct scoutfs_manifest_entry *ment, struct scoutfs_net_manifest_entry *net_ment); +int scoutfs_server_lock_request(struct super_block *sb, u64 node_id, + struct scoutfs_net_lock *nl); +int scoutfs_server_lock_response(struct super_block *sb, u64 node_id, + u64 id, struct scoutfs_net_lock *nl); + int scoutfs_server_setup(struct super_block *sb); void scoutfs_server_destroy(struct super_block *sb); diff --git a/kmod/src/super.h b/kmod/src/super.h index 99944c8d..99671dab 100644 --- a/kmod/src/super.h +++ b/kmod/src/super.h @@ -16,6 +16,7 @@ struct compact_info; struct data_info; struct trans_info; struct lock_info; +struct lock_server_info; struct client_info; struct server_info; struct inode_sb_info; @@ -59,6 +60,7 @@ struct scoutfs_sb_info { struct trans_info *trans_info; struct lock_info *lock_info; + struct lock_server_info *lock_server_info; struct client_info *client_info; struct server_info *server_info; struct sysfs_info *sfsinfo;