From b50de9019629d68379a112cb73a5fd78ed86da40 Mon Sep 17 00:00:00 2001
From: Zach Brown <zab@versity.com>
Date: Wed, 12 Apr 2017 16:58:07 -0700
Subject: [PATCH] Alloc inodes from pool from server

Inode allocation was always modifying the in-memory super block.  This
doesn't work when the server is solely responsible for modifying the
super blocks.  We add network messages to have mounts send a message to
the server to request inodes that they can use to satisfy allocation.

Signed-off-by: Zach Brown <zab@versity.com>
---
 kmod/src/format.h |  12 ++++-
 kmod/src/inode.c  | 128 ++++++++++++++++++++++++++++++++++++++++++----
 kmod/src/inode.h  |   5 +-
 kmod/src/net.c    |  85 +++++++++++++++++++++++++++++-
 kmod/src/net.h    |   1 +
 kmod/src/super.c  |   2 +
 kmod/src/super.h  |   2 +
 7 files changed, 221 insertions(+), 14 deletions(-)

diff --git a/kmod/src/format.h b/kmod/src/format.h
index 25cb3878..25663ac9 100644
--- a/kmod/src/format.h
+++ b/kmod/src/format.h
@@ -362,11 +362,21 @@ struct scoutfs_net_header {
 	__u8 type;
 	__u8 status;
 	__u8 data[0];
-};
+} __packed;
+
+/*
+ * When there are no more free inodes this will be sent with ino = ~0
+ * and nr = 0.
+ */
+struct scoutfs_net_inode_alloc {
+	__le64 ino;
+	__le64 nr;
+} __packed;
 
 enum {
 	/* sends and receives a struct scoutfs_timeval */
 	SCOUTFS_NET_TRADE_TIME = 0,
+	SCOUTFS_NET_ALLOC_INODES,
 	SCOUTFS_NET_UNKNOWN,
 };
 
diff --git a/kmod/src/inode.c b/kmod/src/inode.c
index e34441de..560f1d2a 100644
--- a/kmod/src/inode.c
+++ b/kmod/src/inode.c
@@ -17,6 +17,7 @@
 #include <linux/xattr.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
+#include <linux/sched.h>
 
 #include "format.h"
 #include "super.h"
@@ -30,6 +31,7 @@
 #include "msg.h"
 #include "kvec.h"
 #include "item.h"
+#include "net.h"
 
 /*
  * XXX
@@ -37,6 +39,14 @@
  *  - use inode item value lengths for forward/back compat
  */
 
+struct free_ino_pool {
+	wait_queue_head_t waitq;
+	spinlock_t lock;
+	u64 ino;
+	u64 nr;
+	bool in_flight;
+};
+
 static struct kmem_cache *scoutfs_inode_cachep;
 
 static void scoutfs_inode_ctor(void *obj)
@@ -359,24 +369,98 @@ u64 scoutfs_last_ino(struct super_block *sb)
 	return last;
 }
 
+/*
+ * Network replies refill the pool, providing ino = ~0ULL and nr = 0 when
+ * there are no more inodes (which should never happen in practice).
+ */
+void scoutfs_inode_fill_pool(struct super_block *sb, u64 ino, u64 nr)
+{
+	struct free_ino_pool *pool = SCOUTFS_SB(sb)->free_ino_pool;
+
+	trace_printk("filling ino %llu nr %llu\n", ino, nr);
+
+	spin_lock(&pool->lock);
+
+	pool->ino = ino;
+	pool->nr = nr;
+	pool->in_flight = false;
+
+	spin_unlock(&pool->lock);
+
+	wake_up(&pool->waitq);
+}
+
+static bool pool_in_flight(struct free_ino_pool *pool)
+{
+	bool in_flight;
+
+	spin_lock(&pool->lock);
+	in_flight = pool->in_flight;
+	spin_unlock(&pool->lock);
+
+	return in_flight;
+}
+
+/*
+ * We have a pool of free inodes given to us by the server.  If it
+ * empties we only ever have one request for new inodes in flight.  The
+ * net layer calls us when it gets a reply.  If there are no more inodes
+ * we'll get ino == ~0 and nr == 0.
+ */
 static int alloc_ino(struct super_block *sb, u64 *ino)
 {
-	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
-	struct scoutfs_super_block *super = &sbi->super;
+	struct free_ino_pool *pool = SCOUTFS_SB(sb)->free_ino_pool;
+	bool request;
 	int ret;
 
-	spin_lock(&sbi->next_ino_lock);
+	*ino = 0;
 
-	if (super->next_ino == 0) {
-		ret = -ENOSPC;
-	} else {
-		*ino = le64_to_cpu(super->next_ino);
-		le64_add_cpu(&super->next_ino, 1);
-		ret = 0;
+	spin_lock(&pool->lock);
+
+	while (pool->nr == 0 && pool->ino != ~0ULL) {
+		if (pool->in_flight) {
+			request = false;
+		} else {
+			pool->in_flight = true;
+			request = true;
+		}
+
+		spin_unlock(&pool->lock);
+
+		if (request) {
+			ret = scoutfs_net_alloc_inodes(sb);
+			if (ret) {
+				spin_lock(&pool->lock);
+				pool->in_flight = false;
+				spin_unlock(&pool->lock);
+				wake_up(&pool->waitq);
+				goto out;
+			}
+		}
+
+		ret = wait_event_interruptible(pool->waitq,
+					       !pool_in_flight(pool));
+		if (ret)
+			goto out;
+
+		spin_lock(&pool->lock);
 	}
 
-	spin_unlock(&sbi->next_ino_lock);
+	if (pool->nr == 0) {
+		*ino = 0;
+		ret = -ENOSPC;
+	} else {
+		*ino = pool->ino++;
+		pool->nr--;
+		ret = 0;
 
+	}
+
+	spin_unlock(&pool->lock);
+
+out:
+	trace_printk("ret %d ino %llu pool ino %llu nr %llu req %u (racey)\n",
+		     ret, *ino, pool->ino, pool->nr, pool->in_flight);
 	return ret;
 }
 
@@ -633,6 +717,30 @@ int scoutfs_orphan_inode(struct inode *inode)
 	return ret;
 }
 
+int scoutfs_inode_setup(struct super_block *sb)
+{
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+	struct free_ino_pool *pool;
+
+	pool = kzalloc(sizeof(struct free_ino_pool), GFP_KERNEL);
+	if (!pool)
+		return -ENOMEM;
+
+	init_waitqueue_head(&pool->waitq);
+	spin_lock_init(&pool->lock);
+
+	sbi->free_ino_pool = pool;
+
+	return 0;
+}
+
+void scoutfs_inode_destroy(struct super_block *sb)
+{
+	struct free_ino_pool *pool = SCOUTFS_SB(sb)->free_ino_pool;
+
+	kfree(pool);
+}
+
 void scoutfs_inode_exit(void)
 {
 	if (scoutfs_inode_cachep) {
diff --git a/kmod/src/inode.h b/kmod/src/inode.h
index 6dcb03d8..b282960a 100644
--- a/kmod/src/inode.h
+++ b/kmod/src/inode.h
@@ -41,6 +41,7 @@ struct inode *scoutfs_iget(struct super_block *sb, u64 ino);
 int scoutfs_dirty_inode_item(struct inode *inode);
 void scoutfs_dirty_inode(struct inode *inode, int flags);
 void scoutfs_update_inode_item(struct inode *inode);
+void scoutfs_inode_fill_pool(struct super_block *sb, u64 ino, u64 nr);
 struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir,
 				umode_t mode, dev_t rdev);
 void scoutfs_inode_inc_data_version(struct inode *inode);
@@ -53,7 +54,7 @@ u64 scoutfs_last_ino(struct super_block *sb);
 void scoutfs_inode_exit(void);
 int scoutfs_inode_init(void);
 
-int scoutfs_item_setup(struct super_block *sb);
-void scoutfs_item_destroy(struct super_block *sb);
+int scoutfs_inode_setup(struct super_block *sb);
+void scoutfs_inode_destroy(struct super_block *sb);
 
 #endif
diff --git a/kmod/src/net.c b/kmod/src/net.c
index 52dc18aa..6458c40e 100644
--- a/kmod/src/net.c
+++ b/kmod/src/net.c
@@ -22,6 +22,7 @@
 #include "format.h"
 #include "net.h"
 #include "counters.h"
+#include "inode.h"
 #include "scoutfs_trace.h"
 
 /*
@@ -253,6 +254,57 @@ static struct send_buf *alloc_sbuf(unsigned data_len)
 	return sbuf;
 }
 
+/*
+ * Server side of SCOUTFS_NET_ALLOC_INODES: grant the requester a
+ * contiguous range of inode numbers starting at the in-memory super
+ * block's next_ino and advance next_ino past the range.  The request
+ * carries no payload, so any non-zero req_len is rejected.
+ *
+ * Each grant is at most 100000 inodes and is clamped so that next_ino
+ * never advances beyond ~0ULL.  Once next_ino is ~0ULL the reply is
+ * ino = ~0ULL, nr = 0.
+ *
+ * Returns the reply send_buf, or ERR_PTR(-EINVAL) for a malformed
+ * request, or ERR_PTR(-ENOMEM) if the reply can't be allocated.
+ *
+ * XXX should this call into inodes?  not sure about the layering here.
+ */
+static struct send_buf *process_alloc_inodes(struct super_block *sb,
+					     void *req, int req_len)
+{
+	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
+	struct scoutfs_super_block *super = &sbi->super;
+	struct scoutfs_net_inode_alloc *ial;
+	struct send_buf *sbuf;
+	u64 ino;
+	u64 nr;
+
+	if (req_len != 0)
+		return ERR_PTR(-EINVAL);
+
+	sbuf = alloc_sbuf(sizeof(*ial));
+	if (!sbuf)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock(&sbi->next_ino_lock);
+
+	ino = le64_to_cpu(super->next_ino);
+	nr = min(100000ULL, ~0ULL - ino);
+	le64_add_cpu(&super->next_ino, nr);
+
+	spin_unlock(&sbi->next_ino_lock);
+
+	/* XXX think about server ring commits */
+
+	ial = (void *)sbuf->nh->data;
+	ial->ino = cpu_to_le64(ino);
+	ial->nr = cpu_to_le64(nr);
+
+	sbuf->nh->status = SCOUTFS_NET_STATUS_SUCCESS;
+
+	return sbuf;
+}
+
 /*
  * Log the time in the request and reply with our current time.
  */
@@ -300,6 +342,9 @@ static int process_request(struct net_info *nti, struct recv_buf *rbuf)
 	if (rbuf->nh->type == SCOUTFS_NET_TRADE_TIME)
 		sbuf = process_trade_time(sb, (void *)rbuf->nh->data,
 					  data_len);
+	else if (rbuf->nh->type == SCOUTFS_NET_ALLOC_INODES)
+		sbuf = process_alloc_inodes(sb, (void *)rbuf->nh->data,
+					    data_len);
 	else
 		sbuf = ERR_PTR(-EINVAL);
 
@@ -702,7 +747,8 @@ static int add_send_buf(struct super_block *sb, int type, void *data,
 
 	nh = sbuf->nh;
 	nh->type = type;
-	memcpy(nh->data, data, data_len);
+	if (data_len)
+		memcpy(nh->data, data, data_len);
 
 	mutex_lock(&nti->mutex);
 
@@ -721,6 +767,66 @@ static int add_send_buf(struct super_block *sb, int type, void *data,
 	return 0;
 }
 
+/*
+ * Reply callback for SCOUTFS_NET_ALLOC_INODES.  A well-formed reply
+ * arrives with ret equal to the size of struct scoutfs_net_inode_alloc.
+ * A negative ret is passed back unchanged instead of being rewritten
+ * to -EINVAL by the length check.
+ * NOTE(review): assumes a negative ret is an errno from the net layer;
+ * the caller is not visible here -- confirm.
+ *
+ * The pool is refilled on every path: with the granted range on
+ * success, or with ino = 0, nr = 0 on failure, which clears the
+ * in-flight request and wakes anyone waiting on the pool.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int alloc_inodes_reply(struct super_block *sb, void *reply, int ret)
+{
+	struct scoutfs_net_inode_alloc *ial = reply;
+	u64 ino;
+	u64 nr;
+
+	/* don't let the length check below mask a reported error */
+	if (ret < 0)
+		goto out;
+
+	if (ret != sizeof(*ial)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ino = le64_to_cpu(ial->ino);
+	nr = le64_to_cpu(ial->nr);
+
+	/* catch wrapping */
+	if (ino + nr < ino) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* XXX compare to greatest inode we've seen? */
+
+	ret = 0;
+out:
+	if (ret < 0)
+		scoutfs_inode_fill_pool(sb, 0, 0);
+	else
+		scoutfs_inode_fill_pool(sb, ino, nr);
+	return ret;
+}
+
+/*
+ * Ask the server for more free inodes.  The request has no payload;
+ * alloc_inodes_reply() is passed as the reply callback.  Returns
+ * whatever add_send_buf() returns.
+ */
+int scoutfs_net_alloc_inodes(struct super_block *sb)
+{
+	return add_send_buf(sb, SCOUTFS_NET_ALLOC_INODES, NULL, 0,
+			    alloc_inodes_reply);
+}
+
 static int trade_time_reply(struct super_block *sb, void *reply, int ret)
 {
 	struct scoutfs_timespec *ts = reply;
diff --git a/kmod/src/net.h b/kmod/src/net.h
index 382b686e..d58c23c9 100644
--- a/kmod/src/net.h
+++ b/kmod/src/net.h
@@ -2,6 +2,7 @@
 #define _SCOUTFS_NET_H_
 
 int scoutfs_net_trade_time(struct super_block *sb);
+int scoutfs_net_alloc_inodes(struct super_block *sb);
 
 int scoutfs_net_setup(struct super_block *sb);
 void scoutfs_net_destroy(struct super_block *sb);
diff --git a/kmod/src/super.c b/kmod/src/super.c
index b15c9339..50daa887 100644
--- a/kmod/src/super.c
+++ b/kmod/src/super.c
@@ -215,6 +215,7 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent)
 	      scoutfs_seg_setup(sb) ?:
 	      scoutfs_manifest_setup(sb) ?:
 	      scoutfs_item_setup(sb) ?:
+	      scoutfs_inode_setup(sb) ?:
 	      scoutfs_data_setup(sb) ?:
 	      scoutfs_alloc_setup(sb) ?:
 	      scoutfs_compact_setup(sb) ?:
@@ -259,6 +260,7 @@ static void scoutfs_kill_sb(struct super_block *sb)
 		scoutfs_compact_destroy(sb);
 		scoutfs_shutdown_trans(sb);
 		scoutfs_data_destroy(sb);
+		scoutfs_inode_destroy(sb);
 		scoutfs_item_destroy(sb);
 		scoutfs_alloc_destroy(sb);
 		scoutfs_manifest_destroy(sb);
diff --git a/kmod/src/super.h b/kmod/src/super.h
index 458345fd..d3e1237a 100644
--- a/kmod/src/super.h
+++ b/kmod/src/super.h
@@ -14,6 +14,7 @@ struct compact_info;
 struct data_info;
 struct lock_info;
 struct net_info;
+struct free_ino_pool;
 
 struct scoutfs_sb_info {
 	struct super_block *sb;
@@ -28,6 +29,7 @@ struct scoutfs_sb_info {
 	struct seg_alloc *seg_alloc;
 	struct compact_info *compact_info;
 	struct data_info *data_info;
+	struct free_ino_pool *free_ino_pool;
 
 	atomic_t trans_holds;
 	wait_queue_head_t trans_hold_wq;