From 5c7ba5ed395f8953bef81c5e8468fe066d3785d4 Mon Sep 17 00:00:00 2001 From: Zach Brown Date: Fri, 1 Jul 2016 21:03:40 -0700 Subject: [PATCH] scoutfs: remove wrlock and roster These were interesting experiments in how to manage locks across the cluster but we'll be going in a more flexible direction. Signed-off-by: Zach Brown --- kmod/src/Makefile | 4 +- kmod/src/dir.c | 20 +- kmod/src/filerw.c | 12 +- kmod/src/format.h | 2 - kmod/src/inode.c | 72 +--- kmod/src/inode.h | 4 +- kmod/src/roster.c | 159 ------- kmod/src/roster.h | 14 - kmod/src/super.c | 7 - kmod/src/super.h | 8 - kmod/src/wire.h | 36 -- kmod/src/wrlock.c | 1000 --------------------------------------------- kmod/src/wrlock.h | 29 -- 13 files changed, 29 insertions(+), 1338 deletions(-) delete mode 100644 kmod/src/roster.c delete mode 100644 kmod/src/roster.h delete mode 100644 kmod/src/wire.h delete mode 100644 kmod/src/wrlock.c delete mode 100644 kmod/src/wrlock.h diff --git a/kmod/src/Makefile b/kmod/src/Makefile index 01a96cdf..62aacf23 100644 --- a/kmod/src/Makefile +++ b/kmod/src/Makefile @@ -11,7 +11,7 @@ CFLAGS_scoutfs_trace.o = -I$(src) # define_trace.h double include scoutfs-y += first.o scoutfs-y += block.o btree.o buddy.o counters.o crc.o dir.o filerw.o \ - inode.o ioctl.o msg.o roster.o scoutfs_trace.o super.o trace.o \ - trans.o treap.o wrlock.o + inode.o ioctl.o msg.o scoutfs_trace.o super.o trace.o trans.o \ + treap.o scoutfs-y += last.o diff --git a/kmod/src/dir.c b/kmod/src/dir.c index c275d7f7..730cc7b2 100644 --- a/kmod/src/dir.c +++ b/kmod/src/dir.c @@ -22,7 +22,7 @@ #include "key.h" #include "super.h" #include "btree.h" -#include "wrlock.h" +#include "trans.h" /* * Directory entries are stored in entries with offsets calculated from @@ -310,9 +310,7 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, struct scoutfs_key first; struct scoutfs_key last; struct scoutfs_key key; - DECLARE_SCOUTFS_WRLOCK_HELD(held); int bytes; - u64 ino; int ret; u64 h; @@ -323,11 +321,7 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, if (dentry->d_name.len > SCOUTFS_NAME_LEN) return -ENAMETOOLONG; - ret = scoutfs_alloc_ino(sb, &ino); - if (ret) - return ret; - - ret = scoutfs_wrlock_lock(sb, &held, 2, scoutfs_ino(dir), ino); + ret = scoutfs_hold_trans(sb); if (ret) return ret; @@ -335,7 +329,7 @@ static int scoutfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, if (ret) goto out; - inode = scoutfs_new_inode(sb, dir, ino, mode, rdev); + inode = scoutfs_new_inode(sb, dir, mode, rdev); if (IS_ERR(inode)) { ret = PTR_ERR(inode); goto out; @@ -392,7 +386,7 @@ out: /* XXX delete the inode item here */ if (ret && !IS_ERR_OR_NULL(inode)) iput(inode); - scoutfs_wrlock_unlock(sb, &held); + scoutfs_release_trans(sb); return ret; } @@ -417,7 +411,6 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry) struct super_block *sb = dir->i_sb; struct inode *inode = dentry->d_inode; struct timespec ts = current_kernel_time(); - DECLARE_SCOUTFS_WRLOCK_HELD(held); struct dentry_info *di; struct scoutfs_key key; int ret = 0; @@ -429,8 +422,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry) if (S_ISDIR(inode->i_mode) && i_size_read(inode)) return -ENOTEMPTY; - ret = scoutfs_wrlock_lock(sb, &held, 2, scoutfs_ino(dir), - scoutfs_ino(inode)); + ret = scoutfs_hold_trans(sb); if (ret) return ret; @@ -459,7 +451,7 @@ static int scoutfs_unlink(struct inode *dir, struct dentry *dentry) scoutfs_update_inode_item(dir); out: - 
scoutfs_wrlock_unlock(sb, &held); + scoutfs_release_trans(sb); return ret; } diff --git a/kmod/src/filerw.c b/kmod/src/filerw.c index 2082d17e..a6e75fc7 100644 --- a/kmod/src/filerw.c +++ b/kmod/src/filerw.c @@ -18,7 +18,7 @@ #include "inode.h" #include "key.h" #include "filerw.h" -#include "wrlock.h" +#include "trans.h" #include "scoutfs_trace.h" #include "btree.h" @@ -130,7 +130,6 @@ static int scoutfs_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; DECLARE_SCOUTFS_BTREE_CURSOR(curs); - DECLARE_SCOUTFS_WRLOCK_HELD(held); struct super_block *sb = inode->i_sb; struct scoutfs_key key; struct data_region dr; @@ -140,7 +139,7 @@ static int scoutfs_writepage(struct page *page, struct writeback_control *wbc) set_page_writeback(page); - ret = scoutfs_wrlock_lock(sb, &held, 1, scoutfs_ino(inode)); + ret = scoutfs_hold_trans(sb); if (ret) goto out; @@ -162,7 +161,7 @@ static int scoutfs_writepage(struct page *page, struct writeback_control *wbc) } scoutfs_btree_release(&curs); - scoutfs_wrlock_unlock(sb, &held); + scoutfs_release_trans(sb); out: if (ret) { SetPageError(page); @@ -199,7 +198,6 @@ static int scoutfs_write_end(struct file *file, struct address_space *mapping, { struct inode *inode = mapping->host; struct super_block *sb = inode->i_sb; - DECLARE_SCOUTFS_WRLOCK_HELD(held); unsigned off; trace_scoutfs_write_end(scoutfs_ino(inode), pos, len, copied); @@ -220,10 +218,10 @@ static int scoutfs_write_end(struct file *file, struct address_space *mapping, * up the robust metadata support that's needed to do a * good job with the data pats. */ - if (!scoutfs_wrlock_lock(sb, &held, 1, scoutfs_ino(inode))) { + if (!scoutfs_hold_trans(sb)) { if (!scoutfs_dirty_inode_item(inode)) scoutfs_update_inode_item(inode); - scoutfs_wrlock_unlock(sb, &held); + scoutfs_release_trans(sb); } } diff --git a/kmod/src/format.h b/kmod/src/format.h index 5c8ecb13..5deca747 100644 --- a/kmod/src/format.h +++ b/kmod/src/format.h @@ -159,8 +159,6 @@ struct scoutfs_super_block { } __packed; #define SCOUTFS_ROOT_INO 1 -#define SCOUTFS_INO_BATCH_SHIFT 20 -#define SCOUTFS_INO_BATCH (1 << SCOUTFS_INO_BATCH_SHIFT) struct scoutfs_timespec { __le64 sec; diff --git a/kmod/src/inode.c b/kmod/src/inode.c index a9bc7010..0543c991 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -22,7 +22,6 @@ #include "btree.h" #include "dir.h" #include "filerw.h" -#include "wrlock.h" #include "scoutfs_trace.h" /* @@ -248,69 +247,24 @@ void scoutfs_update_inode_item(struct inode *inode) trace_scoutfs_update_inode(inode); } -/* - * This will need to try and find a mostly idle shard. For now we only - * have one :). 
- */ -static int get_next_ino_batch(struct super_block *sb) +static int alloc_ino(struct super_block *sb, u64 *ino) { struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - DECLARE_SCOUTFS_WRLOCK_HELD(held); + struct scoutfs_super_block *super = &sbi->super; int ret; - ret = scoutfs_wrlock_lock(sb, &held, 1, 1); - if (ret) - return ret; - spin_lock(&sbi->next_ino_lock); - if (!sbi->next_ino_count) { - sbi->next_ino = le64_to_cpu(sbi->super.next_ino); - if (sbi->next_ino + SCOUTFS_INO_BATCH < sbi->next_ino) { - ret = -ENOSPC; - } else { - le64_add_cpu(&sbi->super.next_ino, SCOUTFS_INO_BATCH); - sbi->next_ino_count = SCOUTFS_INO_BATCH; - ret = 0; - } + + if (super->next_ino == 0) { + ret = -ENOSPC; + } else { + *ino = le64_to_cpu(super->next_ino); + le64_add_cpu(&super->next_ino, 1); + ret = 0; } + spin_unlock(&sbi->next_ino_lock); - scoutfs_wrlock_unlock(sb, &held); - - return ret; -} - -/* - * Inode allocation is at the core of supporting parallel creation. - * Each mount needs to allocate from a pool of free inode numbers which - * map to a shard that it has locked. - */ -int scoutfs_alloc_ino(struct super_block *sb, u64 *ino) -{ - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - int ret; - - do { - /* don't really care if this is racey */ - if (!sbi->next_ino_count) { - ret = get_next_ino_batch(sb); - if (ret) - break; - } - - spin_lock(&sbi->next_ino_lock); - - if (sbi->next_ino_count) { - *ino = sbi->next_ino++; - sbi->next_ino_count--; - ret = 0; - } else { - ret = -EAGAIN; - } - spin_unlock(&sbi->next_ino_lock); - - } while (ret == -EAGAIN); - return ret; } @@ -319,14 +273,18 @@ int scoutfs_alloc_ino(struct super_block *sb, u64 *ino) * creating links to it and updating it. @dir can be null. */ struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir, - u64 ino, umode_t mode, dev_t rdev) + umode_t mode, dev_t rdev) { DECLARE_SCOUTFS_BTREE_CURSOR(curs); struct scoutfs_inode_info *ci; struct scoutfs_key key; struct inode *inode; + u64 ino; int ret; + ret = alloc_ino(sb, &ino); + if (ret) + return ERR_PTR(ret); inode = new_inode(sb); if (!inode) diff --git a/kmod/src/inode.h b/kmod/src/inode.h index 5dfc6b1a..f2846201 100644 --- a/kmod/src/inode.h +++ b/kmod/src/inode.h @@ -18,8 +18,6 @@ static inline u64 scoutfs_ino(struct inode *inode) return SCOUTFS_I(inode)->ino; } -int scoutfs_alloc_ino(struct super_block *sb, u64 *ino); - struct inode *scoutfs_alloc_inode(struct super_block *sb); void scoutfs_destroy_inode(struct inode *inode); @@ -27,7 +25,7 @@ struct inode *scoutfs_iget(struct super_block *sb, u64 ino); int scoutfs_dirty_inode_item(struct inode *inode); void scoutfs_update_inode_item(struct inode *inode); struct inode *scoutfs_new_inode(struct super_block *sb, struct inode *dir, - u64 ino, umode_t mode, dev_t rdev); + umode_t mode, dev_t rdev); void scoutfs_inode_exit(void); int scoutfs_inode_init(void); diff --git a/kmod/src/roster.c b/kmod/src/roster.c deleted file mode 100644 index e05b7160..00000000 --- a/kmod/src/roster.c +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Copyright (C) 2016 Versity Software, Inc. All rights reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. 
- */ -#include -#include - -#include "super.h" -#include "wire.h" -#include "wrlock.h" -#include "roster.h" - -/* - * The roster tracks all the mounts on nodes that are working with a - * scoutfs volume. - * - * This trivial first pass lets us test multiple mounts on the same - * node. It'll get a lot more involved as all the nodes manage a roster - * in the shared device. - */ -static DEFINE_MUTEX(roster_mutex); -static u64 roster_next_id = 1; -static LIST_HEAD(roster_list); - -/* - * A new mount is adding itself to the roster. It gets a new increasing - * id assigned and all the other mounts are told that it's now a member. - */ -int scoutfs_roster_add(struct super_block *sb) -{ - struct scoutfs_sb_info *us = SCOUTFS_SB(sb); - struct scoutfs_sb_info *them; - - mutex_lock(&roster_mutex); - list_add_tail(&us->roster_head, &roster_list); - us->roster_id = roster_next_id++; - - list_for_each_entry(them, &roster_list, roster_head) { - if (us->roster_id != them->roster_id) { - scoutfs_wrlock_roster_update(them->sb, us->roster_id, - true); - } - } - - mutex_unlock(&roster_mutex); - - return 0; -} - -/* - * A mount is removing itself to the roster. All the other remaining - * mounts are told that it has gone away. - * - * This is safe to call without having called _add. - */ -void scoutfs_roster_remove(struct super_block *sb) -{ - struct scoutfs_sb_info *us = SCOUTFS_SB(sb); - struct scoutfs_sb_info *them; - - mutex_lock(&roster_mutex); - - if (!list_empty(&us->roster_head)) { - list_del_init(&us->roster_head); - - list_for_each_entry(them, &roster_list, roster_head) - scoutfs_wrlock_roster_update(them->sb, us->roster_id, - false); - } - - mutex_unlock(&roster_mutex); -} - -static int process_message(struct super_block *sb, u64 peer_id, - struct scoutfs_message *msg) -{ - int ret = 0; - - switch (msg->cmd) { - case SCOUTFS_MSG_WRLOCK_REQUEST: - ret = scoutfs_wrlock_process_request(sb, peer_id, - &msg->request); - break; - case SCOUTFS_MSG_WRLOCK_GRANT: - scoutfs_wrlock_process_grant(sb, &msg->grant); - ret = 0; - break; - default: - ret = -EINVAL; - } - - return ret; -} - -/* - * Send a message to a specific member of the roster identified by its - * id. - * - * We don't actually send anything, we call directly into the receivers - * message processing path with the caller's message. - */ -void scoutfs_roster_send(struct super_block *sb, u64 peer_id, - struct scoutfs_message *msg) -{ - struct scoutfs_sb_info *us = SCOUTFS_SB(sb); - struct scoutfs_sb_info *them; - int ret; - - mutex_lock(&roster_mutex); - - list_for_each_entry(them, &roster_list, roster_head) { - if (them->roster_id == peer_id) { - ret = process_message(them->sb, us->roster_id, msg); - break; - } - } - - /* XXX errors? */ - - mutex_unlock(&roster_mutex); -} - -/* - * Send a message to all of the current members which have an id greater - * than the caller's specified id. - * - * We don't actually send anything, we call directly into the receivers - * message processing path with the caller's message. - */ -void scoutfs_roster_broadcast(struct super_block *sb, u64 since_id, - struct scoutfs_message *msg) -{ - struct scoutfs_sb_info *us = SCOUTFS_SB(sb); - struct scoutfs_sb_info *them; - int ret; - - mutex_lock(&roster_mutex); - - list_for_each_entry(them, &roster_list, roster_head) { - if (us->roster_id != them->roster_id && - them->roster_id > since_id) { - ret = process_message(them->sb, us->roster_id, msg); - if (ret) - break; - } - } - - /* XXX errors? 
*/ - - mutex_unlock(&roster_mutex); -} diff --git a/kmod/src/roster.h b/kmod/src/roster.h deleted file mode 100644 index e78a7cc9..00000000 --- a/kmod/src/roster.h +++ /dev/null @@ -1,14 +0,0 @@ -#ifndef _SCOUTFS_ROSTER_H_ -#define _SCOUTFS_ROSTER_H_ - -struct scoutfs_message; - -int scoutfs_roster_add(struct super_block *sb); -void scoutfs_roster_remove(struct super_block *sb); - -void scoutfs_roster_send(struct super_block *sb, u64 peer_id, - struct scoutfs_message *msg); -void scoutfs_roster_broadcast(struct super_block *sb, u64 since_id, - struct scoutfs_message *msg); - -#endif diff --git a/kmod/src/super.c b/kmod/src/super.c index 5e409db0..a90caf75 100644 --- a/kmod/src/super.c +++ b/kmod/src/super.c @@ -26,8 +26,6 @@ #include "block.h" #include "counters.h" #include "trans.h" -#include "roster.h" -#include "wrlock.h" #include "trace.h" #include "scoutfs_trace.h" @@ -162,7 +160,6 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent) spin_lock_init(&sbi->trans_write_lock); INIT_WORK(&sbi->trans_write_work, scoutfs_trans_write_func); init_waitqueue_head(&sbi->trans_write_wq); - INIT_LIST_HEAD(&sbi->roster_head); sbi->ctr = atomic64_inc_return(&scoutfs_sb_ctr); @@ -174,8 +171,6 @@ static int scoutfs_fill_super(struct super_block *sb, void *data, int silent) ret = scoutfs_setup_counters(sb) ?: read_supers(sb) ?: scoutfs_setup_trans(sb) ?: - scoutfs_wrlock_setup(sb) ?: - scoutfs_roster_add(sb) ?: scoutfs_read_buddy_chunks(sb); if (ret) return ret; @@ -206,8 +201,6 @@ static void scoutfs_kill_sb(struct super_block *sb) kill_block_super(sb); if (sbi) { - scoutfs_roster_remove(sb); - scoutfs_wrlock_teardown(sb); scoutfs_shutdown_trans(sb); scoutfs_destroy_counters(sb); if (sbi->kset) diff --git a/kmod/src/super.h b/kmod/src/super.h index c99a77d7..d588f36a 100644 --- a/kmod/src/super.h +++ b/kmod/src/super.h @@ -9,7 +9,6 @@ struct scoutfs_counters; struct buddy_alloc; -struct wrlock_context; struct scoutfs_sb_info { struct super_block *sb; @@ -19,8 +18,6 @@ struct scoutfs_sb_info { struct scoutfs_super_block super; spinlock_t next_ino_lock; - u64 next_ino; - u64 next_ino_count; spinlock_t block_lock; struct radix_tree_root block_radix; @@ -48,11 +45,6 @@ struct scoutfs_sb_info { struct kset *kset; struct scoutfs_counters *counters; - - struct list_head roster_head; - u64 roster_id; - - struct wrlock_context *wrlock_context; }; static inline struct scoutfs_sb_info *SCOUTFS_SB(struct super_block *sb) diff --git a/kmod/src/wire.h b/kmod/src/wire.h deleted file mode 100644 index 474703bf..00000000 --- a/kmod/src/wire.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef _SCOUTFS_WIRE_H_ -#define _SCOUTFS_WIRE_H_ - -/* an arbitrarily small number to keep things reasonable */ -#define SCOUTFS_WRLOCK_MAX_SHARDS 5 - -enum { - SCOUTFS_MSG_WRLOCK_REQUEST = 1, - SCOUTFS_MSG_WRLOCK_GRANT = 2, -}; - -struct scoutfs_wrlock_id { - __le64 counter; - __le32 jitter; -} __packed; - -struct scoutfs_wrlock_request { - struct scoutfs_wrlock_id wid; - u8 nr_shards; - __le32 shards[SCOUTFS_WRLOCK_MAX_SHARDS]; -} __packed; - -struct scoutfs_wrlock_grant { - struct scoutfs_wrlock_id wid; -} __packed; - -struct scoutfs_message { - u8 cmd; - u8 len; - union { - struct scoutfs_wrlock_grant grant; - struct scoutfs_wrlock_request request; - } __packed; -} __packed; - -#endif diff --git a/kmod/src/wrlock.c b/kmod/src/wrlock.c deleted file mode 100644 index 9bb7b06c..00000000 --- a/kmod/src/wrlock.c +++ /dev/null @@ -1,1000 +0,0 @@ -/* - * Copyright (C) 2016 Versity Software, Inc. All rights reserved. 
- * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public - * License v2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ -#include -#include -#include -#include -#include - -#include "super.h" -#include "wire.h" -#include "wrlock.h" -#include "trans.h" -#include "roster.h" -#include "trace.h" - -/* - * The persistent structures in each shard in a scoutfs volume can only - * have one writer at a time. Mounts send messages around to request - * and grant locks on each shard. (Reads are fully unlocked and have - * enough metadata to detect and retry reads that raced and were - * inconsistent.) - * - * When a local task needs to lock some shards it sends a request to all - * the other mounts listing all the shards. If the receiving mounts - * don't have any of the shards locked they send a grant reply. - * - * Each mount has a granted lock and a tree of blocked lock entries for - * every shard. Local lock attempts and remote requests are always - * inserted into the tree. The first entry in the tree can be unblocked - * if the granted lock in the shard doesn't block it. When local - * entries are granted the locking task is allowed to start modifying - * the shard. While they're modifying the shard their granted locks - * block remote locks from being sent replies. Once the writers under - * the lock are done the grant can be removed and the remote entry is - * sent a reply and freed. - * - * Processes can try to lock multiple shards so entries can be present - * in the blocking tree and granted pointer on multiple shards. They're - * only unblocked when they're the first entry in all their shards' - * blocking trees. - * - * The entries have to be very carefully ordered in the trees on all the - * mounts to avoid locking cycle deadlocks. We can't have two mounts - * race to lock the same shard and both have their local ungranted entry - * blocking the remote's request entry. To address this entries aren't - * sorted in the tree by time. They're sorted by an id. This ensures - * that all of the entries will have the same blocking tree on all the - * mounts and so will always be processed in the same order. - * - * Entries are created on a mount when a task tries to lock some shards. - * The id is constructed from a counter, a random number, and the - * mount's unique id. The counter is one greater than the greatest - * counter ever seen in received lock requests. This ensures that lock - * attempts that don't race are granted in order. But attempts can race - * so entries can have the same counter. Next they're sorted by a - * random number to ensure a kind of fairness. Then if the mounts are - * unlucky enough to chose the same number we fall back to sorting by - * the unique mount id. - * - * The roster determines the set of mounts that are participating in the - * locking protocol. We have to carefully manage the entries as mounts - * join and leave the cluster. When mounts join we send them all our - * blocking locks and if they leave we remove their entries and resend - * all our blocked entries to everyone because we don't track which - * mounts had send grants to which local blocked entries, or not. 
- * - * XXX - * - sync if we revoke a local grant before we send a reply - */ - -/* - * Every mount tracks their write locking state for all the shards in - * the volume. - */ -struct wrlock_context { - struct super_block *sb; - wait_queue_head_t waitq; - spinlock_t lock; - - struct rb_root id_root; - struct list_head mark_list; - struct list_head send_list; - struct workqueue_struct *send_workq; - struct work_struct send_work; - - /* private copies of roster state used under the lock */ - long grants_needed; - u64 last_peer_id; - - u64 next_id_counter; - - /* XXX redundant in the super? only one for now ;) */ - u32 nr_shards; - struct wrlock_context_shard { - struct list_head mark_head; - struct rb_root blocked_root; - struct wrlock_entry *granted; - } shards[0]; -}; - -/* a native version of the wire wrlock_id that includes the roster id */ -struct wrlock_id { - u64 counter; - u32 jitter; - u64 roster_id; -}; - -/* - * Entries represent an attempt to lock multiple shards. - * - * Local entries exist on the context that initiated the request. They - * count the number of grant replies and then count the number of - * writers actively modifying the shards under the lock. - * - * Remote entries only exist while other entries are before them in the - * blocked trees in any of their shards. Once they're first in all the - * blocked trees a grant message is sent and they're freed. - */ -struct wrlock_entry { - struct rb_node id_node; - struct list_head send_head; - - /* local lock tasks wait for the entry to be granted */ - struct task_struct *waiter; - struct scoutfs_wrlock_held *held; - long grants; - long writers; - - /* tells roster broadcast who to send to */ - u64 last_peer_id; - struct wrlock_id id; - - u8 nr_shards; - struct wrlock_entry_shard { - struct rb_node blocked_node; - u32 shd; - u8 index; - } shards[SCOUTFS_WRLOCK_MAX_SHARDS]; -}; - -#define ENTF "ent id %llu.%llu.%llu" -#define ENTA(ent) ent->id.counter, ent->id.jitter, ent->id.roster_id - -static struct wrlock_entry *ent_from_blocked_node(struct rb_node *node) -{ - struct wrlock_entry_shard *shard; - - shard = container_of(node, struct wrlock_entry_shard, - blocked_node); - return container_of(shard, struct wrlock_entry, - shards[shard->index]); -} - -/* Return the first blocked entry */ -static struct wrlock_entry *blocked_ent(struct wrlock_context_shard *shard) -{ - struct rb_node *node = rb_first(&shard->blocked_root); - - return node ? ent_from_blocked_node(node) : NULL; -} - -static int cmp_u64s(u64 a, u64 b) -{ - return a < b ? -1 : a > b ? 1 : 0; -} - -static int cmp_u32s(u32 a, u32 b) -{ - return a < b ? -1 : a > b ? 1 : 0; -} - -static int cmp_ids(struct wrlock_id *a, struct wrlock_id *b) -{ - return cmp_u64s(a->counter, b->counter) ?: - cmp_u32s(a->jitter, b->jitter) ?: - cmp_u64s(a->roster_id, b->roster_id); -} - -static void insert_ent_shard(struct rb_root *root, struct wrlock_entry *ins, - struct rb_node *ins_node) -{ - struct rb_node **node = &root->rb_node; - struct rb_node *parent = NULL; - struct wrlock_entry *ent; - - while (*node) { - parent = *node; - ent = ent_from_blocked_node(*node); - - if (cmp_ids(&ins->id, &ent->id) < 0) - node = &(*node)->rb_left; - else - node = &(*node)->rb_right; - } - - rb_link_node(ins_node, parent, node); - rb_insert_color(ins_node, root); -} - -/* Insert the entry into the blocked tree for each of its shards. */ -/* - * Insert an entry in to all of its trees. All entries have to be on - * the blocked tree for all of its shards. - * - * But the id tree is a little lazy. 
It's only used to look up local - * entries when grants are received. It could be a hash table instead of - * a tree and remote entries don't need to be in it. But this re-use - * of the tree code is easy and isn't that expensive compared to all - * the rest of the processing. - */ -static void insert_ent(struct wrlock_context *ctx, struct wrlock_entry *ins) -{ - int i; - - insert_ent_shard(&ctx->id_root, ins, &ins->id_node); - - for (i = 0; i < ins->nr_shards; i++) - insert_ent_shard(&ctx->shards[ins->shards[i].shd].blocked_root, - ins, &ins->shards[i].blocked_node); - - scoutfs_trace(ctx->sb, "inserted "ENTF, ENTA(ins)); -} - -static struct wrlock_entry *lookup_ent(struct wrlock_context *ctx, - struct wrlock_id *id) -{ - struct rb_node *node = ctx->id_root.rb_node; - struct wrlock_entry *ent; - int cmp; - - while (node) { - ent = ent_from_blocked_node(node); - - cmp = cmp_ids(id, &ent->id); - if (cmp < 0) - node = node->rb_left; - else if (cmp > 0) - node = node->rb_right; - else - return ent; - } - - return NULL; -} - -static void erase_and_clear(struct rb_node *node, struct rb_root *root) -{ - if (!RB_EMPTY_NODE(node)) { - rb_erase(node, root); - RB_CLEAR_NODE(node); - } -} - -/* remove all of the entry's rb nodes from the context's trees */ -static void erase_ent(struct wrlock_context *ctx, struct wrlock_entry *ent) -{ - int i; - - erase_and_clear(&ent->id_node, &ctx->id_root); - - for (i = 0; i < ent->nr_shards; i++) - erase_and_clear(&ent->shards[i].blocked_node, - &ctx->shards[ent->shards[i].shd].blocked_root); - - scoutfs_trace(ctx->sb, "erased "ENTF, ENTA(ent)); -} - -static struct wrlock_entry *alloc_ent(void) -{ - struct wrlock_entry *ent; - int i; - - ent = kzalloc(sizeof(*ent), GFP_NOFS); - if (!ent) - return ERR_PTR(-ENOMEM); - - RB_CLEAR_NODE(&ent->id_node); - INIT_LIST_HEAD(&ent->send_head); - - /* for container_of to find the ent while walking shard nodes */ - for (i = 0; i < ARRAY_SIZE(ent->shards); i++) { - RB_CLEAR_NODE(&ent->shards[i].blocked_node); - ent->shards[i].index = i; - } - - return ent; -} - -/* - * Callers try to free the ent every time they remove a reference to it - * from the context and are done with it. We only free it if there are - * no more references to it in the context. This is called with the - * context lock held so it's not racing with other tasks that are - * removing references and trying to free. - */ -static void try_free_ent(struct wrlock_context *ctx, struct wrlock_entry *ent) -{ - int i; - - if (!RB_EMPTY_NODE(&ent->id_node) || !list_empty(&ent->send_head)) - return; - - for (i = 0; i < ent->nr_shards; i++) { - if (!RB_EMPTY_NODE(&ent->shards[i].blocked_node) || - ctx->shards[ent->shards[i].shd].granted == ent) - return; - } - - WARN_ON_ONCE(ent->writers); - scoutfs_trace(ctx->sb, "freed "ENTF, ENTA(ent)); - - kfree(ent); -} - -static bool is_local(struct wrlock_context *ctx, struct wrlock_entry *ent) -{ - struct scoutfs_sb_info *sbi = SCOUTFS_SB(ctx->sb); - - return ent->id.roster_id == sbi->roster_id; -} - -/* - * An entry in the blocked tree can be blocked for a few reasons: - * - * - a local entry hasn't received its grant replies yet - * - there are blocked entries before it on any of its shards - * - a remote entry is waiting for local writers to drain - * - * Note in particular that a local entry isn't blocked by granted writers - * because it'll join them and that remote entries aren't blocked by local - * grants with no writers because it revokes them to send a grant reply. 
- */ -static bool is_blocked(struct wrlock_context *ctx, struct wrlock_entry *ent) -{ - struct wrlock_context_shard *shard; - int i; - - if (is_local(ctx, ent) && ent->grants < ctx->grants_needed) - return true; - - for (i = 0; i < ent->nr_shards; i++) { - if (rb_prev(&ent->shards[i].blocked_node)) - return true; - - if (!is_local(ctx, ent)) { - shard = &ctx->shards[ent->shards[i].shd]; - if (shard->granted && shard->granted->writers) - return true; - } - } - - return false; -} - -/* mark a given shard for later processing to see if entries aren't blocked */ -static void mark_context_shard(struct wrlock_context *ctx, u32 shard) -{ - struct list_head *head = &ctx->shards[shard].mark_head; - - if (list_empty(head)) - list_add_tail(head, &ctx->mark_list); -} - -static void mark_ent_shards(struct wrlock_context *ctx, - struct wrlock_entry *ent) -{ - int i; - - for (i = 0; i < ent->nr_shards; i++) - mark_context_shard(ctx, ent->shards[i].shd); -} - -static void queue_send(struct wrlock_context *ctx, struct wrlock_entry *ent) -{ - if (list_empty(&ent->send_head)) { - list_add_tail(&ent->send_head, &ctx->send_list); - queue_work(ctx->send_workq, &ctx->send_work); - scoutfs_trace(ctx->sb, "queued "ENTF, ENTA(ent)); - } -} - -/* - * Try to unblock entries in the shard. We're done when the first entry - * in the shard is still blocked. - * - * If we unblock a remote entry then we have to send its grant message. - * If there is a granted local entry but it has no writers then we - * remove it so that future writers will have to request a new lock from - * the remote peer whose request we granted. - * - * If we unblock a local entry then we move it to the granted pointers - * for each of its shards. There are two tricky cases here. - * - * The first is a local entry being granted which covers more shards - * than the current granted entry on some of its shards. We don't want - * the larger unblocked entry to wait for the smaller granted entry's - * writers to drain. Instead we set the granted pointers to the new - * unblocked large entry after giving it the smaller granted entry's - * writer counters. Unlocking will drop the write counters on whatever - * entry is currently granted on its shards. - * - * The second is making sure that a waiting locking task gets a chance - * to work with a newly granted local entry before the next blocking - * remote entry revokes it. We increment the writers count the moment a - * local entry is granted. It will stay that way until the task drops - * the writer count. We just have to be careful to address all the - * races with the task sleeping, waking, and interrupting. 
- */ -static void unblock_shard(struct wrlock_context *ctx, - struct wrlock_context_shard *shard) -{ - struct wrlock_entry *ent; - struct wrlock_entry *gr; - int i; - - ent = blocked_ent(shard); - if (!ent) - return; - - if (is_blocked(ctx, ent)) { - /* send initial requests for local blocked entries */ - if (ent->last_peer_id < ctx->last_peer_id) - queue_send(ctx, ent); - return; - } - - scoutfs_trace(ctx->sb, "unblocked "ENTF, ENTA(ent)); - - erase_ent(ctx, ent); - mark_ent_shards(ctx, ent); - - /* unblocked remote entries remove local grants and send replies */ - if (!is_local(ctx, ent)) { - for (i = 0; i < ent->nr_shards; i++) { - shard = &ctx->shards[ent->shards[i].shd]; - - if (shard->granted) { - gr = shard->granted; - WARN_ON_ONCE(gr->writers); - - shard->granted = NULL; - try_free_ent(ctx, gr); - } - } - - queue_send(ctx, ent); - return; - } - - /* grant the entry on all its shards */ - for (i = 0; i < ent->nr_shards; i++) { - shard = &ctx->shards[ent->shards[i].shd]; - - /* the ent couldn't have been granted if it was blocked */ - WARN_ON_ONCE(shard->granted == ent); - - if (shard->granted) { - gr = shard->granted; - ent->writers += gr->writers; - - shard->granted = NULL; - try_free_ent(ctx, gr); - } - - shard->granted = ent; - if (ent->waiter) - ent->writers++; - - scoutfs_trace(ctx->sb, "granted ctx 0x%llx shd %llu wr %llu", - ctx, ent->shards[i].shd, ent->writers); - } - - if (ent->waiter) { - /* the task is responsible for the writer count if nr is set */ - ent->held->nr_shards = ent->nr_shards; - smp_mb(); /* wait_event condition isn't locked */ - wake_up_process(ent->waiter); - ent->waiter = NULL; - ent->held = NULL; - } -} - -/* - * Walk all the shards that have been marked and see if their blocked - * entry is still blocked. As we unblock entries we mark all their - * shards and keep going until the blocked entries in the shards - * stabilize. - */ -static void unblock_marked_shards(struct wrlock_context *ctx) -{ - struct wrlock_context_shard *shard; - - while ((shard = list_first_entry_or_null(&ctx->mark_list, - struct wrlock_context_shard, - mark_head))) { - list_del_init(&shard->mark_head); - unblock_shard(ctx, shard); - } -} - -/* - * Statically round robin every 1M inodes to each shard. - * - * XXX this will almost certainly need to be more clever. We'll want - * to size the batching more carefully and we'll need to cope with growing - * and shrinking the number of shards. - */ -static u32 ino_shd(struct wrlock_context *ctx, u64 ino) -{ - return (u32)(ino >> SCOUTFS_INO_BATCH_SHIFT) % ctx->nr_shards; -} - -/* - * Shards in entries are sorted and unique to make receive verification - * easier. Entries will only have a small handful of shards. - */ -static void add_ent_shd(struct wrlock_entry *ent, u32 shd) -{ - int i; - - for (i = 0; i < ent->nr_shards; i++) { - if (shd < ent->shards[i].shd) - swap(shd, ent->shards[i].shd); - else if (shd == ent->shards[i].shd) - return; - } - - ent->shards[i].shd = shd; - ent->nr_shards++; -} - -/* - * Get write locks on the shards that contain the given inodes. - * - * We always insert a new entry so that local attempts are inserted in - * the blocking tree after blocked remote entries. This way local lock - * matching doesn't stave remote lock attempts. - * - * In the fast path the inserted entry will be first and all its shards - * will be granted so we'll increase entry writer counts and return. In - * the slow path we send lock requests and sleep until we get grant - * replies. 
- * - * The writer counts are set when our entry is granted while we're still - * waiting for it so that we're guaranteed to get to work with our - * granted lock before a remote request has a chance to revoke it. - */ -int scoutfs_wrlock_lock(struct super_block *sb, - struct scoutfs_wrlock_held *held, int nr_inos, ...) -{ - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct wrlock_context *ctx = sbi->wrlock_context; - struct wrlock_entry *ent; - va_list args; - int ret; - int i; - - if (WARN_ON_ONCE(nr_inos <= 0 || nr_inos > SCOUTFS_WRLOCK_MAX_SHARDS) || - WARN_ON_ONCE(held->nr_shards)) - return -EINVAL; - - ent = alloc_ent(); - if (!ent) - return -ENOMEM; - - va_start(args, nr_inos); - while (nr_inos--) { - /* XXX verify inodes? */ - add_ent_shd(ent, ino_shd(ctx, va_arg(args, u64))); - } - va_end(args); - - /* held's nr_shards is set when the ent is granted and writers inced */ - for (i = 0; i < ent->nr_shards; i++) - held->shards[i] = ent->shards[i].shd; - - ent->waiter = current; - ent->held = held; - ent->id.jitter = get_random_int(); /* XXX how expensive? */ - ent->id.roster_id = sbi->roster_id; - - /* the context owns and can free the entry after we unlock */ - spin_lock(&ctx->lock); - - ent->id.counter = ctx->next_id_counter++; - - insert_ent(ctx, ent); - mark_ent_shards(ctx, ent); - unblock_marked_shards(ctx); - - spin_unlock(&ctx->lock); - - ret = wait_event_interruptible(ctx->waitq, held->nr_shards); - if (ret == 0) - ret = scoutfs_hold_trans(sb); - - scoutfs_trace(sb, "lock nr %llu ret %lld", held->nr_shards, ret); - - /* unlock on error locks the context before using held.nr_shards */ - if (ret) - scoutfs_wrlock_unlock(sb, held); - - return ret; -} - -/* - * The held shards must have had granted entries for us to increment the - * write counts. The increased write counts should have pinned entries - * to the shards so they must still be around for us to decrease the - * counts. - * - * If we're the last writer of an entry then we'll check to see if any - * of its shards have blocked remote entries that can now make progress. - * - * XXX we'd need to sync dirty blocks before sending the grant. - */ -void scoutfs_wrlock_unlock(struct super_block *sb, - struct scoutfs_wrlock_held *held) -{ - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct wrlock_context *ctx = sbi->wrlock_context; - struct wrlock_context_shard *shard; - u32 shd; - int i; - - scoutfs_release_trans(sb); - - spin_lock(&ctx->lock); - - for (i = 0; i < held->nr_shards; i++) { - shd = held->shards[i]; - shard = &ctx->shards[shd]; - - /* XXX this would imply unlocked writing, very bad indeed */ - if (WARN_ON_ONCE(!shard->granted) || - WARN_ON_ONCE(shard->granted->writers <= 0)) - continue; - - shard->granted->writers--; - - scoutfs_trace(sb, "unlock ctx 0x%llx shd %llu wr %llu", - ctx, shd, shard->granted->writers); - - if (shard->granted->writers == 0) - mark_context_shard(ctx, shd); - } - - unblock_marked_shards(ctx); - - spin_unlock(&ctx->lock); - -} - -/* - * Process an incoming request message. We allocate and insert an entry - * for the request. When it's not blocked by previous entries or a - * granted entry on all its shards then we send a reply and free the - * entry. - * - * Shard numbers in the incoming request must be unique and sorted. 
- */ -int scoutfs_wrlock_process_request(struct super_block *sb, u64 peer_id, - struct scoutfs_wrlock_request *req) -{ - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct wrlock_context *ctx = sbi->wrlock_context; - struct wrlock_entry *ent; - int ret = 0; - u32 shd; - u32 prev; - int i; - - ent = alloc_ent(); - if (!ent) - return -ENOMEM; - - if (req->nr_shards > SCOUTFS_WRLOCK_MAX_SHARDS) { - ret = -EINVAL; - goto out; - } - - for (i = 0, prev = 0; i < req->nr_shards; prev = shd, i++) { - shd = le32_to_cpu(req->shards[i]); - - if (shd >= ctx->nr_shards || (prev && shd <= prev)) { - ret = -EINVAL; - goto out; - } - - add_ent_shd(ent, shd); - } - - ent->id.counter = le64_to_cpu(req->wid.counter); - ent->id.jitter = le32_to_cpu(req->wid.jitter); - ent->id.roster_id = peer_id; - - spin_lock(&ctx->lock); - - ctx->next_id_counter = max(ent->id.counter + 1, ctx->next_id_counter); - - insert_ent(ctx, ent); - mark_ent_shards(ctx, ent); - unblock_marked_shards(ctx); - - spin_unlock(&ctx->lock); - -out: - if (ret) - kfree(ent); - return ret; -} - -/* - * Process an incoming grant message. The sending peer is telling us - * that they don't have any entries blocking our lock. We increment its - * count and wake the locker on the last grant. - * - * An entry won't be found at the id if the process attempting the lock - * exited and removed the entry before all the grants arrived. - * - * XXX freak out if grants is greater than grants_needed? That'd imply - * that we could have prematurely given a locker access to its shards. - */ -void scoutfs_wrlock_process_grant(struct super_block *sb, - struct scoutfs_wrlock_grant *grant) -{ - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct wrlock_context *ctx = sbi->wrlock_context; - struct wrlock_entry *ent; - struct wrlock_id id = { - .counter = le64_to_cpu(grant->wid.counter), - .jitter = le32_to_cpu(grant->wid.jitter), - .roster_id = sbi->roster_id, - }; - - spin_lock(&ctx->lock); - - ent = lookup_ent(ctx, &id); - if (ent) { - ent->grants++; - - scoutfs_trace(sb, "grant rx "ENTF" grants %llu needed %llu", - ENTA(ent), ent->grants, ctx->grants_needed); - - if (ent->grants == ctx->grants_needed) { - mark_ent_shards(ctx, ent); - unblock_marked_shards(ctx); - } - } - - spin_unlock(&ctx->lock); -} - -/* - * Send wrlock messages to peers. Entries are put on the send queue - * when we need to either broadcast requests to new peers or send a - * grant reply to a specific requesting peer. - * - * XXX As currently imagined any send failures trigger reconnection and - * recovery. We need a bit more clarity on what the roster - * implementation before worrying too much about the details of recovery - * in here. 
- */ -static void send_work_func(struct work_struct *work) -{ - struct wrlock_context *ctx = container_of(work, struct wrlock_context, - send_work); - struct super_block *sb = ctx->sb; - struct scoutfs_message msg; - struct wrlock_entry *ent; - struct wrlock_entry *tmp; - u64 peer_id; - int i; - - spin_lock(&ctx->lock); - - list_for_each_entry_safe(ent, tmp, &ctx->send_list, send_head) { - - if (is_local(ctx, ent)) { - msg.cmd = SCOUTFS_MSG_WRLOCK_REQUEST; - msg.request.wid.counter = cpu_to_le64(ent->id.counter); - msg.request.wid.jitter = cpu_to_le32(ent->id.jitter); - msg.request.nr_shards = ent->nr_shards; - for (i = 0; i < ent->nr_shards; i++) { - msg.request.shards[i] = - cpu_to_le32(ent->shards[i].shd); - } - - msg.len = offsetof(struct scoutfs_wrlock_request, - shards[ent->nr_shards]); - peer_id = ent->last_peer_id; - ent->last_peer_id = ctx->last_peer_id; - } else { - msg.cmd = SCOUTFS_MSG_WRLOCK_GRANT; - msg.grant.wid.counter = cpu_to_le64(ent->id.counter); - msg.grant.wid.jitter = cpu_to_le32(ent->id.jitter); - - msg.len = sizeof(msg.grant); - peer_id = ent->id.roster_id; - } - - scoutfs_trace(sb, "send "ENTF" cmd %llu", ENTA(ent), msg.cmd); - - list_del_init(&ent->send_head); - try_free_ent(ctx, ent); - - spin_unlock(&ctx->lock); - - if (msg.cmd == SCOUTFS_MSG_WRLOCK_GRANT) - scoutfs_roster_send(sb, peer_id, &msg); - else - scoutfs_roster_broadcast(sb, peer_id, &msg); - - spin_lock(&ctx->lock); - } - - spin_unlock(&ctx->lock); -} - -/* - * The roster tells us when mounts join or leave the cluster. - * - * Our job is easy if a peer is joining because they don't have any - * entries yet. They could start sending requests immediately and their - * entries could be inserted behind our blocked local entries. We send - * them all our blocked entries so that they can grant them and make - * forward progress in that case. - * - * If a peer is leaving then we have two problems. - * - * First they might have already granted some entries but we can't tell - * which. We don't track grant replies per peer. We can't adjust the - * entry grant counts to match a smaller number of needed grants. So we - * reset all the blocked local entries and resend them to everyone. We - * reset the id so that we're not confused by grants in flight. It's - * not great but it's simple and rare. - * - * XXX Worse, they might have held locks. We'd need to wait a grace - * period or fence them so that we're sure that they are no longer - * writing to shards. - */ -void scoutfs_wrlock_roster_update(struct super_block *sb, u64 peer_id, - bool join) -{ - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct wrlock_context *ctx = sbi->wrlock_context; - struct rb_node *node; - struct wrlock_entry *ent; - LIST_HEAD(list); - int i; - - spin_lock(&ctx->lock); - - /* take the peer change into account before walking entries */ - if (join) { - ctx->grants_needed++; - ctx->last_peer_id = peer_id; - } else { - ctx->grants_needed--; - } - - scoutfs_trace(sb, "update ctx 0x%llx peer_id %llu join %llu gr %llu", - ctx, peer_id, join, ctx->grants_needed); - - /* - * Walk all the blocked entries on all the shards. Entries can - * be on multiple shards so we're careful to only modify them on - * the first visit. 
- */ - for (i = 0; i < ctx->nr_shards; i++) { - node = rb_first(&ctx->shards[i].blocked_root); - while (node) { - ent = ent_from_blocked_node(node); - node = rb_next(node); - - /* drop remote blocked entries from a leaving peer */ - if (!join && ent->id.roster_id == peer_id) { - erase_ent(ctx, ent); - mark_ent_shards(ctx, ent); - try_free_ent(ctx, ent); - } - - /* send blocked local locks just to the new peer */ - if (join && is_local(ctx, ent)) - queue_send(ctx, ent); - - /* reset and resend local entries when leaving */ - if (!join && is_local(ctx, ent) && ent->last_peer_id) { - ent->grants = 0; - ent->last_peer_id = 0; - ent->id.counter = ctx->next_id_counter++; - ent->id.jitter = get_random_int(); - - erase_ent(ctx, ent); - insert_ent(ctx, ent); - mark_ent_shards(ctx, ent); - queue_send(ctx, ent); - } - } - } - - unblock_marked_shards(ctx); - - spin_unlock(&ctx->lock); -} - -int scoutfs_wrlock_setup(struct super_block *sb) -{ - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct wrlock_context *ctx; - u32 nr = 1; /* XXX */ - int i; - - ctx = vmalloc(offsetof(struct wrlock_context, shards[nr])); - if (!ctx) - return -ENOMEM; - - /* XXX need some kind of mount id */ - ctx->send_workq = alloc_ordered_workqueue("scoutfs-%s-%u:%u-send", 0, - sb->s_id, - MAJOR(sb->s_bdev->bd_dev), - MINOR(sb->s_bdev->bd_dev)); - if (!ctx->send_workq) { - vfree(ctx); - return -ENOMEM; - } - - ctx->sb = sb; - init_waitqueue_head(&ctx->waitq); - spin_lock_init(&ctx->lock); - ctx->id_root = RB_ROOT; - INIT_LIST_HEAD(&ctx->mark_list); - INIT_LIST_HEAD(&ctx->send_list); - INIT_WORK(&ctx->send_work, send_work_func); - ctx->nr_shards = nr; - - for (i = 0; i < nr; i++) { - INIT_LIST_HEAD(&ctx->shards[i].mark_head); - ctx->shards[i].blocked_root = RB_ROOT; - } - - sbi->wrlock_context = ctx; - - scoutfs_trace(sb, "setup ctx 0x%llx", ctx); - - return 0; -} - -/* - * Destroy the messaging work and free the wrlock entries. There should - * be no more active lockers at this point. 
- */ -void scoutfs_wrlock_teardown(struct super_block *sb) -{ - struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb); - struct wrlock_context *ctx = sbi->wrlock_context; - struct wrlock_context_shard *shard; - struct wrlock_entry *ent; - struct wrlock_entry *tmp; - int i; - - if (!ctx) - return; - - scoutfs_trace(sb, "teardown ctx 0x%llx", ctx); - - destroy_workqueue(ctx->send_workq); - - for (i = 0; i < ctx->nr_shards; i++) { - shard = &ctx->shards[i]; - - if (shard->granted) { - ent = shard->granted; - shard->granted = NULL; - try_free_ent(ctx, ent); - } - - list_for_each_entry_safe(ent, tmp, &ctx->send_list, send_head) { - list_del_init(&ent->send_head); - try_free_ent(ctx, ent); - } - - while ((ent = blocked_ent(shard))) { - erase_ent(ctx, ent); - try_free_ent(ctx, ent); - } - } - - vfree(ctx); -} diff --git a/kmod/src/wrlock.h b/kmod/src/wrlock.h deleted file mode 100644 index d8304498..00000000 --- a/kmod/src/wrlock.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef _SCOUTFS_WRLOCK_H_ -#define _SCOUTFS_WRLOCK_H_ - -#include "wire.h" - -struct scoutfs_wrlock_held { - u8 nr_shards; - u32 shards[SCOUTFS_WRLOCK_MAX_SHARDS]; -}; - -#define DECLARE_SCOUTFS_WRLOCK_HELD(held) \ - struct scoutfs_wrlock_held held = {0, } - -int scoutfs_wrlock_lock(struct super_block *sb, - struct scoutfs_wrlock_held *held, int nr_inos, ...); -void scoutfs_wrlock_unlock(struct super_block *sb, - struct scoutfs_wrlock_held *held); - -void scoutfs_wrlock_roster_update(struct super_block *sb, u64 peer_id, - bool join); -int scoutfs_wrlock_process_request(struct super_block *sb, u64 peer_id, - struct scoutfs_wrlock_request *req); -void scoutfs_wrlock_process_grant(struct super_block *sb, - struct scoutfs_wrlock_grant *grant); - -int scoutfs_wrlock_setup(struct super_block *sb); -void scoutfs_wrlock_teardown(struct super_block *sb); - -#endif
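
For reference, the calling convention this patch moves to is a plain transaction hold around item updates, as the dir.c and filerw.c hunks above show. Below is a minimal sketch of that pattern, not part of the patch itself; it assumes only what is visible in this diff (scoutfs_hold_trans()/scoutfs_release_trans() from trans.h and the new scoutfs_new_inode() signature, which now allocates the inode number internally from super->next_ino under next_ino_lock). The example_create() wrapper and its elided item updates are purely illustrative.

/*
 * Sketch of the post-patch create path: no wrlock held struct, no
 * per-mount inode number batches.  A single transaction hold brackets
 * the metadata updates, and scoutfs_new_inode() pulls the next inode
 * number from the in-memory super block itself.
 */
static int example_create(struct inode *dir, struct dentry *dentry,
			  umode_t mode, dev_t rdev)
{
	struct super_block *sb = dir->i_sb;
	struct inode *inode;
	int ret;

	ret = scoutfs_hold_trans(sb);	/* replaces scoutfs_wrlock_lock() */
	if (ret)
		return ret;

	inode = scoutfs_new_inode(sb, dir, mode, rdev);
	if (IS_ERR(inode)) {
		ret = PTR_ERR(inode);
		goto out;
	}

	/* ... create dirent items, update dir and inode items ... */
	ret = 0;
out:
	scoutfs_release_trans(sb);	/* replaces scoutfs_wrlock_unlock() */
	return ret;
}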