From e8f87ff90ac217ad82454372ca8024758cc2a5ea Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 24 Oct 2017 16:19:55 -0500 Subject: [PATCH] scoutfs: use CW locks for inode index updates Take the inode index locks in CW (concurrent write) mode instead of EX. This will give us concurrency between nodes yet still allow our ioctls to drive cache syncing/invalidation on other nodes. Our lock_coverage() checks now take the DLM mode that each operation requires rather than a READ/WRITE flag, allowing us to verify correct usage of CW locks. As a test, we can run createmany on two nodes at the same time, each working in its own directory. The following commands were run on each node: $ mkdir /scoutfs/`uname -n` $ cd /scoutfs/`uname -n` $ /root/createmany -o ./file_$i 100000 Before this patch, that test wouldn't finish in any reasonable amount of time and I would kill it after some number of hours. After this patch, we make swift progress through the test: [root@fstest3 fstest3.site]# /root/createmany -o ./file_$i 100000 - created 10000 (time 1509394646.11 total 0.31 last 0.31) - created 20000 (time 1509394646.38 total 0.59 last 0.28) - created 30000 (time 1509394646.81 total 1.01 last 0.43) - created 40000 (time 1509394647.31 total 1.51 last 0.50) - created 50000 (time 1509394647.82 total 2.02 last 0.51) - created 60000 (time 1509394648.40 total 2.60 last 0.58) - created 70000 (time 1509394649.06 total 3.26 last 0.66) - created 80000 (time 1509394649.72 total 3.93 last 0.66) - created 90000 (time 1509394650.36 total 4.56 last 0.64) total: 100000 creates in 35.02 seconds: 2855.80 creates/second [root@fstest4 fstest4.fstestnet]# /root/createmany -o ./file_$i 100000 - created 10000 (time 1509394647.35 total 0.75 last 0.75) - created 20000 (time 1509394647.89 total 1.28 last 0.54) - created 30000 (time 1509394648.46 total 1.86 last 0.58) - created 40000 (time 1509394648.96 total 2.35 last 0.49) - created 50000 (time 1509394649.51 total 2.90 last 0.55) - created 60000 (time 1509394650.07 total 3.46 last 0.56) - created 70000 (time 1509394650.79 total 4.19 last 0.72) - created 80000 (time 1509394681.26 
total 34.66 last 30.47) - created 90000 (time 1509394681.63 total 35.03 last 0.37) total: 100000 creates in 35.50 seconds: 2816.76 creates/second Signed-off-by: Mark Fasheh --- kmod/src/inode.c | 10 +++++----- kmod/src/item.c | 43 ++++++++++++++++++++++++++----------------- kmod/src/lock.c | 4 +++- 3 files changed, 34 insertions(+), 23 deletions(-) diff --git a/kmod/src/inode.c b/kmod/src/inode.c index c9eecb9e..e0c5527e 100644 --- a/kmod/src/inode.c +++ b/kmod/src/inode.c @@ -775,7 +775,7 @@ static int update_index_items(struct super_block *sb, scoutfs_key_init(&ins, &ins_ikey, sizeof(ins_ikey)); ins_lock = find_index_lock(lock_list, type, major, minor, ino); - ret = scoutfs_item_create(sb, &ins, NULL, ins_lock); + ret = scoutfs_item_create_force(sb, &ins, NULL, ins_lock); if (ret || !will_del_index(si, type, major, minor)) return ret; @@ -791,7 +791,7 @@ static int update_index_items(struct super_block *sb, del_lock = find_index_lock(lock_list, type, si->item_majors[type], si->item_minors[type], ino); - ret = scoutfs_item_delete(sb, &del, del_lock); + ret = scoutfs_item_delete_force(sb, &del, del_lock); if (ret) { err = scoutfs_item_delete(sb, &ins, ins_lock); BUG_ON(err); @@ -1088,7 +1088,7 @@ int scoutfs_inode_index_try_lock_hold(struct super_block *sb, list_sort(NULL, list, cmp_index_lock); list_for_each_entry(ind_lock, list, head) { - ret = scoutfs_lock_inode_index(sb, DLM_LOCK_EX, ind_lock->type, + ret = scoutfs_lock_inode_index(sb, DLM_LOCK_CW, ind_lock->type, ind_lock->major, ind_lock->ino, &ind_lock->lock); if (ret) @@ -1135,7 +1135,7 @@ void scoutfs_inode_index_unlock(struct super_block *sb, struct list_head *list) struct index_lock *tmp; list_for_each_entry_safe(ind_lock, tmp, list, head) { - scoutfs_unlock(sb, ind_lock->lock, DLM_LOCK_EX); + scoutfs_unlock(sb, ind_lock->lock, DLM_LOCK_CW); list_del_init(&ind_lock->head); kfree(ind_lock); } @@ -1158,7 +1158,7 @@ static int remove_index(struct super_block *sb, u64 ino, u8 type, u64 major, 
scoutfs_key_init(&key, &ikey, sizeof(ikey)); lock = find_index_lock(ind_locks, type, major, minor, ino); - ret = scoutfs_item_delete(sb, &key, lock); + ret = scoutfs_item_delete_force(sb, &key, lock); if (ret == -ENOENT) ret = 0; return ret; diff --git a/kmod/src/item.c b/kmod/src/item.c index 9247ade7..c680e99a 100644 --- a/kmod/src/item.c +++ b/kmod/src/item.c @@ -738,22 +738,31 @@ restart: * it be? :). */ static bool lock_coverage(struct scoutfs_lock *lock, - struct scoutfs_key_buf *key, int rw) + struct scoutfs_key_buf *key, int op_level) { - bool writing = rw & WRITE; signed char level; - if (rw & ~WRITE) - return false; - if (!lock || !lock->start || !lock->end) return false; level = ACCESS_ONCE(lock->lockres.l_level); - if ((writing && level != DLM_LOCK_EX) || - (!writing && level != DLM_LOCK_EX && level != DLM_LOCK_PR)) + switch (op_level) { + case DLM_LOCK_CW: + if (level != DLM_LOCK_CW) + return false; + break; + case DLM_LOCK_PR: + if (level < DLM_LOCK_PR) + return false; + break; + case DLM_LOCK_EX: + if (level != DLM_LOCK_EX) + return false; + break; + default: return false; + } return scoutfs_key_compare_ranges(key, key, lock->start, lock->end) == 0; @@ -776,7 +785,7 @@ int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key_buf *key, unsigned long flags; int ret; - if (WARN_ON_ONCE(!lock_coverage(lock, key, READ))) + if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_PR))) return -EINVAL; trace_scoutfs_item_lookup(sb, key); @@ -932,7 +941,7 @@ int scoutfs_item_next(struct super_block *sb, struct scoutfs_key_buf *key, goto out; } - if (WARN_ON_ONCE(!lock_coverage(lock, key, READ))) { + if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_PR))) { ret = -EINVAL; goto out; } @@ -1077,7 +1086,7 @@ int scoutfs_item_create(struct super_block *sb, struct scoutfs_key_buf *key, if (!item) return -ENOMEM; - if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE))) + if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_EX))) return -EINVAL; do { @@ -1117,7 
+1126,7 @@ int scoutfs_item_create_force(struct super_block *sb, if (invalid_key_val(key, val)) return -EINVAL; - if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE))) + if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_CW))) return -EINVAL; item = alloc_item(sb, key, val); @@ -1276,8 +1285,8 @@ int scoutfs_item_set_batch(struct super_block *sb, struct list_head *list, trace_scoutfs_item_set_batch(sb, first, last); if (WARN_ON_ONCE(scoutfs_key_compare(first, last) > 0) || - WARN_ON_ONCE(!lock_coverage(lock, first, WRITE)) || - WARN_ON_ONCE(!lock_coverage(lock, last, WRITE))) + WARN_ON_ONCE(!lock_coverage(lock, first, DLM_LOCK_EX)) || + WARN_ON_ONCE(!lock_coverage(lock, last, DLM_LOCK_EX))) return -EINVAL; range_end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE); @@ -1392,7 +1401,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key_buf *key, unsigned long flags; int ret; - if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE))) + if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_EX))) return -EINVAL; do { @@ -1436,7 +1445,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key_buf *key, if (invalid_key_val(key, val)) return -EINVAL; - if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE))) + if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_EX))) return -EINVAL; if (val) { @@ -1495,7 +1504,7 @@ int scoutfs_item_delete(struct super_block *sb, struct scoutfs_key_buf *key, unsigned long flags; int ret; - if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE))) + if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_EX))) return -EINVAL; scoutfs_kvec_init_null(del_val); @@ -1535,7 +1544,7 @@ int scoutfs_item_delete_force(struct super_block *sb, unsigned long flags; int ret; - if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE))) + if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_CW))) return -EINVAL; scoutfs_kvec_init_null(del_val); diff --git a/kmod/src/lock.c b/kmod/src/lock.c index b42d09b8..c8aff0cc 100644 --- a/kmod/src/lock.c +++ 
b/kmod/src/lock.c @@ -80,7 +80,9 @@ static int invalidate_caches(struct super_block *sb, int mode, if (ret) return ret; - if (mode == DLM_LOCK_EX) { + + if (mode == DLM_LOCK_EX || + (mode == DLM_LOCK_PR && lock->lockres.l_level == DLM_LOCK_CW)) { if (lock->lock_name.zone == SCOUTFS_FS_ZONE) { ino = le64_to_cpu(lock->lock_name.first); last = ino + SCOUTFS_LOCK_INODE_GROUP_NR - 1;