scoutfs: use CW locks for inode index updates

Taking the inode index locks in CW (concurrent write) mode gives us
concurrency yet still allows our ioctls to drive cache
syncing/invalidation on other nodes. Our lock_coverage() checks now take
a DLM lock mode directly instead of a READ/WRITE flag, allowing us to
verify correct usage of CW locks.

As a test, we can run createmany on two nodes at the same time, each
working in its own directory. The following commands were run on each
node:
  $ mkdir /scoutfs/`uname -n`
  $ cd /scoutfs/`uname -n`
  $ /root/createmany -o ./file_$i 100000

Before this patch, that test wouldn't finish in any reasonable amount
of time, and I would kill it after some number of hours.

After this patch, we make swift progress through the test:

[root@fstest3 fstest3.site]# /root/createmany -o ./file_$i 100000
 - created 10000 (time 1509394646.11 total 0.31 last 0.31)
 - created 20000 (time 1509394646.38 total 0.59 last 0.28)
 - created 30000 (time 1509394646.81 total 1.01 last 0.43)
 - created 40000 (time 1509394647.31 total 1.51 last 0.50)
 - created 50000 (time 1509394647.82 total 2.02 last 0.51)
 - created 60000 (time 1509394648.40 total 2.60 last 0.58)
 - created 70000 (time 1509394649.06 total 3.26 last 0.66)
 - created 80000 (time 1509394649.72 total 3.93 last 0.66)
 - created 90000 (time 1509394650.36 total 4.56 last 0.64)
 total: 100000 creates in 35.02 seconds: 2855.80 creates/second

[root@fstest4 fstest4.fstestnet]# /root/createmany -o ./file_$i 100000
 - created 10000 (time 1509394647.35 total 0.75 last 0.75)
 - created 20000 (time 1509394647.89 total 1.28 last 0.54)
 - created 30000 (time 1509394648.46 total 1.86 last 0.58)
 - created 40000 (time 1509394648.96 total 2.35 last 0.49)
 - created 50000 (time 1509394649.51 total 2.90 last 0.55)
 - created 60000 (time 1509394650.07 total 3.46 last 0.56)
 - created 70000 (time 1509394650.79 total 4.19 last 0.72)
 - created 80000 (time 1509394681.26 total 34.66 last 30.47)
 - created 90000 (time 1509394681.63 total 35.03 last 0.37)
 total: 100000 creates in 35.50 seconds: 2816.76 creates/second

Signed-off-by: Mark Fasheh <mfasheh@versity.com>
This commit is contained in:
Mark Fasheh
2017-10-24 16:19:55 -05:00
committed by Zach Brown
parent 5fdcd54a54
commit e8f87ff90a
3 changed files with 34 additions and 23 deletions

View File

@@ -775,7 +775,7 @@ static int update_index_items(struct super_block *sb,
scoutfs_key_init(&ins, &ins_ikey, sizeof(ins_ikey));
ins_lock = find_index_lock(lock_list, type, major, minor, ino);
ret = scoutfs_item_create(sb, &ins, NULL, ins_lock);
ret = scoutfs_item_create_force(sb, &ins, NULL, ins_lock);
if (ret || !will_del_index(si, type, major, minor))
return ret;
@@ -791,7 +791,7 @@ static int update_index_items(struct super_block *sb,
del_lock = find_index_lock(lock_list, type, si->item_majors[type],
si->item_minors[type], ino);
ret = scoutfs_item_delete(sb, &del, del_lock);
ret = scoutfs_item_delete_force(sb, &del, del_lock);
if (ret) {
err = scoutfs_item_delete(sb, &ins, ins_lock);
BUG_ON(err);
@@ -1088,7 +1088,7 @@ int scoutfs_inode_index_try_lock_hold(struct super_block *sb,
list_sort(NULL, list, cmp_index_lock);
list_for_each_entry(ind_lock, list, head) {
ret = scoutfs_lock_inode_index(sb, DLM_LOCK_EX, ind_lock->type,
ret = scoutfs_lock_inode_index(sb, DLM_LOCK_CW, ind_lock->type,
ind_lock->major, ind_lock->ino,
&ind_lock->lock);
if (ret)
@@ -1135,7 +1135,7 @@ void scoutfs_inode_index_unlock(struct super_block *sb, struct list_head *list)
struct index_lock *tmp;
list_for_each_entry_safe(ind_lock, tmp, list, head) {
scoutfs_unlock(sb, ind_lock->lock, DLM_LOCK_EX);
scoutfs_unlock(sb, ind_lock->lock, DLM_LOCK_CW);
list_del_init(&ind_lock->head);
kfree(ind_lock);
}
@@ -1158,7 +1158,7 @@ static int remove_index(struct super_block *sb, u64 ino, u8 type, u64 major,
scoutfs_key_init(&key, &ikey, sizeof(ikey));
lock = find_index_lock(ind_locks, type, major, minor, ino);
ret = scoutfs_item_delete(sb, &key, lock);
ret = scoutfs_item_delete_force(sb, &key, lock);
if (ret == -ENOENT)
ret = 0;
return ret;

View File

@@ -738,22 +738,31 @@ restart:
* it be? :).
*/
static bool lock_coverage(struct scoutfs_lock *lock,
struct scoutfs_key_buf *key, int rw)
struct scoutfs_key_buf *key, int op_level)
{
bool writing = rw & WRITE;
signed char level;
if (rw & ~WRITE)
return false;
if (!lock || !lock->start || !lock->end)
return false;
level = ACCESS_ONCE(lock->lockres.l_level);
if ((writing && level != DLM_LOCK_EX) ||
(!writing && level != DLM_LOCK_EX && level != DLM_LOCK_PR))
switch (op_level) {
case DLM_LOCK_CW:
if (level != DLM_LOCK_CW)
return false;
break;
case DLM_LOCK_PR:
if (level < DLM_LOCK_PR)
return false;
break;
case DLM_LOCK_EX:
if (level != DLM_LOCK_EX)
return false;
break;
default:
return false;
}
return scoutfs_key_compare_ranges(key, key,
lock->start, lock->end) == 0;
@@ -776,7 +785,7 @@ int scoutfs_item_lookup(struct super_block *sb, struct scoutfs_key_buf *key,
unsigned long flags;
int ret;
if (WARN_ON_ONCE(!lock_coverage(lock, key, READ)))
if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_PR)))
return -EINVAL;
trace_scoutfs_item_lookup(sb, key);
@@ -932,7 +941,7 @@ int scoutfs_item_next(struct super_block *sb, struct scoutfs_key_buf *key,
goto out;
}
if (WARN_ON_ONCE(!lock_coverage(lock, key, READ))) {
if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_PR))) {
ret = -EINVAL;
goto out;
}
@@ -1077,7 +1086,7 @@ int scoutfs_item_create(struct super_block *sb, struct scoutfs_key_buf *key,
if (!item)
return -ENOMEM;
if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE)))
if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_EX)))
return -EINVAL;
do {
@@ -1117,7 +1126,7 @@ int scoutfs_item_create_force(struct super_block *sb,
if (invalid_key_val(key, val))
return -EINVAL;
if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE)))
if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_CW)))
return -EINVAL;
item = alloc_item(sb, key, val);
@@ -1276,8 +1285,8 @@ int scoutfs_item_set_batch(struct super_block *sb, struct list_head *list,
trace_scoutfs_item_set_batch(sb, first, last);
if (WARN_ON_ONCE(scoutfs_key_compare(first, last) > 0) ||
WARN_ON_ONCE(!lock_coverage(lock, first, WRITE)) ||
WARN_ON_ONCE(!lock_coverage(lock, last, WRITE)))
WARN_ON_ONCE(!lock_coverage(lock, first, DLM_LOCK_EX)) ||
WARN_ON_ONCE(!lock_coverage(lock, last, DLM_LOCK_EX)))
return -EINVAL;
range_end = scoutfs_key_alloc(sb, SCOUTFS_MAX_KEY_SIZE);
@@ -1392,7 +1401,7 @@ int scoutfs_item_dirty(struct super_block *sb, struct scoutfs_key_buf *key,
unsigned long flags;
int ret;
if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE)))
if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_EX)))
return -EINVAL;
do {
@@ -1436,7 +1445,7 @@ int scoutfs_item_update(struct super_block *sb, struct scoutfs_key_buf *key,
if (invalid_key_val(key, val))
return -EINVAL;
if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE)))
if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_EX)))
return -EINVAL;
if (val) {
@@ -1495,7 +1504,7 @@ int scoutfs_item_delete(struct super_block *sb, struct scoutfs_key_buf *key,
unsigned long flags;
int ret;
if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE)))
if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_EX)))
return -EINVAL;
scoutfs_kvec_init_null(del_val);
@@ -1535,7 +1544,7 @@ int scoutfs_item_delete_force(struct super_block *sb,
unsigned long flags;
int ret;
if (WARN_ON_ONCE(!lock_coverage(lock, key, WRITE)))
if (WARN_ON_ONCE(!lock_coverage(lock, key, DLM_LOCK_CW)))
return -EINVAL;
scoutfs_kvec_init_null(del_val);

View File

@@ -80,7 +80,9 @@ static int invalidate_caches(struct super_block *sb, int mode,
if (ret)
return ret;
if (mode == DLM_LOCK_EX) {
if (mode == DLM_LOCK_EX ||
(mode == DLM_LOCK_PR && lock->lockres.l_level == DLM_LOCK_CW)) {
if (lock->lock_name.zone == SCOUTFS_FS_ZONE) {
ino = le64_to_cpu(lock->lock_name.first);
last = ino + SCOUTFS_LOCK_INODE_GROUP_NR - 1;