Files
scoutfs/kmod/dlm/ast.c
Mark Fasheh 08bf1fea79 dlm: Give fs/dlm the notion of ranges
Using the new interval tree code we add a tree for each lock status list to
efficiently track ranged requests. Internally, most operations on a
resources lock status list (granted, waiting, converting) then are turned
into operations within a given range.

There is no API change other than a new call, dlm_lock_range() and a new
structure, 'struct dlm_key' to define our range endpoints. Keys can have
arbitrary lengths and are compared via memcmp. A ranged blocking ast type is
defined so that users of dlm_lock_range() can know which range they are
blocking.

A rudimentary test, dlmtest.ko is included.

TODO:
 - Update userspace entry points, need to add one for new lock call
 - Manage backwards compatibility with network protocol

Signed-off-by: Mark Fasheh <mfasheh@versity.com>
2017-06-23 15:07:10 -05:00

376 lines
9.7 KiB
C

/******************************************************************************
*******************************************************************************
**
** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
**
** This copyrighted material is made available to anyone wishing to use,
** modify, copy, or redistribute it subject to the terms and conditions
** of the GNU General Public License v.2.
**
*******************************************************************************
******************************************************************************/
#include "dlm_internal.h"
#include "lock.h"
#include "user.h"
static uint64_t dlm_cb_seq;
static DEFINE_SPINLOCK(dlm_cb_seq_spin);
static void dlm_dump_lkb_callbacks(struct dlm_lkb *lkb)
{
int i;
log_print("last_bast %x %llu flags %x mode %d sb %d %x",
lkb->lkb_id,
(unsigned long long)lkb->lkb_last_bast.seq,
lkb->lkb_last_bast.flags,
lkb->lkb_last_bast.mode,
lkb->lkb_last_bast.sb_status,
lkb->lkb_last_bast.sb_flags);
log_print("last_cast %x %llu flags %x mode %d sb %d %x",
lkb->lkb_id,
(unsigned long long)lkb->lkb_last_cast.seq,
lkb->lkb_last_cast.flags,
lkb->lkb_last_cast.mode,
lkb->lkb_last_cast.sb_status,
lkb->lkb_last_cast.sb_flags);
for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
log_print("cb %x %llu flags %x mode %d sb %d %x",
lkb->lkb_id,
(unsigned long long)lkb->lkb_callbacks[i].seq,
lkb->lkb_callbacks[i].flags,
lkb->lkb_callbacks[i].mode,
lkb->lkb_callbacks[i].sb_status,
lkb->lkb_callbacks[i].sb_flags);
}
}
static void fixup_cb_pointers(struct dlm_callback *cb)
{
struct dlm_key *start = &cb->start;
struct dlm_key *end = &cb->end;
cb->range.start = start;
cb->range.end = end;
start->val = &cb->startval;
end->val = &cb->endval;
}
/*
* Range must not be NULL for DLM_CB_BAST.
*/
int dlm_add_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode,
struct dlm_range *range, int status, uint32_t sbflags,
uint64_t seq)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
uint64_t prev_seq;
int prev_mode;
int i, rv;
struct dlm_range *prev_range;
if ((flags & DLM_CB_BAST) && !range) {
/* XXX: user.c doesn't handle this yet, fail for now */
WARN_ON_ONCE(1);
return -EINVAL;
}
for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
if (lkb->lkb_callbacks[i].seq)
continue;
/*
* Suppress some redundant basts here, do more on removal.
* Don't even add a bast if the callback just before it
* is a bast for the same mode or a more restrictive mode.
* (the addional > PR check is needed for PR/CW inversion)
*/
if ((i > 0) && (flags & DLM_CB_BAST) &&
(lkb->lkb_callbacks[i-1].flags & DLM_CB_BAST)) {
prev_seq = lkb->lkb_callbacks[i-1].seq;
prev_mode = lkb->lkb_callbacks[i-1].mode;
prev_range = &lkb->lkb_callbacks[i-1].range;
/* Below check needs to look at range */
if (ranges_overlap(prev_range, range) &&
((prev_mode == mode) ||
(prev_mode > mode && prev_mode > DLM_LOCK_PR))) {
log_debug(ls, "skip %x add bast %llu mode %d "
"for bast %llu mode %d",
lkb->lkb_id,
(unsigned long long)seq,
mode,
(unsigned long long)prev_seq,
prev_mode);
rv = 0;
goto out;
}
}
lkb->lkb_callbacks[i].seq = seq;
lkb->lkb_callbacks[i].flags = flags;
lkb->lkb_callbacks[i].mode = mode;
lkb->lkb_callbacks[i].sb_status = status;
lkb->lkb_callbacks[i].sb_flags = (sbflags & 0x000000FF);
if (range) {
struct dlm_key *start = &lkb->lkb_callbacks[i].start;
struct dlm_key *end = &lkb->lkb_callbacks[i].end;
lkb->lkb_callbacks[i].range.start = start;
lkb->lkb_callbacks[i].range.end = end;
start->len = range->start->len;
start->val = &lkb->lkb_callbacks[i].startval;
end->len = range->end->len;
end->val = &lkb->lkb_callbacks[i].endval;
memcpy(start->val, range->start->val, range->start->len);
memcpy(end->val, range->end->val, range->end->len);
}
rv = 0;
break;
}
if (i == DLM_CALLBACKS_SIZE) {
log_error(ls, "no callbacks %x %llu flags %x mode %d sb %d %x",
lkb->lkb_id, (unsigned long long)seq,
flags, mode, status, sbflags);
dlm_dump_lkb_callbacks(lkb);
rv = -1;
goto out;
}
out:
return rv;
}
int dlm_rem_lkb_callback(struct dlm_ls *ls, struct dlm_lkb *lkb,
struct dlm_callback *cb, int *resid)
{
int i, rv;
*resid = 0;
if (!lkb->lkb_callbacks[0].seq) {
rv = -ENOENT;
goto out;
}
/* oldest undelivered cb is callbacks[0] */
memcpy(cb, &lkb->lkb_callbacks[0], sizeof(struct dlm_callback));
memset(&lkb->lkb_callbacks[0], 0, sizeof(struct dlm_callback));
fixup_cb_pointers(cb);
/* shift others down */
for (i = 1; i < DLM_CALLBACKS_SIZE; i++) {
if (!lkb->lkb_callbacks[i].seq)
break;
memcpy(&lkb->lkb_callbacks[i-1], &lkb->lkb_callbacks[i],
sizeof(struct dlm_callback));
memset(&lkb->lkb_callbacks[i], 0, sizeof(struct dlm_callback));
(*resid)++;
}
/* if cb is a bast, it should be skipped if the blocking mode is
compatible with the last granted mode */
if ((cb->flags & DLM_CB_BAST) && lkb->lkb_last_cast.seq) {
if (dlm_modes_compat(cb->mode, lkb->lkb_last_cast.mode)) {
cb->flags |= DLM_CB_SKIP;
log_debug(ls, "skip %x bast %llu mode %d "
"for cast %llu mode %d",
lkb->lkb_id,
(unsigned long long)cb->seq,
cb->mode,
(unsigned long long)lkb->lkb_last_cast.seq,
lkb->lkb_last_cast.mode);
rv = 0;
goto out;
}
}
if (cb->flags & DLM_CB_CAST) {
memcpy(&lkb->lkb_last_cast, cb, sizeof(struct dlm_callback));
lkb->lkb_last_cast_time = ktime_get();
}
if (cb->flags & DLM_CB_BAST) {
memcpy(&lkb->lkb_last_bast, cb, sizeof(struct dlm_callback));
lkb->lkb_last_bast_time = ktime_get();
}
rv = 0;
out:
return rv;
}
void dlm_add_cb(struct dlm_lkb *lkb, uint32_t flags, int mode,
struct dlm_range *range, int status, uint32_t sbflags)
{
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
uint64_t new_seq, prev_seq;
int rv;
spin_lock(&dlm_cb_seq_spin);
new_seq = ++dlm_cb_seq;
spin_unlock(&dlm_cb_seq_spin);
if (lkb->lkb_flags & DLM_IFL_USER) {
dlm_user_add_ast(lkb, flags, mode, status, sbflags, new_seq);
return;
}
mutex_lock(&lkb->lkb_cb_mutex);
prev_seq = lkb->lkb_callbacks[0].seq;
rv = dlm_add_lkb_callback(lkb, flags, mode, range, status, sbflags,
new_seq);
if (rv < 0)
goto out;
if (!prev_seq) {
kref_get(&lkb->lkb_ref);
if (test_bit(LSFL_CB_DELAY, &ls->ls_flags)) {
mutex_lock(&ls->ls_cb_mutex);
list_add(&lkb->lkb_cb_list, &ls->ls_cb_delay);
mutex_unlock(&ls->ls_cb_mutex);
} else {
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
}
}
out:
mutex_unlock(&lkb->lkb_cb_mutex);
}
void dlm_callback_work(struct work_struct *work)
{
struct dlm_lkb *lkb = container_of(work, struct dlm_lkb, lkb_cb_work);
struct dlm_ls *ls = lkb->lkb_resource->res_ls;
void (*castfn) (void *astparam);
void (*bastfn) (void *astparam, int mode);
void (*rbastfn) (void *astarg, int mode, struct dlm_key *start,
struct dlm_key *end);
/*
* XXX: This used to be on the stack, but the inline buffers
* added for range support blow out our stack.
*
* struct dlm_callback callbacks[DLM_CALLBACKS_SIZE];
*/
struct dlm_callback *callbacks;
int i, rv, resid;
callbacks = kcalloc(DLM_CALLBACKS_SIZE, sizeof(*callbacks), GFP_NOFS);
WARN_ON_ONCE(!callbacks);
if (!callbacks)
return;
mutex_lock(&lkb->lkb_cb_mutex);
if (!lkb->lkb_callbacks[0].seq) {
/* no callback work exists, shouldn't happen */
log_error(ls, "dlm_callback_work %x no work", lkb->lkb_id);
dlm_print_lkb(lkb);
dlm_dump_lkb_callbacks(lkb);
}
for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
rv = dlm_rem_lkb_callback(ls, lkb, &callbacks[i], &resid);
if (rv < 0)
break;
}
if (resid) {
/* cbs remain, loop should have removed all, shouldn't happen */
log_error(ls, "dlm_callback_work %x resid %d", lkb->lkb_id,
resid);
dlm_print_lkb(lkb);
dlm_dump_lkb_callbacks(lkb);
}
mutex_unlock(&lkb->lkb_cb_mutex);
castfn = lkb->lkb_astfn;
bastfn = lkb->lkb_bastfn;
rbastfn = lkb->lkb_rbastfn;
for (i = 0; i < DLM_CALLBACKS_SIZE; i++) {
if (!callbacks[i].seq)
break;
if (callbacks[i].flags & DLM_CB_SKIP) {
continue;
} else if (callbacks[i].flags & DLM_CB_BAST) {
if (rbastfn)
rbastfn(lkb->lkb_astparam, callbacks[i].mode,
&callbacks[i].start, &callbacks[i].end);
else
bastfn(lkb->lkb_astparam, callbacks[i].mode);
} else if (callbacks[i].flags & DLM_CB_CAST) {
lkb->lkb_lksb->sb_status = callbacks[i].sb_status;
lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags;
castfn(lkb->lkb_astparam);
}
}
/* undo kref_get from dlm_add_callback, may cause lkb to be freed */
dlm_put_lkb(lkb);
kfree(callbacks);
}
int dlm_callback_start(struct dlm_ls *ls)
{
ls->ls_callback_wq = alloc_workqueue("dlm_callback",
WQ_UNBOUND |
WQ_MEM_RECLAIM |
WQ_NON_REENTRANT,
0);
if (!ls->ls_callback_wq) {
log_print("can't start dlm_callback workqueue");
return -ENOMEM;
}
return 0;
}
void dlm_callback_stop(struct dlm_ls *ls)
{
if (ls->ls_callback_wq)
destroy_workqueue(ls->ls_callback_wq);
}
void dlm_callback_suspend(struct dlm_ls *ls)
{
set_bit(LSFL_CB_DELAY, &ls->ls_flags);
if (ls->ls_callback_wq)
flush_workqueue(ls->ls_callback_wq);
}
void dlm_callback_resume(struct dlm_ls *ls)
{
struct dlm_lkb *lkb, *safe;
int count = 0;
clear_bit(LSFL_CB_DELAY, &ls->ls_flags);
if (!ls->ls_callback_wq)
return;
mutex_lock(&ls->ls_cb_mutex);
list_for_each_entry_safe(lkb, safe, &ls->ls_cb_delay, lkb_cb_list) {
list_del_init(&lkb->lkb_cb_list);
queue_work(ls->ls_callback_wq, &lkb->lkb_cb_work);
count++;
}
mutex_unlock(&ls->ls_cb_mutex);
if (count)
log_debug(ls, "dlm_callback_resume %d", count);
}