/*
 * Copyright (C) 2021 Versity Software, Inc.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 */
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/rhashtable.h>
#include <linux/rcupdate.h>
#include <linux/list_sort.h>

#include "super.h"
#include "recov.h"
#include "cmp.h"

/*
 * There are a few server messages which can't be processed until the
 * server knows that it has state for all possibly active clients.
 * These little helpers track which clients have recovered what state
 * and give those message handlers a call to check whether recovery has
 * completed.  We track the timeout here, but all we do is call back
 * into the server to take steps to evict timed-out clients and then
 * let us know that their recovery has finished.
 */

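/*
 * A sketch of the expected lifecycle, with the iteration helper and
 * the client-walk invented here for illustration:
 *
 *	scoutfs_recov_setup(sb);
 *	for_each_known_client_rid(sb, rid)		// hypothetical
 *		scoutfs_recov_prepare(sb, rid, SCOUTFS_RECOV_ALL);
 *	scoutfs_recov_begin(sb, timeout_fn, timeout_ms);
 *	...					// clients recover state
 *	scoutfs_recov_finish(sb, rid, which);	// per client, per state
 *	scoutfs_recov_shutdown(sb);
 *	scoutfs_recov_destroy(sb);
 */
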
struct recov_info {
	struct super_block *sb;
	spinlock_t lock;
	struct list_head pending;
	struct timer_list timer;
	void (*timeout_fn)(struct super_block *);
};

#define DECLARE_RECOV_INFO(sb, name) \
	struct recov_info *name = SCOUTFS_SB(sb)->recov_info

struct recov_pending {
	struct list_head head;
	u64 rid;
	int which;	/* remaining SCOUTFS_RECOV_* bits to recover */
};

/*
 * Return the first pending entry whose rid is strictly greater than
 * the given rid and which is still waiting on any of the given bits.
 */
static struct recov_pending *next_pending(struct recov_info *recinf, u64 rid, int which)
{
	struct recov_pending *pend;

	list_for_each_entry(pend, &recinf->pending, head) {
		if (pend->rid > rid && (pend->which & which))
			return pend;
	}

	return NULL;
}

/*
 * Find an exact rid by asking for the next pending entry after
 * rid - 1; the list is sorted by rid, so a matching entry is returned
 * first if it exists.
 */
static struct recov_pending *lookup_pending(struct recov_info *recinf, u64 rid, int which)
{
	struct recov_pending *pend;

	pend = next_pending(recinf, rid - 1, which);
	if (pend && pend->rid == rid)
		return pend;

	return NULL;
}

/*
 * We keep the pending list sorted by rid so that we can iterate over
 * the entries.  The list should be small and shouldn't be used often.
 */
static int cmp_pending_rid(void *priv, KC_LIST_CMP_CONST struct list_head *A,
			   KC_LIST_CMP_CONST struct list_head *B)
{
	KC_LIST_CMP_CONST struct recov_pending *a = list_entry(A, KC_LIST_CMP_CONST struct recov_pending, head);
	KC_LIST_CMP_CONST struct recov_pending *b = list_entry(B, KC_LIST_CMP_CONST struct recov_pending, head);

	return scoutfs_cmp_u64s(a->rid, b->rid);
}

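/*
 * As a worked example: if rids {3, 1, 2} are prepared, list_sort()
 * with this comparison leaves the list as 1 -> 2 -> 3, so
 * next_pending(recinf, 1, which) finds the entry for rid 2, and a
 * caller can visit every entry by feeding each returned rid back in.
 */
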
/*
 * Record that we'll be waiting for a client to recover something.
 * _finished will eventually be called for every _prepare, either
 * because recovery naturally finished or because it timed out and the
 * server evicted the client.
 */
int scoutfs_recov_prepare(struct super_block *sb, u64 rid, int which)
{
	DECLARE_RECOV_INFO(sb, recinf);
	struct recov_pending *alloc;
	struct recov_pending *pend;

	if (WARN_ON_ONCE(which & SCOUTFS_RECOV_INVALID))
		return -EINVAL;

	alloc = kmalloc(sizeof(*pend), GFP_NOFS);
	if (!alloc)
		return -ENOMEM;

	spin_lock(&recinf->lock);

	pend = lookup_pending(recinf, rid, SCOUTFS_RECOV_ALL);
	if (pend) {
		pend->which |= which;
	} else {
		/* consume the allocation, leaving alloc NULL for kfree below */
		swap(pend, alloc);
		pend->rid = rid;
		pend->which = which;
		list_add_tail(&pend->head, &recinf->pending);
		list_sort(NULL, &recinf->pending, cmp_pending_rid);
	}

	spin_unlock(&recinf->lock);

	kfree(alloc);
	return 0;
}

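/*
 * For example, a server greeting handler might record that it must
 * wait for all recoverable state from a reconnecting client; the
 * surrounding error handling here is invented for illustration:
 *
 *	ret = scoutfs_recov_prepare(sb, rid, SCOUTFS_RECOV_ALL);
 *	if (ret < 0)
 *		return ret;
 */
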
/*
 * Recovery is only finished once we've begun (which sets the timer)
 * and all clients have finished.  If we didn't test that the timer had
 * been armed we could claim that recovery finished prematurely while
 * clients are still being prepared.
 */
static int recov_finished(struct recov_info *recinf)
{
	return !!(recinf->timeout_fn != NULL && list_empty(&recinf->pending));
}

static void timer_callback(struct timer_list *timer)
{
	struct recov_info *recinf = from_timer(recinf, timer, timer);

	/* the server evicts timed-out clients and calls _finish for them */
	recinf->timeout_fn(recinf->sb);
}

/*
 * Begin waiting for recovery once we've prepared all the clients.  If
 * the timeout period elapses before _finish is called on all prepared
 * clients then the timer will call the callback.
 *
 * Returns > 0 if all the prepared clients finished recovery before
 * begin was called.
 */
int scoutfs_recov_begin(struct super_block *sb, void (*timeout_fn)(struct super_block *),
			unsigned int timeout_ms)
{
	DECLARE_RECOV_INFO(sb, recinf);
	int ret;

	spin_lock(&recinf->lock);

	recinf->timeout_fn = timeout_fn;
	recinf->timer.expires = jiffies + msecs_to_jiffies(timeout_ms);
	add_timer(&recinf->timer);

	ret = recov_finished(recinf);

	spin_unlock(&recinf->lock);

	if (ret > 0)
		del_timer_sync(&recinf->timer);

	return ret;
}

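/*
 * A caller sketch, with the callback and completion step invented: a
 * positive return means every prepared client already finished, so
 * the server can act immediately rather than waiting for _finish or
 * the timer.
 *
 *	ret = scoutfs_recov_begin(sb, server_recov_timeout,	// hypothetical
 *				  30 * MSEC_PER_SEC);
 *	if (ret > 0)
 *		start_processing_messages(sb);			// hypothetical
 */
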
/*
 * A given client has recovered the given state.  If it's finished all
 * recovery then we free it, and if all clients have finished recovery
 * then we cancel the timeout timer.
 *
 * Returns > 0 if _begin has been called and all clients have finished.
 * The caller will only see > 0 returned once.
 */
int scoutfs_recov_finish(struct super_block *sb, u64 rid, int which)
{
	DECLARE_RECOV_INFO(sb, recinf);
	struct recov_pending *pend;
	int ret = 0;

	spin_lock(&recinf->lock);

	pend = lookup_pending(recinf, rid, which);
	if (pend) {
		pend->which &= ~which;
		if (pend->which) {
			/* still waiting on other state, don't free below */
			pend = NULL;
		} else {
			list_del(&pend->head);
			ret = recov_finished(recinf);
		}
	}

	spin_unlock(&recinf->lock);

	if (ret > 0)
		del_timer_sync(&recinf->timer);

	kfree(pend);

	return ret;
}

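/*
 * For example, when the last of a client's state arrives; the
 * deferred-message processing call is invented for illustration:
 *
 *	if (scoutfs_recov_finish(sb, rid, which) > 0)
 *		process_deferred_messages(sb);		// hypothetical
 */
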
/*
 * Returns true if the given client is still trying to recover the
 * given state.
 */
bool scoutfs_recov_is_pending(struct super_block *sb, u64 rid, int which)
{
	DECLARE_RECOV_INFO(sb, recinf);
	bool is_pending;

	spin_lock(&recinf->lock);
	is_pending = lookup_pending(recinf, rid, which) != NULL;
	spin_unlock(&recinf->lock);

	return is_pending;
}

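/*
 * A message handler might use this as a gate; the choice of -EAGAIN
 * here is only illustrative:
 *
 *	if (scoutfs_recov_is_pending(sb, rid, which))
 *		return -EAGAIN;
 */
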
/*
 * Return the next rid after the given rid of a client waiting for the
 * given state to be recovered.  Start with rid 0; returns 0 when there
 * are no more clients waiting for recovery.
 *
 * This is inherently racy.  Callers are responsible for reconciling
 * any actions taken based on a pending entry with that recovery
 * finishing, perhaps before we return.
 */
u64 scoutfs_recov_next_pending(struct super_block *sb, u64 rid, int which)
{
	DECLARE_RECOV_INFO(sb, recinf);
	struct recov_pending *pend;

	spin_lock(&recinf->lock);
	pend = next_pending(recinf, rid, which);
	rid = pend ? pend->rid : 0;
	spin_unlock(&recinf->lock);

	return rid;
}

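/*
 * Typical iteration over every client still pending, with the resend
 * helper invented for illustration:
 *
 *	u64 rid = 0;
 *
 *	while ((rid = scoutfs_recov_next_pending(sb, rid, which)) != 0)
 *		resend_recovery_request(sb, rid);	// hypothetical
 */
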
/*
 * The server is shutting down and doesn't need to worry about recovery
 * anymore.  It'll be built up again by the next server, if needed.
 */
void scoutfs_recov_shutdown(struct super_block *sb)
{
	DECLARE_RECOV_INFO(sb, recinf);
	struct recov_pending *pend;
	struct recov_pending *tmp;
	LIST_HEAD(list);

	del_timer_sync(&recinf->timer);

	spin_lock(&recinf->lock);
	list_splice_init(&recinf->pending, &list);
	recinf->timeout_fn = NULL;
	spin_unlock(&recinf->lock);

	list_for_each_entry_safe(pend, tmp, &list, head) {
		list_del(&pend->head);
		kfree(pend);
	}
}

int scoutfs_recov_setup(struct super_block *sb)
{
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);
	struct recov_info *recinf;
	int ret;

	recinf = kzalloc(sizeof(struct recov_info), GFP_KERNEL);
	if (!recinf) {
		ret = -ENOMEM;
		goto out;
	}

	recinf->sb = sb;
	spin_lock_init(&recinf->lock);
	INIT_LIST_HEAD(&recinf->pending);
	timer_setup(&recinf->timer, timer_callback, 0);

	sbi->recov_info = recinf;
	ret = 0;
out:
	return ret;
}

void scoutfs_recov_destroy(struct super_block *sb)
{
	DECLARE_RECOV_INFO(sb, recinf);
	struct scoutfs_sb_info *sbi = SCOUTFS_SB(sb);

	if (recinf) {
		scoutfs_recov_shutdown(sb);

		kfree(recinf);
		sbi->recov_info = NULL;
	}
}