scst: add pr_state sysfs attribute for PR state save/restore

Add a read/write pr_state attribute to scst_device that serializes the
current persistent reservation state (generation, reservation type/scope,
and all registrants with their transport IDs) to a text format, and
restores it from the same format.

This provides a stable interface for saving and restoring PR state across
device transitions where the in-memory state would otherwise be lost.
This commit is contained in:
Brian M
2026-03-05 13:21:58 -08:00
committed by Gleb Chesnokov
parent 7a4ab042e6
commit b92e091999
3 changed files with 313 additions and 0 deletions

View File

@@ -2611,3 +2611,225 @@ skip:
TRACE_EXIT();
}
/*
* --- PR state serialisation / deserialisation ---
*
* Text format (version 1):
* version 1
* generation <decimal>
* reservation <type_hex2> <scope_hex2> (only present when pr_is_set)
* registrant <rel_tgt_id_dec> <key_hex16> <holder_0_or_1> <tid_len_dec> <tid_hex>
* ...
*
* The generation counter is preserved so initiators that cached the previous
* value are not confused by a generation reset after failover.
*
* The sysfs buffer is PAGE_SIZE bytes (4096 on most architectures), which
* holds approximately 20-50 registrants depending on TransportID length.
* That is sufficient for typical deployments (WSFC with 2-16 nodes, VMware
* RDM with a handful of hosts).
*/
/**
 * scst_pr_state_show - serialise in-memory PR state to a text buffer.
 * @dev: SCST device whose PR state to serialise.
 * @buf: Destination buffer.
 * @buf_size: Size of @buf in bytes.
 *
 * Emits a "version 1" line, a "generation" line, an optional "reservation"
 * line (only when a reservation is set) and one "registrant" line per entry
 * of @dev->dev_registrants_list.
 *
 * Must be called under dev_pr_mutex.
 *
 * Returns the number of bytes written (not including NUL terminator), or
 * -ENOSPC when the state does not fit into @buf. A truncated snapshot is
 * never reported as success, because restoring it would silently drop
 * registrants.
 */
ssize_t scst_pr_state_show(struct scst_device *dev, char *buf, size_t buf_size)
{
	ssize_t pos = 0;
	struct scst_dev_registrant *reg;

	scst_assert_pr_mutex_held(dev);

	pos += scnprintf(buf + pos, buf_size - pos, "version 1\n");
	pos += scnprintf(buf + pos, buf_size - pos, "generation %u\n",
			 dev->pr_generation);

	if (dev->pr_is_set)
		pos += scnprintf(buf + pos, buf_size - pos,
				 "reservation %02x %02x\n",
				 (unsigned int)dev->pr_type,
				 (unsigned int)dev->pr_scope);

	list_for_each_entry(reg, &dev->dev_registrants_list,
			    dev_registrants_list_entry) {
		u32 tid_size = scst_tid_size(reg->transport_id);
		u8 holder = scst_pr_is_holder(dev, reg) ? 1 : 0;
		u32 i;

		pos += scnprintf(buf + pos, buf_size - pos,
				 "registrant %u %016llx %u %u ",
				 (unsigned int)reg->rel_tgt_id,
				 be64_to_cpu(reg->key),
				 (unsigned int)holder,
				 tid_size);

		/* The TransportID is written as two hex digits per byte. */
		for (i = 0; i < tid_size; i++)
			pos += scnprintf(buf + pos, buf_size - pos,
					 "%02x", reg->transport_id[i]);

		pos += scnprintf(buf + pos, buf_size - pos, "\n");
	}

	/*
	 * scnprintf() never reports more than buf_size - 1 bytes in total, so
	 * a completely filled buffer means output was (or may have been) cut
	 * short. An exact fit is conservatively treated as truncation too,
	 * since the two cases cannot be told apart here.
	 */
	if ((size_t)pos + 1 >= buf_size) {
		PRINT_ERROR("%s: pr_state: PR state does not fit in %zu bytes",
			    dev->virt_name, buf_size);
		return -ENOSPC;
	}

	return pos;
}
EXPORT_SYMBOL(scst_pr_state_show);
/**
* scst_pr_state_store - restore PR state from text produced by scst_pr_state_show().
* @dev: SCST device to apply the state to.
* @buf: Text buffer in pr_state format.
* @count: Length of @buf in bytes.
*
* Must be called under scst_mutex but not dev_pr_mutex. The device must not
* be exported (no active I_T nexuses) at the time of the call.
*
* Clears any existing PR state and replaces it with the state described in
* @buf. Returns 0 on success, negative errno on failure.
*/
int scst_pr_state_store(struct scst_device *dev, const char *buf, size_t count)
{
int res = 0;
unsigned int version = 0, generation = 0;
bool has_reservation = false;
u8 pr_type = 0, pr_scope = 0;
struct scst_dev_registrant *holder_reg = NULL;
char *kbuf, *p, *line;
/* Work on a mutable NUL-terminated copy. */
kbuf = kmemdup(buf, count + 1, GFP_KERNEL);
if (!kbuf)
return -ENOMEM;
kbuf[count] = '\0';
p = kbuf;
/* The first non-empty line must be "version N". */
line = strsep(&p, "\n");
if (!line || sscanf(line, "version %u", &version) != 1 || version != 1) {
PRINT_ERROR("%s: pr_state: invalid or missing version line",
dev->virt_name);
res = -EINVAL;
goto out_free;
}
res = mutex_lock_interruptible(&dev->dev_pr_mutex);
if (res != 0)
goto out_free;
/* Clear any existing PR state before applying the new one. */
scst_pr_remove_registrants(dev);
dev->pr_is_set = 0;
dev->pr_holder = NULL;
while ((line = strsep(&p, "\n")) != NULL) {
unsigned int rel_tgt_id, holder, tid_len;
unsigned long long key_ll;
int consumed = 0;
u8 *tid;
u32 i;
struct scst_dev_registrant *reg;
__be64 key;
const char *tid_hex;
unsigned int byte_val;
if (line[0] == '\0')
continue;
if (sscanf(line, "generation %u", &generation) == 1)
continue;
if (sscanf(line, "reservation %hhx %hhx",
&pr_type, &pr_scope) == 2) {
has_reservation = true;
continue;
}
/*
* "registrant <rel_tgt_id> <key_hex16> <holder> <tid_len> <tid_hex>"
* %n is not counted in the sscanf return value.
*/
if (sscanf(line, "registrant %u %016llx %u %u %n",
&rel_tgt_id, &key_ll, &holder,
&tid_len, &consumed) < 4) {
PRINT_WARNING("%s: pr_state: unrecognized line: %.80s",
dev->virt_name, line);
continue;
}
if (tid_len == 0 || tid_len > 512) {
PRINT_ERROR("%s: pr_state: invalid tid_len %u",
dev->virt_name, tid_len);
res = -EINVAL;
goto out_unlock;
}
tid_hex = line + consumed;
if (strlen(tid_hex) < tid_len * 2) {
PRINT_ERROR("%s: pr_state: tid hex shorter than tid_len indicates",
dev->virt_name);
res = -EINVAL;
goto out_unlock;
}
tid = kmalloc(tid_len, GFP_KERNEL);
if (!tid) {
res = -ENOMEM;
goto out_unlock;
}
for (i = 0; i < tid_len; i++) {
if (sscanf(tid_hex + 2 * i, "%02x", &byte_val) != 1) {
PRINT_ERROR("%s: pr_state: bad hex byte %u in tid",
dev->virt_name, i);
kfree(tid);
res = -EINVAL;
goto out_unlock;
}
tid[i] = (u8)byte_val;
}
key = cpu_to_be64(key_ll);
reg = scst_pr_add_registrant(dev, tid, (u16)rel_tgt_id,
key, false);
kfree(tid);
if (!reg) {
res = -ENOMEM;
goto out_unlock;
}
if (holder)
holder_reg = reg;
}
dev->pr_generation = generation;
if (has_reservation) {
bool is_all_reg =
(pr_type == TYPE_WRITE_EXCLUSIVE_ALL_REG ||
pr_type == TYPE_EXCLUSIVE_ACCESS_ALL_REG);
if (!is_all_reg && !holder_reg) {
PRINT_ERROR("%s: pr_state: non-ALL_REG reservation but no holder",
dev->virt_name);
res = -EINVAL;
goto out_unlock;
}
scst_pr_set_holder(dev, is_all_reg ? NULL : holder_reg,
pr_scope, pr_type);
}
out_unlock:
mutex_unlock(&dev->dev_pr_mutex);
out_free:
kfree(kbuf);
return res;
}

View File

@@ -152,6 +152,9 @@ void scst_pr_clear_holder(struct scst_device *dev);
void scst_pr_sync_device_file(struct scst_device *dev);
/*
 * Serialise @dev's persistent reservation state as text into @buf (at most
 * @buf_size bytes). Must be called under dev_pr_mutex. Returns the number of
 * bytes written or a negative error code.
 */
ssize_t scst_pr_state_show(struct scst_device *dev, char *buf, size_t buf_size);
/*
 * Replace @dev's persistent reservation state with the state parsed from the
 * @count bytes of text at @buf. Must be called under scst_mutex but not
 * dev_pr_mutex. Returns 0 on success, negative errno on failure.
 */
int scst_pr_state_store(struct scst_device *dev, const char *buf, size_t count);
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
void scst_pr_dump_prs(struct scst_device *dev, bool force);
#else

View File

@@ -3280,6 +3280,93 @@ static struct kobj_attribute dev_dump_prs_attr =
#endif /* defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) */
/*
 * pr_state attribute: read/write serialised PR state for failover save/restore.
 * The text format is described next to scst_pr_state_show() and
 * scst_pr_state_store() in scst_pres.c.
 */

/*
 * sysfs read handler for pr_state: serialises the device's PR state into the
 * PAGE_SIZE sysfs buffer while holding dev_pr_mutex. Returns the byte count
 * from scst_pr_state_show(), or the error from an interrupted lock attempt.
 */
static ssize_t scst_dev_sysfs_pr_state_show(struct kobject *kobj,
					    struct kobj_attribute *attr,
					    char *buf)
{
	struct scst_device *dev = container_of(kobj, struct scst_device,
					       dev_kobj);
	ssize_t len;

	len = mutex_lock_interruptible(&dev->dev_pr_mutex);
	if (len != 0)
		return len;

	len = scst_pr_state_show(dev, buf, PAGE_SIZE);

	mutex_unlock(&dev->dev_pr_mutex);

	return len;
}
/*
 * Work-item handler for a pr_state write: applies the text in work->buf to
 * work->dev under scst_mutex. The write is refused with -EBUSY while the
 * device is exported. Always drops the device kobject reference taken by the
 * submitter, on success and on every failure path.
 */
static int scst_dev_sysfs_pr_state_process_store(struct scst_sysfs_work_item *work)
{
	struct scst_device *dev = work->dev;
	int rc = mutex_lock_interruptible(&scst_mutex);

	if (rc == 0) {
		if (scst_device_is_exported(dev)) {
			PRINT_ERROR("%s: pr_state write refused: device has active sessions",
				    dev->virt_name);
			rc = -EBUSY;
		} else {
			rc = scst_pr_state_store(dev, work->buf,
						 strlen(work->buf));
		}
		mutex_unlock(&scst_mutex);
	}

	kobject_put(&dev->dev_kobj);
	return rc;
}
/*
 * sysfs write handler for pr_state: copies the written text into a
 * NUL-terminated buffer, hands it to a sysfs work item together with a
 * reference on the device kobject, and waits for the work to complete.
 * Returns @count on success or a negative errno.
 */
static ssize_t scst_dev_sysfs_pr_state_store(struct kobject *kobj,
					     struct kobj_attribute *attr,
					     const char *buf, size_t count)
{
	struct scst_device *dev = container_of(kobj, struct scst_device,
					       dev_kobj);
	struct scst_sysfs_work_item *work;
	char *text;
	int rc;

	text = kasprintf(GFP_KERNEL, "%.*s", (int)count, buf);
	if (text == NULL)
		return -ENOMEM;

	rc = scst_alloc_sysfs_work(scst_dev_sysfs_pr_state_process_store,
				   false, &work);
	if (rc != 0)
		goto out_free;

	/* The reference is dropped by the work handler. */
	kobject_get(&dev->dev_kobj);
	work->dev = dev;
	work->buf = text; /* ownership transferred; freed by work destructor */

	rc = scst_sysfs_queue_wait_work(work);
	if (rc == 0)
		rc = count;
	return rc;

out_free:
	kfree(text);
	return rc;
}
/*
 * "pr_state" device attribute, mode 0644: reads go through
 * scst_dev_sysfs_pr_state_show(), writes through
 * scst_dev_sysfs_pr_state_store().
 */
static struct kobj_attribute dev_pr_state_attr =
__ATTR(pr_state, 0644, scst_dev_sysfs_pr_state_show,
scst_dev_sysfs_pr_state_store);
static int scst_process_dev_sysfs_threads_data_store(struct scst_device *dev, int threads_num,
enum scst_dev_type_threads_pool_type threads_pool_type)
{
@@ -3772,6 +3859,7 @@ static struct attribute *scst_dev_attrs[] = {
&dev_max_tgt_dev_commands_attr.attr,
&dev_numa_node_id_attr.attr,
&dev_block_attr.attr,
&dev_pr_state_attr.attr,
NULL,
};