From b92e091999bbfc4e9035529b4493af00a1eca3aa Mon Sep 17 00:00:00 2001
From: Brian M
Date: Thu, 5 Mar 2026 13:21:58 -0800
Subject: [PATCH] scst: add pr_state sysfs attribute for PR state save/restore

Add a read/write pr_state attribute to scst_device that serializes the
current persistent reservation state (generation, reservation type/scope,
and all registrants with their transport IDs) to a text format, and
restores it from the same format.

This provides a stable interface for saving and restoring PR state across
device transitions where the in-memory state would otherwise be lost.
---
 scst/src/scst_pres.c  | 316 ++++++++++++++++++++++++++++++++++++++++++
 scst/src/scst_pres.h  |   3 +
 scst/src/scst_sysfs.c |  98 +++++++++++++
 3 files changed, 417 insertions(+)

diff --git a/scst/src/scst_pres.c b/scst/src/scst_pres.c
index a10be41e8..b8abb1b93 100644
--- a/scst/src/scst_pres.c
+++ b/scst/src/scst_pres.c
@@ -2611,3 +2611,319 @@ skip:
 
 	TRACE_EXIT();
 }
+
+/*
+ * --- PR state serialisation / deserialisation ---
+ *
+ * Text format (version 1):
+ *   version 1
+ *   generation GENERATION
+ *   reservation TYPE_HEX SCOPE_HEX   (only present when pr_is_set)
+ *   registrant REL_TGT_ID KEY_HEX IS_HOLDER TID_LEN TID_HEX
+ *   ...
+ *
+ * The generation counter is preserved so initiators that cached the previous
+ * value are not confused by a generation reset after failover.
+ *
+ * A PAGE_SIZE buffer (4096 bytes on most systems) holds approximately 20-50
+ * registrants depending on initiator identity length. Sufficient for typical
+ * deployments (WSFC 2-16 nodes, VMware RDM with a handful of hosts).
+ */
+
+/*
+ * Bounds on TID_LEN accepted by scst_pr_state_store(). The lower bound is the
+ * 4-byte TransportID header (NOTE(review): assumes scst_tid_size() reads no
+ * further than that header - TODO confirm); the upper bound caps the
+ * allocation made for a single registrant line.
+ */
+#define SCST_PR_STATE_MIN_TID_LEN	4
+#define SCST_PR_STATE_MAX_TID_LEN	512
+
+/* One decoded "registrant" line of the pr_state text format. */
+struct scst_pr_state_reg {
+	unsigned int rel_tgt_id;
+	unsigned int holder;
+	unsigned long long key;	/* CPU byte order */
+	u8 *tid;		/* kmalloc()ed, owned by the caller */
+};
+
+/**
+ * scst_pr_state_show - serialise in-memory PR state to a text buffer.
+ * @dev: SCST device whose PR state to serialise.
+ * @buf: Destination buffer of at least @buf_size bytes.
+ * @buf_size: Size of @buf in bytes.
+ *
+ * Must be called under dev_pr_mutex.
+ *
+ * Returns the number of bytes written (not including NUL terminator),
+ * -EINVAL if @buf_size is zero, or -ENOSPC if the state does not fit into
+ * @buf. A cut-short dump is never reported as success because
+ * scst_pr_state_store() could not restore it faithfully.
+ */
+ssize_t scst_pr_state_show(struct scst_device *dev, char *buf, size_t buf_size)
+{
+	ssize_t pos = 0;
+	struct scst_dev_registrant *reg;
+
+	scst_assert_pr_mutex_held(dev);
+
+	if (buf_size == 0)
+		return -EINVAL;
+
+	pos += scnprintf(buf + pos, buf_size - pos, "version 1\n");
+	pos += scnprintf(buf + pos, buf_size - pos, "generation %u\n",
+			 dev->pr_generation);
+
+	if (dev->pr_is_set)
+		pos += scnprintf(buf + pos, buf_size - pos,
+				 "reservation %02x %02x\n",
+				 (unsigned int)dev->pr_type,
+				 (unsigned int)dev->pr_scope);
+
+	list_for_each_entry(reg, &dev->dev_registrants_list,
+			    dev_registrants_list_entry) {
+		u32 tid_size = scst_tid_size(reg->transport_id);
+		u8 holder = scst_pr_is_holder(dev, reg) ? 1 : 0;
+		u32 i;
+
+		pos += scnprintf(buf + pos, buf_size - pos,
+				 "registrant %u %016llx %u %u ",
+				 (unsigned int)reg->rel_tgt_id,
+				 (unsigned long long)be64_to_cpu(reg->key),
+				 (unsigned int)holder,
+				 tid_size);
+
+		for (i = 0; i < tid_size; i++)
+			pos += scnprintf(buf + pos, buf_size - pos,
+					 "%02x", reg->transport_id[i]);
+
+		pos += scnprintf(buf + pos, buf_size - pos, "\n");
+	}
+
+	/*
+	 * scnprintf() never returns more than size - 1, so a completely full
+	 * buffer means the output was, or may have been, cut short.
+	 */
+	if ((size_t)pos >= buf_size - 1) {
+		PRINT_ERROR("%s: pr_state: PR state does not fit into %zu bytes",
+			    dev->virt_name, buf_size);
+		return -ENOSPC;
+	}
+
+	return pos;
+}
+EXPORT_SYMBOL(scst_pr_state_show);
+
+/*
+ * scst_pr_state_parse_reg - decode one
+ * "registrant REL_TGT_ID KEY_HEX IS_HOLDER TID_LEN TID_HEX" line.
+ *
+ * Returns 0 and fills @r on success; r->tid is then a kmalloc()ed
+ * TransportID that the caller must kfree(). Returns -ENOENT if @line is not
+ * a registrant line, or another negative errno if it is a malformed one.
+ */
+static int scst_pr_state_parse_reg(struct scst_device *dev, const char *line,
+				   struct scst_pr_state_reg *r)
+{
+	unsigned int tid_len;
+	const char *tid_hex;
+	int consumed = 0;
+
+	r->tid = NULL;
+
+	/* %n is not counted in the sscanf return value. */
+	if (sscanf(line, "registrant %u %016llx %u %u %n", &r->rel_tgt_id,
+		   &r->key, &r->holder, &tid_len, &consumed) < 4)
+		return -ENOENT;
+
+	/* The value is later narrowed to u16; refuse to truncate it silently. */
+	if (r->rel_tgt_id > 0xffff) {
+		PRINT_ERROR("%s: pr_state: invalid rel_tgt_id %u",
+			    dev->virt_name, r->rel_tgt_id);
+		return -EINVAL;
+	}
+
+	if (tid_len < SCST_PR_STATE_MIN_TID_LEN ||
+	    tid_len > SCST_PR_STATE_MAX_TID_LEN) {
+		PRINT_ERROR("%s: pr_state: invalid tid_len %u",
+			    dev->virt_name, tid_len);
+		return -EINVAL;
+	}
+
+	tid_hex = line + consumed;
+	if (strlen(tid_hex) < tid_len * 2) {
+		PRINT_ERROR("%s: pr_state: tid hex shorter than tid_len indicates",
+			    dev->virt_name);
+		return -EINVAL;
+	}
+
+	r->tid = kmalloc(tid_len, GFP_KERNEL);
+	if (!r->tid)
+		return -ENOMEM;
+
+	/* hex2bin() accepts hex digits only, unlike sscanf("%02x"). */
+	if (hex2bin(r->tid, tid_hex, tid_len) != 0) {
+		PRINT_ERROR("%s: pr_state: bad hex byte in tid",
+			    dev->virt_name);
+		goto out_bad_tid;
+	}
+
+	/*
+	 * scst_pr_add_registrant() is passed no length, so the size encoded
+	 * in the TransportID must agree with the number of bytes supplied.
+	 * scst_pr_state_show() always emits tid_len == scst_tid_size().
+	 */
+	if (scst_tid_size(r->tid) != tid_len) {
+		PRINT_ERROR("%s: pr_state: tid_len %u does not match TransportID",
+			    dev->virt_name, tid_len);
+		goto out_bad_tid;
+	}
+
+	return 0;
+
+out_bad_tid:
+	kfree(r->tid);
+	r->tid = NULL;
+	return -EINVAL;
+}
+
+/**
+ * scst_pr_state_store - restore PR state from text produced by scst_pr_state_show().
+ * @dev: SCST device to apply the state to.
+ * @buf: Text buffer in pr_state format.
+ * @count: Length of @buf in bytes.
+ *
+ * Must be called under scst_mutex but not dev_pr_mutex. The device must not
+ * be exported (no active I_T nexuses) at the time of the call.
+ *
+ * All of @buf is validated before any device state is touched, so malformed
+ * input leaves the existing PR state intact. Valid input replaces the
+ * existing PR state with the state described in @buf; if applying it fails
+ * midway, the device is left with no PR state rather than a partial one.
+ *
+ * Returns 0 on success, negative errno on failure.
+ */
+int scst_pr_state_store(struct scst_device *dev, const char *buf, size_t count)
+{
+	int res = 0;
+	unsigned int version = 0, generation = 0;
+	bool has_reservation = false, has_holder = false, is_all_reg;
+	u8 pr_type = 0, pr_scope = 0;
+	struct scst_dev_registrant *holder_reg = NULL;
+	struct scst_pr_state_reg r;
+	char *kbuf, *end, *p, *line;
+
+	/*
+	 * Work on a mutable NUL-terminated copy. kstrndup() reads at most
+	 * @count bytes, so @buf itself need not be NUL-terminated.
+	 */
+	kbuf = kstrndup(buf, count, GFP_KERNEL);
+	if (!kbuf)
+		return -ENOMEM;
+	end = kbuf + strlen(kbuf);
+
+	p = kbuf;
+
+	/* The first line must be "version 1". */
+	line = strsep(&p, "\n");
+	if (!line || sscanf(line, "version %u", &version) != 1 || version != 1) {
+		PRINT_ERROR("%s: pr_state: invalid or missing version line",
+			    dev->virt_name);
+		res = -EINVAL;
+		goto out_free;
+	}
+
+	/* Pass 1: split into lines and validate them without touching @dev. */
+	while ((line = strsep(&p, "\n")) != NULL) {
+		if (line[0] == '\0')
+			continue;
+
+		if (sscanf(line, "generation %u", &generation) == 1)
+			continue;
+
+		if (sscanf(line, "reservation %hhx %hhx",
+			   &pr_type, &pr_scope) == 2) {
+			has_reservation = true;
+			continue;
+		}
+
+		res = scst_pr_state_parse_reg(dev, line, &r);
+		if (res == -ENOENT) {
+			PRINT_WARNING("%s: pr_state: unrecognized line: %.80s",
+				      dev->virt_name, line);
+			continue;
+		}
+		if (res != 0)
+			goto out_free;
+
+		kfree(r.tid);
+		if (r.holder)
+			has_holder = true;
+	}
+
+	is_all_reg = pr_type == TYPE_WRITE_EXCLUSIVE_ALL_REG ||
+		     pr_type == TYPE_EXCLUSIVE_ACCESS_ALL_REG;
+
+	if (has_reservation && !is_all_reg && !has_holder) {
+		PRINT_ERROR("%s: pr_state: non-ALL_REG reservation but no holder",
+			    dev->virt_name);
+		res = -EINVAL;
+		goto out_free;
+	}
+
+	res = mutex_lock_interruptible(&dev->dev_pr_mutex);
+	if (res != 0)
+		goto out_free;
+
+	/* Clear any existing PR state before applying the new one. */
+	scst_pr_remove_registrants(dev);
+	dev->pr_is_set = 0;
+	dev->pr_holder = NULL;
+
+	/*
+	 * Pass 2: strsep() replaced every '\n' with NUL, so the lines are now
+	 * consecutive C strings; start with the one after the version line.
+	 */
+	for (line = kbuf + strlen(kbuf) + 1; line < end;
+	     line += strlen(line) + 1) {
+		struct scst_dev_registrant *reg;
+
+		res = scst_pr_state_parse_reg(dev, line, &r);
+		if (res == -ENOENT) {
+			res = 0;	/* not a registrant line */
+			continue;
+		}
+		if (res != 0)
+			goto out_unlock;
+
+		reg = scst_pr_add_registrant(dev, r.tid, (u16)r.rel_tgt_id,
+					     cpu_to_be64(r.key), false);
+		kfree(r.tid);
+		if (!reg) {
+			res = -ENOMEM;
+			goto out_unlock;
+		}
+
+		if (r.holder)
+			holder_reg = reg;
+	}
+
+	dev->pr_generation = generation;
+
+	if (has_reservation)
+		scst_pr_set_holder(dev, is_all_reg ? NULL : holder_reg,
+				   pr_scope, pr_type);
+
+out_unlock:
+	if (res != 0) {
+		/* Do not leave a half-restored PR state behind. */
+		scst_pr_remove_registrants(dev);
+		dev->pr_is_set = 0;
+		dev->pr_holder = NULL;
+	}
+	mutex_unlock(&dev->dev_pr_mutex);
+
+out_free:
+	kfree(kbuf);
+	return res;
+}
diff --git a/scst/src/scst_pres.h b/scst/src/scst_pres.h
index 3d68dbfeb..d37fbcbca 100644
--- a/scst/src/scst_pres.h
+++ b/scst/src/scst_pres.h
@@ -152,6 +152,9 @@ void scst_pr_clear_holder(struct scst_device *dev);
 
 void scst_pr_sync_device_file(struct scst_device *dev);
 
+ssize_t scst_pr_state_show(struct scst_device *dev, char *buf, size_t buf_size);
+int scst_pr_state_store(struct scst_device *dev, const char *buf, size_t count);
+
 #if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
 void scst_pr_dump_prs(struct scst_device *dev, bool force);
 #else
diff --git a/scst/src/scst_sysfs.c b/scst/src/scst_sysfs.c
index 131f1b1a5..18bf3e13b 100644
--- a/scst/src/scst_sysfs.c
+++ b/scst/src/scst_sysfs.c
@@ -3280,6 +3280,103 @@ static struct kobj_attribute dev_dump_prs_attr =
 
 #endif /* defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) */
 
+/*
+ * pr_state attribute: read/write serialised PR state for failover save/restore.
+ * See scst_pr_state_show() / scst_pr_state_store() in scst_pres.c for the
+ * text format description.
+ */
+
+/* sysfs read handler: dump the device's PR state under dev_pr_mutex. */
+static ssize_t scst_dev_sysfs_pr_state_show(struct kobject *kobj,
+					    struct kobj_attribute *attr,
+					    char *buf)
+{
+	struct scst_device *dev;
+	ssize_t res;
+
+	dev = container_of(kobj, struct scst_device, dev_kobj);
+
+	res = mutex_lock_interruptible(&dev->dev_pr_mutex);
+	if (res != 0)
+		return res;
+
+	res = scst_pr_state_show(dev, buf, PAGE_SIZE);
+
+	mutex_unlock(&dev->dev_pr_mutex);
+	return res;
+}
+
+/*
+ * Work-item body of a pr_state write: applies work->buf to work->dev under
+ * scst_mutex unless the device is exported, then drops the device reference
+ * taken by scst_dev_sysfs_pr_state_store().
+ */
+static int scst_dev_sysfs_pr_state_process_store(struct scst_sysfs_work_item *work)
+{
+	struct scst_device *dev = work->dev;
+	int res;
+
+	res = mutex_lock_interruptible(&scst_mutex);
+	if (res != 0)
+		goto out;
+
+	if (scst_device_is_exported(dev)) {
+		PRINT_ERROR("%s: pr_state write refused: device has active sessions",
+			    dev->virt_name);
+		res = -EBUSY;
+		goto out_unlock;
+	}
+
+	res = scst_pr_state_store(dev, work->buf, strlen(work->buf));
+
+out_unlock:
+	mutex_unlock(&scst_mutex);
+out:
+	kobject_put(&dev->dev_kobj);
+	return res;
+}
+
+/*
+ * sysfs write handler: hands a private copy of @buf to a sysfs work item
+ * queued with scst_sysfs_queue_wait_work(). Returns @count or a negative errno.
+ */
+static ssize_t scst_dev_sysfs_pr_state_store(struct kobject *kobj,
+					     struct kobj_attribute *attr,
+					     const char *buf, size_t count)
+{
+	struct scst_sysfs_work_item *work;
+	struct scst_device *dev;
+	char *state_buf;
+	int res;
+
+	dev = container_of(kobj, struct scst_device, dev_kobj);
+
+	state_buf = kstrndup(buf, count, GFP_KERNEL);
+	if (!state_buf)
+		return -ENOMEM;
+
+	res = scst_alloc_sysfs_work(scst_dev_sysfs_pr_state_process_store,
+				    false, &work);
+	if (res != 0) {
+		kfree(state_buf);
+		return res;
+	}
+
+	kobject_get(&dev->dev_kobj);
+	work->dev = dev;
+	work->buf = state_buf; /* ownership transferred; freed by work destructor */
+
+	res = scst_sysfs_queue_wait_work(work);
+	if (res == 0)
+		res = count;
+
+	return res;
+}
+
+static struct kobj_attribute dev_pr_state_attr =
+	__ATTR(pr_state, 0644, scst_dev_sysfs_pr_state_show,
+	       scst_dev_sysfs_pr_state_store);
+
 static int scst_process_dev_sysfs_threads_data_store(struct scst_device *dev,
 	int threads_num, enum scst_dev_type_threads_pool_type threads_pool_type)
 {
@@ -3772,6 +3869,7 @@ static struct attribute *scst_dev_attrs[] = {
 	&dev_max_tgt_dev_commands_attr.attr,
 	&dev_numa_node_id_attr.attr,
 	&dev_block_attr.attr,
+	&dev_pr_state_attr.attr,
 	NULL,
 };
 