mirror of
https://github.com/SCST-project/scst.git
synced 2026-05-17 02:31:27 +00:00
In the current implementation of SCST/trunk the recently introduced
"active" attribute for vdisk_blockio devices is managed by an ALUA
state change (start/finish), for specific ALUA states (eg,
active/standby). Relying on ALUA state changes to open/close the
back-end block device does indeed work for some SCST configurations,
however, some users desire advanced control over this setting, and
prefer to handle opening/closing a back-end block device from the
cluster resource agent (RA) script. Certain ambiguous situations can
also arise if more than one target group exists per SCST device
group... in this case, a back-end device may flip/flop (opened/closed)
and may end up in an inconsistent state if care is not taken in the
implementation of the cluster RA.
The patch below makes the "active" sysfs attribute for vdisk_blockio
readable/writable by a user, thereby giving full control over a
device's opened/closed state. The default behavior for SCST
vdisk_blockio devices is to allow ALUA state changes to control the
opened/closed ("active") state of a back-end device
(bind_alua_state=1). If the user wishes to handle setting the "active"
attribute themselves via a script or cluster RA, they would set
bind_alua_state=0 for the vdisk_blockio device, and the "active"
attribute would not be modified on ALUA state changes, it is then left
up to the user to handle this.
The patch below also fixes a bug where the attribute value is not
changed when the appropriate ALUA state is set, unless a target
session exists for the target group target; see here for more
information: https://sourceforge.net/p/scst/mailman/message/35898014/
Signed-off-by: Marc A. Smith <marc.smith@parodyne.com>
git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@7212 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
38
scst/README
38
scst/README
@@ -1203,6 +1203,25 @@ blocksize, nv_cache, read_only, removable, rotational, thin_provisioned,
|
||||
tst, dif_mode, dif_type, dif_static_app_tag, dif_filename. See
|
||||
vdisk_fileio above for description of those parameters.
|
||||
|
||||
vdisk_blockio devices have the following two additional attributes:
|
||||
|
||||
- active - if this flag is set (the default), the backing block device
|
||||
will be opened when the SCST device is added/opened. If a SCST device
|
||||
is opened with active=0 then the backing block device will not be
|
||||
opened, allowing for an active/passive SCST configuration. In addition,
|
||||
this attribute is writable via sysfs allowing the user to open/close the
|
||||
backing block device on the fly, or via a script.
|
||||
|
||||
- bind_alua_state - if this flag is set (the default), when the device is
|
||||
associated with an ALUA device group, and a target group ALUA state
|
||||
changes to the active/nonoptimized state, the active attribute will be
|
||||
set to 1 which attempts to open the backing block device. If the target
|
||||
group ALUA state changes to a value other than active/nonoptimized, the
|
||||
backing device will be closed (active=0). If bind_alua_state=0 for a
|
||||
device the ALUA state changes have NO effect on the active attribute,
|
||||
it is left up to the user to use a script, or manually set the active
|
||||
attribute to open/close the backing block device.
|
||||
|
||||
Handler vdisk_nullio provides NULLIO mode to create virtual devices. In
|
||||
this mode no real I/O is done, but success returned to initiators.
|
||||
Intended to be used for performance measurements at the same way as
|
||||
@@ -1683,8 +1702,12 @@ DEVICE_GROUP dgroup2 {
|
||||
Note, if you are using "active" BLOCKIO device attribute to prevent open
|
||||
of the backend block device on the passive node, it is not recommended
|
||||
to set both active ("active", "nonoptimized") and passive ("standby",
|
||||
etc.) ALUA states for the same device as shown above to keep internal
|
||||
"active" state of the BLOCKIO device consistent.
|
||||
etc.) ALUA states for the same device if "bind_alua_state=1" is used, as
|
||||
shown above to keep internal "active" state of the BLOCKIO device consistent.
|
||||
|
||||
If using the "active" BLOCKIO device attribute and multiple target groups
|
||||
exist per device on a SCST instance then "bind_alua_state=0" should be used
|
||||
and it is left up to the user to modify the "active" attribute value.
|
||||
|
||||
Explicit ALUA
|
||||
.............
|
||||
@@ -1710,10 +1733,13 @@ device, you need to create it with parameter "active=0". In case of DRBD
|
||||
it would be done automatically, you don't have to use the "active"
|
||||
attribute.
|
||||
|
||||
2. If you write new ALUA state in the "state" attribute, SCST BLOCKIO
|
||||
handler before transition closes open handles on all affected SCST
|
||||
devices and after transition reopens them, if the new state is active or
|
||||
nonoptimized.
|
||||
2. By default, if you write new ALUA state in the "state" attribute and
|
||||
"bind_alua_state=1" for the device, SCST BLOCKIO handler before transition
|
||||
closes open handles on all affected SCST devices and after transition
|
||||
reopens them, if the new state is active or nonoptimized. Alternatively,
|
||||
set "bind_alua_state=0" for SCST BLOCKIO devices and ALUA state changes
|
||||
will not open/close the backing block device, the user will neeed to handle
|
||||
this manually or via a cluster RA in an HA setup.
|
||||
|
||||
Thus, the recommended implicit ALUA state change procedure for primary
|
||||
to secondary transition is:
|
||||
|
||||
@@ -1063,6 +1063,25 @@ blocksize, nv_cache, read_only, removable, rotational, thin_provisioned,
|
||||
tst, dif_mode, dif_type, dif_static_app_tag, dif_filename. See
|
||||
vdisk_fileio above for description of those parameters.
|
||||
|
||||
vdisk_blockio devices have the following two additional attributes:
|
||||
|
||||
- active - if this flag is set (the default), the backing block device
|
||||
will be opened when the SCST device is added/opened. If a SCST device
|
||||
is opened with active=0 then the backing block device will not be
|
||||
opened, allowing for an active/passive SCST configuration. In addition,
|
||||
this attribute is writable via sysfs allowing the user to open/close the
|
||||
backing block device on the fly, or via a script.
|
||||
|
||||
- bind_alua_state - if this flag is set (the default), when the device is
|
||||
associated with an ALUA device group, and a target group ALUA state
|
||||
changes to the active/nonoptimized state, the active attribute will be
|
||||
set to 1 which attempts to open the backing block device. If the target
|
||||
group ALUA state changes to a value other than active/nonoptimized, the
|
||||
backing device will be closed (active=0). If bind_alua_state=0 for a
|
||||
device the ALUA state changes have NO effect on the active attribute,
|
||||
it is left up to the user to use a script, or manually set the active
|
||||
attribute to open/close the backing block device.
|
||||
|
||||
Handler vdisk_nullio provides NULLIO mode to create virtual devices. In
|
||||
this mode no real I/O is done, but success returned to initiators.
|
||||
Intended to be used for performance measurements at the same way as
|
||||
@@ -1536,8 +1555,12 @@ DEVICE_GROUP dgroup2 {
|
||||
Note, if you are using "active" BLOCKIO device attribute to prevent open
|
||||
of the backend block device on the passive node, it is not recommended
|
||||
to set both active ("active", "nonoptimized") and passive ("standby",
|
||||
etc.) ALUA states for the same device as shown above to keep internal
|
||||
"active" state of the BLOCKIO device consistent.
|
||||
etc.) ALUA states for the same device if "bind_alua_state=1" is used, as
|
||||
shown above to keep internal "active" state of the BLOCKIO device consistent.
|
||||
|
||||
If using the "active" BLOCKIO device attribute and multiple target groups
|
||||
exist per device on a SCST instance then "bind_alua_state=0" should be used
|
||||
and it is left up to the user to modify the "active" attribute value.
|
||||
|
||||
Explicit ALUA
|
||||
.............
|
||||
@@ -1563,10 +1586,13 @@ device, you need to create it with parameter "active=0". In case of DRBD
|
||||
it would be done automatically, you don't have to use the "active"
|
||||
attribute.
|
||||
|
||||
2. If you write new ALUA state in the "state" attribute, SCST BLOCKIO
|
||||
handler before transition closes open handles on all affected SCST
|
||||
devices and after transition reopens them, if the new state is active or
|
||||
nonoptimized.
|
||||
2. By default, if you write new ALUA state in the "state" attribute and
|
||||
"bind_alua_state=1" for the device, SCST BLOCKIO handler before transition
|
||||
closes open handles on all affected SCST devices and after transition
|
||||
reopens them, if the new state is active or nonoptimized. Alternatively,
|
||||
set "bind_alua_state=0" for SCST BLOCKIO devices and ALUA state changes
|
||||
will not open/close the backing block device, the user will neeed to handle
|
||||
this manually or via a cluster RA in an HA setup.
|
||||
|
||||
Thus, the recommended implicit ALUA state change procedure for primary
|
||||
to secondary transition is:
|
||||
|
||||
@@ -5728,6 +5728,11 @@ int scst_pr_set_cluster_mode(struct scst_device *dev, bool cluster_mode,
|
||||
int scst_pr_init_dev(struct scst_device *dev);
|
||||
void scst_pr_clear_dev(struct scst_device *dev);
|
||||
|
||||
/* Global ALUA lock/unlock functions */
|
||||
void scst_alua_lock(void);
|
||||
void scst_alua_unlock(void);
|
||||
void lockdep_assert_alua_lock_held(void);
|
||||
|
||||
struct scst_ext_copy_data_descr {
|
||||
uint64_t src_lba;
|
||||
uint64_t dst_lba;
|
||||
|
||||
@@ -121,6 +121,7 @@ static struct scst_trace_log vdisk_local_trace_tbl[] = {
|
||||
#define DEF_THIN_PROVISIONED 0
|
||||
#define DEF_EXPL_ALUA 0
|
||||
#define DEF_DEV_ACTIVE 1
|
||||
#define DEF_BIND_ALUA_STATE 1
|
||||
|
||||
#define VDISK_NULLIO_SIZE (5LL*1024*1024*1024*1024/2)
|
||||
|
||||
@@ -170,6 +171,7 @@ struct scst_vdisk_dev {
|
||||
* with scst_vdisk_mutex.
|
||||
*/
|
||||
unsigned int dev_active:1;
|
||||
unsigned int bind_alua_state:1;
|
||||
unsigned int rd_only:1;
|
||||
unsigned int wt_flag:1;
|
||||
unsigned int nv_cache:1;
|
||||
@@ -422,6 +424,12 @@ static ssize_t vdisk_sysfs_o_direct_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf);
|
||||
static ssize_t vdev_sysfs_active_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf);
|
||||
static ssize_t vdev_sysfs_active_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, const char *buf, size_t count);
|
||||
static ssize_t vdev_sysfs_bind_alua_state_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf);
|
||||
static ssize_t vdev_sysfs_bind_alua_state_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, const char *buf, size_t count);
|
||||
static ssize_t vdev_sysfs_dummy_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf);
|
||||
static ssize_t vdev_sysfs_rz_show(struct kobject *kobj,
|
||||
@@ -489,7 +497,12 @@ static ssize_t vcdrom_sysfs_filename_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, const char *buf, size_t count);
|
||||
|
||||
static struct kobj_attribute vdev_active_attr =
|
||||
__ATTR(active, S_IRUGO, vdev_sysfs_active_show, NULL);
|
||||
__ATTR(active, S_IWUSR|S_IRUGO, vdev_sysfs_active_show,
|
||||
vdev_sysfs_active_store);
|
||||
static struct kobj_attribute vdev_bind_alua_state_attr =
|
||||
__ATTR(bind_alua_state, S_IWUSR|S_IRUGO,
|
||||
vdev_sysfs_bind_alua_state_show,
|
||||
vdev_sysfs_bind_alua_state_store);
|
||||
static struct kobj_attribute vdev_size_ro_attr =
|
||||
__ATTR(size, S_IRUGO, vdev_sysfs_size_show, NULL);
|
||||
static struct kobj_attribute vdev_size_rw_attr =
|
||||
@@ -611,6 +624,7 @@ static const struct attribute *vdisk_fileio_attrs[] = {
|
||||
|
||||
static const struct attribute *vdisk_blockio_attrs[] = {
|
||||
&vdev_active_attr.attr,
|
||||
&vdev_bind_alua_state_attr.attr,
|
||||
&vdev_size_rw_attr.attr,
|
||||
&vdev_size_mb_rw_attr.attr,
|
||||
&vdisk_blocksize_attr.attr,
|
||||
@@ -797,6 +811,7 @@ static struct scst_dev_type vdisk_blk_devtype = {
|
||||
.dev_attrs = vdisk_blockio_attrs,
|
||||
.add_device_parameters =
|
||||
"active, "
|
||||
"bind_alua_state, "
|
||||
"blocksize, "
|
||||
"dif_mode, "
|
||||
"dif_type, "
|
||||
@@ -7442,6 +7457,11 @@ static void blockio_on_alua_state_change_start(struct scst_device *dev,
|
||||
|
||||
TRACE_ENTRY();
|
||||
|
||||
lockdep_assert_alua_lock_held();
|
||||
|
||||
if (!virt_dev->bind_alua_state)
|
||||
return;
|
||||
|
||||
/*
|
||||
* As required for on_alua_state_change_* callbacks,
|
||||
* no parallel fd activities could be here.
|
||||
@@ -7467,6 +7487,11 @@ static void blockio_on_alua_state_change_finish(struct scst_device *dev,
|
||||
|
||||
TRACE_ENTRY();
|
||||
|
||||
lockdep_assert_alua_lock_held();
|
||||
|
||||
if (!virt_dev->bind_alua_state)
|
||||
return;
|
||||
|
||||
/*
|
||||
* As required for on_alua_state_change_* callbacks,
|
||||
* no parallel fd activities could be here.
|
||||
@@ -7857,6 +7882,7 @@ static int vdev_create_node(struct scst_dev_type *devt,
|
||||
virt_dev->blk_shift = DEF_DISK_BLOCK_SHIFT;
|
||||
virt_dev->numa_node_id = NUMA_NO_NODE;
|
||||
virt_dev->dev_active = DEF_DEV_ACTIVE;
|
||||
virt_dev->bind_alua_state = DEF_BIND_ALUA_STATE;
|
||||
|
||||
if (strlen(name) >= sizeof(virt_dev->name)) {
|
||||
PRINT_ERROR("Name %s is too long (max allowed %zd)", name,
|
||||
@@ -8153,6 +8179,10 @@ static int vdev_parse_add_dev_params(struct scst_vdisk_dev *virt_dev,
|
||||
} else if (!strcasecmp("active", p)) {
|
||||
virt_dev->dev_active = ull_val;
|
||||
TRACE_DBG("ACTIVE %d", virt_dev->dev_active);
|
||||
} else if (!strcasecmp("bind_alua_state", p)) {
|
||||
virt_dev->bind_alua_state = ull_val;
|
||||
TRACE_DBG("BIND_ALUA_STATE %d",
|
||||
virt_dev->bind_alua_state);
|
||||
} else if (!strcasecmp("rotational", p)) {
|
||||
virt_dev->rotational = ull_val;
|
||||
TRACE_DBG("ROTATIONAL %d", virt_dev->rotational);
|
||||
@@ -8286,7 +8316,8 @@ static int vdev_blockio_add_device(const char *device_name, char *params)
|
||||
"removable", "blocksize", "nv_cache",
|
||||
"rotational", "cluster_mode",
|
||||
"thin_provisioned", "tst", "active",
|
||||
"numa_node_id", "dif_mode",
|
||||
"bind_alua_state", "numa_node_id",
|
||||
"dif_mode",
|
||||
"dif_type", "dif_static_app_tag",
|
||||
"dif_filename", NULL };
|
||||
struct scst_vdisk_dev *virt_dev;
|
||||
@@ -10213,6 +10244,169 @@ static ssize_t vdev_sysfs_active_show(struct kobject *kobj,
|
||||
return pos;
|
||||
}
|
||||
|
||||
static int vdev_sysfs_process_active_store(
|
||||
struct scst_sysfs_work_item *work)
|
||||
{
|
||||
struct scst_device *dev = work->dev;
|
||||
struct scst_vdisk_dev *virt_dev;
|
||||
long dev_active;
|
||||
int res;
|
||||
|
||||
res = scst_suspend_activity(SCST_SUSPEND_TIMEOUT_USER);
|
||||
if (res)
|
||||
goto out;
|
||||
|
||||
res = mutex_lock_interruptible(&scst_mutex);
|
||||
if (res)
|
||||
goto resume;
|
||||
/*
|
||||
* This is used to serialize against the *_on_alua_state_change_*()
|
||||
* calls in scst_tg.c
|
||||
*/
|
||||
scst_alua_lock();
|
||||
|
||||
/*
|
||||
* This is safe since we hold a reference on dev_kobj and since
|
||||
* scst_assign_dev_handler() waits until all dev_kobj references
|
||||
* have been dropped before invoking .detach().
|
||||
*/
|
||||
virt_dev = dev->dh_priv;
|
||||
res = kstrtol(work->buf, 0, &dev_active);
|
||||
if (res)
|
||||
goto unlock;
|
||||
res = -EINVAL;
|
||||
if (dev_active < 0 || dev_active > 1)
|
||||
goto unlock;
|
||||
if (dev_active != virt_dev->dev_active) {
|
||||
res = 0;
|
||||
if (dev_active == 0) {
|
||||
/* Close the FD here */
|
||||
vdisk_close_fd(virt_dev);
|
||||
virt_dev->dev_active = dev_active;
|
||||
} else {
|
||||
/* Re-open FD if tgt_dev_cnt is not zero */
|
||||
virt_dev->dev_active = dev_active;
|
||||
if (virt_dev->tgt_dev_cnt)
|
||||
res = vdisk_open_fd(virt_dev, dev->dev_rd_only);
|
||||
if (res == 0) {
|
||||
if (virt_dev->reexam_pending) {
|
||||
res = vdisk_reexamine(virt_dev);
|
||||
WARN_ON(res != 0);
|
||||
virt_dev->reexam_pending = 0;
|
||||
}
|
||||
} else {
|
||||
PRINT_ERROR("Unable to open FD on active -> "
|
||||
"%ld (dev %s): %d", dev_active,
|
||||
dev->virt_name, res);
|
||||
virt_dev->dev_active = 0;
|
||||
goto unlock;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
res = 0;
|
||||
}
|
||||
|
||||
unlock:
|
||||
scst_alua_unlock();
|
||||
mutex_unlock(&scst_mutex);
|
||||
|
||||
resume:
|
||||
scst_resume_activity();
|
||||
|
||||
out:
|
||||
kobject_put(&dev->dev_kobj);
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
static ssize_t vdev_sysfs_active_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, const char *buf, size_t count)
|
||||
{
|
||||
struct scst_device *dev = container_of(kobj, struct scst_device,
|
||||
dev_kobj);
|
||||
struct scst_sysfs_work_item *work;
|
||||
char *arg;
|
||||
int res;
|
||||
|
||||
TRACE_ENTRY();
|
||||
|
||||
res = -ENOMEM;
|
||||
arg = kasprintf(GFP_KERNEL, "%.*s", (int)count, buf);
|
||||
if (!arg)
|
||||
goto out;
|
||||
|
||||
res = scst_alloc_sysfs_work(vdev_sysfs_process_active_store,
|
||||
false, &work);
|
||||
if (res)
|
||||
goto out;
|
||||
work->dev = dev;
|
||||
swap(work->buf, arg);
|
||||
kobject_get(&dev->dev_kobj);
|
||||
res = scst_sysfs_queue_wait_work(work);
|
||||
if (res)
|
||||
goto out;
|
||||
res = count;
|
||||
|
||||
out:
|
||||
kfree(arg);
|
||||
TRACE_EXIT_RES(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
static ssize_t vdev_sysfs_bind_alua_state_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
char *buf)
|
||||
{
|
||||
struct scst_device *dev;
|
||||
struct scst_vdisk_dev *virt_dev;
|
||||
int pos;
|
||||
unsigned int bind_alua_state;
|
||||
|
||||
TRACE_ENTRY();
|
||||
|
||||
dev = container_of(kobj, struct scst_device, dev_kobj);
|
||||
virt_dev = dev->dh_priv;
|
||||
spin_lock(&virt_dev->flags_lock);
|
||||
bind_alua_state = virt_dev->bind_alua_state;
|
||||
spin_unlock(&virt_dev->flags_lock);
|
||||
pos = sprintf(buf, "%d\n%s", bind_alua_state,
|
||||
bind_alua_state != DEF_BIND_ALUA_STATE ?
|
||||
SCST_SYSFS_KEY_MARK "\n" : "");
|
||||
|
||||
TRACE_EXIT_RES(pos);
|
||||
return pos;
|
||||
}
|
||||
|
||||
static ssize_t vdev_sysfs_bind_alua_state_store(struct kobject *kobj,
|
||||
struct kobj_attribute *attr,
|
||||
const char *buf, size_t count)
|
||||
{
|
||||
struct scst_device *dev;
|
||||
struct scst_vdisk_dev *virt_dev;
|
||||
char ch[16];
|
||||
unsigned long bind_alua_state;
|
||||
int res;
|
||||
|
||||
TRACE_ENTRY();
|
||||
|
||||
dev = container_of(kobj, struct scst_device, dev_kobj);
|
||||
virt_dev = dev->dh_priv;
|
||||
strlcpy(ch, buf, 16);
|
||||
res = kstrtoul(ch, 0, &bind_alua_state);
|
||||
if (res < 0)
|
||||
goto out;
|
||||
|
||||
spin_lock(&virt_dev->flags_lock);
|
||||
virt_dev->bind_alua_state = !!bind_alua_state;
|
||||
spin_unlock(&virt_dev->flags_lock);
|
||||
|
||||
res = count;
|
||||
|
||||
out:
|
||||
TRACE_EXIT_RES(res);
|
||||
return res;
|
||||
}
|
||||
|
||||
static ssize_t vdev_zero_copy_show(struct kobject *kobj,
|
||||
struct kobj_attribute *attr, char *buf)
|
||||
{
|
||||
|
||||
@@ -60,6 +60,25 @@ module_param(alua_invariant_check, bool, 0644);
|
||||
MODULE_PARM_DESC(alua_invariant_check,
|
||||
"Enables a run-time ALUA state invariant check.");
|
||||
|
||||
/* Global SCST ALUA lock/unlock functions (scst_dg_mutex) */
|
||||
void scst_alua_lock(void)
|
||||
{
|
||||
mutex_lock(&scst_dg_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(scst_alua_lock);
|
||||
|
||||
void scst_alua_unlock(void)
|
||||
{
|
||||
mutex_unlock(&scst_dg_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(scst_alua_unlock);
|
||||
|
||||
void lockdep_assert_alua_lock_held(void)
|
||||
{
|
||||
lockdep_assert_held(&scst_dg_mutex);
|
||||
}
|
||||
EXPORT_SYMBOL(lockdep_assert_alua_lock_held);
|
||||
|
||||
const char *scst_alua_state_name(enum scst_tg_state s)
|
||||
{
|
||||
int i;
|
||||
|
||||
Reference in New Issue
Block a user