scst_vdisk: Make optimal transfer length parameter configurable

In testing SCST performance with MD RAID arrays for the backing device,
and RHEL 7.x initiators generating I/O, with the current optimal transfer
length (512 KiB) that SCST provides initiators (via the block limits VPD
page), it was observed on the target side that partial stripe writes were
occurring, even though at the file system layer full stripe writes were
being generated. Through testing it was determined the "optimal_io_size"
sysfs attribute and/or optimal transfer length data in the VPD page were
being utilized to fragment I/O on the initiator side.

While using the value of "optimal_io_size" from the MD RAID device (via
sysfs) stops partial stripe writes, it was observed additional throughput
can be gained by doubling/tripling/quadrupling this value. And it was
also observed that different values were preferred for writes vs. reads
to yield best performance.

The default value is 512 KiB (524288 bytes).

Signed-off-by: Marc Smith <marc.smith@parodyne.com>
[bvanassche: Made opt_trans_len attribute read/write]


git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@7856 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Bart Van Assche
2019-01-03 02:28:19 +00:00
parent 9a0c18aa1c
commit f59534735d
2 changed files with 75 additions and 8 deletions

View File

@@ -1094,6 +1094,13 @@ cache. The following parameters possible for vdisk_fileio:
- blocksize - specifies block size used by this virtual device. The
block size must be power of 2 and >= 512 bytes. Default is 512.
- opt_trans_len - specifies the optimal transfer length data in the block
limits VPD page. Value is in bytes, and must be a multiple of the block
size. Default is 524288. Setting this parameter to a multiple of the
optimal transfer length below 4 MB may improve performance. Setting this
paramter to a value above 4 MB hurts performance because the SGV cache
only supports buffers up to 4 MB.
- write_through - disables write back caching. Note, this option
has sense only if you also *manually* disable write-back cache in
*all* your backstorage devices and make sure it's actually disabled,
@@ -1260,6 +1267,9 @@ Each vdisk_fileio's device has the following attributes in
- blocksize - contains block size used by this virtual device.
- opt_trans_len - contains the optimal transfer length used by this virtual
device.
- write_through - contains status of write back caching of this virtual
device.
@@ -1371,6 +1381,7 @@ For example:
/sys/kernel/scst_tgt/devices/disk1
|-- block
|-- blocksize
|-- opt_trans_len
|-- exported
| |-- export0 -> ../../../targets/iscsi/iqn.2006-10.net.vlnb:tgt/luns/0
| |-- export1 -> ../../../targets/iscsi/iqn.2006-10.net.vlnb:tgt/ini_groups/INI/luns/0

View File

@@ -106,6 +106,7 @@ static struct scst_trace_log vdisk_local_trace_tbl[] = {
#define PS 0x80 /* parameter saveable */
#define DEF_DISK_BLOCK_SHIFT 9
#define DEF_OPT_TRANS_LEN 524288
#define DEF_CDROM_BLOCK_SHIFT 11
#define DEF_SECTORS 56
#define DEF_HEADS 255
@@ -150,6 +151,7 @@ static struct scst_trace_log vdisk_local_trace_tbl[] = {
struct scst_vdisk_dev {
uint64_t nblocks;
unsigned int opt_trans_len;
/*
* Not protected, because assignments to aligned 64-bit integers are
@@ -198,6 +200,7 @@ struct scst_vdisk_dev {
unsigned int expl_alua:1;
unsigned int reexam_pending:1;
unsigned int size_key:1;
unsigned int opt_trans_len_set:1;
struct file *fd;
struct file *dif_fd;
@@ -412,6 +415,10 @@ static ssize_t vdev_sysfs_size_mb_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf);
static ssize_t vdisk_sysfs_blocksize_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf);
static ssize_t vdisk_opt_trans_len_store(struct kobject *kobj,
struct kobj_attribute *attr, const char *buf, size_t count);
static ssize_t vdisk_opt_trans_len_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf);
static ssize_t vdisk_sysfs_rd_only_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf);
static ssize_t vdisk_sysfs_wt_show(struct kobject *kobj,
@@ -529,6 +536,9 @@ static struct kobj_attribute vdev_size_mb_rw_attr =
vdev_sysfs_size_mb_store);
static struct kobj_attribute vdisk_blocksize_attr =
__ATTR(blocksize, S_IRUGO, vdisk_sysfs_blocksize_show, NULL);
static struct kobj_attribute vdisk_opt_trans_len_attr =
__ATTR(opt_trans_len, S_IWUSR|S_IRUGO, vdisk_opt_trans_len_show,
vdisk_opt_trans_len_store);
static struct kobj_attribute vdisk_rd_only_attr =
__ATTR(read_only, S_IRUGO, vdisk_sysfs_rd_only_show, NULL);
static struct kobj_attribute vdisk_wt_attr =
@@ -612,6 +622,7 @@ static const struct attribute *vdisk_fileio_attrs[] = {
&vdev_size_ro_attr.attr,
&vdev_size_mb_ro_attr.attr,
&vdisk_blocksize_attr.attr,
&vdisk_opt_trans_len_attr.attr,
&vdisk_rd_only_attr.attr,
&vdisk_wt_attr.attr,
&vdisk_tp_attr.attr,
@@ -646,6 +657,7 @@ static const struct attribute *vdisk_blockio_attrs[] = {
&vdev_size_rw_attr.attr,
&vdev_size_mb_rw_attr.attr,
&vdisk_blocksize_attr.attr,
&vdisk_opt_trans_len_attr.attr,
&vdisk_rd_only_attr.attr,
&vdisk_wt_attr.attr,
&vdisk_expl_alua_attr.attr,
@@ -675,6 +687,7 @@ static const struct attribute *vdisk_nullio_attrs[] = {
&vdev_size_rw_attr.attr,
&vdev_size_mb_rw_attr.attr,
&vdisk_blocksize_attr.attr,
&vdisk_opt_trans_len_attr.attr,
&vdisk_rd_only_attr.attr,
&vdisk_tst_attr.attr,
&vdev_dummy_attr.attr,
@@ -1869,6 +1882,14 @@ static int vdisk_open_fd(struct scst_vdisk_dev *virt_dev, bool read_only)
virt_dev->bdev = virt_dev->blockio ? file_inode(virt_dev->fd)->i_bdev : NULL;
res = 0;
/*
* For block devices, get the optimal I/O size from the block device
* characteristics.
*/
if (virt_dev->bdev && !virt_dev->opt_trans_len_set)
virt_dev->opt_trans_len = bdev_io_opt(virt_dev->bdev) ? :
virt_dev->opt_trans_len;
if (virt_dev->dif_filename != NULL) {
virt_dev->dif_fd = vdev_open_fd(virt_dev,
virt_dev->dif_filename, read_only);
@@ -4042,14 +4063,8 @@ static int vdisk_block_limits(uint8_t *buf, struct scst_cmd *cmd,
8*1024*1024l) / dev->block_size;
put_unaligned_be32(max_transfer, &buf[8]);
/*
* Let's have optimal transfer len 512KB. Better to not
* set it at all, because we don't have such limit,
* but some initiators may not understand that (?).
* From other side, too big transfers are not optimal,
* because SGV cache supports only <4M buffers.
*/
put_unaligned_be32(min_t(int, max_transfer, 512*1024 / dev->block_size),
put_unaligned_be32(min_t(int, max_transfer,
virt_dev->opt_trans_len >> dev->block_shift),
&buf[12]);
if (virt_dev->thin_provisioned) {
@@ -7648,6 +7663,7 @@ static int vdev_create_node(struct scst_dev_type *devt,
INIT_WORK(&virt_dev->vdev_inq_changed_work, vdev_inq_changed_fn);
virt_dev->blk_shift = DEF_DISK_BLOCK_SHIFT;
virt_dev->opt_trans_len = DEF_OPT_TRANS_LEN;
virt_dev->numa_node_id = NUMA_NO_NODE;
virt_dev->dev_active = DEF_DEV_ACTIVE;
virt_dev->bind_alua_state = DEF_BIND_ALUA_STATE;
@@ -8804,6 +8820,46 @@ static ssize_t vdisk_sysfs_blocksize_show(struct kobject *kobj,
return pos;
}
static ssize_t vdisk_opt_trans_len_store(struct kobject *kobj,
struct kobj_attribute *attr,
const char *buf, size_t count)
{
struct scst_device *dev =
container_of(kobj, struct scst_device, dev_kobj);
struct scst_vdisk_dev *virt_dev = dev->dh_priv;
unsigned long val;
int res;
res = kstrtoul(buf, 0, &val);
if (res)
return res;
if (val & (dev->block_size - 1))
return -EINVAL;
/*
* To do: generate a unit attention because of the opt_trans_len
* change.
*/
spin_lock(&virt_dev->flags_lock);
virt_dev->opt_trans_len_set = 1;
virt_dev->opt_trans_len = val;
spin_unlock(&virt_dev->flags_lock);
return count;
}
static ssize_t vdisk_opt_trans_len_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{
struct scst_device *dev =
container_of(kobj, struct scst_device, dev_kobj);
struct scst_vdisk_dev *virt_dev = dev->dh_priv;
return sprintf(buf, "%d\n%s", virt_dev->opt_trans_len,
virt_dev->opt_trans_len_set ? SCST_SYSFS_KEY_MARK "\n" :
"");
}
static ssize_t vdisk_sysfs_rd_only_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buf)
{