Reviewer notes (text before the first "diff --git" line is ignored by patch
tools):
- This patch was re-flowed from a copy in which all line breaks had been
  collapsed. Added/removed lines follow the +/- markers of that copy; line
  breaks and indentation of unchanged context lines are a best-effort
  reconstruction and have to be checked against the target tree.
- The header names of the #include lines in scst_vdisk.c and the text of one
  comment in backport.h were missing from that copy and are still missing.
- fileio_exec_async() no longer dereferences the command parameters after the
  I/O request has been submitted, "kvecv" was corrected in a log message and
  comments were added to the new functions. Hunk headers were updated for
  these changes; the blob hashes on the "index" lines were not regenerated.

diff --git a/scst/README b/scst/README
index dc0784a15..9160383e8 100644
--- a/scst/README
+++ b/scst/README
@@ -114,7 +114,7 @@ log message: "tgt_dev for LUN 0 not found, command to unexisting LU?"
 It is highly recommended to use scstadmin utility for configuring
 devices and security groups.
 
-The flow of SCST initialization should be as the following:
+The flow of SCST initialization should be as follows:
 
 1. Load of SCST modules with necessary module parameters, if needed.
 
@@ -1102,9 +1102,13 @@ cache. The following parameters possible for vdisk_fileio:
 
  - read_only - read only. Default is 0.
 
- - o_direct - disables both read and write caching. This mode isn't
-   currently fully implemented, you should use user space fileio_tgt
-   program in O_DIRECT mode instead (see below).
+ - async - submit I/O asynchronously to the device handler. This mode
+   allows concurrent processing of SCSI commands even when using only
+   a single SCST command thread.
+
+ - o_direct - disables both read and write caching if asynchronous
+   I/O is used. This mode bypasses the page cache and hence improves
+   performance.
 
  - nv_cache - enables "non-volatile cache" mode. In this mode it is
    assumed that the target has a GOOD UPS with ability to cleanly
@@ -1139,8 +1143,8 @@ cache. The following parameters possible for vdisk_fileio:
  - rotational - if set, this device reported as rotational. Otherwise,
    it is reported as non-rotational (SSD, etc.)
 
- - zero_copy - if set, then this device uses zero copy access to the
-   page cache. At the moment, only read side zero copy is implemented.
+ - zero_copy - ignored. For zero-copy I/O, set the async flag and
+   possibly also the o_direct flag and use Linux kernel v4.10 or later.
 
  - dif_mode - specifies which T10-PI, or DIF, mode this device will use.
    See SCSI standards from more info about T10-PI. Available DIF modes
diff --git a/scst/include/backport.h b/scst/include/backport.h
index 8f19fba25..c9c381d6b 100644
--- a/scst/include/backport.h
+++ b/scst/include/backport.h
@@ -327,6 +327,25 @@ static inline int vfs_fsync_backport(struct file *file, int datasync)
 #define vfs_fsync vfs_fsync_backport
 #endif
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 16, 0) && \
+	LINUX_VERSION_CODE < KERNEL_VERSION(4, 11, 0)
+/*
+ * See also commit bb7462b6fd64 ("vfs: use helpers for calling
+ * f_op->{read,write}_iter()").
+ */
+static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
+				     struct iov_iter *iter)
+{
+	return file->f_op->read_iter(kio, iter);
+}
+
+static inline ssize_t call_write_iter(struct file *file, struct kiocb *kio,
+				      struct iov_iter *iter)
+{
+	return file->f_op->write_iter(kio, iter);
+}
+#endif
+
 /* */
 
 #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 19)
diff --git a/scst/src/dev_handlers/scst_vdisk.c b/scst/src/dev_handlers/scst_vdisk.c
index b62bcb190..aa4abd4ff 100644
--- a/scst/src/dev_handlers/scst_vdisk.c
+++ b/scst/src/dev_handlers/scst_vdisk.c
@@ -25,6 +25,7 @@
 #ifndef INSIDE_KERNEL_TREE
 #include
 #endif
+#include
 #include
 #include
 #include
@@ -176,6 +177,7 @@ struct scst_vdisk_dev {
 	unsigned int nv_cache:1;
 	unsigned int o_direct_flag:1;
 	unsigned int zero_copy:1;
+	unsigned int async:1;
 	unsigned int media_changed:1;
 	unsigned int prevent_allow_medium_removal:1;
 	unsigned int nullio:1;
@@ -270,6 +272,11 @@ struct vdisk_cmd_params {
 			int iv_count;
 			struct iovec small_iv[4];
 		} sync;
+		struct {
+			struct kiocb iocb;
+			struct kvec *kvec;
+			struct kvec small_kvec[4];
+		} async;
 	};
 	struct scst_cmd *cmd;
 	loff_t loff;
@@ -495,6 +502,8 @@ static ssize_t vdev_sysfs_inq_vend_specific_show(struct kobject *kobj,
 	struct kobj_attribute *attr, char *buf);
 static ssize_t vdev_zero_copy_show(struct kobject *kobj,
 	struct kobj_attribute *attr, char *buf);
+static ssize_t vdev_async_show(struct kobject *kobj,
+	struct kobj_attribute *attr, char *buf);
 static ssize_t vdev_dif_filename_show(struct kobject *kobj,
 	struct kobj_attribute *attr, char *buf);
 
@@ -590,6 +599,8 @@ static struct kobj_attribute vdev_inq_vend_specific_attr =
 		vdev_sysfs_inq_vend_specific_store);
 static struct kobj_attribute vdev_zero_copy_attr =
 	__ATTR(zero_copy, S_IRUGO, vdev_zero_copy_show, NULL);
+static struct kobj_attribute vdev_async_attr =
+	__ATTR(async, S_IRUGO, vdev_async_show, NULL);
 static struct kobj_attribute vdev_dif_filename_attr =
 	__ATTR(dif_filename, S_IRUGO, vdev_dif_filename_show, NULL);
 
@@ -625,6 +636,7 @@ static const struct attribute *vdisk_fileio_attrs[] = {
 	&vdev_usn_attr.attr,
 	&vdev_inq_vend_specific_attr.attr,
 	&vdev_zero_copy_attr.attr,
+	&vdev_async_attr.attr,
 	NULL,
 };
 
@@ -759,12 +771,13 @@ static struct scst_dev_type vdisk_file_devtype = {
 	.del_device =		vdisk_del_device,
 	.dev_attrs =		vdisk_fileio_attrs,
 	.add_device_parameters =
+		"async, "
 		"blocksize, "
+		"cluster_mode, "
 		"filename, "
 		"numa_node_id, "
 		"nv_cache, "
 		"o_direct, "
-		"cluster_mode, "
 		"read_only, "
 		"removable, "
 		"rotational, "
@@ -977,8 +990,6 @@ static struct file *vdev_open_fd(const struct scst_vdisk_dev *virt_dev,
 		open_flags |= O_RDONLY;
 	else
 		open_flags |= O_RDWR;
-	if (virt_dev->o_direct_flag)
-		open_flags |= O_DIRECT;
 	if (virt_dev->wt_flag && !virt_dev->nv_cache)
 		open_flags |= O_DSYNC;
 
@@ -1742,6 +1753,12 @@ next:
 		res = -EINVAL;
 		goto out;
 	}
+	if (!virt_dev->async && virt_dev->o_direct_flag) {
+		PRINT_ERROR("%s: using o_direct without setting async is not"
+			    " supported", virt_dev->filename);
+		res = -EINVAL;
+		goto out;
+	}
 
 	dev->dev_rd_only = virt_dev->rd_only;
 
@@ -3194,10 +3211,169 @@ static int fileio_exec(struct scst_cmd *cmd)
 	return vdev_do_job(cmd, ops);
 }
 
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 1, 0)
+/* Whether the command in @p uses the asynchronous I/O path. */
+static bool do_fileio_async(const struct vdisk_cmd_params *p)
+{
+	struct scst_cmd *cmd = p->cmd;
+	struct scst_device *dev = cmd->dev;
+	struct scst_vdisk_dev *virt_dev = dev->dh_priv;
+
+	return virt_dev->async && dev->dev_dif_mode == SCST_DIF_MODE_NONE;
+}
+
+/*
+ * Make p->async.kvec point at an array with room for one entry per data
+ * buffer of @cmd: the array embedded in @p if it is large enough, a
+ * kmalloc'ed array otherwise. Returns false if the allocation failed.
+ */
+static bool vdisk_alloc_kvec(struct scst_cmd *cmd, struct vdisk_cmd_params *p)
+{
+	int n;
+
+	n = scst_get_buf_count(cmd);
+	if (n <= ARRAY_SIZE(p->async.small_kvec)) {
+		p->async.kvec = &p->async.small_kvec[0];
+		return true;
+	}
+
+	p->async.kvec = kmalloc_array(n, sizeof(*p->async.kvec),
+				      cmd->cmd_gfp_mask);
+	if (p->async.kvec == NULL) {
+		PRINT_ERROR("Unable to allocate kvec (%d)", n);
+		return false;
+	}
+
+	return true;
+}
+
+/*
+ * Completion callback for requests submitted by fileio_exec_async(). A
+ * negative @ret is reported as a write error for commands that transfer
+ * data to the device and as a hardware error otherwise; a non-negative
+ * @ret becomes the response data length. Finally the command is finished
+ * through cmd->scst_cmd_done().
+ */
+static void fileio_async_complete(struct kiocb *iocb, long ret, long ret2)
+{
+	struct vdisk_cmd_params *p = container_of(iocb, typeof(*p), async.iocb);
+	struct scst_cmd *cmd = p->cmd;
+
+	if (ret < 0 &&
+	    scst_cmd_get_data_direction(cmd) & SCST_DATA_WRITE)
+		scst_set_cmd_error(cmd,
+			SCST_LOAD_SENSE(scst_sense_write_error));
+	else if (ret < 0)
+		scst_set_cmd_error(cmd,
+			SCST_LOAD_SENSE(scst_sense_hardw_error));
+	else
+		scst_set_resp_data_len(cmd, ret);
+	cmd->completed = 1;
+	cmd->scst_cmd_done(cmd, SCST_CMD_STATE_DEFAULT, SCST_CONTEXT_SAME);
+}
+
+/*
+ * Submit the READ or WRITE command in @p to the backing file through
+ * call_read_iter() / call_write_iter() with fileio_async_complete() as
+ * completion callback. Returns RUNNING_ASYNC after submission, CMD_FAILED
+ * for any other data direction and CMD_SUCCEEDED after having set the BUSY
+ * status if the kvec array could not be allocated.
+ */
+static enum compl_status_e fileio_exec_async(struct vdisk_cmd_params *p)
+{
+	struct scst_cmd *cmd = p->cmd;
+	struct scst_device *dev = cmd->dev;
+	struct scst_vdisk_dev *virt_dev = dev->dh_priv;
+	struct file *fd = virt_dev->fd;
+	struct iov_iter iter = { };
+	ssize_t length, total = 0, ret;
+	struct kvec *kvec, *heap_kvec = NULL;
+	uint8_t *address;
+	int dir;
+
+	switch (cmd->data_direction) {
+	case SCST_DATA_READ:
+		dir = READ;
+		break;
+	case SCST_DATA_WRITE:
+		dir = WRITE;
+		break;
+	default:
+		WARN_ON_ONCE(true);
+		return CMD_FAILED;
+	}
+
+	if (!vdisk_alloc_kvec(cmd, p)) {
+		scst_set_busy(cmd);
+		return CMD_SUCCEEDED;
+	}
+
+	kvec = p->async.kvec;
+	length = scst_get_buf_first(cmd, &address);
+	while (length) {
+		*kvec++ = (struct kvec){
+			.iov_base = address,
+			.iov_len = length,
+		};
+		total += length;
+		length = scst_get_buf_next(cmd, &address);
+	}
+
+	iov_iter_kvec(&iter, ITER_KVEC | dir, p->async.kvec,
+		      kvec - p->async.kvec, total);
+	p->async.iocb = (struct kiocb) {
+		.ki_pos = p->loff,
+		.ki_filp = fd,
+		.ki_complete = fileio_async_complete,
+	};
+	if (virt_dev->o_direct_flag)
+		p->async.iocb.ki_flags |= IOCB_DIRECT;
+	/*
+	 * Once the request has been queued, fileio_async_complete() can run
+	 * and finish the command before the submitting call returns. Hence
+	 * record now whether the kvec array must be freed instead of
+	 * dereferencing @p after submission.
+	 */
+	if (p->async.kvec != p->async.small_kvec)
+		heap_kvec = p->async.kvec;
+	if (dir == WRITE) {
+		if (virt_dev->wt_flag && !virt_dev->nv_cache)
+			p->async.iocb.ki_flags |= IOCB_DSYNC;
+		ret = call_write_iter(fd, &p->async.iocb, &iter);
+	} else {
+		ret = call_read_iter(fd, &p->async.iocb, &iter);
+	}
+	kfree(heap_kvec);
+	/* No -EIOCBQUEUED: the callback has not run and @p is still valid. */
+	if (ret != -EIOCBQUEUED)
+		fileio_async_complete(&p->async.iocb, ret, 0);
+	/*
+	 * Return RUNNING_ASYNC even if fileio_async_complete() has been
+	 * called because that function calls cmd->scst_cmd_done().
+	 */
+	return RUNNING_ASYNC;
+}
+#else
+/* Asynchronous file I/O is not available for this kernel version. */
+static bool do_fileio_async(const struct vdisk_cmd_params *p)
+{
+	return false;
+}
+
+/* Not expected to be called since do_fileio_async() returns false here. */
+static enum compl_status_e fileio_exec_async(struct vdisk_cmd_params *p)
+{
+	WARN_ON_ONCE(true);
+	return CMD_FAILED;
+}
+#endif
+
 static void vdisk_on_free_cmd_params(const struct vdisk_cmd_params *p)
 {
-	if (p->sync.iv != p->sync.small_iv)
-		kfree(p->sync.iv);
+	if (!do_fileio_async(p)) {
+		if (p->sync.iv != p->sync.small_iv)
+			kfree(p->sync.iv);
+	}
 }
 
 static void fileio_on_free_cmd(struct scst_cmd *cmd)
@@ -5760,6 +5936,9 @@ static enum compl_status_e fileio_exec_read(struct vdisk_cmd_params *p)
 
 	EXTRACHECKS_BUG_ON(virt_dev->nullio);
 
+	if (do_fileio_async(p))
+		return fileio_exec_async(p);
+
 	iv = vdisk_alloc_iv(cmd, p);
 	if (iv == NULL)
 		goto out_nomem;
@@ -5943,6 +6122,9 @@ static enum compl_status_e fileio_exec_write(struct vdisk_cmd_params *p)
 
 	EXTRACHECKS_BUG_ON(virt_dev->nullio);
 
+	if (do_fileio_async(p))
+		return fileio_exec_async(p);
+
 	rc = scst_dif_process_write(cmd);
 	if (unlikely(rc != 0))
 		goto out;
@@ -7319,6 +7501,10 @@ static void vdisk_report_registering(const struct scst_vdisk_dev *virt_dev)
 		i += snprintf(&buf[i], buf_size - i, "%sZERO_COPY",
 			(j == i) ? "(" : ", ");
 
+	if (virt_dev->async)
+		i += snprintf(&buf[i], buf_size - i, "%sASYNC",
+			(j == i) ? "(" : ", ");
+
 	if (virt_dev->dummy)
 		i += snprintf(&buf[i], buf_size - i, "%sDUMMY",
 			(j == i) ? "(" : ", ");
@@ -7770,14 +7956,8 @@ static int vdev_parse_add_dev_params(struct scst_vdisk_dev *virt_dev,
 			virt_dev->nv_cache = ull_val;
 			TRACE_DBG("NON-VOLATILE CACHE %d", virt_dev->nv_cache);
 		} else if (!strcasecmp("o_direct", p)) {
-#if 0
 			virt_dev->o_direct_flag = ull_val;
 			TRACE_DBG("O_DIRECT %d", virt_dev->o_direct_flag);
-#else
-			PRINT_INFO("O_DIRECT flag doesn't currently"
-				" work, ignoring it, use fileio_tgt "
-				"in O_DIRECT mode instead (device %s)", virt_dev->name);
-#endif
 		} else if (!strcasecmp("read_only", p)) {
 			virt_dev->rd_only = ull_val;
 			TRACE_DBG("READ ONLY %d", virt_dev->rd_only);
@@ -7817,6 +7997,8 @@ static int vdev_parse_add_dev_params(struct scst_vdisk_dev *virt_dev,
 				virt_dev->thin_provisioned);
 		} else if (!strcasecmp("zero_copy", p)) {
 			virt_dev->zero_copy = !!ull_val;
+		} else if (!strcasecmp("async", p)) {
+			virt_dev->async = !!ull_val;
 		} else if (!strcasecmp("size", p)) {
 			virt_dev->file_size = ull_val;
 		} else if (!strcasecmp("size_mb", p)) {
@@ -10157,6 +10339,21 @@ static ssize_t vdev_zero_copy_show(struct kobject *kobj,
 	return pos;
 }
 
+/*
+ * Show method of the "async" sysfs attribute: prints the async flag and, if
+ * the flag is set, SCST_SYSFS_KEY_MARK on a second line.
+ */
+static ssize_t vdev_async_show(struct kobject *kobj,
+	struct kobj_attribute *attr, char *buf)
+{
+	struct scst_device *dev =
+		container_of(kobj, struct scst_device, dev_kobj);
+	struct scst_vdisk_dev *virt_dev = dev->dh_priv;
+
+	return sprintf(buf, "%d\n%s", virt_dev->async,
+		       virt_dev->async ? SCST_SYSFS_KEY_MARK "\n" : "");
+}
+
 static ssize_t vdev_dif_filename_show(struct kobject *kobj,
 	struct kobj_attribute *attr, char *buf)
 {