From 4bf0fb2871db2f725ee99e2526f357bb9030e79b Mon Sep 17 00:00:00 2001 From: Vladislav Bolkhovitin Date: Fri, 4 Jun 2010 11:53:29 +0000 Subject: [PATCH] Make BLOCKIO honor SYNCHRONIZE_CACHE commands git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@1738 d57e44dd-8a1f-0410-8b47-8ef2f437770f --- iscsi-scst/README | 1 + iscsi-scst/README_in-tree | 1 + qla2x00t/qla2x00-target/README | 1 + scst/README | 19 +++++- scst/README_in-tree | 19 +++++- scst/src/dev_handlers/scst_vdisk.c | 93 +++++++++++++++++++++++++++--- 6 files changed, 120 insertions(+), 14 deletions(-) diff --git a/iscsi-scst/README b/iscsi-scst/README index f9ed82c11..bd4f320d5 100644 --- a/iscsi-scst/README +++ b/iscsi-scst/README @@ -404,6 +404,7 @@ both iSCSI-SCST targets will look like: | | | `-- export0 -> ../../../targets/iscsi/iqn.2006-10.net.vlnb:tgt1/ini_groups/special_ini/luns/0 | | |-- filename | | |-- handler -> ../../handlers/vdisk_blockio +| | |-- nv_cache | | |-- read_only | | |-- removable | | |-- resync_size diff --git a/iscsi-scst/README_in-tree b/iscsi-scst/README_in-tree index 4c8f94117..1d673d3a7 100644 --- a/iscsi-scst/README_in-tree +++ b/iscsi-scst/README_in-tree @@ -301,6 +301,7 @@ both iSCSI-SCST targets will look like: | | | `-- export0 -> ../../../targets/iscsi/iqn.2006-10.net.vlnb:tgt1/ini_groups/special_ini/luns/0 | | |-- filename | | |-- handler -> ../../handlers/vdisk_blockio +| | |-- nv_cache | | |-- read_only | | |-- removable | | |-- resync_size diff --git a/qla2x00t/qla2x00-target/README b/qla2x00t/qla2x00-target/README index 990c5909c..ddaebbf78 100644 --- a/qla2x00t/qla2x00-target/README +++ b/qla2x00t/qla2x00-target/README @@ -292,6 +292,7 @@ The resulting overall SCST sysfs hierarchy with initiator | | | `-- export0 -> ../../../targets/qla2x00t/25:00:00:f0:98:87:92:f3/luns/0 | | |-- filename | | |-- handler -> ../../handlers/vdisk_blockio +| | |-- nv_cache | | |-- read_only | | |-- removable | | |-- resync_size diff --git a/scst/README b/scst/README index 
528328e3f..0f6773ab8 100644 --- a/scst/README +++ b/scst/README @@ -1131,8 +1131,8 @@ between application and disk or need the large block throughput. See below for more info. The following parameters possible for vdisk_blockio: filename, -blocksize, read_only, removable. See vdisk_fileio above for description -of those parameters. +blocksize, nv_cache, read_only, removable. See vdisk_fileio above for +description of those parameters. Handler vdisk_nullio provides NULLIO mode to create virtual devices. In this mode no real I/O is done, but success returned to initiators. @@ -1213,7 +1213,7 @@ For example: `-- write_through Each vdisk_blockio's device has the following attributes in -/sys/kernel/scst_tgt/devices/device_name: blocksize, filename, +/sys/kernel/scst_tgt/devices/device_name: blocksize, filename, nv_cache, read_only, removable, resync_size, size_mb, t10_dev_id, threads_num, threads_pool_type, type, usn. See above description of those parameters. @@ -1421,6 +1421,19 @@ IMPORTANT: Since data in BLOCKIO and FILEIO modes are not consistent between simultaneously, you will almost instantly corrupt your data on that device. +IMPORTANT: In SCST 1.x BLOCKIO worked by default in NV_CACHE mode, where +========= each device was reported to remote initiators as having write through + caching. But if your backend block device has internal write + back caching it might create a possibility for loss of the + data cached in the internal cache in case of a power + failure. Starting from SCST 2.0 BLOCKIO works by default in + non-NV_CACHE mode, where each device is reported to remote + initiators as having write back caching, and synchronizes the + internal device's cache on each SYNCHRONIZE_CACHE command + from the initiators. It might lead to some PERFORMANCE LOSS, + so if you are sure of your power supply and want to + restore 1.x behavior, you should recreate your BLOCKIO + devices in NV_CACHE mode. 
Pass-through mode ----------------- diff --git a/scst/README_in-tree b/scst/README_in-tree index 0e4634ece..6a7fe957d 100644 --- a/scst/README_in-tree +++ b/scst/README_in-tree @@ -714,8 +714,8 @@ between application and disk or need the large block throughput. See below for more info. The following parameters possible for vdisk_blockio: filename, -blocksize, read_only, removable. See vdisk_fileio above for description -of those parameters. +blocksize, nv_cache, read_only, removable. See vdisk_fileio above for +description of those parameters. Handler vdisk_nullio provides NULLIO mode to create virtual devices. In this mode no real I/O is done, but success returned to initiators. @@ -796,7 +796,7 @@ For example: `-- write_through Each vdisk_blockio's device has the following attributes in -/sys/kernel/scst_tgt/devices/device_name: blocksize, filename, +/sys/kernel/scst_tgt/devices/device_name: blocksize, filename, nv_cache, read_only, removable, resync_size, size_mb, t10_dev_id, threads_num, threads_pool_type, type, usn. See above description of those parameters. @@ -1004,6 +1004,19 @@ IMPORTANT: Since data in BLOCKIO and FILEIO modes are not consistent between simultaneously, you will almost instantly corrupt your data on that device. +IMPORTANT: In SCST 1.x BLOCKIO worked by default in NV_CACHE mode, where +========= each device was reported to remote initiators as having write through + caching. But if your backend block device has internal write + back caching it might create a possibility for loss of the + data cached in the internal cache in case of a power + failure. Starting from SCST 2.0 BLOCKIO works by default in + non-NV_CACHE mode, where each device is reported to remote + initiators as having write back caching, and synchronizes the + internal device's cache on each SYNCHRONIZE_CACHE command + from the initiators. 
It might lead to some PERFORMANCE LOSS, + so if you are sure of your power supply and want to + restore 1.x behavior, you should recreate your BLOCKIO + devices in NV_CACHE mode. Pass-through mode ----------------- diff --git a/scst/src/dev_handlers/scst_vdisk.c b/scst/src/dev_handlers/scst_vdisk.c index e5d5c143a..ef8026911 100644 --- a/scst/src/dev_handlers/scst_vdisk.c +++ b/scst/src/dev_handlers/scst_vdisk.c @@ -41,6 +41,7 @@ #include #include #include +#include #define LOG_PREFIX "dev_vdisk" @@ -259,6 +260,7 @@ static void vdisk_exec_write(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, loff_t loff); static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, u64 lba_start, int write); +static int blockio_flush(struct block_device *bdev); static void vdisk_exec_verify(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, loff_t loff); static void vdisk_exec_read_capacity(struct scst_cmd *cmd); @@ -372,6 +374,7 @@ static const struct attribute *vdisk_blockio_attrs[] = { &vdev_size_attr.attr, &vdisk_blocksize_attr.attr, &vdisk_rd_only_attr.attr, + &vdisk_nv_cache_attr.attr, &vdisk_removable_attr.attr, &vdisk_filename_attr.attr, &vdisk_resync_size_attr.attr, @@ -602,6 +605,44 @@ static struct file *vdev_open_fd(const struct scst_vdisk_dev *virt_dev) return fd; } +static void vdisk_blockio_check_flush_support(struct scst_vdisk_dev *virt_dev) +{ + struct inode *inode; + struct file *fd; + + TRACE_ENTRY(); + + if (!virt_dev->blockio || virt_dev->rd_only || virt_dev->nv_cache) + goto out; + + fd = filp_open(virt_dev->filename, O_LARGEFILE, 0600); + if (IS_ERR(fd)) { + PRINT_ERROR("filp_open(%s) returned error %ld", + virt_dev->filename, PTR_ERR(fd)); + goto out; + } + + inode = fd->f_dentry->d_inode; + + if (!S_ISBLK(inode->i_mode)) { + PRINT_ERROR("%s is NOT a block device", virt_dev->filename); + goto out_close; + } + + if (blockio_flush(inode->i_bdev) != 0) { + PRINT_WARNING("Device %s doesn't support barriers, switching " + "to NV_CACHE 
mode", virt_dev->filename); + virt_dev->nv_cache = 1; + } + +out_close: + filp_close(fd, NULL); + +out: + TRACE_EXIT(); + return; +} + /* Returns 0 on success and file size in *file_size, error code otherwise */ static int vdisk_get_file_size(const char *filename, bool blockio, loff_t *file_size) @@ -695,7 +736,10 @@ static int vdisk_attach(struct scst_device *dev) goto out; } virt_dev->file_size = err; + TRACE_DBG("size of file: %lld", (long long unsigned int)err); + + vdisk_blockio_check_flush_support(virt_dev); } else virt_dev->file_size = 0; @@ -1612,8 +1656,11 @@ static void vdisk_exec_mode_sense(struct scst_cmd *cmd) pcode = cmd->cdb[2] & 0x3f; subpcode = cmd->cdb[3]; msense_6 = (MODE_SENSE == cmd->cdb[0]); - dev_spec = ((virt_dev->dev->rd_only || - cmd->tgt_dev->acg_dev->rd_only) ? WP : 0) | DPOFUA; + dev_spec = (virt_dev->dev->rd_only || + cmd->tgt_dev->acg_dev->rd_only) ? WP : 0; + + if (!virt_dev->blockio) + dev_spec |= DPOFUA; length = scst_get_buf_first(cmd, &address); if (unlikely(length <= 0)) { @@ -2152,15 +2199,22 @@ static int vdisk_fsync(struct scst_vdisk_thr *thr, loff_t loff, int res = 0; struct scst_vdisk_dev *virt_dev = (struct scst_vdisk_dev *)dev->dh_priv; - struct file *file = thr->fd; + struct file *file; TRACE_ENTRY(); /* Hopefully, the compiler will generate the single comparison */ - if (virt_dev->nv_cache || virt_dev->blockio || virt_dev->wt_flag || + if (virt_dev->nv_cache || virt_dev->wt_flag || virt_dev->o_direct_flag || virt_dev->nullio) goto out; + if (virt_dev->blockio) { + res = blockio_flush(thr->bdev); + goto out; + } + + file = thr->fd; + #if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32) res = sync_page_range(file->f_dentry->d_inode, file->f_mapping, loff, len); @@ -2179,8 +2233,6 @@ static int vdisk_fsync(struct scst_vdisk_thr *thr, loff_t loff, } } - /* ToDo: flush the device cache, if needed */ - out: TRACE_EXIT_RES(res); return res; @@ -2500,7 +2552,7 @@ static void blockio_endio(struct bio *bio, int error) return 1; 
#endif - if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) { + if (unlikely(!bio_flagged(bio, BIO_UPTODATE))) { if (error == 0) { PRINT_ERROR("Not up to date bio with error 0 for " "cmd %p, returning -EIO", blockio_work->cmd); @@ -2606,6 +2658,17 @@ static void blockio_exec_rw(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, (virt_dev->block_shift - 9); bio->bi_bdev = bdev; bio->bi_private = blockio_work; + /* + * Better to fail fast w/o any local recovery + * and retries. + */ +#ifdef BIO_RW_FAILFAST + bio->bi_rw |= (1 << BIO_RW_FAILFAST); +#else + bio->bi_rw |= (1 << BIO_RW_FAILFAST_DEV) | + (1 << BIO_RW_FAILFAST_TRANSPORT) | + (1 << BIO_RW_FAILFAST_DRIVER); +#endif #if 0 /* It could be win, but could be not, so a performance study is needed */ bio->bi_rw |= 1 << BIO_RW_SYNC; #endif @@ -2670,6 +2733,20 @@ out_no_mem: goto out; } +static int blockio_flush(struct block_device *bdev) +{ + int res = 0; + + TRACE_ENTRY(); + + res = blkdev_issue_flush(bdev, NULL); + if (res != 0) + PRINT_ERROR("blkdev_issue_flush() failed: %d", res); + + TRACE_EXIT_RES(res); + return res; +} + static void vdisk_exec_verify(struct scst_cmd *cmd, struct scst_vdisk_thr *thr, loff_t loff) { @@ -3196,7 +3273,7 @@ static int vdev_blockio_add_device(const char *device_name, char *params) { int res = 0; const char *allowed_params[] = { "filename", "read_only", "removable", - "blocksize", NULL }; + "blocksize", "nv_cache", NULL }; struct scst_vdisk_dev *virt_dev; TRACE_ENTRY();