- Residuals handling fixes, part 1
- Docs updated
- Cleanups

git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@1666 d57e44dd-8a1f-0410-8b47-8ef2f437770f
@@ -159,14 +159,6 @@ IMPORTANT: In the current version simultaneous access to local SCSI devices
devices READ/WRITE commands using direct disk handler look to
be safe.

IMPORTANT: Some versions of Windows have a bug, which makes them consider
========= response of READ CAPACITY(16) longer than 12 bytes as a faulty one.
As the result, such Windows'es refuse to see SCST exported
devices >2TB in size. This is fixed by MS in latter Windows
versions, probably, by some hotfix. But if you're using such
buggy Windows and experience this problem, change in
scst_vdisk.c::vdisk_exec_read_capacity16() "#if 1" to "#if 0".
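For illustration, the compile-time switch this note refers to amounts to
choosing between two caps on the returned data length. A minimal sketch of
the idea (not the verbatim scst_vdisk.c source; the function name here is
made up):

    /* Illustrative sketch only: "#if 1" selects the full 32-byte
     * READ CAPACITY(16) response; flipping it to "#if 0" caps the
     * response at the 12 bytes the buggy Windows versions accept. */
    static int cap_read_capacity16_len(int length)
    {
    #if 1           /* change to 0 for buggy Windows initiators */
            if (length > 32)
                    length = 32;
    #else
            if (length > 12)
                    length = 12;
    #endif
            return length;
    }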
To uninstall, type 'make scst_uninstall'.

@@ -1293,17 +1285,37 @@ Caching
-------

By default for performance reasons VDISK FILEIO devices use write back
caching policy. This is generally safe for modern applications who
prepared to work in the write back caching environments, so know when to
flush cache to keep their data consistent and minimize damage caused in
case of power/hardware/software failures by lost in the cache data.
caching policy.

Generally, write back caching is reasonably safe for use and danger of
it is greatly overestimated, because:

1. Modern HDDs have at least 16MB of cache working in write back mode by
default, so for a 10 drives RAID it is 160MB of a write back cache. You
can consider, how many people are happy with it and how many disabled
write back cache of their HDDs? Almost all and almost nobody
correspondingly? Moreover, many HDDs lie about state of their cache and
report write through while working in write back mode. They are also
successfully used.

Also, at the moment we don't know any SSDs, which are acceptably fast,
i.e. faster than HDDs in average, with write back caching disabled. So
to have acceptable performance their users have to use write back
caching, hence on a power loss all not yet committed to flash chips, but
acknowledged as written, data will be lost.

2. Most, if not all, modern enterprise level applications are well
prepared to work with write back cached storage. They know well when to
flush the cache and how to flush it to make the lost on crash data
acceptable.

For instance, journaled file systems flush cache on each meta data
update, so they survive power/hardware/software failures pretty well.
Note, Linux IO subsystem guarantees it work reliably only using data
protection barriers, which, for instance, for Ext3 turned off by default
(see http://lwn.net/Articles/283161). Some info about barriers from the
XFS point of view could be found at

Note, Linux I/O subsystem guarantees this facility to work reliably only
using data protection barriers, which usually turned off by default (see
http://lwn.net/Articles/283161). Some info about barriers from the XFS
point of view could be found at
http://oss.sgi.com/projects/xfs/faq.html#wcache. On Linux initiators for
Ext3 and ReiserFS file systems the barrier protection could be turned on
using "barrier=1" and "barrier=flush" mount options correspondingly. You

@@ -1316,19 +1328,21 @@ But even in case of journaled file systems your unsaved cached data will
still be lost in case of power/hardware/software failures, so you may
need to supply your target server with a good UPS with possibility to
gracefully shutdown your target on power shortage or disable write back
caching using WRITE_THROUGH flag. Note, on some real-life workloads
write through caching might perform better, than write back one with the
barrier protection turned on. Also note that without barriers enabled
caching using WRITE_THROUGH flag. Note, that without barriers enabled
(i.e. by default) Linux doesn't provide a guarantee that after
sync()/fsync() all written data really hit permanent storage. They can
be stored in the cache of your backstorage devices and, hence, lost on a
power failure event. Thus, ever with write-through cache mode, you still
either need to enable barriers on your backend file system on the target
(for devices this is, indeed, impossible), or need a good UPS to protect
yourself from not committed data loss.
sync()/fsync() all written data really hit permanent storage (see
above). They can be stored in the cache of your backstorage devices and,
hence, lost on a power failure event. Thus, even with write-through
cache mode, you still either need to enable barriers on your backend
file system on the target (for direct /dev/sdX devices this is, indeed,
impossible), or need a good UPS to protect yourself from not committed
data loss.
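To make the sync()/fsync() point concrete, here is a minimal user-space
sketch (the file name is only an example). As the text says, fsync() pushes
dirty pages to the device, but without barriers the device's own write back
cache may still hold the data when the call returns:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
            char buf[4096];
            /* Example path only. */
            int fd = open("/tmp/wb-cache-demo.dat",
                          O_CREAT | O_WRONLY | O_TRUNC, 0600);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            memset(buf, 0xAB, sizeof(buf));
            if (write(fd, buf, sizeof(buf)) != (ssize_t)sizeof(buf))
                    perror("write");
            /* Flushes the page cache to the device; does NOT guarantee the
             * device's volatile cache was flushed unless barriers are on. */
            if (fsync(fd) < 0)
                    perror("fsync");
            close(fd);
            return 0;
    }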
To limit this data loss you can use files in /proc/sys/vm to limit
amount of unflushed data in the system cache.
Note, on some real-life workloads write through caching might perform
better, than write back one with the barrier protection turned on.

To limit this data loss with write back caching you can use files in
/proc/sys/vm to limit amount of unflushed data in the system cache.
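As an illustration of those /proc/sys/vm knobs, a small helper could tighten
the dirty-page limits; dirty_background_ratio and dirty_ratio are standard
tunables, but the values below are only examples:

    #include <stdio.h>

    /* Write an integer into a /proc/sys/vm tunable; 0 on success. */
    static int write_vm_knob(const char *name, int value)
    {
            char path[128];
            FILE *f;

            snprintf(path, sizeof(path), "/proc/sys/vm/%s", name);
            f = fopen(path, "w");
            if (f == NULL)
                    return -1;
            fprintf(f, "%d\n", value);
            return fclose(f);
    }

    int main(void)
    {
            /* Example values: start background writeback at 5% dirty memory
             * and block writers at 10%, so less unflushed data accumulates. */
            if (write_vm_knob("dirty_background_ratio", 5) ||
                write_vm_knob("dirty_ratio", 10))
                    perror("write_vm_knob");
            return 0;
    }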
BLOCKIO VDISK mode

@@ -1359,8 +1373,9 @@ the system cache and the commands data buffers, so it saves a
considerable amount of CPU power and memory bandwidth.

IMPORTANT: Since data in BLOCKIO and FILEIO modes are not consistent between
========= them, if you try to use a device in both those modes simultaneously,
you will almost instantly corrupt your data on that device.
========= each other, if you try to use a device in both those modes
simultaneously, you will almost instantly corrupt your data
on that device.

Pass-through mode
@@ -86,14 +86,6 @@ IMPORTANT: In the current version simultaneous access to local SCSI devices
devices READ/WRITE commands using direct disk handler look to
be safe.

IMPORTANT: Some versions of Windows have a bug, which makes them consider
========= response of READ CAPACITY(16) longer than 12 bytes as a faulty one.
As the result, such Windows'es refuse to see SCST exported
devices >2TB in size. This is fixed by MS in latter Windows
versions, probably, by some hotfix. But if you're using such
buggy Windows and experience this problem, change in
scst_vdisk.c::vdisk_exec_read_capacity16() "#if 1" to "#if 0".

Usage in failover mode
----------------------
@@ -879,17 +871,37 @@ Caching
-------

By default for performance reasons VDISK FILEIO devices use write back
caching policy. This is generally safe for modern applications who
prepared to work in the write back caching environments, so know when to
flush cache to keep their data consistent and minimize damage caused in
case of power/hardware/software failures by lost in the cache data.
caching policy.

Generally, write back caching is reasonably safe for use and danger of
it is greatly overestimated, because:

1. Modern HDDs have at least 16MB of cache working in write back mode by
default, so for a 10 drives RAID it is 160MB of a write back cache. You
can consider, how many people are happy with it and how many disabled
write back cache of their HDDs? Almost all and almost nobody
correspondingly? Moreover, many HDDs lie about state of their cache and
report write through while working in write back mode. They are also
successfully used.

Also, at the moment we don't know any SSDs, which are acceptably fast,
i.e. faster than HDDs in average, with write back caching disabled. So
to have acceptable performance their users have to use write back
caching, hence on a power loss all not yet committed to flash chips, but
acknowledged as written, data will be lost.

2. Most, if not all, modern enterprise level applications are well
prepared to work with write back cached storage. They know well when to
flush the cache and how to flush it to make the lost on crash data
acceptable.

For instance, journaled file systems flush cache on each meta data
update, so they survive power/hardware/software failures pretty well.
Note, Linux IO subsystem guarantees it work reliably only using data
protection barriers, which, for instance, for Ext3 turned off by default
(see http://lwn.net/Articles/283161). Some info about barriers from the
XFS point of view could be found at

Note, Linux I/O subsystem guarantees this facility to work reliably only
using data protection barriers, which usually turned off by default (see
http://lwn.net/Articles/283161). Some info about barriers from the XFS
point of view could be found at
http://oss.sgi.com/projects/xfs/faq.html#wcache. On Linux initiators for
Ext3 and ReiserFS file systems the barrier protection could be turned on
using "barrier=1" and "barrier=flush" mount options correspondingly. You
@@ -902,19 +914,21 @@ But even in case of journaled file systems your unsaved cached data will
still be lost in case of power/hardware/software failures, so you may
need to supply your target server with a good UPS with possibility to
gracefully shutdown your target on power shortage or disable write back
caching using WRITE_THROUGH flag. Note, on some real-life workloads
write through caching might perform better, than write back one with the
barrier protection turned on. Also note that without barriers enabled
caching using WRITE_THROUGH flag. Note, that without barriers enabled
(i.e. by default) Linux doesn't provide a guarantee that after
sync()/fsync() all written data really hit permanent storage. They can
be stored in the cache of your backstorage devices and, hence, lost on a
power failure event. Thus, ever with write-through cache mode, you still
either need to enable barriers on your backend file system on the target
(for devices this is, indeed, impossible), or need a good UPS to protect
yourself from not committed data loss.
sync()/fsync() all written data really hit permanent storage (see
above). They can be stored in the cache of your backstorage devices and,
hence, lost on a power failure event. Thus, even with write-through
cache mode, you still either need to enable barriers on your backend
file system on the target (for direct /dev/sdX devices this is, indeed,
impossible), or need a good UPS to protect yourself from not committed
data loss.

To limit this data loss you can use files in /proc/sys/vm to limit
amount of unflushed data in the system cache.
Note, on some real-life workloads write through caching might perform
better, than write back one with the barrier protection turned on.

To limit this data loss with write back caching you can use files in
/proc/sys/vm to limit amount of unflushed data in the system cache.
BLOCKIO VDISK mode

@@ -945,8 +959,9 @@ the system cache and the commands data buffers, so it saves a
considerable amount of CPU power and memory bandwidth.

IMPORTANT: Since data in BLOCKIO and FILEIO modes are not consistent between
========= them, if you try to use a device in both those modes simultaneously,
you will almost instantly corrupt your data on that device.
========= each other, if you try to use a device in both those modes
simultaneously, you will almost instantly corrupt your data
on that device.

Pass-through mode
@@ -310,8 +310,6 @@ enum scst_cdb_flags {
 ** Misc SCSI constants
 *************************************************************/
#define SCST_SENSE_ASC_UA_RESET 0x29
#define READ_CAP_LEN 8
#define READ_CAP16_LEN 32
#define BYTCHK 0x02
#define POSITION_LEN_SHORT 20
#define POSITION_LEN_LONG 32
@@ -28,7 +28,6 @@
#define CHANGER_NAME "dev_changer"

#define CHANGER_RETRIES 2
#define READ_CAP_LEN 8

static int changer_attach(struct scst_device *);
/* static void changer_detach(struct scst_device *); */

@@ -28,7 +28,6 @@
#define PROCESSOR_NAME "dev_processor"

#define PROCESSOR_RETRIES 2
#define READ_CAP_LEN 8

static int processor_attach(struct scst_device *);
/*static void processor_detach(struct scst_device *);*/

@@ -28,7 +28,6 @@
#define RAID_NAME "dev_raid"

#define RAID_RETRIES 2
#define READ_CAP_LEN 8

static int raid_attach(struct scst_device *);
/* static void raid_detach(struct scst_device *); */
@@ -665,7 +665,7 @@ static int dev_user_alloc_space(struct scst_user_cmd *ucmd)
        TRACE_ENTRY();

        ucmd->state = UCMD_STATE_BUF_ALLOCING;
        cmd->dh_data_buf_alloced = 1;
        scst_cmd_set_dh_data_buff_alloced(cmd);

        rc = dev_user_alloc_sg(ucmd, is_buff_cached(ucmd));
        if (rc == 0)
@@ -1933,7 +1933,7 @@ static void vdisk_exec_read_capacity(struct scst_cmd *cmd)
        struct scst_vdisk_dev *virt_dev;
        uint32_t blocksize;
        uint64_t nblocks;
        uint8_t buffer[READ_CAP_LEN];
        uint8_t buffer[8];

        TRACE_ENTRY();

@@ -1979,8 +1979,8 @@ static void vdisk_exec_read_capacity(struct scst_cmd *cmd)
                goto out;
        }

        if (length > READ_CAP_LEN)
                length = READ_CAP_LEN;
        length = min_t(int, length, sizeof(buffer));

        memcpy(address, buffer, length);

        scst_put_buf(cmd, address);
@@ -2000,7 +2000,7 @@ static void vdisk_exec_read_capacity16(struct scst_cmd *cmd)
        struct scst_vdisk_dev *virt_dev;
        uint32_t blocksize;
        uint64_t nblocks;
        uint8_t buffer[READ_CAP16_LEN];
        uint8_t buffer[32];

        TRACE_ENTRY();
@@ -2063,23 +2063,8 @@ static void vdisk_exec_read_capacity16(struct scst_cmd *cmd)
                goto out;
        }

        /*
         * Some versions of Windows have a bug, which makes them consider
         * response of READ CAPACITY(16) longer than 12 bytes as a faulty one.
         * As the result, such Windows'es refuse to see SCST exported
         * devices >2TB in size. This is fixed by MS in latter Windows
         * versions, probably, by some hotfix.
         *
         * But if you're using such buggy Windows and experience this problem,
         * change this '1' to '0'.
         */
#if 0 /* there are too many such hosts */
        if (length > READ_CAP16_LEN)
                length = READ_CAP16_LEN;
#else
        if (length > 12)
                length = 12;
#endif
        length = min_t(int, length, sizeof(buffer));

        memcpy(address, buffer, length);

        scst_put_buf(cmd, address);
@@ -27,9 +27,10 @@
#include <linux/cdrom.h>
#include <linux/unistd.h>
#include <linux/string.h>
#include <asm/kmap_types.h>
#include <linux/ctype.h>
#include <linux/delay.h>
#include <asm/kmap_types.h>
#include <asm/unaligned.h>

#include "scst.h"
#include "scst_priv.h"
@@ -4500,7 +4501,7 @@ static int get_trans_len_block_limit(struct scst_cmd *cmd, uint8_t off)

static int get_trans_len_read_capacity(struct scst_cmd *cmd, uint8_t off)
{
        cmd->bufflen = READ_CAP_LEN;
        cmd->bufflen = 8;
        return 0;
}
@@ -4512,7 +4513,7 @@ static int get_trans_len_serv_act_in(struct scst_cmd *cmd, uint8_t off)

        if ((cmd->cdb[1] & 0x1f) == SAI_READ_CAPACITY_16) {
                cmd->op_name = "READ CAPACITY(16)";
                cmd->bufflen = READ_CAP16_LEN;
                cmd->bufflen = be32_to_cpu(get_unaligned((__be32 *)&cmd->cdb[10]));
                cmd->op_flags |= SCST_IMPLICIT_HQ|SCST_REG_RESERVE_ALLOWED;
        } else
                cmd->op_flags |= SCST_UNKNOWN_LENGTH;
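The new bufflen line takes the expected transfer length from the CDB itself:
READ CAPACITY(16) is a SERVICE ACTION IN(16) command that carries a 32-bit
big-endian allocation length in CDB bytes 10..13. A user-space sketch of the
same extraction (hypothetical helper, not SCST code):

    #include <stdint.h>

    /* Hypothetical helper: READ CAPACITY(16) allocation length,
     * stored big-endian in CDB bytes 10..13. */
    static uint32_t rc16_allocation_length(const uint8_t *cdb)
    {
            return ((uint32_t)cdb[10] << 24) | ((uint32_t)cdb[11] << 16) |
                   ((uint32_t)cdb[12] << 8)  |  (uint32_t)cdb[13];
    }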
@@ -314,6 +314,8 @@ static int do_exec(struct vdisk_cmd *vcmd)
                }
        }

        reply->resp_data_len = cmd->bufflen;

        switch (opcode) {
        case READ_6:
        case WRITE_6:
@@ -1059,8 +1061,11 @@ static void exec_inquiry(struct vdisk_cmd *vcmd)
        sBUG_ON(resp_len >= (int)sizeof(buf));
        if (length > resp_len)
                length = resp_len;

        memcpy(address, buf, length);
        reply->resp_data_len = length;

        if (length < reply->resp_data_len)
                reply->resp_data_len = length;

out:
        TRACE_EXIT();
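The residuals pattern here and in the following fileio.c hunks is the same
everywhere: resp_data_len starts out as the full expected length (set in
do_exec above) and is only reduced when fewer bytes are actually produced,
so the initiator sees a correct underflow residual. A compact sketch of that
rule, with assumed names rather than the real fileio.c structures:

    /* Assumed names, not the actual fileio.c types: shrink the reported
     * response length only when fewer bytes were produced than requested;
     * never grow it past what the initiator asked for. */
    static void report_actual_length(int produced, int *resp_data_len)
    {
            if (produced < *resp_data_len)
                    *resp_data_len = produced;
    }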
@@ -1080,8 +1085,11 @@ static void exec_request_sense(struct vdisk_cmd *vcmd)
        l = set_sense(b, sizeof(b), SCST_LOAD_SENSE(scst_sense_no_sense));

        length = min(l, length);

        memcpy(address, b, length);
        reply->resp_data_len = length;

        if (length < reply->resp_data_len)
                reply->resp_data_len = length;

        TRACE_EXIT();
        return;
@@ -1318,8 +1326,11 @@ static void exec_mode_sense(struct vdisk_cmd *vcmd)
        sBUG_ON(offset >= (int)sizeof(buf));
        if (offset > length)
                offset = length;

        memcpy(address, buf, offset);
        reply->resp_data_len = offset;

        if (offset < reply->resp_data_len)
                reply->resp_data_len = offset;

out:
        TRACE_EXIT();
@@ -1420,7 +1431,7 @@ static void exec_read_capacity(struct vdisk_cmd *vcmd)
        uint8_t *address = (uint8_t*)(unsigned long)cmd->pbuf;
        uint32_t blocksize;
        uint64_t nblocks;
        uint8_t buffer[READ_CAP_LEN];
        uint8_t buffer[8];

        TRACE_ENTRY();

@@ -1445,11 +1456,12 @@ static void exec_read_capacity(struct vdisk_cmd *vcmd)
        buffer[6] = (blocksize >> (BYTE * 1)) & 0xFF;
        buffer[7] = (blocksize >> (BYTE * 0)) & 0xFF;

        if (length > READ_CAP_LEN)
                length = READ_CAP_LEN;
        length = min(length, (int)sizeof(buffer));

        memcpy(address, buffer, length);

        reply->resp_data_len = length;
        if (length < reply->resp_data_len)
                reply->resp_data_len = length;

        TRACE_EXIT();
        return;
@@ -1464,7 +1476,7 @@ static void exec_read_capacity16(struct vdisk_cmd *vcmd)
        uint8_t *address = (uint8_t*)(unsigned long)cmd->pbuf;
        uint32_t blocksize;
        uint64_t nblocks;
        uint8_t buffer[READ_CAP16_LEN];
        uint8_t buffer[32];

        TRACE_ENTRY();

@@ -1506,11 +1518,12 @@ static void exec_read_capacity16(struct vdisk_cmd *vcmd)
                break;
        }

        if (length > READ_CAP16_LEN)
                length = READ_CAP16_LEN;
        length = min(length, (int)sizeof(buffer));

        memcpy(address, buffer, length);

        reply->resp_data_len = length;
        if (length < reply->resp_data_len)
                reply->resp_data_len = length;

        TRACE_EXIT();
        return;
@@ -1585,8 +1598,11 @@ static void exec_read_toc(struct vdisk_cmd *vcmd)

        if (off > length)
                off = length;

        memcpy(address, buffer, off);
        reply->resp_data_len = off;

        if (off < reply->resp_data_len)
                reply->resp_data_len = off;

out:
        TRACE_EXIT();