mirror of
https://github.com/SCST-project/scst.git
synced 2026-05-25 07:51:28 +00:00
A bunch of minor fixes/improvements, cleanups and updates:
- In scst_local new experimental compile-time option to force direct processing added - Attempt in SCST structures to separate read-mostly from read-write data to decrease cache ping-pong between CPUs - In scst_vdisk new module parameter num_threads added to specify a number of threads for each vdisk/vcdrom. Default is 5. - Debug logging for failed digests in iSCSI-SCST improved - Docs updates/cleanups git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@635 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
@@ -136,8 +136,8 @@ IMPORTANT: All LUN information (access control) MUST be configured
|
||||
|
||||
Also see SCST README file how to tune for the best performance.
|
||||
|
||||
CAUTION: Working of target and initiator on the same host isn't
|
||||
======= supported. See SCST README file for details.
|
||||
CAUTION: Working of target and initiator on the same host isn't
|
||||
======= supported. See SCST README file for details.
|
||||
|
||||
|
||||
Work if target's backstorage or link is too slow
|
||||
|
||||
@@ -48,9 +48,13 @@ You can check your network hardware offload capabilities by command
|
||||
"ethtool -k ethX", where X is the network device number. At least
|
||||
"tx-checksumming" and "scatter-gather" should be enabled.
|
||||
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
See in http://scst.sourceforge.net/iscsi-scst-howto.txt how to configure
|
||||
iSCSI-SCST.
|
||||
|
||||
ISCSI parameters like iSNS, CHAP and target parameters are configured in
|
||||
iscsi-scstd.conf. All LUN information is configured using the
|
||||
corresponding SCST interface. See in SCST README file section "Access
|
||||
@@ -67,16 +71,26 @@ IMPORTANT: All LUN information (access control) MUST be configured
|
||||
|
||||
Also see SCST README file how to tune for the best performance.
|
||||
|
||||
If under high load you experience I/O stalls or see in the kernel log
|
||||
abort or reset messages, then try to reduce QueuedCommands parameter in
|
||||
iscsi-scstd.conf file for the corresponding target to some lower value,
|
||||
like 8 (default is 32). See also SCST README file for more details about
|
||||
that issue.
|
||||
|
||||
CAUTION: Working of target and initiator on the same host isn't
|
||||
======= supported. See SCST README file for details.
|
||||
|
||||
|
||||
Work if target's backstorage or link is too slow
|
||||
------------------------------------------------
|
||||
|
||||
In some cases you can experience I/O stalls or see in the kernel log
|
||||
abort or reset messages. It can happen under high I/O load, when your
|
||||
target's backstorage gets overloaded, or working over a slow link, when
|
||||
the link can't serve all the queued commands on time.
|
||||
|
||||
To workaround it you can reduce QueuedCommands parameter in
|
||||
iscsi-scstd.conf file for the corresponding target to some lower value,
|
||||
like 8 (default is 32).
|
||||
|
||||
Also see SCST README file for more details about that issue and ways to
|
||||
prevent it.
|
||||
|
||||
|
||||
Performance advices
|
||||
-------------------
|
||||
|
||||
|
||||
@@ -183,6 +183,9 @@ int digest_rx_data(struct iscsi_cmnd *cmnd)
|
||||
|
||||
if (unlikely(crc != cmnd->ddigest)) {
|
||||
PRINT_ERROR("%s", "RX data digest failed");
|
||||
TRACE_MGMT_DBG("Calculated crc %x, ddigest %x, offset %d", crc,
|
||||
cmnd->ddigest, offset);
|
||||
iscsi_dump_pdu(&cmnd->pdu);
|
||||
res = -EIO;
|
||||
} else
|
||||
TRACE_DBG("RX data digest OK for cmd %p", cmnd);
|
||||
|
||||
94
scst/README
94
scst/README
@@ -13,7 +13,7 @@ SCST's Internet page http://scst.sourceforge.net.
|
||||
SCST supports the following I/O modes:
|
||||
|
||||
* Pass-through mode with one to many relationship, i.e. when multiple
|
||||
initiators can connect to the exported pass-through devices, for
|
||||
initiators can connect to the exported pass-through devices, for
|
||||
the following SCSI devices types: disks (type 0), tapes (type 1),
|
||||
processors (type 3), CDROMs (type 5), MO disks (type 7), medium
|
||||
changers (type 8) and RAID controllers (type 0xC)
|
||||
@@ -105,14 +105,14 @@ kernel logs (or run dmesg command for the few most recent messages).
|
||||
|
||||
IMPORTANT: Without loading appropriate device handler, corresponding devices
|
||||
========= will be invisible for remote initiators, which could lead to holes
|
||||
in the LUN addressing, so automatic device scanning by remote SCSI
|
||||
mid-level could not notice the devices. Therefore you will have
|
||||
to add them manually via
|
||||
in the LUN addressing, so automatic device scanning by remote SCSI
|
||||
mid-level could not notice the devices. Therefore you will have
|
||||
to add them manually via
|
||||
'echo "- - -" >/sys/class/scsi_host/hostX/scan',
|
||||
where X - is the host number.
|
||||
|
||||
IMPORTANT: Working of target and initiator on the same host isn't
|
||||
========= supported. This is a limitation of the Linux memory/cache
|
||||
========= supported. This is a limitation of the Linux memory/cache
|
||||
manager, because in this case an OOM deadlock like: system
|
||||
needs some memory -> it decides to clear some cache -> cache
|
||||
needs to write on a target exported device -> initiator sends
|
||||
@@ -163,7 +163,7 @@ the user space, which allows to create pure user space devices. The
|
||||
simplest example, where one would want it is if he/she wants to write a
|
||||
VTL. With scst_user he/she can write it purely in the user space. Or one
|
||||
would want it if he/she needs some sophisticated for kernel space
|
||||
processing of the passed data, like encrypting them or making snapshots.
|
||||
processing of the passed data, like encrypting them or making snapshots.
|
||||
|
||||
"Performance" device handlers for disks, MO disks and tapes in their
|
||||
exec() method skip (pretend to execute) all READ and WRITE operations
|
||||
@@ -302,12 +302,12 @@ interface in "/proc/scsi_tgt" directory. It contains the following
|
||||
entries:
|
||||
|
||||
- "help" file, which provides online help for SCST commands
|
||||
|
||||
|
||||
- "scsi_tgt" file, which on read provides information of serving by SCST
|
||||
devices and their dev handlers. On write it supports the following
|
||||
command:
|
||||
|
||||
* "assign H:C:I:L HANDLER_NAME" assigns dev handler "HANDLER_NAME"
|
||||
|
||||
* "assign H:C:I:L HANDLER_NAME" assigns dev handler "HANDLER_NAME"
|
||||
on device with host:channel:id:lun
|
||||
|
||||
- "sessions" file, which lists currently connected initiators (open sessions)
|
||||
@@ -318,11 +318,11 @@ entries:
|
||||
allocations requests to the kernel. "Size" - is the commands data
|
||||
size upper rounded to power of 2, "Hit" - how many there are
|
||||
allocations from the cache, "Total" - total number of allocations.
|
||||
|
||||
|
||||
- "threads" file, which allows to read and set number of SCST's threads
|
||||
|
||||
|
||||
- "version" file, which shows version of SCST
|
||||
|
||||
|
||||
- "trace_level" file, which allows to read and set trace (logging) level
|
||||
for SCST. See "help" file for list of trace levels. If you want to
|
||||
enable logging options, which produce a lot of events, like "debug",
|
||||
@@ -355,11 +355,11 @@ Access and devices visibility management allows for an initiator or
|
||||
group of initiators to see different devices with different LUNs
|
||||
with necessary access permissions.
|
||||
|
||||
SCST supports two modes of access control:
|
||||
SCST supports two modes of access control:
|
||||
|
||||
1. Target-oriented. In this mode you define for each target devices and
|
||||
their LUNs, which are accessible to all initiators, connected to that
|
||||
target. This is a regular access control mode, which people mean
|
||||
target. This is a regular access control mode, which people usually mean
|
||||
thinking about access control in general. For instance, in IET this is
|
||||
the only supported mode. In this mode you should create a security group
|
||||
with name "Default_TARGET_NAME", where "TARGET_NAME" is name of the
|
||||
@@ -379,7 +379,7 @@ mode has higher priority, than target-oriented.
|
||||
When a target driver registers itself in SCST core, it tells SCST core
|
||||
its name. Then, when there is a new connection from a remote initiator,
|
||||
the target driver registers this connection in SCST core and tells it
|
||||
name of the remote initiator. Then SCST core finds the corresponding
|
||||
the name of the remote initiator. Then SCST core finds the corresponding
|
||||
devices for it using the following algorithm:
|
||||
|
||||
1. It searches through all defined groups trying to find group
|
||||
@@ -391,6 +391,9 @@ name "Default_TARGET_NAME". If it succeeds, the found group is used.
|
||||
3. Otherwise, the group with name "Default" is used. This group is
|
||||
always defined, but empty by default.
|
||||
|
||||
Names of both target and initiator you can clarify in the kernel log. In
|
||||
it SCST reports to which group each session is assigned.
|
||||
|
||||
In /proc/scsi_tgt each group represented as "groups/GROUP_NAME/"
|
||||
subdirectory. In it there are files "devices" and "names". File
|
||||
"devices" lists devices and their LUNs in the group, file "names" lists
|
||||
@@ -400,38 +403,35 @@ To configure access and devices visibility management SCST provides the
|
||||
following files and directories under /proc/scsi_tgt:
|
||||
|
||||
- "add_group GROUP" to /proc/scsi_tgt/scsi_tgt adds group "GROUP"
|
||||
|
||||
|
||||
- "del_group GROUP" to /proc/scsi_tgt/scsi_tgt deletes group "GROUP"
|
||||
|
||||
- "add H:C:I:L lun [READ_ONLY]" to /proc/scsi_tgt/groups/GROUP/devices adds
|
||||
|
||||
- "add H:C:I:L lun [READ_ONLY]" to /proc/scsi_tgt/groups/GROUP/devices adds
|
||||
device with host:channel:id:lun with LUN "lun" in group "GROUP". Optionally,
|
||||
the device could be marked as read only.
|
||||
|
||||
|
||||
- "del H:C:I:L" to /proc/scsi_tgt/groups/GROUP/devices deletes device with
|
||||
host:channel:id:lun from group "GROUP"
|
||||
|
||||
|
||||
- "add V_NAME lun [READ_ONLY]" to /proc/scsi_tgt/groups/GROUP/devices adds
|
||||
device with virtual name "V_NAME" with LUN "lun" in group "GROUP".
|
||||
Optionally, the device could be marked as read only.
|
||||
|
||||
|
||||
- "del V_NAME" to /proc/scsi_tgt/groups/GROUP/devices deletes device with
|
||||
virtual name "V_NAME" from group "GROUP"
|
||||
|
||||
|
||||
- "clear" to /proc/scsi_tgt/groups/GROUP/devices clears the list of devices
|
||||
for group "GROUP"
|
||||
|
||||
- "add NAME" to /proc/scsi_tgt/groups/GROUP/names adds name "NAME" to group
|
||||
|
||||
- "add NAME" to /proc/scsi_tgt/groups/GROUP/names adds name "NAME" to group
|
||||
"GROUP"
|
||||
|
||||
- "del NAME" to /proc/scsi_tgt/groups/GROUP/names deletes name "NAME" from group
|
||||
|
||||
- "del NAME" to /proc/scsi_tgt/groups/GROUP/names deletes name "NAME" from group
|
||||
"GROUP"
|
||||
|
||||
|
||||
- "clear" to /proc/scsi_tgt/groups/GROUP/names clears the list of names
|
||||
for group "GROUP"
|
||||
|
||||
There must be LUN 0 in each security group, i.e. LUs numeration must not
|
||||
start from, e.g., 1.
|
||||
|
||||
Examples:
|
||||
|
||||
- "echo "add 1:0:1:0 0" >/proc/scsi_tgt/groups/Default/devices" will
|
||||
@@ -440,8 +440,8 @@ Examples:
|
||||
|
||||
- "echo "add disk1 1" >/proc/scsi_tgt/groups/Default/devices" will
|
||||
add virtual VDISK device with name "disk1" to "Default" group
|
||||
with LUN 1.
|
||||
|
||||
with LUN 1.
|
||||
|
||||
Consider you need to have an iSCSI target with name
|
||||
"iqn.2007-05.com.example:storage.disk1.sys1.xyz" (you defined it in
|
||||
iscsi-scst.conf), which should export virtual device "dev1" with LUN 0
|
||||
@@ -464,7 +464,13 @@ in this section low level interface.
|
||||
IMPORTANT
|
||||
=========
|
||||
|
||||
All the access control must be fully configured BEFORE load of the
|
||||
There must be LUN 0 in each security group, i.e. LUs numeration must not
|
||||
start from, e.g., 1.
|
||||
|
||||
IMPORTANT
|
||||
=========
|
||||
|
||||
All the access control must be fully configured BEFORE load of the
|
||||
corresponding target driver! When you load a target driver or enable
|
||||
target mode in it, as for qla2x00t driver, it will immediately start
|
||||
accepting new connections, hence creating new sessions, and those new
|
||||
@@ -492,27 +498,27 @@ After loading VDISK device handler creates in "/proc/scsi_tgt/"
|
||||
subdirectories "vdisk" and "vcdrom". They have similar layout:
|
||||
|
||||
- "trace_level" and "type" files as described for other dev handlers
|
||||
|
||||
|
||||
- "help" file, which provides online help for VDISK commands
|
||||
|
||||
|
||||
- "vdisk"/"vcdrom" files, which on read provides information of
|
||||
currently open device files. On write it supports the following
|
||||
command:
|
||||
|
||||
|
||||
* "open NAME [PATH] [BLOCK_SIZE] [FLAGS]" - opens file "PATH" as
|
||||
device "NAME" with block size "BLOCK_SIZE" bytes with flags
|
||||
"FLAGS". "PATH" could be empty only for VDISK CDROM. "BLOCK_SIZE"
|
||||
and "FLAGS" are valid only for disk VDISK. The block size must be
|
||||
power of 2 and >= 512 bytes. Default is 512. Possible flags:
|
||||
|
||||
|
||||
- WRITE_THROUGH - write back caching disabled. Note, this option
|
||||
has sense only if you also *manually* disable write-back cache
|
||||
in *all* your backstorage devices and make sure it's actually
|
||||
disabled, since many devices are known to lie about this mode to
|
||||
get better benchmark results.
|
||||
|
||||
|
||||
- READ_ONLY - read only
|
||||
|
||||
|
||||
- O_DIRECT - both read and write caching disabled. This mode
|
||||
isn't currently fully implemented, you should use user space
|
||||
fileio_tgt program in O_DIRECT mode instead (see below).
|
||||
@@ -548,7 +554,7 @@ subdirectories "vdisk" and "vcdrom". They have similar layout:
|
||||
|
||||
- REMOVABLE - with this flag set the device is reported to remote
|
||||
initiators as removable.
|
||||
|
||||
|
||||
* "close NAME" - closes device "NAME".
|
||||
|
||||
* "change NAME [PATH]" - changes a virtual CD in the VDISK CDROM.
|
||||
@@ -763,17 +769,17 @@ using debug2perf Makefile target.
|
||||
attached remotely (SCSI target case), which sometimes could lead to
|
||||
unexpectedly low throughput. You should increase read-ahead size to at
|
||||
least 512KB or even more on all initiators and the target.
|
||||
|
||||
|
||||
You should also limit on all initiators maximum amount of sectors per
|
||||
SCSI command. To do it on Linux initiators, run:
|
||||
|
||||
|
||||
echo "64" > /sys/block/sdX/queue/max_sectors_kb
|
||||
|
||||
where specify instead of X your imported from target device letter,
|
||||
like 'b', i.e. sdb.
|
||||
|
||||
To increase read-ahead size on Linux, run:
|
||||
|
||||
|
||||
blockdev --setra N /dev/sdX
|
||||
|
||||
where N is a read-ahead number in 512-byte sectors and X is a device
|
||||
@@ -944,7 +950,7 @@ Thanks to:
|
||||
suggestions, bug reports and help in debugging.
|
||||
|
||||
* Ming Zhang <mingz@ele.uri.edu> for fixes and comments.
|
||||
|
||||
|
||||
* Nathaniel Clark <nate@misrule.us> for fixes and comments.
|
||||
|
||||
* Calvin Morrow <calvin.morrow@comcast.net> for testing and useful
|
||||
|
||||
@@ -37,8 +37,6 @@ In addition, SCST supports advanced per-initiator access and devices
|
||||
visibility management, so different initiators could see different set
|
||||
of devices with different access permissions. See below for details.
|
||||
|
||||
This is quite stable (but still beta) version.
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
||||
@@ -303,11 +301,11 @@ Access and devices visibility management allows for an initiator or
|
||||
group of initiators to see different devices with different LUNs
|
||||
with necessary access permissions.
|
||||
|
||||
SCST supports two modes of access control:
|
||||
SCST supports two modes of access control:
|
||||
|
||||
1. Target-oriented. In this mode you define for each target devices and
|
||||
their LUNs, which are accessible to all initiators, connected to that
|
||||
target. This is a regular access control mode, which people mean
|
||||
target. This is a regular access control mode, which people usually mean
|
||||
thinking about access control in general. For instance, in IET this is
|
||||
the only supported mode. In this mode you should create a security group
|
||||
with name "Default_TARGET_NAME", where "TARGET_NAME" is name of the
|
||||
@@ -327,7 +325,7 @@ mode has higher priority, than target-oriented.
|
||||
When a target driver registers itself in SCST core, it tells SCST core
|
||||
its name. Then, when there is a new connection from a remote initiator,
|
||||
the target driver registers this connection in SCST core and tells it
|
||||
name of the remote initiator. Then SCST core finds the corresponding
|
||||
the name of the remote initiator. Then SCST core finds the corresponding
|
||||
devices for it using the following algorithm:
|
||||
|
||||
1. It searches through all defined groups trying to find group
|
||||
@@ -339,6 +337,9 @@ name "Default_TARGET_NAME". If it succeeds, the found group is used.
|
||||
3. Otherwise, the group with name "Default" is used. This group is
|
||||
always defined, but empty by default.
|
||||
|
||||
Names of both target and initiator you can clarify in the kernel log. In
|
||||
it SCST reports to which group each session is assigned.
|
||||
|
||||
In /proc/scsi_tgt each group represented as "groups/GROUP_NAME/"
|
||||
subdirectory. In it there are files "devices" and "names". File
|
||||
"devices" lists devices and their LUNs in the group, file "names" lists
|
||||
@@ -377,9 +378,6 @@ following files and directories under /proc/scsi_tgt:
|
||||
- "clear" to /proc/scsi_tgt/groups/GROUP/names clears the list of names
|
||||
for group "GROUP"
|
||||
|
||||
There must be LUN 0 in each security group, i.e. LUs numeration must not
|
||||
start from, e.g., 1.
|
||||
|
||||
Examples:
|
||||
|
||||
- "echo "add 1:0:1:0 0" >/proc/scsi_tgt/groups/Default/devices" will
|
||||
@@ -412,13 +410,19 @@ in this section low level interface.
|
||||
IMPORTANT
|
||||
=========
|
||||
|
||||
All the access control must be fully configured BEFORE load of the
|
||||
There must be LUN 0 in each security group, i.e. LUs numeration must not
|
||||
start from, e.g., 1.
|
||||
|
||||
IMPORTANT
|
||||
=========
|
||||
|
||||
All the access control must be fully configured BEFORE load of the
|
||||
corresponding target driver! When you load a target driver or enable
|
||||
target mode in it, as for qla2x00t driver, it will immediately start
|
||||
accepting new connections, hence creating new sessions, and those new
|
||||
sessions will be assigned to security groups according to the
|
||||
*currently* configured access control settings. For instance, to
|
||||
"Default" group, instead of "HOST004" as you need, because "HOST004"
|
||||
"Default" group, instead of "HOST004" as you may need, because "HOST004"
|
||||
doesn't exist yet. So, one must configure all the security groups before
|
||||
new connections from the initiators are created, i.e. before target
|
||||
drivers loaded.
|
||||
@@ -643,6 +647,10 @@ applications, so, if you experience large transfers stalls, you should
|
||||
check documentation for your application how to limit the transfer
|
||||
sizes.
|
||||
|
||||
Another way to solve this issue is to build SG entries with more than 1
|
||||
page each. See the following patch as an example:
|
||||
http://scst.sf.net/sgv_big_order_alloc.diff
|
||||
|
||||
User space mode using scst_user dev handler
|
||||
-------------------------------------------
|
||||
|
||||
@@ -786,17 +794,19 @@ IMPORTANT: If you use on initiator some versions of Windows (at least W2K)
|
||||
See also important notes about setting block sizes >512 bytes
|
||||
for VDISK FILEIO devices above.
|
||||
|
||||
What if target's backstorage is too slow
|
||||
----------------------------------------
|
||||
Work if target's backstorage or link is too slow
|
||||
------------------------------------------------
|
||||
|
||||
If under high load you experience I/O stalls or see in the kernel log on
|
||||
the target abort or reset messages, then your backstorage is too slow
|
||||
comparing with your target link speed and amount of simultaneously
|
||||
queued commands. On some seek intensive workloads even fast disks or
|
||||
RAIDs, which able to serve continuous data stream on 500+ MB/s speed,
|
||||
can be as slow as 0.3 MB/s. Another possible cause for that can be
|
||||
MD/LVM/RAID on your target as in http://lkml.org/lkml/2008/2/27/96
|
||||
(check the whole thread as well).
|
||||
Under high I/O load, when your target's backstorage gets overloaded, or
|
||||
working over a slow link between initiator and target, when the link
|
||||
can't serve all the queued commands on time, you can experience I/O
|
||||
stalls or see in the kernel log abort or reset messages.
|
||||
|
||||
At first, consider the case of too slow target's backstorage. On some
|
||||
seek intensive workloads even fast disks or RAIDs, which are able to serve
|
||||
continuous data stream on 500+ MB/s speed, can be as slow as 0.3 MB/s.
|
||||
Another possible cause for that can be MD/LVM/RAID on your target as in
|
||||
http://lkml.org/lkml/2008/2/27/96 (check the whole thread as well).
|
||||
|
||||
Thus, in such situations simply processing of one or more commands takes
|
||||
too long time, hence initiator decides that they are stuck on the target
|
||||
@@ -809,26 +819,21 @@ backstorage speed could be more appropriate.
|
||||
Unfortunately, currently SCST lacks dynamic I/O flow control, when the
|
||||
queue depth on the target is dynamically decreased/increased based on
|
||||
how slow/fast the backstorage speed comparing to the target link. So,
|
||||
there are only 5 possible actions, which you can do to workaround or fix
|
||||
this issue:
|
||||
there are 6 possible actions, which you can do to workaround or fix this
|
||||
issue in this case:
|
||||
|
||||
1. Ignore incoming task management (TM) commands. It's fine if there are
|
||||
not too many of them, so average performance isn't hurt and the
|
||||
corresponding device isn't put offline, i.e. if the backstorage isn't
|
||||
too much slow.
|
||||
corresponding device isn't getting put offline, i.e. if the backstorage
|
||||
isn't too slow.
|
||||
|
||||
2. Decrease /sys/block/sdX/device/queue_depth on the initiator in case
|
||||
if it's Linux (see below how) or/and SCST_MAX_TGT_DEV_COMMANDS constant
|
||||
in scst_priv.h file until you stop seeing incoming TM commands.
|
||||
ISCSI-SCST driver also has its own iSCSI specific parameter for that.
|
||||
ISCSI-SCST driver also has its own iSCSI specific parameter for that,
|
||||
see its README file.
|
||||
|
||||
3. Try to avoid such seek intensive workloads.
|
||||
|
||||
4. Insrease speed of the target's backstorage.
|
||||
|
||||
5. Implement in SCST the dynamic I/O flow control.
|
||||
|
||||
To decrease device queue depth on Linux initiators run command:
|
||||
To decrease device queue depth on Linux initiators you can run command:
|
||||
|
||||
# echo Y >/sys/block/sdX/device/queue_depth
|
||||
|
||||
@@ -838,12 +843,48 @@ limitations for Y value, it can be any value from 1 to possible maximum
|
||||
(usually, 32), so start from dividing the current value on 2, i.e. set
|
||||
16, if /sys/block/sdX/device/queue_depth contains 32.
|
||||
|
||||
3. Increase the corresponding timeout on the initiator. For Linux it is
|
||||
located in
|
||||
/sys/devices/platform/host*/session*/target*:0:0/*:0:0:1/timeout. It can
|
||||
be done automatically by an udev rule. For instance, the following
|
||||
rule will increase it to 300 seconds:
|
||||
|
||||
SUBSYSTEM=="scsi", KERNEL=="[0-9]*:[0-9]*", ACTION=="add", ATTR{type}=="0|7|14", ATTR{timeout}="300"
|
||||
|
||||
By default, this timeout is 30 or 60 seconds, depending on your distribution.
|
||||
|
||||
4. Try to avoid such seek intensive workloads.
|
||||
|
||||
5. Increase speed of the target's backstorage.
|
||||
|
||||
6. Implement in SCST dynamic I/O flow control. This will be an ultimate
|
||||
solution. See "Dynamic I/O flow control" section on
|
||||
http://scst.sourceforge.net/contributing.html page for possible
|
||||
implementation idea.
|
||||
|
||||
Next, consider the case of too slow link between initiator and target,
|
||||
when the initiator tries to simultaneously push N commands to the target
|
||||
over it. In this case time to serve those commands, i.e. send or receive
|
||||
data for them over the link, can be more, than timeout for any single
|
||||
command, hence one or more commands in the tail of the queue can not be
|
||||
served on time less than the timeout, so the initiator will decide that
|
||||
they are stuck on the target and will try to recover.
|
||||
|
||||
To workaround/fix this issue in this case you can use ways 1, 2, 3, 6
|
||||
above or (7): increase speed of the link between target and initiator.
|
||||
But for some initiators implementations for WRITE commands there might
|
||||
be cases when target has no way to detect the issue, so dynamic I/O flow
|
||||
control will not be able to help. In those cases you could also need on
|
||||
the initiator(s) to either decrease the queue depth (way 2), or increase
|
||||
the corresponding timeout (way 3).
|
||||
|
||||
Note, that logged messages about QUEUE_FULL status are quite different
|
||||
by nature. This is a normal work, just SCSI flow control in action.
|
||||
Simply don't enable "mgmt_minor" logging level, or, alternatively, if
|
||||
you are confident in the worst case performance of your back-end
|
||||
storage, you can increase SCST_MAX_TGT_DEV_COMMANDS in scst_priv.h to
|
||||
64. Usually initiators don't try to push more commands on the target.
|
||||
you are confident in the worst case performance of your back-end storage
|
||||
or initiator-target link, you can increase SCST_MAX_TGT_DEV_COMMANDS in
|
||||
scst_priv.h to 64. Usually initiators don't try to push more commands on
|
||||
the target.
|
||||
|
||||
Credits
|
||||
-------
|
||||
|
||||
@@ -865,24 +865,6 @@ struct scst_tgt {
|
||||
|
||||
struct scst_tgt_template *tgtt; /* corresponding target template */
|
||||
|
||||
/* Used to wait until session finished to unregister */
|
||||
wait_queue_head_t unreg_waitQ;
|
||||
|
||||
/* Device number in /proc */
|
||||
int proc_num;
|
||||
|
||||
/*
|
||||
* The following fields used to store and retry cmds if
|
||||
* target's internal queue is full, so the target is unable to accept
|
||||
* the cmd returning QUEUE FULL
|
||||
*/
|
||||
atomic_t finished_cmds;
|
||||
int retry_cmds; /* protected by tgt_lock */
|
||||
spinlock_t tgt_lock;
|
||||
struct list_head retry_cmd_list; /* protected by tgt_lock */
|
||||
struct timer_list retry_timer;
|
||||
int retry_timer_active;
|
||||
|
||||
/*
|
||||
* Maximum SG table size. Needed here, since different cards on the
|
||||
* same target template can have different SG table limitations.
|
||||
@@ -892,6 +874,24 @@ struct scst_tgt {
|
||||
/* Used for storage of target driver private stuff */
|
||||
void *tgt_priv;
|
||||
|
||||
/*
|
||||
* The following fields used to store and retry cmds if
|
||||
* target's internal queue is full, so the target is unable to accept
|
||||
* the cmd returning QUEUE FULL
|
||||
*/
|
||||
bool retry_timer_active;
|
||||
struct timer_list retry_timer;
|
||||
atomic_t finished_cmds;
|
||||
int retry_cmds; /* protected by tgt_lock */
|
||||
spinlock_t tgt_lock;
|
||||
struct list_head retry_cmd_list; /* protected by tgt_lock */
|
||||
|
||||
/* Used to wait until session finished to unregister */
|
||||
wait_queue_head_t unreg_waitQ;
|
||||
|
||||
/* Device number in /proc */
|
||||
int proc_num;
|
||||
|
||||
/* Name on the default security group ("Default_target_name") */
|
||||
char *default_group_name;
|
||||
};
|
||||
@@ -908,9 +908,24 @@ struct scst_session {
|
||||
*/
|
||||
int init_phase;
|
||||
|
||||
atomic_t refcnt; /* get/put counter */
|
||||
struct scst_tgt *tgt; /* corresponding target */
|
||||
|
||||
/**************************************************************/
|
||||
/* Used for storage of target driver private stuff */
|
||||
void *tgt_priv;
|
||||
|
||||
/*
|
||||
* Hash list of tgt_dev's for this session, protected by scst_mutex
|
||||
* and suspended activity
|
||||
*/
|
||||
struct list_head sess_tgt_dev_list_hash[TGT_DEV_HASH_SIZE];
|
||||
|
||||
/*
|
||||
* List of cmds in this session. Used to find a cmd in the
|
||||
* session. Protected by sess_list_lock.
|
||||
*/
|
||||
struct list_head search_cmd_list;
|
||||
|
||||
atomic_t refcnt; /* get/put counter */
|
||||
|
||||
/*
|
||||
* Alive commands for this session. ToDo: make it part of the common
|
||||
@@ -920,29 +935,12 @@ struct scst_session {
|
||||
|
||||
spinlock_t sess_list_lock; /* protects search_cmd_list, etc */
|
||||
|
||||
/*
|
||||
* List of cmds in this session. Used to find a cmd in the
|
||||
* session. Protected by sess_list_lock.
|
||||
*/
|
||||
struct list_head search_cmd_list;
|
||||
|
||||
/*
|
||||
* Hash list of tgt_dev's for this session, protected by scst_mutex
|
||||
* and suspended activity
|
||||
*/
|
||||
struct list_head sess_tgt_dev_list_hash[TGT_DEV_HASH_SIZE];
|
||||
|
||||
/* Access control for this session and list entry there */
|
||||
struct scst_acg *acg;
|
||||
|
||||
/* List entry for the sessions list inside ACG */
|
||||
struct list_head acg_sess_list_entry;
|
||||
|
||||
struct scst_tgt *tgt; /* corresponding target */
|
||||
|
||||
/* Used for storage of target driver private stuff */
|
||||
void *tgt_priv;
|
||||
|
||||
/* Name of attached initiator */
|
||||
const char *initiator_name;
|
||||
|
||||
@@ -1344,18 +1342,6 @@ struct scst_mgmt_cmd {
|
||||
struct scst_device {
|
||||
struct scst_dev_type *handler; /* corresponding dev handler */
|
||||
|
||||
/* Pointer to lists of commands with the lock */
|
||||
struct scst_cmd_lists *p_cmd_lists;
|
||||
|
||||
/* Lists of commands with lock, if dedicated threads are used */
|
||||
struct scst_cmd_lists cmd_lists;
|
||||
|
||||
/* How many cmds alive on this dev */
|
||||
atomic_t dev_cmd_count;
|
||||
|
||||
/* How many write cmds alive on this dev. Temporary, ToDo */
|
||||
atomic_t write_cmd_count;
|
||||
|
||||
struct scst_mem_lim dev_mem_lim;
|
||||
|
||||
unsigned short type; /* SCSI type of the device */
|
||||
@@ -1393,6 +1379,27 @@ struct scst_device {
|
||||
|
||||
/**************************************************************/
|
||||
|
||||
/* Used for storage of dev handler private stuff */
|
||||
void *dh_priv;
|
||||
|
||||
/* Used to translate SCSI's cmd to SCST's cmd */
|
||||
struct gendisk *rq_disk;
|
||||
|
||||
/* Corresponding real SCSI device, could be NULL for virtual devices */
|
||||
struct scsi_device *scsi_dev;
|
||||
|
||||
/* Pointer to lists of commands with the lock */
|
||||
struct scst_cmd_lists *p_cmd_lists;
|
||||
|
||||
/* Lists of commands with lock, if dedicated threads are used */
|
||||
struct scst_cmd_lists cmd_lists;
|
||||
|
||||
/* How many cmds alive on this dev */
|
||||
atomic_t dev_cmd_count;
|
||||
|
||||
/* How many write cmds alive on this dev. Temporary, ToDo */
|
||||
atomic_t write_cmd_count;
|
||||
|
||||
spinlock_t dev_lock; /* device lock */
|
||||
|
||||
/*
|
||||
@@ -1409,15 +1416,6 @@ struct scst_device {
|
||||
|
||||
struct list_head blocked_cmd_list; /* protected by dev_lock */
|
||||
|
||||
/* Used for storage of dev handler private stuff */
|
||||
void *dh_priv;
|
||||
|
||||
/* Used to translate SCSI's cmd to SCST's cmd */
|
||||
struct gendisk *rq_disk;
|
||||
|
||||
/* Corresponding real SCSI device, could be NULL for virtual devices */
|
||||
struct scsi_device *scsi_dev;
|
||||
|
||||
/* Used to wait for requested amount of "on_dev" commands */
|
||||
wait_queue_head_t on_dev_waitQ;
|
||||
|
||||
@@ -1471,15 +1469,18 @@ struct scst_tgt_dev {
|
||||
struct scst_device *dev; /* to save extra dereferences */
|
||||
uint64_t lun; /* to save extra dereferences */
|
||||
|
||||
/* How many cmds alive on this dev in this session */
|
||||
atomic_t tgt_dev_cmd_count;
|
||||
|
||||
gfp_t gfp_mask;
|
||||
struct sgv_pool *pool;
|
||||
int max_sg_cnt;
|
||||
|
||||
unsigned long tgt_dev_flags; /* tgt_dev's async flags */
|
||||
|
||||
/* Used for storage of dev handler private stuff */
|
||||
void *dh_priv;
|
||||
|
||||
/* How many cmds alive on this dev in this session */
|
||||
atomic_t tgt_dev_cmd_count;
|
||||
|
||||
/*
|
||||
* Used to execute cmd's in order of arrival, honoring SCSI task
|
||||
* attributes.
|
||||
@@ -1506,9 +1507,6 @@ struct scst_tgt_dev {
|
||||
atomic_t *cur_sn_slot;
|
||||
atomic_t sn_slots[15];
|
||||
|
||||
/* Used for storage of dev handler private stuff */
|
||||
void *dh_priv;
|
||||
|
||||
/* List of scst_thr_data_hdr and lock */
|
||||
spinlock_t thr_data_lock;
|
||||
struct list_head thr_data_list;
|
||||
@@ -2042,14 +2040,25 @@ static inline int scst_cmd_atomic(struct scst_cmd *cmd)
|
||||
return res;
|
||||
}
|
||||
|
||||
static inline enum scst_exec_context scst_estimate_context(void)
|
||||
static inline enum scst_exec_context __scst_estimate_context(bool direct)
|
||||
{
|
||||
if (in_irq())
|
||||
return SCST_CONTEXT_TASKLET;
|
||||
else if (irqs_disabled())
|
||||
return SCST_CONTEXT_THREAD;
|
||||
else
|
||||
return SCST_CONTEXT_DIRECT_ATOMIC;
|
||||
return direct ? SCST_CONTEXT_DIRECT :
|
||||
SCST_CONTEXT_DIRECT_ATOMIC;
|
||||
}
|
||||
|
||||
static inline enum scst_exec_context scst_estimate_context(void)
|
||||
{
|
||||
return __scst_estimate_context(0);
|
||||
}
|
||||
|
||||
static inline enum scst_exec_context scst_estimate_context_direct(void)
|
||||
{
|
||||
return __scst_estimate_context(1);
|
||||
}
|
||||
|
||||
/* Returns cmd's session */
|
||||
|
||||
@@ -225,6 +225,13 @@ struct scst_vdisk_thr {
|
||||
|
||||
static struct kmem_cache *vdisk_thr_cachep;
|
||||
|
||||
static int num_threads;
|
||||
|
||||
#define DEF_NUM_THREADS 5
|
||||
|
||||
/*
 * The last argument of module_param_named() is the sysfs permission mask,
 * not a default value. Expose the parameter read-only; values below 1 are
 * replaced by DEF_NUM_THREADS at driver init.
 */
module_param_named(num_threads, num_threads, int, 0444);
|
||||
MODULE_PARM_DESC(num_threads, "vdisk threads count");
|
||||
|
||||
static int vdisk_attach(struct scst_device *dev);
|
||||
static void vdisk_detach(struct scst_device *dev);
|
||||
static int vdisk_attach_tgt(struct scst_tgt_dev *tgt_dev);
|
||||
@@ -1383,7 +1390,7 @@ static int vdisk_rigid_geo_pg(unsigned char *p, int pcontrol,
|
||||
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
|
||||
0x3a, 0x98/* 15K RPM */, 0, 0};
|
||||
int32_t ncyl, n, rem;
|
||||
|
||||
|
||||
memcpy(p, geo_m_pg, sizeof(geo_m_pg));
|
||||
ncyl = div_s64_rem(virt_dev->nblocks, DEF_HEADS * DEF_SECTORS, &rem);
|
||||
if (rem != 0)
|
||||
@@ -3541,7 +3548,7 @@ static void exit_scst_vdisk(struct scst_dev_type *devtype,
|
||||
|
||||
static int __init init_scst_vdisk_driver(void)
|
||||
{
|
||||
int res, num_threads;
|
||||
int res;
|
||||
|
||||
vdisk_thr_cachep = KMEM_CACHE(scst_vdisk_thr, SCST_SLAB_FLAGS);
|
||||
if (vdisk_thr_cachep == NULL) {
|
||||
@@ -3549,7 +3556,12 @@ static int __init init_scst_vdisk_driver(void)
|
||||
goto out;
|
||||
}
|
||||
|
||||
num_threads = 5;
|
||||
if (num_threads < 1) {
|
||||
PRINT_ERROR("num_threads can not be less than 1, use "
|
||||
"default %d", DEF_NUM_THREADS);
|
||||
num_threads = DEF_NUM_THREADS;
|
||||
}
|
||||
|
||||
vdisk_file_devtype.threads_num = num_threads;
|
||||
vcdrom_devtype.threads_num = num_threads;
|
||||
|
||||
|
||||
@@ -10,6 +10,8 @@ SCST_DIR := $(shell pwd)/../scst/src
|
||||
EXTRA_CFLAGS += -I$(SCST_INC_DIR) -I$(SCST_DIR)
|
||||
EXTRA_CFLAGS += -Wextra -Wno-unused-parameter -Wno-missing-field-initializers
|
||||
|
||||
#EXTRA_CFLAGS += -DCONFIG_SCST_LOCAL_FORCE_DIRECT_PROCESSING
|
||||
|
||||
EXTRA_CFLAGS += -DCONFIG_SCST_EXTRACHECKS
|
||||
|
||||
#EXTRA_CFLAGS += -DCONFIG_SCST_TRACING
|
||||
|
||||
@@ -8,3 +8,15 @@ config SCST_LOCAL
|
||||
You will need the SCST subsystem as well.
|
||||
|
||||
If unsure whether you really want or need this, say N.
|
||||
|
||||
config SCST_LOCAL_FORCE_DIRECT_PROCESSING
|
||||
bool "Force direct processing"
|
||||
depends on SCST_LOCAL
|
||||
help
|
||||
This experimental option forces scst_local to make SCST process
|
||||
SCSI commands in the same context in which they were submitted.
|
||||
Otherwise, they will be processed in SCST threads. Setting this
|
||||
option to "Y" will give some performance increase, but might be
|
||||
unsafe.
|
||||
|
||||
If unsure, say "N".
|
||||
|
||||
@@ -65,7 +65,7 @@
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING)
|
||||
#if defined(CONFIG_SCST_DEBUG)
|
||||
#define trace_flag scst_local_trace_flag
|
||||
static unsigned long scst_local_trace_flag = SCST_LOCAL_DEFAULT_LOG_FLAGS;
|
||||
#endif
|
||||
@@ -529,11 +529,20 @@ static int scst_local_queuecommand(struct scsi_cmnd *SCpnt,
|
||||
/* Set the SGL things directly ... */
|
||||
scst_cmd_set_tgt_sg(scst_cmd, scsi_sglist(SCpnt), scsi_sg_count(SCpnt));
|
||||
|
||||
#ifdef CONFIG_SCST_LOCAL_FORCE_DIRECT_PROCESSING
|
||||
{
|
||||
struct Scsi_Host *h = SCpnt->device->host;
|
||||
spin_unlock_irq(h->host_lock);
|
||||
scst_cmd_init_done(scst_cmd, scst_estimate_context_direct());
|
||||
spin_lock_irq(h->host_lock);
|
||||
}
|
||||
#else
|
||||
/*
|
||||
* Unfortunately, we are called with IRQs disabled, so we have no choice,
|
||||
* except to pass the command to the thread context.
|
||||
*/
|
||||
scst_cmd_init_done(scst_cmd, SCST_CONTEXT_THREAD);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* We are done here I think. Other callbacks move us forward.
|
||||
|
||||
Reference in New Issue
Block a user