Polishing for in-tree

git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@486 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
Vladislav Bolkhovitin
2008-08-01 17:59:05 +00:00
parent 2828a3db2c
commit aab4e1d1ff
26 changed files with 1093 additions and 156 deletions

View File

@@ -0,0 +1,42 @@
#
# Makefile for user space only part of iSCSI-SCST
#
# Note! Dependencies are done automagically by 'make dep', which also
# removes any old dependencies. DON'T put your own dependencies here
# unless it's something special (not a .c file).
# Absolute path of this directory, evaluated once when the Makefile is parsed.
SUBDIRS := $(shell pwd)

# Default goal: generate the interface version header and build the
# user space programs.
all: include/iscsi_scst_itf_ver.h progs

# Build the user space programs in usr/.
# The generated header is an explicit prerequisite so that a parallel
# build cannot enter usr/ before the header exists (presumably the usr/
# sources include it -- confirm against usr/Makefile).
progs: include/iscsi_scst_itf_ver.h
	$(MAKE) -C usr

# Generate the interface version header: it defines
# ISCSI_SCST_INTERFACE_VERSION as the SHA1 sum of include/iscsi_scst.h.
# The file is written to a temporary name and renamed only on success,
# so a failed or interrupted run never leaves a truncated header that
# looks up to date. An empty checksum (e.g. sha1sum missing) is an error.
include/iscsi_scst_itf_ver.h: include/iscsi_scst.h
	ver=`sha1sum $< | awk '{printf "%s", $$1}'`; \
	if [ -z "$$ver" ]; then \
		echo "Unable to compute the SHA1 sum of $<" >&2; \
		exit 1; \
	fi; \
	{ \
		echo "/* Autogenerated, don't edit */"; \
		echo ""; \
		echo "#define ISCSI_SCST_INTERFACE_VERSION \"$$ver\""; \
	} >$@.tmp && mv -f $@.tmp $@

# Install the daemon and the init script matching the detected
# distribution. $(DISTDIR) is an optional staging prefix.
install: all
	@install -vD usr/iscsi-scstd $(DISTDIR)/usr/local/sbin/iscsi-scstd
#	-@install -vD usr/iscsi-scst-adm $(DISTDIR)/usr/local/sbin/iscsi-scst-adm
	if [ -f /etc/debian_version ]; then \
		install -vD -m 755 etc/initd/initd.debian $(DISTDIR)/etc/init.d/iscsi-scst; \
	elif [ -f /etc/redhat-release ]; then \
		install -vD -m 755 etc/initd/initd.redhat $(DISTDIR)/etc/init.d/iscsi-scst; \
	elif [ -f /etc/gentoo-release ]; then \
		install -vD -m 755 etc/initd/initd.gentoo $(DISTDIR)/etc/init.d/iscsi-scst; \
	elif [ -f /etc/slackware-version ]; then \
		install -vD -m 755 etc/initd/initd $(DISTDIR)/etc/rc.d/iscsi-scst; \
	else \
		install -vD -m 755 etc/initd/initd $(DISTDIR)/etc/init.d/iscsi-scst; \
	fi

# Remove build products, including the generated header and any leftover
# temporary copy of it.
clean:
	$(MAKE) -C usr clean
	rm -f include/iscsi_scst_itf_ver.h include/iscsi_scst_itf_ver.h.tmp

extraclean: clean

.PHONY: all progs install clean extraclean

View File

@@ -26,8 +26,15 @@ This version is compatible with SCST version 1.0.0 and higher.
Tested on 2.6.21.1 kernel, but it should also work on other versions,
starting from 2.6.16.x.
Installation
------------
Installation if your Linux kernel already has iSCSI-SCST built-in
-----------------------------------------------------------------
Simply run "make all", then "make install".
Installation out of Linux kernel tree
-------------------------------------
Basically as in README-IET, where file names are changed as specified
above.
@@ -106,6 +113,7 @@ or
- Unapply this patch and use iSCSI-SCST without it. Also report this
problem to the SCST mailing list scst-devel@lists.sourceforge.net.
Usage
-----
@@ -137,6 +145,7 @@ that issue.
CAUTION: Working of target and initiator on the same host isn't
======== supported. See SCST README file for details.
Known issues
------------
@@ -156,6 +165,7 @@ always be in sync with the running system.
But, if you decide to fix iscsi-scst-adm, your patches will be
appreciated.
Compilation options
-------------------
@@ -174,6 +184,7 @@ in/out in the kernel's module Makefile:
- CONFIG_SCST_ISCSI_DEBUG_DIGEST_FAILURES - simulates digest failures in
random places.
Creating version of put_page_callback patch for your kernel
-----------------------------------------------------------

View File

@@ -1,9 +1,11 @@
config SCST_ISCSI
tristate "SCST iSCSI Support"
depends on SCSI
tristate "ISCSI Target"
depends on SCST && INET
default SCST
help
iSCSI target support. The iSCSI protocol has been defined in
RFC 3720.
ISCSI target driver for SCST framework. The iSCSI protocol has been
defined in RFC 3720. To use it you should download from
http://scst.sourceforge.net the user space part of it.
config SCST_ISCSI_DEBUG_DIGEST_FAILURES
bool "Simulate iSCSI digest failures"
@@ -18,5 +20,6 @@ config SCST_ISCSI_DEBUG_DIGEST_FAILURES
digest on iSCSI packets in order to detect data corruption on an
end-to-end basis. CRCs can be used on iSCSI PDU headers and/or data.
Enabling this option allows to test digest failure recovery in the
iSCSI initiator that is talking to SCST. If unsure, disable this
option.
iSCSI initiator that is talking to SCST.
If unsure, say "N".

View File

@@ -1,5 +1,6 @@
EXTRA_CFLAGS += -Iinclude/scst
obj-m := iscsi-scst.o
iscsi-scst-objs := iscsi.o nthread.o config.o digest.o \
iscsi-scst-y := iscsi.o nthread.o config.o digest.o \
conn.o session.o target.o event.o param.o
# Kconfig option SCST_ISCSI is visible to kbuild as CONFIG_SCST_ISCSI;
# without the CONFIG_ prefix the variable is empty and nothing is built.
obj-$(CONFIG_SCST_ISCSI) += iscsi-scst.o

View File

@@ -4,6 +4,6 @@ config FUSION_SCST
---help---
This module enables target mode for use by the SCST middle
level drivers. You will also need the SCST middle level
drivers from http://scst.sf.net/.
drivers from http://scst.sourceforge.net/.
If unsure whether you really want or need this, say N.

View File

@@ -9,8 +9,8 @@ Index: qla2x00t/qla2x00-target/Makefile
-EXTRA_CFLAGS += -DCONFIG_SCST_EXTRACHECKS
+#EXTRA_CFLAGS += -DCONFIG_SCST_EXTRACHECKS
#EXTRA_CFLAGS += -DCONFIG_SCST_TRACING
-EXTRA_CFLAGS += -DDEBUG_TGT -g -W -Wno-unused-parameter
+#EXTRA_CFLAGS += -DDEBUG_TGT -g -W -Wno-unused-parameter
#EXTRA_CFLAGS += -DDEBUG_WORK_IN_THREAD
-EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG -g -W -Wno-unused-parameter
+#EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG -g -W -Wno-unused-parameter
#EXTRA_CFLAGS += -DCONFIG_QLA_TGT_DEBUG_WORK_IN_THREAD
ifeq ($(KVER),)

View File

@@ -8,10 +8,10 @@ Index: qla2x00t/qla2x00-target/Makefile
-EXTRA_CFLAGS += -DCONFIG_SCST_EXTRACHECKS
-#EXTRA_CFLAGS += -DCONFIG_SCST_TRACING
-EXTRA_CFLAGS += -DDEBUG_TGT -g -W -Wno-unused-parameter
-EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG -g -W -Wno-unused-parameter
+#EXTRA_CFLAGS += -DCONFIG_SCST_EXTRACHECKS
+EXTRA_CFLAGS += -DCONFIG_SCST_TRACING
+#EXTRA_CFLAGS += -DDEBUG_TGT -g -W -Wno-unused-parameter
#EXTRA_CFLAGS += -DDEBUG_WORK_IN_THREAD
+#EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG -g -W -Wno-unused-parameter
#EXTRA_CFLAGS += -DCONFIG_QLA_TGT_DEBUG_WORK_IN_THREAD
ifeq ($(KVER),)

View File

@@ -27,14 +27,11 @@ config SCSI_QLA_FC
ftp://ftp.qlogic.com/outgoing/linux/firmware/
config SCSI_QLA2XXX_TARGET
bool "QLogic 2xxx target mode support"
depends on SCSI_QLA_FC
default y
bool "QLogic 2XXX target mode support"
depends on SCSI_QLA_FC && SCST
default SCST
---help---
This option enables target mode hooks used by the SCST QLA2x00tgt driver.
This option enables target mode hooks used by the SCST qla2x00t driver.
Once the qla2x00tgt module is loaded, target mode can be enable via a
sysfs interface under scsi_host, thus enabling target mode for specific
cards.
You will also need to download SCST SCSI middle level drivers from
http://scst.sf.net.

View File

@@ -0,0 +1,19 @@
config SCST_QLA_TGT_ADDON
tristate "QLogic 2XXX Target Mode Add-On"
depends on SCST && SCSI_QLA_FC && SCSI_QLA2XXX_TARGET
default SCST
help
Target mode add-on driver for QLogic 2xxx Fibre Channel host adapters.
Visit http://scst.sourceforge.net for more info about this driver.
config QLA_TGT_DEBUG_WORK_IN_THREAD
bool "Use threads context only"
depends on SCST_QLA_TGT_ADDON
help
Makes SCST process incoming commands from the qla2x00t target
driver and call the driver's callbacks in internal SCST
threads context instead of SIRQ context, where those commands
were received. Useful for debugging, but leads to some
performance loss.
If unsure, say "N".

View File

@@ -31,14 +31,14 @@
SCST_INC_DIR := $(SUBDIRS)/../../scst/include
SCST_DIR := $(shell pwd)/../../scst/src
EXTRA_CFLAGS += -I$(SCST_INC_DIR) -DFC_TARGET_SUPPORT
EXTRA_CFLAGS += -I$(SCST_INC_DIR)
INSTALL_DIR := /lib/modules/$(shell uname -r)/extra
EXTRA_CFLAGS += -DCONFIG_SCST_EXTRACHECKS
#EXTRA_CFLAGS += -DCONFIG_SCST_TRACING
EXTRA_CFLAGS += -DDEBUG_TGT -g -W -Wno-unused-parameter
#EXTRA_CFLAGS += -DDEBUG_WORK_IN_THREAD
EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG -g -W -Wno-unused-parameter
#EXTRA_CFLAGS += -DCONFIG_QLA_TGT_DEBUG_WORK_IN_THREAD
ifeq ($(KVER),)
ifeq ($(KDIR),)

View File

@@ -0,0 +1,5 @@
# In-tree kbuild makefile for the QLogic 2xxx target mode add-on.

# SCST headers are looked up under include/scst of the kernel tree.
EXTRA_CFLAGS += -Iinclude/scst

# NOTE(review): the composite module lists an object with the module's own
# name; confirm this is the intended object file for the driver source.
qla2x00tgt-y := qla2x00tgt.o

# Kconfig option SCST_QLA_TGT_ADDON is visible to kbuild as
# CONFIG_SCST_QLA_TGT_ADDON; without the CONFIG_ prefix the variable is
# empty and the module is never built.
obj-$(CONFIG_SCST_QLA_TGT_ADDON) += qla2x00tgt.o

View File

@@ -86,16 +86,18 @@ Compilation options
There are the following compilation options, that could be commented
in/out in Makefile:
- DEBUG_TGT - turns on some debugging code, including some logging. Makes
the driver considerably bigger and slower, producing large amount of
- CONFIG_SCST_DEBUG - turns on some debugging code, including some logging.
Makes the driver considerably bigger and slower, producing large amount of
log data.
- CONFIG_SCST_TRACING - turns on ability to log events. Makes the driver
considerably bigger and leads to some performance loss.
- DEBUG_WORK_IN_THREAD - makes SCST process incoming commands and
call the driver's callbacks in SCST thread context instead of tasklet
one. Useful for debugging and lead to some performance loss.
- CONFIG_QLA_TGT_DEBUG_WORK_IN_THREAD - makes SCST process incoming
commands from the qla2x00t target driver and call the driver's
callbacks in internal SCST threads context instead of SIRQ context,
where those commands were received. Useful for debugging and lead to
some performance loss.
Credits
-------

View File

@@ -245,9 +245,6 @@ How to configure QLogic target driver for 22xx/23xx cards
But as I've already loaded the device handler(scst_vdisk) on the target,
hence I don't need to perform step 17. </br></br>
[root@initiator ] echo "scsi add-single-device 11 0 0 0" >/proc/scsi/scsi</br>
where A: host number</br>
B: Lun</br></br></br>
18:After enabling the target mode on the target (see step 16),
to see new targets and devices you need either to rescan
@@ -260,9 +257,8 @@ How to configure QLogic target driver for 22xx/23xx cards
Alternatively, if your initiator already connected to the target and you simply added new devices to it, you can try to run
a manual rescan by:</br>
[root@initiator ] echo "scsi add-single-device 11 0 0 B" >/proc/scsi/scsi</br>
where A: host number</br>
B: Lun</br></br></br>
[root@initiator ] echo "- - -" >/sys/class/scsi_host/hostX/scan</br>
where X: host number</br></br></br>
</body>
</html>

View File

@@ -33,18 +33,10 @@
#include <scst.h>
/* Necessary to have equal structures with the initiator */
#if defined(FC_IP_SUPPORT)
#include <linux/ip.h>
#include <linux/if_arp.h>
#include <linux/skbuff.h>
#include "qla_ip.h"
#endif
#include "qla2x00t.h"
#ifndef FC_TARGET_SUPPORT
#error "FC_TARGET_SUPPORT is NOT DEFINED"
#if !defined(CONFIG_SCSI_QLA2XXX_TARGET)
#error "CONFIG_SCSI_QLA2XXX_TARGET is NOT DEFINED"
#endif
#ifdef CONFIG_SCST_DEBUG
@@ -88,7 +80,7 @@ struct scst_tgt_template tgt_template = {
name: "qla2x00tgt",
sg_tablesize: 0,
use_clustering: 1,
#ifdef DEBUG_WORK_IN_THREAD
#ifdef CONFIG_QLA_TGT_DEBUG_WORK_IN_THREAD
xmit_response_atomic: 0,
rdy_to_xfer_atomic: 0,
#else
@@ -681,7 +673,7 @@ static int q2t_xmit_response(struct scst_cmd *scst_cmd)
TRACE_ENTRY();
TRACE(TRACE_SCSI, "tag=%Ld", scst_cmd_get_tag(scst_cmd));
#ifdef DEBUG_WORK_IN_THREAD
#ifdef CONFIG_QLA_TGT_DEBUG_WORK_IN_THREAD
if (scst_cmd_atomic(scst_cmd))
return SCST_TGT_RES_NEED_THREAD_CTX;
#endif
@@ -838,7 +830,7 @@ static int q2t_rdy_to_xfer(struct scst_cmd *scst_cmd)
TRACE_ENTRY();
TRACE(TRACE_SCSI, "tag=%Ld", scst_cmd_get_tag(scst_cmd));
#ifdef DEBUG_WORK_IN_THREAD
#ifdef CONFIG_QLA_TGT_DEBUG_WORK_IN_THREAD
if (scst_cmd_atomic(scst_cmd))
return SCST_TGT_RES_NEED_THREAD_CTX;
#endif
@@ -1125,7 +1117,7 @@ static void q2t_do_ctio_completion(scsi_qla_host_t *ha,
if (status != CTIO_SUCCESS)
rx_status = SCST_RX_STATUS_ERROR;
#ifdef DEBUG_WORK_IN_THREAD
#ifdef CONFIG_QLA_TGT_DEBUG_WORK_IN_THREAD
context = SCST_CONTEXT_THREAD;
#endif
@@ -1267,7 +1259,7 @@ static int q2t_do_send_cmd_to_scst(scsi_qla_host_t *ha, struct q2t_cmd *cmd)
break;
}
#ifdef DEBUG_WORK_IN_THREAD
#ifdef CONFIG_QLA_TGT_DEBUG_WORK_IN_THREAD
context = SCST_CONTEXT_THREAD;
#else
context = SCST_CONTEXT_TASKLET;

View File

@@ -26,10 +26,6 @@
#include <qla_def.h>
#include "qla2x_tgt_def.h"
#ifdef DEBUG_TGT
#define CONFIG_SCST_DEBUG
#endif
#include <scst_debug.h>
/* Version numbers, the same as for the kernel */

View File

@@ -144,19 +144,11 @@ cat "${kpatch[@]}"
# Directory include/scst/
# Make sure the file iscsi-scst/iscsi_scst_itf_ver.h is up to date.
make -s -C iscsi-scst include/iscsi_scst_itf_ver.h
for f in scst/include/*h
do
add_file "${f}" "include/scst/${f#scst/include/}"
done
for f in iscsi-scst/include/*h
do
add_file "${f}" "include/scst/${f#iscsi-scst/include/}"
done
# Directory drivers/
@@ -173,7 +165,7 @@ add_file "scst/kernel/in-tree/Kconfig.scst" "drivers/scst/Kconfig"
add_file "scst/kernel/in-tree/Makefile.scst" "drivers/scst/Makefile"
add_file "scst/README" "Documentation/scst/README.scst"
add_file "scst/README_in-tree" "Documentation/scst/README.scst"
for f in scst/src/*.[ch]
do
@@ -194,13 +186,19 @@ done
# Directory drivers/scst/iscsi-scst/
# Make sure the file iscsi-scst/iscsi_scst_itf_ver.h is up to date.
make -s -C iscsi-scst include/iscsi_scst_itf_ver.h
for f in iscsi-scst/include/*h
do
add_file "${f}" "include/scst/${f#iscsi-scst/include/}"
done
add_file "iscsi-scst/kernel/Makefile.in-kernel" \
"drivers/scst/iscsi-scst/Makefile"
add_file "iscsi-scst/kernel/Kconfig" "drivers/scst/iscsi-scst/Kconfig"
add_file "iscsi-scst/README" "Documentation/scst/README.iscsi"
for f in iscsi-scst/kernel/*.[ch]
do
add_file "${f}" "drivers/scst/iscsi-scst/${f#iscsi-scst/kernel/}"
@@ -211,17 +209,17 @@ done
if [ "${qla2x00t}" = "true" ]; then
#add_file "qla2x00t/qla2x00-target/Makefile.in-kernel" \
# "drivers/scst/qla2x00-target/Makefile"
add_file "qla2x00t/qla2x00-target/Makefile_in-tree" \
"drivers/scst/qla2x00-target/Makefile"
#add_file "qla2x00t/qla2x00-target/Kconfig" \
# "drivers/scst/qla2x00-target/Kconfig"
add_file "qla2x00t/qla2x00-target/Kconfig" \
"drivers/scst/qla2x00-target/Kconfig"
#add_file "qla2x00t/qla2x_tgt_def.h" \
# "drivers/scst/qla2x00-target/qla2x_tgt_def.h"
add_file "qla2x00t/qla2x00-target/README" \
"Documentation/scst/README.qla2x00t"
# add_file "qla2x00t/qla2x00-target/README" \
# "Documentation/scst/README.qla2x00t"
for f in qla2x00t/qla2x00-target/*.[ch]
do

View File

@@ -40,8 +40,6 @@ In addition, SCST supports advanced per-initiator access and devices
visibility management, so different initiators could see different set
of devices with different access permissions. See below for details.
This is quite stable (but still beta) version.
Installation
------------
@@ -106,8 +104,8 @@ IMPORTANT: Without loading appropriate device handler, corresponding devices
in the LUN addressing, so automatic device scanning by remote SCSI
mid-level could not notice the devices. Therefore you will have
to add them manually via
'echo "scsi add-single-device A 0 0 B" >/proc/scsi/scsi',
where A - is the host number, B - LUN.
'echo "- - -" >/sys/class/scsi_host/hostX/scan',
where X - is the host number.
IMPORTANT: Working of target and initiator on the same host isn't
========= supported. This is a limitation of the Linux memory/cache
@@ -129,11 +127,6 @@ IMPORTANT: In the current version simultaneous access to local SCSI devices
To uninstall, type 'make scst_uninstall'.
If you install QLA2x00 target driver's source code in this directory,
then you can build, install or uninstall it by typing 'make qla', 'make
qla_install' or 'make qla_uninstall' correspondingly. For more details
about QLA2x00 target drivers see their README files.
Device handlers
---------------
@@ -261,9 +254,10 @@ in/out in Makefile:
Then sometimes get crazy itself. So, this option is disabled by
default.
- MEASURE_LATENCY - if defined, provides in /proc/scsi_tgt/latency file
average commands processing latency. You can clear already measured
results by writing 0 in this file.
- CONFIG_SCST_MEASURE_LATENCY - if defined, provides in /proc/scsi_tgt/latency
file average commands processing latency. You can clear already
measured results by writing 0 in this file. Note, you need a
non-preemptible kernel to have correct results.
HIGHMEM kernel configurations are fully supported, but not recommended
for performance reasons, except for scst_user, where they are not
@@ -324,7 +318,20 @@ entries:
- "version" file, which shows version of SCST
- "trace_level" file, which allows to read and set trace (logging) level
for SCST. See "help" file for list of trace levels.
for SCST. See "help" file for list of trace levels. If you want to
enable logging options, which produce a lot of events, like "debug",
to not lose logged events you should also:
* Increase in .config of your kernel CONFIG_LOG_BUF_SHIFT variable
to much bigger value, then recompile it. For example, I use 25,
but to use it I needed to modify the maximum allowed value for
CONFIG_LOG_BUF_SHIFT in the corresponding Kconfig.
* Change in your /etc/syslog.conf or other config file of your favorite
logging program to store kernel logs in async manner. For example,
I added in my rsyslog.conf line "kern.info -/var/log/kernel"
and added "kern.none" in line for /var/log/messages, so I had:
"*.info;kern.none;mail.none;authpriv.none;cron.none /var/log/messages"
Each dev handler has own subdirectory. Most dev handler have only two
files in this subdirectory: "trace_level" and "type". The first one is
@@ -643,14 +650,15 @@ II. In order to get the maximum performance you should:
1. For SCST:
- Disable in Makefile CONFIG_SCST_STRICT_SERIALIZING, CONFIG_SCST_EXTRACHECKS,
CONFIG_SCST_TRACING, DEBUG*, CONFIG_SCST_STRICT_SECURITY
CONFIG_SCST_TRACING, CONFIG_SCST_DEBUG*, CONFIG_SCST_STRICT_SECURITY
- For pass-through devices enable
CONFIG_SCST_ALLOW_PASSTHROUGH_IO_SUBMIT_IN_SIRQ.
2. For target drivers:
- Disable in Makefiles CONFIG_SCST_EXTRACHECKS, CONFIG_SCST_TRACING, DEBUG*
- Disable in Makefiles CONFIG_SCST_EXTRACHECKS, CONFIG_SCST_TRACING,
CONFIG_SCST_DEBUG*
3. For device handlers, including VDISK:
@@ -667,8 +675,8 @@ IMPORTANT: Some of the compilation options enabled by default, i.e. SCST
than for performance.
If you use SCST version taken directly from the SVN repository, you can
set the above options, except CONFIG_SCST_ALLOW_PASSTHROUGH_IO_SUBMIT_IN_SIRQ, using
debug2perf Makefile target.
set the above options, except CONFIG_SCST_ALLOW_PASSTHROUGH_IO_SUBMIT_IN_SIRQ,
using debug2perf Makefile target.
4. For other target and initiator software parts:

823
scst/README_in-tree Normal file
View File

@@ -0,0 +1,823 @@
Generic SCSI target mid-level for Linux (SCST)
==============================================
SCST is designed to provide unified, consistent interface between SCSI
target drivers and Linux kernel and simplify target drivers development
as much as possible. A detailed description of SCST's features and internals
can be found in the "Generic SCSI Target Middle Level for Linux" document
on SCST's Internet page http://scst.sourceforge.net.
SCST supports the following I/O modes:
* Pass-through mode with one to many relationship, i.e. when multiple
initiators can connect to the exported pass-through devices, for
the following SCSI devices types: disks (type 0), tapes (type 1),
processors (type 3), CDROMs (type 5), MO disks (type 7), medium
changers (type 8) and RAID controllers (type 0xC)
* FILEIO mode, which allows to use files on file systems or block
devices as virtual remotely available SCSI disks or CDROMs with
benefits of the Linux page cache
* BLOCKIO mode, which performs direct block IO with a block device,
bypassing page-cache for all operations. This mode works ideally with
high-end storage HBAs and for applications that either do not need
caching between application and disk or need the large block
throughput
* User space mode using scst_user device handler, which allows to
implement in the user space virtual SCSI devices in the SCST
environment
* "Performance" device handlers, which provide in pseudo pass-through
mode a way for direct performance measurements without overhead of
actual data transferring from/to underlying SCSI device
In addition, SCST supports advanced per-initiator access and devices
visibility management, so different initiators could see different set
of devices with different access permissions. See below for details.
This is quite stable (but still beta) version.
Installation
------------
To see your devices remotely, you need to add them to at least "Default"
security group (see below how). By default, no local devices are seen
remotely. There must be LUN 0 in each security group, i.e. LUs
numeration must not start from, e.g., 1.
It is highly recommended to use scstadmin utility for configuring
devices and security groups.
If you experience problems during modules load or running, check your
kernel logs (or run dmesg command for the few most recent messages).
IMPORTANT: Without loading appropriate device handler, corresponding devices
========= will be invisible for remote initiators, which could lead to holes
in the LUN addressing, so automatic device scanning by remote SCSI
mid-level could not notice the devices. Therefore you will have
to add them manually via
'echo "- - -" >/sys/class/scsi_host/hostX/scan',
where X - is the host number.
IMPORTANT: Working of target and initiator on the same host isn't
========= supported. This is a limitation of the Linux memory/cache
manager, because in this case an OOM deadlock like: system
needs some memory -> it decides to clear some cache -> cache
needs to write on a target exported device -> initiator sends
request to the target -> target needs memory -> problem is
possible.
IMPORTANT: In the current version simultaneous access to local SCSI devices
========= via standard high-level SCSI drivers (sd, st, sg, etc.) and
SCST's target drivers is unsupported. Especially it is
important for execution via sg and st commands that change
the state of devices and their parameters, because that could
lead to data corruption. If any such command is done, at
least related device handler(s) must be restarted. For block
devices READ/WRITE commands using direct disk handler look to
be safe.
Device handlers
---------------
Device specific drivers (device handlers) are plugins for SCST, which
help SCST to analyze incoming requests and determine parameters,
specific to various types of devices. If an appropriate device handler
for a SCSI device type isn't loaded, SCST doesn't know how to handle
devices of this type, so they will be invisible for remote initiators
(more precisely, "LUN not supported" sense code will be returned).
In addition to device handlers for real devices, there are VDISK, user
space and "performance" device handlers.
VDISK device handler works over files on file systems and makes from
them virtual remotely available SCSI disks or CDROM's. In addition, it
allows to work directly over a block device, e.g. local IDE or SCSI disk
or even a disk partition, where there is no file system overhead. Using
block devices comparing to sending SCSI commands directly to SCSI
mid-level via scsi_do_req()/scsi_execute_async() has advantage that data
are transferred via system cache, so it is possible to fully benefit from
caching and read ahead performed by Linux's VM subsystem. The only
disadvantage here that in the FILEIO mode there is superfluous data
copying between the cache and SCST's buffers. This issue is going to be
addressed in the next release. Virtual CDROM's are useful for remote
installation. See below for details how to setup and use VDISK device
handler.
SCST user space device handler provides an interface between SCST and
the user space, which allows to create pure user space devices. The
simplest example, where one would want it is if he/she wants to write a
VTL. With scst_user he/she can write it purely in the user space. Or one
would want it if he/she needs some processing of the passed data that is
too sophisticated for kernel space, like encrypting them or making snapshots.
"Performance" device handlers for disks, MO disks and tapes in their
exec() method skip (pretend to execute) all READ and WRITE operations
and thus provide a way for direct link performance measurements without
overhead of actual data transferring from/to underlying SCSI device.
NOTE: Since "perf" device handlers on READ operations don't touch the
==== commands' data buffer, it is returned to remote initiators as it
was allocated, without even being zeroed. Thus, "perf" device
handlers impose some security risk, so use them with caution.
Compilation options
-------------------
There are the following compilation options, that could be changed using
your favorite kernel configuration Makefile target, e.g. "make xconfig":
- CONFIG_SCST_DEBUG - if defined, turns on some debugging code,
including some logging. Makes the driver considerably bigger and slower,
producing large amount of log data.
- CONFIG_SCST_TRACING - if defined, turns on ability to log events. Makes the
driver considerably bigger and leads to some performance loss.
- CONFIG_SCST_EXTRACHECKS - if defined, adds extra validity checks in
the various places.
- CONFIG_SCST_USE_EXPECTED_VALUES - if not defined (default), initiator
supplied expected data transfer length and direction will be used only for
verification purposes to return error or warn in case if one of them
is invalid. Instead, locally decoded from SCSI command values will be
used. This is necessary for security reasons, because otherwise a
faulty initiator can crash target by supplying invalid value in one
of those parameters. This is especially important in case of
pass-through mode. If CONFIG_SCST_USE_EXPECTED_VALUES is defined, initiator
supplied expected data transfer length and direction will override
the locally decoded values. This might be necessary if internal SCST
commands translation table doesn't contain SCSI command, which is
used in your environment. You can know that if you have messages like
"Unknown opcode XX for YY. Should you update scst_scsi_op_table?" in
your kernel log and your initiator returns an error. Also report
those messages in the SCST mailing list
scst-devel@lists.sourceforge.net. Note, that not all SCSI transports
support supplying expected values.
- CONFIG_SCST_DEBUG_TM - if defined, turns on task management functions
debugging, when on LUN 0 in the default access control group some of the
commands will be delayed for about 60 sec., so making the remote
initiator send TM functions, eg ABORT TASK and TARGET RESET. Also
define CONFIG_SCST_TM_DBG_GO_OFFLINE symbol in the Makefile if you
want that the device eventually become completely unresponsive, or
otherwise to circle around ABORTs and RESETs code. Needs CONFIG_SCST_DEBUG
turned on.
- CONFIG_SCST_STRICT_SERIALIZING - if defined, makes SCST send all commands to
underlying SCSI device synchronously, one after one. This makes task
management more reliable, with cost of some performance penalty. This
is mostly relevant for stateful SCSI devices like tapes, where the
result of a command's execution depends on the device's settings defined
by previous commands. Disk and RAID devices are stateless in most
cases. The current SCSI core in Linux doesn't allow aborting all
commands reliably if they are sent asynchronously to a stateful device.
Turned off by default, turn it on if you use stateful device(s) and
need as much error recovery reliability as possible. As a side
effect, no kernel patching is necessary.
- CONFIG_SCST_ALLOW_PASSTHROUGH_IO_SUBMIT_IN_SIRQ - if defined, it will be
allowed to submit pass-through commands to real SCSI devices via the SCSI
middle layer using scsi_execute_async() function from soft IRQ
context (tasklets). This used to be the default, but currently it
seems the SCSI middle layer starts expecting only thread context on
the IO submit path, so it is disabled now by default. Enabling it
will decrease amount of context switches and improve performance. It
is more or less safe, in the worst case, if in your configuration the
SCSI middle layer really doesn't expect SIRQ context in
scsi_execute_async() function, you will get a warning message in the
kernel log.
- CONFIG_SCST_STRICT_SECURITY - if defined, makes SCST zero allocated data
buffers. Undefining it (default) considerably improves performance
and eases CPU load, but could create a security hole (information
leakage), so enable it, if you have strict security requirements.
- CONFIG_SCST_ABORT_CONSIDER_FINISHED_TASKS_AS_NOT_EXISTING - if defined,
in case when TASK MANAGEMENT function ABORT TASK is trying to abort a
command, which has already finished, remote initiator, which sent the
ABORT TASK request, will receive TASK NOT EXIST (or ABORT FAILED)
response for the ABORT TASK request. This is more logical response,
since, because the command finished, attempt to abort it failed, but
some initiators, particularly VMware iSCSI initiator, consider TASK
NOT EXIST response as if the target got crazy and try to RESET it.
Then sometimes get crazy itself. So, this option is disabled by
default.
- CONFIG_SCST_MEASURE_LATENCY - if defined, provides in /proc/scsi_tgt/latency
file average commands processing latency. You can clear already
measured results by writing 0 in this file. Note, you need a
non-preemptible kernel to have correct results.
HIGHMEM kernel configurations are fully supported, but not recommended
for performance reasons, except for scst_user, where they are not
supported, because this module deals with user supplied memory in a
zero-copy manner. If you need to use it, consider changing the VMSPLIT option
or use 64-bit system configuration instead.
For changing VMSPLIT option (CONFIG_VMSPLIT to be precise) you should in
"make menuconfig" command set the following variables:
- General setup->Configure standard kernel features (for small systems): ON
- Processor type and features->High Memory Support: OFF
- Processor type and features->Memory split: according to amount of
memory you have. If it is less than 800MB, you may not touch this
option at all.
Module parameters
-----------------
Module scst supports the following parameters:
- scst_threads - allows to set count of SCST's threads. By default it
is CPU count.
- scst_max_cmd_mem - sets maximum amount of memory in Mb allowed to be
consumed by the SCST commands for data buffers at any given time. By
default it is approximately TotalMem/4.
SCST "/proc" commands
---------------------
For communications with user space programs SCST provides proc-based
interface in "/proc/scsi_tgt" directory. It contains the following
entries:
- "help" file, which provides online help for SCST commands
- "scsi_tgt" file, which on read provides information of serving by SCST
devices and their dev handlers. On write it supports the following
command:
* "assign H:C:I:L HANDLER_NAME" assigns dev handler "HANDLER_NAME"
on device with host:channel:id:lun
- "sessions" file, which lists currently connected initiators (open sessions)
- "sgv" file provides some statistic about with which block sizes
commands from remote initiators come and how effective sgv_pool in
serving those allocations from the cache, i.e. without memory
allocations requests to the kernel. "Size" - is the commands data
size upper rounded to power of 2, "Hit" - how many there are
allocations from the cache, "Total" - total number of allocations.
- "threads" file, which allows to read and set number of SCST's threads
- "version" file, which shows version of SCST
- "trace_level" file, which allows to read and set trace (logging) level
for SCST. See "help" file for list of trace levels. If you want to
enable logging options, which produce a lot of events, like "debug",
to not lose logged events you should also:
* Increase in .config of your kernel CONFIG_LOG_BUF_SHIFT variable
to much bigger value, then recompile it. For example, I use 25,
but to use it I needed to modify the maximum allowed value for
CONFIG_LOG_BUF_SHIFT in the corresponding Kconfig.
* Change in your /etc/syslog.conf or other config file of your favorite
logging program to store kernel logs in async manner. For example,
I added in my rsyslog.conf line "kern.info -/var/log/kernel"
and added "kern.none" in line for /var/log/messages, so I had:
"*.info;kern.none;mail.none;authpriv.none;cron.none /var/log/messages"
Each dev handler has its own subdirectory. Most dev handlers have only two
files in this subdirectory: "trace_level" and "type". The first one is
similar to main SCST "trace_level" file, the latter one shows SCSI type
number of this handler as well as some text description.
For example, "echo "assign 1:0:1:0 dev_disk" >/proc/scsi_tgt/scsi_tgt"
will assign device handler "dev_disk" to real device sitting on host 1,
channel 0, ID 1, LUN 0.
Access and devices visibility management (LUN masking)
------------------------------------------------------
Access and devices visibility management allows for an initiator or
group of initiators to have different views of LUs/LUNs (security groups)
each with appropriate access permissions. It is highly recommended to
use scstadmin utility for that purpose instead of described in this
section low level interface.
Initiator is represented as an SCST session. The session is bound to
security group on its registration time by character "name" parameter of
the registration function, which provided by target driver, based on its
internal authentication. For example, for FC "name" could be WWN or just
loop ID. For iSCSI this could be iSCSI login credentials or iSCSI
initiator name. Each security group has set of names assigned to it by
system administrator. Session is bound to security group with provided
name. If no such group is found, the session is bound to either the
"Default_target_name" or the "Default" group, depending on whether
"Default_target_name" exists or not. In "Default_target_name", "target_name"
stands for the name of the target.
In /proc/scsi_tgt each group represented as "groups/GROUP_NAME/"
subdirectory. In it there are files "devices" and "names". File
"devices" lists all devices and their LUNs in the group, file "names"
lists all names that should be bound to this group.
To configure access and devices visibility management SCST provides the
following files and directories under /proc/scsi_tgt:
- "add_group GROUP" to /proc/scsi_tgt/scsi_tgt adds group "GROUP"
- "del_group GROUP" to /proc/scsi_tgt/scsi_tgt deletes group "GROUP"
- "add H:C:I:L lun [READ_ONLY]" to /proc/scsi_tgt/groups/GROUP/devices adds
device with host:channel:id:lun as LUN "lun" in group "GROUP". Optionally,
the device could be marked as read only.
- "del H:C:I:L" to /proc/scsi_tgt/groups/GROUP/devices deletes device with
host:channel:id:lun from group "GROUP"
- "add V_NAME lun [READ_ONLY]" to /proc/scsi_tgt/groups/GROUP/devices adds
device with virtual name "V_NAME" as LUN "lun" in group "GROUP".
Optionally, the device could be marked as read only.
- "del V_NAME" to /proc/scsi_tgt/groups/GROUP/devices deletes device with
virtual name "V_NAME" from group "GROUP"
- "clear" to /proc/scsi_tgt/groups/GROUP/devices clears the list of devices
for group "GROUP"
- "add NAME" to /proc/scsi_tgt/groups/GROUP/names adds name "NAME" to group
"GROUP"
- "del NAME" to /proc/scsi_tgt/groups/GROUP/names deletes name "NAME" from group
"GROUP"
- "clear" to /proc/scsi_tgt/groups/GROUP/names clears the list of names
for group "GROUP"
There must be LUN 0 in each security group, i.e. LUs numeration must not
start from, e.g., 1.
Examples:
- "echo "add 1:0:1:0 0" >/proc/scsi_tgt/groups/Default/devices" will
add real device sitting on host 1, channel 0, ID 1, LUN 0 to "Default"
group with LUN 0.
- "echo "add disk1 1" >/proc/scsi_tgt/groups/Default/devices" will
add virtual VDISK device with name "disk1" to "Default" group
with LUN 1.
VDISK device handler
--------------------
After loading VDISK device handler creates in "/proc/scsi_tgt/"
subdirectories "vdisk" and "vcdrom". They have similar layout:
- "trace_level" and "type" files as described for other dev handlers
- "help" file, which provides online help for VDISK commands
- "vdisk"/"vcdrom" files, which on read provides information of
currently open device files. On write it supports the following
command:
* "open NAME [PATH] [BLOCK_SIZE] [FLAGS]" - opens file "PATH" as
device "NAME" with block size "BLOCK_SIZE" bytes with flags
"FLAGS". "PATH" could be empty only for VDISK CDROM. "BLOCK_SIZE"
and "FLAGS" are valid only for disk VDISK. The block size must be
power of 2 and >= 512 bytes. Default is 512. Possible flags:
- WRITE_THROUGH - write back caching disabled. Note, this option
makes sense only if you also *manually* disable write-back cache
in *all* your backstorage devices and make sure it's actually
disabled, since many devices are known to lie about this mode to
get better benchmark results.
- READ_ONLY - read only
- O_DIRECT - both read and write caching disabled. This mode
isn't currently fully implemented, you should use user space
fileio_tgt program in O_DIRECT mode instead (see below).
- NULLIO - in this mode no real IO will be done, but success will be
returned. Intended to be used for performance measurements in the same
way as "*_perf" handlers.
- NV_CACHE - enables "non-volatile cache" mode. In this mode it is
assumed that the target has a GOOD UPS with ability to cleanly
shutdown target in case of power failure and it is
software/hardware bugs free, i.e. all data from the target's
cache are guaranteed sooner or later to go to the media. Hence
all data synchronization with media operations, like
SYNCHRONIZE_CACHE, are ignored in order to bring more
performance. Also in this mode target reports to initiators that
the corresponding device has write-through cache to disable all
write-back cache workarounds used by initiators. Use with
extreme caution, since in this mode after a crash of the target
journaled file systems don't guarantee the consistency after
journal recovery, therefore manual fsck MUST be run. Note, that
since usually the journal barrier protection (see "IMPORTANT"
note below) turned off, enabling NV_CACHE could change nothing
from data protection point of view, since no data
synchronization with media operations will go from the
initiator. This option overrides WRITE_THROUGH.
- BLOCKIO - enables block mode, which will perform direct block
IO with a block device, bypassing page-cache for all operations.
This mode works ideally with high-end storage HBAs and for
applications that either do not need caching between application
and disk or need the large block throughput. See also below.
- REMOVABLE - with this flag set the device is reported to remote
initiators as removable.
* "close NAME" - closes device "NAME".
* "change NAME [PATH]" - changes a virtual CD in the VDISK CDROM.
By default, if neither BLOCKIO, nor NULLIO option is supplied, FILEIO
mode is used.
For example, "echo "open disk1 /vdisks/disk1" >/proc/scsi_tgt/vdisk/vdisk"
will open file /vdisks/disk1 as virtual FILEIO disk with name "disk1".
CAUTION: If you partitioned/formatted your device with block size X, *NEVER*
======== ever try to export and then mount it (even accidentally) with another
block size. Otherwise you can *instantly* damage it pretty
badly as well as all your data on it. Messages on initiator
like: "attempt to access beyond end of device" is the sign of
such damage.
Moreover, if you want to compare how well different block sizes
work for you, you **MUST** EVERY TIME AFTER CHANGING BLOCK SIZE
**COMPLETELY** **WIPE OFF** ALL THE DATA FROM THE DEVICE. In
other words, THE **WHOLE** DEVICE **MUST** HAVE ONLY **ZEROS**
AS THE DATA AFTER YOU SWITCH TO NEW BLOCK SIZE. Switching block
sizes isn't like switching between FILEIO and BLOCKIO, after
changing block size all previously written with another block
size data MUST BE ERASED. Otherwise you will have a full set of
very weird behaviors, because blocks addressing will be
changed, but initiators in most cases will not have a
possibility to detect that old addresses written on the device
in, e.g., partition table, don't refer anymore to what they are
intended to refer.
IMPORTANT: By default for performance reasons VDISK FILEIO devices use write
========= back caching policy. This is generally safe from the consistence of
journaled file systems, laying over them, point of view, but
your unsaved cached data will be lost in case of
power/hardware/software failure, so you must supply your
target server with some kind of UPS or disable write back
caching using WRITE_THROUGH flag. You also should note, that
the file systems journaling over write back caching enabled
devices works reliably *ONLY* if the order of journal writes
is guaranteed or it uses some kind of data protection
barriers (i.e. after writing journal data some kind of
synchronization with media operations is used), otherwise,
because of possible reordering in the cache, even after
successful journal rollback, you very much risk losing your
data on the FS. Currently, Linux IO subsystem guarantees
order of write operations only using data protection
barriers. Some info about it from the XFS point of view could
be found at http://oss.sgi.com/projects/xfs/faq.html#wcache.
On Linux initiators for EXT3 and ReiserFS file systems the
barrier protection could be turned on using "barrier=1" and
"barrier=flush" mount options correspondingly. Note, that
usually it turned off by default and the status of barriers
usage isn't reported anywhere in the system logs as well as
there is no way to know it on the mounted file system (at
least no known one). Windows and, AFAIK, other UNIX'es don't
need any special explicit options and do necessary barrier
actions on write-back caching devices by default. Also note
that on some real-life workloads write through caching might
perform better, than write back one with the barrier
protection turned on.
Also you should realize that Linux doesn't provide a
guarantee that after sync()/fsync() all written data really
hit permanent storage, they can be then in the cache of your
backstorage device and lost on power failure event. Thus,
even with write-through cache mode, you still need a good UPS
to protect yourself from your data loss (note, data loss, not
the file system integrity corruption).
IMPORTANT: Some disk and partition table management utilities don't support
========= block sizes >512 bytes, therefore make sure that your favorite one
supports it. Currently only cfdisk is known to work only with
512 bytes blocks, other utilities like fdisk on Linux or
standard disk manager on Windows are proved to work well with
non-512 bytes blocks. Note, if you export a disk file or
device with some block size, different from one, with which
it was already partitioned, you could get various weird
things like utilities hang up or other unexpected behavior.
Hence, to be sure, zero the exported file or device before
the first access to it from the remote initiator with another
block size. On a Windows initiator make sure you "Set Signature"
in the disk manager on the imported from the target drive
before doing any other partitioning on it. After you
successfully mounted a file system over non-512 bytes block
size device, the block size stops mattering, any program will
work with files on such file system.
BLOCKIO VDISK mode
------------------
This module works best for these types of scenarios:
1) Data that are not aligned to 4K sector boundaries and <4K block sizes
are used, which is normally found in virtualization environments where
operating systems start partitions on odd sectors (Windows and its
sector 63).
2) Large block data transfers normally found in database loads/dumps and
streaming media.
3) Advanced relational database systems that perform their own caching
which prefer or demand direct IO access and, because of the nature of
their data access, can actually see worse performance with
non-discriminate caching.
4) Multiple layers of targets where the secondary and above layers need
to have a consistent view of the primary targets in order to preserve
data integrity which a page cache backed IO type might not provide
reliably.
Also it has an advantage over FILEIO that it doesn't copy data between
the system cache and the commands data buffers, so it saves a
considerable amount of CPU power and memory bandwidth.
IMPORTANT: Since data in BLOCKIO and FILEIO modes are not consistent between
========= them, if you try to use a device in both those modes simultaneously,
you will almost instantly corrupt your data on that device.
Pass-through mode
-----------------
In the pass-through mode (i.e. using the pass-through device handlers
scst_disk, scst_tape, etc) SCSI commands, coming from remote initiators,
are passed to local SCSI hardware on target as is, without any
modifications. As any other hardware, the local SCSI hardware can not
handle commands with amount of data and/or segments count in
scatter-gather array bigger than some values. Therefore, when using the
pass-through mode you should note that values for maximum number of
segments and maximum amount of transferred data for each SCSI command on
devices on initiators can not be bigger, than corresponding values of
the corresponding SCSI devices on the target. Otherwise you will see
symptoms like small transfers work well, but large ones stall and
messages like: "Unable to complete command due to SG IO count
limitation" are printed in the kernel logs.
You can't control from the user space limit of the scatter-gather
segments, but for block devices usually it is sufficient if you set on
the initiators /sys/block/DEVICE_NAME/queue/max_sectors_kb in the same
or lower value as in /sys/block/DEVICE_NAME/queue/max_hw_sectors_kb for
the corresponding devices on the target.
For not-block devices SCSI commands are usually generated directly by
applications, so, if you experience large transfers stalls, you should
check documentation for your application how to limit the transfer
sizes.
User space mode using scst_user dev handler
-------------------------------------------
User space program fileio_tgt uses interface of scst_user dev handler
and allows to see how it works in various modes. Fileio_tgt provides
mostly the same functionality as scst_vdisk handler with the most
noticeable difference that it supports O_DIRECT mode. O_DIRECT mode is
basically the same as BLOCKIO, but also supports files, so for some
loads it could be significantly faster, than the regular FILEIO access.
All the words about BLOCKIO from above apply to O_DIRECT as well. See
fileio_tgt's README file for more details.
Performance
-----------
Before doing any performance measurements note that:
I. Performance results are very much dependent on your type of load,
so it is crucial that you choose access mode (FILEIO, BLOCKIO,
O_DIRECT, pass-through), which suits your needs the best.
II. In order to get the maximum performance you should:
1. For SCST:
- Disable in Makefile CONFIG_SCST_STRICT_SERIALIZING, CONFIG_SCST_EXTRACHECKS,
CONFIG_SCST_TRACING, CONFIG_SCST_DEBUG*, CONFIG_SCST_STRICT_SECURITY
- For pass-through devices enable
CONFIG_SCST_ALLOW_PASSTHROUGH_IO_SUBMIT_IN_SIRQ.
2. For target drivers:
- Disable in Makefiles CONFIG_SCST_EXTRACHECKS, CONFIG_SCST_TRACING,
CONFIG_SCST_DEBUG*
3. For device handlers, including VDISK:
- Disable in Makefile CONFIG_SCST_TRACING and CONFIG_SCST_DEBUG.
- If your initiator(s) use dedicated exported from the target virtual
SCSI devices and have more or equal amount of memory, than the
target, it is recommended to use O_DIRECT option (currently it is
available only with fileio_tgt user space program) or BLOCKIO. With
them you could have up to 100% increase in throughput.
IMPORTANT: Some of the compilation options enabled by default, i.e. SCST
========= is optimized currently rather for development and bug hunting,
than for performance.
If you use SCST version taken directly from the SVN repository, you can
set the above options, except CONFIG_SCST_ALLOW_PASSTHROUGH_IO_SUBMIT_IN_SIRQ,
using debug2perf Makefile target.
4. For other target and initiator software parts:
- Don't enable debug/hacking features in the kernel, i.e. use them as
they are by default.
- The default kernel read-ahead and queuing settings are optimized
for locally attached disks, therefore they are not optimal if they
attached remotely (SCSI target case), which sometimes could lead to
unexpectedly low throughput. You should increase read-ahead size to at
least 512KB or even more on all initiators and the target.
You should also limit on all initiators maximum amount of sectors per
SCSI command. To do it on Linux initiators, run:
echo "64" > /sys/block/sdX/queue/max_sectors_kb
where specify instead of X your imported from target device letter,
like 'b', i.e. sdb.
To increase read-ahead size on Linux, run:
blockdev --setra N /dev/sdX
where N is a read-ahead number in 512-byte sectors and X is a device
letter like above.
Note: you need to set read-ahead setting for device sdX again after
you changed the maximum amount of sectors per SCSI command for that
device.
- You may need to increase amount of requests that OS on initiator
sends to the target device. To do it on Linux initiators, run
echo "64" > /sys/block/sdX/queue/nr_requests
where X is a device letter like above.
You may also experiment with other parameters in /sys/block/sdX
directory, they also affect performance. If you find the best values,
please share them with us.
- On the target deadline IO scheduler with read_expire and
write_expire increased on all exported devices to 5000 and 15000
correspondingly should be the fastest for BLOCKIO, but for FILEIO
seems CFQ often outperforms it. So, try on your load and use the best
one.
- It is recommended to turn the kernel preemption off, i.e. set
the kernel preemption model to "No Forced Preemption (Server)".
- Looks like XFS is the best filesystem on the target to store device
files, because it allows considerably better linear write throughput,
than ext3.
5. For hardware on target.
- Make sure that your target hardware (e.g. target FC or network card)
and underlying IO hardware (e.g. IO card, like SATA, SCSI or RAID to
which your disks connected) don't share the same PCI bus. You can
check it using lspci utility. They have to work in parallel, so it
will be better if they don't compete for the bus. The problem is not
only in the bandwidth, which they have to share, but also in the
interaction between cards during that competition. This is very
important, because in some cases if target and backend storage
controllers share the same PCI bus, it could lead up to 5-10 times
less performance, than expected. Moreover, some motherboards (by
Supermicro, particularly) have serious stability issues if there are
several high speed devices on the same bus working in parallel. If
you have no choice, but PCI bus sharing, set in the BIOS PCI latency
as low as possible.
6. If you use VDISK IO module in FILEIO mode, NV_CACHE option will
provide you the best performance. But using it make sure you use a good
UPS with ability to shutdown the target on the power failure.
IMPORTANT: If you use on initiator some versions of Windows (at least W2K)
========= you can't get good write performance for VDISK FILEIO devices with
default 512 bytes block sizes. You could get about 10% of the
expected one. This is because of the partition alignment, which
is (simplifying) incompatible with how Linux page cache
works, so for each write the corresponding block must be read
first. Use 4096 bytes block sizes for VDISK devices and you
will have the expected write performance. Actually, any OS on
initiators, not only Windows, will benefit from block size
max(PAGE_SIZE, BLOCK_SIZE_ON_UNDERLYING_FS), where PAGE_SIZE
is the page size, BLOCK_SIZE_ON_UNDERLYING_FS is block size
on the underlying FS, on which the device file located, or 0,
if a device node is used. Both values are from the target.
See also important notes about setting block sizes >512 bytes
for VDISK FILEIO devices above.
What if target's backstorage is too slow
----------------------------------------
If under high load you experience I/O stalls or see in the kernel log on
the target abort or reset messages, then your backstorage is too slow
comparing with your target link speed and amount of simultaneously
queued commands. On some seek intensive workloads even fast disks or
RAIDs, which are able to serve continuous data stream at 500+ MB/s speed,
can be as slow as 0.3 MB/s. Another possible cause for that can be
MD/LVM/RAID on your target as in http://lkml.org/lkml/2008/2/27/96
(check the whole thread as well).
Thus, in such situations simply processing of one or more commands takes
too long time, hence initiator decides that they are stuck on the target
and tries to recover. Particularly, it is known that the default amount
of simultaneously queued commands (48) is sometimes too high if you do
intensive writes from VMware on a target disk, which uses LVM in the
snapshot mode. In this case a value like 16 or even 8-10 depending on your
backstorage speed could be more appropriate.
Unfortunately, currently SCST lacks dynamic I/O flow control, when the
queue depth on the target is dynamically decreased/increased based on
how slow/fast the backstorage speed comparing to the target link. So,
there are only 5 possible actions, which you can do to workaround or fix
this issue:
1. Ignore incoming task management (TM) commands. It's fine if there are
not too many of them, so average performance isn't hurt and the
corresponding device isn't put offline, i.e. if the backstorage isn't
too much slow.
2. Decrease /sys/block/sdX/device/queue_depth on the initiator in case
if it's Linux (see below how) or/and SCST_MAX_TGT_DEV_COMMANDS constant
in scst_priv.h file until you stop seeing incoming TM commands.
ISCSI-SCST driver also has its own iSCSI specific parameter for that.
3. Try to avoid such seek intensive workloads.
4. Increase speed of the target's backstorage.
5. Implement in SCST the dynamic I/O flow control.
To decrease device queue depth on Linux initiators run command:
# echo Y >/sys/block/sdX/device/queue_depth
where Y is the new number of simultaneously queued commands, X - your
imported device letter, like 'a' for sda device. There are no special
limitations for Y value, it can be any value from 1 to possible maximum
(usually, 32), so start from dividing the current value by 2, i.e. set
16, if /sys/block/sdX/device/queue_depth contains 32.
Note, that logged messages about QUEUE_FULL status are quite different
by nature. This is a normal work, just SCSI flow control in action.
Simply don't enable "mgmt_minor" logging level, or, alternatively, if
you are confident in the worst case performance of your back-end
storage, you can increase SCST_MAX_TGT_DEV_COMMANDS in scst_priv.h to
64. Usually initiators don't try to push more commands on the target.
Credits
-------
Thanks to:
* Mark Buechler <mark.buechler@gmail.com> for a lot of useful
suggestions, bug reports and help in debugging.
* Ming Zhang <mingz@ele.uri.edu> for fixes and comments.
* Nathaniel Clark <nate@misrule.us> for fixes and comments.
* Calvin Morrow <calvin.morrow@comcast.net> for testing and useful
suggestions.
* Hu Gang <hugang@soulinfo.com> for the original version of the
LSI target driver.
* Erik Habbinga <erikhabbinga@inphase-tech.com> for fixes and support
of the LSI target driver.
* Ross S. W. Walker <rswwalker@hotmail.com> for the original block IO
code and Vu Pham <huongvp@yahoo.com> who updated it for the VDISK dev
handler.
* Michael G. Byrnes <michael.byrnes@hp.com> for fixes.
* Alessandro Premoli <a.premoli@andxor.it> for fixes
* Nathan Bullock <nbullock@yottayotta.com> for fixes.
* Terry Greeniaus <tgreeniaus@yottayotta.com> for fixes.
* Krzysztof Blaszkowski <kb@sysmikro.com.pl> for many fixes and bug reports.
* Jianxi Chen <pacers@users.sourceforge.net> for fixing problem with
devices >2TB in size
* Bart Van Assche <bart.vanassche@gmail.com> for a lot of help
Vladislav Bolkhovitin <vst@vlnb.net>, http://scst.sourceforge.net

View File

@@ -951,7 +951,7 @@ struct scst_session {
void (*unreg_done_fn) (struct scst_session *sess);
void (*unreg_cmds_done_fn) (struct scst_session *sess);
#ifdef MEASURE_LATENCY /* must be last */
#ifdef CONFIG_SCST_MEASURE_LATENCY /* must be last */
spinlock_t meas_lock;
uint64_t scst_time, processing_time;
unsigned int processed_cmds;
@@ -1225,7 +1225,7 @@ struct scst_cmd {
struct scst_cmd *orig_cmd; /* Used to issue REQUEST SENSE */
#ifdef MEASURE_LATENCY /* must be last */
#ifdef CONFIG_SCST_MEASURE_LATENCY /* must be last */
uint64_t start, pre_exec_finish, post_exec_start;
#endif
};

View File

@@ -1,81 +1,81 @@
menu "SCSI target middle level support"
menu "SCSI target (SCST) support"
config SCST
tristate "SCSI target middle level support"
default SCSI
tristate "SCSI target (SCST) support"
default n
depends on SCSI && PROC_FS
help
SCSI TARGET is designed to provide unified, consistent interface
between SCSI target drivers and Linux kernel and simplify target
drivers development as much as possible.
SCSI target (SCST) is designed to provide unified, consistent
interface between SCSI target drivers and Linux kernel and
simplify target drivers development as much as possible. Visit
http://scst.sourceforge.net for more info about it.
config SCST_DISK
tristate "SCSI target disk support"
default SCST
depends on SCSI && PROC_FS && SCST
help
SCSI TARGET handler for disk device.
SCST pass-through device handler for disk device.
config SCST_TAPE
tristate "SCSI target tape support"
default SCST
depends on SCSI && PROC_FS && SCST
help
SCSI TARGET handler for tape device.
SCST pass-through device handler for tape device.
config SCST_CDROM
tristate "SCSI target cdrom support"
tristate "SCSI target CDROM support"
default SCST
depends on SCSI && PROC_FS && SCST
help
SCSI TARGET handler for cdrom device.
SCST pass-through device handler for CDROM device.
config SCST_MODISK
tristate "SCSI target MO disk support"
default SCST
depends on SCSI && PROC_FS && SCST
help
SCSI TARGET handler for MO disk device.
SCST pass-through device handler for MO disk device.
config SCST_CHANGER
tristate "SCSI target changer support"
default SCST
depends on SCSI && PROC_FS && SCST
help
SCSI TARGET handler for changer device.
SCST pass-through device handler for changer device.
config SCST_PROCESSOR
tristate "SCSI target processor support"
default SCST
depends on SCSI && PROC_FS && SCST
help
SCSI TARGET handler for processor device.
SCST pass-through device handler for processor device.
config SCST_RAID
tristate "SCSI target storage array controller (raid) support"
tristate "SCSI target storage array controller (RAID) support"
default SCST
depends on SCSI && PROC_FS && SCST
help
SCSI TARGET handler for raid storage array controller (raid) device.
SCST pass-through device handler for raid storage array controller (RAID) device.
config SCST_VDISK
tristate "SCSI target virtual disk and/or cdrom support"
tristate "SCSI target virtual disk and/or CDROM support"
default SCST
depends on SCSI && PROC_FS && SCST
help
SCSI TARGET handler for virtual disk and/or cdrom device.
SCST device handler for virtual disk and/or CDROM device.
config SCST_USER
tristate "SCSI user space virtual target devices support"
default SCST
depends on SCSI && PROC_FS && SCST && ! HIGHMEM
help
SCSI TARGET handler for virtual user space device.
source "drivers/scst/iscsi-scst/Kconfig"
SCST device handler for virtual user space device.
config SCST_STRICT_SERIALIZING
bool "Strict serialization"
depends on SCST
help
Enable strict SCSI command serialization. When enabled, SCST sends
all SCSI commands to the underlying SCSI device synchronously, one
@@ -87,18 +87,22 @@ config SCST_STRICT_SERIALIZING
in Linux doesn't allow to abort all commands reliably if they have
been sent asynchronously to a stateful device.
Enable this option if you use stateful device(s) and need as much
error recovery reliability as possible. If unsure, leave this
option disabled.
error recovery reliability as possible.
If unsure, say "N".
config SCST_STRICT_SECURITY
bool "Strict security"
depends on SCST
help
Makes SCST clear (zero-fill) allocated data buffers. Note: this has a
significant performance penalty. If unsure, leave this option
disabled.
significant performance penalty.
If unsure, say "N".
config SCST_ALLOW_PASSTHROUGH_IO_SUBMIT_IN_SIRQ
bool "Allow pass-through commands to be sent from soft-IRQ context"
depends on SCST
help
Allows SCST to submit SCSI pass-through commands to real SCSI devices
via the SCSI middle layer using scsi_execute_async() function from
@@ -109,10 +113,13 @@ config SCST_ALLOW_PASSTHROUGH_IO_SUBMIT_IN_SIRQ
performance. It is more or less safe. In the worst case, if in your
configuration the SCSI middle layer really doesn't expect SIRQ
context in scsi_execute_async() function, you will get a warning
message in the kernel log. If unsure, leave this option disabled.
message in the kernel log.
If unsure, say "N".
config SCST_ABORT_CONSIDER_FINISHED_TASKS_AS_NOT_EXISTING
bool "Send back UNKNOWN TASK when an already finished task is aborted"
depends on SCST
help
Controls which response is sent by SCST to the initiator in case
the initiator attempts to abort (ABORT TASK) an already finished
@@ -120,10 +127,13 @@ config SCST_ABORT_CONSIDER_FINISHED_TASKS_AS_NOT_EXISTING
sent back to the initiator. However, some initiators, particularly
the VMware iSCSI initiator, interpret the UNKNOWN TASK response as
if the target got crazy and try to RESET it. Then sometimes the
initiator gets crazy itself. If unsure, leave this option disabled.
initiator gets crazy itself.
If unsure, say "N".
config SCST_USE_EXPECTED_VALUES
bool "Prefer initiator-supplied SCSI command attributes"
depends on SCST
help
When SCST receives a SCSI command from an initiator, such a SCSI
command has both data transfer length and direction attributes.
@@ -136,58 +146,75 @@ config SCST_USE_EXPECTED_VALUES
message: "Unknown opcode XX for YY. Should you update
scst_scsi_op_table?" and when the initiator complains. Please
report any unrecognized commands to scst-devel@lists.sourceforge.net.
If unsure, leave this option disabled.
If unsure, say "N".
config SCST_EXTRACHECKS
bool "Extra consistency checks"
depends on SCST
help
Enable additional consistency checks in the SCSI middle level target
code. This may be helpful for SCST developers. If unsure, leave this
option disabled.
code. This may be helpful for SCST developers. Enable it if you have
any problems.
If unsure, say "N".
config SCST_TRACING
bool "Tracing support"
depends on SCST
default y
help
Enable SCSI middle level tracing support. Tracing can be controlled
dynamically via /proc/scsi_tgt/trace_level. The traced information
is sent to the kernel log and may be very helpful when analyzing
the cause of a communication problem between initiator and target.
If unsure, leave this option disabled.
If unsure, say "Y".
config SCST_DEBUG
bool "Debugging support"
depends on SCST
select DEBUG_BUGVERBOSE
help
Enables support for debugging SCST. This may be helpful for SCST
developers. If unsure, leave this option disabled.
developers.
If unsure, say "N".
config SCST_DEBUG_OOM
bool "Out-of-memory debugging support"
depends on SCST
help
Let SCST's internal memory allocation function
(scst_alloc_sg_entries()) fail about once in every 10000 calls, at
least if the flag __GFP_NOFAIL has not been set. This allows SCST
developers to test the behavior of SCST in out-of-memory conditions.
This may be helpful for SCST developers. If unsure, leave this
option disabled.
This may be helpful for SCST developers.
If unsure, say "N".
config SCST_DEBUG_RETRY
bool "SCSI command retry debugging support"
depends on SCST
help
Let SCST's internal SCSI command transfer function
(scst_rdy_to_xfer()) fail about once in every 100 calls. This allows
SCST developers to test the behavior of SCST when SCSI queues fill
up. This may be helpful for SCST developers. If unsure, leave this
option disabled.
up. This may be helpful for SCST developers.
If unsure, say "N".
config SCST_DEBUG_SN
bool "SCSI sequence number debugging support"
depends on SCST
help
Allows to test SCSI command ordering via sequence numbers by
randomly changing the type of SCSI commands into
SCST_CMD_QUEUE_ORDERED, SCST_CMD_QUEUE_HEAD_OF_QUEUE or
SCST_CMD_QUEUE_SIMPLE for about one in 300 SCSI commands.
This may be helpful for SCST developers. If unsure, leave this
option disabled.
This may be helpful for SCST developers.
If unsure, say "N".
config SCST_DEBUG_TM
bool "Task management debugging support"
@@ -197,8 +224,9 @@ config SCST_DEBUG_TM
When enabled, some of the commands on LUN 0 in the default access
control group will be delayed for about 60 seconds. This will
cause the remote initiator send SCSI task management functions,
e.g. ABORT TASK and TARGET RESET. If unsure, leave this option
disabled.
e.g. ABORT TASK and TARGET RESET.
If unsure, say "N".
config SCST_TM_DBG_GO_OFFLINE
bool "Let devices become completely unresponsive"
@@ -206,6 +234,22 @@ config SCST_TM_DBG_GO_OFFLINE
help
Enable this option if you want the device to eventually become
completely unresponsive. When disabled, the device will receive
ABORT and RESET commands. If unsure, leave this option disabled.
ABORT and RESET commands.
config SCST_MEASURE_LATENCY
bool "Commands processing latency measurement facility"
depends on SCST
help
This option enables the command processing latency measurement
facility in SCST. It reports the average command processing
latency in the /proc/scsi_tgt/latency file. You can clear
already measured results by writing 0 to this file. Note that you
need a non-preemptible kernel to get correct results.
If unsure, say "N".
source "drivers/scst/iscsi-scst/Kconfig"
source "drivers/scst/qla2x00-target/Kconfig"
source "drivers/scst/srpt/Kconfig"
endmenu

View File

@@ -7,5 +7,5 @@ scst-y += scst_proc.o
scst-y += scst_mem.o
scst-y += scst_debug.o
obj-$(CONFIG_SCST) += scst.o dev_handlers/ iscsi-scst/
obj-$(CONFIG_SCST) += scst.o dev_handlers/ iscsi-scst/ qla2xxx-target/

View File

@@ -131,7 +131,7 @@ EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG -g -W -Wno-unused-parameter
#EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG_OOM
#EXTRA_CFLAGS += -DCONFIG_SCST_DEBUG_SN
#EXTRA_CFLAGS += -DMEASURE_LATENCY
#EXTRA_CFLAGS += -DCONFIG_SCST_MEASURE_LATENCY
# If defined, makes SCST zero allocated data buffers.
# Undefining it considerably improves performance and eases CPU load,

View File

@@ -1245,7 +1245,7 @@ struct scst_session *scst_alloc_session(struct scst_tgt *tgt, gfp_t gfp_mask,
INIT_LIST_HEAD(&sess->init_deferred_cmd_list);
INIT_LIST_HEAD(&sess->init_deferred_mcmd_list);
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
spin_lock_init(&sess->meas_lock);
#endif

View File

@@ -68,7 +68,7 @@ static struct scst_proc_data scst_dev_handler_proc_data;
#define SCST_PROC_GROUPS_DEVICES_ENTRY_NAME "devices"
#define SCST_PROC_GROUPS_USERS_ENTRY_NAME "names"
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
#define SCST_PROC_LAT_ENTRY_NAME "latency"
#endif
@@ -381,7 +381,7 @@ out:
#endif /* defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) */
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
static int lat_info_show(struct seq_file *seq, void *v)
{
@@ -483,12 +483,12 @@ static struct scst_proc_data scst_lat_proc_data = {
.data = "scsi_tgt",
};
#endif /* MEASURE_LATENCY */
#endif /* CONFIG_SCST_MEASURE_LATENCY */
static int __init scst_proc_init_module_log(void)
{
int res = 0;
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) || defined(MEASURE_LATENCY)
#if defined(CONFIG_SCST_DEBUG) || defined(CONFIG_SCST_TRACING) || defined(CONFIG_SCST_MEASURE_LATENCY)
struct proc_dir_entry *generic;
#endif
@@ -505,7 +505,7 @@ static int __init scst_proc_init_module_log(void)
}
#endif
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
if (res == 0) {
generic = scst_create_proc_entry(scst_proc_scsi_tgt,
SCST_PROC_LAT_ENTRY_NAME,
@@ -530,7 +530,7 @@ static void scst_proc_cleanup_module_log(void)
remove_proc_entry(SCST_PROC_LOG_ENTRY_NAME, scst_proc_scsi_tgt);
#endif
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
remove_proc_entry(SCST_PROC_LAT_ENTRY_NAME, scst_proc_scsi_tgt);
#endif

View File

@@ -170,7 +170,7 @@ out_redirect:
goto out;
}
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
static inline uint64_t scst_sec_to_nsec(time_t sec)
{
return (uint64_t)sec * 1000000000;
@@ -184,7 +184,7 @@ void scst_cmd_init_done(struct scst_cmd *cmd, int pref_context)
TRACE_ENTRY();
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
{
struct timespec ts;
getnstimeofday(&ts);
@@ -1078,7 +1078,7 @@ static void scst_do_cmd_done(struct scst_cmd *cmd, int result,
{
TRACE_ENTRY();
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
{
struct timespec ts;
getnstimeofday(&ts);
@@ -1213,7 +1213,7 @@ static void scst_cmd_done_local(struct scst_cmd *cmd, int next_state)
TRACE_ENTRY();
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
{
struct timespec ts;
getnstimeofday(&ts);
@@ -1925,7 +1925,7 @@ static int scst_send_to_midlev(struct scst_cmd **active_cmd)
goto out;
}
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
if (cmd->pre_exec_finish == 0) {
struct timespec ts;
getnstimeofday(&ts);
@@ -2569,7 +2569,7 @@ static int scst_pre_xmit_response(struct scst_cmd *cmd)
res = SCST_CMD_STATE_RES_CONT_SAME;
out:
#ifdef MEASURE_LATENCY
#ifdef CONFIG_SCST_MEASURE_LATENCY
{
struct timespec ts;
uint64_t finish, scst_time, proc_time;

View File

@@ -1,12 +1,12 @@
config INFINIBAND_SRPT
tristate "InfiniBand SCSI RDMA Protocol Target Mode"
depends on INFINIBAND
depends on INFINIBAND && SCST
---help---
Support for the SCSI RDMA Protocol Target mode over InfiniBand.
This allows you to turn a standard Linux box to native Infiniband
storage using SRP protocol.
You will also need the SCST middle level drivers from
http://scst.sf.net/
The SRP protocol is defined by the INCITS T10 technical
committee. See <http://www.t10.org/>.
Support for the SCSI RDMA Protocol Target mode over
InfiniBand. This allows you to turn a standard Linux box into
native InfiniBand storage using the SRP protocol. The SRP protocol
is defined by the INCITS T10 technical committee. See
<http://www.t10.org/>.
You will also need the SCST framework enabled.