mirror of
https://github.com/SCST-project/scst.git
synced 2026-05-22 13:11:27 +00:00
- Fixed possible crash on SN slots overflow
- Docs updates - Other minor changes git-svn-id: http://svn.code.sf.net/p/scst/svn/trunk@243 d57e44dd-8a1f-0410-8b47-8ef2f437770f
This commit is contained in:
@@ -32,6 +32,10 @@ Installation
|
||||
Basically as in README-IET, where file names are changed as specified
|
||||
above.
|
||||
|
||||
If you experience problems during kernel module load or running, check
|
||||
your system and/or kernel logs (or run dmesg command for the few most
|
||||
recent kernel messages).
|
||||
|
||||
To use full power of TCP zero-copy transmit functions, especially
|
||||
dealing with user space supplied via scst_user module memory, iSCSI-SCST
|
||||
needs to be notified when Linux networking finished data transmission.
|
||||
@@ -46,7 +50,7 @@ original IET behavior, when for data transmission:
|
||||
handlers) usage of SGV cache on transmit path (READ-type commands)
|
||||
will be disabled. The performance hit will be not big, but performance
|
||||
will still remain better, than for IET, because SGV cache will remain
|
||||
used on receive path and IET doesn't have such feature.
|
||||
used on receive path while IET doesn't have such feature.
|
||||
|
||||
- For user space allocated memory (scst_user handler) all transmitted
|
||||
data will be additionally copied into temporary TCP buffers. The
|
||||
@@ -103,13 +107,12 @@ means name of the target, for example:
|
||||
"Default_iqn.2007-05.com.example:storage.disk1.sys1.xyz", and add there
|
||||
all necessary LUNs. Check SCST README file for details.
|
||||
|
||||
Check SCST README file how to tune for the best performance.
|
||||
|
||||
If under high load you experience I/O stalls or see in the kernel log
|
||||
abort or reset messages then try to reduce QueuedCommands parameter in
|
||||
iscsi-scstd.conf file for the corresponding target. Particularly, it is
|
||||
known that the default value 32 sometimes too high if you do intensive
|
||||
writes from VMware on a target disk, which use LVM in the snapshot mode.
|
||||
In this case value like 16 or even 10 depending of your backstorage
|
||||
speed could be more appropriate.
|
||||
abort or reset messages, then try to reduce QueuedCommands parameter in
|
||||
iscsi-scstd.conf file for the corresponding target. See also SCST README
|
||||
file for more details about that issue.
|
||||
|
||||
CAUTION: Working of target and initiator on the same host isn't
|
||||
======== supported. See SCST README file for details.
|
||||
|
||||
@@ -1613,8 +1613,7 @@ static void q2t_task_mgmt_fn_done(struct scst_mgmt_cmd *scst_mcmd)
|
||||
|
||||
TRACE_ENTRY();
|
||||
|
||||
TRACE((scst_mcmd->fn == SCST_ABORT_TASK) ? TRACE_MGMT_MINOR : TRACE_MGMT,
|
||||
"scst_mcmd (%p) status %#x state %#x", scst_mcmd,
|
||||
TRACE_MGMT_DBG("scst_mcmd (%p) status %#x state %#x", scst_mcmd,
|
||||
scst_mcmd->status, scst_mcmd->state);
|
||||
|
||||
mcmd = scst_mgmt_cmd_get_tgt_priv(scst_mcmd);
|
||||
|
||||
44
scst/README
44
scst/README
@@ -87,6 +87,9 @@ Then, to see your devices remotely, you need to add them to at least
|
||||
are seen remotely. There must be LUN 0 in each security group, i.e. LUs
|
||||
numeration must not start from, e.g., 1.
|
||||
|
||||
If you experience problems during modules load or running, check your
|
||||
kernel logs (or run dmesg command for the few most recent messages).
|
||||
|
||||
IMPORTANT: Without loading appropriate device handler, corresponding devices
|
||||
========= will be invisible for remote initiators, which could lead to holes
|
||||
in the LUN addressing, so automatic device scanning by remote SCSI
|
||||
@@ -713,6 +716,47 @@ IMPORTANT: If you use on initiator some versions of Windows (at least W2K)
|
||||
See also important notes about setting block sizes >512 bytes
|
||||
for VDISK FILEIO devices above.
|
||||
|
||||
What if target's backstorage is too slow
|
||||
----------------------------------------
|
||||
|
||||
If under high load you experience I/O stalls or see in the kernel log on
|
||||
the target abort or reset messages, then your backstorage is too slow
|
||||
for your target link speed and amount of simultaneously queued commands.
|
||||
Simply processing of one or more commands takes too long, so initiator
|
||||
decides that they are stuck on the target and tries to recover.
|
||||
Particularly, it is known that the default amount of simultaneously
|
||||
queued commands (48) is sometimes too high if you do intensive writes
|
||||
from VMware on a target disk, which uses LVM in the snapshot mode. In
|
||||
this case value like 16 or even 10 depending of your backstorage speed
|
||||
could be more appropriate.
|
||||
|
||||
Unfortunately, currently SCST lacks dynamic I/O flow control, when the
|
||||
queue depth on the target is dynamically decreased/increased based on
|
||||
how slow/fast the backstorage speed comparing to the target link. So,
|
||||
there are only 4 possible actions, which you can do to workaround or fix
|
||||
this issue:
|
||||
|
||||
1. Ignore incoming task management (TM) commands. It's fine if there are
|
||||
not too many of them, i.e. if the backstorage isn't too slow.
|
||||
|
||||
2. Decrease /sys/block/sdX/device/queue_depth on the initiator in case
|
||||
if it's Linux (see below how) and/or SCST_MAX_TGT_DEV_COMMANDS constant
|
||||
in scst_priv.h file until you stop seeing incoming TM commands.
|
||||
|
||||
3. Insrease speed of the target's backstorage.
|
||||
|
||||
4. Implement in SCST the dynamic I/O flow control.
|
||||
|
||||
To decrease device queue depth on Linux initiators run command:
|
||||
|
||||
# echo Y >/sys/block/sdX/device/queue_depth
|
||||
|
||||
where Y is the new number of simultaneously queued commands, X - your
|
||||
imported device letter, like 'a' for sda device. There are no special
|
||||
limitations for Y value, it can be any value from 1 to possible maximum
|
||||
(usually, 32), so start from dividing the current value on 2, i.e. set
|
||||
16, if /sys/block/sdX/device/queue_depth contains 32.
|
||||
|
||||
Credits
|
||||
-------
|
||||
|
||||
|
||||
@@ -8,6 +8,11 @@ To be done
|
||||
the page cache (in order to avoid data copy between it and internal
|
||||
buffers). Requires modifications of the kernel.
|
||||
|
||||
- Dynamic I/O flow control, when the device queue depth on the target
|
||||
will be dynamically decreased/increased based on how slow/fast the
|
||||
backstorage speed comparing to the target link for current IO
|
||||
pattern.
|
||||
|
||||
- Fix in-kernel O_DIRECT mode.
|
||||
|
||||
- Close integration with Linux initiator SCSI mil-level, including
|
||||
|
||||
@@ -1427,11 +1427,11 @@ struct scst_tgt_dev
|
||||
* Set if the prev cmd was ORDERED. Size must allow unprotected
|
||||
* modifications
|
||||
*/
|
||||
unsigned long prev_cmd_ordered;
|
||||
unsigned long prev_cmd_ordered;
|
||||
|
||||
int num_free_sn_slots;
|
||||
int num_free_sn_slots; /* if it's <0, then all slots are busy */
|
||||
atomic_t *cur_sn_slot;
|
||||
atomic_t sn_slots[10];
|
||||
atomic_t sn_slots[15];
|
||||
|
||||
/* Used for storage of dev handler private stuff */
|
||||
void *dh_priv;
|
||||
|
||||
@@ -415,7 +415,7 @@ static struct scst_tgt_dev *scst_alloc_add_tgt_dev(struct scst_session *sess,
|
||||
INIT_LIST_HEAD(&tgt_dev->deferred_cmd_list);
|
||||
INIT_LIST_HEAD(&tgt_dev->skipped_sn_list);
|
||||
tgt_dev->expected_sn = 1;
|
||||
tgt_dev->num_free_sn_slots = ARRAY_SIZE(tgt_dev->sn_slots);
|
||||
tgt_dev->num_free_sn_slots = ARRAY_SIZE(tgt_dev->sn_slots)-1;
|
||||
tgt_dev->cur_sn_slot = &tgt_dev->sn_slots[0];
|
||||
for(i = 0; i < (int)ARRAY_SIZE(tgt_dev->sn_slots); i++)
|
||||
atomic_set(&tgt_dev->sn_slots[i], 0);
|
||||
|
||||
@@ -103,7 +103,7 @@ extern unsigned long scst_trace_flag;
|
||||
** Maximum count of uncompleted commands that an initiator could
|
||||
** queue on any device. Then it will start getting TASK QUEUE FULL status.
|
||||
**/
|
||||
#define SCST_MAX_TGT_DEV_COMMANDS 32
|
||||
#define SCST_MAX_TGT_DEV_COMMANDS 48
|
||||
|
||||
/**
|
||||
** Maximum count of uncompleted commands that could be queued on any device.
|
||||
|
||||
@@ -1772,14 +1772,16 @@ void scst_inc_expected_sn(struct scst_tgt_dev *tgt_dev, atomic_t *slot)
|
||||
|
||||
TRACE_SN("Slot is 0 (num_free_sn_slots=%d)",
|
||||
tgt_dev->num_free_sn_slots);
|
||||
if (tgt_dev->num_free_sn_slots != ARRAY_SIZE(tgt_dev->sn_slots)) {
|
||||
if (tgt_dev->num_free_sn_slots < (int)ARRAY_SIZE(tgt_dev->sn_slots)-1) {
|
||||
spin_lock_irq(&tgt_dev->sn_lock);
|
||||
if (tgt_dev->num_free_sn_slots != ARRAY_SIZE(tgt_dev->sn_slots)) {
|
||||
if (likely(tgt_dev->num_free_sn_slots < (int)ARRAY_SIZE(tgt_dev->sn_slots)-1)) {
|
||||
if (tgt_dev->num_free_sn_slots < 0)
|
||||
tgt_dev->cur_sn_slot = slot;
|
||||
smp_mb(); /* to be in-sync with SIMPLE case in scst_cmd_set_sn() */
|
||||
tgt_dev->num_free_sn_slots++;
|
||||
TRACE_SN("Incremented num_free_sn_slots (%d)",
|
||||
tgt_dev->num_free_sn_slots);
|
||||
if (tgt_dev->num_free_sn_slots == 0)
|
||||
tgt_dev->cur_sn_slot = slot;
|
||||
|
||||
}
|
||||
spin_unlock_irq(&tgt_dev->sn_lock);
|
||||
}
|
||||
@@ -2615,7 +2617,8 @@ static void scst_cmd_set_sn(struct scst_cmd *cmd)
|
||||
|
||||
tgt_dev->prev_cmd_ordered = 0;
|
||||
} else {
|
||||
TRACE(TRACE_MINOR, "%s", "Not enough SN slots");
|
||||
TRACE(TRACE_MINOR, "***WARNING*** Not enough SN slots "
|
||||
"%d", ARRAY_SIZE(tgt_dev->sn_slots));
|
||||
goto ordered;
|
||||
}
|
||||
break;
|
||||
@@ -2626,21 +2629,30 @@ static void scst_cmd_set_sn(struct scst_cmd *cmd)
|
||||
ordered:
|
||||
if (!tgt_dev->prev_cmd_ordered) {
|
||||
spin_lock_irqsave(&tgt_dev->sn_lock, flags);
|
||||
tgt_dev->num_free_sn_slots--;
|
||||
smp_mb();
|
||||
if ((tgt_dev->num_free_sn_slots >= 0) &&
|
||||
(atomic_read(tgt_dev->cur_sn_slot) > 0)) {
|
||||
do {
|
||||
tgt_dev->cur_sn_slot++;
|
||||
if (tgt_dev->cur_sn_slot ==
|
||||
tgt_dev->sn_slots +
|
||||
ARRAY_SIZE(tgt_dev->sn_slots))
|
||||
tgt_dev->cur_sn_slot = tgt_dev->sn_slots;
|
||||
} while(atomic_read(tgt_dev->cur_sn_slot) != 0);
|
||||
TRACE_SN("New cur SN slot %zd",
|
||||
tgt_dev->cur_sn_slot-tgt_dev->sn_slots);
|
||||
} else
|
||||
tgt_dev->num_free_sn_slots++;
|
||||
if (tgt_dev->num_free_sn_slots >= 0) {
|
||||
tgt_dev->num_free_sn_slots--;
|
||||
if (tgt_dev->num_free_sn_slots >= 0) {
|
||||
int i = 0;
|
||||
/*
|
||||
* Commands can finish in any order, so we don't
|
||||
* know, which slot is empty.
|
||||
*/
|
||||
while(1) {
|
||||
tgt_dev->cur_sn_slot++;
|
||||
if (tgt_dev->cur_sn_slot == tgt_dev->sn_slots +
|
||||
ARRAY_SIZE(tgt_dev->sn_slots))
|
||||
tgt_dev->cur_sn_slot = tgt_dev->sn_slots;
|
||||
|
||||
if (atomic_read(tgt_dev->cur_sn_slot) == 0)
|
||||
break;
|
||||
|
||||
i++;
|
||||
sBUG_ON(i == ARRAY_SIZE(tgt_dev->sn_slots));
|
||||
}
|
||||
TRACE_SN("New cur SN slot %zd",
|
||||
tgt_dev->cur_sn_slot-tgt_dev->sn_slots);
|
||||
}
|
||||
}
|
||||
spin_unlock_irqrestore(&tgt_dev->sn_lock, flags);
|
||||
}
|
||||
tgt_dev->prev_cmd_ordered = 1;
|
||||
|
||||
Reference in New Issue
Block a user